Path: blob/21.2-virgl/src/amd/llvm/ac_nir_to_llvm.c
/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir_to_llvm.h"
#include "ac_gpu_info.h"
#include "ac_binary.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"
#include "nir/nir.h"
#include "nir/nir_deref.h"
#include "sid.h"
#include "util/bitscan.h"
#include "util/u_math.h"
#include <llvm/Config/llvm-config.h>

struct ac_nir_context {
   struct ac_llvm_context ac;
   struct ac_shader_abi *abi;
   const struct ac_shader_args *args;

   gl_shader_stage stage;
   shader_info *info;

   LLVMValueRef *ssa_defs;

   LLVMValueRef scratch;
   LLVMValueRef constant_data;

   struct hash_table *defs;
   struct hash_table *phis;
   struct hash_table *vars;
   struct hash_table *verified_interp;

   LLVMValueRef main_function;
   LLVMBasicBlockRef continue_block;
   LLVMBasicBlockRef break_block;
};

static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
                                           const nir_instr *instr, bool image);

static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
                                     enum ac_descriptor_type desc_type, const nir_instr *instr,
                                     LLVMValueRef index, bool image, bool write);

static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
{
   LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
   if (def->num_components > 1) {
      type = LLVMVectorType(type, def->num_components);
   }
   return type;
}

static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
   assert(src.is_ssa);
   return nir->ssa_defs[src.ssa->index];
}

static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size, unsigned c_off)
{
   LLVMValueRef ptr = get_src(ctx, src);
   LLVMValueRef lds_i8 = ctx->ac.lds;
   if (ctx->stage != MESA_SHADER_COMPUTE)
      lds_i8 = LLVMBuildBitCast(ctx->ac.builder, ctx->ac.lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");

   ptr = LLVMBuildAdd(ctx->ac.builder, ptr, LLVMConstInt(ctx->ac.i32, c_off, 0), "");
   ptr = LLVMBuildGEP(ctx->ac.builder, lds_i8, &ptr, 1, "");
   int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));

   LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);

   return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), "");
}

static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(nir->defs, b);
   return (LLVMBasicBlockRef)entry->data;
}

static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
                                unsigned num_components)
{
   LLVMValueRef value = get_src(ctx, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = ac_get_llvm_num_components(value);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
                              LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};

      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value};
         value = ac_build_gather_values(&ctx->ac, values, num_components);
      } else {
         LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
         value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}

static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
                                 LLVMValueRef src0, LLVMValueRef src1)
{
   src0 = ac_to_integer(ctx, src0);
   src1 = ac_to_integer(ctx, src1);
   return LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
}

static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
   src0 = ac_to_float(ctx, src0);
   src1 = ac_to_float(ctx, src1);
   return LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
}

static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0)
{
   char name[64], type[64];
   LLVMValueRef params[] = {
      ac_to_float(ctx, src0),
   };

   ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
   ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
   assert(length < sizeof(name));
   return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
                                                LLVMTypeRef result_type, LLVMValueRef src0)
{
   if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
      return emit_intrin_1f_param(ctx, intrin, result_type, src0);

   LLVMTypeRef elem_type = LLVMGetElementType(result_type);
   LLVMValueRef ret = LLVMGetUndef(result_type);

   /* Scalarize the intrinsic, because vectors are not supported. */
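   /* Note: ac_build_type_name_for_intr() yields a type suffix such as "f32"
    * or "f16", so the per-element calls built below end up with names like
    * "llvm.amdgcn.rcp.f32" (example name; the prefix depends on the caller). */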
   for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
      char name[64], type[64];
      LLVMValueRef params[] = {
         ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
      };

      ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
      ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
      assert(length < sizeof(name));
      ret = LLVMBuildInsertElement(
         ctx->builder, ret,
         ac_build_intrinsic(ctx, name, elem_type, params, 1, AC_FUNC_ATTR_READNONE),
         LLVMConstInt(ctx->i32, i, 0), "");
   }
   return ret;
}

static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0,
                                         LLVMValueRef src1)
{
   char name[64], type[64];
   LLVMValueRef params[] = {
      ac_to_float(ctx, src0),
      ac_to_float(ctx, src1),
   };

   ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
   ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
   assert(length < sizeof(name));
   return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
                                         LLVMTypeRef result_type, LLVMValueRef src0,
                                         LLVMValueRef src1, LLVMValueRef src2)
{
   char name[64], type[64];
   LLVMValueRef params[] = {
      ac_to_float(ctx, src0),
      ac_to_float(ctx, src1),
      ac_to_float(ctx, src2),
   };

   ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
   ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
   assert(length < sizeof(name));
   return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
                               LLVMValueRef src2)
{
   LLVMTypeRef src1_type = LLVMTypeOf(src1);
   LLVMTypeRef src2_type = LLVMTypeOf(src2);

   if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
       LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
      src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
   } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
              LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
      src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
   }

   return LLVMBuildSelect(ctx->builder, src0, ac_to_integer_or_pointer(ctx, src1),
                          ac_to_integer_or_pointer(ctx, src2), "");
}

static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
}

static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
   LLVMTypeRef ret_type;
   LLVMTypeRef types[] = {ctx->i32, ctx->i1};
   LLVMValueRef res;
   LLVMValueRef params[] = {src0, src1};
   ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);

   res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);

   res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
   res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
   return res;
}

static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
   assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 1);

   switch (bitsize) {
   case 16:
      if (LLVMGetTypeKind(LLVMTypeOf(src0)) == LLVMVectorTypeKind) {
         assert(LLVMGetVectorSize(LLVMTypeOf(src0)) == 2);
         LLVMValueRef f[] = {
            LLVMBuildSelect(ctx->builder,
                            ac_llvm_extract_elem(ctx, src0, 0),
                            ctx->f16_1, ctx->f16_0, ""),
            LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 1),
                            ctx->f16_1, ctx->f16_0, ""),
         };
         return ac_build_gather_values(ctx, f, 2);
      }
      return LLVMBuildSelect(ctx->builder, src0, ctx->f16_1, ctx->f16_0, "");
   case 32:
      return LLVMBuildSelect(ctx->builder, src0, ctx->f32_1, ctx->f32_0, "");
   case 64:
      return LLVMBuildSelect(ctx->builder, src0, ctx->f64_1, ctx->f64_0, "");
   default:
      unreachable("Unsupported bit size.");
   }
}

static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   src0 = ac_to_float(ctx, src0);
   LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
   return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, "");
}

static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
{
   switch (bitsize) {
   case 8:
      return LLVMBuildSelect(ctx->builder, src0, ctx->i8_1, ctx->i8_0, "");
   case 16:
      return LLVMBuildSelect(ctx->builder, src0, ctx->i16_1, ctx->i16_0, "");
   case 32:
      return LLVMBuildSelect(ctx->builder, src0, ctx->i32_1, ctx->i32_0, "");
   case 64:
      return LLVMBuildSelect(ctx->builder, src0, ctx->i64_1, ctx->i64_0, "");
   default:
      unreachable("Unsupported bit size.");
   }
}

static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
   return LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, "");
}

static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef result;
   LLVMValueRef cond = NULL;

   src0 = ac_to_float(ctx, src0);
   result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

   if (ctx->chip_class >= GFX8) {
      LLVMValueRef args[2];
      /* Check if the result is a denormal - and flush to 0 if so. */
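      /* llvm.amdgcn.class.f16 returns true when the value falls in any of the
       * requested classes; the mask below selects negative and positive
       * subnormals, so 'cond' is true exactly when the truncated result is a
       * denormal that fquantize2f16 must flush to zero. */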
      args[0] = result;
      args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
      cond =
         ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
   }

   /* need to convert back up to f32 */
   result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

   if (ctx->chip_class >= GFX8)
      result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
   else {
      /* for GFX6-GFX7 */
      /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
       * so compare the result and flush to 0 if it's smaller.
       */
      LLVMValueRef temp, cond2;
      temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
      cond = LLVMBuildFCmp(
         ctx->builder, LLVMRealOGT,
         LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
         temp, "");
      cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
      cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
      result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
   }
   return result;
}

static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
   LLVMValueRef dst64, result;
   src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
   src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

   dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
   dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
   result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
   return result;
}

static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
   LLVMValueRef dst64, result;
   src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
   src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

   dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
   dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
   result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
   return result;
}

static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
{
   /* mask = ((1 << bits) - 1) << offset */
   return LLVMBuildShl(
      ctx->builder,
      LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
      offset, "");
}

static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
                                         LLVMValueRef insert, LLVMValueRef base)
{
   /* Calculate:
    * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
    * Use the right-hand side, which the LLVM backend can convert to V_BFI.
    */
   return LLVMBuildXor(
      ctx->builder, base,
      LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
}

static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
                                   LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
                                                        LLVMValueRef args[2]))
{
   LLVMValueRef comp[2];

   src0 = ac_to_float(ctx, src0);
   comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
   comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");

   return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
}

static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
{
   LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
   LLVMValueRef temps[2], val;
   int i;

   for (i = 0; i < 2; i++) {
      val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
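      /* Iteration 0 keeps the low half; iteration 1 shifts the high 16 bits
       * down so the truncate below picks them up. */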
      val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
      val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
      temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
   }
   return ac_build_gather_values(ctx, temps, 2);
}

static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
{
   unsigned mask;
   int idx;
   LLVMValueRef result;

   if (op == nir_op_fddx_fine)
      mask = AC_TID_MASK_LEFT;
   else if (op == nir_op_fddy_fine)
      mask = AC_TID_MASK_TOP;
   else
      mask = AC_TID_MASK_TOP_LEFT;

   /* for DDX we want the next X pixel, for DDY the next Y pixel. */
   if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
      idx = 1;
   else
      idx = 2;

   result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
   return result;
}

struct waterfall_context {
   LLVMBasicBlockRef phi_bb[2];
   bool use_waterfall;
};

/* To deal with divergent descriptors we can create a loop that handles all
 * lanes with the same descriptor on a given iteration (henceforth a
 * waterfall loop).
 *
 * These helpers create the begin and end of the loop, leaving the caller
 * to implement the body.
 *
 * params:
 *  - ctx is the usual nir context
 *  - wctx is a temporary struct containing some loop info. Can be left uninitialized.
 *  - value is the possibly divergent value for which we built the loop
 *  - divergent is whether value is actually divergent. If false we just pass
 *    things through.
 */
static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                    LLVMValueRef value, bool divergent)
{
   /* If the app claims the value is divergent but it is constant we can
    * end up with a dynamic index of NULL. */
   if (!value)
      divergent = false;

   wctx->use_waterfall = divergent;
   if (!divergent)
      return value;

   ac_build_bgnloop(&ctx->ac, 6000);

   LLVMValueRef active = LLVMConstInt(ctx->ac.i1, 1, false);
   LLVMValueRef scalar_value[NIR_MAX_VEC_COMPONENTS];

   for (unsigned i = 0; i < ac_get_llvm_num_components(value); i++) {
      LLVMValueRef comp = ac_llvm_extract_elem(&ctx->ac, value, i);
      scalar_value[i] = ac_build_readlane(&ctx->ac, comp, NULL);
      active = LLVMBuildAnd(ctx->ac.builder, active,
                            LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, comp, scalar_value[i], ""), "");
   }

   wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
   ac_build_ifcc(&ctx->ac, active, 6001);

   return ac_build_gather_values(&ctx->ac, scalar_value, ac_get_llvm_num_components(value));
}

static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                   LLVMValueRef value)
{
   LLVMValueRef ret = NULL;
   LLVMValueRef phi_src[2];
   LLVMValueRef cc_phi_src[2] = {
      LLVMConstInt(ctx->ac.i32, 0, false),
      LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
   };

   if (!wctx->use_waterfall)
      return value;

   wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);

   ac_build_endif(&ctx->ac, 6001);

   if (value) {
      phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
      phi_src[1] = value;

      ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
   }

   /*
    * By using the optimization barrier on the exit decision, we decouple
    * the operations from the break, and hence avoid LLVM hoisting the
    * operation into the break block.
    */
   LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
   ac_build_optimization_barrier(&ctx->ac, &cc, false);

   LLVMValueRef active =
      LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
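   /* Lanes that executed the body this iteration come through the second phi
    * input (cc == 0xffffffff) and take the break below; the remaining lanes
    * see cc == 0 and run another iteration with a new scalarized value. */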
   ac_build_ifcc(&ctx->ac, active, 6002);
   ac_build_break(&ctx->ac);
   ac_build_endif(&ctx->ac, 6002);

   ac_build_endloop(&ctx->ac, 6000);
   return ret;
}

static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
   LLVMValueRef src[4], result = NULL;
   unsigned num_components = instr->dest.dest.ssa.num_components;
   unsigned src_components;
   LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

   assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec5:
   case nir_op_unpack_32_2x16:
   case nir_op_unpack_64_2x32:
   case nir_op_unpack_64_4x16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
   case nir_op_pack_snorm_2x16:
   case nir_op_pack_unorm_2x16:
   case nir_op_pack_32_2x16:
   case nir_op_pack_64_2x32:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_pack_64_4x16:
      src_components = 4;
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
      src[i] = get_alu_src(ctx, instr->src[i], src_components);

   switch (instr->op) {
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_fneg:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
      if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fneg will be optimized by backend compiler with sign
          * bit removed via XOR. This is probably a LLVM bug.
          */
         result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_ineg:
      if (instr->no_unsigned_wrap)
         result = LLVMBuildNUWNeg(ctx->ac.builder, src[0], "");
      else if (instr->no_signed_wrap)
         result = LLVMBuildNSWNeg(ctx->ac.builder, src[0], "");
      else
         result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
      break;
   case nir_op_inot:
      result = LLVMBuildNot(ctx->ac.builder, src[0], "");
      break;
   case nir_op_iadd:
      if (instr->no_unsigned_wrap)
         result = LLVMBuildNUWAdd(ctx->ac.builder, src[0], src[1], "");
      else if (instr->no_signed_wrap)
         result = LLVMBuildNSWAdd(ctx->ac.builder, src[0], src[1], "");
      else
         result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
      break;
   case nir_op_fadd:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      src[1] = ac_to_float(&ctx->ac, src[1]);
      result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
      break;
   case nir_op_fsub:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      src[1] = ac_to_float(&ctx->ac, src[1]);
      result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
      break;
   case nir_op_isub:
      if (instr->no_unsigned_wrap)
         result = LLVMBuildNUWSub(ctx->ac.builder, src[0], src[1], "");
      else if (instr->no_signed_wrap)
         result = LLVMBuildNSWSub(ctx->ac.builder, src[0], src[1], "");
      else
         result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
      break;
   case nir_op_imul:
      if (instr->no_unsigned_wrap)
         result = LLVMBuildNUWMul(ctx->ac.builder, src[0], src[1], "");
      else if (instr->no_signed_wrap)
         result = LLVMBuildNSWMul(ctx->ac.builder, src[0], src[1], "");
      else
         result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
      break;
   case nir_op_imod:
      result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
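      /* Note: nir_op_imod is emitted as a plain SRem here, the same as
       * nir_op_irem below; the flooring-vs-truncating difference is presumably
       * resolved by NIR lowering before this backend runs. */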
"");655break;656case nir_op_umod:657result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");658break;659case nir_op_irem:660result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");661break;662case nir_op_idiv:663result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");664break;665case nir_op_udiv:666result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");667break;668case nir_op_fmul:669src[0] = ac_to_float(&ctx->ac, src[0]);670src[1] = ac_to_float(&ctx->ac, src[1]);671result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");672break;673case nir_op_frcp:674/* For doubles, we need precise division to pass GLCTS. */675if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {676result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");677} else {678result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",679ac_to_float_type(&ctx->ac, def_type), src[0]);680}681if (ctx->abi->clamp_div_by_zero)682result = ac_build_fmin(&ctx->ac, result,683LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));684break;685case nir_op_iand:686result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");687break;688case nir_op_ior:689result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");690break;691case nir_op_ixor:692result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");693break;694case nir_op_ishl:695if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <696ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))697src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");698else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >699ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))700src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");701result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");702break;703case nir_op_ishr:704if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <705ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))706src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");707else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >708ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))709src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");710result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");711break;712case nir_op_ushr:713if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <714ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))715src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");716else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >717ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))718src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");719result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");720break;721case nir_op_ilt:722result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);723break;724case nir_op_ine:725result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);726break;727case nir_op_ieq:728result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);729break;730case nir_op_ige:731result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);732break;733case nir_op_ult:734result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);735break;736case nir_op_uge:737result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);738break;739case nir_op_feq:740result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);741break;742case nir_op_fneu:743result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);744break;745case nir_op_flt:746result = emit_float_cmp(&ctx->ac, 
      break;
   case nir_op_fge:
      result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
      break;
   case nir_op_fabs:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
      if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
         /* fabs will be optimized by backend compiler with sign
          * bit removed via AND.
          */
         result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_fsat:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = ac_build_fsat(&ctx->ac, src[0],
                             ac_to_float_type(&ctx->ac, def_type));
      break;
   case nir_op_iabs:
      result = emit_iabs(&ctx->ac, src[0]);
      break;
   case nir_op_imax:
      result = ac_build_imax(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_imin:
      result = ac_build_imin(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_umax:
      result = ac_build_umax(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_umin:
      result = ac_build_umin(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_isign:
      result = ac_build_isign(&ctx->ac, src[0]);
      break;
   case nir_op_fsign:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = ac_build_fsign(&ctx->ac, src[0]);
      break;
   case nir_op_ffloor:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_ftrunc:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fceil:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fround_even:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_ffract:
      result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
                                           ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fsin:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fcos:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fsqrt:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_fexp2:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_flog2:
      result =
         emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
      break;
   case nir_op_frsq:
      result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
                                           ac_to_float_type(&ctx->ac, def_type), src[0]);
      if (ctx->abi->clamp_div_by_zero)
         result = ac_build_fmin(&ctx->ac, result,
                                LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
      break;
   case nir_op_frexp_exp:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
      if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
         result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
      break;
   case nir_op_frexp_sig:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_fpow:
      if (instr->dest.dest.ssa.bit_size != 32) {
         /* 16 and 64 bits */
         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
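         /* For 16- and 64-bit pow, expand pow(x, y) as exp2(y * log2(x)). */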
         result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
                                       ac_to_float_type(&ctx->ac, def_type), result);
         break;
      }
      if (LLVM_VERSION_MAJOR >= 12) {
         result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
                                       ac_to_float_type(&ctx->ac, def_type), src[0]);
         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
                                     (LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
                                     2, AC_FUNC_ATTR_READNONE);
         result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
                                       ac_to_float_type(&ctx->ac, def_type), result);
         break;
      }
      /* Older LLVM doesn't have fmul.legacy. */
      result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
                                    src[0], src[1]);
      break;
   case nir_op_fmax:
      result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
                                    src[0], src[1]);
      if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
         /* Only pre-GFX9 chips do not flush denorms. */
         result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_fmin:
      result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
                                    src[0], src[1]);
      if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
         /* Only pre-GFX9 chips do not flush denorms. */
         result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
      }
      break;
   case nir_op_ffma:
      /* FMA is slow on gfx6-8, so it shouldn't be used. */
      assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.chip_class >= GFX9);
      result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
                                    src[0], src[1], src[2]);
      break;
   case nir_op_ldexp:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
                                     AC_FUNC_ATTR_READNONE);
      else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
                                     AC_FUNC_ATTR_READNONE);
      else
         result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
                                     AC_FUNC_ATTR_READNONE);
      break;
   case nir_op_bfm:
      result = emit_bfm(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_bitfield_select:
      result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
      break;
   case nir_op_ubfe:
      result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
      break;
   case nir_op_ibfe:
      result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
      break;
   case nir_op_bitfield_reverse:
      result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
      break;
   case nir_op_bit_count:
      result = ac_build_bit_count(&ctx->ac, src[0]);
      break;
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec5:
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
         src[i] = ac_to_integer(&ctx->ac, src[i]);
      result = ac_build_gather_values(&ctx->ac, src, num_components);
      break;
   case nir_op_f2i8:
   case nir_op_f2i16:
   case nir_op_f2imp:
   case nir_op_f2i32:
   case nir_op_f2i64:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
      break;
   case nir_op_f2u8:
   case nir_op_f2u16:
   case nir_op_f2ump:
   case nir_op_f2u32:
   case nir_op_f2u64:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      result = LLVMBuildFPToUI(ctx->ac.builder, src[0],
                               def_type, "");
      break;
   case nir_op_i2f16:
   case nir_op_i2fmp:
   case nir_op_i2f32:
   case nir_op_i2f64:
      result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
      break;
   case nir_op_u2f16:
   case nir_op_u2fmp:
   case nir_op_u2f32:
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
      break;
   case nir_op_f2f16_rtz:
   case nir_op_f2f16:
   case nir_op_f2fmp:
      src[0] = ac_to_float(&ctx->ac, src[0]);

      /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
       * all f32->f16 conversions have to round towards zero, because both scalar
       * and vec2 down-conversions have to round equally.
       */
      if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
         src[0] = ac_to_float(&ctx->ac, src[0]);

         if (LLVMTypeOf(src[0]) == ctx->ac.f64)
            src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");

         /* Fast path conversion. This only works if NIR is vectorized
          * to vec2 16.
          */
         if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
            LLVMValueRef args[] = {
               ac_llvm_extract_elem(&ctx->ac, src[0], 0),
               ac_llvm_extract_elem(&ctx->ac, src[0], 1),
            };
            result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
            break;
         }

         assert(ac_get_llvm_num_components(src[0]) == 1);
         LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
         result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
         result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
      } else {
         if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
            result =
               LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
         else
            result =
               LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
      }
      break;
   case nir_op_f2f16_rtne:
   case nir_op_f2f32:
   case nir_op_f2f64:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
         result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
      else
         result =
            LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
      break;
   case nir_op_u2u8:
   case nir_op_u2u16:
   case nir_op_u2u32:
   case nir_op_u2u64:
      if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
         result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
      break;
   case nir_op_i2i8:
   case nir_op_i2i16:
   case nir_op_i2imp:
   case nir_op_i2i32:
   case nir_op_i2i64:
      if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
         result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
      else
         result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
      break;
   case nir_op_bcsel:
      result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
      break;
   case nir_op_find_lsb:
      result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
      break;
   case nir_op_ufind_msb:
      result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
      break;
   case nir_op_ifind_msb:
      result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
      break;
   case nir_op_uadd_carry:
      result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
      break;
   case nir_op_usub_borrow:
      result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
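      /* emit_uint_carry() extracts the i1 overflow flag from the {i32, i1}
       * struct these intrinsics return and zero-extends it, so uadd_carry and
       * usub_borrow yield 0 or 1 as an i32. */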
      break;
   case nir_op_b2f16:
   case nir_op_b2f32:
   case nir_op_b2f64:
      result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_f2b1:
      result = emit_f2b(&ctx->ac, src[0]);
      break;
   case nir_op_b2i8:
   case nir_op_b2i16:
   case nir_op_b2i32:
   case nir_op_b2i64:
      result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
      break;
   case nir_op_i2b1:
   case nir_op_b2b1: /* after loads */
      result = emit_i2b(&ctx->ac, src[0]);
      break;
   case nir_op_b2b16: /* before stores */
      result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i16, "");
      break;
   case nir_op_b2b32: /* before stores */
      result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i32, "");
      break;
   case nir_op_fquantize2f16:
      result = emit_f2f16(&ctx->ac, src[0]);
      break;
   case nir_op_umul_high:
      result = emit_umul_high(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_imul_high:
      result = emit_imul_high(&ctx->ac, src[0], src[1]);
      break;
   case nir_op_pack_half_2x16:
      result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
      break;
   case nir_op_pack_half_2x16_split:
      src[0] = ac_to_float(&ctx->ac, src[0]);
      src[1] = ac_to_float(&ctx->ac, src[1]);
      result = LLVMBuildBitCast(ctx->ac.builder,
                                ac_build_cvt_pkrtz_f16(&ctx->ac, src),
                                ctx->ac.i32, "");
      break;
   case nir_op_pack_snorm_2x16:
      result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
      break;
   case nir_op_pack_unorm_2x16:
      result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
      break;
   case nir_op_unpack_half_2x16:
      result = emit_unpack_half_2x16(&ctx->ac, src[0]);
      break;
   case nir_op_unpack_half_2x16_split_x: {
      assert(ac_get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
      break;
   }
   case nir_op_unpack_half_2x16_split_y: {
      assert(ac_get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
      break;
   }
   case nir_op_fddx:
   case nir_op_fddy:
   case nir_op_fddx_fine:
   case nir_op_fddy_fine:
   case nir_op_fddx_coarse:
   case nir_op_fddy_coarse:
      result = emit_ddxy(ctx, instr->op, src[0]);
      break;

   case nir_op_unpack_64_4x16: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v4i16, "");
      break;
   }
   case nir_op_pack_64_4x16: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.i64, "");
      break;
   }

   case nir_op_unpack_64_2x32: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0],
                                ctx->ac.v2i32, "");
      break;
   }
   case nir_op_unpack_64_2x32_split_x: {
      assert(ac_get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
      break;
   }
   case nir_op_unpack_64_2x32_split_y: {
      assert(ac_get_llvm_num_components(src[0]) == 1);
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
      break;
   }

   case nir_op_pack_64_2x32: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0],
                                ctx->ac.i64, "");
      break;
   }
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src,
                                                2);
      result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
      break;
   }

   case nir_op_pack_32_2x16: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0],
                                ctx->ac.i32, "");
      break;
   }
   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
      result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
      break;
   }

   case nir_op_unpack_32_2x16: {
      result = LLVMBuildBitCast(ctx->ac.builder, src[0],
                                ctx->ac.v2i16, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_x: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_y: {
      LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
      result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
      break;
   }

   case nir_op_cube_face_coord_amd: {
      src[0] = ac_to_float(&ctx->ac, src[0]);
      LLVMValueRef results[2];
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
      results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
                                      AC_FUNC_ATTR_READNONE);
      results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
                                      AC_FUNC_ATTR_READNONE);
      LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
                                           AC_FUNC_ATTR_READNONE);
      results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
      results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
      LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
      results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
      results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
      result = ac_build_gather_values(&ctx->ac, results, 2);
      break;
   }

   case nir_op_cube_face_index_amd: {
      src[0] = ac_to_float(&ctx->ac, src[0]);
      LLVMValueRef in[3];
      for (unsigned chan = 0; chan < 3; chan++)
         in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
      result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
                                  AC_FUNC_ATTR_READNONE);
      break;
   }

   case nir_op_extract_u8:
   case nir_op_extract_i8:
   case nir_op_extract_u16:
   case nir_op_extract_i16: {
      bool is_signed = instr->op == nir_op_extract_i16 || instr->op == nir_op_extract_i8;
      unsigned size = instr->op == nir_op_extract_u8 || instr->op == nir_op_extract_i8 ? 8 : 16;
      LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
      result = LLVMBuildLShr(ctx->ac.builder, src[0], offset, "");
      result = LLVMBuildTrunc(ctx->ac.builder, result, LLVMIntTypeInContext(ctx->ac.context, size), "");
      if (is_signed)
         result = LLVMBuildSExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
      else
         result = LLVMBuildZExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
      break;
   }

   case nir_op_insert_u8:
   case nir_op_insert_u16: {
      unsigned size = instr->op == nir_op_insert_u8 ? 8 : 16;
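      /* insert_u8/u16: mask the source down to 'size' bits, then shift it to
       * the byte/halfword position given by the constant index in src[1]. */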
      LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
      LLVMValueRef mask = LLVMConstInt(LLVMTypeOf(src[0]), u_bit_consecutive(0, size), false);
      result = LLVMBuildShl(ctx->ac.builder, LLVMBuildAnd(ctx->ac.builder, src[0], mask, ""), offset, "");
      break;
   }

   default:
      fprintf(stderr, "Unknown NIR alu instr: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      abort();
   }

   if (result) {
      assert(instr->dest.dest.is_ssa);
      result = ac_to_integer_or_pointer(&ctx->ac, result);
      ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
   }
}

static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
{
   LLVMValueRef values[4], value = NULL;
   LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);

   for (unsigned i = 0; i < instr->def.num_components; ++i) {
      switch (instr->def.bit_size) {
      case 1:
         values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
         break;
      case 8:
         values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
         break;
      case 16:
         values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
         break;
      case 32:
         values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
         break;
      case 64:
         values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
         break;
      default:
         fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
         abort();
      }
   }
   if (instr->def.num_components > 1) {
      value = LLVMConstVector(values, instr->def.num_components);
   } else
      value = values[0];

   ctx->ssa_defs[instr->def.index] = value;
}

static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor,
                                    bool in_elements)
{
   LLVMValueRef size =
      LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");

   /* GFX8 only */
   if (ctx->ac.chip_class == GFX8 && in_elements) {
      /* On GFX8, the descriptor contains the size in bytes,
       * but TXQ must return the size in elements.
       * The stride is always non-zero for resources using TXQ.
       */
      LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, "");
      stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, false), "");
      stride = LLVMBuildAnd(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");

      size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
   }
   return size;
}

/* Gather4 should follow the same rules as bilinear filtering, but the hardware
 * incorrectly forces nearest filtering if the texture format is integer.
 * The only effect it has on Gather4, which always returns 4 texels for
 * bilinear filtering, is that the final coordinates are off by 0.5 of
 * the texel size.
 *
 * The workaround is to subtract 0.5 from the unnormalized coordinates,
 * or (0.5 / size) from the normalized coordinates.
 *
 * However, cube textures with 8_8_8_8 data formats require a different
 * workaround of overriding the num format to USCALED/SSCALED. This would lose
 * precision in 32-bit data formats, so it needs to be applied dynamically at
 * runtime.
 * In this case, return an i1 value that indicates whether the
 * descriptor was overridden (and hence a fixup of the sampler result is needed).
 */
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, nir_variable *var,
                                          struct ac_image_args *args, const nir_tex_instr *instr)
{
   const struct glsl_type *type = glsl_without_array(var->type);
   enum glsl_base_type stype = glsl_get_sampler_result_type(type);
   LLVMValueRef wa_8888 = NULL;
   LLVMValueRef half_texel[2];
   LLVMValueRef result;

   assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      LLVMValueRef formats;
      LLVMValueRef data_format;
      LLVMValueRef wa_formats;

      formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");

      data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
      data_format =
         LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
      wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
                              LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");

      uint32_t wa_num_format = stype == GLSL_TYPE_UINT
                                  ? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
                                  : S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
      wa_formats = LLVMBuildAnd(ctx->builder, formats,
                                LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
      wa_formats =
         LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");

      formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
      args->resource =
         LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
   }

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
      assert(!wa_8888);
      half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
   } else {
      struct ac_image_args resinfo = {0};
      LLVMBasicBlockRef bbs[2];

      LLVMValueRef unnorm = NULL;
      LLVMValueRef default_offset = ctx->f32_0;
      if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
         /* In vulkan, whether the sampler uses unnormalized
          * coordinates or not is a dynamic property of the
          * sampler. Hence, to figure out whether or not we
          * need to divide by the texture size, we need to test
          * the sampler at runtime. This tests the bit set by
          * radv_init_sampler().
          */
         LLVMValueRef sampler0 =
            LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
         sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
         sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
         unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
         default_offset = LLVMConstReal(ctx->f32, -0.5);
      }

      bbs[0] = LLVMGetInsertBlock(ctx->builder);
      if (wa_8888 || unnorm) {
         assert(!(wa_8888 && unnorm));
         LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
         /* Skip the texture size query entirely if we don't need it. */
         ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
         bbs[1] = LLVMGetInsertBlock(ctx->builder);
      }

      /* Query the texture size. */
      resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
      resinfo.opcode = ac_image_get_resinfo;
      resinfo.dmask = 0xf;
      resinfo.lod = ctx->i32_0;
      resinfo.resource = args->resource;
      resinfo.attributes = AC_FUNC_ATTR_READNONE;
      LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);

      /* Compute -0.5 / size. */
      for (unsigned c = 0; c < 2; c++) {
         half_texel[c] =
            LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
         half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
         half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
         half_texel[c] =
            LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
      }

      if (wa_8888 || unnorm) {
         ac_build_endif(ctx, 2000);

         for (unsigned c = 0; c < 2; c++) {
            LLVMValueRef values[2] = {default_offset, half_texel[c]};
            half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
         }
      }
   }

   for (unsigned c = 0; c < 2; c++) {
      LLVMValueRef tmp;
      tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
      args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
   }

   args->attributes = AC_FUNC_ATTR_READNONE;
   result = ac_build_image_opcode(ctx, args);

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
      LLVMValueRef tmp, tmp2;

      /* if the cube workaround is in place, f2i the result. */
      for (unsigned c = 0; c < 4; c++) {
         tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
         if (stype == GLSL_TYPE_UINT)
            tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
         else
            tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
         tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
         tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
         tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
         tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
         result =
            LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
      }
   }
   return result;
}

static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
{
   nir_deref_instr *texture_deref_instr = NULL;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      default:
         break;
      }
   }
   return texture_deref_instr;
}

static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
                                        struct ac_image_args *args)
{
   assert((!args->tfe || !args->d16) && "unsupported");

   if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
      unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

      assert(instr->dest.is_ssa);

      /* Buffers don't support A16. */
      if (args->a16)
         args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");

      return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
                                         util_last_bit(mask), 0, true,
                                         instr->dest.ssa.bit_size == 16,
                                         args->tfe);
   }

   args->opcode = ac_image_sample;

   switch (instr->op) {
   case nir_texop_txf:
   case nir_texop_txf_ms:
   case nir_texop_samples_identical:
      args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
                        ? ac_image_load
                        : ac_image_load_mip;
      args->level_zero = false;
      break;
   case nir_texop_txs:
   case nir_texop_query_levels:
      args->opcode = ac_image_get_resinfo;
      if (!args->lod)
         args->lod = ctx->ac.i32_0;
      args->level_zero = false;
      break;
   case nir_texop_tex:
      if (ctx->stage != MESA_SHADER_FRAGMENT &&
          (ctx->stage != MESA_SHADER_COMPUTE ||
           ctx->info->cs.derivative_group == DERIVATIVE_GROUP_NONE)) {
         assert(!args->lod);
         args->level_zero = true;
      }
      break;
   case nir_texop_tg4:
      args->opcode = ac_image_gather4;
      if (!args->lod && !args->bias)
         args->level_zero = true;
      break;
   case nir_texop_lod:
      args->opcode = ac_image_get_lod;
      break;
   case nir_texop_fragment_fetch:
   case nir_texop_fragment_mask_fetch:
      args->opcode = ac_image_load;
      args->level_zero = false;
      break;
   default:
      break;
   }

   /* Aldebaran doesn't have image_sample_lz, but image_sample behaves like lz. */
   if (!ctx->ac.info->has_3d_cube_border_color_mipmap)
      args->level_zero = false;

   if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
      nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
      nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
      const struct glsl_type *type = glsl_without_array(var->type);
      enum glsl_base_type stype = glsl_get_sampler_result_type(type);
      if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
         return lower_gather4_integer(&ctx->ac, var, args, instr);
      }
   }

   /* Fixup for GFX9 which allocates 1D textures as 2D. */
   if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
      if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
         args->coords[1] = ctx->ac.i32_0;
      }
   }

   args->attributes = AC_FUNC_ATTR_READNONE;
   bool cs_derivs =
      ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
   if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
      /* Prevent texture instructions with implicit derivatives from being
       * sunk into branches. */
      switch (instr->op) {
      case nir_texop_tex:
      case nir_texop_txb:
      case nir_texop_lod:
         args->attributes |= AC_FUNC_ATTR_CONVERGENT;
         break;
      default:
         break;
      }
   }

   return ac_build_image_opcode(&ctx->ac, args);
}

static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   LLVMValueRef ptr, addr;
   LLVMValueRef src0 = get_src(ctx, instr->src[0]);
   unsigned index = nir_intrinsic_base(instr);

   addr = LLVMConstInt(ctx->ac.i32, index, 0);
   addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");

   /* Load constant values from user SGPRS when possible, otherwise
    * fallback to the default path that loads directly from memory.
    */
   if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) {
      unsigned count = instr->dest.ssa.num_components;
      unsigned offset = index;

      offset += LLVMConstIntGetZExtValue(src0);
      offset /= 4;

      offset -= ctx->args->base_inline_push_consts;

      unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
      if (offset + count <= num_inline_push_consts) {
         LLVMValueRef *const push_constants = alloca(num_inline_push_consts * sizeof(LLVMValueRef));
         for (unsigned i = 0; i < num_inline_push_consts; i++)
            push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
         return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
      }
   }

   ptr =
      LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");

   if (instr->dest.ssa.bit_size == 8) {
      unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
      LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
      ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
      LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");

      LLVMValueRef params[3];
      if (load_dwords > 1) {
         LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
         params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
                                             LLVMConstInt(ctx->ac.i32, 1, false), "");
         params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
                                             LLVMConstInt(ctx->ac.i32, 0, false), "");
      } else {
         res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
         params[0] = ctx->ac.i32_0;
         params[1] = res;
      }
      params[2] = addr;
      res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);

      res = LLVMBuildTrunc(
         ctx->ac.builder, res,
         LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
      if (instr->dest.ssa.num_components > 1)
         res = LLVMBuildBitCast(ctx->ac.builder, res,
                                LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
      return res;
   } else if (instr->dest.ssa.bit_size == 16) {
      unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
      LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
      ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
      LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
      res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
      LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
      cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
      LLVMValueRef mask[] = {
         LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
         LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
         LLVMConstInt(ctx->ac.i32, 4, false)};
      LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
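      /* Two shuffles are prepared: one for a dword-aligned 16-bit offset and
       * one that skips the first element when bit 1 of 'addr' is set (the data
       * starts in the upper half of a dword); 'cond' selects between them. */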
      LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
      LLVMValueRef shuffle_aligned =
         LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
      LLVMValueRef shuffle_unaligned =
         LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
      res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
      return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
   }

   ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));

   return LLVMBuildLoad(ctx->ac.builder, ptr, "");
}

static LLVMValueRef visit_get_ssbo_size(struct ac_nir_context *ctx,
                                        const nir_intrinsic_instr *instr)
{
   bool non_uniform = nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM;
   LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), false, non_uniform);
   return get_buffer_size(ctx, rsrc, false);
}

static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
                                         unsigned start, unsigned count)
{
   LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
                          LLVMConstInt(ctx->i32, 3, false)};

   unsigned src_elements = ac_get_llvm_num_components(src);

   if (count == src_elements) {
      assert(start == 0);
      return src;
   } else if (count == 1) {
      assert(start < src_elements);
      return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
   } else {
      assert(start + count <= src_elements);
      assert(count <= 4);
      LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
      return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
   }
}

static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
                                 bool may_store_unaligned, bool writeonly_memory)
{
   unsigned cache_policy = 0;

   /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
    * store opcodes not aligned to a dword are affected. The only way to
    * get unaligned stores is through shader images.
    */
   if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
        /* If this is write-only, don't keep data in L1 to prevent
         * evicting L1 cache lines that may be needed by other
         * instructions.
         */
        writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
      cache_policy |= ac_glc;
   }

   if (access & ACCESS_STREAM_CACHE_POLICY)
      cache_policy |= ac_slc | ac_glc;

   return cache_policy;
}

static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
                                         const nir_intrinsic_instr *instr, nir_src src)
{
   return enter_waterfall(ctx, wctx, get_src(ctx, src),
                          nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}

static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7000);
   }

   LLVMValueRef src_data = get_src(ctx, instr->src[0]);
   int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
   unsigned writemask = nir_intrinsic_write_mask(instr);
   enum gl_access_qualifier access = nir_intrinsic_access(instr);
   bool writeonly_memory = access & ACCESS_NON_READABLE;
   unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);

   struct waterfall_context wctx;
   LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);

   LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
   LLVMValueRef base_data = src_data;
   base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
   LLVMValueRef base_offset = get_src(ctx, instr->src[2]);

   while (writemask) {
      int start, count;
      LLVMValueRef data, offset;
      LLVMTypeRef data_type;

      u_bit_scan_consecutive_range(&writemask, &start, &count);

      if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
         writemask |= 1 << (start + 2);
         count = 2;
      }
      int num_bytes = count * elem_size_bytes; /* count in bytes */

      /* we can only store 4 DWords at the same time.
       * can only happen for 64 Bit vectors. */
      if (num_bytes > 16) {
         writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
         count = 2;
         num_bytes = 16;
      }

      /* check alignment of 16 Bit stores */
      if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
         count = 1;
         num_bytes = 2;
      }

      /* Due to alignment issues, split stores of 8-bit/16-bit
       * vectors.
       */
      if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
         writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
         count = 1;
         num_bytes = elem_size_bytes;
      }

      data = extract_vector_range(&ctx->ac, base_data, start, count);

      offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
                            LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");

      if (num_bytes == 1) {
         ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
      } else if (num_bytes == 2) {
         ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
      } else {
         int num_channels = num_bytes / 4;

         switch (num_bytes) {
         case 16: /* v4f32 */
            data_type = ctx->ac.v4f32;
            break;
         case 12: /* v3f32 */
            data_type = ctx->ac.v3f32;
            break;
         case 8: /* v2f32 */
            data_type = ctx->ac.v2f32;
            break;
         case 4: /* f32 */
            data_type = ctx->ac.f32;
            break;
         default:
            unreachable("Malformed vector store.");
         }
         data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");

         ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0,
                                     cache_policy);
      }
   }

   exit_waterfall(ctx, &wctx, NULL);

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7000);
}

static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
                                           LLVMValueRef offset, LLVMValueRef compare,
                                           LLVMValueRef exchange, bool image)
{
   LLVMBasicBlockRef start_block = NULL, then_block = NULL;
   if (ctx->abi->robust_buffer_access || image) {
      LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);

      LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
      start_block = LLVMGetInsertBlock(ctx->ac.builder);

      ac_build_ifcc(&ctx->ac, cond, -1);

      then_block = LLVMGetInsertBlock(ctx->ac.builder);
   }

   if (image)
      offset = LLVMBuildMul(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, 8, false), "");

   LLVMValueRef ptr_parts[2] = {
      ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
      LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
                   LLVMConstInt(ctx->ac.i32, 65535, 0), "")};

   ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
   ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");

   offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");

   LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
   ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
   ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
   ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
                           "");

   LLVMValueRef result =
      ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
   result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");

   if (ctx->abi->robust_buffer_access || image) {
      ac_build_endif(&ctx->ac, -1);

      LLVMBasicBlockRef incoming_blocks[2] =
{1865start_block,1866then_block,1867};18681869LLVMValueRef incoming_values[2] = {1870LLVMConstInt(ctx->ac.i64, 0, 0),1871result,1872};1873LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");1874LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);1875return ret;1876} else {1877return result;1878}1879}18801881static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)1882{1883if (ctx->ac.postponed_kill) {1884LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");1885ac_build_ifcc(&ctx->ac, cond, 7001);1886}18871888LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));1889const char *op;1890char name[64], type[8];1891LLVMValueRef params[6], descriptor;1892LLVMValueRef result;1893int arg_count = 0;18941895struct waterfall_context wctx;1896LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);18971898switch (instr->intrinsic) {1899case nir_intrinsic_ssbo_atomic_add:1900op = "add";1901break;1902case nir_intrinsic_ssbo_atomic_imin:1903op = "smin";1904break;1905case nir_intrinsic_ssbo_atomic_umin:1906op = "umin";1907break;1908case nir_intrinsic_ssbo_atomic_imax:1909op = "smax";1910break;1911case nir_intrinsic_ssbo_atomic_umax:1912op = "umax";1913break;1914case nir_intrinsic_ssbo_atomic_and:1915op = "and";1916break;1917case nir_intrinsic_ssbo_atomic_or:1918op = "or";1919break;1920case nir_intrinsic_ssbo_atomic_xor:1921op = "xor";1922break;1923case nir_intrinsic_ssbo_atomic_exchange:1924op = "swap";1925break;1926case nir_intrinsic_ssbo_atomic_comp_swap:1927op = "cmpswap";1928break;1929default:1930abort();1931}19321933descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);19341935if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {1936result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),1937get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]), false);1938} else {1939if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {1940params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);1941}1942params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);1943params[arg_count++] = descriptor;1944params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */1945params[arg_count++] = ctx->ac.i32_0; /* soffset */1946params[arg_count++] = ctx->ac.i32_0; /* slc */19471948ac_build_type_name_for_intr(return_type, type, sizeof(type));1949snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);19501951result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);1952}19531954result = exit_waterfall(ctx, &wctx, result);1955if (ctx->ac.postponed_kill)1956ac_build_endif(&ctx->ac, 7001);1957return result;1958}19591960static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)1961{1962struct waterfall_context wctx;1963LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);19641965int elem_size_bytes = instr->dest.ssa.bit_size / 8;1966int num_components = instr->num_components;1967enum gl_access_qualifier access = nir_intrinsic_access(instr);1968unsigned cache_policy = get_cache_policy(ctx, access, false, false);19691970LLVMValueRef offset = get_src(ctx, instr->src[1]);1971LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false, false);1972LLVMValueRef vindex = ctx->ac.i32_0;19731974LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);1975LLVMTypeRef 
def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;19761977LLVMValueRef results[4];1978for (int i = 0; i < num_components;) {1979int num_elems = num_components - i;1980if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)1981num_elems = 1;1982if (num_elems * elem_size_bytes > 16)1983num_elems = 16 / elem_size_bytes;1984int load_bytes = num_elems * elem_size_bytes;19851986LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);19871988LLVMValueRef ret;19891990if (load_bytes == 1) {1991ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,1992cache_policy);1993} else if (load_bytes == 2) {1994ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,1995cache_policy);1996} else {1997int num_channels = util_next_power_of_two(load_bytes) / 4;1998bool can_speculate = access & ACCESS_CAN_REORDER;19992000ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0,2001ctx->ac.f32, cache_policy, can_speculate, false);2002}20032004LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));2005ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");2006ret = ac_trim_vector(&ctx->ac, ret, load_bytes);20072008LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);2009ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");20102011for (unsigned j = 0; j < num_elems; j++) {2012results[i + j] =2013LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");2014}2015i += num_elems;2016}20172018LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);2019return exit_waterfall(ctx, &wctx, ret);2020}20212022static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,2023const nir_intrinsic_instr *instr)2024{2025return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),2026nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);2027}20282029static LLVMValueRef visit_load_global(struct ac_nir_context *ctx,2030nir_intrinsic_instr *instr)2031{2032LLVMValueRef addr = get_src(ctx, instr->src[0]);2033LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);2034LLVMValueRef val;20352036LLVMTypeRef ptr_type = LLVMPointerType(result_type, AC_ADDR_SPACE_GLOBAL);20372038addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");20392040val = LLVMBuildLoad(ctx->ac.builder, addr, "");20412042if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {2043LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);2044LLVMSetAlignment(val, ac_get_type_size(result_type));2045}20462047return val;2048}20492050static void visit_store_global(struct ac_nir_context *ctx,2051nir_intrinsic_instr *instr)2052{2053if (ctx->ac.postponed_kill) {2054LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2055ac_build_ifcc(&ctx->ac, cond, 7002);2056}20572058LLVMValueRef data = get_src(ctx, instr->src[0]);2059LLVMValueRef addr = get_src(ctx, instr->src[1]);2060LLVMTypeRef type = LLVMTypeOf(data);2061LLVMValueRef val;20622063LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_GLOBAL);20642065addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");20662067val = LLVMBuildStore(ctx->ac.builder, data, addr);20682069if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {2070LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);2071LLVMSetAlignment(val, ac_get_type_size(type));2072}20732074if 
(ctx->ac.postponed_kill)2075ac_build_endif(&ctx->ac, 7002);2076}20772078static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx,2079nir_intrinsic_instr *instr)2080{2081if (ctx->ac.postponed_kill) {2082LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2083ac_build_ifcc(&ctx->ac, cond, 7002);2084}20852086LLVMValueRef addr = get_src(ctx, instr->src[0]);2087LLVMValueRef data = get_src(ctx, instr->src[1]);2088LLVMAtomicRMWBinOp op;2089LLVMValueRef result;20902091/* use "singlethread" sync scope to implement relaxed ordering */2092const char *sync_scope = "singlethread-one-as";20932094LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(data), AC_ADDR_SPACE_GLOBAL);20952096addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");20972098if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap) {2099LLVMValueRef data1 = get_src(ctx, instr->src[2]);2100result = ac_build_atomic_cmp_xchg(&ctx->ac, addr, data, data1, sync_scope);2101result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");2102} else {2103switch (instr->intrinsic) {2104case nir_intrinsic_global_atomic_add:2105op = LLVMAtomicRMWBinOpAdd;2106break;2107case nir_intrinsic_global_atomic_umin:2108op = LLVMAtomicRMWBinOpUMin;2109break;2110case nir_intrinsic_global_atomic_umax:2111op = LLVMAtomicRMWBinOpUMax;2112break;2113case nir_intrinsic_global_atomic_imin:2114op = LLVMAtomicRMWBinOpMin;2115break;2116case nir_intrinsic_global_atomic_imax:2117op = LLVMAtomicRMWBinOpMax;2118break;2119case nir_intrinsic_global_atomic_and:2120op = LLVMAtomicRMWBinOpAnd;2121break;2122case nir_intrinsic_global_atomic_or:2123op = LLVMAtomicRMWBinOpOr;2124break;2125case nir_intrinsic_global_atomic_xor:2126op = LLVMAtomicRMWBinOpXor;2127break;2128case nir_intrinsic_global_atomic_exchange:2129op = LLVMAtomicRMWBinOpXchg;2130break;2131default:2132unreachable("Invalid global atomic operation");2133}21342135result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope);2136}21372138if (ctx->ac.postponed_kill)2139ac_build_endif(&ctx->ac, 7002);21402141return result;2142}21432144static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)2145{2146struct waterfall_context wctx;2147LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);21482149LLVMValueRef ret;2150LLVMValueRef rsrc = rsrc_base;2151LLVMValueRef offset = get_src(ctx, instr->src[1]);2152int num_components = instr->num_components;21532154if (ctx->abi->load_ubo) {2155nir_binding binding = nir_chase_binding(instr->src[0]);2156rsrc = ctx->abi->load_ubo(ctx->abi, binding.desc_set, binding.binding, binding.success, rsrc);2157}21582159/* Convert to a scalar 32-bit load. */2160if (instr->dest.ssa.bit_size == 64)2161num_components *= 2;2162else if (instr->dest.ssa.bit_size == 16)2163num_components = DIV_ROUND_UP(num_components, 2);2164else if (instr->dest.ssa.bit_size == 8)2165num_components = DIV_ROUND_UP(num_components, 4);21662167ret =2168ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0,2169ctx->ac.f32, 0, true, true);21702171/* Convert to the original type. 
*/2172if (instr->dest.ssa.bit_size == 64) {2173ret = LLVMBuildBitCast(ctx->ac.builder, ret,2174LLVMVectorType(ctx->ac.i64, num_components / 2), "");2175} else if (instr->dest.ssa.bit_size == 16) {2176ret = LLVMBuildBitCast(ctx->ac.builder, ret,2177LLVMVectorType(ctx->ac.i16, num_components * 2), "");2178} else if (instr->dest.ssa.bit_size == 8) {2179ret = LLVMBuildBitCast(ctx->ac.builder, ret,2180LLVMVectorType(ctx->ac.i8, num_components * 4), "");2181}21822183ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);2184ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");21852186return exit_waterfall(ctx, &wctx, ret);2187}21882189static unsigned type_scalar_size_bytes(const struct glsl_type *type)2190{2191assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));2192return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;2193}21942195static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)2196{2197if (ctx->ac.postponed_kill) {2198LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2199ac_build_ifcc(&ctx->ac, cond, 7002);2200}22012202unsigned base = nir_intrinsic_base(instr);2203unsigned writemask = nir_intrinsic_write_mask(instr);2204unsigned component = nir_intrinsic_component(instr);2205LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));2206nir_src offset = *nir_get_io_offset_src(instr);2207LLVMValueRef indir_index = NULL;22082209if (nir_src_is_const(offset))2210assert(nir_src_as_uint(offset) == 0);2211else2212indir_index = get_src(ctx, offset);22132214switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {2215case 16:2216case 32:2217break;2218case 64:2219unreachable("64-bit IO should have been lowered to 32 bits");2220return;2221default:2222unreachable("unhandled store_output bit size");2223return;2224}22252226writemask <<= component;22272228if (ctx->stage == MESA_SHADER_TESS_CTRL) {2229nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);2230LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;2231unsigned location = nir_intrinsic_io_semantics(instr).location;22322233ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,2234writemask, component, location, base);2235return;2236}22372238/* No indirect indexing is allowed after this point. 
*/2239assert(!indir_index);22402241for (unsigned chan = 0; chan < 8; chan++) {2242if (!(writemask & (1 << chan)))2243continue;22442245LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);2246LLVMValueRef output_addr = ctx->abi->outputs[base * 4 + chan];22472248if (LLVMGetElementType(LLVMTypeOf(output_addr)) == ctx->ac.f32 &&2249LLVMTypeOf(value) == ctx->ac.f16) {2250LLVMValueRef output, index;22512252/* Insert the 16-bit value into the low or high bits of the 32-bit output2253* using read-modify-write.2254*/2255index = LLVMConstInt(ctx->ac.i32, nir_intrinsic_io_semantics(instr).high_16bits, 0);2256output = LLVMBuildLoad(ctx->ac.builder, output_addr, "");2257output = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.v2f16, "");2258output = LLVMBuildInsertElement(ctx->ac.builder, output, value, index, "");2259value = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.f32, "");2260}2261LLVMBuildStore(ctx->ac.builder, value, output_addr);2262}22632264if (ctx->ac.postponed_kill)2265ac_build_endif(&ctx->ac, 7002);2266}22672268static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)2269{2270switch (dim) {2271case GLSL_SAMPLER_DIM_BUF:2272return 1;2273case GLSL_SAMPLER_DIM_1D:2274return array ? 2 : 1;2275case GLSL_SAMPLER_DIM_2D:2276return array ? 3 : 2;2277case GLSL_SAMPLER_DIM_MS:2278return array ? 4 : 3;2279case GLSL_SAMPLER_DIM_3D:2280case GLSL_SAMPLER_DIM_CUBE:2281return 3;2282case GLSL_SAMPLER_DIM_RECT:2283case GLSL_SAMPLER_DIM_SUBPASS:2284return 2;2285case GLSL_SAMPLER_DIM_SUBPASS_MS:2286return 3;2287default:2288break;2289}2290return 0;2291}22922293static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,2294LLVMValueRef coord_x, LLVMValueRef coord_y,2295LLVMValueRef coord_z, LLVMValueRef sample_index,2296LLVMValueRef fmask_desc_ptr)2297{2298unsigned sample_chan = coord_z ? 3 : 2;2299LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};2300addr[sample_chan] = sample_index;23012302ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);2303return addr[sample_chan];2304}23052306static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)2307{2308assert(instr->src[0].is_ssa);2309return nir_instr_as_deref(instr->src[0].ssa->parent_instr);2310}23112312static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,2313const nir_intrinsic_instr *instr,2314LLVMValueRef dynamic_index,2315enum ac_descriptor_type desc_type, bool write)2316{2317nir_deref_instr *deref_instr = instr->src[0].ssa->parent_instr->type == nir_instr_type_deref2318? 
nir_instr_as_deref(instr->src[0].ssa->parent_instr)2319: NULL;23202321return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);2322}23232324static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2325LLVMValueRef dynamic_desc_index, struct ac_image_args *args,2326enum glsl_sampler_dim dim, bool is_array)2327{2328LLVMValueRef src0 = get_src(ctx, instr->src[1]);2329LLVMValueRef masks[] = {2330LLVMConstInt(ctx->ac.i32, 0, false),2331LLVMConstInt(ctx->ac.i32, 1, false),2332LLVMConstInt(ctx->ac.i32, 2, false),2333LLVMConstInt(ctx->ac.i32, 3, false),2334};2335LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);23362337int count;2338ASSERTED bool add_frag_pos =2339(dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);2340bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);2341bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;2342assert(!add_frag_pos && "Input attachments should be lowered by this point.");2343count = image_type_to_components_count(dim, is_array);23442345if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||2346instr->intrinsic == nir_intrinsic_bindless_image_load ||2347instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||2348instr->intrinsic == nir_intrinsic_bindless_image_sparse_load)) {2349LLVMValueRef fmask_load_address[3];23502351fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");2352fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");2353if (is_array)2354fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");2355else2356fmask_load_address[2] = NULL;23572358sample_index = adjust_sample_index_using_fmask(2359&ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],2360sample_index,2361get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), AC_DESC_FMASK,2362&instr->instr, dynamic_desc_index, true, false));2363}2364if (count == 1 && !gfx9_1d) {2365if (instr->src[1].ssa->num_components)2366args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");2367else2368args->coords[0] = src0;2369} else {2370int chan;2371if (is_ms)2372count--;2373for (chan = 0; chan < count; ++chan) {2374args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);2375}23762377if (gfx9_1d) {2378if (is_array) {2379args->coords[2] = args->coords[1];2380args->coords[1] = ctx->ac.i32_0;2381} else2382args->coords[1] = ctx->ac.i32_0;2383count++;2384}2385if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {2386/* The hw can't bind a slice of a 3D image as a 2D2387* image, because it ignores BASE_ARRAY if the target2388* is 3D. 
The workaround is to read BASE_ARRAY and set2389* it as the 3rd address operand for all 2D images.2390*/2391LLVMValueRef first_layer, const5, mask;23922393const5 = LLVMConstInt(ctx->ac.i32, 5, 0);2394mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);2395first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");2396first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");23972398args->coords[count] = first_layer;2399count++;2400}24012402if (is_ms) {2403args->coords[count] = sample_index;2404count++;2405}2406}2407}24082409static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,2410struct waterfall_context *wctx,2411const nir_intrinsic_instr *instr)2412{2413nir_deref_instr *deref_instr = NULL;24142415if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)2416deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);24172418LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);2419return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);2420}24212422static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2423bool bindless)2424{2425LLVMValueRef res;24262427enum glsl_sampler_dim dim;2428enum gl_access_qualifier access = nir_intrinsic_access(instr);2429bool is_array;2430if (bindless) {2431dim = nir_intrinsic_image_dim(instr);2432is_array = nir_intrinsic_image_array(instr);2433} else {2434const nir_deref_instr *image_deref = get_image_deref(instr);2435const struct glsl_type *type = image_deref->type;2436const nir_variable *var = nir_deref_instr_get_variable(image_deref);2437dim = glsl_get_sampler_dim(type);2438access |= var->data.access;2439is_array = glsl_sampler_type_is_array(type);2440}24412442struct waterfall_context wctx;2443LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);24442445struct ac_image_args args = {0};24462447args.cache_policy = get_cache_policy(ctx, access, false, false);2448args.tfe = instr->intrinsic == nir_intrinsic_image_deref_sparse_load;24492450if (dim == GLSL_SAMPLER_DIM_BUF) {2451unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));2452if (instr->dest.ssa.bit_size == 64)2453num_channels = num_channels < 4 ? 2 : 4;2454LLVMValueRef rsrc, vindex;24552456rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false);2457vindex =2458LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");24592460assert(instr->dest.is_ssa);2461bool can_speculate = access & ACCESS_CAN_REORDER;2462res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,2463args.cache_policy, can_speculate,2464instr->dest.ssa.bit_size == 16,2465args.tfe);2466res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4);24672468res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);2469res = ac_to_integer(&ctx->ac, res);2470} else {2471bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;24722473args.opcode = level_zero ? 
ac_image_load : ac_image_load_mip;2474args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);2475get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);2476args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);2477if (!level_zero)2478args.lod = get_src(ctx, instr->src[3]);2479args.dmask = 15;2480args.attributes = AC_FUNC_ATTR_READONLY;24812482assert(instr->dest.is_ssa);2483args.d16 = instr->dest.ssa.bit_size == 16;24842485res = ac_build_image_opcode(&ctx->ac, &args);2486}24872488if (instr->dest.ssa.bit_size == 64) {2489LLVMValueRef code = NULL;2490if (args.tfe) {2491code = ac_llvm_extract_elem(&ctx->ac, res, 4);2492res = ac_trim_vector(&ctx->ac, res, 4);2493}24942495res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i64, 2), "");2496LLVMValueRef x = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");2497LLVMValueRef w = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_1, "");24982499if (code)2500code = LLVMBuildZExt(ctx->ac.builder, code, ctx->ac.i64, "");2501LLVMValueRef values[5] = {x, ctx->ac.i64_0, ctx->ac.i64_0, w, code};2502res = ac_build_gather_values(&ctx->ac, values, 4 + args.tfe);2503}25042505return exit_waterfall(ctx, &wctx, res);2506}25072508static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2509bool bindless)2510{2511if (ctx->ac.postponed_kill) {2512LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2513ac_build_ifcc(&ctx->ac, cond, 7003);2514}25152516enum glsl_sampler_dim dim;2517enum gl_access_qualifier access = nir_intrinsic_access(instr);2518bool is_array;25192520if (bindless) {2521dim = nir_intrinsic_image_dim(instr);2522is_array = nir_intrinsic_image_array(instr);2523} else {2524const nir_deref_instr *image_deref = get_image_deref(instr);2525const struct glsl_type *type = image_deref->type;2526const nir_variable *var = nir_deref_instr_get_variable(image_deref);2527dim = glsl_get_sampler_dim(type);2528access |= var->data.access;2529is_array = glsl_sampler_type_is_array(type);2530}25312532struct waterfall_context wctx;2533LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);25342535bool writeonly_memory = access & ACCESS_NON_READABLE;2536struct ac_image_args args = {0};25372538args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);25392540LLVMValueRef src = get_src(ctx, instr->src[3]);2541if (instr->src[3].ssa->bit_size == 64) {2542/* only R64_UINT and R64_SINT supported */2543src = ac_llvm_extract_elem(&ctx->ac, src, 0);2544src = LLVMBuildBitCast(ctx->ac.builder, src, ctx->ac.v2f32, "");2545} else {2546src = ac_to_float(&ctx->ac, src);2547}25482549if (dim == GLSL_SAMPLER_DIM_BUF) {2550LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);2551unsigned src_channels = ac_get_llvm_num_components(src);2552LLVMValueRef vindex;25532554if (src_channels == 3)2555src = ac_build_expand_to_vec4(&ctx->ac, src, 3);25562557vindex =2558LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");25592560ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);2561} else {2562bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;25632564args.opcode = level_zero ? 
ac_image_store : ac_image_store_mip;2565args.data[0] = src;2566args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);2567get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);2568args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);2569if (!level_zero)2570args.lod = get_src(ctx, instr->src[4]);2571args.dmask = 15;2572args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;25732574ac_build_image_opcode(&ctx->ac, &args);2575}25762577exit_waterfall(ctx, &wctx, NULL);2578if (ctx->ac.postponed_kill)2579ac_build_endif(&ctx->ac, 7003);2580}25812582static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2583bool bindless)2584{2585if (ctx->ac.postponed_kill) {2586LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2587ac_build_ifcc(&ctx->ac, cond, 7004);2588}25892590LLVMValueRef params[7];2591int param_count = 0;25922593bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||2594instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;2595const char *atomic_name;2596char intrinsic_name[64];2597enum ac_atomic_op atomic_subop;2598ASSERTED int length;25992600enum glsl_sampler_dim dim;2601bool is_array;2602if (bindless) {2603dim = nir_intrinsic_image_dim(instr);2604is_array = nir_intrinsic_image_array(instr);2605} else {2606const struct glsl_type *type = get_image_deref(instr)->type;2607dim = glsl_get_sampler_dim(type);2608is_array = glsl_sampler_type_is_array(type);2609}26102611struct waterfall_context wctx;2612LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);26132614switch (instr->intrinsic) {2615case nir_intrinsic_bindless_image_atomic_add:2616case nir_intrinsic_image_deref_atomic_add:2617atomic_name = "add";2618atomic_subop = ac_atomic_add;2619break;2620case nir_intrinsic_bindless_image_atomic_imin:2621case nir_intrinsic_image_deref_atomic_imin:2622atomic_name = "smin";2623atomic_subop = ac_atomic_smin;2624break;2625case nir_intrinsic_bindless_image_atomic_umin:2626case nir_intrinsic_image_deref_atomic_umin:2627atomic_name = "umin";2628atomic_subop = ac_atomic_umin;2629break;2630case nir_intrinsic_bindless_image_atomic_imax:2631case nir_intrinsic_image_deref_atomic_imax:2632atomic_name = "smax";2633atomic_subop = ac_atomic_smax;2634break;2635case nir_intrinsic_bindless_image_atomic_umax:2636case nir_intrinsic_image_deref_atomic_umax:2637atomic_name = "umax";2638atomic_subop = ac_atomic_umax;2639break;2640case nir_intrinsic_bindless_image_atomic_and:2641case nir_intrinsic_image_deref_atomic_and:2642atomic_name = "and";2643atomic_subop = ac_atomic_and;2644break;2645case nir_intrinsic_bindless_image_atomic_or:2646case nir_intrinsic_image_deref_atomic_or:2647atomic_name = "or";2648atomic_subop = ac_atomic_or;2649break;2650case nir_intrinsic_bindless_image_atomic_xor:2651case nir_intrinsic_image_deref_atomic_xor:2652atomic_name = "xor";2653atomic_subop = ac_atomic_xor;2654break;2655case nir_intrinsic_bindless_image_atomic_exchange:2656case nir_intrinsic_image_deref_atomic_exchange:2657atomic_name = "swap";2658atomic_subop = ac_atomic_swap;2659break;2660case nir_intrinsic_bindless_image_atomic_comp_swap:2661case nir_intrinsic_image_deref_atomic_comp_swap:2662atomic_name = "cmpswap";2663atomic_subop = 0; /* not used */2664break;2665case nir_intrinsic_bindless_image_atomic_inc_wrap:2666case nir_intrinsic_image_deref_atomic_inc_wrap: {2667atomic_name = "inc";2668atomic_subop = ac_atomic_inc_wrap;2669break;2670}2671case 
nir_intrinsic_bindless_image_atomic_dec_wrap:2672case nir_intrinsic_image_deref_atomic_dec_wrap:2673atomic_name = "dec";2674atomic_subop = ac_atomic_dec_wrap;2675break;2676default:2677abort();2678}26792680if (cmpswap)2681params[param_count++] = get_src(ctx, instr->src[4]);2682params[param_count++] = get_src(ctx, instr->src[3]);26832684LLVMValueRef result;2685if (dim == GLSL_SAMPLER_DIM_BUF) {2686params[param_count++] = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);2687params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),2688ctx->ac.i32_0, ""); /* vindex */2689params[param_count++] = ctx->ac.i32_0; /* voffset */2690if (cmpswap && instr->dest.ssa.bit_size == 64) {2691result = emit_ssbo_comp_swap_64(ctx, params[2], params[3], params[1], params[0], true);2692} else {2693params[param_count++] = ctx->ac.i32_0; /* soffset */2694params[param_count++] = ctx->ac.i32_0; /* slc */26952696length = snprintf(intrinsic_name, sizeof(intrinsic_name),2697"llvm.amdgcn.struct.buffer.atomic.%s.%s", atomic_name,2698instr->dest.ssa.bit_size == 64 ? "i64" : "i32");26992700assert(length < sizeof(intrinsic_name));2701result = ac_build_intrinsic(&ctx->ac, intrinsic_name, LLVMTypeOf(params[0]), params, param_count, 0);2702}2703} else {2704struct ac_image_args args = {0};2705args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;2706args.atomic = atomic_subop;2707args.data[0] = params[0];2708if (cmpswap)2709args.data[1] = params[1];2710args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);2711get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);2712args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);27132714result = ac_build_image_opcode(&ctx->ac, &args);2715}27162717result = exit_waterfall(ctx, &wctx, result);2718if (ctx->ac.postponed_kill)2719ac_build_endif(&ctx->ac, 7004);2720return result;2721}27222723static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)2724{2725struct waterfall_context wctx;2726LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);2727LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);27282729LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);27302731return exit_waterfall(ctx, &wctx, ret);2732}27332734static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2735bool bindless)2736{2737LLVMValueRef res;27382739enum glsl_sampler_dim dim;2740bool is_array;2741if (bindless) {2742dim = nir_intrinsic_image_dim(instr);2743is_array = nir_intrinsic_image_array(instr);2744} else {2745const struct glsl_type *type = get_image_deref(instr)->type;2746dim = glsl_get_sampler_dim(type);2747is_array = glsl_sampler_type_is_array(type);2748}27492750struct waterfall_context wctx;2751LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);27522753if (dim == GLSL_SAMPLER_DIM_BUF) {2754res = get_buffer_size(2755ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);2756} else {27572758struct ac_image_args args = {0};27592760args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);2761args.dmask = 0xf;2762args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);2763args.opcode = ac_image_get_resinfo;2764assert(nir_src_as_uint(instr->src[1]) == 0);2765args.lod = ctx->ac.i32_0;2766args.attributes = AC_FUNC_ATTR_READNONE;27672768res = ac_build_image_opcode(&ctx->ac, 
&args);27692770LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);27712772if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {2773LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);2774LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");2775z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");2776res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");2777}27782779if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {2780LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");2781res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");2782}2783}2784return exit_waterfall(ctx, &wctx, res);2785}27862787static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr)2788{2789unsigned wait_flags = 0;27902791switch (instr->intrinsic) {2792case nir_intrinsic_memory_barrier:2793case nir_intrinsic_group_memory_barrier:2794wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;2795break;2796case nir_intrinsic_memory_barrier_buffer:2797case nir_intrinsic_memory_barrier_image:2798wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;2799break;2800case nir_intrinsic_memory_barrier_shared:2801wait_flags = AC_WAIT_LGKM;2802break;2803default:2804break;2805}28062807ac_build_waitcnt(ac, wait_flags);2808}28092810void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)2811{2812/* GFX6 only (thanks to a hw bug workaround):2813* The real barrier instruction isn’t needed, because an entire patch2814* always fits into a single wave.2815*/2816if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {2817ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);2818return;2819}2820ac_build_s_barrier(ac);2821}28222823static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)2824{2825LLVMValueRef cond;28262827if (instr->intrinsic == nir_intrinsic_discard_if ||2828instr->intrinsic == nir_intrinsic_terminate_if) {2829cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");2830} else {2831assert(instr->intrinsic == nir_intrinsic_discard);2832cond = ctx->ac.i1false;2833}28342835ac_build_kill_if_false(&ctx->ac, cond);2836}28372838static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)2839{2840LLVMValueRef cond;28412842if (instr->intrinsic == nir_intrinsic_demote_if) {2843cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");2844} else {2845assert(instr->intrinsic == nir_intrinsic_demote);2846cond = ctx->ac.i1false;2847}28482849if (LLVM_VERSION_MAJOR >= 13) {2850/* This demotes the pixel if the condition is false. */2851ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wqm.demote", ctx->ac.voidt, &cond, 1, 0);2852return;2853}28542855LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2856mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");2857LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);28582859if (!ctx->info->fs.needs_all_helper_invocations) {2860/* This is an optional optimization that only kills whole inactive quads.2861* It's not used when subgroup operations can possibly use all helper2862* invocations.2863*/2864if (ctx->ac.flow->depth == 0) {2865ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));2866} else {2867/* amdgcn.wqm.vote doesn't work inside conditional blocks. 
Here's why.2868*2869* The problem is that kill(wqm.vote(0)) kills all active threads within2870* the block, which breaks the whole quad mode outside the block if2871* the conditional block has partially active quads (2x2 pixel blocks).2872* E.g. threads 0-3 are active outside the block, but only thread 0 is2873* active inside the block. Thread 0 shouldn't be killed by demote,2874* because threads 1-3 are still active outside the block.2875*2876* The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)2877* instead of S_WQM(cond).2878*2879* The less efficient workaround we do here is to save the kill condition2880* to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we2881* exit the conditional block.2882*/2883ctx->ac.conditional_demote_seen = true;2884}2885}2886}28872888static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)2889{2890if (ctx->args->vs_rel_patch_id.used) {2891return ac_get_arg(&ctx->ac, ctx->args->vs_rel_patch_id);2892} else if (ctx->args->merged_wave_info.used) {2893/* Thread ID in threadgroup in merged ESGS. */2894LLVMValueRef wave_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);2895LLVMValueRef wave_size = LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false);2896LLVMValueRef threads_before = LLVMBuildMul(ctx->ac.builder, wave_id, wave_size, "");2897return LLVMBuildAdd(ctx->ac.builder, threads_before, ac_get_thread_id(&ctx->ac), "");2898}28992900LLVMValueRef result;2901LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);2902result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),2903LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");29042905if (ctx->ac.wave_size == 32)2906result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), "");29072908return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");2909}29102911static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)2912{2913if (ctx->stage == MESA_SHADER_COMPUTE) {2914LLVMValueRef result;2915result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),2916LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");2917return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");2918} else {2919return LLVMConstInt(ctx->ac.i32, 0, false);2920}2921}29222923static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx)2924{2925if (ctx->stage == MESA_SHADER_COMPUTE) {2926return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),2927LLVMConstInt(ctx->ac.i32, 0x3f, false), "");2928} else {2929return LLVMConstInt(ctx->ac.i32, 1, false);2930}2931}29322933static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx)2934{2935LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);2936const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";29372938/* The second argument is whether cttz(0) should be defined, but we do not care. 
*/2939LLVMValueRef args[] = {active_set, ctx->ac.i1false};2940LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2,2941AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);29422943return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");2944}29452946static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)2947{2948unsigned alignment = nir_intrinsic_align(instr);2949unsigned const_off = nir_intrinsic_base(instr);29502951LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size, const_off);2952LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);2953int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));2954LLVMValueRef derived_ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(result_type, addr_space), "");2955LLVMValueRef ret = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");2956LLVMSetAlignment(ret, alignment);29572958return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");2959}29602961static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)2962{2963LLVMValueRef derived_ptr, data, index;2964LLVMBuilderRef builder = ctx->ac.builder;29652966unsigned const_off = nir_intrinsic_base(instr);2967LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size, const_off);2968LLVMValueRef src = get_src(ctx, instr->src[0]);29692970int writemask = nir_intrinsic_write_mask(instr);2971for (int chan = 0; chan < 4; chan++) {2972if (!(writemask & (1 << chan))) {2973continue;2974}2975data = ac_llvm_extract_elem(&ctx->ac, src, chan);2976index = LLVMConstInt(ctx->ac.i32, chan, 0);2977derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");2978LLVMBuildStore(builder, data, derived_ptr);2979}2980}29812982static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,2983LLVMValueRef ptr, int src_idx)2984{2985if (ctx->ac.postponed_kill) {2986LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");2987ac_build_ifcc(&ctx->ac, cond, 7005);2988}29892990LLVMValueRef result;2991LLVMValueRef src = get_src(ctx, instr->src[src_idx]);29922993const char *sync_scope = "workgroup-one-as";29942995if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) {2996LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);2997result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);2998result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");2999} else {3000LLVMAtomicRMWBinOp op;3001switch (instr->intrinsic) {3002case nir_intrinsic_shared_atomic_add:3003op = LLVMAtomicRMWBinOpAdd;3004break;3005case nir_intrinsic_shared_atomic_umin:3006op = LLVMAtomicRMWBinOpUMin;3007break;3008case nir_intrinsic_shared_atomic_umax:3009op = LLVMAtomicRMWBinOpUMax;3010break;3011case nir_intrinsic_shared_atomic_imin:3012op = LLVMAtomicRMWBinOpMin;3013break;3014case nir_intrinsic_shared_atomic_imax:3015op = LLVMAtomicRMWBinOpMax;3016break;3017case nir_intrinsic_shared_atomic_and:3018op = LLVMAtomicRMWBinOpAnd;3019break;3020case nir_intrinsic_shared_atomic_or:3021op = LLVMAtomicRMWBinOpOr;3022break;3023case nir_intrinsic_shared_atomic_xor:3024op = LLVMAtomicRMWBinOpXor;3025break;3026case nir_intrinsic_shared_atomic_exchange:3027op = LLVMAtomicRMWBinOpXchg;3028break;3029case nir_intrinsic_shared_atomic_fadd:3030op = LLVMAtomicRMWBinOpFAdd;3031break;3032default:3033return NULL;3034}30353036LLVMValueRef val;30373038if (instr->intrinsic == 
nir_intrinsic_shared_atomic_fadd) {3039val = ac_to_float(&ctx->ac, src);30403041LLVMTypeRef ptr_type =3042LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));3043ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");3044} else {3045val = ac_to_integer(&ctx->ac, src);3046}30473048result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);30493050if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||3051instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {3052result = ac_to_integer(&ctx->ac, result);3053}3054}30553056if (ctx->ac.postponed_kill)3057ac_build_endif(&ctx->ac, 7005);3058return result;3059}30603061static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)3062{3063LLVMValueRef values[2];3064LLVMValueRef pos[2];30653066pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));3067pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));30683069values[0] = ac_build_fract(&ctx->ac, pos[0], 32);3070values[1] = ac_build_fract(&ctx->ac, pos[1], 32);3071return ac_build_gather_values(&ctx->ac, values, 2);3072}30733074static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp,3075unsigned location)3076{3077switch (interp) {3078case INTERP_MODE_FLAT:3079default:3080return NULL;3081case INTERP_MODE_SMOOTH:3082case INTERP_MODE_NONE:3083if (location == INTERP_CENTER)3084return ac_get_arg(&ctx->ac, ctx->args->persp_center);3085else if (location == INTERP_CENTROID)3086return ctx->abi->persp_centroid;3087else if (location == INTERP_SAMPLE)3088return ac_get_arg(&ctx->ac, ctx->args->persp_sample);3089break;3090case INTERP_MODE_NOPERSPECTIVE:3091if (location == INTERP_CENTER)3092return ac_get_arg(&ctx->ac, ctx->args->linear_center);3093else if (location == INTERP_CENTROID)3094return ctx->abi->linear_centroid;3095else if (location == INTERP_SAMPLE)3096return ac_get_arg(&ctx->ac, ctx->args->linear_sample);3097break;3098}3099return NULL;3100}31013102static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode)3103{3104LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);3105return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");3106}31073108static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode,3109LLVMValueRef offset)3110{3111LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);3112LLVMValueRef src_c0 =3113ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));3114LLVMValueRef src_c1 =3115ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));31163117LLVMValueRef ij_out[2];3118LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);31193120/*3121* take the I then J parameters, and the DDX/Y for it, and3122* calculate the IJ inputs for the interpolator.3123* temp1 = ddx * offset/sample.x + I;3124* interp_param.I = ddy * offset/sample.y + temp1;3125* temp1 = ddx * offset/sample.x + J;3126* interp_param.J = ddy * offset/sample.y + temp1;3127*/3128for (unsigned i = 0; i < 2; i++) {3129LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);3130LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);3131LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, "");3132LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, "");3133LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, "");3134LLVMValueRef temp1, 
temp2;31353136interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, "");31373138temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);3139temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);31403141ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, "");3142}3143interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);3144return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");3145}31463147static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode)3148{3149LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);3150return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");3151}31523153static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode,3154LLVMValueRef sample_id)3155{3156if (ctx->abi->interp_at_sample_force_center)3157return barycentric_center(ctx, mode);31583159LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);31603161/* fetch sample ID */3162LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);31633164LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");3165src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");3166LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");3167src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");3168LLVMValueRef coords[] = {src_c0, src_c1};3169LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);31703171return barycentric_offset(ctx, mode, offset);3172}31733174static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode)3175{3176LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);3177return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");3178}31793180static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)3181{3182return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model),3183ctx->ac.v3i32, "");3184}31853186static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param,3187unsigned index, unsigned comp_start,3188unsigned num_components, unsigned bitsize,3189bool high_16bits)3190{3191LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);3192LLVMValueRef interp_param_f;31933194interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");3195LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");3196LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");31973198/* Workaround for issue 2647: kill threads with infinite interpolation coeffs */3199if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {3200LLVMValueRef args[2];3201args[0] = i;3202args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);3203LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, args, 2,3204AC_FUNC_ATTR_READNONE);3205ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));3206_mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);3207}32083209LLVMValueRef values[4];3210assert(bitsize == 16 || bitsize == 32);3211for (unsigned comp = 0; comp < num_components; comp++) {3212LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);3213if (bitsize == 16) {3214values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, 
attr_number,3215ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j,3216high_16bits);3217} else {3218values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,3219ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);3220}3221}32223223return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));3224}32253226static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,3227bool is_output)3228{3229LLVMValueRef values[8];3230LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);3231LLVMTypeRef component_type;3232unsigned base = nir_intrinsic_base(instr);3233unsigned component = nir_intrinsic_component(instr);3234unsigned count = instr->dest.ssa.num_components;3235nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);3236LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;3237nir_src offset = *nir_get_io_offset_src(instr);3238LLVMValueRef indir_index = NULL;32393240switch (instr->dest.ssa.bit_size) {3241case 16:3242case 32:3243break;3244case 64:3245unreachable("64-bit IO should have been lowered");3246return NULL;3247default:3248unreachable("unhandled load type");3249return NULL;3250}32513252if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)3253component_type = LLVMGetElementType(dest_type);3254else3255component_type = dest_type;32563257if (nir_src_is_const(offset))3258assert(nir_src_as_uint(offset) == 0);3259else3260indir_index = get_src(ctx, offset);32613262if (ctx->stage == MESA_SHADER_TESS_CTRL ||3263(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {3264bool vertex_index_is_invoc_id =3265vertex_index_src &&3266vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&3267nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==3268nir_intrinsic_load_invocation_id;32693270LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,3271vertex_index, indir_index,3272base, component,3273count, !is_output,3274vertex_index_is_invoc_id);3275if (instr->dest.ssa.bit_size == 16) {3276result = ac_to_integer(&ctx->ac, result);3277result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");3278}3279return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");3280}32813282/* No indirect indexing is allowed after this point. */3283assert(!indir_index);32843285if (ctx->stage == MESA_SHADER_GEOMETRY) {3286assert(nir_src_is_const(*vertex_index_src));32873288return ctx->abi->load_inputs(ctx->abi, base, component, count,3289nir_src_as_uint(*vertex_index_src), component_type);3290}32913292if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&3293nir_intrinsic_io_semantics(instr).fb_fetch_output)3294return ctx->abi->emit_fbfetch(ctx->abi);32953296/* Other non-fragment cases have inputs and outputs in temporaries. */3297if (ctx->stage != MESA_SHADER_FRAGMENT) {3298for (unsigned chan = component; chan < count + component; chan++) {3299if (is_output) {3300values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->abi->outputs[base * 4 + chan], "");3301} else {3302values[chan] = ctx->abi->inputs[base * 4 + chan];3303if (!values[chan])3304values[chan] = LLVMGetUndef(ctx->ac.i32);3305}3306}3307LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);3308return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");3309}33103311/* Fragment shader inputs. 
*/3312unsigned vertex_id = 2; /* P0 */33133314if (instr->intrinsic == nir_intrinsic_load_input_vertex) {3315nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);33163317switch (src0[0].i32) {3318case 0:3319vertex_id = 2;3320break;3321case 1:3322vertex_id = 0;3323break;3324case 2:3325vertex_id = 1;3326break;3327default:3328unreachable("Invalid vertex index");3329}3330}33313332LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);33333334for (unsigned chan = 0; chan < count; chan++) {3335LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);3336values[chan] =3337ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,3338attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask));3339values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");3340if (instr->dest.ssa.bit_size == 16 &&3341nir_intrinsic_io_semantics(instr).high_16bits)3342values[chan] = LLVMBuildLShr(ctx->ac.builder, values[chan], LLVMConstInt(ctx->ac.i32, 16, 0), "");3343values[chan] =3344LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],3345instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");3346}33473348LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);3349return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");3350}33513352static LLVMValueRef3353emit_load_frag_shading_rate(struct ac_nir_context *ctx)3354{3355LLVMValueRef x_rate, y_rate, cond;33563357/* VRS Rate X = Ancillary[2:3]3358* VRS Rate Y = Ancillary[4:5]3359*/3360x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);3361y_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 4, 2);33623363/* xRate = xRate == 0x1 ? Horizontal2Pixels : None. */3364cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");3365x_rate = LLVMBuildSelect(ctx->ac.builder, cond,3366LLVMConstInt(ctx->ac.i32, 4, false), ctx->ac.i32_0, "");33673368/* yRate = yRate == 0x1 ? Vertical2Pixels : None. */3369cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, y_rate, ctx->ac.i32_1, "");3370y_rate = LLVMBuildSelect(ctx->ac.builder, cond,3371LLVMConstInt(ctx->ac.i32, 1, false), ctx->ac.i32_0, "");33723373return LLVMBuildOr(ctx->ac.builder, x_rate, y_rate, "");3374}33753376static LLVMValueRef3377emit_load_frag_coord(struct ac_nir_context *ctx)3378{3379LLVMValueRef values[4] = {3380ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),3381ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),3382ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};33833384if (ctx->abi->adjust_frag_coord_z) {3385/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */3386LLVMValueRef frag_z = values[2];33873388/* dFdx fine */3389LLVMValueRef adjusted_frag_z = emit_ddxy(ctx, nir_op_fddx_fine, frag_z);33903391/* adjusted_frag_z * 0.0625 + frag_z */3392adjusted_frag_z = LLVMBuildFAdd(ctx->ac.builder, frag_z,3393LLVMBuildFMul(ctx->ac.builder, adjusted_frag_z,3394LLVMConstReal(ctx->ac.f32, 0.0625), ""), "");33953396/* VRS Rate X = Ancillary[2:3] */3397LLVMValueRef x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);33983399/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. 
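       * I.e. the adjusted value (frag_z + dFdx_fine(frag_z) * 0.0625) computed
       * above is only used when the VRS X-rate field is 1 (2-pixel-wide coarse
       * shading); otherwise the original gl_FragCoord.z is kept unchanged.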
       */
      LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
      values[2] = LLVMBuildSelect(ctx->ac.builder, cond, adjusted_frag_z, frag_z, "");
   }

   return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
}

static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
   LLVMValueRef result = NULL;

   switch (instr->intrinsic) {
   case nir_intrinsic_ballot:
      result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
      if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
         result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
      break;
   case nir_intrinsic_read_invocation:
      result =
         ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
      break;
   case nir_intrinsic_read_first_invocation:
      result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
      break;
   case nir_intrinsic_load_subgroup_invocation:
      result = ac_get_thread_id(&ctx->ac);
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef values[3];

      for (int i = 0; i < 3; i++) {
         values[i] = ctx->args->workgroup_ids[i].used
                        ? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
                        : ctx->ac.i32_0;
      }

      result = ac_build_gather_values(&ctx->ac, values, 3);
      break;
   }
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
      result = ctx->abi->load_base_vertex(ctx->abi,
                                          instr->intrinsic == nir_intrinsic_load_base_vertex);
      break;
   case nir_intrinsic_load_workgroup_size:
      result = ctx->abi->load_local_group_size(ctx->abi);
      break;
   case nir_intrinsic_load_vertex_id:
      result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id),
                            ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
      break;
   case nir_intrinsic_load_vertex_id_zero_base: {
      result = ctx->abi->vertex_id;
      break;
   }
   case nir_intrinsic_load_local_invocation_id: {
      LLVMValueRef ids = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);

      if (LLVMGetTypeKind(LLVMTypeOf(ids)) == LLVMIntegerTypeKind) {
         /* Thread IDs are packed in VGPR0, 10 bits per component.
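          * The unpacking below is equivalent to id[i] = (ids >> (i * 10)) & 0x3ff,
          * i.e. x in bits [9:0], y in bits [19:10] and z in bits [29:20].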
*/3460LLVMValueRef id[3];34613462for (unsigned i = 0; i < 3; i++)3463id[i] = ac_unpack_param(&ctx->ac, ids, i * 10, 10);34643465result = ac_build_gather_values(&ctx->ac, id, 3);3466} else {3467result = ids;3468}3469break;3470}3471case nir_intrinsic_load_base_instance:3472result = ac_get_arg(&ctx->ac, ctx->args->start_instance);3473break;3474case nir_intrinsic_load_draw_id:3475result = ac_get_arg(&ctx->ac, ctx->args->draw_id);3476break;3477case nir_intrinsic_load_view_index:3478result = ac_get_arg(&ctx->ac, ctx->args->view_index);3479break;3480case nir_intrinsic_load_invocation_id:3481if (ctx->stage == MESA_SHADER_TESS_CTRL) {3482result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);3483} else {3484if (ctx->ac.chip_class >= GFX10) {3485result =3486LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),3487LLVMConstInt(ctx->ac.i32, 127, 0), "");3488} else {3489result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);3490}3491}3492break;3493case nir_intrinsic_load_primitive_id:3494if (ctx->stage == MESA_SHADER_GEOMETRY) {3495result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);3496} else if (ctx->stage == MESA_SHADER_TESS_CTRL) {3497result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);3498} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {3499result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);3500} else3501fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);3502break;3503case nir_intrinsic_load_sample_id:3504result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);3505break;3506case nir_intrinsic_load_sample_pos:3507result = load_sample_pos(ctx);3508break;3509case nir_intrinsic_load_sample_mask_in:3510result = ctx->abi->load_sample_mask_in(ctx->abi);3511break;3512case nir_intrinsic_load_frag_coord:3513result = emit_load_frag_coord(ctx);3514break;3515case nir_intrinsic_load_frag_shading_rate:3516result = emit_load_frag_shading_rate(ctx);3517break;3518case nir_intrinsic_load_layer_id:3519result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];3520break;3521case nir_intrinsic_load_front_face:3522result = emit_i2b(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->front_face));3523break;3524case nir_intrinsic_load_helper_invocation:3525result = ac_build_load_helper_invocation(&ctx->ac);3526break;3527case nir_intrinsic_is_helper_invocation:3528result = ac_build_is_helper_invocation(&ctx->ac);3529break;3530case nir_intrinsic_load_color0:3531result = ctx->abi->color0;3532break;3533case nir_intrinsic_load_color1:3534result = ctx->abi->color1;3535break;3536case nir_intrinsic_load_user_data_amd:3537assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);3538result = ctx->abi->user_data;3539break;3540case nir_intrinsic_load_instance_id:3541result = ctx->abi->instance_id;3542break;3543case nir_intrinsic_load_num_workgroups:3544result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);3545break;3546case nir_intrinsic_load_local_invocation_index:3547result = visit_load_local_invocation_index(ctx);3548break;3549case nir_intrinsic_load_subgroup_id:3550result = visit_load_subgroup_id(ctx);3551break;3552case nir_intrinsic_load_num_subgroups:3553result = visit_load_num_subgroups(ctx);3554break;3555case nir_intrinsic_first_invocation:3556result = visit_first_invocation(ctx);3557break;3558case nir_intrinsic_load_push_constant:3559result = visit_load_push_constant(ctx, instr);3560break;3561case nir_intrinsic_vulkan_resource_index: {3562LLVMValueRef index = get_src(ctx, instr->src[0]);3563unsigned 
desc_set = nir_intrinsic_desc_set(instr);3564unsigned binding = nir_intrinsic_binding(instr);35653566result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);3567break;3568}3569case nir_intrinsic_store_ssbo:3570visit_store_ssbo(ctx, instr);3571break;3572case nir_intrinsic_load_ssbo:3573result = visit_load_buffer(ctx, instr);3574break;3575case nir_intrinsic_load_global:3576result = visit_load_global(ctx, instr);3577break;3578case nir_intrinsic_store_global:3579visit_store_global(ctx, instr);3580break;3581case nir_intrinsic_global_atomic_add:3582case nir_intrinsic_global_atomic_imin:3583case nir_intrinsic_global_atomic_umin:3584case nir_intrinsic_global_atomic_imax:3585case nir_intrinsic_global_atomic_umax:3586case nir_intrinsic_global_atomic_and:3587case nir_intrinsic_global_atomic_or:3588case nir_intrinsic_global_atomic_xor:3589case nir_intrinsic_global_atomic_exchange:3590case nir_intrinsic_global_atomic_comp_swap:3591result = visit_global_atomic(ctx, instr);3592break;3593case nir_intrinsic_ssbo_atomic_add:3594case nir_intrinsic_ssbo_atomic_imin:3595case nir_intrinsic_ssbo_atomic_umin:3596case nir_intrinsic_ssbo_atomic_imax:3597case nir_intrinsic_ssbo_atomic_umax:3598case nir_intrinsic_ssbo_atomic_and:3599case nir_intrinsic_ssbo_atomic_or:3600case nir_intrinsic_ssbo_atomic_xor:3601case nir_intrinsic_ssbo_atomic_exchange:3602case nir_intrinsic_ssbo_atomic_comp_swap:3603result = visit_atomic_ssbo(ctx, instr);3604break;3605case nir_intrinsic_load_ubo:3606result = visit_load_ubo_buffer(ctx, instr);3607break;3608case nir_intrinsic_get_ssbo_size:3609result = visit_get_ssbo_size(ctx, instr);3610break;3611case nir_intrinsic_load_input:3612case nir_intrinsic_load_input_vertex:3613case nir_intrinsic_load_per_vertex_input:3614result = visit_load(ctx, instr, false);3615break;3616case nir_intrinsic_load_output:3617case nir_intrinsic_load_per_vertex_output:3618result = visit_load(ctx, instr, true);3619break;3620case nir_intrinsic_store_output:3621case nir_intrinsic_store_per_vertex_output:3622visit_store_output(ctx, instr);3623break;3624case nir_intrinsic_load_shared:3625result = visit_load_shared(ctx, instr);3626break;3627case nir_intrinsic_store_shared:3628visit_store_shared(ctx, instr);3629break;3630case nir_intrinsic_bindless_image_samples:3631case nir_intrinsic_image_deref_samples:3632result = visit_image_samples(ctx, instr);3633break;3634case nir_intrinsic_bindless_image_load:3635result = visit_image_load(ctx, instr, true);3636break;3637case nir_intrinsic_image_deref_load:3638case nir_intrinsic_image_deref_sparse_load:3639result = visit_image_load(ctx, instr, false);3640break;3641case nir_intrinsic_bindless_image_store:3642visit_image_store(ctx, instr, true);3643break;3644case nir_intrinsic_image_deref_store:3645visit_image_store(ctx, instr, false);3646break;3647case nir_intrinsic_bindless_image_atomic_add:3648case nir_intrinsic_bindless_image_atomic_imin:3649case nir_intrinsic_bindless_image_atomic_umin:3650case nir_intrinsic_bindless_image_atomic_imax:3651case nir_intrinsic_bindless_image_atomic_umax:3652case nir_intrinsic_bindless_image_atomic_and:3653case nir_intrinsic_bindless_image_atomic_or:3654case nir_intrinsic_bindless_image_atomic_xor:3655case nir_intrinsic_bindless_image_atomic_exchange:3656case nir_intrinsic_bindless_image_atomic_comp_swap:3657case nir_intrinsic_bindless_image_atomic_inc_wrap:3658case nir_intrinsic_bindless_image_atomic_dec_wrap:3659result = visit_image_atomic(ctx, instr, true);3660break;3661case nir_intrinsic_image_deref_atomic_add:3662case 
nir_intrinsic_image_deref_atomic_imin:3663case nir_intrinsic_image_deref_atomic_umin:3664case nir_intrinsic_image_deref_atomic_imax:3665case nir_intrinsic_image_deref_atomic_umax:3666case nir_intrinsic_image_deref_atomic_and:3667case nir_intrinsic_image_deref_atomic_or:3668case nir_intrinsic_image_deref_atomic_xor:3669case nir_intrinsic_image_deref_atomic_exchange:3670case nir_intrinsic_image_deref_atomic_comp_swap:3671case nir_intrinsic_image_deref_atomic_inc_wrap:3672case nir_intrinsic_image_deref_atomic_dec_wrap:3673result = visit_image_atomic(ctx, instr, false);3674break;3675case nir_intrinsic_bindless_image_size:3676result = visit_image_size(ctx, instr, true);3677break;3678case nir_intrinsic_image_deref_size:3679result = visit_image_size(ctx, instr, false);3680break;3681case nir_intrinsic_shader_clock:3682result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));3683break;3684case nir_intrinsic_discard:3685case nir_intrinsic_discard_if:3686case nir_intrinsic_terminate:3687case nir_intrinsic_terminate_if:3688emit_discard(ctx, instr);3689break;3690case nir_intrinsic_demote:3691case nir_intrinsic_demote_if:3692emit_demote(ctx, instr);3693break;3694case nir_intrinsic_memory_barrier:3695case nir_intrinsic_group_memory_barrier:3696case nir_intrinsic_memory_barrier_buffer:3697case nir_intrinsic_memory_barrier_image:3698case nir_intrinsic_memory_barrier_shared:3699emit_membar(&ctx->ac, instr);3700break;3701case nir_intrinsic_scoped_barrier: {3702assert(!(nir_intrinsic_memory_semantics(instr) &3703(NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));37043705nir_variable_mode modes = nir_intrinsic_memory_modes(instr);37063707unsigned wait_flags = 0;3708if (modes & (nir_var_mem_global | nir_var_mem_ssbo))3709wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;3710if (modes & nir_var_mem_shared)3711wait_flags |= AC_WAIT_LGKM;37123713if (wait_flags)3714ac_build_waitcnt(&ctx->ac, wait_flags);37153716if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)3717ac_emit_barrier(&ctx->ac, ctx->stage);3718break;3719}3720case nir_intrinsic_memory_barrier_tcs_patch:3721break;3722case nir_intrinsic_control_barrier:3723ac_emit_barrier(&ctx->ac, ctx->stage);3724break;3725case nir_intrinsic_shared_atomic_add:3726case nir_intrinsic_shared_atomic_imin:3727case nir_intrinsic_shared_atomic_umin:3728case nir_intrinsic_shared_atomic_imax:3729case nir_intrinsic_shared_atomic_umax:3730case nir_intrinsic_shared_atomic_and:3731case nir_intrinsic_shared_atomic_or:3732case nir_intrinsic_shared_atomic_xor:3733case nir_intrinsic_shared_atomic_exchange:3734case nir_intrinsic_shared_atomic_comp_swap:3735case nir_intrinsic_shared_atomic_fadd: {3736LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size, 0);3737result = visit_var_atomic(ctx, instr, ptr, 1);3738break;3739}3740case nir_intrinsic_deref_atomic_add:3741case nir_intrinsic_deref_atomic_imin:3742case nir_intrinsic_deref_atomic_umin:3743case nir_intrinsic_deref_atomic_imax:3744case nir_intrinsic_deref_atomic_umax:3745case nir_intrinsic_deref_atomic_and:3746case nir_intrinsic_deref_atomic_or:3747case nir_intrinsic_deref_atomic_xor:3748case nir_intrinsic_deref_atomic_exchange:3749case nir_intrinsic_deref_atomic_comp_swap:3750case nir_intrinsic_deref_atomic_fadd: {3751LLVMValueRef ptr = get_src(ctx, instr->src[0]);3752result = visit_var_atomic(ctx, instr, ptr, 1);3753break;3754}3755case nir_intrinsic_load_barycentric_pixel:3756result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));3757break;3758case 
nir_intrinsic_load_barycentric_centroid:3759result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));3760break;3761case nir_intrinsic_load_barycentric_sample:3762result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));3763break;3764case nir_intrinsic_load_barycentric_model:3765result = barycentric_model(ctx);3766break;3767case nir_intrinsic_load_barycentric_at_offset: {3768LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));3769result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);3770break;3771}3772case nir_intrinsic_load_barycentric_at_sample: {3773LLVMValueRef sample_id = get_src(ctx, instr->src[0]);3774result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);3775break;3776}3777case nir_intrinsic_load_interpolated_input: {3778/* We assume any indirect loads have been lowered away */3779ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);3780assert(offset);3781assert(offset[0].i32 == 0);37823783LLVMValueRef interp_param = get_src(ctx, instr->src[0]);3784unsigned index = nir_intrinsic_base(instr);3785unsigned component = nir_intrinsic_component(instr);3786result = load_interpolated_input(ctx, interp_param, index, component,3787instr->dest.ssa.num_components, instr->dest.ssa.bit_size,3788nir_intrinsic_io_semantics(instr).high_16bits);3789break;3790}3791case nir_intrinsic_emit_vertex:3792ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);3793break;3794case nir_intrinsic_emit_vertex_with_counter: {3795unsigned stream = nir_intrinsic_stream_id(instr);3796LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);3797ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);3798break;3799}3800case nir_intrinsic_end_primitive:3801case nir_intrinsic_end_primitive_with_counter:3802ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));3803break;3804case nir_intrinsic_load_tess_coord:3805result = ctx->abi->load_tess_coord(ctx->abi);3806break;3807case nir_intrinsic_load_tess_level_outer:3808result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);3809break;3810case nir_intrinsic_load_tess_level_inner:3811result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);3812break;3813case nir_intrinsic_load_tess_level_outer_default:3814result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);3815break;3816case nir_intrinsic_load_tess_level_inner_default:3817result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);3818break;3819case nir_intrinsic_load_patch_vertices_in:3820result = ctx->abi->load_patch_vertices_in(ctx->abi);3821break;3822case nir_intrinsic_load_tess_rel_patch_id_amd:3823if (ctx->stage == MESA_SHADER_TESS_CTRL)3824result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 0, 8);3825else if (ctx->stage == MESA_SHADER_TESS_EVAL)3826result = ac_get_arg(&ctx->ac, ctx->args->tes_rel_patch_id);3827else3828unreachable("tess_rel_patch_id_amd is only supported by tessellation shaders");3829break;3830case nir_intrinsic_load_ring_tess_factors_amd:3831result = ctx->abi->load_ring_tess_factors(ctx->abi);3832break;3833case nir_intrinsic_load_ring_tess_factors_offset_amd:3834result = ac_get_arg(&ctx->ac, ctx->args->tcs_factor_offset);3835break;3836case nir_intrinsic_load_ring_tess_offchip_amd:3837result = ctx->abi->load_ring_tess_offchip(ctx->abi);3838break;3839case 
nir_intrinsic_load_ring_tess_offchip_offset_amd:3840result = ac_get_arg(&ctx->ac, ctx->args->tess_offchip_offset);3841break;3842case nir_intrinsic_load_ring_esgs_amd:3843result = ctx->abi->load_ring_esgs(ctx->abi);3844break;3845case nir_intrinsic_load_ring_es2gs_offset_amd:3846result = ac_get_arg(&ctx->ac, ctx->args->es2gs_offset);3847break;3848case nir_intrinsic_load_gs_vertex_offset_amd:3849result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[nir_intrinsic_base(instr)]);3850break;3851case nir_intrinsic_vote_all: {3852result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));3853break;3854}3855case nir_intrinsic_vote_any: {3856result = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));3857break;3858}3859case nir_intrinsic_shuffle:3860if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||3861(ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {3862result =3863ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));3864} else {3865LLVMValueRef src = get_src(ctx, instr->src[0]);3866LLVMValueRef index = get_src(ctx, instr->src[1]);3867LLVMTypeRef type = LLVMTypeOf(src);3868struct waterfall_context wctx;3869LLVMValueRef index_val;38703871index_val = enter_waterfall(ctx, &wctx, index, true);38723873src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");38743875result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,3876(LLVMValueRef[]){src, index_val}, 2,3877AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);38783879result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");38803881result = exit_waterfall(ctx, &wctx, result);3882}3883break;3884case nir_intrinsic_reduce:3885result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],3886instr->const_index[1]);3887break;3888case nir_intrinsic_inclusive_scan:3889result =3890ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);3891break;3892case nir_intrinsic_exclusive_scan:3893result =3894ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);3895break;3896case nir_intrinsic_quad_broadcast: {3897unsigned lane = nir_src_as_uint(instr->src[1]);3898result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);3899break;3900}3901case nir_intrinsic_quad_swap_horizontal:3902result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);3903break;3904case nir_intrinsic_quad_swap_vertical:3905result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);3906break;3907case nir_intrinsic_quad_swap_diagonal:3908result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);3909break;3910case nir_intrinsic_quad_swizzle_amd: {3911uint32_t mask = nir_intrinsic_swizzle_mask(instr);3912result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,3913(mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);3914break;3915}3916case nir_intrinsic_masked_swizzle_amd: {3917uint32_t mask = nir_intrinsic_swizzle_mask(instr);3918result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);3919break;3920}3921case nir_intrinsic_write_invocation_amd:3922result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),3923get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));3924break;3925case nir_intrinsic_mbcnt_amd:3926result = ac_build_mbcnt_add(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));3927break;3928case nir_intrinsic_load_scratch: {3929LLVMValueRef 
offset = get_src(ctx, instr->src[0]);
      LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
      LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
      LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
                                ? comp_type
                                : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
      unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
      ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
      result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
      break;
   }
   case nir_intrinsic_store_scratch: {
      LLVMValueRef offset = get_src(ctx, instr->src[1]);
      LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
      LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
      unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
      ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
      LLVMValueRef src = get_src(ctx, instr->src[0]);
      unsigned wrmask = nir_intrinsic_write_mask(instr);
      while (wrmask) {
         int start, count;
         u_bit_scan_consecutive_range(&wrmask, &start, &count);

         LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
         LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
         LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
         offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
                                       LLVMPointerType(vec_type, addr_space), "");
         LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
         LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
      }
      break;
   }
   case nir_intrinsic_load_constant: {
      unsigned base = nir_intrinsic_base(instr);
      unsigned range = nir_intrinsic_range(instr);

      LLVMValueRef offset = get_src(ctx, instr->src[0]);
      offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");

      /* Clamp the offset to avoid out-of-bound access because global
       * instructions can't handle them.
       */
      LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
      LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
      offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");

      LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
      LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
      LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
                                ? comp_type
                                : LLVMVectorType(comp_type, instr->dest.ssa.num_components);
      unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
      ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
      result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
      break;
   }
   case nir_intrinsic_set_vertex_and_primitive_count:
      /* Currently ignored.
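       * The vertex and primitive counts are presumably already tracked through
       * the emit_vertex_with_counter/emit_primitive ABI callbacks, so nothing
       * needs to be emitted here.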
*/3988break;3989case nir_intrinsic_load_buffer_amd: {3990LLVMValueRef descriptor = get_src(ctx, instr->src[0]);3991LLVMValueRef addr_voffset = get_src(ctx, instr->src[1]);3992LLVMValueRef addr_soffset = get_src(ctx, instr->src[2]);3993unsigned num_components = instr->dest.ssa.num_components;3994unsigned const_offset = nir_intrinsic_base(instr);3995bool swizzled = nir_intrinsic_is_swizzled(instr);3996bool reorder = nir_intrinsic_can_reorder(instr);3997bool slc = nir_intrinsic_slc_amd(instr);39983999enum ac_image_cache_policy cache_policy = ac_glc;4000if (swizzled)4001cache_policy |= ac_swizzled;4002if (slc)4003cache_policy |= ac_slc;4004if (ctx->ac.chip_class >= GFX10)4005cache_policy |= ac_dlc;40064007LLVMTypeRef channel_type;4008if (instr->dest.ssa.bit_size == 8)4009channel_type = ctx->ac.i8;4010else if (instr->dest.ssa.bit_size == 16)4011channel_type = ctx->ac.i16;4012else if (instr->dest.ssa.bit_size == 32)4013channel_type = ctx->ac.i32;4014else if (instr->dest.ssa.bit_size == 64)4015channel_type = ctx->ac.i64;4016else if (instr->dest.ssa.bit_size == 128)4017channel_type = ctx->ac.i128;4018else4019unreachable("Unsupported channel type for load_buffer_amd");40204021result = ac_build_buffer_load(&ctx->ac, descriptor, num_components, NULL,4022addr_voffset, addr_soffset, const_offset,4023channel_type, cache_policy, reorder, false);4024result = ac_to_integer(&ctx->ac, ac_trim_vector(&ctx->ac, result, num_components));4025break;4026}4027case nir_intrinsic_store_buffer_amd: {4028LLVMValueRef store_data = get_src(ctx, instr->src[0]);4029LLVMValueRef descriptor = get_src(ctx, instr->src[1]);4030LLVMValueRef addr_voffset = get_src(ctx, instr->src[2]);4031LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]);4032unsigned num_components = instr->src[0].ssa->num_components;4033unsigned const_offset = nir_intrinsic_base(instr);4034bool swizzled = nir_intrinsic_is_swizzled(instr);4035bool slc = nir_intrinsic_slc_amd(instr);40364037enum ac_image_cache_policy cache_policy = ac_glc;4038if (swizzled)4039cache_policy |= ac_swizzled;4040if (slc)4041cache_policy |= ac_slc;40424043ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, num_components,4044addr_voffset, addr_soffset, const_offset,4045cache_policy);4046break;4047}4048default:4049fprintf(stderr, "Unknown intrinsic: ");4050nir_print_instr(&instr->instr, stderr);4051fprintf(stderr, "\n");4052abort();4053break;4054}4055if (result) {4056ctx->ssa_defs[instr->dest.ssa.index] = result;4057}4058}40594060static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index,4061unsigned constant_index,4062LLVMValueRef dynamic_index)4063{4064LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);4065LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,4066LLVMConstInt(ctx->ac.i32, constant_index, 0), "");40674068/* Bindless uniforms are 64bit so multiple index by 8 */4069index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");4070offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");40714072LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, 0, 0, false, ctx->ac.i32_0);40734074LLVMValueRef ret =4075ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, NULL, 0, ctx->ac.f32, 0, true, true);40764077return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");4078}40794080struct sampler_desc_address {4081unsigned descriptor_set;4082unsigned base_index; /* binding in vulkan */4083unsigned constant_index;4084LLVMValueRef dynamic_index;4085bool image;4086bool 
bindless;4087};40884089static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx,4090nir_deref_instr *deref_instr,4091const nir_instr *instr, bool image)4092{4093LLVMValueRef index = NULL;4094unsigned constant_index = 0;4095unsigned descriptor_set;4096unsigned base_index;4097bool bindless = false;40984099if (!deref_instr) {4100descriptor_set = 0;4101if (image) {4102nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);4103base_index = 0;4104bindless = true;4105index = get_src(ctx, img_instr->src[0]);4106} else {4107nir_tex_instr *tex_instr = nir_instr_as_tex(instr);4108int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle);4109if (sampSrcIdx != -1) {4110base_index = 0;4111bindless = true;4112index = get_src(ctx, tex_instr->src[sampSrcIdx].src);4113} else {4114assert(tex_instr && !image);4115base_index = tex_instr->sampler_index;4116}4117}4118} else {4119while (deref_instr->deref_type != nir_deref_type_var) {4120if (deref_instr->deref_type == nir_deref_type_array) {4121unsigned array_size = glsl_get_aoa_size(deref_instr->type);4122if (!array_size)4123array_size = 1;41244125if (nir_src_is_const(deref_instr->arr.index)) {4126constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);4127} else {4128LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);41294130indirect = LLVMBuildMul(ctx->ac.builder, indirect,4131LLVMConstInt(ctx->ac.i32, array_size, false), "");41324133if (!index)4134index = indirect;4135else4136index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");4137}41384139deref_instr = nir_src_as_deref(deref_instr->parent);4140} else if (deref_instr->deref_type == nir_deref_type_struct) {4141unsigned sidx = deref_instr->strct.index;4142deref_instr = nir_src_as_deref(deref_instr->parent);4143constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);4144} else {4145unreachable("Unsupported deref type");4146}4147}4148descriptor_set = deref_instr->var->data.descriptor_set;41494150if (deref_instr->var->data.bindless) {4151/* For now just assert on unhandled variable types */4152assert(deref_instr->var->data.mode == nir_var_uniform);41534154base_index = deref_instr->var->data.driver_location;4155bindless = true;41564157index = index ? index : ctx->ac.i32_0;4158index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index);4159} else4160base_index = deref_instr->var->data.binding;4161}4162return (struct sampler_desc_address){4163.descriptor_set = descriptor_set,4164.base_index = base_index,4165.constant_index = constant_index,4166.dynamic_index = index,4167.image = image,4168.bindless = bindless,4169};4170}41714172/* Extract any possibly divergent index into a separate value that can be fed4173* into get_sampler_desc with the same arguments. 
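 * This lets callers such as tex_fetch_ptrs run the divergent index through a
 * waterfall loop (enter_waterfall/exit_waterfall) before the descriptor is
 * actually fetched with get_sampler_desc.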
*/4174static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,4175const nir_instr *instr, bool image)4176{4177struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);4178return addr.dynamic_index;4179}41804181static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,4182enum ac_descriptor_type desc_type, const nir_instr *instr,4183LLVMValueRef index, bool image, bool write)4184{4185struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);4186return ctx->abi->load_sampler_desc(ctx->abi, addr.descriptor_set, addr.base_index,4187addr.constant_index, index, desc_type, addr.image, write,4188addr.bindless);4189}41904191/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.4192*4193* GFX6-GFX7:4194* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic4195* filtering manually. The driver sets img7 to a mask clearing4196* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:4197* s_and_b32 samp0, samp0, img74198*4199* GFX8:4200* The ANISO_OVERRIDE sampler field enables this fix in TA.4201*/4202static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res,4203LLVMValueRef samp)4204{4205LLVMBuilderRef builder = ctx->ac.builder;4206LLVMValueRef img7, samp0;42074208if (ctx->ac.chip_class >= GFX8)4209return samp;42104211img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");4212samp0 = LLVMBuildExtractElement(builder, samp, LLVMConstInt(ctx->ac.i32, 0, 0), "");4213samp0 = LLVMBuildAnd(builder, samp0, img7, "");4214return LLVMBuildInsertElement(builder, samp, samp0, LLVMConstInt(ctx->ac.i32, 0, 0), "");4215}42164217static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,4218struct waterfall_context *wctx, LLVMValueRef *res_ptr,4219LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)4220{4221nir_deref_instr *texture_deref_instr = NULL;4222nir_deref_instr *sampler_deref_instr = NULL;4223int plane = -1;42244225for (unsigned i = 0; i < instr->num_srcs; i++) {4226switch (instr->src[i].src_type) {4227case nir_tex_src_texture_deref:4228texture_deref_instr = nir_src_as_deref(instr->src[i].src);4229break;4230case nir_tex_src_sampler_deref:4231sampler_deref_instr = nir_src_as_deref(instr->src[i].src);4232break;4233case nir_tex_src_plane:4234plane = nir_src_as_int(instr->src[i].src);4235break;4236default:4237break;4238}4239}42404241LLVMValueRef texture_dynamic_index =4242get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);4243if (!sampler_deref_instr)4244sampler_deref_instr = texture_deref_instr;42454246LLVMValueRef sampler_dynamic_index =4247get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);4248if (instr->texture_non_uniform)4249texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);42504251if (instr->sampler_non_uniform)4252sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);42534254enum ac_descriptor_type main_descriptor =4255instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? 
AC_DESC_BUFFER : AC_DESC_IMAGE;42564257if (plane >= 0) {4258assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);4259assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);42604261main_descriptor = AC_DESC_PLANE_0 + plane;4262}42634264if (instr->op == nir_texop_fragment_mask_fetch) {4265/* The fragment mask is fetched from the compressed4266* multisampled surface.4267*/4268main_descriptor = AC_DESC_FMASK;4269}42704271*res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,4272texture_dynamic_index, false, false);42734274if (samp_ptr) {4275*samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,4276sampler_dynamic_index, false, false);4277if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)4278*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);4279}4280if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))4281*fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,4282texture_dynamic_index, false, false);4283}42844285static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord)4286{4287coord = ac_to_float(ctx, coord);4288coord = ac_build_round(ctx, coord);4289coord = ac_to_integer(ctx, coord);4290return coord;4291}42924293static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)4294{4295LLVMValueRef result = NULL;4296struct ac_image_args args = {0};4297LLVMValueRef fmask_ptr = NULL, sample_index = NULL;4298LLVMValueRef ddx = NULL, ddy = NULL;4299unsigned offset_src = 0;4300struct waterfall_context wctx[2] = {{{0}}};43014302tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);43034304for (unsigned i = 0; i < instr->num_srcs; i++) {4305switch (instr->src[i].src_type) {4306case nir_tex_src_coord: {4307LLVMValueRef coord = get_src(ctx, instr->src[i].src);4308args.a16 = instr->src[i].src.ssa->bit_size == 16;4309for (unsigned chan = 0; chan < instr->coord_components; ++chan)4310args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);4311break;4312}4313case nir_tex_src_projector:4314break;4315case nir_tex_src_comparator:4316if (instr->is_shadow) {4317args.compare = get_src(ctx, instr->src[i].src);4318args.compare = ac_to_float(&ctx->ac, args.compare);4319assert(instr->src[i].src.ssa->bit_size == 32);4320}4321break;4322case nir_tex_src_offset:4323args.offset = get_src(ctx, instr->src[i].src);4324offset_src = i;4325/* We pack it with bit shifts, so we need it to be 32-bit. 
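 * (Each offset component is later masked to 6 bits and packed into a single
 * dword at bit position chan * 8; see the packing loop after this source
 * walk.)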
*/4326assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.offset)) == 32);4327break;4328case nir_tex_src_bias:4329args.bias = get_src(ctx, instr->src[i].src);4330assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.bias)) == 32);4331break;4332case nir_tex_src_lod:4333if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)4334args.level_zero = true;4335else4336args.lod = get_src(ctx, instr->src[i].src);4337break;4338case nir_tex_src_ms_index:4339sample_index = get_src(ctx, instr->src[i].src);4340break;4341case nir_tex_src_ms_mcs:4342break;4343case nir_tex_src_ddx:4344ddx = get_src(ctx, instr->src[i].src);4345args.g16 = instr->src[i].src.ssa->bit_size == 16;4346break;4347case nir_tex_src_ddy:4348ddy = get_src(ctx, instr->src[i].src);4349assert(LLVMTypeOf(ddy) == LLVMTypeOf(ddx));4350break;4351case nir_tex_src_min_lod:4352args.min_lod = get_src(ctx, instr->src[i].src);4353break;4354case nir_tex_src_texture_offset:4355case nir_tex_src_sampler_offset:4356case nir_tex_src_plane:4357default:4358break;4359}4360}43614362if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {4363result = get_buffer_size(ctx, args.resource, true);4364goto write_result;4365}43664367if (instr->op == nir_texop_texture_samples) {4368LLVMValueRef res, samples, is_msaa;4369LLVMValueRef default_sample;43704371res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");4372samples =4373LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), "");4374is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), "");4375is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), "");4376is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,4377LLVMConstInt(ctx->ac.i32, 0xe, false), "");43784379samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), "");4380samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), "");4381samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, "");43824383if (ctx->abi->robust_buffer_access) {4384LLVMValueRef dword1, is_null_descriptor;43854386/* Extract the second dword of the descriptor, if it's4387* all zero, then it's a null descriptor.4388*/4389dword1 =4390LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), "");4391is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,4392LLVMConstInt(ctx->ac.i32, 0, false), "");4393default_sample =4394LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, "");4395} else {4396default_sample = ctx->ac.i32_1;4397}43984399samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, "");4400result = samples;4401goto write_result;4402}44034404if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {4405LLVMValueRef offset[3], pack;4406for (unsigned chan = 0; chan < 3; ++chan)4407offset[chan] = ctx->ac.i32_0;44084409unsigned num_components = ac_get_llvm_num_components(args.offset);4410for (unsigned chan = 0; chan < num_components; chan++) {4411offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);4412offset[chan] =4413LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), "");4414if (chan)4415offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],4416LLVMConstInt(ctx->ac.i32, chan * 8, false), "");4417}4418pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");4419pack = 
LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");4420args.offset = pack;4421}44224423/* Section 8.23.1 (Depth Texture Comparison Mode) of the4424* OpenGL 4.5 spec says:4425*4426* "If the texture’s internal format indicates a fixed-point4427* depth texture, then D_t and D_ref are clamped to the4428* range [0, 1]; otherwise no clamping is performed."4429*4430* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,4431* so the depth comparison value isn't clamped for Z16 and4432* Z24 anymore. Do it manually here for GFX8-9; GFX10 has4433* an explicitly clamped 32-bit float format.4434*/4435if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&4436ctx->abi->clamp_shadow_reference) {4437LLVMValueRef upgraded, clamped;44384439upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,4440LLVMConstInt(ctx->ac.i32, 3, false), "");4441upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), "");4442upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");4443clamped = ac_build_clamp(&ctx->ac, args.compare);4444args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, "");4445}44464447/* pack derivatives */4448if (ddx || ddy) {4449int num_src_deriv_channels, num_dest_deriv_channels;4450switch (instr->sampler_dim) {4451case GLSL_SAMPLER_DIM_3D:4452case GLSL_SAMPLER_DIM_CUBE:4453num_src_deriv_channels = 3;4454num_dest_deriv_channels = 3;4455break;4456case GLSL_SAMPLER_DIM_2D:4457default:4458num_src_deriv_channels = 2;4459num_dest_deriv_channels = 2;4460break;4461case GLSL_SAMPLER_DIM_1D:4462num_src_deriv_channels = 1;4463if (ctx->ac.chip_class == GFX9) {4464num_dest_deriv_channels = 2;4465} else {4466num_dest_deriv_channels = 1;4467}4468break;4469}44704471for (unsigned i = 0; i < num_src_deriv_channels; i++) {4472args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));4473args.derivs[num_dest_deriv_channels + i] =4474ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));4475}4476for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {4477LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;4478args.derivs[i] = zero;4479args.derivs[num_dest_deriv_channels + i] = zero;4480}4481}44824483if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {4484for (unsigned chan = 0; chan < instr->coord_components; chan++)4485args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);4486if (instr->coord_components == 3)4487args.coords[3] = LLVMGetUndef(args.a16 ? 
ctx->ac.f16 : ctx->ac.f32);4488ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,4489instr->op == nir_texop_lod, args.coords, args.derivs);4490}44914492/* Texture coordinates fixups */4493if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&4494instr->is_array && instr->op != nir_texop_txf) {4495args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);4496}44974498if (instr->coord_components > 2 &&4499(instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||4500instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||4501instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&4502instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&4503instr->op != nir_texop_fragment_fetch && instr->op != nir_texop_fragment_mask_fetch) {4504args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);4505}45064507if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&4508instr->op != nir_texop_lod) {4509LLVMValueRef filler;4510if (instr->op == nir_texop_txf)4511filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;4512else4513filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);45144515if (instr->is_array)4516args.coords[2] = args.coords[1];4517args.coords[1] = filler;4518}45194520/* Pack sample index */4521if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch))4522args.coords[instr->coord_components] = sample_index;45234524if (instr->op == nir_texop_samples_identical) {4525struct ac_image_args txf_args = {0};4526memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));45274528txf_args.dmask = 0xf;4529txf_args.resource = fmask_ptr;4530txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;4531result = build_tex_intrinsic(ctx, instr, &txf_args);45324533result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");4534result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);4535goto write_result;4536}45374538if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||4539instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&4540instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch &&4541instr->op != nir_texop_fragment_mask_fetch) {4542unsigned sample_chan = instr->is_array ? 3 : 2;4543args.coords[sample_chan] = adjust_sample_index_using_fmask(4544&ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL,4545args.coords[sample_chan], fmask_ptr);4546}45474548if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {4549int num_offsets = instr->src[offset_src].src.ssa->num_components;4550num_offsets = MIN2(num_offsets, instr->coord_components);4551for (unsigned i = 0; i < num_offsets; ++i) {4552LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);4553if (args.a16)4554off = LLVMBuildTrunc(ctx->ac.builder, off, ctx->ac.i16, "");4555args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");4556}4557args.offset = NULL;4558}45594560/* DMASK was repurposed for GATHER4. 4 components are always4561* returned and DMASK works like a swizzle - it selects4562* the component to fetch. The only valid DMASK values are4563* 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns4564* (red,red,red,red) etc.) 
The ISA document doesn't mention4565* this.4566*/4567args.dmask = 0xf;4568if (instr->op == nir_texop_tg4) {4569if (instr->is_shadow)4570args.dmask = 1;4571else4572args.dmask = 1 << instr->component;4573}45744575if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {4576args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);4577args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;4578}45794580/* Adjust the number of coordinates because we only need (x,y) for 2D4581* multisampled images and (x,y,layer) for 2D multisampled layered4582* images or for multisampled input attachments.4583*/4584if (instr->op == nir_texop_fragment_mask_fetch) {4585if (args.dim == ac_image_2dmsaa) {4586args.dim = ac_image_2d;4587} else {4588assert(args.dim == ac_image_2darraymsaa);4589args.dim = ac_image_2darray;4590}4591}45924593/* Set TRUNC_COORD=0 for textureGather(). */4594if (instr->op == nir_texop_tg4) {4595LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, "");4596dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), "");4597args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, "");4598}45994600assert(instr->dest.is_ssa);4601args.d16 = instr->dest.ssa.bit_size == 16;4602args.tfe = instr->is_sparse;46034604result = build_tex_intrinsic(ctx, instr, &args);46054606LLVMValueRef code = NULL;4607if (instr->is_sparse) {4608code = ac_llvm_extract_elem(&ctx->ac, result, 4);4609result = ac_trim_vector(&ctx->ac, result, 4);4610}46114612if (instr->op == nir_texop_query_levels)4613result =4614LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");4615else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&4616instr->op != nir_texop_lod && instr->op != nir_texop_tg4)4617result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");4618else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&4619instr->is_array) {4620LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);4621LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);4622LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");4623z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");4624result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");4625} else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&4626instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {4627LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);4628LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");4629result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, "");4630} else if (nir_tex_instr_result_size(instr) != 4)4631result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);46324633if (instr->is_sparse)4634result = ac_build_concat(&ctx->ac, result, code);46354636write_result:4637if (result) {4638assert(instr->dest.is_ssa);4639result = ac_to_integer(&ctx->ac, result);46404641for (int i = ARRAY_SIZE(wctx); --i >= 0;) {4642result = exit_waterfall(ctx, wctx + i, result);4643}46444645ctx->ssa_defs[instr->dest.ssa.index] = result;4646}4647}46484649static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)4650{4651LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);4652LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");46534654ctx->ssa_defs[instr->dest.ssa.index] = 
result;4655_mesa_hash_table_insert(ctx->phis, instr, result);4656}46574658static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi)4659{4660nir_foreach_phi_src (src, instr) {4661LLVMBasicBlockRef block = get_block(ctx, src->pred);4662LLVMValueRef llvm_src = get_src(ctx, src->src);46634664LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);4665}4666}46674668static void phi_post_pass(struct ac_nir_context *ctx)4669{4670hash_table_foreach(ctx->phis, entry)4671{4672visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data);4673}4674}46754676static bool is_def_used_in_an_export(const nir_ssa_def *def)4677{4678nir_foreach_use (use_src, def) {4679if (use_src->parent_instr->type == nir_instr_type_intrinsic) {4680nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);4681if (instr->intrinsic == nir_intrinsic_store_deref)4682return true;4683} else if (use_src->parent_instr->type == nir_instr_type_alu) {4684nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);4685if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {4686return true;4687}4688}4689}4690return false;4691}46924693static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)4694{4695unsigned num_components = instr->def.num_components;4696LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);46974698if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {4699LLVMValueRef undef;47004701if (num_components == 1)4702undef = LLVMGetUndef(type);4703else {4704undef = LLVMGetUndef(LLVMVectorType(type, num_components));4705}4706ctx->ssa_defs[instr->def.index] = undef;4707} else {4708LLVMValueRef zero = LLVMConstInt(type, 0, false);4709if (num_components > 1) {4710zero = ac_build_gather_values_extended(&ctx->ac, &zero, 4, 0, false, false);4711}4712ctx->ssa_defs[instr->def.index] = zero;4713}4714}47154716static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr)4717{4718switch (instr->type) {4719case nir_jump_break:4720ac_build_break(ctx);4721break;4722case nir_jump_continue:4723ac_build_continue(ctx);4724break;4725default:4726fprintf(stderr, "Unknown NIR jump instr: ");4727nir_print_instr(&instr->instr, stderr);4728fprintf(stderr, "\n");4729abort();4730}4731}47324733static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type)4734{4735switch (type) {4736case GLSL_TYPE_INT:4737case GLSL_TYPE_UINT:4738case GLSL_TYPE_BOOL:4739case GLSL_TYPE_SUBROUTINE:4740return ac->i32;4741case GLSL_TYPE_INT8:4742case GLSL_TYPE_UINT8:4743return ac->i8;4744case GLSL_TYPE_INT16:4745case GLSL_TYPE_UINT16:4746return ac->i16;4747case GLSL_TYPE_FLOAT:4748return ac->f32;4749case GLSL_TYPE_FLOAT16:4750return ac->f16;4751case GLSL_TYPE_INT64:4752case GLSL_TYPE_UINT64:4753return ac->i64;4754case GLSL_TYPE_DOUBLE:4755return ac->f64;4756default:4757unreachable("unknown GLSL type");4758}4759}47604761static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type)4762{4763if (glsl_type_is_scalar(type)) {4764return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));4765}47664767if (glsl_type_is_vector(type)) {4768return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),4769glsl_get_vector_elements(type));4770}47714772if (glsl_type_is_matrix(type)) {4773return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)),4774glsl_get_matrix_columns(type));4775}47764777if 
(glsl_type_is_array(type)) {4778return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)),4779glsl_get_length(type));4780}47814782assert(glsl_type_is_struct_or_ifc(type));47834784LLVMTypeRef *const member_types = alloca(glsl_get_length(type) * sizeof(LLVMTypeRef));47854786for (unsigned i = 0; i < glsl_get_length(type); i++) {4787member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i));4788}47894790return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false);4791}47924793static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)4794{4795if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared | nir_var_mem_global))4796return;47974798LLVMValueRef result = NULL;4799switch (instr->deref_type) {4800case nir_deref_type_var: {4801struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);4802result = entry->data;4803break;4804}4805case nir_deref_type_struct:4806if (nir_deref_mode_is(instr, nir_var_mem_global)) {4807nir_deref_instr *parent = nir_deref_instr_parent(instr);4808uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);4809result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),4810LLVMConstInt(ctx->ac.i32, offset, 0));4811} else {4812result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),4813LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));4814}4815break;4816case nir_deref_type_array:4817if (nir_deref_mode_is(instr, nir_var_mem_global)) {4818nir_deref_instr *parent = nir_deref_instr_parent(instr);4819unsigned stride = glsl_get_explicit_stride(parent->type);48204821if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||4822(glsl_type_is_vector(parent->type) && stride == 0))4823stride = type_scalar_size_bytes(parent->type);48244825assert(stride > 0);4826LLVMValueRef index = get_src(ctx, instr->arr.index);4827if (LLVMTypeOf(index) != ctx->ac.i64)4828index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");48294830LLVMValueRef offset =4831LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");48324833result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);4834} else {4835result =4836ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));4837}4838break;4839case nir_deref_type_ptr_as_array:4840if (nir_deref_mode_is(instr, nir_var_mem_global)) {4841unsigned stride = nir_deref_instr_array_stride(instr);48424843LLVMValueRef index = get_src(ctx, instr->arr.index);4844if (LLVMTypeOf(index) != ctx->ac.i64)4845index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");48464847LLVMValueRef offset =4848LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");48494850result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);4851} else {4852result =4853ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));4854}4855break;4856case nir_deref_type_cast: {4857result = get_src(ctx, instr->parent);48584859/* We can't use the structs from LLVM because the shader4860* specifies its own offsets. 
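 * For global memory the pointee type is therefore kept as plain i8 and byte
 * offsets derived from the GLSL explicit offsets/strides are applied with
 * GEPs above, while shared (LDS) derefs keep the aggregate type from
 * glsl_to_llvm_type.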
static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared | nir_var_mem_global))
      return;

   LLVMValueRef result = NULL;
   switch (instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
      result = entry->data;
      break;
   }
   case nir_deref_type_struct:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
                                   LLVMConstInt(ctx->ac.i32, offset, 0));
      } else {
         result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
                                LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
      }
      break;
   case nir_deref_type_array:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         nir_deref_instr *parent = nir_deref_instr_parent(instr);
         unsigned stride = glsl_get_explicit_stride(parent->type);

         if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
             (glsl_type_is_vector(parent->type) && stride == 0))
            stride = type_scalar_size_bytes(parent->type);

         assert(stride > 0);
         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->ac.i64)
            index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
      } else {
         result =
            ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_ptr_as_array:
      if (nir_deref_mode_is(instr, nir_var_mem_global)) {
         unsigned stride = nir_deref_instr_array_stride(instr);

         LLVMValueRef index = get_src(ctx, instr->arr.index);
         if (LLVMTypeOf(index) != ctx->ac.i64)
            index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

         LLVMValueRef offset =
            LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

         result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
      } else {
         result =
            ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
      }
      break;
   case nir_deref_type_cast: {
      result = get_src(ctx, instr->parent);

      /* We can't use the structs from LLVM because the shader
       * specifies its own offsets. */
      LLVMTypeRef pointee_type = ctx->ac.i8;
      if (nir_deref_mode_is(instr, nir_var_mem_shared))
         pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);

      unsigned address_space;

      switch (instr->modes) {
      case nir_var_mem_shared:
         address_space = AC_ADDR_SPACE_LDS;
         break;
      case nir_var_mem_global:
         address_space = AC_ADDR_SPACE_GLOBAL;
         break;
      default:
         unreachable("Unhandled address space");
      }

      LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

      if (LLVMTypeOf(result) != type) {
         if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
            result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
         } else {
            result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, "");
         }
      }
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   ctx->ssa_defs[instr->dest.ssa.index] = result;
}

static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list);

static void visit_block(struct ac_nir_context *ctx, nir_block *block)
{
   LLVMBasicBlockRef blockref = LLVMGetInsertBlock(ctx->ac.builder);
   LLVMValueRef first = LLVMGetFirstInstruction(blockref);
   if (first) {
      /* ac_branch_exited() might have already inserted non-phis */
      LLVMPositionBuilderBefore(ctx->ac.builder, LLVMGetFirstInstruction(blockref));
   }

   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_phi)
         break;
      visit_phi(ctx, nir_instr_as_phi(instr));
   }

   LLVMPositionBuilderAtEnd(ctx->ac.builder, blockref);

   nir_foreach_instr (instr, block) {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(ctx, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(ctx, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(ctx, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(&ctx->ac, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(ctx, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }

   _mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder));
}

static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
{
   LLVMValueRef value = get_src(ctx, if_stmt->condition);

   nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);

   ac_build_ifcc(&ctx->ac, value, then_block->index);

   visit_cf_list(ctx, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);

      ac_build_else(&ctx->ac, else_block->index);
      visit_cf_list(ctx, &if_stmt->else_list);
   }

   ac_build_endif(&ctx->ac, then_block->index);
}

static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
{
   nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);

   ac_build_bgnloop(&ctx->ac, first_loop_block->index);

   visit_cf_list(ctx, &loop->body);

   ac_build_endloop(&ctx->ac, first_loop_block->index);
}

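/* Emit code for every node (block, if, loop) in a NIR control-flow list. */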
static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list)
   {
      switch (node->type) {
      case nir_cf_node_block:
         visit_block(ctx, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if:
         visit_if(ctx, nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         visit_loop(ctx, nir_cf_node_as_loop(node));
         break;

      default:
         assert(0);
      }
   }
}

void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
                                  struct nir_shader *nir, struct nir_variable *variable,
                                  gl_shader_stage stage)
{
   unsigned output_loc = variable->data.driver_location;
   unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

   /* tess ctrl has its own load/store paths for outputs */
   if (stage == MESA_SHADER_TESS_CTRL)
      return;

   if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
       stage == MESA_SHADER_GEOMETRY) {
      int idx = variable->data.location + variable->data.index;
      if (idx == VARYING_SLOT_CLIP_DIST0) {
         int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;

         if (length > 4)
            attrib_count = 2;
         else
            attrib_count = 1;
      }
   }

   bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
   LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
   for (unsigned i = 0; i < attrib_count; ++i) {
      for (unsigned chan = 0; chan < 4; chan++) {
         abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
            ac_build_alloca_undef(ctx, type, "");
      }
   }
}

static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
{
   if (shader->scratch_size == 0)
      return;

   ctx->scratch =
      ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch");
}

static void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader)
{
   if (!shader->constant_data)
      return;

   LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data,
                                                shader->constant_data_size, true);
   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
   LLVMValueRef global =
      LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", AC_ADDR_SPACE_CONST);

   LLVMSetInitializer(global, data);
   LLVMSetGlobalConstant(global, true);
   LLVMSetVisibility(global, LLVMHiddenVisibility);
   ctx->constant_data = global;
}

static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
{
   if (ctx->ac.lds)
      return;

   LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);

   LLVMValueRef lds =
      LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
   LLVMSetAlignment(lds, 64 * 1024);

   ctx->ac.lds =
      LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
}

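/* Entry point of the NIR -> LLVM translation: sets up the translation context
 * and hash tables, walks the shader's control-flow list, then resolves phi
 * incoming values and emits the outputs for non-compute stages.
 */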
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
                      const struct ac_shader_args *args, struct nir_shader *nir)
{
   struct ac_nir_context ctx = {0};
   struct nir_function *func;

   ctx.ac = *ac;
   ctx.abi = abi;
   ctx.args = args;

   ctx.stage = nir->info.stage;
   ctx.info = &nir->info;

   ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

   /* TODO: remove this after RADV switches to lowered IO */
   if (!nir->info.io_lowered) {
      nir_foreach_shader_out_variable(variable, nir)
      {
         ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);
      }
   }

   ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
   ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   if (ctx.abi->kill_ps_if_inf_interp)
      ctx.verified_interp =
         _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   nir_index_ssa_defs(func->impl);
   ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));

   setup_scratch(&ctx, nir);
   setup_constant_data(&ctx, nir);

   if (gl_shader_stage_is_compute(nir->info.stage))
      setup_shared(&ctx, nir);

   if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote &&
       LLVM_VERSION_MAJOR < 13) {
      /* true = don't kill. */
      ctx.ac.postponed_kill = ac_build_alloca_init(&ctx.ac, ctx.ac.i1true, "");
   }

   visit_cf_list(&ctx, &func->impl->body);
   phi_post_pass(&ctx);

   if (ctx.ac.postponed_kill)
      ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, ""));

   if (!gl_shader_stage_is_compute(nir->info.stage))
      ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs);

   free(ctx.ssa_defs);
   ralloc_free(ctx.defs);
   ralloc_free(ctx.phis);
   ralloc_free(ctx.vars);
   if (ctx.abi->kill_ps_if_inf_interp)
      ralloc_free(ctx.verified_interp);
}

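/* Return the mask of tess factor channels written by a store_output
 * intrinsic: inner tess levels occupy the low bits, outer levels are
 * shifted up by 4.
 */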
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
{
   if (intrin->intrinsic != nir_intrinsic_store_output)
      return 0;

   unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
   unsigned location = nir_intrinsic_io_semantics(intrin).location;

   if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
      return writemask << 4;
   else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
      return writemask;

   return 0;
}

static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
                           unsigned *cond_block_tf_writemask,
                           bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
   switch (cf_node->type) {
   case nir_cf_node_block: {
      nir_block *block = nir_cf_node_as_block(cf_node);
      nir_foreach_instr (instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic == nir_intrinsic_control_barrier) {

            /* If we find a barrier in nested control flow put this in the
             * too hard basket. In GLSL this is not possible but it is in
             * SPIR-V.
             */
            if (is_nested_cf) {
               *tessfactors_are_def_in_all_invocs = false;
               return;
            }

            /* The following case must be prevented:
             *    gl_TessLevelInner = ...;
             *    barrier();
             *    if (gl_InvocationID == 1)
             *       gl_TessLevelInner = ...;
             *
             * If you consider disjoint code segments separated by barriers, each
             * such segment that writes tess factor channels should write the same
             * channels in all codepaths within that segment.
             */
            if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
               /* Accumulate the result: */
               *tessfactors_are_def_in_all_invocs &=
                  !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));

               /* Analyze the next code segment from scratch. */
               *upper_block_tf_writemask = 0;
               *cond_block_tf_writemask = 0;
            }
         } else
            *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
      }

      break;
   }
   case nir_cf_node_if: {
      unsigned then_tessfactor_writemask = 0;
      unsigned else_tessfactor_writemask = 0;

      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
      {
         scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
      {
         scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      if (then_tessfactor_writemask || else_tessfactor_writemask) {
         /* If both statements write the same tess factor channels,
          * we can say that the upper block writes them too.
          */
         *upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
         *cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
      }

      break;
   }
   case nir_cf_node_loop: {
      nir_loop *loop = nir_cf_node_as_loop(cf_node);
      foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
      {
         scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
                        tessfactors_are_def_in_all_invocs, true);
      }

      break;
   }
   default:
      unreachable("unknown cf node type");
   }
}

bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
{
   assert(nir->info.stage == MESA_SHADER_TESS_CTRL);

   /* The pass works as follows:
    * If all codepaths write tess factors, we can say that all
    * invocations define tess factors.
    *
    * Each tess factor channel is tracked separately.
    */
   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */

   /* Initial value = true. Here the pass will accumulate results from
    * multiple segments surrounded by barriers. If tess factors aren't
    * written at all, it's a shader bug and we don't care if this will be
    * true.
    */
   bool tessfactors_are_def_in_all_invocs = true;

   nir_foreach_function (function, nir) {
      if (function->impl) {
         foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
         {
            scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
                           &tessfactors_are_def_in_all_invocs, false);
         }
      }
   }

   /* Accumulate the result for the last code segment separated by a
    * barrier.
    */
   if (main_block_tf_writemask || cond_block_tf_writemask) {
      tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
   }

   return tessfactors_are_def_in_all_invocs;
}