Path: blob/21.2-virgl/src/microsoft/compiler/nir_to_dxil.c
4564 views
/*1* Copyright © Microsoft Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "nir_to_dxil.h"2425#include "dxil_module.h"26#include "dxil_container.h"27#include "dxil_function.h"28#include "dxil_signature.h"29#include "dxil_enums.h"30#include "dxil_dump.h"31#include "dxil_nir.h"3233#include "util/u_debug.h"34#include "util/u_math.h"35#include "util/u_dynarray.h"36#include "nir/nir_builder.h"3738#include "git_sha1.h"3940#include "vulkan/vulkan_core.h"4142#include <stdint.h>4344int debug_dxil = 0;4546static const struct debug_named_value47dxil_debug_options[] = {48{ "verbose", DXIL_DEBUG_VERBOSE, NULL },49{ "dump_blob", DXIL_DEBUG_DUMP_BLOB , "Write shader blobs" },50{ "trace", DXIL_DEBUG_TRACE , "Trace instruction conversion" },51{ "dump_module", DXIL_DEBUG_DUMP_MODULE, "dump module tree to stderr"},52DEBUG_NAMED_VALUE_END53};5455DEBUG_GET_ONCE_FLAGS_OPTION(debug_dxil, "DXIL_DEBUG", dxil_debug_options, 0)5657#define NIR_INSTR_UNSUPPORTED(instr) \58if (debug_dxil & DXIL_DEBUG_VERBOSE) \59do { \60fprintf(stderr, "Unsupported instruction:"); \61nir_print_instr(instr, stderr); \62fprintf(stderr, "\n"); \63} while (0)6465#define TRACE_CONVERSION(instr) \66if (debug_dxil & DXIL_DEBUG_TRACE) \67do { \68fprintf(stderr, "Convert '"); \69nir_print_instr(instr, stderr); \70fprintf(stderr, "'\n"); \71} while (0)7273static const nir_shader_compiler_options74nir_options = {75.lower_ineg = true,76.lower_fneg = true,77.lower_ffma16 = true,78.lower_ffma32 = true,79.lower_isign = true,80.lower_fsign = true,81.lower_iabs = true,82.lower_fmod = true,83.lower_fpow = true,84.lower_scmp = true,85.lower_ldexp = true,86.lower_flrp16 = true,87.lower_flrp32 = true,88.lower_flrp64 = true,89.lower_bitfield_extract_to_shifts = true,90.lower_extract_word = true,91.lower_extract_byte = true,92.lower_insert_word = true,93.lower_insert_byte = true,94.lower_all_io_to_elements = true,95.lower_all_io_to_temps = true,96.lower_hadd = true,97.lower_add_sat = true,98.lower_uadd_carry = true,99.lower_mul_high = true,100.lower_rotate = true,101.lower_pack_64_2x32_split = true,102.lower_pack_32_2x16_split = true,103.lower_unpack_64_2x32_split = true,104.lower_unpack_32_2x16_split = true,105.has_fsub = true,106.has_isub = true,107.use_scoped_barrier = true,108.vertex_id_zero_based = true,109.lower_base_vertex = true,110.has_cs_global_id = true,111.has_txs = true,112};113114const nir_shader_compiler_options*115dxil_get_nir_compiler_options(void)116{117return &nir_options;118}119120static bool121emit_llvm_ident(struct dxil_module *m)122{123const struct dxil_mdnode *compiler = dxil_get_metadata_string(m, "Mesa version " PACKAGE_VERSION MESA_GIT_SHA1);124if (!compiler)125return false;126127const struct dxil_mdnode *llvm_ident = dxil_get_metadata_node(m, &compiler, 1);128return llvm_ident &&129dxil_add_metadata_named_node(m, "llvm.ident", &llvm_ident, 1);130}131132static bool133emit_named_version(struct dxil_module *m, const char *name,134int major, int minor)135{136const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, major);137const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, minor);138const struct dxil_mdnode *version_nodes[] = { major_node, minor_node };139const struct dxil_mdnode *version = dxil_get_metadata_node(m, version_nodes,140ARRAY_SIZE(version_nodes));141return dxil_add_metadata_named_node(m, name, &version, 1);142}143144static const char *145get_shader_kind_str(enum dxil_shader_kind kind)146{147switch (kind) {148case DXIL_PIXEL_SHADER:149return "ps";150case DXIL_VERTEX_SHADER:151return "vs";152case DXIL_GEOMETRY_SHADER:153return "gs";154case DXIL_HULL_SHADER:155return "hs";156case DXIL_DOMAIN_SHADER:157return "ds";158case DXIL_COMPUTE_SHADER:159return "cs";160default:161unreachable("invalid shader kind");162}163}164165static bool166emit_dx_shader_model(struct dxil_module *m)167{168const struct dxil_mdnode *type_node = dxil_get_metadata_string(m, get_shader_kind_str(m->shader_kind));169const struct dxil_mdnode *major_node = dxil_get_metadata_int32(m, m->major_version);170const struct dxil_mdnode *minor_node = dxil_get_metadata_int32(m, m->minor_version);171const struct dxil_mdnode *shader_model[] = { type_node, major_node,172minor_node };173const struct dxil_mdnode *dx_shader_model = dxil_get_metadata_node(m, shader_model, ARRAY_SIZE(shader_model));174175return dxil_add_metadata_named_node(m, "dx.shaderModel",176&dx_shader_model, 1);177}178179enum {180DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG = 0,181DXIL_STRUCTURED_BUFFER_ELEMENT_STRIDE_TAG = 1182};183184enum dxil_intr {185DXIL_INTR_LOAD_INPUT = 4,186DXIL_INTR_STORE_OUTPUT = 5,187DXIL_INTR_FABS = 6,188DXIL_INTR_SATURATE = 7,189190DXIL_INTR_ISFINITE = 10,191DXIL_INTR_ISNORMAL = 11,192193DXIL_INTR_FCOS = 12,194DXIL_INTR_FSIN = 13,195196DXIL_INTR_FEXP2 = 21,197DXIL_INTR_FRC = 22,198DXIL_INTR_FLOG2 = 23,199200DXIL_INTR_SQRT = 24,201DXIL_INTR_RSQRT = 25,202DXIL_INTR_ROUND_NE = 26,203DXIL_INTR_ROUND_NI = 27,204DXIL_INTR_ROUND_PI = 28,205DXIL_INTR_ROUND_Z = 29,206207DXIL_INTR_COUNTBITS = 31,208DXIL_INTR_FIRSTBIT_HI = 33,209210DXIL_INTR_FMAX = 35,211DXIL_INTR_FMIN = 36,212DXIL_INTR_IMAX = 37,213DXIL_INTR_IMIN = 38,214DXIL_INTR_UMAX = 39,215DXIL_INTR_UMIN = 40,216217DXIL_INTR_FMA = 47,218219DXIL_INTR_CREATE_HANDLE = 57,220DXIL_INTR_CBUFFER_LOAD_LEGACY = 59,221222DXIL_INTR_SAMPLE = 60,223DXIL_INTR_SAMPLE_BIAS = 61,224DXIL_INTR_SAMPLE_LEVEL = 62,225DXIL_INTR_SAMPLE_GRAD = 63,226DXIL_INTR_SAMPLE_CMP = 64,227DXIL_INTR_SAMPLE_CMP_LVL_ZERO = 65,228229DXIL_INTR_TEXTURE_LOAD = 66,230DXIL_INTR_TEXTURE_STORE = 67,231232DXIL_INTR_BUFFER_LOAD = 68,233DXIL_INTR_BUFFER_STORE = 69,234235DXIL_INTR_TEXTURE_SIZE = 72,236237DXIL_INTR_ATOMIC_BINOP = 78,238DXIL_INTR_ATOMIC_CMPXCHG = 79,239DXIL_INTR_BARRIER = 80,240DXIL_INTR_TEXTURE_LOD = 81,241242DXIL_INTR_DISCARD = 82,243DXIL_INTR_DDX_COARSE = 83,244DXIL_INTR_DDY_COARSE = 84,245DXIL_INTR_DDX_FINE = 85,246DXIL_INTR_DDY_FINE = 86,247248DXIL_INTR_THREAD_ID = 93,249DXIL_INTR_GROUP_ID = 94,250DXIL_INTR_THREAD_ID_IN_GROUP = 95,251252DXIL_INTR_EMIT_STREAM = 97,253DXIL_INTR_CUT_STREAM = 98,254255DXIL_INTR_MAKE_DOUBLE = 101,256DXIL_INTR_SPLIT_DOUBLE = 102,257258DXIL_INTR_PRIMITIVE_ID = 108,259260DXIL_INTR_LEGACY_F32TOF16 = 130,261DXIL_INTR_LEGACY_F16TOF32 = 131,262263DXIL_INTR_ATTRIBUTE_AT_VERTEX = 137,264};265266enum dxil_atomic_op {267DXIL_ATOMIC_ADD = 0,268DXIL_ATOMIC_AND = 1,269DXIL_ATOMIC_OR = 2,270DXIL_ATOMIC_XOR = 3,271DXIL_ATOMIC_IMIN = 4,272DXIL_ATOMIC_IMAX = 5,273DXIL_ATOMIC_UMIN = 6,274DXIL_ATOMIC_UMAX = 7,275DXIL_ATOMIC_EXCHANGE = 8,276};277278typedef struct {279unsigned id;280unsigned binding;281unsigned size;282unsigned space;283} resource_array_layout;284285static void286fill_resource_metadata(struct dxil_module *m, const struct dxil_mdnode **fields,287const struct dxil_type *struct_type,288const char *name, const resource_array_layout *layout)289{290const struct dxil_type *pointer_type = dxil_module_get_pointer_type(m, struct_type);291const struct dxil_value *pointer_undef = dxil_module_get_undef(m, pointer_type);292293fields[0] = dxil_get_metadata_int32(m, layout->id); // resource ID294fields[1] = dxil_get_metadata_value(m, pointer_type, pointer_undef); // global constant symbol295fields[2] = dxil_get_metadata_string(m, name ? name : ""); // name296fields[3] = dxil_get_metadata_int32(m, layout->space); // space ID297fields[4] = dxil_get_metadata_int32(m, layout->binding); // lower bound298fields[5] = dxil_get_metadata_int32(m, layout->size); // range size299}300301static const struct dxil_mdnode *302emit_srv_metadata(struct dxil_module *m, const struct dxil_type *elem_type,303const char *name, const resource_array_layout *layout,304enum dxil_component_type comp_type,305enum dxil_resource_kind res_kind)306{307const struct dxil_mdnode *fields[9];308309const struct dxil_mdnode *metadata_tag_nodes[2];310311fill_resource_metadata(m, fields, elem_type, name, layout);312fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape313fields[7] = dxil_get_metadata_int1(m, 0); // sample count314if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&315res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {316metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);317metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);318fields[8] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata319} else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)320fields[8] = NULL;321else322unreachable("Structured buffers not supported yet");323324return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));325}326327static const struct dxil_mdnode *328emit_uav_metadata(struct dxil_module *m, const struct dxil_type *struct_type,329const char *name, const resource_array_layout *layout,330enum dxil_component_type comp_type,331enum dxil_resource_kind res_kind)332{333const struct dxil_mdnode *fields[11];334335const struct dxil_mdnode *metadata_tag_nodes[2];336337fill_resource_metadata(m, fields, struct_type, name, layout);338fields[6] = dxil_get_metadata_int32(m, res_kind); // resource shape339fields[7] = dxil_get_metadata_int1(m, false); // globally-coherent340fields[8] = dxil_get_metadata_int1(m, false); // has counter341fields[9] = dxil_get_metadata_int1(m, false); // is ROV342if (res_kind != DXIL_RESOURCE_KIND_RAW_BUFFER &&343res_kind != DXIL_RESOURCE_KIND_STRUCTURED_BUFFER) {344metadata_tag_nodes[0] = dxil_get_metadata_int32(m, DXIL_TYPED_BUFFER_ELEMENT_TYPE_TAG);345metadata_tag_nodes[1] = dxil_get_metadata_int32(m, comp_type);346fields[10] = dxil_get_metadata_node(m, metadata_tag_nodes, ARRAY_SIZE(metadata_tag_nodes)); // metadata347} else if (res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER)348fields[10] = NULL;349else350unreachable("Structured buffers not supported yet");351352return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));353}354355static const struct dxil_mdnode *356emit_cbv_metadata(struct dxil_module *m, const struct dxil_type *struct_type,357const char *name, const resource_array_layout *layout,358unsigned size)359{360const struct dxil_mdnode *fields[8];361362fill_resource_metadata(m, fields, struct_type, name, layout);363fields[6] = dxil_get_metadata_int32(m, size); // constant buffer size364fields[7] = NULL; // metadata365366return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));367}368369static const struct dxil_mdnode *370emit_sampler_metadata(struct dxil_module *m, const struct dxil_type *struct_type,371nir_variable *var, const resource_array_layout *layout)372{373const struct dxil_mdnode *fields[8];374const struct glsl_type *type = glsl_without_array(var->type);375376fill_resource_metadata(m, fields, struct_type, var->name, layout);377fields[6] = dxil_get_metadata_int32(m, DXIL_SAMPLER_KIND_DEFAULT); // sampler kind378enum dxil_sampler_kind sampler_kind = glsl_sampler_type_is_shadow(type) ?379DXIL_SAMPLER_KIND_COMPARISON : DXIL_SAMPLER_KIND_DEFAULT;380fields[6] = dxil_get_metadata_int32(m, sampler_kind); // sampler kind381fields[7] = NULL; // metadata382383return dxil_get_metadata_node(m, fields, ARRAY_SIZE(fields));384}385386387#define MAX_SRVS 128388#define MAX_UAVS 64389#define MAX_CBVS 64 // ??390#define MAX_SAMPLERS 64 // ??391392struct dxil_def {393const struct dxil_value *chans[NIR_MAX_VEC_COMPONENTS];394};395396struct ntd_context {397void *ralloc_ctx;398const struct nir_to_dxil_options *opts;399struct nir_shader *shader;400401struct dxil_module mod;402403struct util_dynarray srv_metadata_nodes;404const struct dxil_value *srv_handles[MAX_SRVS];405406struct util_dynarray uav_metadata_nodes;407const struct dxil_value *uav_handles[MAX_UAVS];408409struct util_dynarray cbv_metadata_nodes;410const struct dxil_value *cbv_handles[MAX_CBVS];411412struct util_dynarray sampler_metadata_nodes;413const struct dxil_value *sampler_handles[MAX_SAMPLERS];414415struct util_dynarray resources;416417const struct dxil_mdnode *shader_property_nodes[6];418size_t num_shader_property_nodes;419420struct dxil_def *defs;421unsigned num_defs;422struct hash_table *phis;423424const struct dxil_value *sharedvars;425const struct dxil_value *scratchvars;426struct hash_table *consts;427428nir_variable *ps_front_face;429nir_variable *system_value[SYSTEM_VALUE_MAX];430};431432static const char*433unary_func_name(enum dxil_intr intr)434{435switch (intr) {436case DXIL_INTR_COUNTBITS:437case DXIL_INTR_FIRSTBIT_HI:438return "dx.op.unaryBits";439case DXIL_INTR_ISFINITE:440case DXIL_INTR_ISNORMAL:441return "dx.op.isSpecialFloat";442default:443return "dx.op.unary";444}445}446447static const struct dxil_value *448emit_unary_call(struct ntd_context *ctx, enum overload_type overload,449enum dxil_intr intr,450const struct dxil_value *op0)451{452const struct dxil_func *func = dxil_get_function(&ctx->mod,453unary_func_name(intr),454overload);455if (!func)456return NULL;457458const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);459if (!opcode)460return NULL;461462const struct dxil_value *args[] = {463opcode,464op0465};466467return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));468}469470static const struct dxil_value *471emit_binary_call(struct ntd_context *ctx, enum overload_type overload,472enum dxil_intr intr,473const struct dxil_value *op0, const struct dxil_value *op1)474{475const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.binary", overload);476if (!func)477return NULL;478479const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);480if (!opcode)481return NULL;482483const struct dxil_value *args[] = {484opcode,485op0,486op1487};488489return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));490}491492static const struct dxil_value *493emit_tertiary_call(struct ntd_context *ctx, enum overload_type overload,494enum dxil_intr intr,495const struct dxil_value *op0,496const struct dxil_value *op1,497const struct dxil_value *op2)498{499const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.tertiary", overload);500if (!func)501return NULL;502503const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, intr);504if (!opcode)505return NULL;506507const struct dxil_value *args[] = {508opcode,509op0,510op1,511op2512};513514return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));515}516517static const struct dxil_value *518emit_threadid_call(struct ntd_context *ctx, const struct dxil_value *comp)519{520const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadId", DXIL_I32);521if (!func)522return NULL;523524const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,525DXIL_INTR_THREAD_ID);526if (!opcode)527return NULL;528529const struct dxil_value *args[] = {530opcode,531comp532};533534return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));535}536537static const struct dxil_value *538emit_threadidingroup_call(struct ntd_context *ctx,539const struct dxil_value *comp)540{541const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.threadIdInGroup", DXIL_I32);542543if (!func)544return NULL;545546const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,547DXIL_INTR_THREAD_ID_IN_GROUP);548if (!opcode)549return NULL;550551const struct dxil_value *args[] = {552opcode,553comp554};555556return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));557}558559static const struct dxil_value *560emit_groupid_call(struct ntd_context *ctx, const struct dxil_value *comp)561{562const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.groupId", DXIL_I32);563564if (!func)565return NULL;566567const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,568DXIL_INTR_GROUP_ID);569if (!opcode)570return NULL;571572const struct dxil_value *args[] = {573opcode,574comp575};576577return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));578}579580static const struct dxil_value *581emit_bufferload_call(struct ntd_context *ctx,582const struct dxil_value *handle,583const struct dxil_value *coord[2])584{585const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferLoad", DXIL_I32);586if (!func)587return NULL;588589const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,590DXIL_INTR_BUFFER_LOAD);591const struct dxil_value *args[] = { opcode, handle, coord[0], coord[1] };592593return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));594}595596static bool597emit_bufferstore_call(struct ntd_context *ctx,598const struct dxil_value *handle,599const struct dxil_value *coord[2],600const struct dxil_value *value[4],601const struct dxil_value *write_mask,602enum overload_type overload)603{604const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.bufferStore", overload);605606if (!func)607return false;608609const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,610DXIL_INTR_BUFFER_STORE);611const struct dxil_value *args[] = {612opcode, handle, coord[0], coord[1],613value[0], value[1], value[2], value[3],614write_mask615};616617return dxil_emit_call_void(&ctx->mod, func,618args, ARRAY_SIZE(args));619}620621static bool622emit_texturestore_call(struct ntd_context *ctx,623const struct dxil_value *handle,624const struct dxil_value *coord[3],625const struct dxil_value *value[4],626const struct dxil_value *write_mask,627enum overload_type overload)628{629const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureStore", overload);630631if (!func)632return false;633634const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,635DXIL_INTR_TEXTURE_STORE);636const struct dxil_value *args[] = {637opcode, handle, coord[0], coord[1], coord[2],638value[0], value[1], value[2], value[3],639write_mask640};641642return dxil_emit_call_void(&ctx->mod, func,643args, ARRAY_SIZE(args));644}645646static const struct dxil_value *647emit_atomic_binop(struct ntd_context *ctx,648const struct dxil_value *handle,649enum dxil_atomic_op atomic_op,650const struct dxil_value *coord[3],651const struct dxil_value *value)652{653const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.atomicBinOp", DXIL_I32);654655if (!func)656return false;657658const struct dxil_value *opcode =659dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_BINOP);660const struct dxil_value *atomic_op_value =661dxil_module_get_int32_const(&ctx->mod, atomic_op);662const struct dxil_value *args[] = {663opcode, handle, atomic_op_value,664coord[0], coord[1], coord[2], value665};666667return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));668}669670static const struct dxil_value *671emit_atomic_cmpxchg(struct ntd_context *ctx,672const struct dxil_value *handle,673const struct dxil_value *coord[3],674const struct dxil_value *cmpval,675const struct dxil_value *newval)676{677const struct dxil_func *func =678dxil_get_function(&ctx->mod, "dx.op.atomicCompareExchange", DXIL_I32);679680if (!func)681return false;682683const struct dxil_value *opcode =684dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATOMIC_CMPXCHG);685const struct dxil_value *args[] = {686opcode, handle, coord[0], coord[1], coord[2], cmpval, newval687};688689return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));690}691692static const struct dxil_value *693emit_createhandle_call(struct ntd_context *ctx,694enum dxil_resource_class resource_class,695unsigned resource_range_id,696const struct dxil_value *resource_range_index,697bool non_uniform_resource_index)698{699const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CREATE_HANDLE);700const struct dxil_value *resource_class_value = dxil_module_get_int8_const(&ctx->mod, resource_class);701const struct dxil_value *resource_range_id_value = dxil_module_get_int32_const(&ctx->mod, resource_range_id);702const struct dxil_value *non_uniform_resource_index_value = dxil_module_get_int1_const(&ctx->mod, non_uniform_resource_index);703if (!opcode || !resource_class_value || !resource_range_id_value ||704!non_uniform_resource_index_value)705return NULL;706707const struct dxil_value *args[] = {708opcode,709resource_class_value,710resource_range_id_value,711resource_range_index,712non_uniform_resource_index_value713};714715const struct dxil_func *func =716dxil_get_function(&ctx->mod, "dx.op.createHandle", DXIL_NONE);717718if (!func)719return NULL;720721return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));722}723724static const struct dxil_value *725emit_createhandle_call_const_index(struct ntd_context *ctx,726enum dxil_resource_class resource_class,727unsigned resource_range_id,728unsigned resource_range_index,729bool non_uniform_resource_index)730{731732const struct dxil_value *resource_range_index_value = dxil_module_get_int32_const(&ctx->mod, resource_range_index);733if (!resource_range_index_value)734return NULL;735736return emit_createhandle_call(ctx, resource_class, resource_range_id,737resource_range_index_value,738non_uniform_resource_index);739}740741static void742add_resource(struct ntd_context *ctx, enum dxil_resource_type type,743const resource_array_layout *layout)744{745struct dxil_resource *resource = util_dynarray_grow(&ctx->resources, struct dxil_resource, 1);746resource->resource_type = type;747resource->space = layout->space;748resource->lower_bound = layout->binding;749if (layout->size == 0 || (uint64_t)layout->size + layout->binding >= UINT_MAX)750resource->upper_bound = UINT_MAX;751else752resource->upper_bound = layout->binding + layout->size - 1;753}754755static unsigned756get_resource_id(struct ntd_context *ctx, enum dxil_resource_class class,757unsigned space, unsigned binding)758{759unsigned offset = 0;760unsigned count = 0;761762unsigned num_srvs = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);763unsigned num_uavs = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);764unsigned num_cbvs = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);765unsigned num_samplers = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);766767switch (class) {768case DXIL_RESOURCE_CLASS_UAV:769offset = num_srvs + num_samplers + num_cbvs;770count = num_uavs;771break;772case DXIL_RESOURCE_CLASS_SRV:773offset = num_samplers + num_cbvs;774count = num_srvs;775break;776case DXIL_RESOURCE_CLASS_SAMPLER:777offset = num_cbvs;778count = num_samplers;779break;780case DXIL_RESOURCE_CLASS_CBV:781offset = 0;782count = num_cbvs;783break;784}785786assert(offset + count <= util_dynarray_num_elements(&ctx->resources, struct dxil_resource));787for (unsigned i = offset; i < offset + count; ++i) {788const struct dxil_resource *resource = util_dynarray_element(&ctx->resources, struct dxil_resource, i);789if (resource->space == space &&790resource->lower_bound <= binding &&791resource->upper_bound >= binding) {792return i - offset;793}794}795796unreachable("Resource access for undeclared range");797return 0;798}799800static bool801emit_srv(struct ntd_context *ctx, nir_variable *var, unsigned count)802{803unsigned id = util_dynarray_num_elements(&ctx->srv_metadata_nodes, const struct dxil_mdnode *);804unsigned binding = var->data.binding;805resource_array_layout layout = {id, binding, count, var->data.descriptor_set};806807enum dxil_component_type comp_type;808enum dxil_resource_kind res_kind;809enum dxil_resource_type res_type;810if (var->data.mode == nir_var_mem_ssbo) {811comp_type = DXIL_COMP_TYPE_INVALID;812res_kind = DXIL_RESOURCE_KIND_RAW_BUFFER;813res_type = DXIL_RES_SRV_RAW;814} else {815comp_type = dxil_get_comp_type(var->type);816res_kind = dxil_get_resource_kind(var->type);817res_type = DXIL_RES_SRV_TYPED;818}819const struct dxil_type *res_type_as_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, false /* readwrite */);820const struct dxil_mdnode *srv_meta = emit_srv_metadata(&ctx->mod, res_type_as_type, var->name,821&layout, comp_type, res_kind);822823if (!srv_meta)824return false;825826util_dynarray_append(&ctx->srv_metadata_nodes, const struct dxil_mdnode *, srv_meta);827add_resource(ctx, res_type, &layout);828829if (!ctx->opts->vulkan_environment) {830for (unsigned i = 0; i < count; ++i) {831const struct dxil_value *handle =832emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SRV,833id, binding + i, false);834if (!handle)835return false;836837int idx = var->data.binding + i;838ctx->srv_handles[idx] = handle;839}840}841842return true;843}844845static bool846emit_globals(struct ntd_context *ctx, unsigned size)847{848nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo)849size++;850851if (!size)852return true;853854const struct dxil_type *struct_type = dxil_module_get_res_type(&ctx->mod,855DXIL_RESOURCE_KIND_RAW_BUFFER, DXIL_COMP_TYPE_INVALID, true /* readwrite */);856if (!struct_type)857return false;858859const struct dxil_type *array_type =860dxil_module_get_array_type(&ctx->mod, struct_type, size);861if (!array_type)862return false;863864resource_array_layout layout = {0, 0, size, 0};865const struct dxil_mdnode *uav_meta =866emit_uav_metadata(&ctx->mod, array_type,867"globals", &layout,868DXIL_COMP_TYPE_INVALID,869DXIL_RESOURCE_KIND_RAW_BUFFER);870if (!uav_meta)871return false;872873util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);874if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)875ctx->mod.feats.use_64uavs = 1;876/* Handles to UAVs used for kernel globals are created on-demand */877add_resource(ctx, DXIL_RES_UAV_RAW, &layout);878ctx->mod.raw_and_structured_buffers = true;879return true;880}881882static bool883emit_uav(struct ntd_context *ctx, unsigned binding, unsigned space, unsigned count,884enum dxil_component_type comp_type, enum dxil_resource_kind res_kind, const char *name)885{886unsigned id = util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *);887resource_array_layout layout = { id, binding, count, space };888889const struct dxil_type *res_type = dxil_module_get_res_type(&ctx->mod, res_kind, comp_type, true /* readwrite */);890const struct dxil_mdnode *uav_meta = emit_uav_metadata(&ctx->mod, res_type, name,891&layout, comp_type, res_kind);892893if (!uav_meta)894return false;895896util_dynarray_append(&ctx->uav_metadata_nodes, const struct dxil_mdnode *, uav_meta);897if (util_dynarray_num_elements(&ctx->uav_metadata_nodes, const struct dxil_mdnode *) > 8)898ctx->mod.feats.use_64uavs = 1;899900add_resource(ctx, res_kind == DXIL_RESOURCE_KIND_RAW_BUFFER ? DXIL_RES_UAV_RAW : DXIL_RES_UAV_TYPED, &layout);901902if (!ctx->opts->vulkan_environment) {903for (unsigned i = 0; i < count; ++i) {904const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_UAV,905id, binding + i, false);906if (!handle)907return false;908909ctx->uav_handles[binding + i] = handle;910}911}912913return true;914}915916static bool917emit_uav_var(struct ntd_context *ctx, nir_variable *var, unsigned count)918{919unsigned binding = var->data.binding;920unsigned space = var->data.descriptor_set;921enum dxil_component_type comp_type = dxil_get_comp_type(var->type);922enum dxil_resource_kind res_kind = dxil_get_resource_kind(var->type);923const char *name = var->name;924925return emit_uav(ctx, binding, space, count, comp_type, res_kind, name);926}927928static unsigned get_dword_size(const struct glsl_type *type)929{930if (glsl_type_is_array(type)) {931type = glsl_without_array(type);932}933assert(glsl_type_is_struct(type) || glsl_type_is_interface(type));934return glsl_get_explicit_size(type, false);935}936937static bool938var_fill_const_array_with_vector_or_scalar(struct ntd_context *ctx,939const struct nir_constant *c,940const struct glsl_type *type,941void *const_vals,942unsigned int offset)943{944assert(glsl_type_is_vector_or_scalar(type));945unsigned int components = glsl_get_vector_elements(type);946unsigned bit_size = glsl_get_bit_size(type);947unsigned int increment = bit_size / 8;948949for (unsigned int comp = 0; comp < components; comp++) {950uint8_t *dst = (uint8_t *)const_vals + offset;951952switch (bit_size) {953case 64:954memcpy(dst, &c->values[comp].u64, sizeof(c->values[0].u64));955break;956case 32:957memcpy(dst, &c->values[comp].u32, sizeof(c->values[0].u32));958break;959case 16:960memcpy(dst, &c->values[comp].u16, sizeof(c->values[0].u16));961break;962case 8:963assert(glsl_base_type_is_integer(glsl_get_base_type(type)));964memcpy(dst, &c->values[comp].u8, sizeof(c->values[0].u8));965break;966default:967unreachable("unexpeted bit-size");968}969970offset += increment;971}972973return true;974}975976static bool977var_fill_const_array(struct ntd_context *ctx, const struct nir_constant *c,978const struct glsl_type *type, void *const_vals,979unsigned int offset)980{981assert(!glsl_type_is_interface(type));982983if (glsl_type_is_vector_or_scalar(type)) {984return var_fill_const_array_with_vector_or_scalar(ctx, c, type,985const_vals,986offset);987} else if (glsl_type_is_array(type)) {988assert(!glsl_type_is_unsized_array(type));989const struct glsl_type *without = glsl_without_array(type);990unsigned stride = glsl_get_explicit_stride(without);991992for (unsigned elt = 0; elt < glsl_get_length(type); elt++) {993if (!var_fill_const_array(ctx, c->elements[elt], without,994const_vals, offset + (elt * stride))) {995return false;996}997offset += glsl_get_cl_size(without);998}999return true;1000} else if (glsl_type_is_struct(type)) {1001for (unsigned int elt = 0; elt < glsl_get_length(type); elt++) {1002const struct glsl_type *elt_type = glsl_get_struct_field(type, elt);1003unsigned field_offset = glsl_get_struct_field_offset(type, elt);10041005if (!var_fill_const_array(ctx, c->elements[elt],1006elt_type, const_vals,1007offset + field_offset)) {1008return false;1009}1010}1011return true;1012}10131014unreachable("unknown GLSL type in var_fill_const_array");1015}10161017static bool1018emit_global_consts(struct ntd_context *ctx)1019{1020nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_temp) {1021bool err;10221023assert(var->constant_initializer);10241025unsigned int num_members = DIV_ROUND_UP(glsl_get_cl_size(var->type), 4);1026uint32_t *const_ints = ralloc_array(ctx->ralloc_ctx, uint32_t, num_members);1027err = var_fill_const_array(ctx, var->constant_initializer, var->type,1028const_ints, 0);1029if (!err)1030return false;1031const struct dxil_value **const_vals =1032ralloc_array(ctx->ralloc_ctx, const struct dxil_value *, num_members);1033if (!const_vals)1034return false;1035for (int i = 0; i < num_members; i++)1036const_vals[i] = dxil_module_get_int32_const(&ctx->mod, const_ints[i]);10371038const struct dxil_type *elt_type = dxil_module_get_int_type(&ctx->mod, 32);1039if (!elt_type)1040return false;1041const struct dxil_type *type =1042dxil_module_get_array_type(&ctx->mod, elt_type, num_members);1043if (!type)1044return false;1045const struct dxil_value *agg_vals =1046dxil_module_get_array_const(&ctx->mod, type, const_vals);1047if (!agg_vals)1048return false;10491050const struct dxil_value *gvar = dxil_add_global_ptr_var(&ctx->mod, var->name, type,1051DXIL_AS_DEFAULT, 4,1052agg_vals);1053if (!gvar)1054return false;10551056if (!_mesa_hash_table_insert(ctx->consts, var, (void *)gvar))1057return false;1058}10591060return true;1061}10621063static bool1064emit_cbv(struct ntd_context *ctx, unsigned binding, unsigned space,1065unsigned size, unsigned count, char *name)1066{1067unsigned idx = util_dynarray_num_elements(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *);10681069const struct dxil_type *float32 = dxil_module_get_float_type(&ctx->mod, 32);1070const struct dxil_type *array_type = dxil_module_get_array_type(&ctx->mod, float32, size);1071const struct dxil_type *buffer_type = dxil_module_get_struct_type(&ctx->mod, name,1072&array_type, 1);1073const struct dxil_type *final_type = count != 1 ? dxil_module_get_array_type(&ctx->mod, buffer_type, count) : buffer_type;1074resource_array_layout layout = {idx, binding, count, space};1075const struct dxil_mdnode *cbv_meta = emit_cbv_metadata(&ctx->mod, final_type,1076name, &layout, 4 * size);10771078if (!cbv_meta)1079return false;10801081util_dynarray_append(&ctx->cbv_metadata_nodes, const struct dxil_mdnode *, cbv_meta);1082add_resource(ctx, DXIL_RES_CBV, &layout);10831084if (!ctx->opts->vulkan_environment) {1085for (unsigned i = 0; i < count; ++i) {1086const struct dxil_value *handle = emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_CBV,1087idx, binding + i, false);1088if (!handle)1089return false;10901091assert(!ctx->cbv_handles[binding + i]);1092ctx->cbv_handles[binding + i] = handle;1093}1094}10951096return true;1097}10981099static bool1100emit_ubo_var(struct ntd_context *ctx, nir_variable *var)1101{1102unsigned count = 1;1103if (glsl_type_is_array(var->type))1104count = glsl_get_length(var->type);1105return emit_cbv(ctx, var->data.binding, var->data.descriptor_set, get_dword_size(var->type), count, var->name);1106}11071108static bool1109emit_sampler(struct ntd_context *ctx, nir_variable *var, unsigned count)1110{1111unsigned id = util_dynarray_num_elements(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *);1112unsigned binding = var->data.binding;1113resource_array_layout layout = {id, binding, count, var->data.descriptor_set};1114const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);1115const struct dxil_type *sampler_type = dxil_module_get_struct_type(&ctx->mod, "struct.SamplerState", &int32_type, 1);1116const struct dxil_mdnode *sampler_meta = emit_sampler_metadata(&ctx->mod, sampler_type, var, &layout);11171118if (!sampler_meta)1119return false;11201121util_dynarray_append(&ctx->sampler_metadata_nodes, const struct dxil_mdnode *, sampler_meta);1122add_resource(ctx, DXIL_RES_SAMPLER, &layout);11231124if (!ctx->opts->vulkan_environment) {1125for (unsigned i = 0; i < count; ++i) {1126const struct dxil_value *handle =1127emit_createhandle_call_const_index(ctx, DXIL_RESOURCE_CLASS_SAMPLER,1128id, binding + i, false);1129if (!handle)1130return false;11311132unsigned idx = var->data.binding + i;1133ctx->sampler_handles[idx] = handle;1134}1135}11361137return true;1138}11391140static const struct dxil_mdnode *1141emit_gs_state(struct ntd_context *ctx)1142{1143const struct dxil_mdnode *gs_state_nodes[5];1144const nir_shader *s = ctx->shader;11451146gs_state_nodes[0] = dxil_get_metadata_int32(&ctx->mod, dxil_get_input_primitive(s->info.gs.input_primitive));1147gs_state_nodes[1] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.vertices_out);1148gs_state_nodes[2] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.active_stream_mask);1149gs_state_nodes[3] = dxil_get_metadata_int32(&ctx->mod, dxil_get_primitive_topology(s->info.gs.output_primitive));1150gs_state_nodes[4] = dxil_get_metadata_int32(&ctx->mod, s->info.gs.invocations);11511152for (unsigned i = 0; i < ARRAY_SIZE(gs_state_nodes); ++i) {1153if (!gs_state_nodes[i])1154return NULL;1155}11561157return dxil_get_metadata_node(&ctx->mod, gs_state_nodes, ARRAY_SIZE(gs_state_nodes));1158}11591160static const struct dxil_mdnode *1161emit_threads(struct ntd_context *ctx)1162{1163const nir_shader *s = ctx->shader;1164const struct dxil_mdnode *threads_x = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[0], 1));1165const struct dxil_mdnode *threads_y = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[1], 1));1166const struct dxil_mdnode *threads_z = dxil_get_metadata_int32(&ctx->mod, MAX2(s->info.workgroup_size[2], 1));1167if (!threads_x || !threads_y || !threads_z)1168return false;11691170const struct dxil_mdnode *threads_nodes[] = { threads_x, threads_y, threads_z };1171return dxil_get_metadata_node(&ctx->mod, threads_nodes, ARRAY_SIZE(threads_nodes));1172}11731174static int64_t1175get_module_flags(struct ntd_context *ctx)1176{1177/* See the DXIL documentation for the definition of these flags:1178*1179* https://github.com/Microsoft/DirectXShaderCompiler/blob/master/docs/DXIL.rst#shader-flags1180*/11811182uint64_t flags = 0;1183if (ctx->mod.feats.doubles)1184flags |= (1 << 2);1185if (ctx->mod.raw_and_structured_buffers)1186flags |= (1 << 4);1187if (ctx->mod.feats.min_precision)1188flags |= (1 << 5);1189if (ctx->mod.feats.dx11_1_double_extensions)1190flags |= (1 << 6);1191if (ctx->mod.feats.inner_coverage)1192flags |= (1 << 10);1193if (ctx->mod.feats.typed_uav_load_additional_formats)1194flags |= (1 << 13);1195if (ctx->mod.feats.use_64uavs)1196flags |= (1 << 15);1197if (ctx->mod.feats.cs_4x_raw_sb)1198flags |= (1 << 17);1199if (ctx->mod.feats.wave_ops)1200flags |= (1 << 19);1201if (ctx->mod.feats.int64_ops)1202flags |= (1 << 20);1203if (ctx->mod.feats.stencil_ref)1204flags |= (1 << 11);1205if (ctx->mod.feats.native_low_precision)1206flags |= (1 << 23) | (1 << 5);12071208if (ctx->opts->disable_math_refactoring)1209flags |= (1 << 1);12101211return flags;1212}12131214static const struct dxil_mdnode *1215emit_entrypoint(struct ntd_context *ctx,1216const struct dxil_func *func, const char *name,1217const struct dxil_mdnode *signatures,1218const struct dxil_mdnode *resources,1219const struct dxil_mdnode *shader_props)1220{1221const struct dxil_mdnode *func_md = dxil_get_metadata_func(&ctx->mod, func);1222const struct dxil_mdnode *name_md = dxil_get_metadata_string(&ctx->mod, name);1223const struct dxil_mdnode *nodes[] = {1224func_md,1225name_md,1226signatures,1227resources,1228shader_props1229};1230return dxil_get_metadata_node(&ctx->mod, nodes,1231ARRAY_SIZE(nodes));1232}12331234static const struct dxil_mdnode *1235emit_resources(struct ntd_context *ctx)1236{1237bool emit_resources = false;1238const struct dxil_mdnode *resources_nodes[] = {1239NULL, NULL, NULL, NULL1240};12411242#define ARRAY_AND_SIZE(arr) arr.data, util_dynarray_num_elements(&arr, const struct dxil_mdnode *)12431244if (ctx->srv_metadata_nodes.size) {1245resources_nodes[0] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->srv_metadata_nodes));1246emit_resources = true;1247}12481249if (ctx->uav_metadata_nodes.size) {1250resources_nodes[1] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->uav_metadata_nodes));1251emit_resources = true;1252}12531254if (ctx->cbv_metadata_nodes.size) {1255resources_nodes[2] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->cbv_metadata_nodes));1256emit_resources = true;1257}12581259if (ctx->sampler_metadata_nodes.size) {1260resources_nodes[3] = dxil_get_metadata_node(&ctx->mod, ARRAY_AND_SIZE(ctx->sampler_metadata_nodes));1261emit_resources = true;1262}12631264#undef ARRAY_AND_SIZE12651266return emit_resources ?1267dxil_get_metadata_node(&ctx->mod, resources_nodes, ARRAY_SIZE(resources_nodes)): NULL;1268}12691270static boolean1271emit_tag(struct ntd_context *ctx, enum dxil_shader_tag tag,1272const struct dxil_mdnode *value_node)1273{1274const struct dxil_mdnode *tag_node = dxil_get_metadata_int32(&ctx->mod, tag);1275if (!tag_node || !value_node)1276return false;1277assert(ctx->num_shader_property_nodes <= ARRAY_SIZE(ctx->shader_property_nodes) - 2);1278ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = tag_node;1279ctx->shader_property_nodes[ctx->num_shader_property_nodes++] = value_node;12801281return true;1282}12831284static bool1285emit_metadata(struct ntd_context *ctx)1286{1287unsigned dxilMinor = ctx->mod.minor_version;1288if (!emit_llvm_ident(&ctx->mod) ||1289!emit_named_version(&ctx->mod, "dx.version", 1, dxilMinor) ||1290!emit_named_version(&ctx->mod, "dx.valver", 1, 4) ||1291!emit_dx_shader_model(&ctx->mod))1292return false;12931294const struct dxil_type *void_type = dxil_module_get_void_type(&ctx->mod);1295const struct dxil_type *main_func_type = dxil_module_add_function_type(&ctx->mod, void_type, NULL, 0);1296const struct dxil_func *main_func = dxil_add_function_def(&ctx->mod, "main", main_func_type);1297if (!main_func)1298return false;12991300const struct dxil_mdnode *resources_node = emit_resources(ctx);13011302const struct dxil_mdnode *main_entrypoint = dxil_get_metadata_func(&ctx->mod, main_func);1303const struct dxil_mdnode *node27 = dxil_get_metadata_node(&ctx->mod, NULL, 0);13041305const struct dxil_mdnode *node4 = dxil_get_metadata_int32(&ctx->mod, 0);1306const struct dxil_mdnode *nodes_4_27_27[] = {1307node4, node27, node271308};1309const struct dxil_mdnode *node28 = dxil_get_metadata_node(&ctx->mod, nodes_4_27_27,1310ARRAY_SIZE(nodes_4_27_27));13111312const struct dxil_mdnode *node29 = dxil_get_metadata_node(&ctx->mod, &node28, 1);13131314const struct dxil_mdnode *node3 = dxil_get_metadata_int32(&ctx->mod, 1);1315const struct dxil_mdnode *main_type_annotation_nodes[] = {1316node3, main_entrypoint, node291317};1318const struct dxil_mdnode *main_type_annotation = dxil_get_metadata_node(&ctx->mod, main_type_annotation_nodes,1319ARRAY_SIZE(main_type_annotation_nodes));13201321if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {1322if (!emit_tag(ctx, DXIL_SHADER_TAG_GS_STATE, emit_gs_state(ctx)))1323return false;1324} else if (ctx->mod.shader_kind == DXIL_COMPUTE_SHADER) {1325if (!emit_tag(ctx, DXIL_SHADER_TAG_NUM_THREADS, emit_threads(ctx)))1326return false;1327}13281329uint64_t flags = get_module_flags(ctx);1330if (flags != 0) {1331if (!emit_tag(ctx, DXIL_SHADER_TAG_FLAGS, dxil_get_metadata_int64(&ctx->mod, flags)))1332return false;1333}1334const struct dxil_mdnode *shader_properties = NULL;1335if (ctx->num_shader_property_nodes > 0) {1336shader_properties = dxil_get_metadata_node(&ctx->mod, ctx->shader_property_nodes,1337ctx->num_shader_property_nodes);1338if (!shader_properties)1339return false;1340}13411342const struct dxil_mdnode *signatures = get_signatures(&ctx->mod, ctx->shader,1343ctx->opts->vulkan_environment);13441345const struct dxil_mdnode *dx_entry_point = emit_entrypoint(ctx, main_func,1346"main", signatures, resources_node, shader_properties);1347if (!dx_entry_point)1348return false;13491350if (resources_node) {1351const struct dxil_mdnode *dx_resources = resources_node;1352dxil_add_metadata_named_node(&ctx->mod, "dx.resources",1353&dx_resources, 1);1354}13551356const struct dxil_mdnode *dx_type_annotations[] = { main_type_annotation };1357return dxil_add_metadata_named_node(&ctx->mod, "dx.typeAnnotations",1358dx_type_annotations,1359ARRAY_SIZE(dx_type_annotations)) &&1360dxil_add_metadata_named_node(&ctx->mod, "dx.entryPoints",1361&dx_entry_point, 1);1362}13631364static const struct dxil_value *1365bitcast_to_int(struct ntd_context *ctx, unsigned bit_size,1366const struct dxil_value *value)1367{1368const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod, bit_size);1369if (!type)1370return NULL;13711372return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);1373}13741375static const struct dxil_value *1376bitcast_to_float(struct ntd_context *ctx, unsigned bit_size,1377const struct dxil_value *value)1378{1379const struct dxil_type *type = dxil_module_get_float_type(&ctx->mod, bit_size);1380if (!type)1381return NULL;13821383return dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, type, value);1384}13851386static void1387store_ssa_def(struct ntd_context *ctx, nir_ssa_def *ssa, unsigned chan,1388const struct dxil_value *value)1389{1390assert(ssa->index < ctx->num_defs);1391assert(chan < ssa->num_components);1392/* We pre-defined the dest value because of a phi node, so bitcast while storing if the1393* base type differs */1394if (ctx->defs[ssa->index].chans[chan]) {1395const struct dxil_type *expect_type = dxil_value_get_type(ctx->defs[ssa->index].chans[chan]);1396const struct dxil_type *value_type = dxil_value_get_type(value);1397if (dxil_type_to_nir_type(expect_type) != dxil_type_to_nir_type(value_type))1398value = dxil_emit_cast(&ctx->mod, DXIL_CAST_BITCAST, expect_type, value);1399}1400ctx->defs[ssa->index].chans[chan] = value;1401}14021403static void1404store_dest_value(struct ntd_context *ctx, nir_dest *dest, unsigned chan,1405const struct dxil_value *value)1406{1407assert(dest->is_ssa);1408assert(value);1409store_ssa_def(ctx, &dest->ssa, chan, value);1410}14111412static void1413store_dest(struct ntd_context *ctx, nir_dest *dest, unsigned chan,1414const struct dxil_value *value, nir_alu_type type)1415{1416switch (nir_alu_type_get_base_type(type)) {1417case nir_type_float:1418if (nir_dest_bit_size(*dest) == 64)1419ctx->mod.feats.doubles = true;1420FALLTHROUGH;1421case nir_type_uint:1422case nir_type_int:1423if (nir_dest_bit_size(*dest) == 16)1424ctx->mod.feats.native_low_precision = true;1425if (nir_dest_bit_size(*dest) == 64)1426ctx->mod.feats.int64_ops = true;1427FALLTHROUGH;1428case nir_type_bool:1429store_dest_value(ctx, dest, chan, value);1430break;1431default:1432unreachable("unexpected nir_alu_type");1433}1434}14351436static void1437store_alu_dest(struct ntd_context *ctx, nir_alu_instr *alu, unsigned chan,1438const struct dxil_value *value)1439{1440assert(!alu->dest.saturate);1441store_dest(ctx, &alu->dest.dest, chan, value,1442nir_op_infos[alu->op].output_type);1443}14441445static const struct dxil_value *1446get_src_ssa(struct ntd_context *ctx, const nir_ssa_def *ssa, unsigned chan)1447{1448assert(ssa->index < ctx->num_defs);1449assert(chan < ssa->num_components);1450assert(ctx->defs[ssa->index].chans[chan]);1451return ctx->defs[ssa->index].chans[chan];1452}14531454static const struct dxil_value *1455get_src(struct ntd_context *ctx, nir_src *src, unsigned chan,1456nir_alu_type type)1457{1458assert(src->is_ssa);1459const struct dxil_value *value = get_src_ssa(ctx, src->ssa, chan);14601461const int bit_size = nir_src_bit_size(*src);14621463switch (nir_alu_type_get_base_type(type)) {1464case nir_type_int:1465case nir_type_uint: {1466assert(bit_size != 64 || ctx->mod.feats.int64_ops);1467const struct dxil_type *expect_type = dxil_module_get_int_type(&ctx->mod, bit_size);1468/* nohing to do */1469if (dxil_value_type_equal_to(value, expect_type))1470return value;1471assert(dxil_value_type_bitsize_equal_to(value, bit_size));1472return bitcast_to_int(ctx, bit_size, value);1473}14741475case nir_type_float:1476assert(nir_src_bit_size(*src) >= 16);1477assert(nir_src_bit_size(*src) != 64 || (ctx->mod.feats.doubles &&1478ctx->mod.feats.int64_ops));1479if (dxil_value_type_equal_to(value, dxil_module_get_float_type(&ctx->mod, bit_size)))1480return value;1481assert(dxil_value_type_bitsize_equal_to(value, bit_size));1482return bitcast_to_float(ctx, bit_size, value);14831484case nir_type_bool:1485if (!dxil_value_type_bitsize_equal_to(value, 1)) {1486return dxil_emit_cast(&ctx->mod, DXIL_CAST_TRUNC,1487dxil_module_get_int_type(&ctx->mod, 1), value);1488}1489return value;14901491default:1492unreachable("unexpected nir_alu_type");1493}1494}14951496static const struct dxil_type *1497get_alu_src_type(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)1498{1499assert(!alu->src[src].abs);1500assert(!alu->src[src].negate);1501nir_ssa_def *ssa_src = alu->src[src].src.ssa;1502unsigned chan = alu->src[src].swizzle[0];1503const struct dxil_value *value = get_src_ssa(ctx, ssa_src, chan);1504return dxil_value_get_type(value);1505}15061507static const struct dxil_value *1508get_alu_src(struct ntd_context *ctx, nir_alu_instr *alu, unsigned src)1509{1510assert(!alu->src[src].abs);1511assert(!alu->src[src].negate);15121513unsigned chan = alu->src[src].swizzle[0];1514return get_src(ctx, &alu->src[src].src, chan,1515nir_op_infos[alu->op].input_types[src]);1516}15171518static bool1519emit_binop(struct ntd_context *ctx, nir_alu_instr *alu,1520enum dxil_bin_opcode opcode,1521const struct dxil_value *op0, const struct dxil_value *op1)1522{1523bool is_float_op = nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) == nir_type_float;15241525enum dxil_opt_flags flags = 0;1526if (is_float_op && !alu->exact)1527flags |= DXIL_UNSAFE_ALGEBRA;15281529const struct dxil_value *v = dxil_emit_binop(&ctx->mod, opcode, op0, op1, flags);1530if (!v)1531return false;1532store_alu_dest(ctx, alu, 0, v);1533return true;1534}15351536static bool1537emit_shift(struct ntd_context *ctx, nir_alu_instr *alu,1538enum dxil_bin_opcode opcode,1539const struct dxil_value *op0, const struct dxil_value *op1)1540{1541unsigned op0_bit_size = nir_src_bit_size(alu->src[0].src);1542unsigned op1_bit_size = nir_src_bit_size(alu->src[1].src);1543if (op0_bit_size != op1_bit_size) {1544const struct dxil_type *type =1545dxil_module_get_int_type(&ctx->mod, op0_bit_size);1546enum dxil_cast_opcode cast_op =1547op1_bit_size < op0_bit_size ? DXIL_CAST_ZEXT : DXIL_CAST_TRUNC;1548op1 = dxil_emit_cast(&ctx->mod, cast_op, type, op1);1549}15501551const struct dxil_value *v =1552dxil_emit_binop(&ctx->mod, opcode, op0, op1, 0);1553if (!v)1554return false;1555store_alu_dest(ctx, alu, 0, v);1556return true;1557}15581559static bool1560emit_cmp(struct ntd_context *ctx, nir_alu_instr *alu,1561enum dxil_cmp_pred pred,1562const struct dxil_value *op0, const struct dxil_value *op1)1563{1564const struct dxil_value *v = dxil_emit_cmp(&ctx->mod, pred, op0, op1);1565if (!v)1566return false;1567store_alu_dest(ctx, alu, 0, v);1568return true;1569}15701571static enum dxil_cast_opcode1572get_cast_op(nir_alu_instr *alu)1573{1574unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);1575unsigned src_bits = nir_src_bit_size(alu->src[0].src);15761577switch (alu->op) {1578/* bool -> int */1579case nir_op_b2i16:1580case nir_op_b2i32:1581case nir_op_b2i64:1582return DXIL_CAST_ZEXT;15831584/* float -> float */1585case nir_op_f2f16_rtz:1586case nir_op_f2f32:1587case nir_op_f2f64:1588assert(dst_bits != src_bits);1589if (dst_bits < src_bits)1590return DXIL_CAST_FPTRUNC;1591else1592return DXIL_CAST_FPEXT;15931594/* int -> int */1595case nir_op_i2i16:1596case nir_op_i2i32:1597case nir_op_i2i64:1598assert(dst_bits != src_bits);1599if (dst_bits < src_bits)1600return DXIL_CAST_TRUNC;1601else1602return DXIL_CAST_SEXT;16031604/* uint -> uint */1605case nir_op_u2u16:1606case nir_op_u2u32:1607case nir_op_u2u64:1608assert(dst_bits != src_bits);1609if (dst_bits < src_bits)1610return DXIL_CAST_TRUNC;1611else1612return DXIL_CAST_ZEXT;16131614/* float -> int */1615case nir_op_f2i16:1616case nir_op_f2i32:1617case nir_op_f2i64:1618return DXIL_CAST_FPTOSI;16191620/* float -> uint */1621case nir_op_f2u16:1622case nir_op_f2u32:1623case nir_op_f2u64:1624return DXIL_CAST_FPTOUI;16251626/* int -> float */1627case nir_op_i2f16:1628case nir_op_i2f32:1629case nir_op_i2f64:1630return DXIL_CAST_SITOFP;16311632/* uint -> float */1633case nir_op_u2f16:1634case nir_op_u2f32:1635case nir_op_u2f64:1636return DXIL_CAST_UITOFP;16371638default:1639unreachable("unexpected cast op");1640}1641}16421643static const struct dxil_type *1644get_cast_dest_type(struct ntd_context *ctx, nir_alu_instr *alu)1645{1646unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);1647switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type)) {1648case nir_type_bool:1649assert(dst_bits == 1);1650FALLTHROUGH;1651case nir_type_int:1652case nir_type_uint:1653return dxil_module_get_int_type(&ctx->mod, dst_bits);16541655case nir_type_float:1656return dxil_module_get_float_type(&ctx->mod, dst_bits);16571658default:1659unreachable("unknown nir_alu_type");1660}1661}16621663static bool1664is_double(nir_alu_type alu_type, unsigned bit_size)1665{1666return nir_alu_type_get_base_type(alu_type) == nir_type_float &&1667bit_size == 64;1668}16691670static bool1671emit_cast(struct ntd_context *ctx, nir_alu_instr *alu,1672const struct dxil_value *value)1673{1674enum dxil_cast_opcode opcode = get_cast_op(alu);1675const struct dxil_type *type = get_cast_dest_type(ctx, alu);1676if (!type)1677return false;16781679const nir_op_info *info = &nir_op_infos[alu->op];1680switch (opcode) {1681case DXIL_CAST_UITOFP:1682case DXIL_CAST_SITOFP:1683if (is_double(info->output_type, nir_dest_bit_size(alu->dest.dest)))1684ctx->mod.feats.dx11_1_double_extensions = true;1685break;1686case DXIL_CAST_FPTOUI:1687case DXIL_CAST_FPTOSI:1688if (is_double(info->input_types[0], nir_src_bit_size(alu->src[0].src)))1689ctx->mod.feats.dx11_1_double_extensions = true;1690break;1691default:1692break;1693}16941695const struct dxil_value *v = dxil_emit_cast(&ctx->mod, opcode, type,1696value);1697if (!v)1698return false;1699store_alu_dest(ctx, alu, 0, v);1700return true;1701}17021703static enum overload_type1704get_overload(nir_alu_type alu_type, unsigned bit_size)1705{1706switch (nir_alu_type_get_base_type(alu_type)) {1707case nir_type_int:1708case nir_type_uint:1709switch (bit_size) {1710case 16: return DXIL_I16;1711case 32: return DXIL_I32;1712case 64: return DXIL_I64;1713default:1714unreachable("unexpected bit_size");1715}1716case nir_type_float:1717switch (bit_size) {1718case 16: return DXIL_F16;1719case 32: return DXIL_F32;1720case 64: return DXIL_F64;1721default:1722unreachable("unexpected bit_size");1723}1724default:1725unreachable("unexpected output type");1726}1727}17281729static bool1730emit_unary_intin(struct ntd_context *ctx, nir_alu_instr *alu,1731enum dxil_intr intr, const struct dxil_value *op)1732{1733const nir_op_info *info = &nir_op_infos[alu->op];1734unsigned src_bits = nir_src_bit_size(alu->src[0].src);1735enum overload_type overload = get_overload(info->input_types[0], src_bits);17361737const struct dxil_value *v = emit_unary_call(ctx, overload, intr, op);1738if (!v)1739return false;1740store_alu_dest(ctx, alu, 0, v);1741return true;1742}17431744static bool1745emit_binary_intin(struct ntd_context *ctx, nir_alu_instr *alu,1746enum dxil_intr intr,1747const struct dxil_value *op0, const struct dxil_value *op1)1748{1749const nir_op_info *info = &nir_op_infos[alu->op];1750assert(info->output_type == info->input_types[0]);1751assert(info->output_type == info->input_types[1]);1752unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);1753assert(nir_src_bit_size(alu->src[0].src) == dst_bits);1754assert(nir_src_bit_size(alu->src[1].src) == dst_bits);1755enum overload_type overload = get_overload(info->output_type, dst_bits);17561757const struct dxil_value *v = emit_binary_call(ctx, overload, intr,1758op0, op1);1759if (!v)1760return false;1761store_alu_dest(ctx, alu, 0, v);1762return true;1763}17641765static bool1766emit_tertiary_intin(struct ntd_context *ctx, nir_alu_instr *alu,1767enum dxil_intr intr,1768const struct dxil_value *op0,1769const struct dxil_value *op1,1770const struct dxil_value *op2)1771{1772const nir_op_info *info = &nir_op_infos[alu->op];1773assert(info->output_type == info->input_types[0]);1774assert(info->output_type == info->input_types[1]);1775assert(info->output_type == info->input_types[2]);17761777unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);1778assert(nir_src_bit_size(alu->src[0].src) == dst_bits);1779assert(nir_src_bit_size(alu->src[1].src) == dst_bits);1780assert(nir_src_bit_size(alu->src[2].src) == dst_bits);17811782enum overload_type overload = get_overload(info->output_type, dst_bits);17831784const struct dxil_value *v = emit_tertiary_call(ctx, overload, intr,1785op0, op1, op2);1786if (!v)1787return false;1788store_alu_dest(ctx, alu, 0, v);1789return true;1790}17911792static bool emit_select(struct ntd_context *ctx, nir_alu_instr *alu,1793const struct dxil_value *sel,1794const struct dxil_value *val_true,1795const struct dxil_value *val_false)1796{1797assert(sel);1798assert(val_true);1799assert(val_false);18001801const struct dxil_value *v = dxil_emit_select(&ctx->mod, sel, val_true, val_false);1802if (!v)1803return false;18041805store_alu_dest(ctx, alu, 0, v);1806return true;1807}18081809static bool1810emit_b2f16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)1811{1812assert(val);18131814struct dxil_module *m = &ctx->mod;18151816const struct dxil_value *c1 = dxil_module_get_float16_const(m, 0x3C00);1817const struct dxil_value *c0 = dxil_module_get_float16_const(m, 0);18181819if (!c0 || !c1)1820return false;18211822return emit_select(ctx, alu, val, c1, c0);1823}18241825static bool1826emit_b2f32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)1827{1828assert(val);18291830struct dxil_module *m = &ctx->mod;18311832const struct dxil_value *c1 = dxil_module_get_float_const(m, 1.0f);1833const struct dxil_value *c0 = dxil_module_get_float_const(m, 0.0f);18341835if (!c0 || !c1)1836return false;18371838return emit_select(ctx, alu, val, c1, c0);1839}18401841static bool1842emit_f2b32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)1843{1844assert(val);18451846const struct dxil_value *zero = dxil_module_get_float_const(&ctx->mod, 0.0f);1847return emit_cmp(ctx, alu, DXIL_FCMP_UNE, val, zero);1848}18491850static bool1851emit_ufind_msb(struct ntd_context *ctx, nir_alu_instr *alu,1852const struct dxil_value *val)1853{1854const nir_op_info *info = &nir_op_infos[alu->op];1855unsigned dst_bits = nir_dest_bit_size(alu->dest.dest);1856unsigned src_bits = nir_src_bit_size(alu->src[0].src);1857enum overload_type overload = get_overload(info->output_type, src_bits);18581859const struct dxil_value *v = emit_unary_call(ctx, overload,1860DXIL_INTR_FIRSTBIT_HI, val);1861if (!v)1862return false;18631864const struct dxil_value *size = dxil_module_get_int32_const(&ctx->mod,1865src_bits - 1);1866const struct dxil_value *zero = dxil_module_get_int_const(&ctx->mod, 0,1867src_bits);1868if (!size || !zero)1869return false;18701871v = dxil_emit_binop(&ctx->mod, DXIL_BINOP_SUB, size, v, 0);1872const struct dxil_value *cnd = dxil_emit_cmp(&ctx->mod, DXIL_ICMP_NE,1873val, zero);1874if (!v || !cnd)1875return false;18761877const struct dxil_value *minus_one =1878dxil_module_get_int_const(&ctx->mod, -1, dst_bits);1879if (!minus_one)1880return false;18811882v = dxil_emit_select(&ctx->mod, cnd, v, minus_one);1883if (!v)1884return false;18851886store_alu_dest(ctx, alu, 0, v);1887return true;1888}18891890static bool1891emit_f16tof32(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)1892{1893const struct dxil_func *func = dxil_get_function(&ctx->mod,1894"dx.op.legacyF16ToF32",1895DXIL_NONE);1896if (!func)1897return false;18981899const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F16TOF32);1900if (!opcode)1901return false;19021903const struct dxil_value *args[] = {1904opcode,1905val1906};19071908const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));1909if (!v)1910return false;1911store_alu_dest(ctx, alu, 0, v);1912return true;1913}19141915static bool1916emit_f32tof16(struct ntd_context *ctx, nir_alu_instr *alu, const struct dxil_value *val)1917{1918const struct dxil_func *func = dxil_get_function(&ctx->mod,1919"dx.op.legacyF32ToF16",1920DXIL_NONE);1921if (!func)1922return false;19231924const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LEGACY_F32TOF16);1925if (!opcode)1926return false;19271928const struct dxil_value *args[] = {1929opcode,1930val1931};19321933const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));1934if (!v)1935return false;1936store_alu_dest(ctx, alu, 0, v);1937return true;1938}19391940static bool1941emit_vec(struct ntd_context *ctx, nir_alu_instr *alu, unsigned num_inputs)1942{1943const struct dxil_type *type = get_alu_src_type(ctx, alu, 0);1944nir_alu_type t = dxil_type_to_nir_type(type);19451946for (unsigned i = 0; i < num_inputs; i++)1947store_alu_dest(ctx, alu, i, get_src(ctx, &alu->src[i].src,1948alu->src[i].swizzle[0], t));1949return true;1950}19511952static bool1953emit_make_double(struct ntd_context *ctx, nir_alu_instr *alu)1954{1955const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.makeDouble", DXIL_F64);1956if (!func)1957return false;19581959const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_MAKE_DOUBLE);1960if (!opcode)1961return false;19621963const struct dxil_value *args[3] = {1964opcode,1965get_src(ctx, &alu->src[0].src, 0, nir_type_uint32),1966get_src(ctx, &alu->src[0].src, 1, nir_type_uint32),1967};1968if (!args[1] || !args[2])1969return false;19701971const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));1972if (!v)1973return false;1974store_dest(ctx, &alu->dest.dest, 0, v, nir_type_float64);1975return true;1976}19771978static bool1979emit_split_double(struct ntd_context *ctx, nir_alu_instr *alu)1980{1981const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.splitDouble", DXIL_F64);1982if (!func)1983return false;19841985const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SPLIT_DOUBLE);1986if (!opcode)1987return false;19881989const struct dxil_value *args[] = {1990opcode,1991get_src(ctx, &alu->src[0].src, 0, nir_type_float64)1992};1993if (!args[1])1994return false;19951996const struct dxil_value *v = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));1997if (!v)1998return false;19992000const struct dxil_value *hi = dxil_emit_extractval(&ctx->mod, v, 0);2001const struct dxil_value *lo = dxil_emit_extractval(&ctx->mod, v, 1);2002if (!hi || !lo)2003return false;20042005store_dest_value(ctx, &alu->dest.dest, 0, hi);2006store_dest_value(ctx, &alu->dest.dest, 1, lo);2007return true;2008}20092010static bool2011emit_alu(struct ntd_context *ctx, nir_alu_instr *alu)2012{2013/* handle vec-instructions first; they are the only ones that produce2014* vector results.2015*/2016switch (alu->op) {2017case nir_op_vec2:2018case nir_op_vec3:2019case nir_op_vec4:2020case nir_op_vec8:2021case nir_op_vec16:2022return emit_vec(ctx, alu, nir_op_infos[alu->op].num_inputs);2023case nir_op_mov: {2024assert(nir_dest_num_components(alu->dest.dest) == 1);2025store_ssa_def(ctx, &alu->dest.dest.ssa, 0, get_src_ssa(ctx,2026alu->src->src.ssa, alu->src->swizzle[0]));2027return true;2028}2029case nir_op_pack_double_2x32_dxil:2030return emit_make_double(ctx, alu);2031case nir_op_unpack_double_2x32_dxil:2032return emit_split_double(ctx, alu);2033default:2034/* silence warnings */2035;2036}20372038/* other ops should be scalar */2039assert(alu->dest.write_mask == 1);2040const struct dxil_value *src[4];2041assert(nir_op_infos[alu->op].num_inputs <= 4);2042for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++)2043src[i] = get_alu_src(ctx, alu, i);20442045switch (alu->op) {2046case nir_op_iadd:2047case nir_op_fadd: return emit_binop(ctx, alu, DXIL_BINOP_ADD, src[0], src[1]);20482049case nir_op_isub:2050case nir_op_fsub: return emit_binop(ctx, alu, DXIL_BINOP_SUB, src[0], src[1]);20512052case nir_op_imul:2053case nir_op_fmul: return emit_binop(ctx, alu, DXIL_BINOP_MUL, src[0], src[1]);20542055case nir_op_idiv:2056case nir_op_fdiv: return emit_binop(ctx, alu, DXIL_BINOP_SDIV, src[0], src[1]);20572058case nir_op_udiv: return emit_binop(ctx, alu, DXIL_BINOP_UDIV, src[0], src[1]);2059case nir_op_irem: return emit_binop(ctx, alu, DXIL_BINOP_SREM, src[0], src[1]);2060case nir_op_imod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);2061case nir_op_umod: return emit_binop(ctx, alu, DXIL_BINOP_UREM, src[0], src[1]);2062case nir_op_ishl: return emit_shift(ctx, alu, DXIL_BINOP_SHL, src[0], src[1]);2063case nir_op_ishr: return emit_shift(ctx, alu, DXIL_BINOP_ASHR, src[0], src[1]);2064case nir_op_ushr: return emit_shift(ctx, alu, DXIL_BINOP_LSHR, src[0], src[1]);2065case nir_op_iand: return emit_binop(ctx, alu, DXIL_BINOP_AND, src[0], src[1]);2066case nir_op_ior: return emit_binop(ctx, alu, DXIL_BINOP_OR, src[0], src[1]);2067case nir_op_ixor: return emit_binop(ctx, alu, DXIL_BINOP_XOR, src[0], src[1]);2068case nir_op_ieq: return emit_cmp(ctx, alu, DXIL_ICMP_EQ, src[0], src[1]);2069case nir_op_ine: return emit_cmp(ctx, alu, DXIL_ICMP_NE, src[0], src[1]);2070case nir_op_ige: return emit_cmp(ctx, alu, DXIL_ICMP_SGE, src[0], src[1]);2071case nir_op_uge: return emit_cmp(ctx, alu, DXIL_ICMP_UGE, src[0], src[1]);2072case nir_op_ilt: return emit_cmp(ctx, alu, DXIL_ICMP_SLT, src[0], src[1]);2073case nir_op_ult: return emit_cmp(ctx, alu, DXIL_ICMP_ULT, src[0], src[1]);2074case nir_op_feq: return emit_cmp(ctx, alu, DXIL_FCMP_OEQ, src[0], src[1]);2075case nir_op_fneu: return emit_cmp(ctx, alu, DXIL_FCMP_UNE, src[0], src[1]);2076case nir_op_flt: return emit_cmp(ctx, alu, DXIL_FCMP_OLT, src[0], src[1]);2077case nir_op_fge: return emit_cmp(ctx, alu, DXIL_FCMP_OGE, src[0], src[1]);2078case nir_op_bcsel: return emit_select(ctx, alu, src[0], src[1], src[2]);2079case nir_op_ftrunc: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_Z, src[0]);2080case nir_op_fabs: return emit_unary_intin(ctx, alu, DXIL_INTR_FABS, src[0]);2081case nir_op_fcos: return emit_unary_intin(ctx, alu, DXIL_INTR_FCOS, src[0]);2082case nir_op_fsin: return emit_unary_intin(ctx, alu, DXIL_INTR_FSIN, src[0]);2083case nir_op_fceil: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_PI, src[0]);2084case nir_op_fexp2: return emit_unary_intin(ctx, alu, DXIL_INTR_FEXP2, src[0]);2085case nir_op_flog2: return emit_unary_intin(ctx, alu, DXIL_INTR_FLOG2, src[0]);2086case nir_op_ffloor: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NI, src[0]);2087case nir_op_ffract: return emit_unary_intin(ctx, alu, DXIL_INTR_FRC, src[0]);2088case nir_op_fisnormal: return emit_unary_intin(ctx, alu, DXIL_INTR_ISNORMAL, src[0]);2089case nir_op_fisfinite: return emit_unary_intin(ctx, alu, DXIL_INTR_ISFINITE, src[0]);20902091case nir_op_fddx:2092case nir_op_fddx_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_COARSE, src[0]);2093case nir_op_fddx_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDX_FINE, src[0]);2094case nir_op_fddy:2095case nir_op_fddy_coarse: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_COARSE, src[0]);2096case nir_op_fddy_fine: return emit_unary_intin(ctx, alu, DXIL_INTR_DDY_FINE, src[0]);20972098case nir_op_fround_even: return emit_unary_intin(ctx, alu, DXIL_INTR_ROUND_NE, src[0]);2099case nir_op_frcp: {2100const struct dxil_value *one = dxil_module_get_float_const(&ctx->mod, 1.0f);2101return emit_binop(ctx, alu, DXIL_BINOP_SDIV, one, src[0]);2102}2103case nir_op_fsat: return emit_unary_intin(ctx, alu, DXIL_INTR_SATURATE, src[0]);2104case nir_op_bit_count: return emit_unary_intin(ctx, alu, DXIL_INTR_COUNTBITS, src[0]);2105case nir_op_ufind_msb: return emit_ufind_msb(ctx, alu, src[0]);2106case nir_op_imax: return emit_binary_intin(ctx, alu, DXIL_INTR_IMAX, src[0], src[1]);2107case nir_op_imin: return emit_binary_intin(ctx, alu, DXIL_INTR_IMIN, src[0], src[1]);2108case nir_op_umax: return emit_binary_intin(ctx, alu, DXIL_INTR_UMAX, src[0], src[1]);2109case nir_op_umin: return emit_binary_intin(ctx, alu, DXIL_INTR_UMIN, src[0], src[1]);2110case nir_op_frsq: return emit_unary_intin(ctx, alu, DXIL_INTR_RSQRT, src[0]);2111case nir_op_fsqrt: return emit_unary_intin(ctx, alu, DXIL_INTR_SQRT, src[0]);2112case nir_op_fmax: return emit_binary_intin(ctx, alu, DXIL_INTR_FMAX, src[0], src[1]);2113case nir_op_fmin: return emit_binary_intin(ctx, alu, DXIL_INTR_FMIN, src[0], src[1]);2114case nir_op_ffma: return emit_tertiary_intin(ctx, alu, DXIL_INTR_FMA, src[0], src[1], src[2]);21152116case nir_op_unpack_half_2x16_split_x: return emit_f16tof32(ctx, alu, src[0]);2117case nir_op_pack_half_2x16_split: return emit_f32tof16(ctx, alu, src[0]);21182119case nir_op_b2i16:2120case nir_op_i2i16:2121case nir_op_f2i16:2122case nir_op_f2u16:2123case nir_op_u2u16:2124case nir_op_u2f16:2125case nir_op_i2f16:2126case nir_op_f2f16_rtz:2127case nir_op_b2i32:2128case nir_op_f2f32:2129case nir_op_f2i32:2130case nir_op_f2u32:2131case nir_op_i2f32:2132case nir_op_i2i32:2133case nir_op_u2f32:2134case nir_op_u2u32:2135case nir_op_b2i64:2136case nir_op_f2f64:2137case nir_op_f2i64:2138case nir_op_f2u64:2139case nir_op_i2f64:2140case nir_op_i2i64:2141case nir_op_u2f64:2142case nir_op_u2u64:2143return emit_cast(ctx, alu, src[0]);21442145case nir_op_f2b32: return emit_f2b32(ctx, alu, src[0]);2146case nir_op_b2f16: return emit_b2f16(ctx, alu, src[0]);2147case nir_op_b2f32: return emit_b2f32(ctx, alu, src[0]);2148default:2149NIR_INSTR_UNSUPPORTED(&alu->instr);2150assert("Unimplemented ALU instruction");2151return false;2152}2153}21542155static const struct dxil_value *2156load_ubo(struct ntd_context *ctx, const struct dxil_value *handle,2157const struct dxil_value *offset, enum overload_type overload)2158{2159assert(handle && offset);21602161const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CBUFFER_LOAD_LEGACY);2162if (!opcode)2163return NULL;21642165const struct dxil_value *args[] = {2166opcode, handle, offset2167};21682169const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cbufferLoadLegacy", overload);2170if (!func)2171return NULL;2172return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2173}21742175static bool2176emit_barrier(struct ntd_context *ctx, nir_intrinsic_instr *intr)2177{2178const struct dxil_value *opcode, *mode;2179const struct dxil_func *func;2180uint32_t flags = 0;21812182if (nir_intrinsic_execution_scope(intr) == NIR_SCOPE_WORKGROUP)2183flags |= DXIL_BARRIER_MODE_SYNC_THREAD_GROUP;21842185nir_variable_mode modes = nir_intrinsic_memory_modes(intr);2186nir_scope mem_scope = nir_intrinsic_memory_scope(intr);21872188/* Currently vtn uses uniform to indicate image memory, which DXIL considers global */2189if (modes & nir_var_uniform)2190modes |= nir_var_mem_global;21912192if (modes & (nir_var_mem_ssbo | nir_var_mem_global)) {2193if (mem_scope > NIR_SCOPE_WORKGROUP)2194flags |= DXIL_BARRIER_MODE_UAV_FENCE_GLOBAL;2195else2196flags |= DXIL_BARRIER_MODE_UAV_FENCE_THREAD_GROUP;2197}21982199if (modes & nir_var_mem_shared)2200flags |= DXIL_BARRIER_MODE_GROUPSHARED_MEM_FENCE;22012202func = dxil_get_function(&ctx->mod, "dx.op.barrier", DXIL_NONE);2203if (!func)2204return false;22052206opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_BARRIER);2207if (!opcode)2208return false;22092210mode = dxil_module_get_int32_const(&ctx->mod, flags);2211if (!mode)2212return false;22132214const struct dxil_value *args[] = { opcode, mode };22152216return dxil_emit_call_void(&ctx->mod, func,2217args, ARRAY_SIZE(args));2218}22192220static bool2221emit_load_global_invocation_id(struct ntd_context *ctx,2222nir_intrinsic_instr *intr)2223{2224assert(intr->dest.is_ssa);2225nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);22262227for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {2228if (comps & (1 << i)) {2229const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);2230if (!idx)2231return false;2232const struct dxil_value *globalid = emit_threadid_call(ctx, idx);22332234if (!globalid)2235return false;22362237store_dest_value(ctx, &intr->dest, i, globalid);2238}2239}2240return true;2241}22422243static bool2244emit_load_local_invocation_id(struct ntd_context *ctx,2245nir_intrinsic_instr *intr)2246{2247assert(intr->dest.is_ssa);2248nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);22492250for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {2251if (comps & (1 << i)) {2252const struct dxil_value2253*idx = dxil_module_get_int32_const(&ctx->mod, i);2254if (!idx)2255return false;2256const struct dxil_value2257*threadidingroup = emit_threadidingroup_call(ctx, idx);2258if (!threadidingroup)2259return false;2260store_dest_value(ctx, &intr->dest, i, threadidingroup);2261}2262}2263return true;2264}22652266static bool2267emit_load_local_workgroup_id(struct ntd_context *ctx,2268nir_intrinsic_instr *intr)2269{2270assert(intr->dest.is_ssa);2271nir_component_mask_t comps = nir_ssa_def_components_read(&intr->dest.ssa);22722273for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {2274if (comps & (1 << i)) {2275const struct dxil_value *idx = dxil_module_get_int32_const(&ctx->mod, i);2276if (!idx)2277return false;2278const struct dxil_value *groupid = emit_groupid_call(ctx, idx);2279if (!groupid)2280return false;2281store_dest_value(ctx, &intr->dest, i, groupid);2282}2283}2284return true;2285}22862287static bool2288emit_load_primitiveid(struct ntd_context *ctx,2289nir_intrinsic_instr *intr)2290{2291const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.primitiveID", DXIL_I32);2292if (!func)2293return false;22942295const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod,2296DXIL_INTR_PRIMITIVE_ID);2297if (!opcode)2298return false;22992300const struct dxil_value *args[] = {2301opcode2302};23032304const struct dxil_value *primid = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2305store_dest_value(ctx, &intr->dest, 0, primid);23062307return true;2308}23092310static const struct dxil_value *2311get_int32_undef(struct dxil_module *m)2312{2313const struct dxil_type *int32_type =2314dxil_module_get_int_type(m, 32);2315if (!int32_type)2316return NULL;23172318return dxil_module_get_undef(m, int32_type);2319}23202321static const struct dxil_value *2322emit_gep_for_index(struct ntd_context *ctx, const nir_variable *var,2323const struct dxil_value *index)2324{2325assert(var->data.mode == nir_var_shader_temp);23262327struct hash_entry *he = _mesa_hash_table_search(ctx->consts, var);2328assert(he != NULL);2329const struct dxil_value *ptr = he->data;23302331const struct dxil_value *zero = dxil_module_get_int32_const(&ctx->mod, 0);2332if (!zero)2333return NULL;23342335const struct dxil_value *ops[] = { ptr, zero, index };2336return dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));2337}23382339static const struct dxil_value *2340get_ubo_ssbo_handle(struct ntd_context *ctx, nir_src *src, enum dxil_resource_class class, unsigned base_binding)2341{2342/* This source might be one of:2343* 1. Constant resource index - just look it up in precomputed handle arrays2344* If it's null in that array, create a handle, and store the result2345* 2. A handle from load_vulkan_descriptor - just get the stored SSA value2346* 3. Dynamic resource index - create a handle for it here2347*/2348assert(src->ssa->num_components == 1 && src->ssa->bit_size == 32);2349nir_const_value *const_block_index = nir_src_as_const_value(*src);2350const struct dxil_value **handle_entry = NULL;2351if (const_block_index) {2352assert(!ctx->opts->vulkan_environment);2353switch (class) {2354case DXIL_RESOURCE_CLASS_CBV:2355handle_entry = &ctx->cbv_handles[const_block_index->u32];2356break;2357case DXIL_RESOURCE_CLASS_UAV:2358handle_entry = &ctx->uav_handles[const_block_index->u32];2359break;2360case DXIL_RESOURCE_CLASS_SRV:2361handle_entry = &ctx->srv_handles[const_block_index->u32];2362break;2363default:2364unreachable("Unexpected resource class");2365}2366}23672368if (handle_entry && *handle_entry)2369return *handle_entry;23702371const struct dxil_value *value = get_src_ssa(ctx, src->ssa, 0);2372if (ctx->opts->vulkan_environment) {2373return value;2374}23752376const struct dxil_value *handle = emit_createhandle_call(ctx, class,2377get_resource_id(ctx, class, 0, base_binding), value, !const_block_index);2378if (handle_entry)2379*handle_entry = handle;23802381return handle;2382}23832384static bool2385emit_load_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)2386{2387const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);23882389nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));2390enum dxil_resource_class class = DXIL_RESOURCE_CLASS_UAV;2391if (var && var->data.access & ACCESS_NON_WRITEABLE)2392class = DXIL_RESOURCE_CLASS_SRV;23932394const struct dxil_value *handle = get_ubo_ssbo_handle(ctx, &intr->src[0], class, 0);2395const struct dxil_value *offset =2396get_src(ctx, &intr->src[1], 0, nir_type_uint);2397if (!int32_undef || !handle || !offset)2398return false;23992400assert(nir_src_bit_size(intr->src[0]) == 32);2401assert(nir_intrinsic_dest_components(intr) <= 4);24022403const struct dxil_value *coord[2] = {2404offset,2405int32_undef2406};24072408const struct dxil_value *load = emit_bufferload_call(ctx, handle, coord);2409if (!load)2410return false;24112412for (int i = 0; i < nir_intrinsic_dest_components(intr); i++) {2413const struct dxil_value *val =2414dxil_emit_extractval(&ctx->mod, load, i);2415if (!val)2416return false;2417store_dest_value(ctx, &intr->dest, i, val);2418}2419return true;2420}24212422static bool2423emit_store_ssbo(struct ntd_context *ctx, nir_intrinsic_instr *intr)2424{2425const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[1], DXIL_RESOURCE_CLASS_UAV, 0);2426const struct dxil_value *offset =2427get_src(ctx, &intr->src[2], 0, nir_type_uint);2428if (!handle || !offset)2429return false;24302431assert(nir_src_bit_size(intr->src[0]) == 32);2432unsigned num_components = nir_src_num_components(intr->src[0]);2433assert(num_components <= 4);2434const struct dxil_value *value[4];2435for (unsigned i = 0; i < num_components; ++i) {2436value[i] = get_src(ctx, &intr->src[0], i, nir_type_uint);2437if (!value[i])2438return false;2439}24402441const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);2442if (!int32_undef)2443return false;24442445const struct dxil_value *coord[2] = {2446offset,2447int32_undef2448};24492450for (int i = num_components; i < 4; ++i)2451value[i] = int32_undef;24522453const struct dxil_value *write_mask =2454dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);2455if (!write_mask)2456return false;24572458return emit_bufferstore_call(ctx, handle, coord, value, write_mask, DXIL_I32);2459}24602461static bool2462emit_store_ssbo_masked(struct ntd_context *ctx, nir_intrinsic_instr *intr)2463{2464const struct dxil_value *value =2465get_src(ctx, &intr->src[0], 0, nir_type_uint);2466const struct dxil_value *mask =2467get_src(ctx, &intr->src[1], 0, nir_type_uint);2468const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[2], DXIL_RESOURCE_CLASS_UAV, 0);2469const struct dxil_value *offset =2470get_src(ctx, &intr->src[3], 0, nir_type_uint);2471if (!value || !mask || !handle || !offset)2472return false;24732474const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);2475if (!int32_undef)2476return false;24772478const struct dxil_value *coord[3] = {2479offset, int32_undef, int32_undef2480};24812482return2483emit_atomic_binop(ctx, handle, DXIL_ATOMIC_AND, coord, mask) != NULL &&2484emit_atomic_binop(ctx, handle, DXIL_ATOMIC_OR, coord, value) != NULL;2485}24862487static bool2488emit_store_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)2489{2490const struct dxil_value *zero, *index;24912492/* All shared mem accesses should have been lowered to scalar 32bit2493* accesses.2494*/2495assert(nir_src_bit_size(intr->src[0]) == 32);2496assert(nir_src_num_components(intr->src[0]) == 1);24972498zero = dxil_module_get_int32_const(&ctx->mod, 0);2499if (!zero)2500return false;25012502if (intr->intrinsic == nir_intrinsic_store_shared_dxil)2503index = get_src(ctx, &intr->src[1], 0, nir_type_uint);2504else2505index = get_src(ctx, &intr->src[2], 0, nir_type_uint);2506if (!index)2507return false;25082509const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };2510const struct dxil_value *ptr, *value;25112512ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));2513if (!ptr)2514return false;25152516value = get_src(ctx, &intr->src[0], 0, nir_type_uint);25172518if (intr->intrinsic == nir_intrinsic_store_shared_dxil)2519return dxil_emit_store(&ctx->mod, value, ptr, 4, false);25202521const struct dxil_value *mask = get_src(ctx, &intr->src[1], 0, nir_type_uint);25222523if (!dxil_emit_atomicrmw(&ctx->mod, mask, ptr, DXIL_RMWOP_AND, false,2524DXIL_ATOMIC_ORDERING_ACQREL,2525DXIL_SYNC_SCOPE_CROSSTHREAD))2526return false;25272528if (!dxil_emit_atomicrmw(&ctx->mod, value, ptr, DXIL_RMWOP_OR, false,2529DXIL_ATOMIC_ORDERING_ACQREL,2530DXIL_SYNC_SCOPE_CROSSTHREAD))2531return false;25322533return true;2534}25352536static bool2537emit_store_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)2538{2539const struct dxil_value *zero, *index;25402541/* All scratch mem accesses should have been lowered to scalar 32bit2542* accesses.2543*/2544assert(nir_src_bit_size(intr->src[0]) == 32);2545assert(nir_src_num_components(intr->src[0]) == 1);25462547zero = dxil_module_get_int32_const(&ctx->mod, 0);2548if (!zero)2549return false;25502551index = get_src(ctx, &intr->src[1], 0, nir_type_uint);2552if (!index)2553return false;25542555const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };2556const struct dxil_value *ptr, *value;25572558ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));2559if (!ptr)2560return false;25612562value = get_src(ctx, &intr->src[0], 0, nir_type_uint);2563return dxil_emit_store(&ctx->mod, value, ptr, 4, false);2564}25652566static bool2567emit_load_ubo(struct ntd_context *ctx, nir_intrinsic_instr *intr)2568{2569const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);2570if (!handle)2571return false;25722573const struct dxil_value *offset;2574nir_const_value *const_offset = nir_src_as_const_value(intr->src[1]);2575if (const_offset) {2576offset = dxil_module_get_int32_const(&ctx->mod, const_offset->i32 >> 4);2577} else {2578const struct dxil_value *offset_src = get_src(ctx, &intr->src[1], 0, nir_type_uint);2579const struct dxil_value *c4 = dxil_module_get_int32_const(&ctx->mod, 4);2580offset = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ASHR, offset_src, c4, 0);2581}25822583const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_F32);25842585if (!agg)2586return false;25872588for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {2589const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, agg, i);2590store_dest(ctx, &intr->dest, i, retval,2591nir_dest_bit_size(intr->dest) > 1 ? nir_type_float : nir_type_bool);2592}2593return true;2594}25952596static bool2597emit_load_ubo_dxil(struct ntd_context *ctx, nir_intrinsic_instr *intr)2598{2599assert(nir_dest_num_components(intr->dest) <= 4);2600assert(nir_dest_bit_size(intr->dest) == 32);26012602const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_CBV, 0);2603const struct dxil_value *offset =2604get_src(ctx, &intr->src[1], 0, nir_type_uint);26052606if (!handle || !offset)2607return false;26082609const struct dxil_value *agg = load_ubo(ctx, handle, offset, DXIL_I32);2610if (!agg)2611return false;26122613for (unsigned i = 0; i < nir_dest_num_components(intr->dest); i++)2614store_dest_value(ctx, &intr->dest, i,2615dxil_emit_extractval(&ctx->mod, agg, i));26162617return true;2618}26192620static bool2621emit_store_output(struct ntd_context *ctx, nir_intrinsic_instr *intr,2622nir_variable *output)2623{2624nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(output->type));2625enum overload_type overload = DXIL_F32;2626if (output->data.compact)2627out_type = nir_type_float;2628else2629overload = get_overload(out_type, glsl_get_bit_size(output->type));2630const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.storeOutput", overload);26312632if (!func)2633return false;26342635const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_STORE_OUTPUT);2636const struct dxil_value *output_id = dxil_module_get_int32_const(&ctx->mod, (int)output->data.driver_location);2637const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);26382639bool success = true;2640if (output->data.compact) {2641nir_deref_instr *array_deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);2642unsigned array_index = nir_src_as_uint(array_deref->arr.index);2643const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, array_index);2644const struct dxil_value *value = get_src(ctx, &intr->src[1], 0, out_type);2645const struct dxil_value *args[] = {2646opcode, output_id, row, col, value2647};2648success = dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));2649} else {2650uint32_t writemask = nir_intrinsic_write_mask(intr);2651for (unsigned i = 0; i < nir_src_num_components(intr->src[1]) && success; ++i) {2652if (writemask & (1 << i)) {2653const struct dxil_value *col = dxil_module_get_int8_const(&ctx->mod, i);2654const struct dxil_value *value = get_src(ctx, &intr->src[1], i, out_type);2655const struct dxil_value *args[] = {2656opcode, output_id, row, col, value2657};2658success &= dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));2659}2660}2661}2662return success;2663}26642665static bool2666emit_store_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)2667{2668nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);2669nir_variable *var = nir_deref_instr_get_variable(deref);26702671switch (var->data.mode) {2672case nir_var_shader_out:2673return emit_store_output(ctx, intr, var);26742675default:2676unreachable("unsupported nir_variable_mode");2677}2678}26792680static bool2681emit_load_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_src *index)2682{2683assert(var);2684const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);2685const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);2686const struct dxil_value *vertex_id;2687const struct dxil_value *row;26882689if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {2690vertex_id = get_src(ctx, index, 0, nir_type_int);2691row = dxil_module_get_int32_const(&ctx->mod, 0);2692} else {2693const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);2694vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);2695row = get_src(ctx, index, 0, nir_type_int);2696}26972698nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(glsl_get_array_element(var->type)));2699enum overload_type overload = get_overload(out_type, glsl_get_bit_size(glsl_get_array_element(var->type)));27002701const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);27022703if (!func)2704return false;27052706for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {2707const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);27082709const struct dxil_value *args[] = {2710opcode, input_id, row, comp, vertex_id2711};27122713const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2714if (!retval)2715return false;2716store_dest(ctx, &intr->dest, i, retval, out_type);2717}2718return true;2719}27202721static bool2722emit_load_compact_input_array(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var, nir_deref_instr *deref)2723{2724assert(var);2725const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);2726const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);2727const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);2728const struct dxil_value *vertex_id;27292730nir_src *col = &deref->arr.index;2731nir_src_is_const(*col);27322733if (ctx->mod.shader_kind == DXIL_GEOMETRY_SHADER) {2734nir_deref_instr *deref_parent = nir_deref_instr_parent(deref);2735assert(deref_parent->deref_type == nir_deref_type_array);27362737vertex_id = get_src(ctx, &deref_parent->arr.index, 0, nir_type_int);2738} else {2739const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);2740vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);2741}27422743nir_alu_type out_type = nir_type_float;2744enum overload_type overload = get_overload(out_type, 32);27452746const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);27472748if (!func)2749return false;27502751const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, nir_src_as_int(*col));27522753const struct dxil_value *args[] = {2754opcode, input_id, row, comp, vertex_id2755};27562757const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2758if (!retval)2759return false;2760store_dest(ctx, &intr->dest, 0, retval, out_type);2761return true;2762}27632764static bool2765emit_load_input_interpolated(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable *var)2766{2767assert(var);2768const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_LOAD_INPUT);2769const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, var->data.driver_location);2770const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);2771const struct dxil_type *int32_type = dxil_module_get_int_type(&ctx->mod, 32);2772const struct dxil_value *vertex_id = dxil_module_get_undef(&ctx->mod, int32_type);27732774nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));2775enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));27762777const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.loadInput", overload);27782779if (!func)2780return false;27812782for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {2783const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);27842785const struct dxil_value *args[] = {2786opcode, input_id, row, comp, vertex_id2787};27882789const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2790if (!retval)2791return false;2792store_dest(ctx, &intr->dest, i, retval, out_type);2793}2794return true;2795}27962797static bool2798emit_load_input_flat(struct ntd_context *ctx, nir_intrinsic_instr *intr, nir_variable* var)2799{2800const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_ATTRIBUTE_AT_VERTEX);2801const struct dxil_value *input_id = dxil_module_get_int32_const(&ctx->mod, (int)var->data.driver_location);2802const struct dxil_value *row = dxil_module_get_int32_const(&ctx->mod, 0);2803const struct dxil_value *vertex_id = dxil_module_get_int8_const(&ctx->mod, ctx->opts->provoking_vertex);28042805nir_alu_type out_type = nir_get_nir_type_for_glsl_base_type(glsl_get_base_type(var->type));2806enum overload_type overload = get_overload(out_type, glsl_get_bit_size(var->type));28072808const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.attributeAtVertex", overload);2809if (!func)2810return false;28112812for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {2813const struct dxil_value *comp = dxil_module_get_int8_const(&ctx->mod, i);2814const struct dxil_value *args[] = {2815opcode, input_id, row, comp, vertex_id2816};28172818const struct dxil_value *retval = dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));2819if (!retval)2820return false;28212822store_dest(ctx, &intr->dest, i, retval, out_type);2823}2824return true;2825}28262827static bool2828emit_load_input(struct ntd_context *ctx, nir_intrinsic_instr *intr,2829nir_variable *input)2830{2831if (ctx->mod.shader_kind != DXIL_PIXEL_SHADER ||2832input->data.interpolation != INTERP_MODE_FLAT ||2833!ctx->opts->interpolate_at_vertex ||2834ctx->opts->provoking_vertex == 0 ||2835glsl_type_is_integer(input->type))2836return emit_load_input_interpolated(ctx, intr, input);2837else2838return emit_load_input_flat(ctx, intr, input);2839}28402841static bool2842emit_load_ptr(struct ntd_context *ctx, nir_intrinsic_instr *intr)2843{2844struct nir_variable *var =2845nir_deref_instr_get_variable(nir_src_as_deref(intr->src[0]));2846const struct dxil_value *index =2847get_src(ctx, &intr->src[1], 0, nir_type_uint);28482849const struct dxil_value *ptr = emit_gep_for_index(ctx, var, index);2850if (!ptr)2851return false;28522853const struct dxil_value *retval =2854dxil_emit_load(&ctx->mod, ptr, 4, false);28552856store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);2857return true;2858}28592860static bool2861emit_load_shared(struct ntd_context *ctx, nir_intrinsic_instr *intr)2862{2863const struct dxil_value *zero, *index;2864unsigned bit_size = nir_dest_bit_size(intr->dest);2865unsigned align = bit_size / 8;28662867/* All shared mem accesses should have been lowered to scalar 32bit2868* accesses.2869*/2870assert(bit_size == 32);2871assert(nir_dest_num_components(intr->dest) == 1);28722873zero = dxil_module_get_int32_const(&ctx->mod, 0);2874if (!zero)2875return false;28762877index = get_src(ctx, &intr->src[0], 0, nir_type_uint);2878if (!index)2879return false;28802881const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };2882const struct dxil_value *ptr, *retval;28832884ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));2885if (!ptr)2886return false;28872888retval = dxil_emit_load(&ctx->mod, ptr, align, false);2889if (!retval)2890return false;28912892store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);2893return true;2894}28952896static bool2897emit_load_scratch(struct ntd_context *ctx, nir_intrinsic_instr *intr)2898{2899const struct dxil_value *zero, *index;2900unsigned bit_size = nir_dest_bit_size(intr->dest);2901unsigned align = bit_size / 8;29022903/* All scratch mem accesses should have been lowered to scalar 32bit2904* accesses.2905*/2906assert(bit_size == 32);2907assert(nir_dest_num_components(intr->dest) == 1);29082909zero = dxil_module_get_int32_const(&ctx->mod, 0);2910if (!zero)2911return false;29122913index = get_src(ctx, &intr->src[0], 0, nir_type_uint);2914if (!index)2915return false;29162917const struct dxil_value *ops[] = { ctx->scratchvars, zero, index };2918const struct dxil_value *ptr, *retval;29192920ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));2921if (!ptr)2922return false;29232924retval = dxil_emit_load(&ctx->mod, ptr, align, false);2925if (!retval)2926return false;29272928store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);2929return true;2930}29312932static bool2933emit_load_deref(struct ntd_context *ctx, nir_intrinsic_instr *intr)2934{2935assert(intr->src[0].is_ssa);2936nir_deref_instr *deref = nir_instr_as_deref(intr->src[0].ssa->parent_instr);2937nir_variable *var = nir_deref_instr_get_variable(deref);29382939switch (var->data.mode) {2940case nir_var_shader_in:2941if (glsl_type_is_array(var->type)) {2942if (var->data.compact)2943return emit_load_compact_input_array(ctx, intr, var, deref);2944else2945return emit_load_input_array(ctx, intr, var, &deref->arr.index);2946}2947return emit_load_input(ctx, intr, var);29482949default:2950unreachable("unsupported nir_variable_mode");2951}2952}29532954static bool2955emit_discard_if_with_value(struct ntd_context *ctx, const struct dxil_value *value)2956{2957const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_DISCARD);2958if (!opcode)2959return false;29602961const struct dxil_value *args[] = {2962opcode,2963value2964};29652966const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.discard", DXIL_NONE);2967if (!func)2968return false;29692970return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));2971}29722973static bool2974emit_discard_if(struct ntd_context *ctx, nir_intrinsic_instr *intr)2975{2976const struct dxil_value *value = get_src(ctx, &intr->src[0], 0, nir_type_bool);2977return emit_discard_if_with_value(ctx, value);2978}29792980static bool2981emit_discard(struct ntd_context *ctx)2982{2983const struct dxil_value *value = dxil_module_get_int1_const(&ctx->mod, true);2984return emit_discard_if_with_value(ctx, value);2985}29862987static bool2988emit_emit_vertex(struct ntd_context *ctx, nir_intrinsic_instr *intr)2989{2990const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_EMIT_STREAM);2991const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));2992if (!opcode || !stream_id)2993return false;29942995const struct dxil_value *args[] = {2996opcode,2997stream_id2998};29993000const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.emitStream", DXIL_NONE);3001if (!func)3002return false;30033004return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));3005}30063007static bool3008emit_end_primitive(struct ntd_context *ctx, nir_intrinsic_instr *intr)3009{3010const struct dxil_value *opcode = dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_CUT_STREAM);3011const struct dxil_value *stream_id = dxil_module_get_int8_const(&ctx->mod, nir_intrinsic_stream_id(intr));3012if (!opcode || !stream_id)3013return false;30143015const struct dxil_value *args[] = {3016opcode,3017stream_id3018};30193020const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.cutStream", DXIL_NONE);3021if (!func)3022return false;30233024return dxil_emit_call_void(&ctx->mod, func, args, ARRAY_SIZE(args));3025}30263027static bool3028emit_image_store(struct ntd_context *ctx, nir_intrinsic_instr *intr)3029{3030const struct dxil_value *handle;3031if (ctx->opts->vulkan_environment) {3032assert(intr->intrinsic == nir_intrinsic_image_deref_store);3033handle = get_src_ssa(ctx, intr->src[0].ssa, 0);3034} else {3035assert(intr->intrinsic == nir_intrinsic_image_store);3036int binding = nir_src_as_int(intr->src[0]);3037handle = ctx->uav_handles[binding];3038}3039if (!handle)3040return false;30413042const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);3043if (!int32_undef)3044return false;30453046const struct dxil_value *coord[3] = { int32_undef, int32_undef, int32_undef };3047enum glsl_sampler_dim image_dim = intr->intrinsic == nir_intrinsic_image_store ?3048nir_intrinsic_image_dim(intr) :3049glsl_get_sampler_dim(nir_src_as_deref(intr->src[0])->type);3050unsigned num_coords = glsl_get_sampler_dim_coordinate_components(image_dim);3051assert(num_coords <= nir_src_num_components(intr->src[1]));3052for (unsigned i = 0; i < num_coords; ++i) {3053coord[i] = get_src(ctx, &intr->src[1], i, nir_type_uint);3054if (!coord[i])3055return false;3056}30573058nir_alu_type in_type = nir_intrinsic_src_type(intr);3059enum overload_type overload = get_overload(in_type, 32);30603061assert(nir_src_bit_size(intr->src[3]) == 32);3062unsigned num_components = nir_src_num_components(intr->src[3]);3063assert(num_components <= 4);3064const struct dxil_value *value[4];3065for (unsigned i = 0; i < num_components; ++i) {3066value[i] = get_src(ctx, &intr->src[3], i, in_type);3067if (!value[i])3068return false;3069}30703071for (int i = num_components; i < 4; ++i)3072value[i] = int32_undef;30733074const struct dxil_value *write_mask =3075dxil_module_get_int8_const(&ctx->mod, (1u << num_components) - 1);3076if (!write_mask)3077return false;30783079if (image_dim == GLSL_SAMPLER_DIM_BUF) {3080coord[1] = int32_undef;3081return emit_bufferstore_call(ctx, handle, coord, value, write_mask, overload);3082} else3083return emit_texturestore_call(ctx, handle, coord, value, write_mask, overload);3084}30853086struct texop_parameters {3087const struct dxil_value *tex;3088const struct dxil_value *sampler;3089const struct dxil_value *bias, *lod_or_sample, *min_lod;3090const struct dxil_value *coord[4], *offset[3], *dx[3], *dy[3];3091const struct dxil_value *cmp;3092enum overload_type overload;3093};30943095static const struct dxil_value *3096emit_texture_size(struct ntd_context *ctx, struct texop_parameters *params)3097{3098const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.getDimensions", DXIL_NONE);3099if (!func)3100return false;31013102const struct dxil_value *args[] = {3103dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_SIZE),3104params->tex,3105params->lod_or_sample3106};31073108return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3109}31103111static bool3112emit_image_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)3113{3114const struct dxil_value *handle;3115if (ctx->opts->vulkan_environment) {3116assert(intr->intrinsic == nir_intrinsic_image_deref_size);3117handle = get_src_ssa(ctx, intr->src[0].ssa, 0);3118}3119else {3120assert(intr->intrinsic == nir_intrinsic_image_size);3121int binding = nir_src_as_int(intr->src[0]);3122handle = ctx->uav_handles[binding];3123}3124if (!handle)3125return false;31263127const struct dxil_value *lod = get_src(ctx, &intr->src[1], 0, nir_type_uint);3128if (!lod)3129return false;31303131struct texop_parameters params = {3132.tex = handle,3133.lod_or_sample = lod3134};3135const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms);3136if (!dimensions)3137return false;31383139for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {3140const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, i);3141store_dest(ctx, &intr->dest, i, retval, nir_type_uint);3142}31433144return true;3145}31463147static bool3148emit_get_ssbo_size(struct ntd_context *ctx, nir_intrinsic_instr *intr)3149{3150const struct dxil_value* handle = NULL;3151if (ctx->opts->vulkan_environment) {3152handle = get_src_ssa(ctx, intr->src[0].ssa, 0);3153} else {3154int binding = nir_src_as_int(intr->src[0]);3155handle = ctx->uav_handles[binding];3156}31573158if (!handle)3159return false;31603161struct texop_parameters params = {3162.tex = handle,3163.lod_or_sample = dxil_module_get_undef(3164&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32))3165};31663167const struct dxil_value *dimensions = emit_texture_size(ctx, ¶ms);3168if (!dimensions)3169return false;31703171const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, dimensions, 0);3172store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);31733174return true;3175}31763177static bool3178emit_ssbo_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,3179enum dxil_atomic_op op, nir_alu_type type)3180{3181const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);3182const struct dxil_value *offset =3183get_src(ctx, &intr->src[1], 0, nir_type_uint);3184const struct dxil_value *value =3185get_src(ctx, &intr->src[2], 0, type);31863187if (!value || !handle || !offset)3188return false;31893190const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);3191if (!int32_undef)3192return false;31933194const struct dxil_value *coord[3] = {3195offset, int32_undef, int32_undef3196};31973198const struct dxil_value *retval =3199emit_atomic_binop(ctx, handle, op, coord, value);32003201if (!retval)3202return false;32033204store_dest(ctx, &intr->dest, 0, retval, type);3205return true;3206}32073208static bool3209emit_ssbo_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)3210{3211const struct dxil_value* handle = get_ubo_ssbo_handle(ctx, &intr->src[0], DXIL_RESOURCE_CLASS_UAV, 0);3212const struct dxil_value *offset =3213get_src(ctx, &intr->src[1], 0, nir_type_uint);3214const struct dxil_value *cmpval =3215get_src(ctx, &intr->src[2], 0, nir_type_int);3216const struct dxil_value *newval =3217get_src(ctx, &intr->src[3], 0, nir_type_int);32183219if (!cmpval || !newval || !handle || !offset)3220return false;32213222const struct dxil_value *int32_undef = get_int32_undef(&ctx->mod);3223if (!int32_undef)3224return false;32253226const struct dxil_value *coord[3] = {3227offset, int32_undef, int32_undef3228};32293230const struct dxil_value *retval =3231emit_atomic_cmpxchg(ctx, handle, coord, cmpval, newval);32323233if (!retval)3234return false;32353236store_dest(ctx, &intr->dest, 0, retval, nir_type_int);3237return true;3238}32393240static bool3241emit_shared_atomic(struct ntd_context *ctx, nir_intrinsic_instr *intr,3242enum dxil_rmw_op op, nir_alu_type type)3243{3244const struct dxil_value *zero, *index;32453246assert(nir_src_bit_size(intr->src[1]) == 32);32473248zero = dxil_module_get_int32_const(&ctx->mod, 0);3249if (!zero)3250return false;32513252index = get_src(ctx, &intr->src[0], 0, nir_type_uint);3253if (!index)3254return false;32553256const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };3257const struct dxil_value *ptr, *value, *retval;32583259ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));3260if (!ptr)3261return false;32623263value = get_src(ctx, &intr->src[1], 0, type);32643265retval = dxil_emit_atomicrmw(&ctx->mod, value, ptr, op, false,3266DXIL_ATOMIC_ORDERING_ACQREL,3267DXIL_SYNC_SCOPE_CROSSTHREAD);3268if (!retval)3269return false;32703271store_dest(ctx, &intr->dest, 0, retval, type);3272return true;3273}32743275static bool3276emit_shared_atomic_comp_swap(struct ntd_context *ctx, nir_intrinsic_instr *intr)3277{3278const struct dxil_value *zero, *index;32793280assert(nir_src_bit_size(intr->src[1]) == 32);32813282zero = dxil_module_get_int32_const(&ctx->mod, 0);3283if (!zero)3284return false;32853286index = get_src(ctx, &intr->src[0], 0, nir_type_uint);3287if (!index)3288return false;32893290const struct dxil_value *ops[] = { ctx->sharedvars, zero, index };3291const struct dxil_value *ptr, *cmpval, *newval, *retval;32923293ptr = dxil_emit_gep_inbounds(&ctx->mod, ops, ARRAY_SIZE(ops));3294if (!ptr)3295return false;32963297cmpval = get_src(ctx, &intr->src[1], 0, nir_type_uint);3298newval = get_src(ctx, &intr->src[2], 0, nir_type_uint);32993300retval = dxil_emit_cmpxchg(&ctx->mod, cmpval, newval, ptr, false,3301DXIL_ATOMIC_ORDERING_ACQREL,3302DXIL_SYNC_SCOPE_CROSSTHREAD);3303if (!retval)3304return false;33053306store_dest(ctx, &intr->dest, 0, retval, nir_type_uint);3307return true;3308}33093310static bool3311emit_vulkan_resource_index(struct ntd_context *ctx, nir_intrinsic_instr *intr)3312{3313unsigned int binding = nir_intrinsic_binding(intr);33143315bool const_index = nir_src_is_const(intr->src[0]);3316if (const_index) {3317binding += nir_src_as_const_value(intr->src[0])->u32;3318}33193320const struct dxil_value *index_value = dxil_module_get_int32_const(&ctx->mod, binding);3321if (!index_value)3322return false;3323if (!const_index) {3324index_value = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD,3325index_value, get_src(ctx, &intr->src[0], 0, nir_type_uint32), 0);3326if (!index_value)3327return false;3328}33293330store_dest(ctx, &intr->dest, 0, index_value, nir_type_uint32);3331store_dest(ctx, &intr->dest, 1, dxil_module_get_int32_const(&ctx->mod, 0), nir_type_uint32);3332return true;3333}33343335static bool3336emit_load_vulkan_descriptor(struct ntd_context *ctx, nir_intrinsic_instr *intr)3337{3338nir_intrinsic_instr* index = nir_src_as_intrinsic(intr->src[0]);3339/* We currently do not support reindex */3340assert(index && index->intrinsic == nir_intrinsic_vulkan_resource_index);33413342unsigned binding = nir_intrinsic_binding(index);3343unsigned space = nir_intrinsic_desc_set(index);33443345/* The descriptor_set field for variables is only 5 bits. We shouldn't have intrinsics trying to go beyond that. */3346assert(space < 32);33473348nir_variable *var = nir_get_binding_variable(ctx->shader, nir_chase_binding(intr->src[0]));33493350const struct dxil_value *handle = NULL;3351enum dxil_resource_class resource_class;33523353switch (nir_intrinsic_desc_type(intr)) {3354case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:3355resource_class = DXIL_RESOURCE_CLASS_CBV;3356break;3357case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:3358if (var->data.access & ACCESS_NON_WRITEABLE)3359resource_class = DXIL_RESOURCE_CLASS_SRV;3360else3361resource_class = DXIL_RESOURCE_CLASS_UAV;3362break;3363default:3364unreachable("unknown descriptor type");3365return false;3366}33673368handle = emit_createhandle_call(ctx, resource_class,3369get_resource_id(ctx, resource_class, space, binding),3370get_src(ctx, &intr->src[0], 0, nir_type_uint32), false);33713372store_dest_value(ctx, &intr->dest, 0, handle);3373store_dest(ctx, &intr->dest, 1, get_src(ctx, &intr->src[0], 1, nir_type_uint32), nir_type_uint32);33743375return true;3376}33773378static bool3379emit_intrinsic(struct ntd_context *ctx, nir_intrinsic_instr *intr)3380{3381switch (intr->intrinsic) {3382case nir_intrinsic_load_global_invocation_id:3383case nir_intrinsic_load_global_invocation_id_zero_base:3384return emit_load_global_invocation_id(ctx, intr);3385case nir_intrinsic_load_local_invocation_id:3386return emit_load_local_invocation_id(ctx, intr);3387case nir_intrinsic_load_workgroup_id:3388case nir_intrinsic_load_workgroup_id_zero_base:3389return emit_load_local_workgroup_id(ctx, intr);3390case nir_intrinsic_load_ssbo:3391return emit_load_ssbo(ctx, intr);3392case nir_intrinsic_store_ssbo:3393return emit_store_ssbo(ctx, intr);3394case nir_intrinsic_store_ssbo_masked_dxil:3395return emit_store_ssbo_masked(ctx, intr);3396case nir_intrinsic_store_deref:3397return emit_store_deref(ctx, intr);3398case nir_intrinsic_store_shared_dxil:3399case nir_intrinsic_store_shared_masked_dxil:3400return emit_store_shared(ctx, intr);3401case nir_intrinsic_store_scratch_dxil:3402return emit_store_scratch(ctx, intr);3403case nir_intrinsic_load_deref:3404return emit_load_deref(ctx, intr);3405case nir_intrinsic_load_ptr_dxil:3406return emit_load_ptr(ctx, intr);3407case nir_intrinsic_load_ubo:3408return emit_load_ubo(ctx, intr);3409case nir_intrinsic_load_ubo_dxil:3410return emit_load_ubo_dxil(ctx, intr);3411case nir_intrinsic_load_front_face:3412return emit_load_input_interpolated(ctx, intr,3413ctx->system_value[SYSTEM_VALUE_FRONT_FACE]);3414case nir_intrinsic_load_vertex_id_zero_base:3415return emit_load_input_interpolated(ctx, intr,3416ctx->system_value[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE]);3417case nir_intrinsic_load_instance_id:3418return emit_load_input_interpolated(ctx, intr,3419ctx->system_value[SYSTEM_VALUE_INSTANCE_ID]);3420case nir_intrinsic_load_primitive_id:3421return emit_load_primitiveid(ctx, intr);3422case nir_intrinsic_load_shared_dxil:3423return emit_load_shared(ctx, intr);3424case nir_intrinsic_load_scratch_dxil:3425return emit_load_scratch(ctx, intr);3426case nir_intrinsic_discard_if:3427return emit_discard_if(ctx, intr);3428case nir_intrinsic_discard:3429return emit_discard(ctx);3430case nir_intrinsic_emit_vertex:3431return emit_emit_vertex(ctx, intr);3432case nir_intrinsic_end_primitive:3433return emit_end_primitive(ctx, intr);3434case nir_intrinsic_scoped_barrier:3435return emit_barrier(ctx, intr);3436case nir_intrinsic_ssbo_atomic_add:3437return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_ADD, nir_type_int);3438case nir_intrinsic_ssbo_atomic_imin:3439return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMIN, nir_type_int);3440case nir_intrinsic_ssbo_atomic_umin:3441return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMIN, nir_type_uint);3442case nir_intrinsic_ssbo_atomic_imax:3443return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_IMAX, nir_type_int);3444case nir_intrinsic_ssbo_atomic_umax:3445return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_UMAX, nir_type_uint);3446case nir_intrinsic_ssbo_atomic_and:3447return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_AND, nir_type_uint);3448case nir_intrinsic_ssbo_atomic_or:3449return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_OR, nir_type_uint);3450case nir_intrinsic_ssbo_atomic_xor:3451return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_XOR, nir_type_uint);3452case nir_intrinsic_ssbo_atomic_exchange:3453return emit_ssbo_atomic(ctx, intr, DXIL_ATOMIC_EXCHANGE, nir_type_int);3454case nir_intrinsic_ssbo_atomic_comp_swap:3455return emit_ssbo_atomic_comp_swap(ctx, intr);3456case nir_intrinsic_shared_atomic_add_dxil:3457return emit_shared_atomic(ctx, intr, DXIL_RMWOP_ADD, nir_type_int);3458case nir_intrinsic_shared_atomic_imin_dxil:3459return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MIN, nir_type_int);3460case nir_intrinsic_shared_atomic_umin_dxil:3461return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMIN, nir_type_uint);3462case nir_intrinsic_shared_atomic_imax_dxil:3463return emit_shared_atomic(ctx, intr, DXIL_RMWOP_MAX, nir_type_int);3464case nir_intrinsic_shared_atomic_umax_dxil:3465return emit_shared_atomic(ctx, intr, DXIL_RMWOP_UMAX, nir_type_uint);3466case nir_intrinsic_shared_atomic_and_dxil:3467return emit_shared_atomic(ctx, intr, DXIL_RMWOP_AND, nir_type_uint);3468case nir_intrinsic_shared_atomic_or_dxil:3469return emit_shared_atomic(ctx, intr, DXIL_RMWOP_OR, nir_type_uint);3470case nir_intrinsic_shared_atomic_xor_dxil:3471return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XOR, nir_type_uint);3472case nir_intrinsic_shared_atomic_exchange_dxil:3473return emit_shared_atomic(ctx, intr, DXIL_RMWOP_XCHG, nir_type_int);3474case nir_intrinsic_shared_atomic_comp_swap_dxil:3475return emit_shared_atomic_comp_swap(ctx, intr);3476case nir_intrinsic_image_store:3477case nir_intrinsic_image_deref_store:3478return emit_image_store(ctx, intr);3479case nir_intrinsic_image_deref_size:3480case nir_intrinsic_image_size:3481return emit_image_size(ctx, intr);3482case nir_intrinsic_get_ssbo_size:3483return emit_get_ssbo_size(ctx, intr);34843485case nir_intrinsic_vulkan_resource_index:3486return emit_vulkan_resource_index(ctx, intr);3487case nir_intrinsic_load_vulkan_descriptor:3488return emit_load_vulkan_descriptor(ctx, intr);34893490case nir_intrinsic_load_num_workgroups:3491case nir_intrinsic_load_workgroup_size:3492default:3493NIR_INSTR_UNSUPPORTED(&intr->instr);3494assert("Unimplemented intrinsic instruction");3495return false;3496}3497}34983499static bool3500emit_load_const(struct ntd_context *ctx, nir_load_const_instr *load_const)3501{3502for (int i = 0; i < load_const->def.num_components; ++i) {3503const struct dxil_value *value;3504switch (load_const->def.bit_size) {3505case 1:3506value = dxil_module_get_int1_const(&ctx->mod,3507load_const->value[i].b);3508break;3509case 16:3510ctx->mod.feats.native_low_precision = true;3511value = dxil_module_get_int16_const(&ctx->mod,3512load_const->value[i].u16);3513break;3514case 32:3515value = dxil_module_get_int32_const(&ctx->mod,3516load_const->value[i].u32);3517break;3518case 64:3519ctx->mod.feats.int64_ops = true;3520value = dxil_module_get_int64_const(&ctx->mod,3521load_const->value[i].u64);3522break;3523default:3524unreachable("unexpected bit_size");3525}3526if (!value)3527return false;35283529store_ssa_def(ctx, &load_const->def, i, value);3530}3531return true;3532}35333534static bool3535emit_deref(struct ntd_context* ctx, nir_deref_instr* instr)3536{3537assert(instr->deref_type == nir_deref_type_var ||3538instr->deref_type == nir_deref_type_array);35393540/* In the non-Vulkan environment, there's nothing to emit. Any references to3541* derefs will emit the necessary logic to handle scratch/shared GEP addressing3542*/3543if (!ctx->opts->vulkan_environment)3544return true;35453546/* In the Vulkan environment, we don't have cached handles for textures or3547* samplers, so let's use the opportunity of walking through the derefs to3548* emit those.3549*/3550nir_variable *var = nir_deref_instr_get_variable(instr);3551assert(var);35523553if (!glsl_type_is_sampler(glsl_without_array(var->type)) &&3554!glsl_type_is_image(glsl_without_array(var->type)))3555return true;35563557const struct glsl_type *type = instr->type;3558const struct dxil_value *binding;35593560if (instr->deref_type == nir_deref_type_var) {3561binding = dxil_module_get_int32_const(&ctx->mod, var->data.binding);3562} else {3563const struct dxil_value *base = get_src(ctx, &instr->parent, 0, nir_type_uint32);3564const struct dxil_value *offset = get_src(ctx, &instr->arr.index, 0, nir_type_uint32);3565if (!base || !offset)3566return false;35673568binding = dxil_emit_binop(&ctx->mod, DXIL_BINOP_ADD, base, offset, 0);3569}35703571if (!binding)3572return false;35733574/* Haven't finished chasing the deref chain yet, just store the value */3575if (glsl_type_is_array(type)) {3576store_dest(ctx, &instr->dest, 0, binding, nir_type_uint32);3577return true;3578}35793580assert(glsl_type_is_sampler(type) || glsl_type_is_image(type));3581enum dxil_resource_class res_class;3582if (glsl_type_is_image(type))3583res_class = DXIL_RESOURCE_CLASS_UAV;3584else if (glsl_get_sampler_result_type(type) == GLSL_TYPE_VOID)3585res_class = DXIL_RESOURCE_CLASS_SAMPLER;3586else3587res_class = DXIL_RESOURCE_CLASS_SRV;35883589const struct dxil_value *handle = emit_createhandle_call(ctx, res_class,3590get_resource_id(ctx, res_class, var->data.descriptor_set, var->data.binding), binding, false);3591if (!handle)3592return false;35933594store_dest_value(ctx, &instr->dest, 0, handle);3595return true;3596}35973598static bool3599emit_cond_branch(struct ntd_context *ctx, const struct dxil_value *cond,3600int true_block, int false_block)3601{3602assert(cond);3603assert(true_block >= 0);3604assert(false_block >= 0);3605return dxil_emit_branch(&ctx->mod, cond, true_block, false_block);3606}36073608static bool3609emit_branch(struct ntd_context *ctx, int block)3610{3611assert(block >= 0);3612return dxil_emit_branch(&ctx->mod, NULL, block, -1);3613}36143615static bool3616emit_jump(struct ntd_context *ctx, nir_jump_instr *instr)3617{3618switch (instr->type) {3619case nir_jump_break:3620case nir_jump_continue:3621assert(instr->instr.block->successors[0]);3622assert(!instr->instr.block->successors[1]);3623return emit_branch(ctx, instr->instr.block->successors[0]->index);36243625default:3626unreachable("Unsupported jump type\n");3627}3628}36293630struct phi_block {3631unsigned num_components;3632struct dxil_instr *comp[NIR_MAX_VEC_COMPONENTS];3633};36343635static bool3636emit_phi(struct ntd_context *ctx, nir_phi_instr *instr)3637{3638unsigned bit_size = nir_dest_bit_size(instr->dest);3639const struct dxil_type *type = dxil_module_get_int_type(&ctx->mod,3640bit_size);36413642struct phi_block *vphi = ralloc(ctx->phis, struct phi_block);3643vphi->num_components = nir_dest_num_components(instr->dest);36443645for (unsigned i = 0; i < vphi->num_components; ++i) {3646struct dxil_instr *phi = vphi->comp[i] = dxil_emit_phi(&ctx->mod, type);3647if (!phi)3648return false;3649store_dest_value(ctx, &instr->dest, i, dxil_instr_get_return_value(phi));3650}3651_mesa_hash_table_insert(ctx->phis, instr, vphi);3652return true;3653}36543655static void3656fixup_phi(struct ntd_context *ctx, nir_phi_instr *instr,3657struct phi_block *vphi)3658{3659const struct dxil_value *values[128];3660unsigned blocks[128];3661for (unsigned i = 0; i < vphi->num_components; ++i) {3662size_t num_incoming = 0;3663nir_foreach_phi_src(src, instr) {3664assert(src->src.is_ssa);3665const struct dxil_value *val = get_src_ssa(ctx, src->src.ssa, i);3666assert(num_incoming < ARRAY_SIZE(values));3667values[num_incoming] = val;3668assert(num_incoming < ARRAY_SIZE(blocks));3669blocks[num_incoming] = src->pred->index;3670++num_incoming;3671}3672dxil_phi_set_incoming(vphi->comp[i], values, blocks, num_incoming);3673}3674}36753676static unsigned3677get_n_src(struct ntd_context *ctx, const struct dxil_value **values,3678unsigned max_components, nir_tex_src *src, nir_alu_type type)3679{3680unsigned num_components = nir_src_num_components(src->src);3681unsigned i = 0;36823683assert(num_components <= max_components);36843685for (i = 0; i < num_components; ++i) {3686values[i] = get_src(ctx, &src->src, i, type);3687assert(values[i] != NULL);3688}36893690return num_components;3691}36923693#define PAD_SRC(ctx, array, components, undef) \3694for (unsigned i = components; i < ARRAY_SIZE(array); ++i) { \3695array[i] = undef; \3696}36973698static const struct dxil_value *3699emit_sample(struct ntd_context *ctx, struct texop_parameters *params)3700{3701const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sample", params->overload);3702if (!func)3703return NULL;37043705const struct dxil_value *args[11] = {3706dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE),3707params->tex, params->sampler,3708params->coord[0], params->coord[1], params->coord[2], params->coord[3],3709params->offset[0], params->offset[1], params->offset[2],3710params->min_lod3711};37123713return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3714}37153716static const struct dxil_value *3717emit_sample_bias(struct ntd_context *ctx, struct texop_parameters *params)3718{3719const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleBias", params->overload);3720if (!func)3721return NULL;37223723assert(params->bias != NULL);37243725const struct dxil_value *args[12] = {3726dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_BIAS),3727params->tex, params->sampler,3728params->coord[0], params->coord[1], params->coord[2], params->coord[3],3729params->offset[0], params->offset[1], params->offset[2],3730params->bias, params->min_lod3731};37323733return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3734}37353736static const struct dxil_value *3737emit_sample_level(struct ntd_context *ctx, struct texop_parameters *params)3738{3739const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleLevel", params->overload);3740if (!func)3741return NULL;37423743assert(params->lod_or_sample != NULL);37443745const struct dxil_value *args[11] = {3746dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_LEVEL),3747params->tex, params->sampler,3748params->coord[0], params->coord[1], params->coord[2], params->coord[3],3749params->offset[0], params->offset[1], params->offset[2],3750params->lod_or_sample3751};37523753return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3754}37553756static const struct dxil_value *3757emit_sample_cmp(struct ntd_context *ctx, struct texop_parameters *params)3758{3759const struct dxil_func *func;3760enum dxil_intr opcode;3761int numparam;37623763if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {3764func = dxil_get_function(&ctx->mod, "dx.op.sampleCmp", DXIL_F32);3765opcode = DXIL_INTR_SAMPLE_CMP;3766numparam = 12;3767} else {3768func = dxil_get_function(&ctx->mod, "dx.op.sampleCmpLevelZero", DXIL_F32);3769opcode = DXIL_INTR_SAMPLE_CMP_LVL_ZERO;3770numparam = 11;3771}37723773if (!func)3774return NULL;37753776const struct dxil_value *args[12] = {3777dxil_module_get_int32_const(&ctx->mod, opcode),3778params->tex, params->sampler,3779params->coord[0], params->coord[1], params->coord[2], params->coord[3],3780params->offset[0], params->offset[1], params->offset[2],3781params->cmp, params->min_lod3782};37833784return dxil_emit_call(&ctx->mod, func, args, numparam);3785}37863787static const struct dxil_value *3788emit_sample_grad(struct ntd_context *ctx, struct texop_parameters *params)3789{3790const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.sampleGrad", params->overload);3791if (!func)3792return false;37933794const struct dxil_value *args[17] = {3795dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_SAMPLE_GRAD),3796params->tex, params->sampler,3797params->coord[0], params->coord[1], params->coord[2], params->coord[3],3798params->offset[0], params->offset[1], params->offset[2],3799params->dx[0], params->dx[1], params->dx[2],3800params->dy[0], params->dy[1], params->dy[2],3801params->min_lod3802};38033804return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3805}38063807static const struct dxil_value *3808emit_texel_fetch(struct ntd_context *ctx, struct texop_parameters *params)3809{3810const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.textureLoad", params->overload);3811if (!func)3812return false;38133814if (!params->lod_or_sample)3815params->lod_or_sample = dxil_module_get_undef(&ctx->mod, dxil_module_get_int_type(&ctx->mod, 32));38163817const struct dxil_value *args[] = {3818dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOAD),3819params->tex,3820params->lod_or_sample, params->coord[0], params->coord[1], params->coord[2],3821params->offset[0], params->offset[1], params->offset[2]3822};38233824return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3825}38263827static const struct dxil_value *3828emit_texture_lod(struct ntd_context *ctx, struct texop_parameters *params)3829{3830const struct dxil_func *func = dxil_get_function(&ctx->mod, "dx.op.calculateLOD", DXIL_F32);3831if (!func)3832return false;38333834const struct dxil_value *args[] = {3835dxil_module_get_int32_const(&ctx->mod, DXIL_INTR_TEXTURE_LOD),3836params->tex,3837params->sampler,3838params->coord[0],3839params->coord[1],3840params->coord[2],3841dxil_module_get_int1_const(&ctx->mod, 1)3842};38433844return dxil_emit_call(&ctx->mod, func, args, ARRAY_SIZE(args));3845}38463847static bool3848emit_tex(struct ntd_context *ctx, nir_tex_instr *instr)3849{3850struct texop_parameters params;3851memset(¶ms, 0, sizeof(struct texop_parameters));3852if (!ctx->opts->vulkan_environment) {3853params.tex = ctx->srv_handles[instr->texture_index];3854params.sampler = ctx->sampler_handles[instr->sampler_index];3855}38563857const struct dxil_type *int_type = dxil_module_get_int_type(&ctx->mod, 32);3858const struct dxil_type *float_type = dxil_module_get_float_type(&ctx->mod, 32);3859const struct dxil_value *int_undef = dxil_module_get_undef(&ctx->mod, int_type);3860const struct dxil_value *float_undef = dxil_module_get_undef(&ctx->mod, float_type);38613862unsigned coord_components = 0, offset_components = 0, dx_components = 0, dy_components = 0;3863params.overload = get_overload(instr->dest_type, 32);38643865for (unsigned i = 0; i < instr->num_srcs; i++) {3866nir_alu_type type = nir_tex_instr_src_type(instr, i);38673868switch (instr->src[i].src_type) {3869case nir_tex_src_coord:3870coord_components = get_n_src(ctx, params.coord, ARRAY_SIZE(params.coord),3871&instr->src[i], type);3872break;38733874case nir_tex_src_offset:3875offset_components = get_n_src(ctx, params.offset, ARRAY_SIZE(params.offset),3876&instr->src[i], nir_type_int);3877break;38783879case nir_tex_src_bias:3880assert(instr->op == nir_texop_txb);3881assert(nir_src_num_components(instr->src[i].src) == 1);3882params.bias = get_src(ctx, &instr->src[i].src, 0, nir_type_float);3883assert(params.bias != NULL);3884break;38853886case nir_tex_src_lod:3887assert(nir_src_num_components(instr->src[i].src) == 1);3888/* Buffers don't have a LOD */3889if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)3890params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, type);3891else3892params.lod_or_sample = int_undef;3893assert(params.lod_or_sample != NULL);3894break;38953896case nir_tex_src_min_lod:3897assert(nir_src_num_components(instr->src[i].src) == 1);3898params.min_lod = get_src(ctx, &instr->src[i].src, 0, type);3899assert(params.min_lod != NULL);3900break;39013902case nir_tex_src_comparator:3903assert(nir_src_num_components(instr->src[i].src) == 1);3904params.cmp = get_src(ctx, &instr->src[i].src, 0, nir_type_float);3905assert(params.cmp != NULL);3906break;39073908case nir_tex_src_ddx:3909dx_components = get_n_src(ctx, params.dx, ARRAY_SIZE(params.dx),3910&instr->src[i], nir_type_float);3911assert(dx_components != 0);3912break;39133914case nir_tex_src_ddy:3915dy_components = get_n_src(ctx, params.dy, ARRAY_SIZE(params.dy),3916&instr->src[i], nir_type_float);3917assert(dy_components != 0);3918break;39193920case nir_tex_src_ms_index:3921params.lod_or_sample = get_src(ctx, &instr->src[i].src, 0, nir_type_int);3922assert(params.lod_or_sample != NULL);3923break;39243925case nir_tex_src_texture_deref:3926assert(ctx->opts->vulkan_environment);3927params.tex = get_src_ssa(ctx, instr->src[i].src.ssa, 0);3928break;39293930case nir_tex_src_sampler_deref:3931assert(ctx->opts->vulkan_environment);3932params.sampler = get_src_ssa(ctx, instr->src[i].src.ssa, 0);3933break;39343935case nir_tex_src_projector:3936unreachable("Texture projector should have been lowered");39373938default:3939fprintf(stderr, "texture source: %d\n", instr->src[i].src_type);3940unreachable("unknown texture source");3941}3942}39433944assert(params.tex != NULL);3945assert(instr->op == nir_texop_txf ||3946instr->op == nir_texop_txf_ms ||3947nir_tex_instr_is_query(instr) ||3948params.sampler != NULL);39493950PAD_SRC(ctx, params.coord, coord_components, float_undef);3951PAD_SRC(ctx, params.offset, offset_components, int_undef);3952if (!params.min_lod) params.min_lod = float_undef;39533954const struct dxil_value *sample = NULL;3955switch (instr->op) {3956case nir_texop_txb:3957sample = emit_sample_bias(ctx, ¶ms);3958break;39593960case nir_texop_tex:3961if (params.cmp != NULL) {3962sample = emit_sample_cmp(ctx, ¶ms);3963break;3964} else if (ctx->mod.shader_kind == DXIL_PIXEL_SHADER) {3965sample = emit_sample(ctx, ¶ms);3966break;3967}3968params.lod_or_sample = dxil_module_get_float_const(&ctx->mod, 0);3969FALLTHROUGH;3970case nir_texop_txl:3971sample = emit_sample_level(ctx, ¶ms);3972break;39733974case nir_texop_txd:3975PAD_SRC(ctx, params.dx, dx_components, float_undef);3976PAD_SRC(ctx, params.dy, dy_components,float_undef);3977sample = emit_sample_grad(ctx, ¶ms);3978break;39793980case nir_texop_txf:3981case nir_texop_txf_ms:3982if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {3983params.coord[1] = int_undef;3984sample = emit_bufferload_call(ctx, params.tex, params.coord);3985}3986else {3987PAD_SRC(ctx, params.coord, coord_components, int_undef);3988sample = emit_texel_fetch(ctx, ¶ms);3989}3990break;39913992case nir_texop_txs:3993sample = emit_texture_size(ctx, ¶ms);3994break;39953996case nir_texop_lod:3997sample = emit_texture_lod(ctx, ¶ms);3998store_dest(ctx, &instr->dest, 0, sample, nir_alu_type_get_base_type(instr->dest_type));3999return true;40004001case nir_texop_query_levels:4002params.lod_or_sample = dxil_module_get_int_const(&ctx->mod, 0, 32);4003sample = emit_texture_size(ctx, ¶ms);4004const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, 3);4005store_dest(ctx, &instr->dest, 0, retval, nir_alu_type_get_base_type(instr->dest_type));4006return true;40074008default:4009fprintf(stderr, "texture op: %d\n", instr->op);4010unreachable("unknown texture op");4011}40124013if (!sample)4014return false;40154016for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {4017const struct dxil_value *retval = dxil_emit_extractval(&ctx->mod, sample, i);4018store_dest(ctx, &instr->dest, i, retval, nir_alu_type_get_base_type(instr->dest_type));4019}40204021return true;4022}40234024static bool4025emit_undefined(struct ntd_context *ctx, nir_ssa_undef_instr *undef)4026{4027for (unsigned i = 0; i < undef->def.num_components; ++i)4028store_ssa_def(ctx, &undef->def, i, dxil_module_get_int32_const(&ctx->mod, 0));4029return true;4030}40314032static bool emit_instr(struct ntd_context *ctx, struct nir_instr* instr)4033{4034switch (instr->type) {4035case nir_instr_type_alu:4036return emit_alu(ctx, nir_instr_as_alu(instr));4037case nir_instr_type_intrinsic:4038return emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));4039case nir_instr_type_load_const:4040return emit_load_const(ctx, nir_instr_as_load_const(instr));4041case nir_instr_type_deref:4042return emit_deref(ctx, nir_instr_as_deref(instr));4043case nir_instr_type_jump:4044return emit_jump(ctx, nir_instr_as_jump(instr));4045case nir_instr_type_phi:4046return emit_phi(ctx, nir_instr_as_phi(instr));4047case nir_instr_type_tex:4048return emit_tex(ctx, nir_instr_as_tex(instr));4049case nir_instr_type_ssa_undef:4050return emit_undefined(ctx, nir_instr_as_ssa_undef(instr));4051default:4052NIR_INSTR_UNSUPPORTED(instr);4053unreachable("Unimplemented instruction type");4054return false;4055}4056}405740584059static bool4060emit_block(struct ntd_context *ctx, struct nir_block *block)4061{4062assert(block->index < ctx->mod.num_basic_block_ids);4063ctx->mod.basic_block_ids[block->index] = ctx->mod.curr_block;40644065nir_foreach_instr(instr, block) {4066TRACE_CONVERSION(instr);40674068if (!emit_instr(ctx, instr)) {4069return false;4070}4071}4072return true;4073}40744075static bool4076emit_cf_list(struct ntd_context *ctx, struct exec_list *list);40774078static bool4079emit_if(struct ntd_context *ctx, struct nir_if *if_stmt)4080{4081assert(nir_src_num_components(if_stmt->condition) == 1);4082const struct dxil_value *cond = get_src(ctx, &if_stmt->condition, 0,4083nir_type_bool);40844085/* prepare blocks */4086nir_block *then_block = nir_if_first_then_block(if_stmt);4087assert(nir_if_last_then_block(if_stmt)->successors[0]);4088assert(!nir_if_last_then_block(if_stmt)->successors[1]);4089int then_succ = nir_if_last_then_block(if_stmt)->successors[0]->index;40904091nir_block *else_block = NULL;4092int else_succ = -1;4093if (!exec_list_is_empty(&if_stmt->else_list)) {4094else_block = nir_if_first_else_block(if_stmt);4095assert(nir_if_last_else_block(if_stmt)->successors[0]);4096assert(!nir_if_last_else_block(if_stmt)->successors[1]);4097else_succ = nir_if_last_else_block(if_stmt)->successors[0]->index;4098}40994100if (!emit_cond_branch(ctx, cond, then_block->index,4101else_block ? else_block->index : then_succ))4102return false;41034104/* handle then-block */4105if (!emit_cf_list(ctx, &if_stmt->then_list) ||4106(!nir_block_ends_in_jump(nir_if_last_then_block(if_stmt)) &&4107!emit_branch(ctx, then_succ)))4108return false;41094110if (else_block) {4111/* handle else-block */4112if (!emit_cf_list(ctx, &if_stmt->else_list) ||4113(!nir_block_ends_in_jump(nir_if_last_else_block(if_stmt)) &&4114!emit_branch(ctx, else_succ)))4115return false;4116}41174118return true;4119}41204121static bool4122emit_loop(struct ntd_context *ctx, nir_loop *loop)4123{4124nir_block *first_block = nir_loop_first_block(loop);41254126assert(nir_loop_last_block(loop)->successors[0]);4127assert(!nir_loop_last_block(loop)->successors[1]);41284129if (!emit_branch(ctx, first_block->index))4130return false;41314132if (!emit_cf_list(ctx, &loop->body))4133return false;41344135if (!emit_branch(ctx, first_block->index))4136return false;41374138return true;4139}41404141static bool4142emit_cf_list(struct ntd_context *ctx, struct exec_list *list)4143{4144foreach_list_typed(nir_cf_node, node, node, list) {4145switch (node->type) {4146case nir_cf_node_block:4147if (!emit_block(ctx, nir_cf_node_as_block(node)))4148return false;4149break;41504151case nir_cf_node_if:4152if (!emit_if(ctx, nir_cf_node_as_if(node)))4153return false;4154break;41554156case nir_cf_node_loop:4157if (!emit_loop(ctx, nir_cf_node_as_loop(node)))4158return false;4159break;41604161default:4162unreachable("unsupported cf-list node");4163break;4164}4165}4166return true;4167}41684169static void4170insert_sorted_by_binding(struct exec_list *var_list, nir_variable *new_var)4171{4172nir_foreach_variable_in_list(var, var_list) {4173if (var->data.binding > new_var->data.binding) {4174exec_node_insert_node_before(&var->node, &new_var->node);4175return;4176}4177}4178exec_list_push_tail(var_list, &new_var->node);4179}418041814182static void4183sort_uniforms_by_binding_and_remove_structs(nir_shader *s)4184{4185struct exec_list new_list;4186exec_list_make_empty(&new_list);41874188nir_foreach_variable_with_modes_safe(var, s, nir_var_uniform) {4189exec_node_remove(&var->node);4190const struct glsl_type *type = glsl_without_array(var->type);4191if (!glsl_type_is_struct(type))4192insert_sorted_by_binding(&new_list, var);4193}4194exec_list_append(&s->variables, &new_list);4195}41964197static void4198prepare_phi_values(struct ntd_context *ctx)4199{4200/* PHI nodes are difficult to get right when tracking the types:4201* Since the incoming sources are linked to blocks, we can't bitcast4202* on the fly while loading. So scan the shader and insert a typed dummy4203* value for each phi source, and when storing we convert if the incoming4204* value has a different type then the one expected by the phi node.4205* We choose int as default, because it supports more bit sizes.4206*/4207nir_foreach_function(function, ctx->shader) {4208if (function->impl) {4209nir_foreach_block(block, function->impl) {4210nir_foreach_instr(instr, block) {4211if (instr->type == nir_instr_type_phi) {4212nir_phi_instr *ir = nir_instr_as_phi(instr);4213unsigned bitsize = nir_dest_bit_size(ir->dest);4214const struct dxil_value *dummy = dxil_module_get_int_const(&ctx->mod, 0, bitsize);4215nir_foreach_phi_src(src, ir) {4216for(unsigned int i = 0; i < ir->dest.ssa.num_components; ++i)4217store_ssa_def(ctx, src->src.ssa, i, dummy);4218}4219}4220}4221}4222}4223}4224}42254226static bool4227emit_cbvs(struct ntd_context *ctx)4228{4229if (ctx->shader->info.stage == MESA_SHADER_KERNEL || ctx->opts->vulkan_environment) {4230nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ubo) {4231if (!emit_ubo_var(ctx, var))4232return false;4233}4234} else {4235for (int i = ctx->opts->ubo_binding_offset; i < ctx->shader->info.num_ubos; ++i) {4236char name[64];4237snprintf(name, sizeof(name), "__ubo%d", i);4238if (!emit_cbv(ctx, i, 0, 16384 /*4096 vec4's*/, 1, name))4239return false;4240}4241}42424243return true;4244}42454246static bool4247emit_scratch(struct ntd_context *ctx)4248{4249if (ctx->shader->scratch_size) {4250/*4251* We always allocate an u32 array, no matter the actual variable types.4252* According to the DXIL spec, the minimum load/store granularity is4253* 32-bit, anything smaller requires using a read-extract/read-write-modify4254* approach.4255*/4256unsigned size = ALIGN_POT(ctx->shader->scratch_size, sizeof(uint32_t));4257const struct dxil_type *int32 = dxil_module_get_int_type(&ctx->mod, 32);4258const struct dxil_value *array_length = dxil_module_get_int32_const(&ctx->mod, size / sizeof(uint32_t));4259if (!int32 || !array_length)4260return false;42614262const struct dxil_type *type = dxil_module_get_array_type(4263&ctx->mod, int32, size / sizeof(uint32_t));4264if (!type)4265return false;42664267ctx->scratchvars = dxil_emit_alloca(&ctx->mod, type, int32, array_length, 4);4268if (!ctx->scratchvars)4269return false;4270}42714272return true;4273}42744275/* The validator complains if we don't have ops that reference a global variable. */4276static bool4277shader_has_shared_ops(struct nir_shader *s)4278{4279nir_foreach_function(func, s) {4280if (!func->impl)4281continue;4282nir_foreach_block(block, func->impl) {4283nir_foreach_instr(instr, block) {4284if (instr->type != nir_instr_type_intrinsic)4285continue;4286nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);4287switch (intrin->intrinsic) {4288case nir_intrinsic_load_shared_dxil:4289case nir_intrinsic_store_shared_dxil:4290case nir_intrinsic_shared_atomic_add_dxil:4291case nir_intrinsic_shared_atomic_and_dxil:4292case nir_intrinsic_shared_atomic_comp_swap_dxil:4293case nir_intrinsic_shared_atomic_exchange_dxil:4294case nir_intrinsic_shared_atomic_imax_dxil:4295case nir_intrinsic_shared_atomic_imin_dxil:4296case nir_intrinsic_shared_atomic_or_dxil:4297case nir_intrinsic_shared_atomic_umax_dxil:4298case nir_intrinsic_shared_atomic_umin_dxil:4299case nir_intrinsic_shared_atomic_xor_dxil:4300return true;4301default: break;4302}4303}4304}4305}4306return false;4307}43084309static bool4310emit_module(struct ntd_context *ctx, const struct nir_to_dxil_options *opts)4311{4312/* The validator forces us to emit resources in a specific order:4313* CBVs, Samplers, SRVs, UAVs. While we are at it also remove4314* stale struct uniforms, they are lowered but might not have been removed */4315sort_uniforms_by_binding_and_remove_structs(ctx->shader);43164317/* CBVs */4318if (!emit_cbvs(ctx))4319return false;43204321/* Samplers */4322nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {4323unsigned count = glsl_type_get_sampler_count(var->type);4324const struct glsl_type *without_array = glsl_without_array(var->type);4325if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&4326glsl_get_sampler_result_type(without_array) == GLSL_TYPE_VOID) {4327if (!emit_sampler(ctx, var, count))4328return false;4329}4330}43314332/* SRVs */4333nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {4334unsigned count = glsl_type_get_sampler_count(var->type);4335const struct glsl_type *without_array = glsl_without_array(var->type);4336if (var->data.mode == nir_var_uniform && glsl_type_is_sampler(without_array) &&4337glsl_get_sampler_result_type(without_array) != GLSL_TYPE_VOID) {4338if (!emit_srv(ctx, var, count))4339return false;4340}4341}4342/* Handle read-only SSBOs as SRVs */4343nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {4344if ((var->data.access & ACCESS_NON_WRITEABLE) != 0) {4345unsigned count = 1;4346if (glsl_type_is_array(var->type))4347count = glsl_get_length(var->type);4348if (!emit_srv(ctx, var, count))4349return false;4350}4351}43524353if (ctx->shader->info.shared_size && shader_has_shared_ops(ctx->shader)) {4354const struct dxil_type *type;4355unsigned size;43564357/*4358* We always allocate an u32 array, no matter the actual variable types.4359* According to the DXIL spec, the minimum load/store granularity is4360* 32-bit, anything smaller requires using a read-extract/read-write-modify4361* approach. Non-atomic 64-bit accesses are allowed, but the4362* GEP(cast(gvar, u64[] *), offset) and cast(GEP(gvar, offset), u64 *))4363* sequences don't seem to be accepted by the DXIL validator when the4364* pointer is in the groupshared address space, making the 32-bit -> 64-bit4365* pointer cast impossible.4366*/4367size = ALIGN_POT(ctx->shader->info.shared_size, sizeof(uint32_t));4368type = dxil_module_get_array_type(&ctx->mod,4369dxil_module_get_int_type(&ctx->mod, 32),4370size / sizeof(uint32_t));4371ctx->sharedvars = dxil_add_global_ptr_var(&ctx->mod, "shared", type,4372DXIL_AS_GROUPSHARED,4373ffs(sizeof(uint64_t)),4374NULL);4375}43764377if (!emit_scratch(ctx))4378return false;43794380/* UAVs */4381if (ctx->shader->info.stage == MESA_SHADER_KERNEL) {4382if (!emit_globals(ctx, opts->num_kernel_globals))4383return false;43844385ctx->consts = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);4386if (!ctx->consts)4387return false;4388if (!emit_global_consts(ctx))4389return false;4390} else {4391/* Handle read/write SSBOs as UAVs */4392nir_foreach_variable_with_modes(var, ctx->shader, nir_var_mem_ssbo) {4393if ((var->data.access & ACCESS_NON_WRITEABLE) == 0) {4394unsigned count = 1;4395if (glsl_type_is_array(var->type))4396count = glsl_get_length(var->type);4397if (!emit_uav(ctx, var->data.binding, var->data.descriptor_set,4398count, DXIL_COMP_TYPE_INVALID,4399DXIL_RESOURCE_KIND_RAW_BUFFER, var->name))4400return false;4401}4402}4403}44044405nir_foreach_variable_with_modes(var, ctx->shader, nir_var_uniform) {4406if (var->data.mode == nir_var_uniform && glsl_type_is_image(glsl_without_array(var->type))) {4407if (!emit_uav_var(ctx, var, glsl_type_get_image_count(var->type)))4408return false;4409}4410}44114412nir_function_impl *entry = nir_shader_get_entrypoint(ctx->shader);4413nir_metadata_require(entry, nir_metadata_block_index);44144415assert(entry->num_blocks > 0);4416ctx->mod.basic_block_ids = rzalloc_array(ctx->ralloc_ctx, int,4417entry->num_blocks);4418if (!ctx->mod.basic_block_ids)4419return false;44204421for (int i = 0; i < entry->num_blocks; ++i)4422ctx->mod.basic_block_ids[i] = -1;4423ctx->mod.num_basic_block_ids = entry->num_blocks;44244425ctx->defs = rzalloc_array(ctx->ralloc_ctx, struct dxil_def,4426entry->ssa_alloc);4427if (!ctx->defs)4428return false;4429ctx->num_defs = entry->ssa_alloc;44304431ctx->phis = _mesa_pointer_hash_table_create(ctx->ralloc_ctx);4432if (!ctx->phis)4433return false;44344435prepare_phi_values(ctx);44364437if (!emit_cf_list(ctx, &entry->body))4438return false;44394440hash_table_foreach(ctx->phis, entry) {4441fixup_phi(ctx, (nir_phi_instr *)entry->key,4442(struct phi_block *)entry->data);4443}44444445if (!dxil_emit_ret_void(&ctx->mod))4446return false;44474448if (ctx->shader->info.stage == MESA_SHADER_FRAGMENT) {4449nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_out) {4450if (var->data.location == FRAG_RESULT_STENCIL) {4451ctx->mod.feats.stencil_ref = true;4452}4453}4454}44554456if (ctx->mod.feats.native_low_precision)4457ctx->mod.minor_version = MAX2(ctx->mod.minor_version, 2);44584459return emit_metadata(ctx) &&4460dxil_emit_module(&ctx->mod);4461}44624463static unsigned int4464get_dxil_shader_kind(struct nir_shader *s)4465{4466switch (s->info.stage) {4467case MESA_SHADER_VERTEX:4468return DXIL_VERTEX_SHADER;4469case MESA_SHADER_GEOMETRY:4470return DXIL_GEOMETRY_SHADER;4471case MESA_SHADER_FRAGMENT:4472return DXIL_PIXEL_SHADER;4473case MESA_SHADER_KERNEL:4474case MESA_SHADER_COMPUTE:4475return DXIL_COMPUTE_SHADER;4476default:4477unreachable("unknown shader stage in nir_to_dxil");4478return DXIL_COMPUTE_SHADER;4479}4480}44814482static unsigned4483lower_bit_size_callback(const nir_instr* instr, void *data)4484{4485if (instr->type != nir_instr_type_alu)4486return 0;4487const nir_alu_instr *alu = nir_instr_as_alu(instr);44884489if (nir_op_infos[alu->op].is_conversion)4490return 0;44914492unsigned num_inputs = nir_op_infos[alu->op].num_inputs;4493const struct nir_to_dxil_options *opts = (const struct nir_to_dxil_options*)data;4494unsigned min_bit_size = opts->lower_int16 ? 32 : 16;44954496unsigned ret = 0;4497for (unsigned i = 0; i < num_inputs; i++) {4498unsigned bit_size = nir_src_bit_size(alu->src[i].src);4499if (bit_size != 1 && bit_size < min_bit_size)4500ret = min_bit_size;4501}45024503return ret;4504}45054506static void4507optimize_nir(struct nir_shader *s, const struct nir_to_dxil_options *opts)4508{4509bool progress;4510do {4511progress = false;4512NIR_PASS_V(s, nir_lower_vars_to_ssa);4513NIR_PASS(progress, s, nir_lower_indirect_derefs, nir_var_function_temp, UINT32_MAX);4514NIR_PASS(progress, s, nir_lower_alu_to_scalar, NULL, NULL);4515NIR_PASS(progress, s, nir_copy_prop);4516NIR_PASS(progress, s, nir_opt_copy_prop_vars);4517NIR_PASS(progress, s, nir_lower_bit_size, lower_bit_size_callback, (void*)opts);4518NIR_PASS(progress, s, dxil_nir_lower_8bit_conv);4519if (opts->lower_int16)4520NIR_PASS(progress, s, dxil_nir_lower_16bit_conv);4521NIR_PASS(progress, s, nir_opt_remove_phis);4522NIR_PASS(progress, s, nir_opt_dce);4523NIR_PASS(progress, s, nir_opt_if, true);4524NIR_PASS(progress, s, nir_opt_dead_cf);4525NIR_PASS(progress, s, nir_opt_cse);4526NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);4527NIR_PASS(progress, s, nir_opt_algebraic);4528NIR_PASS(progress, s, dxil_nir_lower_x2b);4529if (s->options->lower_int64_options)4530NIR_PASS(progress, s, nir_lower_int64);4531NIR_PASS(progress, s, nir_lower_alu);4532NIR_PASS(progress, s, dxil_nir_lower_inot);4533NIR_PASS(progress, s, nir_opt_constant_folding);4534NIR_PASS(progress, s, nir_opt_undef);4535NIR_PASS(progress, s, nir_lower_undef_to_zero);4536NIR_PASS(progress, s, nir_opt_deref);4537NIR_PASS(progress, s, dxil_nir_lower_upcast_phis, opts->lower_int16 ? 32 : 16);4538NIR_PASS(progress, s, nir_lower_64bit_phis);4539NIR_PASS_V(s, nir_lower_system_values);4540} while (progress);45414542do {4543progress = false;4544NIR_PASS(progress, s, nir_opt_algebraic_late);4545} while (progress);4546}45474548static4549void dxil_fill_validation_state(struct ntd_context *ctx,4550struct dxil_validation_state *state)4551{4552state->num_resources = util_dynarray_num_elements(&ctx->resources, struct dxil_resource);4553state->resources = (struct dxil_resource*)ctx->resources.data;4554state->state.psv0.max_expected_wave_lane_count = UINT_MAX;4555state->state.shader_stage = (uint8_t)ctx->mod.shader_kind;4556state->state.sig_input_elements = (uint8_t)ctx->mod.num_sig_inputs;4557state->state.sig_output_elements = (uint8_t)ctx->mod.num_sig_outputs;4558//state->state.sig_patch_const_or_prim_elements = 0;45594560switch (ctx->mod.shader_kind) {4561case DXIL_VERTEX_SHADER:4562state->state.psv0.vs.output_position_present = ctx->mod.info.has_out_position;4563break;4564case DXIL_PIXEL_SHADER:4565/* TODO: handle depth outputs */4566state->state.psv0.ps.depth_output = ctx->mod.info.has_out_depth;4567/* just guessing */4568state->state.psv0.ps.sample_frequency = 0;4569break;4570case DXIL_COMPUTE_SHADER:4571break;4572case DXIL_GEOMETRY_SHADER:4573state->state.max_vertex_count = ctx->shader->info.gs.vertices_out;4574state->state.psv0.gs.input_primitive = dxil_get_input_primitive(ctx->shader->info.gs.input_primitive);4575state->state.psv0.gs.output_toplology = dxil_get_primitive_topology(ctx->shader->info.gs.output_primitive);4576state->state.psv0.gs.output_stream_mask = ctx->shader->info.gs.active_stream_mask;4577state->state.psv0.gs.output_position_present = ctx->mod.info.has_out_position;4578break;4579default:4580assert(0 && "Shader type not (yet) supported");4581}4582}45834584static nir_variable *4585add_sysvalue(struct ntd_context *ctx,4586uint8_t value, char *name,4587int driver_location)4588{45894590nir_variable *var = rzalloc(ctx->shader, nir_variable);4591if (!var)4592return NULL;4593var->data.driver_location = driver_location;4594var->data.location = value;4595var->type = glsl_uint_type();4596var->name = name;4597var->data.mode = nir_var_system_value;4598var->data.interpolation = INTERP_MODE_FLAT;4599return var;4600}46014602static bool4603append_input_or_sysvalue(struct ntd_context *ctx,4604int input_loc, int sv_slot,4605char *name, int driver_location)4606{4607if (input_loc >= 0) {4608/* Check inputs whether a variable is available the corresponds4609* to the sysvalue */4610nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in) {4611if (var->data.location == input_loc) {4612ctx->system_value[sv_slot] = var;4613return true;4614}4615}4616}46174618ctx->system_value[sv_slot] = add_sysvalue(ctx, sv_slot, name, driver_location);4619if (!ctx->system_value[sv_slot])4620return false;46214622nir_shader_add_variable(ctx->shader, ctx->system_value[sv_slot]);4623return true;4624}46254626struct sysvalue_name {4627gl_system_value value;4628int slot;4629char *name;4630} possible_sysvalues[] = {4631{SYSTEM_VALUE_VERTEX_ID_ZERO_BASE, -1, "SV_VertexID"},4632{SYSTEM_VALUE_INSTANCE_ID, -1, "SV_InstanceID"},4633{SYSTEM_VALUE_FRONT_FACE, VARYING_SLOT_FACE, "SV_IsFrontFace"},4634{SYSTEM_VALUE_PRIMITIVE_ID, VARYING_SLOT_PRIMITIVE_ID, "SV_PrimitiveID"},4635};46364637static bool4638allocate_sysvalues(struct ntd_context *ctx)4639{4640unsigned driver_location = 0;4641nir_foreach_variable_with_modes(var, ctx->shader, nir_var_shader_in)4642driver_location++;4643nir_foreach_variable_with_modes(var, ctx->shader, nir_var_system_value)4644driver_location++;46454646for (unsigned i = 0; i < ARRAY_SIZE(possible_sysvalues); ++i) {4647struct sysvalue_name *info = &possible_sysvalues[i];4648if (BITSET_TEST(ctx->shader->info.system_values_read, info->value)) {4649if (!append_input_or_sysvalue(ctx, info->slot,4650info->value, info->name,4651driver_location++))4652return false;4653}4654}4655return true;4656}46574658bool4659nir_to_dxil(struct nir_shader *s, const struct nir_to_dxil_options *opts,4660struct blob *blob)4661{4662assert(opts);4663bool retval = true;4664debug_dxil = (int)debug_get_option_debug_dxil();4665blob_init(blob);46664667struct ntd_context *ctx = calloc(1, sizeof(*ctx));4668if (!ctx)4669return false;46704671ctx->opts = opts;4672ctx->shader = s;46734674ctx->ralloc_ctx = ralloc_context(NULL);4675if (!ctx->ralloc_ctx) {4676retval = false;4677goto out;4678}46794680util_dynarray_init(&ctx->srv_metadata_nodes, ctx->ralloc_ctx);4681util_dynarray_init(&ctx->uav_metadata_nodes, ctx->ralloc_ctx);4682util_dynarray_init(&ctx->cbv_metadata_nodes, ctx->ralloc_ctx);4683util_dynarray_init(&ctx->sampler_metadata_nodes, ctx->ralloc_ctx);4684util_dynarray_init(&ctx->resources, ctx->ralloc_ctx);4685dxil_module_init(&ctx->mod, ctx->ralloc_ctx);4686ctx->mod.shader_kind = get_dxil_shader_kind(s);4687ctx->mod.major_version = 6;4688ctx->mod.minor_version = 1;46894690NIR_PASS_V(s, nir_lower_pack);4691NIR_PASS_V(s, nir_lower_frexp);4692NIR_PASS_V(s, nir_lower_flrp, 16 | 32 | 64, true);46934694optimize_nir(s, opts);46954696NIR_PASS_V(s, nir_remove_dead_variables,4697nir_var_function_temp | nir_var_shader_temp, NULL);46984699if (!allocate_sysvalues(ctx))4700return false;47014702if (debug_dxil & DXIL_DEBUG_VERBOSE)4703nir_print_shader(s, stderr);47044705if (!emit_module(ctx, opts)) {4706debug_printf("D3D12: dxil_container_add_module failed\n");4707retval = false;4708goto out;4709}47104711if (debug_dxil & DXIL_DEBUG_DUMP_MODULE) {4712struct dxil_dumper *dumper = dxil_dump_create();4713dxil_dump_module(dumper, &ctx->mod);4714fprintf(stderr, "\n");4715dxil_dump_buf_to_file(dumper, stderr);4716fprintf(stderr, "\n\n");4717dxil_dump_free(dumper);4718}47194720struct dxil_container container;4721dxil_container_init(&container);4722if (!dxil_container_add_features(&container, &ctx->mod.feats)) {4723debug_printf("D3D12: dxil_container_add_features failed\n");4724retval = false;4725goto out;4726}47274728if (!dxil_container_add_io_signature(&container,4729DXIL_ISG1,4730ctx->mod.num_sig_inputs,4731ctx->mod.inputs)) {4732debug_printf("D3D12: failed to write input signature\n");4733retval = false;4734goto out;4735}47364737if (!dxil_container_add_io_signature(&container,4738DXIL_OSG1,4739ctx->mod.num_sig_outputs,4740ctx->mod.outputs)) {4741debug_printf("D3D12: failed to write output signature\n");4742retval = false;4743goto out;4744}47454746struct dxil_validation_state validation_state;4747memset(&validation_state, 0, sizeof(validation_state));4748dxil_fill_validation_state(ctx, &validation_state);47494750if (!dxil_container_add_state_validation(&container,&ctx->mod,4751&validation_state)) {4752debug_printf("D3D12: failed to write state-validation\n");4753retval = false;4754goto out;4755}47564757if (!dxil_container_add_module(&container, &ctx->mod)) {4758debug_printf("D3D12: failed to write module\n");4759retval = false;4760goto out;4761}47624763if (!dxil_container_write(&container, blob)) {4764debug_printf("D3D12: dxil_container_write failed\n");4765retval = false;4766goto out;4767}4768dxil_container_finish(&container);47694770if (debug_dxil & DXIL_DEBUG_DUMP_BLOB) {4771static int shader_id = 0;4772char buffer[64];4773snprintf(buffer, sizeof(buffer), "shader_%s_%d.blob",4774get_shader_kind_str(ctx->mod.shader_kind), shader_id++);4775debug_printf("Try to write blob to %s\n", buffer);4776FILE *f = fopen(buffer, "wb");4777if (f) {4778fwrite(blob->data, 1, blob->size, f);4779fclose(f);4780}4781}47824783out:4784dxil_module_release(&ctx->mod);4785ralloc_free(ctx->ralloc_ctx);4786free(ctx);4787return retval;4788}47894790enum dxil_sysvalue_type4791nir_var_to_dxil_sysvalue_type(nir_variable *var, uint64_t other_stage_mask)4792{4793switch (var->data.location) {4794case VARYING_SLOT_FACE:4795return DXIL_GENERATED_SYSVALUE;4796case VARYING_SLOT_POS:4797case VARYING_SLOT_PRIMITIVE_ID:4798case VARYING_SLOT_CLIP_DIST0:4799case VARYING_SLOT_CLIP_DIST1:4800case VARYING_SLOT_PSIZ:4801if (!((1ull << var->data.location) & other_stage_mask))4802return DXIL_SYSVALUE;4803FALLTHROUGH;4804default:4805return DXIL_NO_SYSVALUE;4806}4807}480848094810