/* Path: blob/21.2-virgl/src/microsoft/clc/clc_compiler.c */
/*1* Copyright © Microsoft Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "nir.h"24#include "nir_serialize.h"25#include "glsl_types.h"26#include "nir_types.h"27#include "clc_compiler.h"28#include "clc_helpers.h"29#include "clc_nir.h"30#include "../compiler/dxil_nir.h"31#include "../compiler/dxil_nir_lower_int_samplers.h"32#include "../compiler/nir_to_dxil.h"3334#include "util/u_debug.h"35#include <util/u_math.h>36#include "spirv/nir_spirv.h"37#include "nir_builder.h"38#include "nir_builtin_builder.h"3940#include "git_sha1.h"4142enum clc_debug_flags {43CLC_DEBUG_DUMP_SPIRV = 1 << 0,44CLC_DEBUG_VERBOSE = 1 << 1,45};4647static const struct debug_named_value clc_debug_options[] = {48{ "dump_spirv", CLC_DEBUG_DUMP_SPIRV, "Dump spirv blobs" },49{ "verbose", CLC_DEBUG_VERBOSE, NULL },50DEBUG_NAMED_VALUE_END51};5253DEBUG_GET_ONCE_FLAGS_OPTION(debug_clc, "CLC_DEBUG", clc_debug_options, 0)5455static 
void56clc_print_kernels_info(const struct clc_object *obj)57{58fprintf(stdout, "Kernels:\n");59for (unsigned i = 0; i < obj->num_kernels; i++) {60const struct clc_kernel_arg *args = obj->kernels[i].args;61bool first = true;6263fprintf(stdout, "\tvoid %s(", obj->kernels[i].name);64for (unsigned j = 0; j < obj->kernels[i].num_args; j++) {65if (!first)66fprintf(stdout, ", ");67else68first = false;6970switch (args[j].address_qualifier) {71case CLC_KERNEL_ARG_ADDRESS_GLOBAL:72fprintf(stdout, "__global ");73break;74case CLC_KERNEL_ARG_ADDRESS_LOCAL:75fprintf(stdout, "__local ");76break;77case CLC_KERNEL_ARG_ADDRESS_CONSTANT:78fprintf(stdout, "__constant ");79break;80default:81break;82}8384if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_VOLATILE)85fprintf(stdout, "volatile ");86if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_CONST)87fprintf(stdout, "const ");88if (args[j].type_qualifier & CLC_KERNEL_ARG_TYPE_RESTRICT)89fprintf(stdout, "restrict ");9091fprintf(stdout, "%s %s", args[j].type_name, args[j].name);92}93fprintf(stdout, ");\n");94}95}9697struct clc_image_lower_context98{99struct clc_dxil_metadata *metadata;100unsigned *num_srvs;101unsigned *num_uavs;102nir_deref_instr *deref;103unsigned num_buf_ids;104int metadata_index;105};106107static int108lower_image_deref_impl(nir_builder *b, struct clc_image_lower_context *context,109const struct glsl_type *new_var_type,110unsigned *num_bindings)111{112nir_variable *in_var = nir_deref_instr_get_variable(context->deref);113nir_variable *uniform = nir_variable_create(b->shader, nir_var_uniform, new_var_type, NULL);114uniform->data.access = in_var->data.access;115uniform->data.binding = in_var->data.binding;116if (context->num_buf_ids > 0) {117// Need to assign a new binding118context->metadata->args[context->metadata_index].119image.buf_ids[context->num_buf_ids] = uniform->data.binding = (*num_bindings)++;120}121context->num_buf_ids++;122return uniform->data.binding;123}124125static 
int126lower_read_only_image_deref(nir_builder *b, struct clc_image_lower_context *context,127nir_alu_type image_type)128{129nir_variable *in_var = nir_deref_instr_get_variable(context->deref);130131// Non-writeable images should be converted to samplers,132// since they may have texture operations done on them133const struct glsl_type *new_var_type =134glsl_sampler_type(glsl_get_sampler_dim(in_var->type),135false, glsl_sampler_type_is_array(in_var->type),136nir_get_glsl_base_type_for_nir_type(image_type | 32));137return lower_image_deref_impl(b, context, new_var_type, context->num_srvs);138}139140static int141lower_read_write_image_deref(nir_builder *b, struct clc_image_lower_context *context,142nir_alu_type image_type)143{144nir_variable *in_var = nir_deref_instr_get_variable(context->deref);145const struct glsl_type *new_var_type =146glsl_image_type(glsl_get_sampler_dim(in_var->type),147glsl_sampler_type_is_array(in_var->type),148nir_get_glsl_base_type_for_nir_type(image_type | 32));149return lower_image_deref_impl(b, context, new_var_type, context->num_uavs);150}151152static void153clc_lower_input_image_deref(nir_builder *b, struct clc_image_lower_context *context)154{155// The input variable here isn't actually an image, it's just the156// image format data.157//158// For every use of an image in a different way, we'll add an159// appropriate uniform to match it. That can result in up to160// 3 uniforms (float4, int4, uint4) for each image. 
Only one of these161// formats will actually produce correct data, but a single kernel162// could use runtime conditionals to potentially access any of them.163//164// If the image is used in a query that doesn't have a corresponding165// DXIL intrinsic (CL image channel order or channel format), then166// we'll add a kernel input for that data that'll be lowered by the167// explicit IO pass later on.168//169// After all that, we can remove the image input variable and deref.170171enum image_uniform_type {172FLOAT4,173INT4,174UINT4,175IMAGE_UNIFORM_TYPE_COUNT176};177178int image_bindings[IMAGE_UNIFORM_TYPE_COUNT] = {-1, -1, -1};179nir_ssa_def *format_deref_dest = NULL, *order_deref_dest = NULL;180181nir_variable *in_var = nir_deref_instr_get_variable(context->deref);182enum gl_access_qualifier access = in_var->data.access;183184context->metadata_index = 0;185while (context->metadata->args[context->metadata_index].image.buf_ids[0] != in_var->data.binding)186context->metadata_index++;187188context->num_buf_ids = 0;189190/* Do this in 2 passes:191* 1. When encountering a strongly-typed access (load/store), replace the deref192* with one that references an appropriately typed variable. When encountering193* an untyped access (size query), if we have a strongly-typed variable already,194* replace the deref to point to it.195* 2. If there's any references left, they should all be untyped. If we found196* a strongly-typed access later in the 1st pass, then just replace the reference.197* If we didn't, e.g. 
the resource is only used for a size query, then pick an198* arbitrary type for it.199*/200for (int pass = 0; pass < 2; ++pass) {201nir_foreach_use_safe(src, &context->deref->dest.ssa) {202enum image_uniform_type type;203204if (src->parent_instr->type == nir_instr_type_intrinsic) {205nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(src->parent_instr);206enum nir_alu_type dest_type;207208b->cursor = nir_before_instr(&intrinsic->instr);209210switch (intrinsic->intrinsic) {211case nir_intrinsic_image_deref_load:212case nir_intrinsic_image_deref_store: {213dest_type = intrinsic->intrinsic == nir_intrinsic_image_deref_load ?214nir_intrinsic_dest_type(intrinsic) : nir_intrinsic_src_type(intrinsic);215216switch (nir_alu_type_get_base_type(dest_type)) {217case nir_type_float: type = FLOAT4; break;218case nir_type_int: type = INT4; break;219case nir_type_uint: type = UINT4; break;220default: unreachable("Unsupported image type for load.");221}222223int image_binding = image_bindings[type];224if (image_binding < 0) {225image_binding = image_bindings[type] =226lower_read_write_image_deref(b, context, dest_type);227}228229assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);230nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);231break;232}233234case nir_intrinsic_image_deref_size: {235int image_binding = -1;236for (unsigned i = 0; i < IMAGE_UNIFORM_TYPE_COUNT; ++i) {237if (image_bindings[i] >= 0) {238image_binding = image_bindings[i];239break;240}241}242if (image_binding < 0) {243// Skip for now and come back to it244if (pass == 0)245break;246247type = FLOAT4;248image_binding = image_bindings[type] =249lower_read_write_image_deref(b, context, nir_type_float32);250}251252assert((in_var->data.access & ACCESS_NON_WRITEABLE) == 0);253nir_rewrite_image_intrinsic(intrinsic, nir_imm_int(b, image_binding), false);254break;255}256257case nir_intrinsic_image_deref_format:258case nir_intrinsic_image_deref_order: {259nir_ssa_def **cached_deref = 
intrinsic->intrinsic == nir_intrinsic_image_deref_format ?260&format_deref_dest : &order_deref_dest;261if (!*cached_deref) {262nir_variable *new_input = nir_variable_create(b->shader, nir_var_uniform, glsl_uint_type(), NULL);263new_input->data.driver_location = in_var->data.driver_location;264if (intrinsic->intrinsic == nir_intrinsic_image_deref_format) {265/* Match cl_image_format { image_channel_order, image_channel_data_type }; */266new_input->data.driver_location += glsl_get_cl_size(new_input->type);267}268269b->cursor = nir_after_instr(&context->deref->instr);270*cached_deref = nir_load_var(b, new_input);271}272273/* No actual intrinsic needed here, just reference the loaded variable */274nir_ssa_def_rewrite_uses(&intrinsic->dest.ssa, *cached_deref);275nir_instr_remove(&intrinsic->instr);276break;277}278279default:280unreachable("Unsupported image intrinsic");281}282} else if (src->parent_instr->type == nir_instr_type_tex) {283assert(in_var->data.access & ACCESS_NON_WRITEABLE);284nir_tex_instr *tex = nir_instr_as_tex(src->parent_instr);285286switch (nir_alu_type_get_base_type(tex->dest_type)) {287case nir_type_float: type = FLOAT4; break;288case nir_type_int: type = INT4; break;289case nir_type_uint: type = UINT4; break;290default: unreachable("Unsupported image format for sample.");291}292293int image_binding = image_bindings[type];294if (image_binding < 0) {295image_binding = image_bindings[type] =296lower_read_only_image_deref(b, context, tex->dest_type);297}298299nir_tex_instr_remove_src(tex, nir_tex_instr_src_index(tex, nir_tex_src_texture_deref));300tex->texture_index = image_binding;301}302}303}304305context->metadata->args[context->metadata_index].image.num_buf_ids = context->num_buf_ids;306307nir_instr_remove(&context->deref->instr);308exec_node_remove(&in_var->node);309}310311static void312clc_lower_images(nir_shader *nir, struct clc_image_lower_context *context)313{314nir_foreach_function(func, nir) {315if 
(!func->is_entrypoint)316continue;317assert(func->impl);318319nir_builder b;320nir_builder_init(&b, func->impl);321322nir_foreach_block(block, func->impl) {323nir_foreach_instr_safe(instr, block) {324if (instr->type == nir_instr_type_deref) {325context->deref = nir_instr_as_deref(instr);326327if (glsl_type_is_image(context->deref->type)) {328assert(context->deref->deref_type == nir_deref_type_var);329clc_lower_input_image_deref(&b, context);330}331}332}333}334}335}336337static void338clc_lower_64bit_semantics(nir_shader *nir)339{340nir_foreach_function(func, nir) {341nir_builder b;342nir_builder_init(&b, func->impl);343344nir_foreach_block(block, func->impl) {345nir_foreach_instr_safe(instr, block) {346if (instr->type == nir_instr_type_intrinsic) {347nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr);348switch (intrinsic->intrinsic) {349case nir_intrinsic_load_global_invocation_id:350case nir_intrinsic_load_global_invocation_id_zero_base:351case nir_intrinsic_load_base_global_invocation_id:352case nir_intrinsic_load_local_invocation_id:353case nir_intrinsic_load_workgroup_id:354case nir_intrinsic_load_workgroup_id_zero_base:355case nir_intrinsic_load_base_workgroup_id:356case nir_intrinsic_load_num_workgroups:357break;358default:359continue;360}361362if (nir_instr_ssa_def(instr)->bit_size != 64)363continue;364365intrinsic->dest.ssa.bit_size = 32;366b.cursor = nir_after_instr(instr);367368nir_ssa_def *i64 = nir_u2u64(&b, &intrinsic->dest.ssa);369nir_ssa_def_rewrite_uses_after(370&intrinsic->dest.ssa,371i64,372i64->parent_instr);373}374}375}376}377}378379static void380clc_lower_nonnormalized_samplers(nir_shader *nir,381const dxil_wrap_sampler_state *states)382{383nir_foreach_function(func, nir) {384if (!func->is_entrypoint)385continue;386assert(func->impl);387388nir_builder b;389nir_builder_init(&b, func->impl);390391nir_foreach_block(block, func->impl) {392nir_foreach_instr_safe(instr, block) {393if (instr->type != 
nir_instr_type_tex)394continue;395nir_tex_instr *tex = nir_instr_as_tex(instr);396397int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);398if (sampler_src_idx == -1)399continue;400401nir_src *sampler_src = &tex->src[sampler_src_idx].src;402assert(sampler_src->is_ssa && sampler_src->ssa->parent_instr->type == nir_instr_type_deref);403nir_variable *sampler = nir_deref_instr_get_variable(404nir_instr_as_deref(sampler_src->ssa->parent_instr));405406// If the sampler returns ints, we'll handle this in the int lowering pass407if (nir_alu_type_get_base_type(tex->dest_type) != nir_type_float)408continue;409410// If sampler uses normalized coords, nothing to do411if (!states[sampler->data.binding].is_nonnormalized_coords)412continue;413414b.cursor = nir_before_instr(&tex->instr);415416int coords_idx = nir_tex_instr_src_index(tex, nir_tex_src_coord);417assert(coords_idx != -1);418nir_ssa_def *coords =419nir_ssa_for_src(&b, tex->src[coords_idx].src, tex->coord_components);420421nir_ssa_def *txs = nir_i2f32(&b, nir_get_texture_size(&b, tex));422423// Normalize coords for tex424nir_ssa_def *scale = nir_frcp(&b, txs);425nir_ssa_def *comps[4];426for (unsigned i = 0; i < coords->num_components; ++i) {427comps[i] = nir_channel(&b, coords, i);428if (tex->is_array && i == coords->num_components - 1) {429// Don't scale the array index, but do clamp it430comps[i] = nir_fround_even(&b, comps[i]);431comps[i] = nir_fmax(&b, comps[i], nir_imm_float(&b, 0.0f));432comps[i] = nir_fmin(&b, comps[i], nir_fsub(&b, nir_channel(&b, txs, i), nir_imm_float(&b, 1.0f)));433break;434}435436// The CTS is pretty clear that this value has to be floored for nearest sampling437// but must not be for linear sampling.438if (!states[sampler->data.binding].is_linear_filtering)439comps[i] = nir_fadd_imm(&b, nir_ffloor(&b, comps[i]), 0.5f);440comps[i] = nir_fmul(&b, comps[i], nir_channel(&b, scale, i));441}442nir_ssa_def *normalized_coords = nir_vec(&b, comps, 
coords->num_components);443nir_instr_rewrite_src(&tex->instr,444&tex->src[coords_idx].src,445nir_src_for_ssa(normalized_coords));446}447}448}449}450451452static void453clc_context_optimize(nir_shader *s)454{455bool progress;456do {457progress = false;458NIR_PASS(progress, s, nir_split_var_copies);459NIR_PASS(progress, s, nir_opt_copy_prop_vars);460NIR_PASS(progress, s, nir_lower_var_copies);461NIR_PASS(progress, s, nir_lower_vars_to_ssa);462NIR_PASS(progress, s, nir_copy_prop);463NIR_PASS(progress, s, nir_opt_remove_phis);464NIR_PASS(progress, s, nir_opt_dce);465NIR_PASS(progress, s, nir_opt_if, true);466NIR_PASS(progress, s, nir_opt_dead_cf);467NIR_PASS(progress, s, nir_opt_cse);468NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);469NIR_PASS(progress, s, nir_opt_algebraic);470NIR_PASS(progress, s, nir_opt_constant_folding);471NIR_PASS(progress, s, nir_opt_undef);472NIR_PASS(progress, s, nir_lower_undef_to_zero);473NIR_PASS(progress, s, nir_opt_deref);474} while (progress);475}476477struct clc_context *478clc_context_new(const struct clc_logger *logger, const struct clc_context_options *options)479{480struct clc_context *ctx = rzalloc(NULL, struct clc_context);481if (!ctx) {482clc_error(logger, "D3D12: failed to allocate a clc_context");483return NULL;484}485486const struct spirv_to_nir_options libclc_spirv_options = {487.environment = NIR_SPIRV_OPENCL,488.create_library = true,489.constant_addr_format = nir_address_format_32bit_index_offset_pack64,490.global_addr_format = nir_address_format_32bit_index_offset_pack64,491.shared_addr_format = nir_address_format_32bit_offset_as_64bit,492.temp_addr_format = nir_address_format_32bit_offset_as_64bit,493.float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,494.caps = {495.address = true,496.float64 = true,497.int8 = true,498.int16 = true,499.int64 = true,500.kernel = true,501},502};503const struct nir_shader_compiler_options *libclc_nir_options 
=504dxil_get_nir_compiler_options();505506glsl_type_singleton_init_or_ref();507nir_shader *s = nir_load_libclc_shader(64, NULL, &libclc_spirv_options, libclc_nir_options);508if (!s) {509clc_error(logger, "D3D12: spirv_to_nir failed on libclc blob");510ralloc_free(ctx);511return NULL;512}513514if (options && options->optimize)515clc_context_optimize(s);516517ralloc_steal(ctx, s);518ctx->libclc_nir = s;519520return ctx;521}522523void524clc_free_context(struct clc_context *ctx)525{526ralloc_free(ctx);527glsl_type_singleton_decref();528};529530void clc_context_serialize(struct clc_context *context,531void **serialized,532size_t *serialized_size)533{534struct blob tmp;535blob_init(&tmp);536nir_serialize(&tmp, context->libclc_nir, true);537538blob_finish_get_buffer(&tmp, serialized, serialized_size);539}540541void clc_context_free_serialized(void *serialized)542{543free(serialized);544}545546struct clc_context *547clc_context_deserialize(const void *serialized, size_t serialized_size)548{549struct clc_context *ctx = rzalloc(NULL, struct clc_context);550if (!ctx) {551return NULL;552}553const struct nir_shader_compiler_options *libclc_nir_options =554dxil_get_nir_compiler_options();555556glsl_type_singleton_init_or_ref();557558struct blob_reader tmp;559blob_reader_init(&tmp, serialized, serialized_size);560561nir_shader *s = nir_deserialize(NULL, libclc_nir_options, &tmp);562if (!s) {563ralloc_free(ctx);564return NULL;565}566567ralloc_steal(ctx, s);568ctx->libclc_nir = s;569570return ctx;571}572573struct clc_object *574clc_compile(struct clc_context *ctx,575const struct clc_compile_args *args,576const struct clc_logger *logger)577{578struct clc_object *obj;579int ret;580581obj = calloc(1, sizeof(*obj));582if (!obj) {583clc_error(logger, "D3D12: failed to allocate a clc_object");584return NULL;585}586587ret = clc_to_spirv(args, &obj->spvbin, logger);588if (ret < 0) {589free(obj);590return NULL;591}592593if (debug_get_option_debug_clc() & 
CLC_DEBUG_DUMP_SPIRV)594clc_dump_spirv(&obj->spvbin, stdout);595596return obj;597}598599struct clc_object *600clc_link(struct clc_context *ctx,601const struct clc_linker_args *args,602const struct clc_logger *logger)603{604struct clc_object *out_obj;605int ret;606607out_obj = malloc(sizeof(*out_obj));608if (!out_obj) {609clc_error(logger, "failed to allocate a clc_object");610return NULL;611}612613ret = clc_link_spirv_binaries(args, &out_obj->spvbin, logger);614if (ret < 0) {615free(out_obj);616return NULL;617}618619if (debug_get_option_debug_clc() & CLC_DEBUG_DUMP_SPIRV)620clc_dump_spirv(&out_obj->spvbin, stdout);621622out_obj->kernels = clc_spirv_get_kernels_info(&out_obj->spvbin,623&out_obj->num_kernels);624625if (debug_get_option_debug_clc() & CLC_DEBUG_VERBOSE)626clc_print_kernels_info(out_obj);627628return out_obj;629}630631void clc_free_object(struct clc_object *obj)632{633clc_free_kernels_info(obj->kernels, obj->num_kernels);634clc_free_spirv_binary(&obj->spvbin);635free(obj);636}637638static nir_variable *639add_kernel_inputs_var(struct clc_dxil_object *dxil, nir_shader *nir,640unsigned *cbv_id)641{642if (!dxil->kernel->num_args)643return NULL;644645struct clc_dxil_metadata *metadata = &dxil->metadata;646unsigned size = 0;647648nir_foreach_variable_with_modes(var, nir, nir_var_uniform)649size = MAX2(size,650var->data.driver_location +651glsl_get_cl_size(var->type));652653size = align(size, 4);654655const struct glsl_type *array_type = glsl_array_type(glsl_uint_type(), size / 4, 4);656const struct glsl_struct_field field = { array_type, "arr" };657nir_variable *var =658nir_variable_create(nir, nir_var_mem_ubo,659glsl_struct_type(&field, 1, "kernel_inputs", false),660"kernel_inputs");661var->data.binding = (*cbv_id)++;662var->data.how_declared = nir_var_hidden;663return var;664}665666static nir_variable *667add_work_properties_var(struct clc_dxil_object *dxil,668struct nir_shader *nir, unsigned *cbv_id)669{670struct clc_dxil_metadata *metadata = 
&dxil->metadata;671const struct glsl_type *array_type =672glsl_array_type(glsl_uint_type(),673sizeof(struct clc_work_properties_data) / sizeof(unsigned),674sizeof(unsigned));675const struct glsl_struct_field field = { array_type, "arr" };676nir_variable *var =677nir_variable_create(nir, nir_var_mem_ubo,678glsl_struct_type(&field, 1, "kernel_work_properties", false),679"kernel_work_properies");680var->data.binding = (*cbv_id)++;681var->data.how_declared = nir_var_hidden;682return var;683}684685static void686clc_lower_constant_to_ssbo(nir_shader *nir,687const struct clc_kernel_info *kerninfo, unsigned *uav_id)688{689/* Update UBO vars and assign them a binding. */690nir_foreach_variable_with_modes(var, nir, nir_var_mem_constant) {691var->data.mode = nir_var_mem_ssbo;692var->data.binding = (*uav_id)++;693}694695/* And finally patch all the derefs referincing the constant696* variables/pointers.697*/698nir_foreach_function(func, nir) {699if (!func->is_entrypoint)700continue;701702assert(func->impl);703704nir_builder b;705nir_builder_init(&b, func->impl);706707nir_foreach_block(block, func->impl) {708nir_foreach_instr(instr, block) {709if (instr->type != nir_instr_type_deref)710continue;711712nir_deref_instr *deref = nir_instr_as_deref(instr);713714if (deref->modes != nir_var_mem_constant)715continue;716717deref->modes = nir_var_mem_ssbo;718}719}720}721}722723static void724clc_lower_global_to_ssbo(nir_shader *nir)725{726nir_foreach_function(func, nir) {727if (!func->is_entrypoint)728continue;729730assert(func->impl);731732nir_foreach_block(block, func->impl) {733nir_foreach_instr(instr, block) {734if (instr->type != nir_instr_type_deref)735continue;736737nir_deref_instr *deref = nir_instr_as_deref(instr);738739if (deref->modes != nir_var_mem_global)740continue;741742deref->modes = nir_var_mem_ssbo;743}744}745}746}747748static void749copy_const_initializer(const nir_constant *constant, const struct glsl_type *type,750uint8_t *data)751{752unsigned size = 
glsl_get_cl_size(type);753754if (glsl_type_is_array(type)) {755const struct glsl_type *elm_type = glsl_get_array_element(type);756unsigned step_size = glsl_get_explicit_stride(type);757758for (unsigned i = 0; i < constant->num_elements; i++) {759copy_const_initializer(constant->elements[i], elm_type,760data + (i * step_size));761}762} else if (glsl_type_is_struct(type)) {763for (unsigned i = 0; i < constant->num_elements; i++) {764const struct glsl_type *elm_type = glsl_get_struct_field(type, i);765int offset = glsl_get_struct_field_offset(type, i);766copy_const_initializer(constant->elements[i], elm_type, data + offset);767}768} else {769assert(glsl_type_is_vector_or_scalar(type));770771for (unsigned i = 0; i < glsl_get_components(type); i++) {772switch (glsl_get_bit_size(type)) {773case 64:774*((uint64_t *)data) = constant->values[i].u64;775break;776case 32:777*((uint32_t *)data) = constant->values[i].u32;778break;779case 16:780*((uint16_t *)data) = constant->values[i].u16;781break;782case 8:783*((uint8_t *)data) = constant->values[i].u8;784break;785default:786unreachable("Invalid base type");787}788789data += glsl_get_bit_size(type) / 8;790}791}792}793794static const struct glsl_type *795get_cast_type(unsigned bit_size)796{797switch (bit_size) {798case 64:799return glsl_int64_t_type();800case 32:801return glsl_int_type();802case 16:803return glsl_int16_t_type();804case 8:805return glsl_int8_t_type();806}807unreachable("Invalid bit_size");808}809810static void811split_unaligned_load(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)812{813enum gl_access_qualifier access = nir_intrinsic_access(intrin);814nir_ssa_def *srcs[NIR_MAX_VEC_COMPONENTS * NIR_MAX_VEC_COMPONENTS * sizeof(int64_t) / 8];815unsigned comp_size = intrin->dest.ssa.bit_size / 8;816unsigned num_comps = intrin->dest.ssa.num_components;817818b->cursor = nir_before_instr(&intrin->instr);819820nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);821822const struct glsl_type *cast_type 
= get_cast_type(alignment * 8);823nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);824825unsigned num_loads = DIV_ROUND_UP(comp_size * num_comps, alignment);826for (unsigned i = 0; i < num_loads; ++i) {827nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));828srcs[i] = nir_load_deref_with_access(b, elem, access);829}830831nir_ssa_def *new_dest = nir_extract_bits(b, srcs, num_loads, 0, num_comps, intrin->dest.ssa.bit_size);832nir_ssa_def_rewrite_uses(&intrin->dest.ssa, new_dest);833nir_instr_remove(&intrin->instr);834}835836static void837split_unaligned_store(nir_builder *b, nir_intrinsic_instr *intrin, unsigned alignment)838{839enum gl_access_qualifier access = nir_intrinsic_access(intrin);840841assert(intrin->src[1].is_ssa);842nir_ssa_def *value = intrin->src[1].ssa;843unsigned comp_size = value->bit_size / 8;844unsigned num_comps = value->num_components;845846b->cursor = nir_before_instr(&intrin->instr);847848nir_deref_instr *ptr = nir_src_as_deref(intrin->src[0]);849850const struct glsl_type *cast_type = get_cast_type(alignment * 8);851nir_deref_instr *cast = nir_build_deref_cast(b, &ptr->dest.ssa, ptr->modes, cast_type, alignment);852853unsigned num_stores = DIV_ROUND_UP(comp_size * num_comps, alignment);854for (unsigned i = 0; i < num_stores; ++i) {855nir_ssa_def *substore_val = nir_extract_bits(b, &value, 1, i * alignment * 8, 1, alignment * 8);856nir_deref_instr *elem = nir_build_deref_ptr_as_array(b, cast, nir_imm_intN_t(b, i, cast->dest.ssa.bit_size));857nir_store_deref_with_access(b, elem, substore_val, ~0, access);858}859860nir_instr_remove(&intrin->instr);861}862863static bool864split_unaligned_loads_stores(nir_shader *shader)865{866bool progress = false;867868nir_foreach_function(function, shader) {869if (!function->impl)870continue;871872nir_builder b;873nir_builder_init(&b, function->impl);874875nir_foreach_block(block, function->impl) 
{876nir_foreach_instr_safe(instr, block) {877if (instr->type != nir_instr_type_intrinsic)878continue;879nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);880if (intrin->intrinsic != nir_intrinsic_load_deref &&881intrin->intrinsic != nir_intrinsic_store_deref)882continue;883nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);884885unsigned align_mul = 0, align_offset = 0;886nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset);887888unsigned alignment = align_offset ? 1 << (ffs(align_offset) - 1) : align_mul;889890/* We can load anything at 4-byte alignment, except for891* UBOs (AKA CBs where the granularity is 16 bytes).892*/893if (alignment >= (deref->modes == nir_var_mem_ubo ? 16 : 4))894continue;895896nir_ssa_def *val;897if (intrin->intrinsic == nir_intrinsic_load_deref) {898assert(intrin->dest.is_ssa);899val = &intrin->dest.ssa;900} else {901assert(intrin->src[1].is_ssa);902val = intrin->src[1].ssa;903}904905unsigned natural_alignment =906val->bit_size / 8 *907(val->num_components == 3 ? 
4 : val->num_components);908909if (alignment >= natural_alignment)910continue;911912if (intrin->intrinsic == nir_intrinsic_load_deref)913split_unaligned_load(&b, intrin, alignment);914else915split_unaligned_store(&b, intrin, alignment);916progress = true;917}918}919}920921return progress;922}923924static enum pipe_tex_wrap925wrap_from_cl_addressing(unsigned addressing_mode)926{927switch (addressing_mode)928{929default:930case SAMPLER_ADDRESSING_MODE_NONE:931case SAMPLER_ADDRESSING_MODE_CLAMP:932// Since OpenCL's only border color is 0's and D3D specs out-of-bounds loads to return 0, don't apply any wrap mode933return (enum pipe_tex_wrap)-1;934case SAMPLER_ADDRESSING_MODE_CLAMP_TO_EDGE: return PIPE_TEX_WRAP_CLAMP_TO_EDGE;935case SAMPLER_ADDRESSING_MODE_REPEAT: return PIPE_TEX_WRAP_REPEAT;936case SAMPLER_ADDRESSING_MODE_REPEAT_MIRRORED: return PIPE_TEX_WRAP_MIRROR_REPEAT;937}938}939940static bool shader_has_double(nir_shader *nir)941{942bool progress = false;943944foreach_list_typed(nir_function, func, node, &nir->functions) {945if (!func->is_entrypoint)946continue;947948assert(func->impl);949950nir_foreach_block(block, func->impl) {951nir_foreach_instr_safe(instr, block) {952if (instr->type != nir_instr_type_alu)953continue;954955nir_alu_instr *alu = nir_instr_as_alu(instr);956const nir_op_info *info = &nir_op_infos[alu->op];957958if (info->output_type & nir_type_float &&959nir_dest_bit_size(alu->dest.dest) == 64)960return true;961}962}963}964965return false;966}967968static bool969scale_fdiv(nir_shader *nir)970{971bool progress = false;972nir_foreach_function(func, nir) {973if (!func->impl)974continue;975nir_builder b;976nir_builder_init(&b, func->impl);977nir_foreach_block(block, func->impl) {978nir_foreach_instr(instr, block) {979if (instr->type != nir_instr_type_alu)980continue;981nir_alu_instr *alu = nir_instr_as_alu(instr);982if (alu->op != nir_op_fdiv || alu->src[0].src.ssa->bit_size != 32)983continue;984985b.cursor = nir_before_instr(instr);986nir_ssa_def 
*fabs = nir_fabs(&b, alu->src[1].src.ssa);
         /* Classify the divisor magnitude. The thresholds are raw bit
          * patterns compared as floats: 0x7e800000 (~8.5e37) and
          * 0x00800000 (FLT_MIN) — presumably chosen so the scaled operands
          * below stay in normal float range; TODO confirm against the
          * matching DXIL fdiv precision requirements.
          */
         nir_ssa_def *big = nir_flt(&b, nir_imm_int(&b, 0x7e800000), fabs);
         nir_ssa_def *small = nir_flt(&b, fabs, nir_imm_int(&b, 0x00800000));

         /* Pre-scale both numerator and denominator by the same factor
          * (1/4 for huge divisors, 2^24 for tiny ones) so the quotient is
          * unchanged but intermediate values avoid overflow/underflow.
          */
         nir_ssa_def *scaled_down_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 0.25);
         nir_ssa_def *scaled_down_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 0.25);
         nir_ssa_def *scaled_up_a = nir_fmul_imm(&b, alu->src[0].src.ssa, 16777216.0);
         nir_ssa_def *scaled_up_b = nir_fmul_imm(&b, alu->src[1].src.ssa, 16777216.0);

         nir_ssa_def *final_a =
            nir_bcsel(&b, big, scaled_down_a,
                      (nir_bcsel(&b, small, scaled_up_a, alu->src[0].src.ssa)));
         nir_ssa_def *final_b =
            nir_bcsel(&b, big, scaled_down_b,
                      (nir_bcsel(&b, small, scaled_up_b, alu->src[1].src.ssa)));

         nir_instr_rewrite_src(instr, &alu->src[0].src, nir_src_for_ssa(final_a));
         nir_instr_rewrite_src(instr, &alu->src[1].src, nir_src_for_ssa(final_b));
         progress = true;
      }
   }
}
return progress;
}

/* Compile one kernel of a previously-parsed SPIR-V object to DXIL.
 *
 * ctx        - compiler context; supplies the libclc NIR library used to
 *              resolve OpenCL builtin calls.
 * obj        - SPIR-V binary plus per-kernel argument metadata.
 * entrypoint - name of the kernel to compile; must match one of
 *              obj->kernels[].name.
 * conf       - optional runtime configuration (local size, sampler modes,
 *              local-pointer sizes, bit-size lowering); may be NULL for a
 *              "dummy" compile that is redone once the runtime conf exists.
 * logger     - error sink for clc_error().
 *
 * Returns a freshly allocated clc_dxil_object (caller frees with
 * clc_free_dxil_object()) or NULL on failure.
 */
struct clc_dxil_object *
clc_to_dxil(struct clc_context *ctx,
            const struct clc_object *obj,
            const char *entrypoint,
            const struct clc_runtime_kernel_conf *conf,
            const struct clc_logger *logger)
{
   struct clc_dxil_object *dxil;
   struct nir_shader *nir;

   dxil = calloc(1, sizeof(*dxil));
   if (!dxil) {
      clc_error(logger, "failed to allocate the dxil object");
      return NULL;
   }

   /* Locate the requested kernel's metadata; dxil->kernel points into obj,
    * so obj must outlive the returned dxil object.
    */
   for (unsigned i = 0; i < obj->num_kernels; i++) {
      if (!strcmp(obj->kernels[i].name, entrypoint)) {
         dxil->kernel = &obj->kernels[i];
         break;
      }
   }

   if (!dxil->kernel) {
      clc_error(logger, "no '%s' kernel found", entrypoint);
      goto err_free_dxil;
   }

   /* OpenCL-flavoured SPIR-V ingestion: all pointer address formats are
    * chosen to match what the DXIL backend can lower (32-bit index/offset
    * packed into 64-bit pointers for buffers, 32-bit offsets for
    * shared/temp).
    */
   const struct spirv_to_nir_options spirv_options = {
      .environment = NIR_SPIRV_OPENCL,
      .clc_shader = ctx->libclc_nir,
      .constant_addr_format = nir_address_format_32bit_index_offset_pack64,
      .global_addr_format = nir_address_format_32bit_index_offset_pack64,
      .shared_addr_format = nir_address_format_32bit_offset_as_64bit,
      .temp_addr_format = nir_address_format_32bit_offset_as_64bit,
      .float_controls_execution_mode = FLOAT_CONTROLS_DENORM_FLUSH_TO_ZERO_FP32,
      .caps = {
         .address = true,
         .float64 = true,
         .int8 = true,
         .int16 = true,
         .int64 = true,
         .kernel = true,
         .kernel_image = true,
         .literal_sampler = true,
         .printf = true,
      },
   };
   nir_shader_compiler_options nir_options =
      *dxil_get_nir_compiler_options();

   /* Runtime conf may request extra lowering of 64-/16-bit ALU ops. */
   if (conf && conf->lower_bit_size & 64) {
      nir_options.lower_pack_64_2x32_split = false;
      nir_options.lower_unpack_64_2x32_split = false;
      nir_options.lower_int64_options = ~0;
   }

   if (conf && conf->lower_bit_size & 16)
      nir_options.support_16bit_alu = true;

   glsl_type_singleton_init_or_ref();

   nir = spirv_to_nir(obj->spvbin.data, obj->spvbin.size / 4,
                      NULL, 0,
                      MESA_SHADER_KERNEL, entrypoint,
                      &spirv_options,
                      &nir_options);
   if (!nir) {
      clc_error(logger, "spirv_to_nir() failed");
      goto err_free_dxil;
   }
   nir->info.workgroup_size_variable = true;

   /* NOTE(review): from this point on, error paths jump to err_free_dxil,
    * which frees dxil but does not ralloc_free(nir) or call
    * glsl_type_singleton_decref() — looks like a leak on failure; the label
    * lives at the end of this function, so a cleanup label should be added
    * there. TODO confirm and fix.
    */

   NIR_PASS_V(nir, nir_lower_goto_ifs);
   NIR_PASS_V(nir, nir_opt_dead_cf);

   struct clc_dxil_metadata *metadata = &dxil->metadata;

   metadata->args = calloc(dxil->kernel->num_args,
                           sizeof(*metadata->args));
   if (!metadata->args) {
      clc_error(logger, "failed to allocate arg positions");
      goto err_free_dxil;
   }

   /* First opt loop: run to a fixed point before function inlining. */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
      } while (progress);
   }

   // Inline all functions first.
   // according to the comment on nir_inline_functions
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_lower_libclc, ctx->libclc_nir);
   NIR_PASS_V(nir, nir_inline_functions);

   // Pick off the single entrypoint that we want.
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Second opt loop: now that everything is inlined, also run control-flow
    * and vec3->vec4 cleanups to a fixed point.
    */
   {
      bool progress;
      do
      {
         progress = false;
         NIR_PASS(progress, nir, nir_copy_prop);
         NIR_PASS(progress, nir, nir_opt_copy_prop_vars);
         NIR_PASS(progress, nir, nir_opt_deref);
         NIR_PASS(progress, nir, nir_opt_dce);
         NIR_PASS(progress, nir, nir_opt_undef);
         NIR_PASS(progress, nir, nir_opt_constant_folding);
         NIR_PASS(progress, nir, nir_opt_cse);
         NIR_PASS(progress, nir, nir_split_var_copies);
         NIR_PASS(progress, nir, nir_lower_var_copies);
         NIR_PASS(progress, nir, nir_lower_vars_to_ssa);
         NIR_PASS(progress, nir, nir_opt_algebraic);
         NIR_PASS(progress, nir, nir_opt_if, true);
         NIR_PASS(progress, nir, nir_opt_dead_cf);
         NIR_PASS(progress, nir, nir_opt_remove_phis);
         NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);
         NIR_PASS(progress, nir, nir_lower_vec3_to_vec4, nir_var_mem_generic | nir_var_uniform);
      } while (progress);
   }

   /* Rescale fdiv operands (see scale_fdiv above) to keep precision within
    * what DXIL guarantees.
    */
   NIR_PASS_V(nir, scale_fdiv);

   dxil_wrap_sampler_state int_sampler_states[PIPE_MAX_SHADER_SAMPLER_VIEWS] = { {{0}} };
   unsigned sampler_id = 0;

   struct exec_list inline_samplers_list;
   exec_list_make_empty(&inline_samplers_list);

   // Move inline samplers to the end of the uniforms list
   nir_foreach_variable_with_modes_safe(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         exec_node_remove(&var->node);
         exec_list_push_tail(&inline_samplers_list, &var->node);
      }
   }
   exec_node_insert_list_after(exec_list_get_tail(&nir->variables), &inline_samplers_list);

   NIR_PASS_V(nir, nir_lower_variable_initializers, ~(nir_var_function_temp | nir_var_shader_temp));

   // Lower memcpy
   NIR_PASS_V(nir, dxil_nir_lower_memcpy_deref);

   // Ensure the printf struct has explicit types, but we'll throw away the scratch size, because we haven't
   // necessarily removed all temp variables (e.g. the printf struct itself) at this point, so we'll rerun this later
   assert(nir->scratch_size == 0);
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_function_temp, glsl_get_cl_type_size_align);

   nir_lower_printf_options printf_options = {
      .treat_doubles_as_floats = true,
      .max_buffer_size = 1024 * 1024
   };
   NIR_PASS_V(nir, nir_lower_printf, &printf_options);

   /* Snapshot printf format strings/arg sizes into the metadata so the
    * runtime can decode the printf buffer later.
    * NOTE(review): these calloc/malloc results are used unchecked, unlike
    * the other allocations in this function — TODO confirm whether OOM
    * handling is needed here.
    */
   metadata->printf.info_count = nir->printf_info_count;
   metadata->printf.infos = calloc(nir->printf_info_count, sizeof(struct clc_printf_info));
   for (unsigned i = 0; i < nir->printf_info_count; i++) {
      metadata->printf.infos[i].str = malloc(nir->printf_info[i].string_size);
      memcpy(metadata->printf.infos[i].str, nir->printf_info[i].strings, nir->printf_info[i].string_size);
      metadata->printf.infos[i].num_args = nir->printf_info[i].num_args;
      metadata->printf.infos[i].arg_sizes = malloc(nir->printf_info[i].num_args * sizeof(unsigned));
      memcpy(metadata->printf.infos[i].arg_sizes, nir->printf_info[i].arg_sizes, nir->printf_info[i].num_args * sizeof(unsigned));
   }

   // copy propagate to prepare for lower_explicit_io
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_lower_var_copies);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_lower_alu);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_deref);

   // For uniforms (kernel inputs), run this before adjusting variable list via image/sampler lowering
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_uniform, glsl_get_cl_type_size_align);

   // Calculate input offsets/metadata.
   unsigned uav_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      /* location was set to the kernel-argument index by spirv_to_nir;
       * negative means "not a kernel argument".
       */
      int i = var->data.location;
      if (i < 0)
         continue;

      unsigned size = glsl_get_cl_size(var->type);

      metadata->args[i].offset = var->data.driver_location;
      metadata->args[i].size = size;
      metadata->kernel_inputs_buf_size = MAX2(metadata->kernel_inputs_buf_size,
                                              var->data.driver_location + size);
      if ((dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_GLOBAL ||
           dxil->kernel->args[i].address_qualifier == CLC_KERNEL_ARG_ADDRESS_CONSTANT) &&
          // Ignore images during this pass - global memory buffers need to have contiguous bindings
          !glsl_type_is_image(var->type)) {
         metadata->args[i].globconstptr.buf_id = uav_id++;
      } else if (glsl_type_is_sampler(var->type)) {
         /* Sampler state for argument samplers comes from the runtime conf
          * (defaults to all-zero modes when compiling without one).
          */
         unsigned address_mode = conf ? conf->args[i].sampler.addressing_mode : 0u;
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] = wrap_from_cl_addressing(address_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            conf ? !conf->args[i].sampler.normalized_coords : 0;
         int_sampler_states[sampler_id].is_linear_filtering =
            conf ? conf->args[i].sampler.linear_filtering : 0;
         metadata->args[i].sampler.sampler_id = var->data.binding = sampler_id++;
      }
   }

   /* UAVs allocated so far are exactly the global/constant buffer args. */
   unsigned num_global_inputs = uav_id;

   // Second pass over inputs to calculate image bindings
   unsigned srv_id = 0;
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      int i = var->data.location;
      if (i < 0)
         continue;

      if (glsl_type_is_image(var->type)) {
         if (var->data.access == ACCESS_NON_WRITEABLE) {
            metadata->args[i].image.buf_ids[0] = srv_id++;
         } else {
            // Write or read-write are UAVs
            metadata->args[i].image.buf_ids[0] = uav_id++;
         }

         metadata->args[i].image.num_buf_ids = 1;
         var->data.binding = metadata->args[i].image.buf_ids[0];
      }
   }

   // Before removing dead uniforms, dedupe constant samplers to make more dead uniforms
   NIR_PASS_V(nir, clc_nir_dedupe_const_samplers);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform | nir_var_mem_ubo | nir_var_mem_constant | nir_var_function_temp, NULL);

   // Fill out inline sampler metadata, now that they've been deduped and dead ones removed
   nir_foreach_variable_with_modes(var, nir, nir_var_uniform) {
      if (glsl_type_is_sampler(var->type) && var->data.sampler.is_inline_sampler) {
         /* Inline samplers carry their state in the variable itself, so no
          * runtime conf is consulted here (unlike argument samplers above).
          */
         int_sampler_states[sampler_id].wrap[0] =
            int_sampler_states[sampler_id].wrap[1] =
            int_sampler_states[sampler_id].wrap[2] =
            wrap_from_cl_addressing(var->data.sampler.addressing_mode);
         int_sampler_states[sampler_id].is_nonnormalized_coords =
            !var->data.sampler.normalized_coordinates;
         int_sampler_states[sampler_id].is_linear_filtering =
            var->data.sampler.filter_mode == SAMPLER_FILTER_MODE_LINEAR;
         var->data.binding = sampler_id++;

         assert(metadata->num_const_samplers < CLC_MAX_SAMPLERS);
         metadata->const_samplers[metadata->num_const_samplers].sampler_id = var->data.binding;
         metadata->const_samplers[metadata->num_const_samplers].addressing_mode = var->data.sampler.addressing_mode;
         metadata->const_samplers[metadata->num_const_samplers].normalized_coords = var->data.sampler.normalized_coordinates;
         metadata->const_samplers[metadata->num_const_samplers].filter_mode = var->data.sampler.filter_mode;
         metadata->num_const_samplers++;
      }
   }

   // Needs to come before lower_explicit_io
   NIR_PASS_V(nir, nir_lower_readonly_images_to_tex, false);
   struct clc_image_lower_context image_lower_context = { metadata, &srv_id, &uav_id };
   NIR_PASS_V(nir, clc_lower_images, &image_lower_context);
   NIR_PASS_V(nir, clc_lower_nonnormalized_samplers, int_sampler_states);
   NIR_PASS_V(nir, nir_lower_samplers);
   NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex,
              int_sampler_states, NULL, 14.0f);

   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_mem_shared | nir_var_function_temp, NULL);

   /* Re-run explicit-type assignment now that temp variables are final;
    * reset scratch_size first since the earlier run's value was thrown away
    * (see the printf comment above).
    */
   nir->scratch_size = 0;
   NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
              nir_var_mem_shared | nir_var_function_temp | nir_var_mem_global | nir_var_mem_constant,
              glsl_get_cl_type_size_align);

   NIR_PASS_V(nir, dxil_nir_lower_ubo_to_temp);
   NIR_PASS_V(nir, clc_lower_constant_to_ssbo, dxil->kernel, &uav_id);
   NIR_PASS_V(nir, clc_lower_global_to_ssbo);

   /* The printf output buffer, if used, claims the next UAV slot. */
   bool has_printf = false;
   NIR_PASS(has_printf, nir, clc_lower_printf_base, uav_id);
   metadata->printf.uav_id = has_printf ? uav_id++ : -1;

   NIR_PASS_V(nir, dxil_nir_lower_deref_ssbo);

   NIR_PASS_V(nir, split_unaligned_loads_stores);

   assert(nir->info.cs.ptr_size == 64);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              nir_address_format_32bit_index_offset_pack64);
   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_shared | nir_var_function_temp | nir_var_uniform,
              nir_address_format_32bit_offset_as_64bit);

   NIR_PASS_V(nir, nir_lower_system_values);

   /* Offsets for global/workgroup IDs are only lowered in if the runtime
    * says it will supply them.
    */
   nir_lower_compute_system_values_options compute_options = {
      .has_base_global_invocation_id = (conf && conf->support_global_work_id_offsets),
      .has_base_workgroup_id = (conf && conf->support_workgroup_id_offsets),
   };
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_options);

   NIR_PASS_V(nir, clc_lower_64bit_semantics);

   NIR_PASS_V(nir, nir_opt_deref);
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);

   unsigned cbv_id = 0;

   nir_variable *inputs_var =
      add_kernel_inputs_var(dxil, nir, &cbv_id);
   nir_variable *work_properties_var =
      add_work_properties_var(dxil, nir, &cbv_id);

   memcpy(metadata->local_size, nir->info.workgroup_size,
          sizeof(metadata->local_size));
   memcpy(metadata->local_size_hint, nir->info.cs.workgroup_size_hint,
          sizeof(metadata->local_size));

   // Patch the localsize before calling clc_nir_lower_system_values().
   if (conf) {
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (!conf->local_size[i] ||
             conf->local_size[i] == nir->info.workgroup_size[i])
            continue;

         /* A compile-time reqd_work_group_size() must agree with the
          * runtime-requested local size; anything else is an error.
          */
         if (nir->info.workgroup_size[i] &&
             nir->info.workgroup_size[i] != conf->local_size[i]) {
            debug_printf("D3D12: runtime local size does not match reqd_work_group_size() values\n");
            goto err_free_dxil;
         }

         nir->info.workgroup_size[i] = conf->local_size[i];
      }
      memcpy(metadata->local_size, nir->info.workgroup_size,
             sizeof(metadata->local_size));
   } else {
      /* Make sure there's at least one thread that's set to run */
      for (unsigned i = 0; i < ARRAY_SIZE(nir->info.workgroup_size); i++) {
         if (nir->info.workgroup_size[i] == 0)
            nir->info.workgroup_size[i] = 1;
      }
   }

   NIR_PASS_V(nir, clc_nir_lower_kernel_input_loads, inputs_var);
   NIR_PASS_V(nir, split_unaligned_loads_stores);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              nir_address_format_32bit_index_offset);
   NIR_PASS_V(nir, clc_nir_lower_system_values, work_properties_var);
   NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil);
   NIR_PASS_V(nir, dxil_nir_opt_alu_deref_srcs);
   NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   NIR_PASS_V(nir, nir_lower_convert_alu_types, NULL);

   // Convert pack to pack_split
   NIR_PASS_V(nir, nir_lower_pack);
   // Lower pack_split to bit math
   NIR_PASS_V(nir, nir_opt_algebraic);

   NIR_PASS_V(nir, nir_opt_dce);

   nir_validate_shader(nir, "Validate before feeding NIR to the DXIL compiler");
   struct nir_to_dxil_options opts = {
      .interpolate_at_vertex = false,
      .lower_int16 = (conf && (conf->lower_bit_size & 16) != 0),
      .ubo_binding_offset = 0,
      .disable_math_refactoring = true,
      .num_kernel_globals = num_global_inputs,
   };

   /* Lay out __local (shared-memory) pointer arguments, assigning each an
    * aligned offset within the workgroup's shared memory block.
    */
   for (unsigned i = 0; i < dxil->kernel->num_args; i++) {
      if (dxil->kernel->args[i].address_qualifier != CLC_KERNEL_ARG_ADDRESS_LOCAL)
         continue;

      /* If we don't have the runtime conf yet, we just create a dummy variable.
       * This will be adjusted when clc_to_dxil() is called with a conf
       * argument.
       */
      unsigned size = 4;
      if (conf && conf->args)
         size = conf->args[i].localptr.size;

      /* The alignment required for the pointee type is not easy to get from
       * here, so let's base our logic on the size itself. Anything bigger than
       * the maximum alignment constraint (which is 128 bytes, since ulong16 or
       * double16 size are the biggest base types) should be aligned on this
       * maximum alignment constraint. For smaller types, we use the size
       * itself to calculate the alignment.
       */
      unsigned alignment = size < 128 ? (1 << (ffs(size) - 1)) : 128;

      nir->info.shared_size = align(nir->info.shared_size, alignment);
      metadata->args[i].localptr.sharedmem_offset = nir->info.shared_size;
      nir->info.shared_size += size;
   }

   metadata->local_mem_size = nir->info.shared_size;
   metadata->priv_mem_size = nir->scratch_size;

   /* DXIL double math is too limited compared to what NIR expects. Let's refuse
    * to compile a shader when it contains double operations until we have
    * double lowering hooked up.
    */
   if (shader_has_double(nir)) {
      clc_error(logger, "NIR shader contains doubles, which we don't support yet");
      goto err_free_dxil;
   }

   struct blob tmp;
   if (!nir_to_dxil(nir, &opts, &tmp)) {
      debug_printf("D3D12: nir_to_dxil failed\n");
      goto err_free_dxil;
   }

   /* Capture constant-buffer initializer data so the runtime can upload it
    * to the matching UAV before dispatch.
    */
   nir_foreach_variable_with_modes(var, nir, nir_var_mem_ssbo) {
      if (var->constant_initializer) {
         if (glsl_type_is_array(var->type)) {
            int size = align(glsl_get_cl_size(var->type), 4);
            uint8_t *data = malloc(size);
            if (!data)
               goto err_free_dxil;

            copy_const_initializer(var->constant_initializer, var->type, data);
            metadata->consts[metadata->num_consts].data = data;
            metadata->consts[metadata->num_consts].size = size;
            metadata->consts[metadata->num_consts].uav_id = var->data.binding;
            metadata->num_consts++;
         } else
            unreachable("unexpected constant initializer");
      }
   }

   metadata->kernel_inputs_cbv_id = inputs_var ?
inputs_var->data.binding : 0;1460metadata->work_properties_cbv_id = work_properties_var->data.binding;1461metadata->num_uavs = uav_id;1462metadata->num_srvs = srv_id;1463metadata->num_samplers = sampler_id;14641465ralloc_free(nir);1466glsl_type_singleton_decref();14671468blob_finish_get_buffer(&tmp, &dxil->binary.data,1469&dxil->binary.size);1470return dxil;14711472err_free_dxil:1473clc_free_dxil_object(dxil);1474return NULL;1475}14761477void clc_free_dxil_object(struct clc_dxil_object *dxil)1478{1479for (unsigned i = 0; i < dxil->metadata.num_consts; i++)1480free(dxil->metadata.consts[i].data);14811482for (unsigned i = 0; i < dxil->metadata.printf.info_count; i++) {1483free(dxil->metadata.printf.infos[i].arg_sizes);1484free(dxil->metadata.printf.infos[i].str);1485}1486free(dxil->metadata.printf.infos);14871488free(dxil->binary.data);1489free(dxil);1490}14911492uint64_t clc_compiler_get_version()1493{1494const char sha1[] = MESA_GIT_SHA1;1495const char* dash = strchr(sha1, '-');1496if (dash) {1497return strtoull(dash + 1, NULL, 16);1498}1499return 0;1500}150115021503