Path: blob/21.2-virgl/src/compiler/spirv/vtn_opencl.c
4545 views
/*1* Copyright © 2018 Red Hat2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22* Authors:23* Rob Clark ([email protected])24*/2526#include "math.h"27#include "nir/nir_builtin_builder.h"2829#include "util/u_printf.h"30#include "vtn_private.h"31#include "OpenCL.std.h"3233typedef nir_ssa_def *(*nir_handler)(struct vtn_builder *b,34uint32_t opcode,35unsigned num_srcs, nir_ssa_def **srcs,36struct vtn_type **src_types,37const struct vtn_type *dest_type);3839static int to_llvm_address_space(SpvStorageClass mode)40{41switch (mode) {42case SpvStorageClassPrivate:43case SpvStorageClassFunction: return 0;44case SpvStorageClassCrossWorkgroup: return 1;45case SpvStorageClassUniform:46case SpvStorageClassUniformConstant: return 2;47case SpvStorageClassWorkgroup: return 3;48default: return -1;49}50}515253static void54vtn_opencl_mangle(const char *in_name,55uint32_t const_mask,56int ntypes, struct vtn_type **src_types,57char **outstring)58{59char local_name[256] = "";60char *args_str = local_name + sprintf(local_name, "_Z%zu%s", strlen(in_name), in_name);6162for (unsigned i = 0; i < ntypes; ++i) {63const struct glsl_type *type = src_types[i]->type;64enum vtn_base_type base_type = src_types[i]->base_type;65if (src_types[i]->base_type == vtn_base_type_pointer) {66*(args_str++) = 'P';67int address_space = to_llvm_address_space(src_types[i]->storage_class);68if (address_space > 0)69args_str += sprintf(args_str, "U3AS%d", address_space);7071type = src_types[i]->deref->type;72base_type = src_types[i]->deref->base_type;73}7475if (const_mask & (1 << i))76*(args_str++) = 'K';7778unsigned num_elements = glsl_get_components(type);79if (num_elements > 1) {80/* Vectors are not treated as built-ins for mangling, so check for substitution.81* In theory, we'd need to know which substitution value this is. In practice,82* the functions we need from libclc only support 183*/84bool substitution = false;85for (unsigned j = 0; j < i; ++j) {86const struct glsl_type *other_type = src_types[j]->base_type == vtn_base_type_pointer ?87src_types[j]->deref->type : src_types[j]->type;88if (type == other_type) {89substitution = true;90break;91}92}9394if (substitution) {95args_str += sprintf(args_str, "S_");96continue;97} else98args_str += sprintf(args_str, "Dv%d_", num_elements);99}100101const char *suffix = NULL;102switch (base_type) {103case vtn_base_type_sampler: suffix = "11ocl_sampler"; break;104case vtn_base_type_event: suffix = "9ocl_event"; break;105default: {106const char *primitives[] = {107[GLSL_TYPE_UINT] = "j",108[GLSL_TYPE_INT] = "i",109[GLSL_TYPE_FLOAT] = "f",110[GLSL_TYPE_FLOAT16] = "Dh",111[GLSL_TYPE_DOUBLE] = "d",112[GLSL_TYPE_UINT8] = "h",113[GLSL_TYPE_INT8] = "c",114[GLSL_TYPE_UINT16] = "t",115[GLSL_TYPE_INT16] = "s",116[GLSL_TYPE_UINT64] = "m",117[GLSL_TYPE_INT64] = "l",118[GLSL_TYPE_BOOL] = "b",119[GLSL_TYPE_ERROR] = NULL,120};121enum glsl_base_type glsl_base_type = glsl_get_base_type(type);122assert(glsl_base_type < ARRAY_SIZE(primitives) && primitives[glsl_base_type]);123suffix = primitives[glsl_base_type];124break;125}126}127args_str += sprintf(args_str, "%s", suffix);128}129130*outstring = strdup(local_name);131}132133static nir_function *mangle_and_find(struct vtn_builder *b,134const char *name,135uint32_t const_mask,136uint32_t num_srcs,137struct vtn_type **src_types)138{139char *mname;140nir_function *found = NULL;141142vtn_opencl_mangle(name, const_mask, num_srcs, src_types, &mname);143/* try and find in current shader first. */144nir_foreach_function(funcs, b->shader) {145if (!strcmp(funcs->name, mname)) {146found = funcs;147break;148}149}150/* if not found here find in clc shader and create a decl mirroring it */151if (!found && b->options->clc_shader && b->options->clc_shader != b->shader) {152nir_foreach_function(funcs, b->options->clc_shader) {153if (!strcmp(funcs->name, mname)) {154found = funcs;155break;156}157}158if (found) {159nir_function *decl = nir_function_create(b->shader, mname);160decl->num_params = found->num_params;161decl->params = ralloc_array(b->shader, nir_parameter, decl->num_params);162for (unsigned i = 0; i < decl->num_params; i++) {163decl->params[i] = found->params[i];164}165found = decl;166}167}168if (!found)169vtn_fail("Can't find clc function %s\n", mname);170free(mname);171return found;172}173174static bool call_mangled_function(struct vtn_builder *b,175const char *name,176uint32_t const_mask,177uint32_t num_srcs,178struct vtn_type **src_types,179const struct vtn_type *dest_type,180nir_ssa_def **srcs,181nir_deref_instr **ret_deref_ptr)182{183nir_function *found = mangle_and_find(b, name, const_mask, num_srcs, src_types);184if (!found)185return false;186187nir_call_instr *call = nir_call_instr_create(b->shader, found);188189nir_deref_instr *ret_deref = NULL;190uint32_t param_idx = 0;191if (dest_type) {192nir_variable *ret_tmp = nir_local_variable_create(b->nb.impl,193glsl_get_bare_type(dest_type->type),194"return_tmp");195ret_deref = nir_build_deref_var(&b->nb, ret_tmp);196call->params[param_idx++] = nir_src_for_ssa(&ret_deref->dest.ssa);197}198199for (unsigned i = 0; i < num_srcs; i++)200call->params[param_idx++] = nir_src_for_ssa(srcs[i]);201nir_builder_instr_insert(&b->nb, &call->instr);202203*ret_deref_ptr = ret_deref;204return true;205}206207static void208handle_instr(struct vtn_builder *b, uint32_t opcode,209const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest, nir_handler handler)210{211struct vtn_type *dest_type = w_dest ? vtn_get_type(b, w_dest[0]) : NULL;212213nir_ssa_def *srcs[5] = { NULL };214struct vtn_type *src_types[5] = { NULL };215vtn_assert(num_srcs <= ARRAY_SIZE(srcs));216for (unsigned i = 0; i < num_srcs; i++) {217struct vtn_value *val = vtn_untyped_value(b, w_src[i]);218struct vtn_ssa_value *ssa = vtn_ssa_value(b, w_src[i]);219srcs[i] = ssa->def;220src_types[i] = val->type;221}222223nir_ssa_def *result = handler(b, opcode, num_srcs, srcs, src_types, dest_type);224if (result) {225vtn_push_nir_ssa(b, w_dest[1], result);226} else {227vtn_assert(dest_type == NULL);228}229}230231static nir_op232nir_alu_op_for_opencl_opcode(struct vtn_builder *b,233enum OpenCLstd_Entrypoints opcode)234{235switch (opcode) {236case OpenCLstd_Fabs: return nir_op_fabs;237case OpenCLstd_SAbs: return nir_op_iabs;238case OpenCLstd_SAdd_sat: return nir_op_iadd_sat;239case OpenCLstd_UAdd_sat: return nir_op_uadd_sat;240case OpenCLstd_Ceil: return nir_op_fceil;241case OpenCLstd_Floor: return nir_op_ffloor;242case OpenCLstd_SHadd: return nir_op_ihadd;243case OpenCLstd_UHadd: return nir_op_uhadd;244case OpenCLstd_Fmax: return nir_op_fmax;245case OpenCLstd_SMax: return nir_op_imax;246case OpenCLstd_UMax: return nir_op_umax;247case OpenCLstd_Fmin: return nir_op_fmin;248case OpenCLstd_SMin: return nir_op_imin;249case OpenCLstd_UMin: return nir_op_umin;250case OpenCLstd_Mix: return nir_op_flrp;251case OpenCLstd_Native_cos: return nir_op_fcos;252case OpenCLstd_Native_divide: return nir_op_fdiv;253case OpenCLstd_Native_exp2: return nir_op_fexp2;254case OpenCLstd_Native_log2: return nir_op_flog2;255case OpenCLstd_Native_powr: return nir_op_fpow;256case OpenCLstd_Native_recip: return nir_op_frcp;257case OpenCLstd_Native_rsqrt: return nir_op_frsq;258case OpenCLstd_Native_sin: return nir_op_fsin;259case OpenCLstd_Native_sqrt: return nir_op_fsqrt;260case OpenCLstd_SMul_hi: return nir_op_imul_high;261case OpenCLstd_UMul_hi: return nir_op_umul_high;262case OpenCLstd_Popcount: return nir_op_bit_count;263case OpenCLstd_SRhadd: return nir_op_irhadd;264case OpenCLstd_URhadd: return nir_op_urhadd;265case OpenCLstd_Rsqrt: return nir_op_frsq;266case OpenCLstd_Sign: return nir_op_fsign;267case OpenCLstd_Sqrt: return nir_op_fsqrt;268case OpenCLstd_SSub_sat: return nir_op_isub_sat;269case OpenCLstd_USub_sat: return nir_op_usub_sat;270case OpenCLstd_Trunc: return nir_op_ftrunc;271case OpenCLstd_Rint: return nir_op_fround_even;272case OpenCLstd_Half_divide: return nir_op_fdiv;273case OpenCLstd_Half_recip: return nir_op_frcp;274/* uhm... */275case OpenCLstd_UAbs: return nir_op_mov;276default:277vtn_fail("No NIR equivalent");278}279}280281static nir_ssa_def *282handle_alu(struct vtn_builder *b, uint32_t opcode,283unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,284const struct vtn_type *dest_type)285{286nir_ssa_def *ret = nir_build_alu(&b->nb, nir_alu_op_for_opencl_opcode(b, (enum OpenCLstd_Entrypoints)opcode),287srcs[0], srcs[1], srcs[2], NULL);288if (opcode == OpenCLstd_Popcount)289ret = nir_u2u(&b->nb, ret, glsl_get_bit_size(dest_type->type));290return ret;291}292293#define REMAP(op, str) [OpenCLstd_##op] = { str }294static const struct {295const char *fn;296} remap_table[] = {297REMAP(Distance, "distance"),298REMAP(Fast_distance, "fast_distance"),299REMAP(Fast_length, "fast_length"),300REMAP(Fast_normalize, "fast_normalize"),301REMAP(Half_rsqrt, "half_rsqrt"),302REMAP(Half_sqrt, "half_sqrt"),303REMAP(Length, "length"),304REMAP(Normalize, "normalize"),305REMAP(Degrees, "degrees"),306REMAP(Radians, "radians"),307REMAP(Rotate, "rotate"),308REMAP(Smoothstep, "smoothstep"),309REMAP(Step, "step"),310311REMAP(Pow, "pow"),312REMAP(Pown, "pown"),313REMAP(Powr, "powr"),314REMAP(Rootn, "rootn"),315REMAP(Modf, "modf"),316317REMAP(Acos, "acos"),318REMAP(Acosh, "acosh"),319REMAP(Acospi, "acospi"),320REMAP(Asin, "asin"),321REMAP(Asinh, "asinh"),322REMAP(Asinpi, "asinpi"),323REMAP(Atan, "atan"),324REMAP(Atan2, "atan2"),325REMAP(Atanh, "atanh"),326REMAP(Atanpi, "atanpi"),327REMAP(Atan2pi, "atan2pi"),328REMAP(Cos, "cos"),329REMAP(Cosh, "cosh"),330REMAP(Cospi, "cospi"),331REMAP(Sin, "sin"),332REMAP(Sinh, "sinh"),333REMAP(Sinpi, "sinpi"),334REMAP(Tan, "tan"),335REMAP(Tanh, "tanh"),336REMAP(Tanpi, "tanpi"),337REMAP(Sincos, "sincos"),338REMAP(Fract, "fract"),339REMAP(Frexp, "frexp"),340REMAP(Fma, "fma"),341REMAP(Fmod, "fmod"),342343REMAP(Half_cos, "cos"),344REMAP(Half_exp, "exp"),345REMAP(Half_exp2, "exp2"),346REMAP(Half_exp10, "exp10"),347REMAP(Half_log, "log"),348REMAP(Half_log2, "log2"),349REMAP(Half_log10, "log10"),350REMAP(Half_powr, "powr"),351REMAP(Half_sin, "sin"),352REMAP(Half_tan, "tan"),353354REMAP(Remainder, "remainder"),355REMAP(Remquo, "remquo"),356REMAP(Hypot, "hypot"),357REMAP(Exp, "exp"),358REMAP(Exp2, "exp2"),359REMAP(Exp10, "exp10"),360REMAP(Expm1, "expm1"),361REMAP(Ldexp, "ldexp"),362363REMAP(Ilogb, "ilogb"),364REMAP(Log, "log"),365REMAP(Log2, "log2"),366REMAP(Log10, "log10"),367REMAP(Log1p, "log1p"),368REMAP(Logb, "logb"),369370REMAP(Cbrt, "cbrt"),371REMAP(Erfc, "erfc"),372REMAP(Erf, "erf"),373374REMAP(Lgamma, "lgamma"),375REMAP(Lgamma_r, "lgamma_r"),376REMAP(Tgamma, "tgamma"),377378REMAP(UMad_sat, "mad_sat"),379REMAP(SMad_sat, "mad_sat"),380381REMAP(Shuffle, "shuffle"),382REMAP(Shuffle2, "shuffle2"),383};384#undef REMAP385386static const char *remap_clc_opcode(enum OpenCLstd_Entrypoints opcode)387{388if (opcode >= (sizeof(remap_table) / sizeof(const char *)))389return NULL;390return remap_table[opcode].fn;391}392393static struct vtn_type *394get_vtn_type_for_glsl_type(struct vtn_builder *b, const struct glsl_type *type)395{396struct vtn_type *ret = rzalloc(b, struct vtn_type);397assert(glsl_type_is_vector_or_scalar(type));398ret->type = type;399ret->length = glsl_get_vector_elements(type);400ret->base_type = glsl_type_is_vector(type) ? vtn_base_type_vector : vtn_base_type_scalar;401return ret;402}403404static struct vtn_type *405get_pointer_type(struct vtn_builder *b, struct vtn_type *t, SpvStorageClass storage_class)406{407struct vtn_type *ret = rzalloc(b, struct vtn_type);408ret->type = nir_address_format_to_glsl_type(409vtn_mode_to_address_format(410b, vtn_storage_class_to_mode(b, storage_class, NULL, NULL)));411ret->base_type = vtn_base_type_pointer;412ret->storage_class = storage_class;413ret->deref = t;414return ret;415}416417static struct vtn_type *418get_signed_type(struct vtn_builder *b, struct vtn_type *t)419{420if (t->base_type == vtn_base_type_pointer) {421return get_pointer_type(b, get_signed_type(b, t->deref), t->storage_class);422}423return get_vtn_type_for_glsl_type(424b, glsl_vector_type(glsl_signed_base_type_of(glsl_get_base_type(t->type)),425glsl_get_vector_elements(t->type)));426}427428static nir_ssa_def *429handle_clc_fn(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,430int num_srcs,431nir_ssa_def **srcs,432struct vtn_type **src_types,433const struct vtn_type *dest_type)434{435const char *name = remap_clc_opcode(opcode);436if (!name)437return NULL;438439/* Some functions which take params end up with uint (or pointer-to-uint) being passed,440* which doesn't mangle correctly when the function expects int or pointer-to-int.441* See https://www.khronos.org/registry/spir-v/specs/unified1/SPIRV.html#_a_id_unsignedsigned_a_unsigned_versus_signed_integers442*/443int signed_param = -1;444switch (opcode) {445case OpenCLstd_Frexp:446case OpenCLstd_Lgamma_r:447case OpenCLstd_Pown:448case OpenCLstd_Rootn:449case OpenCLstd_Ldexp:450signed_param = 1;451break;452case OpenCLstd_Remquo:453signed_param = 2;454break;455case OpenCLstd_SMad_sat: {456/* All parameters need to be converted to signed */457src_types[0] = src_types[1] = src_types[2] = get_signed_type(b, src_types[0]);458break;459}460default: break;461}462463if (signed_param >= 0) {464src_types[signed_param] = get_signed_type(b, src_types[signed_param]);465}466467nir_deref_instr *ret_deref = NULL;468469if (!call_mangled_function(b, name, 0, num_srcs, src_types,470dest_type, srcs, &ret_deref))471return NULL;472473return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;474}475476static nir_ssa_def *477handle_special(struct vtn_builder *b, uint32_t opcode,478unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,479const struct vtn_type *dest_type)480{481nir_builder *nb = &b->nb;482enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints)opcode;483484switch (cl_opcode) {485case OpenCLstd_SAbs_diff:486/* these works easier in direct NIR */487return nir_iabs_diff(nb, srcs[0], srcs[1]);488case OpenCLstd_UAbs_diff:489return nir_uabs_diff(nb, srcs[0], srcs[1]);490case OpenCLstd_Bitselect:491return nir_bitselect(nb, srcs[0], srcs[1], srcs[2]);492case OpenCLstd_SMad_hi:493return nir_imad_hi(nb, srcs[0], srcs[1], srcs[2]);494case OpenCLstd_UMad_hi:495return nir_umad_hi(nb, srcs[0], srcs[1], srcs[2]);496case OpenCLstd_SMul24:497return nir_imul24_relaxed(nb, srcs[0], srcs[1]);498case OpenCLstd_UMul24:499return nir_umul24_relaxed(nb, srcs[0], srcs[1]);500case OpenCLstd_SMad24:501return nir_iadd(nb, nir_imul24_relaxed(nb, srcs[0], srcs[1]), srcs[2]);502case OpenCLstd_UMad24:503return nir_umad24_relaxed(nb, srcs[0], srcs[1], srcs[2]);504case OpenCLstd_FClamp:505return nir_fclamp(nb, srcs[0], srcs[1], srcs[2]);506case OpenCLstd_SClamp:507return nir_iclamp(nb, srcs[0], srcs[1], srcs[2]);508case OpenCLstd_UClamp:509return nir_uclamp(nb, srcs[0], srcs[1], srcs[2]);510case OpenCLstd_Copysign:511return nir_copysign(nb, srcs[0], srcs[1]);512case OpenCLstd_Cross:513if (dest_type->length == 4)514return nir_cross4(nb, srcs[0], srcs[1]);515return nir_cross3(nb, srcs[0], srcs[1]);516case OpenCLstd_Fdim:517return nir_fdim(nb, srcs[0], srcs[1]);518case OpenCLstd_Fmod:519if (nb->shader->options->lower_fmod)520break;521return nir_fmod(nb, srcs[0], srcs[1]);522case OpenCLstd_Mad:523return nir_fmad(nb, srcs[0], srcs[1], srcs[2]);524case OpenCLstd_Maxmag:525return nir_maxmag(nb, srcs[0], srcs[1]);526case OpenCLstd_Minmag:527return nir_minmag(nb, srcs[0], srcs[1]);528case OpenCLstd_Nan:529return nir_nan(nb, srcs[0]);530case OpenCLstd_Nextafter:531return nir_nextafter(nb, srcs[0], srcs[1]);532case OpenCLstd_Normalize:533return nir_normalize(nb, srcs[0]);534case OpenCLstd_Clz:535return nir_clz_u(nb, srcs[0]);536case OpenCLstd_Ctz:537return nir_ctz_u(nb, srcs[0]);538case OpenCLstd_Select:539return nir_select(nb, srcs[0], srcs[1], srcs[2]);540case OpenCLstd_S_Upsample:541case OpenCLstd_U_Upsample:542/* SPIR-V and CL have different defs for upsample, just implement in nir */543return nir_upsample(nb, srcs[0], srcs[1]);544case OpenCLstd_Native_exp:545return nir_fexp(nb, srcs[0]);546case OpenCLstd_Native_exp10:547return nir_fexp2(nb, nir_fmul_imm(nb, srcs[0], log(10) / log(2)));548case OpenCLstd_Native_log:549return nir_flog(nb, srcs[0]);550case OpenCLstd_Native_log10:551return nir_fmul_imm(nb, nir_flog2(nb, srcs[0]), log(2) / log(10));552case OpenCLstd_Native_tan:553return nir_ftan(nb, srcs[0]);554case OpenCLstd_Ldexp:555if (nb->shader->options->lower_ldexp)556break;557return nir_ldexp(nb, srcs[0], srcs[1]);558case OpenCLstd_Fma:559/* FIXME: the software implementation only supports fp32 for now. */560if (nb->shader->options->lower_ffma32 && srcs[0]->bit_size == 32)561break;562return nir_ffma(nb, srcs[0], srcs[1], srcs[2]);563default:564break;565}566567nir_ssa_def *ret = handle_clc_fn(b, opcode, num_srcs, srcs, src_types, dest_type);568if (!ret)569vtn_fail("No NIR equivalent");570571return ret;572}573574static nir_ssa_def *575handle_core(struct vtn_builder *b, uint32_t opcode,576unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,577const struct vtn_type *dest_type)578{579nir_deref_instr *ret_deref = NULL;580581switch ((SpvOp)opcode) {582case SpvOpGroupAsyncCopy: {583/* Libclc doesn't include 3-component overloads of the async copy functions.584* However, the CLC spec says:585* async_work_group_copy and async_work_group_strided_copy for 3-component vector types586* behave as async_work_group_copy and async_work_group_strided_copy respectively for 4-component587* vector types588*/589for (unsigned i = 0; i < num_srcs; ++i) {590if (src_types[i]->base_type == vtn_base_type_pointer &&591src_types[i]->deref->base_type == vtn_base_type_vector &&592src_types[i]->deref->length == 3) {593src_types[i] =594get_pointer_type(b,595get_vtn_type_for_glsl_type(b, glsl_replace_vector_type(src_types[i]->deref->type, 4)),596src_types[i]->storage_class);597}598}599if (!call_mangled_function(b, "async_work_group_strided_copy", (1 << 1), num_srcs, src_types, dest_type, srcs, &ret_deref))600return NULL;601break;602}603case SpvOpGroupWaitEvents: {604src_types[0] = get_vtn_type_for_glsl_type(b, glsl_int_type());605if (!call_mangled_function(b, "wait_group_events", 0, num_srcs, src_types, dest_type, srcs, &ret_deref))606return NULL;607break;608}609default:610return NULL;611}612613return ret_deref ? nir_load_deref(&b->nb, ret_deref) : NULL;614}615616617static void618_handle_v_load_store(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,619const uint32_t *w, unsigned count, bool load,620bool vec_aligned, nir_rounding_mode rounding)621{622struct vtn_type *type;623if (load)624type = vtn_get_type(b, w[1]);625else626type = vtn_get_value_type(b, w[5]);627unsigned a = load ? 0 : 1;628629enum glsl_base_type base_type = glsl_get_base_type(type->type);630unsigned components = glsl_get_vector_elements(type->type);631632nir_ssa_def *offset = vtn_get_nir_ssa(b, w[5 + a]);633struct vtn_value *p = vtn_value(b, w[6 + a], vtn_value_type_pointer);634635struct vtn_ssa_value *comps[NIR_MAX_VEC_COMPONENTS];636nir_ssa_def *ncomps[NIR_MAX_VEC_COMPONENTS];637638nir_ssa_def *moffset = nir_imul_imm(&b->nb, offset,639(vec_aligned && components == 3) ? 4 : components);640nir_deref_instr *deref = vtn_pointer_to_deref(b, p->pointer);641642unsigned alignment = vec_aligned ? glsl_get_cl_alignment(type->type) :643glsl_get_bit_size(type->type) / 8;644enum glsl_base_type ptr_base_type =645glsl_get_base_type(p->pointer->type->type);646if (base_type != ptr_base_type) {647vtn_fail_if(ptr_base_type != GLSL_TYPE_FLOAT16 ||648(base_type != GLSL_TYPE_FLOAT &&649base_type != GLSL_TYPE_DOUBLE),650"vload/vstore cannot do type conversion. "651"vload/vstore_half can only convert from half to other "652"floating-point types.");653654/* Above-computed alignment was for floats/doubles, not halves */655alignment /= glsl_get_bit_size(type->type) / glsl_base_type_get_bit_size(ptr_base_type);656}657658deref = nir_alignment_deref_cast(&b->nb, deref, alignment, 0);659660for (int i = 0; i < components; i++) {661nir_ssa_def *coffset = nir_iadd_imm(&b->nb, moffset, i);662nir_deref_instr *arr_deref = nir_build_deref_ptr_as_array(&b->nb, deref, coffset);663664if (load) {665comps[i] = vtn_local_load(b, arr_deref, p->type->access);666ncomps[i] = comps[i]->def;667if (base_type != ptr_base_type) {668assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&669(base_type == GLSL_TYPE_FLOAT ||670base_type == GLSL_TYPE_DOUBLE));671ncomps[i] = nir_f2fN(&b->nb, ncomps[i],672glsl_base_type_get_bit_size(base_type));673}674} else {675struct vtn_ssa_value *ssa = vtn_create_ssa_value(b, glsl_scalar_type(base_type));676struct vtn_ssa_value *val = vtn_ssa_value(b, w[5]);677ssa->def = nir_channel(&b->nb, val->def, i);678if (base_type != ptr_base_type) {679assert(ptr_base_type == GLSL_TYPE_FLOAT16 &&680(base_type == GLSL_TYPE_FLOAT ||681base_type == GLSL_TYPE_DOUBLE));682if (rounding == nir_rounding_mode_undef) {683ssa->def = nir_f2f16(&b->nb, ssa->def);684} else {685ssa->def = nir_convert_alu_types(&b->nb, 16, ssa->def,686nir_type_float | ssa->def->bit_size,687nir_type_float16,688rounding, false);689}690}691vtn_local_store(b, ssa, arr_deref, p->type->access);692}693}694if (load) {695vtn_push_nir_ssa(b, w[2], nir_vec(&b->nb, ncomps, components));696}697}698699static void700vtn_handle_opencl_vload(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,701const uint32_t *w, unsigned count)702{703_handle_v_load_store(b, opcode, w, count, true,704opcode == OpenCLstd_Vloada_halfn,705nir_rounding_mode_undef);706}707708static void709vtn_handle_opencl_vstore(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,710const uint32_t *w, unsigned count)711{712_handle_v_load_store(b, opcode, w, count, false,713opcode == OpenCLstd_Vstorea_halfn,714nir_rounding_mode_undef);715}716717static void718vtn_handle_opencl_vstore_half_r(struct vtn_builder *b, enum OpenCLstd_Entrypoints opcode,719const uint32_t *w, unsigned count)720{721_handle_v_load_store(b, opcode, w, count, false,722opcode == OpenCLstd_Vstorea_halfn_r,723vtn_rounding_mode_to_nir(b, w[8]));724}725726static unsigned727vtn_add_printf_string(struct vtn_builder *b, uint32_t id, nir_printf_info *info)728{729nir_deref_instr *deref = vtn_nir_deref(b, id);730731while (deref && deref->deref_type != nir_deref_type_var)732deref = nir_deref_instr_parent(deref);733734vtn_fail_if(deref == NULL || !nir_deref_mode_is(deref, nir_var_mem_constant),735"Printf string argument must be a pointer to a constant variable");736vtn_fail_if(deref->var->constant_initializer == NULL,737"Printf string argument must have an initializer");738vtn_fail_if(!glsl_type_is_array(deref->var->type),739"Printf string must be an char array");740const struct glsl_type *char_type = glsl_get_array_element(deref->var->type);741vtn_fail_if(char_type != glsl_uint8_t_type() &&742char_type != glsl_int8_t_type(),743"Printf string must be an char array");744745nir_constant *c = deref->var->constant_initializer;746assert(c->num_elements == glsl_get_length(deref->var->type));747748unsigned idx = info->string_size;749info->strings = reralloc_size(b->shader, info->strings,750idx + c->num_elements);751info->string_size += c->num_elements;752753char *str = &info->strings[idx];754bool found_null = false;755for (unsigned i = 0; i < c->num_elements; i++) {756memcpy((char *)str + i, c->elements[i]->values, 1);757if (str[i] == '\0')758found_null = true;759}760vtn_fail_if(!found_null, "Printf string must be null terminated");761return idx;762}763764/* printf is special because there are no limits on args */765static void766handle_printf(struct vtn_builder *b, uint32_t opcode,767const uint32_t *w_src, unsigned num_srcs, const uint32_t *w_dest)768{769if (!b->options->caps.printf) {770vtn_push_nir_ssa(b, w_dest[1], nir_imm_int(&b->nb, -1));771return;772}773774/* Step 1. extract the format string */775776/*777* info_idx is 1-based to match clover/llvm778* the backend indexes the info table at info_idx - 1.779*/780b->shader->printf_info_count++;781unsigned info_idx = b->shader->printf_info_count;782783b->shader->printf_info = reralloc(b->shader, b->shader->printf_info,784nir_printf_info, info_idx);785nir_printf_info *info = &b->shader->printf_info[info_idx - 1];786787info->strings = NULL;788info->string_size = 0;789790vtn_add_printf_string(b, w_src[0], info);791792info->num_args = num_srcs - 1;793info->arg_sizes = ralloc_array(b->shader, unsigned, info->num_args);794795/* Step 2, build an ad-hoc struct type out of the args */796unsigned field_offset = 0;797struct glsl_struct_field *fields =798rzalloc_array(b, struct glsl_struct_field, num_srcs - 1);799for (unsigned i = 1; i < num_srcs; ++i) {800struct vtn_value *val = vtn_untyped_value(b, w_src[i]);801struct vtn_type *src_type = val->type;802fields[i - 1].type = src_type->type;803fields[i - 1].name = ralloc_asprintf(b->shader, "arg_%u", i);804field_offset = align(field_offset, 4);805fields[i - 1].offset = field_offset;806info->arg_sizes[i - 1] = glsl_get_cl_size(src_type->type);807field_offset += glsl_get_cl_size(src_type->type);808}809const struct glsl_type *struct_type =810glsl_struct_type(fields, num_srcs - 1, "printf", true);811812/* Step 3, create a variable of that type and populate its fields */813nir_variable *var = nir_local_variable_create(b->func->nir_func->impl,814struct_type, NULL);815nir_deref_instr *deref_var = nir_build_deref_var(&b->nb, var);816size_t fmt_pos = 0;817for (unsigned i = 1; i < num_srcs; ++i) {818nir_deref_instr *field_deref =819nir_build_deref_struct(&b->nb, deref_var, i - 1);820nir_ssa_def *field_src = vtn_ssa_value(b, w_src[i])->def;821/* extract strings */822fmt_pos = util_printf_next_spec_pos(info->strings, fmt_pos);823if (fmt_pos != -1 && info->strings[fmt_pos] == 's') {824unsigned idx = vtn_add_printf_string(b, w_src[i], info);825nir_store_deref(&b->nb, field_deref,826nir_imm_intN_t(&b->nb, idx, field_src->bit_size),827~0 /* write_mask */);828} else829nir_store_deref(&b->nb, field_deref, field_src, ~0);830}831832/* Lastly, the actual intrinsic */833nir_ssa_def *fmt_idx = nir_imm_int(&b->nb, info_idx);834nir_ssa_def *ret = nir_printf(&b->nb, fmt_idx, &deref_var->dest.ssa);835vtn_push_nir_ssa(b, w_dest[1], ret);836}837838static nir_ssa_def *839handle_round(struct vtn_builder *b, uint32_t opcode,840unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,841const struct vtn_type *dest_type)842{843nir_ssa_def *src = srcs[0];844nir_builder *nb = &b->nb;845nir_ssa_def *half = nir_imm_floatN_t(nb, 0.5, src->bit_size);846nir_ssa_def *truncated = nir_ftrunc(nb, src);847nir_ssa_def *remainder = nir_fsub(nb, src, truncated);848849return nir_bcsel(nb, nir_fge(nb, nir_fabs(nb, remainder), half),850nir_fadd(nb, truncated, nir_fsign(nb, src)), truncated);851}852853static nir_ssa_def *854handle_shuffle(struct vtn_builder *b, uint32_t opcode,855unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,856const struct vtn_type *dest_type)857{858struct nir_ssa_def *input = srcs[0];859struct nir_ssa_def *mask = srcs[1];860861unsigned out_elems = dest_type->length;862nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];863unsigned in_elems = input->num_components;864if (mask->bit_size != 32)865mask = nir_u2u32(&b->nb, mask);866mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, in_elems - 1, mask->bit_size));867for (unsigned i = 0; i < out_elems; i++)868outres[i] = nir_vector_extract(&b->nb, input, nir_channel(&b->nb, mask, i));869870return nir_vec(&b->nb, outres, out_elems);871}872873static nir_ssa_def *874handle_shuffle2(struct vtn_builder *b, uint32_t opcode,875unsigned num_srcs, nir_ssa_def **srcs, struct vtn_type **src_types,876const struct vtn_type *dest_type)877{878struct nir_ssa_def *input0 = srcs[0];879struct nir_ssa_def *input1 = srcs[1];880struct nir_ssa_def *mask = srcs[2];881882unsigned out_elems = dest_type->length;883nir_ssa_def *outres[NIR_MAX_VEC_COMPONENTS];884unsigned in_elems = input0->num_components;885unsigned total_mask = 2 * in_elems - 1;886unsigned half_mask = in_elems - 1;887if (mask->bit_size != 32)888mask = nir_u2u32(&b->nb, mask);889mask = nir_iand(&b->nb, mask, nir_imm_intN_t(&b->nb, total_mask, mask->bit_size));890for (unsigned i = 0; i < out_elems; i++) {891nir_ssa_def *this_mask = nir_channel(&b->nb, mask, i);892nir_ssa_def *vmask = nir_iand(&b->nb, this_mask, nir_imm_intN_t(&b->nb, half_mask, mask->bit_size));893nir_ssa_def *val0 = nir_vector_extract(&b->nb, input0, vmask);894nir_ssa_def *val1 = nir_vector_extract(&b->nb, input1, vmask);895nir_ssa_def *sel = nir_ilt(&b->nb, this_mask, nir_imm_intN_t(&b->nb, in_elems, mask->bit_size));896outres[i] = nir_bcsel(&b->nb, sel, val0, val1);897}898return nir_vec(&b->nb, outres, out_elems);899}900901bool902vtn_handle_opencl_instruction(struct vtn_builder *b, SpvOp ext_opcode,903const uint32_t *w, unsigned count)904{905enum OpenCLstd_Entrypoints cl_opcode = (enum OpenCLstd_Entrypoints) ext_opcode;906907switch (cl_opcode) {908case OpenCLstd_Fabs:909case OpenCLstd_SAbs:910case OpenCLstd_UAbs:911case OpenCLstd_SAdd_sat:912case OpenCLstd_UAdd_sat:913case OpenCLstd_Ceil:914case OpenCLstd_Floor:915case OpenCLstd_Fmax:916case OpenCLstd_SHadd:917case OpenCLstd_UHadd:918case OpenCLstd_SMax:919case OpenCLstd_UMax:920case OpenCLstd_Fmin:921case OpenCLstd_SMin:922case OpenCLstd_UMin:923case OpenCLstd_Mix:924case OpenCLstd_Native_cos:925case OpenCLstd_Native_divide:926case OpenCLstd_Native_exp2:927case OpenCLstd_Native_log2:928case OpenCLstd_Native_powr:929case OpenCLstd_Native_recip:930case OpenCLstd_Native_rsqrt:931case OpenCLstd_Native_sin:932case OpenCLstd_Native_sqrt:933case OpenCLstd_SMul_hi:934case OpenCLstd_UMul_hi:935case OpenCLstd_Popcount:936case OpenCLstd_SRhadd:937case OpenCLstd_URhadd:938case OpenCLstd_Rsqrt:939case OpenCLstd_Sign:940case OpenCLstd_Sqrt:941case OpenCLstd_SSub_sat:942case OpenCLstd_USub_sat:943case OpenCLstd_Trunc:944case OpenCLstd_Rint:945case OpenCLstd_Half_divide:946case OpenCLstd_Half_recip:947handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_alu);948return true;949case OpenCLstd_SAbs_diff:950case OpenCLstd_UAbs_diff:951case OpenCLstd_SMad_hi:952case OpenCLstd_UMad_hi:953case OpenCLstd_SMad24:954case OpenCLstd_UMad24:955case OpenCLstd_SMul24:956case OpenCLstd_UMul24:957case OpenCLstd_Bitselect:958case OpenCLstd_FClamp:959case OpenCLstd_SClamp:960case OpenCLstd_UClamp:961case OpenCLstd_Copysign:962case OpenCLstd_Cross:963case OpenCLstd_Degrees:964case OpenCLstd_Fdim:965case OpenCLstd_Fma:966case OpenCLstd_Distance:967case OpenCLstd_Fast_distance:968case OpenCLstd_Fast_length:969case OpenCLstd_Fast_normalize:970case OpenCLstd_Half_rsqrt:971case OpenCLstd_Half_sqrt:972case OpenCLstd_Length:973case OpenCLstd_Mad:974case OpenCLstd_Maxmag:975case OpenCLstd_Minmag:976case OpenCLstd_Nan:977case OpenCLstd_Nextafter:978case OpenCLstd_Normalize:979case OpenCLstd_Radians:980case OpenCLstd_Rotate:981case OpenCLstd_Select:982case OpenCLstd_Step:983case OpenCLstd_Smoothstep:984case OpenCLstd_S_Upsample:985case OpenCLstd_U_Upsample:986case OpenCLstd_Clz:987case OpenCLstd_Ctz:988case OpenCLstd_Native_exp:989case OpenCLstd_Native_exp10:990case OpenCLstd_Native_log:991case OpenCLstd_Native_log10:992case OpenCLstd_Acos:993case OpenCLstd_Acosh:994case OpenCLstd_Acospi:995case OpenCLstd_Asin:996case OpenCLstd_Asinh:997case OpenCLstd_Asinpi:998case OpenCLstd_Atan:999case OpenCLstd_Atan2:1000case OpenCLstd_Atanh:1001case OpenCLstd_Atanpi:1002case OpenCLstd_Atan2pi:1003case OpenCLstd_Fract:1004case OpenCLstd_Frexp:1005case OpenCLstd_Exp:1006case OpenCLstd_Exp2:1007case OpenCLstd_Expm1:1008case OpenCLstd_Exp10:1009case OpenCLstd_Fmod:1010case OpenCLstd_Ilogb:1011case OpenCLstd_Log:1012case OpenCLstd_Log2:1013case OpenCLstd_Log10:1014case OpenCLstd_Log1p:1015case OpenCLstd_Logb:1016case OpenCLstd_Ldexp:1017case OpenCLstd_Cos:1018case OpenCLstd_Cosh:1019case OpenCLstd_Cospi:1020case OpenCLstd_Sin:1021case OpenCLstd_Sinh:1022case OpenCLstd_Sinpi:1023case OpenCLstd_Tan:1024case OpenCLstd_Tanh:1025case OpenCLstd_Tanpi:1026case OpenCLstd_Cbrt:1027case OpenCLstd_Erfc:1028case OpenCLstd_Erf:1029case OpenCLstd_Lgamma:1030case OpenCLstd_Lgamma_r:1031case OpenCLstd_Tgamma:1032case OpenCLstd_Pow:1033case OpenCLstd_Powr:1034case OpenCLstd_Pown:1035case OpenCLstd_Rootn:1036case OpenCLstd_Remainder:1037case OpenCLstd_Remquo:1038case OpenCLstd_Hypot:1039case OpenCLstd_Sincos:1040case OpenCLstd_Modf:1041case OpenCLstd_UMad_sat:1042case OpenCLstd_SMad_sat:1043case OpenCLstd_Native_tan:1044case OpenCLstd_Half_cos:1045case OpenCLstd_Half_exp:1046case OpenCLstd_Half_exp2:1047case OpenCLstd_Half_exp10:1048case OpenCLstd_Half_log:1049case OpenCLstd_Half_log2:1050case OpenCLstd_Half_log10:1051case OpenCLstd_Half_powr:1052case OpenCLstd_Half_sin:1053case OpenCLstd_Half_tan:1054handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_special);1055return true;1056case OpenCLstd_Vloadn:1057case OpenCLstd_Vload_half:1058case OpenCLstd_Vload_halfn:1059case OpenCLstd_Vloada_halfn:1060vtn_handle_opencl_vload(b, cl_opcode, w, count);1061return true;1062case OpenCLstd_Vstoren:1063case OpenCLstd_Vstore_half:1064case OpenCLstd_Vstore_halfn:1065case OpenCLstd_Vstorea_halfn:1066vtn_handle_opencl_vstore(b, cl_opcode, w, count);1067return true;1068case OpenCLstd_Vstore_half_r:1069case OpenCLstd_Vstore_halfn_r:1070case OpenCLstd_Vstorea_halfn_r:1071vtn_handle_opencl_vstore_half_r(b, cl_opcode, w, count);1072return true;1073case OpenCLstd_Shuffle:1074handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle);1075return true;1076case OpenCLstd_Shuffle2:1077handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_shuffle2);1078return true;1079case OpenCLstd_Round:1080handle_instr(b, ext_opcode, w + 5, count - 5, w + 1, handle_round);1081return true;1082case OpenCLstd_Printf:1083handle_printf(b, ext_opcode, w + 5, count - 5, w + 1);1084return true;1085case OpenCLstd_Prefetch:1086/* TODO maybe add a nir instruction for this? */1087return true;1088default:1089vtn_fail("unhandled opencl opc: %u\n", ext_opcode);1090return false;1091}1092}10931094bool1095vtn_handle_opencl_core_instruction(struct vtn_builder *b, SpvOp opcode,1096const uint32_t *w, unsigned count)1097{1098switch (opcode) {1099case SpvOpGroupAsyncCopy:1100handle_instr(b, opcode, w + 4, count - 4, w + 1, handle_core);1101return true;1102case SpvOpGroupWaitEvents:1103handle_instr(b, opcode, w + 2, count - 2, NULL, handle_core);1104return true;1105default:1106return false;1107}1108return true;1109}111011111112