Path: blob/21.2-virgl/src/compiler/glsl/glsl_to_nir.cpp
/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott ([email protected])
 *
 */

#include "float64_glsl.h"
#include "glsl_to_nir.h"
#include "ir_visitor.h"
#include "ir_hierarchical_visitor.h"
#include "ir.h"
#include "ir_optimization.h"
#include "program.h"
#include "compiler/nir/nir_control_flow.h"
#include "compiler/nir/nir_builder.h"
#include "compiler/nir/nir_builtin_builder.h"
#include "compiler/nir/nir_deref.h"
#include "main/errors.h"
#include "main/mtypes.h"
#include "main/shaderobj.h"
#include "util/u_math.h"

/*
 * pass to lower GLSL IR to NIR
 *
 * This will lower variable dereferences to loads/stores of corresponding
 * variables in NIR - the variables will be converted to registers in a later
 * pass.
 */

namespace {

class nir_visitor : public ir_visitor
{
public:
   nir_visitor(gl_context *ctx, nir_shader *shader);
   ~nir_visitor();

   virtual void visit(ir_variable *);
   virtual void visit(ir_function *);
   virtual void visit(ir_function_signature *);
   virtual void visit(ir_loop *);
   virtual void visit(ir_if *);
   virtual void visit(ir_discard *);
   virtual void visit(ir_demote *);
   virtual void visit(ir_loop_jump *);
   virtual void visit(ir_return *);
   virtual void visit(ir_call *);
   virtual void visit(ir_assignment *);
   virtual void visit(ir_emit_vertex *);
   virtual void visit(ir_end_primitive *);
   virtual void visit(ir_expression *);
   virtual void visit(ir_swizzle *);
   virtual void visit(ir_texture *);
   virtual void visit(ir_constant *);
   virtual void visit(ir_dereference_variable *);
   virtual void visit(ir_dereference_record *);
   virtual void visit(ir_dereference_array *);
   virtual void visit(ir_barrier *);

   void create_function(ir_function_signature *ir);

private:
   void add_instr(nir_instr *instr, unsigned num_components, unsigned bit_size);
   nir_ssa_def *evaluate_rvalue(ir_rvalue *ir);

   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2);
   nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1,
                       nir_ssa_def *src2, nir_ssa_def *src3);

   bool supports_std430;

   nir_shader *shader;
   nir_function_impl *impl;
   nir_builder b;
   nir_ssa_def *result; /* result of the expression tree last visited */

   nir_deref_instr *evaluate_deref(ir_instruction *ir);

   nir_constant *constant_copy(ir_constant *ir, void *mem_ctx);

   /* most recent deref instruction created */
   nir_deref_instr *deref;

   /* whether the IR we're operating on is per-function or global */
   bool is_global;

   ir_function_signature *sig;

   /* map of ir_variable -> nir_variable */
   struct hash_table *var_table;

   /* map of ir_function_signature -> nir_function_overload */
   struct hash_table *overload_table;
};

/*
 * This visitor runs before the main visitor, calling create_function() for
 * each function so that the main visitor can resolve forward references in
 * calls.
 */

class nir_function_visitor : public ir_hierarchical_visitor
{
public:
   nir_function_visitor(nir_visitor *v) : visitor(v)
   {
   }
   virtual ir_visitor_status visit_enter(ir_function *);

private:
   nir_visitor *visitor;
};

/* glsl_to_nir can only handle converting certain function parameters
 * to NIR. This visitor checks for parameters it can't currently handle.
 */
class ir_function_param_visitor : public ir_hierarchical_visitor
{
public:
   ir_function_param_visitor()
      : unsupported(false)
   {
   }

   virtual ir_visitor_status visit_enter(ir_function_signature *ir)
   {

      if (ir->is_intrinsic())
         return visit_continue;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         if (!param->type->is_vector() || !param->type->is_scalar()) {
            unsupported = true;
            return visit_stop;
         }

         if (param->data.mode == ir_var_function_inout) {
            unsupported = true;
            return visit_stop;
         }
      }

      if (!glsl_type_is_vector_or_scalar(ir->return_type) &&
          !ir->return_type->is_void()) {
         unsupported = true;
         return visit_stop;
      }

      return visit_continue;
   }

   bool unsupported;
};

} /* end of anonymous namespace */


static bool
has_unsupported_function_param(exec_list *ir)
{
   ir_function_param_visitor visitor;
   visit_list_elements(&visitor, ir);
   return visitor.unsupported;
}

nir_shader *
glsl_to_nir(struct gl_context *ctx,
            const struct gl_shader_program *shader_prog,
            gl_shader_stage stage,
            const nir_shader_compiler_options *options)
{
   struct gl_linked_shader *sh = shader_prog->_LinkedShaders[stage];

   const struct gl_shader_compiler_options *gl_options =
      &ctx->Const.ShaderCompilerOptions[stage];

   /* glsl_to_nir can only handle converting certain function parameters
    * to NIR. If we find something we can't handle then we get the GLSL IR
    * opts to remove it before we continue on.
    *
    * TODO: add missing glsl ir to nir support and remove this loop.
    */
   while (has_unsupported_function_param(sh->ir)) {
      do_common_optimization(sh->ir, true, true, gl_options,
                             ctx->Const.NativeIntegers);
   }

   nir_shader *shader = nir_shader_create(NULL, stage, options,
                                          &sh->Program->info);

   nir_visitor v1(ctx, shader);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   nir_validate_shader(shader, "after glsl to nir, before function inline");

   /* We have to lower away local constant initializers right before we
    * inline functions. That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   nir_lower_variable_initializers(shader, nir_var_all);
   nir_lower_returns(shader);
   nir_inline_functions(shader);
   nir_opt_deref(shader);

   nir_validate_shader(shader, "after function inlining and return lowering");

   /* Now that we have inlined everything remove all of the functions except
    * main().
    */
   foreach_list_typed_safe(nir_function, function, node, &(shader)->functions){
      if (strcmp("main", function->name) != 0) {
         exec_node_remove(&function->node);
      }
   }

   shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name);
   if (shader_prog->Label)
      shader->info.label = ralloc_strdup(shader, shader_prog->Label);

   /* Check for transform feedback varyings specified via the API */
   shader->info.has_transform_feedback_varyings =
      shader_prog->TransformFeedback.NumVarying > 0;

   /* Check for transform feedback varyings specified in the Shader */
   if (shader_prog->last_vert_prog)
      shader->info.has_transform_feedback_varyings |=
         shader_prog->last_vert_prog->sh.LinkedTransformFeedback->NumVarying > 0;

   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.pixel_center_integer = sh->Program->info.fs.pixel_center_integer;
      shader->info.fs.origin_upper_left = sh->Program->info.fs.origin_upper_left;
   }

   return shader;
}

nir_visitor::nir_visitor(gl_context *ctx, nir_shader *shader)
{
   this->supports_std430 = ctx->Const.UseSTD430AsDefaultPacking;
   this->shader = shader;
   this->is_global = true;
   this->var_table = _mesa_pointer_hash_table_create(NULL);
   this->overload_table = _mesa_pointer_hash_table_create(NULL);
   this->result = NULL;
   this->impl = NULL;
   this->deref = NULL;
   this->sig = NULL;
   memset(&this->b, 0, sizeof(this->b));
}

nir_visitor::~nir_visitor()
{
   _mesa_hash_table_destroy(this->var_table, NULL);
   _mesa_hash_table_destroy(this->overload_table, NULL);
}

nir_deref_instr *
nir_visitor::evaluate_deref(ir_instruction *ir)
{
   ir->accept(this);
   return this->deref;
}

nir_constant *
nir_visitor::constant_copy(ir_constant *ir, void *mem_ctx)
{
   if (ir == NULL)
      return NULL;

   nir_constant *ret = rzalloc(mem_ctx, nir_constant);

   const unsigned rows = ir->type->vector_elements;
   const unsigned cols = ir->type->matrix_columns;
   unsigned i;

   ret->num_elements = 0;
   switch (ir->type->base_type) {
   case GLSL_TYPE_UINT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u32 = ir->value.u[r];

      break;

   case GLSL_TYPE_UINT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u16 = ir->value.u16[r];
      break;

   case GLSL_TYPE_INT:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i32 = ir->value.i[r];

      break;

   case GLSL_TYPE_INT16:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i16 = ir->value.i16[r];
      break;

   case GLSL_TYPE_FLOAT:
   case GLSL_TYPE_FLOAT16:
   case GLSL_TYPE_DOUBLE:
      if (cols > 1) {
         ret->elements = ralloc_array(mem_ctx, nir_constant *, cols);
         ret->num_elements = cols;
         for (unsigned c = 0; c < cols; c++) {
            nir_constant *col_const = rzalloc(mem_ctx, nir_constant);
            col_const->num_elements = 0;
            switch (ir->type->base_type) {
            case GLSL_TYPE_FLOAT:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f32 = ir->value.f[c * rows + r];
               break;

            case GLSL_TYPE_FLOAT16:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].u16 = ir->value.f16[c * rows + r];
               break;

            case GLSL_TYPE_DOUBLE:
               for (unsigned r = 0; r < rows; r++)
                  col_const->values[r].f64 = ir->value.d[c * rows + r];
               break;

            default:
               unreachable("Cannot get here from the first level switch");
            }
            ret->elements[c] = col_const;
         }
      } else {
         switch (ir->type->base_type) {
         case GLSL_TYPE_FLOAT:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f32 = ir->value.f[r];
            break;

         case GLSL_TYPE_FLOAT16:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].u16 = ir->value.f16[r];
            break;

         case GLSL_TYPE_DOUBLE:
            for (unsigned r = 0; r < rows; r++)
               ret->values[r].f64 = ir->value.d[r];
            break;

         default:
            unreachable("Cannot get here from the first level switch");
         }
      }
      break;

   case GLSL_TYPE_UINT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].u64 = ir->value.u64[r];
      break;

   case GLSL_TYPE_INT64:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].i64 = ir->value.i64[r];
      break;

   case GLSL_TYPE_BOOL:
      /* Only float base types can be matrices. */
      assert(cols == 1);

      for (unsigned r = 0; r < rows; r++)
         ret->values[r].b = ir->value.b[r];

      break;

   case GLSL_TYPE_STRUCT:
   case GLSL_TYPE_ARRAY:
      ret->elements = ralloc_array(mem_ctx, nir_constant *,
                                   ir->type->length);
      ret->num_elements = ir->type->length;

      for (i = 0; i < ir->type->length; i++)
         ret->elements[i] = constant_copy(ir->const_elements[i], mem_ctx);
      break;

   default:
      unreachable("not reached");
   }

   return ret;
}

static const glsl_type *
wrap_type_in_array(const glsl_type *elem_type, const glsl_type *array_type)
{
   if (!array_type->is_array())
      return elem_type;

   elem_type = wrap_type_in_array(elem_type, array_type->fields.array);

   return glsl_type::get_array_instance(elem_type, array_type->length);
}

static unsigned
get_nir_how_declared(unsigned how_declared)
{
   if (how_declared == ir_var_hidden)
      return nir_var_hidden;

   return nir_var_declared_normally;
}

void
nir_visitor::visit(ir_variable *ir)
{
   /* TODO: In future we should switch to using the NIR lowering pass but for
    * now just ignore these variables as GLSL IR should have lowered them.
    * Anything remaining are just dead vars that weren't cleaned up.
    */
   if (ir->data.mode == ir_var_shader_shared)
      return;

   /* FINISHME: inout parameters */
   assert(ir->data.mode != ir_var_function_inout);

   if (ir->data.mode == ir_var_function_out)
      return;

   nir_variable *var = rzalloc(shader, nir_variable);
   var->type = ir->type;
   var->name = ralloc_strdup(var, ir->name);

   var->data.always_active_io = ir->data.always_active_io;
   var->data.read_only = ir->data.read_only;
   var->data.centroid = ir->data.centroid;
   var->data.sample = ir->data.sample;
   var->data.patch = ir->data.patch;
   var->data.how_declared = get_nir_how_declared(ir->data.how_declared);
   var->data.invariant = ir->data.invariant;
   var->data.location = ir->data.location;
   var->data.stream = ir->data.stream;
   if (ir->data.stream & (1u << 31))
      var->data.stream |= NIR_STREAM_PACKED;

   var->data.precision = ir->data.precision;
   var->data.explicit_location = ir->data.explicit_location;
   var->data.matrix_layout = ir->data.matrix_layout;
   var->data.from_named_ifc_block = ir->data.from_named_ifc_block;
   var->data.compact = false;

   switch(ir->data.mode) {
   case ir_var_auto:
   case ir_var_temporary:
      if (is_global)
         var->data.mode = nir_var_shader_temp;
      else
         var->data.mode = nir_var_function_temp;
      break;

   case ir_var_function_in:
   case ir_var_const_in:
      var->data.mode = nir_var_function_temp;
      break;

   case ir_var_shader_in:
      if (shader->info.stage == MESA_SHADER_GEOMETRY &&
          ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
         /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
         var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
         var->data.mode = nir_var_system_value;
      } else {
         var->data.mode = nir_var_shader_in;

         if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
             (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
              ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }

         if (shader->info.stage > MESA_SHADER_VERTEX &&
             ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
             ir->data.location <= VARYING_SLOT_CULL_DIST1) {
            var->data.compact = ir->type->without_array()->is_scalar();
         }
      }
      break;

   case ir_var_shader_out:
      var->data.mode = nir_var_shader_out;
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          (ir->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
           ir->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }

      if (shader->info.stage <= MESA_SHADER_GEOMETRY &&
          ir->data.location >= VARYING_SLOT_CLIP_DIST0 &&
          ir->data.location <= VARYING_SLOT_CULL_DIST1) {
         var->data.compact = ir->type->without_array()->is_scalar();
      }
      break;

   case ir_var_uniform:
      if (ir->get_interface_type())
         var->data.mode = nir_var_mem_ubo;
      else
         var->data.mode = nir_var_uniform;
      break;

   case ir_var_shader_storage:
      var->data.mode = nir_var_mem_ssbo;
      break;

   case ir_var_system_value:
      var->data.mode = nir_var_system_value;
      break;

   default:
      unreachable("not reached");
   }

   unsigned mem_access = 0;
   if (ir->data.memory_read_only)
      mem_access |= ACCESS_NON_WRITEABLE;
   if (ir->data.memory_write_only)
      mem_access |= ACCESS_NON_READABLE;
   if (ir->data.memory_coherent)
      mem_access |= ACCESS_COHERENT;
   if (ir->data.memory_volatile)
      mem_access |= ACCESS_VOLATILE;
   if (ir->data.memory_restrict)
      mem_access |= ACCESS_RESTRICT;

   var->interface_type = ir->get_interface_type();

   /* For UBO and SSBO variables, we need explicit types */
   if (var->data.mode & (nir_var_mem_ubo | nir_var_mem_ssbo)) {
      const glsl_type *explicit_ifc_type =
         ir->get_interface_type()->get_explicit_interface_type(supports_std430);

      var->interface_type = explicit_ifc_type;

      if (ir->type->without_array()->is_interface()) {
         /* If the type contains the interface, wrap the explicit type in the
          * right number of arrays.
          */
         var->type = wrap_type_in_array(explicit_ifc_type, ir->type);
      } else {
         /* Otherwise, this variable is one entry in the interface */
         UNUSED bool found = false;
         for (unsigned i = 0; i < explicit_ifc_type->length; i++) {
            const glsl_struct_field *field =
               &explicit_ifc_type->fields.structure[i];
            if (strcmp(ir->name, field->name) != 0)
               continue;

            var->type = field->type;
            if (field->memory_read_only)
               mem_access |= ACCESS_NON_WRITEABLE;
            if (field->memory_write_only)
               mem_access |= ACCESS_NON_READABLE;
            if (field->memory_coherent)
               mem_access |= ACCESS_COHERENT;
            if (field->memory_volatile)
               mem_access |= ACCESS_VOLATILE;
            if (field->memory_restrict)
               mem_access |= ACCESS_RESTRICT;

            found = true;
            break;
         }
         assert(found);
      }
   }

   var->data.interpolation = ir->data.interpolation;
   var->data.location_frac = ir->data.location_frac;

   switch (ir->data.depth_layout) {
   case ir_depth_layout_none:
      var->data.depth_layout = nir_depth_layout_none;
      break;
   case ir_depth_layout_any:
      var->data.depth_layout = nir_depth_layout_any;
      break;
   case ir_depth_layout_greater:
      var->data.depth_layout = nir_depth_layout_greater;
      break;
   case ir_depth_layout_less:
      var->data.depth_layout = nir_depth_layout_less;
      break;
   case ir_depth_layout_unchanged:
      var->data.depth_layout = nir_depth_layout_unchanged;
      break;
   default:
      unreachable("not reached");
   }

   var->data.index = ir->data.index;
   var->data.descriptor_set = 0;
   var->data.binding = ir->data.binding;
   var->data.explicit_binding = ir->data.explicit_binding;
   var->data.bindless = ir->data.bindless;
   var->data.offset = ir->data.offset;
   var->data.access = (gl_access_qualifier)mem_access;

   if (var->type->without_array()->is_image()) {
      var->data.image.format = ir->data.image_format;
   } else if (var->data.mode == nir_var_shader_out) {
      var->data.xfb.buffer = ir->data.xfb_buffer;
      var->data.xfb.stride = ir->data.xfb_stride;
   }

   var->data.fb_fetch_output = ir->data.fb_fetch_output;
   var->data.explicit_xfb_buffer = ir->data.explicit_xfb_buffer;
   var->data.explicit_xfb_stride = ir->data.explicit_xfb_stride;

   var->num_state_slots = ir->get_num_state_slots();
   if (var->num_state_slots > 0) {
      var->state_slots = rzalloc_array(var, nir_state_slot,
                                       var->num_state_slots);

      ir_state_slot *state_slots = ir->get_state_slots();
      for (unsigned i = 0; i < var->num_state_slots; i++) {
         for (unsigned j = 0; j < 4; j++)
            var->state_slots[i].tokens[j] = state_slots[i].tokens[j];
         var->state_slots[i].swizzle = state_slots[i].swizzle;
      }
   } else {
      var->state_slots = NULL;
   }

   var->constant_initializer = constant_copy(ir->constant_initializer, var);

   if (var->data.mode == nir_var_function_temp)
      nir_function_impl_add_variable(impl, var);
   else
      nir_shader_add_variable(shader, var);

   _mesa_hash_table_insert(var_table, ir, var);
}

ir_visitor_status
nir_function_visitor::visit_enter(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures) {
      visitor->create_function(sig);
   }
   return visit_continue_with_parent;
}

void
nir_visitor::create_function(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   nir_function *func = nir_function_create(shader, ir->function_name());
   if (strcmp(ir->function_name(), "main") == 0)
      func->is_entrypoint = true;

   func->num_params = ir->parameters.length() +
                      (ir->return_type != glsl_type::void_type);
   func->params = ralloc_array(shader, nir_parameter, func->num_params);

   unsigned np = 0;

   if (ir->return_type != glsl_type::void_type) {
      /* The return value is a variable deref (basically an out parameter) */
      func->params[np].num_components = 1;
      func->params[np].bit_size = 32;
      np++;
   }

   foreach_in_list(ir_variable, param, &ir->parameters) {
      /* FINISHME: pass arrays, structs, etc by reference? */
      assert(param->type->is_vector() || param->type->is_scalar());

      if (param->data.mode == ir_var_function_in) {
         func->params[np].num_components = param->type->vector_elements;
         func->params[np].bit_size = glsl_get_bit_size(param->type);
      } else {
         func->params[np].num_components = 1;
         func->params[np].bit_size = 32;
      }
      np++;
   }
   assert(np == func->num_params);

   _mesa_hash_table_insert(this->overload_table, ir, func);
}

void
nir_visitor::visit(ir_function *ir)
{
   foreach_in_list(ir_function_signature, sig, &ir->signatures)
      sig->accept(this);
}

void
nir_visitor::visit(ir_function_signature *ir)
{
   if (ir->is_intrinsic())
      return;

   this->sig = ir;

   struct hash_entry *entry =
      _mesa_hash_table_search(this->overload_table, ir);

   assert(entry);
   nir_function *func = (nir_function *) entry->data;

   if (ir->is_defined) {
      nir_function_impl *impl = nir_function_impl_create(func);
      this->impl = impl;

      this->is_global = false;

      nir_builder_init(&b, impl);
      b.cursor = nir_after_cf_list(&impl->body);

      unsigned i = (ir->return_type != glsl_type::void_type) ? 1 : 0;

      foreach_in_list(ir_variable, param, &ir->parameters) {
         nir_variable *var =
            nir_local_variable_create(impl, param->type, param->name);

         if (param->data.mode == ir_var_function_in) {
            nir_store_var(&b, var, nir_load_param(&b, i), ~0);
         }

         _mesa_hash_table_insert(var_table, param, var);
         i++;
      }

      visit_exec_list(&ir->body, this);

      this->is_global = true;
   } else {
      func->impl = NULL;
   }
}

void
nir_visitor::visit(ir_loop *ir)
{
   nir_push_loop(&b);
   visit_exec_list(&ir->body_instructions, this);
   nir_pop_loop(&b, NULL);
}

void
nir_visitor::visit(ir_if *ir)
{
   nir_push_if(&b, evaluate_rvalue(ir->condition));
   visit_exec_list(&ir->then_instructions, this);
   nir_push_else(&b, NULL);
   visit_exec_list(&ir->else_instructions, this);
   nir_pop_if(&b, NULL);
}

void
nir_visitor::visit(ir_discard *ir)
{
   /*
    * discards aren't treated as control flow, because before we lower them
    * they can appear anywhere in the shader and the stuff after them may still
    * be executed (yay, crazy GLSL rules!). However, after lowering, all the
    * discards will be immediately followed by a return.
    */

   if (ir->condition)
      nir_discard_if(&b, evaluate_rvalue(ir->condition));
   else
      nir_discard(&b);
}

void
nir_visitor::visit(ir_demote *ir)
{
   nir_demote(&b);
}

void
nir_visitor::visit(ir_emit_vertex *ir)
{
   nir_emit_vertex(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_end_primitive *ir)
{
   nir_end_primitive(&b, (unsigned)ir->stream_id());
}

void
nir_visitor::visit(ir_loop_jump *ir)
{
   nir_jump_type type;
   switch (ir->mode) {
   case ir_loop_jump::jump_break:
      type = nir_jump_break;
      break;
   case ir_loop_jump::jump_continue:
      type = nir_jump_continue;
      break;
   default:
      unreachable("not reached");
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, type);
   nir_builder_instr_insert(&b, &instr->instr);
}

void
nir_visitor::visit(ir_return *ir)
{
   if (ir->value != NULL) {
      nir_deref_instr *ret_deref =
         nir_build_deref_cast(&b, nir_load_param(&b, 0),
                              nir_var_function_temp, ir->value->type, 0);

      nir_ssa_def *val = evaluate_rvalue(ir->value);
      nir_store_deref(&b, ret_deref, val, ~0);
   }

   nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return);
   nir_builder_instr_insert(&b, &instr->instr);
}

static void
intrinsic_set_std430_align(nir_intrinsic_instr *intrin, const glsl_type *type)
{
   unsigned bit_size = type->is_boolean() ?
32 : glsl_get_bit_size(type);876unsigned pow2_components = util_next_power_of_two(type->vector_elements);877nir_intrinsic_set_align(intrin, (bit_size / 8) * pow2_components, 0);878}879880/* Accumulate any qualifiers along the deref chain to get the actual881* load/store qualifier.882*/883884static enum gl_access_qualifier885deref_get_qualifier(nir_deref_instr *deref)886{887nir_deref_path path;888nir_deref_path_init(&path, deref, NULL);889890unsigned qualifiers = path.path[0]->var->data.access;891892const glsl_type *parent_type = path.path[0]->type;893for (nir_deref_instr **cur_ptr = &path.path[1]; *cur_ptr; cur_ptr++) {894nir_deref_instr *cur = *cur_ptr;895896if (parent_type->is_interface()) {897const struct glsl_struct_field *field =898&parent_type->fields.structure[cur->strct.index];899if (field->memory_read_only)900qualifiers |= ACCESS_NON_WRITEABLE;901if (field->memory_write_only)902qualifiers |= ACCESS_NON_READABLE;903if (field->memory_coherent)904qualifiers |= ACCESS_COHERENT;905if (field->memory_volatile)906qualifiers |= ACCESS_VOLATILE;907if (field->memory_restrict)908qualifiers |= ACCESS_RESTRICT;909}910911parent_type = cur->type;912}913914nir_deref_path_finish(&path);915916return (gl_access_qualifier) qualifiers;917}918919void920nir_visitor::visit(ir_call *ir)921{922if (ir->callee->is_intrinsic()) {923nir_intrinsic_op op;924925switch (ir->callee->intrinsic_id) {926case ir_intrinsic_generic_atomic_add:927op = ir->return_deref->type->is_integer_32_64()928? nir_intrinsic_deref_atomic_add : nir_intrinsic_deref_atomic_fadd;929break;930case ir_intrinsic_generic_atomic_and:931op = nir_intrinsic_deref_atomic_and;932break;933case ir_intrinsic_generic_atomic_or:934op = nir_intrinsic_deref_atomic_or;935break;936case ir_intrinsic_generic_atomic_xor:937op = nir_intrinsic_deref_atomic_xor;938break;939case ir_intrinsic_generic_atomic_min:940assert(ir->return_deref);941if (ir->return_deref->type == glsl_type::int_type ||942ir->return_deref->type == glsl_type::int64_t_type)943op = nir_intrinsic_deref_atomic_imin;944else if (ir->return_deref->type == glsl_type::uint_type ||945ir->return_deref->type == glsl_type::uint64_t_type)946op = nir_intrinsic_deref_atomic_umin;947else if (ir->return_deref->type == glsl_type::float_type)948op = nir_intrinsic_deref_atomic_fmin;949else950unreachable("Invalid type");951break;952case ir_intrinsic_generic_atomic_max:953assert(ir->return_deref);954if (ir->return_deref->type == glsl_type::int_type ||955ir->return_deref->type == glsl_type::int64_t_type)956op = nir_intrinsic_deref_atomic_imax;957else if (ir->return_deref->type == glsl_type::uint_type ||958ir->return_deref->type == glsl_type::uint64_t_type)959op = nir_intrinsic_deref_atomic_umax;960else if (ir->return_deref->type == glsl_type::float_type)961op = nir_intrinsic_deref_atomic_fmax;962else963unreachable("Invalid type");964break;965case ir_intrinsic_generic_atomic_exchange:966op = nir_intrinsic_deref_atomic_exchange;967break;968case ir_intrinsic_generic_atomic_comp_swap:969op = ir->return_deref->type->is_integer_32_64()970? 
nir_intrinsic_deref_atomic_comp_swap971: nir_intrinsic_deref_atomic_fcomp_swap;972break;973case ir_intrinsic_atomic_counter_read:974op = nir_intrinsic_atomic_counter_read_deref;975break;976case ir_intrinsic_atomic_counter_increment:977op = nir_intrinsic_atomic_counter_inc_deref;978break;979case ir_intrinsic_atomic_counter_predecrement:980op = nir_intrinsic_atomic_counter_pre_dec_deref;981break;982case ir_intrinsic_atomic_counter_add:983op = nir_intrinsic_atomic_counter_add_deref;984break;985case ir_intrinsic_atomic_counter_and:986op = nir_intrinsic_atomic_counter_and_deref;987break;988case ir_intrinsic_atomic_counter_or:989op = nir_intrinsic_atomic_counter_or_deref;990break;991case ir_intrinsic_atomic_counter_xor:992op = nir_intrinsic_atomic_counter_xor_deref;993break;994case ir_intrinsic_atomic_counter_min:995op = nir_intrinsic_atomic_counter_min_deref;996break;997case ir_intrinsic_atomic_counter_max:998op = nir_intrinsic_atomic_counter_max_deref;999break;1000case ir_intrinsic_atomic_counter_exchange:1001op = nir_intrinsic_atomic_counter_exchange_deref;1002break;1003case ir_intrinsic_atomic_counter_comp_swap:1004op = nir_intrinsic_atomic_counter_comp_swap_deref;1005break;1006case ir_intrinsic_image_load:1007op = nir_intrinsic_image_deref_load;1008break;1009case ir_intrinsic_image_store:1010op = nir_intrinsic_image_deref_store;1011break;1012case ir_intrinsic_image_atomic_add:1013op = ir->return_deref->type->is_integer_32_64()1014? nir_intrinsic_image_deref_atomic_add1015: nir_intrinsic_image_deref_atomic_fadd;1016break;1017case ir_intrinsic_image_atomic_min:1018if (ir->return_deref->type == glsl_type::int_type)1019op = nir_intrinsic_image_deref_atomic_imin;1020else if (ir->return_deref->type == glsl_type::uint_type)1021op = nir_intrinsic_image_deref_atomic_umin;1022else1023unreachable("Invalid type");1024break;1025case ir_intrinsic_image_atomic_max:1026if (ir->return_deref->type == glsl_type::int_type)1027op = nir_intrinsic_image_deref_atomic_imax;1028else if (ir->return_deref->type == glsl_type::uint_type)1029op = nir_intrinsic_image_deref_atomic_umax;1030else1031unreachable("Invalid type");1032break;1033case ir_intrinsic_image_atomic_and:1034op = nir_intrinsic_image_deref_atomic_and;1035break;1036case ir_intrinsic_image_atomic_or:1037op = nir_intrinsic_image_deref_atomic_or;1038break;1039case ir_intrinsic_image_atomic_xor:1040op = nir_intrinsic_image_deref_atomic_xor;1041break;1042case ir_intrinsic_image_atomic_exchange:1043op = nir_intrinsic_image_deref_atomic_exchange;1044break;1045case ir_intrinsic_image_atomic_comp_swap:1046op = nir_intrinsic_image_deref_atomic_comp_swap;1047break;1048case ir_intrinsic_image_atomic_inc_wrap:1049op = nir_intrinsic_image_deref_atomic_inc_wrap;1050break;1051case ir_intrinsic_image_atomic_dec_wrap:1052op = nir_intrinsic_image_deref_atomic_dec_wrap;1053break;1054case ir_intrinsic_memory_barrier:1055op = nir_intrinsic_memory_barrier;1056break;1057case ir_intrinsic_image_size:1058op = nir_intrinsic_image_deref_size;1059break;1060case ir_intrinsic_image_samples:1061op = nir_intrinsic_image_deref_samples;1062break;1063case ir_intrinsic_ssbo_store:1064case ir_intrinsic_ssbo_load:1065case ir_intrinsic_ssbo_atomic_add:1066case ir_intrinsic_ssbo_atomic_and:1067case ir_intrinsic_ssbo_atomic_or:1068case ir_intrinsic_ssbo_atomic_xor:1069case ir_intrinsic_ssbo_atomic_min:1070case ir_intrinsic_ssbo_atomic_max:1071case ir_intrinsic_ssbo_atomic_exchange:1072case ir_intrinsic_ssbo_atomic_comp_swap:1073/* SSBO store/loads should only have been lowered in GLSL IR for1074* 
non-nir drivers, NIR drivers make use of gl_nir_lower_buffers()1075* instead.1076*/1077unreachable("Invalid operation nir doesn't want lowered ssbo "1078"store/loads");1079case ir_intrinsic_shader_clock:1080op = nir_intrinsic_shader_clock;1081break;1082case ir_intrinsic_begin_invocation_interlock:1083op = nir_intrinsic_begin_invocation_interlock;1084break;1085case ir_intrinsic_end_invocation_interlock:1086op = nir_intrinsic_end_invocation_interlock;1087break;1088case ir_intrinsic_group_memory_barrier:1089op = nir_intrinsic_group_memory_barrier;1090break;1091case ir_intrinsic_memory_barrier_atomic_counter:1092op = nir_intrinsic_memory_barrier_atomic_counter;1093break;1094case ir_intrinsic_memory_barrier_buffer:1095op = nir_intrinsic_memory_barrier_buffer;1096break;1097case ir_intrinsic_memory_barrier_image:1098op = nir_intrinsic_memory_barrier_image;1099break;1100case ir_intrinsic_memory_barrier_shared:1101op = nir_intrinsic_memory_barrier_shared;1102break;1103case ir_intrinsic_shared_load:1104op = nir_intrinsic_load_shared;1105break;1106case ir_intrinsic_shared_store:1107op = nir_intrinsic_store_shared;1108break;1109case ir_intrinsic_shared_atomic_add:1110op = ir->return_deref->type->is_integer_32_64()1111? nir_intrinsic_shared_atomic_add1112: nir_intrinsic_shared_atomic_fadd;1113break;1114case ir_intrinsic_shared_atomic_and:1115op = nir_intrinsic_shared_atomic_and;1116break;1117case ir_intrinsic_shared_atomic_or:1118op = nir_intrinsic_shared_atomic_or;1119break;1120case ir_intrinsic_shared_atomic_xor:1121op = nir_intrinsic_shared_atomic_xor;1122break;1123case ir_intrinsic_shared_atomic_min:1124assert(ir->return_deref);1125if (ir->return_deref->type == glsl_type::int_type ||1126ir->return_deref->type == glsl_type::int64_t_type)1127op = nir_intrinsic_shared_atomic_imin;1128else if (ir->return_deref->type == glsl_type::uint_type ||1129ir->return_deref->type == glsl_type::uint64_t_type)1130op = nir_intrinsic_shared_atomic_umin;1131else if (ir->return_deref->type == glsl_type::float_type)1132op = nir_intrinsic_shared_atomic_fmin;1133else1134unreachable("Invalid type");1135break;1136case ir_intrinsic_shared_atomic_max:1137assert(ir->return_deref);1138if (ir->return_deref->type == glsl_type::int_type ||1139ir->return_deref->type == glsl_type::int64_t_type)1140op = nir_intrinsic_shared_atomic_imax;1141else if (ir->return_deref->type == glsl_type::uint_type ||1142ir->return_deref->type == glsl_type::uint64_t_type)1143op = nir_intrinsic_shared_atomic_umax;1144else if (ir->return_deref->type == glsl_type::float_type)1145op = nir_intrinsic_shared_atomic_fmax;1146else1147unreachable("Invalid type");1148break;1149case ir_intrinsic_shared_atomic_exchange:1150op = nir_intrinsic_shared_atomic_exchange;1151break;1152case ir_intrinsic_shared_atomic_comp_swap:1153op = ir->return_deref->type->is_integer_32_64()1154? 
nir_intrinsic_shared_atomic_comp_swap1155: nir_intrinsic_shared_atomic_fcomp_swap;1156break;1157case ir_intrinsic_vote_any:1158op = nir_intrinsic_vote_any;1159break;1160case ir_intrinsic_vote_all:1161op = nir_intrinsic_vote_all;1162break;1163case ir_intrinsic_vote_eq:1164op = nir_intrinsic_vote_ieq;1165break;1166case ir_intrinsic_ballot:1167op = nir_intrinsic_ballot;1168break;1169case ir_intrinsic_read_invocation:1170op = nir_intrinsic_read_invocation;1171break;1172case ir_intrinsic_read_first_invocation:1173op = nir_intrinsic_read_first_invocation;1174break;1175case ir_intrinsic_helper_invocation:1176op = nir_intrinsic_is_helper_invocation;1177break;1178default:1179unreachable("not reached");1180}11811182nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op);1183nir_ssa_def *ret = &instr->dest.ssa;11841185switch (op) {1186case nir_intrinsic_deref_atomic_add:1187case nir_intrinsic_deref_atomic_imin:1188case nir_intrinsic_deref_atomic_umin:1189case nir_intrinsic_deref_atomic_imax:1190case nir_intrinsic_deref_atomic_umax:1191case nir_intrinsic_deref_atomic_and:1192case nir_intrinsic_deref_atomic_or:1193case nir_intrinsic_deref_atomic_xor:1194case nir_intrinsic_deref_atomic_exchange:1195case nir_intrinsic_deref_atomic_comp_swap:1196case nir_intrinsic_deref_atomic_fadd:1197case nir_intrinsic_deref_atomic_fmin:1198case nir_intrinsic_deref_atomic_fmax:1199case nir_intrinsic_deref_atomic_fcomp_swap: {1200int param_count = ir->actual_parameters.length();1201assert(param_count == 2 || param_count == 3);12021203/* Deref */1204exec_node *param = ir->actual_parameters.get_head();1205ir_rvalue *rvalue = (ir_rvalue *) param;1206ir_dereference *deref = rvalue->as_dereference();1207ir_swizzle *swizzle = NULL;1208if (!deref) {1209/* We may have a swizzle to pick off a single vec4 component */1210swizzle = rvalue->as_swizzle();1211assert(swizzle && swizzle->type->vector_elements == 1);1212deref = swizzle->val->as_dereference();1213assert(deref);1214}1215nir_deref_instr *nir_deref = evaluate_deref(deref);1216if (swizzle) {1217nir_deref = nir_build_deref_array_imm(&b, nir_deref,1218swizzle->mask.x);1219}1220instr->src[0] = nir_src_for_ssa(&nir_deref->dest.ssa);12211222nir_intrinsic_set_access(instr, deref_get_qualifier(nir_deref));12231224/* data1 parameter (this is always present) */1225param = param->get_next();1226ir_instruction *inst = (ir_instruction *) param;1227instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));12281229/* data2 parameter (only with atomic_comp_swap) */1230if (param_count == 3) {1231assert(op == nir_intrinsic_deref_atomic_comp_swap ||1232op == nir_intrinsic_deref_atomic_fcomp_swap);1233param = param->get_next();1234inst = (ir_instruction *) param;1235instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));1236}12371238/* Atomic result */1239assert(ir->return_deref);1240if (ir->return_deref->type->is_integer_64()) {1241nir_ssa_dest_init(&instr->instr, &instr->dest,1242ir->return_deref->type->vector_elements, 64, NULL);1243} else {1244nir_ssa_dest_init(&instr->instr, &instr->dest,1245ir->return_deref->type->vector_elements, 32, NULL);1246}1247nir_builder_instr_insert(&b, &instr->instr);1248break;1249}1250case nir_intrinsic_atomic_counter_read_deref:1251case nir_intrinsic_atomic_counter_inc_deref:1252case nir_intrinsic_atomic_counter_pre_dec_deref:1253case nir_intrinsic_atomic_counter_add_deref:1254case nir_intrinsic_atomic_counter_min_deref:1255case nir_intrinsic_atomic_counter_max_deref:1256case nir_intrinsic_atomic_counter_and_deref:1257case 
nir_intrinsic_atomic_counter_or_deref:1258case nir_intrinsic_atomic_counter_xor_deref:1259case nir_intrinsic_atomic_counter_exchange_deref:1260case nir_intrinsic_atomic_counter_comp_swap_deref: {1261/* Set the counter variable dereference. */1262exec_node *param = ir->actual_parameters.get_head();1263ir_dereference *counter = (ir_dereference *)param;12641265instr->src[0] = nir_src_for_ssa(&evaluate_deref(counter)->dest.ssa);1266param = param->get_next();12671268/* Set the intrinsic destination. */1269if (ir->return_deref) {1270nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);1271}12721273/* Set the intrinsic parameters. */1274if (!param->is_tail_sentinel()) {1275instr->src[1] =1276nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));1277param = param->get_next();1278}12791280if (!param->is_tail_sentinel()) {1281instr->src[2] =1282nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));1283param = param->get_next();1284}12851286nir_builder_instr_insert(&b, &instr->instr);1287break;1288}1289case nir_intrinsic_image_deref_load:1290case nir_intrinsic_image_deref_store:1291case nir_intrinsic_image_deref_atomic_add:1292case nir_intrinsic_image_deref_atomic_imin:1293case nir_intrinsic_image_deref_atomic_umin:1294case nir_intrinsic_image_deref_atomic_imax:1295case nir_intrinsic_image_deref_atomic_umax:1296case nir_intrinsic_image_deref_atomic_and:1297case nir_intrinsic_image_deref_atomic_or:1298case nir_intrinsic_image_deref_atomic_xor:1299case nir_intrinsic_image_deref_atomic_exchange:1300case nir_intrinsic_image_deref_atomic_comp_swap:1301case nir_intrinsic_image_deref_atomic_fadd:1302case nir_intrinsic_image_deref_samples:1303case nir_intrinsic_image_deref_size:1304case nir_intrinsic_image_deref_atomic_inc_wrap:1305case nir_intrinsic_image_deref_atomic_dec_wrap: {1306nir_ssa_undef_instr *instr_undef =1307nir_ssa_undef_instr_create(shader, 1, 32);1308nir_builder_instr_insert(&b, &instr_undef->instr);13091310/* Set the image variable dereference. */1311exec_node *param = ir->actual_parameters.get_head();1312ir_dereference *image = (ir_dereference *)param;1313nir_deref_instr *deref = evaluate_deref(image);1314const glsl_type *type = deref->type;13151316nir_intrinsic_set_access(instr, deref_get_qualifier(deref));13171318instr->src[0] = nir_src_for_ssa(&deref->dest.ssa);1319param = param->get_next();13201321/* Set the intrinsic destination. 
*/1322if (ir->return_deref) {1323unsigned num_components = ir->return_deref->type->vector_elements;1324nir_ssa_dest_init(&instr->instr, &instr->dest,1325num_components, 32, NULL);1326}13271328if (op == nir_intrinsic_image_deref_size) {1329instr->num_components = instr->dest.ssa.num_components;1330} else if (op == nir_intrinsic_image_deref_load) {1331instr->num_components = 4;1332nir_intrinsic_set_dest_type(instr,1333nir_get_nir_type_for_glsl_base_type(type->sampled_type));1334} else if (op == nir_intrinsic_image_deref_store) {1335instr->num_components = 4;1336nir_intrinsic_set_src_type(instr,1337nir_get_nir_type_for_glsl_base_type(type->sampled_type));1338}13391340if (op == nir_intrinsic_image_deref_size ||1341op == nir_intrinsic_image_deref_samples) {1342/* image_deref_size takes an LOD parameter which is always 01343* coming from GLSL.1344*/1345if (op == nir_intrinsic_image_deref_size)1346instr->src[1] = nir_src_for_ssa(nir_imm_int(&b, 0));1347nir_builder_instr_insert(&b, &instr->instr);1348break;1349}13501351/* Set the address argument, extending the coordinate vector to four1352* components.1353*/1354nir_ssa_def *src_addr =1355evaluate_rvalue((ir_dereference *)param);1356nir_ssa_def *srcs[4];13571358for (int i = 0; i < 4; i++) {1359if (i < type->coordinate_components())1360srcs[i] = nir_channel(&b, src_addr, i);1361else1362srcs[i] = &instr_undef->def;1363}13641365instr->src[1] = nir_src_for_ssa(nir_vec(&b, srcs, 4));1366param = param->get_next();13671368/* Set the sample argument, which is undefined for single-sample1369* images.1370*/1371if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) {1372instr->src[2] =1373nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));1374param = param->get_next();1375} else {1376instr->src[2] = nir_src_for_ssa(&instr_undef->def);1377}13781379/* Set the intrinsic parameters. 
*/1380if (!param->is_tail_sentinel()) {1381instr->src[3] =1382nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));1383param = param->get_next();1384} else if (op == nir_intrinsic_image_deref_load) {1385instr->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */1386}13871388if (!param->is_tail_sentinel()) {1389instr->src[4] =1390nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param));1391param = param->get_next();1392} else if (op == nir_intrinsic_image_deref_store) {1393instr->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0)); /* LOD */1394}13951396nir_builder_instr_insert(&b, &instr->instr);1397break;1398}1399case nir_intrinsic_memory_barrier:1400case nir_intrinsic_group_memory_barrier:1401case nir_intrinsic_memory_barrier_atomic_counter:1402case nir_intrinsic_memory_barrier_buffer:1403case nir_intrinsic_memory_barrier_image:1404case nir_intrinsic_memory_barrier_shared:1405nir_builder_instr_insert(&b, &instr->instr);1406break;1407case nir_intrinsic_shader_clock:1408nir_ssa_dest_init(&instr->instr, &instr->dest, 2, 32, NULL);1409nir_intrinsic_set_memory_scope(instr, NIR_SCOPE_SUBGROUP);1410nir_builder_instr_insert(&b, &instr->instr);1411break;1412case nir_intrinsic_begin_invocation_interlock:1413nir_builder_instr_insert(&b, &instr->instr);1414break;1415case nir_intrinsic_end_invocation_interlock:1416nir_builder_instr_insert(&b, &instr->instr);1417break;1418case nir_intrinsic_store_ssbo: {1419exec_node *param = ir->actual_parameters.get_head();1420ir_rvalue *block = ((ir_instruction *)param)->as_rvalue();14211422param = param->get_next();1423ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();14241425param = param->get_next();1426ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();14271428param = param->get_next();1429ir_constant *write_mask = ((ir_instruction *)param)->as_constant();1430assert(write_mask);14311432nir_ssa_def *nir_val = evaluate_rvalue(val);1433if (val->type->is_boolean())1434nir_val = nir_b2i32(&b, nir_val);14351436instr->src[0] = nir_src_for_ssa(nir_val);1437instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block));1438instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset));1439intrinsic_set_std430_align(instr, val->type);1440nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);1441instr->num_components = val->type->vector_elements;14421443nir_builder_instr_insert(&b, &instr->instr);1444break;1445}1446case nir_intrinsic_load_shared: {1447exec_node *param = ir->actual_parameters.get_head();1448ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();14491450nir_intrinsic_set_base(instr, 0);1451instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset));14521453const glsl_type *type = ir->return_deref->var->type;1454instr->num_components = type->vector_elements;1455intrinsic_set_std430_align(instr, type);14561457/* Setup destination register */1458unsigned bit_size = type->is_boolean() ? 
32 : glsl_get_bit_size(type);1459nir_ssa_dest_init(&instr->instr, &instr->dest,1460type->vector_elements, bit_size, NULL);14611462nir_builder_instr_insert(&b, &instr->instr);14631464/* The value in shared memory is a 32-bit value */1465if (type->is_boolean())1466ret = nir_b2b1(&b, &instr->dest.ssa);1467break;1468}1469case nir_intrinsic_store_shared: {1470exec_node *param = ir->actual_parameters.get_head();1471ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue();14721473param = param->get_next();1474ir_rvalue *val = ((ir_instruction *)param)->as_rvalue();14751476param = param->get_next();1477ir_constant *write_mask = ((ir_instruction *)param)->as_constant();1478assert(write_mask);14791480nir_intrinsic_set_base(instr, 0);1481instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset));14821483nir_intrinsic_set_write_mask(instr, write_mask->value.u[0]);14841485nir_ssa_def *nir_val = evaluate_rvalue(val);1486/* The value in shared memory is a 32-bit value */1487if (val->type->is_boolean())1488nir_val = nir_b2b32(&b, nir_val);14891490instr->src[0] = nir_src_for_ssa(nir_val);1491instr->num_components = val->type->vector_elements;1492intrinsic_set_std430_align(instr, val->type);14931494nir_builder_instr_insert(&b, &instr->instr);1495break;1496}1497case nir_intrinsic_shared_atomic_add:1498case nir_intrinsic_shared_atomic_imin:1499case nir_intrinsic_shared_atomic_umin:1500case nir_intrinsic_shared_atomic_imax:1501case nir_intrinsic_shared_atomic_umax:1502case nir_intrinsic_shared_atomic_and:1503case nir_intrinsic_shared_atomic_or:1504case nir_intrinsic_shared_atomic_xor:1505case nir_intrinsic_shared_atomic_exchange:1506case nir_intrinsic_shared_atomic_comp_swap:1507case nir_intrinsic_shared_atomic_fadd:1508case nir_intrinsic_shared_atomic_fmin:1509case nir_intrinsic_shared_atomic_fmax:1510case nir_intrinsic_shared_atomic_fcomp_swap: {1511int param_count = ir->actual_parameters.length();1512assert(param_count == 2 || param_count == 3);15131514/* Offset */1515exec_node *param = ir->actual_parameters.get_head();1516ir_instruction *inst = (ir_instruction *) param;1517instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));15181519/* data1 parameter (this is always present) */1520param = param->get_next();1521inst = (ir_instruction *) param;1522instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));15231524/* data2 parameter (only with atomic_comp_swap) */1525if (param_count == 3) {1526assert(op == nir_intrinsic_shared_atomic_comp_swap ||1527op == nir_intrinsic_shared_atomic_fcomp_swap);1528param = param->get_next();1529inst = (ir_instruction *) param;1530instr->src[2] =1531nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue()));1532}15331534/* Atomic result */1535assert(ir->return_deref);1536unsigned bit_size = glsl_get_bit_size(ir->return_deref->type);1537nir_ssa_dest_init(&instr->instr, &instr->dest,1538ir->return_deref->type->vector_elements,1539bit_size, NULL);1540nir_builder_instr_insert(&b, &instr->instr);1541break;1542}1543case nir_intrinsic_vote_ieq:1544instr->num_components = 1;1545FALLTHROUGH;1546case nir_intrinsic_vote_any:1547case nir_intrinsic_vote_all: {1548nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);15491550ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();1551instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));15521553nir_builder_instr_insert(&b, &instr->instr);1554break;1555}15561557case nir_intrinsic_ballot: {1558nir_ssa_dest_init(&instr->instr, &instr->dest,1559ir->return_deref->type->vector_elements, 64, 
NULL);1560instr->num_components = ir->return_deref->type->vector_elements;15611562ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();1563instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));15641565nir_builder_instr_insert(&b, &instr->instr);1566break;1567}1568case nir_intrinsic_read_invocation: {1569nir_ssa_dest_init(&instr->instr, &instr->dest,1570ir->return_deref->type->vector_elements, 32, NULL);1571instr->num_components = ir->return_deref->type->vector_elements;15721573ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();1574instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));15751576ir_rvalue *invocation = (ir_rvalue *) ir->actual_parameters.get_head()->next;1577instr->src[1] = nir_src_for_ssa(evaluate_rvalue(invocation));15781579nir_builder_instr_insert(&b, &instr->instr);1580break;1581}1582case nir_intrinsic_read_first_invocation: {1583nir_ssa_dest_init(&instr->instr, &instr->dest,1584ir->return_deref->type->vector_elements, 32, NULL);1585instr->num_components = ir->return_deref->type->vector_elements;15861587ir_rvalue *value = (ir_rvalue *) ir->actual_parameters.get_head();1588instr->src[0] = nir_src_for_ssa(evaluate_rvalue(value));15891590nir_builder_instr_insert(&b, &instr->instr);1591break;1592}1593case nir_intrinsic_is_helper_invocation: {1594nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 1, NULL);1595nir_builder_instr_insert(&b, &instr->instr);1596break;1597}1598default:1599unreachable("not reached");1600}16011602if (ir->return_deref)1603nir_store_deref(&b, evaluate_deref(ir->return_deref), ret, ~0);16041605return;1606}16071608struct hash_entry *entry =1609_mesa_hash_table_search(this->overload_table, ir->callee);1610assert(entry);1611nir_function *callee = (nir_function *) entry->data;16121613nir_call_instr *call = nir_call_instr_create(this->shader, callee);16141615unsigned i = 0;1616nir_deref_instr *ret_deref = NULL;1617if (ir->return_deref) {1618nir_variable *ret_tmp =1619nir_local_variable_create(this->impl, ir->return_deref->type,1620"return_tmp");1621ret_deref = nir_build_deref_var(&b, ret_tmp);1622call->params[i++] = nir_src_for_ssa(&ret_deref->dest.ssa);1623}16241625foreach_two_lists(formal_node, &ir->callee->parameters,1626actual_node, &ir->actual_parameters) {1627ir_rvalue *param_rvalue = (ir_rvalue *) actual_node;1628ir_variable *sig_param = (ir_variable *) formal_node;16291630if (sig_param->data.mode == ir_var_function_out) {1631nir_deref_instr *out_deref = evaluate_deref(param_rvalue);1632call->params[i] = nir_src_for_ssa(&out_deref->dest.ssa);1633} else if (sig_param->data.mode == ir_var_function_in) {1634nir_ssa_def *val = evaluate_rvalue(param_rvalue);1635nir_src src = nir_src_for_ssa(val);16361637nir_src_copy(&call->params[i], &src, call);1638} else if (sig_param->data.mode == ir_var_function_inout) {1639unreachable("unimplemented: inout parameters");1640}16411642i++;1643}16441645nir_builder_instr_insert(&b, &call->instr);16461647if (ir->return_deref)1648nir_store_deref(&b, evaluate_deref(ir->return_deref), nir_load_deref(&b, ret_deref), ~0);1649}16501651void1652nir_visitor::visit(ir_assignment *ir)1653{1654unsigned num_components = ir->lhs->type->vector_elements;16551656b.exact = ir->lhs->variable_referenced()->data.invariant ||1657ir->lhs->variable_referenced()->data.precise;16581659if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&1660(ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {1661nir_deref_instr *lhs = evaluate_deref(ir->lhs);1662nir_deref_instr *rhs = 
evaluate_deref(ir->rhs);1663enum gl_access_qualifier lhs_qualifiers = deref_get_qualifier(lhs);1664enum gl_access_qualifier rhs_qualifiers = deref_get_qualifier(rhs);1665if (ir->condition) {1666nir_push_if(&b, evaluate_rvalue(ir->condition));1667nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,1668rhs_qualifiers);1669nir_pop_if(&b, NULL);1670} else {1671nir_copy_deref_with_access(&b, lhs, rhs, lhs_qualifiers,1672rhs_qualifiers);1673}1674return;1675}16761677assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector());16781679ir->lhs->accept(this);1680nir_deref_instr *lhs_deref = this->deref;1681nir_ssa_def *src = evaluate_rvalue(ir->rhs);16821683if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) {1684/* GLSL IR will give us the input to the write-masked assignment in a1685* single packed vector. So, for example, if the writemask is xzw, then1686* we have to swizzle x -> x, y -> z, and z -> w and get the y component1687* from the load.1688*/1689unsigned swiz[4];1690unsigned component = 0;1691for (unsigned i = 0; i < 4; i++) {1692swiz[i] = ir->write_mask & (1 << i) ? component++ : 0;1693}1694src = nir_swizzle(&b, src, swiz, num_components);1695}16961697enum gl_access_qualifier qualifiers = deref_get_qualifier(lhs_deref);1698if (ir->condition) {1699nir_push_if(&b, evaluate_rvalue(ir->condition));1700nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,1701qualifiers);1702nir_pop_if(&b, NULL);1703} else {1704nir_store_deref_with_access(&b, lhs_deref, src, ir->write_mask,1705qualifiers);1706}1707}17081709/*1710* Given an instruction, returns a pointer to its destination or NULL if there1711* is no destination.1712*1713* Note that this only handles instructions we generate at this level.1714*/1715static nir_dest *1716get_instr_dest(nir_instr *instr)1717{1718nir_alu_instr *alu_instr;1719nir_intrinsic_instr *intrinsic_instr;1720nir_tex_instr *tex_instr;17211722switch (instr->type) {1723case nir_instr_type_alu:1724alu_instr = nir_instr_as_alu(instr);1725return &alu_instr->dest.dest;17261727case nir_instr_type_intrinsic:1728intrinsic_instr = nir_instr_as_intrinsic(instr);1729if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest)1730return &intrinsic_instr->dest;1731else1732return NULL;17331734case nir_instr_type_tex:1735tex_instr = nir_instr_as_tex(instr);1736return &tex_instr->dest;17371738default:1739unreachable("not reached");1740}17411742return NULL;1743}17441745void1746nir_visitor::add_instr(nir_instr *instr, unsigned num_components,1747unsigned bit_size)1748{1749nir_dest *dest = get_instr_dest(instr);17501751if (dest)1752nir_ssa_dest_init(instr, dest, num_components, bit_size, NULL);17531754nir_builder_instr_insert(&b, instr);17551756if (dest) {1757assert(dest->is_ssa);1758this->result = &dest->ssa;1759}1760}17611762nir_ssa_def *1763nir_visitor::evaluate_rvalue(ir_rvalue* ir)1764{1765ir->accept(this);1766if (ir->as_dereference() || ir->as_constant()) {1767/*1768* A dereference is being used on the right hand side, which means we1769* must emit a variable load.1770*/17711772enum gl_access_qualifier access = deref_get_qualifier(this->deref);1773this->result = nir_load_deref_with_access(&b, this->deref, access);1774}17751776return this->result;1777}17781779static bool1780type_is_float(glsl_base_type type)1781{1782return type == GLSL_TYPE_FLOAT || type == GLSL_TYPE_DOUBLE ||1783type == GLSL_TYPE_FLOAT16;1784}17851786static bool1787type_is_signed(glsl_base_type type)1788{1789return type == GLSL_TYPE_INT || type == GLSL_TYPE_INT64 ||1790type == 
GLSL_TYPE_INT16;1791}17921793void1794nir_visitor::visit(ir_expression *ir)1795{1796/* Some special cases */1797switch (ir->operation) {1798case ir_unop_interpolate_at_centroid:1799case ir_binop_interpolate_at_offset:1800case ir_binop_interpolate_at_sample: {1801ir_dereference *deref = ir->operands[0]->as_dereference();1802ir_swizzle *swizzle = NULL;1803if (!deref) {1804/* the api does not allow a swizzle here, but the varying packing code1805* may have pushed one into here.1806*/1807swizzle = ir->operands[0]->as_swizzle();1808assert(swizzle);1809deref = swizzle->val->as_dereference();1810assert(deref);1811}18121813deref->accept(this);18141815nir_intrinsic_op op;1816if (nir_deref_mode_is(this->deref, nir_var_shader_in)) {1817switch (ir->operation) {1818case ir_unop_interpolate_at_centroid:1819op = nir_intrinsic_interp_deref_at_centroid;1820break;1821case ir_binop_interpolate_at_offset:1822op = nir_intrinsic_interp_deref_at_offset;1823break;1824case ir_binop_interpolate_at_sample:1825op = nir_intrinsic_interp_deref_at_sample;1826break;1827default:1828unreachable("Invalid interpolation intrinsic");1829}1830} else {1831/* This case can happen if the vertex shader does not write the1832* given varying. In this case, the linker will lower it to a1833* global variable. Since interpolating a variable makes no1834* sense, we'll just turn it into a load which will probably1835* eventually end up as an SSA definition.1836*/1837assert(nir_deref_mode_is(this->deref, nir_var_shader_temp));1838op = nir_intrinsic_load_deref;1839}18401841nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op);1842intrin->num_components = deref->type->vector_elements;1843intrin->src[0] = nir_src_for_ssa(&this->deref->dest.ssa);18441845if (intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||1846intrin->intrinsic == nir_intrinsic_interp_deref_at_sample)1847intrin->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));18481849unsigned bit_size = glsl_get_bit_size(deref->type);1850add_instr(&intrin->instr, deref->type->vector_elements, bit_size);18511852if (swizzle) {1853unsigned swiz[4] = {1854swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w1855};18561857result = nir_swizzle(&b, result, swiz,1858swizzle->type->vector_elements);1859}18601861return;1862}18631864case ir_unop_ssbo_unsized_array_length: {1865nir_intrinsic_instr *intrin =1866nir_intrinsic_instr_create(b.shader,1867nir_intrinsic_deref_buffer_array_length);18681869ir_dereference *deref = ir->operands[0]->as_dereference();1870intrin->src[0] = nir_src_for_ssa(&evaluate_deref(deref)->dest.ssa);18711872add_instr(&intrin->instr, 1, 32);1873return;1874}18751876case ir_binop_ubo_load:1877/* UBO loads should only have been lowered in GLSL IR for non-nir drivers,1878* NIR drivers make use of gl_nir_lower_buffers() instead.1879*/1880unreachable("Invalid operation nir doesn't want lowered ubo loads");1881default:1882break;1883}18841885nir_ssa_def *srcs[4];1886for (unsigned i = 0; i < ir->num_operands; i++)1887srcs[i] = evaluate_rvalue(ir->operands[i]);18881889glsl_base_type types[4];1890for (unsigned i = 0; i < ir->num_operands; i++)1891types[i] = ir->operands[i]->type->base_type;18921893glsl_base_type out_type = ir->type->base_type;18941895switch (ir->operation) {1896case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;1897case ir_unop_logic_not:1898result = nir_inot(&b, srcs[0]);1899break;1900case ir_unop_neg:1901result = type_is_float(types[0]) ? 
   switch (ir->operation) {
   case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break;
   case ir_unop_logic_not:
      result = nir_inot(&b, srcs[0]);
      break;
   case ir_unop_neg:
      result = type_is_float(types[0]) ? nir_fneg(&b, srcs[0])
                                       : nir_ineg(&b, srcs[0]);
      break;
   case ir_unop_abs:
      result = type_is_float(types[0]) ? nir_fabs(&b, srcs[0])
                                       : nir_iabs(&b, srcs[0]);
      break;
   case ir_unop_clz:
      result = nir_uclz(&b, srcs[0]);
      break;
   case ir_unop_saturate:
      assert(type_is_float(types[0]));
      result = nir_fsat(&b, srcs[0]);
      break;
   case ir_unop_sign:
      result = type_is_float(types[0]) ? nir_fsign(&b, srcs[0])
                                       : nir_isign(&b, srcs[0]);
      break;
   case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break;
   case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break;
   case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break;
   case ir_unop_exp: unreachable("ir_unop_exp should have been lowered");
   case ir_unop_log: unreachable("ir_unop_log should have been lowered");
   case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break;
   case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break;
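   /* All of the scalar numeric conversions below share a single path: the
    * NIR conversion opcode is computed from the source and destination
    * nir_alu_types via nir_type_conversion_op rather than being listed case
    * by case.
    */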
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_b2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_f2b:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_b2i64:
   case ir_unop_d2f:
   case ir_unop_f2d:
   case ir_unop_f162f:
   case ir_unop_f2f16:
   case ir_unop_f162b:
   case ir_unop_b2f16:
   case ir_unop_i2i:
   case ir_unop_u2u:
   case ir_unop_d2i:
   case ir_unop_d2u:
   case ir_unop_d2b:
   case ir_unop_i2d:
   case ir_unop_u2d:
   case ir_unop_i642i:
   case ir_unop_i642u:
   case ir_unop_i642f:
   case ir_unop_i642b:
   case ir_unop_i642d:
   case ir_unop_u642i:
   case ir_unop_u642u:
   case ir_unop_u642f:
   case ir_unop_u642d:
   case ir_unop_i2i64:
   case ir_unop_u2i64:
   case ir_unop_f2i64:
   case ir_unop_d2i64:
   case ir_unop_i2u64:
   case ir_unop_u2u64:
   case ir_unop_f2u64:
   case ir_unop_d2u64:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_i642u64:
   case ir_unop_u642i64: {
      nir_alu_type src_type = nir_get_nir_type_for_glsl_base_type(types[0]);
      nir_alu_type dst_type = nir_get_nir_type_for_glsl_base_type(out_type);
      result = nir_build_alu(&b, nir_type_conversion_op(src_type, dst_type,
                                 nir_rounding_mode_undef),
                                 srcs[0], NULL, NULL, NULL);
      /* b2i and b2f don't have fixed bit-size versions so the builder will
       * just assume 32 and we have to fix it up here.
       */
      result->bit_size = nir_alu_type_get_type_size(dst_type);
      break;
   }

   case ir_unop_f2fmp: {
      result = nir_build_alu(&b, nir_op_f2fmp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_i2imp: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_u2ump: {
      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
      break;
   }

   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_u2f:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_i642d:
   case ir_unop_bitcast_d2i64:
   case ir_unop_bitcast_u642d:
   case ir_unop_bitcast_d2u64:
   case ir_unop_subroutine_to_int:
      /* no-op */
      result = nir_mov(&b, srcs[0]);
      break;
   case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break;
   case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break;
   case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break;
   case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break;
   case ir_unop_frexp_exp: result = nir_frexp_exp(&b, srcs[0]); break;
   case ir_unop_frexp_sig: result = nir_frexp_sig(&b, srcs[0]); break;
   case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break;
   case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break;
   case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break;
   case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break;
   case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break;
   case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break;
   case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break;
   case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break;
   case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break;
   case ir_unop_pack_snorm_2x16:
      result = nir_pack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_snorm_4x8:
      result = nir_pack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_2x16:
      result = nir_pack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_unorm_4x8:
      result = nir_pack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_pack_half_2x16:
      result = nir_pack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_2x16:
      result = nir_unpack_snorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_snorm_4x8:
      result = nir_unpack_snorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_2x16:
      result = nir_unpack_unorm_2x16(&b, srcs[0]);
      break;
   case ir_unop_unpack_unorm_4x8:
      result = nir_unpack_unorm_4x8(&b, srcs[0]);
      break;
   case ir_unop_unpack_half_2x16:
      result = nir_unpack_half_2x16(&b, srcs[0]);
      break;
   case ir_unop_pack_sampler_2x32:
   case ir_unop_pack_image_2x32:
   case ir_unop_pack_double_2x32:
   case ir_unop_pack_int_2x32:
   case ir_unop_pack_uint_2x32:
      result = nir_pack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_unpack_sampler_2x32:
   case ir_unop_unpack_image_2x32:
   case ir_unop_unpack_double_2x32:
   case ir_unop_unpack_int_2x32:
   case ir_unop_unpack_uint_2x32:
      result = nir_unpack_64_2x32(&b, srcs[0]);
      break;
   case ir_unop_bitfield_reverse:
      result = nir_bitfield_reverse(&b, srcs[0]);
      break;
   case ir_unop_bit_count:
      result = nir_bit_count(&b, srcs[0]);
      break;
   case ir_unop_find_msb:
      switch (types[0]) {
      case GLSL_TYPE_UINT:
         result = nir_ufind_msb(&b, srcs[0]);
         break;
      case GLSL_TYPE_INT:
         result = nir_ifind_msb(&b, srcs[0]);
         break;
      default:
         unreachable("Invalid type for findMSB()");
      }
      break;
   case ir_unop_find_lsb:
      result = nir_find_lsb(&b, srcs[0]);
      break;

   case ir_unop_get_buffer_size: {
      nir_intrinsic_instr *load = nir_intrinsic_instr_create(
         this->shader,
         nir_intrinsic_get_ssbo_size);
      load->num_components = ir->type->vector_elements;
      load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
      unsigned bit_size = glsl_get_bit_size(ir->type);
      add_instr(&load->instr, ir->type->vector_elements, bit_size);
      return;
   }

   case ir_unop_atan:
      result = nir_atan(&b, srcs[0]);
      break;

   case ir_binop_add:
      result = type_is_float(out_type) ? nir_fadd(&b, srcs[0], srcs[1])
                                       : nir_iadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_add_sat:
      result = type_is_signed(out_type) ? nir_iadd_sat(&b, srcs[0], srcs[1])
                                        : nir_uadd_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub:
      result = type_is_float(out_type) ? nir_fsub(&b, srcs[0], srcs[1])
                                       : nir_isub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_sub_sat:
      result = type_is_signed(out_type) ? nir_isub_sat(&b, srcs[0], srcs[1])
                                        : nir_usub_sat(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_abs_sub:
      /* out_type is always unsigned for ir_binop_abs_sub, so we have to key
       * on the type of the sources.
       */
      result = type_is_signed(types[0]) ? nir_uabs_isub(&b, srcs[0], srcs[1])
                                        : nir_uabs_usub(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg:
      result = type_is_signed(out_type) ? nir_ihadd(&b, srcs[0], srcs[1])
                                        : nir_uhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_avg_round:
      result = type_is_signed(out_type) ? nir_irhadd(&b, srcs[0], srcs[1])
                                        : nir_urhadd(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mul_32x16:
      result = type_is_signed(out_type) ? nir_imul_32x16(&b, srcs[0], srcs[1])
                                        : nir_umul_32x16(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mul:
      if (type_is_float(out_type))
         result = nir_fmul(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_INT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_INT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_INT))
         result = nir_imul_2x32_64(&b, srcs[0], srcs[1]);
      else if (out_type == GLSL_TYPE_UINT64 &&
               (ir->operands[0]->type->base_type == GLSL_TYPE_UINT ||
                ir->operands[1]->type->base_type == GLSL_TYPE_UINT))
         result = nir_umul_2x32_64(&b, srcs[0], srcs[1]);
      else
         result = nir_imul(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_div:
      if (type_is_float(out_type))
         result = nir_fdiv(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_idiv(&b, srcs[0], srcs[1]);
      else
         result = nir_udiv(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_mod:
      result = type_is_float(out_type) ? nir_fmod(&b, srcs[0], srcs[1])
                                       : nir_umod(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_min:
      if (type_is_float(out_type))
         result = nir_fmin(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imin(&b, srcs[0], srcs[1]);
      else
         result = nir_umin(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_max:
      if (type_is_float(out_type))
         result = nir_fmax(&b, srcs[0], srcs[1]);
      else if (type_is_signed(out_type))
         result = nir_imax(&b, srcs[0], srcs[1]);
      else
         result = nir_umax(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break;
   case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break;
   case ir_binop_logic_and:
      result = nir_iand(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_or:
      result = nir_ior(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_logic_xor:
      result = nir_ixor(&b, srcs[0], srcs[1]);
      break;
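   /* NIR shift opcodes take the shift count as a 32-bit value regardless of
    * the width of the value being shifted, so the count is normalized with
    * nir_u2u32 first.
    */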
   case ir_binop_lshift: result = nir_ishl(&b, srcs[0], nir_u2u32(&b, srcs[1])); break;
   case ir_binop_rshift:
      result = (type_is_signed(out_type)) ? nir_ishr(&b, srcs[0], nir_u2u32(&b, srcs[1]))
                                          : nir_ushr(&b, srcs[0], nir_u2u32(&b, srcs[1]));
      break;
   case ir_binop_imul_high:
      result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1])
                                           : nir_umul_high(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break;
   case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break;
   case ir_binop_less:
      if (type_is_float(types[0]))
         result = nir_flt(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ilt(&b, srcs[0], srcs[1]);
      else
         result = nir_ult(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_gequal:
      if (type_is_float(types[0]))
         result = nir_fge(&b, srcs[0], srcs[1]);
      else if (type_is_signed(types[0]))
         result = nir_ige(&b, srcs[0], srcs[1]);
      else
         result = nir_uge(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_equal:
      if (type_is_float(types[0]))
         result = nir_feq(&b, srcs[0], srcs[1]);
      else
         result = nir_ieq(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_nequal:
      if (type_is_float(types[0]))
         result = nir_fneu(&b, srcs[0], srcs[1]);
      else
         result = nir_ine(&b, srcs[0], srcs[1]);
      break;
   case ir_binop_all_equal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_feq(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      }
      break;
   case ir_binop_any_nequal:
      if (type_is_float(types[0])) {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_fneu(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      } else {
         switch (ir->operands[0]->type->vector_elements) {
            case 1: result = nir_ine(&b, srcs[0], srcs[1]); break;
            case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break;
            case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break;
            case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break;
            default:
               unreachable("not reached");
         }
      }
      break;
   case ir_binop_dot:
      switch (ir->operands[0]->type->vector_elements) {
         case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break;
         case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break;
         case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break;
         default:
            unreachable("not reached");
      }
      break;
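   /* Dynamic indexing of a vector (vector[i] as an rvalue) has no direct NIR
    * opcode; it is open-coded as a chain of bcsels, e.g. for a vec4 v:
    *    bcsel(i == 3, v.w, bcsel(i == 2, v.z, bcsel(i == 1, v.y, v.x)))
    */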
   case ir_binop_vector_extract: {
      result = nir_channel(&b, srcs[0], 0);
      for (unsigned i = 1; i < ir->operands[0]->type->vector_elements; i++) {
         nir_ssa_def *swizzled = nir_channel(&b, srcs[0], i);
         result = nir_bcsel(&b, nir_ieq_imm(&b, srcs[1], i),
                            swizzled, result);
      }
      break;
   }

   case ir_binop_atan2:
      result = nir_atan2(&b, srcs[0], srcs[1]);
      break;

   case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break;
   case ir_triop_fma:
      result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_lrp:
      result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]);
      break;
   case ir_triop_csel:
      result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]);
      break;
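   /* NIR's bitfield_extract/bitfield_insert opcodes are defined on 32-bit
    * values only, so sources that may be 16-bit mediump values are widened
    * to 32 bits before building the instruction.
    */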
   case ir_triop_bitfield_extract:
      result = ir->type->is_int_16_32() ?
         nir_ibitfield_extract(&b, nir_i2i32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2])) :
         nir_ubitfield_extract(&b, nir_u2u32(&b, srcs[0]), nir_i2i32(&b, srcs[1]), nir_i2i32(&b, srcs[2]));
      break;
   case ir_quadop_bitfield_insert:
      result = nir_bitfield_insert(&b,
                                   nir_u2u32(&b, srcs[0]), nir_u2u32(&b, srcs[1]),
                                   nir_i2i32(&b, srcs[2]), nir_i2i32(&b, srcs[3]));
      break;
   case ir_quadop_vector:
      result = nir_vec(&b, srcs, ir->type->vector_elements);
      break;

   default:
      unreachable("not reached");
   }
}

void
nir_visitor::visit(ir_swizzle *ir)
{
   unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w };
   result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle,
                        ir->type->vector_elements);
}

void
nir_visitor::visit(ir_texture *ir)
{
   unsigned num_srcs;
   nir_texop op;
   switch (ir->op) {
   case ir_tex:
      op = nir_texop_tex;
      num_srcs = 1; /* coordinate */
      break;

   case ir_txb:
   case ir_txl:
      op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl;
      num_srcs = 2; /* coordinate, bias/lod */
      break;

   case ir_txd:
      op = nir_texop_txd; /* coordinate, dPdx, dPdy */
      num_srcs = 3;
      break;

   case ir_txf:
      op = nir_texop_txf;
      if (ir->lod_info.lod != NULL)
         num_srcs = 2; /* coordinate, lod */
      else
         num_srcs = 1; /* coordinate */
      break;

   case ir_txf_ms:
      op = nir_texop_txf_ms;
      num_srcs = 2; /* coordinate, sample_index */
      break;

   case ir_txs:
      op = nir_texop_txs;
      if (ir->lod_info.lod != NULL)
         num_srcs = 1; /* lod */
      else
         num_srcs = 0;
      break;

   case ir_lod:
      op = nir_texop_lod;
      num_srcs = 1; /* coordinate */
      break;

   case ir_tg4:
      op = nir_texop_tg4;
      num_srcs = 1; /* coordinate */
      break;

   case ir_query_levels:
      op = nir_texop_query_levels;
      num_srcs = 0;
      break;

   case ir_texture_samples:
      op = nir_texop_texture_samples;
      num_srcs = 0;
      break;

   case ir_samples_identical:
      op = nir_texop_samples_identical;
      num_srcs = 1; /* coordinate */
      break;

   default:
      unreachable("not reached");
   }

   if (ir->projector != NULL)
      num_srcs++;
   if (ir->shadow_comparator != NULL)
      num_srcs++;
   /* offsets are constants we store inside nir_tex_intrs.offsets */
   if (ir->offset != NULL && !ir->offset->type->is_array())
      num_srcs++;

   /* Add two for the texture and sampler derefs (or handles) */
   num_srcs += 2;

   nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs);

   instr->op = op;
   instr->sampler_dim =
      (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality;
   instr->is_array = ir->sampler->type->sampler_array;
   instr->is_shadow = ir->sampler->type->sampler_shadow;
   if (instr->is_shadow)
      instr->is_new_style_shadow = (ir->type->vector_elements == 1);
   instr->dest_type = nir_get_nir_type_for_glsl_type(ir->type);

   nir_deref_instr *sampler_deref = evaluate_deref(ir->sampler);

   /* check for bindless handles */
   if (!nir_deref_mode_is(sampler_deref, nir_var_uniform) ||
       nir_deref_instr_get_variable(sampler_deref)->data.bindless) {
      nir_ssa_def *load = nir_load_deref(&b, sampler_deref);
      instr->src[0].src = nir_src_for_ssa(load);
      instr->src[0].src_type = nir_tex_src_texture_handle;
      instr->src[1].src = nir_src_for_ssa(load);
      instr->src[1].src_type = nir_tex_src_sampler_handle;
   } else {
      instr->src[0].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      instr->src[0].src_type = nir_tex_src_texture_deref;
      instr->src[1].src = nir_src_for_ssa(&sampler_deref->dest.ssa);
      instr->src[1].src_type = nir_tex_src_sampler_deref;
   }
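
   /* Sources 0 and 1 always hold the texture and sampler (as derefs or as
    * bindless handles); the remaining sources are appended below in the same
    * order they were counted into num_srcs, which the assert at the end
    * double-checks.
    */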
   unsigned src_number = 2;

   if (ir->coordinate != NULL) {
      instr->coord_components = ir->coordinate->type->vector_elements;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->coordinate));
      instr->src[src_number].src_type = nir_tex_src_coord;
      src_number++;
   }

   if (ir->projector != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->projector));
      instr->src[src_number].src_type = nir_tex_src_projector;
      src_number++;
   }

   if (ir->shadow_comparator != NULL) {
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparator));
      instr->src[src_number].src_type = nir_tex_src_comparator;
      src_number++;
   }

   if (ir->offset != NULL) {
      if (ir->offset->type->is_array()) {
         for (int i = 0; i < ir->offset->type->array_size(); i++) {
            const ir_constant *c =
               ir->offset->as_constant()->get_array_element(i);

            for (unsigned j = 0; j < 2; ++j) {
               int val = c->get_int_component(j);
               assert(val <= 31 && val >= -32);
               instr->tg4_offsets[i][j] = val;
            }
         }
      } else {
         assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar());

         instr->src[src_number].src =
            nir_src_for_ssa(evaluate_rvalue(ir->offset));
         instr->src[src_number].src_type = nir_tex_src_offset;
         src_number++;
      }
   }

   switch (ir->op) {
   case ir_txb:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias));
      instr->src[src_number].src_type = nir_tex_src_bias;
      src_number++;
      break;

   case ir_txl:
   case ir_txf:
   case ir_txs:
      if (ir->lod_info.lod != NULL) {
         instr->src[src_number].src =
            nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod));
         instr->src[src_number].src_type = nir_tex_src_lod;
         src_number++;
      }
      break;

   case ir_txd:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx));
      instr->src[src_number].src_type = nir_tex_src_ddx;
      src_number++;
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy));
      instr->src[src_number].src_type = nir_tex_src_ddy;
      src_number++;
      break;

   case ir_txf_ms:
      instr->src[src_number].src =
         nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index));
      instr->src[src_number].src_type = nir_tex_src_ms_index;
      src_number++;
      break;

   case ir_tg4:
      instr->component = ir->lod_info.component->as_constant()->value.u[0];
      break;

   default:
      break;
   }

   assert(src_number == num_srcs);

   unsigned bit_size = glsl_get_bit_size(ir->type);
   add_instr(&instr->instr, nir_tex_instr_dest_size(instr), bit_size);
}

void
nir_visitor::visit(ir_constant *ir)
{
   /*
    * We don't know if this variable is an array or struct that gets
    * dereferenced, so do the safe thing and make it a variable with a
    * constant initializer and return a dereference.
    */

   nir_variable *var =
      nir_local_variable_create(this->impl, ir->type, "const_temp");
   var->data.read_only = true;
   var->constant_initializer = constant_copy(ir, var);

   this->deref = nir_build_deref_var(&b, var);
}

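/* Variable dereferences normally just look the nir_variable up in var_table,
 * but "out" parameters of the function currently being converted are passed
 * as NIR function parameters instead, so they are mapped back to their
 * nir_load_param index (offset by one when the signature returns a value,
 * since parameter 0 then holds the deref used for the return value).
 */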
void
nir_visitor::visit(ir_dereference_variable *ir)
{
   if (ir->variable_referenced()->data.mode == ir_var_function_out) {
      unsigned i = (sig->return_type != glsl_type::void_type) ? 1 : 0;

      foreach_in_list(ir_variable, param, &sig->parameters) {
         if (param == ir->variable_referenced()) {
            break;
         }
         i++;
      }

      this->deref = nir_build_deref_cast(&b, nir_load_param(&b, i),
                                         nir_var_function_temp, ir->type, 0);
      return;
   }

   assert(ir->variable_referenced()->data.mode != ir_var_function_inout);

   struct hash_entry *entry =
      _mesa_hash_table_search(this->var_table, ir->var);
   assert(entry);
   nir_variable *var = (nir_variable *) entry->data;

   this->deref = nir_build_deref_var(&b, var);
}

void
nir_visitor::visit(ir_dereference_record *ir)
{
   ir->record->accept(this);

   int field_index = ir->field_idx;
   assert(field_index >= 0);

   this->deref = nir_build_deref_struct(&b, this->deref, field_index);
}

void
nir_visitor::visit(ir_dereference_array *ir)
{
   nir_ssa_def *index = evaluate_rvalue(ir->array_index);

   ir->array->accept(this);

   this->deref = nir_build_deref_array(&b, this->deref, index);
}

void
nir_visitor::visit(ir_barrier *)
{
   if (shader->info.stage == MESA_SHADER_COMPUTE)
      nir_memory_barrier_shared(&b);
   else if (shader->info.stage == MESA_SHADER_TESS_CTRL)
      nir_memory_barrier_tcs_patch(&b);

   nir_control_barrier(&b);
}

nir_shader *
glsl_float64_funcs_to_nir(struct gl_context *ctx,
                          const nir_shader_compiler_options *options)
{
   /* We pretend it's a vertex shader. Ultimately, the stage shouldn't
    * matter because we're not optimizing anything here.
    */
   struct gl_shader *sh = _mesa_new_shader(-1, MESA_SHADER_VERTEX);
   sh->Source = float64_source;
   sh->CompileStatus = COMPILE_FAILURE;
   _mesa_glsl_compile_shader(ctx, sh, false, false, true);

   if (!sh->CompileStatus) {
      if (sh->InfoLog) {
         _mesa_problem(ctx,
                       "fp64 software impl compile failed:\n%s\nsource:\n%s\n",
                       sh->InfoLog, float64_source);
      }
      return NULL;
   }

   nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_VERTEX, options, NULL);

   nir_visitor v1(ctx, nir);
   nir_function_visitor v2(&v1);
   v2.run(sh->ir);
   visit_exec_list(sh->ir, &v1);

   /* _mesa_delete_shader will try to free sh->Source but it's static const */
   sh->Source = NULL;
   _mesa_delete_shader(ctx, sh);

   nir_validate_shader(nir, "float64_funcs_to_nir");

   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Do some optimizations to clean up the shader now. By optimizing the
    * functions in the library, we avoid having to re-do that work every
    * time we inline a copy of a function. Reducing basic blocks also helps
    * with compile times.
    */
   NIR_PASS_V(nir, nir_lower_vars_to_ssa);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_dce);
   NIR_PASS_V(nir, nir_opt_cse);
   NIR_PASS_V(nir, nir_opt_gcm, true);
   NIR_PASS_V(nir, nir_opt_peephole_select, 1, false, false);
   NIR_PASS_V(nir, nir_opt_dce);

   return nir;
}