/* Source: src/compiler/nir/nir_builder.h (Mesa, 21.2-virgl branch) */
/*
 * Copyright © 2014-2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef NIR_BUILDER_H
#define NIR_BUILDER_H

#include "nir_control_flow.h"
#include "util/bitscan.h"
#include "util/half_float.h"

struct exec_list;

/* State for incrementally emitting NIR: an insertion cursor plus flags that
 * are applied to every newly built instruction.
 */
typedef struct nir_builder {
   nir_cursor cursor;

   /* Whether new ALU instructions will be marked "exact" */
   bool exact;

   /* Whether to run divergence analysis on inserted instructions (loop merge
    * and header phis are not updated).
    */
   bool update_divergence;

   nir_shader *shader;
   nir_function_impl *impl;
} nir_builder;

/* Initializes a builder over an existing function impl.  The cursor is left
 * zeroed (unset); callers must position it before inserting instructions.
 */
static inline void
nir_builder_init(nir_builder *build, nir_function_impl *impl)
{
   memset(build, 0, sizeof(*build));
   build->exact = false;
   build->impl = impl;
   build->shader = impl->function->shader;
}

/* Creates a fresh shader with a single "main" entrypoint and returns a
 * builder whose cursor points at the end of its (empty) body.  The printf
 * style name, if non-NULL, becomes the shader's info.name.
 */
static inline nir_builder MUST_CHECK PRINTFLIKE(3, 4)
nir_builder_init_simple_shader(gl_shader_stage stage,
                               const nir_shader_compiler_options *options,
                               const char *name, ...)
{
   nir_builder b;

   memset(&b, 0, sizeof(b));
   b.shader = nir_shader_create(NULL, stage, options, NULL);

   if (name) {
      va_list args;
      va_start(args, name);
      b.shader->info.name = ralloc_vasprintf(b.shader, name, args);
      va_end(args);
   }

   nir_function *func = nir_function_create(b.shader, "main");
   func->is_entrypoint = true;
   b.exact = false;
   b.impl = nir_function_impl_create(func);
   b.cursor = nir_after_cf_list(&b.impl->body);

   return b;
}

typedef bool (*nir_instr_pass_cb)(struct nir_builder *, nir_instr *, void *);

/**
 * Iterates over all the instructions in a NIR shader and calls the given pass
 * on them.
 *
 * The pass should return true if it modified the shader.  In that case, only
 * the preserved metadata flags will be preserved in the function impl.
 *
 * The builder will be initialized to point at the function impl, but its
 * cursor is unset.
 */
static inline bool
nir_shader_instructions_pass(nir_shader *shader,
                             nir_instr_pass_cb pass,
                             nir_metadata preserved,
                             void *cb_data)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool func_progress = false;
      nir_builder b;
      nir_builder_init(&b, function->impl);

      /* _safe iteration so the pass may remove/replace the current
       * instruction or block.
       */
      nir_foreach_block_safe(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            func_progress |= pass(&b, instr, cb_data);
         }
      }

      if (func_progress) {
         nir_metadata_preserve(function->impl, preserved);
         progress = true;
      } else {
         nir_metadata_preserve(function->impl, nir_metadata_all);
      }
   }

   return progress;
}

/* Inserts an instruction at the cursor, optionally updates its divergence
 * information, and advances the cursor past it.
 */
static inline void
nir_builder_instr_insert(nir_builder *build, nir_instr *instr)
{
   nir_instr_insert(build->cursor, instr);

   if (build->update_divergence)
      nir_update_instr_divergence(build->shader, instr);

   /* Move the cursor forward. */
   build->cursor = nir_after_instr(instr);
}

/* Returns the most recently inserted instruction.  Only valid while the
 * cursor still sits immediately after an instruction.
 */
static inline nir_instr *
nir_builder_last_instr(nir_builder *build)
{
   assert(build->cursor.option == nir_cursor_after_instr);
   return build->cursor.instr;
}

/* Inserts a control-flow node (if/loop) at the cursor. */
static inline void
nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf)
{
   nir_cf_node_insert(build->cursor, cf);
}

/* Returns true if the cursor currently lies inside the given CF node
 * (walks up the CF-node parent chain from the cursor's block).
 */
static inline bool
nir_builder_is_inside_cf(nir_builder *build, nir_cf_node *cf_node)
{
   nir_block *block = nir_cursor_current_block(build->cursor);
   for (nir_cf_node *n = &block->cf_node; n; n = n->parent) {
      if (n == cf_node)
         return true;
   }
   return false;
}

/* Opens an if-statement on the given condition source and moves the cursor
 * into its then-list.  Pair with nir_push_else()/nir_pop_if().
 */
static inline nir_if *
nir_push_if_src(nir_builder *build, nir_src condition)
{
   nir_if *nif = nir_if_create(build->shader);
   nif->condition = condition;
   nir_builder_cf_insert(build, &nif->cf_node);
   build->cursor = nir_before_cf_list(&nif->then_list);
   return nif;
}

/* Convenience wrapper for nir_push_if_src() taking an SSA condition. */
static inline nir_if *
nir_push_if(nir_builder *build, nir_ssa_def *condition)
{
   return nir_push_if_src(build, nir_src_for_ssa(condition));
}

/* Moves the cursor to the else-list of the given if (or, when nif is NULL,
 * of the if enclosing the cursor).
 */
static inline nir_if *
nir_push_else(nir_builder *build, nir_if *nif)
{
   if (nif) {
      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      nif = nir_cf_node_as_if(block->cf_node.parent);
   }
   build->cursor = nir_before_cf_list(&nif->else_list);
   return nif;
}

/* Closes an if opened by nir_push_if*(), leaving the cursor just after it. */
static inline void
nir_pop_if(nir_builder *build, nir_if *nif)
{
   if (nif) {
      assert(nir_builder_is_inside_cf(build, &nif->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      nif = nir_cf_node_as_if(block->cf_node.parent);
   }
   build->cursor = nir_after_cf_node(&nif->cf_node);
}

/* Emits a phi merging a value from each side of the if that immediately
 * precedes the cursor's block (i.e. call right after nir_pop_if()).
 */
static inline nir_ssa_def *
nir_if_phi(nir_builder *build, nir_ssa_def *then_def, nir_ssa_def *else_def)
{
   nir_block *block = nir_cursor_current_block(build->cursor);
   nir_if *nif = nir_cf_node_as_if(nir_cf_node_prev(&block->cf_node));

   nir_phi_instr *phi = nir_phi_instr_create(build->shader);

   nir_phi_src *src = ralloc(phi, nir_phi_src);
   src->pred = nir_if_last_then_block(nif);
   src->src = nir_src_for_ssa(then_def);
   exec_list_push_tail(&phi->srcs, &src->node);

   src = ralloc(phi, nir_phi_src);
   src->pred = nir_if_last_else_block(nif);
   src->src = nir_src_for_ssa(else_def);
   exec_list_push_tail(&phi->srcs, &src->node);

   assert(then_def->num_components == else_def->num_components);
   assert(then_def->bit_size == else_def->bit_size);
   nir_ssa_dest_init(&phi->instr, &phi->dest,
                     then_def->num_components, then_def->bit_size, NULL);

   nir_builder_instr_insert(build, &phi->instr);

   return &phi->dest.ssa;
}
/* Opens a loop and moves the cursor into its body.  Pair with nir_pop_loop(). */
static inline nir_loop *
nir_push_loop(nir_builder *build)
{
   nir_loop *loop = nir_loop_create(build->shader);
   nir_builder_cf_insert(build, &loop->cf_node);
   build->cursor = nir_before_cf_list(&loop->body);
   return loop;
}

/* Closes a loop opened by nir_push_loop() (or, when loop is NULL, the loop
 * enclosing the cursor), leaving the cursor just after it.
 */
static inline void
nir_pop_loop(nir_builder *build, nir_loop *loop)
{
   if (loop) {
      assert(nir_builder_is_inside_cf(build, &loop->cf_node));
   } else {
      nir_block *block = nir_cursor_current_block(build->cursor);
      loop = nir_cf_node_as_loop(block->cf_node.parent);
   }
   build->cursor = nir_after_cf_node(&loop->cf_node);
}

/* Emits an undef value.  Note: inserted at the top of the impl (not at the
 * cursor) so it dominates all uses; the cursor is not moved.
 */
static inline nir_ssa_def *
nir_ssa_undef(nir_builder *build, unsigned num_components, unsigned bit_size)
{
   nir_ssa_undef_instr *undef =
      nir_ssa_undef_instr_create(build->shader, num_components, bit_size);
   if (!undef)
      return NULL;

   nir_instr_insert(nir_before_cf_list(&build->impl->body), &undef->instr);
   if (build->update_divergence)
      nir_update_instr_divergence(build->shader, &undef->instr);

   return &undef->def;
}

/* Emits a load_const from an array of num_components nir_const_values. */
static inline nir_ssa_def *
nir_build_imm(nir_builder *build, unsigned num_components,
              unsigned bit_size, const nir_const_value *value)
{
   nir_load_const_instr *load_const =
      nir_load_const_instr_create(build->shader, num_components, bit_size);
   if (!load_const)
      return NULL;

   memcpy(load_const->value, value, sizeof(nir_const_value) * num_components);

   nir_builder_instr_insert(build, &load_const->instr);

   return &load_const->def;
}

/* Emits an all-zero constant vector. */
static inline nir_ssa_def *
nir_imm_zero(nir_builder *build, unsigned num_components, unsigned bit_size)
{
   nir_load_const_instr *load_const =
      nir_load_const_instr_create(build->shader, num_components, bit_size);

   /* nir_load_const_instr_create uses rzalloc so it's already zero */

   nir_builder_instr_insert(build, &load_const->instr);

   return &load_const->def;
}

/* Boolean immediate of the given bit size. */
static inline nir_ssa_def *
nir_imm_boolN_t(nir_builder *build, bool x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_bool(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

/* 1-bit boolean immediate. */
static inline nir_ssa_def *
nir_imm_bool(nir_builder *build, bool x)
{
   return nir_imm_boolN_t(build, x, 1);
}

static inline nir_ssa_def *
nir_imm_true(nir_builder *build)
{
   return nir_imm_bool(build, true);
}

static inline nir_ssa_def *
nir_imm_false(nir_builder *build)
{
   return nir_imm_bool(build, false);
}

/* Float immediate of the given bit size. */
static inline nir_ssa_def *
nir_imm_floatN_t(nir_builder *build, double x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_float(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

static inline nir_ssa_def *
nir_imm_float16(nir_builder *build, float x)
{
   return nir_imm_floatN_t(build, x, 16);
}

static inline nir_ssa_def *
nir_imm_float(nir_builder *build, float x)
{
   return nir_imm_floatN_t(build, x, 32);
}

static inline nir_ssa_def *
nir_imm_double(nir_builder *build, double x)
{
   return nir_imm_floatN_t(build, x, 64);
}

/* 2-component 32-bit float immediate vector. */
static inline nir_ssa_def *
nir_imm_vec2(nir_builder *build, float x, float y)
{
   nir_const_value v[2] = {
      nir_const_value_for_float(x, 32),
      nir_const_value_for_float(y, 32),
   };
   return nir_build_imm(build, 2, 32, v);
}

/* 4-component 32-bit float immediate vector. */
static inline nir_ssa_def *
nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
{
   nir_const_value v[4] = {
      nir_const_value_for_float(x, 32),
      nir_const_value_for_float(y, 32),
      nir_const_value_for_float(z, 32),
      nir_const_value_for_float(w, 32),
   };

   return nir_build_imm(build, 4, 32, v);
}

/* 4-component 16-bit float immediate vector (inputs converted from float). */
static inline nir_ssa_def *
nir_imm_vec4_16(nir_builder *build, float x, float y, float z, float w)
{
   nir_const_value v[4] = {
      nir_const_value_for_float(x, 16),
      nir_const_value_for_float(y, 16),
      nir_const_value_for_float(z, 16),
      nir_const_value_for_float(w, 16),
   };

   return nir_build_imm(build, 4, 16, v);
}

/* Integer immediate of the given bit size (x taken as raw bits). */
static inline nir_ssa_def *
nir_imm_intN_t(nir_builder *build, uint64_t x, unsigned bit_size)
{
   nir_const_value v = nir_const_value_for_raw_uint(x, bit_size);
   return nir_build_imm(build, 1, bit_size, &v);
}

static inline nir_ssa_def *
nir_imm_int(nir_builder *build, int x)
{
   return nir_imm_intN_t(build, x, 32);
}

static inline nir_ssa_def *
nir_imm_int64(nir_builder *build, int64_t x)
{
   return nir_imm_intN_t(build, x, 64);
}

/* 2-component 32-bit integer immediate vector. */
static inline nir_ssa_def *
nir_imm_ivec2(nir_builder *build, int x, int y)
{
   nir_const_value v[2] = {
      nir_const_value_for_int(x, 32),
      nir_const_value_for_int(y, 32),
   };

   return nir_build_imm(build, 2, 32, v);
}

/* 4-component 32-bit integer immediate vector. */
static inline nir_ssa_def *
nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
{
   nir_const_value v[4] = {
      nir_const_value_for_int(x, 32),
      nir_const_value_for_int(y, 32),
      nir_const_value_for_int(z, 32),
      nir_const_value_for_int(w, 32),
   };

   return nir_build_imm(build, 4, 32, v);
}

/* Infers the destination size/bit-width of a partially built ALU instruction
 * from its opcode info and sources, initializes its dest, and inserts it.
 * Returns the resulting SSA def.
 */
static inline nir_ssa_def *
nir_builder_alu_instr_finish_and_insert(nir_builder *build, nir_alu_instr *instr)
{
   const nir_op_info *op_info = &nir_op_infos[instr->op];

   instr->exact = build->exact;

   /* Guess the number of components the destination temporary should have
    * based on our input sizes, if it's not fixed for the op.
    */
   unsigned num_components = op_info->output_size;
   if (num_components == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         if (op_info->input_sizes[i] == 0)
            num_components = MAX2(num_components,
                                  instr->src[i].src.ssa->num_components);
      }
   }
   assert(num_components != 0);

   /* Figure out the bitwidth based on the source bitwidth if the instruction
    * is variable-width.
    */
   unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
   if (bit_size == 0) {
      for (unsigned i = 0; i < op_info->num_inputs; i++) {
         unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
         if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
            if (bit_size)
               assert(src_bit_size == bit_size);
            else
               bit_size = src_bit_size;
         } else {
            assert(src_bit_size ==
                   nir_alu_type_get_type_size(op_info->input_types[i]));
         }
      }
   }

   /* When in doubt, assume 32. */
   if (bit_size == 0)
      bit_size = 32;

   /* Make sure we don't swizzle from outside of our source vector (like if a
    * scalar value was passed into a multiply with a vector).
    */
   for (unsigned i = 0; i < op_info->num_inputs; i++) {
      for (unsigned j = instr->src[i].src.ssa->num_components;
           j < NIR_MAX_VEC_COMPONENTS; j++) {
         instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1;
      }
   }

   nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
                     bit_size, NULL);
   instr->dest.write_mask = (1 << num_components) - 1;

   nir_builder_instr_insert(build, &instr->instr);

   return &instr->dest.dest.ssa;
}
/* Builds and inserts an ALU instruction with up to four SSA sources; trailing
 * sources may be NULL when the opcode takes fewer.
 */
static inline nir_ssa_def *
nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
              nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3)
{
   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
   if (!instr)
      return NULL;

   instr->src[0].src = nir_src_for_ssa(src0);
   if (src1)
      instr->src[1].src = nir_src_for_ssa(src1);
   if (src2)
      instr->src[2].src = nir_src_for_ssa(src2);
   if (src3)
      instr->src[3].src = nir_src_for_ssa(src3);

   return nir_builder_alu_instr_finish_and_insert(build, instr);
}

/* for the couple special cases with more than 4 src args: */
static inline nir_ssa_def *
nir_build_alu_src_arr(nir_builder *build, nir_op op, nir_ssa_def **srcs)
{
   const nir_op_info *op_info = &nir_op_infos[op];
   nir_alu_instr *instr = nir_alu_instr_create(build->shader, op);
   if (!instr)
      return NULL;

   for (unsigned i = 0; i < op_info->num_inputs; i++)
      instr->src[i].src = nir_src_for_ssa(srcs[i]);

   return nir_builder_alu_instr_finish_and_insert(build, instr);
}

/* Generic builder for system values. */
static inline nir_ssa_def *
nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
                      unsigned num_components, unsigned bit_size)
{
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
   /* For ops with a fixed destination size, num_components must match it;
    * otherwise it is stored on the instruction.
    */
   if (nir_intrinsic_infos[op].dest_components > 0)
      assert(num_components == nir_intrinsic_infos[op].dest_components);
   else
      load->num_components = num_components;
   load->const_index[0] = index;

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(build, &load->instr);
   return &load->dest.ssa;
}

#include "nir_builder_opcodes.h"
#undef nir_deref_mode_is

/* Builds a vecN from an array of num_components scalar components. */
static inline nir_ssa_def *
nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components)
{
   return nir_build_alu_src_arr(build, nir_op_vec(num_components), comp);
}

/* Emits a mov of the given ALU source, or returns the source SSA def
 * directly when the mov would be an identity (same size, no swizzle).
 */
static inline nir_ssa_def *
nir_mov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
   assert(!src.abs && !src.negate);
   if (src.src.is_ssa && src.src.ssa->num_components == num_components) {
      bool any_swizzles = false;
      for (unsigned i = 0; i < num_components; i++) {
         if (src.swizzle[i] != i)
            any_swizzles = true;
      }
      if (!any_swizzles)
         return src.src.ssa;
   }

   nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);
   nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
                     nir_src_bit_size(src.src), NULL);
   mov->exact = build->exact;
   mov->dest.write_mask = (1 << num_components) - 1;
   mov->src[0] = src;
   nir_builder_instr_insert(build, &mov->instr);

   return &mov->dest.dest.ssa;
}

/**
 * Construct a mov that reswizzles the source's components.
 */
static inline nir_ssa_def *
nir_swizzle(nir_builder *build, nir_ssa_def *src, const unsigned *swiz,
            unsigned num_components)
{
   assert(num_components <= NIR_MAX_VEC_COMPONENTS);
   nir_alu_src alu_src = { NIR_SRC_INIT };
   alu_src.src = nir_src_for_ssa(src);

   bool is_identity_swizzle = true;
   for (unsigned i = 0; i < num_components && i < NIR_MAX_VEC_COMPONENTS; i++) {
      if (swiz[i] != i)
         is_identity_swizzle = false;
      alu_src.swizzle[i] = swiz[i];
   }

   /* An identity swizzle of the full vector is a no-op. */
   if (num_components == src->num_components && is_identity_swizzle)
      return src;

   return nir_mov_alu(build, alu_src, num_components);
}
/* Selects the right fdot given the number of components in each source. */
static inline nir_ssa_def *
nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1)
{
   assert(src0->num_components == src1->num_components);
   switch (src0->num_components) {
   case 1: return nir_fmul(build, src0, src1);
   case 2: return nir_fdot2(build, src0, src1);
   case 3: return nir_fdot3(build, src0, src1);
   case 4: return nir_fdot4(build, src0, src1);
   case 5: return nir_fdot5(build, src0, src1);
   case 8: return nir_fdot8(build, src0, src1);
   case 16: return nir_fdot16(build, src0, src1);
   default:
      unreachable("bad component size");
   }

   return NULL;
}

/* Scalar boolean: true iff all components of src0 and src1 compare equal. */
static inline nir_ssa_def *
nir_ball_iequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
{
   switch (src0->num_components) {
   case 1: return nir_ieq(b, src0, src1);
   case 2: return nir_ball_iequal2(b, src0, src1);
   case 3: return nir_ball_iequal3(b, src0, src1);
   case 4: return nir_ball_iequal4(b, src0, src1);
   case 5: return nir_ball_iequal5(b, src0, src1);
   case 8: return nir_ball_iequal8(b, src0, src1);
   case 16: return nir_ball_iequal16(b, src0, src1);
   default:
      unreachable("bad component size");
   }
}

/* Scalar boolean: true iff every component of src is true. */
static inline nir_ssa_def *
nir_ball(nir_builder *b, nir_ssa_def *src)
{
   return nir_ball_iequal(b, src, nir_imm_true(b));
}

/* Scalar boolean: true iff any component of src0 and src1 differ. */
static inline nir_ssa_def *
nir_bany_inequal(nir_builder *b, nir_ssa_def *src0, nir_ssa_def *src1)
{
   switch (src0->num_components) {
   case 1: return nir_ine(b, src0, src1);
   case 2: return nir_bany_inequal2(b, src0, src1);
   case 3: return nir_bany_inequal3(b, src0, src1);
   case 4: return nir_bany_inequal4(b, src0, src1);
   case 5: return nir_bany_inequal5(b, src0, src1);
   case 8: return nir_bany_inequal8(b, src0, src1);
   case 16: return nir_bany_inequal16(b, src0, src1);
   default:
      unreachable("bad component size");
   }
}

/* Scalar boolean: true iff any component of src is true. */
static inline nir_ssa_def *
nir_bany(nir_builder *b, nir_ssa_def *src)
{
   return nir_bany_inequal(b, src, nir_imm_false(b));
}

/* Extracts a single component of a vector as a scalar. */
static inline nir_ssa_def *
nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c)
{
   return nir_swizzle(b, def, &c, 1);
}

/* Gathers the components of def selected by mask into a packed vector. */
static inline nir_ssa_def *
nir_channels(nir_builder *b, nir_ssa_def *def, nir_component_mask_t mask)
{
   unsigned num_channels = 0, swizzle[NIR_MAX_VEC_COMPONENTS] = { 0 };

   for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
      if ((mask & (1 << i)) == 0)
         continue;
      swizzle[num_channels++] = i;
   }

   return nir_swizzle(b, def, swizzle, num_channels);
}

/* Binary-search helper for nir_select_from_ssa_def_array(): emits a bcsel
 * tree selecting arr[idx] over the half-open range [start, end).
 */
static inline nir_ssa_def *
_nir_select_from_array_helper(nir_builder *b, nir_ssa_def **arr,
                              nir_ssa_def *idx,
                              unsigned start, unsigned end)
{
   if (start == end - 1) {
      return arr[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_intN_t(b, mid, idx->bit_size)),
                       _nir_select_from_array_helper(b, arr, idx, start, mid),
                       _nir_select_from_array_helper(b, arr, idx, mid, end));
   }
}

/* Dynamically indexes an array of SSA defs with a bcsel tree. */
static inline nir_ssa_def *
nir_select_from_ssa_def_array(nir_builder *b, nir_ssa_def **arr,
                              unsigned arr_len, nir_ssa_def *idx)
{
   return _nir_select_from_array_helper(b, arr, idx, 0, arr_len);
}

/* Extracts component c (possibly dynamic) of vec.  A constant out-of-bounds
 * index yields an undef; a dynamic index becomes a bcsel tree.
 */
static inline nir_ssa_def *
nir_vector_extract(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *c)
{
   nir_src c_src = nir_src_for_ssa(c);
   if (nir_src_is_const(c_src)) {
      uint64_t c_const = nir_src_as_uint(c_src);
      if (c_const < vec->num_components)
         return nir_channel(b, vec, c_const);
      else
         return nir_ssa_undef(b, 1, vec->bit_size);
   } else {
      nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 0; i < vec->num_components; i++)
         comps[i] = nir_channel(b, vec, i);
      return nir_select_from_ssa_def_array(b, comps, vec->num_components, c);
   }
}

/** Replaces the component of `vec` specified by `c` with `scalar` */
static inline nir_ssa_def *
nir_vector_insert_imm(nir_builder *b, nir_ssa_def *vec,
                      nir_ssa_def *scalar, unsigned c)
{
   assert(scalar->num_components == 1);
   assert(c < vec->num_components);

   nir_op vec_op = nir_op_vec(vec->num_components);
   nir_alu_instr *vec_instr = nir_alu_instr_create(b->shader, vec_op);

   for (unsigned i = 0; i < vec->num_components; i++) {
      if (i == c) {
         vec_instr->src[i].src = nir_src_for_ssa(scalar);
         vec_instr->src[i].swizzle[0] = 0;
      } else {
         vec_instr->src[i].src = nir_src_for_ssa(vec);
         vec_instr->src[i].swizzle[0] = i;
      }
   }

   return nir_builder_alu_instr_finish_and_insert(b, vec_instr);
}

/** Replaces the component of `vec` specified by `c` with `scalar` */
static inline nir_ssa_def *
nir_vector_insert(nir_builder *b, nir_ssa_def *vec, nir_ssa_def *scalar,
                  nir_ssa_def *c)
{
   assert(scalar->num_components == 1);
   assert(c->num_components == 1);

   nir_src c_src = nir_src_for_ssa(c);
   if (nir_src_is_const(c_src)) {
      uint64_t c_const = nir_src_as_uint(c_src);
      if (c_const < vec->num_components)
         return nir_vector_insert_imm(b, vec, scalar, c_const);
      else
         return vec;
   } else {
      nir_const_value per_comp_idx_const[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++)
         per_comp_idx_const[i] = nir_const_value_for_int(i, c->bit_size);
      nir_ssa_def *per_comp_idx =
         nir_build_imm(b, vec->num_components,
                       c->bit_size, per_comp_idx_const);

      /* nir_builder will automatically splat out scalars to vectors so an
       * insert is as simple as "if I'm the channel, replace me with the
       * scalar."
       */
      return nir_bcsel(b, nir_ieq(b, c, per_comp_idx), scalar, vec);
   }
}

/* Sign-extending/truncating integer conversion to the given bit size
 * (no-op when the size already matches).
 */
static inline nir_ssa_def *
nir_i2i(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
{
   if (x->bit_size == dest_bit_size)
      return x;

   switch (dest_bit_size) {
   case 64: return nir_i2i64(build, x);
   case 32: return nir_i2i32(build, x);
   case 16: return nir_i2i16(build, x);
   case 8:  return nir_i2i8(build, x);
   default: unreachable("Invalid bit size");
   }
}
/* Zero-extending/truncating integer conversion to the given bit size
 * (no-op when the size already matches).
 */
static inline nir_ssa_def *
nir_u2u(nir_builder *build, nir_ssa_def *x, unsigned dest_bit_size)
{
   if (x->bit_size == dest_bit_size)
      return x;

   switch (dest_bit_size) {
   case 64: return nir_u2u64(build, x);
   case 32: return nir_u2u32(build, x);
   case 16: return nir_u2u16(build, x);
   case 8:  return nir_u2u8(build, x);
   default: unreachable("Invalid bit size");
   }
}

/* x + y with an immediate y; folds the add away entirely when y == 0. */
static inline nir_ssa_def *
nir_iadd_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 0) {
      return x;
   } else {
      return nir_iadd(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

/* Like nir_iadd_imm() but marks the add as known not to wrap (unsigned). */
static inline nir_ssa_def *
nir_iadd_imm_nuw(nir_builder *b, nir_ssa_def *x, uint64_t y)
{
   nir_ssa_def *d = nir_iadd_imm(b, x, y);
   /* d == x means the add was folded away and there's nothing to mark. */
   if (d != x && d->parent_instr->type == nir_instr_type_alu)
      nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
   return d;
}

/* x + y, marked as known not to wrap (unsigned). */
static inline nir_ssa_def *
nir_iadd_nuw(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *d = nir_iadd(b, x, y);
   nir_instr_as_alu(d->parent_instr)->no_unsigned_wrap = true;
   return d;
}

/* x == y with an immediate y. */
static inline nir_ssa_def *
nir_ieq_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return nir_ieq(build, x, nir_imm_intN_t(build, y, x->bit_size));
}

/* Use nir_iadd(x, -y) for reversing parameter ordering */
static inline nir_ssa_def *
nir_isub_imm(nir_builder *build, uint64_t y, nir_ssa_def *x)
{
   return nir_isub(build, nir_imm_intN_t(build, y, x->bit_size), x);
}

/* Shared body of nir_imul_imm()/nir_amul_imm(): x * y with an immediate y,
 * strength-reduced to nothing, x, or a shift where possible.
 */
static inline nir_ssa_def *
_nir_mul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y, bool amul)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 0) {
      return nir_imm_intN_t(build, 0, x->bit_size);
   } else if (y == 1) {
      return x;
   } else if (!build->shader->options->lower_bitops &&
              util_is_power_of_two_or_zero64(y)) {
      return nir_ishl(build, x, nir_imm_int(build, ffsll(y) - 1));
   } else if (amul) {
      return nir_amul(build, x, nir_imm_intN_t(build, y, x->bit_size));
   } else {
      return nir_imul(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

static inline nir_ssa_def *
nir_imul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return _nir_mul_imm(build, x, y, false);
}

/* Address-style multiply (amul) by an immediate. */
static inline nir_ssa_def *
nir_amul_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   return _nir_mul_imm(build, x, y, true);
}

static inline nir_ssa_def *
nir_fadd_imm(nir_builder *build, nir_ssa_def *x, double y)
{
   return nir_fadd(build, x, nir_imm_floatN_t(build, y, x->bit_size));
}

static inline nir_ssa_def *
nir_fmul_imm(nir_builder *build, nir_ssa_def *x, double y)
{
   return nir_fmul(build, x, nir_imm_floatN_t(build, y, x->bit_size));
}

/* x & y with an immediate y; folds to 0 or x when the mask is trivial. */
static inline nir_ssa_def *
nir_iand_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 0) {
      return nir_imm_intN_t(build, 0, x->bit_size);
   } else if (y == BITFIELD64_MASK(x->bit_size)) {
      return x;
   } else {
      return nir_iand(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

/* Arithmetic shift right by an immediate; no-op when y == 0. */
static inline nir_ssa_def *
nir_ishr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
{
   if (y == 0) {
      return x;
   } else {
      return nir_ishr(build, x, nir_imm_int(build, y));
   }
}

/* Logical shift right by an immediate; no-op when y == 0. */
static inline nir_ssa_def *
nir_ushr_imm(nir_builder *build, nir_ssa_def *x, uint32_t y)
{
   if (y == 0) {
      return x;
   } else {
      return nir_ushr(build, x, nir_imm_int(build, y));
   }
}

/* Unsigned divide by an immediate; strength-reduced to a shift for powers
 * of two.
 */
static inline nir_ssa_def *
nir_udiv_imm(nir_builder *build, nir_ssa_def *x, uint64_t y)
{
   assert(x->bit_size <= 64);
   y &= BITFIELD64_MASK(x->bit_size);

   if (y == 1) {
      return x;
   } else if (util_is_power_of_two_nonzero(y)) {
      return nir_ushr_imm(build, x, ffsll(y) - 1);
   } else {
      return nir_udiv(build, x, nir_imm_intN_t(build, y, x->bit_size));
   }
}

static inline nir_ssa_def *
nir_fclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_fmin(b, nir_fmax(b, x, min_val), max_val);
}

static inline nir_ssa_def *
nir_iclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_imin(b, nir_imax(b, x, min_val), max_val);
}

static inline nir_ssa_def *
nir_uclamp(nir_builder *b,
           nir_ssa_def *x, nir_ssa_def *min_val, nir_ssa_def *max_val)
{
   return nir_umin(b, nir_umax(b, x, min_val), max_val);
}

/* Packs the components of a vector into a single wider scalar (little-endian
 * component order), using a dedicated opcode when one exists.
 */
static inline nir_ssa_def *
nir_pack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert(src->num_components * src->bit_size == dest_bit_size);

   switch (dest_bit_size) {
   case 64:
      switch (src->bit_size) {
      case 32: return nir_pack_64_2x32(b, src);
      case 16: return nir_pack_64_4x16(b, src);
      default: break;
      }
      break;

   case 32:
      if (src->bit_size == 16)
         return nir_pack_32_2x16(b, src);
      break;

   default:
      break;
   }

   /* If we got here, we have no dedicated pack opcode.  Zero-extend each
    * component, shift it into place, and OR everything together.
    */
   nir_ssa_def *dest = nir_imm_intN_t(b, 0, dest_bit_size);
   for (unsigned i = 0; i < src->num_components; i++) {
      nir_ssa_def *val = nir_u2u(b, nir_channel(b, src, i), dest_bit_size);
      val = nir_ishl(b, val, nir_imm_int(b, i * src->bit_size));
      dest = nir_ior(b, dest, val);
   }
   return dest;
}
/* Splits a wide scalar into a vector of narrower components (little-endian
 * component order), using a dedicated opcode when one exists.
 */
static inline nir_ssa_def *
nir_unpack_bits(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert(src->num_components == 1);
   assert(src->bit_size > dest_bit_size);
   const unsigned dest_num_components = src->bit_size / dest_bit_size;
   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);

   switch (src->bit_size) {
   case 64:
      switch (dest_bit_size) {
      case 32: return nir_unpack_64_2x32(b, src);
      case 16: return nir_unpack_64_4x16(b, src);
      default: break;
      }
      break;

   case 32:
      if (dest_bit_size == 16)
         return nir_unpack_32_2x16(b, src);
      break;

   default:
      break;
   }

   /* If we got here, we have no dedicated unpack opcode. */
   nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < dest_num_components; i++) {
      nir_ssa_def *val = nir_ushr_imm(b, src, i * dest_bit_size);
      dest_comps[i] = nir_u2u(b, val, dest_bit_size);
   }
   return nir_vec(b, dest_comps, dest_num_components);
}

/**
 * Treats srcs as if it's one big blob of bits and extracts the range of bits
 * given by
 *
 *       [first_bit, first_bit + dest_num_components * dest_bit_size)
 *
 * The range can have any alignment or size as long as it's an integer number
 * of destination components and fits inside the concatenated sources.
 *
 * TODO: The one caveat here is that we can't handle byte alignment if 64-bit
 * values are involved because that would require pack/unpack to/from a vec8
 * which NIR currently does not support.
 */
static inline nir_ssa_def *
nir_extract_bits(nir_builder *b, nir_ssa_def **srcs, unsigned num_srcs,
                 unsigned first_bit,
                 unsigned dest_num_components, unsigned dest_bit_size)
{
   const unsigned num_bits = dest_num_components * dest_bit_size;

   /* Figure out the common bit size */
   unsigned common_bit_size = dest_bit_size;
   for (unsigned i = 0; i < num_srcs; i++)
      common_bit_size = MIN2(common_bit_size, srcs[i]->bit_size);
   if (first_bit > 0)
      common_bit_size = MIN2(common_bit_size, (1u << (ffs(first_bit) - 1)));

   /* We don't want to have to deal with 1-bit values */
   assert(common_bit_size >= 8);

   nir_ssa_def *common_comps[NIR_MAX_VEC_COMPONENTS * sizeof(uint64_t)];
   assert(num_bits / common_bit_size <= ARRAY_SIZE(common_comps));

   /* First, unpack to the common bit size and select the components from the
    * source.
    */
   int src_idx = -1;
   unsigned src_start_bit = 0;
   unsigned src_end_bit = 0;
   for (unsigned i = 0; i < num_bits / common_bit_size; i++) {
      const unsigned bit = first_bit + (i * common_bit_size);
      /* Advance to the source containing this bit. */
      while (bit >= src_end_bit) {
         src_idx++;
         assert(src_idx < (int) num_srcs);
         src_start_bit = src_end_bit;
         src_end_bit += srcs[src_idx]->bit_size *
                        srcs[src_idx]->num_components;
      }
      assert(bit >= src_start_bit);
      assert(bit + common_bit_size <= src_end_bit);
      const unsigned rel_bit = bit - src_start_bit;
      const unsigned src_bit_size = srcs[src_idx]->bit_size;

      nir_ssa_def *comp = nir_channel(b, srcs[src_idx],
                                      rel_bit / src_bit_size);
      if (srcs[src_idx]->bit_size > common_bit_size) {
         nir_ssa_def *unpacked = nir_unpack_bits(b, comp, common_bit_size);
         comp = nir_channel(b, unpacked, (rel_bit % src_bit_size) /
                                         common_bit_size);
      }
      common_comps[i] = comp;
   }

   /* Now, re-pack the destination if we have to */
   if (dest_bit_size > common_bit_size) {
      unsigned common_per_dest = dest_bit_size / common_bit_size;
      nir_ssa_def *dest_comps[NIR_MAX_VEC_COMPONENTS];
      for (unsigned i = 0; i < dest_num_components; i++) {
         nir_ssa_def *unpacked = nir_vec(b, common_comps + i * common_per_dest,
                                         common_per_dest);
         dest_comps[i] = nir_pack_bits(b, unpacked, dest_bit_size);
      }
      return nir_vec(b, dest_comps, dest_num_components);
   } else {
      assert(dest_bit_size == common_bit_size);
      return nir_vec(b, common_comps, dest_num_components);
   }
}

/* Reinterprets a vector's bits as a vector of a different bit size; the
 * total bit count must be preserved.
 */
static inline nir_ssa_def *
nir_bitcast_vector(nir_builder *b, nir_ssa_def *src, unsigned dest_bit_size)
{
   assert((src->bit_size * src->num_components) % dest_bit_size == 0);
   const unsigned dest_num_components =
      (src->bit_size * src->num_components) / dest_bit_size;
   assert(dest_num_components <= NIR_MAX_VEC_COMPONENTS);

   return nir_extract_bits(b, &src, 1, 0, dest_num_components, dest_bit_size);
}

/**
 * Pad a value to N components with undefs of matching bit size.
 * If the value already contains >= num_components, it is returned without change.
 */
static inline nir_ssa_def *
nir_pad_vector(nir_builder *b, nir_ssa_def *src, unsigned num_components)
{
   assert(src->num_components <= num_components);
   if (src->num_components == num_components)
      return src;

   nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];
   nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);
   unsigned i = 0;
   for (; i < src->num_components; i++)
      components[i] = nir_channel(b, src, i);
   for (; i < num_components; i++)
      components[i] = undef;

   return nir_vec(b, components, num_components);
}
<= num_components);1130if (src->num_components == num_components)1131return src;11321133nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];1134nir_ssa_def *undef = nir_ssa_undef(b, 1, src->bit_size);1135unsigned i = 0;1136for (; i < src->num_components; i++)1137components[i] = nir_channel(b, src, i);1138for (; i < num_components; i++)1139components[i] = undef;11401141return nir_vec(b, components, num_components);1142}11431144/**1145* Pad a value to N components with copies of the given immediate of matching1146* bit size. If the value already contains >= num_components, it is returned1147* without change.1148*/1149static inline nir_ssa_def *1150nir_pad_vector_imm_int(nir_builder *b, nir_ssa_def *src, uint64_t imm_val,1151unsigned num_components)1152{1153assert(src->num_components <= num_components);1154if (src->num_components == num_components)1155return src;11561157nir_ssa_def *components[NIR_MAX_VEC_COMPONENTS];1158nir_ssa_def *imm = nir_imm_intN_t(b, imm_val, src->bit_size);1159unsigned i = 0;1160for (; i < src->num_components; i++)1161components[i] = nir_channel(b, src, i);1162for (; i < num_components; i++)1163components[i] = imm;11641165return nir_vec(b, components, num_components);1166}11671168/**1169* Pad a value to 4 components with undefs of matching bit size.1170* If the value already contains >= 4 components, it is returned without change.1171*/1172static inline nir_ssa_def *1173nir_pad_vec4(nir_builder *b, nir_ssa_def *src)1174{1175return nir_pad_vector(b, src, 4);1176}11771178/**1179* Turns a nir_src into a nir_ssa_def * so it can be passed to1180* nir_build_alu()-based builder calls.1181*1182* See nir_ssa_for_alu_src() for alu instructions.1183*/1184static inline nir_ssa_def *1185nir_ssa_for_src(nir_builder *build, nir_src src, int num_components)1186{1187if (src.is_ssa && src.ssa->num_components == num_components)1188return src.ssa;11891190nir_alu_src alu = { NIR_SRC_INIT };1191alu.src = src;1192for (int j = 0; j < NIR_MAX_VEC_COMPONENTS; 
j++)1193alu.swizzle[j] = j;11941195return nir_mov_alu(build, alu, num_components);1196}11971198/**1199* Similar to nir_ssa_for_src(), but for alu srcs, respecting the1200* nir_alu_src's swizzle.1201*/1202static inline nir_ssa_def *1203nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn)1204{1205if (nir_alu_src_is_trivial_ssa(instr, srcn))1206return instr->src[srcn].src.ssa;12071208nir_alu_src *src = &instr->src[srcn];1209unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn);1210return nir_mov_alu(build, *src, num_components);1211}12121213static inline unsigned1214nir_get_ptr_bitsize(nir_shader *shader)1215{1216if (shader->info.stage == MESA_SHADER_KERNEL)1217return shader->info.cs.ptr_size;1218return 32;1219}12201221static inline nir_deref_instr *1222nir_build_deref_var(nir_builder *build, nir_variable *var)1223{1224nir_deref_instr *deref =1225nir_deref_instr_create(build->shader, nir_deref_type_var);12261227deref->modes = (nir_variable_mode)var->data.mode;1228deref->type = var->type;1229deref->var = var;12301231nir_ssa_dest_init(&deref->instr, &deref->dest, 1,1232nir_get_ptr_bitsize(build->shader), NULL);12331234nir_builder_instr_insert(build, &deref->instr);12351236return deref;1237}12381239static inline nir_deref_instr *1240nir_build_deref_array(nir_builder *build, nir_deref_instr *parent,1241nir_ssa_def *index)1242{1243assert(glsl_type_is_array(parent->type) ||1244glsl_type_is_matrix(parent->type) ||1245glsl_type_is_vector(parent->type));12461247assert(index->bit_size == parent->dest.ssa.bit_size);12481249nir_deref_instr *deref =1250nir_deref_instr_create(build->shader, nir_deref_type_array);12511252deref->modes = parent->modes;1253deref->type = glsl_get_array_element(parent->type);1254deref->parent = nir_src_for_ssa(&parent->dest.ssa);1255deref->arr.index = nir_src_for_ssa(index);12561257nir_ssa_dest_init(&deref->instr, &deref->dest,1258parent->dest.ssa.num_components,1259parent->dest.ssa.bit_size, 
NULL);12601261nir_builder_instr_insert(build, &deref->instr);12621263return deref;1264}12651266static inline nir_deref_instr *1267nir_build_deref_array_imm(nir_builder *build, nir_deref_instr *parent,1268int64_t index)1269{1270assert(parent->dest.is_ssa);1271nir_ssa_def *idx_ssa = nir_imm_intN_t(build, index,1272parent->dest.ssa.bit_size);12731274return nir_build_deref_array(build, parent, idx_ssa);1275}12761277static inline nir_deref_instr *1278nir_build_deref_ptr_as_array(nir_builder *build, nir_deref_instr *parent,1279nir_ssa_def *index)1280{1281assert(parent->deref_type == nir_deref_type_array ||1282parent->deref_type == nir_deref_type_ptr_as_array ||1283parent->deref_type == nir_deref_type_cast);12841285assert(index->bit_size == parent->dest.ssa.bit_size);12861287nir_deref_instr *deref =1288nir_deref_instr_create(build->shader, nir_deref_type_ptr_as_array);12891290deref->modes = parent->modes;1291deref->type = parent->type;1292deref->parent = nir_src_for_ssa(&parent->dest.ssa);1293deref->arr.index = nir_src_for_ssa(index);12941295nir_ssa_dest_init(&deref->instr, &deref->dest,1296parent->dest.ssa.num_components,1297parent->dest.ssa.bit_size, NULL);12981299nir_builder_instr_insert(build, &deref->instr);13001301return deref;1302}13031304static inline nir_deref_instr *1305nir_build_deref_array_wildcard(nir_builder *build, nir_deref_instr *parent)1306{1307assert(glsl_type_is_array(parent->type) ||1308glsl_type_is_matrix(parent->type));13091310nir_deref_instr *deref =1311nir_deref_instr_create(build->shader, nir_deref_type_array_wildcard);13121313deref->modes = parent->modes;1314deref->type = glsl_get_array_element(parent->type);1315deref->parent = nir_src_for_ssa(&parent->dest.ssa);13161317nir_ssa_dest_init(&deref->instr, &deref->dest,1318parent->dest.ssa.num_components,1319parent->dest.ssa.bit_size, NULL);13201321nir_builder_instr_insert(build, &deref->instr);13221323return deref;1324}13251326static inline nir_deref_instr *1327nir_build_deref_struct(nir_builder 
*build, nir_deref_instr *parent,1328unsigned index)1329{1330assert(glsl_type_is_struct_or_ifc(parent->type));13311332nir_deref_instr *deref =1333nir_deref_instr_create(build->shader, nir_deref_type_struct);13341335deref->modes = parent->modes;1336deref->type = glsl_get_struct_field(parent->type, index);1337deref->parent = nir_src_for_ssa(&parent->dest.ssa);1338deref->strct.index = index;13391340nir_ssa_dest_init(&deref->instr, &deref->dest,1341parent->dest.ssa.num_components,1342parent->dest.ssa.bit_size, NULL);13431344nir_builder_instr_insert(build, &deref->instr);13451346return deref;1347}13481349static inline nir_deref_instr *1350nir_build_deref_cast(nir_builder *build, nir_ssa_def *parent,1351nir_variable_mode modes, const struct glsl_type *type,1352unsigned ptr_stride)1353{1354nir_deref_instr *deref =1355nir_deref_instr_create(build->shader, nir_deref_type_cast);13561357deref->modes = modes;1358deref->type = type;1359deref->parent = nir_src_for_ssa(parent);1360deref->cast.ptr_stride = ptr_stride;13611362nir_ssa_dest_init(&deref->instr, &deref->dest,1363parent->num_components, parent->bit_size, NULL);13641365nir_builder_instr_insert(build, &deref->instr);13661367return deref;1368}13691370static inline nir_deref_instr *1371nir_alignment_deref_cast(nir_builder *build, nir_deref_instr *parent,1372uint32_t align_mul, uint32_t align_offset)1373{1374nir_deref_instr *deref =1375nir_deref_instr_create(build->shader, nir_deref_type_cast);13761377deref->modes = parent->modes;1378deref->type = parent->type;1379deref->parent = nir_src_for_ssa(&parent->dest.ssa);1380deref->cast.ptr_stride = nir_deref_instr_array_stride(deref);1381deref->cast.align_mul = align_mul;1382deref->cast.align_offset = align_offset;13831384nir_ssa_dest_init(&deref->instr, &deref->dest,1385parent->dest.ssa.num_components,1386parent->dest.ssa.bit_size, NULL);13871388nir_builder_instr_insert(build, &deref->instr);13891390return deref;1391}13921393/** Returns a deref that follows another but starting 
from the given parent1394*1395* The new deref will be the same type and take the same array or struct index1396* as the leader deref but it may have a different parent. This is very1397* useful for walking deref paths.1398*/1399static inline nir_deref_instr *1400nir_build_deref_follower(nir_builder *b, nir_deref_instr *parent,1401nir_deref_instr *leader)1402{1403/* If the derefs would have the same parent, don't make a new one */1404assert(leader->parent.is_ssa);1405if (leader->parent.ssa == &parent->dest.ssa)1406return leader;14071408UNUSED nir_deref_instr *leader_parent = nir_src_as_deref(leader->parent);14091410switch (leader->deref_type) {1411case nir_deref_type_var:1412unreachable("A var dereference cannot have a parent");1413break;14141415case nir_deref_type_array:1416case nir_deref_type_array_wildcard:1417assert(glsl_type_is_matrix(parent->type) ||1418glsl_type_is_array(parent->type) ||1419(leader->deref_type == nir_deref_type_array &&1420glsl_type_is_vector(parent->type)));1421assert(glsl_get_length(parent->type) ==1422glsl_get_length(leader_parent->type));14231424if (leader->deref_type == nir_deref_type_array) {1425assert(leader->arr.index.is_ssa);1426nir_ssa_def *index = nir_i2i(b, leader->arr.index.ssa,1427parent->dest.ssa.bit_size);1428return nir_build_deref_array(b, parent, index);1429} else {1430return nir_build_deref_array_wildcard(b, parent);1431}14321433case nir_deref_type_struct:1434assert(glsl_type_is_struct_or_ifc(parent->type));1435assert(glsl_get_length(parent->type) ==1436glsl_get_length(leader_parent->type));14371438return nir_build_deref_struct(b, parent, leader->strct.index);14391440default:1441unreachable("Invalid deref instruction type");1442}1443}14441445static inline nir_ssa_def *1446nir_load_reg(nir_builder *build, nir_register *reg)1447{1448return nir_ssa_for_src(build, nir_src_for_reg(reg), reg->num_components);1449}14501451static inline void1452nir_store_reg(nir_builder *build, nir_register *reg,1453nir_ssa_def *def, 
nir_component_mask_t write_mask)1454{1455assert(reg->num_components == def->num_components);1456assert(reg->bit_size == def->bit_size);14571458nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_mov);1459mov->src[0].src = nir_src_for_ssa(def);1460mov->dest.dest = nir_dest_for_reg(reg);1461mov->dest.write_mask = write_mask & BITFIELD_MASK(reg->num_components);1462nir_builder_instr_insert(build, &mov->instr);1463}14641465static inline nir_ssa_def *1466nir_load_deref_with_access(nir_builder *build, nir_deref_instr *deref,1467enum gl_access_qualifier access)1468{1469return nir_build_load_deref(build, glsl_get_vector_elements(deref->type),1470glsl_get_bit_size(deref->type), &deref->dest.ssa,1471access);1472}14731474#undef nir_load_deref1475static inline nir_ssa_def *1476nir_load_deref(nir_builder *build, nir_deref_instr *deref)1477{1478return nir_load_deref_with_access(build, deref, (enum gl_access_qualifier)0);1479}14801481static inline void1482nir_store_deref_with_access(nir_builder *build, nir_deref_instr *deref,1483nir_ssa_def *value, unsigned writemask,1484enum gl_access_qualifier access)1485{1486writemask &= (1u << value->num_components) - 1u;1487nir_build_store_deref(build, &deref->dest.ssa, value, writemask, access);1488}14891490#undef nir_store_deref1491static inline void1492nir_store_deref(nir_builder *build, nir_deref_instr *deref,1493nir_ssa_def *value, unsigned writemask)1494{1495nir_store_deref_with_access(build, deref, value, writemask,1496(enum gl_access_qualifier)0);1497}14981499static inline void1500nir_copy_deref_with_access(nir_builder *build, nir_deref_instr *dest,1501nir_deref_instr *src,1502enum gl_access_qualifier dest_access,1503enum gl_access_qualifier src_access)1504{1505nir_build_copy_deref(build, &dest->dest.ssa, &src->dest.ssa, dest_access, src_access);1506}15071508#undef nir_copy_deref1509static inline void1510nir_copy_deref(nir_builder *build, nir_deref_instr *dest, nir_deref_instr 
*src)1511{1512nir_copy_deref_with_access(build, dest, src,1513(enum gl_access_qualifier) 0,1514(enum gl_access_qualifier) 0);1515}15161517static inline void1518nir_memcpy_deref_with_access(nir_builder *build, nir_deref_instr *dest,1519nir_deref_instr *src, nir_ssa_def *size,1520enum gl_access_qualifier dest_access,1521enum gl_access_qualifier src_access)1522{1523nir_build_memcpy_deref(build, &dest->dest.ssa, &src->dest.ssa,1524size, dest_access, src_access);1525}15261527#undef nir_memcpy_deref1528static inline void1529nir_memcpy_deref(nir_builder *build, nir_deref_instr *dest,1530nir_deref_instr *src, nir_ssa_def *size)1531{1532nir_memcpy_deref_with_access(build, dest, src, size,1533(enum gl_access_qualifier)0,1534(enum gl_access_qualifier)0);1535}15361537static inline nir_ssa_def *1538nir_load_var(nir_builder *build, nir_variable *var)1539{1540return nir_load_deref(build, nir_build_deref_var(build, var));1541}15421543static inline void1544nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value,1545unsigned writemask)1546{1547nir_store_deref(build, nir_build_deref_var(build, var), value, writemask);1548}15491550static inline void1551nir_copy_var(nir_builder *build, nir_variable *dest, nir_variable *src)1552{1553nir_copy_deref(build, nir_build_deref_var(build, dest),1554nir_build_deref_var(build, src));1555}15561557#undef nir_load_global1558static inline nir_ssa_def *1559nir_load_global(nir_builder *build, nir_ssa_def *addr, unsigned align,1560unsigned num_components, unsigned bit_size)1561{1562nir_intrinsic_instr *load =1563nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global);1564load->num_components = num_components;1565load->src[0] = nir_src_for_ssa(addr);1566nir_intrinsic_set_align(load, align, 0);1567nir_ssa_dest_init(&load->instr, &load->dest,1568num_components, bit_size, NULL);1569nir_builder_instr_insert(build, &load->instr);1570return &load->dest.ssa;1571}15721573#undef nir_store_global1574static inline 
void1575nir_store_global(nir_builder *build, nir_ssa_def *addr, unsigned align,1576nir_ssa_def *value, nir_component_mask_t write_mask)1577{1578nir_intrinsic_instr *store =1579nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_global);1580store->num_components = value->num_components;1581store->src[0] = nir_src_for_ssa(value);1582store->src[1] = nir_src_for_ssa(addr);1583nir_intrinsic_set_write_mask(store,1584write_mask & BITFIELD_MASK(value->num_components));1585nir_intrinsic_set_align(store, align, 0);1586nir_builder_instr_insert(build, &store->instr);1587}15881589#undef nir_load_global_constant1590static inline nir_ssa_def *1591nir_load_global_constant(nir_builder *build, nir_ssa_def *addr, unsigned align,1592unsigned num_components, unsigned bit_size)1593{1594nir_intrinsic_instr *load =1595nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_global_constant);1596load->num_components = num_components;1597load->src[0] = nir_src_for_ssa(addr);1598nir_intrinsic_set_align(load, align, 0);1599nir_ssa_dest_init(&load->instr, &load->dest,1600num_components, bit_size, NULL);1601nir_builder_instr_insert(build, &load->instr);1602return &load->dest.ssa;1603}16041605#undef nir_load_param1606static inline nir_ssa_def *1607nir_load_param(nir_builder *build, uint32_t param_idx)1608{1609assert(param_idx < build->impl->function->num_params);1610nir_parameter *param = &build->impl->function->params[param_idx];1611return nir_build_load_param(build, param->num_components, param->bit_size, param_idx);1612}16131614/**1615* This function takes an I/O intrinsic like load/store_input,1616* and emits a sequence that calculates the full offset of that instruction,1617* including a stride to the base and component offsets.1618*/1619static inline nir_ssa_def *1620nir_build_calc_io_offset(nir_builder *b,1621nir_intrinsic_instr *intrin,1622nir_ssa_def *base_stride,1623unsigned component_stride)1624{1625/* base is the driver_location, which is in slots (1 slot = 4x4 bytes) 
*/1626nir_ssa_def *base_op = nir_imul_imm(b, base_stride, nir_intrinsic_base(intrin));16271628/* offset should be interpreted in relation to the base,1629* so the instruction effectively reads/writes another input/output1630* when it has an offset1631*/1632nir_ssa_def *offset_op = nir_imul(b, base_stride, nir_ssa_for_src(b, *nir_get_io_offset_src(intrin), 1));16331634/* component is in bytes */1635unsigned const_op = nir_intrinsic_component(intrin) * component_stride;16361637return nir_iadd_imm_nuw(b, nir_iadd_nuw(b, base_op, offset_op), const_op);1638}16391640/* calculate a `(1 << value) - 1` in ssa without overflows */1641static inline nir_ssa_def *1642nir_mask(nir_builder *b, nir_ssa_def *bits, unsigned dst_bit_size)1643{1644return nir_ushr(b, nir_imm_intN_t(b, -1, dst_bit_size),1645nir_isub_imm(b, dst_bit_size, nir_u2u32(b, bits)));1646}16471648static inline nir_ssa_def *1649nir_f2b(nir_builder *build, nir_ssa_def *f)1650{1651return nir_f2b1(build, f);1652}16531654static inline nir_ssa_def *1655nir_i2b(nir_builder *build, nir_ssa_def *i)1656{1657return nir_i2b1(build, i);1658}16591660static inline nir_ssa_def *1661nir_b2f(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)1662{1663switch (bit_size) {1664case 64: return nir_b2f64(build, b);1665case 32: return nir_b2f32(build, b);1666case 16: return nir_b2f16(build, b);1667default:1668unreachable("Invalid bit-size");1669};1670}16711672static inline nir_ssa_def *1673nir_b2i(nir_builder *build, nir_ssa_def *b, uint32_t bit_size)1674{1675switch (bit_size) {1676case 64: return nir_b2i64(build, b);1677case 32: return nir_b2i32(build, b);1678case 16: return nir_b2i16(build, b);1679case 8: return nir_b2i8(build, b);1680default:1681unreachable("Invalid bit-size");1682};1683}1684static inline nir_ssa_def *1685nir_load_barycentric(nir_builder *build, nir_intrinsic_op op,1686unsigned interp_mode)1687{1688unsigned num_components = op == nir_intrinsic_load_barycentric_model ? 
3 : 2;1689nir_intrinsic_instr *bary = nir_intrinsic_instr_create(build->shader, op);1690nir_ssa_dest_init(&bary->instr, &bary->dest, num_components, 32, NULL);1691nir_intrinsic_set_interp_mode(bary, interp_mode);1692nir_builder_instr_insert(build, &bary->instr);1693return &bary->dest.ssa;1694}16951696static inline void1697nir_jump(nir_builder *build, nir_jump_type jump_type)1698{1699assert(jump_type != nir_jump_goto && jump_type != nir_jump_goto_if);1700nir_jump_instr *jump = nir_jump_instr_create(build->shader, jump_type);1701nir_builder_instr_insert(build, &jump->instr);1702}17031704static inline void1705nir_goto(nir_builder *build, struct nir_block *target)1706{1707assert(!build->impl->structured);1708nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto);1709jump->target = target;1710nir_builder_instr_insert(build, &jump->instr);1711}17121713static inline void1714nir_goto_if(nir_builder *build, struct nir_block *target, nir_src cond,1715struct nir_block *else_target)1716{1717assert(!build->impl->structured);1718nir_jump_instr *jump = nir_jump_instr_create(build->shader, nir_jump_goto_if);1719jump->condition = cond;1720jump->target = target;1721jump->else_target = else_target;1722nir_builder_instr_insert(build, &jump->instr);1723}17241725static inline nir_ssa_def *1726nir_compare_func(nir_builder *b, enum compare_func func,1727nir_ssa_def *src0, nir_ssa_def *src1)1728{1729switch (func) {1730case COMPARE_FUNC_NEVER:1731return nir_imm_int(b, 0);1732case COMPARE_FUNC_ALWAYS:1733return nir_imm_int(b, ~0);1734case COMPARE_FUNC_EQUAL:1735return nir_feq(b, src0, src1);1736case COMPARE_FUNC_NOTEQUAL:1737return nir_fneu(b, src0, src1);1738case COMPARE_FUNC_GREATER:1739return nir_flt(b, src1, src0);1740case COMPARE_FUNC_GEQUAL:1741return nir_fge(b, src0, src1);1742case COMPARE_FUNC_LESS:1743return nir_flt(b, src0, src1);1744case COMPARE_FUNC_LEQUAL:1745return nir_fge(b, src1, src0);1746}1747unreachable("bad compare func");1748}17491750static inline 
void1751nir_scoped_memory_barrier(nir_builder *b,1752nir_scope scope,1753nir_memory_semantics semantics,1754nir_variable_mode modes)1755{1756nir_scoped_barrier(b, NIR_SCOPE_NONE, scope, semantics, modes);1757}17581759static inline nir_ssa_def *1760nir_type_convert(nir_builder *b,1761nir_ssa_def *src,1762nir_alu_type src_type,1763nir_alu_type dest_type)1764{1765assert(nir_alu_type_get_type_size(src_type) == 0 ||1766nir_alu_type_get_type_size(src_type) == src->bit_size);17671768src_type = (nir_alu_type) (src_type | src->bit_size);17691770nir_op opcode =1771nir_type_conversion_op(src_type, dest_type, nir_rounding_mode_undef);17721773return nir_build_alu(b, opcode, src, NULL, NULL, NULL);1774}17751776static inline nir_ssa_def *1777nir_convert_to_bit_size(nir_builder *b,1778nir_ssa_def *src,1779nir_alu_type type,1780unsigned bit_size)1781{1782return nir_type_convert(b, src, type, (nir_alu_type) (type | bit_size));1783}17841785static inline nir_ssa_def *1786nir_i2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1787{1788return nir_convert_to_bit_size(b, src, nir_type_int, bit_size);1789}17901791static inline nir_ssa_def *1792nir_u2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1793{1794return nir_convert_to_bit_size(b, src, nir_type_uint, bit_size);1795}17961797static inline nir_ssa_def *1798nir_b2bN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1799{1800return nir_convert_to_bit_size(b, src, nir_type_bool, bit_size);1801}18021803static inline nir_ssa_def *1804nir_f2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1805{1806return nir_convert_to_bit_size(b, src, nir_type_float, bit_size);1807}18081809static inline nir_ssa_def *1810nir_i2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1811{1812return nir_type_convert(b, src, nir_type_int,1813(nir_alu_type) (nir_type_float | bit_size));1814}18151816static inline nir_ssa_def *1817nir_u2fN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1818{1819return nir_type_convert(b, src, 
nir_type_uint,1820(nir_alu_type) (nir_type_float | bit_size));1821}18221823static inline nir_ssa_def *1824nir_f2uN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1825{1826return nir_type_convert(b, src, nir_type_float,1827(nir_alu_type) (nir_type_uint | bit_size));1828}18291830static inline nir_ssa_def *1831nir_f2iN(nir_builder *b, nir_ssa_def *src, unsigned bit_size)1832{1833return nir_type_convert(b, src, nir_type_float,1834(nir_alu_type) (nir_type_int | bit_size));1835}18361837#endif /* NIR_BUILDER_H */183818391840