Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
4565 views
/**************************************************************************1*2* Copyright 2019 Red Hat.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the "Software"),7* to deal in the Software without restriction, including without limitation8* the rights to use, copy, modify, merge, publish, distribute, sublicense,9* and/or sell copies of the Software, and to permit persons to whom the10* Software is furnished to do so, subject to the following conditions:11*12* The above copyright notice and this permission notice shall be included13* in all copies or substantial portions of the Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS16* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER19* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,20* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE21* SOFTWARE.22*23**************************************************************************/2425#include "lp_bld_nir.h"26#include "lp_bld_init.h"27#include "lp_bld_flow.h"28#include "lp_bld_logic.h"29#include "lp_bld_gather.h"30#include "lp_bld_const.h"31#include "lp_bld_struct.h"32#include "lp_bld_arit.h"33#include "lp_bld_bitarit.h"34#include "lp_bld_coro.h"35#include "lp_bld_printf.h"36#include "util/u_math.h"3738static int bit_size_to_shift_size(int bit_size)39{40switch (bit_size) {41case 64:42return 3;43default:44case 32:45return 2;46case 16:47return 1;48case 8:49return 0;50}51}5253/*54* combine the execution mask if there is one with the current mask.55*/56static LLVMValueRef57mask_vec(struct lp_build_nir_context *bld_base)58{59struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;60LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;61struct lp_exec_mask *exec_mask = &bld->exec_mask;62LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;63if (!exec_mask->has_mask) {64return bld_mask;65}66if (!bld_mask)67return exec_mask->exec_mask;68return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),69exec_mask->exec_mask, "");70}7172static LLVMValueRef73emit_fetch_64bit(74struct lp_build_nir_context * bld_base,75LLVMValueRef input,76LLVMValueRef input2)77{78struct gallivm_state *gallivm = bld_base->base.gallivm;79LLVMBuilderRef builder = gallivm->builder;80LLVMValueRef res;81int i;82LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];83int len = bld_base->base.type.length * 2;84assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));8586for (i = 0; i < bld_base->base.type.length * 2; i+=2) {87#if UTIL_ARCH_LITTLE_ENDIAN88shuffles[i] = lp_build_const_int32(gallivm, i / 2);89shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);90#else91shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);92shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);93#endif94}95res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");9697return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");98}99100static void101emit_store_64bit_split(struct lp_build_nir_context *bld_base,102LLVMValueRef value,103LLVMValueRef split_values[2])104{105struct gallivm_state *gallivm = bld_base->base.gallivm;106LLVMBuilderRef builder = gallivm->builder;107unsigned i;108LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];109LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];110int len = bld_base->base.type.length * 2;111112value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");113for (i = 0; i < bld_base->base.type.length; i++) {114#if UTIL_ARCH_LITTLE_ENDIAN115shuffles[i] = lp_build_const_int32(gallivm, i * 2);116shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);117#else118shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);119shuffles2[i] = lp_build_const_int32(gallivm, i * 2);120#endif121}122123split_values[0] = LLVMBuildShuffleVector(builder, value,124LLVMGetUndef(LLVMTypeOf(value)),125LLVMConstVector(shuffles,126bld_base->base.type.length),127"");128split_values[1] = LLVMBuildShuffleVector(builder, value,129LLVMGetUndef(LLVMTypeOf(value)),130LLVMConstVector(shuffles2,131bld_base->base.type.length),132"");133}134135static void136emit_store_64bit_chan(struct lp_build_nir_context *bld_base,137LLVMValueRef chan_ptr,138LLVMValueRef chan_ptr2,139LLVMValueRef value)140{141struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;142struct lp_build_context *float_bld = &bld_base->base;143LLVMValueRef split_vals[2];144145emit_store_64bit_split(bld_base, value, split_vals);146147lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);148lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);149}150151static LLVMValueRef152get_soa_array_offsets(struct lp_build_context *uint_bld,153LLVMValueRef indirect_index,154int num_components,155unsigned chan_index,156bool need_perelement_offset)157{158struct gallivm_state *gallivm = uint_bld->gallivm;159LLVMValueRef chan_vec =160lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);161LLVMValueRef length_vec =162lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);163LLVMValueRef index_vec;164165/* index_vec = (indirect_index * 4 + chan_index) * length + offsets */166index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));167index_vec = lp_build_add(uint_bld, index_vec, chan_vec);168index_vec = lp_build_mul(uint_bld, index_vec, length_vec);169170if (need_perelement_offset) {171LLVMValueRef pixel_offsets;172unsigned i;173/* build pixel offset vector: {0, 1, 2, 3, ...} */174pixel_offsets = uint_bld->undef;175for (i = 0; i < uint_bld->type.length; i++) {176LLVMValueRef ii = lp_build_const_int32(gallivm, i);177pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,178ii, ii, "");179}180index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);181}182return index_vec;183}184185static LLVMValueRef186build_gather(struct lp_build_nir_context *bld_base,187struct lp_build_context *bld,188LLVMValueRef base_ptr,189LLVMValueRef indexes,190LLVMValueRef overflow_mask,191LLVMValueRef indexes2)192{193struct gallivm_state *gallivm = bld_base->base.gallivm;194LLVMBuilderRef builder = gallivm->builder;195struct lp_build_context *uint_bld = &bld_base->uint_bld;196LLVMValueRef res;197unsigned i;198199if (indexes2)200res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));201else202res = bld->undef;203/*204* overflow_mask is a vector telling us which channels205* in the vector overflowed. We use the overflow behavior for206* constant buffers which is defined as:207* Out of bounds access to constant buffer returns 0 in all208* components. Out of bounds behavior is always with respect209* to the size of the buffer bound at that slot.210*/211212if (overflow_mask) {213/*214* We avoid per-element control flow here (also due to llvm going crazy,215* though I suspect it's better anyway since overflow is likely rare).216* Note that since we still fetch from buffers even if num_elements was217* zero (in this case we'll fetch from index zero) the jit func callers218* MUST provide valid fake constant buffers of size 4x32 (the values do219* not matter), otherwise we'd still need (not per element though)220* control flow.221*/222indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);223if (indexes2)224indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);225}226227/*228* Loop over elements of index_vec, load scalar value, insert it into 'res'.229*/230for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {231LLVMValueRef si, di;232LLVMValueRef index;233LLVMValueRef scalar_ptr, scalar;234235di = lp_build_const_int32(gallivm, i);236if (indexes2)237si = lp_build_const_int32(gallivm, i >> 1);238else239si = di;240241if (indexes2 && (i & 1)) {242index = LLVMBuildExtractElement(builder,243indexes2, si, "");244} else {245index = LLVMBuildExtractElement(builder,246indexes, si, "");247}248scalar_ptr = LLVMBuildGEP(builder, base_ptr,249&index, 1, "gather_ptr");250scalar = LLVMBuildLoad(builder, scalar_ptr, "");251252res = LLVMBuildInsertElement(builder, res, scalar, di, "");253}254255if (overflow_mask) {256if (indexes2) {257res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");258overflow_mask = LLVMBuildSExt(builder, overflow_mask,259bld_base->dbl_bld.int_vec_type, "");260res = lp_build_select(&bld_base->dbl_bld, overflow_mask,261bld_base->dbl_bld.zero, res);262} else263res = lp_build_select(bld, overflow_mask, bld->zero, res);264}265266return res;267}268269/**270* Scatter/store vector.271*/272static void273emit_mask_scatter(struct lp_build_nir_soa_context *bld,274LLVMValueRef base_ptr,275LLVMValueRef indexes,276LLVMValueRef values,277struct lp_exec_mask *mask)278{279struct gallivm_state *gallivm = bld->bld_base.base.gallivm;280LLVMBuilderRef builder = gallivm->builder;281unsigned i;282LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;283284/*285* Loop over elements of index_vec, store scalar value.286*/287for (i = 0; i < bld->bld_base.base.type.length; i++) {288LLVMValueRef ii = lp_build_const_int32(gallivm, i);289LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");290LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");291LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");292LLVMValueRef scalar_pred = pred ?293LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;294295if (0)296lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",297ii, val, index, scalar_ptr);298299if (scalar_pred) {300LLVMValueRef real_val, dst_val;301dst_val = LLVMBuildLoad(builder, scalar_ptr, "");302real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val);303LLVMBuildStore(builder, real_val, scalar_ptr);304}305else {306LLVMBuildStore(builder, val, scalar_ptr);307}308}309}310311static void emit_load_var(struct lp_build_nir_context *bld_base,312nir_variable_mode deref_mode,313unsigned num_components,314unsigned bit_size,315nir_variable *var,316unsigned vertex_index,317LLVMValueRef indir_vertex_index,318unsigned const_index,319LLVMValueRef indir_index,320LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])321{322struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;323struct gallivm_state *gallivm = bld_base->base.gallivm;324int dmul = bit_size == 64 ? 2 : 1;325unsigned location = var->data.driver_location;326unsigned location_frac = var->data.location_frac;327328if (!var->data.compact && !indir_index)329location += const_index;330else if (var->data.compact) {331location += const_index / 4;332location_frac += const_index % 4;333const_index = 0;334}335switch (deref_mode) {336case nir_var_shader_in:337for (unsigned i = 0; i < num_components; i++) {338int idx = (i * dmul) + location_frac;339int comp_loc = location;340341if (bit_size == 64 && idx >= 4) {342comp_loc++;343idx = idx % 4;344}345346if (bld->gs_iface) {347LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);348LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);349LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);350LLVMValueRef result2;351352result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,353false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);354if (bit_size == 64) {355LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);356result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,357false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);358result[i] = emit_fetch_64bit(bld_base, result[i], result2);359}360} else if (bld->tes_iface) {361LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);362LLVMValueRef attrib_index_val;363LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);364LLVMValueRef result2;365366if (indir_index) {367if (var->data.compact) {368swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));369attrib_index_val = lp_build_const_int32(gallivm, comp_loc);370} else371attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));372} else373attrib_index_val = lp_build_const_int32(gallivm, comp_loc);374375if (var->data.patch) {376result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,377indir_index ? true : false, attrib_index_val, swizzle_index_val);378if (bit_size == 64) {379LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);380result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,381indir_index ? true : false, attrib_index_val, swizzle_index_val);382result[i] = emit_fetch_64bit(bld_base, result[i], result2);383}384}385else {386result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,387indir_vertex_index ? true : false,388indir_vertex_index ? indir_vertex_index : vertex_index_val,389(indir_index && !var->data.compact) ? true : false, attrib_index_val,390(indir_index && var->data.compact) ? true : false, swizzle_index_val);391if (bit_size == 64) {392LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);393result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,394indir_vertex_index ? true : false,395indir_vertex_index ? indir_vertex_index : vertex_index_val,396indir_index ? true : false, attrib_index_val, false, swizzle_index_val);397result[i] = emit_fetch_64bit(bld_base, result[i], result2);398}399}400} else if (bld->tcs_iface) {401LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);402LLVMValueRef attrib_index_val;403LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);404405if (indir_index) {406if (var->data.compact) {407swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));408attrib_index_val = lp_build_const_int32(gallivm, comp_loc);409} else410attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));411} else412attrib_index_val = lp_build_const_int32(gallivm, comp_loc);413result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,414indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,415(indir_index && !var->data.compact) ? true : false, attrib_index_val,416(indir_index && var->data.compact) ? true : false, swizzle_index_val);417if (bit_size == 64) {418LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);419LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,420indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,421indir_index ? true : false, attrib_index_val,422false, swizzle_index_val);423result[i] = emit_fetch_64bit(bld_base, result[i], result2);424}425} else {426if (indir_index) {427LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));428LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,429attrib_index_val, 4, idx,430TRUE);431LLVMValueRef index_vec2 = NULL;432LLVMTypeRef fptr_type;433LLVMValueRef inputs_array;434fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);435inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");436437if (bit_size == 64)438index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,439indir_index, 4, idx + 1, TRUE);440441/* Gather values from the input register array */442result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);443} else {444if (bld->indirects & nir_var_shader_in) {445LLVMValueRef lindex = lp_build_const_int32(gallivm,446comp_loc * 4 + idx);447LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,448bld->inputs_array, lindex);449if (bit_size == 64) {450LLVMValueRef lindex2 = lp_build_const_int32(gallivm,451comp_loc * 4 + (idx + 1));452LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,453bld->inputs_array, lindex2);454result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);455} else {456result[i] = input_ptr;457}458} else {459if (bit_size == 64) {460LLVMValueRef tmp[2];461tmp[0] = bld->inputs[comp_loc][idx];462tmp[1] = bld->inputs[comp_loc][idx + 1];463result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);464} else {465result[i] = bld->inputs[comp_loc][idx];466}467}468}469}470}471break;472case nir_var_shader_out:473if (bld->fs_iface && bld->fs_iface->fb_fetch) {474bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);475return;476}477for (unsigned i = 0; i < num_components; i++) {478int idx = (i * dmul) + location_frac;479if (bld->tcs_iface) {480LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);481LLVMValueRef attrib_index_val;482LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);483484if (indir_index)485attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));486else487attrib_index_val = lp_build_const_int32(gallivm, location);488489result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,490indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,491(indir_index && !var->data.compact) ? true : false, attrib_index_val,492(indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);493if (bit_size == 64) {494LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);495LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,496indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,497indir_index ? true : false, attrib_index_val,498false, swizzle_index_val, 0);499result[i] = emit_fetch_64bit(bld_base, result[i], result2);500}501}502}503break;504default:505break;506}507}508509static void emit_store_chan(struct lp_build_nir_context *bld_base,510nir_variable_mode deref_mode,511unsigned bit_size,512unsigned location, unsigned comp,513unsigned chan,514LLVMValueRef dst)515{516struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;517LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;518struct lp_build_context *float_bld = &bld_base->base;519520if (bit_size == 64) {521chan *= 2;522chan += comp;523if (chan >= 4) {524chan -= 4;525location++;526}527emit_store_64bit_chan(bld_base, bld->outputs[location][chan],528bld->outputs[location][chan + 1], dst);529} else {530dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");531lp_exec_mask_store(&bld->exec_mask, float_bld, dst,532bld->outputs[location][chan + comp]);533}534}535536static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,537bool is_compact,538unsigned bit_size,539unsigned location,540unsigned const_index,541LLVMValueRef indir_vertex_index,542LLVMValueRef indir_index,543unsigned comp,544unsigned chan,545LLVMValueRef chan_val)546{547struct gallivm_state *gallivm = bld_base->base.gallivm;548struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;549LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;550unsigned swizzle = chan;551if (bit_size == 64) {552swizzle *= 2;553swizzle += comp;554if (swizzle >= 4) {555swizzle -= 4;556location++;557}558} else559swizzle += comp;560LLVMValueRef attrib_index_val;561LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);562563if (indir_index) {564if (is_compact) {565swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));566attrib_index_val = lp_build_const_int32(gallivm, const_index + location);567} else568attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));569} else570attrib_index_val = lp_build_const_int32(gallivm, const_index + location);571if (bit_size == 64) {572LLVMValueRef split_vals[2];573LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);574emit_store_64bit_split(bld_base, chan_val, split_vals);575bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,576indir_vertex_index ? true : false,577indir_vertex_index,578indir_index ? true : false,579attrib_index_val,580false, swizzle_index_val,581split_vals[0], mask_vec(bld_base));582bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,583indir_vertex_index ? true : false,584indir_vertex_index,585indir_index ? true : false,586attrib_index_val,587false, swizzle_index_val2,588split_vals[1], mask_vec(bld_base));589} else {590chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");591bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,592indir_vertex_index ? true : false,593indir_vertex_index,594indir_index && !is_compact ? true : false,595attrib_index_val,596indir_index && is_compact ? true : false,597swizzle_index_val,598chan_val, mask_vec(bld_base));599}600}601602static void emit_store_var(struct lp_build_nir_context *bld_base,603nir_variable_mode deref_mode,604unsigned num_components,605unsigned bit_size,606nir_variable *var,607unsigned writemask,608LLVMValueRef indir_vertex_index,609unsigned const_index,610LLVMValueRef indir_index,611LLVMValueRef dst)612{613struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;614LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;615switch (deref_mode) {616case nir_var_shader_out: {617unsigned location = var->data.driver_location;618unsigned comp = var->data.location_frac;619if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {620if (var->data.location == FRAG_RESULT_STENCIL)621comp = 1;622else if (var->data.location == FRAG_RESULT_DEPTH)623comp = 2;624}625626if (var->data.compact) {627location += const_index / 4;628comp += const_index % 4;629const_index = 0;630}631632for (unsigned chan = 0; chan < num_components; chan++) {633if (writemask & (1u << chan)) {634LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");635if (bld->tcs_iface) {636emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);637} else638emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);639}640}641break;642}643default:644break;645}646}647648static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,649struct lp_build_context *reg_bld,650const nir_reg_src *reg,651LLVMValueRef indir_src,652LLVMValueRef reg_storage)653{654struct gallivm_state *gallivm = bld_base->base.gallivm;655LLVMBuilderRef builder = gallivm->builder;656int nc = reg->reg->num_components;657LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };658struct lp_build_context *uint_bld = &bld_base->uint_bld;659if (reg->reg->num_array_elems) {660LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);661if (reg->indirect) {662LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);663indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");664indirect_val = lp_build_min(uint_bld, indirect_val, max_index);665}666reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");667for (unsigned i = 0; i < nc; i++) {668LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);669vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);670}671} else {672for (unsigned i = 0; i < nc; i++) {673LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,674lp_build_const_int32(gallivm, i));675vals[i] = LLVMBuildLoad(builder, this_storage, "");676}677}678return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);679}680681static void emit_store_reg(struct lp_build_nir_context *bld_base,682struct lp_build_context *reg_bld,683const nir_reg_dest *reg,684unsigned writemask,685LLVMValueRef indir_src,686LLVMValueRef reg_storage,687LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])688{689struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;690struct gallivm_state *gallivm = bld_base->base.gallivm;691LLVMBuilderRef builder = gallivm->builder;692struct lp_build_context *uint_bld = &bld_base->uint_bld;693int nc = reg->reg->num_components;694if (reg->reg->num_array_elems > 0) {695LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);696if (reg->indirect) {697LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);698indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");699indirect_val = lp_build_min(uint_bld, indirect_val, max_index);700}701reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");702for (unsigned i = 0; i < nc; i++) {703if (!(writemask & (1 << i)))704continue;705LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);706dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");707emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);708}709return;710}711712for (unsigned i = 0; i < nc; i++) {713LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,714lp_build_const_int32(gallivm, i));715dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");716lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);717}718}719720static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,721unsigned nc,722unsigned bit_size,723unsigned offset_bit_size,724bool offset_is_uniform,725LLVMValueRef offset,726LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])727{728struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;729struct gallivm_state *gallivm = bld_base->base.gallivm;730LLVMBuilderRef builder = gallivm->builder;731struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);732LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;733unsigned size_shift = bit_size_to_shift_size(bit_size);734struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);735if (size_shift)736offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));737738LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);739kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");740741if (offset_is_uniform) {742offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");743744for (unsigned c = 0; c < nc; c++) {745LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");746747LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);748result[c] = lp_build_broadcast_scalar(bld_broad, scalar);749}750}751}752753static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)754{755LLVMBuilderRef builder = gallivm->builder;756switch (bit_size) {757case 8:758addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");759break;760case 16:761addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");762break;763case 32:764default:765addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");766break;767case 64:768addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");769break;770}771return addr_ptr;772}773774static void emit_load_global(struct lp_build_nir_context *bld_base,775unsigned nc,776unsigned bit_size,777unsigned addr_bit_size,778LLVMValueRef addr,779LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])780{781struct gallivm_state *gallivm = bld_base->base.gallivm;782LLVMBuilderRef builder = gallivm->builder;783struct lp_build_context *uint_bld = &bld_base->uint_bld;784struct lp_build_context *res_bld;785786res_bld = get_int_bld(bld_base, true, bit_size);787788for (unsigned c = 0; c < nc; c++) {789LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");790LLVMValueRef exec_mask = mask_vec(bld_base);791struct lp_build_loop_state loop_state;792lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));793794struct lp_build_if_state ifthen;795LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");796cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");797lp_build_if(&ifthen, gallivm, cond);798799LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,800loop_state.counter, "");801addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);802803LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));804805LLVMValueRef temp_res;806temp_res = LLVMBuildLoad(builder, result, "");807temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");808LLVMBuildStore(builder, temp_res, result);809lp_build_endif(&ifthen);810lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),811NULL, LLVMIntUGE);812outval[c] = LLVMBuildLoad(builder, result, "");813}814}815816static void emit_store_global(struct lp_build_nir_context *bld_base,817unsigned writemask,818unsigned nc, unsigned bit_size,819unsigned addr_bit_size,820LLVMValueRef addr,821LLVMValueRef dst)822{823struct gallivm_state *gallivm = bld_base->base.gallivm;824LLVMBuilderRef builder = gallivm->builder;825struct lp_build_context *uint_bld = &bld_base->uint_bld;826827for (unsigned c = 0; c < nc; c++) {828if (!(writemask & (1u << c)))829continue;830LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");831832LLVMValueRef exec_mask = mask_vec(bld_base);833struct lp_build_loop_state loop_state;834lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));835LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,836loop_state.counter, "");837838LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,839loop_state.counter, "");840addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);841switch (bit_size) {842case 8:843value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");844break;845case 16:846value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");847break;848case 32:849value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");850break;851case 64:852value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");853break;854default:855break;856}857struct lp_build_if_state ifthen;858859LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");860cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");861lp_build_if(&ifthen, gallivm, cond);862lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);863lp_build_endif(&ifthen);864lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),865NULL, LLVMIntUGE);866}867}868869static void emit_atomic_global(struct lp_build_nir_context *bld_base,870nir_intrinsic_op nir_op,871unsigned addr_bit_size,872unsigned val_bit_size,873LLVMValueRef addr,874LLVMValueRef val, LLVMValueRef val2,875LLVMValueRef *result)876{877struct gallivm_state *gallivm = bld_base->base.gallivm;878LLVMBuilderRef builder = gallivm->builder;879struct lp_build_context *uint_bld = &bld_base->uint_bld;880struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);881LLVMValueRef atom_res = lp_build_alloca(gallivm,882LLVMTypeOf(val), "");883LLVMValueRef exec_mask = mask_vec(bld_base);884struct lp_build_loop_state loop_state;885lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));886887LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,888loop_state.counter, "");889890LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,891loop_state.counter, "");892addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);893struct lp_build_if_state ifthen;894LLVMValueRef cond, temp_res;895LLVMValueRef scalar;896cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");897cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");898lp_build_if(&ifthen, gallivm, cond);899900addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");901if (nir_op == nir_intrinsic_global_atomic_comp_swap) {902LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,903loop_state.counter, "");904cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");905scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,906cas_src_ptr,907LLVMAtomicOrderingSequentiallyConsistent,908LLVMAtomicOrderingSequentiallyConsistent,909false);910scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");911} else {912LLVMAtomicRMWBinOp op;913switch (nir_op) {914case nir_intrinsic_global_atomic_add:915op = LLVMAtomicRMWBinOpAdd;916break;917case nir_intrinsic_global_atomic_exchange:918919op = LLVMAtomicRMWBinOpXchg;920break;921case nir_intrinsic_global_atomic_and:922op = LLVMAtomicRMWBinOpAnd;923break;924case nir_intrinsic_global_atomic_or:925op = LLVMAtomicRMWBinOpOr;926break;927case nir_intrinsic_global_atomic_xor:928op = LLVMAtomicRMWBinOpXor;929break;930case nir_intrinsic_global_atomic_umin:931op = LLVMAtomicRMWBinOpUMin;932break;933case nir_intrinsic_global_atomic_umax:934op = LLVMAtomicRMWBinOpUMax;935break;936case nir_intrinsic_global_atomic_imin:937op = LLVMAtomicRMWBinOpMin;938break;939case nir_intrinsic_global_atomic_imax:940op = LLVMAtomicRMWBinOpMax;941break;942default:943unreachable("unknown atomic op");944}945946scalar = LLVMBuildAtomicRMW(builder, op,947addr_ptr, value_ptr,948LLVMAtomicOrderingSequentiallyConsistent,949false);950}951temp_res = LLVMBuildLoad(builder, atom_res, "");952temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");953LLVMBuildStore(builder, temp_res, atom_res);954lp_build_else(&ifthen);955temp_res = LLVMBuildLoad(builder, atom_res, "");956bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;957LLVMValueRef zero_val;958if (is_float) {959if (val_bit_size == 64)960zero_val = lp_build_const_double(gallivm, 0);961else962zero_val = lp_build_const_float(gallivm, 0);963} else {964if (val_bit_size == 64)965zero_val = lp_build_const_int64(gallivm, 0);966else967zero_val = lp_build_const_int32(gallivm, 0);968}969970temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");971LLVMBuildStore(builder, temp_res, atom_res);972lp_build_endif(&ifthen);973lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),974NULL, LLVMIntUGE);975*result = LLVMBuildLoad(builder, atom_res, "");976}977978static void emit_load_ubo(struct lp_build_nir_context *bld_base,979unsigned nc,980unsigned bit_size,981bool offset_is_uniform,982LLVMValueRef index,983LLVMValueRef offset,984LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])985{986struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;987struct gallivm_state *gallivm = bld_base->base.gallivm;988LLVMBuilderRef builder = gallivm->builder;989struct lp_build_context *uint_bld = &bld_base->uint_bld;990struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);991LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);992unsigned size_shift = bit_size_to_shift_size(bit_size);993if (size_shift)994offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));995996LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);997consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");998999if (offset_is_uniform) {1000offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");10011002for (unsigned c = 0; c < nc; c++) {1003LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");10041005LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);1006result[c] = lp_build_broadcast_scalar(bld_broad, scalar);1007}1008} else {1009LLVMValueRef overflow_mask;1010LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);10111012num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);1013if (bit_size == 64)1014num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);1015else if (bit_size == 16)1016num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);1017else if (bit_size == 8)1018num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);10191020for (unsigned c = 0; c < nc; c++) {1021LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));1022overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,1023this_offset, num_consts);1024result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);1025}1026}1027}102810291030static void emit_load_mem(struct lp_build_nir_context *bld_base,1031unsigned nc,1032unsigned bit_size,1033LLVMValueRef index,1034LLVMValueRef offset,1035LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1036{1037struct gallivm_state *gallivm = bld_base->base.gallivm;1038struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1039LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1040LLVMValueRef ssbo_ptr = NULL;1041struct lp_build_context *uint_bld = &bld_base->uint_bld;1042LLVMValueRef ssbo_limit = NULL;1043struct lp_build_context *load_bld;1044uint32_t shift_val = bit_size_to_shift_size(bit_size);10451046load_bld = get_int_bld(bld_base, true, bit_size);10471048if (index) {1049LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1050ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");1051ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);10521053ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1054} else1055ssbo_ptr = bld->shared_ptr;10561057offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");1058for (unsigned c = 0; c < nc; c++) {1059LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));1060LLVMValueRef exec_mask = mask_vec(bld_base);10611062if (ssbo_limit) {1063LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);1064exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");1065}10661067LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");1068struct lp_build_loop_state loop_state;1069lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));10701071struct lp_build_if_state ifthen;1072LLVMValueRef cond, temp_res;10731074loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,1075loop_state.counter, "");10761077cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");1078cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");10791080lp_build_if(&ifthen, gallivm, cond);1081LLVMValueRef scalar;1082if (bit_size != 32) {1083LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(load_bld->elem_type, 0), "");1084scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index);1085} else1086scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index);10871088temp_res = LLVMBuildLoad(builder, result, "");1089temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");1090LLVMBuildStore(builder, temp_res, result);1091lp_build_else(&ifthen);1092temp_res = LLVMBuildLoad(builder, result, "");1093LLVMValueRef zero;1094if (bit_size == 64)1095zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);1096else if (bit_size == 16)1097zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);1098else if (bit_size == 8)1099zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);1100else1101zero = lp_build_const_int32(gallivm, 0);1102temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");1103LLVMBuildStore(builder, temp_res, result);1104lp_build_endif(&ifthen);1105lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),1106NULL, LLVMIntUGE);1107outval[c] = LLVMBuildLoad(gallivm->builder, result, "");1108}1109}11101111static void emit_store_mem(struct lp_build_nir_context *bld_base,1112unsigned writemask,1113unsigned nc,1114unsigned bit_size,1115LLVMValueRef index,1116LLVMValueRef offset,1117LLVMValueRef dst)1118{1119struct gallivm_state *gallivm = bld_base->base.gallivm;1120struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1121LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1122LLVMValueRef ssbo_ptr;1123struct lp_build_context *uint_bld = &bld_base->uint_bld;1124LLVMValueRef ssbo_limit = NULL;1125struct lp_build_context *store_bld;1126uint32_t shift_val = bit_size_to_shift_size(bit_size);1127store_bld = get_int_bld(bld_base, true, bit_size);11281129if (index) {1130LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1131ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");1132ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);1133ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1134} else1135ssbo_ptr = bld->shared_ptr;11361137offset = lp_build_shr_imm(uint_bld, offset, shift_val);1138for (unsigned c = 0; c < nc; c++) {1139if (!(writemask & (1u << c)))1140continue;1141LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));1142LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");11431144LLVMValueRef exec_mask = mask_vec(bld_base);1145if (ssbo_limit) {1146LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);1147exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");1148}11491150struct lp_build_loop_state loop_state;1151lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));1152LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,1153loop_state.counter, "");1154value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");1155struct lp_build_if_state ifthen;1156LLVMValueRef cond;11571158loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,1159loop_state.counter, "");1160cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");1161cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");1162lp_build_if(&ifthen, gallivm, cond);1163if (bit_size != 32) {1164LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(store_bld->elem_type, 0), "");1165lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr);1166} else1167lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr);1168lp_build_endif(&ifthen);1169lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),1170NULL, LLVMIntUGE);1171}1172}11731174static void emit_atomic_mem(struct lp_build_nir_context *bld_base,1175nir_intrinsic_op nir_op,1176uint32_t bit_size,1177LLVMValueRef index, LLVMValueRef offset,1178LLVMValueRef val, LLVMValueRef val2,1179LLVMValueRef *result)1180{1181struct gallivm_state *gallivm = bld_base->base.gallivm;1182struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1183LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1184LLVMValueRef ssbo_ptr;1185struct lp_build_context *uint_bld = &bld_base->uint_bld;1186LLVMValueRef ssbo_limit = NULL;1187uint32_t shift_val = bit_size_to_shift_size(bit_size);1188struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);1189if (index) {1190LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1191ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), "");1192ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);1193ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1194} else1195ssbo_ptr = bld->shared_ptr;11961197offset = lp_build_shr_imm(uint_bld, offset, shift_val);1198LLVMValueRef atom_res = lp_build_alloca(gallivm,1199atomic_bld->vec_type, "");12001201LLVMValueRef exec_mask = mask_vec(bld_base);1202if (ssbo_limit) {1203LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit);1204exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");1205}12061207struct lp_build_loop_state loop_state;1208lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));12091210LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,1211loop_state.counter, "");1212value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");12131214offset = LLVMBuildExtractElement(gallivm->builder, offset,1215loop_state.counter, "");12161217LLVMValueRef scalar_ptr;1218if (bit_size != 32) {1219LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(atomic_bld->elem_type, 0), "");1220scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr2, &offset, 1, "");1221} else1222scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr, &offset, 1, "");12231224struct lp_build_if_state ifthen;1225LLVMValueRef cond, temp_res;1226LLVMValueRef scalar;1227cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");1228cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");1229lp_build_if(&ifthen, gallivm, cond);12301231if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {1232LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,1233loop_state.counter, "");1234cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");1235scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,1236cas_src_ptr,1237LLVMAtomicOrderingSequentiallyConsistent,1238LLVMAtomicOrderingSequentiallyConsistent,1239false);1240scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");1241} else {1242LLVMAtomicRMWBinOp op;12431244switch (nir_op) {1245case nir_intrinsic_shared_atomic_add:1246case nir_intrinsic_ssbo_atomic_add:1247op = LLVMAtomicRMWBinOpAdd;1248break;1249case nir_intrinsic_shared_atomic_exchange:1250case nir_intrinsic_ssbo_atomic_exchange:1251op = LLVMAtomicRMWBinOpXchg;1252break;1253case nir_intrinsic_shared_atomic_and:1254case nir_intrinsic_ssbo_atomic_and:1255op = LLVMAtomicRMWBinOpAnd;1256break;1257case nir_intrinsic_shared_atomic_or:1258case nir_intrinsic_ssbo_atomic_or:1259op = LLVMAtomicRMWBinOpOr;1260break;1261case nir_intrinsic_shared_atomic_xor:1262case nir_intrinsic_ssbo_atomic_xor:1263op = LLVMAtomicRMWBinOpXor;1264break;1265case nir_intrinsic_shared_atomic_umin:1266case nir_intrinsic_ssbo_atomic_umin:1267op = LLVMAtomicRMWBinOpUMin;1268break;1269case nir_intrinsic_shared_atomic_umax:1270case nir_intrinsic_ssbo_atomic_umax:1271op = LLVMAtomicRMWBinOpUMax;1272break;1273case nir_intrinsic_ssbo_atomic_imin:1274case nir_intrinsic_shared_atomic_imin:1275op = LLVMAtomicRMWBinOpMin;1276break;1277case nir_intrinsic_ssbo_atomic_imax:1278case nir_intrinsic_shared_atomic_imax:1279op = LLVMAtomicRMWBinOpMax;1280break;1281default:1282unreachable("unknown atomic op");1283}1284scalar = LLVMBuildAtomicRMW(builder, op,1285scalar_ptr, value_ptr,1286LLVMAtomicOrderingSequentiallyConsistent,1287false);1288}1289temp_res = LLVMBuildLoad(builder, atom_res, "");1290temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");1291LLVMBuildStore(builder, temp_res, atom_res);1292lp_build_else(&ifthen);1293temp_res = LLVMBuildLoad(builder, atom_res, "");1294LLVMValueRef zero = bit_size == 64 ? lp_build_const_int64(gallivm, 0) : lp_build_const_int32(gallivm, 0);1295temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");1296LLVMBuildStore(builder, temp_res, atom_res);1297lp_build_endif(&ifthen);12981299lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),1300NULL, LLVMIntUGE);1301*result = LLVMBuildLoad(builder, atom_res, "");1302}13031304static void emit_barrier(struct lp_build_nir_context *bld_base)1305{1306struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1307struct gallivm_state * gallivm = bld_base->base.gallivm;13081309LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");13101311lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);1312LLVMPositionBuilderAtEnd(gallivm->builder, resume);1313}13141315static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,1316LLVMValueRef index)1317{1318struct gallivm_state *gallivm = bld_base->base.gallivm;1319struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1320LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1321struct lp_build_context *bld_broad = &bld_base->uint_bld;1322LLVMValueRef size_ptr = lp_build_array_get(bld_base->base.gallivm, bld->ssbo_sizes_ptr,1323LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));1324return lp_build_broadcast_scalar(bld_broad, size_ptr);1325}13261327static void emit_image_op(struct lp_build_nir_context *bld_base,1328struct lp_img_params *params)1329{1330struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1331struct gallivm_state *gallivm = bld_base->base.gallivm;13321333params->type = bld_base->base.type;1334params->context_ptr = bld->context_ptr;1335params->thread_data_ptr = bld->thread_data_ptr;1336params->exec_mask = mask_vec(bld_base);13371338if (params->image_index_offset)1339params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,1340lp_build_const_int32(gallivm, 0), "");13411342bld->image->emit_op(bld->image,1343bld->bld_base.base.gallivm,1344params);13451346}13471348static void emit_image_size(struct lp_build_nir_context *bld_base,1349struct lp_sampler_size_query_params *params)1350{1351struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1352struct gallivm_state *gallivm = bld_base->base.gallivm;13531354params->int_type = bld_base->int_bld.type;1355params->context_ptr = bld->context_ptr;13561357if (params->texture_unit_offset)1358params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,1359lp_build_const_int32(gallivm, 0), "");1360bld->image->emit_size_query(bld->image,1361bld->bld_base.base.gallivm,1362params);13631364}13651366static void init_var_slots(struct lp_build_nir_context *bld_base,1367nir_variable *var, unsigned sc)1368{1369struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1370unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;13711372if (!bld->outputs)1373return;1374for (unsigned comp = sc; comp < slots + sc; comp++) {1375unsigned this_loc = var->data.driver_location + (comp / 4);1376unsigned this_chan = comp % 4;13771378if (!bld->outputs[this_loc][this_chan])1379bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,1380bld_base->base.vec_type, "output");1381}1382}13831384static void emit_var_decl(struct lp_build_nir_context *bld_base,1385nir_variable *var)1386{1387unsigned sc = var->data.location_frac;1388switch (var->data.mode) {1389case nir_var_shader_out: {1390if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {1391if (var->data.location == FRAG_RESULT_STENCIL)1392sc = 1;1393else if (var->data.location == FRAG_RESULT_DEPTH)1394sc = 2;1395}1396init_var_slots(bld_base, var, sc);1397break;1398}1399default:1400break;1401}1402}14031404static void emit_tex(struct lp_build_nir_context *bld_base,1405struct lp_sampler_params *params)1406{1407struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1408struct gallivm_state *gallivm = bld_base->base.gallivm;14091410params->type = bld_base->base.type;1411params->context_ptr = bld->context_ptr;1412params->thread_data_ptr = bld->thread_data_ptr;14131414if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {1415/* this is horrible but this can be dynamic */1416LLVMValueRef coords[5];1417LLVMValueRef *orig_texel_ptr;1418struct lp_build_context *uint_bld = &bld_base->uint_bld;1419LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),1420LLVMGetUndef(bld_base->base.vec_type),1421LLVMGetUndef(bld_base->base.vec_type),1422LLVMGetUndef(bld_base->base.vec_type) };1423LLVMValueRef texel[4], orig_offset, orig_lod;1424unsigned i;1425orig_texel_ptr = params->texel;1426orig_lod = params->lod;1427for (i = 0; i < 5; i++) {1428coords[i] = params->coords[i];1429}1430orig_offset = params->texture_index_offset;14311432for (unsigned v = 0; v < uint_bld->type.length; v++) {1433LLVMValueRef idx = lp_build_const_int32(gallivm, v);1434LLVMValueRef new_coords[5];1435for (i = 0; i < 5; i++) {1436new_coords[i] = LLVMBuildExtractElement(gallivm->builder,1437coords[i], idx, "");1438}1439params->coords = new_coords;1440params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,1441orig_offset,1442idx, "");1443params->type = lp_elem_type(bld_base->base.type);14441445if (orig_lod)1446params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");1447params->texel = texel;1448bld->sampler->emit_tex_sample(bld->sampler,1449gallivm,1450params);14511452for (i = 0; i < 4; i++) {1453result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");1454}1455}1456for (i = 0; i < 4; i++) {1457orig_texel_ptr[i] = result[i];1458}1459return;1460}14611462if (params->texture_index_offset)1463params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,1464params->texture_index_offset,1465lp_build_const_int32(bld_base->base.gallivm, 0), "");14661467params->type = bld_base->base.type;1468bld->sampler->emit_tex_sample(bld->sampler,1469bld->bld_base.base.gallivm,1470params);1471}14721473static void emit_tex_size(struct lp_build_nir_context *bld_base,1474struct lp_sampler_size_query_params *params)1475{1476struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;14771478params->int_type = bld_base->int_bld.type;1479params->context_ptr = bld->context_ptr;14801481if (params->texture_unit_offset)1482params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,1483params->texture_unit_offset,1484lp_build_const_int32(bld_base->base.gallivm, 0), "");1485bld->sampler->emit_size_query(bld->sampler,1486bld->bld_base.base.gallivm,1487params);1488}14891490static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,1491nir_intrinsic_instr *instr,1492LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1493{1494struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1495struct gallivm_state *gallivm = bld_base->base.gallivm;1496struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);1497switch (instr->intrinsic) {1498case nir_intrinsic_load_instance_id:1499result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);1500break;1501case nir_intrinsic_load_base_instance:1502result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);1503break;1504case nir_intrinsic_load_base_vertex:1505result[0] = bld->system_values.basevertex;1506break;1507case nir_intrinsic_load_first_vertex:1508result[0] = bld->system_values.firstvertex;1509break;1510case nir_intrinsic_load_vertex_id:1511result[0] = bld->system_values.vertex_id;1512break;1513case nir_intrinsic_load_primitive_id:1514result[0] = bld->system_values.prim_id;1515break;1516case nir_intrinsic_load_workgroup_id: {1517LLVMValueRef tmp[3];1518for (unsigned i = 0; i < 3; i++) {1519tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");1520if (instr->dest.ssa.bit_size == 64)1521tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");1522result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);1523}1524break;1525}1526case nir_intrinsic_load_local_invocation_id:1527for (unsigned i = 0; i < 3; i++)1528result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");1529break;1530case nir_intrinsic_load_num_workgroups: {1531LLVMValueRef tmp[3];1532for (unsigned i = 0; i < 3; i++) {1533tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");1534if (instr->dest.ssa.bit_size == 64)1535tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");1536result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);1537}1538break;1539}1540case nir_intrinsic_load_invocation_id:1541if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)1542result[0] = bld->system_values.invocation_id;1543else1544result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);1545break;1546case nir_intrinsic_load_front_face:1547result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);1548break;1549case nir_intrinsic_load_draw_id:1550result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);1551break;1552default:1553break;1554case nir_intrinsic_load_workgroup_size:1555for (unsigned i = 0; i < 3; i++)1556result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));1557break;1558case nir_intrinsic_load_work_dim:1559result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);1560break;1561case nir_intrinsic_load_tess_coord:1562for (unsigned i = 0; i < 3; i++) {1563result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");1564}1565break;1566case nir_intrinsic_load_tess_level_outer:1567for (unsigned i = 0; i < 4; i++)1568result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));1569break;1570case nir_intrinsic_load_tess_level_inner:1571for (unsigned i = 0; i < 2; i++)1572result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));1573break;1574case nir_intrinsic_load_patch_vertices_in:1575result[0] = bld->system_values.vertices_in;1576break;1577case nir_intrinsic_load_sample_id:1578result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);1579break;1580case nir_intrinsic_load_sample_pos:1581for (unsigned i = 0; i < 2; i++) {1582LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");1583idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");1584LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);1585result[i] = lp_build_broadcast_scalar(&bld_base->base, val);1586}1587break;1588case nir_intrinsic_load_sample_mask_in:1589result[0] = bld->system_values.sample_mask_in;1590break;1591case nir_intrinsic_load_view_index:1592result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);1593break;1594case nir_intrinsic_load_subgroup_invocation: {1595LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];1596for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)1597elems[i] = lp_build_const_int32(gallivm, i);1598result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);1599break;1600}1601case nir_intrinsic_load_subgroup_id:1602result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);1603break;1604case nir_intrinsic_load_num_subgroups:1605result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);1606break;1607}1608}16091610static void emit_helper_invocation(struct lp_build_nir_context *bld_base,1611LLVMValueRef *dst)1612{1613struct gallivm_state *gallivm = bld_base->base.gallivm;1614struct lp_build_context *uint_bld = &bld_base->uint_bld;1615*dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));1616}16171618static void bgnloop(struct lp_build_nir_context *bld_base)1619{1620struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1621lp_exec_bgnloop(&bld->exec_mask, true);1622}16231624static void endloop(struct lp_build_nir_context *bld_base)1625{1626struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1627lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);1628}16291630static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)1631{1632LLVMBuilderRef builder = bld_base->base.gallivm->builder;1633struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1634lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));1635}16361637static void else_stmt(struct lp_build_nir_context *bld_base)1638{1639struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1640lp_exec_mask_cond_invert(&bld->exec_mask);1641}16421643static void endif_stmt(struct lp_build_nir_context *bld_base)1644{1645struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1646lp_exec_mask_cond_pop(&bld->exec_mask);1647}16481649static void break_stmt(struct lp_build_nir_context *bld_base)1650{1651struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;16521653lp_exec_break(&bld->exec_mask, NULL, false);1654}16551656static void continue_stmt(struct lp_build_nir_context *bld_base)1657{1658struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1659lp_exec_continue(&bld->exec_mask);1660}16611662static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)1663{1664struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1665LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1666LLVMValueRef mask;16671668if (!cond) {1669if (bld->exec_mask.has_mask) {1670mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");1671} else {1672mask = LLVMConstNull(bld->bld_base.base.int_vec_type);1673}1674} else {1675mask = LLVMBuildNot(builder, cond, "");1676if (bld->exec_mask.has_mask) {1677LLVMValueRef invmask;1678invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");1679mask = LLVMBuildOr(builder, mask, invmask, "");1680}1681}1682lp_build_mask_update(bld->mask, mask);1683}16841685static void1686increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,1687LLVMValueRef ptr,1688LLVMValueRef mask)1689{1690LLVMBuilderRef builder = bld_base->base.gallivm->builder;1691LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");16921693current_vec = LLVMBuildSub(builder, current_vec, mask, "");16941695LLVMBuildStore(builder, current_vec, ptr);1696}16971698static void1699clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,1700LLVMValueRef ptr,1701LLVMValueRef mask)1702{1703LLVMBuilderRef builder = bld_base->base.gallivm->builder;1704LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");17051706current_vec = lp_build_select(&bld_base->uint_bld,1707mask,1708bld_base->uint_bld.zero,1709current_vec);17101711LLVMBuildStore(builder, current_vec, ptr);1712}17131714static LLVMValueRef1715clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,1716LLVMValueRef current_mask_vec,1717LLVMValueRef total_emitted_vertices_vec)1718{1719LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;1720struct lp_build_context *int_bld = &bld->bld_base.int_bld;1721LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,1722total_emitted_vertices_vec,1723bld->max_output_vertices_vec);17241725return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");1726}17271728static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)1729{1730struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1731LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;17321733if (stream_id >= bld->gs_vertex_streams)1734return;1735assert(bld->gs_iface->emit_vertex);1736LLVMValueRef total_emitted_vertices_vec =1737LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");1738LLVMValueRef mask = mask_vec(bld_base);1739mask = clamp_mask_to_max_output_vertices(bld, mask,1740total_emitted_vertices_vec);1741bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,1742bld->outputs,1743total_emitted_vertices_vec,1744mask,1745lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));17461747increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],1748mask);1749increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],1750mask);1751}17521753static void1754end_primitive_masked(struct lp_build_nir_context * bld_base,1755LLVMValueRef mask, uint32_t stream_id)1756{1757struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;1758LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;17591760if (stream_id >= bld->gs_vertex_streams)1761return;1762struct lp_build_context *uint_bld = &bld_base->uint_bld;1763LLVMValueRef emitted_vertices_vec =1764LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");1765LLVMValueRef emitted_prims_vec =1766LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");1767LLVMValueRef total_emitted_vertices_vec =1768LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");17691770LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,1771PIPE_FUNC_NOTEQUAL,1772emitted_vertices_vec,1773uint_bld->zero);1774mask = LLVMBuildAnd(builder, mask, emitted_mask, "");1775bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,1776total_emitted_vertices_vec,1777emitted_vertices_vec, emitted_prims_vec, mask, stream_id);1778increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],1779mask);1780clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],1781mask);1782}17831784static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)1785{1786ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;17871788assert(bld->gs_iface->end_primitive);17891790LLVMValueRef mask = mask_vec(bld_base);1791end_primitive_masked(bld_base, mask, stream_id);1792}17931794static void1795emit_prologue(struct lp_build_nir_soa_context *bld)1796{1797struct gallivm_state * gallivm = bld->bld_base.base.gallivm;1798if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {1799uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);1800unsigned index, chan;1801LLVMTypeRef vec_type = bld->bld_base.base.vec_type;1802LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);1803bld->inputs_array = lp_build_array_alloca(gallivm,1804vec_type, array_size,1805"input_array");18061807for (index = 0; index < num_inputs; ++index) {1808for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {1809LLVMValueRef lindex =1810lp_build_const_int32(gallivm, index * 4 + chan);1811LLVMValueRef input_ptr =1812LLVMBuildGEP(gallivm->builder, bld->inputs_array,1813&lindex, 1, "");1814LLVMValueRef value = bld->inputs[index][chan];1815if (value)1816LLVMBuildStore(gallivm->builder, value, input_ptr);1817}1818}1819}1820}18211822static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,1823nir_intrinsic_instr *instr, LLVMValueRef result[4])1824{1825struct gallivm_state * gallivm = bld_base->base.gallivm;1826LLVMBuilderRef builder = gallivm->builder;1827uint32_t bit_size = nir_src_bit_size(instr->src[0]);1828LLVMValueRef exec_mask = mask_vec(bld_base);1829struct lp_build_loop_state loop_state;1830LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");18311832LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");1833LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");1834LLVMValueRef init_val = NULL;1835if (instr->intrinsic == nir_intrinsic_vote_ieq ||1836instr->intrinsic == nir_intrinsic_vote_feq) {1837/* for equal we unfortunately have to loop and find the first valid one. */1838lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));1839LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");18401841struct lp_build_if_state ifthen;1842lp_build_if(&ifthen, gallivm, if_cond);1843LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,1844loop_state.counter, "");1845LLVMBuildStore(builder, value_ptr, eq_store);1846LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);1847lp_build_endif(&ifthen);1848lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),1849NULL, LLVMIntUGE);1850init_val = LLVMBuildLoad(builder, eq_store, "");1851} else {1852LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);1853}18541855LLVMValueRef res;1856lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));1857LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,1858loop_state.counter, "");1859struct lp_build_if_state ifthen;1860LLVMValueRef if_cond;1861if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");18621863lp_build_if(&ifthen, gallivm, if_cond);1864res = LLVMBuildLoad(builder, res_store, "");18651866if (instr->intrinsic == nir_intrinsic_vote_feq) {1867struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);1868LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,1869LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),1870LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");1871tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");1872res = LLVMBuildAnd(builder, res, tmp, "");1873} else if (instr->intrinsic == nir_intrinsic_vote_ieq) {1874LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");1875tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");1876res = LLVMBuildAnd(builder, res, tmp, "");1877} else if (instr->intrinsic == nir_intrinsic_vote_any)1878res = LLVMBuildOr(builder, res, value_ptr, "");1879else1880res = LLVMBuildAnd(builder, res, value_ptr, "");1881LLVMBuildStore(builder, res, res_store);1882lp_build_endif(&ifthen);1883lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),1884NULL, LLVMIntUGE);1885result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));1886}18871888static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])1889{1890struct gallivm_state * gallivm = bld_base->base.gallivm;1891LLVMBuilderRef builder = gallivm->builder;1892LLVMValueRef exec_mask = mask_vec(bld_base);1893struct lp_build_loop_state loop_state;1894src = LLVMBuildAnd(builder, src, exec_mask, "");1895LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");1896LLVMValueRef res;1897lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));1898LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,1899loop_state.counter, "");1900res = LLVMBuildLoad(builder, res_store, "");1901res = LLVMBuildOr(builder,1902res,1903LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");1904LLVMBuildStore(builder, res, res_store);19051906lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),1907NULL, LLVMIntUGE);1908result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));1909}19101911static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])1912{1913struct gallivm_state *gallivm = bld_base->base.gallivm;1914LLVMBuilderRef builder = gallivm->builder;1915LLVMValueRef exec_mask = mask_vec(bld_base);1916struct lp_build_loop_state loop_state;19171918LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");1919LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");1920lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));1921LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,1922loop_state.counter, "");1923LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,1924LLVMIntEQ,1925value_ptr,1926lp_build_const_int32(gallivm, -1), "");1927LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,1928LLVMIntEQ,1929LLVMBuildLoad(builder, found_store, ""),1930lp_build_const_int32(gallivm, 0), "");19311932cond = LLVMBuildAnd(builder, cond, cond2, "");1933struct lp_build_if_state ifthen;1934lp_build_if(&ifthen, gallivm, cond);1935LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);1936LLVMBuildStore(builder, loop_state.counter, idx_store);1937lp_build_endif(&ifthen);1938lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),1939NULL, LLVMIntUGE);19401941result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,1942lp_build_const_int32(gallivm, -1),1943LLVMBuildLoad(builder, idx_store, ""),1944"");1945}19461947static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,1948nir_intrinsic_instr *instr, LLVMValueRef result[4])1949{1950struct gallivm_state *gallivm = bld_base->base.gallivm;1951LLVMBuilderRef builder = gallivm->builder;1952uint32_t bit_size = nir_src_bit_size(instr->src[0]);1953/* can't use llvm reduction intrinsics because of exec_mask */1954LLVMValueRef exec_mask = mask_vec(bld_base);1955struct lp_build_loop_state loop_state;1956nir_op reduction_op = nir_intrinsic_reduction_op(instr);19571958LLVMValueRef res_store = NULL;1959LLVMValueRef scan_store;1960struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);19611962if (instr->intrinsic != nir_intrinsic_reduce)1963res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");19641965scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");19661967struct lp_build_context elem_bld;1968bool is_flt = reduction_op == nir_op_fadd ||1969reduction_op == nir_op_fmul ||1970reduction_op == nir_op_fmin ||1971reduction_op == nir_op_fmax;1972bool is_unsigned = reduction_op == nir_op_umin ||1973reduction_op == nir_op_umax;19741975struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :1976get_int_bld(bld_base, is_unsigned, bit_size);19771978lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));19791980LLVMValueRef store_val = NULL;1981/*1982* Put the identity value for the operation into the storage1983*/1984switch (reduction_op) {1985case nir_op_fmin: {1986LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :1987lp_build_const_float(gallivm, INFINITY);1988store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");1989break;1990}1991case nir_op_fmax: {1992LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :1993lp_build_const_float(gallivm, -INFINITY);1994store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");1995break;1996}1997case nir_op_fmul: {1998LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :1999lp_build_const_float(gallivm, 1.0);2000store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");2001break;2002}2003case nir_op_umin:2004store_val = lp_build_const_int32(gallivm, UINT_MAX);2005break;2006case nir_op_imin:2007store_val = lp_build_const_int32(gallivm, INT_MAX);2008break;2009case nir_op_imax:2010store_val = lp_build_const_int32(gallivm, INT_MIN);2011break;2012case nir_op_imul:2013store_val = lp_build_const_int32(gallivm, 1);2014break;2015case nir_op_iand:2016store_val = lp_build_const_int32(gallivm, 0xffffffff);2017break;2018default:2019break;2020}2021if (store_val)2022LLVMBuildStore(builder, store_val, scan_store);20232024LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");20252026lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));20272028struct lp_build_if_state ifthen;2029LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");2030lp_build_if(&ifthen, gallivm, if_cond);2031LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");20322033LLVMValueRef res = NULL;2034LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");2035if (instr->intrinsic != nir_intrinsic_reduce)2036res = LLVMBuildLoad(gallivm->builder, res_store, "");20372038if (instr->intrinsic == nir_intrinsic_exclusive_scan)2039res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");20402041if (is_flt) {2042scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");2043value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");2044}2045switch (reduction_op) {2046case nir_op_fadd:2047case nir_op_iadd:2048scan_val = lp_build_add(&elem_bld, value, scan_val);2049break;2050case nir_op_fmul:2051case nir_op_imul:2052scan_val = lp_build_mul(&elem_bld, value, scan_val);2053break;2054case nir_op_imin:2055case nir_op_umin:2056case nir_op_fmin:2057scan_val = lp_build_min(&elem_bld, value, scan_val);2058break;2059case nir_op_imax:2060case nir_op_umax:2061case nir_op_fmax:2062scan_val = lp_build_max(&elem_bld, value, scan_val);2063break;2064case nir_op_iand:2065scan_val = lp_build_and(&elem_bld, value, scan_val);2066break;2067case nir_op_ior:2068scan_val = lp_build_or(&elem_bld, value, scan_val);2069break;2070case nir_op_ixor:2071scan_val = lp_build_xor(&elem_bld, value, scan_val);2072break;2073default:2074assert(0);2075break;2076}2077if (is_flt)2078scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");2079LLVMBuildStore(builder, scan_val, scan_store);20802081if (instr->intrinsic == nir_intrinsic_inclusive_scan) {2082res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");2083}20842085if (instr->intrinsic != nir_intrinsic_reduce)2086LLVMBuildStore(builder, res, res_store);2087lp_build_endif(&ifthen);20882089lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),2090NULL, LLVMIntUGE);2091if (instr->intrinsic == nir_intrinsic_reduce)2092result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));2093else2094result[0] = LLVMBuildLoad(builder, res_store, "");2095}20962097static void emit_read_invocation(struct lp_build_nir_context *bld_base,2098LLVMValueRef src,2099unsigned bit_size,2100LLVMValueRef invoc,2101LLVMValueRef result[4])2102{2103struct gallivm_state *gallivm = bld_base->base.gallivm;2104LLVMBuilderRef builder = gallivm->builder;2105LLVMValueRef idx;2106struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);2107if (invoc) {2108idx = invoc;2109idx = LLVMBuildExtractElement(gallivm->builder, idx, lp_build_const_int32(gallivm, 0), "");2110} else {2111/* have to find the first active invocation */2112LLVMValueRef exec_mask = mask_vec(bld_base);2113struct lp_build_loop_state loop_state;2114LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");2115LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");2116lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));21172118LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");2119struct lp_build_if_state ifthen;21202121lp_build_if(&ifthen, gallivm, if_cond);2122LLVMBuildStore(builder, loop_state.counter, res_store);2123lp_build_endif(&ifthen);21242125lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),2126lp_build_const_int32(gallivm, -1), LLVMIntEQ);2127idx = LLVMBuildLoad(builder, res_store, "");2128}21292130LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,2131src, idx, "");2132result[0] = lp_build_broadcast_scalar(uint_bld, value);2133}21342135static void2136emit_interp_at(struct lp_build_nir_context *bld_base,2137unsigned num_components,2138nir_variable *var,2139bool centroid,2140bool sample,2141unsigned const_index,2142LLVMValueRef indir_index,2143LLVMValueRef offsets[2],2144LLVMValueRef dst[4])2145{2146struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;21472148for (unsigned i = 0; i < num_components; i++) {2149dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,2150const_index + var->data.driver_location, i + var->data.location_frac,2151centroid, sample, indir_index, offsets);2152}2153}21542155static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,2156struct lp_type type,2157unsigned scratch_size)2158{2159LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);2160LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];2161unsigned i;21622163if (type.length == 1)2164return LLVMConstInt(elem_type, 0, 0);21652166for (i = 0; i < type.length; ++i)2167elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);21682169return LLVMConstVector(elems, type.length);2170}21712172static void2173emit_load_scratch(struct lp_build_nir_context *bld_base,2174unsigned nc, unsigned bit_size,2175LLVMValueRef offset,2176LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2177{2178struct gallivm_state * gallivm = bld_base->base.gallivm;2179LLVMBuilderRef builder = gallivm->builder;2180struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;2181struct lp_build_context *uint_bld = &bld_base->uint_bld;2182struct lp_build_context *load_bld;2183LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;2184uint32_t shift_val = bit_size_to_shift_size(bit_size);21852186load_bld = get_int_bld(bld_base, true, bit_size);21872188offset = lp_build_add(uint_bld, offset, thread_offsets);2189offset = lp_build_shr_imm(uint_bld, offset, shift_val);2190for (unsigned c = 0; c < nc; c++) {2191LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));2192LLVMValueRef exec_mask = mask_vec(bld_base);21932194LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");2195struct lp_build_loop_state loop_state;2196lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));21972198struct lp_build_if_state ifthen;2199LLVMValueRef cond, temp_res;22002201loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,2202loop_state.counter, "");2203cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");2204cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");22052206lp_build_if(&ifthen, gallivm, cond);2207LLVMValueRef scalar;2208LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");2209scalar = lp_build_pointer_get(builder, ptr2, loop_index);22102211temp_res = LLVMBuildLoad(builder, result, "");2212temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");2213LLVMBuildStore(builder, temp_res, result);2214lp_build_else(&ifthen);2215temp_res = LLVMBuildLoad(builder, result, "");2216LLVMValueRef zero;2217if (bit_size == 64)2218zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);2219else if (bit_size == 16)2220zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);2221else if (bit_size == 8)2222zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);2223else2224zero = lp_build_const_int32(gallivm, 0);2225temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");2226LLVMBuildStore(builder, temp_res, result);2227lp_build_endif(&ifthen);2228lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),2229NULL, LLVMIntUGE);2230outval[c] = LLVMBuildLoad(gallivm->builder, result, "");2231}2232}22332234static void2235emit_store_scratch(struct lp_build_nir_context *bld_base,2236unsigned writemask, unsigned nc,2237unsigned bit_size, LLVMValueRef offset,2238LLVMValueRef dst)2239{2240struct gallivm_state * gallivm = bld_base->base.gallivm;2241LLVMBuilderRef builder = gallivm->builder;2242struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;2243struct lp_build_context *uint_bld = &bld_base->uint_bld;2244struct lp_build_context *store_bld;2245LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;2246uint32_t shift_val = bit_size_to_shift_size(bit_size);2247store_bld = get_int_bld(bld_base, true, bit_size);22482249LLVMValueRef exec_mask = mask_vec(bld_base);2250offset = lp_build_add(uint_bld, offset, thread_offsets);2251offset = lp_build_shr_imm(uint_bld, offset, shift_val);22522253for (unsigned c = 0; c < nc; c++) {2254if (!(writemask & (1u << c)))2255continue;2256LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");2257LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));22582259struct lp_build_loop_state loop_state;2260lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));22612262LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,2263loop_state.counter, "");2264value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");22652266struct lp_build_if_state ifthen;2267LLVMValueRef cond;22682269loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,2270loop_state.counter, "");22712272cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");2273cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");2274lp_build_if(&ifthen, gallivm, cond);22752276LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");2277lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);22782279lp_build_endif(&ifthen);2280lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),2281NULL, LLVMIntUGE);2282}2283}22842285void lp_build_nir_soa(struct gallivm_state *gallivm,2286struct nir_shader *shader,2287const struct lp_build_tgsi_params *params,2288LLVMValueRef (*outputs)[4])2289{2290struct lp_build_nir_soa_context bld;2291struct lp_type type = params->type;2292struct lp_type res_type;22932294assert(type.length <= LP_MAX_VECTOR_LENGTH);2295memset(&res_type, 0, sizeof res_type);2296res_type.width = type.width;2297res_type.length = type.length;2298res_type.sign = 1;22992300/* Setup build context */2301memset(&bld, 0, sizeof bld);2302lp_build_context_init(&bld.bld_base.base, gallivm, type);2303lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));2304lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));2305lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));2306lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));2307{2308struct lp_type dbl_type;2309dbl_type = type;2310dbl_type.width *= 2;2311lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);2312}2313{2314struct lp_type uint64_type;2315uint64_type = lp_uint_type(type);2316uint64_type.width *= 2;2317lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);2318}2319{2320struct lp_type int64_type;2321int64_type = lp_int_type(type);2322int64_type.width *= 2;2323lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);2324}2325{2326struct lp_type uint16_type;2327uint16_type = lp_uint_type(type);2328uint16_type.width /= 2;2329lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);2330}2331{2332struct lp_type int16_type;2333int16_type = lp_int_type(type);2334int16_type.width /= 2;2335lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);2336}2337{2338struct lp_type uint8_type;2339uint8_type = lp_uint_type(type);2340uint8_type.width /= 4;2341lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);2342}2343{2344struct lp_type int8_type;2345int8_type = lp_int_type(type);2346int8_type.width /= 4;2347lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);2348}2349bld.bld_base.load_var = emit_load_var;2350bld.bld_base.store_var = emit_store_var;2351bld.bld_base.load_reg = emit_load_reg;2352bld.bld_base.store_reg = emit_store_reg;2353bld.bld_base.emit_var_decl = emit_var_decl;2354bld.bld_base.load_ubo = emit_load_ubo;2355bld.bld_base.load_kernel_arg = emit_load_kernel_arg;2356bld.bld_base.load_global = emit_load_global;2357bld.bld_base.store_global = emit_store_global;2358bld.bld_base.atomic_global = emit_atomic_global;2359bld.bld_base.tex = emit_tex;2360bld.bld_base.tex_size = emit_tex_size;2361bld.bld_base.bgnloop = bgnloop;2362bld.bld_base.endloop = endloop;2363bld.bld_base.if_cond = if_cond;2364bld.bld_base.else_stmt = else_stmt;2365bld.bld_base.endif_stmt = endif_stmt;2366bld.bld_base.break_stmt = break_stmt;2367bld.bld_base.continue_stmt = continue_stmt;2368bld.bld_base.sysval_intrin = emit_sysval_intrin;2369bld.bld_base.discard = discard;2370bld.bld_base.emit_vertex = emit_vertex;2371bld.bld_base.end_primitive = end_primitive;2372bld.bld_base.load_mem = emit_load_mem;2373bld.bld_base.store_mem = emit_store_mem;2374bld.bld_base.get_ssbo_size = emit_get_ssbo_size;2375bld.bld_base.atomic_mem = emit_atomic_mem;2376bld.bld_base.barrier = emit_barrier;2377bld.bld_base.image_op = emit_image_op;2378bld.bld_base.image_size = emit_image_size;2379bld.bld_base.vote = emit_vote;2380bld.bld_base.elect = emit_elect;2381bld.bld_base.reduce = emit_reduce;2382bld.bld_base.ballot = emit_ballot;2383bld.bld_base.read_invocation = emit_read_invocation;2384bld.bld_base.helper_invocation = emit_helper_invocation;2385bld.bld_base.interp_at = emit_interp_at;2386bld.bld_base.load_scratch = emit_load_scratch;2387bld.bld_base.store_scratch = emit_store_scratch;23882389bld.mask = params->mask;2390bld.inputs = params->inputs;2391bld.outputs = outputs;2392bld.consts_ptr = params->consts_ptr;2393bld.const_sizes_ptr = params->const_sizes_ptr;2394bld.ssbo_ptr = params->ssbo_ptr;2395bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;2396bld.sampler = params->sampler;2397// bld.bld_base.info = params->info;23982399bld.context_ptr = params->context_ptr;2400bld.thread_data_ptr = params->thread_data_ptr;2401bld.image = params->image;2402bld.shared_ptr = params->shared_ptr;2403bld.coro = params->coro;2404bld.kernel_args_ptr = params->kernel_args;2405bld.indirects = 0;2406if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))2407bld.indirects |= nir_var_shader_in;24082409bld.gs_iface = params->gs_iface;2410bld.tcs_iface = params->tcs_iface;2411bld.tes_iface = params->tes_iface;2412bld.fs_iface = params->fs_iface;2413if (bld.gs_iface) {2414struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;24152416bld.gs_vertex_streams = params->gs_vertex_streams;2417bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,2418shader->info.gs.vertices_out);2419for (int i = 0; i < params->gs_vertex_streams; i++) {2420bld.emitted_prims_vec_ptr[i] =2421lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");2422bld.emitted_vertices_vec_ptr[i] =2423lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");2424bld.total_emitted_vertices_vec_ptr[i] =2425lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");2426}2427}2428lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);24292430bld.system_values = *params->system_values;24312432bld.bld_base.shader = shader;24332434if (shader->scratch_size) {2435bld.scratch_ptr = lp_build_array_alloca(gallivm,2436LLVMInt8TypeInContext(gallivm->context),2437lp_build_const_int32(gallivm, shader->scratch_size * type.length),2438"scratch");2439}2440bld.scratch_size = shader->scratch_size;2441emit_prologue(&bld);2442lp_build_nir_llvm(&bld.bld_base, shader);24432444if (bld.gs_iface) {2445LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;2446LLVMValueRef total_emitted_vertices_vec;2447LLVMValueRef emitted_prims_vec;24482449for (int i = 0; i < params->gs_vertex_streams; i++) {2450end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);24512452total_emitted_vertices_vec =2453LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");24542455emitted_prims_vec =2456LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");2457bld.gs_iface->gs_epilogue(bld.gs_iface,2458total_emitted_vertices_vec,2459emitted_prims_vec, i);2460}2461}2462lp_exec_mask_fini(&bld.exec_mask);2463}246424652466