/* Source path: src/freedreno/ir3/ir3_shader.c (branch 21.2-virgl) */
/*1* Copyright (C) 2014 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "util/format/u_format.h"27#include "util/u_atomic.h"28#include "util/u_math.h"29#include "util/u_memory.h"30#include "util/u_string.h"3132#include "drm/freedreno_drmif.h"3334#include "ir3_assembler.h"35#include "ir3_compiler.h"36#include "ir3_nir.h"37#include "ir3_parser.h"38#include "ir3_shader.h"3940#include "isa/isa.h"4142#include "disasm.h"4344int45ir3_glsl_type_size(const struct glsl_type *type, bool bindless)46{47return glsl_count_attribute_slots(type, false);48}4950/* for vertex shader, the inputs are loaded into registers before the shader51* is executed, so max_regs from the shader instructions might not properly52* reflect the # of registers actually used, especially in case passthrough53* varyings.54*55* Likewise, for fragment shader, we can have some regs 
which are passed56* input values but never touched by the resulting shader (ie. as result57* of dead code elimination or simply because we don't know how to turn58* the reg off.59*/60static void61fixup_regfootprint(struct ir3_shader_variant *v)62{63unsigned i;6465for (i = 0; i < v->inputs_count; i++) {66/* skip frag inputs fetch via bary.f since their reg's are67* not written by gpu before shader starts (and in fact the68* regid's might not even be valid)69*/70if (v->inputs[i].bary)71continue;7273/* ignore high regs that are global to all threads in a warp74* (they exist by default) (a5xx+)75*/76if (v->inputs[i].regid >= regid(48, 0))77continue;7879if (v->inputs[i].compmask) {80unsigned n = util_last_bit(v->inputs[i].compmask) - 1;81int32_t regid = v->inputs[i].regid + n;82if (v->inputs[i].half) {83if (!v->mergedregs) {84v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);85} else {86v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);87}88} else {89v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);90}91}92}9394for (i = 0; i < v->outputs_count; i++) {95/* for ex, VS shaders with tess don't have normal varying outs: */96if (!VALIDREG(v->outputs[i].regid))97continue;98int32_t regid = v->outputs[i].regid + 3;99if (v->outputs[i].half) {100if (!v->mergedregs) {101v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);102} else {103v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);104}105} else {106v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);107}108}109110for (i = 0; i < v->num_sampler_prefetch; i++) {111unsigned n = util_last_bit(v->sampler_prefetch[i].wrmask) - 1;112int32_t regid = v->sampler_prefetch[i].dst + n;113if (v->sampler_prefetch[i].half_precision) {114if (!v->mergedregs) {115v->info.max_half_reg = MAX2(v->info.max_half_reg, regid >> 2);116} else {117v->info.max_reg = MAX2(v->info.max_reg, regid >> 3);118}119} else {120v->info.max_reg = MAX2(v->info.max_reg, regid >> 2);121}122}123}124125/* wrapper for ir3_assemble() which does 
some info fixup based on126* shader state. Non-static since used by ir3_cmdline too.127*/128void *129ir3_shader_assemble(struct ir3_shader_variant *v)130{131const struct ir3_compiler *compiler = v->shader->compiler;132struct ir3_info *info = &v->info;133uint32_t *bin;134135ir3_collect_info(v);136137if (v->constant_data_size) {138/* Make sure that where we're about to place the constant_data is safe139* to indirectly upload from.140*/141info->constant_data_offset =142align(info->size, v->shader->compiler->const_upload_unit * 16);143info->size = info->constant_data_offset + v->constant_data_size;144}145146/* Pad out the size so that when turnip uploads the shaders in147* sequence, the starting offset of the next one is properly aligned.148*/149info->size = align(info->size, compiler->instr_align * sizeof(instr_t));150151bin = isa_assemble(v);152if (!bin)153return NULL;154155/* Append the immediates after the end of the program. This lets us emit156* the immediates as an indirect load, while avoiding creating another BO.157*/158if (v->constant_data_size)159memcpy(&bin[info->constant_data_offset / 4], v->constant_data,160v->constant_data_size);161ralloc_free(v->constant_data);162v->constant_data = NULL;163164/* NOTE: if relative addressing is used, we set constlen in165* the compiler (to worst-case value) since we don't know in166* the assembler what the max addr reg value can be:167*/168v->constlen = MAX2(v->constlen, info->max_const + 1);169170if (v->constlen > ir3_const_state(v)->offsets.driver_param)171v->need_driver_params = true;172173/* On a4xx and newer, constlen must be a multiple of 16 dwords even though174* uploads are in units of 4 dwords. Round it up here to make calculations175* regarding the shared constlen simpler.176*/177if (compiler->gpu_id >= 400)178v->constlen = align(v->constlen, 4);179180/* Use the per-wave layout by default on a6xx for compute shaders. 
It181* should result in better performance when loads/stores are to a uniform182* index.183*/184v->pvtmem_per_wave = compiler->gpu_id >= 600 && !info->multi_dword_ldp_stp &&185v->type == MESA_SHADER_COMPUTE;186187fixup_regfootprint(v);188189return bin;190}191192static bool193try_override_shader_variant(struct ir3_shader_variant *v,194const char *identifier)195{196assert(ir3_shader_override_path);197198char *name =199ralloc_asprintf(NULL, "%s/%s.asm", ir3_shader_override_path, identifier);200201FILE *f = fopen(name, "r");202203if (!f) {204ralloc_free(name);205return false;206}207208struct ir3_kernel_info info;209info.numwg = INVALID_REG;210v->ir = ir3_parse(v, &info, f);211212fclose(f);213214if (!v->ir) {215fprintf(stderr, "Failed to parse %s\n", name);216exit(1);217}218219v->bin = ir3_shader_assemble(v);220if (!v->bin) {221fprintf(stderr, "Failed to assemble %s\n", name);222exit(1);223}224225ralloc_free(name);226return true;227}228229static void230assemble_variant(struct ir3_shader_variant *v)231{232v->bin = ir3_shader_assemble(v);233234bool dbg_enabled = shader_debug_enabled(v->shader->type);235if (dbg_enabled || ir3_shader_override_path || v->disasm_info.write_disasm) {236unsigned char sha1[21];237char sha1buf[41];238239_mesa_sha1_compute(v->bin, v->info.size, sha1);240_mesa_sha1_format(sha1buf, sha1);241242bool shader_overridden =243ir3_shader_override_path && try_override_shader_variant(v, sha1buf);244245if (v->disasm_info.write_disasm) {246char *stream_data = NULL;247size_t stream_size = 0;248FILE *stream = open_memstream(&stream_data, &stream_size);249250fprintf(stream,251"Native code%s for unnamed %s shader %s with sha1 %s:\n",252shader_overridden ? 
" (overridden)" : "", ir3_shader_stage(v),253v->shader->nir->info.name, sha1buf);254ir3_shader_disasm(v, v->bin, stream);255256fclose(stream);257258v->disasm_info.disasm = ralloc_size(v->shader, stream_size + 1);259memcpy(v->disasm_info.disasm, stream_data, stream_size);260v->disasm_info.disasm[stream_size] = 0;261free(stream_data);262}263264if (dbg_enabled || shader_overridden) {265char *stream_data = NULL;266size_t stream_size = 0;267FILE *stream = open_memstream(&stream_data, &stream_size);268269fprintf(stream,270"Native code%s for unnamed %s shader %s with sha1 %s:\n",271shader_overridden ? " (overridden)" : "", ir3_shader_stage(v),272v->shader->nir->info.name, sha1buf);273if (v->shader->type == MESA_SHADER_FRAGMENT)274fprintf(stream, "SIMD0\n");275ir3_shader_disasm(v, v->bin, stream);276fclose(stream);277278mesa_log_multiline(MESA_LOG_INFO, stream_data);279free(stream_data);280}281}282283/* no need to keep the ir around beyond this point: */284ir3_destroy(v->ir);285v->ir = NULL;286}287288static bool289compile_variant(struct ir3_shader_variant *v)290{291int ret = ir3_compile_shader_nir(v->shader->compiler, v);292if (ret) {293mesa_loge("compile failed! (%s:%s)", v->shader->nir->info.name,294v->shader->nir->info.label);295return false;296}297298assemble_variant(v);299if (!v->bin) {300mesa_loge("assemble failed! (%s:%s)", v->shader->nir->info.name,301v->shader->nir->info.label);302return false;303}304305return true;306}307308/*309* For creating normal shader variants, 'nonbinning' is NULL. 
For310* creating binning pass shader, it is link to corresponding normal311* (non-binning) variant.312*/313static struct ir3_shader_variant *314alloc_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,315struct ir3_shader_variant *nonbinning)316{317void *mem_ctx = shader;318/* hang the binning variant off it's non-binning counterpart instead319* of the shader, to simplify the error cleanup paths320*/321if (nonbinning)322mem_ctx = nonbinning;323struct ir3_shader_variant *v = rzalloc_size(mem_ctx, sizeof(*v));324325if (!v)326return NULL;327328v->id = ++shader->variant_count;329v->shader = shader;330v->binning_pass = !!nonbinning;331v->nonbinning = nonbinning;332v->key = *key;333v->type = shader->type;334v->mergedregs = shader->compiler->gpu_id >= 600;335336if (!v->binning_pass)337v->const_state = rzalloc_size(v, sizeof(*v->const_state));338339return v;340}341342static bool343needs_binning_variant(struct ir3_shader_variant *v)344{345if ((v->type == MESA_SHADER_VERTEX) && ir3_has_binning_vs(&v->key))346return true;347return false;348}349350static struct ir3_shader_variant *351create_variant(struct ir3_shader *shader, const struct ir3_shader_key *key,352bool write_disasm)353{354struct ir3_shader_variant *v = alloc_variant(shader, key, NULL);355356if (!v)357goto fail;358359v->disasm_info.write_disasm = write_disasm;360361if (needs_binning_variant(v)) {362v->binning = alloc_variant(shader, key, v);363if (!v->binning)364goto fail;365v->binning->disasm_info.write_disasm = write_disasm;366}367368if (ir3_disk_cache_retrieve(shader->compiler, v))369return v;370371if (!shader->nir_finalized) {372ir3_nir_post_finalize(shader->compiler, shader->nir);373374if (ir3_shader_debug & IR3_DBG_DISASM) {375mesa_logi("dump nir%d: type=%d", shader->id, shader->type);376nir_log_shaderi(shader->nir);377}378379if (v->disasm_info.write_disasm) {380v->disasm_info.nir = nir_shader_as_str(shader->nir, shader);381}382383shader->nir_finalized = true;384}385386if 
(!compile_variant(v))387goto fail;388389if (needs_binning_variant(v) && !compile_variant(v->binning))390goto fail;391392ir3_disk_cache_store(shader->compiler, v);393394return v;395396fail:397ralloc_free(v);398return NULL;399}400401static inline struct ir3_shader_variant *402shader_variant(struct ir3_shader *shader, const struct ir3_shader_key *key)403{404struct ir3_shader_variant *v;405406for (v = shader->variants; v; v = v->next)407if (ir3_shader_key_equal(key, &v->key))408return v;409410return NULL;411}412413struct ir3_shader_variant *414ir3_shader_get_variant(struct ir3_shader *shader,415const struct ir3_shader_key *key, bool binning_pass,416bool write_disasm, bool *created)417{418mtx_lock(&shader->variants_lock);419struct ir3_shader_variant *v = shader_variant(shader, key);420421if (!v) {422/* compile new variant if it doesn't exist already: */423v = create_variant(shader, key, write_disasm);424if (v) {425v->next = shader->variants;426shader->variants = v;427*created = true;428}429}430431if (v && binning_pass) {432v = v->binning;433assert(v);434}435436mtx_unlock(&shader->variants_lock);437438return v;439}440441void442ir3_shader_destroy(struct ir3_shader *shader)443{444ralloc_free(shader->nir);445mtx_destroy(&shader->variants_lock);446ralloc_free(shader);447}448449/**450* Creates a bitmask of the used bits of the shader key by this particular451* shader. 
Used by the gallium driver to skip state-dependent recompiles when452* possible.453*/454static void455ir3_setup_used_key(struct ir3_shader *shader)456{457nir_shader *nir = shader->nir;458struct shader_info *info = &nir->info;459struct ir3_shader_key *key = &shader->key_mask;460461/* This key flag is just used to make for a cheaper ir3_shader_key_equal462* check in the common case.463*/464key->has_per_samp = true;465466key->safe_constlen = true;467468/* When clip/cull distances are natively supported, we only use469* ucp_enables to determine whether to lower legacy clip planes to470* gl_ClipDistance.471*/472if (info->stage != MESA_SHADER_FRAGMENT || !shader->compiler->has_clip_cull)473key->ucp_enables = 0xff;474475if (info->stage == MESA_SHADER_FRAGMENT) {476key->fastc_srgb = ~0;477key->fsamples = ~0;478479if (info->inputs_read & VARYING_BITS_COLOR) {480key->rasterflat = true;481}482483if (info->inputs_read & VARYING_BIT_LAYER) {484key->layer_zero = true;485}486487if (info->inputs_read & VARYING_BIT_VIEWPORT) {488key->view_zero = true;489}490491/* Only used for deciding on behavior of492* nir_intrinsic_load_barycentric_sample, or the centroid demotion493* on older HW.494*/495key->msaa = info->fs.uses_sample_qualifier ||496(shader->compiler->gpu_id < 600 &&497(BITSET_TEST(info->system_values_read,498SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID) ||499BITSET_TEST(info->system_values_read,500SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID)));501} else {502key->tessellation = ~0;503key->has_gs = true;504505if (info->stage == MESA_SHADER_VERTEX) {506key->vastc_srgb = ~0;507key->vsamples = ~0;508}509}510}511512/* Given an array of constlen's, decrease some of them so that the sum stays513* within "combined_limit" while trying to fairly share the reduction. 
Returns514* a bitfield of which stages should be trimmed.515*/516static uint32_t517trim_constlens(unsigned *constlens, unsigned first_stage, unsigned last_stage,518unsigned combined_limit, unsigned safe_limit)519{520unsigned cur_total = 0;521for (unsigned i = first_stage; i <= last_stage; i++) {522cur_total += constlens[i];523}524525unsigned max_stage = 0;526unsigned max_const = 0;527uint32_t trimmed = 0;528529while (cur_total > combined_limit) {530for (unsigned i = first_stage; i <= last_stage; i++) {531if (constlens[i] >= max_const) {532max_stage = i;533max_const = constlens[i];534}535}536537assert(max_const > safe_limit);538trimmed |= 1 << max_stage;539cur_total = cur_total - max_const + safe_limit;540constlens[max_stage] = safe_limit;541}542543return trimmed;544}545546/* Figures out which stages in the pipeline to use the "safe" constlen for, in547* order to satisfy all shared constlen limits.548*/549uint32_t550ir3_trim_constlen(struct ir3_shader_variant **variants,551const struct ir3_compiler *compiler)552{553unsigned constlens[MESA_SHADER_STAGES] = {};554555for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {556if (variants[i])557constlens[i] = variants[i]->constlen;558}559560uint32_t trimmed = 0;561STATIC_ASSERT(MESA_SHADER_STAGES <= 8 * sizeof(trimmed));562563/* There are two shared limits to take into account, the geometry limit on564* a6xx and the total limit. 
The frag limit on a6xx only matters for a565* single stage, so it's always satisfied with the first variant.566*/567if (compiler->gpu_id >= 600) {568trimmed |=569trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_GEOMETRY,570compiler->max_const_geom, compiler->max_const_safe);571}572trimmed |=573trim_constlens(constlens, MESA_SHADER_VERTEX, MESA_SHADER_FRAGMENT,574compiler->max_const_pipeline, compiler->max_const_safe);575576return trimmed;577}578579struct ir3_shader *580ir3_shader_from_nir(struct ir3_compiler *compiler, nir_shader *nir,581unsigned reserved_user_consts,582struct ir3_stream_output_info *stream_output)583{584struct ir3_shader *shader = rzalloc_size(NULL, sizeof(*shader));585586mtx_init(&shader->variants_lock, mtx_plain);587shader->compiler = compiler;588shader->id = p_atomic_inc_return(&shader->compiler->shader_count);589shader->type = nir->info.stage;590if (stream_output)591memcpy(&shader->stream_output, stream_output,592sizeof(shader->stream_output));593shader->num_reserved_user_consts = reserved_user_consts;594shader->nir = nir;595596ir3_disk_cache_init_shader_key(compiler, shader);597598ir3_setup_used_key(shader);599600return shader;601}602603static void604dump_reg(FILE *out, const char *name, uint32_t r)605{606if (r != regid(63, 0)) {607const char *reg_type = (r & HALF_REG_ID) ? 
"hr" : "r";608fprintf(out, "; %s: %s%d.%c\n", name, reg_type, (r & ~HALF_REG_ID) >> 2,609"xyzw"[r & 0x3]);610}611}612613static void614dump_output(FILE *out, struct ir3_shader_variant *so, unsigned slot,615const char *name)616{617uint32_t regid;618regid = ir3_find_output_regid(so, slot);619dump_reg(out, name, regid);620}621622static const char *623input_name(struct ir3_shader_variant *so, int i)624{625if (so->inputs[i].sysval) {626return gl_system_value_name(so->inputs[i].slot);627} else if (so->type == MESA_SHADER_VERTEX) {628return gl_vert_attrib_name(so->inputs[i].slot);629} else {630return gl_varying_slot_name_for_stage(so->inputs[i].slot, so->type);631}632}633634static const char *635output_name(struct ir3_shader_variant *so, int i)636{637if (so->type == MESA_SHADER_FRAGMENT) {638return gl_frag_result_name(so->outputs[i].slot);639} else {640switch (so->outputs[i].slot) {641case VARYING_SLOT_GS_HEADER_IR3:642return "GS_HEADER";643case VARYING_SLOT_GS_VERTEX_FLAGS_IR3:644return "GS_VERTEX_FLAGS";645case VARYING_SLOT_TCS_HEADER_IR3:646return "TCS_HEADER";647default:648return gl_varying_slot_name_for_stage(so->outputs[i].slot, so->type);649}650}651}652653void654ir3_shader_disasm(struct ir3_shader_variant *so, uint32_t *bin, FILE *out)655{656struct ir3 *ir = so->ir;657struct ir3_register *reg;658const char *type = ir3_shader_stage(so);659uint8_t regid;660unsigned i;661662foreach_input_n (instr, i, ir) {663reg = instr->dsts[0];664regid = reg->num;665fprintf(out, "@in(%sr%d.%c)\tin%d",666(reg->flags & IR3_REG_HALF) ? "h" : "", (regid >> 2),667"xyzw"[regid & 0x3], i);668669if (reg->wrmask > 0x1)670fprintf(out, " (wrmask=0x%x)", reg->wrmask);671fprintf(out, "\n");672}673674/* print pre-dispatch texture fetches: */675for (i = 0; i < so->num_sampler_prefetch; i++) {676const struct ir3_sampler_prefetch *fetch = &so->sampler_prefetch[i];677fprintf(out,678"@tex(%sr%d.%c)\tsrc=%u, samp=%u, tex=%u, wrmask=0x%x, cmd=%u\n",679fetch->half_precision ? 
"h" : "", fetch->dst >> 2,680"xyzw"[fetch->dst & 0x3], fetch -> src, fetch -> samp_id,681fetch -> tex_id, fetch -> wrmask, fetch -> cmd);682}683684const struct ir3_const_state *const_state = ir3_const_state(so);685for (i = 0; i < DIV_ROUND_UP(const_state->immediates_count, 4); i++) {686fprintf(out, "@const(c%d.x)\t", const_state->offsets.immediate + i);687fprintf(out, "0x%08x, 0x%08x, 0x%08x, 0x%08x\n",688const_state->immediates[i * 4 + 0],689const_state->immediates[i * 4 + 1],690const_state->immediates[i * 4 + 2],691const_state->immediates[i * 4 + 3]);692}693694isa_decode(bin, so->info.sizedwords * 4, out,695&(struct isa_decode_options){696.gpu_id = ir->compiler->gpu_id,697.show_errors = true,698.branch_labels = true,699});700701fprintf(out, "; %s: outputs:", type);702for (i = 0; i < so->outputs_count; i++) {703uint8_t regid = so->outputs[i].regid;704const char *reg_type = so->outputs[i].half ? "hr" : "r";705fprintf(out, " %s%d.%c (%s)", reg_type, (regid >> 2), "xyzw"[regid & 0x3],706output_name(so, i));707}708fprintf(out, "\n");709710fprintf(out, "; %s: inputs:", type);711for (i = 0; i < so->inputs_count; i++) {712uint8_t regid = so->inputs[i].regid;713fprintf(out, " r%d.%c (%s slot=%d cm=%x,il=%u,b=%u)", (regid >> 2),714"xyzw"[regid & 0x3], input_name(so, i), so -> inputs[i].slot,715so->inputs[i].compmask, so->inputs[i].inloc, so->inputs[i].bary);716}717fprintf(out, "\n");718719/* print generic shader info: */720fprintf(721out,722"; %s prog %d/%d: %u instr, %u nops, %u non-nops, %u mov, %u cov, %u dwords\n",723type, so->shader->id, so->id, so->info.instrs_count, so->info.nops_count,724so->info.instrs_count - so->info.nops_count, so->info.mov_count,725so->info.cov_count, so->info.sizedwords);726727fprintf(out,728"; %s prog %d/%d: %u last-baryf, %d half, %d full, %u constlen\n",729type, so->shader->id, so->id, so->info.last_baryf,730so->info.max_half_reg + 1, so->info.max_reg + 1, so->constlen);731732fprintf(733out,734"; %s prog %d/%d: %u cat0, %u cat1, %u cat2, 
%u cat3, %u cat4, %u cat5, %u cat6, %u cat7, \n",735type, so->shader->id, so->id, so->info.instrs_per_cat[0],736so->info.instrs_per_cat[1], so->info.instrs_per_cat[2],737so->info.instrs_per_cat[3], so->info.instrs_per_cat[4],738so->info.instrs_per_cat[5], so->info.instrs_per_cat[6],739so->info.instrs_per_cat[7]);740741fprintf(742out,743"; %s prog %d/%d: %u sstall, %u (ss), %u (sy), %d max_sun, %d loops\n",744type, so->shader->id, so->id, so->info.sstall, so->info.ss, so->info.sy,745so->max_sun, so->loops);746747/* print shader type specific info: */748switch (so->type) {749case MESA_SHADER_VERTEX:750dump_output(out, so, VARYING_SLOT_POS, "pos");751dump_output(out, so, VARYING_SLOT_PSIZ, "psize");752break;753case MESA_SHADER_FRAGMENT:754dump_reg(out, "pos (ij_pixel)",755ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL));756dump_reg(757out, "pos (ij_centroid)",758ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID));759dump_reg(out, "pos (ij_size)",760ir3_find_sysval_regid(so, SYSTEM_VALUE_BARYCENTRIC_PERSP_SIZE));761dump_output(out, so, FRAG_RESULT_DEPTH, "posz");762if (so->color0_mrt) {763dump_output(out, so, FRAG_RESULT_COLOR, "color");764} else {765dump_output(out, so, FRAG_RESULT_DATA0, "data0");766dump_output(out, so, FRAG_RESULT_DATA1, "data1");767dump_output(out, so, FRAG_RESULT_DATA2, "data2");768dump_output(out, so, FRAG_RESULT_DATA3, "data3");769dump_output(out, so, FRAG_RESULT_DATA4, "data4");770dump_output(out, so, FRAG_RESULT_DATA5, "data5");771dump_output(out, so, FRAG_RESULT_DATA6, "data6");772dump_output(out, so, FRAG_RESULT_DATA7, "data7");773}774dump_reg(out, "fragcoord",775ir3_find_sysval_regid(so, SYSTEM_VALUE_FRAG_COORD));776dump_reg(out, "fragface",777ir3_find_sysval_regid(so, SYSTEM_VALUE_FRONT_FACE));778break;779default:780/* TODO */781break;782}783784fprintf(out, "\n");785}786787uint64_t788ir3_shader_outputs(const struct ir3_shader *so)789{790return so->nir->info.outputs_written;791}792793/* Add any missing 
varyings needed for stream-out. Otherwise varyings not
 * used by fragment shader will be stripped out.
 *
 * For every stream-out output of 'v', the linkage map 'l' gains an entry for
 * the output's register if it has none yet, and the entry's component mask
 * (and l->max_loc) is widened to cover the streamed-out components.
 */
void
ir3_link_stream_out(struct ir3_shader_linkage *l,
                    const struct ir3_shader_variant *v)
{
   const struct ir3_stream_output_info *so_info = &v->shader->stream_output;

   /*
    * Any stream-out varyings not already in the linkage map (ie. those not
    * also consumed by the frag shader) need to be added:
    */
   for (unsigned n = 0; n < so_info->num_outputs; n++) {
      const struct ir3_stream_output *so = &so_info->output[n];
      const unsigned out_idx = so->register_index;
      const unsigned so_mask =
         (1 << (so->num_components + so->start_component)) - 1;

      /* psize/pos need to be the last entries in the linkage map and are
       * not added here, so skip over them:
       */
      switch (v->outputs[out_idx].slot) {
      case VARYING_SLOT_PSIZ:
      case VARYING_SLOT_POS:
         continue;
      default:
         break;
      }

      /* Search for an existing entry using the same register; while
       * walking, track the location just past the entries seen so far,
       * which is where a new entry would be placed.
       */
      unsigned pos = 0;
      unsigned free_loc = 0;
      while (pos < l->cnt && l->var[pos].regid != v->outputs[out_idx].regid) {
         free_loc = MAX2(free_loc, l->var[pos].loc + 4);
         pos++;
      }

      /* add if not already in linkage map: */
      if (pos == l->cnt)
         ir3_link_add(l, v->outputs[out_idx].regid, so_mask, free_loc);

      /* expand component-mask if needed, ie streaming out all components
       * but frag shader doesn't consume all components:
       */
      if (so_mask & ~l->var[pos].compmask) {
         l->var[pos].compmask |= so_mask;
         l->max_loc = MAX2(
            l->max_loc, l->var[pos].loc + util_last_bit(l->var[pos].compmask));
      }
   }
}