Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/nir.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include <string.h>

#include "util/hash_table.h"
#include "util/ralloc.h"
#include "util/bitscan.h"
#include "compiler/nir/nir.h"
#include "pipe/p_state.h"


#include "ppir.h"

static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}

static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_register *reg, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, reg->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == reg->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}

static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_dest *dest, unsigned mask)
{
   unsigned index = -1;

   if (dest) {
      if (dest->is_ssa)
         return ppir_node_create_ssa(block, op, &dest->ssa);
      else
         return ppir_node_create_reg(block, op, dest->reg.reg, mask);
   }

   return ppir_node_create(block, op, index, 0);
}

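/* Look up the node that produces a NIR source (an SSA def or a register
 * component), make 'node' depend on it and bind the ppir source to that
 * producer. A register that is read before any write gets a dummy node. */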
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;

   if (ns->is_ssa) {
      child = comp->var_nodes[ns->ssa->index];
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_register *reg = ns->reg.reg;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   ppir_node_target_assign(ps, child);
}

static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   /* not supported */
   [0 ... nir_last_opcode] = -1,

   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fddx] = ppir_op_ddx,
   [nir_op_fddy] = ppir_op_ddy,
};

static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   int op = nir_to_ppir_opcodes[instr->op];

   if (op < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }

   ppir_alu_node *node = ppir_node_create_dest(block, op, &instr->dest.dest,
                                               instr->dest.write_mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   nir_alu_dest *nd = &instr->dest;
   if (nd->saturate)
      pd->modifier = ppir_outmod_clamp_fraction;

   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   for (int i = 0; i < num_child; i++) {
      nir_alu_src *ns = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, ns->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &ns->src, src_mask);

      ps->absolute = ns->abs;
      ps->negate = ns->negate;
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static ppir_block *ppir_block_create(ppir_compiler *comp);

static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *block = ppir_block_create(comp);
   ppir_discard_node *discard;
   if (!block)
      return false;

   comp->discard_block = block;
   block->comp = comp;

   discard = ppir_node_create(block, ppir_op_discard, -1, 0);
   if (discard)
      list_addtail(&discard->node.list, &block->node_list);
   else
      return false;

   return true;
}

static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}

static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);

   return node;
}

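/* Translate NIR intrinsics: varying/uniform and sysval loads, store_output
 * (either mark the producing node as the shader end or append a final mov),
 * and discard/discard_if. */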
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default:
            node->is_end = 1;
            return true;
         }
      }

      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_end = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}

static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}

static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *undef = nir_instr_as_ssa_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}

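/* Translate a NIR texture instruction into a load_texture node; the
 * coordinates are fed to it through a load_coords node and a pipeline
 * register rather than being consumed directly. */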
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   if (!instr->dest.is_ssa)
      mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->dest, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         if (ns->is_ssa) {
            ppir_node *child = block->comp->var_nodes[ns->ssa->index];
            if (child->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *nts = (nir_tex_src *)ns;
               if (nts->src_type == nir_tex_src_coord)
                  child->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      if (node->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
         load->num_components = 3;
      else
         load->num_components = 2;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, load->index, node->node.index);

      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}

static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);

   return block;
}

static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
      break;
   default:
      ppir_error("nir_jump_instr not support\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}

static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu] = ppir_emit_alu,
   [nir_instr_type_intrinsic] = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_ssa_undef] = ppir_emit_ssa_undef,
   [nir_instr_type_tex] = ppir_emit_tex,
   [nir_instr_type_jump] = ppir_emit_jump,
};

static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *block = rzalloc(comp, ppir_block);
   if (!block)
      return NULL;

   list_inithead(&block->node_list);
   list_inithead(&block->instr_list);

   block->comp = comp;

   return block;
}

static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      assert(instr->type < nir_instr_type_phi);
      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);

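/* Emit an if statement: a conditional branch (with negated condition) to the
 * else or merge block, the then blocks, and an unconditional branch from the
 * end of the then side past the else blocks. */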
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}

static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}

static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   ppir_error("function nir_cf_node not support\n");
   return false;
}

static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list) {
      bool ret;

      switch (node->type) {
      case nir_cf_node_block:
         ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
         break;
      case nir_cf_node_if:
         ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
         break;
      case nir_cf_node_loop:
         ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
         break;
      case nir_cf_node_function:
         ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
         break;
      default:
         ppir_error("unknown NIR node type %d\n", node->type);
         return false;
      }

      if (!ret)
         return false;
   }

   return true;
}

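/* var_nodes is laid out as one slot per SSA definition followed by four slots
 * (one per component) for each NIR register; reg_base marks where the
 * register slots begin. */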
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->reg_base = num_ssa;
   comp->prog = prog;
   return comp;
}

static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and the is_end node as
    * example. If we don't add fake dependency of discard_if to is_end,
    * scheduler may put the is_end first and since is_end terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s2
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         if (node->is_end ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}

static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   ASSERTED int ret = asprintf(&shaderdb,
                               "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                               gl_shader_stage_name(info->stage),
                               comp->cur_instr_index,
                               comp->num_loops,
                               comp->num_spills,
                               comp->num_fills);
   assert(ret >= 0);

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}

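/* For every register, make each write depend on the reads of that register
 * that precede it in the block, so the write cannot be scheduled ahead of
 * those reads. */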
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               if (src && src->type == ppir_target_register &&
                   src->reg == reg &&
                   write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}

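/* Compiler entry point: build ppir blocks for the NIR CFG, emit nodes for
 * every instruction, then run lowering, scheduling, register allocation and
 * code generation. */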
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct pipe_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;

   /* 1st pass: create ppir blocks */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(nblock, function->impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   /* Validate outputs, we support only gl_FragColor */
   nir_foreach_shader_out_variable(var, nir) {
      switch (var->data.location) {
      case FRAG_RESULT_COLOR:
      case FRAG_RESULT_DATA0:
         break;
      default:
         ppir_error("unsupported output type\n");
         goto err_out0;
         break;
      }
   }

   foreach_list_typed(nir_register, reg, node, &func->registers) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = reg->index;
      r->num_components = reg->num_components;
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have discard block add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}