Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/node.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "util/u_math.h"25#include "util/ralloc.h"26#include "util/bitscan.h"2728#include "ppir.h"2930const ppir_op_info ppir_op_infos[] = {31[ppir_op_mov] = {32.name = "mov",33.slots = (int []) {34PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,35PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,36PPIR_INSTR_SLOT_END37},38},39[ppir_op_abs] = {40.name = "abs",41},42[ppir_op_neg] = {43.name = "neg",44},45[ppir_op_sat] = {46.name = "sat",47},48[ppir_op_mul] = {49.name = "mul",50.slots = (int []) {51PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,52PPIR_INSTR_SLOT_END53},54},55[ppir_op_add] = {56.name = "add",57.slots = (int []) {58PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,59PPIR_INSTR_SLOT_END60},61},62[ppir_op_sum3] = {63.name = "sum3",64.slots = (int []) {65PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END66},67},68[ppir_op_sum4] = {69.name = "sum4",70.slots = (int []) {71PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_END72},73},74[ppir_op_rsqrt] = {75.name = "rsqrt",76.slots = (int []) {77PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END78},79},80[ppir_op_log2] = {81.name = "log2",82.slots = (int []) {83PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END84},85},86[ppir_op_exp2] = {87.name = "exp2",88.slots = (int []) {89PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END90},91},92[ppir_op_sqrt] = {93.name = "sqrt",94.slots = (int []) {95PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END96},97},98[ppir_op_sin] = {99.name = "sin",100.slots = (int []) {101PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END102},103},104[ppir_op_cos] = {105.name = "cos",106.slots = (int []) {107PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END108},109},110[ppir_op_max] = {111.name = "max",112.slots = (int []) {113PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,114PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,115PPIR_INSTR_SLOT_END116},117},118[ppir_op_min] = {119.name = "min",120.slots = (int []) {121PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_SCL_MUL,122PPIR_INSTR_SLOT_ALU_VEC_ADD, PPIR_INSTR_SLOT_ALU_VEC_MUL,123PPIR_INSTR_SLOT_END124},125},126[ppir_op_floor] = {127.name = "floor",128.slots = (int []) {129PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,130PPIR_INSTR_SLOT_END131},132},133[ppir_op_ceil] = {134.name = "ceil",135.slots = (int []) {136PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,137PPIR_INSTR_SLOT_END138},139},140[ppir_op_fract] = {141.name = "fract",142.slots = (int []) {143PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,144PPIR_INSTR_SLOT_END145},146},147[ppir_op_ddx] = {148.name = "ddx",149.slots = (int []) {150PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,151PPIR_INSTR_SLOT_END152},153},154[ppir_op_ddy] = {155.name = "ddy",156.slots = (int []) {157PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,158PPIR_INSTR_SLOT_END159},160},161[ppir_op_and] = {162.name = "and",163.slots = (int []) {164PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,165PPIR_INSTR_SLOT_END166},167},168[ppir_op_or] = {169.name = "or",170.slots = (int []) {171PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,172PPIR_INSTR_SLOT_END173},174},175[ppir_op_xor] = {176.name = "xor",177.slots = (int []) {178PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,179PPIR_INSTR_SLOT_END180},181},182[ppir_op_not] = {183.name = "not",184.slots = (int []) {185PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_VEC_MUL,186PPIR_INSTR_SLOT_END187},188},189[ppir_op_lt] = {190.name = "lt",191},192[ppir_op_le] = {193.name = "le",194},195[ppir_op_gt] = {196.name = "gt",197.slots = (int []) {198PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,199PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,200PPIR_INSTR_SLOT_END201},202},203[ppir_op_ge] = {204.name = "ge",205.slots = (int []) {206PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,207PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,208PPIR_INSTR_SLOT_END209},210},211[ppir_op_eq] = {212.name = "eq",213.slots = (int []) {214PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,215PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,216PPIR_INSTR_SLOT_END217},218},219[ppir_op_ne] = {220.name = "ne",221.slots = (int []) {222PPIR_INSTR_SLOT_ALU_SCL_MUL, PPIR_INSTR_SLOT_ALU_SCL_ADD,223PPIR_INSTR_SLOT_ALU_VEC_MUL, PPIR_INSTR_SLOT_ALU_VEC_ADD,224PPIR_INSTR_SLOT_END225},226},227[ppir_op_select] = {228.name = "select",229.slots = (int []) {230PPIR_INSTR_SLOT_ALU_SCL_ADD, PPIR_INSTR_SLOT_ALU_VEC_ADD,231PPIR_INSTR_SLOT_END232},233},234[ppir_op_rcp] = {235.name = "rcp",236.slots = (int []) {237PPIR_INSTR_SLOT_ALU_COMBINE, PPIR_INSTR_SLOT_END238},239},240[ppir_op_load_varying] = {241.name = "ld_var",242.type = ppir_node_type_load,243.slots = (int []) {244PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END245},246},247[ppir_op_load_coords] = {248.name = "ld_coords",249.type = ppir_node_type_load,250.slots = (int []) {251PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END252},253},254[ppir_op_load_coords_reg] = {255.name = "ld_coords_reg",256.type = ppir_node_type_load,257.slots = (int []) {258PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END259},260},261[ppir_op_load_fragcoord] = {262.name = "ld_fragcoord",263.type = ppir_node_type_load,264.slots = (int []) {265PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END266},267},268[ppir_op_load_pointcoord] = {269.name = "ld_pointcoord",270.type = ppir_node_type_load,271.slots = (int []) {272PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END273},274},275[ppir_op_load_frontface] = {276.name = "ld_frontface",277.type = ppir_node_type_load,278.slots = (int []) {279PPIR_INSTR_SLOT_VARYING, PPIR_INSTR_SLOT_END280},281},282[ppir_op_load_uniform] = {283.name = "ld_uni",284.type = ppir_node_type_load,285.slots = (int []) {286PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END287},288},289[ppir_op_load_texture] = {290.name = "ld_tex",291.type = ppir_node_type_load_texture,292.slots = (int []) {293PPIR_INSTR_SLOT_TEXLD, PPIR_INSTR_SLOT_END294},295},296[ppir_op_load_temp] = {297.name = "ld_temp",298.type = ppir_node_type_load,299.slots = (int []) {300PPIR_INSTR_SLOT_UNIFORM, PPIR_INSTR_SLOT_END301},302},303[ppir_op_const] = {304.name = "const",305.type = ppir_node_type_const,306},307[ppir_op_store_temp] = {308.name = "st_temp",309.type = ppir_node_type_store,310.slots = (int []) {311PPIR_INSTR_SLOT_STORE_TEMP, PPIR_INSTR_SLOT_END312},313},314[ppir_op_discard] = {315.name = "discard",316.type = ppir_node_type_discard,317.slots = (int []) {318PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END319},320},321[ppir_op_branch] = {322.name = "branch",323.type = ppir_node_type_branch,324.slots = (int []) {325PPIR_INSTR_SLOT_BRANCH, PPIR_INSTR_SLOT_END326},327},328[ppir_op_undef] = {329.name = "undef",330.type = ppir_node_type_alu,331.slots = (int []) {332},333},334[ppir_op_dummy] = {335.name = "dummy",336.type = ppir_node_type_alu,337.slots = (int []) {338},339},340};341342void *ppir_node_create(ppir_block *block, ppir_op op, int index, unsigned mask)343{344ppir_compiler *comp = block->comp;345static const int node_size[] = {346[ppir_node_type_alu] = sizeof(ppir_alu_node),347[ppir_node_type_const] = sizeof(ppir_const_node),348[ppir_node_type_load] = sizeof(ppir_load_node),349[ppir_node_type_store] = sizeof(ppir_store_node),350[ppir_node_type_load_texture] = sizeof(ppir_load_texture_node),351[ppir_node_type_discard] = sizeof(ppir_discard_node),352[ppir_node_type_branch] = sizeof(ppir_branch_node),353};354355ppir_node_type type = ppir_op_infos[op].type;356int size = node_size[type];357ppir_node *node = rzalloc_size(block, size);358if (!node)359return NULL;360361list_inithead(&node->succ_list);362list_inithead(&node->pred_list);363364if (index >= 0) {365if (mask) {366/* reg has 4 slots for each component write node */367while (mask)368comp->var_nodes[(index << 2) + comp->reg_base + u_bit_scan(&mask)] = node;369snprintf(node->name, sizeof(node->name), "reg%d", index);370} else {371comp->var_nodes[index] = node;372snprintf(node->name, sizeof(node->name), "ssa%d", index);373}374}375else376snprintf(node->name, sizeof(node->name), "new");377378node->op = op;379node->type = type;380node->index = comp->cur_index++;381node->block = block;382383return node;384}385386void ppir_node_add_dep(ppir_node *succ, ppir_node *pred,387ppir_dep_type type)388{389/* don't add dep for two nodes from different block */390if (succ->block != pred->block) {391pred->succ_different_block = true;392return;393}394395/* don't add duplicated dep */396ppir_node_foreach_pred(succ, dep) {397if (dep->pred == pred)398return;399}400401ppir_dep *dep = ralloc(succ, ppir_dep);402dep->pred = pred;403dep->succ = succ;404dep->type = type;405list_addtail(&dep->pred_link, &succ->pred_list);406list_addtail(&dep->succ_link, &pred->succ_list);407}408409void ppir_node_remove_dep(ppir_dep *dep)410{411list_del(&dep->succ_link);412list_del(&dep->pred_link);413ralloc_free(dep);414}415416static void _ppir_node_replace_child(ppir_src *src, ppir_node *old_child, ppir_node *new_child)417{418ppir_dest *od = ppir_node_get_dest(old_child);419if (ppir_node_target_equal(src, od)) {420ppir_node_target_assign(src, new_child);421}422}423424void ppir_node_replace_child(ppir_node *parent, ppir_node *old_child, ppir_node *new_child)425{426switch (parent->type) {427case ppir_node_type_alu:428{429ppir_alu_node *alu = ppir_node_to_alu(parent);430for (int i = 0; i < alu->num_src; i++)431_ppir_node_replace_child(alu->src + i, old_child, new_child);432break;433}434case ppir_node_type_branch:435{436ppir_branch_node *branch = ppir_node_to_branch(parent);437for (int i = 0; i < 2; i++)438_ppir_node_replace_child(branch->src + i, old_child, new_child);439break;440}441case ppir_node_type_load:442{443ppir_load_node *load = ppir_node_to_load(parent);444_ppir_node_replace_child(&load->src, old_child, new_child);445break;446}447case ppir_node_type_load_texture:448{449ppir_load_texture_node *load_texture = ppir_node_to_load_texture(parent);450for (int i = 0; i < load_texture->num_src; i++)451_ppir_node_replace_child(ppir_node_get_src(parent, i), old_child, new_child);452break;453}454case ppir_node_type_store:455{456ppir_store_node *store = ppir_node_to_store(parent);457_ppir_node_replace_child(&store->src, old_child, new_child);458break;459}460default:461ppir_debug("unknown node type in %s\n", __func__);462break;463}464}465466void ppir_node_replace_pred(ppir_dep *dep, ppir_node *new_pred)467{468list_del(&dep->succ_link);469dep->pred = new_pred;470list_addtail(&dep->succ_link, &new_pred->succ_list);471}472473ppir_dep *ppir_dep_for_pred(ppir_node *node, ppir_node *pred)474{475if (!pred)476return NULL;477478if (node->block != pred->block)479return NULL;480481ppir_node_foreach_pred(node, dep) {482if (dep->pred == pred)483return dep;484}485return NULL;486}487488void ppir_node_replace_all_succ(ppir_node *dst, ppir_node *src)489{490ppir_node_foreach_succ_safe(src, dep) {491ppir_node_replace_pred(dep, dst);492ppir_node_replace_child(dep->succ, src, dst);493}494}495496void ppir_node_delete(ppir_node *node)497{498ppir_node_foreach_succ_safe(node, dep)499ppir_node_remove_dep(dep);500501ppir_node_foreach_pred_safe(node, dep)502ppir_node_remove_dep(dep);503504list_del(&node->list);505ralloc_free(node);506}507508static void ppir_node_print_dest(ppir_dest *dest)509{510switch (dest->type) {511case ppir_target_ssa:512printf("ssa%d", dest->ssa.index);513break;514case ppir_target_pipeline:515printf("pipeline %d", dest->pipeline);516break;517case ppir_target_register:518printf("reg %d", dest->reg->index);519break;520}521}522523static void ppir_node_print_src(ppir_src *src)524{525switch (src->type) {526case ppir_target_ssa: {527if (src->node)528printf("ssa node %d", src->node->index);529else530printf("ssa idx %d", src->ssa ? src->ssa->index : -1);531break;532}533case ppir_target_pipeline:534if (src->node)535printf("pipeline %d node %d", src->pipeline, src->node->index);536else537printf("pipeline %d", src->pipeline);538break;539case ppir_target_register:540printf("reg %d", src->reg->index);541break;542}543}544545static void ppir_node_print_node(ppir_node *node, int space)546{547for (int i = 0; i < space; i++)548printf(" ");549550printf("%s%d: %s %s: ", node->printed && !ppir_node_is_leaf(node) ? "+" : "",551node->index, ppir_op_infos[node->op].name, node->name);552553ppir_dest *dest = ppir_node_get_dest(node);554if (dest) {555printf("dest: ");556ppir_node_print_dest(dest);557}558559if (ppir_node_get_src_num(node) > 0) {560printf(" src: ");561}562for (int i = 0; i < ppir_node_get_src_num(node); i++) {563ppir_node_print_src(ppir_node_get_src(node, i));564if (i != (ppir_node_get_src_num(node) - 1))565printf(", ");566}567printf("\n");568569if (!node->printed) {570ppir_node_foreach_pred(node, dep) {571ppir_node *pred = dep->pred;572ppir_node_print_node(pred, space + 2);573}574575node->printed = true;576}577}578579void ppir_node_print_prog(ppir_compiler *comp)580{581if (!(lima_debug & LIMA_DEBUG_PP))582return;583584list_for_each_entry(ppir_block, block, &comp->block_list, list) {585list_for_each_entry(ppir_node, node, &block->node_list, list) {586node->printed = false;587}588}589590printf("========prog========\n");591list_for_each_entry(ppir_block, block, &comp->block_list, list) {592printf("-------block %3d-------\n", block->index);593list_for_each_entry(ppir_node, node, &block->node_list, list) {594if (ppir_node_is_root(node))595ppir_node_print_node(node, 0);596}597}598printf("====================\n");599}600601static ppir_node *ppir_node_insert_mov_local(ppir_node *node)602{603ppir_node *move = ppir_node_create(node->block, ppir_op_mov, -1, 0);604if (unlikely(!move))605return NULL;606607ppir_dest *dest = ppir_node_get_dest(node);608ppir_alu_node *alu = ppir_node_to_alu(move);609alu->dest = *dest;610alu->num_src = 1;611ppir_node_target_assign(alu->src, node);612613for (int s = 0; s < 4; s++)614alu->src->swizzle[s] = s;615616ppir_node_replace_all_succ(move, node);617ppir_node_add_dep(move, node, ppir_dep_src);618list_addtail(&move->list, &node->list);619620if (node->is_end) {621node->is_end = false;622move->is_end = true;623}624625return move;626}627628ppir_node *ppir_node_insert_mov(ppir_node *old)629{630ppir_node *move = ppir_node_insert_mov_local(old);631ppir_compiler *comp = old->block->comp;632633list_for_each_entry(ppir_block, block, &comp->block_list, list) {634if (old->block == block)635continue;636list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {637for (int i = 0; i < ppir_node_get_src_num(node); i++){638ppir_src *src = ppir_node_get_src(node, i);639if (!src)640continue;641if (src->node == old)642ppir_node_target_assign(src, move);643}644}645}646647return move;648}649650bool ppir_node_has_single_src_succ(ppir_node *node)651{652if (ppir_node_has_single_succ(node) &&653list_first_entry(&node->succ_list,654ppir_dep, succ_link)->type == ppir_dep_src)655return true;656657int cnt = 0;658ppir_node_foreach_succ(node, dep) {659if (dep->type != ppir_dep_src)660continue;661cnt++;662}663664return cnt == 1;665}666667668