Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/node.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "util/u_math.h"25#include "util/ralloc.h"2627#include "gpir.h"2829const gpir_op_info gpir_op_infos[] = {30[gpir_op_mov] = {31.name = "mov",32.slots = (int []) {33GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,34GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,35GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_COMPLEX,36GPIR_INSTR_SLOT_END37},38},39[gpir_op_mul] = {40.name = "mul",41.dest_neg = true,42.slots = (int []) { GPIR_INSTR_SLOT_MUL1, GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },43},44[gpir_op_select] = {45.name = "select",46.dest_neg = true,47.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },48.may_consume_two_slots = true,49},50[gpir_op_complex1] = {51.name = "complex1",52.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },53.spillless = true,54.may_consume_two_slots = true,55},56[gpir_op_complex2] = {57.name = "complex2",58.slots = (int []) { GPIR_INSTR_SLOT_MUL0, GPIR_INSTR_SLOT_END },59.spillless = true,60.schedule_first = true,61},62[gpir_op_add] = {63.name = "add",64.src_neg = {true, true, false, false},65.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },66},67[gpir_op_floor] = {68.name = "floor",69.src_neg = {true, false, false, false},70.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },71.spillless = true,72.may_consume_two_slots = true,73},74[gpir_op_sign] = {75.name = "sign",76.src_neg = {true, false, false, false},77.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },78.spillless = true,79.may_consume_two_slots = true,80},81[gpir_op_ge] = {82.name = "ge",83.src_neg = {true, true, false, false},84.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },85.spillless = true,86.may_consume_two_slots = true,87},88[gpir_op_lt] = {89.name = "lt",90.src_neg = {true, true, false, false},91.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },92.spillless = true,93.may_consume_two_slots = true,94},95[gpir_op_min] = {96.name = "min",97.src_neg = {true, true, false, false},98.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },99.spillless = true,100.may_consume_two_slots = true,101},102[gpir_op_max] = {103.name = "max",104.src_neg = {true, true, false, false},105.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },106.spillless = true,107.may_consume_two_slots = true,108},109[gpir_op_abs] = {110.name = "abs",111.src_neg = {true, true, false, false},112},113[gpir_op_neg] = {114.name = "neg",115.slots = (int []) {116GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_MUL1,117GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_MUL0,118GPIR_INSTR_SLOT_END119},120},121[gpir_op_not] = {122.name = "not",123.src_neg = {true, true, false, false},124.slots = (int []) { GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END },125},126[gpir_op_eq] = {127.name = "eq",128.slots = (int []) {129GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END130},131},132[gpir_op_ne] = {133.name = "ne",134.slots = (int []) {135GPIR_INSTR_SLOT_ADD0, GPIR_INSTR_SLOT_ADD1, GPIR_INSTR_SLOT_END136},137},138[gpir_op_clamp_const] = {139.name = "clamp_const",140},141[gpir_op_preexp2] = {142.name = "preexp2",143.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },144.spillless = true,145.schedule_first = true,146},147[gpir_op_postlog2] = {148.name = "postlog2",149.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },150},151[gpir_op_exp2_impl] = {152.name = "exp2_impl",153.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },154.spillless = true,155.schedule_first = true,156},157[gpir_op_log2_impl] = {158.name = "log2_impl",159.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },160.spillless = true,161.schedule_first = true,162},163[gpir_op_rcp_impl] = {164.name = "rcp_impl",165.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },166.spillless = true,167.schedule_first = true,168},169[gpir_op_rsqrt_impl] = {170.name = "rsqrt_impl",171.slots = (int []) { GPIR_INSTR_SLOT_COMPLEX, GPIR_INSTR_SLOT_END },172.spillless = true,173.schedule_first = true,174},175[gpir_op_load_uniform] = {176.name = "ld_uni",177.slots = (int []) {178GPIR_INSTR_SLOT_MEM_LOAD0, GPIR_INSTR_SLOT_MEM_LOAD1,179GPIR_INSTR_SLOT_MEM_LOAD2, GPIR_INSTR_SLOT_MEM_LOAD3,180GPIR_INSTR_SLOT_END181},182.type = gpir_node_type_load,183},184[gpir_op_load_temp] = {185.name = "ld_tmp",186.type = gpir_node_type_load,187},188[gpir_op_load_attribute] = {189.name = "ld_att",190.slots = (int []) {191GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,192GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,193GPIR_INSTR_SLOT_END194},195.type = gpir_node_type_load,196},197[gpir_op_load_reg] = {198.name = "ld_reg",199.slots = (int []) {200GPIR_INSTR_SLOT_REG1_LOAD0, GPIR_INSTR_SLOT_REG1_LOAD1,201GPIR_INSTR_SLOT_REG1_LOAD2, GPIR_INSTR_SLOT_REG1_LOAD3,202GPIR_INSTR_SLOT_REG0_LOAD0, GPIR_INSTR_SLOT_REG0_LOAD1,203GPIR_INSTR_SLOT_REG0_LOAD2, GPIR_INSTR_SLOT_REG0_LOAD3,204GPIR_INSTR_SLOT_END205},206.type = gpir_node_type_load,207.spillless = true,208},209[gpir_op_store_temp] = {210.name = "st_tmp",211.type = gpir_node_type_store,212},213[gpir_op_store_reg] = {214.name = "st_reg",215.slots = (int []) {216GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,217GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,218GPIR_INSTR_SLOT_END219},220.type = gpir_node_type_store,221.spillless = true,222},223[gpir_op_store_varying] = {224.name = "st_var",225.slots = (int []) {226GPIR_INSTR_SLOT_STORE0, GPIR_INSTR_SLOT_STORE1,227GPIR_INSTR_SLOT_STORE2, GPIR_INSTR_SLOT_STORE3,228GPIR_INSTR_SLOT_END229},230.type = gpir_node_type_store,231.spillless = true,232},233[gpir_op_store_temp_load_off0] = {234.name = "st_of0",235.type = gpir_node_type_store,236},237[gpir_op_store_temp_load_off1] = {238.name = "st_of1",239.type = gpir_node_type_store,240},241[gpir_op_store_temp_load_off2] = {242.name = "st_of2",243.type = gpir_node_type_store,244},245[gpir_op_branch_cond] = {246.name = "branch_cond",247.type = gpir_node_type_branch,248.schedule_first = true,249.slots = (int []) { GPIR_INSTR_SLOT_PASS, GPIR_INSTR_SLOT_END },250},251[gpir_op_const] = {252.name = "const",253.type = gpir_node_type_const,254},255[gpir_op_exp2] = {256.name = "exp2",257},258[gpir_op_log2] = {259.name = "log2",260},261[gpir_op_rcp] = {262.name = "rcp",263},264[gpir_op_rsqrt] = {265.name = "rsqrt",266},267[gpir_op_ceil] = {268.name = "ceil",269},270[gpir_op_exp] = {271.name = "exp",272},273[gpir_op_log] = {274.name = "log",275},276[gpir_op_sin] = {277.name = "sin",278},279[gpir_op_cos] = {280.name = "cos",281},282[gpir_op_tan] = {283.name = "tan",284},285[gpir_op_dummy_f] = {286.name = "dummy_f",287.type = gpir_node_type_alu,288.spillless = true,289},290[gpir_op_dummy_m] = {291.name = "dummy_m",292.type = gpir_node_type_alu,293},294[gpir_op_branch_uncond] = {295.name = "branch_uncond",296.type = gpir_node_type_branch,297},298};299300void *gpir_node_create(gpir_block *block, gpir_op op)301{302static const int node_size[] = {303[gpir_node_type_alu] = sizeof(gpir_alu_node),304[gpir_node_type_const] = sizeof(gpir_const_node),305[gpir_node_type_load] = sizeof(gpir_load_node),306[gpir_node_type_store] = sizeof(gpir_store_node),307[gpir_node_type_branch] = sizeof(gpir_branch_node),308};309310gpir_node_type type = gpir_op_infos[op].type;311int size = node_size[type];312gpir_node *node = rzalloc_size(block, size);313if (unlikely(!node))314return NULL;315316snprintf(node->name, sizeof(node->name), "new");317318list_inithead(&node->succ_list);319list_inithead(&node->pred_list);320321node->op = op;322node->type = type;323node->index = block->comp->cur_index++;324node->block = block;325326return node;327}328329gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type)330{331/* don't add dep for two nodes from different block */332if (succ->block != pred->block)333return NULL;334335/* don't add self loop dep */336if (succ == pred)337return NULL;338339/* don't add duplicated dep */340gpir_node_foreach_pred(succ, dep) {341if (dep->pred == pred) {342/* use stronger dependency */343if (dep->type > type)344dep->type = type;345return dep;346}347}348349gpir_dep *dep = ralloc(succ, gpir_dep);350dep->type = type;351dep->pred = pred;352dep->succ = succ;353list_addtail(&dep->pred_link, &succ->pred_list);354list_addtail(&dep->succ_link, &pred->succ_list);355return dep;356}357358void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred)359{360gpir_node_foreach_pred(succ, dep) {361if (dep->pred == pred) {362list_del(&dep->succ_link);363list_del(&dep->pred_link);364ralloc_free(dep);365return;366}367}368}369370void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child,371gpir_node *new_child)372{373if (parent->type == gpir_node_type_alu) {374gpir_alu_node *alu = gpir_node_to_alu(parent);375for (int i = 0; i < alu->num_child; i++) {376if (alu->children[i] == old_child)377alu->children[i] = new_child;378}379}380else if (parent->type == gpir_node_type_store) {381gpir_store_node *store = gpir_node_to_store(parent);382if (store->child == old_child)383store->child = new_child;384} else if (parent->type == gpir_node_type_branch) {385gpir_branch_node *branch = gpir_node_to_branch(parent);386if (branch->cond == old_child)387branch->cond = new_child;388}389}390391void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred)392{393list_del(&dep->succ_link);394dep->pred = new_pred;395list_addtail(&dep->succ_link, &new_pred->succ_list);396}397398void gpir_node_replace_succ(gpir_node *dst, gpir_node *src)399{400gpir_node_foreach_succ_safe(src, dep) {401if (dep->type != GPIR_DEP_INPUT)402continue;403404gpir_node_replace_pred(dep, dst);405gpir_node_replace_child(dep->succ, src, dst);406}407}408409void gpir_node_insert_child(gpir_node *parent, gpir_node *child,410gpir_node *insert_child)411{412gpir_node_foreach_pred(parent, dep) {413if (dep->pred == child) {414gpir_node_replace_pred(dep, insert_child);415gpir_node_replace_child(parent, child, insert_child);416break;417}418}419}420421void gpir_node_delete(gpir_node *node)422{423gpir_node_foreach_succ_safe(node, dep) {424list_del(&dep->succ_link);425list_del(&dep->pred_link);426ralloc_free(dep);427}428429gpir_node_foreach_pred_safe(node, dep) {430list_del(&dep->succ_link);431list_del(&dep->pred_link);432ralloc_free(dep);433}434435list_del(&node->list);436ralloc_free(node);437}438439static void gpir_node_print_node(gpir_node *node, int type, int space)440{441static char *dep_name[] = {442[GPIR_DEP_INPUT] = "input",443[GPIR_DEP_OFFSET] = "offset",444[GPIR_DEP_READ_AFTER_WRITE] = "RaW",445[GPIR_DEP_WRITE_AFTER_READ] = "WaR",446};447448for (int i = 0; i < space; i++)449printf(" ");450printf("%s%s %d %s %s\n", node->printed && !gpir_node_is_leaf(node) ? "+" : "",451gpir_op_infos[node->op].name, node->index, node->name, dep_name[type]);452453if (!node->printed) {454gpir_node_foreach_pred(node, dep) {455gpir_node_print_node(dep->pred, dep->type, space + 2);456}457458node->printed = true;459}460}461462void gpir_node_print_prog_dep(gpir_compiler *comp)463{464if (!(lima_debug & LIMA_DEBUG_GP))465return;466467list_for_each_entry(gpir_block, block, &comp->block_list, list) {468list_for_each_entry(gpir_node, node, &block->node_list, list) {469node->printed = false;470}471}472473printf("======== node prog dep ========\n");474list_for_each_entry(gpir_block, block, &comp->block_list, list) {475list_for_each_entry(gpir_node, node, &block->node_list, list) {476if (gpir_node_is_root(node))477gpir_node_print_node(node, GPIR_DEP_INPUT, 0);478}479printf("----------------------------\n");480}481}482483void gpir_node_print_prog_seq(gpir_compiler *comp)484{485if (!(lima_debug & LIMA_DEBUG_GP))486return;487488int index = 0;489printf("======== node prog seq ========\n");490list_for_each_entry(gpir_block, block, &comp->block_list, list) {491list_for_each_entry(gpir_node, node, &block->node_list, list) {492printf("%03d: %s %d %s pred", index++, gpir_op_infos[node->op].name,493node->index, node->name);494gpir_node_foreach_pred(node, dep) {495printf(" %d", dep->pred->index);496}497printf(" succ");498gpir_node_foreach_succ(node, dep) {499printf(" %d", dep->succ->index);500}501printf("\n");502}503printf("----------------------------\n");504}505}506507508