Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/instr.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include <string.h>2526#include "util/ralloc.h"2728#include "gpir.h"2930gpir_instr *gpir_instr_create(gpir_block *block)31{32gpir_instr *instr = rzalloc(block, gpir_instr);33if (unlikely(!instr))34return NULL;3536block->comp->num_instr++;37if (block->comp->num_instr > 512) {38gpir_error("shader exceeds limit of 512 instructions\n");39return NULL;40}4142instr->index = block->sched.instr_index++;43instr->alu_num_slot_free = 6;44instr->alu_non_cplx_slot_free = 5;45instr->alu_max_allowed_next_max = 5;4647list_add(&instr->list, &block->instr_list);48return instr;49}5051static gpir_node *gpir_instr_get_the_other_acc_node(gpir_instr *instr, int slot)52{53if (slot == GPIR_INSTR_SLOT_ADD0)54return instr->slots[GPIR_INSTR_SLOT_ADD1];55else if (slot == GPIR_INSTR_SLOT_ADD1)56return instr->slots[GPIR_INSTR_SLOT_ADD0];5758return NULL;59}6061static bool gpir_instr_check_acc_same_op(gpir_instr *instr, gpir_node *node, int slot)62{63/* two ACC slots must share the same op code */64gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, slot);6566/* spill move case may get acc_node == node */67if (acc_node && acc_node != node &&68!gpir_codegen_acc_same_op(node->op, acc_node->op))69return false;7071return true;72}7374static int gpir_instr_get_consume_slot(gpir_instr *instr, gpir_node *node)75{76if (gpir_op_infos[node->op].may_consume_two_slots) {77gpir_node *acc_node = gpir_instr_get_the_other_acc_node(instr, node->sched.pos);78if (acc_node)79/* at this point node must have the same acc op with acc_node,80* so it just consumes the extra slot acc_node consumed */81return 0;82else83return 2;84}85else86return 1;87}8889static bool gpir_instr_insert_alu_check(gpir_instr *instr, gpir_node *node)90{91if (!gpir_instr_check_acc_same_op(instr, node, node->sched.pos))92return false;9394if (node->sched.next_max_node && !node->sched.complex_allowed &&95node->sched.pos == GPIR_INSTR_SLOT_COMPLEX)96return false;9798int consume_slot = gpir_instr_get_consume_slot(instr, node);99int non_cplx_consume_slot =100node->sched.pos == GPIR_INSTR_SLOT_COMPLEX ? 0 : consume_slot;101int store_reduce_slot = 0;102int non_cplx_store_reduce_slot = 0;103int max_reduce_slot = node->sched.max_node ? 1 : 0;104int next_max_reduce_slot = node->sched.next_max_node ? 1 : 0;105int alu_new_max_allowed_next_max =106node->op == gpir_op_complex1 ? 4 : instr->alu_max_allowed_next_max;107108/* check if this node is child of one store node.109* complex1 won't be any of this instr's store node's child,110* because it has two instr latency before store can use it.111*/112for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {113gpir_store_node *s = gpir_node_to_store(instr->slots[i]);114if (s && s->child == node) {115store_reduce_slot = 1;116if (node->sched.next_max_node && !node->sched.complex_allowed)117non_cplx_store_reduce_slot = 1;118break;119}120}121122/* Check that the invariants will be maintained after we adjust everything123*/124125int slot_difference =126instr->alu_num_slot_needed_by_store - store_reduce_slot +127instr->alu_num_slot_needed_by_max - max_reduce_slot +128MAX2(instr->alu_num_unscheduled_next_max - next_max_reduce_slot -129alu_new_max_allowed_next_max, 0) -130(instr->alu_num_slot_free - consume_slot);131if (slot_difference > 0) {132gpir_debug("failed %d because of alu slot\n", node->index);133instr->slot_difference = slot_difference;134}135136int non_cplx_slot_difference =137instr->alu_num_slot_needed_by_max - max_reduce_slot +138instr->alu_num_slot_needed_by_non_cplx_store - non_cplx_store_reduce_slot -139(instr->alu_non_cplx_slot_free - non_cplx_consume_slot);140if (non_cplx_slot_difference > 0) {141gpir_debug("failed %d because of alu slot\n", node->index);142instr->non_cplx_slot_difference = non_cplx_slot_difference;143}144145if (slot_difference > 0 || non_cplx_slot_difference > 0)146return false;147148instr->alu_num_slot_free -= consume_slot;149instr->alu_non_cplx_slot_free -= non_cplx_consume_slot;150instr->alu_num_slot_needed_by_store -= store_reduce_slot;151instr->alu_num_slot_needed_by_non_cplx_store -= non_cplx_store_reduce_slot;152instr->alu_num_slot_needed_by_max -= max_reduce_slot;153instr->alu_num_unscheduled_next_max -= next_max_reduce_slot;154instr->alu_max_allowed_next_max = alu_new_max_allowed_next_max;155return true;156}157158static void gpir_instr_remove_alu(gpir_instr *instr, gpir_node *node)159{160int consume_slot = gpir_instr_get_consume_slot(instr, node);161162for (int i = GPIR_INSTR_SLOT_STORE0; i <= GPIR_INSTR_SLOT_STORE3; i++) {163gpir_store_node *s = gpir_node_to_store(instr->slots[i]);164if (s && s->child == node) {165instr->alu_num_slot_needed_by_store++;166if (node->sched.next_max_node && !node->sched.complex_allowed)167instr->alu_num_slot_needed_by_non_cplx_store++;168break;169}170}171172instr->alu_num_slot_free += consume_slot;173if (node->sched.pos != GPIR_INSTR_SLOT_COMPLEX)174instr->alu_non_cplx_slot_free += consume_slot;175if (node->sched.max_node)176instr->alu_num_slot_needed_by_max++;177if (node->sched.next_max_node)178instr->alu_num_unscheduled_next_max++;179if (node->op == gpir_op_complex1)180instr->alu_max_allowed_next_max = 5;181}182183static bool gpir_instr_insert_reg0_check(gpir_instr *instr, gpir_node *node)184{185gpir_load_node *load = gpir_node_to_load(node);186int i = node->sched.pos - GPIR_INSTR_SLOT_REG0_LOAD0;187188if (load->component != i)189return false;190191if (instr->reg0_is_attr && node->op != gpir_op_load_attribute)192return false;193194if (instr->reg0_use_count) {195if (instr->reg0_index != load->index)196return false;197}198else {199instr->reg0_is_attr = node->op == gpir_op_load_attribute;200instr->reg0_index = load->index;201}202203instr->reg0_use_count++;204return true;205}206207static void gpir_instr_remove_reg0(gpir_instr *instr, gpir_node *node)208{209instr->reg0_use_count--;210if (!instr->reg0_use_count)211instr->reg0_is_attr = false;212}213214static bool gpir_instr_insert_reg1_check(gpir_instr *instr, gpir_node *node)215{216gpir_load_node *load = gpir_node_to_load(node);217int i = node->sched.pos - GPIR_INSTR_SLOT_REG1_LOAD0;218219if (load->component != i)220return false;221222if (instr->reg1_use_count) {223if (instr->reg1_index != load->index)224return false;225}226else227instr->reg1_index = load->index;228229instr->reg1_use_count++;230return true;231}232233static void gpir_instr_remove_reg1(gpir_instr *instr, gpir_node *node)234{235instr->reg1_use_count--;236}237238static bool gpir_instr_insert_mem_check(gpir_instr *instr, gpir_node *node)239{240gpir_load_node *load = gpir_node_to_load(node);241int i = node->sched.pos - GPIR_INSTR_SLOT_MEM_LOAD0;242243if (load->component != i)244return false;245246if (instr->mem_is_temp && node->op != gpir_op_load_temp)247return false;248249if (instr->mem_use_count) {250if (instr->mem_index != load->index)251return false;252}253else {254instr->mem_is_temp = node->op == gpir_op_load_temp;255instr->mem_index = load->index;256}257258instr->mem_use_count++;259return true;260}261262static void gpir_instr_remove_mem(gpir_instr *instr, gpir_node *node)263{264instr->mem_use_count--;265if (!instr->mem_use_count)266instr->mem_is_temp = false;267}268269static bool gpir_instr_insert_store_check(gpir_instr *instr, gpir_node *node)270{271gpir_store_node *store = gpir_node_to_store(node);272int i = node->sched.pos - GPIR_INSTR_SLOT_STORE0;273274if (store->component != i)275return false;276277i >>= 1;278switch (instr->store_content[i]) {279case GPIR_INSTR_STORE_NONE:280/* store temp has only one address reg for two store unit */281if (node->op == gpir_op_store_temp &&282instr->store_content[!i] == GPIR_INSTR_STORE_TEMP &&283instr->store_index[!i] != store->index)284return false;285break;286287case GPIR_INSTR_STORE_VARYING:288if (node->op != gpir_op_store_varying ||289instr->store_index[i] != store->index)290return false;291break;292293case GPIR_INSTR_STORE_REG:294if (node->op != gpir_op_store_reg ||295instr->store_index[i] != store->index)296return false;297break;298299case GPIR_INSTR_STORE_TEMP:300if (node->op != gpir_op_store_temp ||301instr->store_index[i] != store->index)302return false;303break;304}305306/* check if any store node has the same child as this node */307for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {308gpir_store_node *s = gpir_node_to_store(instr->slots[j]);309if (s && s->child == store->child)310goto out;311}312313/* check if the child is already in this instr's alu slot,314* this may happen when store an scheduled alu node to reg315*/316for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) {317if (store->child == instr->slots[j])318goto out;319}320321/* Check the invariants documented in gpir.h, similar to the ALU case.322* When the only thing that changes is alu_num_slot_needed_by_store, we323* can get away with just checking the first one.324*/325int slot_difference = instr->alu_num_slot_needed_by_store + 1326+ instr->alu_num_slot_needed_by_max +327MAX2(instr->alu_num_unscheduled_next_max - instr->alu_max_allowed_next_max, 0) -328instr->alu_num_slot_free;329if (slot_difference > 0) {330instr->slot_difference = slot_difference;331return false;332}333334if (store->child->sched.next_max_node &&335!store->child->sched.complex_allowed) {336/* The child of the store is already partially ready, and has a use one337* cycle ago that disqualifies it (or a move replacing it) from being338* put in the complex slot. Therefore we have to check the non-complex339* invariant.340*/341int non_cplx_slot_difference =342instr->alu_num_slot_needed_by_max +343instr->alu_num_slot_needed_by_non_cplx_store + 1 -344instr->alu_non_cplx_slot_free;345if (non_cplx_slot_difference > 0) {346instr->non_cplx_slot_difference = non_cplx_slot_difference;347return false;348}349350instr->alu_num_slot_needed_by_non_cplx_store++;351}352353instr->alu_num_slot_needed_by_store++;354355out:356if (instr->store_content[i] == GPIR_INSTR_STORE_NONE) {357if (node->op == gpir_op_store_varying)358instr->store_content[i] = GPIR_INSTR_STORE_VARYING;359else if (node->op == gpir_op_store_reg)360instr->store_content[i] = GPIR_INSTR_STORE_REG;361else362instr->store_content[i] = GPIR_INSTR_STORE_TEMP;363364instr->store_index[i] = store->index;365}366return true;367}368369static void gpir_instr_remove_store(gpir_instr *instr, gpir_node *node)370{371gpir_store_node *store = gpir_node_to_store(node);372int component = node->sched.pos - GPIR_INSTR_SLOT_STORE0;373int other_slot = GPIR_INSTR_SLOT_STORE0 + (component ^ 1);374375for (int j = GPIR_INSTR_SLOT_STORE0; j <= GPIR_INSTR_SLOT_STORE3; j++) {376if (j == node->sched.pos)377continue;378379gpir_store_node *s = gpir_node_to_store(instr->slots[j]);380if (s && s->child == store->child)381goto out;382}383384for (int j = GPIR_INSTR_SLOT_ALU_BEGIN; j <= GPIR_INSTR_SLOT_ALU_END; j++) {385if (store->child == instr->slots[j])386goto out;387}388389instr->alu_num_slot_needed_by_store--;390391if (store->child->sched.next_max_node &&392!store->child->sched.complex_allowed) {393instr->alu_num_slot_needed_by_non_cplx_store--;394}395396out:397if (!instr->slots[other_slot])398instr->store_content[component >> 1] = GPIR_INSTR_STORE_NONE;399}400401static bool gpir_instr_spill_move(gpir_instr *instr, int slot, int spill_to_start)402{403gpir_node *node = instr->slots[slot];404if (!node)405return true;406407if (node->op != gpir_op_mov)408return false;409410for (int i = spill_to_start; i <= GPIR_INSTR_SLOT_DIST_TWO_END; i++) {411if (i != slot && !instr->slots[i] &&412gpir_instr_check_acc_same_op(instr, node, i)) {413instr->slots[i] = node;414instr->slots[slot] = NULL;415node->sched.pos = i;416417gpir_debug("instr %d spill move %d from slot %d to %d\n",418instr->index, node->index, slot, i);419return true;420}421}422423return false;424}425426static bool gpir_instr_slot_free(gpir_instr *instr, gpir_node *node)427{428if (node->op == gpir_op_mov ||429node->sched.pos > GPIR_INSTR_SLOT_DIST_TWO_END) {430if (instr->slots[node->sched.pos])431return false;432}433else {434/* for node needs dist two slot, if the slot has a move, we can435* spill it to other dist two slot without any side effect */436int spill_to_start = GPIR_INSTR_SLOT_MUL0;437if (node->op == gpir_op_complex1 || node->op == gpir_op_select)438spill_to_start = GPIR_INSTR_SLOT_ADD0;439440if (!gpir_instr_spill_move(instr, node->sched.pos, spill_to_start))441return false;442443if (node->op == gpir_op_complex1 || node->op == gpir_op_select) {444if (!gpir_instr_spill_move(instr, GPIR_INSTR_SLOT_MUL1, spill_to_start))445return false;446}447}448449return true;450}451452bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node)453{454instr->slot_difference = 0;455instr->non_cplx_slot_difference = 0;456457if (!gpir_instr_slot_free(instr, node))458return false;459460if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN &&461node->sched.pos <= GPIR_INSTR_SLOT_ALU_END) {462if (!gpir_instr_insert_alu_check(instr, node))463return false;464}465else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&466node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3) {467if (!gpir_instr_insert_reg0_check(instr, node))468return false;469}470else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&471node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3) {472if (!gpir_instr_insert_reg1_check(instr, node))473return false;474}475else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&476node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3) {477if (!gpir_instr_insert_mem_check(instr, node))478return false;479}480else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 &&481node->sched.pos <= GPIR_INSTR_SLOT_STORE3) {482if (!gpir_instr_insert_store_check(instr, node))483return false;484}485486instr->slots[node->sched.pos] = node;487488if (node->op == gpir_op_complex1 || node->op == gpir_op_select)489instr->slots[GPIR_INSTR_SLOT_MUL1] = node;490491return true;492}493494void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node)495{496assert(node->sched.pos >= 0);497498/* This can happen if we merge duplicate loads in the scheduler. */499if (instr->slots[node->sched.pos] != node) {500node->sched.pos = -1;501node->sched.instr = NULL;502return;503}504505if (node->sched.pos >= GPIR_INSTR_SLOT_ALU_BEGIN &&506node->sched.pos <= GPIR_INSTR_SLOT_ALU_END)507gpir_instr_remove_alu(instr, node);508else if (node->sched.pos >= GPIR_INSTR_SLOT_REG0_LOAD0 &&509node->sched.pos <= GPIR_INSTR_SLOT_REG0_LOAD3)510gpir_instr_remove_reg0(instr, node);511else if (node->sched.pos >= GPIR_INSTR_SLOT_REG1_LOAD0 &&512node->sched.pos <= GPIR_INSTR_SLOT_REG1_LOAD3)513gpir_instr_remove_reg1(instr, node);514else if (node->sched.pos >= GPIR_INSTR_SLOT_MEM_LOAD0 &&515node->sched.pos <= GPIR_INSTR_SLOT_MEM_LOAD3)516gpir_instr_remove_mem(instr, node);517else if (node->sched.pos >= GPIR_INSTR_SLOT_STORE0 &&518node->sched.pos <= GPIR_INSTR_SLOT_STORE3)519gpir_instr_remove_store(instr, node);520521instr->slots[node->sched.pos] = NULL;522523if (node->op == gpir_op_complex1 || node->op == gpir_op_select)524instr->slots[GPIR_INSTR_SLOT_MUL1] = NULL;525526node->sched.pos = -1;527node->sched.instr = NULL;528}529530void gpir_instr_print_prog(gpir_compiler *comp)531{532struct {533int len;534char *name;535} fields[] = {536[GPIR_INSTR_SLOT_MUL0] = { 4, "mul0" },537[GPIR_INSTR_SLOT_MUL1] = { 4, "mul1" },538[GPIR_INSTR_SLOT_ADD0] = { 4, "add0" },539[GPIR_INSTR_SLOT_ADD1] = { 4, "add1" },540[GPIR_INSTR_SLOT_REG0_LOAD3] = { 15, "load0" },541[GPIR_INSTR_SLOT_REG1_LOAD3] = { 15, "load1" },542[GPIR_INSTR_SLOT_MEM_LOAD3] = { 15, "load2" },543[GPIR_INSTR_SLOT_STORE3] = { 15, "store" },544[GPIR_INSTR_SLOT_COMPLEX] = { 4, "cmpl" },545[GPIR_INSTR_SLOT_PASS] = { 4, "pass" },546};547548printf("========prog instr========\n");549printf(" ");550for (int i = 0; i < GPIR_INSTR_SLOT_NUM; i++) {551if (fields[i].len)552printf("%-*s ", fields[i].len, fields[i].name);553}554printf("\n");555556int index = 0;557list_for_each_entry(gpir_block, block, &comp->block_list, list) {558list_for_each_entry(gpir_instr, instr, &block->instr_list, list) {559printf("%03d: ", index++);560561char buff[16] = "null";562int start = 0;563for (int j = 0; j < GPIR_INSTR_SLOT_NUM; j++) {564gpir_node *node = instr->slots[j];565if (fields[j].len) {566if (node)567snprintf(buff + start, sizeof(buff) - start, "%d", node->index);568printf("%-*s ", fields[j].len, buff);569570strcpy(buff, "null");571start = 0;572}573else {574if (node)575start += snprintf(buff + start, sizeof(buff) - start, "%d", node->index);576start += snprintf(buff + start, sizeof(buff) - start, "|");577}578}579printf("\n");580}581printf("-----------------------\n");582}583printf("==========================\n");584}585586587