/* Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/codegen.c */
/* 4574 views */
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "util/ralloc.h"2526#include "gpir.h"27#include "codegen.h"28#include "lima_context.h"2930static gpir_codegen_src gpir_get_alu_input(gpir_node *parent, gpir_node *child)31{32static const int slot_to_src[GPIR_INSTR_SLOT_NUM][3] = {33[GPIR_INSTR_SLOT_MUL0] = {34gpir_codegen_src_unused, gpir_codegen_src_p1_mul_0, gpir_codegen_src_p2_mul_0 },35[GPIR_INSTR_SLOT_MUL1] = {36gpir_codegen_src_unused, gpir_codegen_src_p1_mul_1, gpir_codegen_src_p2_mul_1 },3738[GPIR_INSTR_SLOT_ADD0] = {39gpir_codegen_src_unused, gpir_codegen_src_p1_acc_0, gpir_codegen_src_p2_acc_0 },40[GPIR_INSTR_SLOT_ADD1] = {41gpir_codegen_src_unused, gpir_codegen_src_p1_acc_1, gpir_codegen_src_p2_acc_1 },4243[GPIR_INSTR_SLOT_COMPLEX] = {44gpir_codegen_src_unused, gpir_codegen_src_p1_complex, gpir_codegen_src_unused },45[GPIR_INSTR_SLOT_PASS] = {46gpir_codegen_src_unused, 
gpir_codegen_src_p1_pass, gpir_codegen_src_p2_pass },4748[GPIR_INSTR_SLOT_REG0_LOAD0] = {49gpir_codegen_src_attrib_x, gpir_codegen_src_p1_attrib_x, gpir_codegen_src_unused },50[GPIR_INSTR_SLOT_REG0_LOAD1] = {51gpir_codegen_src_attrib_y, gpir_codegen_src_p1_attrib_y, gpir_codegen_src_unused },52[GPIR_INSTR_SLOT_REG0_LOAD2] = {53gpir_codegen_src_attrib_z, gpir_codegen_src_p1_attrib_z, gpir_codegen_src_unused },54[GPIR_INSTR_SLOT_REG0_LOAD3] = {55gpir_codegen_src_attrib_w, gpir_codegen_src_p1_attrib_w, gpir_codegen_src_unused },5657[GPIR_INSTR_SLOT_REG1_LOAD0] = {58gpir_codegen_src_register_x, gpir_codegen_src_unused, gpir_codegen_src_unused},59[GPIR_INSTR_SLOT_REG1_LOAD1] = {60gpir_codegen_src_register_y, gpir_codegen_src_unused, gpir_codegen_src_unused},61[GPIR_INSTR_SLOT_REG1_LOAD2] = {62gpir_codegen_src_register_z, gpir_codegen_src_unused, gpir_codegen_src_unused},63[GPIR_INSTR_SLOT_REG1_LOAD3] = {64gpir_codegen_src_register_w, gpir_codegen_src_unused, gpir_codegen_src_unused},6566[GPIR_INSTR_SLOT_MEM_LOAD0] = {67gpir_codegen_src_load_x, gpir_codegen_src_unused, gpir_codegen_src_unused },68[GPIR_INSTR_SLOT_MEM_LOAD1] = {69gpir_codegen_src_load_y, gpir_codegen_src_unused, gpir_codegen_src_unused },70[GPIR_INSTR_SLOT_MEM_LOAD2] = {71gpir_codegen_src_load_z, gpir_codegen_src_unused, gpir_codegen_src_unused },72[GPIR_INSTR_SLOT_MEM_LOAD3] = {73gpir_codegen_src_load_w, gpir_codegen_src_unused, gpir_codegen_src_unused },74};7576int diff = child->sched.instr->index - parent->sched.instr->index;77assert(diff < 3);78assert(diff >= 0);7980int src = slot_to_src[child->sched.pos][diff];81assert(src != gpir_codegen_src_unused);82return src;83}8485static void gpir_codegen_mul0_slot(gpir_codegen_instr *code, gpir_instr *instr)86{87gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL0];8889if (!node) {90code->mul0_src0 = gpir_codegen_src_unused;91code->mul0_src1 = gpir_codegen_src_unused;92return;93}9495gpir_alu_node *alu = gpir_node_to_alu(node);9697switch (node->op) {98case 
gpir_op_mul:99code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);100code->mul0_src1 = gpir_get_alu_input(node, alu->children[1]);101if (code->mul0_src1 == gpir_codegen_src_p1_complex) {102/* Will get confused with gpir_codegen_src_ident, so need to swap inputs */103code->mul0_src1 = code->mul0_src0;104code->mul0_src0 = gpir_codegen_src_p1_complex;105}106107code->mul0_neg = alu->dest_negate;108if (alu->children_negate[0])109code->mul0_neg = !code->mul0_neg;110if (alu->children_negate[1])111code->mul0_neg = !code->mul0_neg;112break;113114case gpir_op_neg:115code->mul0_neg = true;116FALLTHROUGH;117case gpir_op_mov:118code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);119code->mul0_src1 = gpir_codegen_src_ident;120break;121122case gpir_op_complex1:123code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);124code->mul0_src1 = gpir_get_alu_input(node, alu->children[1]);125code->mul_op = gpir_codegen_mul_op_complex1;126break;127128case gpir_op_complex2:129code->mul0_src0 = gpir_get_alu_input(node, alu->children[0]);130code->mul0_src1 = code->mul0_src0;131code->mul_op = gpir_codegen_mul_op_complex2;132break;133134case gpir_op_select:135code->mul0_src0 = gpir_get_alu_input(node, alu->children[2]);136code->mul0_src1 = gpir_get_alu_input(node, alu->children[0]);137code->mul_op = gpir_codegen_mul_op_select;138break;139140default:141assert(0);142}143}144145static void gpir_codegen_mul1_slot(gpir_codegen_instr *code, gpir_instr *instr)146{147gpir_node *node = instr->slots[GPIR_INSTR_SLOT_MUL1];148149if (!node) {150code->mul1_src0 = gpir_codegen_src_unused;151code->mul1_src1 = gpir_codegen_src_unused;152return;153}154155gpir_alu_node *alu = gpir_node_to_alu(node);156157switch (node->op) {158case gpir_op_mul:159code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]);160code->mul1_src1 = gpir_get_alu_input(node, alu->children[1]);161if (code->mul1_src1 == gpir_codegen_src_p1_complex) {162/* Will get confused with gpir_codegen_src_ident, so need to 
swap inputs */163code->mul1_src1 = code->mul1_src0;164code->mul1_src0 = gpir_codegen_src_p1_complex;165}166167code->mul1_neg = alu->dest_negate;168if (alu->children_negate[0])169code->mul1_neg = !code->mul1_neg;170if (alu->children_negate[1])171code->mul1_neg = !code->mul1_neg;172break;173174case gpir_op_neg:175code->mul1_neg = true;176FALLTHROUGH;177case gpir_op_mov:178code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]);179code->mul1_src1 = gpir_codegen_src_ident;180break;181182case gpir_op_complex1:183code->mul1_src0 = gpir_get_alu_input(node, alu->children[0]);184code->mul1_src1 = gpir_get_alu_input(node, alu->children[2]);185break;186187case gpir_op_select:188code->mul1_src0 = gpir_get_alu_input(node, alu->children[1]);189code->mul1_src1 = gpir_codegen_src_unused;190break;191192default:193assert(0);194}195}196197static void gpir_codegen_add0_slot(gpir_codegen_instr *code, gpir_instr *instr)198{199gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD0];200201if (!node) {202code->acc0_src0 = gpir_codegen_src_unused;203code->acc0_src1 = gpir_codegen_src_unused;204return;205}206207gpir_alu_node *alu = gpir_node_to_alu(node);208209switch (node->op) {210case gpir_op_add:211case gpir_op_min:212case gpir_op_max:213case gpir_op_lt:214case gpir_op_ge:215code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]);216code->acc0_src1 = gpir_get_alu_input(node, alu->children[1]);217218code->acc0_src0_neg = alu->children_negate[0];219code->acc0_src1_neg = alu->children_negate[1];220221switch (node->op) {222case gpir_op_add:223code->acc_op = gpir_codegen_acc_op_add;224if (code->acc0_src1 == gpir_codegen_src_p1_complex) {225code->acc0_src1 = code->acc0_src0;226code->acc0_src0 = gpir_codegen_src_p1_complex;227228bool tmp = code->acc0_src0_neg;229code->acc0_src0_neg = code->acc0_src1_neg;230code->acc0_src1_neg = tmp;231}232break;233case gpir_op_min:234code->acc_op = gpir_codegen_acc_op_min;235break;236case gpir_op_max:237code->acc_op = 
gpir_codegen_acc_op_max;238break;239case gpir_op_lt:240code->acc_op = gpir_codegen_acc_op_lt;241break;242case gpir_op_ge:243code->acc_op = gpir_codegen_acc_op_ge;244break;245default:246assert(0);247}248249break;250251case gpir_op_floor:252case gpir_op_sign:253code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]);254code->acc0_src0_neg = alu->children_negate[0];255switch (node->op) {256case gpir_op_floor:257code->acc_op = gpir_codegen_acc_op_floor;258break;259case gpir_op_sign:260code->acc_op = gpir_codegen_acc_op_sign;261break;262default:263assert(0);264}265break;266267case gpir_op_neg:268code->acc0_src0_neg = true;269FALLTHROUGH;270case gpir_op_mov:271code->acc_op = gpir_codegen_acc_op_add;272code->acc0_src0 = gpir_get_alu_input(node, alu->children[0]);273code->acc0_src1 = gpir_codegen_src_ident;274code->acc0_src1_neg = true;275break;276277default:278assert(0);279}280}281282static void gpir_codegen_add1_slot(gpir_codegen_instr *code, gpir_instr *instr)283{284gpir_node *node = instr->slots[GPIR_INSTR_SLOT_ADD1];285286if (!node) {287code->acc1_src0 = gpir_codegen_src_unused;288code->acc1_src1 = gpir_codegen_src_unused;289return;290}291292gpir_alu_node *alu = gpir_node_to_alu(node);293294switch (node->op) {295case gpir_op_add:296case gpir_op_min:297case gpir_op_max:298case gpir_op_lt:299case gpir_op_ge:300code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]);301code->acc1_src1 = gpir_get_alu_input(node, alu->children[1]);302303code->acc1_src0_neg = alu->children_negate[0];304code->acc1_src1_neg = alu->children_negate[1];305306switch (node->op) {307case gpir_op_add:308code->acc_op = gpir_codegen_acc_op_add;309if (code->acc1_src1 == gpir_codegen_src_p1_complex) {310code->acc1_src1 = code->acc1_src0;311code->acc1_src0 = gpir_codegen_src_p1_complex;312313bool tmp = code->acc1_src0_neg;314code->acc1_src0_neg = code->acc1_src1_neg;315code->acc1_src1_neg = tmp;316}317break;318case gpir_op_min:319code->acc_op = gpir_codegen_acc_op_min;320break;321case 
gpir_op_max:322code->acc_op = gpir_codegen_acc_op_max;323break;324case gpir_op_lt:325code->acc_op = gpir_codegen_acc_op_lt;326break;327case gpir_op_ge:328code->acc_op = gpir_codegen_acc_op_ge;329break;330default:331assert(0);332}333334break;335336case gpir_op_floor:337case gpir_op_sign:338code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]);339code->acc1_src0_neg = alu->children_negate[0];340switch (node->op) {341case gpir_op_floor:342code->acc_op = gpir_codegen_acc_op_floor;343break;344case gpir_op_sign:345code->acc_op = gpir_codegen_acc_op_sign;346break;347default:348assert(0);349}350break;351352case gpir_op_neg:353code->acc1_src0_neg = true;354FALLTHROUGH;355case gpir_op_mov:356code->acc_op = gpir_codegen_acc_op_add;357code->acc1_src0 = gpir_get_alu_input(node, alu->children[0]);358code->acc1_src1 = gpir_codegen_src_ident;359code->acc1_src1_neg = true;360break;361362default:363assert(0);364}365}366367static void gpir_codegen_complex_slot(gpir_codegen_instr *code, gpir_instr *instr)368{369gpir_node *node = instr->slots[GPIR_INSTR_SLOT_COMPLEX];370371if (!node) {372code->complex_src = gpir_codegen_src_unused;373return;374}375376switch (node->op) {377case gpir_op_mov:378case gpir_op_rcp_impl:379case gpir_op_rsqrt_impl:380case gpir_op_exp2_impl:381case gpir_op_log2_impl:382{383gpir_alu_node *alu = gpir_node_to_alu(node);384code->complex_src = gpir_get_alu_input(node, alu->children[0]);385break;386}387default:388assert(0);389}390391switch (node->op) {392case gpir_op_mov:393code->complex_op = gpir_codegen_complex_op_pass;394break;395case gpir_op_rcp_impl:396code->complex_op = gpir_codegen_complex_op_rcp;397break;398case gpir_op_rsqrt_impl:399code->complex_op = gpir_codegen_complex_op_rsqrt;400break;401case gpir_op_exp2_impl:402code->complex_op = gpir_codegen_complex_op_exp2;403break;404case gpir_op_log2_impl:405code->complex_op = gpir_codegen_complex_op_log2;406break;407default:408assert(0);409}410}411412static void gpir_codegen_pass_slot(gpir_codegen_instr 
*code, gpir_instr *instr)413{414gpir_node *node = instr->slots[GPIR_INSTR_SLOT_PASS];415416if (!node) {417code->pass_op = gpir_codegen_pass_op_pass;418code->pass_src = gpir_codegen_src_unused;419return;420}421422if (node->op == gpir_op_branch_cond) {423gpir_branch_node *branch = gpir_node_to_branch(node);424425code->pass_op = gpir_codegen_pass_op_pass;426code->pass_src = gpir_get_alu_input(node, branch->cond);427428/* Fill out branch information */429unsigned offset = branch->dest->instr_offset;430assert(offset < 0x200);431code->branch = true;432code->branch_target = offset & 0xff;433code->branch_target_lo = !(offset >> 8);434code->unknown_1 = 13;435return;436}437438gpir_alu_node *alu = gpir_node_to_alu(node);439code->pass_src = gpir_get_alu_input(node, alu->children[0]);440441switch (node->op) {442case gpir_op_mov:443code->pass_op = gpir_codegen_pass_op_pass;444break;445case gpir_op_preexp2:446code->pass_op = gpir_codegen_pass_op_preexp2;447break;448case gpir_op_postlog2:449code->pass_op = gpir_codegen_pass_op_postlog2;450break;451default:452assert(0);453}454455}456457static void gpir_codegen_reg0_slot(gpir_codegen_instr *code, gpir_instr *instr)458{459if (!instr->reg0_use_count)460return;461462code->register0_attribute = instr->reg0_is_attr;463code->register0_addr = instr->reg0_index;464}465466static void gpir_codegen_reg1_slot(gpir_codegen_instr *code, gpir_instr *instr)467{468if (!instr->reg1_use_count)469return;470471code->register1_addr = instr->reg1_index;472}473474static void gpir_codegen_mem_slot(gpir_codegen_instr *code, gpir_instr *instr)475{476if (!instr->mem_use_count) {477code->load_offset = gpir_codegen_load_off_none;478return;479}480481code->load_addr = instr->mem_index;482code->load_offset = gpir_codegen_load_off_none;483}484485static gpir_codegen_store_src gpir_get_store_input(gpir_node *node)486{487static int slot_to_src[GPIR_INSTR_SLOT_NUM] = {488[GPIR_INSTR_SLOT_MUL0] = gpir_codegen_store_src_mul_0,489[GPIR_INSTR_SLOT_MUL1] = 
gpir_codegen_store_src_mul_1,490[GPIR_INSTR_SLOT_ADD0] = gpir_codegen_store_src_acc_0,491[GPIR_INSTR_SLOT_ADD1] = gpir_codegen_store_src_acc_1,492[GPIR_INSTR_SLOT_COMPLEX] = gpir_codegen_store_src_complex,493[GPIR_INSTR_SLOT_PASS] = gpir_codegen_store_src_pass,494[GPIR_INSTR_SLOT_REG0_LOAD0...GPIR_INSTR_SLOT_STORE3] = gpir_codegen_store_src_none,495};496497gpir_store_node *store = gpir_node_to_store(node);498return slot_to_src[store->child->sched.pos];499}500501static void gpir_codegen_store_slot(gpir_codegen_instr *code, gpir_instr *instr)502{503504gpir_node *node = instr->slots[GPIR_INSTR_SLOT_STORE0];505if (node)506code->store0_src_x = gpir_get_store_input(node);507else508code->store0_src_x = gpir_codegen_store_src_none;509510node = instr->slots[GPIR_INSTR_SLOT_STORE1];511if (node)512code->store0_src_y = gpir_get_store_input(node);513else514code->store0_src_y = gpir_codegen_store_src_none;515516node = instr->slots[GPIR_INSTR_SLOT_STORE2];517if (node)518code->store1_src_z = gpir_get_store_input(node);519else520code->store1_src_z = gpir_codegen_store_src_none;521522node = instr->slots[GPIR_INSTR_SLOT_STORE3];523if (node)524code->store1_src_w = gpir_get_store_input(node);525else526code->store1_src_w = gpir_codegen_store_src_none;527528if (instr->store_content[0] == GPIR_INSTR_STORE_TEMP) {529code->store0_temporary = true;530code->unknown_1 = 12;531}532else {533code->store0_varying = instr->store_content[0] == GPIR_INSTR_STORE_VARYING;534code->store0_addr = instr->store_index[0];535}536537if (instr->store_content[1] == GPIR_INSTR_STORE_TEMP) {538code->store1_temporary = true;539code->unknown_1 = 12;540}541else {542code->store1_varying = instr->store_content[1] == GPIR_INSTR_STORE_VARYING;543code->store1_addr = instr->store_index[1];544}545}546547static void gpir_codegen(gpir_codegen_instr *code, gpir_instr *instr)548{549gpir_codegen_mul0_slot(code, instr);550gpir_codegen_mul1_slot(code, instr);551552gpir_codegen_add0_slot(code, instr);553gpir_codegen_add1_slot(code, 
instr);554555gpir_codegen_complex_slot(code, instr);556gpir_codegen_pass_slot(code, instr);557558gpir_codegen_reg0_slot(code, instr);559gpir_codegen_reg1_slot(code, instr);560gpir_codegen_mem_slot(code, instr);561562gpir_codegen_store_slot(code, instr);563}564565static void gpir_codegen_print_prog(gpir_compiler *comp)566{567uint32_t *data = comp->prog->shader;568int num_dword_per_instr = sizeof(gpir_codegen_instr) / sizeof(uint32_t);569570for (int i = 0; i < comp->num_instr; i++) {571printf("%03d: ", i);572for (int j = 0; j < num_dword_per_instr; j++)573printf("%08x ", data[i * num_dword_per_instr + j]);574printf("\n");575}576}577578bool gpir_codegen_prog(gpir_compiler *comp)579{580int num_instr = 0;581list_for_each_entry(gpir_block, block, &comp->block_list, list) {582block->instr_offset = num_instr;583num_instr += list_length(&block->instr_list);584}585586assert(num_instr <= 512);587588gpir_codegen_instr *code = rzalloc_array(comp->prog, gpir_codegen_instr, num_instr);589if (!code)590return false;591592int instr_index = 0;593list_for_each_entry(gpir_block, block, &comp->block_list, list) {594list_for_each_entry(gpir_instr, instr, &block->instr_list, list) {595gpir_codegen(code + instr_index, instr);596instr_index++;597}598}599600for (int i = 0; i < num_instr; i++) {601if (code[i].register0_attribute)602comp->prog->state.prefetch = i;603}604605comp->prog->shader = code;606comp->prog->state.shader_size = num_instr * sizeof(gpir_codegen_instr);607608if (lima_debug & LIMA_DEBUG_GP) {609gpir_codegen_print_prog(comp);610gpir_disassemble_program(code, num_instr);611}612613return true;614}615616static gpir_codegen_acc_op gpir_codegen_get_acc_op(gpir_op op)617{618switch (op) {619case gpir_op_add:620case gpir_op_neg:621case gpir_op_mov:622return gpir_codegen_acc_op_add;623case gpir_op_min:624return gpir_codegen_acc_op_min;625case gpir_op_max:626return gpir_codegen_acc_op_max;627case gpir_op_lt:628return gpir_codegen_acc_op_lt;629case gpir_op_ge:630return 
gpir_codegen_acc_op_ge;631case gpir_op_floor:632return gpir_codegen_acc_op_floor;633case gpir_op_sign:634return gpir_codegen_acc_op_sign;635default:636assert(0);637}638return -1;639}640641bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2)642{643return gpir_codegen_get_acc_op(op1) == gpir_codegen_get_acc_op(op2);644}645646647