/* Path: blob/21.2-virgl/src/broadcom/compiler/vir_to_qpu.c */
/* 4564 views */
/*1* Copyright © 2016 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "compiler/v3d_compiler.h"24#include "qpu/qpu_instr.h"25#include "qpu/qpu_disasm.h"2627static inline struct qpu_reg28qpu_reg(int index)29{30struct qpu_reg reg = {31.magic = false,32.index = index,33};34return reg;35}3637static inline struct qpu_reg38qpu_magic(enum v3d_qpu_waddr waddr)39{40struct qpu_reg reg = {41.magic = true,42.index = waddr,43};44return reg;45}4647static inline struct qpu_reg48qpu_acc(int acc)49{50return qpu_magic(V3D_QPU_WADDR_R0 + acc);51}5253struct v3d_qpu_instr54v3d_qpu_nop(void)55{56struct v3d_qpu_instr instr = {57.type = V3D_QPU_INSTR_TYPE_ALU,58.alu = {59.add = {60.op = V3D_QPU_A_NOP,61.waddr = V3D_QPU_WADDR_NOP,62.magic_write = true,63},64.mul = {65.op = V3D_QPU_M_NOP,66.waddr = V3D_QPU_WADDR_NOP,67.magic_write = true,68},69}70};7172return instr;73}7475static struct qinst *76vir_nop(void)77{78struct qreg undef = 
vir_nop_reg();79struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);8081return qinst;82}8384static struct qinst *85new_qpu_nop_before(struct qinst *inst)86{87struct qinst *q = vir_nop();8889list_addtail(&q->link, &inst->link);9091return q;92}9394/**95* Allocates the src register (accumulator or register file) into the RADDR96* fields of the instruction.97*/98static void99set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)100{101if (src.smimm) {102assert(instr->sig.small_imm);103*mux = V3D_QPU_MUX_B;104return;105}106107if (src.magic) {108assert(src.index >= V3D_QPU_WADDR_R0 &&109src.index <= V3D_QPU_WADDR_R5);110*mux = src.index - V3D_QPU_WADDR_R0 + V3D_QPU_MUX_R0;111return;112}113114if (instr->alu.add.a != V3D_QPU_MUX_A &&115instr->alu.add.b != V3D_QPU_MUX_A &&116instr->alu.mul.a != V3D_QPU_MUX_A &&117instr->alu.mul.b != V3D_QPU_MUX_A) {118instr->raddr_a = src.index;119*mux = V3D_QPU_MUX_A;120} else {121if (instr->raddr_a == src.index) {122*mux = V3D_QPU_MUX_A;123} else {124assert(!(instr->alu.add.a == V3D_QPU_MUX_B &&125instr->alu.add.b == V3D_QPU_MUX_B &&126instr->alu.mul.a == V3D_QPU_MUX_B &&127instr->alu.mul.b == V3D_QPU_MUX_B) ||128src.index == instr->raddr_b);129130instr->raddr_b = src.index;131*mux = V3D_QPU_MUX_B;132}133}134}135136static bool137is_no_op_mov(struct qinst *qinst)138{139static const struct v3d_qpu_sig no_sig = {0};140141/* Make sure it's just a lone MOV. */142if (qinst->qpu.type != V3D_QPU_INSTR_TYPE_ALU ||143qinst->qpu.alu.mul.op != V3D_QPU_M_MOV ||144qinst->qpu.alu.add.op != V3D_QPU_A_NOP ||145memcmp(&qinst->qpu.sig, &no_sig, sizeof(no_sig)) != 0) {146return false;147}148149/* Check if it's a MOV from a register to itself. 
*/150enum v3d_qpu_waddr waddr = qinst->qpu.alu.mul.waddr;151if (qinst->qpu.alu.mul.magic_write) {152if (waddr < V3D_QPU_WADDR_R0 || waddr > V3D_QPU_WADDR_R4)153return false;154155if (qinst->qpu.alu.mul.a !=156V3D_QPU_MUX_R0 + (waddr - V3D_QPU_WADDR_R0)) {157return false;158}159} else {160int raddr;161162switch (qinst->qpu.alu.mul.a) {163case V3D_QPU_MUX_A:164raddr = qinst->qpu.raddr_a;165break;166case V3D_QPU_MUX_B:167raddr = qinst->qpu.raddr_b;168break;169default:170return false;171}172if (raddr != waddr)173return false;174}175176/* No packing or flags updates, or we need to execute the177* instruction.178*/179if (qinst->qpu.alu.mul.a_unpack != V3D_QPU_UNPACK_NONE ||180qinst->qpu.alu.mul.output_pack != V3D_QPU_PACK_NONE ||181qinst->qpu.flags.mc != V3D_QPU_COND_NONE ||182qinst->qpu.flags.mpf != V3D_QPU_PF_NONE ||183qinst->qpu.flags.muf != V3D_QPU_UF_NONE) {184return false;185}186187return true;188}189190static void191v3d_generate_code_block(struct v3d_compile *c,192struct qblock *block,193struct qpu_reg *temp_registers)194{195int last_vpm_read_index = -1;196197vir_for_each_inst_safe(qinst, block) {198#if 0199fprintf(stderr, "translating qinst to qpu: ");200vir_dump_inst(c, qinst);201fprintf(stderr, "\n");202#endif203204struct qinst *temp;205206if (vir_has_uniform(qinst))207c->num_uniforms++;208209int nsrc = vir_get_nsrc(qinst);210struct qpu_reg src[ARRAY_SIZE(qinst->src)];211for (int i = 0; i < nsrc; i++) {212int index = qinst->src[i].index;213switch (qinst->src[i].file) {214case QFILE_REG:215src[i] = qpu_reg(qinst->src[i].index);216break;217case QFILE_MAGIC:218src[i] = qpu_magic(qinst->src[i].index);219break;220case QFILE_NULL:221case QFILE_LOAD_IMM:222src[i] = qpu_acc(0);223break;224case QFILE_TEMP:225src[i] = temp_registers[index];226break;227case QFILE_SMALL_IMM:228src[i].smimm = true;229break;230231case QFILE_VPM:232assert((int)qinst->src[i].index >=233last_vpm_read_index);234(void)last_vpm_read_index;235last_vpm_read_index = qinst->src[i].index;236237temp = 
new_qpu_nop_before(qinst);238temp->qpu.sig.ldvpm = true;239240src[i] = qpu_acc(3);241break;242}243}244245struct qpu_reg dst;246switch (qinst->dst.file) {247case QFILE_NULL:248dst = qpu_magic(V3D_QPU_WADDR_NOP);249break;250251case QFILE_REG:252dst = qpu_reg(qinst->dst.index);253break;254255case QFILE_MAGIC:256dst = qpu_magic(qinst->dst.index);257break;258259case QFILE_TEMP:260dst = temp_registers[qinst->dst.index];261break;262263case QFILE_VPM:264dst = qpu_magic(V3D_QPU_WADDR_VPM);265break;266267case QFILE_SMALL_IMM:268case QFILE_LOAD_IMM:269assert(!"not reached");270break;271}272273if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {274if (qinst->qpu.sig.ldunif || qinst->qpu.sig.ldunifa) {275assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);276assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);277278if (!dst.magic ||279dst.index != V3D_QPU_WADDR_R5) {280assert(c->devinfo->ver >= 40);281282if (qinst->qpu.sig.ldunif) {283qinst->qpu.sig.ldunif = false;284qinst->qpu.sig.ldunifrf = true;285} else {286qinst->qpu.sig.ldunifa = false;287qinst->qpu.sig.ldunifarf = true;288}289qinst->qpu.sig_addr = dst.index;290qinst->qpu.sig_magic = dst.magic;291}292} else if (v3d_qpu_sig_writes_address(c->devinfo,293&qinst->qpu.sig)) {294assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);295assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);296297qinst->qpu.sig_addr = dst.index;298qinst->qpu.sig_magic = dst.magic;299} else if (qinst->qpu.alu.add.op != V3D_QPU_A_NOP) {300assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);301if (nsrc >= 1) {302set_src(&qinst->qpu,303&qinst->qpu.alu.add.a, src[0]);304}305if (nsrc >= 2) {306set_src(&qinst->qpu,307&qinst->qpu.alu.add.b, src[1]);308}309310qinst->qpu.alu.add.waddr = dst.index;311qinst->qpu.alu.add.magic_write = dst.magic;312} else {313if (nsrc >= 1) {314set_src(&qinst->qpu,315&qinst->qpu.alu.mul.a, src[0]);316}317if (nsrc >= 2) {318set_src(&qinst->qpu,319&qinst->qpu.alu.mul.b, src[1]);320}321322qinst->qpu.alu.mul.waddr = dst.index;323qinst->qpu.alu.mul.magic_write = 
dst.magic;324325if (is_no_op_mov(qinst)) {326vir_remove_instruction(c, qinst);327continue;328}329}330} else {331assert(qinst->qpu.type == V3D_QPU_INSTR_TYPE_BRANCH);332}333}334}335336static bool337reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)338{339struct v3d_qpu_instr qpu;340ASSERTED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);341assert(ok);342343if (qpu.sig.ldunif ||344qpu.sig.ldunifrf ||345qpu.sig.ldtlbu ||346qpu.sig.wrtmuc) {347return true;348}349350if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)351return true;352353if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {354if (qpu.alu.add.magic_write &&355v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {356return true;357}358359if (qpu.alu.mul.magic_write &&360v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {361return true;362}363}364365return false;366}367368static void369v3d_dump_qpu(struct v3d_compile *c)370{371fprintf(stderr, "%s prog %d/%d QPU:\n",372vir_get_stage_name(c),373c->program_id, c->variant_id);374375int next_uniform = 0;376for (int i = 0; i < c->qpu_inst_count; i++) {377const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);378fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);379380/* We can only do this on 4.x, because we're not tracking TMU381* implicit uniforms here on 3.x.382*/383if (c->devinfo->ver >= 40 &&384reads_uniform(c->devinfo, c->qpu_insts[i])) {385fprintf(stderr, " (");386vir_dump_uniform(c->uniform_contents[next_uniform],387c->uniform_data[next_uniform]);388fprintf(stderr, ")");389next_uniform++;390}391fprintf(stderr, "\n");392ralloc_free((void *)str);393}394395/* Make sure our dumping lined up. 
*/396if (c->devinfo->ver >= 40)397assert(next_uniform == c->num_uniforms);398399fprintf(stderr, "\n");400}401402void403v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)404{405/* Reset the uniform count to how many will be actually loaded by the406* generated QPU code.407*/408c->num_uniforms = 0;409410vir_for_each_block(block, c)411v3d_generate_code_block(c, block, temp_registers);412413v3d_qpu_schedule_instructions(c);414415c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);416int i = 0;417vir_for_each_inst_inorder(inst, c) {418bool ok = v3d_qpu_instr_pack(c->devinfo, &inst->qpu,419&c->qpu_insts[i++]);420if (!ok) {421fprintf(stderr, "Failed to pack instruction %d:\n", i);422vir_dump_inst(c, inst);423fprintf(stderr, "\n");424c->compilation_result = V3D_COMPILATION_FAILED;425return;426}427428if (v3d_qpu_is_nop(&inst->qpu))429c->nop_count++;430}431assert(i == c->qpu_inst_count);432433if (V3D_DEBUG & (V3D_DEBUG_QPU |434v3d_debug_flag_for_shader_stage(c->s->info.stage))) {435v3d_dump_qpu(c);436}437438qpu_validate(c);439440free(temp_registers);441}442443444