Path: blob/21.2-virgl/src/broadcom/compiler/qpu_validate.c
4564 views
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223/**24* @file25*26* Validates the QPU instruction sequence after register allocation and27* scheduling.28*/2930#include <assert.h>31#include <stdio.h>32#include <stdlib.h>33#include "v3d_compiler.h"34#include "qpu/qpu_disasm.h"3536struct v3d_qpu_validate_state {37struct v3d_compile *c;38const struct v3d_qpu_instr *last;39int ip;40int last_sfu_write;41int last_branch_ip;42int last_thrsw_ip;4344/* Set when we've found the last-THRSW signal, or if we were started45* in single-segment mode.46*/47bool last_thrsw_found;4849/* Set when we've found the THRSW after the last THRSW */50bool thrend_found;5152int thrsw_count;53};5455static void56fail_instr(struct v3d_qpu_validate_state *state, const char *msg)57{58struct v3d_compile *c = state->c;5960fprintf(stderr, "v3d_qpu_validate at ip %d: %s:\n", state->ip, msg);6162int dump_ip = 0;63vir_for_each_inst_inorder(inst, c) {64v3d_qpu_dump(c->devinfo, &inst->qpu);6566if (dump_ip++ == state->ip)67fprintf(stderr, " *** ERROR ***");6869fprintf(stderr, "\n");70}7172fprintf(stderr, "\n");73abort();74}7576static bool77in_branch_delay_slots(struct v3d_qpu_validate_state *state)78{79return (state->ip - state->last_branch_ip) < 3;80}8182static bool83in_thrsw_delay_slots(struct v3d_qpu_validate_state *state)84{85return (state->ip - state->last_thrsw_ip) < 3;86}8788static bool89qpu_magic_waddr_matches(const struct v3d_qpu_instr *inst,90bool (*predicate)(enum v3d_qpu_waddr waddr))91{92if (inst->type == V3D_QPU_INSTR_TYPE_ALU)93return false;9495if (inst->alu.add.op != V3D_QPU_A_NOP &&96inst->alu.add.magic_write &&97predicate(inst->alu.add.waddr))98return true;99100if (inst->alu.mul.op != V3D_QPU_M_NOP &&101inst->alu.mul.magic_write &&102predicate(inst->alu.mul.waddr))103return true;104105return false;106}107108static void109qpu_validate_inst(struct v3d_qpu_validate_state *state, struct qinst *qinst)110{111const struct v3d_device_info *devinfo = state->c->devinfo;112const struct v3d_qpu_instr *inst = &qinst->qpu;113114if (inst->type != V3D_QPU_INSTR_TYPE_ALU)115return;116117/* LDVARY writes r5 two instructions later and LDUNIF writes118* r5 one instruction later, which is illegal to have119* together.120*/121if (state->last && state->last->sig.ldvary &&122(inst->sig.ldunif || inst->sig.ldunifa)) {123fail_instr(state, "LDUNIF after a LDVARY");124}125126/* GFXH-1633 (fixed since V3D 4.2.14, which is Rpi4)127*128* FIXME: This would not check correctly for V3D 4.2 versions lower129* than V3D 4.2.14, but that is not a real issue because the simulator130* will still catch this, and we are not really targetting any such131* versions anyway.132*/133if (state->c->devinfo->ver < 42) {134bool last_reads_ldunif = (state->last && (state->last->sig.ldunif ||135state->last->sig.ldunifrf));136bool last_reads_ldunifa = (state->last && (state->last->sig.ldunifa ||137state->last->sig.ldunifarf));138bool reads_ldunif = inst->sig.ldunif || inst->sig.ldunifrf;139bool reads_ldunifa = inst->sig.ldunifa || inst->sig.ldunifarf;140if ((last_reads_ldunif && reads_ldunifa) ||141(last_reads_ldunifa && reads_ldunif)) {142fail_instr(state,143"LDUNIF and LDUNIFA can't be next to each other");144}145}146147int tmu_writes = 0;148int sfu_writes = 0;149int vpm_writes = 0;150int tlb_writes = 0;151int tsy_writes = 0;152153if (inst->alu.add.op != V3D_QPU_A_NOP) {154if (inst->alu.add.magic_write) {155if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,156inst->alu.add.waddr)) {157tmu_writes++;158}159if (v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))160sfu_writes++;161if (v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr))162vpm_writes++;163if (v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr))164tlb_writes++;165if (v3d_qpu_magic_waddr_is_tsy(inst->alu.add.waddr))166tsy_writes++;167}168}169170if (inst->alu.mul.op != V3D_QPU_M_NOP) {171if (inst->alu.mul.magic_write) {172if (v3d_qpu_magic_waddr_is_tmu(state->c->devinfo,173inst->alu.mul.waddr)) {174tmu_writes++;175}176if (v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))177sfu_writes++;178if (v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr))179vpm_writes++;180if (v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr))181tlb_writes++;182if (v3d_qpu_magic_waddr_is_tsy(inst->alu.mul.waddr))183tsy_writes++;184}185}186187if (in_thrsw_delay_slots(state)) {188/* There's no way you want to start SFU during the THRSW delay189* slots, since the result would land in the other thread.190*/191if (sfu_writes) {192fail_instr(state,193"SFU write started during THRSW delay slots ");194}195196if (inst->sig.ldvary)197fail_instr(state, "LDVARY during THRSW delay slots");198}199200(void)qpu_magic_waddr_matches; /* XXX */201202/* SFU r4 results come back two instructions later. No doing203* r4 read/writes or other SFU lookups until it's done.204*/205if (state->ip - state->last_sfu_write < 2) {206if (v3d_qpu_uses_mux(inst, V3D_QPU_MUX_R4))207fail_instr(state, "R4 read too soon after SFU");208209if (v3d_qpu_writes_r4(devinfo, inst))210fail_instr(state, "R4 write too soon after SFU");211212if (sfu_writes)213fail_instr(state, "SFU write too soon after SFU");214}215216/* XXX: The docs say VPM can happen with the others, but the simulator217* disagrees.218*/219if (tmu_writes +220sfu_writes +221vpm_writes +222tlb_writes +223tsy_writes +224inst->sig.ldtmu +225inst->sig.ldtlb +226inst->sig.ldvpm +227inst->sig.ldtlbu > 1) {228fail_instr(state,229"Only one of [TMU, SFU, TSY, TLB read, VPM] allowed");230}231232if (sfu_writes)233state->last_sfu_write = state->ip;234235if (inst->sig.thrsw) {236if (in_branch_delay_slots(state))237fail_instr(state, "THRSW in a branch delay slot.");238239if (state->last_thrsw_found)240state->thrend_found = true;241242if (state->last_thrsw_ip == state->ip - 1) {243/* If it's the second THRSW in a row, then it's just a244* last-thrsw signal.245*/246if (state->last_thrsw_found)247fail_instr(state, "Two last-THRSW signals");248state->last_thrsw_found = true;249} else {250if (in_thrsw_delay_slots(state)) {251fail_instr(state,252"THRSW too close to another THRSW.");253}254state->thrsw_count++;255state->last_thrsw_ip = state->ip;256}257}258259if (state->thrend_found &&260state->last_thrsw_ip - state->ip <= 2 &&261inst->type == V3D_QPU_INSTR_TYPE_ALU) {262if ((inst->alu.add.op != V3D_QPU_A_NOP &&263!inst->alu.add.magic_write)) {264fail_instr(state, "RF write after THREND");265}266267if ((inst->alu.mul.op != V3D_QPU_M_NOP &&268!inst->alu.mul.magic_write)) {269fail_instr(state, "RF write after THREND");270}271272if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&273!inst->sig_magic) {274fail_instr(state, "RF write after THREND");275}276277/* GFXH-1625: No TMUWT in the last instruction */278if (state->last_thrsw_ip - state->ip == 2 &&279inst->alu.add.op == V3D_QPU_A_TMUWT)280fail_instr(state, "TMUWT in last instruction");281}282283if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {284if (in_branch_delay_slots(state))285fail_instr(state, "branch in a branch delay slot.");286if (in_thrsw_delay_slots(state))287fail_instr(state, "branch in a THRSW delay slot.");288state->last_branch_ip = state->ip;289}290}291292static void293qpu_validate_block(struct v3d_qpu_validate_state *state, struct qblock *block)294{295vir_for_each_inst(qinst, block) {296qpu_validate_inst(state, qinst);297298state->last = &qinst->qpu;299state->ip++;300}301}302303/**304* Checks for the instruction restrictions from page 37 ("Summary of305* Instruction Restrictions").306*/307void308qpu_validate(struct v3d_compile *c)309{310/* We don't want to do validation in release builds, but we want to311* keep compiling the validation code to make sure it doesn't get312* broken.313*/314#ifndef DEBUG315return;316#endif317318struct v3d_qpu_validate_state state = {319.c = c,320.last_sfu_write = -10,321.last_thrsw_ip = -10,322.last_branch_ip = -10,323.ip = 0,324325.last_thrsw_found = !c->last_thrsw,326};327328vir_for_each_block(block, c) {329qpu_validate_block(&state, block);330}331332if (state.thrsw_count > 1 && !state.last_thrsw_found) {333fail_instr(&state,334"thread switch found without last-THRSW in program");335}336337if (!state.thrend_found)338fail_instr(&state, "No program-end THRSW found");339}340341342