Path: blob/21.2-virgl/src/broadcom/qpu/qpu_instr.c
4560 views
/*1* Copyright © 2016 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <stdlib.h>24#include <string.h>25#include "util/macros.h"26#include "broadcom/common/v3d_device_info.h"27#include "qpu_instr.h"2829const char *30v3d_qpu_magic_waddr_name(const struct v3d_device_info *devinfo,31enum v3d_qpu_waddr waddr)32{33/* V3D 4.x UNIFA aliases TMU in V3D 3.x in the table below */34if (devinfo->ver < 40 && waddr == V3D_QPU_WADDR_TMU)35return "tmu";3637static const char *waddr_magic[] = {38[V3D_QPU_WADDR_R0] = "r0",39[V3D_QPU_WADDR_R1] = "r1",40[V3D_QPU_WADDR_R2] = "r2",41[V3D_QPU_WADDR_R3] = "r3",42[V3D_QPU_WADDR_R4] = "r4",43[V3D_QPU_WADDR_R5] = "r5",44[V3D_QPU_WADDR_NOP] = "-",45[V3D_QPU_WADDR_TLB] = "tlb",46[V3D_QPU_WADDR_TLBU] = "tlbu",47[V3D_QPU_WADDR_UNIFA] = "unifa",48[V3D_QPU_WADDR_TMUL] = "tmul",49[V3D_QPU_WADDR_TMUD] = "tmud",50[V3D_QPU_WADDR_TMUA] = "tmua",51[V3D_QPU_WADDR_TMUAU] = "tmuau",52[V3D_QPU_WADDR_VPM] = "vpm",53[V3D_QPU_WADDR_VPMU] = "vpmu",54[V3D_QPU_WADDR_SYNC] = "sync",55[V3D_QPU_WADDR_SYNCU] = "syncu",56[V3D_QPU_WADDR_SYNCB] = "syncb",57[V3D_QPU_WADDR_RECIP] = "recip",58[V3D_QPU_WADDR_RSQRT] = "rsqrt",59[V3D_QPU_WADDR_EXP] = "exp",60[V3D_QPU_WADDR_LOG] = "log",61[V3D_QPU_WADDR_SIN] = "sin",62[V3D_QPU_WADDR_RSQRT2] = "rsqrt2",63[V3D_QPU_WADDR_TMUC] = "tmuc",64[V3D_QPU_WADDR_TMUS] = "tmus",65[V3D_QPU_WADDR_TMUT] = "tmut",66[V3D_QPU_WADDR_TMUR] = "tmur",67[V3D_QPU_WADDR_TMUI] = "tmui",68[V3D_QPU_WADDR_TMUB] = "tmub",69[V3D_QPU_WADDR_TMUDREF] = "tmudref",70[V3D_QPU_WADDR_TMUOFF] = "tmuoff",71[V3D_QPU_WADDR_TMUSCM] = "tmuscm",72[V3D_QPU_WADDR_TMUSF] = "tmusf",73[V3D_QPU_WADDR_TMUSLOD] = "tmuslod",74[V3D_QPU_WADDR_TMUHS] = "tmuhs",75[V3D_QPU_WADDR_TMUHSCM] = "tmuscm",76[V3D_QPU_WADDR_TMUHSF] = "tmuhsf",77[V3D_QPU_WADDR_TMUHSLOD] = "tmuhslod",78[V3D_QPU_WADDR_R5REP] = "r5rep",79};8081return waddr_magic[waddr];82}8384const char *85v3d_qpu_add_op_name(enum v3d_qpu_add_op op)86{87static const char *op_names[] = {88[V3D_QPU_A_FADD] = "fadd",89[V3D_QPU_A_FADDNF] = "faddnf",90[V3D_QPU_A_VFPACK] = "vfpack",91[V3D_QPU_A_ADD] = "add",92[V3D_QPU_A_SUB] = "sub",93[V3D_QPU_A_FSUB] = "fsub",94[V3D_QPU_A_MIN] = "min",95[V3D_QPU_A_MAX] = "max",96[V3D_QPU_A_UMIN] = "umin",97[V3D_QPU_A_UMAX] = "umax",98[V3D_QPU_A_SHL] = "shl",99[V3D_QPU_A_SHR] = "shr",100[V3D_QPU_A_ASR] = "asr",101[V3D_QPU_A_ROR] = "ror",102[V3D_QPU_A_FMIN] = "fmin",103[V3D_QPU_A_FMAX] = "fmax",104[V3D_QPU_A_VFMIN] = "vfmin",105[V3D_QPU_A_AND] = "and",106[V3D_QPU_A_OR] = "or",107[V3D_QPU_A_XOR] = "xor",108[V3D_QPU_A_VADD] = "vadd",109[V3D_QPU_A_VSUB] = "vsub",110[V3D_QPU_A_NOT] = "not",111[V3D_QPU_A_NEG] = "neg",112[V3D_QPU_A_FLAPUSH] = "flapush",113[V3D_QPU_A_FLBPUSH] = "flbpush",114[V3D_QPU_A_FLPOP] = "flpop",115[V3D_QPU_A_RECIP] = "recip",116[V3D_QPU_A_SETMSF] = "setmsf",117[V3D_QPU_A_SETREVF] = "setrevf",118[V3D_QPU_A_NOP] = "nop",119[V3D_QPU_A_TIDX] = "tidx",120[V3D_QPU_A_EIDX] = "eidx",121[V3D_QPU_A_LR] = "lr",122[V3D_QPU_A_VFLA] = "vfla",123[V3D_QPU_A_VFLNA] = "vflna",124[V3D_QPU_A_VFLB] = "vflb",125[V3D_QPU_A_VFLNB] = "vflnb",126[V3D_QPU_A_FXCD] = "fxcd",127[V3D_QPU_A_XCD] = "xcd",128[V3D_QPU_A_FYCD] = "fycd",129[V3D_QPU_A_YCD] = "ycd",130[V3D_QPU_A_MSF] = "msf",131[V3D_QPU_A_REVF] = "revf",132[V3D_QPU_A_VDWWT] = "vdwwt",133[V3D_QPU_A_IID] = "iid",134[V3D_QPU_A_SAMPID] = "sampid",135[V3D_QPU_A_BARRIERID] = "barrierid",136[V3D_QPU_A_TMUWT] = "tmuwt",137[V3D_QPU_A_VPMSETUP] = "vpmsetup",138[V3D_QPU_A_VPMWT] = "vpmwt",139[V3D_QPU_A_FLAFIRST] = "flafirst",140[V3D_QPU_A_FLNAFIRST] = "flnafirst",141[V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",142[V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",143[V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",144[V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",145[V3D_QPU_A_LDVPMP] = "ldvpmp",146[V3D_QPU_A_RSQRT] = "rsqrt",147[V3D_QPU_A_EXP] = "exp",148[V3D_QPU_A_LOG] = "log",149[V3D_QPU_A_SIN] = "sin",150[V3D_QPU_A_RSQRT2] = "rsqrt2",151[V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",152[V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",153[V3D_QPU_A_FCMP] = "fcmp",154[V3D_QPU_A_VFMAX] = "vfmax",155[V3D_QPU_A_FROUND] = "fround",156[V3D_QPU_A_FTOIN] = "ftoin",157[V3D_QPU_A_FTRUNC] = "ftrunc",158[V3D_QPU_A_FTOIZ] = "ftoiz",159[V3D_QPU_A_FFLOOR] = "ffloor",160[V3D_QPU_A_FTOUZ] = "ftouz",161[V3D_QPU_A_FCEIL] = "fceil",162[V3D_QPU_A_FTOC] = "ftoc",163[V3D_QPU_A_FDX] = "fdx",164[V3D_QPU_A_FDY] = "fdy",165[V3D_QPU_A_STVPMV] = "stvpmv",166[V3D_QPU_A_STVPMD] = "stvpmd",167[V3D_QPU_A_STVPMP] = "stvpmp",168[V3D_QPU_A_ITOF] = "itof",169[V3D_QPU_A_CLZ] = "clz",170[V3D_QPU_A_UTOF] = "utof",171};172173if (op >= ARRAY_SIZE(op_names))174return NULL;175176return op_names[op];177}178179const char *180v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op)181{182static const char *op_names[] = {183[V3D_QPU_M_ADD] = "add",184[V3D_QPU_M_SUB] = "sub",185[V3D_QPU_M_UMUL24] = "umul24",186[V3D_QPU_M_VFMUL] = "vfmul",187[V3D_QPU_M_SMUL24] = "smul24",188[V3D_QPU_M_MULTOP] = "multop",189[V3D_QPU_M_FMOV] = "fmov",190[V3D_QPU_M_MOV] = "mov",191[V3D_QPU_M_NOP] = "nop",192[V3D_QPU_M_FMUL] = "fmul",193};194195if (op >= ARRAY_SIZE(op_names))196return NULL;197198return op_names[op];199}200201const char *202v3d_qpu_cond_name(enum v3d_qpu_cond cond)203{204switch (cond) {205case V3D_QPU_COND_NONE:206return "";207case V3D_QPU_COND_IFA:208return ".ifa";209case V3D_QPU_COND_IFB:210return ".ifb";211case V3D_QPU_COND_IFNA:212return ".ifna";213case V3D_QPU_COND_IFNB:214return ".ifnb";215default:216unreachable("bad cond value");217}218}219220const char *221v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond)222{223switch (cond) {224case V3D_QPU_BRANCH_COND_ALWAYS:225return "";226case V3D_QPU_BRANCH_COND_A0:227return ".a0";228case V3D_QPU_BRANCH_COND_NA0:229return ".na0";230case V3D_QPU_BRANCH_COND_ALLA:231return ".alla";232case V3D_QPU_BRANCH_COND_ANYNA:233return ".anyna";234case V3D_QPU_BRANCH_COND_ANYA:235return ".anya";236case V3D_QPU_BRANCH_COND_ALLNA:237return ".allna";238default:239unreachable("bad branch cond value");240}241}242243const char *244v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign)245{246switch (msfign) {247case V3D_QPU_MSFIGN_NONE:248return "";249case V3D_QPU_MSFIGN_P:250return "p";251case V3D_QPU_MSFIGN_Q:252return "q";253default:254unreachable("bad branch cond value");255}256}257258const char *259v3d_qpu_pf_name(enum v3d_qpu_pf pf)260{261switch (pf) {262case V3D_QPU_PF_NONE:263return "";264case V3D_QPU_PF_PUSHZ:265return ".pushz";266case V3D_QPU_PF_PUSHN:267return ".pushn";268case V3D_QPU_PF_PUSHC:269return ".pushc";270default:271unreachable("bad pf value");272}273}274275const char *276v3d_qpu_uf_name(enum v3d_qpu_uf uf)277{278switch (uf) {279case V3D_QPU_UF_NONE:280return "";281case V3D_QPU_UF_ANDZ:282return ".andz";283case V3D_QPU_UF_ANDNZ:284return ".andnz";285case V3D_QPU_UF_NORZ:286return ".norz";287case V3D_QPU_UF_NORNZ:288return ".nornz";289case V3D_QPU_UF_ANDN:290return ".andn";291case V3D_QPU_UF_ANDNN:292return ".andnn";293case V3D_QPU_UF_NORN:294return ".norn";295case V3D_QPU_UF_NORNN:296return ".nornn";297case V3D_QPU_UF_ANDC:298return ".andc";299case V3D_QPU_UF_ANDNC:300return ".andnc";301case V3D_QPU_UF_NORC:302return ".norc";303case V3D_QPU_UF_NORNC:304return ".nornc";305default:306unreachable("bad pf value");307}308}309310const char *311v3d_qpu_pack_name(enum v3d_qpu_output_pack pack)312{313switch (pack) {314case V3D_QPU_PACK_NONE:315return "";316case V3D_QPU_PACK_L:317return ".l";318case V3D_QPU_PACK_H:319return ".h";320default:321unreachable("bad pack value");322}323}324325const char *326v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack)327{328switch (unpack) {329case V3D_QPU_UNPACK_NONE:330return "";331case V3D_QPU_UNPACK_L:332return ".l";333case V3D_QPU_UNPACK_H:334return ".h";335case V3D_QPU_UNPACK_ABS:336return ".abs";337case V3D_QPU_UNPACK_REPLICATE_32F_16:338return ".ff";339case V3D_QPU_UNPACK_REPLICATE_L_16:340return ".ll";341case V3D_QPU_UNPACK_REPLICATE_H_16:342return ".hh";343case V3D_QPU_UNPACK_SWAP_16:344return ".swp";345default:346unreachable("bad unpack value");347}348}349350#define D 1351#define A 2352#define B 4353static const uint8_t add_op_args[] = {354[V3D_QPU_A_FADD] = D | A | B,355[V3D_QPU_A_FADDNF] = D | A | B,356[V3D_QPU_A_VFPACK] = D | A | B,357[V3D_QPU_A_ADD] = D | A | B,358[V3D_QPU_A_VFPACK] = D | A | B,359[V3D_QPU_A_SUB] = D | A | B,360[V3D_QPU_A_VFPACK] = D | A | B,361[V3D_QPU_A_FSUB] = D | A | B,362[V3D_QPU_A_MIN] = D | A | B,363[V3D_QPU_A_MAX] = D | A | B,364[V3D_QPU_A_UMIN] = D | A | B,365[V3D_QPU_A_UMAX] = D | A | B,366[V3D_QPU_A_SHL] = D | A | B,367[V3D_QPU_A_SHR] = D | A | B,368[V3D_QPU_A_ASR] = D | A | B,369[V3D_QPU_A_ROR] = D | A | B,370[V3D_QPU_A_FMIN] = D | A | B,371[V3D_QPU_A_FMAX] = D | A | B,372[V3D_QPU_A_VFMIN] = D | A | B,373374[V3D_QPU_A_AND] = D | A | B,375[V3D_QPU_A_OR] = D | A | B,376[V3D_QPU_A_XOR] = D | A | B,377378[V3D_QPU_A_VADD] = D | A | B,379[V3D_QPU_A_VSUB] = D | A | B,380[V3D_QPU_A_NOT] = D | A,381[V3D_QPU_A_NEG] = D | A,382[V3D_QPU_A_FLAPUSH] = D | A,383[V3D_QPU_A_FLBPUSH] = D | A,384[V3D_QPU_A_FLPOP] = D | A,385[V3D_QPU_A_RECIP] = D | A,386[V3D_QPU_A_SETMSF] = D | A,387[V3D_QPU_A_SETREVF] = D | A,388[V3D_QPU_A_NOP] = 0,389[V3D_QPU_A_TIDX] = D,390[V3D_QPU_A_EIDX] = D,391[V3D_QPU_A_LR] = D,392[V3D_QPU_A_VFLA] = D,393[V3D_QPU_A_VFLNA] = D,394[V3D_QPU_A_VFLB] = D,395[V3D_QPU_A_VFLNB] = D,396397[V3D_QPU_A_FXCD] = D,398[V3D_QPU_A_XCD] = D,399[V3D_QPU_A_FYCD] = D,400[V3D_QPU_A_YCD] = D,401402[V3D_QPU_A_MSF] = D,403[V3D_QPU_A_REVF] = D,404[V3D_QPU_A_VDWWT] = D,405[V3D_QPU_A_IID] = D,406[V3D_QPU_A_SAMPID] = D,407[V3D_QPU_A_BARRIERID] = D,408[V3D_QPU_A_TMUWT] = D,409[V3D_QPU_A_VPMWT] = D,410[V3D_QPU_A_FLAFIRST] = D,411[V3D_QPU_A_FLNAFIRST] = D,412413[V3D_QPU_A_VPMSETUP] = D | A,414415[V3D_QPU_A_LDVPMV_IN] = D | A,416[V3D_QPU_A_LDVPMV_OUT] = D | A,417[V3D_QPU_A_LDVPMD_IN] = D | A,418[V3D_QPU_A_LDVPMD_OUT] = D | A,419[V3D_QPU_A_LDVPMP] = D | A,420[V3D_QPU_A_RSQRT] = D | A,421[V3D_QPU_A_EXP] = D | A,422[V3D_QPU_A_LOG] = D | A,423[V3D_QPU_A_SIN] = D | A,424[V3D_QPU_A_RSQRT2] = D | A,425[V3D_QPU_A_LDVPMG_IN] = D | A | B,426[V3D_QPU_A_LDVPMG_OUT] = D | A | B,427428/* FIXME: MOVABSNEG */429430[V3D_QPU_A_FCMP] = D | A | B,431[V3D_QPU_A_VFMAX] = D | A | B,432433[V3D_QPU_A_FROUND] = D | A,434[V3D_QPU_A_FTOIN] = D | A,435[V3D_QPU_A_FTRUNC] = D | A,436[V3D_QPU_A_FTOIZ] = D | A,437[V3D_QPU_A_FFLOOR] = D | A,438[V3D_QPU_A_FTOUZ] = D | A,439[V3D_QPU_A_FCEIL] = D | A,440[V3D_QPU_A_FTOC] = D | A,441442[V3D_QPU_A_FDX] = D | A,443[V3D_QPU_A_FDY] = D | A,444445[V3D_QPU_A_STVPMV] = A | B,446[V3D_QPU_A_STVPMD] = A | B,447[V3D_QPU_A_STVPMP] = A | B,448449[V3D_QPU_A_ITOF] = D | A,450[V3D_QPU_A_CLZ] = D | A,451[V3D_QPU_A_UTOF] = D | A,452};453454static const uint8_t mul_op_args[] = {455[V3D_QPU_M_ADD] = D | A | B,456[V3D_QPU_M_SUB] = D | A | B,457[V3D_QPU_M_UMUL24] = D | A | B,458[V3D_QPU_M_VFMUL] = D | A | B,459[V3D_QPU_M_SMUL24] = D | A | B,460[V3D_QPU_M_MULTOP] = D | A | B,461[V3D_QPU_M_FMOV] = D | A,462[V3D_QPU_M_NOP] = 0,463[V3D_QPU_M_MOV] = D | A,464[V3D_QPU_M_FMUL] = D | A | B,465};466467bool468v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op)469{470assert(op < ARRAY_SIZE(add_op_args));471472return add_op_args[op] & D;473}474475bool476v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op)477{478assert(op < ARRAY_SIZE(mul_op_args));479480return mul_op_args[op] & D;481}482483int484v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op)485{486assert(op < ARRAY_SIZE(add_op_args));487488uint8_t args = add_op_args[op];489if (args & B)490return 2;491else if (args & A)492return 1;493else494return 0;495}496497int498v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op)499{500assert(op < ARRAY_SIZE(mul_op_args));501502uint8_t args = mul_op_args[op];503if (args & B)504return 2;505else if (args & A)506return 1;507else508return 0;509}510511enum v3d_qpu_cond512v3d_qpu_cond_invert(enum v3d_qpu_cond cond)513{514switch (cond) {515case V3D_QPU_COND_IFA:516return V3D_QPU_COND_IFNA;517case V3D_QPU_COND_IFNA:518return V3D_QPU_COND_IFA;519case V3D_QPU_COND_IFB:520return V3D_QPU_COND_IFNB;521case V3D_QPU_COND_IFNB:522return V3D_QPU_COND_IFB;523default:524unreachable("Non-invertible cond");525}526}527528bool529v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr)530{531switch (waddr) {532case V3D_QPU_WADDR_RECIP:533case V3D_QPU_WADDR_RSQRT:534case V3D_QPU_WADDR_EXP:535case V3D_QPU_WADDR_LOG:536case V3D_QPU_WADDR_SIN:537case V3D_QPU_WADDR_RSQRT2:538return true;539default:540return false;541}542}543544bool545v3d_qpu_magic_waddr_is_tmu(const struct v3d_device_info *devinfo,546enum v3d_qpu_waddr waddr)547{548if (devinfo->ver >= 40) {549return ((waddr >= V3D_QPU_WADDR_TMUD &&550waddr <= V3D_QPU_WADDR_TMUAU) ||551(waddr >= V3D_QPU_WADDR_TMUC &&552waddr <= V3D_QPU_WADDR_TMUHSLOD));553} else {554return ((waddr >= V3D_QPU_WADDR_TMU &&555waddr <= V3D_QPU_WADDR_TMUAU) ||556(waddr >= V3D_QPU_WADDR_TMUC &&557waddr <= V3D_QPU_WADDR_TMUHSLOD));558}559}560561bool562v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr *inst)563{564return (inst->sig.ldtmu ||565(inst->type == V3D_QPU_INSTR_TYPE_ALU &&566inst->alu.add.op == V3D_QPU_A_TMUWT));567}568569bool570v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr)571{572return (waddr == V3D_QPU_WADDR_TLB ||573waddr == V3D_QPU_WADDR_TLBU);574}575576bool577v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr)578{579return (waddr == V3D_QPU_WADDR_VPM ||580waddr == V3D_QPU_WADDR_VPMU);581}582583bool584v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)585{586return (waddr == V3D_QPU_WADDR_SYNC ||587waddr == V3D_QPU_WADDR_SYNCB ||588waddr == V3D_QPU_WADDR_SYNCU);589}590591bool592v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr)593{594switch (waddr) {595case V3D_QPU_WADDR_VPMU:596case V3D_QPU_WADDR_TLBU:597case V3D_QPU_WADDR_TMUAU:598case V3D_QPU_WADDR_SYNCU:599return true;600default:601return false;602}603}604605static bool606v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op)607{608switch (op) {609case V3D_QPU_A_VPMSETUP:610case V3D_QPU_A_LDVPMV_IN:611case V3D_QPU_A_LDVPMV_OUT:612case V3D_QPU_A_LDVPMD_IN:613case V3D_QPU_A_LDVPMD_OUT:614case V3D_QPU_A_LDVPMP:615case V3D_QPU_A_LDVPMG_IN:616case V3D_QPU_A_LDVPMG_OUT:617return true;618default:619return false;620}621}622623static bool624v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op)625{626switch (op) {627case V3D_QPU_A_VPMSETUP:628case V3D_QPU_A_STVPMV:629case V3D_QPU_A_STVPMD:630case V3D_QPU_A_STVPMP:631return true;632default:633return false;634}635}636637bool638v3d_qpu_uses_tlb(const struct v3d_qpu_instr *inst)639{640if (inst->sig.ldtlb ||641inst->sig.ldtlbu)642return true;643644if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {645if (inst->alu.add.magic_write &&646v3d_qpu_magic_waddr_is_tlb(inst->alu.add.waddr)) {647return true;648}649650if (inst->alu.mul.magic_write &&651v3d_qpu_magic_waddr_is_tlb(inst->alu.mul.waddr)) {652return true;653}654}655656return false;657}658659bool660v3d_qpu_uses_sfu(const struct v3d_qpu_instr *inst)661{662if (v3d_qpu_instr_is_sfu(inst))663return true;664665if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {666if (inst->alu.add.magic_write &&667v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr)) {668return true;669}670671if (inst->alu.mul.magic_write &&672v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr)) {673return true;674}675}676677return false;678}679680bool681v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr *inst)682{683if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {684switch (inst->alu.add.op) {685case V3D_QPU_A_RECIP:686case V3D_QPU_A_RSQRT:687case V3D_QPU_A_EXP:688case V3D_QPU_A_LOG:689case V3D_QPU_A_SIN:690case V3D_QPU_A_RSQRT2:691return true;692default:693return false;694}695}696return false;697}698699bool700v3d_qpu_writes_tmu(const struct v3d_device_info *devinfo,701const struct v3d_qpu_instr *inst)702{703return (inst->type == V3D_QPU_INSTR_TYPE_ALU &&704((inst->alu.add.magic_write &&705v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.add.waddr)) ||706(inst->alu.mul.magic_write &&707v3d_qpu_magic_waddr_is_tmu(devinfo, inst->alu.mul.waddr))));708}709710bool711v3d_qpu_writes_tmu_not_tmuc(const struct v3d_device_info *devinfo,712const struct v3d_qpu_instr *inst)713{714return v3d_qpu_writes_tmu(devinfo, inst) &&715(!inst->alu.add.magic_write ||716inst->alu.add.waddr != V3D_QPU_WADDR_TMUC) &&717(!inst->alu.mul.magic_write ||718inst->alu.mul.waddr != V3D_QPU_WADDR_TMUC);719}720721bool722v3d_qpu_reads_vpm(const struct v3d_qpu_instr *inst)723{724if (inst->sig.ldvpm)725return true;726727if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {728if (v3d_qpu_add_op_reads_vpm(inst->alu.add.op))729return true;730}731732return false;733}734735bool736v3d_qpu_writes_vpm(const struct v3d_qpu_instr *inst)737{738if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {739if (v3d_qpu_add_op_writes_vpm(inst->alu.add.op))740return true;741742if (inst->alu.add.magic_write &&743v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {744return true;745}746747if (inst->alu.mul.magic_write &&748v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {749return true;750}751}752753return false;754}755756bool757v3d_qpu_writes_unifa(const struct v3d_device_info *devinfo,758const struct v3d_qpu_instr *inst)759{760if (devinfo->ver < 40)761return false;762763if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {764if (inst->alu.add.op != V3D_QPU_A_NOP &&765inst->alu.add.magic_write &&766inst->alu.add.waddr == V3D_QPU_WADDR_UNIFA) {767return true;768}769770if (inst->alu.mul.op != V3D_QPU_M_NOP &&771inst->alu.mul.magic_write &&772inst->alu.mul.waddr == V3D_QPU_WADDR_UNIFA) {773return true;774}775}776777return false;778}779780static bool781v3d_qpu_waits_vpm(const struct v3d_qpu_instr *inst)782{783return inst->type == V3D_QPU_INSTR_TYPE_ALU &&784inst->alu.add.op == V3D_QPU_A_VPMWT;785}786787bool788v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)789{790return v3d_qpu_reads_vpm(inst) || v3d_qpu_writes_vpm(inst);791}792793bool794v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)795{796return v3d_qpu_reads_vpm(inst) ||797v3d_qpu_writes_vpm(inst) ||798v3d_qpu_waits_vpm(inst);799}800801static bool802qpu_writes_magic_waddr_explicitly(const struct v3d_device_info *devinfo,803const struct v3d_qpu_instr *inst,804uint32_t waddr)805{806if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {807if (inst->alu.add.magic_write && inst->alu.add.waddr == waddr)808return true;809810if (inst->alu.mul.magic_write && inst->alu.mul.waddr == waddr)811return true;812}813814if (v3d_qpu_sig_writes_address(devinfo, &inst->sig) &&815inst->sig_magic && inst->sig_addr == waddr) {816return true;817}818819return false;820}821822bool823v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,824const struct v3d_qpu_instr *inst)825{826if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R3))827return true;828829return (devinfo->ver < 41 && inst->sig.ldvary) || inst->sig.ldvpm;830}831832bool833v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,834const struct v3d_qpu_instr *inst)835{836if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {837if (inst->alu.add.magic_write &&838(inst->alu.add.waddr == V3D_QPU_WADDR_R4 ||839v3d_qpu_magic_waddr_is_sfu(inst->alu.add.waddr))) {840return true;841}842843if (inst->alu.mul.magic_write &&844(inst->alu.mul.waddr == V3D_QPU_WADDR_R4 ||845v3d_qpu_magic_waddr_is_sfu(inst->alu.mul.waddr))) {846return true;847}848}849850if (v3d_qpu_sig_writes_address(devinfo, &inst->sig)) {851if (inst->sig_magic && inst->sig_addr == V3D_QPU_WADDR_R4)852return true;853} else if (inst->sig.ldtmu) {854return true;855}856857return false;858}859860bool861v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,862const struct v3d_qpu_instr *inst)863{864if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R5))865return true;866867return inst->sig.ldvary || inst->sig.ldunif || inst->sig.ldunifa;868}869870bool871v3d_qpu_writes_accum(const struct v3d_device_info *devinfo,872const struct v3d_qpu_instr *inst)873{874if (v3d_qpu_writes_r5(devinfo, inst))875return true;876if (v3d_qpu_writes_r4(devinfo, inst))877return true;878if (v3d_qpu_writes_r3(devinfo, inst))879return true;880if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R2))881return true;882if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R1))883return true;884if (qpu_writes_magic_waddr_explicitly(devinfo, inst, V3D_QPU_WADDR_R0))885return true;886887return false;888}889890bool891v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux)892{893int add_nsrc = v3d_qpu_add_op_num_src(inst->alu.add.op);894int mul_nsrc = v3d_qpu_mul_op_num_src(inst->alu.mul.op);895896return ((add_nsrc > 0 && inst->alu.add.a == mux) ||897(add_nsrc > 1 && inst->alu.add.b == mux) ||898(mul_nsrc > 0 && inst->alu.mul.a == mux) ||899(mul_nsrc > 1 && inst->alu.mul.b == mux));900}901902bool903v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,904const struct v3d_qpu_sig *sig)905{906if (devinfo->ver < 41)907return false;908909return (sig->ldunifrf ||910sig->ldunifarf ||911sig->ldvary ||912sig->ldtmu ||913sig->ldtlb ||914sig->ldtlbu);915}916917bool918v3d_qpu_reads_flags(const struct v3d_qpu_instr *inst)919{920if (inst->type == V3D_QPU_INSTR_TYPE_BRANCH) {921return inst->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS;922} else if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {923if (inst->flags.ac != V3D_QPU_COND_NONE ||924inst->flags.mc != V3D_QPU_COND_NONE ||925inst->flags.auf != V3D_QPU_UF_NONE ||926inst->flags.muf != V3D_QPU_UF_NONE)927return true;928929switch (inst->alu.add.op) {930case V3D_QPU_A_VFLA:931case V3D_QPU_A_VFLNA:932case V3D_QPU_A_VFLB:933case V3D_QPU_A_VFLNB:934case V3D_QPU_A_FLAPUSH:935case V3D_QPU_A_FLBPUSH:936case V3D_QPU_A_FLAFIRST:937case V3D_QPU_A_FLNAFIRST:938return true;939default:940break;941}942}943944return false;945}946947bool948v3d_qpu_writes_flags(const struct v3d_qpu_instr *inst)949{950if (inst->flags.apf != V3D_QPU_PF_NONE ||951inst->flags.mpf != V3D_QPU_PF_NONE ||952inst->flags.auf != V3D_QPU_UF_NONE ||953inst->flags.muf != V3D_QPU_UF_NONE) {954return true;955}956957return false;958}959960bool961v3d_qpu_unpacks_f32(const struct v3d_qpu_instr *inst)962{963if (inst->type != V3D_QPU_INSTR_TYPE_ALU)964return false;965966switch (inst->alu.add.op) {967case V3D_QPU_A_FADD:968case V3D_QPU_A_FADDNF:969case V3D_QPU_A_FSUB:970case V3D_QPU_A_FMIN:971case V3D_QPU_A_FMAX:972case V3D_QPU_A_FCMP:973case V3D_QPU_A_FROUND:974case V3D_QPU_A_FTRUNC:975case V3D_QPU_A_FFLOOR:976case V3D_QPU_A_FCEIL:977case V3D_QPU_A_FDX:978case V3D_QPU_A_FDY:979case V3D_QPU_A_FTOIN:980case V3D_QPU_A_FTOIZ:981case V3D_QPU_A_FTOUZ:982case V3D_QPU_A_FTOC:983case V3D_QPU_A_VFPACK:984return true;985break;986default:987break;988}989990switch (inst->alu.mul.op) {991case V3D_QPU_M_FMOV:992case V3D_QPU_M_FMUL:993return true;994break;995default:996break;997}998999return false;1000}1001bool1002v3d_qpu_unpacks_f16(const struct v3d_qpu_instr *inst)1003{1004if (inst->type != V3D_QPU_INSTR_TYPE_ALU)1005return false;10061007switch (inst->alu.add.op) {1008case V3D_QPU_A_VFMIN:1009case V3D_QPU_A_VFMAX:1010return true;1011break;1012default:1013break;1014}10151016switch (inst->alu.mul.op) {1017case V3D_QPU_M_VFMUL:1018return true;1019break;1020default:1021break;1022}10231024return false;1025}10261027bool1028v3d_qpu_is_nop(struct v3d_qpu_instr *inst)1029{1030static const struct v3d_qpu_sig nosig = { 0 };10311032if (inst->type != V3D_QPU_INSTR_TYPE_ALU)1033return false;1034if (inst->alu.add.op != V3D_QPU_A_NOP)1035return false;1036if (inst->alu.mul.op != V3D_QPU_M_NOP)1037return false;1038if (memcmp(&inst->sig, &nosig, sizeof(nosig)))1039return false;1040return true;1041}104210431044