Path: blob/21.2-virgl/src/broadcom/qpu/qpu_pack.c
4560 views
/*1* Copyright © 2016 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <string.h>24#include "util/macros.h"25#include "util/bitscan.h"2627#include "broadcom/common/v3d_device_info.h"28#include "qpu_instr.h"2930#ifndef QPU_MASK31#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))32/* Using the GNU statement expression extension */33#define QPU_SET_FIELD(value, field) \34({ \35uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \36assert((fieldval & ~ field ## _MASK) == 0); \37fieldval & field ## _MASK; \38})3940#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))4142#define QPU_UPDATE_FIELD(inst, value, field) \43(((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))44#endif /* QPU_MASK */4546#define V3D_QPU_OP_MUL_SHIFT 5847#define V3D_QPU_OP_MUL_MASK QPU_MASK(63, 58)4849#define V3D_QPU_SIG_SHIFT 5350#define V3D_QPU_SIG_MASK QPU_MASK(57, 53)5152#define V3D_QPU_COND_SHIFT 4653#define V3D_QPU_COND_MASK QPU_MASK(52, 46)54#define V3D_QPU_COND_SIG_MAGIC_ADDR (1 << 6)5556#define V3D_QPU_MM QPU_MASK(45, 45)57#define V3D_QPU_MA QPU_MASK(44, 44)5859#define V3D_QPU_WADDR_M_SHIFT 3860#define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)6162#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT 3563#define V3D_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)6465#define V3D_QPU_WADDR_A_SHIFT 3266#define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)6768#define V3D_QPU_BRANCH_COND_SHIFT 3269#define V3D_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)7071#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT 2472#define V3D_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)7374#define V3D_QPU_OP_ADD_SHIFT 2475#define V3D_QPU_OP_ADD_MASK QPU_MASK(31, 24)7677#define V3D_QPU_MUL_B_SHIFT 2178#define V3D_QPU_MUL_B_MASK QPU_MASK(23, 21)7980#define V3D_QPU_BRANCH_MSFIGN_SHIFT 2181#define V3D_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)8283#define V3D_QPU_MUL_A_SHIFT 1884#define V3D_QPU_MUL_A_MASK QPU_MASK(20, 18)8586#define V3D_QPU_ADD_B_SHIFT 1587#define V3D_QPU_ADD_B_MASK QPU_MASK(17, 15)8889#define V3D_QPU_BRANCH_BDU_SHIFT 1590#define V3D_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)9192#define V3D_QPU_BRANCH_UB QPU_MASK(14, 14)9394#define V3D_QPU_ADD_A_SHIFT 1295#define V3D_QPU_ADD_A_MASK QPU_MASK(14, 12)9697#define V3D_QPU_BRANCH_BDI_SHIFT 1298#define V3D_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)99100#define V3D_QPU_RADDR_A_SHIFT 6101#define V3D_QPU_RADDR_A_MASK QPU_MASK(11, 6)102103#define V3D_QPU_RADDR_B_SHIFT 0104#define V3D_QPU_RADDR_B_MASK QPU_MASK(5, 0)105106#define THRSW .thrsw = true107#define LDUNIF .ldunif = true108#define LDUNIFRF .ldunifrf = true109#define LDUNIFA .ldunifa = true110#define LDUNIFARF .ldunifarf = true111#define LDTMU .ldtmu = true112#define LDVARY .ldvary = true113#define LDVPM .ldvpm = true114#define SMIMM .small_imm = true115#define LDTLB .ldtlb = true116#define LDTLBU .ldtlbu = true117#define UCB .ucb = true118#define ROT .rotate = true119#define WRTMUC .wrtmuc = true120121static const struct v3d_qpu_sig v33_sig_map[] = {122/* MISC R3 R4 R5 */123[0] = { },124[1] = { THRSW, },125[2] = { LDUNIF },126[3] = { THRSW, LDUNIF },127[4] = { LDTMU, },128[5] = { THRSW, LDTMU, },129[6] = { LDTMU, LDUNIF },130[7] = { THRSW, LDTMU, LDUNIF },131[8] = { LDVARY, },132[9] = { THRSW, LDVARY, },133[10] = { LDVARY, LDUNIF },134[11] = { THRSW, LDVARY, LDUNIF },135[12] = { LDVARY, LDTMU, },136[13] = { THRSW, LDVARY, LDTMU, },137[14] = { SMIMM, LDVARY, },138[15] = { SMIMM, },139[16] = { LDTLB, },140[17] = { LDTLBU, },141/* 18-21 reserved */142[22] = { UCB, },143[23] = { ROT, },144[24] = { LDVPM, },145[25] = { THRSW, LDVPM, },146[26] = { LDVPM, LDUNIF },147[27] = { THRSW, LDVPM, LDUNIF },148[28] = { LDVPM, LDTMU, },149[29] = { THRSW, LDVPM, LDTMU, },150[30] = { SMIMM, LDVPM, },151[31] = { SMIMM, },152};153154static const struct v3d_qpu_sig v40_sig_map[] = {155/* MISC R3 R4 R5 */156[0] = { },157[1] = { THRSW, },158[2] = { LDUNIF },159[3] = { THRSW, LDUNIF },160[4] = { LDTMU, },161[5] = { THRSW, LDTMU, },162[6] = { LDTMU, LDUNIF },163[7] = { THRSW, LDTMU, LDUNIF },164[8] = { LDVARY, },165[9] = { THRSW, LDVARY, },166[10] = { LDVARY, LDUNIF },167[11] = { THRSW, LDVARY, LDUNIF },168/* 12-13 reserved */169[14] = { SMIMM, LDVARY, },170[15] = { SMIMM, },171[16] = { LDTLB, },172[17] = { LDTLBU, },173[18] = { WRTMUC },174[19] = { THRSW, WRTMUC },175[20] = { LDVARY, WRTMUC },176[21] = { THRSW, LDVARY, WRTMUC },177[22] = { UCB, },178[23] = { ROT, },179/* 24-30 reserved */180[31] = { SMIMM, LDTMU, },181};182183static const struct v3d_qpu_sig v41_sig_map[] = {184/* MISC phys R5 */185[0] = { },186[1] = { THRSW, },187[2] = { LDUNIF },188[3] = { THRSW, LDUNIF },189[4] = { LDTMU, },190[5] = { THRSW, LDTMU, },191[6] = { LDTMU, LDUNIF },192[7] = { THRSW, LDTMU, LDUNIF },193[8] = { LDVARY, },194[9] = { THRSW, LDVARY, },195[10] = { LDVARY, LDUNIF },196[11] = { THRSW, LDVARY, LDUNIF },197[12] = { LDUNIFRF },198[13] = { THRSW, LDUNIFRF },199[14] = { SMIMM, LDVARY, },200[15] = { SMIMM, },201[16] = { LDTLB, },202[17] = { LDTLBU, },203[18] = { WRTMUC },204[19] = { THRSW, WRTMUC },205[20] = { LDVARY, WRTMUC },206[21] = { THRSW, LDVARY, WRTMUC },207[22] = { UCB, },208[23] = { ROT, },209/* 24-30 reserved */210[24] = { LDUNIFA},211[25] = { LDUNIFARF },212[31] = { SMIMM, LDTMU, },213};214215bool216v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,217uint32_t packed_sig,218struct v3d_qpu_sig *sig)219{220if (packed_sig >= ARRAY_SIZE(v33_sig_map))221return false;222223if (devinfo->ver >= 41)224*sig = v41_sig_map[packed_sig];225else if (devinfo->ver == 40)226*sig = v40_sig_map[packed_sig];227else228*sig = v33_sig_map[packed_sig];229230/* Signals with zeroed unpacked contents after element 0 are reserved. */231return (packed_sig == 0 ||232memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);233}234235bool236v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,237const struct v3d_qpu_sig *sig,238uint32_t *packed_sig)239{240static const struct v3d_qpu_sig *map;241242if (devinfo->ver >= 41)243map = v41_sig_map;244else if (devinfo->ver == 40)245map = v40_sig_map;246else247map = v33_sig_map;248249for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {250if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {251*packed_sig = i;252return true;253}254}255256return false;257}258static inline unsigned259fui( float f )260{261union {float f; unsigned ui;} fi;262fi.f = f;263return fi.ui;264}265266static const uint32_t small_immediates[] = {2670, 1, 2, 3,2684, 5, 6, 7,2698, 9, 10, 11,27012, 13, 14, 15,271-16, -15, -14, -13,272-12, -11, -10, -9,273-8, -7, -6, -5,274-4, -3, -2, -1,2750x3b800000, /* 2.0^-8 */2760x3c000000, /* 2.0^-7 */2770x3c800000, /* 2.0^-6 */2780x3d000000, /* 2.0^-5 */2790x3d800000, /* 2.0^-4 */2800x3e000000, /* 2.0^-3 */2810x3e800000, /* 2.0^-2 */2820x3f000000, /* 2.0^-1 */2830x3f800000, /* 2.0^0 */2840x40000000, /* 2.0^1 */2850x40800000, /* 2.0^2 */2860x41000000, /* 2.0^3 */2870x41800000, /* 2.0^4 */2880x42000000, /* 2.0^5 */2890x42800000, /* 2.0^6 */2900x43000000, /* 2.0^7 */291};292293bool294v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,295uint32_t packed_small_immediate,296uint32_t *small_immediate)297{298if (packed_small_immediate >= ARRAY_SIZE(small_immediates))299return false;300301*small_immediate = small_immediates[packed_small_immediate];302return true;303}304305bool306v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,307uint32_t value,308uint32_t *packed_small_immediate)309{310STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);311312for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {313if (small_immediates[i] == value) {314*packed_small_immediate = i;315return true;316}317}318319return false;320}321322bool323v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,324uint32_t packed_cond,325struct v3d_qpu_flags *cond)326{327static const enum v3d_qpu_cond cond_map[4] = {328[0] = V3D_QPU_COND_IFA,329[1] = V3D_QPU_COND_IFB,330[2] = V3D_QPU_COND_IFNA,331[3] = V3D_QPU_COND_IFNB,332};333334cond->ac = V3D_QPU_COND_NONE;335cond->mc = V3D_QPU_COND_NONE;336cond->apf = V3D_QPU_PF_NONE;337cond->mpf = V3D_QPU_PF_NONE;338cond->auf = V3D_QPU_UF_NONE;339cond->muf = V3D_QPU_UF_NONE;340341if (packed_cond == 0) {342return true;343} else if (packed_cond >> 2 == 0) {344cond->apf = packed_cond & 0x3;345} else if (packed_cond >> 4 == 0) {346cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;347} else if (packed_cond == 0x10) {348return false;349} else if (packed_cond >> 2 == 0x4) {350cond->mpf = packed_cond & 0x3;351} else if (packed_cond >> 4 == 0x1) {352cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;353} else if (packed_cond >> 4 == 0x2) {354cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;355cond->mpf = packed_cond & 0x3;356} else if (packed_cond >> 4 == 0x3) {357cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;358cond->apf = packed_cond & 0x3;359} else if (packed_cond >> 6) {360cond->mc = cond_map[(packed_cond >> 4) & 0x3];361if (((packed_cond >> 2) & 0x3) == 0) {362cond->ac = cond_map[packed_cond & 0x3];363} else {364cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;365}366}367368return true;369}370371bool372v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,373const struct v3d_qpu_flags *cond,374uint32_t *packed_cond)375{376#define AC (1 << 0)377#define MC (1 << 1)378#define APF (1 << 2)379#define MPF (1 << 3)380#define AUF (1 << 4)381#define MUF (1 << 5)382static const struct {383uint8_t flags_present;384uint8_t bits;385} flags_table[] = {386{ 0, 0 },387{ APF, 0 },388{ AUF, 0 },389{ MPF, (1 << 4) },390{ MUF, (1 << 4) },391{ AC, (1 << 5) },392{ AC | MPF, (1 << 5) },393{ MC, (1 << 5) | (1 << 4) },394{ MC | APF, (1 << 5) | (1 << 4) },395{ MC | AC, (1 << 6) },396{ MC | AUF, (1 << 6) },397};398399uint8_t flags_present = 0;400if (cond->ac != V3D_QPU_COND_NONE)401flags_present |= AC;402if (cond->mc != V3D_QPU_COND_NONE)403flags_present |= MC;404if (cond->apf != V3D_QPU_PF_NONE)405flags_present |= APF;406if (cond->mpf != V3D_QPU_PF_NONE)407flags_present |= MPF;408if (cond->auf != V3D_QPU_UF_NONE)409flags_present |= AUF;410if (cond->muf != V3D_QPU_UF_NONE)411flags_present |= MUF;412413for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {414if (flags_table[i].flags_present != flags_present)415continue;416417*packed_cond = flags_table[i].bits;418419*packed_cond |= cond->apf;420*packed_cond |= cond->mpf;421422if (flags_present & AUF)423*packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;424if (flags_present & MUF)425*packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;426427if (flags_present & AC)428*packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;429430if (flags_present & MC) {431if (*packed_cond & (1 << 6))432*packed_cond |= (cond->mc -433V3D_QPU_COND_IFA) << 4;434else435*packed_cond |= (cond->mc -436V3D_QPU_COND_IFA) << 2;437}438439return true;440}441442return false;443}444445/* Make a mapping of the table of opcodes in the spec. The opcode is446* determined by a combination of the opcode field, and in the case of 0 or447* 1-arg opcodes, the mux_b field as well.448*/449#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))450#define ANYMUX MUX_MASK(0, 7)451452struct opcode_desc {453uint8_t opcode_first;454uint8_t opcode_last;455uint8_t mux_b_mask;456uint8_t mux_a_mask;457uint8_t op;458/* 0 if it's the same across V3D versions, or a specific V3D version. */459uint8_t ver;460};461462static const struct opcode_desc add_ops[] = {463/* FADD is FADDNF depending on the order of the mux_a/mux_b. */464{ 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADD },465{ 0, 47, ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },466{ 53, 55, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },467{ 56, 56, ANYMUX, ANYMUX, V3D_QPU_A_ADD },468{ 57, 59, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },469{ 60, 60, ANYMUX, ANYMUX, V3D_QPU_A_SUB },470{ 61, 63, ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },471{ 64, 111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },472{ 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },473{ 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },474{ 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },475{ 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },476{ 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },477{ 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },478{ 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },479{ 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },480/* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */481{ 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },482{ 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },483{ 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },484485{ 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },486{ 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },487{ 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },488489{ 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },490{ 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },491{ 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },492{ 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },493{ 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },494{ 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },495{ 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },496{ 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },497{ 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },498{ 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },499{ 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },500{ 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },501{ 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },502{ 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },503{ 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },504{ 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },505{ 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },506{ 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },507508{ 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },509{ 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },510{ 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },511{ 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },512513{ 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },514{ 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },515{ 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT, 33 },516{ 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID, 40 },517{ 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID, 40 },518{ 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID, 40 },519{ 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },520{ 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },521{ 187, 187, 1 << 2, 1 << 7, V3D_QPU_A_FLAFIRST, 41 },522{ 187, 187, 1 << 3, 1 << 0, V3D_QPU_A_FLNAFIRST, 41 },523{ 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },524525{ 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },526{ 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },527{ 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },528{ 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },529{ 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },530{ 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },531{ 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },532{ 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },533{ 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },534{ 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },535{ 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },536{ 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },537538/* FIXME: MORE COMPLICATED */539/* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */540541{ 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },542{ 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },543544{ 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },545{ 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },546{ 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },547{ 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },548{ 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },549{ 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },550{ 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },551{ 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },552553{ 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },554{ 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },555556/* The stvpms are distinguished by the waddr field. */557{ 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },558{ 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },559{ 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },560561{ 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },562{ 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },563{ 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },564};565566static const struct opcode_desc mul_ops[] = {567{ 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },568{ 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },569{ 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },570{ 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },571{ 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },572{ 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },573{ 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },574{ 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },575{ 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },576{ 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },577{ 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },578};579580static const struct opcode_desc *581lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,582uint32_t opcode, uint32_t mux_a, uint32_t mux_b)583{584for (int i = 0; i < num_opcodes; i++) {585const struct opcode_desc *op_desc = &opcodes[i];586587if (opcode < op_desc->opcode_first ||588opcode > op_desc->opcode_last)589continue;590591if (!(op_desc->mux_b_mask & (1 << mux_b)))592continue;593594if (!(op_desc->mux_a_mask & (1 << mux_a)))595continue;596597return op_desc;598}599600return NULL;601}602603static bool604v3d_qpu_float32_unpack_unpack(uint32_t packed,605enum v3d_qpu_input_unpack *unpacked)606{607switch (packed) {608case 0:609*unpacked = V3D_QPU_UNPACK_ABS;610return true;611case 1:612*unpacked = V3D_QPU_UNPACK_NONE;613return true;614case 2:615*unpacked = V3D_QPU_UNPACK_L;616return true;617case 3:618*unpacked = V3D_QPU_UNPACK_H;619return true;620default:621return false;622}623}624625static bool626v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,627uint32_t *packed)628{629switch (unpacked) {630case V3D_QPU_UNPACK_ABS:631*packed = 0;632return true;633case V3D_QPU_UNPACK_NONE:634*packed = 1;635return true;636case V3D_QPU_UNPACK_L:637*packed = 2;638return true;639case V3D_QPU_UNPACK_H:640*packed = 3;641return true;642default:643return false;644}645}646647static bool648v3d_qpu_float16_unpack_unpack(uint32_t packed,649enum v3d_qpu_input_unpack *unpacked)650{651switch (packed) {652case 0:653*unpacked = V3D_QPU_UNPACK_NONE;654return true;655case 1:656*unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;657return true;658case 2:659*unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;660return true;661case 3:662*unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;663return true;664case 4:665*unpacked = V3D_QPU_UNPACK_SWAP_16;666return true;667default:668return false;669}670}671672static bool673v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,674uint32_t *packed)675{676switch (unpacked) {677case V3D_QPU_UNPACK_NONE:678*packed = 0;679return true;680case V3D_QPU_UNPACK_REPLICATE_32F_16:681*packed = 1;682return true;683case V3D_QPU_UNPACK_REPLICATE_L_16:684*packed = 2;685return true;686case V3D_QPU_UNPACK_REPLICATE_H_16:687*packed = 3;688return true;689case V3D_QPU_UNPACK_SWAP_16:690*packed = 4;691return true;692default:693return false;694}695}696697static bool698v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,699uint32_t *packed)700{701switch (unpacked) {702case V3D_QPU_PACK_NONE:703*packed = 0;704return true;705case V3D_QPU_PACK_L:706*packed = 1;707return true;708case V3D_QPU_PACK_H:709*packed = 2;710return true;711default:712return false;713}714}715716static bool717v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,718struct v3d_qpu_instr *instr)719{720uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);721uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);722uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);723uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);724725uint32_t map_op = op;726/* Some big clusters of opcodes are replicated with unpack727* flags728*/729if (map_op >= 249 && map_op <= 251)730map_op = (map_op - 249 + 245);731if (map_op >= 253 && map_op <= 255)732map_op = (map_op - 253 + 245);733734const struct opcode_desc *desc =735lookup_opcode(add_ops, ARRAY_SIZE(add_ops),736map_op, mux_a, mux_b);737if (!desc)738return false;739740instr->alu.add.op = desc->op;741742/* FADD/FADDNF and FMIN/FMAX are determined by the orders of the743* operands.744*/745if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {746if (instr->alu.add.op == V3D_QPU_A_FMIN)747instr->alu.add.op = V3D_QPU_A_FMAX;748if (instr->alu.add.op == V3D_QPU_A_FADD)749instr->alu.add.op = V3D_QPU_A_FADDNF;750}751752/* Some QPU ops require a bit more than just basic opcode and mux a/b753* comparisons to distinguish them.754*/755switch (instr->alu.add.op) {756case V3D_QPU_A_STVPMV:757case V3D_QPU_A_STVPMD:758case V3D_QPU_A_STVPMP:759switch (waddr) {760case 0:761instr->alu.add.op = V3D_QPU_A_STVPMV;762break;763case 1:764instr->alu.add.op = V3D_QPU_A_STVPMD;765break;766case 2:767instr->alu.add.op = V3D_QPU_A_STVPMP;768break;769default:770return false;771}772break;773default:774break;775}776777switch (instr->alu.add.op) {778case V3D_QPU_A_FADD:779case V3D_QPU_A_FADDNF:780case V3D_QPU_A_FSUB:781case V3D_QPU_A_FMIN:782case V3D_QPU_A_FMAX:783case V3D_QPU_A_FCMP:784case V3D_QPU_A_VFPACK:785if (instr->alu.add.op != V3D_QPU_A_VFPACK)786instr->alu.add.output_pack = (op >> 4) & 0x3;787else788instr->alu.add.output_pack = V3D_QPU_PACK_NONE;789790if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,791&instr->alu.add.a_unpack)) {792return false;793}794795if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,796&instr->alu.add.b_unpack)) {797return false;798}799break;800801case V3D_QPU_A_FFLOOR:802case V3D_QPU_A_FROUND:803case V3D_QPU_A_FTRUNC:804case V3D_QPU_A_FCEIL:805case V3D_QPU_A_FDX:806case V3D_QPU_A_FDY:807instr->alu.add.output_pack = mux_b & 0x3;808809if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,810&instr->alu.add.a_unpack)) {811return false;812}813break;814815case V3D_QPU_A_FTOIN:816case V3D_QPU_A_FTOIZ:817case V3D_QPU_A_FTOUZ:818case V3D_QPU_A_FTOC:819instr->alu.add.output_pack = V3D_QPU_PACK_NONE;820821if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,822&instr->alu.add.a_unpack)) {823return false;824}825break;826827case V3D_QPU_A_VFMIN:828case V3D_QPU_A_VFMAX:829if (!v3d_qpu_float16_unpack_unpack(op & 0x7,830&instr->alu.add.a_unpack)) {831return false;832}833834instr->alu.add.output_pack = V3D_QPU_PACK_NONE;835instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;836break;837838default:839instr->alu.add.output_pack = V3D_QPU_PACK_NONE;840instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;841instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;842break;843}844845instr->alu.add.a = mux_a;846instr->alu.add.b = mux_b;847instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);848849instr->alu.add.magic_write = false;850if (packed_inst & V3D_QPU_MA) {851switch (instr->alu.add.op) {852case V3D_QPU_A_LDVPMV_IN:853instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;854break;855case V3D_QPU_A_LDVPMD_IN:856instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;857break;858case V3D_QPU_A_LDVPMG_IN:859instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;860break;861default:862instr->alu.add.magic_write = true;863break;864}865}866867return true;868}869870static bool871v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,872struct v3d_qpu_instr *instr)873{874uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);875uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);876uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);877878{879const struct opcode_desc *desc =880lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),881op, mux_a, mux_b);882if (!desc)883return false;884885instr->alu.mul.op = desc->op;886}887888switch (instr->alu.mul.op) {889case V3D_QPU_M_FMUL:890instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;891892if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,893&instr->alu.mul.a_unpack)) {894return false;895}896897if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,898&instr->alu.mul.b_unpack)) {899return false;900}901902break;903904case V3D_QPU_M_FMOV:905instr->alu.mul.output_pack = (((op & 1) << 1) +906((mux_b >> 2) & 1));907908if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,909&instr->alu.mul.a_unpack)) {910return false;911}912913break;914915case V3D_QPU_M_VFMUL:916instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;917918if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,919&instr->alu.mul.a_unpack)) {920return false;921}922923instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;924925break;926927default:928instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;929instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;930instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;931break;932}933934instr->alu.mul.a = mux_a;935instr->alu.mul.b = mux_b;936instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);937instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;938939return true;940}941942static bool943v3d_qpu_add_pack(const struct v3d_device_info *devinfo,944const struct v3d_qpu_instr *instr, uint64_t *packed_instr)945{946uint32_t waddr = instr->alu.add.waddr;947uint32_t mux_a = instr->alu.add.a;948uint32_t mux_b = instr->alu.add.b;949int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);950const struct opcode_desc *desc;951952int opcode;953for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];954desc++) {955if (desc->op == instr->alu.add.op)956break;957}958if (desc == &add_ops[ARRAY_SIZE(add_ops)])959return false;960961opcode = desc->opcode_first;962963/* If an operation doesn't use an arg, its mux values may be used to964* identify the operation type.965*/966if (nsrc < 2)967mux_b = ffs(desc->mux_b_mask) - 1;968969if (nsrc < 1)970mux_a = ffs(desc->mux_a_mask) - 1;971972bool no_magic_write = false;973974switch (instr->alu.add.op) {975case V3D_QPU_A_STVPMV:976waddr = 0;977no_magic_write = true;978break;979case V3D_QPU_A_STVPMD:980waddr = 1;981no_magic_write = true;982break;983case V3D_QPU_A_STVPMP:984waddr = 2;985no_magic_write = true;986break;987988case V3D_QPU_A_LDVPMV_IN:989case V3D_QPU_A_LDVPMD_IN:990case V3D_QPU_A_LDVPMP:991case V3D_QPU_A_LDVPMG_IN:992assert(!instr->alu.add.magic_write);993break;994995case V3D_QPU_A_LDVPMV_OUT:996case V3D_QPU_A_LDVPMD_OUT:997case V3D_QPU_A_LDVPMG_OUT:998assert(!instr->alu.add.magic_write);999*packed_instr |= V3D_QPU_MA;1000break;10011002default:1003break;1004}10051006switch (instr->alu.add.op) {1007case V3D_QPU_A_FADD:1008case V3D_QPU_A_FADDNF:1009case V3D_QPU_A_FSUB:1010case V3D_QPU_A_FMIN:1011case V3D_QPU_A_FMAX:1012case V3D_QPU_A_FCMP: {1013uint32_t output_pack;1014uint32_t a_unpack;1015uint32_t b_unpack;10161017if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,1018&output_pack)) {1019return false;1020}1021opcode |= output_pack << 4;10221023if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,1024&a_unpack)) {1025return false;1026}10271028if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,1029&b_unpack)) {1030return false;1031}10321033/* These operations with commutative operands are1034* distinguished by which order their operands come in.1035*/1036bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;1037if (((instr->alu.add.op == V3D_QPU_A_FMIN ||1038instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||1039((instr->alu.add.op == V3D_QPU_A_FMAX ||1040instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {1041uint32_t temp;10421043temp = a_unpack;1044a_unpack = b_unpack;1045b_unpack = temp;10461047temp = mux_a;1048mux_a = mux_b;1049mux_b = temp;1050}10511052opcode |= a_unpack << 2;1053opcode |= b_unpack << 0;10541055break;1056}10571058case V3D_QPU_A_VFPACK: {1059uint32_t a_unpack;1060uint32_t b_unpack;10611062if (instr->alu.add.a_unpack == V3D_QPU_UNPACK_ABS ||1063instr->alu.add.b_unpack == V3D_QPU_UNPACK_ABS) {1064return false;1065}10661067if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,1068&a_unpack)) {1069return false;1070}10711072if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,1073&b_unpack)) {1074return false;1075}10761077opcode = (opcode & ~(1 << 2)) | (a_unpack << 2);1078opcode = (opcode & ~(1 << 0)) | (b_unpack << 0);10791080break;1081}10821083case V3D_QPU_A_FFLOOR:1084case V3D_QPU_A_FROUND:1085case V3D_QPU_A_FTRUNC:1086case V3D_QPU_A_FCEIL:1087case V3D_QPU_A_FDX:1088case V3D_QPU_A_FDY: {1089uint32_t packed;10901091if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,1092&packed)) {1093return false;1094}1095mux_b |= packed;10961097if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,1098&packed)) {1099return false;1100}1101if (packed == 0)1102return false;1103opcode = (opcode & ~(1 << 2)) | packed << 2;1104break;1105}11061107case V3D_QPU_A_FTOIN:1108case V3D_QPU_A_FTOIZ:1109case V3D_QPU_A_FTOUZ:1110case V3D_QPU_A_FTOC:1111if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)1112return false;11131114uint32_t packed;1115if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,1116&packed)) {1117return false;1118}1119if (packed == 0)1120return false;1121opcode |= packed << 2;11221123break;11241125case V3D_QPU_A_VFMIN:1126case V3D_QPU_A_VFMAX:1127if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||1128instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {1129return false;1130}11311132if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,1133&packed)) {1134return false;1135}1136opcode |= packed;1137break;11381139default:1140if (instr->alu.add.op != V3D_QPU_A_NOP &&1141(instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||1142instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||1143instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {1144return false;1145}1146break;1147}11481149*packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);1150*packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);1151*packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);1152*packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);1153if (instr->alu.add.magic_write && !no_magic_write)1154*packed_instr |= V3D_QPU_MA;11551156return true;1157}11581159static bool1160v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,1161const struct v3d_qpu_instr *instr, uint64_t *packed_instr)1162{1163uint32_t mux_a = instr->alu.mul.a;1164uint32_t mux_b = instr->alu.mul.b;1165int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);1166const struct opcode_desc *desc;11671168for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];1169desc++) {1170if (desc->op == instr->alu.mul.op)1171break;1172}1173if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])1174return false;11751176uint32_t opcode = desc->opcode_first;11771178/* Some opcodes have a single valid value for their mux a/b, so set1179* that here. If mux a/b determine packing, it will be set below.1180*/1181if (nsrc < 2)1182mux_b = ffs(desc->mux_b_mask) - 1;11831184if (nsrc < 1)1185mux_a = ffs(desc->mux_a_mask) - 1;11861187switch (instr->alu.mul.op) {1188case V3D_QPU_M_FMUL: {1189uint32_t packed;11901191if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,1192&packed)) {1193return false;1194}1195/* No need for a +1 because desc->opcode_first has a 1 in this1196* field.1197*/1198opcode += packed << 4;11991200if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,1201&packed)) {1202return false;1203}1204opcode |= packed << 2;12051206if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,1207&packed)) {1208return false;1209}1210opcode |= packed << 0;1211break;1212}12131214case V3D_QPU_M_FMOV: {1215uint32_t packed;12161217if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,1218&packed)) {1219return false;1220}1221opcode |= (packed >> 1) & 1;1222mux_b = (packed & 1) << 2;12231224if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,1225&packed)) {1226return false;1227}1228mux_b |= packed;1229break;1230}12311232case V3D_QPU_M_VFMUL: {1233uint32_t packed;12341235if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)1236return false;12371238if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,1239&packed)) {1240return false;1241}1242if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)1243opcode = 8;1244else1245opcode |= (packed + 4) & 7;12461247if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)1248return false;12491250break;1251}12521253default:1254break;1255}12561257*packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);1258*packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);12591260*packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);1261*packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);1262if (instr->alu.mul.magic_write)1263*packed_instr |= V3D_QPU_MM;12641265return true;1266}12671268static bool1269v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,1270uint64_t packed_instr,1271struct v3d_qpu_instr *instr)1272{1273instr->type = V3D_QPU_INSTR_TYPE_ALU;12741275if (!v3d_qpu_sig_unpack(devinfo,1276QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),1277&instr->sig))1278return false;12791280uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);1281if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {1282instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;1283instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;12841285instr->flags.ac = V3D_QPU_COND_NONE;1286instr->flags.mc = V3D_QPU_COND_NONE;1287instr->flags.apf = V3D_QPU_PF_NONE;1288instr->flags.mpf = V3D_QPU_PF_NONE;1289instr->flags.auf = V3D_QPU_UF_NONE;1290instr->flags.muf = V3D_QPU_UF_NONE;1291} else {1292if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))1293return false;1294}12951296instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);1297instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);12981299if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))1300return false;13011302if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))1303return false;13041305return true;1306}13071308static bool1309v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,1310uint64_t packed_instr,1311struct v3d_qpu_instr *instr)1312{1313instr->type = V3D_QPU_INSTR_TYPE_BRANCH;13141315uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);1316if (cond == 0)1317instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;1318else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=1319V3D_QPU_BRANCH_COND_ALLNA)1320instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);1321else1322return false;13231324uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);1325if (msfign == 3)1326return false;1327instr->branch.msfign = msfign;13281329instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);13301331instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;1332if (instr->branch.ub) {1333instr->branch.bdu = QPU_GET_FIELD(packed_instr,1334V3D_QPU_BRANCH_BDU);1335}13361337instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,1338V3D_QPU_RADDR_A);13391340instr->branch.offset = 0;13411342instr->branch.offset +=1343QPU_GET_FIELD(packed_instr,1344V3D_QPU_BRANCH_ADDR_LOW) << 3;13451346instr->branch.offset +=1347QPU_GET_FIELD(packed_instr,1348V3D_QPU_BRANCH_ADDR_HIGH) << 24;13491350return true;1351}13521353bool1354v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,1355uint64_t packed_instr,1356struct v3d_qpu_instr *instr)1357{1358if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {1359return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);1360} else {1361uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);13621363if ((sig & 24) == 16) {1364return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,1365instr);1366} else {1367return false;1368}1369}1370}13711372static bool1373v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,1374const struct v3d_qpu_instr *instr,1375uint64_t *packed_instr)1376{1377uint32_t sig;1378if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))1379return false;1380*packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);13811382if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {1383*packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);1384*packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);13851386if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))1387return false;1388if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))1389return false;13901391uint32_t flags;1392if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {1393if (instr->flags.ac != V3D_QPU_COND_NONE ||1394instr->flags.mc != V3D_QPU_COND_NONE ||1395instr->flags.apf != V3D_QPU_PF_NONE ||1396instr->flags.mpf != V3D_QPU_PF_NONE ||1397instr->flags.auf != V3D_QPU_UF_NONE ||1398instr->flags.muf != V3D_QPU_UF_NONE) {1399return false;1400}14011402flags = instr->sig_addr;1403if (instr->sig_magic)1404flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;1405} else {1406if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))1407return false;1408}14091410*packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);1411} else {1412if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))1413return false;1414}14151416return true;1417}14181419static bool1420v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,1421const struct v3d_qpu_instr *instr,1422uint64_t *packed_instr)1423{1424*packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);14251426if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {1427*packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -1428V3D_QPU_BRANCH_COND_A0),1429V3D_QPU_BRANCH_COND);1430}14311432*packed_instr |= QPU_SET_FIELD(instr->branch.msfign,1433V3D_QPU_BRANCH_MSFIGN);14341435*packed_instr |= QPU_SET_FIELD(instr->branch.bdi,1436V3D_QPU_BRANCH_BDI);14371438if (instr->branch.ub) {1439*packed_instr |= V3D_QPU_BRANCH_UB;1440*packed_instr |= QPU_SET_FIELD(instr->branch.bdu,1441V3D_QPU_BRANCH_BDU);1442}14431444switch (instr->branch.bdi) {1445case V3D_QPU_BRANCH_DEST_ABS:1446case V3D_QPU_BRANCH_DEST_REL:1447*packed_instr |= QPU_SET_FIELD(instr->branch.msfign,1448V3D_QPU_BRANCH_MSFIGN);14491450*packed_instr |= QPU_SET_FIELD((instr->branch.offset &1451~0xff000000) >> 3,1452V3D_QPU_BRANCH_ADDR_LOW);14531454*packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,1455V3D_QPU_BRANCH_ADDR_HIGH);1456break;1457default:1458break;1459}14601461if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||1462instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {1463*packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,1464V3D_QPU_RADDR_A);1465}14661467return true;1468}14691470bool1471v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,1472const struct v3d_qpu_instr *instr,1473uint64_t *packed_instr)1474{1475*packed_instr = 0;14761477switch (instr->type) {1478case V3D_QPU_INSTR_TYPE_ALU:1479return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);1480case V3D_QPU_INSTR_TYPE_BRANCH:1481return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);1482default:1483return false;1484}1485}148614871488