Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_qpu.c
4570 views
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <stdbool.h>24#include "util/ralloc.h"25#include "vc4_qir.h"26#include "vc4_qpu.h"2728#define QPU_MUX(mux, muxfield) \29QPU_SET_FIELD(mux != QPU_MUX_SMALL_IMM ? mux : QPU_MUX_B, muxfield)3031static uint64_t32set_src_raddr(uint64_t inst, struct qpu_reg src)33{34if (src.mux == QPU_MUX_A) {35assert(QPU_GET_FIELD(inst, QPU_RADDR_A) == QPU_R_NOP ||36QPU_GET_FIELD(inst, QPU_RADDR_A) == src.addr);37return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_A);38}3940if (src.mux == QPU_MUX_B) {41assert((QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP ||42QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr) &&43QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM);44return QPU_UPDATE_FIELD(inst, src.addr, QPU_RADDR_B);45}4647if (src.mux == QPU_MUX_SMALL_IMM) {48if (QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {49assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == src.addr);50} else {51inst = qpu_set_sig(inst, QPU_SIG_SMALL_IMM);52assert(QPU_GET_FIELD(inst, QPU_RADDR_B) == QPU_R_NOP);53}54return ((inst & ~QPU_RADDR_B_MASK) |55QPU_SET_FIELD(src.addr, QPU_RADDR_B));56}5758return inst;59}6061uint64_t62qpu_NOP()63{64uint64_t inst = 0;6566inst |= QPU_SET_FIELD(QPU_A_NOP, QPU_OP_ADD);67inst |= QPU_SET_FIELD(QPU_M_NOP, QPU_OP_MUL);6869/* Note: These field values are actually non-zero */70inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);71inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);72inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);73inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);74inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);7576return inst;77}7879static uint64_t80qpu_a_dst(struct qpu_reg dst)81{82uint64_t inst = 0;8384if (dst.mux <= QPU_MUX_R5) {85/* Translate the mux to the ACCn values. */86inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_ADD);87} else {88inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_ADD);89if (dst.mux == QPU_MUX_B)90inst |= QPU_WS;91}9293return inst;94}9596static uint64_t97qpu_m_dst(struct qpu_reg dst)98{99uint64_t inst = 0;100101if (dst.mux <= QPU_MUX_R5) {102/* Translate the mux to the ACCn values. */103inst |= QPU_SET_FIELD(32 + dst.mux, QPU_WADDR_MUL);104} else {105inst |= QPU_SET_FIELD(dst.addr, QPU_WADDR_MUL);106if (dst.mux == QPU_MUX_A)107inst |= QPU_WS;108}109110return inst;111}112113uint64_t114qpu_a_MOV(struct qpu_reg dst, struct qpu_reg src)115{116uint64_t inst = 0;117118inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);119inst |= QPU_SET_FIELD(QPU_A_OR, QPU_OP_ADD);120inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);121inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);122inst |= qpu_a_dst(dst);123inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);124inst |= QPU_MUX(src.mux, QPU_ADD_A);125inst |= QPU_MUX(src.mux, QPU_ADD_B);126inst = set_src_raddr(inst, src);127inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);128129return inst;130}131132uint64_t133qpu_m_MOV(struct qpu_reg dst, struct qpu_reg src)134{135uint64_t inst = 0;136137inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);138inst |= QPU_SET_FIELD(QPU_M_V8MIN, QPU_OP_MUL);139inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);140inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);141inst |= qpu_m_dst(dst);142inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);143inst |= QPU_MUX(src.mux, QPU_MUL_A);144inst |= QPU_MUX(src.mux, QPU_MUL_B);145inst = set_src_raddr(inst, src);146inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);147148return inst;149}150151uint64_t152qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)153{154uint64_t inst = 0;155156inst |= qpu_a_dst(dst);157inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);158inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);159inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);160inst |= QPU_SET_FIELD(QPU_SIG_LOAD_IMM, QPU_SIG);161inst |= val;162163return inst;164}165166uint64_t167qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)168{169return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2,170QPU_LOAD_IMM_MODE);171}172173uint64_t174qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)175{176return qpu_load_imm_ui(dst, val) | QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2,177QPU_LOAD_IMM_MODE);178}179180uint64_t181qpu_branch(uint32_t cond, uint32_t target)182{183uint64_t inst = 0;184185inst |= qpu_a_dst(qpu_ra(QPU_W_NOP));186inst |= qpu_m_dst(qpu_rb(QPU_W_NOP));187inst |= QPU_SET_FIELD(cond, QPU_BRANCH_COND);188inst |= QPU_SET_FIELD(QPU_SIG_BRANCH, QPU_SIG);189inst |= QPU_SET_FIELD(target, QPU_BRANCH_TARGET);190191return inst;192}193194uint64_t195qpu_a_alu2(enum qpu_op_add op,196struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)197{198uint64_t inst = 0;199200inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);201inst |= QPU_SET_FIELD(op, QPU_OP_ADD);202inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);203inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);204inst |= qpu_a_dst(dst);205inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_ADD);206inst |= QPU_MUX(src0.mux, QPU_ADD_A);207inst = set_src_raddr(inst, src0);208inst |= QPU_MUX(src1.mux, QPU_ADD_B);209inst = set_src_raddr(inst, src1);210inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL);211212return inst;213}214215uint64_t216qpu_m_alu2(enum qpu_op_mul op,217struct qpu_reg dst, struct qpu_reg src0, struct qpu_reg src1)218{219uint64_t inst = 0;220221inst |= QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG);222inst |= QPU_SET_FIELD(op, QPU_OP_MUL);223inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);224inst |= QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B);225inst |= qpu_m_dst(dst);226inst |= QPU_SET_FIELD(QPU_COND_ALWAYS, QPU_COND_MUL);227inst |= QPU_MUX(src0.mux, QPU_MUL_A);228inst = set_src_raddr(inst, src0);229inst |= QPU_MUX(src1.mux, QPU_MUL_B);230inst = set_src_raddr(inst, src1);231inst |= QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD);232233return inst;234}235236uint64_t237qpu_m_rot(struct qpu_reg dst, struct qpu_reg src0, int rot)238{239uint64_t inst = 0;240inst = qpu_m_alu2(QPU_M_V8MIN, dst, src0, src0);241242inst = QPU_UPDATE_FIELD(inst, QPU_SIG_SMALL_IMM, QPU_SIG);243inst = QPU_UPDATE_FIELD(inst, QPU_SMALL_IMM_MUL_ROT + rot,244QPU_SMALL_IMM);245246return inst;247}248249static bool250merge_fields(uint64_t *merge,251uint64_t a, uint64_t b,252uint64_t mask, uint64_t ignore)253{254if ((a & mask) == ignore) {255*merge = (*merge & ~mask) | (b & mask);256} else if ((b & mask) == ignore) {257*merge = (*merge & ~mask) | (a & mask);258} else {259if ((a & mask) != (b & mask))260return false;261}262263return true;264}265266int267qpu_num_sf_accesses(uint64_t inst)268{269int accesses = 0;270static const uint32_t specials[] = {271QPU_W_TLB_COLOR_MS,272QPU_W_TLB_COLOR_ALL,273QPU_W_TLB_Z,274QPU_W_TMU0_S,275QPU_W_TMU0_T,276QPU_W_TMU0_R,277QPU_W_TMU0_B,278QPU_W_TMU1_S,279QPU_W_TMU1_T,280QPU_W_TMU1_R,281QPU_W_TMU1_B,282QPU_W_SFU_RECIP,283QPU_W_SFU_RECIPSQRT,284QPU_W_SFU_EXP,285QPU_W_SFU_LOG,286};287uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);288uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);289uint32_t raddr_a = QPU_GET_FIELD(inst, QPU_RADDR_A);290uint32_t raddr_b = QPU_GET_FIELD(inst, QPU_RADDR_B);291292for (int j = 0; j < ARRAY_SIZE(specials); j++) {293if (waddr_add == specials[j])294accesses++;295if (waddr_mul == specials[j])296accesses++;297}298299if (raddr_a == QPU_R_MUTEX_ACQUIRE)300accesses++;301if (raddr_b == QPU_R_MUTEX_ACQUIRE &&302QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM)303accesses++;304305/* XXX: semaphore, combined color read/write? */306switch (QPU_GET_FIELD(inst, QPU_SIG)) {307case QPU_SIG_COLOR_LOAD:308case QPU_SIG_COLOR_LOAD_END:309case QPU_SIG_LOAD_TMU0:310case QPU_SIG_LOAD_TMU1:311accesses++;312}313314return accesses;315}316317static bool318qpu_waddr_ignores_ws(uint32_t waddr)319{320switch(waddr) {321case QPU_W_ACC0:322case QPU_W_ACC1:323case QPU_W_ACC2:324case QPU_W_ACC3:325case QPU_W_NOP:326case QPU_W_TLB_Z:327case QPU_W_TLB_COLOR_MS:328case QPU_W_TLB_COLOR_ALL:329case QPU_W_TLB_ALPHA_MASK:330case QPU_W_VPM:331case QPU_W_SFU_RECIP:332case QPU_W_SFU_RECIPSQRT:333case QPU_W_SFU_EXP:334case QPU_W_SFU_LOG:335case QPU_W_TMU0_S:336case QPU_W_TMU0_T:337case QPU_W_TMU0_R:338case QPU_W_TMU0_B:339case QPU_W_TMU1_S:340case QPU_W_TMU1_T:341case QPU_W_TMU1_R:342case QPU_W_TMU1_B:343return true;344}345346return false;347}348349static void350swap_ra_file_mux_helper(uint64_t *merge, uint64_t *a, uint32_t mux_shift)351{352uint64_t mux_mask = (uint64_t)0x7 << mux_shift;353uint64_t mux_a_val = (uint64_t)QPU_MUX_A << mux_shift;354uint64_t mux_b_val = (uint64_t)QPU_MUX_B << mux_shift;355356if ((*a & mux_mask) == mux_a_val) {357*a = (*a & ~mux_mask) | mux_b_val;358*merge = (*merge & ~mux_mask) | mux_b_val;359}360}361362static bool363try_swap_ra_file(uint64_t *merge, uint64_t *a, uint64_t *b)364{365uint32_t raddr_a_a = QPU_GET_FIELD(*a, QPU_RADDR_A);366uint32_t raddr_a_b = QPU_GET_FIELD(*a, QPU_RADDR_B);367uint32_t raddr_b_a = QPU_GET_FIELD(*b, QPU_RADDR_A);368uint32_t raddr_b_b = QPU_GET_FIELD(*b, QPU_RADDR_B);369370if (raddr_a_b != QPU_R_NOP)371return false;372373switch (raddr_a_a) {374case QPU_R_UNIF:375case QPU_R_VARY:376break;377default:378return false;379}380381if (!(*merge & QPU_PM) &&382QPU_GET_FIELD(*merge, QPU_UNPACK) != QPU_UNPACK_NOP) {383return false;384}385386if (raddr_b_b != QPU_R_NOP &&387raddr_b_b != raddr_a_a)388return false;389390/* Move raddr A to B in instruction a. */391*a = (*a & ~QPU_RADDR_A_MASK) | QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A);392*a = (*a & ~QPU_RADDR_B_MASK) | QPU_SET_FIELD(raddr_a_a, QPU_RADDR_B);393*merge = QPU_UPDATE_FIELD(*merge, raddr_b_a, QPU_RADDR_A);394*merge = QPU_UPDATE_FIELD(*merge, raddr_a_a, QPU_RADDR_B);395swap_ra_file_mux_helper(merge, a, QPU_ADD_A_SHIFT);396swap_ra_file_mux_helper(merge, a, QPU_ADD_B_SHIFT);397swap_ra_file_mux_helper(merge, a, QPU_MUL_A_SHIFT);398swap_ra_file_mux_helper(merge, a, QPU_MUL_B_SHIFT);399400return true;401}402403static bool404convert_mov(uint64_t *inst)405{406uint32_t add_a = QPU_GET_FIELD(*inst, QPU_ADD_A);407uint32_t waddr_add = QPU_GET_FIELD(*inst, QPU_WADDR_ADD);408uint32_t cond_add = QPU_GET_FIELD(*inst, QPU_COND_ADD);409410/* Is it a MOV? */411if (QPU_GET_FIELD(*inst, QPU_OP_ADD) != QPU_A_OR ||412(add_a != QPU_GET_FIELD(*inst, QPU_ADD_B))) {413return false;414}415416if (QPU_GET_FIELD(*inst, QPU_SIG) != QPU_SIG_NONE)417return false;418419/* We could maybe support this in the .8888 and .8a-.8d cases. */420if (*inst & QPU_PM)421return false;422423*inst = QPU_UPDATE_FIELD(*inst, QPU_A_NOP, QPU_OP_ADD);424*inst = QPU_UPDATE_FIELD(*inst, QPU_M_V8MIN, QPU_OP_MUL);425426*inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_A);427*inst = QPU_UPDATE_FIELD(*inst, add_a, QPU_MUL_B);428*inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_A);429*inst = QPU_UPDATE_FIELD(*inst, QPU_MUX_R0, QPU_ADD_B);430431*inst = QPU_UPDATE_FIELD(*inst, waddr_add, QPU_WADDR_MUL);432*inst = QPU_UPDATE_FIELD(*inst, QPU_W_NOP, QPU_WADDR_ADD);433434*inst = QPU_UPDATE_FIELD(*inst, cond_add, QPU_COND_MUL);435*inst = QPU_UPDATE_FIELD(*inst, QPU_COND_NEVER, QPU_COND_ADD);436437if (!qpu_waddr_ignores_ws(waddr_add))438*inst ^= QPU_WS;439440return true;441}442443static bool444writes_a_file(uint64_t inst)445{446if (!(inst & QPU_WS))447return QPU_GET_FIELD(inst, QPU_WADDR_ADD) < 32;448else449return QPU_GET_FIELD(inst, QPU_WADDR_MUL) < 32;450}451452static bool453reads_r4(uint64_t inst)454{455return (QPU_GET_FIELD(inst, QPU_ADD_A) == QPU_MUX_R4 ||456QPU_GET_FIELD(inst, QPU_ADD_B) == QPU_MUX_R4 ||457QPU_GET_FIELD(inst, QPU_MUL_A) == QPU_MUX_R4 ||458QPU_GET_FIELD(inst, QPU_MUL_B) == QPU_MUX_R4);459}460461uint64_t462qpu_merge_inst(uint64_t a, uint64_t b)463{464uint64_t merge = a | b;465bool ok = true;466uint32_t a_sig = QPU_GET_FIELD(a, QPU_SIG);467uint32_t b_sig = QPU_GET_FIELD(b, QPU_SIG);468469if (QPU_GET_FIELD(a, QPU_OP_ADD) != QPU_A_NOP &&470QPU_GET_FIELD(b, QPU_OP_ADD) != QPU_A_NOP) {471if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP ||472QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP ||473!(convert_mov(&a) || convert_mov(&b))) {474return 0;475} else {476merge = a | b;477}478}479480if (QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP &&481QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)482return 0;483484if (qpu_num_sf_accesses(a) && qpu_num_sf_accesses(b))485return 0;486487if (a_sig == QPU_SIG_LOAD_IMM ||488b_sig == QPU_SIG_LOAD_IMM ||489a_sig == QPU_SIG_SMALL_IMM ||490b_sig == QPU_SIG_SMALL_IMM ||491a_sig == QPU_SIG_BRANCH ||492b_sig == QPU_SIG_BRANCH) {493return 0;494}495496ok = ok && merge_fields(&merge, a, b, QPU_SIG_MASK,497QPU_SET_FIELD(QPU_SIG_NONE, QPU_SIG));498499/* Misc fields that have to match exactly. */500ok = ok && merge_fields(&merge, a, b, QPU_SF, ~0);501502if (!merge_fields(&merge, a, b, QPU_RADDR_A_MASK,503QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_A))) {504/* Since we tend to use regfile A by default both for register505* allocation and for our special values (uniforms and506* varyings), try swapping uniforms and varyings to regfile B507* to resolve raddr A conflicts.508*/509if (!try_swap_ra_file(&merge, &a, &b) &&510!try_swap_ra_file(&merge, &b, &a)) {511return 0;512}513}514515ok = ok && merge_fields(&merge, a, b, QPU_RADDR_B_MASK,516QPU_SET_FIELD(QPU_R_NOP, QPU_RADDR_B));517518ok = ok && merge_fields(&merge, a, b, QPU_WADDR_ADD_MASK,519QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_ADD));520ok = ok && merge_fields(&merge, a, b, QPU_WADDR_MUL_MASK,521QPU_SET_FIELD(QPU_W_NOP, QPU_WADDR_MUL));522523/* Allow disagreement on WS (swapping A vs B physical reg file as the524* destination for ADD/MUL) if one of the original instructions525* ignores it (probably because it's just writing to accumulators).526*/527if (qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_ADD)) &&528qpu_waddr_ignores_ws(QPU_GET_FIELD(a, QPU_WADDR_MUL))) {529merge = (merge & ~QPU_WS) | (b & QPU_WS);530} else if (qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_ADD)) &&531qpu_waddr_ignores_ws(QPU_GET_FIELD(b, QPU_WADDR_MUL))) {532merge = (merge & ~QPU_WS) | (a & QPU_WS);533} else {534if ((a & QPU_WS) != (b & QPU_WS))535return 0;536}537538if (!merge_fields(&merge, a, b, QPU_PM, ~0)) {539/* If one instruction has PM bit set and the other not, the540* one without PM shouldn't do packing/unpacking, and we541* have to make sure non-NOP packing/unpacking from PM542* instruction aren't added to it.543*/544uint64_t temp;545546/* Let a be the one with PM bit */547if (!(a & QPU_PM)) {548temp = a;549a = b;550b = temp;551}552553if ((b & (QPU_PACK_MASK | QPU_UNPACK_MASK)) != 0)554return 0;555556if ((a & QPU_PACK_MASK) != 0 &&557QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)558return 0;559560if ((a & QPU_UNPACK_MASK) != 0 && reads_r4(b))561return 0;562} else {563/* packing: Make sure that non-NOP packs agree, then deal with564* special-case failing of adding a non-NOP pack to something565* with a NOP pack.566*/567if (!merge_fields(&merge, a, b, QPU_PACK_MASK, 0))568return 0;569bool new_a_pack = (QPU_GET_FIELD(a, QPU_PACK) !=570QPU_GET_FIELD(merge, QPU_PACK));571bool new_b_pack = (QPU_GET_FIELD(b, QPU_PACK) !=572QPU_GET_FIELD(merge, QPU_PACK));573if (!(merge & QPU_PM)) {574/* Make sure we're not going to be putting a new575* a-file packing on either half.576*/577if (new_a_pack && writes_a_file(a))578return 0;579580if (new_b_pack && writes_a_file(b))581return 0;582} else {583/* Make sure we're not going to be putting new MUL584* packing on either half.585*/586if (new_a_pack &&587QPU_GET_FIELD(a, QPU_OP_MUL) != QPU_M_NOP)588return 0;589590if (new_b_pack &&591QPU_GET_FIELD(b, QPU_OP_MUL) != QPU_M_NOP)592return 0;593}594595/* unpacking: Make sure that non-NOP unpacks agree, then deal596* with special-case failing of adding a non-NOP unpack to597* something with a NOP unpack.598*/599if (!merge_fields(&merge, a, b, QPU_UNPACK_MASK, 0))600return 0;601bool new_a_unpack = (QPU_GET_FIELD(a, QPU_UNPACK) !=602QPU_GET_FIELD(merge, QPU_UNPACK));603bool new_b_unpack = (QPU_GET_FIELD(b, QPU_UNPACK) !=604QPU_GET_FIELD(merge, QPU_UNPACK));605if (!(merge & QPU_PM)) {606/* Make sure we're not going to be putting a new607* a-file packing on either half.608*/609if (new_a_unpack &&610QPU_GET_FIELD(a, QPU_RADDR_A) != QPU_R_NOP)611return 0;612613if (new_b_unpack &&614QPU_GET_FIELD(b, QPU_RADDR_A) != QPU_R_NOP)615return 0;616} else {617/* Make sure we're not going to be putting new r4618* unpack on either half.619*/620if (new_a_unpack && reads_r4(a))621return 0;622623if (new_b_unpack && reads_r4(b))624return 0;625}626}627628if (ok)629return merge;630else631return 0;632}633634uint64_t635qpu_set_sig(uint64_t inst, uint32_t sig)636{637assert(QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_NONE);638return QPU_UPDATE_FIELD(inst, sig, QPU_SIG);639}640641uint64_t642qpu_set_cond_add(uint64_t inst, uint32_t cond)643{644assert(QPU_GET_FIELD(inst, QPU_COND_ADD) == QPU_COND_ALWAYS);645return QPU_UPDATE_FIELD(inst, cond, QPU_COND_ADD);646}647648uint64_t649qpu_set_cond_mul(uint64_t inst, uint32_t cond)650{651assert(QPU_GET_FIELD(inst, QPU_COND_MUL) == QPU_COND_ALWAYS);652return QPU_UPDATE_FIELD(inst, cond, QPU_COND_MUL);653}654655bool656qpu_waddr_is_tlb(uint32_t waddr)657{658switch (waddr) {659case QPU_W_TLB_COLOR_ALL:660case QPU_W_TLB_COLOR_MS:661case QPU_W_TLB_Z:662return true;663default:664return false;665}666}667668bool669qpu_inst_is_tlb(uint64_t inst)670{671uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);672673return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_ADD)) ||674qpu_waddr_is_tlb(QPU_GET_FIELD(inst, QPU_WADDR_MUL)) ||675sig == QPU_SIG_COLOR_LOAD ||676sig == QPU_SIG_WAIT_FOR_SCOREBOARD);677}678679/**680* Returns the small immediate value to be encoded in to the raddr b field if681* the argument can be represented as one, or ~0 otherwise.682*/683uint32_t684qpu_encode_small_immediate(uint32_t i)685{686if (i <= 15)687return i;688if ((int)i < 0 && (int)i >= -16)689return i + 32;690691switch (i) {692case 0x3f800000:693return 32;694case 0x40000000:695return 33;696case 0x40800000:697return 34;698case 0x41000000:699return 35;700case 0x41800000:701return 36;702case 0x42000000:703return 37;704case 0x42800000:705return 38;706case 0x43000000:707return 39;708case 0x3b800000:709return 40;710case 0x3c000000:711return 41;712case 0x3c800000:713return 42;714case 0x3d000000:715return 43;716case 0x3d800000:717return 44;718case 0x3e000000:719return 45;720case 0x3e800000:721return 46;722case 0x3f000000:723return 47;724}725726return ~0;727}728729void730qpu_serialize_one_inst(struct vc4_compile *c, uint64_t inst)731{732if (c->qpu_inst_count >= c->qpu_inst_size) {733c->qpu_inst_size = MAX2(16, c->qpu_inst_size * 2);734c->qpu_insts = reralloc(c, c->qpu_insts,735uint64_t, c->qpu_inst_size);736}737c->qpu_insts[c->qpu_inst_count++] = inst;738}739740741