/*1* Copyright (c) 2017 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#ifndef _AFUC_H_24#define _AFUC_H_2526#include <stdbool.h>2728#include "util/macros.h"2930/*31TODO kernel debugfs to inject packet into rb for easier experimentation. It32should trigger reloading pfp/me and resetting gpu..3334Actually maybe it should be flag on submit ioctl to be able to deal w/ relocs,35should be restricted to CAP_ADMIN and probably compile option too (default=n).36if flag set, copy cmdstream bo contents into RB instead of IB'ing to it from37RB.38*/3940/* The opcode is encoded variable length. Opcodes less than 0x3041* are encoded as 5 bits followed by (rep) flag. Opcodes >= 0x3042* (ie. top two bits are '11' are encoded as 6 bits. See get_opc()43*/44typedef enum {45OPC_NOP = 0x00,4647OPC_ADD = 0x01, /* add immediate */48OPC_ADDHI = 0x02, /* add immediate (hi 32b of 64b) */49OPC_SUB = 0x03, /* subtract immediate */50OPC_SUBHI = 0x04, /* subtract immediate (hi 32b of 64b) */51OPC_AND = 0x05, /* AND immediate */52OPC_OR = 0x06, /* OR immediate */53OPC_XOR = 0x07, /* XOR immediate */54OPC_NOT = 0x08, /* bitwise not of immed (src1 ignored) */55OPC_SHL = 0x09, /* shift-left immediate */56OPC_USHR = 0x0a, /* unsigned shift right by immediate */57OPC_ISHR = 0x0b, /* signed shift right by immediate */58OPC_ROT = 0x0c, /* rotate left (left shift with wrap-around) */59OPC_MUL8 = 0x0d, /* 8bit multiply by immediate */60OPC_MIN = 0x0e,61OPC_MAX = 0x0f,62OPC_CMP = 0x10, /* compare src to immed */63OPC_MOVI = 0x11, /* move immediate */6465/* Return the most-significant bit of src2, or 0 if src2 == 0 (the66* same as if src2 == 1). src1 is ignored. Note that this overlaps67* with STORE6, so it can only be used with the two-source encoding.68*/69OPC_MSB = 0x14,7071OPC_ALU = 0x13, /* ALU instruction with two src registers */7273/* These seem something to do with setting some external state..74* doesn't seem to map *directly* to registers, but I guess that75* is where things end up. For example, this sequence in the76* CP_INDIRECT_BUFFER handler:77*78* mov $02, $data ; low 32b of IB target address79* mov $03, $data ; high 32b of IB target80* mov $04, $data ; IB size in dwords81* breq $04, 0x0, #l23 (#69, 04a2)82* and $05, $18, 0x000383* shl $05, $05, 0x000284* cwrite $02, [$05 + 0x0b0], 0x885* cwrite $03, [$05 + 0x0b1], 0x886* cwrite $04, [$05 + 0x0b2], 0x887*88* Note that CP_IB1/2_BASE_LO/HI/BUFSZ in 0x0b1f->0xb21 (IB1) and89* 0x0b22->0x0b24 (IB2). Presumably $05 ends up w/ different value90* for RB->IB1 vs IB1->IB2.91*/92OPC_CWRITE5 = 0x15,93OPC_CREAD5 = 0x16,9495/* A6xx shuffled around the cwrite/cread opcodes and added new opcodes96* that let you read/write directly to memory (and bypass the IOMMU?).97*/98OPC_STORE6 = 0x14,99OPC_CWRITE6 = 0x15,100OPC_LOAD6 = 0x16,101OPC_CREAD6 = 0x17,102103OPC_BRNEI = 0x30, /* relative branch (if $src != immed) */104OPC_BREQI = 0x31, /* relative branch (if $src == immed) */105OPC_BRNEB = 0x32, /* relative branch (if bit not set) */106OPC_BREQB = 0x33, /* relative branch (if bit is set) */107OPC_RET = 0x34, /* return */108OPC_CALL = 0x35, /* "function" call */109OPC_WIN = 0x36, /* wait for input (ie. wait for WPTR to advance) */110OPC_PREEMPTLEAVE6 = 0x38, /* try to leave preemption */111OPC_SETSECURE = 0x3b, /* switch secure mode on/off */112} afuc_opc;113114/**115* Special GPR registers:116*117* Notes: (applicable to a6xx, double check a5xx)118*119* 0x1d:120* $addr: writes configure GPU reg address to read/write121* (does not respect CP_PROTECT)122* $memdata: reads from FIFO filled based on MEM_READ_DWORDS/123* MEM_READ_ADDR124* 0x1e: (note different mnemonic for src vs dst)125* $usraddr: writes configure GPU reg address to read/write,126* respecting CP_PROTECT127* $regdata: reads from FIFO filled based on REG_READ_DWORDS/128* REG_READ_ADDR129* 0x1f:130* $data: reads from from pm4 input stream131* $data: writes to stream configured by write to $addr132* or $usraddr133*/134typedef enum {135REG_REM = 0x1c,136REG_MEMDATA = 0x1d, /* when used as src */137REG_ADDR = 0x1d, /* when used as dst */138REG_REGDATA = 0x1e, /* when used as src */139REG_USRADDR = 0x1e, /* when used as dst */140REG_DATA = 0x1f,141} afuc_reg;142143typedef union PACKED {144/* addi, subi, andi, ori, xori, etc: */145struct PACKED {146uint32_t uimm : 16;147uint32_t dst : 5;148uint32_t src : 5;149uint32_t hdr : 6;150} alui;151struct PACKED {152uint32_t uimm : 16;153uint32_t dst : 5;154uint32_t shift : 5;155uint32_t hdr : 6;156} movi;157struct PACKED {158uint32_t alu : 5;159uint32_t pad : 4;160uint32_t xmov : 2; /* execute eXtra mov's based on $rem */161uint32_t dst : 5;162uint32_t src2 : 5;163uint32_t src1 : 5;164uint32_t hdr : 6;165} alu;166struct PACKED {167uint32_t uimm : 12;168/* TODO this needs to be confirmed:169*170* flags:171* 0x4 - post-increment src2 by uimm (need to confirm this is also172* true for load/cread). TBD whether, when used in conjunction173* with @LOAD_STORE_HI, 32b rollover works properly.174*175* other values tbd, also need to confirm if different bits can be176* set together (I don't see examples of this in existing fw)177*/178uint32_t flags : 4;179uint32_t src1 : 5; /* dst (cread) or src (cwrite) register */180uint32_t src2 : 5; /* read or write address is src2+uimm */181uint32_t hdr : 6;182} control;183struct PACKED {184int32_t ioff : 16; /* relative offset */185uint32_t bit_or_imm : 5;186uint32_t src : 5;187uint32_t hdr : 6;188} br;189struct PACKED {190uint32_t uoff : 26; /* absolute (unsigned) offset */191uint32_t hdr : 6;192} call;193struct PACKED {194uint32_t pad : 25;195uint32_t interrupt : 1; /* return from ctxt-switch interrupt handler */196uint32_t hdr : 6;197} ret;198struct PACKED {199uint32_t pad : 26;200uint32_t hdr : 6;201} waitin;202struct PACKED {203uint32_t pad : 26;204uint32_t opc_r : 6;205};206207} afuc_instr;208209static inline void210afuc_get_opc(afuc_instr *ai, afuc_opc *opc, bool *rep)211{212if (ai->opc_r < 0x30) {213*opc = ai->opc_r >> 1;214*rep = ai->opc_r & 0x1;215} else {216*opc = ai->opc_r;217*rep = false;218}219}220221static inline void222afuc_set_opc(afuc_instr *ai, afuc_opc opc, bool rep)223{224if (opc < 0x30) {225ai->opc_r = opc << 1;226ai->opc_r |= !!rep;227} else {228ai->opc_r = opc;229}230}231232void print_src(unsigned reg);233void print_dst(unsigned reg);234void print_control_reg(uint32_t id);235void print_pipe_reg(uint32_t id);236237#endif /* _AFUC_H_ */238239240