/* Path: blob/21.2-virgl/src/asahi/compiler/agx_pack.c */
/*1* Copyright (C) 2021 Alyssa Rosenzweig <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "agx_compiler.h"2425/* Binary patches needed for branch offsets */26struct agx_branch_fixup {27/* Offset into the binary to patch */28off_t offset;2930/* Value to patch with will be block->offset */31agx_block *block;32};3334/* Texturing has its own operands */35static unsigned36agx_pack_sample_coords(agx_index index, bool *flag)37{38/* TODO: how to encode 16-bit coords? 
*/39assert(index.size == AGX_SIZE_32);40assert(index.value < 0x100);4142*flag = index.discard;43return index.value;44}4546static unsigned47agx_pack_texture(agx_index index, unsigned *flag)48{49/* TODO: indirection */50assert(index.type == AGX_INDEX_IMMEDIATE);51*flag = 0;52return index.value;53}5455static unsigned56agx_pack_sampler(agx_index index, bool *flag)57{58/* TODO: indirection */59assert(index.type == AGX_INDEX_IMMEDIATE);60*flag = 0;61return index.value;62}6364static unsigned65agx_pack_sample_offset(agx_index index, bool *flag)66{67/* TODO: offsets */68assert(index.type == AGX_INDEX_NULL);69*flag = 0;70return 0;71}7273static unsigned74agx_pack_lod(agx_index index)75{76/* Immediate zero */77if (index.type == AGX_INDEX_IMMEDIATE && index.value == 0)78return 0;7980/* Otherwise must be a 16-bit float immediate */81assert(index.type == AGX_INDEX_REGISTER);82assert(index.size == AGX_SIZE_16);83assert(index.value < 0x100);8485return index.value;86}8788/* Load/stores have their own operands */8990static unsigned91agx_pack_memory_reg(agx_index index, bool *flag)92{93assert(index.size == AGX_SIZE_16 || index.size == AGX_SIZE_32);94assert(index.size == AGX_SIZE_16 || (index.value & 1) == 0);95assert(index.value < 0x100);9697*flag = (index.size == AGX_SIZE_32);98return index.value;99}100101static unsigned102agx_pack_memory_base(agx_index index, bool *flag)103{104assert(index.size == AGX_SIZE_64);105assert((index.value & 1) == 0);106107if (index.type == AGX_INDEX_UNIFORM) {108assert(index.value < 0x200);109*flag = 1;110return index.value;111} else {112assert(index.value < 0x100);113*flag = 0;114return index.value;115}116}117118static unsigned119agx_pack_memory_index(agx_index index, bool *flag)120{121if (index.type == AGX_INDEX_IMMEDIATE) {122assert(index.value < 0x10000);123*flag = 1;124125return index.value;126} else {127assert(index.type == AGX_INDEX_REGISTER);128assert((index.value & 1) == 0);129assert(index.value < 0x100);130131*flag = 0;132return 
index.value;133}134}135136/* ALU goes through a common path */137138static unsigned139agx_pack_alu_dst(agx_index dest)140{141assert(dest.type == AGX_INDEX_REGISTER);142unsigned reg = dest.value;143enum agx_size size = dest.size;144assert(reg < 0x100);145146/* RA invariant: alignment of half-reg */147if (size >= AGX_SIZE_32)148assert((reg & 1) == 0);149150return151(dest.cache ? (1 << 0) : 0) |152((size >= AGX_SIZE_32) ? (1 << 1) : 0) |153((size == AGX_SIZE_64) ? (1 << 2) : 0) |154((reg << 2));155}156157static unsigned158agx_pack_alu_src(agx_index src)159{160unsigned value = src.value;161enum agx_size size = src.size;162163if (src.type == AGX_INDEX_IMMEDIATE) {164/* Flags 0 for an 8-bit immediate */165assert(value < 0x100);166167return168(value & BITFIELD_MASK(6)) |169((value >> 6) << 10);170} else if (src.type == AGX_INDEX_UNIFORM) {171assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);172assert(value < 0x200);173174return175(value & BITFIELD_MASK(6)) |176((value >> 8) << 6) |177((size == AGX_SIZE_32) ? (1 << 7) : 0) |178(0x1 << 8) |179(((value >> 6) & BITFIELD_MASK(2)) << 10);180} else {181assert(src.type == AGX_INDEX_REGISTER);182assert(!(src.cache && src.discard));183184unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;185unsigned size_flag =186(size == AGX_SIZE_64) ? 0x3 :187(size == AGX_SIZE_32) ? 0x2 :188(size == AGX_SIZE_16) ? 
0x0 : 0x0;189190return191(value & BITFIELD_MASK(6)) |192(hint << 6) |193(size_flag << 8) |194(((value >> 6) & BITFIELD_MASK(2)) << 10);195}196}197198static unsigned199agx_pack_cmpsel_src(agx_index src, enum agx_size dest_size)200{201unsigned value = src.value;202ASSERTED enum agx_size size = src.size;203204if (src.type == AGX_INDEX_IMMEDIATE) {205/* Flags 0x4 for an 8-bit immediate */206assert(value < 0x100);207208return209(value & BITFIELD_MASK(6)) |210(0x4 << 6) |211((value >> 6) << 10);212} else if (src.type == AGX_INDEX_UNIFORM) {213assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);214assert(size == dest_size);215assert(value < 0x200);216217return218(value & BITFIELD_MASK(6)) |219((value >> 8) << 6) |220(0x3 << 7) |221(((value >> 6) & BITFIELD_MASK(2)) << 10);222} else {223assert(src.type == AGX_INDEX_REGISTER);224assert(!(src.cache && src.discard));225assert(size == AGX_SIZE_16 || size == AGX_SIZE_32);226assert(size == dest_size);227228unsigned hint = src.discard ? 0x3 : src.cache ? 0x2 : 0x1;229230return231(value & BITFIELD_MASK(6)) |232(hint << 6) |233(((value >> 6) & BITFIELD_MASK(2)) << 10);234}235}236237static unsigned238agx_pack_float_mod(agx_index src)239{240return (src.abs ? (1 << 0) : 0)241| (src.neg ? 
(1 << 1) : 0);242}243244static bool245agx_all_16(agx_instr *I)246{247agx_foreach_dest(I, d) {248if (!agx_is_null(I->dest[d]) && I->dest[d].size != AGX_SIZE_16)249return false;250}251252agx_foreach_src(I, s) {253if (!agx_is_null(I->src[s]) && I->src[s].size != AGX_SIZE_16)254return false;255}256257return true;258}259260/* Generic pack for ALU instructions, which are quite regular */261262static void263agx_pack_alu(struct util_dynarray *emission, agx_instr *I)264{265struct agx_opcode_info info = agx_opcodes_info[I->op];266bool is_16 = agx_all_16(I) && info.encoding_16.exact;267struct agx_encoding encoding = is_16 ?268info.encoding_16 : info.encoding;269270assert(encoding.exact && "invalid encoding");271272uint64_t raw = encoding.exact;273uint16_t extend = 0;274275// TODO: assert saturable276if (I->saturate)277raw |= (1 << 6);278279if (info.nr_dests) {280assert(info.nr_dests == 1);281unsigned D = agx_pack_alu_dst(I->dest[0]);282unsigned extend_offset = (sizeof(extend)*8) - 4;283284raw |= (D & BITFIELD_MASK(8)) << 7;285extend |= ((D >> 8) << extend_offset);286} else if (info.immediates & AGX_IMMEDIATE_NEST) {287raw |= (I->invert_cond << 8);288raw |= (I->nest << 11);289raw |= (I->icond << 13);290}291292for (unsigned s = 0; s < info.nr_srcs; ++s) {293bool is_cmpsel = (s >= 2) &&294(I->op == AGX_OPCODE_ICMPSEL || I->op == AGX_OPCODE_FCMPSEL);295296unsigned src = is_cmpsel ?297agx_pack_cmpsel_src(I->src[s], I->dest[0].size) :298agx_pack_alu_src(I->src[s]);299300unsigned src_short = (src & BITFIELD_MASK(10));301unsigned src_extend = (src >> 10);302303/* Size bit always zero and so omitted for 16-bit */304if (is_16 && !is_cmpsel)305assert((src_short & (1 << 9)) == 0);306307if (info.is_float) {308unsigned fmod = agx_pack_float_mod(I->src[s]);309unsigned fmod_offset = is_16 ? 
9 : 10;310src_short |= (fmod << fmod_offset);311} else if (I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) {312bool zext = I->src[s].abs;313bool extends = I->src[s].size < AGX_SIZE_64;314315unsigned sxt = (extends && !zext) ? (1 << 10) : 0;316317assert(!I->src[s].neg || s == 1);318src_short |= sxt;319}320321/* Sources come at predictable offsets */322unsigned offset = 16 + (12 * s);323raw |= (((uint64_t) src_short) << offset);324325/* Destination and each source get extended in reverse order */326unsigned extend_offset = (sizeof(extend)*8) - ((s + 3) * 2);327extend |= (src_extend << extend_offset);328}329330if ((I->op == AGX_OPCODE_IMAD || I->op == AGX_OPCODE_IADD) && I->src[1].neg)331raw |= (1 << 27);332333if (info.immediates & AGX_IMMEDIATE_TRUTH_TABLE) {334raw |= (I->truth_table & 0x3) << 26;335raw |= (uint64_t) (I->truth_table >> 2) << 38;336} else if (info.immediates & AGX_IMMEDIATE_SHIFT) {337raw |= (uint64_t) (I->shift & 1) << 39;338raw |= (uint64_t) (I->shift >> 2) << 52;339} else if (info.immediates & AGX_IMMEDIATE_BFI_MASK) {340raw |= (uint64_t) (I->mask & 0x3) << 38;341raw |= (uint64_t) ((I->mask >> 2) & 0x3) << 50;342raw |= (uint64_t) ((I->mask >> 4) & 0x1) << 63;343} else if (info.immediates & AGX_IMMEDIATE_SR) {344raw |= (uint64_t) (I->sr & 0x3F) << 16;345raw |= (uint64_t) (I->sr >> 6) << 26;346} else if (info.immediates & AGX_IMMEDIATE_WRITEOUT)347raw |= (uint64_t) (I->imm) << 8;348else if (info.immediates & AGX_IMMEDIATE_IMM)349raw |= (uint64_t) (I->imm) << 16;350else if (info.immediates & AGX_IMMEDIATE_ROUND)351raw |= (uint64_t) (I->imm) << 26;352else if (info.immediates & (AGX_IMMEDIATE_FCOND | AGX_IMMEDIATE_ICOND))353raw |= (uint64_t) (I->fcond) << 61;354355/* Determine length bit */356unsigned length = encoding.length_short;357unsigned short_mask = (1 << length) - 1;358bool length_bit = (extend || (raw & ~short_mask));359360if (encoding.extensible && length_bit) {361raw |= (1 << 15);362length += (length > 8) ? 4 : 2;363}364365/* Pack! 
*/366if (length <= sizeof(uint64_t)) {367unsigned extend_offset = ((length - sizeof(extend)) * 8);368369/* XXX: This is a weird special case */370if (I->op == AGX_OPCODE_IADD)371extend_offset -= 16;372373raw |= (uint64_t) extend << extend_offset;374memcpy(util_dynarray_grow_bytes(emission, 1, length), &raw, length);375} else {376/* So far, >8 byte ALU is only to store the extend bits */377unsigned extend_offset = (((length - sizeof(extend)) * 8) - 64);378unsigned hi = ((uint64_t) extend) << extend_offset;379380memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);381memcpy(util_dynarray_grow_bytes(emission, 1, length - 8), &hi, length - 8);382}383}384385static void386agx_pack_instr(struct util_dynarray *emission, struct util_dynarray *fixups, agx_instr *I)387{388switch (I->op) {389case AGX_OPCODE_LD_TILE:390case AGX_OPCODE_ST_TILE:391{392bool load = (I->op == AGX_OPCODE_LD_TILE);393unsigned D = agx_pack_alu_dst(load ? I->dest[0] : I->src[0]);394unsigned rt = 0; /* TODO */395unsigned mask = I->mask ?: 0xF;396assert(mask < 0x10);397398uint64_t raw =3990x09 |400(load ? (1 << 6) : 0) |401((uint64_t) (D & BITFIELD_MASK(8)) << 7) |402((uint64_t) (I->format) << 24) |403((uint64_t) (rt) << 32) |404(load ? (1ull << 35) : 0) |405((uint64_t) (mask) << 36) |406((uint64_t) 0x0380FC << 40) |407(((uint64_t) (D >> 8)) << 60);408409unsigned size = 8;410memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);411break;412}413414case AGX_OPCODE_LD_VARY:415case AGX_OPCODE_LD_VARY_FLAT:416{417bool flat = (I->op == AGX_OPCODE_LD_VARY_FLAT);418unsigned D = agx_pack_alu_dst(I->dest[0]);419unsigned channels = (I->channels & 0x3);420assert(I->mask < 0xF); /* 0 indicates full mask */421agx_index index_src = I->src[0];422assert(index_src.type == AGX_INDEX_IMMEDIATE);423assert(!(flat && I->perspective));424unsigned index = index_src.value;425426uint64_t raw =4270x21 | (flat ? (1 << 7) : 0) |428(I->perspective ? 
(1 << 6) : 0) |429((D & 0xFF) << 7) |430(1ull << 15) | /* XXX */431(((uint64_t) index) << 16) |432(((uint64_t) channels) << 30) |433(!flat ? (1ull << 46) : 0) | /* XXX */434(!flat ? (1ull << 52) : 0) | /* XXX */435(((uint64_t) (D >> 8)) << 56);436437unsigned size = 8;438memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);439break;440}441442case AGX_OPCODE_ST_VARY:443{444agx_index index_src = I->src[0];445agx_index value = I->src[1];446447assert(index_src.type == AGX_INDEX_IMMEDIATE);448assert(value.type == AGX_INDEX_REGISTER);449assert(value.size == AGX_SIZE_32);450451uint64_t raw =4520x11 |453(I->last ? (1 << 7) : 0) |454((value.value & 0x3F) << 9) |455(((uint64_t) index_src.value) << 16) |456(0x80 << 16) | /* XXX */457((value.value >> 6) << 24) |458(0x8 << 28); /* XXX */459460unsigned size = 4;461memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);462break;463}464465case AGX_OPCODE_DEVICE_LOAD:466{467assert(I->mask != 0);468assert(I->format <= 0x10);469470bool Rt, At, Ot;471unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);472unsigned A = agx_pack_memory_base(I->src[0], &At);473unsigned O = agx_pack_memory_index(I->src[1], &Ot);474unsigned u1 = 1; // XXX475unsigned u3 = 0;476unsigned u4 = 4; // XXX477unsigned u5 = 0;478bool L = true; /* TODO: when would you want short? */479480uint64_t raw =4810x05 |482((I->format & BITFIELD_MASK(3)) << 7) |483((R & BITFIELD_MASK(6)) << 10) |484((A & BITFIELD_MASK(4)) << 16) |485((O & BITFIELD_MASK(4)) << 20) |486(Ot ? (1 << 24) : 0) |487(I->src[1].abs ? (1 << 25) : 0) |488(u1 << 26) |489(At << 27) |490(u3 << 28) |491(I->scoreboard << 30) |492(((uint64_t) ((O >> 4) & BITFIELD_MASK(4))) << 32) |493(((uint64_t) ((A >> 4) & BITFIELD_MASK(4))) << 36) |494(((uint64_t) ((R >> 6) & BITFIELD_MASK(2))) << 40) |495(((uint64_t) I->shift) << 42) |496(((uint64_t) u4) << 44) |497(L ? 
(1ull << 47) : 0) |498(((uint64_t) (I->format >> 3)) << 48) |499(((uint64_t) Rt) << 49) |500(((uint64_t) u5) << 50) |501(((uint64_t) I->mask) << 52) |502(((uint64_t) (O >> 8)) << 56);503504unsigned size = L ? 8 : 6;505memcpy(util_dynarray_grow_bytes(emission, 1, size), &raw, size);506break;507}508509case AGX_OPCODE_TEXTURE_SAMPLE:510{511assert(I->mask != 0);512assert(I->format <= 0x10);513514bool Rt, Ot, Ct, St;515unsigned Tt;516517unsigned R = agx_pack_memory_reg(I->dest[0], &Rt);518unsigned C = agx_pack_sample_coords(I->src[0], &Ct);519unsigned T = agx_pack_texture(I->src[2], &Tt);520unsigned S = agx_pack_sampler(I->src[3], &St);521unsigned O = agx_pack_sample_offset(I->src[4], &Ot);522unsigned D = agx_pack_lod(I->src[1]);523524unsigned U = 0; // TODO: what is sampler ureg?525unsigned q1 = 0; // XXX526unsigned q2 = 0; // XXX527unsigned q3 = 12; // XXX528unsigned q4 = 1; // XXX529unsigned q5 = 0; // XXX530unsigned q6 = 0; // XXX531532uint32_t extend =533((U & BITFIELD_MASK(5)) << 0) |534(q4 << 5) |535((R >> 6) << 8) |536((C >> 6) << 10) |537((D >> 6) << 12) |538((T >> 6) << 14) |539((O & BITFIELD_MASK(6)) << 16) |540(q6 << 22) |541(Ot << 27) |542((S >> 6) << 28) |543((O >> 6) << 30);544545bool L = (extend != 0);546assert(I->scoreboard == 0 && "todo");547548uint64_t raw =5490x31 |550(Rt ? (1 << 8) : 0) |551((R & BITFIELD_MASK(6)) << 9) |552(L ? (1 << 15) : 0) |553((C & BITFIELD_MASK(6)) << 16) |554(Ct ? 
(1 << 22) : 0) |555(q1 << 23) |556((D & BITFIELD_MASK(6)) << 24) |557(q2 << 30) |558(((uint64_t) (T & BITFIELD_MASK(6))) << 32) |559(((uint64_t) Tt) << 38) |560(((uint64_t) I->dim) << 40) |561(((uint64_t) q3) << 43) |562(((uint64_t) I->mask) << 48) |563(((uint64_t) I->lod_mode) << 52) |564(((uint64_t) (S & BITFIELD_MASK(6))) << 32) |565(((uint64_t) St) << 62) |566(((uint64_t) q5) << 63);567568memcpy(util_dynarray_grow_bytes(emission, 1, 8), &raw, 8);569if (L)570memcpy(util_dynarray_grow_bytes(emission, 1, 4), &extend, 4);571572break;573}574575case AGX_OPCODE_JMP_EXEC_ANY:576case AGX_OPCODE_JMP_EXEC_NONE:577{578/* We don't implement indirect branches */579assert(I->target != NULL);580581/* We'll fix the offset later. */582struct agx_branch_fixup fixup = {583.block = I->target,584.offset = emission->size585};586587util_dynarray_append(fixups, struct agx_branch_fixup, fixup);588589/* The rest of the instruction is fixed */590struct agx_opcode_info info = agx_opcodes_info[I->op];591uint64_t raw = info.encoding.exact;592memcpy(util_dynarray_grow_bytes(emission, 1, 6), &raw, 6);593break;594}595596default:597agx_pack_alu(emission, I);598return;599}600}601602/* Relative branches may be emitted before their targets, so we patch the603* binary to fix up the branch offsets after the main emit */604605static void606agx_fixup_branch(struct util_dynarray *emission, struct agx_branch_fixup fix)607{608/* Branch offset is 2 bytes into the jump instruction */609uint8_t *location = ((uint8_t *) emission->data) + fix.offset + 2;610611/* Offsets are relative to the jump instruction */612int32_t patch = (int32_t) fix.block->offset - (int32_t) fix.offset;613614/* Patch the binary */615memcpy(location, &patch, sizeof(patch));616}617618void619agx_pack_binary(agx_context *ctx, struct util_dynarray *emission)620{621struct util_dynarray fixups;622util_dynarray_init(&fixups, ctx);623624agx_foreach_block(ctx, block) {625/* Relative to the start of the binary, the block begins at the current626* 
number of bytes emitted */627block->offset = emission->size;628629agx_foreach_instr_in_block(block, ins) {630agx_pack_instr(emission, &fixups, ins);631}632}633634util_dynarray_foreach(&fixups, struct agx_branch_fixup, fixup)635agx_fixup_branch(emission, *fixup);636637util_dynarray_fini(&fixups);638}639640641