/* Source: blob/21.2-virgl/src/panfrost/bifrost/bi_pack.c */
/*1* Copyright (C) 2020 Collabora, Ltd.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "compiler.h"2425/* This file contains the final passes of the compiler. Running after26* scheduling and RA, the IR is now finalized, so we need to emit it to actual27* bits on the wire (as well as fixup branches) */2829static uint64_t30bi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2)31{32/* next_dependencies are the union of the dependencies of successors'33* dependencies */3435unsigned dependency_wait = next_1 ? next_1->dependencies : 0;36dependency_wait |= next_2 ? next_2->dependencies : 0;3738bool staging_barrier = next_1 ? next_1->staging_barrier : false;39staging_barrier |= next_2 ? 
next_2->staging_barrier : 0;4041struct bifrost_header header = {42.flow_control =43(next_1 == NULL && next_2 == NULL) ?44BIFROST_FLOW_END : clause->flow_control,45.terminate_discarded_threads = clause->td,46.next_clause_prefetch = clause->next_clause_prefetch && next_1,47.staging_barrier = staging_barrier,48.staging_register = clause->staging_register,49.dependency_wait = dependency_wait,50.dependency_slot = clause->scoreboard_id,51.message_type = clause->message_type,52.next_message_type = next_1 ? next_1->message_type : 0,53};5455uint64_t u = 0;56memcpy(&u, &header, sizeof(header));57return u;58}5960/* Assigns a slot for reading, before anything is written */6162static void63bi_assign_slot_read(bi_registers *regs, bi_index src)64{65/* We only assign for registers */66if (src.type != BI_INDEX_REGISTER)67return;6869/* Check if we already assigned the slot */70for (unsigned i = 0; i <= 1; ++i) {71if (regs->slot[i] == src.value && regs->enabled[i])72return;73}7475if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ)76return;7778/* Assign it now */7980for (unsigned i = 0; i <= 1; ++i) {81if (!regs->enabled[i]) {82regs->slot[i] = src.value;83regs->enabled[i] = true;84return;85}86}8788if (!regs->slot23.slot3) {89regs->slot[2] = src.value;90regs->slot23.slot2 = BIFROST_OP_READ;91return;92}9394bi_print_slots(regs, stderr);95unreachable("Failed to find a free slot for src");96}9798static bi_registers99bi_assign_slots(bi_tuple *now, bi_tuple *prev)100{101/* We assign slots for the main register mechanism. Special ops102* use the data registers, which has its own mechanism entirely103* and thus gets skipped over here. 
*/104105bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read;106bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write;107108/* First, assign reads */109110if (now->fma)111bi_foreach_src(now->fma, src)112bi_assign_slot_read(&now->regs, (now->fma)->src[src]);113114if (now->add) {115bi_foreach_src(now->add, src) {116if (!(src == 0 && read_dreg))117bi_assign_slot_read(&now->regs, (now->add)->src[src]);118}119}120121/* Next, assign writes. Staging writes are assigned separately, but122* +ATEST wants its destination written to both a staging register123* _and_ a regular write, because it may not generate a message */124125if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) {126bi_index idx = prev->add->dest[0];127128if (idx.type == BI_INDEX_REGISTER) {129now->regs.slot[3] = idx.value;130now->regs.slot23.slot3 = BIFROST_OP_WRITE;131}132}133134if (prev->fma) {135bi_index idx = (prev->fma)->dest[0];136137if (idx.type == BI_INDEX_REGISTER) {138if (now->regs.slot23.slot3) {139/* Scheduler constraint: cannot read 3 and write 2 */140assert(!now->regs.slot23.slot2);141now->regs.slot[2] = idx.value;142now->regs.slot23.slot2 = BIFROST_OP_WRITE;143} else {144now->regs.slot[3] = idx.value;145now->regs.slot23.slot3 = BIFROST_OP_WRITE;146now->regs.slot23.slot3_fma = true;147}148}149}150151return now->regs;152}153154static enum bifrost_reg_mode155bi_pack_register_mode(bi_registers r)156{157/* Handle idle as a special case */158if (!(r.slot23.slot2 | r.slot23.slot3))159return r.first_instruction ? 
BIFROST_IDLE_1 : BIFROST_IDLE;160161/* Otherwise, use the LUT */162for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) {163if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0)164return i;165}166167bi_print_slots(&r, stderr);168unreachable("Invalid slot assignment");169}170171static uint64_t172bi_pack_registers(bi_registers regs)173{174enum bifrost_reg_mode mode = bi_pack_register_mode(regs);175struct bifrost_regs s = { 0 };176uint64_t packed = 0;177178/* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for179* first instruction and adds 16 when reg 2 == reg 3 */180181unsigned ctrl;182bool r2_equals_r3 = false;183184if (regs.first_instruction) {185/* Bit 3 implicitly must be clear for first instructions.186* The affected patterns all write both ADD/FMA, but that187* is forbidden for the last instruction (whose writes are188* encoded by the first), so this does not add additional189* encoding constraints */190assert(!(mode & 0x8));191192/* Move bit 4 to bit 3, since bit 3 is clear */193ctrl = (mode & 0x7) | ((mode & 0x10) >> 1);194195/* If we can let r2 equal r3, we have to or the hardware raises196* INSTR_INVALID_ENC (it's unclear why). 
*/197if (!(regs.slot23.slot2 && regs.slot23.slot3))198r2_equals_r3 = true;199} else {200/* We force r2=r3 or not for the upper bit */201ctrl = (mode & 0xF);202r2_equals_r3 = (mode & 0x10);203}204205if (regs.enabled[1]) {206/* Gotta save that bit!~ Required by the 63-x trick */207assert(regs.slot[1] > regs.slot[0]);208assert(regs.enabled[0]);209210/* Do the 63-x trick, see docs/disasm */211if (regs.slot[0] > 31) {212regs.slot[0] = 63 - regs.slot[0];213regs.slot[1] = 63 - regs.slot[1];214}215216assert(regs.slot[0] <= 31);217assert(regs.slot[1] <= 63);218219s.ctrl = ctrl;220s.reg1 = regs.slot[1];221s.reg0 = regs.slot[0];222} else {223/* slot 1 disabled, so set to zero and use slot 1 for ctrl */224s.ctrl = 0;225s.reg1 = ctrl << 2;226227if (regs.enabled[0]) {228/* Bit 0 upper bit of slot 0 */229s.reg1 |= (regs.slot[0] >> 5);230231/* Rest of slot 0 in usual spot */232s.reg0 = (regs.slot[0] & 0b11111);233} else {234/* Bit 1 set if slot 0 also disabled */235s.reg1 |= (1 << 1);236}237}238239/* Force r2 =/!= r3 as needed */240if (r2_equals_r3) {241assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3));242243if (regs.slot23.slot2)244regs.slot[3] = regs.slot[2];245else246regs.slot[2] = regs.slot[3];247} else if (!regs.first_instruction) {248/* Enforced by the encoding anyway */249assert(regs.slot[2] != regs.slot[3]);250}251252s.reg2 = regs.slot[2];253s.reg3 = regs.slot[3];254s.fau_idx = regs.fau_idx;255256memcpy(&packed, &s, sizeof(s));257return packed;258}259260/* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix261* this up at pack time. (Scheduling doesn't care.) 
*/262263static void264bi_flip_slots(bi_registers *regs)265{266if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) {267unsigned temp = regs->slot[0];268regs->slot[0] = regs->slot[1];269regs->slot[1] = temp;270}271272}273274static inline enum bifrost_packed_src275bi_get_src_slot(bi_registers *regs, unsigned reg)276{277if (regs->slot[0] == reg && regs->enabled[0])278return BIFROST_SRC_PORT0;279else if (regs->slot[1] == reg && regs->enabled[1])280return BIFROST_SRC_PORT1;281else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ)282return BIFROST_SRC_PORT2;283else284unreachable("Tried to access register with no port");285}286287static inline enum bifrost_packed_src288bi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s)289{290if (!ins)291return 0;292293bi_index src = ins->src[s];294295if (src.type == BI_INDEX_REGISTER)296return bi_get_src_slot(regs, src.value);297else if (src.type == BI_INDEX_PASS)298return src.value;299else if (bi_is_null(src) && ins->op == BI_OPCODE_ZS_EMIT && s < 2)300return BIFROST_SRC_STAGE;301else {302/* TODO make safer */303return BIFROST_SRC_STAGE;304}305}306307static struct bi_packed_tuple308bi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage)309{310bi_assign_slots(tuple, prev);311tuple->regs.fau_idx = tuple->fau_idx;312tuple->regs.first_instruction = first_tuple;313314bi_flip_slots(&tuple->regs);315316bool sr_read = tuple->add &&317bi_opcode_props[(tuple->add)->op].sr_read;318319uint64_t reg = bi_pack_registers(tuple->regs);320uint64_t fma = bi_pack_fma(tuple->fma,321bi_get_src_new(tuple->fma, &tuple->regs, 0),322bi_get_src_new(tuple->fma, &tuple->regs, 1),323bi_get_src_new(tuple->fma, &tuple->regs, 2),324bi_get_src_new(tuple->fma, &tuple->regs, 3));325326uint64_t add = bi_pack_add(tuple->add,327bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0),328bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1),329bi_get_src_new(tuple->add, 
&tuple->regs, sr_read + 2),3300);331332if (tuple->add) {333bi_instr *add = tuple->add;334335bool sr_write = bi_opcode_props[add->op].sr_write &&336!bi_is_null(add->dest[0]);337338if (sr_read && !bi_is_null(add->src[0])) {339assert(add->src[0].type == BI_INDEX_REGISTER);340clause->staging_register = add->src[0].value;341342if (sr_write)343assert(bi_is_equiv(add->src[0], add->dest[0]));344} else if (sr_write) {345assert(add->dest[0].type == BI_INDEX_REGISTER);346clause->staging_register = add->dest[0].value;347}348}349350struct bi_packed_tuple packed = {351.lo = reg | (fma << 35) | ((add & 0b111111) << 58),352.hi = add >> 6353};354355return packed;356}357358/* A block contains at most one PC-relative constant, from a terminal branch.359* Find the last instruction and if it is a relative branch, fix up the360* PC-relative constant to contain the absolute offset. This occurs at pack361* time instead of schedule time because the number of quadwords between each362* block is not known until after all other passes have finished.363*/364365static void366bi_assign_branch_offset(bi_context *ctx, bi_block *block)367{368if (list_is_empty(&block->clauses))369return;370371bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link);372bi_instr *br = bi_last_instr_in_clause(clause);373374if (!br->branch_target)375return;376377/* Put it in the high place */378int32_t qwords = bi_block_offset(ctx, clause, br->branch_target);379int32_t bytes = qwords * 16;380381/* Copy so we can toy with the sign without undefined behaviour */382uint32_t raw = 0;383memcpy(&raw, &bytes, sizeof(raw));384385/* Clear off top bits for A1/B1 bits */386raw &= ~0xF0000000;387388/* Put in top 32-bits */389assert(clause->pcrel_idx < 8);390clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull;391}392393static void394bi_pack_constants(unsigned tuple_count, uint64_t *constants,395unsigned word_idx, unsigned constant_words, bool ec0_packed,396struct util_dynarray *emission)397{398unsigned 
index = (word_idx << 1) + ec0_packed;399400/* Do more constants follow */401bool more = (word_idx + 1) < constant_words;402403/* Indexed first by tuple count and second by constant word number,404* indicates the position in the clause */405unsigned pos_lookup[8][3] = {406{ 0 },407{ 1 },408{ 3 },409{ 2, 5 },410{ 4, 8 },411{ 7, 11, 14 },412{ 6, 10, 13 },413{ 9, 12 }414};415416/* Compute the pos, and check everything is reasonable */417assert((tuple_count - 1) < 8);418assert(word_idx < 3);419unsigned pos = pos_lookup[tuple_count - 1][word_idx];420assert(pos != 0 || (tuple_count == 1 && word_idx == 0));421422struct bifrost_fmt_constant quad = {423.pos = pos,424.tag = more ? BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL,425.imm_1 = constants[index + 0] >> 4,426.imm_2 = constants[index + 1] >> 4,427};428429util_dynarray_append(emission, struct bifrost_fmt_constant, quad);430}431432static inline uint8_t433bi_pack_literal(enum bi_clause_subword literal)434{435assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0);436assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7);437438return (literal - BI_CLAUSE_SUBWORD_LITERAL_0);439}440441static inline uint8_t442bi_clause_upper(unsigned val,443struct bi_packed_tuple *tuples,444ASSERTED unsigned tuple_count)445{446assert(val < tuple_count);447448/* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */449struct bi_packed_tuple tuple = tuples[val];450return (tuple.hi >> 11);451}452453static inline uint8_t454bi_pack_upper(enum bi_clause_subword upper,455struct bi_packed_tuple *tuples,456ASSERTED unsigned tuple_count)457{458assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0);459assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7);460461return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples,462tuple_count);463}464465static inline uint64_t466bi_pack_tuple_bits(enum bi_clause_subword idx,467struct bi_packed_tuple *tuples,468ASSERTED unsigned tuple_count,469unsigned offset, unsigned nbits)470{471assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0);472assert(idx <= 
BI_CLAUSE_SUBWORD_TUPLE_7);473474unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0);475assert(val < tuple_count);476477struct bi_packed_tuple tuple = tuples[val];478479assert(offset + nbits < 78);480assert(nbits <= 64);481482/* (X >> start) & m483* = (((hi << 64) | lo) >> start) & m484* = (((hi << 64) >> start) | (lo >> start)) & m485* = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64486* { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64487* = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64488* { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64489*490* By setting m = 2^64 - 1, we justify doing the respective shifts as491* 64-bit integers. Zero special cased to avoid undefined behaviour.492*/493494uint64_t lo = (tuple.lo >> offset);495uint64_t hi = (offset == 0) ? 0496: (offset > 64) ? (tuple.hi >> (offset - 64))497: (tuple.hi << (64 - offset));498499return (lo | hi) & ((1ULL << nbits) - 1);500}501502static inline uint16_t503bi_pack_lu(enum bi_clause_subword word,504struct bi_packed_tuple *tuples,505ASSERTED unsigned tuple_count)506{507return (word >= BI_CLAUSE_SUBWORD_UPPER_0) ?508bi_pack_upper(word, tuples, tuple_count) :509bi_pack_literal(word);510}511512static uint8_t513bi_pack_sync(enum bi_clause_subword t1,514enum bi_clause_subword t2,515enum bi_clause_subword t3,516struct bi_packed_tuple *tuples,517ASSERTED unsigned tuple_count,518bool z)519{520uint8_t sync =521(bi_pack_lu(t3, tuples, tuple_count) << 0) |522(bi_pack_lu(t2, tuples, tuple_count) << 3);523524if (t1 == BI_CLAUSE_SUBWORD_Z)525sync |= z << 6;526else527sync |= bi_pack_literal(t1) << 6;528529return sync;530}531532static inline uint64_t533bi_pack_t_ec(enum bi_clause_subword word,534struct bi_packed_tuple *tuples,535ASSERTED unsigned tuple_count,536uint64_t ec0)537{538if (word == BI_CLAUSE_SUBWORD_CONSTANT)539return ec0;540else541return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60);542}543544static uint32_t545bi_pack_subwords_56(enum 
bi_clause_subword t,546struct bi_packed_tuple *tuples,547ASSERTED unsigned tuple_count,548uint64_t header, uint64_t ec0,549unsigned tuple_subword)550{551switch (t) {552case BI_CLAUSE_SUBWORD_HEADER:553return (header & ((1 << 30) - 1));554case BI_CLAUSE_SUBWORD_RESERVED:555return 0;556case BI_CLAUSE_SUBWORD_CONSTANT:557return (ec0 >> 15) & ((1 << 30) - 1);558default:559return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30);560}561}562563static uint16_t564bi_pack_subword(enum bi_clause_subword t, unsigned format,565struct bi_packed_tuple *tuples,566ASSERTED unsigned tuple_count,567uint64_t header, uint64_t ec0, unsigned m0,568unsigned tuple_subword)569{570switch (t) {571case BI_CLAUSE_SUBWORD_HEADER:572return header >> 30;573case BI_CLAUSE_SUBWORD_M:574return m0;575case BI_CLAUSE_SUBWORD_CONSTANT:576return (format == 5 || format == 10) ?577(ec0 & ((1 << 15) - 1)) :578(ec0 >> (15 + 30));579case BI_CLAUSE_SUBWORD_UPPER_23:580return (bi_clause_upper(2, tuples, tuple_count) << 12) |581(bi_clause_upper(3, tuples, tuple_count) << 9);582case BI_CLAUSE_SUBWORD_UPPER_56:583return (bi_clause_upper(5, tuples, tuple_count) << 12) |584(bi_clause_upper(6, tuples, tuple_count) << 9);585case BI_CLAUSE_SUBWORD_UPPER_0 ... 
BI_CLAUSE_SUBWORD_UPPER_7:586return bi_pack_upper(t, tuples, tuple_count) << 12;587default:588return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15);589}590}591592/* EC0 is 60-bits (bottom 4 already shifted off) */593void594bi_pack_format(struct util_dynarray *emission,595unsigned index,596struct bi_packed_tuple *tuples,597ASSERTED unsigned tuple_count,598uint64_t header, uint64_t ec0,599unsigned m0, bool z)600{601struct bi_clause_format format = bi_clause_formats[index];602603uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3,604tuples, tuple_count, z);605606uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0);607608uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, header, ec0, m0, 4);609610uint32_t s5_s6 = bi_pack_subwords_56(format.s5_s6,611tuples, tuple_count, header, ec0,612(format.format == 2 || format.format == 7) ? 0 : 3);613614uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, header, ec0, m0, 2);615616/* Now that subwords are packed, split into 64-bit halves and emit */617uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8);618uint64_t hi = (s0_s3 >> 56) | ((uint64_t) s4 << 4) | ((uint64_t) s5_s6 << 19) | ((uint64_t) s7 << 49);619620util_dynarray_append(emission, uint64_t, lo);621util_dynarray_append(emission, uint64_t, hi);622}623624static void625bi_pack_clause(bi_context *ctx, bi_clause *clause,626bi_clause *next_1, bi_clause *next_2,627struct util_dynarray *emission, gl_shader_stage stage)628{629struct bi_packed_tuple ins[8] = { 0 };630631for (unsigned i = 0; i < clause->tuple_count; ++i) {632unsigned prev = ((i == 0) ? 
clause->tuple_count : i) - 1;633ins[i] = bi_pack_tuple(clause, &clause->tuples[i],634&clause->tuples[prev], i == 0, stage);635}636637bool ec0_packed = bi_ec0_packed(clause->tuple_count);638639if (ec0_packed)640clause->constant_count = MAX2(clause->constant_count, 1);641642unsigned constant_quads =643DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2);644645uint64_t header = bi_pack_header(clause, next_1, next_2);646uint64_t ec0 = (clause->constants[0] >> 4);647unsigned m0 = (clause->pcrel_idx == 0) ? 4 : 0;648649unsigned counts[8] = {6501, 2, 3, 3, 4, 5, 5, 6651};652653unsigned indices[8][6] = {654{ 1 },655{ 0, 2 },656{ 0, 3, 4 },657{ 0, 3, 6 },658{ 0, 3, 7, 8 },659{ 0, 3, 5, 9, 10 },660{ 0, 3, 5, 9, 11 },661{ 0, 3, 5, 9, 12, 13 },662};663664unsigned count = counts[clause->tuple_count - 1];665666for (unsigned pos = 0; pos < count; ++pos) {667ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos];668assert(bi_clause_formats[idx].pos == pos);669assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) ==670(pos == count - 1));671672/* Whether to end the clause immediately after the last tuple */673bool z = (constant_quads == 0);674675bi_pack_format(emission, indices[clause->tuple_count - 1][pos],676ins, clause->tuple_count, header, ec0, m0,677z);678}679680/* Pack the remaining constants */681682for (unsigned pos = 0; pos < constant_quads; ++pos) {683bi_pack_constants(clause->tuple_count, clause->constants,684pos, constant_quads, ec0_packed, emission);685}686}687688static void689bi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission,690const bi_clause *clause)691{692/* No need to collect return addresses when we're in a blend shader. 
*/693if (ctx->inputs->is_blend)694return;695696const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1];697const bi_instr *ins = tuple->add;698699if (!ins || ins->op != BI_OPCODE_BLEND)700return;701702703unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0;704assert(loc < ARRAY_SIZE(ctx->info->bifrost.blend));705assert(!ctx->info->bifrost.blend[loc].return_offset);706ctx->info->bifrost.blend[loc].return_offset =707util_dynarray_num_elements(emission, uint8_t);708assert(!(ctx->info->bifrost.blend[loc].return_offset & 0x7));709}710711unsigned712bi_pack(bi_context *ctx, struct util_dynarray *emission)713{714unsigned previous_size = emission->size;715716bi_foreach_block(ctx, _block) {717bi_block *block = (bi_block *) _block;718719bi_assign_branch_offset(ctx, block);720721bi_foreach_clause_in_block(block, clause) {722bool is_last = (clause->link.next == &block->clauses);723724/* Get the succeeding clauses, either two successors of725* the block for the last clause in the block or just726* the next clause within the block */727728bi_clause *next = NULL, *next_2 = NULL;729730if (is_last) {731next = bi_next_clause(ctx, block->base.successors[0], NULL);732next_2 = bi_next_clause(ctx, block->base.successors[1], NULL);733} else {734next = bi_next_clause(ctx, _block, clause);735}736737738previous_size = emission->size;739740bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage);741742if (!is_last)743bi_collect_blend_ret_addr(ctx, emission, clause);744}745}746747return emission->size - previous_size;748}749750#ifndef NDEBUG751752static void753bi_test_pack_literal(void)754{755for (unsigned x = 0; x <= 7; ++x)756assert(bi_pack_literal(BI_CLAUSE_SUBWORD_LITERAL_0 + x) == x);757}758759static void760bi_test_pack_upper(void)761{762struct bi_packed_tuple tuples[] = {763{ 0, 0x3 << (75 - 64) },764{ 0, 0x1 << (75 - 64) },765{ 0, 0x7 << (75 - 64) },766{ 0, 0x0 << (75 - 64) },767{ 0, 0x2 << (75 - 64) },768{ 0, 0x6 << (75 - 64) },769{ 0, 0x5 << (75 - 64) },770{ 0, 0x4 << 
(75 - 64) },771};772773assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 0, tuples, 8) == 3);774assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 1, tuples, 8) == 1);775assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 2, tuples, 8) == 7);776assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 3, tuples, 8) == 0);777assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 4, tuples, 8) == 2);778assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 5, tuples, 8) == 6);779assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 6, tuples, 8) == 5);780assert(bi_pack_upper(BI_CLAUSE_SUBWORD_UPPER_0 + 7, tuples, 8) == 4);781}782783static void784bi_test_pack_tuple_bits(void)785{786struct bi_packed_tuple tuples[] = {787{ 0x1234567801234567, 0x3A },788{ 0x9876543299999999, 0x1B },789{ 0xABCDEF0101234567, 0x7C },790};791792assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 0, tuples, 8, 0, 30) == 0x01234567);793assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 1, tuples, 8, 10, 30) == 0xca66666);794assert(bi_pack_tuple_bits(BI_CLAUSE_SUBWORD_TUPLE_0 + 2, tuples, 8, 40, 15) == 0x4def);795}796797#define L(x) (BI_CLAUSE_SUBWORD_LITERAL_0 + x)798#define U(x) (BI_CLAUSE_SUBWORD_UPPER_0 + x)799#define Z BI_CLAUSE_SUBWORD_Z800801static void802bi_test_pack_sync(void)803{804struct bi_packed_tuple tuples[] = {805{ 0, 0x3 << (75 - 64) },806{ 0, 0x5 << (75 - 64) },807{ 0, 0x7 << (75 - 64) },808{ 0, 0x0 << (75 - 64) },809{ 0, 0x2 << (75 - 64) },810{ 0, 0x6 << (75 - 64) },811{ 0, 0x5 << (75 - 64) },812{ 0, 0x4 << (75 - 64) },813};814815assert(bi_pack_sync(L(3), L(1), L(7), tuples, 8, false) == 0xCF);816assert(bi_pack_sync(L(3), L(1), U(7), tuples, 8, false) == 0xCC);817assert(bi_pack_sync(L(3), U(1), U(7), tuples, 8, false) == 0xEC);818assert(bi_pack_sync(Z, U(1), U(7), tuples, 8, false) == 0x2C);819assert(bi_pack_sync(Z, U(1), U(7), tuples, 8, true) == 0x6C);820}821822int 
bi_test_packing(void)823{824bi_test_pack_literal();825bi_test_pack_upper();826bi_test_pack_tuple_bits();827bi_test_pack_sync();828829return 0;830}831#endif832833834