Path: blob/21.2-virgl/src/intel/common/mi_builder.h
4547 views
/*1* Copyright © 2019 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#ifndef MI_BUILDER_H24#define MI_BUILDER_H2526#include "dev/intel_device_info.h"27#include "genxml/genX_bits.h"28#include "util/bitscan.h"29#include "util/fast_idiv_by_const.h"30#include "util/u_math.h"3132#ifndef MI_BUILDER_NUM_ALLOC_GPRS33/** The number of GPRs the MI builder is allowed to allocate34*35* This may be set by a user of this API so that it can reserve some GPRs at36* the top end for its own use.37*/38#define MI_BUILDER_NUM_ALLOC_GPRS 1639#endif4041/** These must be defined by the user of the builder42*43* void *__gen_get_batch_dwords(__gen_user_data *user_data,44* unsigned num_dwords);45*46* __gen_address_type47* __gen_address_offset(__gen_address_type addr, uint64_t offset);48*49*50* If self-modifying batches are supported, we must be able to pass batch51* addresses around as void*s so pinning as well as batch chaining or some52* other mechanism for ensuring batch pointers remain valid during building is53* required. The following function must also be defined, it returns an54* address in canonical form:55*56* __gen_address_type57* __gen_get_batch_address(__gen_user_data *user_data, void *location);58*59* Also, __gen_combine_address must accept a location value of NULL and return60* a fully valid 64-bit address.61*/6263/*64* Start of the actual MI builder65*/6667#define __genxml_cmd_length(cmd) cmd ## _length68#define __genxml_cmd_header(cmd) cmd ## _header69#define __genxml_cmd_pack(cmd) cmd ## _pack7071#define mi_builder_pack(b, cmd, dst, name) \72for (struct cmd name = { __genxml_cmd_header(cmd) }, \73*_dst = (struct cmd *)(dst); __builtin_expect(_dst != NULL, 1); \74__genxml_cmd_pack(cmd)((b)->user_data, (void *)_dst, &name), \75_dst = NULL)7677#define mi_builder_emit(b, cmd, name) \78mi_builder_pack((b), cmd, __gen_get_batch_dwords((b)->user_data, __genxml_cmd_length(cmd)), name)798081enum mi_value_type {82MI_VALUE_TYPE_IMM,83MI_VALUE_TYPE_MEM32,84MI_VALUE_TYPE_MEM64,85MI_VALUE_TYPE_REG32,86MI_VALUE_TYPE_REG64,87};8889struct mi_value {90enum mi_value_type type;9192union {93uint64_t imm;94__gen_address_type addr;95uint32_t reg;96};9798#if GFX_VERx10 >= 7599bool invert;100#endif101};102103struct mi_reg_num {104uint32_t num;105#if GFX_VER >= 11106bool cs;107#endif108};109110static inline struct mi_reg_num111mi_adjust_reg_num(uint32_t reg)112{113#if GFX_VER >= 11114bool cs = reg >= 0x2000 && reg < 0x4000;115return (struct mi_reg_num) {116.num = reg - (cs ? 0x2000 : 0),117.cs = cs,118};119#else120return (struct mi_reg_num) { .num = reg, };121#endif122}123124#if GFX_VER >= 9125#define MI_BUILDER_MAX_MATH_DWORDS 256126#else127#define MI_BUILDER_MAX_MATH_DWORDS 64128#endif129130struct mi_builder {131const struct intel_device_info *devinfo;132__gen_user_data *user_data;133134#if GFX_VERx10 >= 75135uint32_t gprs;136uint8_t gpr_refs[MI_BUILDER_NUM_ALLOC_GPRS];137138unsigned num_math_dwords;139uint32_t math_dwords[MI_BUILDER_MAX_MATH_DWORDS];140#endif141};142143static inline void144mi_builder_init(struct mi_builder *b,145const struct intel_device_info *devinfo,146__gen_user_data *user_data)147{148memset(b, 0, sizeof(*b));149b->devinfo = devinfo;150b->user_data = user_data;151152#if GFX_VERx10 >= 75153b->gprs = 0;154b->num_math_dwords = 0;155#endif156}157158static inline void159mi_builder_flush_math(struct mi_builder *b)160{161#if GFX_VERx10 >= 75162if (b->num_math_dwords == 0)163return;164165uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,1661 + b->num_math_dwords);167mi_builder_pack(b, GENX(MI_MATH), dw, math) {168math.DWordLength = 1 + b->num_math_dwords - GENX(MI_MATH_length_bias);169}170memcpy(dw + 1, b->math_dwords, b->num_math_dwords * sizeof(uint32_t));171b->num_math_dwords = 0;172#endif173}174175#define _MI_BUILDER_GPR_BASE 0x2600176/* The actual hardware limit on GPRs */177#define _MI_BUILDER_NUM_HW_GPRS 16178179#if GFX_VERx10 >= 75180181static inline bool182mi_value_is_reg(struct mi_value val)183{184return val.type == MI_VALUE_TYPE_REG32 ||185val.type == MI_VALUE_TYPE_REG64;186}187188static inline bool189mi_value_is_gpr(struct mi_value val)190{191return mi_value_is_reg(val) &&192val.reg >= _MI_BUILDER_GPR_BASE &&193val.reg < _MI_BUILDER_GPR_BASE +194_MI_BUILDER_NUM_HW_GPRS * 8;195}196197static inline bool198_mi_value_is_allocated_gpr(struct mi_value val)199{200return mi_value_is_reg(val) &&201val.reg >= _MI_BUILDER_GPR_BASE &&202val.reg < _MI_BUILDER_GPR_BASE +203MI_BUILDER_NUM_ALLOC_GPRS * 8;204}205206static inline uint32_t207_mi_value_as_gpr(struct mi_value val)208{209assert(mi_value_is_gpr(val));210assert(val.reg % 8 == 0);211return (val.reg - _MI_BUILDER_GPR_BASE) / 8;212}213214static inline struct mi_value215mi_new_gpr(struct mi_builder *b)216{217unsigned gpr = ffs(~b->gprs) - 1;218assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);219assert(b->gpr_refs[gpr] == 0);220b->gprs |= (1u << gpr);221b->gpr_refs[gpr] = 1;222223return (struct mi_value) {224.type = MI_VALUE_TYPE_REG64,225.reg = _MI_BUILDER_GPR_BASE + gpr * 8,226};227}228#endif /* GFX_VERx10 >= 75 */229230/** Take a reference to a mi_value231*232* The MI builder uses reference counting to automatically free ALU GPRs for233* re-use in calculations. All mi_* math functions consume the reference234* they are handed for each source and return a reference to a value which the235* caller must consume. In particular, if you pas the same value into a236* single mi_* math function twice (say to add a number to itself), you237* are responsible for calling mi_value_ref() to get a second reference238* because the mi_* math function will consume it twice.239*/240static inline struct mi_value241mi_value_ref(struct mi_builder *b, struct mi_value val)242{243#if GFX_VERx10 >= 75244if (_mi_value_is_allocated_gpr(val)) {245unsigned gpr = _mi_value_as_gpr(val);246assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);247assert(b->gprs & (1u << gpr));248assert(b->gpr_refs[gpr] < UINT8_MAX);249b->gpr_refs[gpr]++;250}251#endif /* GFX_VERx10 >= 75 */252253return val;254}255256/** Drop a reference to a mi_value257*258* See also mi_value_ref.259*/260static inline void261mi_value_unref(struct mi_builder *b, struct mi_value val)262{263#if GFX_VERx10 >= 75264if (_mi_value_is_allocated_gpr(val)) {265unsigned gpr = _mi_value_as_gpr(val);266assert(gpr < MI_BUILDER_NUM_ALLOC_GPRS);267assert(b->gprs & (1u << gpr));268assert(b->gpr_refs[gpr] > 0);269if (--b->gpr_refs[gpr] == 0)270b->gprs &= ~(1u << gpr);271}272#endif /* GFX_VERx10 >= 75 */273}274275static inline struct mi_value276mi_imm(uint64_t imm)277{278return (struct mi_value) {279.type = MI_VALUE_TYPE_IMM,280.imm = imm,281};282}283284static inline struct mi_value285mi_reg32(uint32_t reg)286{287struct mi_value val = {288.type = MI_VALUE_TYPE_REG32,289.reg = reg,290};291#if GFX_VERx10 >= 75292assert(!_mi_value_is_allocated_gpr(val));293#endif294return val;295}296297static inline struct mi_value298mi_reg64(uint32_t reg)299{300struct mi_value val = {301.type = MI_VALUE_TYPE_REG64,302.reg = reg,303};304#if GFX_VERx10 >= 75305assert(!_mi_value_is_allocated_gpr(val));306#endif307return val;308}309310static inline struct mi_value311mi_mem32(__gen_address_type addr)312{313return (struct mi_value) {314.type = MI_VALUE_TYPE_MEM32,315.addr = addr,316};317}318319static inline struct mi_value320mi_mem64(__gen_address_type addr)321{322return (struct mi_value) {323.type = MI_VALUE_TYPE_MEM64,324.addr = addr,325};326}327328static inline struct mi_value329mi_value_half(struct mi_value value, bool top_32_bits)330{331switch (value.type) {332case MI_VALUE_TYPE_IMM:333if (top_32_bits)334value.imm >>= 32;335else336value.imm &= 0xffffffffu;337return value;338339case MI_VALUE_TYPE_MEM32:340assert(!top_32_bits);341return value;342343case MI_VALUE_TYPE_MEM64:344if (top_32_bits)345value.addr = __gen_address_offset(value.addr, 4);346value.type = MI_VALUE_TYPE_MEM32;347return value;348349case MI_VALUE_TYPE_REG32:350assert(!top_32_bits);351return value;352353case MI_VALUE_TYPE_REG64:354if (top_32_bits)355value.reg += 4;356value.type = MI_VALUE_TYPE_REG32;357return value;358}359360unreachable("Invalid mi_value type");361}362363static inline void364_mi_copy_no_unref(struct mi_builder *b,365struct mi_value dst, struct mi_value src)366{367#if GFX_VERx10 >= 75368/* TODO: We could handle src.invert by emitting a bit of math if we really369* wanted to.370*/371assert(!dst.invert && !src.invert);372#endif373mi_builder_flush_math(b);374375switch (dst.type) {376case MI_VALUE_TYPE_IMM:377unreachable("Cannot copy to an immediate");378379case MI_VALUE_TYPE_MEM64:380case MI_VALUE_TYPE_REG64:381switch (src.type) {382case MI_VALUE_TYPE_IMM:383if (dst.type == MI_VALUE_TYPE_REG64) {384uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,385GENX(MI_LOAD_REGISTER_IMM_length) + 2);386struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);387mi_builder_pack(b, GENX(MI_LOAD_REGISTER_IMM), dw, lri) {388lri.DWordLength = GENX(MI_LOAD_REGISTER_IMM_length) + 2 -389GENX(MI_LOAD_REGISTER_IMM_length_bias);390#if GFX_VER >= 11391lri.AddCSMMIOStartOffset = reg.cs;392#endif393}394dw[1] = reg.num;395dw[2] = src.imm;396dw[3] = reg.num + 4;397dw[4] = src.imm >> 32;398} else {399#if GFX_VER >= 8400assert(dst.type == MI_VALUE_TYPE_MEM64);401uint32_t *dw = (uint32_t *)__gen_get_batch_dwords(b->user_data,402GENX(MI_STORE_DATA_IMM_length) + 1);403mi_builder_pack(b, GENX(MI_STORE_DATA_IMM), dw, sdm) {404sdm.DWordLength = GENX(MI_STORE_DATA_IMM_length) + 1 -405GENX(MI_STORE_DATA_IMM_length_bias);406sdm.StoreQword = true;407sdm.Address = dst.addr;408}409dw[3] = src.imm;410dw[4] = src.imm >> 32;411#else412_mi_copy_no_unref(b, mi_value_half(dst, false),413mi_value_half(src, false));414_mi_copy_no_unref(b, mi_value_half(dst, true),415mi_value_half(src, true));416#endif417}418break;419case MI_VALUE_TYPE_REG32:420case MI_VALUE_TYPE_MEM32:421_mi_copy_no_unref(b, mi_value_half(dst, false),422mi_value_half(src, false));423_mi_copy_no_unref(b, mi_value_half(dst, true),424mi_imm(0));425break;426case MI_VALUE_TYPE_REG64:427case MI_VALUE_TYPE_MEM64:428_mi_copy_no_unref(b, mi_value_half(dst, false),429mi_value_half(src, false));430_mi_copy_no_unref(b, mi_value_half(dst, true),431mi_value_half(src, true));432break;433default:434unreachable("Invalid mi_value type");435}436break;437438case MI_VALUE_TYPE_MEM32:439switch (src.type) {440case MI_VALUE_TYPE_IMM:441mi_builder_emit(b, GENX(MI_STORE_DATA_IMM), sdi) {442sdi.Address = dst.addr;443#if GFX_VER >= 12444sdi.ForceWriteCompletionCheck = true;445#endif446sdi.ImmediateData = src.imm;447}448break;449450case MI_VALUE_TYPE_MEM32:451case MI_VALUE_TYPE_MEM64:452#if GFX_VER >= 8453mi_builder_emit(b, GENX(MI_COPY_MEM_MEM), cmm) {454cmm.DestinationMemoryAddress = dst.addr;455cmm.SourceMemoryAddress = src.addr;456}457#elif GFX_VERx10 == 75458{459struct mi_value tmp = mi_new_gpr(b);460_mi_copy_no_unref(b, tmp, src);461_mi_copy_no_unref(b, dst, tmp);462mi_value_unref(b, tmp);463}464#else465unreachable("Cannot do mem <-> mem copy on IVB and earlier");466#endif467break;468469case MI_VALUE_TYPE_REG32:470case MI_VALUE_TYPE_REG64:471mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {472struct mi_reg_num reg = mi_adjust_reg_num(src.reg);473srm.RegisterAddress = reg.num;474#if GFX_VER >= 11475srm.AddCSMMIOStartOffset = reg.cs;476#endif477srm.MemoryAddress = dst.addr;478}479break;480481default:482unreachable("Invalid mi_value type");483}484break;485486case MI_VALUE_TYPE_REG32:487switch (src.type) {488case MI_VALUE_TYPE_IMM:489mi_builder_emit(b, GENX(MI_LOAD_REGISTER_IMM), lri) {490struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);491lri.RegisterOffset = reg.num;492#if GFX_VER >= 11493lri.AddCSMMIOStartOffset = reg.cs;494#endif495lri.DataDWord = src.imm;496}497break;498499case MI_VALUE_TYPE_MEM32:500case MI_VALUE_TYPE_MEM64:501#if GFX_VER >= 7502mi_builder_emit(b, GENX(MI_LOAD_REGISTER_MEM), lrm) {503struct mi_reg_num reg = mi_adjust_reg_num(dst.reg);504lrm.RegisterAddress = reg.num;505#if GFX_VER >= 11506lrm.AddCSMMIOStartOffset = reg.cs;507#endif508lrm.MemoryAddress = src.addr;509}510#else511unreachable("Cannot load do mem -> reg copy on SNB and earlier");512#endif513break;514515case MI_VALUE_TYPE_REG32:516case MI_VALUE_TYPE_REG64:517#if GFX_VERx10 >= 75518if (src.reg != dst.reg) {519mi_builder_emit(b, GENX(MI_LOAD_REGISTER_REG), lrr) {520struct mi_reg_num reg = mi_adjust_reg_num(src.reg);521lrr.SourceRegisterAddress = reg.num;522#if GFX_VER >= 11523lrr.AddCSMMIOStartOffsetSource = reg.cs;524#endif525reg = mi_adjust_reg_num(dst.reg);526lrr.DestinationRegisterAddress = reg.num;527#if GFX_VER >= 11528lrr.AddCSMMIOStartOffsetDestination = reg.cs;529#endif530}531}532#else533unreachable("Cannot do reg <-> reg copy on IVB and earlier");534#endif535break;536537default:538unreachable("Invalid mi_value type");539}540break;541542default:543unreachable("Invalid mi_value type");544}545}546547#if GFX_VERx10 >= 75548static inline struct mi_value549mi_resolve_invert(struct mi_builder *b, struct mi_value src);550#endif551552/** Store the value in src to the value represented by dst553*554* If the bit size of src and dst mismatch, this function does an unsigned555* integer cast. If src has more bits than dst, it takes the bottom bits. If556* src has fewer bits then dst, it fills the top bits with zeros.557*558* This function consumes one reference for each of src and dst.559*/560static inline void561mi_store(struct mi_builder *b, struct mi_value dst, struct mi_value src)562{563#if GFX_VERx10 >= 75564src = mi_resolve_invert(b, src);565#endif566_mi_copy_no_unref(b, dst, src);567mi_value_unref(b, src);568mi_value_unref(b, dst);569}570571static inline void572mi_memset(struct mi_builder *b, __gen_address_type dst,573uint32_t value, uint32_t size)574{575#if GFX_VERx10 >= 75576assert(b->num_math_dwords == 0);577#endif578579/* This memset operates in units of dwords. */580assert(size % 4 == 0);581582for (uint32_t i = 0; i < size; i += 4) {583mi_store(b, mi_mem32(__gen_address_offset(dst, i)),584mi_imm(value));585}586}587588/* NOTE: On IVB, this function stomps GFX7_3DPRIM_BASE_VERTEX */589static inline void590mi_memcpy(struct mi_builder *b, __gen_address_type dst,591__gen_address_type src, uint32_t size)592{593#if GFX_VERx10 >= 75594assert(b->num_math_dwords == 0);595#endif596597/* This memcpy operates in units of dwords. */598assert(size % 4 == 0);599600for (uint32_t i = 0; i < size; i += 4) {601struct mi_value dst_val = mi_mem32(__gen_address_offset(dst, i));602struct mi_value src_val = mi_mem32(__gen_address_offset(src, i));603#if GFX_VERx10 >= 75604mi_store(b, dst_val, src_val);605#else606/* IVB does not have a general purpose register for command streamer607* commands. Therefore, we use an alternate temporary register.608*/609struct mi_value tmp_reg = mi_reg32(0x2440); /* GFX7_3DPRIM_BASE_VERTEX */610mi_store(b, tmp_reg, src_val);611mi_store(b, dst_val, tmp_reg);612#endif613}614}615616/*617* MI_MATH Section. Only available on Haswell+618*/619620#if GFX_VERx10 >= 75621622/**623* Perform a predicated store (assuming the condition is already loaded624* in the MI_PREDICATE_RESULT register) of the value in src to the memory625* location specified by dst. Non-memory destinations are not supported.626*627* This function consumes one reference for each of src and dst.628*/629static inline void630mi_store_if(struct mi_builder *b, struct mi_value dst, struct mi_value src)631{632assert(!dst.invert && !src.invert);633634mi_builder_flush_math(b);635636/* We can only predicate MI_STORE_REGISTER_MEM, so restrict the637* destination to be memory, and resolve the source to a temporary638* register if it isn't in one already.639*/640assert(dst.type == MI_VALUE_TYPE_MEM64 ||641dst.type == MI_VALUE_TYPE_MEM32);642643if (src.type != MI_VALUE_TYPE_REG32 &&644src.type != MI_VALUE_TYPE_REG64) {645struct mi_value tmp = mi_new_gpr(b);646_mi_copy_no_unref(b, tmp, src);647src = tmp;648}649650if (dst.type == MI_VALUE_TYPE_MEM64) {651mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {652struct mi_reg_num reg = mi_adjust_reg_num(src.reg);653srm.RegisterAddress = reg.num;654#if GFX_VER >= 11655srm.AddCSMMIOStartOffset = reg.cs;656#endif657srm.MemoryAddress = dst.addr;658srm.PredicateEnable = true;659}660mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {661struct mi_reg_num reg = mi_adjust_reg_num(src.reg + 4);662srm.RegisterAddress = reg.num;663#if GFX_VER >= 11664srm.AddCSMMIOStartOffset = reg.cs;665#endif666srm.MemoryAddress = __gen_address_offset(dst.addr, 4);667srm.PredicateEnable = true;668}669} else {670mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {671struct mi_reg_num reg = mi_adjust_reg_num(src.reg);672srm.RegisterAddress = reg.num;673#if GFX_VER >= 11674srm.AddCSMMIOStartOffset = reg.cs;675#endif676srm.MemoryAddress = dst.addr;677srm.PredicateEnable = true;678}679}680681mi_value_unref(b, src);682mi_value_unref(b, dst);683}684685static inline void686_mi_builder_push_math(struct mi_builder *b,687const uint32_t *dwords,688unsigned num_dwords)689{690assert(num_dwords < MI_BUILDER_MAX_MATH_DWORDS);691if (b->num_math_dwords + num_dwords > MI_BUILDER_MAX_MATH_DWORDS)692mi_builder_flush_math(b);693694memcpy(&b->math_dwords[b->num_math_dwords],695dwords, num_dwords * sizeof(*dwords));696b->num_math_dwords += num_dwords;697}698699static inline uint32_t700_mi_pack_alu(uint32_t opcode, uint32_t operand1, uint32_t operand2)701{702struct GENX(MI_MATH_ALU_INSTRUCTION) instr = {703.Operand2 = operand2,704.Operand1 = operand1,705.ALUOpcode = opcode,706};707708uint32_t dw;709GENX(MI_MATH_ALU_INSTRUCTION_pack)(NULL, &dw, &instr);710711return dw;712}713714static inline struct mi_value715mi_value_to_gpr(struct mi_builder *b, struct mi_value val)716{717if (mi_value_is_gpr(val))718return val;719720/* Save off the invert flag because it makes copy() grumpy */721bool invert = val.invert;722val.invert = false;723724struct mi_value tmp = mi_new_gpr(b);725_mi_copy_no_unref(b, tmp, val);726tmp.invert = invert;727728return tmp;729}730731static inline uint64_t732mi_value_to_u64(struct mi_value val)733{734assert(val.type == MI_VALUE_TYPE_IMM);735return val.invert ? ~val.imm : val.imm;736}737738static inline uint32_t739_mi_math_load_src(struct mi_builder *b, unsigned src, struct mi_value *val)740{741if (val->type == MI_VALUE_TYPE_IMM &&742(val->imm == 0 || val->imm == UINT64_MAX)) {743uint64_t imm = val->invert ? ~val->imm : val->imm;744return _mi_pack_alu(imm ? MI_ALU_LOAD1 : MI_ALU_LOAD0, src, 0);745} else {746*val = mi_value_to_gpr(b, *val);747return _mi_pack_alu(val->invert ? MI_ALU_LOADINV : MI_ALU_LOAD,748src, _mi_value_as_gpr(*val));749}750}751752static inline struct mi_value753mi_math_binop(struct mi_builder *b, uint32_t opcode,754struct mi_value src0, struct mi_value src1,755uint32_t store_op, uint32_t store_src)756{757struct mi_value dst = mi_new_gpr(b);758759uint32_t dw[4];760dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &src0);761dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &src1);762dw[2] = _mi_pack_alu(opcode, 0, 0);763dw[3] = _mi_pack_alu(store_op, _mi_value_as_gpr(dst), store_src);764_mi_builder_push_math(b, dw, 4);765766mi_value_unref(b, src0);767mi_value_unref(b, src1);768769return dst;770}771772static inline struct mi_value773mi_inot(struct mi_builder *b, struct mi_value val)774{775if (val.type == MI_VALUE_TYPE_IMM)776return mi_imm(~mi_value_to_u64(val));777778val.invert = !val.invert;779return val;780}781782static inline struct mi_value783mi_resolve_invert(struct mi_builder *b, struct mi_value src)784{785if (!src.invert)786return src;787788assert(src.type != MI_VALUE_TYPE_IMM);789return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),790MI_ALU_STORE, MI_ALU_ACCU);791}792793static inline struct mi_value794mi_iadd(struct mi_builder *b, struct mi_value src0, struct mi_value src1)795{796if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)797return mi_imm(mi_value_to_u64(src0) + mi_value_to_u64(src1));798799return mi_math_binop(b, MI_ALU_ADD, src0, src1,800MI_ALU_STORE, MI_ALU_ACCU);801}802803static inline struct mi_value804mi_iadd_imm(struct mi_builder *b,805struct mi_value src, uint64_t N)806{807if (N == 0)808return src;809810return mi_iadd(b, src, mi_imm(N));811}812813static inline struct mi_value814mi_isub(struct mi_builder *b, struct mi_value src0, struct mi_value src1)815{816if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)817return mi_imm(mi_value_to_u64(src0) - mi_value_to_u64(src1));818819return mi_math_binop(b, MI_ALU_SUB, src0, src1,820MI_ALU_STORE, MI_ALU_ACCU);821}822823static inline struct mi_value824mi_ieq(struct mi_builder *b, struct mi_value src0, struct mi_value src1)825{826if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)827return mi_imm(mi_value_to_u64(src0) == mi_value_to_u64(src1) ? ~0ull : 0);828829/* Compute "equal" by subtracting and storing the zero bit */830return mi_math_binop(b, MI_ALU_SUB, src0, src1,831MI_ALU_STORE, MI_ALU_ZF);832}833834static inline struct mi_value835mi_ine(struct mi_builder *b, struct mi_value src0, struct mi_value src1)836{837if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)838return mi_imm(mi_value_to_u64(src0) != mi_value_to_u64(src1) ? ~0ull : 0);839840/* Compute "not equal" by subtracting and storing the inverse zero bit */841return mi_math_binop(b, MI_ALU_SUB, src0, src1,842MI_ALU_STOREINV, MI_ALU_ZF);843}844845static inline struct mi_value846mi_ult(struct mi_builder *b, struct mi_value src0, struct mi_value src1)847{848if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)849return mi_imm(mi_value_to_u64(src0) < mi_value_to_u64(src1) ? ~0ull : 0);850851/* Compute "less than" by subtracting and storing the carry bit */852return mi_math_binop(b, MI_ALU_SUB, src0, src1,853MI_ALU_STORE, MI_ALU_CF);854}855856static inline struct mi_value857mi_uge(struct mi_builder *b, struct mi_value src0, struct mi_value src1)858{859if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)860return mi_imm(mi_value_to_u64(src0) >= mi_value_to_u64(src1) ? ~0ull : 0);861862/* Compute "less than" by subtracting and storing the carry bit */863return mi_math_binop(b, MI_ALU_SUB, src0, src1,864MI_ALU_STOREINV, MI_ALU_CF);865}866867static inline struct mi_value868mi_iand(struct mi_builder *b, struct mi_value src0, struct mi_value src1)869{870if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)871return mi_imm(mi_value_to_u64(src0) & mi_value_to_u64(src1));872873return mi_math_binop(b, MI_ALU_AND, src0, src1,874MI_ALU_STORE, MI_ALU_ACCU);875}876877static inline struct mi_value878mi_nz(struct mi_builder *b, struct mi_value src)879{880if (src.type == MI_VALUE_TYPE_IMM)881return mi_imm(mi_value_to_u64(src) != 0 ? ~0ull : 0);882883return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),884MI_ALU_STOREINV, MI_ALU_ZF);885}886887static inline struct mi_value888mi_z(struct mi_builder *b, struct mi_value src)889{890if (src.type == MI_VALUE_TYPE_IMM)891return mi_imm(mi_value_to_u64(src) == 0 ? ~0ull : 0);892893return mi_math_binop(b, MI_ALU_ADD, src, mi_imm(0),894MI_ALU_STORE, MI_ALU_ZF);895}896897static inline struct mi_value898mi_ior(struct mi_builder *b,899struct mi_value src0, struct mi_value src1)900{901if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)902return mi_imm(mi_value_to_u64(src0) | mi_value_to_u64(src1));903904return mi_math_binop(b, MI_ALU_OR, src0, src1,905MI_ALU_STORE, MI_ALU_ACCU);906}907908#if GFX_VERx10 >= 125909static inline struct mi_value910mi_ishl(struct mi_builder *b, struct mi_value src0, struct mi_value src1)911{912if (src1.type == MI_VALUE_TYPE_IMM) {913assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));914assert(mi_value_to_u64(src1) <= 32);915}916917if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)918return mi_imm(mi_value_to_u64(src0) << mi_value_to_u64(src1));919920return mi_math_binop(b, MI_ALU_SHL, src0, src1,921MI_ALU_STORE, MI_ALU_ACCU);922}923924static inline struct mi_value925mi_ushr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)926{927if (src1.type == MI_VALUE_TYPE_IMM) {928assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));929assert(mi_value_to_u64(src1) <= 32);930}931932if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)933return mi_imm(mi_value_to_u64(src0) >> mi_value_to_u64(src1));934935return mi_math_binop(b, MI_ALU_SHR, src0, src1,936MI_ALU_STORE, MI_ALU_ACCU);937}938939static inline struct mi_value940mi_ushr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)941{942if (shift == 0)943return src;944945if (shift >= 64)946return mi_imm(0);947948if (src.type == MI_VALUE_TYPE_IMM)949return mi_imm(mi_value_to_u64(src) >> shift);950951struct mi_value res = mi_value_to_gpr(b, src);952953/* Annoyingly, we only have power-of-two shifts */954while (shift) {955int bit = u_bit_scan(&shift);956assert(bit <= 5);957res = mi_ushr(b, res, mi_imm(1 << bit));958}959960return res;961}962963static inline struct mi_value964mi_ishr(struct mi_builder *b, struct mi_value src0, struct mi_value src1)965{966if (src1.type == MI_VALUE_TYPE_IMM) {967assert(util_is_power_of_two_or_zero(mi_value_to_u64(src1)));968assert(mi_value_to_u64(src1) <= 32);969}970971if (src0.type == MI_VALUE_TYPE_IMM && src1.type == MI_VALUE_TYPE_IMM)972return mi_imm((int64_t)mi_value_to_u64(src0) >> mi_value_to_u64(src1));973974return mi_math_binop(b, MI_ALU_SAR, src0, src1,975MI_ALU_STORE, MI_ALU_ACCU);976}977978static inline struct mi_value979mi_ishr_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)980{981if (shift == 0)982return src;983984if (shift >= 64)985return mi_imm(0);986987if (src.type == MI_VALUE_TYPE_IMM)988return mi_imm((int64_t)mi_value_to_u64(src) >> shift);989990struct mi_value res = mi_value_to_gpr(b, src);991992/* Annoyingly, we only have power-of-two shifts */993while (shift) {994int bit = u_bit_scan(&shift);995assert(bit <= 5);996res = mi_ishr(b, res, mi_imm(1 << bit));997}998999return res;1000}1001#endif /* if GFX_VERx10 >= 125 */10021003static inline struct mi_value1004mi_imul_imm(struct mi_builder *b, struct mi_value src, uint32_t N)1005{1006if (src.type == MI_VALUE_TYPE_IMM)1007return mi_imm(mi_value_to_u64(src) * N);10081009if (N == 0) {1010mi_value_unref(b, src);1011return mi_imm(0);1012}10131014if (N == 1)1015return src;10161017src = mi_value_to_gpr(b, src);10181019struct mi_value res = mi_value_ref(b, src);10201021unsigned top_bit = 31 - __builtin_clz(N);1022for (int i = top_bit - 1; i >= 0; i--) {1023res = mi_iadd(b, res, mi_value_ref(b, res));1024if (N & (1 << i))1025res = mi_iadd(b, res, mi_value_ref(b, src));1026}10271028mi_value_unref(b, src);10291030return res;1031}10321033static inline struct mi_value1034mi_ishl_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)1035{1036if (shift == 0)1037return src;10381039if (shift >= 64)1040return mi_imm(0);10411042if (src.type == MI_VALUE_TYPE_IMM)1043return mi_imm(mi_value_to_u64(src) << shift);10441045struct mi_value res = mi_value_to_gpr(b, src);10461047#if GFX_VERx10 >= 1251048/* Annoyingly, we only have power-of-two shifts */1049while (shift) {1050int bit = u_bit_scan(&shift);1051assert(bit <= 5);1052res = mi_ishl(b, res, mi_imm(1 << bit));1053}1054#else1055for (unsigned i = 0; i < shift; i++)1056res = mi_iadd(b, res, mi_value_ref(b, res));1057#endif10581059return res;1060}10611062static inline struct mi_value1063mi_ushr32_imm(struct mi_builder *b, struct mi_value src, uint32_t shift)1064{1065if (shift == 0)1066return src;10671068if (shift >= 64)1069return mi_imm(0);10701071/* We right-shift by left-shifting by 32 - shift and taking the top 32 bits1072* of the result.1073*/1074if (src.type == MI_VALUE_TYPE_IMM)1075return mi_imm((mi_value_to_u64(src) >> shift) & UINT32_MAX);10761077if (shift > 32) {1078struct mi_value tmp = mi_new_gpr(b);1079_mi_copy_no_unref(b, mi_value_half(tmp, false),1080mi_value_half(src, true));1081_mi_copy_no_unref(b, mi_value_half(tmp, true), mi_imm(0));1082mi_value_unref(b, src);1083src = tmp;1084shift -= 32;1085}1086assert(shift <= 32);1087struct mi_value tmp = mi_ishl_imm(b, src, 32 - shift);1088struct mi_value dst = mi_new_gpr(b);1089_mi_copy_no_unref(b, mi_value_half(dst, false),1090mi_value_half(tmp, true));1091_mi_copy_no_unref(b, mi_value_half(dst, true), mi_imm(0));1092mi_value_unref(b, tmp);1093return dst;1094}10951096static inline struct mi_value1097mi_udiv32_imm(struct mi_builder *b, struct mi_value N, uint32_t D)1098{1099if (N.type == MI_VALUE_TYPE_IMM) {1100assert(mi_value_to_u64(N) <= UINT32_MAX);1101return mi_imm(mi_value_to_u64(N) / D);1102}11031104/* We implicitly assume that N is only a 32-bit value */1105if (D == 0) {1106/* This is invalid but we should do something */1107return mi_imm(0);1108} else if (util_is_power_of_two_or_zero(D)) {1109return mi_ushr32_imm(b, N, util_logbase2(D));1110} else {1111struct util_fast_udiv_info m = util_compute_fast_udiv_info(D, 32, 32);1112assert(m.multiplier <= UINT32_MAX);11131114if (m.pre_shift)1115N = mi_ushr32_imm(b, N, m.pre_shift);11161117/* Do the 32x32 multiply into gpr0 */1118N = mi_imul_imm(b, N, m.multiplier);11191120if (m.increment)1121N = mi_iadd(b, N, mi_imm(m.multiplier));11221123N = mi_ushr32_imm(b, N, 32);11241125if (m.post_shift)1126N = mi_ushr32_imm(b, N, m.post_shift);11271128return N;1129}1130}11311132#endif /* MI_MATH section */11331134/* This assumes addresses of strictly more than 32bits (aka. Gfx8+). */1135#if MI_BUILDER_CAN_WRITE_BATCH11361137struct mi_address_token {1138/* Pointers to address memory fields in the batch. */1139uint64_t *ptrs[2];1140};11411142static inline struct mi_address_token1143mi_store_address(struct mi_builder *b, struct mi_value addr_reg)1144{1145mi_builder_flush_math(b);11461147assert(addr_reg.type == MI_VALUE_TYPE_REG64);11481149struct mi_address_token token = {};11501151for (unsigned i = 0; i < 2; i++) {1152mi_builder_emit(b, GENX(MI_STORE_REGISTER_MEM), srm) {1153srm.RegisterAddress = addr_reg.reg + (i * 4);11541155const unsigned addr_dw =1156GENX(MI_STORE_REGISTER_MEM_MemoryAddress_start) / 8;1157token.ptrs[i] = (void *)_dst + addr_dw;1158}1159}11601161mi_value_unref(b, addr_reg);1162return token;1163}11641165static inline void1166mi_self_mod_barrier(struct mi_builder *b)1167{1168/* First make sure all the memory writes from previous modifying commands1169* have landed. We want to do this before going through the CS cache,1170* otherwise we could be fetching memory that hasn't been written to yet.1171*/1172mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {1173pc.CommandStreamerStallEnable = true;1174}1175/* Documentation says Gfx11+ should be able to invalidate the command cache1176* but experiment show it doesn't work properly, so for now just get over1177* the CS prefetch.1178*/1179for (uint32_t i = 0; i < (b->devinfo->cs_prefetch_size / 4); i++)1180mi_builder_emit(b, GENX(MI_NOOP), noop);1181}11821183static inline void1184_mi_resolve_address_token(struct mi_builder *b,1185struct mi_address_token token,1186void *batch_location)1187{1188__gen_address_type addr = __gen_get_batch_address(b->user_data,1189batch_location);1190uint64_t addr_addr_u64 = __gen_combine_address(b->user_data, batch_location,1191addr, 0);1192*(token.ptrs[0]) = addr_addr_u64;1193*(token.ptrs[1]) = addr_addr_u64 + 4;1194}11951196#endif /* MI_BUILDER_CAN_WRITE_BATCH */11971198#if GFX_VERx10 >= 12511991200/*1201* Indirect load/store. Only available on XE_HP+1202*/12031204MUST_CHECK static inline struct mi_value1205mi_load_mem64_offset(struct mi_builder *b,1206__gen_address_type addr, struct mi_value offset)1207{1208uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);1209struct mi_value addr_val = mi_imm(addr_u64);12101211struct mi_value dst = mi_new_gpr(b);12121213uint32_t dw[5];1214dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);1215dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);1216dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);1217dw[3] = _mi_pack_alu(MI_ALU_LOADIND, _mi_value_as_gpr(dst), MI_ALU_ACCU);1218dw[4] = _mi_pack_alu(MI_ALU_FENCE_RD, 0, 0);1219_mi_builder_push_math(b, dw, 5);12201221mi_value_unref(b, addr_val);1222mi_value_unref(b, offset);12231224return dst;1225}12261227static inline void1228mi_store_mem64_offset(struct mi_builder *b,1229__gen_address_type addr, struct mi_value offset,1230struct mi_value data)1231{1232uint64_t addr_u64 = __gen_combine_address(b->user_data, NULL, addr, 0);1233struct mi_value addr_val = mi_imm(addr_u64);12341235data = mi_value_to_gpr(b, mi_resolve_invert(b, data));12361237uint32_t dw[5];1238dw[0] = _mi_math_load_src(b, MI_ALU_SRCA, &addr_val);1239dw[1] = _mi_math_load_src(b, MI_ALU_SRCB, &offset);1240dw[2] = _mi_pack_alu(MI_ALU_ADD, 0, 0);1241dw[3] = _mi_pack_alu(MI_ALU_STOREIND, MI_ALU_ACCU, _mi_value_as_gpr(data));1242dw[4] = _mi_pack_alu(MI_ALU_FENCE_WR, 0, 0);1243_mi_builder_push_math(b, dw, 5);12441245mi_value_unref(b, addr_val);1246mi_value_unref(b, offset);1247mi_value_unref(b, data);12481249/* This is the only math case which has side-effects outside of regular1250* registers to flush math afterwards so we don't confuse anyone.1251*/1252mi_builder_flush_math(b);1253}12541255/*1256* Control-flow Section. Only available on XE_HP+1257*/12581259struct _mi_goto {1260bool predicated;1261void *mi_bbs;1262};12631264struct mi_goto_target {1265bool placed;1266unsigned num_gotos;1267struct _mi_goto gotos[8];1268__gen_address_type addr;1269};12701271#define MI_GOTO_TARGET_INIT ((struct mi_goto_target) {})12721273#define MI_BUILDER_MI_PREDICATE_RESULT_num 0x241812741275static inline void1276mi_goto_if(struct mi_builder *b, struct mi_value cond,1277struct mi_goto_target *t)1278{1279/* First, set up the predicate, if any */1280bool predicated;1281if (cond.type == MI_VALUE_TYPE_IMM) {1282/* If it's an immediate, the goto either doesn't happen or happens1283* unconditionally.1284*/1285if (mi_value_to_u64(cond) == 0)1286return;12871288assert(mi_value_to_u64(cond) == ~0ull);1289predicated = false;1290} else if (mi_value_is_reg(cond) &&1291cond.reg == MI_BUILDER_MI_PREDICATE_RESULT_num) {1292/* If it's MI_PREDICATE_RESULT, we use whatever predicate the client1293* provided us with1294*/1295assert(cond.type == MI_VALUE_TYPE_REG32);1296predicated = true;1297} else {1298mi_store(b, mi_reg32(MI_BUILDER_MI_PREDICATE_RESULT_num), cond);1299predicated = true;1300}13011302if (predicated) {1303mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {1304sp.PredicateEnable = NOOPOnResultClear;1305}1306}1307if (t->placed) {1308mi_builder_emit(b, GENX(MI_BATCH_BUFFER_START), bbs) {1309bbs.PredicationEnable = predicated;1310bbs.AddressSpaceIndicator = ASI_PPGTT;1311bbs.BatchBufferStartAddress = t->addr;1312}1313} else {1314assert(t->num_gotos < ARRAY_SIZE(t->gotos));1315struct _mi_goto g = {1316.predicated = predicated,1317.mi_bbs = __gen_get_batch_dwords(b->user_data,1318GENX(MI_BATCH_BUFFER_START_length)),1319};1320memset(g.mi_bbs, 0, 4 * GENX(MI_BATCH_BUFFER_START_length));1321t->gotos[t->num_gotos++] = g;1322}1323if (predicated) {1324mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {1325sp.PredicateEnable = NOOPNever;1326}1327}1328}13291330static inline void1331mi_goto(struct mi_builder *b, struct mi_goto_target *t)1332{1333mi_goto_if(b, mi_imm(-1), t);1334}13351336static inline void1337mi_goto_target(struct mi_builder *b, struct mi_goto_target *t)1338{1339mi_builder_emit(b, GENX(MI_SET_PREDICATE), sp) {1340sp.PredicateEnable = NOOPNever;1341t->addr = __gen_get_batch_address(b->user_data, _dst);1342}1343t->placed = true;13441345struct GENX(MI_BATCH_BUFFER_START) bbs = { GENX(MI_BATCH_BUFFER_START_header) };1346bbs.AddressSpaceIndicator = ASI_PPGTT;1347bbs.BatchBufferStartAddress = t->addr;13481349for (unsigned i = 0; i < t->num_gotos; i++) {1350bbs.PredicationEnable = t->gotos[i].predicated;1351GENX(MI_BATCH_BUFFER_START_pack)(b->user_data, t->gotos[i].mi_bbs, &bbs);1352}1353}13541355static inline struct mi_goto_target1356mi_goto_target_init_and_place(struct mi_builder *b)1357{1358struct mi_goto_target t = MI_GOTO_TARGET_INIT;1359mi_goto_target(b, &t);1360return t;1361}13621363#define mi_loop(b) \1364for (struct mi_goto_target __break = MI_GOTO_TARGET_INIT, \1365__continue = mi_goto_target_init_and_place(b); !__break.placed; \1366mi_goto(b, &__continue), mi_goto_target(b, &__break))13671368#define mi_break(b) mi_goto(b, &__break)1369#define mi_break_if(b, cond) mi_goto_if(b, cond, &__break)1370#define mi_continue(b) mi_goto(b, &__continue)1371#define mi_continue_if(b, cond) mi_goto_if(b, cond, &__continue)13721373#endif /* GFX_VERx10 >= 125 */13741375#endif /* MI_BUILDER_H */137613771378