Path: blob/21.2-virgl/src/gallium/drivers/vc4/vc4_qir.h
4570 views
/*1* Copyright © 2014 Broadcom2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#ifndef VC4_QIR_H24#define VC4_QIR_H2526#include <assert.h>27#include <stdio.h>28#include <stdlib.h>29#include <stdbool.h>30#include <stdint.h>31#include <string.h>3233#include "util/macros.h"34#include "compiler/nir/nir.h"35#include "util/list.h"36#include "util/u_math.h"3738#include "vc4_screen.h"39#include "vc4_qpu_defines.h"40#include "vc4_qpu.h"41#include "kernel/vc4_packet.h"42#include "pipe/p_state.h"4344struct nir_builder;4546enum qfile {47QFILE_NULL,48QFILE_TEMP,49QFILE_VARY,50QFILE_UNIF,51QFILE_VPM,52QFILE_TLB_COLOR_WRITE,53QFILE_TLB_COLOR_WRITE_MS,54QFILE_TLB_Z_WRITE,55QFILE_TLB_STENCIL_SETUP,5657/* If tex_s is written on its own without preceding t/r/b setup, it's58* a direct memory access using the input value, without the sideband59* uniform load. We represent these in QIR as a separate write60* destination so we can tell if the sideband uniform is present.61*/62QFILE_TEX_S_DIRECT,6364QFILE_TEX_S,65QFILE_TEX_T,66QFILE_TEX_R,67QFILE_TEX_B,6869/* Payload registers that aren't in the physical register file, so we70* can just use the corresponding qpu_reg at qpu_emit time.71*/72QFILE_FRAG_X,73QFILE_FRAG_Y,74QFILE_FRAG_REV_FLAG,75QFILE_QPU_ELEMENT,7677/**78* Stores an immediate value in the index field that will be used79* directly by qpu_load_imm().80*/81QFILE_LOAD_IMM,8283/**84* Stores an immediate value in the index field that can be turned85* into a small immediate field by qpu_encode_small_immediate().86*/87QFILE_SMALL_IMM,88};8990struct qreg {91enum qfile file;92uint32_t index;93int pack;94};9596static inline struct qreg qir_reg(enum qfile file, uint32_t index)97{98return (struct qreg){file, index};99}100101enum qop {102QOP_UNDEF,103QOP_MOV,104QOP_FMOV,105QOP_MMOV,106QOP_FADD,107QOP_FSUB,108QOP_FMUL,109QOP_V8MULD,110QOP_V8MIN,111QOP_V8MAX,112QOP_V8ADDS,113QOP_V8SUBS,114QOP_MUL24,115QOP_FMIN,116QOP_FMAX,117QOP_FMINABS,118QOP_FMAXABS,119QOP_ADD,120QOP_SUB,121QOP_SHL,122QOP_SHR,123QOP_ASR,124QOP_MIN,125QOP_MIN_NOIMM,126QOP_MAX,127QOP_AND,128QOP_OR,129QOP_XOR,130QOP_NOT,131132QOP_FTOI,133QOP_ITOF,134QOP_RCP,135QOP_RSQ,136QOP_EXP2,137QOP_LOG2,138QOP_VW_SETUP,139QOP_VR_SETUP,140QOP_TLB_COLOR_READ,141QOP_MS_MASK,142QOP_VARY_ADD_C,143144QOP_FRAG_Z,145QOP_FRAG_W,146147/**148* Signal of texture read being necessary and then reading r4 into149* the destination150*/151QOP_TEX_RESULT,152153/**154* Insert the signal for switching threads in a threaded fragment155* shader. No value can be live in an accumulator across a thrsw.156*157* At the QPU level, this will have several delay slots before the158* switch happens. Those slots are the responsibility of the159* scheduler.160*/161QOP_THRSW,162163/* 32-bit immediate loaded to each SIMD channel */164QOP_LOAD_IMM,165166/* 32-bit immediate divided into 16 2-bit unsigned int values and167* loaded to each corresponding SIMD channel.168*/169QOP_LOAD_IMM_U2,170/* 32-bit immediate divided into 16 2-bit signed int values and171* loaded to each corresponding SIMD channel.172*/173QOP_LOAD_IMM_I2,174175QOP_ROT_MUL,176177/* Jumps to block->successor[0] if the qinst->cond (as a178* QPU_COND_BRANCH_*) passes, or block->successor[1] if not. Note179* that block->successor[1] may be unset if the condition is ALWAYS.180*/181QOP_BRANCH,182183/* Emits an ADD from src[0] to src[1], where src[0] must be a184* QOP_LOAD_IMM result and src[1] is a QUNIFORM_UNIFORMS_ADDRESS,185* required by the kernel as part of its branch validation.186*/187QOP_UNIFORMS_RESET,188};189190struct queued_qpu_inst {191struct list_head link;192uint64_t inst;193};194195struct qinst {196struct list_head link;197198enum qop op;199struct qreg dst;200struct qreg src[3];201bool sf;202bool cond_is_exec_mask;203uint8_t cond;204};205206enum qstage {207/**208* Coordinate shader, runs during binning, before the VS, and just209* outputs position.210*/211QSTAGE_COORD,212QSTAGE_VERT,213QSTAGE_FRAG,214};215216enum quniform_contents {217/**218* Indicates that a constant 32-bit value is copied from the program's219* uniform contents.220*/221QUNIFORM_CONSTANT,222/**223* Indicates that the program's uniform contents are used as an index224* into the GL uniform storage.225*/226QUNIFORM_UNIFORM,227228/** @{229* Scaling factors from clip coordinates to relative to the viewport230* center.231*232* This is used by the coordinate and vertex shaders to produce the233* 32-bit entry consisting of 2 16-bit fields with 12.4 signed fixed234* point offsets from the viewport ccenter.235*/236QUNIFORM_VIEWPORT_X_SCALE,237QUNIFORM_VIEWPORT_Y_SCALE,238/** @} */239240QUNIFORM_VIEWPORT_Z_OFFSET,241QUNIFORM_VIEWPORT_Z_SCALE,242243QUNIFORM_USER_CLIP_PLANE,244245/**246* A reference to a texture config parameter 0 uniform.247*248* This is a uniform implicitly loaded with a QPU_W_TMU* write, which249* defines texture type, miplevels, and such. It will be found as a250* parameter to the first QOP_TEX_[STRB] instruction in a sequence.251*/252QUNIFORM_TEXTURE_CONFIG_P0,253254/**255* A reference to a texture config parameter 1 uniform.256*257* This is a uniform implicitly loaded with a QPU_W_TMU* write, which258* defines texture width, height, filters, and wrap modes. It will be259* found as a parameter to the second QOP_TEX_[STRB] instruction in a260* sequence.261*/262QUNIFORM_TEXTURE_CONFIG_P1,263264/** A reference to a texture config parameter 2 cubemap stride uniform */265QUNIFORM_TEXTURE_CONFIG_P2,266267QUNIFORM_TEXTURE_FIRST_LEVEL,268269QUNIFORM_TEXTURE_MSAA_ADDR,270271QUNIFORM_UBO0_ADDR,272QUNIFORM_UBO1_ADDR,273274QUNIFORM_TEXRECT_SCALE_X,275QUNIFORM_TEXRECT_SCALE_Y,276277QUNIFORM_TEXTURE_BORDER_COLOR,278279QUNIFORM_BLEND_CONST_COLOR_X,280QUNIFORM_BLEND_CONST_COLOR_Y,281QUNIFORM_BLEND_CONST_COLOR_Z,282QUNIFORM_BLEND_CONST_COLOR_W,283QUNIFORM_BLEND_CONST_COLOR_RGBA,284QUNIFORM_BLEND_CONST_COLOR_AAAA,285286QUNIFORM_STENCIL,287288QUNIFORM_SAMPLE_MASK,289290/* Placeholder uniform that will be updated by the kernel when used by291* an instruction writing to QPU_W_UNIFORMS_ADDRESS.292*/293QUNIFORM_UNIFORMS_ADDRESS,294};295296struct vc4_varying_slot {297uint8_t slot;298uint8_t swizzle;299};300301struct vc4_key {302struct vc4_uncompiled_shader *shader_state;303struct {304enum pipe_format format;305uint8_t swizzle[4];306union {307struct {308unsigned compare_mode:1;309unsigned compare_func:3;310unsigned wrap_s:3;311unsigned wrap_t:3;312bool force_first_level:1;313};314struct {315uint16_t msaa_width, msaa_height;316};317};318} tex[VC4_MAX_TEXTURE_SAMPLERS];319uint8_t ucp_enables;320};321322struct vc4_fs_key {323struct vc4_key base;324enum pipe_format color_format;325bool depth_enabled;326bool stencil_enabled;327bool stencil_twoside;328bool stencil_full_writemasks;329bool is_points;330bool is_lines;331bool point_coord_upper_left;332bool msaa;333bool sample_coverage;334bool sample_alpha_to_coverage;335bool sample_alpha_to_one;336uint8_t logicop_func;337uint32_t point_sprite_mask;338uint32_t ubo_1_size;339340struct pipe_rt_blend_state blend;341};342343struct vc4_vs_key {344struct vc4_key base;345346const struct vc4_fs_inputs *fs_inputs;347enum pipe_format attr_formats[8];348bool is_coord;349bool per_vertex_point_size;350};351352/** A basic block of QIR intructions. */353struct qblock {354struct list_head link;355356struct list_head instructions;357struct list_head qpu_inst_list;358359struct set *predecessors;360struct qblock *successors[2];361362int index;363364/* Instruction IPs for the first and last instruction of the block.365* Set by vc4_qpu_schedule.c.366*/367uint32_t start_qpu_ip;368uint32_t end_qpu_ip;369370/* Instruction IP for the branch instruction of the block. Set by371* vc4_qpu_schedule.c.372*/373uint32_t branch_qpu_ip;374375/** @{ used by vc4_qir_live_variables.c */376BITSET_WORD *def;377BITSET_WORD *use;378BITSET_WORD *live_in;379BITSET_WORD *live_out;380int start_ip, end_ip;381/** @} */382};383384struct vc4_compile {385struct vc4_context *vc4;386nir_shader *s;387nir_function_impl *impl;388struct exec_list *cf_node_list;389390/**391* Mapping from nir_register * or nir_ssa_def * to array of struct392* qreg for the values.393*/394struct hash_table *def_ht;395396/* For each temp, the instruction generating its value. */397struct qinst **defs;398uint32_t defs_array_size;399400/**401* Inputs to the shader, arranged by TGSI declaration order.402*403* Not all fragment shader QFILE_VARY reads are present in this array.404*/405struct qreg *inputs;406struct qreg *outputs;407bool msaa_per_sample_output;408struct qreg color_reads[VC4_MAX_SAMPLES];409struct qreg sample_colors[VC4_MAX_SAMPLES];410uint32_t inputs_array_size;411uint32_t outputs_array_size;412uint32_t uniforms_array_size;413414/* State for whether we're executing on each channel currently. 0 if415* yes, otherwise a block number + 1 that the channel jumped to.416*/417struct qreg execute;418419struct qreg line_x, point_x, point_y;420/** boolean (~0 -> true) if the fragment has been discarded. */421struct qreg discard;422struct qreg payload_FRAG_Z;423struct qreg payload_FRAG_W;424425uint8_t vattr_sizes[8];426427/**428* Array of the VARYING_SLOT_* of all FS QFILE_VARY reads.429*430* This includes those that aren't part of the VPM varyings, like431* point/line coordinates.432*/433struct vc4_varying_slot *input_slots;434uint32_t num_input_slots;435uint32_t input_slots_array_size;436437/**438* An entry per outputs[] in the VS indicating what the VARYING_SLOT_*439* of the output is. Used to emit from the VS in the order that the440* FS needs.441*/442struct vc4_varying_slot *output_slots;443444struct pipe_shader_state *shader_state;445struct vc4_key *key;446struct vc4_fs_key *fs_key;447struct vc4_vs_key *vs_key;448449/* Live ranges of temps. */450int *temp_start, *temp_end;451452uint32_t *uniform_data;453enum quniform_contents *uniform_contents;454uint32_t uniform_array_size;455uint32_t num_uniforms;456uint32_t num_outputs;457uint32_t num_texture_samples;458uint32_t output_position_index;459uint32_t output_color_index;460uint32_t output_point_size_index;461uint32_t output_sample_mask_index;462463struct qreg undef;464enum qstage stage;465uint32_t num_temps;466467struct list_head blocks;468int next_block_index;469struct qblock *cur_block;470struct qblock *loop_cont_block;471struct qblock *loop_break_block;472struct qblock *last_top_block;473474struct list_head qpu_inst_list;475476/* Pre-QPU-scheduled instruction containing the last THRSW */477uint64_t *last_thrsw;478479uint64_t *qpu_insts;480uint32_t qpu_inst_count;481uint32_t qpu_inst_size;482uint32_t num_inputs;483484/**485* Number of inputs from num_inputs remaining to be queued to the read486* FIFO in the VS/CS.487*/488uint32_t num_inputs_remaining;489490/* Number of inputs currently in the read FIFO for the VS/CS */491uint32_t num_inputs_in_fifo;492493/** Next offset in the VPM to read from in the VS/CS */494uint32_t vpm_read_offset;495496uint32_t program_id;497uint32_t variant_id;498499/* Set to compile program in threaded FS mode, where SIG_THREAD_SWITCH500* is used to hide texturing latency at the cost of limiting ourselves501* to the bottom half of physical reg space.502*/503bool fs_threaded;504505bool last_thrsw_at_top_level;506507bool failed;508};509510/* Special nir_load_input intrinsic index for loading the current TLB511* destination color.512*/513#define VC4_NIR_TLB_COLOR_READ_INPUT 2000000000514515#define VC4_NIR_MS_MASK_OUTPUT 2000000000516517struct vc4_compile *qir_compile_init(void);518void qir_compile_destroy(struct vc4_compile *c);519struct qblock *qir_new_block(struct vc4_compile *c);520void qir_set_emit_block(struct vc4_compile *c, struct qblock *block);521void qir_link_blocks(struct qblock *predecessor, struct qblock *successor);522struct qblock *qir_entry_block(struct vc4_compile *c);523struct qblock *qir_exit_block(struct vc4_compile *c);524struct qinst *qir_inst(enum qop op, struct qreg dst,525struct qreg src0, struct qreg src1);526void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);527struct qreg qir_uniform(struct vc4_compile *c,528enum quniform_contents contents,529uint32_t data);530void qir_schedule_instructions(struct vc4_compile *c);531void qir_reorder_uniforms(struct vc4_compile *c);532void qir_emit_uniform_stream_resets(struct vc4_compile *c);533534struct qreg qir_emit_def(struct vc4_compile *c, struct qinst *inst);535struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);536537struct qreg qir_get_temp(struct vc4_compile *c);538void qir_calculate_live_intervals(struct vc4_compile *c);539int qir_get_nsrc(struct qinst *inst);540int qir_get_non_sideband_nsrc(struct qinst *inst);541int qir_get_tex_uniform_src(struct qinst *inst);542bool qir_reg_equals(struct qreg a, struct qreg b);543bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);544bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);545bool qir_has_uniform_read(struct qinst *inst);546bool qir_is_mul(struct qinst *inst);547bool qir_is_raw_mov(struct qinst *inst);548bool qir_is_tex(struct qinst *inst);549bool qir_has_implicit_tex_uniform(struct qinst *inst);550bool qir_is_float_input(struct qinst *inst);551bool qir_depends_on_flags(struct qinst *inst);552bool qir_writes_r4(struct qinst *inst);553struct qreg qir_follow_movs(struct vc4_compile *c, struct qreg reg);554uint8_t qir_channels_written(struct qinst *inst);555556void qir_dump(struct vc4_compile *c);557void qir_dump_inst(struct vc4_compile *c, struct qinst *inst);558char *qir_describe_uniform(enum quniform_contents contents, uint32_t data,559const uint32_t *uniforms);560const char *qir_get_stage_name(enum qstage stage);561562void qir_validate(struct vc4_compile *c);563564void qir_optimize(struct vc4_compile *c);565bool qir_opt_algebraic(struct vc4_compile *c);566bool qir_opt_coalesce_ff_writes(struct vc4_compile *c);567bool qir_opt_constant_folding(struct vc4_compile *c);568bool qir_opt_copy_propagation(struct vc4_compile *c);569bool qir_opt_dead_code(struct vc4_compile *c);570bool qir_opt_peephole_sf(struct vc4_compile *c);571bool qir_opt_small_immediates(struct vc4_compile *c);572bool qir_opt_vpm(struct vc4_compile *c);573void vc4_nir_lower_blend(nir_shader *s, struct vc4_compile *c);574void vc4_nir_lower_io(nir_shader *s, struct vc4_compile *c);575nir_ssa_def *vc4_nir_get_swizzled_channel(struct nir_builder *b,576nir_ssa_def **srcs, int swiz);577void vc4_nir_lower_txf_ms(nir_shader *s, struct vc4_compile *c);578void qir_lower_uniforms(struct vc4_compile *c);579580uint32_t qpu_schedule_instructions(struct vc4_compile *c);581582void qir_SF(struct vc4_compile *c, struct qreg src);583584static inline struct qreg585qir_uniform_ui(struct vc4_compile *c, uint32_t ui)586{587return qir_uniform(c, QUNIFORM_CONSTANT, ui);588}589590static inline struct qreg591qir_uniform_f(struct vc4_compile *c, float f)592{593return qir_uniform(c, QUNIFORM_CONSTANT, fui(f));594}595596#define QIR_ALU0(name) \597static inline struct qreg \598qir_##name(struct vc4_compile *c) \599{ \600return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \601c->undef, c->undef)); \602} \603static inline struct qinst * \604qir_##name##_dest(struct vc4_compile *c, struct qreg dest) \605{ \606return qir_emit_nondef(c, qir_inst(QOP_##name, dest, \607c->undef, c->undef)); \608}609610#define QIR_ALU1(name) \611static inline struct qreg \612qir_##name(struct vc4_compile *c, struct qreg a) \613{ \614return qir_emit_def(c, qir_inst(QOP_##name, c->undef, \615a, c->undef)); \616} \617static inline struct qinst * \618qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \619struct qreg a) \620{ \621return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, \622c->undef)); \623}624625#define QIR_ALU2(name) \626static inline struct qreg \627qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \628{ \629return qir_emit_def(c, qir_inst(QOP_##name, c->undef, a, b)); \630} \631static inline struct qinst * \632qir_##name##_dest(struct vc4_compile *c, struct qreg dest, \633struct qreg a, struct qreg b) \634{ \635return qir_emit_nondef(c, qir_inst(QOP_##name, dest, a, b)); \636}637638#define QIR_NODST_1(name) \639static inline struct qinst * \640qir_##name(struct vc4_compile *c, struct qreg a) \641{ \642return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \643a, c->undef)); \644}645646#define QIR_NODST_2(name) \647static inline struct qinst * \648qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b) \649{ \650return qir_emit_nondef(c, qir_inst(QOP_##name, c->undef, \651a, b)); \652}653654#define QIR_PAYLOAD(name) \655static inline struct qreg \656qir_##name(struct vc4_compile *c) \657{ \658struct qreg *payload = &c->payload_##name; \659if (payload->file != QFILE_NULL) \660return *payload; \661*payload = qir_get_temp(c); \662struct qinst *inst = qir_inst(QOP_##name, *payload, \663c->undef, c->undef); \664struct qblock *entry = qir_entry_block(c); \665list_add(&inst->link, &entry->instructions); \666c->defs[payload->index] = inst; \667return *payload; \668}669670QIR_ALU1(MOV)671QIR_ALU1(FMOV)672QIR_ALU1(MMOV)673QIR_ALU2(FADD)674QIR_ALU2(FSUB)675QIR_ALU2(FMUL)676QIR_ALU2(V8MULD)677QIR_ALU2(V8MIN)678QIR_ALU2(V8MAX)679QIR_ALU2(V8ADDS)680QIR_ALU2(V8SUBS)681QIR_ALU2(MUL24)682QIR_ALU2(FMIN)683QIR_ALU2(FMAX)684QIR_ALU2(FMINABS)685QIR_ALU2(FMAXABS)686QIR_ALU1(FTOI)687QIR_ALU1(ITOF)688689QIR_ALU2(ADD)690QIR_ALU2(SUB)691QIR_ALU2(SHL)692QIR_ALU2(SHR)693QIR_ALU2(ASR)694QIR_ALU2(MIN)695QIR_ALU2(MIN_NOIMM)696QIR_ALU2(MAX)697QIR_ALU2(AND)698QIR_ALU2(OR)699QIR_ALU2(XOR)700QIR_ALU1(NOT)701702QIR_ALU1(RCP)703QIR_ALU1(RSQ)704QIR_ALU1(EXP2)705QIR_ALU1(LOG2)706QIR_ALU1(VARY_ADD_C)707QIR_PAYLOAD(FRAG_Z)708QIR_PAYLOAD(FRAG_W)709QIR_ALU0(TEX_RESULT)710QIR_ALU0(TLB_COLOR_READ)711QIR_NODST_1(MS_MASK)712713static inline struct qreg714qir_SEL(struct vc4_compile *c, uint8_t cond, struct qreg src0, struct qreg src1)715{716struct qreg t = qir_get_temp(c);717qir_MOV_dest(c, t, src1);718qir_MOV_dest(c, t, src0)->cond = cond;719return t;720}721722static inline struct qreg723qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)724{725struct qreg t = qir_FMOV(c, src);726c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;727return t;728}729730static inline struct qreg731qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)732{733struct qreg t = qir_MOV(c, src);734c->defs[t.index]->src[0].pack = QPU_UNPACK_8A + i;735return t;736}737738static inline struct qreg739qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)740{741struct qreg t = qir_FMOV(c, src);742c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;743return t;744}745746static inline struct qreg747qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)748{749struct qreg t = qir_MOV(c, src);750c->defs[t.index]->src[0].pack = QPU_UNPACK_16A + i;751return t;752}753754static inline void755qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)756{757assert(!dest.pack);758dest.pack = QPU_PACK_MUL_8A + chan;759qir_emit_nondef(c, qir_inst(QOP_MMOV, dest, val, c->undef));760}761762static inline struct qreg763qir_PACK_8888_F(struct vc4_compile *c, struct qreg val)764{765struct qreg dest = qir_MMOV(c, val);766c->defs[dest.index]->dst.pack = QPU_PACK_MUL_8888;767return dest;768}769770static inline struct qreg771qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)772{773return qir_EXP2(c, qir_FMUL(c,774y,775qir_LOG2(c, x)));776}777778static inline void779qir_VPM_WRITE(struct vc4_compile *c, struct qreg val)780{781qir_MOV_dest(c, qir_reg(QFILE_VPM, 0), val);782}783784static inline struct qreg785qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)786{787return qir_emit_def(c, qir_inst(QOP_LOAD_IMM, c->undef,788qir_reg(QFILE_LOAD_IMM, val), c->undef));789}790791static inline struct qreg792qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)793{794return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,795qir_reg(QFILE_LOAD_IMM, val),796c->undef));797}798799static inline struct qreg800qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)801{802return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,803qir_reg(QFILE_LOAD_IMM, val),804c->undef));805}806807/** Shifts the multiply output to the right by rot channels */808static inline struct qreg809qir_ROT_MUL(struct vc4_compile *c, struct qreg val, uint32_t rot)810{811return qir_emit_def(c, qir_inst(QOP_ROT_MUL, c->undef,812val,813qir_reg(QFILE_LOAD_IMM,814QPU_SMALL_IMM_MUL_ROT + rot)));815}816817static inline struct qinst *818qir_MOV_cond(struct vc4_compile *c, uint8_t cond,819struct qreg dest, struct qreg src)820{821struct qinst *mov = qir_MOV_dest(c, dest, src);822mov->cond = cond;823return mov;824}825826static inline struct qinst *827qir_BRANCH(struct vc4_compile *c, uint8_t cond)828{829struct qinst *inst = qir_inst(QOP_BRANCH, c->undef, c->undef, c->undef);830inst->cond = cond;831qir_emit_nondef(c, inst);832return inst;833}834835#define qir_for_each_block(block, c) \836list_for_each_entry(struct qblock, block, &c->blocks, link)837838#define qir_for_each_block_rev(block, c) \839list_for_each_entry_rev(struct qblock, block, &c->blocks, link)840841/* Loop over the non-NULL members of the successors array. */842#define qir_for_each_successor(succ, block) \843for (struct qblock *succ = block->successors[0]; \844succ != NULL; \845succ = (succ == block->successors[1] ? NULL : \846block->successors[1]))847848#define qir_for_each_inst(inst, block) \849list_for_each_entry(struct qinst, inst, &block->instructions, link)850851#define qir_for_each_inst_rev(inst, block) \852list_for_each_entry_rev(struct qinst, inst, &block->instructions, link)853854#define qir_for_each_inst_safe(inst, block) \855list_for_each_entry_safe(struct qinst, inst, &block->instructions, link)856857#define qir_for_each_inst_inorder(inst, c) \858qir_for_each_block(_block, c) \859qir_for_each_inst_safe(inst, _block)860861#endif /* VC4_QIR_H */862863864