Path: blob/21.2-virgl/src/gallium/drivers/etnaviv/etnaviv_compiler_nir_emit.c
4570 views
/*1* Copyright (c) 2019 Zodiac Inflight Innovations2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22* Authors:23* Jonathan Marek <[email protected]>24*/2526#include "etnaviv_compiler_nir.h"27#include "util/compiler.h"2829/* to map nir srcs should to etna_inst srcs */30enum {31SRC_0_1_2 = (0 << 0) | (1 << 2) | (2 << 4),32SRC_0_1_X = (0 << 0) | (1 << 2) | (3 << 4),33SRC_0_X_X = (0 << 0) | (3 << 2) | (3 << 4),34SRC_0_X_1 = (0 << 0) | (3 << 2) | (1 << 4),35SRC_0_1_0 = (0 << 0) | (1 << 2) | (0 << 4),36SRC_X_X_0 = (3 << 0) | (3 << 2) | (0 << 4),37SRC_0_X_0 = (0 << 0) | (3 << 2) | (0 << 4),38};3940/* info to translate a nir op to etna_inst */41struct etna_op_info {42uint8_t opcode; /* INST_OPCODE_ */43uint8_t src; /* SRC_ enum */44uint8_t cond; /* INST_CONDITION_ */45uint8_t type; /* INST_TYPE_ */46};4748static const struct etna_op_info etna_ops[] = {49[0 ... nir_num_opcodes - 1] = {0xff},50#undef TRUE51#undef FALSE52#define OPCT(nir, op, src, cond, type) [nir_op_##nir] = { \53INST_OPCODE_##op, \54SRC_##src, \55INST_CONDITION_##cond, \56INST_TYPE_##type \57}58#define OPC(nir, op, src, cond) OPCT(nir, op, src, cond, F32)59#define IOPC(nir, op, src, cond) OPCT(nir, op, src, cond, S32)60#define UOPC(nir, op, src, cond) OPCT(nir, op, src, cond, U32)61#define OP(nir, op, src) OPC(nir, op, src, TRUE)62#define IOP(nir, op, src) IOPC(nir, op, src, TRUE)63#define UOP(nir, op, src) UOPC(nir, op, src, TRUE)64OP(mov, MOV, X_X_0), OP(fneg, MOV, X_X_0), OP(fabs, MOV, X_X_0), OP(fsat, MOV, X_X_0),65OP(fmul, MUL, 0_1_X), OP(fadd, ADD, 0_X_1), OP(ffma, MAD, 0_1_2),66OP(fdot2, DP2, 0_1_X), OP(fdot3, DP3, 0_1_X), OP(fdot4, DP4, 0_1_X),67OPC(fmin, SELECT, 0_1_0, GT), OPC(fmax, SELECT, 0_1_0, LT),68OP(ffract, FRC, X_X_0), OP(frcp, RCP, X_X_0), OP(frsq, RSQ, X_X_0),69OP(fsqrt, SQRT, X_X_0), OP(fsin, SIN, X_X_0), OP(fcos, COS, X_X_0),70OP(fsign, SIGN, X_X_0), OP(ffloor, FLOOR, X_X_0), OP(fceil, CEIL, X_X_0),71OP(flog2, LOG, X_X_0), OP(fexp2, EXP, X_X_0),72OPC(seq, SET, 0_1_X, EQ), OPC(sne, SET, 0_1_X, NE), OPC(sge, SET, 0_1_X, GE), OPC(slt, SET, 0_1_X, LT),73OPC(fcsel, SELECT, 0_1_2, NZ),74OP(fdiv, DIV, 0_1_X),75OP(fddx, DSX, 0_X_0), OP(fddy, DSY, 0_X_0),7677/* type convert */78IOP(i2f32, I2F, 0_X_X),79UOP(u2f32, I2F, 0_X_X),80IOP(f2i32, F2I, 0_X_X),81UOP(f2u32, F2I, 0_X_X),82UOP(b2f32, AND, 0_X_X), /* AND with fui(1.0f) */83UOP(b2i32, AND, 0_X_X), /* AND with 1 */84OPC(f2b32, CMP, 0_X_X, NE), /* != 0.0 */85UOPC(i2b32, CMP, 0_X_X, NE), /* != 0 */8687/* arithmetic */88IOP(iadd, ADD, 0_X_1),89IOP(imul, IMULLO0, 0_1_X),90/* IOP(imad, IMADLO0, 0_1_2), */91IOP(ineg, ADD, X_X_0), /* ADD 0, -x */92IOP(iabs, IABS, X_X_0),93IOP(isign, SIGN, X_X_0),94IOPC(imin, SELECT, 0_1_0, GT),95IOPC(imax, SELECT, 0_1_0, LT),96UOPC(umin, SELECT, 0_1_0, GT),97UOPC(umax, SELECT, 0_1_0, LT),9899/* select */100UOPC(b32csel, SELECT, 0_1_2, NZ),101102/* compare with int result */103OPC(feq32, CMP, 0_1_X, EQ),104OPC(fneu32, CMP, 0_1_X, NE),105OPC(fge32, CMP, 0_1_X, GE),106OPC(flt32, CMP, 0_1_X, LT),107IOPC(ieq32, CMP, 0_1_X, EQ),108IOPC(ine32, CMP, 0_1_X, NE),109IOPC(ige32, CMP, 0_1_X, GE),110IOPC(ilt32, CMP, 0_1_X, LT),111UOPC(uge32, CMP, 0_1_X, GE),112UOPC(ult32, CMP, 0_1_X, LT),113114/* bit ops */115IOP(ior, OR, 0_X_1),116IOP(iand, AND, 0_X_1),117IOP(ixor, XOR, 0_X_1),118IOP(inot, NOT, X_X_0),119IOP(ishl, LSHIFT, 0_X_1),120IOP(ishr, RSHIFT, 0_X_1),121UOP(ushr, RSHIFT, 0_X_1),122};123124void125etna_emit_alu(struct etna_compile *c, nir_op op, struct etna_inst_dst dst,126struct etna_inst_src src[3], bool saturate)127{128struct etna_op_info ei = etna_ops[op];129unsigned swiz_scalar = INST_SWIZ_BROADCAST(ffs(dst.write_mask) - 1);130131if (ei.opcode == 0xff)132compile_error(c, "Unhandled ALU op: %s\n", nir_op_infos[op].name);133134struct etna_inst inst = {135.opcode = ei.opcode,136.type = ei.type,137.cond = ei.cond,138.dst = dst,139.sat = saturate,140};141142switch (op) {143case nir_op_fdiv:144case nir_op_flog2:145case nir_op_fsin:146case nir_op_fcos:147if (c->specs->has_new_transcendentals)148inst.tex.amode = 1;149FALLTHROUGH;150case nir_op_frsq:151case nir_op_frcp:152case nir_op_fexp2:153case nir_op_fsqrt:154case nir_op_imul:155/* scalar instructions we want src to be in x component */156src[0].swiz = inst_swiz_compose(src[0].swiz, swiz_scalar);157src[1].swiz = inst_swiz_compose(src[1].swiz, swiz_scalar);158break;159/* deal with instructions which don't have 1:1 mapping */160case nir_op_b2f32:161inst.src[2] = etna_immediate_float(1.0f);162break;163case nir_op_b2i32:164inst.src[2] = etna_immediate_int(1);165break;166case nir_op_f2b32:167inst.src[1] = etna_immediate_float(0.0f);168break;169case nir_op_i2b32:170inst.src[1] = etna_immediate_int(0);171break;172case nir_op_ineg:173inst.src[0] = etna_immediate_int(0);174src[0].neg = 1;175break;176default:177break;178}179180/* set the "true" value for CMP instructions */181if (inst.opcode == INST_OPCODE_CMP)182inst.src[2] = etna_immediate_int(-1);183184for (unsigned j = 0; j < 3; j++) {185unsigned i = ((ei.src >> j*2) & 3);186if (i < 3)187inst.src[j] = src[i];188}189190emit_inst(c, &inst);191}192193void194etna_emit_tex(struct etna_compile *c, nir_texop op, unsigned texid, unsigned dst_swiz,195struct etna_inst_dst dst, struct etna_inst_src coord,196struct etna_inst_src lod_bias, struct etna_inst_src compare)197{198struct etna_inst inst = {199.dst = dst,200.tex.id = texid + (is_fs(c) ? 0 : c->specs->vertex_sampler_offset),201.tex.swiz = dst_swiz,202.src[0] = coord,203};204205if (lod_bias.use)206inst.src[1] = lod_bias;207208if (compare.use)209inst.src[2] = compare;210211switch (op) {212case nir_texop_tex: inst.opcode = INST_OPCODE_TEXLD; break;213case nir_texop_txb: inst.opcode = INST_OPCODE_TEXLDB; break;214case nir_texop_txl: inst.opcode = INST_OPCODE_TEXLDL; break;215default:216compile_error(c, "Unhandled NIR tex type: %d\n", op);217}218219emit_inst(c, &inst);220}221222void223etna_emit_jump(struct etna_compile *c, unsigned block, struct etna_inst_src condition)224{225if (!condition.use) {226emit_inst(c, &(struct etna_inst) {.opcode = INST_OPCODE_BRANCH, .imm = block });227return;228}229230struct etna_inst inst = {231.opcode = INST_OPCODE_BRANCH,232.cond = INST_CONDITION_NOT,233.type = INST_TYPE_U32,234.src[0] = condition,235.imm = block,236};237inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);238emit_inst(c, &inst);239}240241void242etna_emit_discard(struct etna_compile *c, struct etna_inst_src condition)243{244if (!condition.use) {245emit_inst(c, &(struct etna_inst) { .opcode = INST_OPCODE_TEXKILL });246return;247}248249struct etna_inst inst = {250.opcode = INST_OPCODE_TEXKILL,251.cond = INST_CONDITION_NZ,252.type = (c->specs->halti < 2) ? INST_TYPE_F32 : INST_TYPE_U32,253.src[0] = condition,254};255inst.src[0].swiz = INST_SWIZ_BROADCAST(inst.src[0].swiz & 3);256emit_inst(c, &inst);257}258259260