Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_lowering.c
4565 views
/*1* Copyright (C) 2014 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors:23* Rob Clark <[email protected]>24*/2526#include "tgsi/tgsi_transform.h"27#include "tgsi/tgsi_scan.h"28#include "tgsi/tgsi_dump.h"2930#include "util/compiler.h"31#include "util/u_debug.h"32#include "util/u_math.h"3334#include "tgsi_lowering.h"3536struct tgsi_lowering_context {37struct tgsi_transform_context base;38const struct tgsi_lowering_config *config;39struct tgsi_shader_info *info;40unsigned two_side_colors;41unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];42unsigned color_base; /* base register for chosen COLOR/BCOLOR's */43int face_idx;44unsigned numtmp;45struct {46struct tgsi_full_src_register src;47struct tgsi_full_dst_register dst;48} tmp[2];49#define A 050#define B 151struct tgsi_full_src_register imm;52int emitted_decls;53unsigned saturate;54};5556static inline struct tgsi_lowering_context *57tgsi_lowering_context(struct tgsi_transform_context *tctx)58{59return (struct tgsi_lowering_context *)tctx;60}6162/*63* Utility helpers:64*/6566static void67reg_dst(struct tgsi_full_dst_register *dst,68const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)69{70*dst = *orig_dst;71dst->Register.WriteMask &= wrmask;72assert(dst->Register.WriteMask);73}7475static inline void76get_swiz(unsigned *swiz, const struct tgsi_src_register *src)77{78swiz[0] = src->SwizzleX;79swiz[1] = src->SwizzleY;80swiz[2] = src->SwizzleZ;81swiz[3] = src->SwizzleW;82}8384static void85reg_src(struct tgsi_full_src_register *src,86const struct tgsi_full_src_register *orig_src,87unsigned sx, unsigned sy, unsigned sz, unsigned sw)88{89unsigned swiz[4];90get_swiz(swiz, &orig_src->Register);91*src = *orig_src;92src->Register.SwizzleX = swiz[sx];93src->Register.SwizzleY = swiz[sy];94src->Register.SwizzleZ = swiz[sz];95src->Register.SwizzleW = swiz[sw];96}9798#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */99#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \100TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w101102/*103* if (dst.x aliases src.x) {104* MOV tmpA.x, src.x105* src = tmpA106* }107* COS dst.x, src.x108* SIN dst.y, src.x109* MOV dst.zw, imm{0.0, 1.0}110*/111static bool112aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,113const struct tgsi_full_src_register *src, unsigned src_mask)114{115if ((dst->Register.File == src->Register.File) &&116(dst->Register.Index == src->Register.Index)) {117unsigned i, actual_mask = 0;118unsigned swiz[4];119get_swiz(swiz, &src->Register);120for (i = 0; i < 4; i++)121if (src_mask & (1 << i))122actual_mask |= (1 << swiz[i]);123if (actual_mask & dst_mask)124return true;125}126return false;127}128129static void130create_mov(struct tgsi_transform_context *tctx,131const struct tgsi_full_dst_register *dst,132const struct tgsi_full_src_register *src,133unsigned mask, unsigned saturate)134{135struct tgsi_full_instruction new_inst;136137new_inst = tgsi_default_full_instruction();138new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;139new_inst.Instruction.Saturate = saturate;140new_inst.Instruction.NumDstRegs = 1;141reg_dst(&new_inst.Dst[0], dst, mask);142new_inst.Instruction.NumSrcRegs = 1;143reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));144tctx->emit_instruction(tctx, &new_inst);145}146147/* to help calculate # of tgsi tokens for a lowering.. we assume148* the worst case, ie. removed instructions don't have ADDR[] or149* anything which increases the # of tokens per src/dst and the150* inserted instructions do.151*152* OINST() - old instruction153* 1 : instruction itself154* 1 : dst155* 1 * nargs : srcN156*157* NINST() - new instruction158* 1 : instruction itself159* 2 : dst160* 2 * nargs : srcN161*/162163#define OINST(nargs) (1 + 1 + 1 * (nargs))164#define NINST(nargs) (1 + 2 + 2 * (nargs))165166/*167* Lowering Translators:168*/169170/* DST - Distance Vector171* dst.x = 1.0172* dst.y = src0.y \times src1.y173* dst.z = src0.z174* dst.w = src1.w175*176* ; note: could be more clever and use just a single temp177* ; if I was clever enough to re-write the swizzles.178* ; needs: 2 tmp, imm{1.0}179* if (dst.y aliases src0.z) {180* MOV tmpA.yz, src0.yz181* src0 = tmpA182* }183* if (dst.yz aliases src1.w) {184* MOV tmpB.yw, src1.yw185* src1 = tmpB186* }187* MUL dst.y, src0.y, src1.y188* MOV dst.z, src0.z189* MOV dst.w, src1.w190* MOV dst.x, imm{1.0}191*/192#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \193NINST(1) + NINST(1) - OINST(2))194#define DST_TMP 2195static void196transform_dst(struct tgsi_transform_context *tctx,197struct tgsi_full_instruction *inst)198{199struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);200struct tgsi_full_dst_register *dst = &inst->Dst[0];201struct tgsi_full_src_register *src0 = &inst->Src[0];202struct tgsi_full_src_register *src1 = &inst->Src[1];203struct tgsi_full_instruction new_inst;204205if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {206create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);207src0 = &ctx->tmp[A].src;208}209210if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {211create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);212src1 = &ctx->tmp[B].src;213}214215if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {216/* MUL dst.y, src0.y, src1.y */217new_inst = tgsi_default_full_instruction();218new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;219new_inst.Instruction.NumDstRegs = 1;220reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);221new_inst.Instruction.NumSrcRegs = 2;222reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));223reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));224tctx->emit_instruction(tctx, &new_inst);225}226227if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {228/* MOV dst.z, src0.z */229new_inst = tgsi_default_full_instruction();230new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;231new_inst.Instruction.NumDstRegs = 1;232reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);233new_inst.Instruction.NumSrcRegs = 1;234reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));235tctx->emit_instruction(tctx, &new_inst);236}237238if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {239/* MOV dst.w, src1.w */240new_inst = tgsi_default_full_instruction();241new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;242new_inst.Instruction.NumDstRegs = 1;243reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);244new_inst.Instruction.NumSrcRegs = 1;245reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));246tctx->emit_instruction(tctx, &new_inst);247}248249if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {250/* MOV dst.x, imm{1.0} */251new_inst = tgsi_default_full_instruction();252new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;253new_inst.Instruction.NumDstRegs = 1;254reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);255new_inst.Instruction.NumSrcRegs = 1;256reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));257tctx->emit_instruction(tctx, &new_inst);258}259}260261/* LRP - Linear Interpolate262* dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x263* dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y264* dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z265* dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w266*267* This becomes: src0 \times src1 + src2 - src0 \times src2, which268* can then become: src0 \times src1 - (src0 \times src2 - src2)269*270* ; needs: 1 tmp271* MAD tmpA, src0, src2, -src2272* MAD dst, src0, src1, -tmpA273*/274#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))275#define LRP_TMP 1276static void277transform_lrp(struct tgsi_transform_context *tctx,278struct tgsi_full_instruction *inst)279{280struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);281struct tgsi_full_dst_register *dst = &inst->Dst[0];282struct tgsi_full_src_register *src0 = &inst->Src[0];283struct tgsi_full_src_register *src1 = &inst->Src[1];284struct tgsi_full_src_register *src2 = &inst->Src[2];285struct tgsi_full_instruction new_inst;286287if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {288/* MAD tmpA, src0, src2, -src2 */289new_inst = tgsi_default_full_instruction();290new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;291new_inst.Instruction.NumDstRegs = 1;292reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);293new_inst.Instruction.NumSrcRegs = 3;294reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));295reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));296reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));297new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;298tctx->emit_instruction(tctx, &new_inst);299300/* MAD dst, src0, src1, -tmpA */301new_inst = tgsi_default_full_instruction();302new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;303new_inst.Instruction.NumDstRegs = 1;304reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);305new_inst.Instruction.NumSrcRegs = 3;306reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));307reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));308reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));309new_inst.Src[2].Register.Negate = true;310tctx->emit_instruction(tctx, &new_inst);311}312}313314/* FRC - Fraction315* dst.x = src.x - \lfloor src.x\rfloor316* dst.y = src.y - \lfloor src.y\rfloor317* dst.z = src.z - \lfloor src.z\rfloor318* dst.w = src.w - \lfloor src.w\rfloor319*320* ; needs: 1 tmp321* FLR tmpA, src322* SUB dst, src, tmpA323*/324#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))325#define FRC_TMP 1326static void327transform_frc(struct tgsi_transform_context *tctx,328struct tgsi_full_instruction *inst)329{330struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);331struct tgsi_full_dst_register *dst = &inst->Dst[0];332struct tgsi_full_src_register *src = &inst->Src[0];333struct tgsi_full_instruction new_inst;334335if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {336/* FLR tmpA, src */337new_inst = tgsi_default_full_instruction();338new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;339new_inst.Instruction.NumDstRegs = 1;340reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);341new_inst.Instruction.NumSrcRegs = 1;342reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));343tctx->emit_instruction(tctx, &new_inst);344345/* SUB dst, src, tmpA */346new_inst = tgsi_default_full_instruction();347new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;348new_inst.Instruction.NumDstRegs = 1;349reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);350new_inst.Instruction.NumSrcRegs = 2;351reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));352reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));353new_inst.Src[1].Register.Negate = 1;354tctx->emit_instruction(tctx, &new_inst);355}356}357358/* POW - Power359* dst.x = src0.x^{src1.x}360* dst.y = src0.x^{src1.x}361* dst.z = src0.x^{src1.x}362* dst.w = src0.x^{src1.x}363*364* ; needs: 1 tmp365* LG2 tmpA.x, src0.x366* MUL tmpA.x, src1.x, tmpA.x367* EX2 dst, tmpA.x368*/369#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))370#define POW_TMP 1371static void372transform_pow(struct tgsi_transform_context *tctx,373struct tgsi_full_instruction *inst)374{375struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);376struct tgsi_full_dst_register *dst = &inst->Dst[0];377struct tgsi_full_src_register *src0 = &inst->Src[0];378struct tgsi_full_src_register *src1 = &inst->Src[1];379struct tgsi_full_instruction new_inst;380381if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {382/* LG2 tmpA.x, src0.x */383new_inst = tgsi_default_full_instruction();384new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;385new_inst.Instruction.NumDstRegs = 1;386reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);387new_inst.Instruction.NumSrcRegs = 1;388reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));389tctx->emit_instruction(tctx, &new_inst);390391/* MUL tmpA.x, src1.x, tmpA.x */392new_inst = tgsi_default_full_instruction();393new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;394new_inst.Instruction.NumDstRegs = 1;395reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);396new_inst.Instruction.NumSrcRegs = 2;397reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));398reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));399tctx->emit_instruction(tctx, &new_inst);400401/* EX2 dst, tmpA.x */402new_inst = tgsi_default_full_instruction();403new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;404new_inst.Instruction.NumDstRegs = 1;405reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);406new_inst.Instruction.NumSrcRegs = 1;407reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));408tctx->emit_instruction(tctx, &new_inst);409}410}411412/* LIT - Light Coefficients413* dst.x = 1.0414* dst.y = max(src.x, 0.0)415* dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0416* dst.w = 1.0417*418* ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}419* MAX tmpA.xy, src.xy, imm{0.0}420* CLAMP tmpA.z, src.w, -imm{128.0}, imm{128.0}421* LG2 tmpA.y, tmpA.y422* MUL tmpA.y, tmpA.z, tmpA.y423* EX2 tmpA.y, tmpA.y424* CMP tmpA.y, -src.x, tmpA.y, imm{0.0}425* MOV dst.yz, tmpA.xy426* MOV dst.xw, imm{1.0}427*/428#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \429NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))430#define LIT_TMP 1431static void432transform_lit(struct tgsi_transform_context *tctx,433struct tgsi_full_instruction *inst)434{435struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);436struct tgsi_full_dst_register *dst = &inst->Dst[0];437struct tgsi_full_src_register *src = &inst->Src[0];438struct tgsi_full_instruction new_inst;439440if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {441/* MAX tmpA.xy, src.xy, imm{0.0} */442new_inst = tgsi_default_full_instruction();443new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;444new_inst.Instruction.NumDstRegs = 1;445reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);446new_inst.Instruction.NumSrcRegs = 2;447reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));448reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));449tctx->emit_instruction(tctx, &new_inst);450451/* MIN tmpA.z, src.w, imm{128.0} */452new_inst = tgsi_default_full_instruction();453new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;454new_inst.Instruction.NumDstRegs = 1;455reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);456new_inst.Instruction.NumSrcRegs = 2;457reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));458reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));459tctx->emit_instruction(tctx, &new_inst);460461/* MAX tmpA.z, tmpA.z, -imm{128.0} */462new_inst = tgsi_default_full_instruction();463new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;464new_inst.Instruction.NumDstRegs = 1;465reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);466new_inst.Instruction.NumSrcRegs = 2;467reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));468reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));469new_inst.Src[1].Register.Negate = true;470tctx->emit_instruction(tctx, &new_inst);471472/* LG2 tmpA.y, tmpA.y */473new_inst = tgsi_default_full_instruction();474new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;475new_inst.Instruction.NumDstRegs = 1;476reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);477new_inst.Instruction.NumSrcRegs = 1;478reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));479tctx->emit_instruction(tctx, &new_inst);480481/* MUL tmpA.y, tmpA.z, tmpA.y */482new_inst = tgsi_default_full_instruction();483new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;484new_inst.Instruction.NumDstRegs = 1;485reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);486new_inst.Instruction.NumSrcRegs = 2;487reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));488reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));489tctx->emit_instruction(tctx, &new_inst);490491/* EX2 tmpA.y, tmpA.y */492new_inst = tgsi_default_full_instruction();493new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;494new_inst.Instruction.NumDstRegs = 1;495reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);496new_inst.Instruction.NumSrcRegs = 1;497reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));498tctx->emit_instruction(tctx, &new_inst);499500/* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */501new_inst = tgsi_default_full_instruction();502new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;503new_inst.Instruction.NumDstRegs = 1;504reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);505new_inst.Instruction.NumSrcRegs = 3;506reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));507new_inst.Src[0].Register.Negate = true;508reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));509reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));510tctx->emit_instruction(tctx, &new_inst);511512/* MOV dst.yz, tmpA.xy */513new_inst = tgsi_default_full_instruction();514new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;515new_inst.Instruction.NumDstRegs = 1;516reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);517new_inst.Instruction.NumSrcRegs = 1;518reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));519tctx->emit_instruction(tctx, &new_inst);520}521522if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {523/* MOV dst.xw, imm{1.0} */524new_inst = tgsi_default_full_instruction();525new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;526new_inst.Instruction.NumDstRegs = 1;527reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);528new_inst.Instruction.NumSrcRegs = 1;529reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));530tctx->emit_instruction(tctx, &new_inst);531}532}533534/* EXP - Approximate Exponential Base 2535* dst.x = 2^{\lfloor src.x\rfloor}536* dst.y = src.x - \lfloor src.x\rfloor537* dst.z = 2^{src.x}538* dst.w = 1.0539*540* ; needs: 1 tmp, imm{1.0}541* if (lowering FLR) {542* FRC tmpA.x, src.x543* SUB tmpA.x, src.x, tmpA.x544* } else {545* FLR tmpA.x, src.x546* }547* EX2 tmpA.y, src.x548* SUB dst.y, src.x, tmpA.x549* EX2 dst.x, tmpA.x550* MOV dst.z, tmpA.y551* MOV dst.w, imm{1.0}552*/553#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \554NINST(1)+ NINST(1) - OINST(1))555#define EXP_TMP 1556static void557transform_exp(struct tgsi_transform_context *tctx,558struct tgsi_full_instruction *inst)559{560struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);561struct tgsi_full_dst_register *dst = &inst->Dst[0];562struct tgsi_full_src_register *src = &inst->Src[0];563struct tgsi_full_instruction new_inst;564565if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {566if (ctx->config->lower_FLR) {567/* FRC tmpA.x, src.x */568new_inst = tgsi_default_full_instruction();569new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;570new_inst.Instruction.NumDstRegs = 1;571reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);572new_inst.Instruction.NumSrcRegs = 1;573reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));574tctx->emit_instruction(tctx, &new_inst);575576/* SUB tmpA.x, src.x, tmpA.x */577new_inst = tgsi_default_full_instruction();578new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;579new_inst.Instruction.NumDstRegs = 1;580reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);581new_inst.Instruction.NumSrcRegs = 2;582reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));583reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));584new_inst.Src[1].Register.Negate = 1;585tctx->emit_instruction(tctx, &new_inst);586} else {587/* FLR tmpA.x, src.x */588new_inst = tgsi_default_full_instruction();589new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;590new_inst.Instruction.NumDstRegs = 1;591reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);592new_inst.Instruction.NumSrcRegs = 1;593reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));594tctx->emit_instruction(tctx, &new_inst);595}596}597598if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {599/* EX2 tmpA.y, src.x */600new_inst = tgsi_default_full_instruction();601new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;602new_inst.Instruction.NumDstRegs = 1;603reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);604new_inst.Instruction.NumSrcRegs = 1;605reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));606tctx->emit_instruction(tctx, &new_inst);607}608609if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {610/* SUB dst.y, src.x, tmpA.x */611new_inst = tgsi_default_full_instruction();612new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;613new_inst.Instruction.NumDstRegs = 1;614reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);615new_inst.Instruction.NumSrcRegs = 2;616reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));617reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));618new_inst.Src[1].Register.Negate = 1;619tctx->emit_instruction(tctx, &new_inst);620}621622if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {623/* EX2 dst.x, tmpA.x */624new_inst = tgsi_default_full_instruction();625new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;626new_inst.Instruction.NumDstRegs = 1;627reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);628new_inst.Instruction.NumSrcRegs = 1;629reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));630tctx->emit_instruction(tctx, &new_inst);631}632633if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {634/* MOV dst.z, tmpA.y */635new_inst = tgsi_default_full_instruction();636new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;637new_inst.Instruction.NumDstRegs = 1;638reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);639new_inst.Instruction.NumSrcRegs = 1;640reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));641tctx->emit_instruction(tctx, &new_inst);642}643644if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {645/* MOV dst.w, imm{1.0} */646new_inst = tgsi_default_full_instruction();647new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;648new_inst.Instruction.NumDstRegs = 1;649reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);650new_inst.Instruction.NumSrcRegs = 1;651reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));652tctx->emit_instruction(tctx, &new_inst);653}654}655656/* LOG - Approximate Logarithm Base 2657* dst.x = \lfloor\log_2{|src.x|}\rfloor658* dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}659* dst.z = \log_2{|src.x|}660* dst.w = 1.0661*662* ; needs: 1 tmp, imm{1.0}663* LG2 tmpA.x, |src.x|664* if (lowering FLR) {665* FRC tmpA.y, tmpA.x666* SUB tmpA.y, tmpA.x, tmpA.y667* } else {668* FLR tmpA.y, tmpA.x669* }670* EX2 tmpA.z, tmpA.y671* RCP tmpA.z, tmpA.z672* MUL dst.y, |src.x|, tmpA.z673* MOV dst.xz, tmpA.yx674* MOV dst.w, imm{1.0}675*/676#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \677NINST(2) + NINST(1) + NINST(1) - OINST(1))678#define LOG_TMP 1679static void680transform_log(struct tgsi_transform_context *tctx,681struct tgsi_full_instruction *inst)682{683struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);684struct tgsi_full_dst_register *dst = &inst->Dst[0];685struct tgsi_full_src_register *src = &inst->Src[0];686struct tgsi_full_instruction new_inst;687688if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {689/* LG2 tmpA.x, |src.x| */690new_inst = tgsi_default_full_instruction();691new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;692new_inst.Instruction.NumDstRegs = 1;693reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);694new_inst.Instruction.NumSrcRegs = 1;695reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));696new_inst.Src[0].Register.Absolute = true;697tctx->emit_instruction(tctx, &new_inst);698}699700if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {701if (ctx->config->lower_FLR) {702/* FRC tmpA.y, tmpA.x */703new_inst = tgsi_default_full_instruction();704new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;705new_inst.Instruction.NumDstRegs = 1;706reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);707new_inst.Instruction.NumSrcRegs = 1;708reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));709tctx->emit_instruction(tctx, &new_inst);710711/* SUB tmpA.y, tmpA.x, tmpA.y */712new_inst = tgsi_default_full_instruction();713new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;714new_inst.Instruction.NumDstRegs = 1;715reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);716new_inst.Instruction.NumSrcRegs = 2;717reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));718reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));719new_inst.Src[1].Register.Negate = 1;720tctx->emit_instruction(tctx, &new_inst);721} else {722/* FLR tmpA.y, tmpA.x */723new_inst = tgsi_default_full_instruction();724new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;725new_inst.Instruction.NumDstRegs = 1;726reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);727new_inst.Instruction.NumSrcRegs = 1;728reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));729tctx->emit_instruction(tctx, &new_inst);730}731}732733if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {734/* EX2 tmpA.z, tmpA.y */735new_inst = tgsi_default_full_instruction();736new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;737new_inst.Instruction.NumDstRegs = 1;738reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);739new_inst.Instruction.NumSrcRegs = 1;740reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));741tctx->emit_instruction(tctx, &new_inst);742743/* RCP tmpA.z, tmpA.z */744new_inst = tgsi_default_full_instruction();745new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;746new_inst.Instruction.NumDstRegs = 1;747reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);748new_inst.Instruction.NumSrcRegs = 1;749reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));750tctx->emit_instruction(tctx, &new_inst);751752/* MUL dst.y, |src.x|, tmpA.z */753new_inst = tgsi_default_full_instruction();754new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;755new_inst.Instruction.NumDstRegs = 1;756reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);757new_inst.Instruction.NumSrcRegs = 2;758reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));759new_inst.Src[0].Register.Absolute = true;760reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));761tctx->emit_instruction(tctx, &new_inst);762}763764if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {765/* MOV dst.xz, tmpA.yx */766new_inst = tgsi_default_full_instruction();767new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;768new_inst.Instruction.NumDstRegs = 1;769reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);770new_inst.Instruction.NumSrcRegs = 1;771reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));772tctx->emit_instruction(tctx, &new_inst);773}774775if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {776/* MOV dst.w, imm{1.0} */777new_inst = tgsi_default_full_instruction();778new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;779new_inst.Instruction.NumDstRegs = 1;780reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);781new_inst.Instruction.NumSrcRegs = 1;782reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));783tctx->emit_instruction(tctx, &new_inst);784}785}786787/* DP4 - 4-component Dot Product788* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w789*790* DP3 - 3-component Dot Product791* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z792*793* DP2 - 2-component Dot Product794* dst = src0.x \times src1.x + src0.y \times src1.y795*796* NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar797* operations, which is what you'd prefer for a ISA that is natively798* scalar. Probably a native vector ISA would at least already have799* DP4/DP3 instructions, but perhaps there is room for an alternative800* translation for DP2 using vector instructions.801*802* ; needs: 1 tmp803* MUL tmpA.x, src0.x, src1.x804* MAD tmpA.x, src0.y, src1.y, tmpA.x805* if (DP3 || DP4) {806* MAD tmpA.x, src0.z, src1.z, tmpA.x807* if (DP4) {808* MAD tmpA.x, src0.w, src1.w, tmpA.x809* }810* }811* ; fixup last instruction to replicate into dst812*/813#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))814#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))815#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))816#define DOTP_TMP 1817static void818transform_dotp(struct tgsi_transform_context *tctx,819struct tgsi_full_instruction *inst)820{821struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);822struct tgsi_full_dst_register *dst = &inst->Dst[0];823struct tgsi_full_src_register *src0 = &inst->Src[0];824struct tgsi_full_src_register *src1 = &inst->Src[1];825struct tgsi_full_instruction new_inst;826enum tgsi_opcode opcode = inst->Instruction.Opcode;827828/* NOTE: any potential last instruction must replicate src on all829* components (since it could be re-written to write to final dst)830*/831832if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {833/* MUL tmpA.x, src0.x, src1.x */834new_inst = tgsi_default_full_instruction();835new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;836new_inst.Instruction.NumDstRegs = 1;837reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);838new_inst.Instruction.NumSrcRegs = 2;839reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));840reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));841tctx->emit_instruction(tctx, &new_inst);842843/* MAD tmpA.x, src0.y, src1.y, tmpA.x */844new_inst = tgsi_default_full_instruction();845new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;846new_inst.Instruction.NumDstRegs = 1;847reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);848new_inst.Instruction.NumSrcRegs = 3;849reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));850reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));851reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));852853if ((opcode == TGSI_OPCODE_DP3) ||854(opcode == TGSI_OPCODE_DP4)) {855tctx->emit_instruction(tctx, &new_inst);856857/* MAD tmpA.x, src0.z, src1.z, tmpA.x */858new_inst = tgsi_default_full_instruction();859new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;860new_inst.Instruction.NumDstRegs = 1;861reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);862new_inst.Instruction.NumSrcRegs = 3;863reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));864reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));865reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));866867if (opcode == TGSI_OPCODE_DP4) {868tctx->emit_instruction(tctx, &new_inst);869870/* MAD tmpA.x, src0.w, src1.w, tmpA.x */871new_inst = tgsi_default_full_instruction();872new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;873new_inst.Instruction.NumDstRegs = 1;874reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);875new_inst.Instruction.NumSrcRegs = 3;876reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));877reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));878reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));879}880}881882/* fixup last instruction to write to dst: */883reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);884885tctx->emit_instruction(tctx, &new_inst);886}887}888889/* FLR - floor, CEIL - ceil890* ; needs: 1 tmp891* if (CEIL) {892* FRC tmpA, -src893* ADD dst, src, tmpA894* } else {895* FRC tmpA, src896* SUB dst, src, tmpA897* }898*/899#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))900#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))901#define FLR_TMP 1902#define CEIL_TMP 1903static void904transform_flr_ceil(struct tgsi_transform_context *tctx,905struct tgsi_full_instruction *inst)906{907struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);908struct tgsi_full_dst_register *dst = &inst->Dst[0];909struct tgsi_full_src_register *src0 = &inst->Src[0];910struct tgsi_full_instruction new_inst;911enum tgsi_opcode opcode = inst->Instruction.Opcode;912913if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {914/* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */915new_inst = tgsi_default_full_instruction();916new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;917new_inst.Instruction.NumDstRegs = 1;918reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);919new_inst.Instruction.NumSrcRegs = 1;920reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));921922if (opcode == TGSI_OPCODE_CEIL)923new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;924tctx->emit_instruction(tctx, &new_inst);925926/* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */927new_inst = tgsi_default_full_instruction();928new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;929new_inst.Instruction.NumDstRegs = 1;930reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);931new_inst.Instruction.NumSrcRegs = 2;932reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));933reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));934if (opcode == TGSI_OPCODE_FLR)935new_inst.Src[1].Register.Negate = 1;936tctx->emit_instruction(tctx, &new_inst);937}938}939940/* TRUNC - truncate off fractional part941* dst.x = trunc(src.x)942* dst.y = trunc(src.y)943* dst.z = trunc(src.z)944* dst.w = trunc(src.w)945*946* ; needs: 1 tmp947* if (lower FLR) {948* FRC tmpA, |src|949* SUB tmpA, |src|, tmpA950* } else {951* FLR tmpA, |src|952* }953* CMP dst, src, -tmpA, tmpA954*/955#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))956#define TRUNC_TMP 1957static void958transform_trunc(struct tgsi_transform_context *tctx,959struct tgsi_full_instruction *inst)960{961struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);962struct tgsi_full_dst_register *dst = &inst->Dst[0];963struct tgsi_full_src_register *src0 = &inst->Src[0];964struct tgsi_full_instruction new_inst;965966if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {967if (ctx->config->lower_FLR) {968new_inst = tgsi_default_full_instruction();969new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;970new_inst.Instruction.NumDstRegs = 1;971reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);972new_inst.Instruction.NumSrcRegs = 1;973reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));974new_inst.Src[0].Register.Absolute = true;975new_inst.Src[0].Register.Negate = false;976tctx->emit_instruction(tctx, &new_inst);977978new_inst = tgsi_default_full_instruction();979new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;980new_inst.Instruction.NumDstRegs = 1;981reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);982new_inst.Instruction.NumSrcRegs = 2;983reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));984new_inst.Src[0].Register.Absolute = true;985new_inst.Src[0].Register.Negate = false;986reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));987new_inst.Src[1].Register.Negate = 1;988tctx->emit_instruction(tctx, &new_inst);989} else {990new_inst = tgsi_default_full_instruction();991new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;992new_inst.Instruction.NumDstRegs = 1;993reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);994new_inst.Instruction.NumSrcRegs = 1;995reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));996new_inst.Src[0].Register.Absolute = true;997new_inst.Src[0].Register.Negate = false;998tctx->emit_instruction(tctx, &new_inst);999}10001001new_inst = tgsi_default_full_instruction();1002new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;1003new_inst.Instruction.NumDstRegs = 1;1004reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);1005new_inst.Instruction.NumSrcRegs = 3;1006reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));1007reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));1008new_inst.Src[1].Register.Negate = true;1009reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));1010tctx->emit_instruction(tctx, &new_inst);1011}1012}10131014/* Inserts a MOV_SAT for the needed components of tex coord. Note that1015* in the case of TXP, the clamping must happen *after* projection, so1016* we need to lower TXP to TEX.1017*1018* MOV tmpA, src01019* if (opc == TXP) {1020* ; do perspective division manually before clamping:1021* RCP tmpB, tmpA.w1022* MUL tmpB.<pmask>, tmpA, tmpB.xxxx1023* opc = TEX;1024* }1025* MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords1026* <opc> dst, tmpA, ...1027*/1028#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))1029#define SAMP_TMP 21030static int1031transform_samp(struct tgsi_transform_context *tctx,1032struct tgsi_full_instruction *inst)1033{1034struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);1035struct tgsi_full_src_register *coord = &inst->Src[0];1036struct tgsi_full_src_register *samp;1037struct tgsi_full_instruction new_inst;1038/* mask is clamped coords, pmask is all coords (for projection): */1039unsigned mask = 0, pmask = 0, smask;1040unsigned tex = inst->Texture.Texture;1041enum tgsi_opcode opcode = inst->Instruction.Opcode;1042bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&1043(ctx->config->lower_TXP & (1 << tex));10441045if (opcode == TGSI_OPCODE_TXB2) {1046samp = &inst->Src[2];1047} else {1048samp = &inst->Src[1];1049}10501051/* convert sampler # to bitmask to test: */1052smask = 1 << samp->Register.Index;10531054/* check if we actually need to lower this one: */1055if (!(ctx->saturate & smask) && !lower_txp)1056return -1;10571058/* figure out which coordinates need saturating:1059* - RECT textures should not get saturated1060* - array index coords should not get saturated1061*/1062switch (tex) {1063case TGSI_TEXTURE_3D:1064case TGSI_TEXTURE_CUBE:1065case TGSI_TEXTURE_CUBE_ARRAY:1066case TGSI_TEXTURE_SHADOWCUBE:1067case TGSI_TEXTURE_SHADOWCUBE_ARRAY:1068if (ctx->config->saturate_r & smask)1069mask |= TGSI_WRITEMASK_Z;1070pmask |= TGSI_WRITEMASK_Z;1071FALLTHROUGH;10721073case TGSI_TEXTURE_2D:1074case TGSI_TEXTURE_2D_ARRAY:1075case TGSI_TEXTURE_SHADOW2D:1076case TGSI_TEXTURE_SHADOW2D_ARRAY:1077case TGSI_TEXTURE_2D_MSAA:1078case TGSI_TEXTURE_2D_ARRAY_MSAA:1079if (ctx->config->saturate_t & smask)1080mask |= TGSI_WRITEMASK_Y;1081pmask |= TGSI_WRITEMASK_Y;1082FALLTHROUGH;10831084case TGSI_TEXTURE_1D:1085case TGSI_TEXTURE_1D_ARRAY:1086case TGSI_TEXTURE_SHADOW1D:1087case TGSI_TEXTURE_SHADOW1D_ARRAY:1088if (ctx->config->saturate_s & smask)1089mask |= TGSI_WRITEMASK_X;1090pmask |= TGSI_WRITEMASK_X;1091break;10921093case TGSI_TEXTURE_RECT:1094case TGSI_TEXTURE_SHADOWRECT:1095/* we don't saturate, but in case of lower_txp we1096* still need to do the perspective divide:1097*/1098pmask = TGSI_WRITEMASK_XY;1099break;1100}11011102/* sanity check.. driver could be asking to saturate a non-1103* existent coordinate component:1104*/1105if (!mask && !lower_txp)1106return -1;11071108/* MOV tmpA, src0 */1109create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);11101111/* This is a bit sad.. we need to clamp *after* the coords1112* are projected, which means lowering TXP to TEX and doing1113* the projection ourself. But since I haven't figured out1114* how to make the lowering code deliver an electric shock1115* to anyone using GL_CLAMP, we must do this instead:1116*/1117if (opcode == TGSI_OPCODE_TXP) {1118/* RCP tmpB.x tmpA.w */1119new_inst = tgsi_default_full_instruction();1120new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;1121new_inst.Instruction.NumDstRegs = 1;1122reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);1123new_inst.Instruction.NumSrcRegs = 1;1124reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));1125tctx->emit_instruction(tctx, &new_inst);11261127/* MUL tmpA.mask, tmpA, tmpB.xxxx */1128new_inst = tgsi_default_full_instruction();1129new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;1130new_inst.Instruction.NumDstRegs = 1;1131reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);1132new_inst.Instruction.NumSrcRegs = 2;1133reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));1134reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));1135tctx->emit_instruction(tctx, &new_inst);11361137opcode = TGSI_OPCODE_TEX;1138}11391140/* MOV_SAT tmpA.<mask>, tmpA */1141if (mask) {1142create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);1143}11441145/* modify the texture samp instruction to take fixed up coord: */1146new_inst = *inst;1147new_inst.Instruction.Opcode = opcode;1148new_inst.Src[0] = ctx->tmp[A].src;1149tctx->emit_instruction(tctx, &new_inst);11501151return 0;1152}11531154/* Two-sided color emulation:1155* For each COLOR input, create a corresponding BCOLOR input, plus1156* CMP instruction to select front or back color based on FACE1157*/1158#define TWOSIDE_GROW(n) ( \11592 + /* FACE */ \1160((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\1161((n) * 1) + /* TEMP[] */ \1162((n) * NINST(3)) /* CMP instr */ \1163)11641165static void1166emit_twoside(struct tgsi_transform_context *tctx)1167{1168struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);1169struct tgsi_shader_info *info = ctx->info;1170struct tgsi_full_declaration decl;1171struct tgsi_full_instruction new_inst;1172unsigned inbase, tmpbase;1173unsigned i;11741175inbase = info->file_max[TGSI_FILE_INPUT] + 1;1176tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;11771178/* additional inputs for BCOLOR's */1179for (i = 0; i < ctx->two_side_colors; i++) {1180unsigned in_idx = ctx->two_side_idx[i];1181decl = tgsi_default_full_declaration();1182decl.Declaration.File = TGSI_FILE_INPUT;1183decl.Declaration.Semantic = true;1184decl.Range.First = decl.Range.Last = inbase + i;1185decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;1186decl.Semantic.Index = info->input_semantic_index[in_idx];1187decl.Declaration.Interpolate = true;1188decl.Interp.Interpolate = info->input_interpolate[in_idx];1189decl.Interp.Location = info->input_interpolate_loc[in_idx];1190decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];1191tctx->emit_declaration(tctx, &decl);1192}11931194/* additional input for FACE */1195if (ctx->two_side_colors && (ctx->face_idx == -1)) {1196decl = tgsi_default_full_declaration();1197decl.Declaration.File = TGSI_FILE_INPUT;1198decl.Declaration.Semantic = true;1199decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;1200decl.Semantic.Name = TGSI_SEMANTIC_FACE;1201decl.Semantic.Index = 0;1202tctx->emit_declaration(tctx, &decl);12031204ctx->face_idx = decl.Range.First;1205}12061207/* additional temps for COLOR/BCOLOR selection: */1208for (i = 0; i < ctx->two_side_colors; i++) {1209decl = tgsi_default_full_declaration();1210decl.Declaration.File = TGSI_FILE_TEMPORARY;1211decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;1212tctx->emit_declaration(tctx, &decl);1213}12141215/* and finally additional instructions to select COLOR/BCOLOR: */1216for (i = 0; i < ctx->two_side_colors; i++) {1217new_inst = tgsi_default_full_instruction();1218new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;12191220new_inst.Instruction.NumDstRegs = 1;1221new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;1222new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;1223new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;12241225new_inst.Instruction.NumSrcRegs = 3;1226new_inst.Src[0].Register.File = TGSI_FILE_INPUT;1227new_inst.Src[0].Register.Index = ctx->face_idx;1228new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;1229new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;1230new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;1231new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;1232new_inst.Src[1].Register.File = TGSI_FILE_INPUT;1233new_inst.Src[1].Register.Index = inbase + i;1234new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;1235new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;1236new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;1237new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;1238new_inst.Src[2].Register.File = TGSI_FILE_INPUT;1239new_inst.Src[2].Register.Index = ctx->two_side_idx[i];1240new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;1241new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;1242new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;1243new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;12441245tctx->emit_instruction(tctx, &new_inst);1246}1247}12481249static void1250emit_decls(struct tgsi_transform_context *tctx)1251{1252struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);1253struct tgsi_shader_info *info = ctx->info;1254struct tgsi_full_declaration decl;1255struct tgsi_full_immediate immed;1256unsigned tmpbase;1257unsigned i;12581259tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;12601261ctx->color_base = tmpbase + ctx->numtmp;12621263/* declare immediate: */1264immed = tgsi_default_full_immediate();1265immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */1266immed.u[0].Float = 0.0;1267immed.u[1].Float = 1.0;1268immed.u[2].Float = 128.0;1269immed.u[3].Float = 0.0;1270tctx->emit_immediate(tctx, &immed);12711272ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;1273ctx->imm.Register.Index = info->immediate_count;1274ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;1275ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;1276ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;1277ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;12781279/* declare temp regs: */1280for (i = 0; i < ctx->numtmp; i++) {1281decl = tgsi_default_full_declaration();1282decl.Declaration.File = TGSI_FILE_TEMPORARY;1283decl.Range.First = decl.Range.Last = tmpbase + i;1284tctx->emit_declaration(tctx, &decl);12851286ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;1287ctx->tmp[i].src.Register.Index = tmpbase + i;1288ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;1289ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;1290ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;1291ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;12921293ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;1294ctx->tmp[i].dst.Register.Index = tmpbase + i;1295ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;1296}12971298if (ctx->two_side_colors)1299emit_twoside(tctx);1300}13011302static void1303rename_color_inputs(struct tgsi_lowering_context *ctx,1304struct tgsi_full_instruction *inst)1305{1306unsigned i, j;1307for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {1308struct tgsi_src_register *src = &inst->Src[i].Register;1309if (src->File == TGSI_FILE_INPUT) {1310for (j = 0; j < ctx->two_side_colors; j++) {1311if (src->Index == (int)ctx->two_side_idx[j]) {1312src->File = TGSI_FILE_TEMPORARY;1313src->Index = ctx->color_base + j;1314break;1315}1316}1317}1318}13191320}13211322static void1323transform_instr(struct tgsi_transform_context *tctx,1324struct tgsi_full_instruction *inst)1325{1326struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);13271328if (!ctx->emitted_decls) {1329emit_decls(tctx);1330ctx->emitted_decls = 1;1331}13321333/* if emulating two-sided-color, we need to re-write some1334* src registers:1335*/1336if (ctx->two_side_colors)1337rename_color_inputs(ctx, inst);13381339switch (inst->Instruction.Opcode) {1340case TGSI_OPCODE_DST:1341if (!ctx->config->lower_DST)1342goto skip;1343transform_dst(tctx, inst);1344break;1345case TGSI_OPCODE_LRP:1346if (!ctx->config->lower_LRP)1347goto skip;1348transform_lrp(tctx, inst);1349break;1350case TGSI_OPCODE_FRC:1351if (!ctx->config->lower_FRC)1352goto skip;1353transform_frc(tctx, inst);1354break;1355case TGSI_OPCODE_POW:1356if (!ctx->config->lower_POW)1357goto skip;1358transform_pow(tctx, inst);1359break;1360case TGSI_OPCODE_LIT:1361if (!ctx->config->lower_LIT)1362goto skip;1363transform_lit(tctx, inst);1364break;1365case TGSI_OPCODE_EXP:1366if (!ctx->config->lower_EXP)1367goto skip;1368transform_exp(tctx, inst);1369break;1370case TGSI_OPCODE_LOG:1371if (!ctx->config->lower_LOG)1372goto skip;1373transform_log(tctx, inst);1374break;1375case TGSI_OPCODE_DP4:1376if (!ctx->config->lower_DP4)1377goto skip;1378transform_dotp(tctx, inst);1379break;1380case TGSI_OPCODE_DP3:1381if (!ctx->config->lower_DP3)1382goto skip;1383transform_dotp(tctx, inst);1384break;1385case TGSI_OPCODE_DP2:1386if (!ctx->config->lower_DP2)1387goto skip;1388transform_dotp(tctx, inst);1389break;1390case TGSI_OPCODE_FLR:1391if (!ctx->config->lower_FLR)1392goto skip;1393transform_flr_ceil(tctx, inst);1394break;1395case TGSI_OPCODE_CEIL:1396if (!ctx->config->lower_CEIL)1397goto skip;1398transform_flr_ceil(tctx, inst);1399break;1400case TGSI_OPCODE_TRUNC:1401if (!ctx->config->lower_TRUNC)1402goto skip;1403transform_trunc(tctx, inst);1404break;1405case TGSI_OPCODE_TEX:1406case TGSI_OPCODE_TXP:1407case TGSI_OPCODE_TXB:1408case TGSI_OPCODE_TXB2:1409case TGSI_OPCODE_TXL:1410if (transform_samp(tctx, inst))1411goto skip;1412break;1413default:1414skip:1415tctx->emit_instruction(tctx, inst);1416break;1417}1418}14191420/* returns NULL if no lowering required, else returns the new1421* tokens (which caller is required to free()). In either case1422* returns the current info.1423*/1424const struct tgsi_token *1425tgsi_transform_lowering(const struct tgsi_lowering_config *config,1426const struct tgsi_token *tokens,1427struct tgsi_shader_info *info)1428{1429struct tgsi_lowering_context ctx;1430struct tgsi_token *newtoks;1431int newlen, numtmp;14321433/* sanity check in case limit is ever increased: */1434STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);14351436/* sanity check the lowering */1437assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));1438assert(!(config->lower_FRC && config->lower_TRUNC));14391440memset(&ctx, 0, sizeof(ctx));1441ctx.base.transform_instruction = transform_instr;1442ctx.info = info;1443ctx.config = config;14441445tgsi_scan_shader(tokens, info);14461447/* if we are adding fragment shader support to emulate two-sided1448* color, then figure out the number of additional inputs we need1449* to create for BCOLOR's..1450*/1451if ((info->processor == PIPE_SHADER_FRAGMENT) &&1452config->color_two_side) {1453int i;1454ctx.face_idx = -1;1455for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {1456if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)1457ctx.two_side_idx[ctx.two_side_colors++] = i;1458if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)1459ctx.face_idx = i;1460}1461}14621463ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;14641465#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)1466/* if there are no instructions to lower, then we are done: */1467if (!(OPCS(DST) ||1468OPCS(LRP) ||1469OPCS(FRC) ||1470OPCS(POW) ||1471OPCS(LIT) ||1472OPCS(EXP) ||1473OPCS(LOG) ||1474OPCS(DP4) ||1475OPCS(DP3) ||1476OPCS(DP2) ||1477OPCS(FLR) ||1478OPCS(CEIL) ||1479OPCS(TRUNC) ||1480OPCS(TXP) ||1481ctx.two_side_colors ||1482ctx.saturate))1483return NULL;14841485#if 0 /* debug */1486_debug_printf("BEFORE:");1487tgsi_dump(tokens, 0);1488#endif14891490numtmp = 0;1491newlen = tgsi_num_tokens(tokens);1492if (OPCS(DST)) {1493newlen += DST_GROW * OPCS(DST);1494numtmp = MAX2(numtmp, DST_TMP);1495}1496if (OPCS(LRP)) {1497newlen += LRP_GROW * OPCS(LRP);1498numtmp = MAX2(numtmp, LRP_TMP);1499}1500if (OPCS(FRC)) {1501newlen += FRC_GROW * OPCS(FRC);1502numtmp = MAX2(numtmp, FRC_TMP);1503}1504if (OPCS(POW)) {1505newlen += POW_GROW * OPCS(POW);1506numtmp = MAX2(numtmp, POW_TMP);1507}1508if (OPCS(LIT)) {1509newlen += LIT_GROW * OPCS(LIT);1510numtmp = MAX2(numtmp, LIT_TMP);1511}1512if (OPCS(EXP)) {1513newlen += EXP_GROW * OPCS(EXP);1514numtmp = MAX2(numtmp, EXP_TMP);1515}1516if (OPCS(LOG)) {1517newlen += LOG_GROW * OPCS(LOG);1518numtmp = MAX2(numtmp, LOG_TMP);1519}1520if (OPCS(DP4)) {1521newlen += DP4_GROW * OPCS(DP4);1522numtmp = MAX2(numtmp, DOTP_TMP);1523}1524if (OPCS(DP3)) {1525newlen += DP3_GROW * OPCS(DP3);1526numtmp = MAX2(numtmp, DOTP_TMP);1527}1528if (OPCS(DP2)) {1529newlen += DP2_GROW * OPCS(DP2);1530numtmp = MAX2(numtmp, DOTP_TMP);1531}1532if (OPCS(FLR)) {1533newlen += FLR_GROW * OPCS(FLR);1534numtmp = MAX2(numtmp, FLR_TMP);1535}1536if (OPCS(CEIL)) {1537newlen += CEIL_GROW * OPCS(CEIL);1538numtmp = MAX2(numtmp, CEIL_TMP);1539}1540if (OPCS(TRUNC)) {1541newlen += TRUNC_GROW * OPCS(TRUNC);1542numtmp = MAX2(numtmp, TRUNC_TMP);1543}1544if (ctx.saturate || config->lower_TXP) {1545int n = 0;15461547if (ctx.saturate) {1548n = info->opcode_count[TGSI_OPCODE_TEX] +1549info->opcode_count[TGSI_OPCODE_TXP] +1550info->opcode_count[TGSI_OPCODE_TXB] +1551info->opcode_count[TGSI_OPCODE_TXB2] +1552info->opcode_count[TGSI_OPCODE_TXL];1553} else if (config->lower_TXP) {1554n = info->opcode_count[TGSI_OPCODE_TXP];1555}15561557newlen += SAMP_GROW * n;1558numtmp = MAX2(numtmp, SAMP_TMP);1559}15601561/* specifically don't include two_side_colors temps in the count: */1562ctx.numtmp = numtmp;15631564if (ctx.two_side_colors) {1565newlen += TWOSIDE_GROW(ctx.two_side_colors);1566/* note: we permanently consume temp regs, re-writing references1567* to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP1568* instruction that selects which varying to use):1569*/1570numtmp += ctx.two_side_colors;1571}15721573newlen += 2 * numtmp;1574newlen += 5; /* immediate */15751576newtoks = tgsi_alloc_tokens(newlen);1577if (!newtoks)1578return NULL;15791580tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);15811582tgsi_scan_shader(newtoks, info);15831584#if 0 /* debug */1585_debug_printf("AFTER:");1586tgsi_dump(newtoks, 0);1587#endif15881589return newtoks;1590}159115921593