Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_fpc_emit.c
4570 views
/**************************************************************************1*2* Copyright 2003 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include "util/u_math.h"28#include "i915_context.h"29#include "i915_fpc.h"30#include "i915_reg.h"3132uint32_t33i915_get_temp(struct i915_fp_compile *p)34{35int bit = ffs(~p->temp_flag);36if (!bit) {37i915_program_error(p, "i915_get_temp: out of temporaries");38return 0;39}4041p->temp_flag |= 1 << (bit - 1);42return bit - 1;43}4445static void46i915_release_temp(struct i915_fp_compile *p, int reg)47{48p->temp_flag &= ~(1 << reg);49}5051/**52* Get unpreserved temporary, a temp whose value is not preserved between53* PS program phases.54*/55uint32_t56i915_get_utemp(struct i915_fp_compile *p)57{58int bit = ffs(~p->utemp_flag);59if (!bit) {60i915_program_error(p, "i915_get_utemp: out of temporaries");61return 0;62}6364p->utemp_flag |= 1 << (bit - 1);65return UREG(REG_TYPE_U, (bit - 1));66}6768void69i915_release_utemps(struct i915_fp_compile *p)70{71p->utemp_flag = ~0x7;72}7374uint32_t75i915_emit_decl(struct i915_fp_compile *p, uint32_t type, uint32_t nr,76uint32_t d0_flags)77{78uint32_t reg = UREG(type, nr);7980if (type == REG_TYPE_T) {81if (p->decl_t & (1 << nr))82return reg;8384p->decl_t |= (1 << nr);85} else if (type == REG_TYPE_S) {86if (p->decl_s & (1 << nr))87return reg;8889p->decl_s |= (1 << nr);90} else91return reg;9293if (p->decl < p->declarations + I915_PROGRAM_SIZE) {94*(p->decl++) = (D0_DCL | D0_DEST(reg) | d0_flags);95*(p->decl++) = D1_MBZ;96*(p->decl++) = D2_MBZ;97} else98i915_program_error(p, "Out of declarations");99100p->nr_decl_insn++;101return reg;102}103104uint32_t105i915_emit_arith(struct i915_fp_compile *p, uint32_t op, uint32_t dest,106uint32_t mask, uint32_t saturate, uint32_t src0, uint32_t src1,107uint32_t src2)108{109uint32_t c[3];110uint32_t nr_const = 0;111112assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);113dest = UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest));114assert(dest);115116if (GET_UREG_TYPE(src0) == REG_TYPE_CONST)117c[nr_const++] = 0;118if (GET_UREG_TYPE(src1) == REG_TYPE_CONST)119c[nr_const++] = 1;120if (GET_UREG_TYPE(src2) == REG_TYPE_CONST)121c[nr_const++] = 2;122123/* Recursively call this function to MOV additional const values124* into temporary registers. Use utemp registers for this -125* currently shouldn't be possible to run out, but keep an eye on126* this.127*/128if (nr_const > 1) {129uint32_t s[3], first, i, old_utemp_flag;130131s[0] = src0;132s[1] = src1;133s[2] = src2;134old_utemp_flag = p->utemp_flag;135136first = GET_UREG_NR(s[c[0]]);137for (i = 1; i < nr_const; i++) {138if (GET_UREG_NR(s[c[i]]) != first) {139uint32_t tmp = i915_get_utemp(p);140141i915_emit_arith(p, A0_MOV, tmp, A0_DEST_CHANNEL_ALL, 0, s[c[i]], 0,1420);143s[c[i]] = tmp;144}145}146147src0 = s[0];148src1 = s[1];149src2 = s[2];150p->utemp_flag = old_utemp_flag; /* restore */151}152153if (p->csr < p->program + I915_PROGRAM_SIZE) {154*(p->csr++) = (op | A0_DEST(dest) | mask | saturate | A0_SRC0(src0));155*(p->csr++) = (A1_SRC0(src0) | A1_SRC1(src1));156*(p->csr++) = (A2_SRC1(src1) | A2_SRC2(src2));157} else158i915_program_error(p, "Out of instructions");159160if (GET_UREG_TYPE(dest) == REG_TYPE_R)161p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;162163p->nr_alu_insn++;164return dest;165}166167/**168* Emit a texture load or texkill instruction.169* \param dest the dest i915 register170* \param destmask the dest register writemask171* \param sampler the i915 sampler register172* \param coord the i915 source texcoord operand173* \param opcode the instruction opcode174*/175uint32_t176i915_emit_texld(struct i915_fp_compile *p, uint32_t dest, uint32_t destmask,177uint32_t sampler, uint32_t coord, uint32_t opcode,178uint32_t num_coord)179{180const uint32_t k = UREG(GET_UREG_TYPE(coord), GET_UREG_NR(coord));181182int temp = -1;183uint32_t ignore = 0;184185/* Eliminate the useless texture coordinates. Otherwise we end up generating186* a swizzle for no reason below. */187switch (num_coord) {188case 1:189/* For 1D textures, make sure that the Y coordinate is actually190* initialized. It seems that if the channel is never written during the191* program, texturing returns undefined results (even if the Y wrap is192* REPEAT).193*/194coord = swizzle(coord, X, X, Y, Z);195FALLTHROUGH;196case 2:197ignore |= (0xf << UREG_CHANNEL_Z_SHIFT);198FALLTHROUGH;199case 3:200ignore |= (0xf << UREG_CHANNEL_W_SHIFT);201}202203if ((coord & ~ignore) != (k & ~ignore) ||204GET_UREG_TYPE(coord) == REG_TYPE_CONST) {205/* texcoord is swizzled or negated. Need to allocate a new temporary206* register (a utemp / unpreserved temp) won't do.207*/208uint32_t tempReg;209210temp = i915_get_temp(p); /* get temp reg index */211tempReg = UREG(REG_TYPE_R, temp); /* make i915 register */212213i915_emit_arith(p, A0_MOV, tempReg,214A0_DEST_CHANNEL_ALL, /* dest reg, writemask */2150, /* saturate */216coord, 0, 0); /* src0, src1, src2 */217218/* new src texcoord is tempReg */219coord = tempReg;220}221222/* Don't worry about saturate as we only support223*/224if (destmask != A0_DEST_CHANNEL_ALL) {225/* if not writing to XYZW... */226uint32_t tmp = i915_get_utemp(p);227i915_emit_texld(p, tmp, A0_DEST_CHANNEL_ALL, sampler, coord, opcode,228num_coord);229i915_emit_arith(p, A0_MOV, dest, destmask, 0, tmp, 0, 0);230/* XXX release utemp here? */231} else {232assert(GET_UREG_TYPE(dest) != REG_TYPE_CONST);233assert(dest == UREG(GET_UREG_TYPE(dest), GET_UREG_NR(dest)));234235/* Output register being oC or oD defines a phase boundary */236if (GET_UREG_TYPE(dest) == REG_TYPE_OC ||237GET_UREG_TYPE(dest) == REG_TYPE_OD)238p->nr_tex_indirect++;239240/* Reading from an r# register whose contents depend on output of the241* current phase defines a phase boundary.242*/243if (GET_UREG_TYPE(coord) == REG_TYPE_R &&244p->register_phases[GET_UREG_NR(coord)] == p->nr_tex_indirect)245p->nr_tex_indirect++;246247if (p->csr < p->program + I915_PROGRAM_SIZE) {248*(p->csr++) = (opcode | T0_DEST(dest) | T0_SAMPLER(sampler));249250*(p->csr++) = T1_ADDRESS_REG(coord);251*(p->csr++) = T2_MBZ;252} else253i915_program_error(p, "Out of instructions");254255if (GET_UREG_TYPE(dest) == REG_TYPE_R)256p->register_phases[GET_UREG_NR(dest)] = p->nr_tex_indirect;257258p->nr_tex_insn++;259}260261if (temp >= 0)262i915_release_temp(p, temp);263264return dest;265}266267uint32_t268i915_emit_const1f(struct i915_fp_compile *p, float c0)269{270struct i915_fragment_shader *ifs = p->shader;271unsigned reg, idx;272273if (c0 == 0.0)274return swizzle(UREG(REG_TYPE_R, 0), ZERO, ZERO, ZERO, ZERO);275if (c0 == 1.0)276return swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE);277278for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {279if (ifs->constant_flags[reg] == I915_CONSTFLAG_USER)280continue;281for (idx = 0; idx < 4; idx++) {282if (!(ifs->constant_flags[reg] & (1 << idx)) ||283ifs->constants[reg][idx] == c0) {284ifs->constants[reg][idx] = c0;285ifs->constant_flags[reg] |= 1 << idx;286if (reg + 1 > ifs->num_constants)287ifs->num_constants = reg + 1;288return swizzle(UREG(REG_TYPE_CONST, reg), idx, ZERO, ZERO, ONE);289}290}291}292293i915_program_error(p, "i915_emit_const1f: out of constants");294return 0;295}296297uint32_t298i915_emit_const2f(struct i915_fp_compile *p, float c0, float c1)299{300struct i915_fragment_shader *ifs = p->shader;301unsigned reg, idx;302303if (c0 == 0.0)304return swizzle(i915_emit_const1f(p, c1), ZERO, X, Z, W);305if (c0 == 1.0)306return swizzle(i915_emit_const1f(p, c1), ONE, X, Z, W);307308if (c1 == 0.0)309return swizzle(i915_emit_const1f(p, c0), X, ZERO, Z, W);310if (c1 == 1.0)311return swizzle(i915_emit_const1f(p, c0), X, ONE, Z, W);312313// XXX emit swizzle here for 0, 1, -1 and any combination thereof314// we can use swizzle + neg for that315for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {316if (ifs->constant_flags[reg] == 0xf ||317ifs->constant_flags[reg] == I915_CONSTFLAG_USER)318continue;319for (idx = 0; idx < 3; idx++) {320if (!(ifs->constant_flags[reg] & (3 << idx))) {321ifs->constants[reg][idx + 0] = c0;322ifs->constants[reg][idx + 1] = c1;323ifs->constant_flags[reg] |= 3 << idx;324if (reg + 1 > ifs->num_constants)325ifs->num_constants = reg + 1;326return swizzle(UREG(REG_TYPE_CONST, reg), idx, idx + 1, ZERO, ONE);327}328}329}330331i915_program_error(p, "i915_emit_const2f: out of constants");332return 0;333}334335uint32_t336i915_emit_const4f(struct i915_fp_compile *p, float c0, float c1, float c2,337float c3)338{339struct i915_fragment_shader *ifs = p->shader;340unsigned reg;341342// XXX emit swizzle here for 0, 1, -1 and any combination thereof343// we can use swizzle + neg for that344for (reg = 0; reg < I915_MAX_CONSTANT; reg++) {345if (ifs->constant_flags[reg] == 0xf && ifs->constants[reg][0] == c0 &&346ifs->constants[reg][1] == c1 && ifs->constants[reg][2] == c2 &&347ifs->constants[reg][3] == c3) {348return UREG(REG_TYPE_CONST, reg);349} else if (ifs->constant_flags[reg] == 0) {350351ifs->constants[reg][0] = c0;352ifs->constants[reg][1] = c1;353ifs->constants[reg][2] = c2;354ifs->constants[reg][3] = c3;355ifs->constant_flags[reg] = 0xf;356if (reg + 1 > ifs->num_constants)357ifs->num_constants = reg + 1;358return UREG(REG_TYPE_CONST, reg);359}360}361362i915_program_error(p, "i915_emit_const4f: out of constants");363return 0;364}365366uint32_t367i915_emit_const4fv(struct i915_fp_compile *p, const float *c)368{369return i915_emit_const4f(p, c[0], c[1], c[2], c[3]);370}371372373