Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_fpc_translate.c
4570 views
/**************************************************************************1*2* Copyright 2007 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* The above copyright notice and this permission notice (including the14* next paragraph) shall be included in all copies or substantial portions15* of the Software.16*17* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS18* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF19* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.20* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR21* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,22* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE23* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.24*25**************************************************************************/2627#include <stdarg.h>2829#include "i915_context.h"30#include "i915_debug.h"31#include "i915_debug_private.h"32#include "i915_fpc.h"33#include "i915_reg.h"3435#include "pipe/p_shader_tokens.h"36#include "tgsi/tgsi_dump.h"37#include "tgsi/tgsi_info.h"38#include "tgsi/tgsi_parse.h"39#include "util/log.h"40#include "util/u_math.h"41#include "util/u_memory.h"42#include "util/u_string.h"4344#include "draw/draw_vertex.h"4546#ifndef M_PI47#define M_PI 3.1415926535897932384648#endif4950/**51* Simple pass-through fragment shader to use when we don't have52* a real shader (or it fails to compile for some reason).53*/54static unsigned passthrough_program[] = {55_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),56/* move to output color:57*/58(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |59(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),60((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |61(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |62(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |63(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),640};6566/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */67static const float sin_constants[4] = {682.0 * M_PI, -8.0f * M_PI *M_PI *M_PI / (3 * 2 * 1),6932.0f * M_PI *M_PI *M_PI *M_PI *M_PI / (5 * 4 * 3 * 2 * 1),70-128.0f * M_PI *M_PI *M_PI *M_PI *M_PI *M_PI *M_PI /71(7 * 6 * 5 * 4 * 3 * 2 * 1)};7273/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */74static const float cos_constants[4] = {751.0, -4.0f * M_PI *M_PI / (2 * 1),7616.0f * M_PI *M_PI *M_PI *M_PI / (4 * 3 * 2 * 1),77-64.0f * M_PI *M_PI *M_PI *M_PI *M_PI *M_PI / (6 * 5 * 4 * 3 * 2 * 1)};7879/**80* component-wise negation of ureg81*/82static inline int83negate(int reg, int x, int y, int z, int w)84{85/* Another neat thing about the UREG representation */86return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |87((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |88((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |89((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));90}9192/**93* In the event of a translation failure, we'll generate a simple color94* pass-through program.95*/96static void97i915_use_passthrough_shader(struct i915_fragment_shader *fs)98{99fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));100if (fs->program) {101memcpy(fs->program, passthrough_program, sizeof(passthrough_program));102fs->program_len = ARRAY_SIZE(passthrough_program);103}104fs->num_constants = 0;105}106107void108i915_program_error(struct i915_fp_compile *p, const char *msg, ...)109{110if (p->log_program_errors) {111va_list args;112113va_start(args, msg);114mesa_loge_v(msg, args);115va_end(args);116}117118p->error = 1;119}120121static uint32_t122get_mapping(struct i915_fragment_shader *fs, int unit)123{124int i;125for (i = 0; i < I915_TEX_UNITS; i++) {126if (fs->generic_mapping[i] == -1) {127fs->generic_mapping[i] = unit;128return i;129}130if (fs->generic_mapping[i] == unit)131return i;132}133debug_printf("Exceeded max generics\n");134return 0;135}136137/**138* Construct a ureg for the given source register. Will emit139* constants, apply swizzling and negation as needed.140*/141static uint32_t142src_vector(struct i915_fp_compile *p,143const struct i915_full_src_register *source,144struct i915_fragment_shader *fs)145{146uint32_t index = source->Register.Index;147uint32_t src = 0, sem_name, sem_ind;148149switch (source->Register.File) {150case TGSI_FILE_TEMPORARY:151if (source->Register.Index >= I915_MAX_TEMPORARY) {152i915_program_error(p, "Exceeded max temporary reg");153return 0;154}155src = UREG(REG_TYPE_R, index);156break;157case TGSI_FILE_INPUT:158/* XXX: Packing COL1, FOGC into a single attribute works for159* texenv programs, but will fail for real fragment programs160* that use these attributes and expect them to be a full 4161* components wide. Could use a texcoord to pass these162* attributes if necessary, but that won't work in the general163* case.164*165* We also use a texture coordinate to pass wpos when possible.166*/167168sem_name = p->shader->info.input_semantic_name[index];169sem_ind = p->shader->info.input_semantic_index[index];170171switch (sem_name) {172case TGSI_SEMANTIC_POSITION: {173/* for fragcoord */174int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);175src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,176D0_CHANNEL_ALL);177break;178}179case TGSI_SEMANTIC_COLOR:180if (sem_ind == 0) {181src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);182} else {183/* secondary color */184assert(sem_ind == 1);185src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);186src = swizzle(src, X, Y, Z, ONE);187}188break;189case TGSI_SEMANTIC_FOG:190src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);191src = swizzle(src, W, W, W, W);192break;193case TGSI_SEMANTIC_GENERIC: {194int real_tex_unit = get_mapping(fs, sem_ind);195src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,196D0_CHANNEL_ALL);197break;198}199case TGSI_SEMANTIC_FACE: {200/* for back/front faces */201int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);202src =203i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);204break;205}206default:207i915_program_error(p, "Bad source->Index");208return 0;209}210break;211212case TGSI_FILE_IMMEDIATE:213assert(index < p->num_immediates);214index = p->immediates_map[index];215FALLTHROUGH;216case TGSI_FILE_CONSTANT:217src = UREG(REG_TYPE_CONST, index);218break;219220default:221i915_program_error(p, "Bad source->File");222return 0;223}224225src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,226source->Register.SwizzleZ, source->Register.SwizzleW);227228/* No HW abs flag, so we have to max with the negation. */229if (source->Register.Absolute) {230uint32_t tmp = i915_get_utemp(p);231i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,232negate(src, 1, 1, 1, 1), 0);233src = tmp;234}235236/* There's both negate-all-components and per-component negation.237* Try to handle both here.238*/239{240int n = source->Register.Negate;241src = negate(src, n, n, n, n);242}243244return src;245}246247/**248* Construct a ureg for a destination register.249*/250static uint32_t251get_result_vector(struct i915_fp_compile *p,252const struct i915_full_dst_register *dest)253{254switch (dest->Register.File) {255case TGSI_FILE_OUTPUT: {256uint32_t sem_name =257p->shader->info.output_semantic_name[dest->Register.Index];258switch (sem_name) {259case TGSI_SEMANTIC_POSITION:260return UREG(REG_TYPE_OD, 0);261case TGSI_SEMANTIC_COLOR:262return UREG(REG_TYPE_OC, 0);263default:264i915_program_error(p, "Bad inst->DstReg.Index/semantics");265return 0;266}267}268case TGSI_FILE_TEMPORARY:269return UREG(REG_TYPE_R, dest->Register.Index);270default:271i915_program_error(p, "Bad inst->DstReg.File");272return 0;273}274}275276/**277* Compute flags for saturation and writemask.278*/279static uint32_t280get_result_flags(const struct i915_full_instruction *inst)281{282const uint32_t writeMask = inst->Dst[0].Register.WriteMask;283uint32_t flags = 0x0;284285if (inst->Instruction.Saturate)286flags |= A0_DEST_SATURATE;287288if (writeMask & TGSI_WRITEMASK_X)289flags |= A0_DEST_CHANNEL_X;290if (writeMask & TGSI_WRITEMASK_Y)291flags |= A0_DEST_CHANNEL_Y;292if (writeMask & TGSI_WRITEMASK_Z)293flags |= A0_DEST_CHANNEL_Z;294if (writeMask & TGSI_WRITEMASK_W)295flags |= A0_DEST_CHANNEL_W;296297return flags;298}299300/**301* Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token302*/303static uint32_t304translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)305{306switch (tex) {307case TGSI_TEXTURE_SHADOW1D:308FALLTHROUGH;309case TGSI_TEXTURE_1D:310return D0_SAMPLE_TYPE_2D;311312case TGSI_TEXTURE_SHADOW2D:313FALLTHROUGH;314case TGSI_TEXTURE_2D:315return D0_SAMPLE_TYPE_2D;316317case TGSI_TEXTURE_SHADOWRECT:318FALLTHROUGH;319case TGSI_TEXTURE_RECT:320return D0_SAMPLE_TYPE_2D;321322case TGSI_TEXTURE_3D:323return D0_SAMPLE_TYPE_VOLUME;324325case TGSI_TEXTURE_CUBE:326return D0_SAMPLE_TYPE_CUBE;327328default:329i915_program_error(p, "TexSrc type");330return 0;331}332}333334/**335* Return the number of coords needed to access a given TGSI_TEXTURE_*336*/337uint32_t338i915_num_coords(uint32_t tex)339{340switch (tex) {341case TGSI_TEXTURE_SHADOW1D:342case TGSI_TEXTURE_1D:343return 1;344345case TGSI_TEXTURE_SHADOW2D:346case TGSI_TEXTURE_2D:347case TGSI_TEXTURE_SHADOWRECT:348case TGSI_TEXTURE_RECT:349return 2;350351case TGSI_TEXTURE_3D:352case TGSI_TEXTURE_CUBE:353return 3;354355default:356debug_printf("Unknown texture target for num coords");357return 2;358}359}360361/**362* Generate texel lookup instruction.363*/364static void365emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,366uint32_t opcode, struct i915_fragment_shader *fs)367{368uint32_t texture = inst->Texture.Texture;369uint32_t unit = inst->Src[1].Register.Index;370uint32_t tex = translate_tex_src_target(p, texture);371uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);372uint32_t coord = src_vector(p, &inst->Src[0], fs);373374i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),375get_result_flags(inst), sampler, coord, opcode,376i915_num_coords(texture));377}378379/**380* Generate a simple arithmetic instruction381* \param opcode the i915 opcode382* \param numArgs the number of input/src arguments383*/384static void385emit_simple_arith(struct i915_fp_compile *p,386const struct i915_full_instruction *inst, uint32_t opcode,387uint32_t numArgs, struct i915_fragment_shader *fs)388{389uint32_t arg1, arg2, arg3;390391assert(numArgs <= 3);392393arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);394arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);395arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);396397i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),398get_result_flags(inst), 0, arg1, arg2, arg3);399}400401/** As above, but swap the first two src regs */402static void403emit_simple_arith_swap2(struct i915_fp_compile *p,404const struct i915_full_instruction *inst,405uint32_t opcode, uint32_t numArgs,406struct i915_fragment_shader *fs)407{408struct i915_full_instruction inst2;409410assert(numArgs == 2);411412/* transpose first two registers */413inst2 = *inst;414inst2.Src[0] = inst->Src[1];415inst2.Src[1] = inst->Src[0];416417emit_simple_arith(p, &inst2, opcode, numArgs, fs);418}419420/*421* Translate TGSI instruction to i915 instruction.422*423* Possible concerns:424*425* DDX, DDY -- return 0426* SIN, COS -- could use another taylor step?427* LIT -- results seem a little different to sw mesa428* LOG -- different to mesa on negative numbers, but this is conformant.429*/430static void431i915_translate_instruction(struct i915_fp_compile *p,432const struct i915_full_instruction *inst,433struct i915_fragment_shader *fs)434{435uint32_t src0, src1, src2, flags;436uint32_t tmp = 0;437438switch (inst->Instruction.Opcode) {439case TGSI_OPCODE_ADD:440emit_simple_arith(p, inst, A0_ADD, 2, fs);441break;442443case TGSI_OPCODE_CEIL:444src0 = src_vector(p, &inst->Src[0], fs);445tmp = i915_get_utemp(p);446flags = get_result_flags(inst);447i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,448negate(src0, 1, 1, 1, 1), 0, 0);449i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,450negate(tmp, 1, 1, 1, 1), 0, 0);451break;452453case TGSI_OPCODE_CMP:454src0 = src_vector(p, &inst->Src[0], fs);455src1 = src_vector(p, &inst->Src[1], fs);456src2 = src_vector(p, &inst->Src[2], fs);457i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),458get_result_flags(inst), 0, src0, src2,459src1); /* NOTE: order of src2, src1 */460break;461462case TGSI_OPCODE_COS:463src0 = src_vector(p, &inst->Src[0], fs);464tmp = i915_get_utemp(p);465466i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, src0,467i915_emit_const1f(p, 1.0f / (float)(M_PI * 2.0)), 0);468469i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);470471/*472* t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1473* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1474* t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1475* result = DP4 t0, cos_constants476*/477i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XY, 0,478swizzle(tmp, X, X, ONE, ONE),479swizzle(tmp, X, ONE, ONE, ONE), 0);480481i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XYZ, 0,482swizzle(tmp, X, Y, X, ONE), swizzle(tmp, X, X, ONE, ONE),4830);484485i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XYZ, 0,486swizzle(tmp, X, X, Z, ONE),487swizzle(tmp, Z, ONE, ONE, ONE), 0);488489i915_emit_arith(p, A0_DP4, get_result_vector(p, &inst->Dst[0]),490get_result_flags(inst), 0, swizzle(tmp, ONE, Z, Y, X),491i915_emit_const4fv(p, cos_constants), 0);492break;493494case TGSI_OPCODE_DDX:495case TGSI_OPCODE_DDY:496/* XXX We just output 0 here */497debug_printf("Punting DDX/DDY\n");498src0 = get_result_vector(p, &inst->Dst[0]);499i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),500get_result_flags(inst), 0,501swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);502break;503504case TGSI_OPCODE_DP2:505src0 = src_vector(p, &inst->Src[0], fs);506src1 = src_vector(p, &inst->Src[1], fs);507508i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),509get_result_flags(inst), 0,510swizzle(src0, X, Y, ZERO, ZERO), src1, 0);511break;512513case TGSI_OPCODE_DP3:514emit_simple_arith(p, inst, A0_DP3, 2, fs);515break;516517case TGSI_OPCODE_DP4:518emit_simple_arith(p, inst, A0_DP4, 2, fs);519break;520521case TGSI_OPCODE_DST:522src0 = src_vector(p, &inst->Src[0], fs);523src1 = src_vector(p, &inst->Src[1], fs);524525/* result[0] = 1 * 1;526* result[1] = a[1] * b[1];527* result[2] = a[2] * 1;528* result[3] = 1 * b[3];529*/530i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),531get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),532swizzle(src1, ONE, Y, ONE, W), 0);533break;534535case TGSI_OPCODE_END:536/* no-op */537break;538539case TGSI_OPCODE_EX2:540src0 = src_vector(p, &inst->Src[0], fs);541542i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),543get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,5440);545break;546547case TGSI_OPCODE_FLR:548emit_simple_arith(p, inst, A0_FLR, 1, fs);549break;550551case TGSI_OPCODE_FRC:552emit_simple_arith(p, inst, A0_FRC, 1, fs);553break;554555case TGSI_OPCODE_KILL_IF:556/* kill if src[0].x < 0 || src[0].y < 0 ... */557src0 = src_vector(p, &inst->Src[0], fs);558tmp = i915_get_utemp(p);559560i915_emit_texld(p, tmp, /* dest reg: a dummy reg */561A0_DEST_CHANNEL_ALL, /* dest writemask */5620, /* sampler */563src0, /* coord*/564T0_TEXKILL, /* opcode */5651); /* num_coord */566break;567568case TGSI_OPCODE_KILL:569/* unconditional kill */570tmp = i915_get_utemp(p);571572i915_emit_texld(p, tmp, /* dest reg: a dummy reg */573A0_DEST_CHANNEL_ALL, /* dest writemask */5740, /* sampler */575negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),5761, 1, 1, 1), /* coord */577T0_TEXKILL, /* opcode */5781); /* num_coord */579break;580581case TGSI_OPCODE_LG2:582src0 = src_vector(p, &inst->Src[0], fs);583584i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),585get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,5860);587break;588589case TGSI_OPCODE_LIT:590src0 = src_vector(p, &inst->Src[0], fs);591tmp = i915_get_utemp(p);592593/* tmp = max( a.xyzw, a.00zw )594* XXX: Clamp tmp.w to -128..128595* tmp.y = log(tmp.y)596* tmp.y = tmp.w * tmp.y597* tmp.y = exp(tmp.y)598* result = cmp (a.11-x1, a.1x01, a.1xy1 )599*/600i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,601swizzle(src0, ZERO, ZERO, Z, W), 0);602603i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,604swizzle(tmp, Y, Y, Y, Y), 0, 0);605606i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,607swizzle(tmp, ZERO, Y, ZERO, ZERO),608swizzle(tmp, ZERO, W, ZERO, ZERO), 0);609610i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,611swizzle(tmp, Y, Y, Y, Y), 0, 0);612613i915_emit_arith(614p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),6150, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),616swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));617618break;619620case TGSI_OPCODE_LRP:621src0 = src_vector(p, &inst->Src[0], fs);622src1 = src_vector(p, &inst->Src[1], fs);623src2 = src_vector(p, &inst->Src[2], fs);624flags = get_result_flags(inst);625tmp = i915_get_utemp(p);626627/* b*a + c*(1-a)628*629* b*a + c - ca630*631* tmp = b*a + c,632* result = (-c)*a + tmp633*/634i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,635src0, src2);636637i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,638negate(src2, 1, 1, 1, 1), src0, tmp);639break;640641case TGSI_OPCODE_MAD:642emit_simple_arith(p, inst, A0_MAD, 3, fs);643break;644645case TGSI_OPCODE_MAX:646emit_simple_arith(p, inst, A0_MAX, 2, fs);647break;648649case TGSI_OPCODE_MIN:650emit_simple_arith(p, inst, A0_MIN, 2, fs);651break;652653case TGSI_OPCODE_MOV:654emit_simple_arith(p, inst, A0_MOV, 1, fs);655break;656657case TGSI_OPCODE_MUL:658emit_simple_arith(p, inst, A0_MUL, 2, fs);659break;660661case TGSI_OPCODE_NOP:662break;663664case TGSI_OPCODE_POW:665src0 = src_vector(p, &inst->Src[0], fs);666src1 = src_vector(p, &inst->Src[1], fs);667tmp = i915_get_utemp(p);668flags = get_result_flags(inst);669670/* XXX: masking on intermediate values, here and elsewhere.671*/672i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,673swizzle(src0, X, X, X, X), 0, 0);674675i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);676677i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,678swizzle(tmp, X, X, X, X), 0, 0);679break;680681case TGSI_OPCODE_RET:682/* XXX: no-op? */683break;684685case TGSI_OPCODE_RCP:686src0 = src_vector(p, &inst->Src[0], fs);687688i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),689get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,6900);691break;692693case TGSI_OPCODE_RSQ:694src0 = src_vector(p, &inst->Src[0], fs);695696i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),697get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,6980);699break;700701case TGSI_OPCODE_SEQ:702/* if we're both >= and <= then we're == */703src0 = src_vector(p, &inst->Src[0], fs);704src1 = src_vector(p, &inst->Src[1], fs);705tmp = i915_get_utemp(p);706707i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);708709i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),710get_result_flags(inst), 0, src1, src0, 0);711712i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),713get_result_flags(inst), 0,714get_result_vector(p, &inst->Dst[0]), tmp, 0);715716break;717718case TGSI_OPCODE_SGE:719emit_simple_arith(p, inst, A0_SGE, 2, fs);720break;721722case TGSI_OPCODE_SIN:723src0 = src_vector(p, &inst->Src[0], fs);724tmp = i915_get_utemp(p);725726i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, src0,727i915_emit_const1f(p, 1.0f / (float)(M_PI * 2.0)), 0);728729i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);730731/*732* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1733* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x734* t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x735* result = DP4 t1.wzyx, sin_constants736*/737i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XY, 0,738swizzle(tmp, X, X, ONE, ONE),739swizzle(tmp, X, ONE, ONE, ONE), 0);740741i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0,742swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE),7430);744745i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0,746swizzle(tmp, X, Y, Y, W), swizzle(tmp, X, Z, ONE, ONE),7470);748749i915_emit_arith(p, A0_DP4, get_result_vector(p, &inst->Dst[0]),750get_result_flags(inst), 0, swizzle(tmp, W, Z, Y, X),751i915_emit_const4fv(p, sin_constants), 0);752break;753754case TGSI_OPCODE_SLE:755/* like SGE, but swap reg0, reg1 */756emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);757break;758759case TGSI_OPCODE_SLT:760emit_simple_arith(p, inst, A0_SLT, 2, fs);761break;762763case TGSI_OPCODE_SGT:764/* like SLT, but swap reg0, reg1 */765emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);766break;767768case TGSI_OPCODE_SNE:769/* if we're < or > then we're != */770src0 = src_vector(p, &inst->Src[0], fs);771src1 = src_vector(p, &inst->Src[1], fs);772tmp = i915_get_utemp(p);773774i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);775776i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),777get_result_flags(inst), 0, src1, src0, 0);778779i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),780get_result_flags(inst), 0,781get_result_vector(p, &inst->Dst[0]), tmp, 0);782break;783784case TGSI_OPCODE_SSG:785/* compute (src>0) - (src<0) */786src0 = src_vector(p, &inst->Src[0], fs);787tmp = i915_get_utemp(p);788789i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,790swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);791792i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),793get_result_flags(inst), 0,794swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);795796i915_emit_arith(797p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0,798get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);799break;800801case TGSI_OPCODE_TEX:802emit_tex(p, inst, T0_TEXLD, fs);803break;804805case TGSI_OPCODE_TRUNC:806emit_simple_arith(p, inst, A0_TRC, 1, fs);807break;808809case TGSI_OPCODE_TXB:810emit_tex(p, inst, T0_TEXLDB, fs);811break;812813case TGSI_OPCODE_TXP:814emit_tex(p, inst, T0_TEXLDP, fs);815break;816817default:818i915_program_error(p, "bad opcode %s (%d)",819tgsi_get_opcode_name(inst->Instruction.Opcode),820inst->Instruction.Opcode);821return;822}823824i915_release_utemps(p);825}826827static void828i915_translate_token(struct i915_fp_compile *p,829const union i915_full_token *token,830struct i915_fragment_shader *fs)831{832struct i915_fragment_shader *ifs = p->shader;833switch (token->Token.Type) {834case TGSI_TOKEN_TYPE_PROPERTY:835/* Ignore properties where we only support one value. */836assert(token->FullProperty.Property.PropertyName ==837TGSI_PROPERTY_FS_COORD_ORIGIN ||838token->FullProperty.Property.PropertyName ==839TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||840token->FullProperty.Property.PropertyName ==841TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);842break;843844case TGSI_TOKEN_TYPE_DECLARATION:845if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {846if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {847i915_program_error(p, "Exceeded %d max uniforms",848I915_MAX_CONSTANT);849} else {850uint32_t i;851for (i = token->FullDeclaration.Range.First;852i <= token->FullDeclaration.Range.Last; i++) {853ifs->constant_flags[i] = I915_CONSTFLAG_USER;854ifs->num_constants = MAX2(ifs->num_constants, i + 1);855}856}857} else if (token->FullDeclaration.Declaration.File ==858TGSI_FILE_TEMPORARY) {859if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {860i915_program_error(p, "Exceeded %d max TGSI temps",861I915_MAX_TEMPORARY);862} else {863uint32_t i;864for (i = token->FullDeclaration.Range.First;865i <= token->FullDeclaration.Range.Last; i++) {866/* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */867p->temp_flag |= (1 << i); /* mark temp as used */868}869}870}871break;872873case TGSI_TOKEN_TYPE_IMMEDIATE: {874const struct tgsi_full_immediate *imm = &token->FullImmediate;875const uint32_t pos = p->num_immediates++;876uint32_t j;877assert(imm->Immediate.NrTokens <= 4 + 1);878for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {879p->immediates[pos][j] = imm->u[j].Float;880}881} break;882883case TGSI_TOKEN_TYPE_INSTRUCTION:884if (p->first_instruction) {885/* resolve location of immediates */886uint32_t i, j;887for (i = 0; i < p->num_immediates; i++) {888/* find constant slot for this immediate */889for (j = 0; j < I915_MAX_CONSTANT; j++) {890if (ifs->constant_flags[j] == 0x0) {891memcpy(ifs->constants[j], p->immediates[i],8924 * sizeof(float));893/*printf("immediate %d maps to const %d\n", i, j);*/894ifs->constant_flags[j] = 0xf; /* all four comps used */895p->immediates_map[i] = j;896ifs->num_constants = MAX2(ifs->num_constants, j + 1);897break;898}899}900if (j == I915_MAX_CONSTANT) {901i915_program_error(p, "Exceeded %d max uniforms and immediates.",902I915_MAX_CONSTANT);903}904}905906p->first_instruction = false;907}908909i915_translate_instruction(p, &token->FullInstruction, fs);910break;911912default:913assert(0);914}915}916917/**918* Translate TGSI fragment shader into i915 hardware instructions.919* \param p the translation state920* \param tokens the TGSI token array921*/922static void923i915_translate_instructions(struct i915_fp_compile *p,924const struct i915_token_list *tokens,925struct i915_fragment_shader *fs)926{927int i;928for (i = 0; i < tokens->NumTokens && !p->error; i++) {929i915_translate_token(p, &tokens->Tokens[i], fs);930}931}932933static struct i915_fp_compile *934i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)935{936struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);937int i;938939p->shader = ifs;940941/* Put new constants at end of const buffer, growing downward.942* The problem is we don't know how many user-defined constants might943* be specified with pipe->set_constant_buffer().944* Should pre-scan the user's program to determine the highest-numbered945* constant referenced.946*/947ifs->num_constants = 0;948memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));949950memset(&p->register_phases, 0, sizeof(p->register_phases));951952for (i = 0; i < I915_TEX_UNITS; i++)953ifs->generic_mapping[i] = -1;954955p->log_program_errors = !i915->no_log_program_errors;956957p->first_instruction = true;958959p->nr_tex_indirect = 1; /* correct? */960p->nr_tex_insn = 0;961p->nr_alu_insn = 0;962p->nr_decl_insn = 0;963964p->csr = p->program;965p->decl = p->declarations;966p->decl_s = 0;967p->decl_t = 0;968p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;969p->utemp_flag = ~0x7;970971/* initialize the first program word */972*(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;973974return p;975}976977/* Copy compile results to the fragment program struct and destroy the978* compilation context.979*/980static void981i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)982{983struct i915_fragment_shader *ifs = p->shader;984unsigned long program_size = (unsigned long)(p->csr - p->program);985unsigned long decl_size = (unsigned long)(p->decl - p->declarations);986987if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)988debug_printf("Exceeded max nr indirect texture lookups\n");989990if (p->nr_tex_insn > I915_MAX_TEX_INSN)991i915_program_error(p, "Exceeded max TEX instructions");992993if (p->nr_alu_insn > I915_MAX_ALU_INSN)994i915_program_error(p, "Exceeded max ALU instructions");995996if (p->nr_decl_insn > I915_MAX_DECL_INSN)997i915_program_error(p, "Exceeded max DECL instructions");998999/* hw doesn't seem to like empty frag programs (num_instructions == 1 is just1000* TGSI_END), even when the depth write fixup gets emitted below - maybe that1001* one is fishy, too?1002*/1003if (ifs->info.num_instructions == 1)1004i915_program_error(p, "Empty fragment shader");10051006if (p->error) {1007p->NumNativeInstructions = 0;1008p->NumNativeAluInstructions = 0;1009p->NumNativeTexInstructions = 0;1010p->NumNativeTexIndirections = 0;10111012i915_use_passthrough_shader(ifs);1013} else {1014p->NumNativeInstructions =1015p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;1016p->NumNativeAluInstructions = p->nr_alu_insn;1017p->NumNativeTexInstructions = p->nr_tex_insn;1018p->NumNativeTexIndirections = p->nr_tex_indirect;10191020/* patch in the program length */1021p->declarations[0] |= program_size + decl_size - 2;10221023/* Copy compilation results to fragment program struct:1024*/1025assert(!ifs->program);10261027ifs->program_len = decl_size + program_size;1028ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));1029memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));1030memcpy(&ifs->program[decl_size], p->program,1031program_size * sizeof(uint32_t));1032}10331034/* Release the compilation struct:1035*/1036FREE(p);1037}10381039/**1040* Rather than trying to intercept and jiggle depth writes during1041* emit, just move the value into its correct position at the end of1042* the program:1043*/1044static void1045i915_fixup_depth_write(struct i915_fp_compile *p)1046{1047for (int i = 0; i < p->shader->info.num_outputs; i++) {1048if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)1049continue;10501051const uint32_t depth = UREG(REG_TYPE_OD, 0);10521053i915_emit_arith(p, A0_MOV, /* opcode */1054depth, /* dest reg */1055A0_DEST_CHANNEL_W, /* write mask */10560, /* saturate? */1057swizzle(depth, X, Y, Z, Z), /* src0 */10580, 0 /* src1, src2 */);1059}1060}10611062void1063i915_translate_fragment_program(struct i915_context *i915,1064struct i915_fragment_shader *fs)1065{1066struct i915_fp_compile *p;1067const struct tgsi_token *tokens = fs->state.tokens;1068struct i915_token_list *i_tokens;10691070if (I915_DBG_ON(DBG_FS)) {1071mesa_logi("TGSI fragment shader:");1072tgsi_dump(tokens, 0);1073}10741075p = i915_init_compile(i915, fs);10761077i_tokens = i915_optimize(tokens);1078i915_translate_instructions(p, i_tokens, fs);1079i915_fixup_depth_write(p);10801081i915_fini_compile(i915, p);1082i915_optimize_free(i_tokens);10831084if (I915_DBG_ON(DBG_FS)) {1085mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,1086fs->num_constants ? ":" : "");10871088for (int i = 0; i < I915_MAX_CONSTANT; i++) {1089if (fs->constant_flags[i] &&1090fs->constant_flags[i] != I915_CONSTFLAG_USER) {1091mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],1092fs->constants[i][1], fs->constants[i][2],1093fs->constants[i][3]);1094}1095}1096i915_disassemble_program(fs->program, fs->program_len);1097}1098}109911001101