Path: blob/21.2-virgl/src/gallium/frontends/d3d10umd/ShaderTGSI.c
4565 views
/**************************************************************************1*2* Copyright 2012-2021 VMware, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the7* "Software"), to deal in the Software without restriction, including8* without limitation the rights to use, copy, modify, merge, publish,9* distribute, sub license, and/or sell copies of the Software, and to10* permit persons to whom the Software is furnished to do so, subject to11* the following conditions:12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL16* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,17* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR18* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE19* USE OR OTHER DEALINGS IN THE SOFTWARE.20*21* The above copyright notice and this permission notice (including the22* next paragraph) shall be included in all copies or substantial portions23* of the Software.24*25**************************************************************************/2627/*28* ShaderTGSI.c --29* Functions for translating shaders.30*/3132#include "Debug.h"33#include "ShaderParse.h"3435#include "pipe/p_state.h"36#include "tgsi/tgsi_ureg.h"37#include "tgsi/tgsi_dump.h"38#include "util/u_memory.h"3940#include "ShaderDump.h"414243enum dx10_opcode_format {44OF_FLOAT,45OF_INT,46OF_UINT47};4849struct dx10_opcode_xlate {50D3D10_SB_OPCODE_TYPE type;51enum dx10_opcode_format format;52uint tgsi_opcode;53};5455/* Opcodes that we have not even attempted to implement:56*/57#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST5859/* Opcodes which do not translate directly to a TGSI opcode, but which60* have at least a partial implemention coded below:61*/62#define TGSI_EXPAND (TGSI_OPCODE_LAST+1)6364static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {65{D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD},66{D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND},67{D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK},68{D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND},69{D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND},70{D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND},71{D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE},72{D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT},73{D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND},74{D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND},75{D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT},76{D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX},77{D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY},78{D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND},79{D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV},80{D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2},81{D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3},82{D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4},83{D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE},84{D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND},85{D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND},86{D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF},87{D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP},88{D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH},89{D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ},90{D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND},91{D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC},92{D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND},93{D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND},94{D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE},95{D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD},96{D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND},97{D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ},98{D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE},99{D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT},100{D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD},101{D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX},102{D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN},103{D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND},104{D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE},105{D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG},106{D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL},107{D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR},108{D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F},109{D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND},110{D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND},111{D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND},112{D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND},113{D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP},114{D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT},115{D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD},116{D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN},117{D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX},118{D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND},119{D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV},120{D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP},121{D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL},122{D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE},123{D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP},124{D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT},125{D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR},126{D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND},127{D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET},128{D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND},129{D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND},130{D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR},131{D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL},132{D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC},133{D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND},134{D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND},135{D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND},136{D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND},137{D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND},138{D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND},139{D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND},140{D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND},141{D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH},142{D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND},143{D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND},144{D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT},145{D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE},146{D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND},147{D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD},148{D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX},149{D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN},150{D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR},151{D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F},152{D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR},153{D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND},154{D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND},155{D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND},156{D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED},157{D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},158{D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND},159{D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND},160{D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND},161{D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND},162{D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND},163{D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND},164{D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND},165{D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND},166{D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND},167{D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND},168{D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND},169{D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND},170{D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND},171{D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},172{D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED},173{D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED},174{D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED},175{D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},176{D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED}177};178179#define SHADER_MAX_TEMPS 4096180#define SHADER_MAX_INPUTS 32181#define SHADER_MAX_OUTPUTS 32182#define SHADER_MAX_CONSTS 4096183#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS184#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS185#define SHADER_MAX_INDEXABLE_TEMPS 4096186187struct Shader_call {188unsigned d3d_label;189unsigned tgsi_label_token;190};191192struct Shader_label {193unsigned d3d_label;194unsigned tgsi_insn_no;195};196197struct Shader_resource {198uint target; /* TGSI_TEXTURE_x */199};200201struct Shader_xlate {202struct ureg_program *ureg;203204uint vertices_in;205uint declared_temps;206207struct ureg_dst temps[SHADER_MAX_TEMPS];208struct ureg_dst output_depth;209struct Shader_resource resources[SHADER_MAX_RESOURCES];210struct ureg_src sv[SHADER_MAX_RESOURCES];211struct ureg_src samplers[SHADER_MAX_SAMPLERS];212struct ureg_src imms;213struct ureg_src prim_id;214215uint temp_offset;216uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];217218struct {219boolean declared;220uint writemask;221uint siv_name;222boolean overloaded;223struct ureg_src reg;224} inputs[SHADER_MAX_INPUTS];225226struct {227struct ureg_dst reg[4];228} outputs[SHADER_MAX_OUTPUTS];229230struct {231uint d3d;232uint tgsi;233} clip_distance_mapping[2], cull_distance_mapping[2];234uint num_clip_distances_declared;235uint num_cull_distances_declared;236237struct Shader_call *calls;238uint num_calls;239uint max_calls;240struct Shader_label *labels;241uint num_labels;242uint max_labels;243};244245static uint246translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)247{248switch (interpolation) {249case D3D10_SB_INTERPOLATION_UNDEFINED:250assert(0);251return TGSI_INTERPOLATE_LINEAR;252253case D3D10_SB_INTERPOLATION_CONSTANT:254return TGSI_INTERPOLATE_CONSTANT;255case D3D10_SB_INTERPOLATION_LINEAR:256return TGSI_INTERPOLATE_PERSPECTIVE;257case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:258return TGSI_INTERPOLATE_LINEAR;259260case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:261case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1262LOG_UNSUPPORTED(TRUE);263return TGSI_INTERPOLATE_PERSPECTIVE;264265case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:266case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1267LOG_UNSUPPORTED(TRUE);268return TGSI_INTERPOLATE_LINEAR;269}270271assert(0);272return TGSI_INTERPOLATE_LINEAR;273}274275static uint276translate_system_name(D3D10_SB_NAME name)277{278switch (name) {279case D3D10_SB_NAME_UNDEFINED:280assert(0); /* should not happen */281return TGSI_SEMANTIC_GENERIC;282case D3D10_SB_NAME_POSITION:283return TGSI_SEMANTIC_POSITION;284case D3D10_SB_NAME_CLIP_DISTANCE:285case D3D10_SB_NAME_CULL_DISTANCE:286return TGSI_SEMANTIC_CLIPDIST;287case D3D10_SB_NAME_PRIMITIVE_ID:288return TGSI_SEMANTIC_PRIMID;289case D3D10_SB_NAME_INSTANCE_ID:290return TGSI_SEMANTIC_INSTANCEID;291case D3D10_SB_NAME_VERTEX_ID:292return TGSI_SEMANTIC_VERTEXID_NOBASE;293case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:294return TGSI_SEMANTIC_VIEWPORT_INDEX;295case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:296return TGSI_SEMANTIC_LAYER;297case D3D10_SB_NAME_IS_FRONT_FACE:298return TGSI_SEMANTIC_FACE;299case D3D10_SB_NAME_SAMPLE_INDEX:300LOG_UNSUPPORTED(TRUE);301return TGSI_SEMANTIC_GENERIC;302}303304assert(0);305return TGSI_SEMANTIC_GENERIC;306}307308static uint309translate_semantic_index(struct Shader_xlate *sx,310D3D10_SB_NAME name,311const struct Shader_dst_operand *operand)312{313unsigned idx;314switch (name) {315case D3D10_SB_NAME_CLIP_DISTANCE:316case D3D10_SB_NAME_CULL_DISTANCE:317if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {318idx = sx->clip_distance_mapping[0].tgsi;319} else {320assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);321idx = sx->clip_distance_mapping[1].tgsi;322}323break;324/* case D3D10_SB_NAME_CULL_DISTANCE:325if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {326idx = sx->cull_distance_mapping[0].tgsi;327} else {328assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);329idx = sx->cull_distance_mapping[1].tgsi;330}331break;*/332default:333idx = 0;334}335return idx;336}337338static enum tgsi_return_type339trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {340switch (d3drettype) {341case D3D10_SB_RETURN_TYPE_UNORM:342return TGSI_RETURN_TYPE_UNORM;343case D3D10_SB_RETURN_TYPE_SNORM:344return TGSI_RETURN_TYPE_SNORM;345case D3D10_SB_RETURN_TYPE_SINT:346return TGSI_RETURN_TYPE_SINT;347case D3D10_SB_RETURN_TYPE_UINT:348return TGSI_RETURN_TYPE_UINT;349case D3D10_SB_RETURN_TYPE_FLOAT:350return TGSI_RETURN_TYPE_FLOAT;351case D3D10_SB_RETURN_TYPE_MIXED:352default:353LOG_UNSUPPORTED(TRUE);354return TGSI_RETURN_TYPE_FLOAT;355}356}357358static void359declare_vertices_in(struct Shader_xlate *sx,360unsigned in)361{362/* Make sure vertices_in is consistent with input primitive363* and other input declarations.364*/365if (sx->vertices_in) {366assert(sx->vertices_in == in);367} else {368sx->vertices_in = in;369}370}371372struct swizzle_mapping {373unsigned x;374unsigned y;375unsigned z;376unsigned w;377};378379/* mapping of writmask to swizzles */380static const struct swizzle_mapping writemask_to_swizzle[] = {381{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE382{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X383{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y384{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY385{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z386{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ387{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ388{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ389{ TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W390{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW391{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW392{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW393{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW394{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW395{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW396{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW397};398399static struct ureg_src400swizzle_reg(struct ureg_src src, uint writemask,401unsigned siv_name)402{403switch (siv_name) {404case D3D10_SB_NAME_PRIMITIVE_ID:405case D3D10_SB_NAME_INSTANCE_ID:406case D3D10_SB_NAME_VERTEX_ID:407case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:408case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:409case D3D10_SB_NAME_IS_FRONT_FACE:410return ureg_scalar(src, TGSI_SWIZZLE_X);411default: {412const struct swizzle_mapping *swizzle =413&writemask_to_swizzle[writemask];414return ureg_swizzle(src, swizzle->x, swizzle->y,415swizzle->z, swizzle->w);416}417}418}419420static void421dcl_base_output(struct Shader_xlate *sx,422struct ureg_program *ureg,423struct ureg_dst reg,424const struct Shader_dst_operand *operand)425{426unsigned writemask =427operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;428unsigned idx = operand->base.index[0].imm;429unsigned i;430431if (!writemask) {432sx->outputs[idx].reg[0] = reg;433sx->outputs[idx].reg[1] = reg;434sx->outputs[idx].reg[2] = reg;435sx->outputs[idx].reg[3] = reg;436return;437}438439for (i = 0; i < 4; ++i) {440unsigned mask = 1 << i;441if ((writemask & mask)) {442sx->outputs[idx].reg[i] = reg;443}444}445}446447static void448dcl_base_input(struct Shader_xlate *sx,449struct ureg_program *ureg,450const struct Shader_dst_operand *operand,451struct ureg_src dcl_reg,452uint index,453uint siv_name)454{455unsigned writemask =456operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;457458if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {459struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);460461ureg_MOV(ureg,462ureg_writemask(temp, sx->inputs[index].writemask),463swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,464sx->inputs[index].siv_name));465ureg_MOV(ureg, ureg_writemask(temp, writemask),466swizzle_reg(dcl_reg, writemask, siv_name));467sx->inputs[index].reg = ureg_src(temp);468sx->inputs[index].overloaded = TRUE;469sx->inputs[index].writemask |= writemask;470} else if (sx->inputs[index].overloaded) {471struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);472ureg_MOV(ureg, ureg_writemask(temp, writemask),473swizzle_reg(dcl_reg, writemask, siv_name));474sx->inputs[index].writemask |= writemask;475} else {476assert(!sx->inputs[index].declared);477478sx->inputs[index].reg = dcl_reg;479sx->inputs[index].declared = TRUE;480sx->inputs[index].writemask = writemask;481sx->inputs[index].siv_name = siv_name;482}483}484485static void486dcl_vs_input(struct Shader_xlate *sx,487struct ureg_program *ureg,488const struct Shader_dst_operand *dst)489{490struct ureg_src reg;491assert(dst->base.index_dim == 1);492assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);493494reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);495496dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,497D3D10_SB_NAME_UNDEFINED);498}499500static void501dcl_gs_input(struct Shader_xlate *sx,502struct ureg_program *ureg,503const struct Shader_dst_operand *dst)504{505if (dst->base.index_dim == 2) {506assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);507508declare_vertices_in(sx, dst->base.index[0].imm);509510/* XXX: Implement declaration masks in gallium.511*/512if (!sx->inputs[dst->base.index[1].imm].reg.File) {513struct ureg_src reg =514ureg_DECL_input(ureg,515TGSI_SEMANTIC_GENERIC,516dst->base.index[1].imm,5170, 1);518dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,519D3D10_SB_NAME_UNDEFINED);520}521} else {522assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);523assert(dst->base.index_dim == 0);524525sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);526}527}528529static void530dcl_sgv_input(struct Shader_xlate *sx,531struct ureg_program *ureg,532const struct Shader_dst_operand *dst,533uint dcl_siv_name)534{535struct ureg_src reg;536assert(dst->base.index_dim == 1);537assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);538539reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);540541dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,542dcl_siv_name);543}544545static void546dcl_siv_input(struct Shader_xlate *sx,547struct ureg_program *ureg,548const struct Shader_dst_operand *dst,549uint dcl_siv_name)550{551struct ureg_src reg;552assert(dst->base.index_dim == 2);553assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);554555declare_vertices_in(sx, dst->base.index[0].imm);556557reg = ureg_DECL_input(ureg,558translate_system_name(dcl_siv_name), 0,5590, 1);560561dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,562dcl_siv_name);563}564565static void566dcl_ps_input(struct Shader_xlate *sx,567struct ureg_program *ureg,568const struct Shader_dst_operand *dst,569uint dcl_in_ps_interp)570{571struct ureg_src reg;572assert(dst->base.index_dim == 1);573assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);574575reg = ureg_DECL_fs_input(ureg,576TGSI_SEMANTIC_GENERIC,577dst->base.index[0].imm,578translate_interpolation(dcl_in_ps_interp));579580dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,581D3D10_SB_NAME_UNDEFINED);582}583584static void585dcl_ps_sgv_input(struct Shader_xlate *sx,586struct ureg_program *ureg,587const struct Shader_dst_operand *dst,588uint dcl_siv_name)589{590struct ureg_src reg;591assert(dst->base.index_dim == 1);592assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);593594if (dcl_siv_name == D3D10_SB_NAME_POSITION) {595ureg_property(ureg,596TGSI_PROPERTY_FS_COORD_ORIGIN,597TGSI_FS_COORD_ORIGIN_UPPER_LEFT);598ureg_property(ureg,599TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,600TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);601}602603reg = ureg_DECL_fs_input(ureg,604translate_system_name(dcl_siv_name),6050,606TGSI_INTERPOLATE_CONSTANT);607608if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {609/* We need to map gallium's front_face to the one expected610* by D3D10 */611struct ureg_dst tmp = ureg_DECL_temporary(ureg);612613tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);614615ureg_CMP(ureg, tmp, reg,616ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));617618reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);619}620621dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,622dcl_siv_name);623}624625static void626dcl_ps_siv_input(struct Shader_xlate *sx,627struct ureg_program *ureg,628const struct Shader_dst_operand *dst,629uint dcl_siv_name, uint dcl_in_ps_interp)630{631struct ureg_src reg;632assert(dst->base.index_dim == 1);633assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);634635reg = ureg_DECL_fs_input(ureg,636translate_system_name(dcl_siv_name),6370,638translate_interpolation(dcl_in_ps_interp));639640if (dcl_siv_name == D3D10_SB_NAME_POSITION) {641/* D3D10 expects reciprocal of interpolated 1/w as 4th component,642* gallium/GL just interpolated 1/w */643struct ureg_dst tmp = ureg_DECL_temporary(ureg);644645ureg_MOV(ureg, tmp, reg);646ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),647ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));648reg = ureg_src(tmp);649}650651dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,652dcl_siv_name);653}654655static struct ureg_src656translate_relative_operand(struct Shader_xlate *sx,657const struct Shader_relative_operand *operand)658{659struct ureg_src reg;660661switch (operand->type) {662case D3D10_SB_OPERAND_TYPE_TEMP:663assert(operand->index[0].imm < SHADER_MAX_TEMPS);664665reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);666break;667668case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:669reg = sx->prim_id;670break;671672case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:673assert(operand->index[1].imm < SHADER_MAX_TEMPS);674675reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +676operand->index[1].imm]);677break;678679case D3D10_SB_OPERAND_TYPE_INPUT:680case D3D10_SB_OPERAND_TYPE_OUTPUT:681case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:682case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:683case D3D10_SB_OPERAND_TYPE_SAMPLER:684case D3D10_SB_OPERAND_TYPE_RESOURCE:685case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:686case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:687case D3D10_SB_OPERAND_TYPE_LABEL:688case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:689case D3D10_SB_OPERAND_TYPE_NULL:690case D3D10_SB_OPERAND_TYPE_RASTERIZER:691case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:692LOG_UNSUPPORTED(TRUE);693reg = ureg_src(ureg_DECL_temporary(sx->ureg));694break;695696default:697assert(0); /* should never happen */698reg = ureg_src(ureg_DECL_temporary(sx->ureg));699}700701reg = ureg_scalar(reg, operand->comp);702return reg;703}704705static struct ureg_dst706translate_operand(struct Shader_xlate *sx,707const struct Shader_operand *operand,708unsigned writemask)709{710struct ureg_dst reg;711712switch (operand->type) {713case D3D10_SB_OPERAND_TYPE_TEMP:714assert(operand->index_dim == 1);715assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);716assert(operand->index[0].imm < SHADER_MAX_TEMPS);717718reg = sx->temps[sx->temp_offset + operand->index[0].imm];719break;720721case D3D10_SB_OPERAND_TYPE_OUTPUT:722assert(operand->index_dim == 1);723assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);724725if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {726if (!writemask) {727reg = sx->outputs[operand->index[0].imm].reg[0];728} else {729unsigned i;730for (i = 0; i < 4; ++i) {731unsigned mask = 1 << i;732if ((writemask & mask)) {733reg = sx->outputs[operand->index[0].imm].reg[i];734break;735}736}737}738} else {739struct ureg_src addr =740translate_relative_operand(sx, &operand->index[0].rel);741assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);742reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);743}744break;745746case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:747assert(operand->index_dim == 0);748749reg = sx->output_depth;750break;751752case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:753assert(operand->index_dim == 0);754755reg = ureg_dst(sx->prim_id);756break;757758case D3D10_SB_OPERAND_TYPE_INPUT:759case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:760case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:761case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:762case D3D10_SB_OPERAND_TYPE_SAMPLER:763case D3D10_SB_OPERAND_TYPE_RESOURCE:764case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:765case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:766case D3D10_SB_OPERAND_TYPE_LABEL:767case D3D10_SB_OPERAND_TYPE_NULL:768case D3D10_SB_OPERAND_TYPE_RASTERIZER:769case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:770/* XXX: Translate more operands types.771*/772LOG_UNSUPPORTED(TRUE);773reg = ureg_DECL_temporary(sx->ureg);774}775776return reg;777}778779static struct ureg_src780translate_indexable_temp(struct Shader_xlate *sx,781const struct Shader_operand *operand)782{783struct ureg_src reg;784switch (operand->index[1].index_rep) {785case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:786reg = ureg_src(787sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +788operand->index[1].imm]);789break;790case D3D10_SB_OPERAND_INDEX_RELATIVE:791reg = ureg_src_indirect(792ureg_src(sx->temps[793sx->indexable_temp_offsets[operand->index[0].imm]]),794translate_relative_operand(sx,795&operand->index[1].rel));796break;797case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:798reg = ureg_src_indirect(799ureg_src(sx->temps[800operand->index[1].imm +801sx->indexable_temp_offsets[operand->index[0].imm]]),802translate_relative_operand(sx,803&operand->index[1].rel));804break;805default:806/* XXX: Other index representations.807*/808LOG_UNSUPPORTED(TRUE);809reg = ureg_src(ureg_DECL_temporary(sx->ureg));810}811return reg;812}813814static struct ureg_dst815translate_dst_operand(struct Shader_xlate *sx,816const struct Shader_dst_operand *operand,817boolean saturate)818{819struct ureg_dst reg;820unsigned writemask =821operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;822823assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);824assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);825assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);826assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);827assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);828829switch (operand->base.type) {830case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:831assert(operand->base.index_dim == 2);832assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);833assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);834835reg = ureg_dst(translate_indexable_temp(sx, &operand->base));836break;837838default:839reg = translate_operand(sx, &operand->base, writemask);840}841842/* oDepth often has an empty writemask */843if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {844reg = ureg_writemask(reg, writemask);845}846847if (saturate) {848reg = ureg_saturate(reg);849}850851return reg;852}853854static struct ureg_src855translate_src_operand(struct Shader_xlate *sx,856const struct Shader_src_operand *operand,857const enum dx10_opcode_format format)858{859struct ureg_src reg;860861switch (operand->base.type) {862case D3D10_SB_OPERAND_TYPE_INPUT:863if (operand->base.index_dim == 1) {864switch (operand->base.index[0].index_rep) {865case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:866assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);867reg = sx->inputs[operand->base.index[0].imm].reg;868break;869case D3D10_SB_OPERAND_INDEX_RELATIVE: {870struct ureg_src tmp =871translate_relative_operand(sx, &operand->base.index[0].rel);872reg = ureg_src_indirect(sx->inputs[0].reg, tmp);873}874break;875case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {876struct ureg_src tmp =877translate_relative_operand(sx, &operand->base.index[0].rel);878reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);879}880break;881default:882/* XXX: Other index representations.883*/884LOG_UNSUPPORTED(TRUE);885886}887} else {888assert(operand->base.index_dim == 2);889assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);890891switch (operand->base.index[1].index_rep) {892case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:893reg = sx->inputs[operand->base.index[1].imm].reg;894break;895case D3D10_SB_OPERAND_INDEX_RELATIVE: {896struct ureg_src tmp =897translate_relative_operand(sx, &operand->base.index[1].rel);898reg = ureg_src_indirect(sx->inputs[0].reg, tmp);899}900break;901case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {902struct ureg_src tmp =903translate_relative_operand(sx, &operand->base.index[1].rel);904reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);905}906break;907default:908/* XXX: Other index representations.909*/910LOG_UNSUPPORTED(TRUE);911}912913switch (operand->base.index[0].index_rep) {914case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:915reg = ureg_src_dimension(reg, operand->base.index[0].imm);916break;917case D3D10_SB_OPERAND_INDEX_RELATIVE:{918struct ureg_src tmp =919translate_relative_operand(sx, &operand->base.index[0].rel);920reg = ureg_src_dimension_indirect(reg, tmp, 0);921}922break;923case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {924struct ureg_src tmp =925translate_relative_operand(sx, &operand->base.index[0].rel);926reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);927}928break;929default:930/* XXX: Other index representations.931*/932LOG_UNSUPPORTED(TRUE);933}934}935break;936937case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:938assert(operand->base.index_dim == 2);939assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);940assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);941942reg = translate_indexable_temp(sx, &operand->base);943break;944945case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:946switch (format) {947case OF_FLOAT:948reg = ureg_imm4f(sx->ureg,949operand->imm[0].f32,950operand->imm[1].f32,951operand->imm[2].f32,952operand->imm[3].f32);953break;954case OF_INT:955reg = ureg_imm4i(sx->ureg,956operand->imm[0].i32,957operand->imm[1].i32,958operand->imm[2].i32,959operand->imm[3].i32);960break;961case OF_UINT:962reg = ureg_imm4u(sx->ureg,963operand->imm[0].u32,964operand->imm[1].u32,965operand->imm[2].u32,966operand->imm[3].u32);967break;968default:969assert(0);970reg = ureg_src(ureg_DECL_temporary(sx->ureg));971}972break;973974case D3D10_SB_OPERAND_TYPE_SAMPLER:975assert(operand->base.index_dim == 1);976assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);977assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);978979reg = sx->samplers[operand->base.index[0].imm];980break;981982case D3D10_SB_OPERAND_TYPE_RESOURCE:983assert(operand->base.index_dim == 1);984assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);985assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);986987reg = sx->sv[operand->base.index[0].imm];988break;989990case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:991assert(operand->base.index_dim == 2);992993assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);994assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);995996switch (operand->base.index[1].index_rep) {997case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:998assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);9991000reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);1001reg = ureg_src_dimension(reg, operand->base.index[0].imm);1002break;1003case D3D10_SB_OPERAND_INDEX_RELATIVE:1004case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:1005reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);1006reg = ureg_src_indirect(1007reg,1008translate_relative_operand(sx, &operand->base.index[1].rel));1009reg = ureg_src_dimension(reg, operand->base.index[0].imm);1010break;1011default:1012/* XXX: Other index representations.1013*/1014LOG_UNSUPPORTED(TRUE);1015}10161017break;10181019case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:1020assert(operand->base.index_dim == 1);10211022switch (operand->base.index[0].index_rep) {1023case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:1024reg = sx->imms;1025reg.Index += operand->base.index[0].imm;1026break;1027case D3D10_SB_OPERAND_INDEX_RELATIVE:1028case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:1029reg = sx->imms;1030reg.Index += operand->base.index[0].imm;1031reg = ureg_src_indirect(1032sx->imms,1033translate_relative_operand(sx, &operand->base.index[0].rel));1034break;1035default:1036/* XXX: Other index representations.1037*/1038LOG_UNSUPPORTED(TRUE);1039}1040break;10411042case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:1043reg = sx->prim_id;1044break;10451046default:1047reg = ureg_src(translate_operand(sx, &operand->base, 0));1048}10491050reg = ureg_swizzle(reg,1051operand->swizzle[0],1052operand->swizzle[1],1053operand->swizzle[2],1054operand->swizzle[3]);10551056switch (operand->modifier) {1057case D3D10_SB_OPERAND_MODIFIER_NONE:1058break;1059case D3D10_SB_OPERAND_MODIFIER_NEG:1060reg = ureg_negate(reg);1061break;1062case D3D10_SB_OPERAND_MODIFIER_ABS:1063reg = ureg_abs(reg);1064break;1065case D3D10_SB_OPERAND_MODIFIER_ABSNEG:1066reg = ureg_negate(ureg_abs(reg));1067break;1068default:1069assert(0);1070}10711072return reg;1073}10741075static uint1076translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)1077{1078switch (dim) {1079case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:1080return TGSI_TEXTURE_UNKNOWN;1081case D3D10_SB_RESOURCE_DIMENSION_BUFFER:1082return TGSI_TEXTURE_BUFFER;1083case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:1084return TGSI_TEXTURE_1D;1085case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:1086return TGSI_TEXTURE_2D;1087case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:1088return TGSI_TEXTURE_2D_MSAA;1089case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:1090return TGSI_TEXTURE_3D;1091case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:1092return TGSI_TEXTURE_CUBE;1093case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:1094return TGSI_TEXTURE_1D_ARRAY;1095case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:1096return TGSI_TEXTURE_2D_ARRAY;1097case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:1098return TGSI_TEXTURE_2D_ARRAY_MSAA;1099case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:1100return TGSI_TEXTURE_CUBE_ARRAY;1101default:1102assert(0);1103return TGSI_TEXTURE_UNKNOWN;1104}1105}11061107static uint1108texture_dim_from_tgsi_target(unsigned tgsi_target)1109{1110switch (tgsi_target) {1111case TGSI_TEXTURE_BUFFER:1112case TGSI_TEXTURE_1D:1113case TGSI_TEXTURE_1D_ARRAY:1114return 1;1115case TGSI_TEXTURE_2D:1116case TGSI_TEXTURE_2D_MSAA:1117case TGSI_TEXTURE_CUBE:1118case TGSI_TEXTURE_2D_ARRAY:1119case TGSI_TEXTURE_2D_ARRAY_MSAA:1120return 2;1121case TGSI_TEXTURE_3D:1122return 3;1123case TGSI_TEXTURE_UNKNOWN:1124default:1125assert(0);1126return 1;1127}1128}11291130static boolean1131operand_is_scalar(const struct Shader_src_operand *operand)1132{1133return operand->swizzle[0] == operand->swizzle[1] &&1134operand->swizzle[1] == operand->swizzle[2] &&1135operand->swizzle[2] == operand->swizzle[3];1136}11371138static void1139Shader_add_call(struct Shader_xlate *sx,1140unsigned d3d_label,1141unsigned tgsi_label_token)1142{1143ASSERT(sx->num_calls < sx->max_calls);11441145sx->calls[sx->num_calls].d3d_label = d3d_label;1146sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;1147sx->num_calls++;1148}11491150static void1151Shader_add_label(struct Shader_xlate *sx,1152unsigned d3d_label,1153unsigned tgsi_insn_no)1154{1155ASSERT(sx->num_labels < sx->max_labels);11561157sx->labels[sx->num_labels].d3d_label = d3d_label;1158sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;1159sx->num_labels++;1160}116111621163static void1164sample_ureg_emit(struct ureg_program *ureg,1165unsigned tgsi_opcode,1166unsigned num_src,1167struct Shader_opcode *opcode,1168struct ureg_dst dst,1169struct ureg_src *src)1170{1171unsigned num_offsets = 0;1172struct tgsi_texture_offset texoffsets;11731174memset(&texoffsets, 0, sizeof texoffsets);11751176if (opcode->imm_texel_offset.u ||1177opcode->imm_texel_offset.v ||1178opcode->imm_texel_offset.w) {1179struct ureg_src offsetreg;1180num_offsets = 1;1181/* don't actually always need all 3 values */1182offsetreg = ureg_imm3i(ureg,1183opcode->imm_texel_offset.u,1184opcode->imm_texel_offset.v,1185opcode->imm_texel_offset.w);1186texoffsets.File = offsetreg.File;1187texoffsets.Index = offsetreg.Index;1188texoffsets.SwizzleX = offsetreg.SwizzleX;1189texoffsets.SwizzleY = offsetreg.SwizzleY;1190texoffsets.SwizzleZ = offsetreg.SwizzleZ;1191}11921193ureg_tex_insn(ureg,1194tgsi_opcode,1195&dst, 1,1196TGSI_TEXTURE_UNKNOWN,1197TGSI_RETURN_TYPE_UNKNOWN,1198&texoffsets, num_offsets,1199src, num_src);1200}12011202typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,1203struct ureg_src src);1204static void1205expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,1206struct Shader_xlate *sx, struct Shader_opcode *opcode)1207{1208struct ureg_dst tmp = ureg_DECL_temporary(ureg);1209struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],1210opcode->saturate);1211struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);1212struct ureg_dst scalar_dst;1213ureg_MOV(ureg, tmp, src);1214src = ureg_src(tmp);12151216scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);1217if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {1218func(ureg, scalar_dst,1219ureg_scalar(src, TGSI_SWIZZLE_X));1220}1221scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);1222if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {1223func(ureg, scalar_dst,1224ureg_scalar(src, TGSI_SWIZZLE_Y));1225}1226scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);1227if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {1228func(ureg, scalar_dst,1229ureg_scalar(src, TGSI_SWIZZLE_Z));1230}1231scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);1232if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {1233func(ureg, scalar_dst,1234ureg_scalar(src, TGSI_SWIZZLE_W));1235}1236ureg_release_temporary(ureg, tmp);1237}12381239const struct tgsi_token *1240Shader_tgsi_translate(const unsigned *code,1241unsigned *output_mapping)1242{1243struct Shader_xlate sx;1244struct Shader_parser parser;1245struct ureg_program *ureg = NULL;1246struct Shader_opcode opcode;1247const struct tgsi_token *tokens = NULL;1248uint nr_tokens;1249boolean shader_dumped = FALSE;1250boolean inside_sub = FALSE;1251uint i, j;12521253memset(&sx, 0, sizeof sx);12541255Shader_parse_init(&parser, code);12561257if (st_debug & ST_DEBUG_TGSI) {1258dx10_shader_dump_tokens(code);1259shader_dumped = TRUE;1260}12611262sx.max_calls = 64;1263sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *1264sizeof(struct Shader_call));1265sx.num_calls = 0;12661267sx.max_labels = 64;1268sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *1269sizeof(struct Shader_call));1270sx.num_labels = 0;1271127212731274/* Header. */1275switch (parser.header.type) {1276case D3D10_SB_PIXEL_SHADER:1277ureg = ureg_create(PIPE_SHADER_FRAGMENT);1278break;1279case D3D10_SB_VERTEX_SHADER:1280ureg = ureg_create(PIPE_SHADER_VERTEX);1281break;1282case D3D10_SB_GEOMETRY_SHADER:1283ureg = ureg_create(PIPE_SHADER_GEOMETRY);1284break;1285}12861287assert(ureg);1288sx.ureg = ureg;12891290while (Shader_parse_opcode(&parser, &opcode)) {1291const struct dx10_opcode_xlate *ox;12921293assert(opcode.type < D3D10_SB_NUM_OPCODES);1294ox = &opcode_xlate[opcode.type];12951296switch (opcode.type) {1297case D3D10_SB_OPCODE_EXP:1298expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);1299break;1300case D3D10_SB_OPCODE_SQRT:1301expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);1302break;1303case D3D10_SB_OPCODE_RSQ:1304expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);1305break;1306case D3D10_SB_OPCODE_LOG:1307expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);1308break;1309case D3D10_SB_OPCODE_IMUL:1310if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1311ureg_IMUL_HI(ureg,1312translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),1313translate_src_operand(&sx, &opcode.src[0], OF_INT),1314translate_src_operand(&sx, &opcode.src[1], OF_INT));1315}13161317if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1318ureg_UMUL(ureg,1319translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),1320translate_src_operand(&sx, &opcode.src[0], OF_INT),1321translate_src_operand(&sx, &opcode.src[1], OF_INT));1322}13231324break;13251326case D3D10_SB_OPCODE_FTOI: {1327/* XXX: tgsi (and just about everybody else, c, opencl, glsl) has1328* out-of-range (and NaN) values undefined for f2i/f2u, but d3d101329* requires clamping to min and max representable value (as well as 01330* for NaNs) (this applies to both ftoi and ftou). At least the online1331* docs state that - this is consistent with generic d3d10 conversion1332* rules.1333* For FTOI, we cheat a bit here - in particular depending on noone1334* caring about NaNs, and depending on the (undefined!) behavior of1335* F2I returning 0x80000000 for too negative values (which works with1336* x86 sse). Hence only need to clamp too positive values.1337* Note that it is impossible to clamp using a float, since 2^31 - 11338* is not exactly representable with a float.1339*/1340struct ureg_dst too_large = ureg_DECL_temporary(ureg);1341struct ureg_dst tmp = ureg_DECL_temporary(ureg);1342ureg_FSGE(ureg, too_large,1343translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),1344ureg_imm1f(ureg, 2147483648.0f));1345ureg_F2I(ureg, tmp,1346translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1347ureg_UCMP(ureg,1348translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),1349ureg_src(too_large),1350ureg_imm1i(ureg, 0x7fffffff),1351ureg_src(tmp));1352ureg_release_temporary(ureg, too_large);1353ureg_release_temporary(ureg, tmp);1354}1355break;13561357case D3D10_SB_OPCODE_FTOU: {1358/* For ftou, we need to do both clamps, which as a bonus also1359* gets us correct NaN behavior.1360* Note that it is impossible to clamp using a float against the upper1361* limit, since 2^32 - 1 is not exactly representable with a float,1362* but the clamp against 0.0 certainly works just fine.1363*/1364struct ureg_dst too_large = ureg_DECL_temporary(ureg);1365struct ureg_dst tmp = ureg_DECL_temporary(ureg);1366ureg_FSGE(ureg, too_large,1367translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),1368ureg_imm1f(ureg, 4294967296.0f));1369/* clamp negative values + NaN to zero.1370* (Could be done slightly more efficient in llvmpipe due to1371* MAX NaN behavior handling.)1372*/1373ureg_MAX(ureg, tmp,1374ureg_imm1f(ureg, 0.0f),1375translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1376ureg_F2U(ureg, tmp,1377ureg_src(tmp));1378ureg_UCMP(ureg,1379translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),1380ureg_src(too_large),1381ureg_imm1u(ureg, 0xffffffff),1382ureg_src(tmp));1383ureg_release_temporary(ureg, too_large);1384ureg_release_temporary(ureg, tmp);1385}1386break;13871388case D3D10_SB_OPCODE_LD_MS:1389/* XXX: We don't support multi-sampling yet, but we need to parse1390* this opcode regardless, so we just ignore sample index operand1391* for now */1392case D3D10_SB_OPCODE_LD:1393if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1394unsigned resource = opcode.src[1].base.index[0].imm;1395assert(opcode.src[1].base.index_dim == 1);1396assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);13971398if (ureg_src_is_undef(sx.samplers[resource])) {1399sx.samplers[resource] =1400ureg_DECL_sampler(ureg, resource);1401}14021403ureg_TXF(ureg,1404translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),1405sx.resources[resource].target,1406translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),1407sx.samplers[resource]);1408}1409else {1410struct ureg_src srcreg[2];1411srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);1412srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);14131414sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,1415translate_dst_operand(&sx, &opcode.dst[0],1416opcode.saturate),1417srcreg);1418}1419break;14201421case D3D10_SB_OPCODE_CUSTOMDATA:1422if (opcode.customdata._class ==1423D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {1424sx.imms =1425ureg_DECL_immediate_block_uint(ureg,1426opcode.customdata.u.constbuf.data,1427opcode.customdata.u.constbuf.count);1428} else {1429assert(0);1430}1431break;14321433case D3D10_SB_OPCODE_RESINFO:1434if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1435unsigned resource = opcode.src[1].base.index[0].imm;1436assert(opcode.src[1].base.index_dim == 1);1437assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);14381439if (ureg_src_is_undef(sx.samplers[resource])) {1440sx.samplers[resource] =1441ureg_DECL_sampler(ureg, resource);1442}1443/* don't bother with swizzle, ret type etc. */1444ureg_TXQ(ureg,1445translate_dst_operand(&sx, &opcode.dst[0],1446opcode.saturate),1447sx.resources[resource].target,1448translate_src_operand(&sx, &opcode.src[0], OF_UINT),1449sx.samplers[resource]);1450}1451else {1452struct ureg_dst r0 = ureg_DECL_temporary(ureg);1453struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1454struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],1455opcode.saturate);14561457/* while specs say swizzle is ignored better safe than sorry */1458tsrc.SwizzleX = TGSI_SWIZZLE_X;1459tsrc.SwizzleY = TGSI_SWIZZLE_Y;1460tsrc.SwizzleZ = TGSI_SWIZZLE_Z;1461tsrc.SwizzleW = TGSI_SWIZZLE_W;14621463ureg_SVIEWINFO(ureg, r0,1464translate_src_operand(&sx, &opcode.src[0], OF_UINT),1465tsrc);14661467tsrc = ureg_src(r0);1468tsrc.SwizzleX = opcode.src[1].swizzle[0];1469tsrc.SwizzleY = opcode.src[1].swizzle[1];1470tsrc.SwizzleZ = opcode.src[1].swizzle[2];1471tsrc.SwizzleW = opcode.src[1].swizzle[3];14721473if (opcode.specific.resinfo_ret_type ==1474D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {1475ureg_MOV(ureg, dstreg, tsrc);1476}1477else if (opcode.specific.resinfo_ret_type ==1478D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {1479ureg_I2F(ureg, dstreg, tsrc);1480}1481else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */1482unsigned i;1483/*1484* Must apply rcp only to parts determined by dims,1485* (width/height/depth) but NOT to array size nor mip levels1486* hence need to figure that out here.1487* This is one sick modifier if you ask me!1488*/1489unsigned res_index = opcode.src[1].base.index[0].imm;1490unsigned target = sx.resources[res_index].target;1491unsigned dims = texture_dim_from_tgsi_target(target);14921493ureg_I2F(ureg, r0, ureg_src(r0));1494tsrc = ureg_src(r0);1495for (i = 0; i < 4; i++) {1496unsigned dst_swizzle = opcode.src[1].swizzle[i];1497struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);1498/*1499* could do one mov with multiple write mask bits set1500* but rcp is scalar anyway.1501*/1502if (dst_swizzle < dims) {1503ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));1504}1505else {1506ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));1507}1508}1509}1510ureg_release_temporary(ureg, r0);1511}1512break;15131514case D3D10_SB_OPCODE_SAMPLE:1515if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1516assert(opcode.src[1].base.index_dim == 1);1517assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);15181519LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);15201521ureg_TEX(ureg,1522translate_dst_operand(&sx, &opcode.dst[0],1523opcode.saturate),1524sx.resources[opcode.src[1].base.index[0].imm].target,1525translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),1526translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));1527}1528else {1529struct ureg_src srcreg[3];1530srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1531srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1532srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);15331534sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,1535translate_dst_operand(&sx, &opcode.dst[0],1536opcode.saturate),1537srcreg);1538}1539break;15401541case D3D10_SB_OPCODE_SAMPLE_C:1542if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1543struct ureg_dst r0 = ureg_DECL_temporary(ureg);15441545/* XXX: Support only 2D texture targets for now.1546* Need to figure out how to pack the compare value1547* for other dimensions and if there is enough space1548* in a single operand for all possible cases.1549*/1550LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=1551TGSI_TEXTURE_2D);15521553assert(opcode.src[1].base.index_dim == 1);1554assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);15551556/* Insert the compare value into .z component.1557*/1558ureg_MOV(ureg,1559ureg_writemask(r0, TGSI_WRITEMASK_XYW),1560translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1561ureg_MOV(ureg,1562ureg_writemask(r0, TGSI_WRITEMASK_Z),1563translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));15641565/* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.1566*/15671568ureg_TEX(ureg,1569translate_dst_operand(&sx, &opcode.dst[0],1570opcode.saturate),1571sx.resources[opcode.src[1].base.index[0].imm].target,1572ureg_src(r0),1573translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));15741575ureg_release_temporary(ureg, r0);1576}1577else {1578struct ureg_src srcreg[4];1579srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1580srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1581srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);1582srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);15831584sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,1585translate_dst_operand(&sx, &opcode.dst[0],1586opcode.saturate),1587srcreg);1588}1589break;15901591case D3D10_SB_OPCODE_SAMPLE_C_LZ:1592if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1593struct ureg_dst r0 = ureg_DECL_temporary(ureg);15941595assert(opcode.src[1].base.index_dim == 1);1596assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);15971598/* XXX: Support only 2D texture targets for now.1599* Need to figure out how to pack the compare value1600* for other dimensions and if there is enough space1601* in a single operand for all possible cases.1602*/1603LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=1604TGSI_TEXTURE_2D);16051606/* Insert the compare value into .z component.1607* Insert 0 into .w component.1608*/1609ureg_MOV(ureg,1610ureg_writemask(r0, TGSI_WRITEMASK_XY),1611translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1612ureg_MOV(ureg,1613ureg_writemask(r0, TGSI_WRITEMASK_Z),1614translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));1615ureg_MOV(ureg,1616ureg_writemask(r0, TGSI_WRITEMASK_W),1617ureg_imm1f(ureg, 0.0f));16181619ureg_TXL(ureg,1620translate_dst_operand(&sx, &opcode.dst[0],1621opcode.saturate),1622sx.resources[opcode.src[1].base.index[0].imm].target,1623ureg_src(r0),1624translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));16251626ureg_release_temporary(ureg, r0);1627}1628else {1629struct ureg_src srcreg[4];1630srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1631srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1632srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);1633srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);16341635sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,1636translate_dst_operand(&sx, &opcode.dst[0],1637opcode.saturate),1638srcreg);1639}1640break;16411642case D3D10_SB_OPCODE_SAMPLE_L:1643if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1644struct ureg_dst r0 = ureg_DECL_temporary(ureg);16451646assert(opcode.src[1].base.index_dim == 1);1647assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);16481649/* Insert LOD into .w component.1650*/1651ureg_MOV(ureg,1652ureg_writemask(r0, TGSI_WRITEMASK_XYZ),1653translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1654ureg_MOV(ureg,1655ureg_writemask(r0, TGSI_WRITEMASK_W),1656translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));16571658ureg_TXL(ureg,1659translate_dst_operand(&sx, &opcode.dst[0],1660opcode.saturate),1661sx.resources[opcode.src[1].base.index[0].imm].target,1662ureg_src(r0),1663translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));16641665ureg_release_temporary(ureg, r0);1666}1667else {1668struct ureg_src srcreg[4];1669srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1670srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1671srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);1672srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);16731674sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,1675translate_dst_operand(&sx, &opcode.dst[0],1676opcode.saturate),1677srcreg);1678}1679break;16801681case D3D10_SB_OPCODE_SAMPLE_D:1682if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1683assert(opcode.src[1].base.index_dim == 1);1684assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);16851686ureg_TXD(ureg,1687translate_dst_operand(&sx, &opcode.dst[0],1688opcode.saturate),1689sx.resources[opcode.src[1].base.index[0].imm].target,1690translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),1691translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),1692translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),1693translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));1694}1695else {1696struct ureg_src srcreg[5];1697srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1698srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1699srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);1700srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);1701srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);17021703sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,1704translate_dst_operand(&sx, &opcode.dst[0],1705opcode.saturate),1706srcreg);1707}1708break;17091710case D3D10_SB_OPCODE_SAMPLE_B:1711if (st_debug & ST_DEBUG_OLD_TEX_OPS) {1712struct ureg_dst r0 = ureg_DECL_temporary(ureg);17131714assert(opcode.src[1].base.index_dim == 1);1715assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);17161717/* Insert LOD bias into .w component.1718*/1719ureg_MOV(ureg,1720ureg_writemask(r0, TGSI_WRITEMASK_XYZ),1721translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1722ureg_MOV(ureg,1723ureg_writemask(r0, TGSI_WRITEMASK_W),1724translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));17251726ureg_TXB(ureg,1727translate_dst_operand(&sx, &opcode.dst[0],1728opcode.saturate),1729sx.resources[opcode.src[1].base.index[0].imm].target,1730ureg_src(r0),1731translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));17321733ureg_release_temporary(ureg, r0);1734}1735else {1736struct ureg_src srcreg[4];1737srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);1738srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);1739srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);1740srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);17411742sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,1743translate_dst_operand(&sx, &opcode.dst[0],1744opcode.saturate),1745srcreg);1746}1747break;17481749case D3D10_SB_OPCODE_SINCOS: {1750struct ureg_dst src0 = ureg_DECL_temporary(ureg);1751ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));1752if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1753struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],1754opcode.saturate);1755struct ureg_src src = ureg_src(src0);1756ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),1757ureg_scalar(src, TGSI_SWIZZLE_X));1758ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),1759ureg_scalar(src, TGSI_SWIZZLE_Y));1760ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),1761ureg_scalar(src, TGSI_SWIZZLE_Z));1762ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),1763ureg_scalar(src, TGSI_SWIZZLE_W));1764}1765if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1766struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],1767opcode.saturate);1768struct ureg_src src = ureg_src(src0);1769ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),1770ureg_scalar(src, TGSI_SWIZZLE_X));1771ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),1772ureg_scalar(src, TGSI_SWIZZLE_Y));1773ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),1774ureg_scalar(src, TGSI_SWIZZLE_Z));1775ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),1776ureg_scalar(src, TGSI_SWIZZLE_W));1777}1778ureg_release_temporary(ureg, src0);1779}1780break;17811782case D3D10_SB_OPCODE_UDIV: {1783struct ureg_dst src0 = ureg_DECL_temporary(ureg);1784struct ureg_dst src1 = ureg_DECL_temporary(ureg);1785ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));1786ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));1787if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1788ureg_UDIV(ureg,1789translate_dst_operand(&sx, &opcode.dst[0],1790opcode.saturate),1791ureg_src(src0), ureg_src(src1));1792}1793if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1794ureg_UMOD(ureg,1795translate_dst_operand(&sx, &opcode.dst[1],1796opcode.saturate),1797ureg_src(src0), ureg_src(src1));1798}1799ureg_release_temporary(ureg, src0);1800ureg_release_temporary(ureg, src1);1801}1802break;1803case D3D10_SB_OPCODE_UMUL: {1804if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1805ureg_UMUL_HI(ureg,1806translate_dst_operand(&sx, &opcode.dst[0],1807opcode.saturate),1808translate_src_operand(&sx, &opcode.src[0], OF_UINT),1809translate_src_operand(&sx, &opcode.src[1], OF_UINT));1810}1811if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {1812ureg_UMUL(ureg,1813translate_dst_operand(&sx, &opcode.dst[1],1814opcode.saturate),1815translate_src_operand(&sx, &opcode.src[0], OF_UINT),1816translate_src_operand(&sx, &opcode.src[1], OF_UINT));1817}1818}1819break;18201821case D3D10_SB_OPCODE_DCL_RESOURCE:1822{1823unsigned target;1824unsigned res_index = opcode.dst[0].base.index[0].imm;1825assert(opcode.dst[0].base.index_dim == 1);1826assert(res_index < SHADER_MAX_RESOURCES);18271828target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);1829sx.resources[res_index].target = target;1830if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {1831sx.sv[res_index] =1832ureg_DECL_sampler_view(ureg, res_index, target,1833trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),1834trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),1835trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),1836trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));1837}1838break;1839}18401841case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {1842unsigned num_constants = opcode.src[0].base.index[1].imm;18431844assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);18451846if (num_constants == 0) {1847num_constants = SHADER_MAX_CONSTS;1848} else {1849assert(num_constants <= SHADER_MAX_CONSTS);1850}18511852ureg_DECL_constant2D(ureg,18530,1854num_constants - 1,1855opcode.src[0].base.index[0].imm);1856break;1857}18581859case D3D10_SB_OPCODE_DCL_SAMPLER:1860assert(opcode.dst[0].base.index_dim == 1);1861assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);18621863sx.samplers[opcode.dst[0].base.index[0].imm] =1864ureg_DECL_sampler(ureg,1865opcode.dst[0].base.index[0].imm);1866break;18671868case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:1869assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);18701871switch (opcode.specific.dcl_gs_output_primitive_topology) {1872case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:1873ureg_property(sx.ureg,1874TGSI_PROPERTY_GS_OUTPUT_PRIM,1875PIPE_PRIM_POINTS);1876break;18771878case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:1879ureg_property(sx.ureg,1880TGSI_PROPERTY_GS_OUTPUT_PRIM,1881PIPE_PRIM_LINE_STRIP);1882break;18831884case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:1885ureg_property(sx.ureg,1886TGSI_PROPERTY_GS_OUTPUT_PRIM,1887PIPE_PRIM_TRIANGLE_STRIP);1888break;18891890default:1891assert(0);1892}1893break;18941895case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:1896assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);18971898/* Figure out the second dimension of GS inputs.1899*/1900switch (opcode.specific.dcl_gs_input_primitive) {1901case D3D10_SB_PRIMITIVE_POINT:1902declare_vertices_in(&sx, 1);1903ureg_property(sx.ureg,1904TGSI_PROPERTY_GS_INPUT_PRIM,1905PIPE_PRIM_POINTS);1906break;19071908case D3D10_SB_PRIMITIVE_LINE:1909declare_vertices_in(&sx, 2);1910ureg_property(sx.ureg,1911TGSI_PROPERTY_GS_INPUT_PRIM,1912PIPE_PRIM_LINES);1913break;19141915case D3D10_SB_PRIMITIVE_TRIANGLE:1916declare_vertices_in(&sx, 3);1917ureg_property(sx.ureg,1918TGSI_PROPERTY_GS_INPUT_PRIM,1919PIPE_PRIM_TRIANGLES);1920break;19211922case D3D10_SB_PRIMITIVE_LINE_ADJ:1923declare_vertices_in(&sx, 4);1924ureg_property(sx.ureg,1925TGSI_PROPERTY_GS_INPUT_PRIM,1926PIPE_PRIM_LINES_ADJACENCY);1927break;19281929case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:1930declare_vertices_in(&sx, 6);1931ureg_property(sx.ureg,1932TGSI_PROPERTY_GS_INPUT_PRIM,1933PIPE_PRIM_TRIANGLES_ADJACENCY);1934break;19351936default:1937assert(0);1938}1939break;19401941case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:1942assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);19431944ureg_property(sx.ureg,1945TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,1946opcode.specific.dcl_max_output_vertex_count);1947break;19481949case D3D10_SB_OPCODE_DCL_INPUT:1950if (parser.header.type == D3D10_SB_VERTEX_SHADER) {1951dcl_vs_input(&sx, ureg, &opcode.dst[0]);1952} else {1953assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);1954dcl_gs_input(&sx, ureg, &opcode.dst[0]);1955}1956break;19571958case D3D10_SB_OPCODE_DCL_INPUT_SGV:1959assert(parser.header.type == D3D10_SB_VERTEX_SHADER);1960dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);1961break;19621963case D3D10_SB_OPCODE_DCL_INPUT_SIV:1964assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);1965dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);1966break;19671968case D3D10_SB_OPCODE_DCL_INPUT_PS:1969assert(parser.header.type == D3D10_SB_PIXEL_SHADER);1970dcl_ps_input(&sx, ureg, &opcode.dst[0],1971opcode.specific.dcl_in_ps_interp);1972break;19731974case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:1975assert(parser.header.type == D3D10_SB_PIXEL_SHADER);1976dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],1977opcode.dcl_siv_name);1978break;19791980case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:1981assert(parser.header.type == D3D10_SB_PIXEL_SHADER);1982dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],1983opcode.dcl_siv_name,1984opcode.specific.dcl_in_ps_interp);1985break;19861987case D3D10_SB_OPCODE_DCL_OUTPUT:1988if (parser.header.type == D3D10_SB_PIXEL_SHADER) {1989/* Pixel shader outputs. */1990if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {1991/* Depth output. */1992assert(opcode.dst[0].base.index_dim == 0);19931994sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);1995sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);1996} else {1997/* Color outputs. */1998assert(opcode.dst[0].base.index_dim == 1);1999assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);20002001dcl_base_output(&sx, ureg,2002ureg_DECL_output(ureg,2003TGSI_SEMANTIC_COLOR,2004opcode.dst[0].base.index[0].imm),2005&opcode.dst[0]);2006}2007} else {2008assert(opcode.dst[0].base.index_dim == 1);2009assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);20102011if (output_mapping) {2012unsigned nr_outputs = ureg_get_nr_outputs(ureg);2013output_mapping[nr_outputs]2014= opcode.dst[0].base.index[0].imm;2015}2016dcl_base_output(&sx, ureg,2017ureg_DECL_output(ureg,2018TGSI_SEMANTIC_GENERIC,2019opcode.dst[0].base.index[0].imm),2020&opcode.dst[0]);2021}2022break;20232024case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:2025assert(opcode.dst[0].base.index_dim == 1);2026assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);20272028if (output_mapping) {2029unsigned nr_outputs = ureg_get_nr_outputs(ureg);2030output_mapping[nr_outputs]2031= opcode.dst[0].base.index[0].imm;2032}2033if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||2034opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {2035/*2036* FIXME: this is quite broken. gallium no longer has separate2037* clip/cull dists, using (max 2) combined clipdist/culldist regs2038* instead. Unlike d3d10 though, which is clip and which cull is2039* simply determined by by number of clip/cull dists (that is,2040* all clip dists must come first).2041*/2042unsigned numcliporcull = sx.num_clip_distances_declared +2043sx.num_cull_distances_declared;2044sx.clip_distance_mapping[numcliporcull].d3d =2045opcode.dst[0].base.index[0].imm;2046sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;2047if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {2048++sx.num_clip_distances_declared;2049/* re-emit should be safe... */2050ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,2051sx.num_clip_distances_declared);2052} else {2053++sx.num_cull_distances_declared;2054ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,2055sx.num_cull_distances_declared);2056}2057} else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {2058sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =2059opcode.dst[0].base.index[0].imm;2060sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =2061sx.num_cull_distances_declared;2062++sx.num_cull_distances_declared;2063ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,2064sx.num_cull_distances_declared);2065}20662067dcl_base_output(&sx, ureg,2068ureg_DECL_output_masked(2069ureg,2070translate_system_name(opcode.dcl_siv_name),2071translate_semantic_index(&sx, opcode.dcl_siv_name,2072&opcode.dst[0]),2073opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,20740, 1),2075&opcode.dst[0]);2076break;20772078case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:2079assert(opcode.dst[0].base.index_dim == 1);2080assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);20812082if (output_mapping) {2083unsigned nr_outputs = ureg_get_nr_outputs(ureg);2084output_mapping[nr_outputs]2085= opcode.dst[0].base.index[0].imm;2086}2087dcl_base_output(&sx, ureg,2088ureg_DECL_output(ureg,2089translate_system_name(opcode.dcl_siv_name),20900),2091&opcode.dst[0]);2092break;20932094case D3D10_SB_OPCODE_DCL_TEMPS:2095{2096uint i;20972098assert(opcode.specific.dcl_num_temps + sx.declared_temps <=2099SHADER_MAX_TEMPS);21002101sx.temp_offset = sx.declared_temps;21022103for (i = 0; i < opcode.specific.dcl_num_temps; i++) {2104sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);2105}2106sx.declared_temps += opcode.specific.dcl_num_temps;2107}2108break;21092110case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:2111{2112uint i;21132114/* XXX: Add true indexable temps to gallium.2115*/21162117assert(opcode.specific.dcl_indexable_temp.index <2118SHADER_MAX_INDEXABLE_TEMPS);2119assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=2120SHADER_MAX_TEMPS);21212122sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =2123sx.declared_temps;21242125for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {2126sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);2127}2128sx.declared_temps += opcode.specific.dcl_indexable_temp.count;2129}2130break;2131case D3D10_SB_OPCODE_IF: {2132unsigned label = 0;2133if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {2134struct ureg_src src =2135translate_src_operand(&sx, &opcode.src[0], OF_INT);2136struct ureg_dst src_nz = ureg_DECL_temporary(ureg);2137ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));2138ureg_UIF(ureg, ureg_src(src_nz), &label);2139ureg_release_temporary(ureg, src_nz);;2140} else {2141ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);2142}2143}2144break;2145case D3D10_SB_OPCODE_RETC:2146case D3D10_SB_OPCODE_CONTINUEC:2147case D3D10_SB_OPCODE_CALLC:2148case D3D10_SB_OPCODE_DISCARD:2149case D3D10_SB_OPCODE_BREAKC:2150{2151unsigned label = 0;2152assert(operand_is_scalar(&opcode.src[0]));2153if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {2154struct ureg_src src =2155translate_src_operand(&sx, &opcode.src[0], OF_INT);2156struct ureg_dst src_nz = ureg_DECL_temporary(ureg);2157ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));2158ureg_UIF(ureg, ureg_src(src_nz), &label);2159ureg_release_temporary(ureg, src_nz);2160}2161else {2162ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);2163}2164switch (opcode.type) {2165case D3D10_SB_OPCODE_RETC:2166ureg_RET(ureg);2167break;2168case D3D10_SB_OPCODE_CONTINUEC:2169ureg_CONT(ureg);2170break;2171case D3D10_SB_OPCODE_CALLC: {2172unsigned label = opcode.src[1].base.index[0].imm;2173unsigned tgsi_token_label = 0;2174ureg_CAL(ureg, &tgsi_token_label);2175Shader_add_call(&sx, label, tgsi_token_label);2176}2177break;2178case D3D10_SB_OPCODE_DISCARD:2179ureg_KILL(ureg);2180break;2181case D3D10_SB_OPCODE_BREAKC:2182ureg_BRK(ureg);2183break;2184default:2185assert(0);2186break;2187}2188ureg_ENDIF(ureg);2189}2190break;2191case D3D10_SB_OPCODE_LABEL: {2192unsigned label = opcode.src[0].base.index[0].imm;2193unsigned tgsi_inst_no = 0;2194if (inside_sub) {2195ureg_ENDSUB(ureg);2196}2197tgsi_inst_no = ureg_get_instruction_number(ureg);2198ureg_BGNSUB(ureg);2199inside_sub = TRUE;2200Shader_add_label(&sx, label, tgsi_inst_no);2201}2202break;2203case D3D10_SB_OPCODE_CALL: {2204unsigned label = opcode.src[0].base.index[0].imm;2205unsigned tgsi_token_label = 0;2206ureg_CAL(ureg, &tgsi_token_label);2207Shader_add_call(&sx, label, tgsi_token_label);2208}2209break;2210case D3D10_SB_OPCODE_EMIT:2211ureg_EMIT(ureg, ureg_imm1u(ureg, 0));2212break;2213case D3D10_SB_OPCODE_CUT:2214ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));2215break;2216case D3D10_SB_OPCODE_EMITTHENCUT:2217ureg_EMIT(ureg, ureg_imm1u(ureg, 0));2218ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));2219break;2220case D3D10_SB_OPCODE_DCL_INDEX_RANGE:2221case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:2222/* Ignore */2223break;2224default:2225{2226uint i;2227struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];2228struct ureg_src src[SHADER_MAX_SRC_OPERANDS];22292230assert(ox->tgsi_opcode != TGSI_EXPAND);22312232if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {2233if (!shader_dumped) {2234dx10_shader_dump_tokens(code);2235shader_dumped = TRUE;2236}2237debug_printf("%s: unsupported opcode %i\n",2238__FUNCTION__, ox->type);2239assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);2240}22412242/* Destination operands. */2243for (i = 0; i < opcode.num_dst; i++) {2244dst[i] = translate_dst_operand(&sx, &opcode.dst[i],2245opcode.saturate);2246}22472248/* Source operands. */2249for (i = 0; i < opcode.num_src; i++) {2250src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);2251}22522253/* Try to re-route output depth to Z channel. */2254if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {2255LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);2256dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);2257src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);2258}22592260ureg_insn(ureg,2261ox->tgsi_opcode,2262dst,2263opcode.num_dst,2264src,2265opcode.num_src, 0);2266}2267}22682269Shader_opcode_free(&opcode);2270}22712272if (inside_sub) {2273ureg_ENDSUB(ureg);2274}22752276ureg_END(ureg);22772278for (i = 0; i < sx.num_calls; ++i) {2279for (j = 0; j < sx.num_labels; ++j) {2280if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {2281ureg_fixup_label(sx.ureg,2282sx.calls[i].tgsi_label_token,2283sx.labels[j].tgsi_insn_no);2284break;2285}2286}2287ASSERT(j < sx.num_labels);2288}2289FREE(sx.labels);2290FREE(sx.calls);22912292tokens = ureg_get_tokens(ureg, &nr_tokens);2293assert(tokens);2294ureg_destroy(ureg);22952296if (st_debug & ST_DEBUG_TGSI) {2297tgsi_dump(tokens, 0);2298}22992300return tokens;2301}230223032304