Path: blob/21.2-virgl/src/gallium/frontends/nine/nine_ff.c
4561 views
1/* FF is big and ugly so feel free to write lines as long as you like.2* Aieeeeeeeee !3*4* Let me make that clearer:5* Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!6*/78#include "device9.h"9#include "basetexture9.h"10#include "vertexdeclaration9.h"11#include "vertexshader9.h"12#include "pixelshader9.h"13#include "nine_ff.h"14#include "nine_defines.h"15#include "nine_helpers.h"16#include "nine_pipe.h"17#include "nine_dump.h"1819#include "pipe/p_context.h"20#include "tgsi/tgsi_ureg.h"21#include "tgsi/tgsi_dump.h"22#include "util/u_box.h"23#include "util/u_hash_table.h"24#include "util/u_upload_mgr.h"2526#define DBG_CHANNEL DBG_FF2728#define NINE_FF_NUM_VS_CONST 19629#define NINE_FF_NUM_PS_CONST 243031struct fvec432{33float x, y, z, w;34};3536struct nine_ff_vs_key37{38union {39struct {40uint32_t position_t : 1;41uint32_t lighting : 1;42uint32_t darkness : 1; /* lighting enabled but no active lights */43uint32_t localviewer : 1;44uint32_t vertexpointsize : 1;45uint32_t pointscale : 1;46uint32_t vertexblend : 3;47uint32_t vertexblend_indexed : 1;48uint32_t vertextween : 1;49uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */50uint32_t mtl_ambient : 2;51uint32_t mtl_specular : 2;52uint32_t mtl_emissive : 2;53uint32_t fog_mode : 2;54uint32_t fog_range : 1;55uint32_t color0in_one : 1;56uint32_t color1in_zero : 1;57uint32_t has_normal : 1;58uint32_t fog : 1;59uint32_t normalizenormals : 1;60uint32_t ucp : 1;61uint32_t pad1 : 4;62uint32_t tc_dim_input: 16; /* 8 * 2 bits */63uint32_t pad2 : 16;64uint32_t tc_dim_output: 24; /* 8 * 3 bits */65uint32_t pad3 : 8;66uint32_t tc_gen : 24; /* 8 * 3 bits */67uint32_t pad4 : 8;68uint32_t tc_idx : 24;69uint32_t pad5 : 8;70uint32_t passthrough;71};72uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */73uint32_t value32[6];74};75};7677/* Texture stage state:78*79* COLOROP D3DTOP 5 bit80* ALPHAOP D3DTOP 5 bit81* COLORARG0 D3DTA 3 bit82* COLORARG1 D3DTA 3 bit83* COLORARG2 D3DTA 3 bit84* ALPHAARG0 D3DTA 3 bit85* ALPHAARG1 D3DTA 3 bit86* ALPHAARG2 D3DTA 3 bit87* RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1)88* TEXCOORDINDEX 0 - 7 3 bit89* ===========================90* 32 bit per stage91*/92struct nine_ff_ps_key93{94union {95struct {96struct {97uint32_t colorop : 5;98uint32_t alphaop : 5;99uint32_t colorarg0 : 3;100uint32_t colorarg1 : 3;101uint32_t colorarg2 : 3;102uint32_t alphaarg0 : 3;103uint32_t alphaarg1 : 3;104uint32_t alphaarg2 : 3;105uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */106uint32_t textarget : 2; /* 1D/2D/3D/CUBE */107uint32_t pad : 1;108/* that's 32 bit exactly */109} ts[8];110uint32_t projected : 16;111uint32_t fog : 1; /* for vFog coming from VS */112uint32_t fog_mode : 2;113uint32_t fog_source : 1; /* 0: Z, 1: W */114uint32_t specular : 1;115uint32_t pad1 : 11; /* 9 32-bit words with this */116uint8_t colorarg_b4[3];117uint8_t colorarg_b5[3];118uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */119uint8_t pad2[3];120};121uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */122uint32_t value32[12];123};124};125126static uint32_t nine_ff_vs_key_hash(const void *key)127{128const struct nine_ff_vs_key *vs = key;129unsigned i;130uint32_t hash = vs->value32[0];131for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)132hash ^= vs->value32[i];133return hash;134}135static bool nine_ff_vs_key_comp(const void *key1, const void *key2)136{137struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;138struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;139140return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;141}142static uint32_t nine_ff_ps_key_hash(const void *key)143{144const struct nine_ff_ps_key *ps = key;145unsigned i;146uint32_t hash = ps->value32[0];147for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)148hash ^= ps->value32[i];149return hash;150}151static bool nine_ff_ps_key_comp(const void *key1, const void *key2)152{153struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;154struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;155156return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;157}158static uint32_t nine_ff_fvf_key_hash(const void *key)159{160return *(DWORD *)key;161}162static bool nine_ff_fvf_key_comp(const void *key1, const void *key2)163{164return *(DWORD *)key1 == *(DWORD *)key2;165}166167static void nine_ff_prune_vs(struct NineDevice9 *);168static void nine_ff_prune_ps(struct NineDevice9 *);169170static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)171{172if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {173const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);174tgsi_dump(toks, 0);175ureg_free_tokens(toks);176}177}178179#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)180#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)181#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)182#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)183184#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)185#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)186#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)187#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)188189#define _XYZW(r) (r)190191/* AL should contain base address of lights table. */192#define LIGHT_CONST(i) \193ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))194195#define MATERIAL_CONST(i) \196ureg_DECL_constant(ureg, 19 + (i))197198#define _CONST(n) ureg_DECL_constant(ureg, n)199200/* VS FF constants layout:201*202* CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION203* CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW204* CONST[ 8..11] D3DTS_PROJECTION205* CONST[12..15] D3DTS_VIEW^(-1)206* CONST[16..18] Normal matrix207*208* CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient209* CONST[20] MATERIAL.Diffuse210* CONST[21] MATERIAL.Ambient211* CONST[22] MATERIAL.Specular212* CONST[23].x___ MATERIAL.Power213* CONST[24] MATERIAL.Emissive214* CONST[25] RS.Ambient215*216* CONST[26].x___ RS.PointSizeMin217* CONST[26]._y__ RS.PointSizeMax218* CONST[26].__z_ RS.PointSize219* CONST[26].___w RS.PointScaleA220* CONST[27].x___ RS.PointScaleB221* CONST[27]._y__ RS.PointScaleC222*223* CONST[28].x___ RS.FogEnd224* CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)225* CONST[28].__z_ RS.FogDensity226227* CONST[30].x___ TWEENFACTOR228*229* CONST[32].x___ LIGHT[0].Type230* CONST[32]._yzw LIGHT[0].Attenuation0,1,2231* CONST[33] LIGHT[0].Diffuse232* CONST[34] LIGHT[0].Specular233* CONST[35] LIGHT[0].Ambient234* CONST[36].xyz_ LIGHT[0].Position235* CONST[36].___w LIGHT[0].Range236* CONST[37].xyz_ LIGHT[0].Direction237* CONST[37].___w LIGHT[0].Falloff238* CONST[38].x___ cos(LIGHT[0].Theta / 2)239* CONST[38]._y__ cos(LIGHT[0].Phi / 2)240* CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))241* CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)242* CONST[39].___w 1 if this is the last active light, 0 if not243* CONST[40] LIGHT[1]244* CONST[48] LIGHT[2]245* CONST[56] LIGHT[3]246* CONST[64] LIGHT[4]247* CONST[72] LIGHT[5]248* CONST[80] LIGHT[6]249* CONST[88] LIGHT[7]250* NOTE: no lighting code is generated if there are no active lights251*252* CONST[100].x___ Viewport 2/width253* CONST[100]._y__ Viewport 2/height254* CONST[100].__z_ Viewport 1/(zmax - zmin)255* CONST[100].___w Viewport width256* CONST[101].x___ Viewport x0257* CONST[101]._y__ Viewport y0258* CONST[101].__z_ Viewport z0259*260* CONST[128..131] D3DTS_TEXTURE0261* CONST[132..135] D3DTS_TEXTURE1262* CONST[136..139] D3DTS_TEXTURE2263* CONST[140..143] D3DTS_TEXTURE3264* CONST[144..147] D3DTS_TEXTURE4265* CONST[148..151] D3DTS_TEXTURE5266* CONST[152..155] D3DTS_TEXTURE6267* CONST[156..159] D3DTS_TEXTURE7268*269* CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW270* CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW271* ...272* CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW273*/274struct vs_build_ctx275{276struct ureg_program *ureg;277const struct nine_ff_vs_key *key;278279uint16_t input[PIPE_MAX_ATTRIBS];280unsigned num_inputs;281282struct ureg_src aVtx;283struct ureg_src aNrm;284struct ureg_src aCol[2];285struct ureg_src aTex[8];286struct ureg_src aPsz;287struct ureg_src aInd;288struct ureg_src aWgt;289290struct ureg_src aVtx1; /* tweening */291struct ureg_src aNrm1;292293struct ureg_src mtlA;294struct ureg_src mtlD;295struct ureg_src mtlS;296struct ureg_src mtlE;297};298299static inline unsigned300get_texcoord_sn(struct pipe_screen *screen)301{302if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))303return TGSI_SEMANTIC_TEXCOORD;304return TGSI_SEMANTIC_GENERIC;305}306307static inline struct ureg_src308build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)309{310const unsigned i = vs->num_inputs++;311assert(i < PIPE_MAX_ATTRIBS);312vs->input[i] = ndecl;313return ureg_DECL_vs_input(vs->ureg, i);314}315316/* NOTE: dst may alias src */317static inline void318ureg_normalize3(struct ureg_program *ureg,319struct ureg_dst dst, struct ureg_src src)320{321struct ureg_dst tmp = ureg_DECL_temporary(ureg);322struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);323324ureg_DP3(ureg, tmp_x, src, src);325ureg_RSQ(ureg, tmp_x, _X(tmp));326ureg_MUL(ureg, dst, src, _X(tmp));327ureg_release_temporary(ureg, tmp);328}329330static void *331nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)332{333const struct nine_ff_vs_key *key = vs->key;334struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);335struct ureg_dst oPos, oCol[2], oPsz, oFog;336struct ureg_dst AR;337unsigned i, c;338unsigned label[32], l = 0;339boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);340boolean has_aNrm;341boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;342const unsigned texcoord_sn = get_texcoord_sn(device->screen);343344vs->ureg = ureg;345346/* Check which inputs we should transform. */347for (i = 0; i < 8 * 3; i += 3) {348switch ((key->tc_gen >> i) & 0x7) {349case NINED3DTSS_TCI_CAMERASPACENORMAL:350need_aNrm = TRUE;351break;352case NINED3DTSS_TCI_CAMERASPACEPOSITION:353need_aVtx = TRUE;354break;355case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:356need_aVtx = need_aNrm = TRUE;357break;358case NINED3DTSS_TCI_SPHEREMAP:359need_aVtx = need_aNrm = TRUE;360break;361default:362break;363}364}365366has_aNrm = need_aNrm && key->has_normal;367368/* Declare and record used inputs (needed for linkage with vertex format):369* (texture coordinates handled later)370*/371vs->aVtx = build_vs_add_input(vs,372key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);373374vs->aNrm = ureg_imm1f(ureg, 0.0f);375if (has_aNrm)376vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);377378vs->aCol[0] = ureg_imm1f(ureg, 1.0f);379vs->aCol[1] = ureg_imm1f(ureg, 0.0f);380381if (key->lighting || key->darkness) {382const unsigned mask = key->mtl_diffuse | key->mtl_specular |383key->mtl_ambient | key->mtl_emissive;384if ((mask & 0x1) && !key->color0in_one)385vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));386if ((mask & 0x2) && !key->color1in_zero)387vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));388389vs->mtlD = MATERIAL_CONST(1);390vs->mtlA = MATERIAL_CONST(2);391vs->mtlS = MATERIAL_CONST(3);392vs->mtlE = MATERIAL_CONST(5);393if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else394if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];395if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else396if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];397if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else398if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];399if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else400if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];401} else {402if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));403if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));404}405406if (key->vertexpointsize)407vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);408409if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))410vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);411if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))412vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);413if (key->vertextween) {414vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));415vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));416}417418/* Declare outputs:419*/420oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */421oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));422oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));423if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {424oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);425oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);426}427428if (key->vertexpointsize || key->pointscale) {429oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,430TGSI_WRITEMASK_X, 0, 1);431oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);432}433434if (key->lighting || key->vertexblend)435AR = ureg_DECL_address(ureg);436437/* === Vertex transformation / vertex blending:438*/439440if (key->position_t) {441if (device->driver_caps.window_space_position_support) {442ureg_MOV(ureg, oPos, vs->aVtx);443} else {444struct ureg_dst tmp = ureg_DECL_temporary(ureg);445/* vs->aVtx contains the coordinates buffer wise.446* later in the pipeline, clipping, viewport and division447* by w (rhw = 1/w) are going to be applied, so do the reverse448* of these transformations (except clipping) to have the good449* position at the end.*/450ureg_MOV(ureg, tmp, vs->aVtx);451/* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */452ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));453ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));454ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));455/* Y needs to be reversed */456ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));457/* inverse rhw */458ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));459/* multiply X, Y, Z by w */460ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));461ureg_MOV(ureg, oPos, ureg_src(tmp));462ureg_release_temporary(ureg, tmp);463}464} else if (key->vertexblend) {465struct ureg_dst tmp = ureg_DECL_temporary(ureg);466struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);467struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);468struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);469struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);470struct ureg_src cWM[4];471472for (i = 160; i <= 195; ++i)473ureg_DECL_constant(ureg, i);474475/* translate world matrix index to constant file index */476if (key->vertexblend_indexed) {477ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));478ureg_ARL(ureg, AR, ureg_src(tmp));479}480481ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));482ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));483ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));484485for (i = 0; i < key->vertexblend; ++i) {486for (c = 0; c < 4; ++c) {487cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);488if (key->vertexblend_indexed)489cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));490}491492/* multiply by WORLD(index) */493ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);494ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));495ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));496ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));497498if (has_aNrm) {499/* Note: the spec says the transpose of the inverse of the500* WorldView matrices should be used, but all tests show501* otherwise.502* Only case unknown: D3DVBF_0WEIGHTS */503ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);504ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));505ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));506}507508if (i < (key->vertexblend - 1)) {509/* accumulate weighted position value */510ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));511if (has_aNrm)512ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));513/* subtract weighted position value for last value */514ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));515}516}517518/* the last weighted position is always 1 - sum_of_previous_weights */519ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));520if (has_aNrm)521ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));522523/* multiply by VIEW_PROJ */524ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));525ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));526ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));527ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));528529if (need_aVtx)530vs->aVtx = ureg_src(aVtx_dst);531532ureg_release_temporary(ureg, tmp);533ureg_release_temporary(ureg, tmp2);534ureg_release_temporary(ureg, sum_blendweights);535if (!need_aVtx)536ureg_release_temporary(ureg, aVtx_dst);537538if (has_aNrm) {539if (key->normalizenormals)540ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));541vs->aNrm = ureg_src(aNrm_dst);542} else543ureg_release_temporary(ureg, aNrm_dst);544} else {545struct ureg_dst tmp = ureg_DECL_temporary(ureg);546547if (key->vertextween) {548struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);549ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);550vs->aVtx = ureg_src(aVtx_dst);551if (has_aNrm) {552struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);553ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);554vs->aNrm = ureg_src(aNrm_dst);555}556}557558/* position = vertex * WORLD_VIEW_PROJ */559ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));560ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));561ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));562ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));563ureg_release_temporary(ureg, tmp);564565if (need_aVtx) {566struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);567ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));568ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));569ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));570ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));571vs->aVtx = ureg_src(aVtx_dst);572}573if (has_aNrm) {574struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);575ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));576ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));577ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));578if (key->normalizenormals)579ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));580vs->aNrm = ureg_src(aNrm_dst);581}582}583584/* === Process point size:585*/586if (key->vertexpointsize || key->pointscale) {587struct ureg_dst tmp = ureg_DECL_temporary(ureg);588struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);589struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);590struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);591if (key->vertexpointsize) {592struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);593ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));594ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));595} else {596struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);597ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));598}599600if (key->pointscale) {601struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);602struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);603604ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);605ureg_RSQ(ureg, tmp_y, _X(tmp));606ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));607ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));608ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));609ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));610ureg_RSQ(ureg, tmp_x, _X(tmp));611ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));612ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));613ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));614ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));615}616617ureg_MOV(ureg, oPsz, _Z(tmp));618ureg_release_temporary(ureg, tmp);619}620621for (i = 0; i < 8; ++i) {622struct ureg_dst tmp, tmp_x, tmp2;623struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;624unsigned c, writemask;625const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;626const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;627unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);628const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;629630/* No texture output of index s */631if (tci == NINED3DTSS_TCI_DISABLE)632continue;633oTex = ureg_DECL_output(ureg, texcoord_sn, i);634tmp = ureg_DECL_temporary(ureg);635tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);636input_coord = ureg_DECL_temporary(ureg);637transformed = ureg_DECL_temporary(ureg);638639/* Get the coordinate */640switch (tci) {641case NINED3DTSS_TCI_PASSTHRU:642/* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *643* Else the idx is used only to determine wrapping mode. */644vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));645ureg_MOV(ureg, input_coord, vs->aTex[idx]);646break;647case NINED3DTSS_TCI_CAMERASPACENORMAL:648ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);649ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));650dim_input = 4;651break;652case NINED3DTSS_TCI_CAMERASPACEPOSITION:653ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);654ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));655dim_input = 4;656break;657case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:658tmp.WriteMask = TGSI_WRITEMASK_XYZ;659aVtx_normed = ureg_DECL_temporary(ureg);660ureg_normalize3(ureg, aVtx_normed, vs->aVtx);661ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);662ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));663ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));664ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));665ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));666ureg_release_temporary(ureg, aVtx_normed);667dim_input = 4;668tmp.WriteMask = TGSI_WRITEMASK_XYZW;669break;670case NINED3DTSS_TCI_SPHEREMAP:671/* Implement the formula of GL_SPHERE_MAP */672tmp.WriteMask = TGSI_WRITEMASK_XYZ;673aVtx_normed = ureg_DECL_temporary(ureg);674tmp2 = ureg_DECL_temporary(ureg);675ureg_normalize3(ureg, aVtx_normed, vs->aVtx);676ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);677ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));678ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));679ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));680/* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */681ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));682ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));683ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));684ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));685ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));686/* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)687* TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */688ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));689ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));690ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));691ureg_release_temporary(ureg, aVtx_normed);692ureg_release_temporary(ureg, tmp2);693dim_input = 4;694tmp.WriteMask = TGSI_WRITEMASK_XYZW;695break;696default:697assert(0);698break;699}700701/* Apply the transformation */702/* dim_output == 0 => do not transform the components.703* XYZRHW also disables transformation */704if (!dim_output || key->position_t) {705ureg_release_temporary(ureg, transformed);706transformed = input_coord;707writemask = TGSI_WRITEMASK_XYZW;708} else {709for (c = 0; c < dim_output; c++) {710t = ureg_writemask(transformed, 1 << c);711switch (dim_input) {712/* dim_input = 1 2 3: -> we add trailing 1 to input*/713case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));714break;715case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));716ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));717break;718case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));719ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));720break;721case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;722default:723assert(0);724}725}726writemask = (1 << dim_output) - 1;727ureg_release_temporary(ureg, input_coord);728}729730ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));731ureg_release_temporary(ureg, transformed);732ureg_release_temporary(ureg, tmp);733}734735/* === Lighting:736*737* DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.738* POINT: Finite distance to scene, divergent rays, isotropic, attenuation.739* SPOT: Finite distance, divergent rays, angular dependence, attenuation.740*741* vec3 normal = normalize(in.Normal * NormalMatrix);742* vec3 hitDir = light.direction;743* float atten = 1.0;744*745* if (light.type != DIRECTIONAL)746* {747* vec3 hitVec = light.position - eyeVertex;748* float d = length(hitVec);749* hitDir = hitVec / d;750* atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);751* }752*753* if (light.type == SPOTLIGHT)754* {755* float rho = dp3(-hitVec, light.direction);756* if (rho < cos(light.phi / 2))757* atten = 0;758* if (rho < cos(light.theta / 2))759* atten *= pow(some_func(rho), light.falloff);760* }761*762* float nDotHit = dp3_sat(normal, hitVec);763* float powFact = 0.0;764*765* if (nDotHit > 0.0)766* {767* vec3 midVec = normalize(hitDir + eye);768* float nDotMid = dp3_sat(normal, midVec);769* pFact = pow(nDotMid, material.power);770* }771*772* ambient += light.ambient * atten;773* diffuse += light.diffuse * atten * nDotHit;774* specular += light.specular * atten * powFact;775*/776if (key->lighting) {777struct ureg_dst tmp = ureg_DECL_temporary(ureg);778struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);779struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);780struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);781struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);782struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);783struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);784785struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);786787struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);788789/* Light.*.Alpha is not used. */790struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);791struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);792struct ureg_dst rS = ureg_DECL_temporary(ureg);793794struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));795796struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));797struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));798struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));799struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));800struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));801struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));802struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));803struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));804struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));805struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));806struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));807struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));808struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));809struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));810struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));811812const unsigned loop_label = l++;813814/* Declare all light constants to allow indirect adressing */815for (i = 32; i < 96; i++)816ureg_DECL_constant(ureg, i);817818ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */819ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));820ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));821ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));822823/* loop management */824ureg_BGNLOOP(ureg, &label[loop_label]);825ureg_ARL(ureg, AL, _W(rCtr));826827/* if (not DIRECTIONAL light): */828ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));829ureg_MOV(ureg, rHit, ureg_negate(cLDir));830ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));831ureg_IF(ureg, _X(tmp), &label[l++]);832{833/* hitDir = light.position - eyeVtx834* d = length(hitDir)835*/836ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));837ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));838ureg_RSQ(ureg, tmp_y, _X(tmp));839ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */840841/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */842ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);843ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);844ureg_RCP(ureg, rAtt, _W(rAtt));845/* cut-off if distance exceeds Light.Range */846ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);847ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));848}849ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));850ureg_ENDIF(ureg);851852/* normalize hitDir */853ureg_normalize3(ureg, rHit, ureg_src(rHit));854855/* if (SPOT light) */856ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));857ureg_IF(ureg, _X(tmp), &label[l++]);858{859/* rho = dp3(-hitDir, light.spotDir)860*861* if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi862* spotAtt = 1863* else864* if (rho <= light.cphi2)865* spotAtt = 0866* else867* spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff868*/869ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */870ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));871ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);872ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */873ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */874ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */875ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));876ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));877}878ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));879ureg_ENDIF(ureg);880881/* directional factors, let's not use LIT because of clarity */882883if (has_aNrm) {884if (key->localviewer) {885ureg_normalize3(ureg, rMid, vs->aVtx);886ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));887} else {888ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));889}890ureg_normalize3(ureg, rMid, ureg_src(rMid));891ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));892ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));893ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));894/* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).895* For front facing, it is more restrictive than test (dp3(normal,mid) > 0).896* No tests were made for backfacing, so add the two conditions */897ureg_IF(ureg, _Z(tmp), &label[l++]);898{899ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));900ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);901ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */902ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */903}904ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));905ureg_ENDIF(ureg);906907ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */908ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */909}910911ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */912913/* break if this was the last light */914ureg_IF(ureg, cLLast, &label[l++]);915ureg_BRK(ureg);916ureg_ENDIF(ureg);917ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));918919ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));920ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));921ureg_ENDLOOP(ureg, &label[loop_label]);922923/* Apply to material:924*925* oCol[0] = (material.emissive + material.ambient * rs.ambient) +926* material.ambient * ambient +927* material.diffuse * diffuse +928* oCol[1] = material.specular * specular;929*/930if (key->mtl_emissive == 0 && key->mtl_ambient == 0)931ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));932else {933ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));934ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);935}936937ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));938ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);939ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);940ureg_release_temporary(ureg, rAtt);941ureg_release_temporary(ureg, rHit);942ureg_release_temporary(ureg, rMid);943ureg_release_temporary(ureg, rCtr);944ureg_release_temporary(ureg, rD);945ureg_release_temporary(ureg, rA);946ureg_release_temporary(ureg, rS);947ureg_release_temporary(ureg, rAtt);948ureg_release_temporary(ureg, tmp);949} else950/* COLOR */951if (key->darkness) {952if (key->mtl_emissive == 0 && key->mtl_ambient == 0)953ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));954else955ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);956ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);957ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));958} else {959ureg_MOV(ureg, oCol[0], vs->aCol[0]);960ureg_MOV(ureg, oCol[1], vs->aCol[1]);961}962963/* === Process fog.964*965* exp(x) = ex2(log2(e) * x)966*/967if (key->fog_mode) {968struct ureg_dst tmp = ureg_DECL_temporary(ureg);969struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);970struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);971if (key->fog_range) {972ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);973ureg_RSQ(ureg, tmp_z, _X(tmp));974ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));975} else {976ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));977}978979if (key->fog_mode == D3DFOG_EXP) {980ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));981ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));982ureg_EX2(ureg, tmp_x, _X(tmp));983} else984if (key->fog_mode == D3DFOG_EXP2) {985ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));986ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));987ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));988ureg_EX2(ureg, tmp_x, _X(tmp));989} else990if (key->fog_mode == D3DFOG_LINEAR) {991ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));992ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));993}994ureg_MOV(ureg, oFog, _X(tmp));995ureg_release_temporary(ureg, tmp);996} else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {997ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));998}9991000if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {1001struct ureg_src input;1002struct ureg_dst output;1003input = vs->aWgt;1004output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);1005ureg_MOV(ureg, output, input);1006}1007if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {1008struct ureg_src input;1009struct ureg_dst output;1010input = vs->aInd;1011output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);1012ureg_MOV(ureg, output, input);1013}1014if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {1015struct ureg_src input;1016struct ureg_dst output;1017input = vs->aNrm;1018output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);1019ureg_MOV(ureg, output, input);1020}1021if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {1022struct ureg_src input;1023struct ureg_dst output;1024input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);1025output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);1026ureg_MOV(ureg, output, input);1027}1028if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {1029struct ureg_src input;1030struct ureg_dst output;1031input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);1032output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);1033ureg_MOV(ureg, output, input);1034}1035if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {1036struct ureg_src input;1037struct ureg_dst output;1038input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);1039input = ureg_scalar(input, TGSI_SWIZZLE_X);1040output = oFog;1041ureg_MOV(ureg, output, input);1042}1043if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {1044(void) 0; /* TODO: replace z of position output ? */1045}10461047/* ucp for ff applies on world coordinates.1048* aVtx is in worldview coordinates. */1049if (key->ucp) {1050struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);1051struct ureg_dst tmp = ureg_DECL_temporary(ureg);1052ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));1053ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));1054ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));1055ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));1056ureg_release_temporary(ureg, tmp);1057}10581059if (key->position_t && device->driver_caps.window_space_position_support)1060ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);10611062ureg_END(ureg);1063nine_ureg_tgsi_dump(ureg, FALSE);1064return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);1065}10661067/* PS FF constants layout:1068*1069* CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT1070* CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT001071* CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT011072* CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT101073* CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT111074* CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE1075* CONST[17..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET1076*1077* CONST[20] D3DRS_TEXTUREFACTOR1078* CONST[21] D3DRS_FOGCOLOR1079* CONST[22].x___ RS.FogEnd1080* CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)1081* CONST[22].__z_ RS.FogDensity1082*/1083struct ps_build_ctx1084{1085struct ureg_program *ureg;10861087struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */1088struct ureg_src vT[8]; /* TEXCOORD[i] */1089struct ureg_dst rCur; /* D3DTA_CURRENT */1090struct ureg_dst rMod;1091struct ureg_src rCurSrc;1092struct ureg_dst rTmp; /* D3DTA_TEMP */1093struct ureg_src rTmpSrc;1094struct ureg_dst rTex;1095struct ureg_src rTexSrc;1096struct ureg_src cBEM[8];1097struct ureg_src s[8];10981099struct {1100unsigned index;1101unsigned index_pre_mod;1102} stage;1103};11041105static struct ureg_src1106ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)1107{1108struct ureg_src reg;11091110switch (ta & D3DTA_SELECTMASK) {1111case D3DTA_CONSTANT:1112reg = ureg_DECL_constant(ps->ureg, ps->stage.index);1113break;1114case D3DTA_CURRENT:1115reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;1116break;1117case D3DTA_DIFFUSE:1118reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);1119break;1120case D3DTA_SPECULAR:1121reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);1122break;1123case D3DTA_TEMP:1124reg = ps->rTmpSrc;1125break;1126case D3DTA_TEXTURE:1127reg = ps->rTexSrc;1128break;1129case D3DTA_TFACTOR:1130reg = ureg_DECL_constant(ps->ureg, 20);1131break;1132default:1133assert(0);1134reg = ureg_src_undef();1135break;1136}1137if (ta & D3DTA_COMPLEMENT) {1138struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);1139ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));1140reg = ureg_src(dst);1141}1142if (ta & D3DTA_ALPHAREPLICATE)1143reg = _WWWW(reg);1144return reg;1145}11461147static struct ureg_dst1148ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)1149{1150assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));11511152switch (ta & D3DTA_SELECTMASK) {1153case D3DTA_CURRENT:1154return ps->rCur;1155case D3DTA_TEMP:1156return ps->rTmp;1157default:1158assert(0);1159return ureg_dst_undef();1160}1161}11621163static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)1164{1165switch (top) {1166case D3DTOP_DISABLE:1167return 0x0;1168case D3DTOP_SELECTARG1:1169case D3DTOP_PREMODULATE:1170return 0x2;1171case D3DTOP_SELECTARG2:1172return 0x4;1173case D3DTOP_MULTIPLYADD:1174case D3DTOP_LERP:1175return 0x7;1176default:1177return 0x6;1178}1179}11801181static inline boolean1182is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)1183{1184return !dst.WriteMask ||1185(dst.File == src.File &&1186dst.Index == src.Index &&1187!dst.Indirect &&1188!dst.Saturate &&1189!src.Indirect &&1190!src.Negate &&1191!src.Absolute &&1192(!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&1193(!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&1194(!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&1195(!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));11961197}11981199static void1200ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)1201{1202struct ureg_program *ureg = ps->ureg;1203struct ureg_dst tmp = ureg_DECL_temporary(ureg);1204struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);1205struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);12061207tmp.WriteMask = dst.WriteMask;12081209if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&1210top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&1211top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&1212top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&1213top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&1214top != D3DTOP_LERP)1215dst = ureg_saturate(dst);12161217switch (top) {1218case D3DTOP_SELECTARG1:1219if (!is_MOV_no_op(dst, arg[1]))1220ureg_MOV(ureg, dst, arg[1]);1221break;1222case D3DTOP_SELECTARG2:1223if (!is_MOV_no_op(dst, arg[2]))1224ureg_MOV(ureg, dst, arg[2]);1225break;1226case D3DTOP_MODULATE:1227ureg_MUL(ureg, dst, arg[1], arg[2]);1228break;1229case D3DTOP_MODULATE2X:1230ureg_MUL(ureg, tmp, arg[1], arg[2]);1231ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));1232break;1233case D3DTOP_MODULATE4X:1234ureg_MUL(ureg, tmp, arg[1], arg[2]);1235ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));1236break;1237case D3DTOP_ADD:1238ureg_ADD(ureg, dst, arg[1], arg[2]);1239break;1240case D3DTOP_ADDSIGNED:1241ureg_ADD(ureg, tmp, arg[1], arg[2]);1242ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));1243break;1244case D3DTOP_ADDSIGNED2X:1245ureg_ADD(ureg, tmp, arg[1], arg[2]);1246ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));1247break;1248case D3DTOP_SUBTRACT:1249ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));1250break;1251case D3DTOP_ADDSMOOTH:1252ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));1253ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);1254break;1255case D3DTOP_BLENDDIFFUSEALPHA:1256ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);1257break;1258case D3DTOP_BLENDTEXTUREALPHA:1259/* XXX: alpha taken from previous stage, texture or result ? */1260ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);1261break;1262case D3DTOP_BLENDFACTORALPHA:1263ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);1264break;1265case D3DTOP_BLENDTEXTUREALPHAPM:1266ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));1267ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);1268break;1269case D3DTOP_BLENDCURRENTALPHA:1270ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);1271break;1272case D3DTOP_PREMODULATE:1273ureg_MOV(ureg, dst, arg[1]);1274ps->stage.index_pre_mod = ps->stage.index + 1;1275break;1276case D3DTOP_MODULATEALPHA_ADDCOLOR:1277ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);1278break;1279case D3DTOP_MODULATECOLOR_ADDALPHA:1280ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));1281break;1282case D3DTOP_MODULATEINVALPHA_ADDCOLOR:1283ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));1284ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);1285break;1286case D3DTOP_MODULATEINVCOLOR_ADDALPHA:1287ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));1288ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));1289break;1290case D3DTOP_BUMPENVMAP:1291break;1292case D3DTOP_BUMPENVMAPLUMINANCE:1293break;1294case D3DTOP_DOTPRODUCT3:1295ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));1296ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));1297ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));1298ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));1299break;1300case D3DTOP_MULTIPLYADD:1301ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);1302break;1303case D3DTOP_LERP:1304ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);1305break;1306case D3DTOP_DISABLE:1307/* no-op ? */1308break;1309default:1310assert(!"invalid D3DTOP");1311break;1312}1313ureg_release_temporary(ureg, tmp);1314ureg_release_temporary(ureg, tmp2);1315}13161317static void *1318nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)1319{1320struct ps_build_ctx ps;1321struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);1322struct ureg_dst oCol;1323unsigned s;1324const unsigned texcoord_sn = get_texcoord_sn(device->screen);13251326memset(&ps, 0, sizeof(ps));1327ps.ureg = ureg;1328ps.stage.index_pre_mod = -1;13291330ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);13311332ps.rCur = ureg_DECL_temporary(ureg);1333ps.rTmp = ureg_DECL_temporary(ureg);1334ps.rTex = ureg_DECL_temporary(ureg);1335ps.rCurSrc = ureg_src(ps.rCur);1336ps.rTmpSrc = ureg_src(ps.rTmp);1337ps.rTexSrc = ureg_src(ps.rTex);13381339/* Initial values */1340ureg_MOV(ureg, ps.rCur, ps.vC[0]);1341ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));1342ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));13431344for (s = 0; s < 8; ++s) {1345ps.s[s] = ureg_src_undef();13461347if (key->ts[s].colorop != D3DTOP_DISABLE) {1348if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||1349key->ts[s].colorarg1 == D3DTA_SPECULAR ||1350key->ts[s].colorarg2 == D3DTA_SPECULAR)1351ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);13521353if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||1354key->ts[s].colorarg1 == D3DTA_TEXTURE ||1355key->ts[s].colorarg2 == D3DTA_TEXTURE ||1356key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||1357key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {1358ps.s[s] = ureg_DECL_sampler(ureg, s);1359ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);1360}1361if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||1362key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))1363ps.s[s] = ureg_DECL_sampler(ureg, s);1364}13651366if (key->ts[s].alphaop != D3DTOP_DISABLE) {1367if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||1368key->ts[s].alphaarg1 == D3DTA_SPECULAR ||1369key->ts[s].alphaarg2 == D3DTA_SPECULAR)1370ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);13711372if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||1373key->ts[s].alphaarg1 == D3DTA_TEXTURE ||1374key->ts[s].alphaarg2 == D3DTA_TEXTURE ||1375key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||1376key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {1377ps.s[s] = ureg_DECL_sampler(ureg, s);1378ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);1379}1380}1381}1382if (key->specular)1383ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);13841385oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);13861387/* Run stages.1388*/1389for (s = 0; s < 8; ++s) {1390unsigned colorarg[3];1391unsigned alphaarg[3];1392const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);1393const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);1394struct ureg_dst dst;1395struct ureg_src arg[3];13961397if (key->ts[s].colorop == D3DTOP_DISABLE) {1398assert (key->ts[s].alphaop == D3DTOP_DISABLE);1399continue;1400}1401ps.stage.index = s;14021403DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,1404nine_D3DTOP_to_str(key->ts[s].colorop),1405nine_D3DTOP_to_str(key->ts[s].alphaop));14061407if (!ureg_src_is_undef(ps.s[s])) {1408unsigned target;1409struct ureg_src texture_coord = ps.vT[s];1410struct ureg_dst delta;1411switch (key->ts[s].textarget) {1412case 0: target = TGSI_TEXTURE_1D; break;1413case 1: target = TGSI_TEXTURE_2D; break;1414case 2: target = TGSI_TEXTURE_3D; break;1415case 3: target = TGSI_TEXTURE_CUBE; break;1416/* this is a 2 bit bitfield, do I really need a default case ? */1417}14181419/* Modify coordinates */1420if (s >= 1 &&1421(key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||1422key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {1423delta = ureg_DECL_temporary(ureg);1424/* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */1425ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));1426ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));1427/* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */1428ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));1429ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));1430texture_coord = ureg_src(ureg_DECL_temporary(ureg));1431ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);1432ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));1433/* Prepare luminance multiplier1434* t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */1435if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {1436struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));1437struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));14381439ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);1440}1441}1442if (key->projected & (3 << (s *2))) {1443unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);1444if (dim == 4)1445ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);1446else {1447struct ureg_dst tmp = ureg_DECL_temporary(ureg);1448ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));1449ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);1450ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);1451ureg_release_temporary(ureg, tmp);1452}1453} else {1454ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);1455}1456if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)1457ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));1458}14591460if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||1461key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)1462continue;14631464dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);14651466if (ps.stage.index_pre_mod == ps.stage.index) {1467ps.rMod = ureg_DECL_temporary(ureg);1468ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);1469}14701471colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;1472colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;1473colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;1474alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;1475alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;1476alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;14771478if (key->ts[s].colorop != key->ts[s].alphaop ||1479colorarg[0] != alphaarg[0] ||1480colorarg[1] != alphaarg[1] ||1481colorarg[2] != alphaarg[2])1482dst.WriteMask = TGSI_WRITEMASK_XYZ;14831484/* Special DOTPRODUCT behaviour (see wine tests) */1485if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)1486dst.WriteMask = TGSI_WRITEMASK_XYZW;14871488if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);1489if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);1490if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);1491ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);14921493if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {1494dst.WriteMask = TGSI_WRITEMASK_W;14951496if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);1497if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);1498if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);1499ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);1500}1501}15021503if (key->specular)1504ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);15051506/* Fog.1507*/1508if (key->fog_mode) {1509struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);1510struct ureg_src vPos;1511if (device->screen->get_param(device->screen,1512PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {1513vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);1514} else {1515vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,1516TGSI_INTERPOLATE_LINEAR);1517}15181519/* Source is either W or Z.1520* When we use vs ff,1521* Z is when an orthogonal projection matrix is detected,1522* W (WFOG) else.1523* Z is used for programmable vs.1524* Note: Tests indicate that the projection matrix coefficients do1525* actually affect pixel fog (and not vertex fog) when vs ff is used,1526* which justifies taking the position's w instead of taking the z coordinate1527* before the projection in the vs shader.1528*/1529if (!key->fog_source)1530ureg_MOV(ureg, rFog, _ZZZZ(vPos));1531else1532/* Position's w is 1/w */1533ureg_RCP(ureg, rFog, _WWWW(vPos));15341535if (key->fog_mode == D3DFOG_EXP) {1536ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));1537ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));1538ureg_EX2(ureg, rFog, _X(rFog));1539} else1540if (key->fog_mode == D3DFOG_EXP2) {1541ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));1542ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));1543ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));1544ureg_EX2(ureg, rFog, _X(rFog));1545} else1546if (key->fog_mode == D3DFOG_LINEAR) {1547ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));1548ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));1549}1550ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));1551ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);1552} else1553if (key->fog) {1554struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);1555ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));1556ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);1557} else {1558ureg_MOV(ureg, oCol, ps.rCurSrc);1559}15601561ureg_END(ureg);1562nine_ureg_tgsi_dump(ureg, FALSE);1563return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);1564}15651566static struct NineVertexShader9 *1567nine_ff_get_vs(struct NineDevice9 *device)1568{1569const struct nine_context *context = &device->context;1570struct NineVertexShader9 *vs;1571struct vs_build_ctx bld;1572struct nine_ff_vs_key key;1573unsigned s, i;1574boolean has_indexes = false;1575boolean has_weights = false;1576char input_texture_coord[8];15771578assert(sizeof(key) <= sizeof(key.value32));15791580memset(&key, 0, sizeof(key));1581memset(&bld, 0, sizeof(bld));1582memset(&input_texture_coord, 0, sizeof(input_texture_coord));15831584bld.key = &key;15851586/* FIXME: this shouldn't be NULL, but it is on init */1587if (context->vdecl) {1588key.color0in_one = 1;1589key.color1in_zero = 1;1590for (i = 0; i < context->vdecl->nelems; i++) {1591uint16_t usage = context->vdecl->usage_map[i];1592if (usage == NINE_DECLUSAGE_POSITIONT)1593key.position_t = 1;1594else if (usage == NINE_DECLUSAGE_i(COLOR, 0))1595key.color0in_one = 0;1596else if (usage == NINE_DECLUSAGE_i(COLOR, 1))1597key.color1in_zero = 0;1598else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {1599has_indexes = true;1600key.passthrough |= 1 << usage;1601} else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {1602has_weights = true;1603key.passthrough |= 1 << usage;1604} else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {1605key.has_normal = 1;1606key.passthrough |= 1 << usage;1607} else if (usage == NINE_DECLUSAGE_PSIZE)1608key.vertexpointsize = 1;1609else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {1610s = usage / NINE_DECLUSAGE_COUNT;1611if (s < 8)1612input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);1613else1614DBG("FF given texture coordinate >= 8. Ignoring\n");1615} else if (usage < NINE_DECLUSAGE_NONE)1616key.passthrough |= 1 << usage;1617}1618}1619/* ff vs + ps 3.0: some elements are passed to the ps (wine test).1620* We do restrict to indices 0 */1621key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |1622(1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |1623(1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));1624if (!key.position_t)1625key.passthrough = 0;1626key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];16271628key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;1629key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;1630if (key.position_t) {1631key.darkness = 0; /* |= key.lighting; */ /* XXX ? */1632key.lighting = 0;1633}1634if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {1635uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);1636key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;1637key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;1638key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;1639key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;1640}1641key.fog = !!context->rs[D3DRS_FOGENABLE];1642key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;1643if (key.fog_mode)1644key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];16451646key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];1647key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];1648key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];16491650if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {1651key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;16521653switch (context->rs[D3DRS_VERTEXBLEND]) {1654case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;1655case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;1656case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;1657case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;1658case D3DVBF_TWEENING: key.vertextween = 1; break;1659default:1660assert(!"invalid D3DVBF");1661break;1662}1663if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)1664key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */1665}16661667for (s = 0; s < 8; ++s) {1668unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;1669unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;1670unsigned dim;16711672if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)1673gen = NINED3DTSS_TCI_PASSTHRU;16741675if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)1676gen = NINED3DTSS_TCI_DISABLE;16771678key.tc_gen |= gen << (s * 3);1679key.tc_idx |= idx << (s * 3);1680key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);16811682dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;1683if (dim > 4)1684dim = input_texture_coord[idx];1685if (dim == 1) /* NV behaviour */1686dim = 0;1687key.tc_dim_output |= dim << (s * 3);1688}16891690DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));1691vs = util_hash_table_get(device->ff.ht_vs, &key);1692if (vs)1693return vs;1694NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));16951696nine_ff_prune_vs(device);1697if (vs) {1698unsigned n;16991700memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));17011702_mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);1703device->ff.num_vs++;17041705vs->num_inputs = bld.num_inputs;1706for (n = 0; n < bld.num_inputs; ++n)1707vs->input_map[n].ndecl = bld.input[n];17081709vs->position_t = key.position_t;1710vs->point_size = key.vertexpointsize | key.pointscale;1711}1712return vs;1713}17141715#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)1716#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))17171718static struct NinePixelShader9 *1719nine_ff_get_ps(struct NineDevice9 *device)1720{1721struct nine_context *context = &device->context;1722D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION);1723struct NinePixelShader9 *ps;1724struct nine_ff_ps_key key;1725unsigned s;1726uint8_t sampler_mask = 0;17271728assert(sizeof(key) <= sizeof(key.value32));17291730memset(&key, 0, sizeof(key));1731for (s = 0; s < 8; ++s) {1732key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];1733key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];1734const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);1735const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);1736/* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.1737* ALPHAOP cannot be enabled if COLOROP is disabled.1738* Verified on Windows. */1739if (key.ts[s].colorop == D3DTOP_DISABLE) {1740key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */1741break;1742}17431744if (!context->texture[s].enabled &&1745((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&1746used_c & 0x1) ||1747(context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&1748used_c & 0x2) ||1749(context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&1750used_c & 0x4))) {1751/* Tested on Windows: Invalid texture read disables the stage1752* and the subsequent ones, but only for colorop. For alpha,1753* it's as if the texture had alpha of 1.0, which is what1754* has our dummy texture in that case. Invalid color also1755* disabled the following alpha stages. */1756key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;1757break;1758}17591760if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||1761context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||1762context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||1763context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||1764context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||1765context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)1766sampler_mask |= (1 << s);17671768if (key.ts[s].colorop != D3DTOP_DISABLE) {1769if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;1770if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;1771if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;1772if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;1773if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;1774if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;1775if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;1776if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;1777if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;1778}1779if (key.ts[s].alphaop != D3DTOP_DISABLE) {1780if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;1781if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;1782if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;1783if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;1784if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;1785if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;1786}1787key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;17881789if (context->texture[s].enabled) {1790switch (context->texture[s].type) {1791case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;1792case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;1793case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;1794default:1795assert(!"unexpected texture type");1796break;1797}1798} else {1799key.ts[s].textarget = 1;1800}1801}18021803/* Note: If colorop is D3DTOP_DISABLE for the first stage1804* (which implies alphaop is too), nothing particular happens,1805* that is, current is equal to diffuse (which is the case anyway,1806* because it is how it is initialized).1807* Special case seems if alphaop is D3DTOP_DISABLE and not colorop,1808* because then if the resultarg is TEMP, then diffuse alpha is written1809* to it. */1810if (key.ts[0].colorop != D3DTOP_DISABLE &&1811key.ts[0].alphaop == D3DTOP_DISABLE &&1812key.ts[0].resultarg != 0) {1813key.ts[0].alphaop = D3DTOP_SELECTARG1;1814key.ts[0].alphaarg1 = D3DTA_DIFFUSE;1815}1816/* When no alpha stage writes to current, diffuse alpha is taken.1817* Since we initialize current to diffuse, we have the behaviour. */18181819/* Last stage always writes to Current */1820if (s >= 1)1821key.ts[s-1].resultarg = 0;18221823key.projected = nine_ff_get_projected_key_ff(context);1824key.specular = !!context->rs[D3DRS_SPECULARENABLE];18251826for (; s < 8; ++s)1827key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;1828if (context->rs[D3DRS_FOGENABLE])1829key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];1830key.fog = !!context->rs[D3DRS_FOGENABLE];1831/* Pixel fog (with WFOG advertised): source is either Z or W.1832* W is the source if vs ff is used, and the1833* projection matrix is not orthogonal.1834* Tests on Win 10 seem to indicate _341835* and _33 are checked against 0, 1. */1836if (key.fog_mode && key.fog)1837key.fog_source = !context->programmable_vs &&1838!(projection_matrix->_34 == 0.0f &&1839projection_matrix->_44 == 1.0f);18401841DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));1842ps = util_hash_table_get(device->ff.ht_ps, &key);1843if (ps)1844return ps;1845NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));18461847nine_ff_prune_ps(device);1848if (ps) {1849memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));18501851_mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);1852device->ff.num_ps++;18531854ps->rt_mask = 0x1;1855ps->sampler_mask = sampler_mask;1856}1857return ps;1858}18591860static void1861nine_ff_load_vs_transforms(struct NineDevice9 *device)1862{1863struct nine_context *context = &device->context;1864D3DMATRIX T;1865D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;1866unsigned i;18671868/* TODO: make this nicer, and only upload the ones we need */1869/* TODO: use ff.vs_const as storage of W, V, P matrices */18701871if (IS_D3DTS_DIRTY(context, WORLD) ||1872IS_D3DTS_DIRTY(context, VIEW) ||1873IS_D3DTS_DIRTY(context, PROJECTION)) {1874/* WVP, WV matrices */1875nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));1876nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));18771878/* normal matrix == transpose(inverse(WV)) */1879nine_d3d_matrix_inverse(&T, &M[1]);1880nine_d3d_matrix_transpose(&M[4], &T);18811882/* P matrix */1883M[2] = *GET_D3DTS(PROJECTION);18841885/* V and W matrix */1886nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));1887M[40] = M[1];1888}18891890if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {1891/* load other world matrices */1892for (i = 1; i <= 8; ++i) {1893nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));1894}1895}18961897device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);1898}18991900static void1901nine_ff_load_lights(struct NineDevice9 *device)1902{1903struct nine_context *context = &device->context;1904struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;1905unsigned l;19061907if (context->changed.group & NINE_STATE_FF_MATERIAL) {1908const D3DMATERIAL9 *mtl = &context->ff.material;19091910memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));1911memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));1912memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));1913dst[23].x = mtl->Power;1914memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));1915d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);1916dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;1917dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;1918dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;1919}19201921if (!(context->changed.group & NINE_STATE_FF_LIGHTING))1922return;19231924for (l = 0; l < context->ff.num_lights_active; ++l) {1925const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];19261927dst[32 + l * 8].x = light->Type;1928dst[32 + l * 8].y = light->Attenuation0;1929dst[32 + l * 8].z = light->Attenuation1;1930dst[32 + l * 8].w = light->Attenuation2;1931memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));1932memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));1933memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));1934nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));1935nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));1936dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;1937dst[37 + l * 8].w = light->Falloff;1938dst[38 + l * 8].x = cosf(light->Theta * 0.5f);1939dst[38 + l * 8].y = cosf(light->Phi * 0.5f);1940dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);1941dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);1942}1943}19441945static void1946nine_ff_load_point_and_fog_params(struct NineDevice9 *device)1947{1948struct nine_context *context = &device->context;1949struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;19501951if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))1952return;1953dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);1954dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);1955dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]);1956dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);1957dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);1958dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);1959dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);1960dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));1961if (isinf(dst[28].y))1962dst[28].y = 0.0f;1963dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);1964}19651966static void1967nine_ff_load_tex_matrices(struct NineDevice9 *device)1968{1969struct nine_context *context = &device->context;1970D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;1971unsigned s;19721973if (!(context->ff.changed.transform[0] & 0xff0000))1974return;1975for (s = 0; s < 8; ++s) {1976if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))1977nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE));1978}1979}19801981static void1982nine_ff_load_ps_params(struct NineDevice9 *device)1983{1984struct nine_context *context = &device->context;1985struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;1986unsigned s;19871988if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))1989return;19901991for (s = 0; s < 8; ++s)1992d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);19931994for (s = 0; s < 8; ++s) {1995dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);1996dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);1997dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);1998dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);1999if (s & 1) {2000dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);2001dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);2002} else {2003dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);2004dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);2005}2006}20072008d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);2009d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);2010dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);2011dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));2012dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);2013}20142015static void2016nine_ff_load_viewport_info(struct NineDevice9 *device)2017{2018D3DVIEWPORT9 *viewport = &device->context.viewport;2019struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;2020float diffZ = viewport->MaxZ - viewport->MinZ;20212022/* Note: the other functions avoids to fill the const again if nothing changed.2023* But we don't have much to fill, and adding code to allow that may be complex2024* so just fill it always */2025dst[100].x = 2.0f / (float)(viewport->Width);2026dst[100].y = 2.0f / (float)(viewport->Height);2027dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);2028dst[100].w = (float)(viewport->Width);2029dst[101].x = (float)(viewport->X);2030dst[101].y = (float)(viewport->Y);2031dst[101].z = (float)(viewport->MinZ);2032}20332034void2035nine_ff_update(struct NineDevice9 *device)2036{2037struct nine_context *context = &device->context;2038struct pipe_constant_buffer cb;20392040DBG("vs=%p ps=%p\n", context->vs, context->ps);20412042/* NOTE: the only reference belongs to the hash table */2043if (!context->programmable_vs) {2044device->ff.vs = nine_ff_get_vs(device);2045context->changed.group |= NINE_STATE_VS;2046}2047if (!context->ps) {2048device->ff.ps = nine_ff_get_ps(device);2049context->changed.group |= NINE_STATE_PS;2050}20512052if (!context->programmable_vs) {2053nine_ff_load_vs_transforms(device);2054nine_ff_load_tex_matrices(device);2055nine_ff_load_lights(device);2056nine_ff_load_point_and_fog_params(device);2057nine_ff_load_viewport_info(device);20582059memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));20602061cb.buffer_offset = 0;2062cb.buffer = NULL;2063cb.user_buffer = device->ff.vs_const;2064cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);20652066context->pipe_data.cb_vs_ff = cb;2067context->commit |= NINE_STATE_COMMIT_CONST_VS;20682069context->changed.group &= ~NINE_STATE_FF_VS;2070}20712072if (!context->ps) {2073nine_ff_load_ps_params(device);20742075cb.buffer_offset = 0;2076cb.buffer = NULL;2077cb.user_buffer = device->ff.ps_const;2078cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);20792080context->pipe_data.cb_ps_ff = cb;2081context->commit |= NINE_STATE_COMMIT_CONST_PS;20822083context->changed.group &= ~NINE_STATE_FF_PS;2084}2085}208620872088boolean2089nine_ff_init(struct NineDevice9 *device)2090{2091device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,2092nine_ff_vs_key_comp);2093device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,2094nine_ff_ps_key_comp);20952096device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,2097nine_ff_fvf_key_comp);20982099device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));2100device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));21012102return device->ff.ht_vs && device->ff.ht_ps &&2103device->ff.ht_fvf &&2104device->ff.vs_const && device->ff.ps_const;2105}21062107static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)2108{2109NineUnknown_Unbind(NineUnknown(value));2110return PIPE_OK;2111}21122113void2114nine_ff_fini(struct NineDevice9 *device)2115{2116if (device->ff.ht_vs) {2117util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);2118_mesa_hash_table_destroy(device->ff.ht_vs, NULL);2119}2120if (device->ff.ht_ps) {2121util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);2122_mesa_hash_table_destroy(device->ff.ht_ps, NULL);2123}2124if (device->ff.ht_fvf) {2125util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);2126_mesa_hash_table_destroy(device->ff.ht_fvf, NULL);2127}2128device->ff.vs = NULL; /* destroyed by unbinding from hash table */2129device->ff.ps = NULL;21302131FREE(device->ff.vs_const);2132FREE(device->ff.ps_const);2133}21342135static void2136nine_ff_prune_vs(struct NineDevice9 *device)2137{2138struct nine_context *context = &device->context;21392140if (device->ff.num_vs > 1024) {2141/* could destroy the bound one here, so unbind */2142context->pipe->bind_vs_state(context->pipe, NULL);2143util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);2144_mesa_hash_table_clear(device->ff.ht_vs, NULL);2145device->ff.num_vs = 0;2146context->changed.group |= NINE_STATE_VS;2147}2148}2149static void2150nine_ff_prune_ps(struct NineDevice9 *device)2151{2152struct nine_context *context = &device->context;21532154if (device->ff.num_ps > 1024) {2155/* could destroy the bound one here, so unbind */2156context->pipe->bind_fs_state(context->pipe, NULL);2157util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);2158_mesa_hash_table_clear(device->ff.ht_ps, NULL);2159device->ff.num_ps = 0;2160context->changed.group |= NINE_STATE_PS;2161}2162}21632164/* ========================================================================== */21652166/* Matrix multiplication:2167*2168* in memory: 0 1 2 3 (row major)2169* 4 5 6 72170* 8 9 a b2171* c d e f2172*2173* cA cB cC cD2174* r0 = (r0 * cA) (r0 * cB) . .2175* r1 = (r1 * cA) (r1 * cB)2176* r2 = (r2 * cA) .2177* r3 = (r3 * cA) .2178*2179* r: (11) (12) (13) (14)2180* (21) (22) (23) (24)2181* (31) (32) (33) (34)2182* (41) (42) (43) (44)2183* l: (11 12 13 14)2184* (21 22 23 24)2185* (31 32 33 34)2186* (41 42 43 44)2187*2188* v: (x y z 1 )2189*2190* t.xyzw = MUL(v.xxxx, r[0]);2191* t.xyzw = MAD(v.yyyy, r[1], t.xyzw);2192* t.xyzw = MAD(v.zzzz, r[2], t.xyzw);2193* v.xyzw = MAD(v.wwww, r[3], t.xyzw);2194*2195* v.x = DP4(v, c[0]);2196* v.y = DP4(v, c[1]);2197* v.z = DP4(v, c[2]);2198* v.w = DP4(v, c[3]) = 12199*/22002201/*2202static void2203nine_D3DMATRIX_print(const D3DMATRIX *M)2204{2205DBG("\n(%f %f %f %f)\n"2206"(%f %f %f %f)\n"2207"(%f %f %f %f)\n"2208"(%f %f %f %f)\n",2209M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],2210M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],2211M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],2212M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);2213}2214*/22152216static inline float2217nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)2218{2219return A->m[r][0] * B->m[0][c] +2220A->m[r][1] * B->m[1][c] +2221A->m[r][2] * B->m[2][c] +2222A->m[r][3] * B->m[3][c];2223}22242225static inline float2226nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)2227{2228return v->x * M->m[0][c] +2229v->y * M->m[1][c] +2230v->z * M->m[2][c] +22311.0f * M->m[3][c];2232}22332234static inline float2235nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)2236{2237return v->x * M->m[0][c] +2238v->y * M->m[1][c] +2239v->z * M->m[2][c];2240}22412242void2243nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)2244{2245D->_11 = nine_DP4_row_col(L, 0, R, 0);2246D->_12 = nine_DP4_row_col(L, 0, R, 1);2247D->_13 = nine_DP4_row_col(L, 0, R, 2);2248D->_14 = nine_DP4_row_col(L, 0, R, 3);22492250D->_21 = nine_DP4_row_col(L, 1, R, 0);2251D->_22 = nine_DP4_row_col(L, 1, R, 1);2252D->_23 = nine_DP4_row_col(L, 1, R, 2);2253D->_24 = nine_DP4_row_col(L, 1, R, 3);22542255D->_31 = nine_DP4_row_col(L, 2, R, 0);2256D->_32 = nine_DP4_row_col(L, 2, R, 1);2257D->_33 = nine_DP4_row_col(L, 2, R, 2);2258D->_34 = nine_DP4_row_col(L, 2, R, 3);22592260D->_41 = nine_DP4_row_col(L, 3, R, 0);2261D->_42 = nine_DP4_row_col(L, 3, R, 1);2262D->_43 = nine_DP4_row_col(L, 3, R, 2);2263D->_44 = nine_DP4_row_col(L, 3, R, 3);2264}22652266void2267nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)2268{2269d->x = nine_DP4_vec_col(v, M, 0);2270d->y = nine_DP4_vec_col(v, M, 1);2271d->z = nine_DP4_vec_col(v, M, 2);2272}22732274void2275nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)2276{2277d->x = nine_DP3_vec_col(v, M, 0);2278d->y = nine_DP3_vec_col(v, M, 1);2279d->z = nine_DP3_vec_col(v, M, 2);2280}22812282void2283nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)2284{2285unsigned i, j;2286for (i = 0; i < 4; ++i)2287for (j = 0; j < 4; ++j)2288D->m[i][j] = M->m[j][i];2289}22902291#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \2292float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \2293if (t > 0.0f) pos += t; else neg += t; } while(0)22942295#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \2296float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \2297if (t > 0.0f) neg -= t; else pos -= t; } while(0)2298float2299nine_d3d_matrix_det(const D3DMATRIX *M)2300{2301float pos = 0.0f;2302float neg = 0.0f;23032304_M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);2305_M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);2306_M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);23072308_M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);2309_M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);2310_M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);23112312_M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);2313_M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);2314_M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);23152316_M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);2317_M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);2318_M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);23192320_M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);2321_M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);2322_M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);23232324_M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);2325_M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);2326_M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);23272328_M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);2329_M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);2330_M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);23312332_M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);2333_M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);2334_M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);23352336return pos + neg;2337}23382339/* XXX: Probably better to just use src/mesa/math/m_matrix.c because2340* I have no idea where this code came from.2341*/2342void2343nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)2344{2345int i, k;2346float det;23472348D->m[0][0] =2349M->m[1][1] * M->m[2][2] * M->m[3][3] -2350M->m[1][1] * M->m[3][2] * M->m[2][3] -2351M->m[1][2] * M->m[2][1] * M->m[3][3] +2352M->m[1][2] * M->m[3][1] * M->m[2][3] +2353M->m[1][3] * M->m[2][1] * M->m[3][2] -2354M->m[1][3] * M->m[3][1] * M->m[2][2];23552356D->m[0][1] =2357-M->m[0][1] * M->m[2][2] * M->m[3][3] +2358M->m[0][1] * M->m[3][2] * M->m[2][3] +2359M->m[0][2] * M->m[2][1] * M->m[3][3] -2360M->m[0][2] * M->m[3][1] * M->m[2][3] -2361M->m[0][3] * M->m[2][1] * M->m[3][2] +2362M->m[0][3] * M->m[3][1] * M->m[2][2];23632364D->m[0][2] =2365M->m[0][1] * M->m[1][2] * M->m[3][3] -2366M->m[0][1] * M->m[3][2] * M->m[1][3] -2367M->m[0][2] * M->m[1][1] * M->m[3][3] +2368M->m[0][2] * M->m[3][1] * M->m[1][3] +2369M->m[0][3] * M->m[1][1] * M->m[3][2] -2370M->m[0][3] * M->m[3][1] * M->m[1][2];23712372D->m[0][3] =2373-M->m[0][1] * M->m[1][2] * M->m[2][3] +2374M->m[0][1] * M->m[2][2] * M->m[1][3] +2375M->m[0][2] * M->m[1][1] * M->m[2][3] -2376M->m[0][2] * M->m[2][1] * M->m[1][3] -2377M->m[0][3] * M->m[1][1] * M->m[2][2] +2378M->m[0][3] * M->m[2][1] * M->m[1][2];23792380D->m[1][0] =2381-M->m[1][0] * M->m[2][2] * M->m[3][3] +2382M->m[1][0] * M->m[3][2] * M->m[2][3] +2383M->m[1][2] * M->m[2][0] * M->m[3][3] -2384M->m[1][2] * M->m[3][0] * M->m[2][3] -2385M->m[1][3] * M->m[2][0] * M->m[3][2] +2386M->m[1][3] * M->m[3][0] * M->m[2][2];23872388D->m[1][1] =2389M->m[0][0] * M->m[2][2] * M->m[3][3] -2390M->m[0][0] * M->m[3][2] * M->m[2][3] -2391M->m[0][2] * M->m[2][0] * M->m[3][3] +2392M->m[0][2] * M->m[3][0] * M->m[2][3] +2393M->m[0][3] * M->m[2][0] * M->m[3][2] -2394M->m[0][3] * M->m[3][0] * M->m[2][2];23952396D->m[1][2] =2397-M->m[0][0] * M->m[1][2] * M->m[3][3] +2398M->m[0][0] * M->m[3][2] * M->m[1][3] +2399M->m[0][2] * M->m[1][0] * M->m[3][3] -2400M->m[0][2] * M->m[3][0] * M->m[1][3] -2401M->m[0][3] * M->m[1][0] * M->m[3][2] +2402M->m[0][3] * M->m[3][0] * M->m[1][2];24032404D->m[1][3] =2405M->m[0][0] * M->m[1][2] * M->m[2][3] -2406M->m[0][0] * M->m[2][2] * M->m[1][3] -2407M->m[0][2] * M->m[1][0] * M->m[2][3] +2408M->m[0][2] * M->m[2][0] * M->m[1][3] +2409M->m[0][3] * M->m[1][0] * M->m[2][2] -2410M->m[0][3] * M->m[2][0] * M->m[1][2];24112412D->m[2][0] =2413M->m[1][0] * M->m[2][1] * M->m[3][3] -2414M->m[1][0] * M->m[3][1] * M->m[2][3] -2415M->m[1][1] * M->m[2][0] * M->m[3][3] +2416M->m[1][1] * M->m[3][0] * M->m[2][3] +2417M->m[1][3] * M->m[2][0] * M->m[3][1] -2418M->m[1][3] * M->m[3][0] * M->m[2][1];24192420D->m[2][1] =2421-M->m[0][0] * M->m[2][1] * M->m[3][3] +2422M->m[0][0] * M->m[3][1] * M->m[2][3] +2423M->m[0][1] * M->m[2][0] * M->m[3][3] -2424M->m[0][1] * M->m[3][0] * M->m[2][3] -2425M->m[0][3] * M->m[2][0] * M->m[3][1] +2426M->m[0][3] * M->m[3][0] * M->m[2][1];24272428D->m[2][2] =2429M->m[0][0] * M->m[1][1] * M->m[3][3] -2430M->m[0][0] * M->m[3][1] * M->m[1][3] -2431M->m[0][1] * M->m[1][0] * M->m[3][3] +2432M->m[0][1] * M->m[3][0] * M->m[1][3] +2433M->m[0][3] * M->m[1][0] * M->m[3][1] -2434M->m[0][3] * M->m[3][0] * M->m[1][1];24352436D->m[2][3] =2437-M->m[0][0] * M->m[1][1] * M->m[2][3] +2438M->m[0][0] * M->m[2][1] * M->m[1][3] +2439M->m[0][1] * M->m[1][0] * M->m[2][3] -2440M->m[0][1] * M->m[2][0] * M->m[1][3] -2441M->m[0][3] * M->m[1][0] * M->m[2][1] +2442M->m[0][3] * M->m[2][0] * M->m[1][1];24432444D->m[3][0] =2445-M->m[1][0] * M->m[2][1] * M->m[3][2] +2446M->m[1][0] * M->m[3][1] * M->m[2][2] +2447M->m[1][1] * M->m[2][0] * M->m[3][2] -2448M->m[1][1] * M->m[3][0] * M->m[2][2] -2449M->m[1][2] * M->m[2][0] * M->m[3][1] +2450M->m[1][2] * M->m[3][0] * M->m[2][1];24512452D->m[3][1] =2453M->m[0][0] * M->m[2][1] * M->m[3][2] -2454M->m[0][0] * M->m[3][1] * M->m[2][2] -2455M->m[0][1] * M->m[2][0] * M->m[3][2] +2456M->m[0][1] * M->m[3][0] * M->m[2][2] +2457M->m[0][2] * M->m[2][0] * M->m[3][1] -2458M->m[0][2] * M->m[3][0] * M->m[2][1];24592460D->m[3][2] =2461-M->m[0][0] * M->m[1][1] * M->m[3][2] +2462M->m[0][0] * M->m[3][1] * M->m[1][2] +2463M->m[0][1] * M->m[1][0] * M->m[3][2] -2464M->m[0][1] * M->m[3][0] * M->m[1][2] -2465M->m[0][2] * M->m[1][0] * M->m[3][1] +2466M->m[0][2] * M->m[3][0] * M->m[1][1];24672468D->m[3][3] =2469M->m[0][0] * M->m[1][1] * M->m[2][2] -2470M->m[0][0] * M->m[2][1] * M->m[1][2] -2471M->m[0][1] * M->m[1][0] * M->m[2][2] +2472M->m[0][1] * M->m[2][0] * M->m[1][2] +2473M->m[0][2] * M->m[1][0] * M->m[2][1] -2474M->m[0][2] * M->m[2][0] * M->m[1][1];24752476det =2477M->m[0][0] * D->m[0][0] +2478M->m[1][0] * D->m[0][1] +2479M->m[2][0] * D->m[0][2] +2480M->m[3][0] * D->m[0][3];24812482if (fabsf(det) < 1e-30) {/* non inversible */2483*D = *M; /* wine tests */2484return;2485}24862487det = 1.0 / det;24882489for (i = 0; i < 4; i++)2490for (k = 0; k < 4; k++)2491D->m[i][k] *= det;24922493#if defined(DEBUG) || !defined(NDEBUG)2494{2495D3DMATRIX I;24962497nine_d3d_matrix_matrix_mul(&I, D, M);24982499for (i = 0; i < 4; ++i)2500for (k = 0; k < 4; ++k)2501if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)2502DBG("Matrix inversion check FAILED !\n");2503}2504#endif2505}250625072508