Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/frontends/nine/nine_ff.c
4561 views
1
2
/* FF is big and ugly so feel free to write lines as long as you like.
3
* Aieeeeeeeee !
4
*
5
* Let me make that clearer:
6
* Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7
*/
8
9
#include "device9.h"
10
#include "basetexture9.h"
11
#include "vertexdeclaration9.h"
12
#include "vertexshader9.h"
13
#include "pixelshader9.h"
14
#include "nine_ff.h"
15
#include "nine_defines.h"
16
#include "nine_helpers.h"
17
#include "nine_pipe.h"
18
#include "nine_dump.h"
19
20
#include "pipe/p_context.h"
21
#include "tgsi/tgsi_ureg.h"
22
#include "tgsi/tgsi_dump.h"
23
#include "util/u_box.h"
24
#include "util/u_hash_table.h"
25
#include "util/u_upload_mgr.h"
26
27
/* Debug channel selected for this file (presumably consumed by the logging
 * macros from nine_dump.h -- confirm there). */
#define DBG_CHANNEL DBG_FF

/* Number of constant slots for the fixed-function vertex / pixel shaders.
 * The VS layout documented further down in this file ends at CONST[195],
 * i.e. 196 slots. */
#define NINE_FF_NUM_VS_CONST 196
#define NINE_FF_NUM_PS_CONST 24
31
32
/* Four-component float vector (x, y, z, w). */
struct fvec4
{
    float x, y, z, w;
};
36
37
/* Key identifying one fixed-function vertex shader variant.
 *
 * The named bitfields overlay value32[6] / value64[3]; nine_ff_vs_key_hash()
 * folds value32[] and nine_ff_vs_key_comp() memcmp()s value64[], so every
 * bit, padding included, takes part in hashing and comparison.
 * The bitfields are grouped so that each group adds up to 32 bits.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* word 0: 28 state bits + 4 bits of padding */
            uint32_t position_t : 1;
            uint32_t lighting   : 1;
            uint32_t darkness   : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3; /* used as the number of matrices blended */
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_zero : 1;
            uint32_t has_normal : 1;
            uint32_t fog : 1;
            uint32_t normalizenormals : 1;
            uint32_t ucp : 1;
            uint32_t pad1 : 4;
            /* word 1: per stage (input dimension - 1), read back as 1 + field */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            /* word 2: per stage output dimension, 0 = do not transform */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            /* word 3: per stage texcoord generation mode (NINED3DTSS_TCI_*) */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* word 4: per stage texcoord index, 3 bits each */
            uint32_t tc_idx : 24;
            uint32_t pad5 : 8;
            /* word 5: bitmask tested with (1 << NINE_DECLUSAGE_*) */
            uint32_t passthrough;
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
77
78
/* Texture stage state:
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXCOORDINDEX 0 - 7  3 bit
 * ===========================
 *                     32 bit per stage
 *
 * NOTE(review): the last table row does not match the struct below, which
 * stores textarget (2 bit) plus 1 bit of padding in those three bits; the
 * table looks stale -- confirm before relying on it.
 */

/* Key identifying one fixed-function pixel shader variant.
 *
 * The named fields overlay value32[12] / value64[6]; nine_ff_ps_key_hash()
 * folds value32[] and nine_ff_ps_key_comp() memcmp()s value64[], so padding
 * bits take part in hashing and comparison as well.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            /* words 0..7: one 32-bit record per texture stage */
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];
            /* word 8 */
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t fog_source : 1; /* 0: Z, 1: W */
            uint32_t specular : 1;
            uint32_t pad1 : 11; /* 9 32-bit words with this */
            /* words 9..11: 9 payload bytes + 3 bytes of padding */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
126
127
static uint32_t nine_ff_vs_key_hash(const void *key)
128
{
129
const struct nine_ff_vs_key *vs = key;
130
unsigned i;
131
uint32_t hash = vs->value32[0];
132
for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
133
hash ^= vs->value32[i];
134
return hash;
135
}
136
static bool nine_ff_vs_key_comp(const void *key1, const void *key2)
137
{
138
struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
139
struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
140
141
return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
142
}
143
static uint32_t nine_ff_ps_key_hash(const void *key)
144
{
145
const struct nine_ff_ps_key *ps = key;
146
unsigned i;
147
uint32_t hash = ps->value32[0];
148
for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
149
hash ^= ps->value32[i];
150
return hash;
151
}
152
static bool nine_ff_ps_key_comp(const void *key1, const void *key2)
153
{
154
struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
155
struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
156
157
return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0;
158
}
159
static uint32_t nine_ff_fvf_key_hash(const void *key)
160
{
161
return *(DWORD *)key;
162
}
163
static bool nine_ff_fvf_key_comp(const void *key1, const void *key2)
164
{
165
return *(DWORD *)key1 == *(DWORD *)key2;
166
}
167
168
/* Forward declarations; the definitions appear later in this file. */
static void nine_ff_prune_vs(struct NineDevice9 *);
static void nine_ff_prune_ps(struct NineDevice9 *);
170
171
/* Dump the TGSI tokens of a ureg program for debugging.
 *
 * ureg:     program whose tokens are fetched with ureg_get_tokens().
 * override: when true, dump even if the NINE_FF_DUMP debug option is unset.
 *
 * The tokens fetched here are released again with ureg_free_tokens().
 * NOTE(review): the result of ureg_get_tokens() is passed to tgsi_dump()
 * without a NULL check -- confirm it cannot fail here.
 */
static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
{
    if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
        const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL);
        tgsi_dump(toks, 0);
        ureg_free_tokens(toks);
    }
}
179
180
/* Single-component swizzles of a destination register r, read back as a
 * source (r is converted with ureg_src() first). */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Single-component swizzles of a source register r. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, kept for symmetry with the macros above. */
#define _XYZW(r) (r)

/* The macros below expand to code that uses a variable named `ureg` (and,
 * for LIGHT_CONST, one named `AL`) from the scope where they are used. */

/* AL should contain base address of lights table. */
#define LIGHT_CONST(i)                                      \
    ureg_src_indirect(ureg_DECL_constant(ureg, (i)), _X(AL))

/* Material constants start at constant slot 19. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

#define _CONST(n) ureg_DECL_constant(ureg, (n))
200
201
/* VS FF constants layout:
 *
 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
 * CONST[ 8..11] D3DTS_PROJECTION
 * CONST[12..15] D3DTS_VIEW^(-1)
 * CONST[16..18] Normal matrix
 *
 * CONST[19].xyz  MATERIAL.Emissive + Material.Ambient * RS.Ambient
 * CONST[20]      MATERIAL.Diffuse
 * CONST[21]      MATERIAL.Ambient
 * CONST[22]      MATERIAL.Specular
 * CONST[23].x___ MATERIAL.Power
 * CONST[24]      MATERIAL.Emissive
 * CONST[25]      RS.Ambient
 *
 * CONST[26].x___ RS.PointSizeMin
 * CONST[26]._y__ RS.PointSizeMax
 * CONST[26].__z_ RS.PointSize
 * CONST[26].___w RS.PointScaleA
 * CONST[27].x___ RS.PointScaleB
 * CONST[27]._y__ RS.PointScaleC
 *
 * CONST[28].x___ RS.FogEnd
 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
 * CONST[28].__z_ RS.FogDensity
 *
 * CONST[30].x___ TWEENFACTOR
 *
 * CONST[32].x___ LIGHT[0].Type
 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
 * CONST[33]      LIGHT[0].Diffuse
 * CONST[34]      LIGHT[0].Specular
 * CONST[35]      LIGHT[0].Ambient
 * CONST[36].xyz_ LIGHT[0].Position
 * CONST[36].___w LIGHT[0].Range
 * CONST[37].xyz_ LIGHT[0].Direction
 * CONST[37].___w LIGHT[0].Falloff
 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
 * CONST[39].___w 1 if this is the last active light, 0 if not
 * CONST[40]      LIGHT[1]
 * CONST[48]      LIGHT[2]
 * CONST[56]      LIGHT[3]
 * CONST[64]      LIGHT[4]
 * CONST[72]      LIGHT[5]
 * CONST[80]      LIGHT[6]
 * CONST[88]      LIGHT[7]
 * NOTE: no lighting code is generated if there are no active lights
 *
 * CONST[100].x___ Viewport 2/width
 * CONST[100]._y__ Viewport 2/height
 * CONST[100].__z_ Viewport 1/(zmax - zmin)
 * CONST[100].___w Viewport width
 * CONST[101].x___ Viewport x0
 * CONST[101]._y__ Viewport y0
 * CONST[101].__z_ Viewport z0
 *
 * CONST[128..131] D3DTS_TEXTURE0
 * CONST[132..135] D3DTS_TEXTURE1
 * CONST[136..139] D3DTS_TEXTURE2
 * CONST[140..143] D3DTS_TEXTURE3
 * CONST[144..147] D3DTS_TEXTURE4
 * CONST[148..151] D3DTS_TEXTURE5
 * CONST[152..155] D3DTS_TEXTURE6
 * CONST[156..159] D3DTS_TEXTURE7
 *
 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
 * ...
 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
 */
275
struct vs_build_ctx
276
{
277
struct ureg_program *ureg;
278
const struct nine_ff_vs_key *key;
279
280
uint16_t input[PIPE_MAX_ATTRIBS];
281
unsigned num_inputs;
282
283
struct ureg_src aVtx;
284
struct ureg_src aNrm;
285
struct ureg_src aCol[2];
286
struct ureg_src aTex[8];
287
struct ureg_src aPsz;
288
struct ureg_src aInd;
289
struct ureg_src aWgt;
290
291
struct ureg_src aVtx1; /* tweening */
292
struct ureg_src aNrm1;
293
294
struct ureg_src mtlA;
295
struct ureg_src mtlD;
296
struct ureg_src mtlS;
297
struct ureg_src mtlE;
298
};
299
300
static inline unsigned
301
get_texcoord_sn(struct pipe_screen *screen)
302
{
303
if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
304
return TGSI_SEMANTIC_TEXCOORD;
305
return TGSI_SEMANTIC_GENERIC;
306
}
307
308
static inline struct ureg_src
309
build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
310
{
311
const unsigned i = vs->num_inputs++;
312
assert(i < PIPE_MAX_ATTRIBS);
313
vs->input[i] = ndecl;
314
return ureg_DECL_vs_input(vs->ureg, i);
315
}
316
317
/* NOTE: dst may alias src */
318
static inline void
319
ureg_normalize3(struct ureg_program *ureg,
320
struct ureg_dst dst, struct ureg_src src)
321
{
322
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
323
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
324
325
ureg_DP3(ureg, tmp_x, src, src);
326
ureg_RSQ(ureg, tmp_x, _X(tmp));
327
ureg_MUL(ureg, dst, src, _X(tmp));
328
ureg_release_temporary(ureg, tmp);
329
}
330
331
static void *
332
nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
333
{
334
const struct nine_ff_vs_key *key = vs->key;
335
struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
336
struct ureg_dst oPos, oCol[2], oPsz, oFog;
337
struct ureg_dst AR;
338
unsigned i, c;
339
unsigned label[32], l = 0;
340
boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
341
boolean has_aNrm;
342
boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
343
const unsigned texcoord_sn = get_texcoord_sn(device->screen);
344
345
vs->ureg = ureg;
346
347
/* Check which inputs we should transform. */
348
for (i = 0; i < 8 * 3; i += 3) {
349
switch ((key->tc_gen >> i) & 0x7) {
350
case NINED3DTSS_TCI_CAMERASPACENORMAL:
351
need_aNrm = TRUE;
352
break;
353
case NINED3DTSS_TCI_CAMERASPACEPOSITION:
354
need_aVtx = TRUE;
355
break;
356
case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
357
need_aVtx = need_aNrm = TRUE;
358
break;
359
case NINED3DTSS_TCI_SPHEREMAP:
360
need_aVtx = need_aNrm = TRUE;
361
break;
362
default:
363
break;
364
}
365
}
366
367
has_aNrm = need_aNrm && key->has_normal;
368
369
/* Declare and record used inputs (needed for linkage with vertex format):
370
* (texture coordinates handled later)
371
*/
372
vs->aVtx = build_vs_add_input(vs,
373
key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
374
375
vs->aNrm = ureg_imm1f(ureg, 0.0f);
376
if (has_aNrm)
377
vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
378
379
vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
380
vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
381
382
if (key->lighting || key->darkness) {
383
const unsigned mask = key->mtl_diffuse | key->mtl_specular |
384
key->mtl_ambient | key->mtl_emissive;
385
if ((mask & 0x1) && !key->color0in_one)
386
vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
387
if ((mask & 0x2) && !key->color1in_zero)
388
vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
389
390
vs->mtlD = MATERIAL_CONST(1);
391
vs->mtlA = MATERIAL_CONST(2);
392
vs->mtlS = MATERIAL_CONST(3);
393
vs->mtlE = MATERIAL_CONST(5);
394
if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
395
if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
396
if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
397
if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
398
if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
399
if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
400
if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
401
if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
402
} else {
403
if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
404
if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
405
}
406
407
if (key->vertexpointsize)
408
vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
409
410
if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
411
vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
412
if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
413
vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
414
if (key->vertextween) {
415
vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
416
vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
417
}
418
419
/* Declare outputs:
420
*/
421
oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
422
oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
423
oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
424
if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
425
oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16);
426
oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
427
}
428
429
if (key->vertexpointsize || key->pointscale) {
430
oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
431
TGSI_WRITEMASK_X, 0, 1);
432
oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
433
}
434
435
if (key->lighting || key->vertexblend)
436
AR = ureg_DECL_address(ureg);
437
438
/* === Vertex transformation / vertex blending:
439
*/
440
441
if (key->position_t) {
442
if (device->driver_caps.window_space_position_support) {
443
ureg_MOV(ureg, oPos, vs->aVtx);
444
} else {
445
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
446
/* vs->aVtx contains the coordinates buffer wise.
447
* later in the pipeline, clipping, viewport and division
448
* by w (rhw = 1/w) are going to be applied, so do the reverse
449
* of these transformations (except clipping) to have the good
450
* position at the end.*/
451
ureg_MOV(ureg, tmp, vs->aVtx);
452
/* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
453
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101)));
454
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
455
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f));
456
/* Y needs to be reversed */
457
ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
458
/* inverse rhw */
459
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
460
/* multiply X, Y, Z by w */
461
ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
462
ureg_MOV(ureg, oPos, ureg_src(tmp));
463
ureg_release_temporary(ureg, tmp);
464
}
465
} else if (key->vertexblend) {
466
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
467
struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
468
struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
469
struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
470
struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
471
struct ureg_src cWM[4];
472
473
for (i = 160; i <= 195; ++i)
474
ureg_DECL_constant(ureg, i);
475
476
/* translate world matrix index to constant file index */
477
if (key->vertexblend_indexed) {
478
ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
479
ureg_ARL(ureg, AR, ureg_src(tmp));
480
}
481
482
ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
483
ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
484
ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
485
486
for (i = 0; i < key->vertexblend; ++i) {
487
for (c = 0; c < 4; ++c) {
488
cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0);
489
if (key->vertexblend_indexed)
490
cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
491
}
492
493
/* multiply by WORLD(index) */
494
ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
495
ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
496
ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
497
ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
498
499
if (has_aNrm) {
500
/* Note: the spec says the transpose of the inverse of the
501
* WorldView matrices should be used, but all tests show
502
* otherwise.
503
* Only case unknown: D3DVBF_0WEIGHTS */
504
ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
505
ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
506
ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
507
}
508
509
if (i < (key->vertexblend - 1)) {
510
/* accumulate weighted position value */
511
ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
512
if (has_aNrm)
513
ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
514
/* subtract weighted position value for last value */
515
ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i)));
516
}
517
}
518
519
/* the last weighted position is always 1 - sum_of_previous_weights */
520
ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
521
if (has_aNrm)
522
ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
523
524
/* multiply by VIEW_PROJ */
525
ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
526
ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
527
ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
528
ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
529
530
if (need_aVtx)
531
vs->aVtx = ureg_src(aVtx_dst);
532
533
ureg_release_temporary(ureg, tmp);
534
ureg_release_temporary(ureg, tmp2);
535
ureg_release_temporary(ureg, sum_blendweights);
536
if (!need_aVtx)
537
ureg_release_temporary(ureg, aVtx_dst);
538
539
if (has_aNrm) {
540
if (key->normalizenormals)
541
ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
542
vs->aNrm = ureg_src(aNrm_dst);
543
} else
544
ureg_release_temporary(ureg, aNrm_dst);
545
} else {
546
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
547
548
if (key->vertextween) {
549
struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
550
ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
551
vs->aVtx = ureg_src(aVtx_dst);
552
if (has_aNrm) {
553
struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
554
ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
555
vs->aNrm = ureg_src(aNrm_dst);
556
}
557
}
558
559
/* position = vertex * WORLD_VIEW_PROJ */
560
ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
561
ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
562
ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
563
ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
564
ureg_release_temporary(ureg, tmp);
565
566
if (need_aVtx) {
567
struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
568
ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
569
ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
570
ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
571
ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
572
vs->aVtx = ureg_src(aVtx_dst);
573
}
574
if (has_aNrm) {
575
struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
576
ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
577
ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
578
ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
579
if (key->normalizenormals)
580
ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
581
vs->aNrm = ureg_src(aNrm_dst);
582
}
583
}
584
585
/* === Process point size:
586
*/
587
if (key->vertexpointsize || key->pointscale) {
588
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
589
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
590
struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
591
struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
592
if (key->vertexpointsize) {
593
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
594
ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
595
ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
596
} else {
597
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
598
ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
599
}
600
601
if (key->pointscale) {
602
struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
603
struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
604
605
ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
606
ureg_RSQ(ureg, tmp_y, _X(tmp));
607
ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
608
ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
609
ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
610
ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
611
ureg_RSQ(ureg, tmp_x, _X(tmp));
612
ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
613
ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
614
ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
615
ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
616
}
617
618
ureg_MOV(ureg, oPsz, _Z(tmp));
619
ureg_release_temporary(ureg, tmp);
620
}
621
622
for (i = 0; i < 8; ++i) {
623
struct ureg_dst tmp, tmp_x, tmp2;
624
struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
625
unsigned c, writemask;
626
const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
627
const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
628
unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
629
const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
630
631
/* No texture output of index s */
632
if (tci == NINED3DTSS_TCI_DISABLE)
633
continue;
634
oTex = ureg_DECL_output(ureg, texcoord_sn, i);
635
tmp = ureg_DECL_temporary(ureg);
636
tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
637
input_coord = ureg_DECL_temporary(ureg);
638
transformed = ureg_DECL_temporary(ureg);
639
640
/* Get the coordinate */
641
switch (tci) {
642
case NINED3DTSS_TCI_PASSTHRU:
643
/* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
644
* Else the idx is used only to determine wrapping mode. */
645
vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
646
ureg_MOV(ureg, input_coord, vs->aTex[idx]);
647
break;
648
case NINED3DTSS_TCI_CAMERASPACENORMAL:
649
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
650
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
651
dim_input = 4;
652
break;
653
case NINED3DTSS_TCI_CAMERASPACEPOSITION:
654
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
655
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
656
dim_input = 4;
657
break;
658
case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
659
tmp.WriteMask = TGSI_WRITEMASK_XYZ;
660
aVtx_normed = ureg_DECL_temporary(ureg);
661
ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
662
ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
663
ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
664
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
665
ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
666
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
667
ureg_release_temporary(ureg, aVtx_normed);
668
dim_input = 4;
669
tmp.WriteMask = TGSI_WRITEMASK_XYZW;
670
break;
671
case NINED3DTSS_TCI_SPHEREMAP:
672
/* Implement the formula of GL_SPHERE_MAP */
673
tmp.WriteMask = TGSI_WRITEMASK_XYZ;
674
aVtx_normed = ureg_DECL_temporary(ureg);
675
tmp2 = ureg_DECL_temporary(ureg);
676
ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
677
ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
678
ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
679
ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
680
ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp)));
681
/* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
682
ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
683
ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
684
ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
685
ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
686
ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
687
/* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
688
* TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
689
ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
690
ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
691
ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
692
ureg_release_temporary(ureg, aVtx_normed);
693
ureg_release_temporary(ureg, tmp2);
694
dim_input = 4;
695
tmp.WriteMask = TGSI_WRITEMASK_XYZW;
696
break;
697
default:
698
assert(0);
699
break;
700
}
701
702
/* Apply the transformation */
703
/* dim_output == 0 => do not transform the components.
704
* XYZRHW also disables transformation */
705
if (!dim_output || key->position_t) {
706
ureg_release_temporary(ureg, transformed);
707
transformed = input_coord;
708
writemask = TGSI_WRITEMASK_XYZW;
709
} else {
710
for (c = 0; c < dim_output; c++) {
711
t = ureg_writemask(transformed, 1 << c);
712
switch (dim_input) {
713
/* dim_input = 1 2 3: -> we add trailing 1 to input*/
714
case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
715
break;
716
case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
717
ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
718
break;
719
case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
720
ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
721
break;
722
case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
723
default:
724
assert(0);
725
}
726
}
727
writemask = (1 << dim_output) - 1;
728
ureg_release_temporary(ureg, input_coord);
729
}
730
731
ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
732
ureg_release_temporary(ureg, transformed);
733
ureg_release_temporary(ureg, tmp);
734
}
735
736
/* === Lighting:
737
*
738
* DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
739
* POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
740
* SPOT: Finite distance, divergent rays, angular dependence, attenuation.
741
*
742
* vec3 normal = normalize(in.Normal * NormalMatrix);
743
* vec3 hitDir = light.direction;
744
* float atten = 1.0;
745
*
746
* if (light.type != DIRECTIONAL)
747
* {
748
* vec3 hitVec = light.position - eyeVertex;
749
* float d = length(hitVec);
750
* hitDir = hitVec / d;
751
* atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
752
* }
753
*
754
* if (light.type == SPOTLIGHT)
755
* {
756
* float rho = dp3(-hitVec, light.direction);
757
* if (rho < cos(light.phi / 2))
758
* atten = 0;
759
* if (rho < cos(light.theta / 2))
760
* atten *= pow(some_func(rho), light.falloff);
761
* }
762
*
763
* float nDotHit = dp3_sat(normal, hitVec);
764
* float powFact = 0.0;
765
*
766
* if (nDotHit > 0.0)
767
* {
768
* vec3 midVec = normalize(hitDir + eye);
769
* float nDotMid = dp3_sat(normal, midVec);
770
* pFact = pow(nDotMid, material.power);
771
* }
772
*
773
* ambient += light.ambient * atten;
774
* diffuse += light.diffuse * atten * nDotHit;
775
* specular += light.specular * atten * powFact;
776
*/
777
if (key->lighting) {
778
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
779
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
780
struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
781
struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
782
struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
783
struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
784
struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
785
786
struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
787
788
struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
789
790
/* Light.*.Alpha is not used. */
791
struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
792
struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
793
struct ureg_dst rS = ureg_DECL_temporary(ureg);
794
795
struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
796
797
struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
798
struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
799
struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
800
struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
801
struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
802
struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
803
struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
804
struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
805
struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
806
struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
807
struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
808
struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
809
struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
810
struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
811
struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
812
813
const unsigned loop_label = l++;
814
815
/* Declare all light constants to allow indirect addressing */
816
for (i = 32; i < 96; i++)
817
ureg_DECL_constant(ureg, i);
818
819
ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
820
ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
821
ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
822
ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
823
824
/* loop management */
825
ureg_BGNLOOP(ureg, &label[loop_label]);
826
ureg_ARL(ureg, AL, _W(rCtr));
827
828
/* if (not DIRECTIONAL light): */
829
ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
830
ureg_MOV(ureg, rHit, ureg_negate(cLDir));
831
ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
832
ureg_IF(ureg, _X(tmp), &label[l++]);
833
{
834
/* hitDir = light.position - eyeVtx
835
* d = length(hitDir)
836
*/
837
ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx));
838
ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
839
ureg_RSQ(ureg, tmp_y, _X(tmp));
840
ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
841
842
/* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
843
ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
844
ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
845
ureg_RCP(ureg, rAtt, _W(rAtt));
846
/* cut-off if distance exceeds Light.Range */
847
ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
848
ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
849
}
850
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
851
ureg_ENDIF(ureg);
852
853
/* normalize hitDir */
854
ureg_normalize3(ureg, rHit, ureg_src(rHit));
855
856
/* if (SPOT light) */
857
ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
858
ureg_IF(ureg, _X(tmp), &label[l++]);
859
{
860
/* rho = dp3(-hitDir, light.spotDir)
861
*
862
* if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
863
* spotAtt = 1
864
* else
865
* if (rho <= light.cphi2)
866
* spotAtt = 0
867
* else
868
* spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
869
*/
870
ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
871
ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi));
872
ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
873
ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
874
ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
875
ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
876
ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
877
ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
878
}
879
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
880
ureg_ENDIF(ureg);
881
882
/* directional factors, let's not use LIT because of clarity */
883
884
if (has_aNrm) {
885
if (key->localviewer) {
886
ureg_normalize3(ureg, rMid, vs->aVtx);
887
ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
888
} else {
889
ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f));
890
}
891
ureg_normalize3(ureg, rMid, ureg_src(rMid));
892
ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
893
ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
894
ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp));
895
/* Tests show that specular is computed only if (dp3(normal,hitDir) > 0).
896
* For front facing, it is more restrictive than test (dp3(normal,mid) > 0).
897
* No tests were made for backfacing, so add the two conditions */
898
ureg_IF(ureg, _Z(tmp), &label[l++]);
899
{
900
ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
901
ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
902
ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
903
ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
904
}
905
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
906
ureg_ENDIF(ureg);
907
908
ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
909
ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
910
}
911
912
ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
913
914
/* break if this was the last light */
915
ureg_IF(ureg, cLLast, &label[l++]);
916
ureg_BRK(ureg);
917
ureg_ENDIF(ureg);
918
ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
919
920
ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
921
ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
922
ureg_ENDLOOP(ureg, &label[loop_label]);
923
924
/* Apply to material:
925
*
926
* oCol[0] = (material.emissive + material.ambient * rs.ambient) +
927
* material.ambient * ambient +
928
* material.diffuse * diffuse;
929
* oCol[1] = material.specular * specular;
930
*/
931
if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
932
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19));
933
else {
934
ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
935
ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
936
}
937
938
ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp));
939
ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
940
ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
941
ureg_release_temporary(ureg, rAtt);
942
ureg_release_temporary(ureg, rHit);
943
ureg_release_temporary(ureg, rMid);
944
ureg_release_temporary(ureg, rCtr);
945
ureg_release_temporary(ureg, rD);
946
ureg_release_temporary(ureg, rA);
947
ureg_release_temporary(ureg, rS);
948
ureg_release_temporary(ureg, rAtt);
949
ureg_release_temporary(ureg, tmp);
950
} else
951
/* COLOR */
952
if (key->darkness) {
953
if (key->mtl_emissive == 0 && key->mtl_ambient == 0)
954
ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19));
955
else
956
ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
957
ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD);
958
ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
959
} else {
960
ureg_MOV(ureg, oCol[0], vs->aCol[0]);
961
ureg_MOV(ureg, oCol[1], vs->aCol[1]);
962
}
963
964
/* === Process fog.
965
*
966
* exp(x) = ex2(log2(e) * x)
967
*/
968
if (key->fog_mode) {
969
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
970
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
971
struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
972
if (key->fog_range) {
973
ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
974
ureg_RSQ(ureg, tmp_z, _X(tmp));
975
ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
976
} else {
977
ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
978
}
979
980
if (key->fog_mode == D3DFOG_EXP) {
981
ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
982
ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
983
ureg_EX2(ureg, tmp_x, _X(tmp));
984
} else
985
if (key->fog_mode == D3DFOG_EXP2) {
986
ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
987
ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
988
ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
989
ureg_EX2(ureg, tmp_x, _X(tmp));
990
} else
991
if (key->fog_mode == D3DFOG_LINEAR) {
992
ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp)));
993
ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
994
}
995
ureg_MOV(ureg, oFog, _X(tmp));
996
ureg_release_temporary(ureg, tmp);
997
} else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
998
ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
999
}
1000
1001
if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1002
struct ureg_src input;
1003
struct ureg_dst output;
1004
input = vs->aWgt;
1005
output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1006
ureg_MOV(ureg, output, input);
1007
}
1008
if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1009
struct ureg_src input;
1010
struct ureg_dst output;
1011
input = vs->aInd;
1012
output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1013
ureg_MOV(ureg, output, input);
1014
}
1015
if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1016
struct ureg_src input;
1017
struct ureg_dst output;
1018
input = vs->aNrm;
1019
output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1020
ureg_MOV(ureg, output, input);
1021
}
1022
if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1023
struct ureg_src input;
1024
struct ureg_dst output;
1025
input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1026
output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1027
ureg_MOV(ureg, output, input);
1028
}
1029
if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1030
struct ureg_src input;
1031
struct ureg_dst output;
1032
input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1033
output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23);
1034
ureg_MOV(ureg, output, input);
1035
}
1036
if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1037
struct ureg_src input;
1038
struct ureg_dst output;
1039
input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1040
input = ureg_scalar(input, TGSI_SWIZZLE_X);
1041
output = oFog;
1042
ureg_MOV(ureg, output, input);
1043
}
1044
if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1045
(void) 0; /* TODO: replace z of position output ? */
1046
}
1047
1048
/* ucp for ff applies on world coordinates.
1049
* aVtx is in worldview coordinates. */
1050
if (key->ucp) {
1051
struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1052
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1053
ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1054
ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1055
ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1056
ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1057
ureg_release_temporary(ureg, tmp);
1058
}
1059
1060
if (key->position_t && device->driver_caps.window_space_position_support)
1061
ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1062
1063
ureg_END(ureg);
1064
nine_ureg_tgsi_dump(ureg, FALSE);
1065
return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1066
}
1067
1068
/* PS FF constants layout:
1069
*
1070
* CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
1071
* CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
1072
* CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
1073
* CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
1074
* CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
1075
* CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
1076
* CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
1077
*
1078
* CONST[20] D3DRS_TEXTUREFACTOR
1079
* CONST[21] D3DRS_FOGCOLOR
1080
* CONST[22].x___ RS.FogEnd
1081
* CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
1082
* CONST[22].__z_ RS.FogDensity
1083
*/
1084
struct ps_build_ctx
1085
{
1086
struct ureg_program *ureg;
1087
1088
struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1089
struct ureg_src vT[8]; /* TEXCOORD[i] */
1090
struct ureg_dst rCur; /* D3DTA_CURRENT */
1091
struct ureg_dst rMod;
1092
struct ureg_src rCurSrc;
1093
struct ureg_dst rTmp; /* D3DTA_TEMP */
1094
struct ureg_src rTmpSrc;
1095
struct ureg_dst rTex;
1096
struct ureg_src rTexSrc;
1097
struct ureg_src cBEM[8];
1098
struct ureg_src s[8];
1099
1100
struct {
1101
unsigned index;
1102
unsigned index_pre_mod;
1103
} stage;
1104
};
1105
1106
static struct ureg_src
1107
ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1108
{
1109
struct ureg_src reg;
1110
1111
switch (ta & D3DTA_SELECTMASK) {
1112
case D3DTA_CONSTANT:
1113
reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1114
break;
1115
case D3DTA_CURRENT:
1116
reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1117
break;
1118
case D3DTA_DIFFUSE:
1119
reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1120
break;
1121
case D3DTA_SPECULAR:
1122
reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1123
break;
1124
case D3DTA_TEMP:
1125
reg = ps->rTmpSrc;
1126
break;
1127
case D3DTA_TEXTURE:
1128
reg = ps->rTexSrc;
1129
break;
1130
case D3DTA_TFACTOR:
1131
reg = ureg_DECL_constant(ps->ureg, 20);
1132
break;
1133
default:
1134
assert(0);
1135
reg = ureg_src_undef();
1136
break;
1137
}
1138
if (ta & D3DTA_COMPLEMENT) {
1139
struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1140
ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg));
1141
reg = ureg_src(dst);
1142
}
1143
if (ta & D3DTA_ALPHAREPLICATE)
1144
reg = _WWWW(reg);
1145
return reg;
1146
}
1147
1148
static struct ureg_dst
1149
ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1150
{
1151
assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1152
1153
switch (ta & D3DTA_SELECTMASK) {
1154
case D3DTA_CURRENT:
1155
return ps->rCur;
1156
case D3DTA_TEMP:
1157
return ps->rTmp;
1158
default:
1159
assert(0);
1160
return ureg_dst_undef();
1161
}
1162
}
1163
1164
/* Return a bitmask of the arguments a texture operation reads:
 * bit 0 = arg0, bit 1 = arg1, bit 2 = arg2.
 */
static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
{
    if (top == D3DTOP_DISABLE)
        return 0x0;

    if (top == D3DTOP_SELECTARG1 || top == D3DTOP_PREMODULATE)
        return 0x2;

    if (top == D3DTOP_SELECTARG2)
        return 0x4;

    if (top == D3DTOP_MULTIPLYADD || top == D3DTOP_LERP)
        return 0x7;

    /* every remaining operation combines arg1 and arg2 */
    return 0x6;
}
1181
1182
static inline boolean
1183
is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1184
{
1185
return !dst.WriteMask ||
1186
(dst.File == src.File &&
1187
dst.Index == src.Index &&
1188
!dst.Indirect &&
1189
!dst.Saturate &&
1190
!src.Indirect &&
1191
!src.Negate &&
1192
!src.Absolute &&
1193
(!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1194
(!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1195
(!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1196
(!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1197
1198
}
1199
1200
static void
1201
ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1202
{
1203
struct ureg_program *ureg = ps->ureg;
1204
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1205
struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1206
struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1207
1208
tmp.WriteMask = dst.WriteMask;
1209
1210
if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1211
top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1212
top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1213
top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1214
top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1215
top != D3DTOP_LERP)
1216
dst = ureg_saturate(dst);
1217
1218
switch (top) {
1219
case D3DTOP_SELECTARG1:
1220
if (!is_MOV_no_op(dst, arg[1]))
1221
ureg_MOV(ureg, dst, arg[1]);
1222
break;
1223
case D3DTOP_SELECTARG2:
1224
if (!is_MOV_no_op(dst, arg[2]))
1225
ureg_MOV(ureg, dst, arg[2]);
1226
break;
1227
case D3DTOP_MODULATE:
1228
ureg_MUL(ureg, dst, arg[1], arg[2]);
1229
break;
1230
case D3DTOP_MODULATE2X:
1231
ureg_MUL(ureg, tmp, arg[1], arg[2]);
1232
ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1233
break;
1234
case D3DTOP_MODULATE4X:
1235
ureg_MUL(ureg, tmp, arg[1], arg[2]);
1236
ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1237
break;
1238
case D3DTOP_ADD:
1239
ureg_ADD(ureg, dst, arg[1], arg[2]);
1240
break;
1241
case D3DTOP_ADDSIGNED:
1242
ureg_ADD(ureg, tmp, arg[1], arg[2]);
1243
ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f));
1244
break;
1245
case D3DTOP_ADDSIGNED2X:
1246
ureg_ADD(ureg, tmp, arg[1], arg[2]);
1247
ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1248
break;
1249
case D3DTOP_SUBTRACT:
1250
ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2]));
1251
break;
1252
case D3DTOP_ADDSMOOTH:
1253
ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1254
ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1255
break;
1256
case D3DTOP_BLENDDIFFUSEALPHA:
1257
ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1258
break;
1259
case D3DTOP_BLENDTEXTUREALPHA:
1260
/* XXX: alpha taken from previous stage, texture or result ? */
1261
ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1262
break;
1263
case D3DTOP_BLENDFACTORALPHA:
1264
ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1265
break;
1266
case D3DTOP_BLENDTEXTUREALPHAPM:
1267
ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex)));
1268
ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1269
break;
1270
case D3DTOP_BLENDCURRENTALPHA:
1271
ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1272
break;
1273
case D3DTOP_PREMODULATE:
1274
ureg_MOV(ureg, dst, arg[1]);
1275
ps->stage.index_pre_mod = ps->stage.index + 1;
1276
break;
1277
case D3DTOP_MODULATEALPHA_ADDCOLOR:
1278
ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1279
break;
1280
case D3DTOP_MODULATECOLOR_ADDALPHA:
1281
ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1282
break;
1283
case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1284
ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1])));
1285
ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1286
break;
1287
case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1288
ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1]));
1289
ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1290
break;
1291
case D3DTOP_BUMPENVMAP:
1292
break;
1293
case D3DTOP_BUMPENVMAPLUMINANCE:
1294
break;
1295
case D3DTOP_DOTPRODUCT3:
1296
ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1297
ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5));
1298
ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1299
ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1300
break;
1301
case D3DTOP_MULTIPLYADD:
1302
ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1303
break;
1304
case D3DTOP_LERP:
1305
ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1306
break;
1307
case D3DTOP_DISABLE:
1308
/* no-op ? */
1309
break;
1310
default:
1311
assert(!"invalid D3DTOP");
1312
break;
1313
}
1314
ureg_release_temporary(ureg, tmp);
1315
ureg_release_temporary(ureg, tmp2);
1316
}
1317
1318
static void *
1319
nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1320
{
1321
struct ps_build_ctx ps;
1322
struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1323
struct ureg_dst oCol;
1324
unsigned s;
1325
const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1326
1327
memset(&ps, 0, sizeof(ps));
1328
ps.ureg = ureg;
1329
ps.stage.index_pre_mod = -1;
1330
1331
ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1332
1333
ps.rCur = ureg_DECL_temporary(ureg);
1334
ps.rTmp = ureg_DECL_temporary(ureg);
1335
ps.rTex = ureg_DECL_temporary(ureg);
1336
ps.rCurSrc = ureg_src(ps.rCur);
1337
ps.rTmpSrc = ureg_src(ps.rTmp);
1338
ps.rTexSrc = ureg_src(ps.rTex);
1339
1340
/* Initial values */
1341
ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1342
ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1343
ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1344
1345
for (s = 0; s < 8; ++s) {
1346
ps.s[s] = ureg_src_undef();
1347
1348
if (key->ts[s].colorop != D3DTOP_DISABLE) {
1349
if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1350
key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1351
key->ts[s].colorarg2 == D3DTA_SPECULAR)
1352
ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1353
1354
if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1355
key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1356
key->ts[s].colorarg2 == D3DTA_TEXTURE ||
1357
key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1358
key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1359
ps.s[s] = ureg_DECL_sampler(ureg, s);
1360
ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1361
}
1362
if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1363
key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1364
ps.s[s] = ureg_DECL_sampler(ureg, s);
1365
}
1366
1367
if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1368
if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1369
key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1370
key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1371
ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1372
1373
if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1374
key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1375
key->ts[s].alphaarg2 == D3DTA_TEXTURE ||
1376
key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA ||
1377
key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) {
1378
ps.s[s] = ureg_DECL_sampler(ureg, s);
1379
ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1380
}
1381
}
1382
}
1383
if (key->specular)
1384
ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1385
1386
oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1387
1388
/* Run stages.
1389
*/
1390
for (s = 0; s < 8; ++s) {
1391
unsigned colorarg[3];
1392
unsigned alphaarg[3];
1393
const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1394
const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1395
struct ureg_dst dst;
1396
struct ureg_src arg[3];
1397
1398
if (key->ts[s].colorop == D3DTOP_DISABLE) {
1399
assert (key->ts[s].alphaop == D3DTOP_DISABLE);
1400
continue;
1401
}
1402
ps.stage.index = s;
1403
1404
DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1405
nine_D3DTOP_to_str(key->ts[s].colorop),
1406
nine_D3DTOP_to_str(key->ts[s].alphaop));
1407
1408
if (!ureg_src_is_undef(ps.s[s])) {
1409
unsigned target;
1410
struct ureg_src texture_coord = ps.vT[s];
1411
struct ureg_dst delta;
1412
switch (key->ts[s].textarget) {
1413
case 0: target = TGSI_TEXTURE_1D; break;
1414
case 1: target = TGSI_TEXTURE_2D; break;
1415
case 2: target = TGSI_TEXTURE_3D; break;
1416
case 3: target = TGSI_TEXTURE_CUBE; break;
1417
/* this is a 2 bit bitfield, do I really need a default case ? */
1418
}
1419
1420
/* Modify coordinates */
1421
if (s >= 1 &&
1422
(key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1423
key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1424
delta = ureg_DECL_temporary(ureg);
1425
/* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1426
ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1427
ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1428
/* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1429
ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1430
ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1431
texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1432
ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1433
ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1434
/* Prepare luminance multiplier
1435
* t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1436
if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1437
struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1438
struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1439
1440
ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1441
}
1442
}
1443
if (key->projected & (3 << (s *2))) {
1444
unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1445
if (dim == 4)
1446
ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1447
else {
1448
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1449
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1450
ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1451
ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1452
ureg_release_temporary(ureg, tmp);
1453
}
1454
} else {
1455
ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1456
}
1457
if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1458
ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1459
}
1460
1461
if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1462
key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1463
continue;
1464
1465
dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1466
1467
if (ps.stage.index_pre_mod == ps.stage.index) {
1468
ps.rMod = ureg_DECL_temporary(ureg);
1469
ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1470
}
1471
1472
colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1473
colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1474
colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1475
alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1476
alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1477
alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1478
1479
if (key->ts[s].colorop != key->ts[s].alphaop ||
1480
colorarg[0] != alphaarg[0] ||
1481
colorarg[1] != alphaarg[1] ||
1482
colorarg[2] != alphaarg[2])
1483
dst.WriteMask = TGSI_WRITEMASK_XYZ;
1484
1485
/* Special DOTPRODUCT behaviour (see wine tests) */
1486
if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1487
dst.WriteMask = TGSI_WRITEMASK_XYZW;
1488
1489
if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1490
if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1491
if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1492
ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1493
1494
if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1495
dst.WriteMask = TGSI_WRITEMASK_W;
1496
1497
if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1498
if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1499
if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1500
ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1501
}
1502
}
1503
1504
if (key->specular)
1505
ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]);
1506
1507
/* Fog.
1508
*/
1509
if (key->fog_mode) {
1510
struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1511
struct ureg_src vPos;
1512
if (device->screen->get_param(device->screen,
1513
PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1514
vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1515
} else {
1516
vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1517
TGSI_INTERPOLATE_LINEAR);
1518
}
1519
1520
/* Source is either W or Z.
1521
* When we use vs ff,
1522
* Z is when an orthogonal projection matrix is detected,
1523
* W (WFOG) else.
1524
* Z is used for programmable vs.
1525
* Note: Tests indicate that the projection matrix coefficients do
1526
* actually affect pixel fog (and not vertex fog) when vs ff is used,
1527
* which justifies taking the position's w instead of taking the z coordinate
1528
* before the projection in the vs shader.
1529
*/
1530
if (!key->fog_source)
1531
ureg_MOV(ureg, rFog, _ZZZZ(vPos));
1532
else
1533
/* Position's w is 1/w */
1534
ureg_RCP(ureg, rFog, _WWWW(vPos));
1535
1536
if (key->fog_mode == D3DFOG_EXP) {
1537
ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1538
ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1539
ureg_EX2(ureg, rFog, _X(rFog));
1540
} else
1541
if (key->fog_mode == D3DFOG_EXP2) {
1542
ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22)));
1543
ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1544
ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1545
ureg_EX2(ureg, rFog, _X(rFog));
1546
} else
1547
if (key->fog_mode == D3DFOG_LINEAR) {
1548
ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog)));
1549
ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1550
}
1551
ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1552
ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1553
} else
1554
if (key->fog) {
1555
struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE);
1556
ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1557
ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1558
} else {
1559
ureg_MOV(ureg, oCol, ps.rCurSrc);
1560
}
1561
1562
ureg_END(ureg);
1563
nine_ureg_tgsi_dump(ureg, FALSE);
1564
return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL);
1565
}
1566
1567
static struct NineVertexShader9 *
1568
nine_ff_get_vs(struct NineDevice9 *device)
1569
{
1570
const struct nine_context *context = &device->context;
1571
struct NineVertexShader9 *vs;
1572
struct vs_build_ctx bld;
1573
struct nine_ff_vs_key key;
1574
unsigned s, i;
1575
boolean has_indexes = false;
1576
boolean has_weights = false;
1577
char input_texture_coord[8];
1578
1579
assert(sizeof(key) <= sizeof(key.value32));
1580
1581
memset(&key, 0, sizeof(key));
1582
memset(&bld, 0, sizeof(bld));
1583
memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1584
1585
bld.key = &key;
1586
1587
/* FIXME: this shouldn't be NULL, but it is on init */
1588
if (context->vdecl) {
1589
key.color0in_one = 1;
1590
key.color1in_zero = 1;
1591
for (i = 0; i < context->vdecl->nelems; i++) {
1592
uint16_t usage = context->vdecl->usage_map[i];
1593
if (usage == NINE_DECLUSAGE_POSITIONT)
1594
key.position_t = 1;
1595
else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1596
key.color0in_one = 0;
1597
else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1598
key.color1in_zero = 0;
1599
else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1600
has_indexes = true;
1601
key.passthrough |= 1 << usage;
1602
} else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1603
has_weights = true;
1604
key.passthrough |= 1 << usage;
1605
} else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1606
key.has_normal = 1;
1607
key.passthrough |= 1 << usage;
1608
} else if (usage == NINE_DECLUSAGE_PSIZE)
1609
key.vertexpointsize = 1;
1610
else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1611
s = usage / NINE_DECLUSAGE_COUNT;
1612
if (s < 8)
1613
input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type);
1614
else
1615
DBG("FF given texture coordinate >= 8. Ignoring\n");
1616
} else if (usage < NINE_DECLUSAGE_NONE)
1617
key.passthrough |= 1 << usage;
1618
}
1619
}
1620
/* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1621
* We do restrict to indices 0 */
1622
key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1623
(1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1624
(1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1625
if (!key.position_t)
1626
key.passthrough = 0;
1627
key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE];
1628
1629
key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active;
1630
key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active;
1631
if (key.position_t) {
1632
key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1633
key.lighting = 0;
1634
}
1635
if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) {
1636
uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1637
key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1638
key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1639
key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1640
key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1641
}
1642
key.fog = !!context->rs[D3DRS_FOGENABLE];
1643
key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0;
1644
if (key.fog_mode)
1645
key.fog_range = context->rs[D3DRS_RANGEFOGENABLE];
1646
1647
key.localviewer = !!context->rs[D3DRS_LOCALVIEWER];
1648
key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS];
1649
key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE];
1650
1651
if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1652
key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1653
1654
switch (context->rs[D3DRS_VERTEXBLEND]) {
1655
case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1656
case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1657
case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1658
case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1659
case D3DVBF_TWEENING: key.vertextween = 1; break;
1660
default:
1661
assert(!"invalid D3DVBF");
1662
break;
1663
}
1664
if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1665
key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1666
}
1667
1668
for (s = 0; s < 8; ++s) {
1669
unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1670
unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7;
1671
unsigned dim;
1672
1673
if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1674
gen = NINED3DTSS_TCI_PASSTHRU;
1675
1676
if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU)
1677
gen = NINED3DTSS_TCI_DISABLE;
1678
1679
key.tc_gen |= gen << (s * 3);
1680
key.tc_idx |= idx << (s * 3);
1681
key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2);
1682
1683
dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1684
if (dim > 4)
1685
dim = input_texture_coord[idx];
1686
if (dim == 1) /* NV behaviour */
1687
dim = 0;
1688
key.tc_dim_output |= dim << (s * 3);
1689
}
1690
1691
DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key));
1692
vs = util_hash_table_get(device->ff.ht_vs, &key);
1693
if (vs)
1694
return vs;
1695
NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1696
1697
nine_ff_prune_vs(device);
1698
if (vs) {
1699
unsigned n;
1700
1701
memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1702
1703
_mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs);
1704
device->ff.num_vs++;
1705
1706
vs->num_inputs = bld.num_inputs;
1707
for (n = 0; n < bld.num_inputs; ++n)
1708
vs->input_map[n].ndecl = bld.input[n];
1709
1710
vs->position_t = key.position_t;
1711
vs->point_size = key.vertexpointsize | key.pointscale;
1712
}
1713
return vs;
1714
}
1715
1716
/* Fetch the D3DTS_<n> transform matrix from the fixed-function state.
 * Expands against a local variable named `context`; FALSE is forwarded
 * as the third argument of nine_state_access_transform(). */
#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE)
1717
/* Non-zero when the bit of D3DTS_<n> is set in (s)->ff.changed.transform.
 * The bit lives in word (id / 32) at position (id % 32). */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1718
1719
static struct NinePixelShader9 *
1720
nine_ff_get_ps(struct NineDevice9 *device)
1721
{
1722
struct nine_context *context = &device->context;
1723
D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION);
1724
struct NinePixelShader9 *ps;
1725
struct nine_ff_ps_key key;
1726
unsigned s;
1727
uint8_t sampler_mask = 0;
1728
1729
assert(sizeof(key) <= sizeof(key.value32));
1730
1731
memset(&key, 0, sizeof(key));
1732
for (s = 0; s < 8; ++s) {
1733
key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP];
1734
key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP];
1735
const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1736
const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1737
/* MSDN says D3DTOP_DISABLE disables this and all subsequent stages.
1738
* ALPHAOP cannot be enabled if COLOROP is disabled.
1739
* Verified on Windows. */
1740
if (key.ts[s].colorop == D3DTOP_DISABLE) {
1741
key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1742
break;
1743
}
1744
1745
if (!context->texture[s].enabled &&
1746
((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE &&
1747
used_c & 0x1) ||
1748
(context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE &&
1749
used_c & 0x2) ||
1750
(context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE &&
1751
used_c & 0x4))) {
1752
/* Tested on Windows: Invalid texture read disables the stage
1753
* and the subsequent ones, but only for colorop. For alpha,
1754
* it's as if the texture had alpha of 1.0, which is what
1755
* has our dummy texture in that case. Invalid color also
1756
* disabled the following alpha stages. */
1757
key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1758
break;
1759
}
1760
1761
if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1762
context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1763
context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1764
context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1765
context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1766
context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1767
sampler_mask |= (1 << s);
1768
1769
if (key.ts[s].colorop != D3DTOP_DISABLE) {
1770
if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7;
1771
if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7;
1772
if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7;
1773
if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s;
1774
if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s;
1775
if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s;
1776
if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s;
1777
if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s;
1778
if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s;
1779
}
1780
if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1781
if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7;
1782
if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7;
1783
if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7;
1784
if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s;
1785
if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s;
1786
if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s;
1787
}
1788
key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1789
1790
if (context->texture[s].enabled) {
1791
switch (context->texture[s].type) {
1792
case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1793
case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1794
case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1795
default:
1796
assert(!"unexpected texture type");
1797
break;
1798
}
1799
} else {
1800
key.ts[s].textarget = 1;
1801
}
1802
}
1803
1804
/* Note: If colorop is D3DTOP_DISABLE for the first stage
1805
* (which implies alphaop is too), nothing particular happens,
1806
* that is, current is equal to diffuse (which is the case anyway,
1807
* because it is how it is initialized).
1808
* Special case seems if alphaop is D3DTOP_DISABLE and not colorop,
1809
* because then if the resultarg is TEMP, then diffuse alpha is written
1810
* to it. */
1811
if (key.ts[0].colorop != D3DTOP_DISABLE &&
1812
key.ts[0].alphaop == D3DTOP_DISABLE &&
1813
key.ts[0].resultarg != 0) {
1814
key.ts[0].alphaop = D3DTOP_SELECTARG1;
1815
key.ts[0].alphaarg1 = D3DTA_DIFFUSE;
1816
}
1817
/* When no alpha stage writes to current, diffuse alpha is taken.
1818
* Since we initialize current to diffuse, we have the behaviour. */
1819
1820
/* Last stage always writes to Current */
1821
if (s >= 1)
1822
key.ts[s-1].resultarg = 0;
1823
1824
key.projected = nine_ff_get_projected_key_ff(context);
1825
key.specular = !!context->rs[D3DRS_SPECULARENABLE];
1826
1827
for (; s < 8; ++s)
1828
key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1829
if (context->rs[D3DRS_FOGENABLE])
1830
key.fog_mode = context->rs[D3DRS_FOGTABLEMODE];
1831
key.fog = !!context->rs[D3DRS_FOGENABLE];
1832
/* Pixel fog (with WFOG advertised): source is either Z or W.
1833
* W is the source if vs ff is used, and the
1834
* projection matrix is not orthogonal.
1835
* Tests on Win 10 seem to indicate _34
1836
* and _33 are checked against 0, 1. */
1837
if (key.fog_mode && key.fog)
1838
key.fog_source = !context->programmable_vs &&
1839
!(projection_matrix->_34 == 0.0f &&
1840
projection_matrix->_44 == 1.0f);
1841
1842
DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key));
1843
ps = util_hash_table_get(device->ff.ht_ps, &key);
1844
if (ps)
1845
return ps;
1846
NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1847
1848
nine_ff_prune_ps(device);
1849
if (ps) {
1850
memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1851
1852
_mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps);
1853
device->ff.num_ps++;
1854
1855
ps->rt_mask = 0x1;
1856
ps->sampler_mask = sampler_mask;
1857
}
1858
return ps;
1859
}
1860
1861
static void
1862
nine_ff_load_vs_transforms(struct NineDevice9 *device)
1863
{
1864
struct nine_context *context = &device->context;
1865
D3DMATRIX T;
1866
D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1867
unsigned i;
1868
1869
/* TODO: make this nicer, and only upload the ones we need */
1870
/* TODO: use ff.vs_const as storage of W, V, P matrices */
1871
1872
if (IS_D3DTS_DIRTY(context, WORLD) ||
1873
IS_D3DTS_DIRTY(context, VIEW) ||
1874
IS_D3DTS_DIRTY(context, PROJECTION)) {
1875
/* WVP, WV matrices */
1876
nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1877
nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1878
1879
/* normal matrix == transpose(inverse(WV)) */
1880
nine_d3d_matrix_inverse(&T, &M[1]);
1881
nine_d3d_matrix_transpose(&M[4], &T);
1882
1883
/* P matrix */
1884
M[2] = *GET_D3DTS(PROJECTION);
1885
1886
/* V and W matrix */
1887
nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1888
M[40] = M[1];
1889
}
1890
1891
if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1892
/* load other world matrices */
1893
for (i = 1; i <= 8; ++i) {
1894
nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1895
}
1896
}
1897
1898
device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]);
1899
}
1900
1901
static void
1902
nine_ff_load_lights(struct NineDevice9 *device)
1903
{
1904
struct nine_context *context = &device->context;
1905
struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1906
unsigned l;
1907
1908
if (context->changed.group & NINE_STATE_FF_MATERIAL) {
1909
const D3DMATERIAL9 *mtl = &context->ff.material;
1910
1911
memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1912
memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1913
memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1914
dst[23].x = mtl->Power;
1915
memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1916
d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]);
1917
dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1918
dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1919
dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1920
}
1921
1922
if (!(context->changed.group & NINE_STATE_FF_LIGHTING))
1923
return;
1924
1925
for (l = 0; l < context->ff.num_lights_active; ++l) {
1926
const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]];
1927
1928
dst[32 + l * 8].x = light->Type;
1929
dst[32 + l * 8].y = light->Attenuation0;
1930
dst[32 + l * 8].z = light->Attenuation1;
1931
dst[32 + l * 8].w = light->Attenuation2;
1932
memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1933
memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1934
memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1935
nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1936
nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1937
dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1938
dst[37 + l * 8].w = light->Falloff;
1939
dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1940
dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1941
dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1942
dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active);
1943
}
1944
}
1945
1946
static void
1947
nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1948
{
1949
struct nine_context *context = &device->context;
1950
struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1951
1952
if (!(context->changed.group & NINE_STATE_FF_VS_OTHER))
1953
return;
1954
dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]);
1955
dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]);
1956
dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]);
1957
dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]);
1958
dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]);
1959
dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]);
1960
dst[28].x = asfloat(context->rs[D3DRS_FOGEND]);
1961
dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
1962
if (isinf(dst[28].y))
1963
dst[28].y = 0.0f;
1964
dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
1965
}
1966
1967
static void
1968
nine_ff_load_tex_matrices(struct NineDevice9 *device)
1969
{
1970
struct nine_context *context = &device->context;
1971
D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1972
unsigned s;
1973
1974
if (!(context->ff.changed.transform[0] & 0xff0000))
1975
return;
1976
for (s = 0; s < 8; ++s) {
1977
if (IS_D3DTS_DIRTY(context, TEXTURE0 + s))
1978
nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE));
1979
}
1980
}
1981
1982
static void
1983
nine_ff_load_ps_params(struct NineDevice9 *device)
1984
{
1985
struct nine_context *context = &device->context;
1986
struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1987
unsigned s;
1988
1989
if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS))
1990
return;
1991
1992
for (s = 0; s < 8; ++s)
1993
d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]);
1994
1995
for (s = 0; s < 8; ++s) {
1996
dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1997
dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1998
dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1999
dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
2000
if (s & 1) {
2001
dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2002
dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2003
} else {
2004
dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
2005
dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
2006
}
2007
}
2008
2009
d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]);
2010
d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]);
2011
dst[22].x = asfloat(context->rs[D3DRS_FOGEND]);
2012
dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART]));
2013
dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]);
2014
}
2015
2016
static void
2017
nine_ff_load_viewport_info(struct NineDevice9 *device)
2018
{
2019
D3DVIEWPORT9 *viewport = &device->context.viewport;
2020
struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
2021
float diffZ = viewport->MaxZ - viewport->MinZ;
2022
2023
/* Note: the other functions avoids to fill the const again if nothing changed.
2024
* But we don't have much to fill, and adding code to allow that may be complex
2025
* so just fill it always */
2026
dst[100].x = 2.0f / (float)(viewport->Width);
2027
dst[100].y = 2.0f / (float)(viewport->Height);
2028
dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2029
dst[100].w = (float)(viewport->Width);
2030
dst[101].x = (float)(viewport->X);
2031
dst[101].y = (float)(viewport->Y);
2032
dst[101].z = (float)(viewport->MinZ);
2033
}
2034
2035
void
2036
nine_ff_update(struct NineDevice9 *device)
2037
{
2038
struct nine_context *context = &device->context;
2039
struct pipe_constant_buffer cb;
2040
2041
DBG("vs=%p ps=%p\n", context->vs, context->ps);
2042
2043
/* NOTE: the only reference belongs to the hash table */
2044
if (!context->programmable_vs) {
2045
device->ff.vs = nine_ff_get_vs(device);
2046
context->changed.group |= NINE_STATE_VS;
2047
}
2048
if (!context->ps) {
2049
device->ff.ps = nine_ff_get_ps(device);
2050
context->changed.group |= NINE_STATE_PS;
2051
}
2052
2053
if (!context->programmable_vs) {
2054
nine_ff_load_vs_transforms(device);
2055
nine_ff_load_tex_matrices(device);
2056
nine_ff_load_lights(device);
2057
nine_ff_load_point_and_fog_params(device);
2058
nine_ff_load_viewport_info(device);
2059
2060
memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform));
2061
2062
cb.buffer_offset = 0;
2063
cb.buffer = NULL;
2064
cb.user_buffer = device->ff.vs_const;
2065
cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2066
2067
context->pipe_data.cb_vs_ff = cb;
2068
context->commit |= NINE_STATE_COMMIT_CONST_VS;
2069
2070
context->changed.group &= ~NINE_STATE_FF_VS;
2071
}
2072
2073
if (!context->ps) {
2074
nine_ff_load_ps_params(device);
2075
2076
cb.buffer_offset = 0;
2077
cb.buffer = NULL;
2078
cb.user_buffer = device->ff.ps_const;
2079
cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2080
2081
context->pipe_data.cb_ps_ff = cb;
2082
context->commit |= NINE_STATE_COMMIT_CONST_PS;
2083
2084
context->changed.group &= ~NINE_STATE_FF_PS;
2085
}
2086
}
2087
2088
2089
boolean
2090
nine_ff_init(struct NineDevice9 *device)
2091
{
2092
device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash,
2093
nine_ff_vs_key_comp);
2094
device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash,
2095
nine_ff_ps_key_comp);
2096
2097
device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash,
2098
nine_ff_fvf_key_comp);
2099
2100
device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2101
device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2102
2103
return device->ff.ht_vs && device->ff.ht_ps &&
2104
device->ff.ht_fvf &&
2105
device->ff.vs_const && device->ff.ps_const;
2106
}
2107
2108
static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2109
{
2110
NineUnknown_Unbind(NineUnknown(value));
2111
return PIPE_OK;
2112
}
2113
2114
void
2115
nine_ff_fini(struct NineDevice9 *device)
2116
{
2117
if (device->ff.ht_vs) {
2118
util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2119
_mesa_hash_table_destroy(device->ff.ht_vs, NULL);
2120
}
2121
if (device->ff.ht_ps) {
2122
util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2123
_mesa_hash_table_destroy(device->ff.ht_ps, NULL);
2124
}
2125
if (device->ff.ht_fvf) {
2126
util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2127
_mesa_hash_table_destroy(device->ff.ht_fvf, NULL);
2128
}
2129
device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2130
device->ff.ps = NULL;
2131
2132
FREE(device->ff.vs_const);
2133
FREE(device->ff.ps_const);
2134
}
2135
2136
static void
2137
nine_ff_prune_vs(struct NineDevice9 *device)
2138
{
2139
struct nine_context *context = &device->context;
2140
2141
if (device->ff.num_vs > 1024) {
2142
/* could destroy the bound one here, so unbind */
2143
context->pipe->bind_vs_state(context->pipe, NULL);
2144
util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2145
_mesa_hash_table_clear(device->ff.ht_vs, NULL);
2146
device->ff.num_vs = 0;
2147
context->changed.group |= NINE_STATE_VS;
2148
}
2149
}
2150
static void
2151
nine_ff_prune_ps(struct NineDevice9 *device)
2152
{
2153
struct nine_context *context = &device->context;
2154
2155
if (device->ff.num_ps > 1024) {
2156
/* could destroy the bound one here, so unbind */
2157
context->pipe->bind_fs_state(context->pipe, NULL);
2158
util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2159
_mesa_hash_table_clear(device->ff.ht_ps, NULL);
2160
device->ff.num_ps = 0;
2161
context->changed.group |= NINE_STATE_PS;
2162
}
2163
}
2164
2165
/* ========================================================================== */
2166
2167
/* Matrix multiplication:
2168
*
2169
* in memory: 0 1 2 3 (row major)
2170
* 4 5 6 7
2171
* 8 9 a b
2172
* c d e f
2173
*
2174
* cA cB cC cD
2175
* r0 = (r0 * cA) (r0 * cB) . .
2176
* r1 = (r1 * cA) (r1 * cB)
2177
* r2 = (r2 * cA) .
2178
* r3 = (r3 * cA) .
2179
*
2180
* r: (11) (12) (13) (14)
2181
* (21) (22) (23) (24)
2182
* (31) (32) (33) (34)
2183
* (41) (42) (43) (44)
2184
* l: (11 12 13 14)
2185
* (21 22 23 24)
2186
* (31 32 33 34)
2187
* (41 42 43 44)
2188
*
2189
* v: (x y z 1 )
2190
*
2191
* t.xyzw = MUL(v.xxxx, r[0]);
2192
* t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2193
* t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2194
* v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2195
*
2196
* v.x = DP4(v, c[0]);
2197
* v.y = DP4(v, c[1]);
2198
* v.z = DP4(v, c[2]);
2199
* v.w = DP4(v, c[3]) = 1
2200
*/
2201
2202
/*
2203
static void
2204
nine_D3DMATRIX_print(const D3DMATRIX *M)
2205
{
2206
DBG("\n(%f %f %f %f)\n"
2207
"(%f %f %f %f)\n"
2208
"(%f %f %f %f)\n"
2209
"(%f %f %f %f)\n",
2210
M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2211
M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2212
M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2213
M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2214
}
2215
*/
2216
2217
static inline float
2218
nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2219
{
2220
return A->m[r][0] * B->m[0][c] +
2221
A->m[r][1] * B->m[1][c] +
2222
A->m[r][2] * B->m[2][c] +
2223
A->m[r][3] * B->m[3][c];
2224
}
2225
2226
static inline float
2227
nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2228
{
2229
return v->x * M->m[0][c] +
2230
v->y * M->m[1][c] +
2231
v->z * M->m[2][c] +
2232
1.0f * M->m[3][c];
2233
}
2234
2235
static inline float
2236
nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2237
{
2238
return v->x * M->m[0][c] +
2239
v->y * M->m[1][c] +
2240
v->z * M->m[2][c];
2241
}
2242
2243
void
2244
nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2245
{
2246
D->_11 = nine_DP4_row_col(L, 0, R, 0);
2247
D->_12 = nine_DP4_row_col(L, 0, R, 1);
2248
D->_13 = nine_DP4_row_col(L, 0, R, 2);
2249
D->_14 = nine_DP4_row_col(L, 0, R, 3);
2250
2251
D->_21 = nine_DP4_row_col(L, 1, R, 0);
2252
D->_22 = nine_DP4_row_col(L, 1, R, 1);
2253
D->_23 = nine_DP4_row_col(L, 1, R, 2);
2254
D->_24 = nine_DP4_row_col(L, 1, R, 3);
2255
2256
D->_31 = nine_DP4_row_col(L, 2, R, 0);
2257
D->_32 = nine_DP4_row_col(L, 2, R, 1);
2258
D->_33 = nine_DP4_row_col(L, 2, R, 2);
2259
D->_34 = nine_DP4_row_col(L, 2, R, 3);
2260
2261
D->_41 = nine_DP4_row_col(L, 3, R, 0);
2262
D->_42 = nine_DP4_row_col(L, 3, R, 1);
2263
D->_43 = nine_DP4_row_col(L, 3, R, 2);
2264
D->_44 = nine_DP4_row_col(L, 3, R, 3);
2265
}
2266
2267
void
2268
nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2269
{
2270
d->x = nine_DP4_vec_col(v, M, 0);
2271
d->y = nine_DP4_vec_col(v, M, 1);
2272
d->z = nine_DP4_vec_col(v, M, 2);
2273
}
2274
2275
void
2276
nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2277
{
2278
d->x = nine_DP3_vec_col(v, M, 0);
2279
d->y = nine_DP3_vec_col(v, M, 1);
2280
d->z = nine_DP3_vec_col(v, M, 2);
2281
}
2282
2283
void
2284
nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2285
{
2286
unsigned i, j;
2287
for (i = 0; i < 4; ++i)
2288
for (j = 0; j < 4; ++j)
2289
D->m[i][j] = M->m[j][i];
2290
}
2291
2292
#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2293
float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2294
if (t > 0.0f) pos += t; else neg += t; } while(0)
2295
2296
#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2297
float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2298
if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2299
float
2300
nine_d3d_matrix_det(const D3DMATRIX *M)
2301
{
2302
float pos = 0.0f;
2303
float neg = 0.0f;
2304
2305
_M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2306
_M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2307
_M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2308
2309
_M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2310
_M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2311
_M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2312
2313
_M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2314
_M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2315
_M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2316
2317
_M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2318
_M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2319
_M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2320
2321
_M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2322
_M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2323
_M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2324
2325
_M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2326
_M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2327
_M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2328
2329
_M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2330
_M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2331
_M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2332
2333
_M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2334
_M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2335
_M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2336
2337
return pos + neg;
2338
}
2339
2340
/* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2341
* I have no idea where this code came from.
2342
*/
2343
void
2344
nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2345
{
2346
int i, k;
2347
float det;
2348
2349
D->m[0][0] =
2350
M->m[1][1] * M->m[2][2] * M->m[3][3] -
2351
M->m[1][1] * M->m[3][2] * M->m[2][3] -
2352
M->m[1][2] * M->m[2][1] * M->m[3][3] +
2353
M->m[1][2] * M->m[3][1] * M->m[2][3] +
2354
M->m[1][3] * M->m[2][1] * M->m[3][2] -
2355
M->m[1][3] * M->m[3][1] * M->m[2][2];
2356
2357
D->m[0][1] =
2358
-M->m[0][1] * M->m[2][2] * M->m[3][3] +
2359
M->m[0][1] * M->m[3][2] * M->m[2][3] +
2360
M->m[0][2] * M->m[2][1] * M->m[3][3] -
2361
M->m[0][2] * M->m[3][1] * M->m[2][3] -
2362
M->m[0][3] * M->m[2][1] * M->m[3][2] +
2363
M->m[0][3] * M->m[3][1] * M->m[2][2];
2364
2365
D->m[0][2] =
2366
M->m[0][1] * M->m[1][2] * M->m[3][3] -
2367
M->m[0][1] * M->m[3][2] * M->m[1][3] -
2368
M->m[0][2] * M->m[1][1] * M->m[3][3] +
2369
M->m[0][2] * M->m[3][1] * M->m[1][3] +
2370
M->m[0][3] * M->m[1][1] * M->m[3][2] -
2371
M->m[0][3] * M->m[3][1] * M->m[1][2];
2372
2373
D->m[0][3] =
2374
-M->m[0][1] * M->m[1][2] * M->m[2][3] +
2375
M->m[0][1] * M->m[2][2] * M->m[1][3] +
2376
M->m[0][2] * M->m[1][1] * M->m[2][3] -
2377
M->m[0][2] * M->m[2][1] * M->m[1][3] -
2378
M->m[0][3] * M->m[1][1] * M->m[2][2] +
2379
M->m[0][3] * M->m[2][1] * M->m[1][2];
2380
2381
D->m[1][0] =
2382
-M->m[1][0] * M->m[2][2] * M->m[3][3] +
2383
M->m[1][0] * M->m[3][2] * M->m[2][3] +
2384
M->m[1][2] * M->m[2][0] * M->m[3][3] -
2385
M->m[1][2] * M->m[3][0] * M->m[2][3] -
2386
M->m[1][3] * M->m[2][0] * M->m[3][2] +
2387
M->m[1][3] * M->m[3][0] * M->m[2][2];
2388
2389
D->m[1][1] =
2390
M->m[0][0] * M->m[2][2] * M->m[3][3] -
2391
M->m[0][0] * M->m[3][2] * M->m[2][3] -
2392
M->m[0][2] * M->m[2][0] * M->m[3][3] +
2393
M->m[0][2] * M->m[3][0] * M->m[2][3] +
2394
M->m[0][3] * M->m[2][0] * M->m[3][2] -
2395
M->m[0][3] * M->m[3][0] * M->m[2][2];
2396
2397
D->m[1][2] =
2398
-M->m[0][0] * M->m[1][2] * M->m[3][3] +
2399
M->m[0][0] * M->m[3][2] * M->m[1][3] +
2400
M->m[0][2] * M->m[1][0] * M->m[3][3] -
2401
M->m[0][2] * M->m[3][0] * M->m[1][3] -
2402
M->m[0][3] * M->m[1][0] * M->m[3][2] +
2403
M->m[0][3] * M->m[3][0] * M->m[1][2];
2404
2405
D->m[1][3] =
2406
M->m[0][0] * M->m[1][2] * M->m[2][3] -
2407
M->m[0][0] * M->m[2][2] * M->m[1][3] -
2408
M->m[0][2] * M->m[1][0] * M->m[2][3] +
2409
M->m[0][2] * M->m[2][0] * M->m[1][3] +
2410
M->m[0][3] * M->m[1][0] * M->m[2][2] -
2411
M->m[0][3] * M->m[2][0] * M->m[1][2];
2412
2413
D->m[2][0] =
2414
M->m[1][0] * M->m[2][1] * M->m[3][3] -
2415
M->m[1][0] * M->m[3][1] * M->m[2][3] -
2416
M->m[1][1] * M->m[2][0] * M->m[3][3] +
2417
M->m[1][1] * M->m[3][0] * M->m[2][3] +
2418
M->m[1][3] * M->m[2][0] * M->m[3][1] -
2419
M->m[1][3] * M->m[3][0] * M->m[2][1];
2420
2421
D->m[2][1] =
2422
-M->m[0][0] * M->m[2][1] * M->m[3][3] +
2423
M->m[0][0] * M->m[3][1] * M->m[2][3] +
2424
M->m[0][1] * M->m[2][0] * M->m[3][3] -
2425
M->m[0][1] * M->m[3][0] * M->m[2][3] -
2426
M->m[0][3] * M->m[2][0] * M->m[3][1] +
2427
M->m[0][3] * M->m[3][0] * M->m[2][1];
2428
2429
D->m[2][2] =
2430
M->m[0][0] * M->m[1][1] * M->m[3][3] -
2431
M->m[0][0] * M->m[3][1] * M->m[1][3] -
2432
M->m[0][1] * M->m[1][0] * M->m[3][3] +
2433
M->m[0][1] * M->m[3][0] * M->m[1][3] +
2434
M->m[0][3] * M->m[1][0] * M->m[3][1] -
2435
M->m[0][3] * M->m[3][0] * M->m[1][1];
2436
2437
D->m[2][3] =
2438
-M->m[0][0] * M->m[1][1] * M->m[2][3] +
2439
M->m[0][0] * M->m[2][1] * M->m[1][3] +
2440
M->m[0][1] * M->m[1][0] * M->m[2][3] -
2441
M->m[0][1] * M->m[2][0] * M->m[1][3] -
2442
M->m[0][3] * M->m[1][0] * M->m[2][1] +
2443
M->m[0][3] * M->m[2][0] * M->m[1][1];
2444
2445
D->m[3][0] =
2446
-M->m[1][0] * M->m[2][1] * M->m[3][2] +
2447
M->m[1][0] * M->m[3][1] * M->m[2][2] +
2448
M->m[1][1] * M->m[2][0] * M->m[3][2] -
2449
M->m[1][1] * M->m[3][0] * M->m[2][2] -
2450
M->m[1][2] * M->m[2][0] * M->m[3][1] +
2451
M->m[1][2] * M->m[3][0] * M->m[2][1];
2452
2453
D->m[3][1] =
2454
M->m[0][0] * M->m[2][1] * M->m[3][2] -
2455
M->m[0][0] * M->m[3][1] * M->m[2][2] -
2456
M->m[0][1] * M->m[2][0] * M->m[3][2] +
2457
M->m[0][1] * M->m[3][0] * M->m[2][2] +
2458
M->m[0][2] * M->m[2][0] * M->m[3][1] -
2459
M->m[0][2] * M->m[3][0] * M->m[2][1];
2460
2461
D->m[3][2] =
2462
-M->m[0][0] * M->m[1][1] * M->m[3][2] +
2463
M->m[0][0] * M->m[3][1] * M->m[1][2] +
2464
M->m[0][1] * M->m[1][0] * M->m[3][2] -
2465
M->m[0][1] * M->m[3][0] * M->m[1][2] -
2466
M->m[0][2] * M->m[1][0] * M->m[3][1] +
2467
M->m[0][2] * M->m[3][0] * M->m[1][1];
2468
2469
D->m[3][3] =
2470
M->m[0][0] * M->m[1][1] * M->m[2][2] -
2471
M->m[0][0] * M->m[2][1] * M->m[1][2] -
2472
M->m[0][1] * M->m[1][0] * M->m[2][2] +
2473
M->m[0][1] * M->m[2][0] * M->m[1][2] +
2474
M->m[0][2] * M->m[1][0] * M->m[2][1] -
2475
M->m[0][2] * M->m[2][0] * M->m[1][1];
2476
2477
det =
2478
M->m[0][0] * D->m[0][0] +
2479
M->m[1][0] * D->m[0][1] +
2480
M->m[2][0] * D->m[0][2] +
2481
M->m[3][0] * D->m[0][3];
2482
2483
if (fabsf(det) < 1e-30) {/* non inversible */
2484
*D = *M; /* wine tests */
2485
return;
2486
}
2487
2488
det = 1.0 / det;
2489
2490
for (i = 0; i < 4; i++)
2491
for (k = 0; k < 4; k++)
2492
D->m[i][k] *= det;
2493
2494
#if defined(DEBUG) || !defined(NDEBUG)
2495
{
2496
D3DMATRIX I;
2497
2498
nine_d3d_matrix_matrix_mul(&I, D, M);
2499
2500
for (i = 0; i < 4; ++i)
2501
for (k = 0; k < 4; ++k)
2502
if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2503
DBG("Matrix inversion check FAILED !\n");
2504
}
2505
#endif
2506
}
2507
2508