Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/frontends/d3d10umd/ShaderTGSI.c
4565 views
1
/**************************************************************************
2
*
3
* Copyright 2012-2021 VMware, Inc.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17
* THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20
* USE OR OTHER DEALINGS IN THE SOFTWARE.
21
*
22
* The above copyright notice and this permission notice (including the
23
* next paragraph) shall be included in all copies or substantial portions
24
* of the Software.
25
*
26
**************************************************************************/
27
28
/*
29
* ShaderTGSI.c --
30
* Functions for translating shaders.
31
*/
32
33
#include "Debug.h"
34
#include "ShaderParse.h"
35
36
#include "pipe/p_state.h"
37
#include "tgsi/tgsi_ureg.h"
38
#include "tgsi/tgsi_dump.h"
39
#include "util/u_memory.h"
40
41
#include "ShaderDump.h"
42
43
44
/*
 * Number format associated with an opcode in the translation table.
 * translate_src_operand() uses a value of this type to decide whether a
 * 32-bit immediate is emitted as float, signed int or unsigned int.
 */
enum dx10_opcode_format {
   OF_FLOAT = 0,
   OF_INT   = 1,
   OF_UINT  = 2
};
49
50
struct dx10_opcode_xlate {
51
D3D10_SB_OPCODE_TYPE type;
52
enum dx10_opcode_format format;
53
uint tgsi_opcode;
54
};
55
56
/* Opcodes that we have not even attempted to implement:
57
*/
58
#define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST
59
60
/* Opcodes which do not translate directly to a TGSI opcode, but which
61
* have at least a partial implementation coded below:
62
*/
63
#define TGSI_EXPAND (TGSI_OPCODE_LAST+1)
64
65
static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = {
66
{D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD},
67
{D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND},
68
{D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK},
69
{D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND},
70
{D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND},
71
{D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND},
72
{D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE},
73
{D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT},
74
{D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND},
75
{D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND},
76
{D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT},
77
{D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX},
78
{D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY},
79
{D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND},
80
{D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV},
81
{D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2},
82
{D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3},
83
{D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4},
84
{D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE},
85
{D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND},
86
{D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND},
87
{D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF},
88
{D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP},
89
{D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH},
90
{D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ},
91
{D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND},
92
{D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC},
93
{D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND},
94
{D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND},
95
{D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE},
96
{D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD},
97
{D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND},
98
{D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ},
99
{D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE},
100
{D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT},
101
{D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD},
102
{D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX},
103
{D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN},
104
{D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND},
105
{D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE},
106
{D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG},
107
{D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL},
108
{D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR},
109
{D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F},
110
{D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND},
111
{D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND},
112
{D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND},
113
{D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND},
114
{D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP},
115
{D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT},
116
{D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD},
117
{D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN},
118
{D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX},
119
{D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND},
120
{D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV},
121
{D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP},
122
{D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL},
123
{D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE},
124
{D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP},
125
{D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT},
126
{D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR},
127
{D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND},
128
{D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET},
129
{D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND},
130
{D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND},
131
{D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR},
132
{D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL},
133
{D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC},
134
{D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND},
135
{D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND},
136
{D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND},
137
{D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND},
138
{D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND},
139
{D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND},
140
{D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND},
141
{D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND},
142
{D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH},
143
{D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND},
144
{D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND},
145
{D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT},
146
{D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE},
147
{D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND},
148
{D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD},
149
{D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX},
150
{D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN},
151
{D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR},
152
{D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F},
153
{D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR},
154
{D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND},
155
{D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND},
156
{D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND},
157
{D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
158
{D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND},
159
{D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND},
160
{D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND},
161
{D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND},
162
{D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND},
163
{D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND},
164
{D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND},
165
{D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND},
166
{D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND},
167
{D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND},
168
{D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND},
169
{D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND},
170
{D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND},
171
{D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND},
172
{D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
173
{D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
174
{D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
175
{D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
176
{D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED},
177
{D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED}
178
};
179
180
#define SHADER_MAX_TEMPS 4096
181
#define SHADER_MAX_INPUTS 32
182
#define SHADER_MAX_OUTPUTS 32
183
#define SHADER_MAX_CONSTS 4096
184
#define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS
185
#define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS
186
#define SHADER_MAX_INDEXABLE_TEMPS 4096
187
188
/*
 * Pairs a D3D label number with a TGSI label token.
 * NOTE(review): presumably recorded per call site so the token can be
 * patched once labels are known -- the users are outside this view.
 */
struct Shader_call {
   unsigned d3d_label;          /* label number as used by the D3D10 shader */
   unsigned tgsi_label_token;   /* associated TGSI label token */
};
192
193
/*
 * Pairs a D3D label number with a TGSI instruction number.
 * NOTE(review): presumably the instruction at which the label was
 * emitted -- the users are outside this view.
 */
struct Shader_label {
   unsigned d3d_label;      /* label number as used by the D3D10 shader */
   unsigned tgsi_insn_no;   /* associated TGSI instruction number */
};
197
198
struct Shader_resource {
199
uint target; /* TGSI_TEXTURE_x */
200
};
201
202
struct Shader_xlate {
203
struct ureg_program *ureg;
204
205
uint vertices_in;
206
uint declared_temps;
207
208
struct ureg_dst temps[SHADER_MAX_TEMPS];
209
struct ureg_dst output_depth;
210
struct Shader_resource resources[SHADER_MAX_RESOURCES];
211
struct ureg_src sv[SHADER_MAX_RESOURCES];
212
struct ureg_src samplers[SHADER_MAX_SAMPLERS];
213
struct ureg_src imms;
214
struct ureg_src prim_id;
215
216
uint temp_offset;
217
uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS];
218
219
struct {
220
boolean declared;
221
uint writemask;
222
uint siv_name;
223
boolean overloaded;
224
struct ureg_src reg;
225
} inputs[SHADER_MAX_INPUTS];
226
227
struct {
228
struct ureg_dst reg[4];
229
} outputs[SHADER_MAX_OUTPUTS];
230
231
struct {
232
uint d3d;
233
uint tgsi;
234
} clip_distance_mapping[2], cull_distance_mapping[2];
235
uint num_clip_distances_declared;
236
uint num_cull_distances_declared;
237
238
struct Shader_call *calls;
239
uint num_calls;
240
uint max_calls;
241
struct Shader_label *labels;
242
uint num_labels;
243
uint max_labels;
244
};
245
246
static uint
247
translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation)
248
{
249
switch (interpolation) {
250
case D3D10_SB_INTERPOLATION_UNDEFINED:
251
assert(0);
252
return TGSI_INTERPOLATE_LINEAR;
253
254
case D3D10_SB_INTERPOLATION_CONSTANT:
255
return TGSI_INTERPOLATE_CONSTANT;
256
case D3D10_SB_INTERPOLATION_LINEAR:
257
return TGSI_INTERPOLATE_PERSPECTIVE;
258
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE:
259
return TGSI_INTERPOLATE_LINEAR;
260
261
case D3D10_SB_INTERPOLATION_LINEAR_CENTROID:
262
case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1
263
LOG_UNSUPPORTED(TRUE);
264
return TGSI_INTERPOLATE_PERSPECTIVE;
265
266
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID:
267
case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1
268
LOG_UNSUPPORTED(TRUE);
269
return TGSI_INTERPOLATE_LINEAR;
270
}
271
272
assert(0);
273
return TGSI_INTERPOLATE_LINEAR;
274
}
275
276
static uint
277
translate_system_name(D3D10_SB_NAME name)
278
{
279
switch (name) {
280
case D3D10_SB_NAME_UNDEFINED:
281
assert(0); /* should not happen */
282
return TGSI_SEMANTIC_GENERIC;
283
case D3D10_SB_NAME_POSITION:
284
return TGSI_SEMANTIC_POSITION;
285
case D3D10_SB_NAME_CLIP_DISTANCE:
286
case D3D10_SB_NAME_CULL_DISTANCE:
287
return TGSI_SEMANTIC_CLIPDIST;
288
case D3D10_SB_NAME_PRIMITIVE_ID:
289
return TGSI_SEMANTIC_PRIMID;
290
case D3D10_SB_NAME_INSTANCE_ID:
291
return TGSI_SEMANTIC_INSTANCEID;
292
case D3D10_SB_NAME_VERTEX_ID:
293
return TGSI_SEMANTIC_VERTEXID_NOBASE;
294
case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
295
return TGSI_SEMANTIC_VIEWPORT_INDEX;
296
case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
297
return TGSI_SEMANTIC_LAYER;
298
case D3D10_SB_NAME_IS_FRONT_FACE:
299
return TGSI_SEMANTIC_FACE;
300
case D3D10_SB_NAME_SAMPLE_INDEX:
301
LOG_UNSUPPORTED(TRUE);
302
return TGSI_SEMANTIC_GENERIC;
303
}
304
305
assert(0);
306
return TGSI_SEMANTIC_GENERIC;
307
}
308
309
static uint
310
translate_semantic_index(struct Shader_xlate *sx,
311
D3D10_SB_NAME name,
312
const struct Shader_dst_operand *operand)
313
{
314
unsigned idx;
315
switch (name) {
316
case D3D10_SB_NAME_CLIP_DISTANCE:
317
case D3D10_SB_NAME_CULL_DISTANCE:
318
if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) {
319
idx = sx->clip_distance_mapping[0].tgsi;
320
} else {
321
assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm);
322
idx = sx->clip_distance_mapping[1].tgsi;
323
}
324
break;
325
/* case D3D10_SB_NAME_CULL_DISTANCE:
326
if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) {
327
idx = sx->cull_distance_mapping[0].tgsi;
328
} else {
329
assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm);
330
idx = sx->cull_distance_mapping[1].tgsi;
331
}
332
break;*/
333
default:
334
idx = 0;
335
}
336
return idx;
337
}
338
339
static enum tgsi_return_type
340
trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) {
341
switch (d3drettype) {
342
case D3D10_SB_RETURN_TYPE_UNORM:
343
return TGSI_RETURN_TYPE_UNORM;
344
case D3D10_SB_RETURN_TYPE_SNORM:
345
return TGSI_RETURN_TYPE_SNORM;
346
case D3D10_SB_RETURN_TYPE_SINT:
347
return TGSI_RETURN_TYPE_SINT;
348
case D3D10_SB_RETURN_TYPE_UINT:
349
return TGSI_RETURN_TYPE_UINT;
350
case D3D10_SB_RETURN_TYPE_FLOAT:
351
return TGSI_RETURN_TYPE_FLOAT;
352
case D3D10_SB_RETURN_TYPE_MIXED:
353
default:
354
LOG_UNSUPPORTED(TRUE);
355
return TGSI_RETURN_TYPE_FLOAT;
356
}
357
}
358
359
static void
360
declare_vertices_in(struct Shader_xlate *sx,
361
unsigned in)
362
{
363
/* Make sure vertices_in is consistent with input primitive
364
* and other input declarations.
365
*/
366
if (sx->vertices_in) {
367
assert(sx->vertices_in == in);
368
} else {
369
sx->vertices_in = in;
370
}
371
}
372
373
/*
 * Source swizzle: the source component selected for each of the four
 * destination components.
 */
struct swizzle_mapping {
   unsigned x;   /* source component read for destination x */
   unsigned y;   /* source component read for destination y */
   unsigned z;   /* source component read for destination z */
   unsigned w;   /* source component read for destination w */
};
379
380
/* mapping of writmask to swizzles */
381
static const struct swizzle_mapping writemask_to_swizzle[] = {
382
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE
383
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X
384
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y
385
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY
386
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z
387
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ
388
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ
389
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ
390
{ TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W
391
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW
392
{ TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW
393
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW
394
{ TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW
395
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW
396
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW
397
{ TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW
398
};
399
400
static struct ureg_src
401
swizzle_reg(struct ureg_src src, uint writemask,
402
unsigned siv_name)
403
{
404
switch (siv_name) {
405
case D3D10_SB_NAME_PRIMITIVE_ID:
406
case D3D10_SB_NAME_INSTANCE_ID:
407
case D3D10_SB_NAME_VERTEX_ID:
408
case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX:
409
case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX:
410
case D3D10_SB_NAME_IS_FRONT_FACE:
411
return ureg_scalar(src, TGSI_SWIZZLE_X);
412
default: {
413
const struct swizzle_mapping *swizzle =
414
&writemask_to_swizzle[writemask];
415
return ureg_swizzle(src, swizzle->x, swizzle->y,
416
swizzle->z, swizzle->w);
417
}
418
}
419
}
420
421
static void
422
dcl_base_output(struct Shader_xlate *sx,
423
struct ureg_program *ureg,
424
struct ureg_dst reg,
425
const struct Shader_dst_operand *operand)
426
{
427
unsigned writemask =
428
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
429
unsigned idx = operand->base.index[0].imm;
430
unsigned i;
431
432
if (!writemask) {
433
sx->outputs[idx].reg[0] = reg;
434
sx->outputs[idx].reg[1] = reg;
435
sx->outputs[idx].reg[2] = reg;
436
sx->outputs[idx].reg[3] = reg;
437
return;
438
}
439
440
for (i = 0; i < 4; ++i) {
441
unsigned mask = 1 << i;
442
if ((writemask & mask)) {
443
sx->outputs[idx].reg[i] = reg;
444
}
445
}
446
}
447
448
static void
449
dcl_base_input(struct Shader_xlate *sx,
450
struct ureg_program *ureg,
451
const struct Shader_dst_operand *operand,
452
struct ureg_src dcl_reg,
453
uint index,
454
uint siv_name)
455
{
456
unsigned writemask =
457
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
458
459
if (sx->inputs[index].declared && !sx->inputs[index].overloaded) {
460
struct ureg_dst temp = ureg_DECL_temporary(sx->ureg);
461
462
ureg_MOV(ureg,
463
ureg_writemask(temp, sx->inputs[index].writemask),
464
swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask,
465
sx->inputs[index].siv_name));
466
ureg_MOV(ureg, ureg_writemask(temp, writemask),
467
swizzle_reg(dcl_reg, writemask, siv_name));
468
sx->inputs[index].reg = ureg_src(temp);
469
sx->inputs[index].overloaded = TRUE;
470
sx->inputs[index].writemask |= writemask;
471
} else if (sx->inputs[index].overloaded) {
472
struct ureg_dst temp = ureg_dst(sx->inputs[index].reg);
473
ureg_MOV(ureg, ureg_writemask(temp, writemask),
474
swizzle_reg(dcl_reg, writemask, siv_name));
475
sx->inputs[index].writemask |= writemask;
476
} else {
477
assert(!sx->inputs[index].declared);
478
479
sx->inputs[index].reg = dcl_reg;
480
sx->inputs[index].declared = TRUE;
481
sx->inputs[index].writemask = writemask;
482
sx->inputs[index].siv_name = siv_name;
483
}
484
}
485
486
static void
487
dcl_vs_input(struct Shader_xlate *sx,
488
struct ureg_program *ureg,
489
const struct Shader_dst_operand *dst)
490
{
491
struct ureg_src reg;
492
assert(dst->base.index_dim == 1);
493
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
494
495
reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm);
496
497
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
498
D3D10_SB_NAME_UNDEFINED);
499
}
500
501
static void
502
dcl_gs_input(struct Shader_xlate *sx,
503
struct ureg_program *ureg,
504
const struct Shader_dst_operand *dst)
505
{
506
if (dst->base.index_dim == 2) {
507
assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
508
509
declare_vertices_in(sx, dst->base.index[0].imm);
510
511
/* XXX: Implement declaration masks in gallium.
512
*/
513
if (!sx->inputs[dst->base.index[1].imm].reg.File) {
514
struct ureg_src reg =
515
ureg_DECL_input(ureg,
516
TGSI_SEMANTIC_GENERIC,
517
dst->base.index[1].imm,
518
0, 1);
519
dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
520
D3D10_SB_NAME_UNDEFINED);
521
}
522
} else {
523
assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID);
524
assert(dst->base.index_dim == 0);
525
526
sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0);
527
}
528
}
529
530
static void
531
dcl_sgv_input(struct Shader_xlate *sx,
532
struct ureg_program *ureg,
533
const struct Shader_dst_operand *dst,
534
uint dcl_siv_name)
535
{
536
struct ureg_src reg;
537
assert(dst->base.index_dim == 1);
538
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
539
540
reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0);
541
542
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
543
dcl_siv_name);
544
}
545
546
static void
547
dcl_siv_input(struct Shader_xlate *sx,
548
struct ureg_program *ureg,
549
const struct Shader_dst_operand *dst,
550
uint dcl_siv_name)
551
{
552
struct ureg_src reg;
553
assert(dst->base.index_dim == 2);
554
assert(dst->base.index[1].imm < SHADER_MAX_INPUTS);
555
556
declare_vertices_in(sx, dst->base.index[0].imm);
557
558
reg = ureg_DECL_input(ureg,
559
translate_system_name(dcl_siv_name), 0,
560
0, 1);
561
562
dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm,
563
dcl_siv_name);
564
}
565
566
static void
567
dcl_ps_input(struct Shader_xlate *sx,
568
struct ureg_program *ureg,
569
const struct Shader_dst_operand *dst,
570
uint dcl_in_ps_interp)
571
{
572
struct ureg_src reg;
573
assert(dst->base.index_dim == 1);
574
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
575
576
reg = ureg_DECL_fs_input(ureg,
577
TGSI_SEMANTIC_GENERIC,
578
dst->base.index[0].imm,
579
translate_interpolation(dcl_in_ps_interp));
580
581
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
582
D3D10_SB_NAME_UNDEFINED);
583
}
584
585
static void
586
dcl_ps_sgv_input(struct Shader_xlate *sx,
587
struct ureg_program *ureg,
588
const struct Shader_dst_operand *dst,
589
uint dcl_siv_name)
590
{
591
struct ureg_src reg;
592
assert(dst->base.index_dim == 1);
593
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
594
595
if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
596
ureg_property(ureg,
597
TGSI_PROPERTY_FS_COORD_ORIGIN,
598
TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
599
ureg_property(ureg,
600
TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
601
TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER);
602
}
603
604
reg = ureg_DECL_fs_input(ureg,
605
translate_system_name(dcl_siv_name),
606
0,
607
TGSI_INTERPOLATE_CONSTANT);
608
609
if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) {
610
/* We need to map gallium's front_face to the one expected
611
* by D3D10 */
612
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
613
614
tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X);
615
616
ureg_CMP(ureg, tmp, reg,
617
ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1));
618
619
reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X);
620
}
621
622
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
623
dcl_siv_name);
624
}
625
626
static void
627
dcl_ps_siv_input(struct Shader_xlate *sx,
628
struct ureg_program *ureg,
629
const struct Shader_dst_operand *dst,
630
uint dcl_siv_name, uint dcl_in_ps_interp)
631
{
632
struct ureg_src reg;
633
assert(dst->base.index_dim == 1);
634
assert(dst->base.index[0].imm < SHADER_MAX_INPUTS);
635
636
reg = ureg_DECL_fs_input(ureg,
637
translate_system_name(dcl_siv_name),
638
0,
639
translate_interpolation(dcl_in_ps_interp));
640
641
if (dcl_siv_name == D3D10_SB_NAME_POSITION) {
642
/* D3D10 expects reciprocal of interpolated 1/w as 4th component,
643
* gallium/GL just interpolated 1/w */
644
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
645
646
ureg_MOV(ureg, tmp, reg);
647
ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W),
648
ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W));
649
reg = ureg_src(tmp);
650
}
651
652
dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm,
653
dcl_siv_name);
654
}
655
656
static struct ureg_src
657
translate_relative_operand(struct Shader_xlate *sx,
658
const struct Shader_relative_operand *operand)
659
{
660
struct ureg_src reg;
661
662
switch (operand->type) {
663
case D3D10_SB_OPERAND_TYPE_TEMP:
664
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
665
666
reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]);
667
break;
668
669
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
670
reg = sx->prim_id;
671
break;
672
673
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
674
assert(operand->index[1].imm < SHADER_MAX_TEMPS);
675
676
reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
677
operand->index[1].imm]);
678
break;
679
680
case D3D10_SB_OPERAND_TYPE_INPUT:
681
case D3D10_SB_OPERAND_TYPE_OUTPUT:
682
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
683
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
684
case D3D10_SB_OPERAND_TYPE_SAMPLER:
685
case D3D10_SB_OPERAND_TYPE_RESOURCE:
686
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
687
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
688
case D3D10_SB_OPERAND_TYPE_LABEL:
689
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
690
case D3D10_SB_OPERAND_TYPE_NULL:
691
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
692
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
693
LOG_UNSUPPORTED(TRUE);
694
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
695
break;
696
697
default:
698
assert(0); /* should never happen */
699
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
700
}
701
702
reg = ureg_scalar(reg, operand->comp);
703
return reg;
704
}
705
706
static struct ureg_dst
707
translate_operand(struct Shader_xlate *sx,
708
const struct Shader_operand *operand,
709
unsigned writemask)
710
{
711
struct ureg_dst reg;
712
713
switch (operand->type) {
714
case D3D10_SB_OPERAND_TYPE_TEMP:
715
assert(operand->index_dim == 1);
716
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
717
assert(operand->index[0].imm < SHADER_MAX_TEMPS);
718
719
reg = sx->temps[sx->temp_offset + operand->index[0].imm];
720
break;
721
722
case D3D10_SB_OPERAND_TYPE_OUTPUT:
723
assert(operand->index_dim == 1);
724
assert(operand->index[0].imm < SHADER_MAX_OUTPUTS);
725
726
if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) {
727
if (!writemask) {
728
reg = sx->outputs[operand->index[0].imm].reg[0];
729
} else {
730
unsigned i;
731
for (i = 0; i < 4; ++i) {
732
unsigned mask = 1 << i;
733
if ((writemask & mask)) {
734
reg = sx->outputs[operand->index[0].imm].reg[i];
735
break;
736
}
737
}
738
}
739
} else {
740
struct ureg_src addr =
741
translate_relative_operand(sx, &operand->index[0].rel);
742
assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE);
743
reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr);
744
}
745
break;
746
747
case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH:
748
assert(operand->index_dim == 0);
749
750
reg = sx->output_depth;
751
break;
752
753
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
754
assert(operand->index_dim == 0);
755
756
reg = ureg_dst(sx->prim_id);
757
break;
758
759
case D3D10_SB_OPERAND_TYPE_INPUT:
760
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
761
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
762
case D3D10_SB_OPERAND_TYPE_IMMEDIATE64:
763
case D3D10_SB_OPERAND_TYPE_SAMPLER:
764
case D3D10_SB_OPERAND_TYPE_RESOURCE:
765
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
766
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
767
case D3D10_SB_OPERAND_TYPE_LABEL:
768
case D3D10_SB_OPERAND_TYPE_NULL:
769
case D3D10_SB_OPERAND_TYPE_RASTERIZER:
770
case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
771
/* XXX: Translate more operands types.
772
*/
773
LOG_UNSUPPORTED(TRUE);
774
reg = ureg_DECL_temporary(sx->ureg);
775
}
776
777
return reg;
778
}
779
780
static struct ureg_src
781
translate_indexable_temp(struct Shader_xlate *sx,
782
const struct Shader_operand *operand)
783
{
784
struct ureg_src reg;
785
switch (operand->index[1].index_rep) {
786
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
787
reg = ureg_src(
788
sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] +
789
operand->index[1].imm]);
790
break;
791
case D3D10_SB_OPERAND_INDEX_RELATIVE:
792
reg = ureg_src_indirect(
793
ureg_src(sx->temps[
794
sx->indexable_temp_offsets[operand->index[0].imm]]),
795
translate_relative_operand(sx,
796
&operand->index[1].rel));
797
break;
798
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
799
reg = ureg_src_indirect(
800
ureg_src(sx->temps[
801
operand->index[1].imm +
802
sx->indexable_temp_offsets[operand->index[0].imm]]),
803
translate_relative_operand(sx,
804
&operand->index[1].rel));
805
break;
806
default:
807
/* XXX: Other index representations.
808
*/
809
LOG_UNSUPPORTED(TRUE);
810
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
811
}
812
return reg;
813
}
814
815
static struct ureg_dst
816
translate_dst_operand(struct Shader_xlate *sx,
817
const struct Shader_dst_operand *operand,
818
boolean saturate)
819
{
820
struct ureg_dst reg;
821
unsigned writemask =
822
operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT;
823
824
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4);
825
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X);
826
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y);
827
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z);
828
assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W);
829
830
switch (operand->base.type) {
831
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
832
assert(operand->base.index_dim == 2);
833
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
834
assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
835
836
reg = ureg_dst(translate_indexable_temp(sx, &operand->base));
837
break;
838
839
default:
840
reg = translate_operand(sx, &operand->base, writemask);
841
}
842
843
/* oDepth often has an empty writemask */
844
if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
845
reg = ureg_writemask(reg, writemask);
846
}
847
848
if (saturate) {
849
reg = ureg_saturate(reg);
850
}
851
852
return reg;
853
}
854
855
static struct ureg_src
856
translate_src_operand(struct Shader_xlate *sx,
857
const struct Shader_src_operand *operand,
858
const enum dx10_opcode_format format)
859
{
860
struct ureg_src reg;
861
862
switch (operand->base.type) {
863
case D3D10_SB_OPERAND_TYPE_INPUT:
864
if (operand->base.index_dim == 1) {
865
switch (operand->base.index[0].index_rep) {
866
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
867
assert(operand->base.index[0].imm < SHADER_MAX_INPUTS);
868
reg = sx->inputs[operand->base.index[0].imm].reg;
869
break;
870
case D3D10_SB_OPERAND_INDEX_RELATIVE: {
871
struct ureg_src tmp =
872
translate_relative_operand(sx, &operand->base.index[0].rel);
873
reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
874
}
875
break;
876
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
877
struct ureg_src tmp =
878
translate_relative_operand(sx, &operand->base.index[0].rel);
879
reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp);
880
}
881
break;
882
default:
883
/* XXX: Other index representations.
884
*/
885
LOG_UNSUPPORTED(TRUE);
886
887
}
888
} else {
889
assert(operand->base.index_dim == 2);
890
assert(operand->base.index[1].imm < SHADER_MAX_INPUTS);
891
892
switch (operand->base.index[1].index_rep) {
893
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
894
reg = sx->inputs[operand->base.index[1].imm].reg;
895
break;
896
case D3D10_SB_OPERAND_INDEX_RELATIVE: {
897
struct ureg_src tmp =
898
translate_relative_operand(sx, &operand->base.index[1].rel);
899
reg = ureg_src_indirect(sx->inputs[0].reg, tmp);
900
}
901
break;
902
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
903
struct ureg_src tmp =
904
translate_relative_operand(sx, &operand->base.index[1].rel);
905
reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp);
906
}
907
break;
908
default:
909
/* XXX: Other index representations.
910
*/
911
LOG_UNSUPPORTED(TRUE);
912
}
913
914
switch (operand->base.index[0].index_rep) {
915
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
916
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
917
break;
918
case D3D10_SB_OPERAND_INDEX_RELATIVE:{
919
struct ureg_src tmp =
920
translate_relative_operand(sx, &operand->base.index[0].rel);
921
reg = ureg_src_dimension_indirect(reg, tmp, 0);
922
}
923
break;
924
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: {
925
struct ureg_src tmp =
926
translate_relative_operand(sx, &operand->base.index[0].rel);
927
reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm);
928
}
929
break;
930
default:
931
/* XXX: Other index representations.
932
*/
933
LOG_UNSUPPORTED(TRUE);
934
}
935
}
936
break;
937
938
case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP:
939
assert(operand->base.index_dim == 2);
940
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
941
assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS);
942
943
reg = translate_indexable_temp(sx, &operand->base);
944
break;
945
946
case D3D10_SB_OPERAND_TYPE_IMMEDIATE32:
947
switch (format) {
948
case OF_FLOAT:
949
reg = ureg_imm4f(sx->ureg,
950
operand->imm[0].f32,
951
operand->imm[1].f32,
952
operand->imm[2].f32,
953
operand->imm[3].f32);
954
break;
955
case OF_INT:
956
reg = ureg_imm4i(sx->ureg,
957
operand->imm[0].i32,
958
operand->imm[1].i32,
959
operand->imm[2].i32,
960
operand->imm[3].i32);
961
break;
962
case OF_UINT:
963
reg = ureg_imm4u(sx->ureg,
964
operand->imm[0].u32,
965
operand->imm[1].u32,
966
operand->imm[2].u32,
967
operand->imm[3].u32);
968
break;
969
default:
970
assert(0);
971
reg = ureg_src(ureg_DECL_temporary(sx->ureg));
972
}
973
break;
974
975
case D3D10_SB_OPERAND_TYPE_SAMPLER:
976
assert(operand->base.index_dim == 1);
977
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
978
assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS);
979
980
reg = sx->samplers[operand->base.index[0].imm];
981
break;
982
983
case D3D10_SB_OPERAND_TYPE_RESOURCE:
984
assert(operand->base.index_dim == 1);
985
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
986
assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES);
987
988
reg = sx->sv[operand->base.index[0].imm];
989
break;
990
991
case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER:
992
assert(operand->base.index_dim == 2);
993
994
assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32);
995
assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
996
997
switch (operand->base.index[1].index_rep) {
998
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
999
assert(operand->base.index[1].imm < SHADER_MAX_CONSTS);
1000
1001
reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1002
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1003
break;
1004
case D3D10_SB_OPERAND_INDEX_RELATIVE:
1005
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1006
reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm);
1007
reg = ureg_src_indirect(
1008
reg,
1009
translate_relative_operand(sx, &operand->base.index[1].rel));
1010
reg = ureg_src_dimension(reg, operand->base.index[0].imm);
1011
break;
1012
default:
1013
/* XXX: Other index representations.
1014
*/
1015
LOG_UNSUPPORTED(TRUE);
1016
}
1017
1018
break;
1019
1020
case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
1021
assert(operand->base.index_dim == 1);
1022
1023
switch (operand->base.index[0].index_rep) {
1024
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32:
1025
reg = sx->imms;
1026
reg.Index += operand->base.index[0].imm;
1027
break;
1028
case D3D10_SB_OPERAND_INDEX_RELATIVE:
1029
case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE:
1030
reg = sx->imms;
1031
reg.Index += operand->base.index[0].imm;
1032
reg = ureg_src_indirect(
1033
sx->imms,
1034
translate_relative_operand(sx, &operand->base.index[0].rel));
1035
break;
1036
default:
1037
/* XXX: Other index representations.
1038
*/
1039
LOG_UNSUPPORTED(TRUE);
1040
}
1041
break;
1042
1043
case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID:
1044
reg = sx->prim_id;
1045
break;
1046
1047
default:
1048
reg = ureg_src(translate_operand(sx, &operand->base, 0));
1049
}
1050
1051
reg = ureg_swizzle(reg,
1052
operand->swizzle[0],
1053
operand->swizzle[1],
1054
operand->swizzle[2],
1055
operand->swizzle[3]);
1056
1057
switch (operand->modifier) {
1058
case D3D10_SB_OPERAND_MODIFIER_NONE:
1059
break;
1060
case D3D10_SB_OPERAND_MODIFIER_NEG:
1061
reg = ureg_negate(reg);
1062
break;
1063
case D3D10_SB_OPERAND_MODIFIER_ABS:
1064
reg = ureg_abs(reg);
1065
break;
1066
case D3D10_SB_OPERAND_MODIFIER_ABSNEG:
1067
reg = ureg_negate(ureg_abs(reg));
1068
break;
1069
default:
1070
assert(0);
1071
}
1072
1073
return reg;
1074
}
1075
1076
static uint
1077
translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim)
1078
{
1079
switch (dim) {
1080
case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN:
1081
return TGSI_TEXTURE_UNKNOWN;
1082
case D3D10_SB_RESOURCE_DIMENSION_BUFFER:
1083
return TGSI_TEXTURE_BUFFER;
1084
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D:
1085
return TGSI_TEXTURE_1D;
1086
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D:
1087
return TGSI_TEXTURE_2D;
1088
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS:
1089
return TGSI_TEXTURE_2D_MSAA;
1090
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D:
1091
return TGSI_TEXTURE_3D;
1092
case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE:
1093
return TGSI_TEXTURE_CUBE;
1094
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY:
1095
return TGSI_TEXTURE_1D_ARRAY;
1096
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY:
1097
return TGSI_TEXTURE_2D_ARRAY;
1098
case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
1099
return TGSI_TEXTURE_2D_ARRAY_MSAA;
1100
case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY:
1101
return TGSI_TEXTURE_CUBE_ARRAY;
1102
default:
1103
assert(0);
1104
return TGSI_TEXTURE_UNKNOWN;
1105
}
1106
}
1107
1108
static uint
1109
texture_dim_from_tgsi_target(unsigned tgsi_target)
1110
{
1111
switch (tgsi_target) {
1112
case TGSI_TEXTURE_BUFFER:
1113
case TGSI_TEXTURE_1D:
1114
case TGSI_TEXTURE_1D_ARRAY:
1115
return 1;
1116
case TGSI_TEXTURE_2D:
1117
case TGSI_TEXTURE_2D_MSAA:
1118
case TGSI_TEXTURE_CUBE:
1119
case TGSI_TEXTURE_2D_ARRAY:
1120
case TGSI_TEXTURE_2D_ARRAY_MSAA:
1121
return 2;
1122
case TGSI_TEXTURE_3D:
1123
return 3;
1124
case TGSI_TEXTURE_UNKNOWN:
1125
default:
1126
assert(0);
1127
return 1;
1128
}
1129
}
1130
1131
static boolean
1132
operand_is_scalar(const struct Shader_src_operand *operand)
1133
{
1134
return operand->swizzle[0] == operand->swizzle[1] &&
1135
operand->swizzle[1] == operand->swizzle[2] &&
1136
operand->swizzle[2] == operand->swizzle[3];
1137
}
1138
1139
static void
1140
Shader_add_call(struct Shader_xlate *sx,
1141
unsigned d3d_label,
1142
unsigned tgsi_label_token)
1143
{
1144
ASSERT(sx->num_calls < sx->max_calls);
1145
1146
sx->calls[sx->num_calls].d3d_label = d3d_label;
1147
sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token;
1148
sx->num_calls++;
1149
}
1150
1151
static void
1152
Shader_add_label(struct Shader_xlate *sx,
1153
unsigned d3d_label,
1154
unsigned tgsi_insn_no)
1155
{
1156
ASSERT(sx->num_labels < sx->max_labels);
1157
1158
sx->labels[sx->num_labels].d3d_label = d3d_label;
1159
sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no;
1160
sx->num_labels++;
1161
}
1162
1163
1164
static void
1165
sample_ureg_emit(struct ureg_program *ureg,
1166
unsigned tgsi_opcode,
1167
unsigned num_src,
1168
struct Shader_opcode *opcode,
1169
struct ureg_dst dst,
1170
struct ureg_src *src)
1171
{
1172
unsigned num_offsets = 0;
1173
struct tgsi_texture_offset texoffsets;
1174
1175
memset(&texoffsets, 0, sizeof texoffsets);
1176
1177
if (opcode->imm_texel_offset.u ||
1178
opcode->imm_texel_offset.v ||
1179
opcode->imm_texel_offset.w) {
1180
struct ureg_src offsetreg;
1181
num_offsets = 1;
1182
/* don't actually always need all 3 values */
1183
offsetreg = ureg_imm3i(ureg,
1184
opcode->imm_texel_offset.u,
1185
opcode->imm_texel_offset.v,
1186
opcode->imm_texel_offset.w);
1187
texoffsets.File = offsetreg.File;
1188
texoffsets.Index = offsetreg.Index;
1189
texoffsets.SwizzleX = offsetreg.SwizzleX;
1190
texoffsets.SwizzleY = offsetreg.SwizzleY;
1191
texoffsets.SwizzleZ = offsetreg.SwizzleZ;
1192
}
1193
1194
ureg_tex_insn(ureg,
1195
tgsi_opcode,
1196
&dst, 1,
1197
TGSI_TEXTURE_UNKNOWN,
1198
TGSI_RETURN_TYPE_UNKNOWN,
1199
&texoffsets, num_offsets,
1200
src, num_src);
1201
}
1202
1203
typedef void (*unary_ureg_func)(struct ureg_program *ureg, struct ureg_dst dst,
1204
struct ureg_src src);
1205
static void
1206
expand_unary_to_scalarf(struct ureg_program *ureg, unary_ureg_func func,
1207
struct Shader_xlate *sx, struct Shader_opcode *opcode)
1208
{
1209
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1210
struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0],
1211
opcode->saturate);
1212
struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
1213
struct ureg_dst scalar_dst;
1214
ureg_MOV(ureg, tmp, src);
1215
src = ureg_src(tmp);
1216
1217
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
1218
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1219
func(ureg, scalar_dst,
1220
ureg_scalar(src, TGSI_SWIZZLE_X));
1221
}
1222
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
1223
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1224
func(ureg, scalar_dst,
1225
ureg_scalar(src, TGSI_SWIZZLE_Y));
1226
}
1227
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
1228
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1229
func(ureg, scalar_dst,
1230
ureg_scalar(src, TGSI_SWIZZLE_Z));
1231
}
1232
scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
1233
if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
1234
func(ureg, scalar_dst,
1235
ureg_scalar(src, TGSI_SWIZZLE_W));
1236
}
1237
ureg_release_temporary(ureg, tmp);
1238
}
1239
1240
const struct tgsi_token *
1241
Shader_tgsi_translate(const unsigned *code,
1242
unsigned *output_mapping)
1243
{
1244
struct Shader_xlate sx;
1245
struct Shader_parser parser;
1246
struct ureg_program *ureg = NULL;
1247
struct Shader_opcode opcode;
1248
const struct tgsi_token *tokens = NULL;
1249
uint nr_tokens;
1250
boolean shader_dumped = FALSE;
1251
boolean inside_sub = FALSE;
1252
uint i, j;
1253
1254
memset(&sx, 0, sizeof sx);
1255
1256
Shader_parse_init(&parser, code);
1257
1258
if (st_debug & ST_DEBUG_TGSI) {
1259
dx10_shader_dump_tokens(code);
1260
shader_dumped = TRUE;
1261
}
1262
1263
sx.max_calls = 64;
1264
sx.calls = (struct Shader_call *)MALLOC(sx.max_calls *
1265
sizeof(struct Shader_call));
1266
sx.num_calls = 0;
1267
1268
sx.max_labels = 64;
1269
sx.labels = (struct Shader_label *)MALLOC(sx.max_labels *
1270
sizeof(struct Shader_call));
1271
sx.num_labels = 0;
1272
1273
1274
1275
/* Header. */
1276
switch (parser.header.type) {
1277
case D3D10_SB_PIXEL_SHADER:
1278
ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1279
break;
1280
case D3D10_SB_VERTEX_SHADER:
1281
ureg = ureg_create(PIPE_SHADER_VERTEX);
1282
break;
1283
case D3D10_SB_GEOMETRY_SHADER:
1284
ureg = ureg_create(PIPE_SHADER_GEOMETRY);
1285
break;
1286
}
1287
1288
assert(ureg);
1289
sx.ureg = ureg;
1290
1291
while (Shader_parse_opcode(&parser, &opcode)) {
1292
const struct dx10_opcode_xlate *ox;
1293
1294
assert(opcode.type < D3D10_SB_NUM_OPCODES);
1295
ox = &opcode_xlate[opcode.type];
1296
1297
switch (opcode.type) {
1298
case D3D10_SB_OPCODE_EXP:
1299
expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode);
1300
break;
1301
case D3D10_SB_OPCODE_SQRT:
1302
expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode);
1303
break;
1304
case D3D10_SB_OPCODE_RSQ:
1305
expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode);
1306
break;
1307
case D3D10_SB_OPCODE_LOG:
1308
expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode);
1309
break;
1310
case D3D10_SB_OPCODE_IMUL:
1311
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1312
ureg_IMUL_HI(ureg,
1313
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1314
translate_src_operand(&sx, &opcode.src[0], OF_INT),
1315
translate_src_operand(&sx, &opcode.src[1], OF_INT));
1316
}
1317
1318
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1319
ureg_UMUL(ureg,
1320
translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate),
1321
translate_src_operand(&sx, &opcode.src[0], OF_INT),
1322
translate_src_operand(&sx, &opcode.src[1], OF_INT));
1323
}
1324
1325
break;
1326
1327
case D3D10_SB_OPCODE_FTOI: {
1328
/* XXX: tgsi (and just about everybody else, c, opencl, glsl) has
1329
* out-of-range (and NaN) values undefined for f2i/f2u, but d3d10
1330
* requires clamping to min and max representable value (as well as 0
1331
* for NaNs) (this applies to both ftoi and ftou). At least the online
1332
* docs state that - this is consistent with generic d3d10 conversion
1333
* rules.
1334
* For FTOI, we cheat a bit here - in particular depending on no one
1335
* caring about NaNs, and depending on the (undefined!) behavior of
1336
* F2I returning 0x80000000 for too negative values (which works with
1337
* x86 sse). Hence only need to clamp too positive values.
1338
* Note that it is impossible to clamp using a float, since 2^31 - 1
1339
* is not exactly representable with a float.
1340
*/
1341
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1342
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1343
ureg_FSGE(ureg, too_large,
1344
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1345
ureg_imm1f(ureg, 2147483648.0f));
1346
ureg_F2I(ureg, tmp,
1347
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1348
ureg_UCMP(ureg,
1349
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1350
ureg_src(too_large),
1351
ureg_imm1i(ureg, 0x7fffffff),
1352
ureg_src(tmp));
1353
ureg_release_temporary(ureg, too_large);
1354
ureg_release_temporary(ureg, tmp);
1355
}
1356
break;
1357
1358
case D3D10_SB_OPCODE_FTOU: {
1359
/* For ftou, we need to do both clamps, which as a bonus also
1360
* gets us correct NaN behavior.
1361
* Note that it is impossible to clamp using a float against the upper
1362
* limit, since 2^32 - 1 is not exactly representable with a float,
1363
* but the clamp against 0.0 certainly works just fine.
1364
*/
1365
struct ureg_dst too_large = ureg_DECL_temporary(ureg);
1366
struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1367
ureg_FSGE(ureg, too_large,
1368
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1369
ureg_imm1f(ureg, 4294967296.0f));
1370
/* clamp negative values + NaN to zero.
1371
* (Could be done slightly more efficient in llvmpipe due to
1372
* MAX NaN behavior handling.)
1373
*/
1374
ureg_MAX(ureg, tmp,
1375
ureg_imm1f(ureg, 0.0f),
1376
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1377
ureg_F2U(ureg, tmp,
1378
ureg_src(tmp));
1379
ureg_UCMP(ureg,
1380
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1381
ureg_src(too_large),
1382
ureg_imm1u(ureg, 0xffffffff),
1383
ureg_src(tmp));
1384
ureg_release_temporary(ureg, too_large);
1385
ureg_release_temporary(ureg, tmp);
1386
}
1387
break;
1388
1389
case D3D10_SB_OPCODE_LD_MS:
1390
/* XXX: We don't support multi-sampling yet, but we need to parse
1391
* this opcode regardless, so we just ignore sample index operand
1392
* for now */
1393
case D3D10_SB_OPCODE_LD:
1394
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1395
unsigned resource = opcode.src[1].base.index[0].imm;
1396
assert(opcode.src[1].base.index_dim == 1);
1397
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1398
1399
if (ureg_src_is_undef(sx.samplers[resource])) {
1400
sx.samplers[resource] =
1401
ureg_DECL_sampler(ureg, resource);
1402
}
1403
1404
ureg_TXF(ureg,
1405
translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate),
1406
sx.resources[resource].target,
1407
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1408
sx.samplers[resource]);
1409
}
1410
else {
1411
struct ureg_src srcreg[2];
1412
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT);
1413
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT);
1414
1415
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode,
1416
translate_dst_operand(&sx, &opcode.dst[0],
1417
opcode.saturate),
1418
srcreg);
1419
}
1420
break;
1421
1422
case D3D10_SB_OPCODE_CUSTOMDATA:
1423
if (opcode.customdata._class ==
1424
D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) {
1425
sx.imms =
1426
ureg_DECL_immediate_block_uint(ureg,
1427
opcode.customdata.u.constbuf.data,
1428
opcode.customdata.u.constbuf.count);
1429
} else {
1430
assert(0);
1431
}
1432
break;
1433
1434
case D3D10_SB_OPCODE_RESINFO:
1435
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1436
unsigned resource = opcode.src[1].base.index[0].imm;
1437
assert(opcode.src[1].base.index_dim == 1);
1438
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1439
1440
if (ureg_src_is_undef(sx.samplers[resource])) {
1441
sx.samplers[resource] =
1442
ureg_DECL_sampler(ureg, resource);
1443
}
1444
/* don't bother with swizzle, ret type etc. */
1445
ureg_TXQ(ureg,
1446
translate_dst_operand(&sx, &opcode.dst[0],
1447
opcode.saturate),
1448
sx.resources[resource].target,
1449
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1450
sx.samplers[resource]);
1451
}
1452
else {
1453
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1454
struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1455
struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0],
1456
opcode.saturate);
1457
1458
/* while specs say swizzle is ignored better safe than sorry */
1459
tsrc.SwizzleX = TGSI_SWIZZLE_X;
1460
tsrc.SwizzleY = TGSI_SWIZZLE_Y;
1461
tsrc.SwizzleZ = TGSI_SWIZZLE_Z;
1462
tsrc.SwizzleW = TGSI_SWIZZLE_W;
1463
1464
ureg_SVIEWINFO(ureg, r0,
1465
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1466
tsrc);
1467
1468
tsrc = ureg_src(r0);
1469
tsrc.SwizzleX = opcode.src[1].swizzle[0];
1470
tsrc.SwizzleY = opcode.src[1].swizzle[1];
1471
tsrc.SwizzleZ = opcode.src[1].swizzle[2];
1472
tsrc.SwizzleW = opcode.src[1].swizzle[3];
1473
1474
if (opcode.specific.resinfo_ret_type ==
1475
D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) {
1476
ureg_MOV(ureg, dstreg, tsrc);
1477
}
1478
else if (opcode.specific.resinfo_ret_type ==
1479
D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) {
1480
ureg_I2F(ureg, dstreg, tsrc);
1481
}
1482
else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */
1483
unsigned i;
1484
/*
1485
* Must apply rcp only to parts determined by dims,
1486
* (width/height/depth) but NOT to array size nor mip levels
1487
* hence need to figure that out here.
1488
* This is one sick modifier if you ask me!
1489
*/
1490
unsigned res_index = opcode.src[1].base.index[0].imm;
1491
unsigned target = sx.resources[res_index].target;
1492
unsigned dims = texture_dim_from_tgsi_target(target);
1493
1494
ureg_I2F(ureg, r0, ureg_src(r0));
1495
tsrc = ureg_src(r0);
1496
for (i = 0; i < 4; i++) {
1497
unsigned dst_swizzle = opcode.src[1].swizzle[i];
1498
struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i);
1499
/*
1500
* could do one mov with multiple write mask bits set
1501
* but rcp is scalar anyway.
1502
*/
1503
if (dst_swizzle < dims) {
1504
ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1505
}
1506
else {
1507
ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle));
1508
}
1509
}
1510
}
1511
ureg_release_temporary(ureg, r0);
1512
}
1513
break;
1514
1515
case D3D10_SB_OPCODE_SAMPLE:
1516
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1517
assert(opcode.src[1].base.index_dim == 1);
1518
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1519
1520
LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm);
1521
1522
ureg_TEX(ureg,
1523
translate_dst_operand(&sx, &opcode.dst[0],
1524
opcode.saturate),
1525
sx.resources[opcode.src[1].base.index[0].imm].target,
1526
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1527
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1528
}
1529
else {
1530
struct ureg_src srcreg[3];
1531
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1532
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1533
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1534
1535
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode,
1536
translate_dst_operand(&sx, &opcode.dst[0],
1537
opcode.saturate),
1538
srcreg);
1539
}
1540
break;
1541
1542
case D3D10_SB_OPCODE_SAMPLE_C:
1543
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1544
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1545
1546
/* XXX: Support only 2D texture targets for now.
1547
* Need to figure out how to pack the compare value
1548
* for other dimensions and if there is enough space
1549
* in a single operand for all possible cases.
1550
*/
1551
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1552
TGSI_TEXTURE_2D);
1553
1554
assert(opcode.src[1].base.index_dim == 1);
1555
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1556
1557
/* Insert the compare value into .z component.
1558
*/
1559
ureg_MOV(ureg,
1560
ureg_writemask(r0, TGSI_WRITEMASK_XYW),
1561
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1562
ureg_MOV(ureg,
1563
ureg_writemask(r0, TGSI_WRITEMASK_Z),
1564
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1565
1566
/* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case.
1567
*/
1568
1569
ureg_TEX(ureg,
1570
translate_dst_operand(&sx, &opcode.dst[0],
1571
opcode.saturate),
1572
sx.resources[opcode.src[1].base.index[0].imm].target,
1573
ureg_src(r0),
1574
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1575
1576
ureg_release_temporary(ureg, r0);
1577
}
1578
else {
1579
struct ureg_src srcreg[4];
1580
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1581
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1582
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1583
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1584
1585
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode,
1586
translate_dst_operand(&sx, &opcode.dst[0],
1587
opcode.saturate),
1588
srcreg);
1589
}
1590
break;
1591
1592
case D3D10_SB_OPCODE_SAMPLE_C_LZ:
1593
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1594
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1595
1596
assert(opcode.src[1].base.index_dim == 1);
1597
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1598
1599
/* XXX: Support only 2D texture targets for now.
1600
* Need to figure out how to pack the compare value
1601
* for other dimensions and if there is enough space
1602
* in a single operand for all possible cases.
1603
*/
1604
LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target !=
1605
TGSI_TEXTURE_2D);
1606
1607
/* Insert the compare value into .z component.
1608
* Insert 0 into .w component.
1609
*/
1610
ureg_MOV(ureg,
1611
ureg_writemask(r0, TGSI_WRITEMASK_XY),
1612
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1613
ureg_MOV(ureg,
1614
ureg_writemask(r0, TGSI_WRITEMASK_Z),
1615
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1616
ureg_MOV(ureg,
1617
ureg_writemask(r0, TGSI_WRITEMASK_W),
1618
ureg_imm1f(ureg, 0.0f));
1619
1620
ureg_TXL(ureg,
1621
translate_dst_operand(&sx, &opcode.dst[0],
1622
opcode.saturate),
1623
sx.resources[opcode.src[1].base.index[0].imm].target,
1624
ureg_src(r0),
1625
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1626
1627
ureg_release_temporary(ureg, r0);
1628
}
1629
else {
1630
struct ureg_src srcreg[4];
1631
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1632
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1633
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1634
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1635
1636
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode,
1637
translate_dst_operand(&sx, &opcode.dst[0],
1638
opcode.saturate),
1639
srcreg);
1640
}
1641
break;
1642
1643
case D3D10_SB_OPCODE_SAMPLE_L:
1644
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1645
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1646
1647
assert(opcode.src[1].base.index_dim == 1);
1648
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1649
1650
/* Insert LOD into .w component.
1651
*/
1652
ureg_MOV(ureg,
1653
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1654
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1655
ureg_MOV(ureg,
1656
ureg_writemask(r0, TGSI_WRITEMASK_W),
1657
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1658
1659
ureg_TXL(ureg,
1660
translate_dst_operand(&sx, &opcode.dst[0],
1661
opcode.saturate),
1662
sx.resources[opcode.src[1].base.index[0].imm].target,
1663
ureg_src(r0),
1664
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1665
1666
ureg_release_temporary(ureg, r0);
1667
}
1668
else {
1669
struct ureg_src srcreg[4];
1670
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1671
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1672
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1673
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1674
1675
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode,
1676
translate_dst_operand(&sx, &opcode.dst[0],
1677
opcode.saturate),
1678
srcreg);
1679
}
1680
break;
1681
1682
case D3D10_SB_OPCODE_SAMPLE_D:
1683
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1684
assert(opcode.src[1].base.index_dim == 1);
1685
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1686
1687
ureg_TXD(ureg,
1688
translate_dst_operand(&sx, &opcode.dst[0],
1689
opcode.saturate),
1690
sx.resources[opcode.src[1].base.index[0].imm].target,
1691
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT),
1692
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT),
1693
translate_src_operand(&sx, &opcode.src[4], OF_FLOAT),
1694
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1695
}
1696
else {
1697
struct ureg_src srcreg[5];
1698
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1699
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1700
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1701
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1702
srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT);
1703
1704
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode,
1705
translate_dst_operand(&sx, &opcode.dst[0],
1706
opcode.saturate),
1707
srcreg);
1708
}
1709
break;
1710
1711
case D3D10_SB_OPCODE_SAMPLE_B:
1712
if (st_debug & ST_DEBUG_OLD_TEX_OPS) {
1713
struct ureg_dst r0 = ureg_DECL_temporary(ureg);
1714
1715
assert(opcode.src[1].base.index_dim == 1);
1716
assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES);
1717
1718
/* Insert LOD bias into .w component.
1719
*/
1720
ureg_MOV(ureg,
1721
ureg_writemask(r0, TGSI_WRITEMASK_XYZ),
1722
translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1723
ureg_MOV(ureg,
1724
ureg_writemask(r0, TGSI_WRITEMASK_W),
1725
translate_src_operand(&sx, &opcode.src[3], OF_FLOAT));
1726
1727
ureg_TXB(ureg,
1728
translate_dst_operand(&sx, &opcode.dst[0],
1729
opcode.saturate),
1730
sx.resources[opcode.src[1].base.index[0].imm].target,
1731
ureg_src(r0),
1732
translate_src_operand(&sx, &opcode.src[2], OF_FLOAT));
1733
1734
ureg_release_temporary(ureg, r0);
1735
}
1736
else {
1737
struct ureg_src srcreg[4];
1738
srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT);
1739
srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT);
1740
srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT);
1741
srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT);
1742
1743
sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode,
1744
translate_dst_operand(&sx, &opcode.dst[0],
1745
opcode.saturate),
1746
srcreg);
1747
}
1748
break;
1749
1750
case D3D10_SB_OPCODE_SINCOS: {
1751
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1752
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT));
1753
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1754
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0],
1755
opcode.saturate);
1756
struct ureg_src src = ureg_src(src0);
1757
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1758
ureg_scalar(src, TGSI_SWIZZLE_X));
1759
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1760
ureg_scalar(src, TGSI_SWIZZLE_Y));
1761
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1762
ureg_scalar(src, TGSI_SWIZZLE_Z));
1763
ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1764
ureg_scalar(src, TGSI_SWIZZLE_W));
1765
}
1766
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1767
struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1],
1768
opcode.saturate);
1769
struct ureg_src src = ureg_src(src0);
1770
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X),
1771
ureg_scalar(src, TGSI_SWIZZLE_X));
1772
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y),
1773
ureg_scalar(src, TGSI_SWIZZLE_Y));
1774
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z),
1775
ureg_scalar(src, TGSI_SWIZZLE_Z));
1776
ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W),
1777
ureg_scalar(src, TGSI_SWIZZLE_W));
1778
}
1779
ureg_release_temporary(ureg, src0);
1780
}
1781
break;
1782
1783
case D3D10_SB_OPCODE_UDIV: {
1784
struct ureg_dst src0 = ureg_DECL_temporary(ureg);
1785
struct ureg_dst src1 = ureg_DECL_temporary(ureg);
1786
ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT));
1787
ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1788
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1789
ureg_UDIV(ureg,
1790
translate_dst_operand(&sx, &opcode.dst[0],
1791
opcode.saturate),
1792
ureg_src(src0), ureg_src(src1));
1793
}
1794
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1795
ureg_UMOD(ureg,
1796
translate_dst_operand(&sx, &opcode.dst[1],
1797
opcode.saturate),
1798
ureg_src(src0), ureg_src(src1));
1799
}
1800
ureg_release_temporary(ureg, src0);
1801
ureg_release_temporary(ureg, src1);
1802
}
1803
break;
1804
case D3D10_SB_OPCODE_UMUL: {
1805
if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1806
ureg_UMUL_HI(ureg,
1807
translate_dst_operand(&sx, &opcode.dst[0],
1808
opcode.saturate),
1809
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1810
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1811
}
1812
if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) {
1813
ureg_UMUL(ureg,
1814
translate_dst_operand(&sx, &opcode.dst[1],
1815
opcode.saturate),
1816
translate_src_operand(&sx, &opcode.src[0], OF_UINT),
1817
translate_src_operand(&sx, &opcode.src[1], OF_UINT));
1818
}
1819
}
1820
break;
1821
1822
case D3D10_SB_OPCODE_DCL_RESOURCE:
1823
{
1824
unsigned target;
1825
unsigned res_index = opcode.dst[0].base.index[0].imm;
1826
assert(opcode.dst[0].base.index_dim == 1);
1827
assert(res_index < SHADER_MAX_RESOURCES);
1828
1829
target = translate_resource_dimension(opcode.specific.dcl_resource_dimension);
1830
sx.resources[res_index].target = target;
1831
if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) {
1832
sx.sv[res_index] =
1833
ureg_DECL_sampler_view(ureg, res_index, target,
1834
trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]),
1835
trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]),
1836
trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]),
1837
trans_dcl_ret_type(opcode.dcl_resource_ret_type[3]));
1838
}
1839
break;
1840
}
1841
1842
case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: {
1843
unsigned num_constants = opcode.src[0].base.index[1].imm;
1844
1845
assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS);
1846
1847
if (num_constants == 0) {
1848
num_constants = SHADER_MAX_CONSTS;
1849
} else {
1850
assert(num_constants <= SHADER_MAX_CONSTS);
1851
}
1852
1853
ureg_DECL_constant2D(ureg,
1854
0,
1855
num_constants - 1,
1856
opcode.src[0].base.index[0].imm);
1857
break;
1858
}
1859
1860
case D3D10_SB_OPCODE_DCL_SAMPLER:
1861
assert(opcode.dst[0].base.index_dim == 1);
1862
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS);
1863
1864
sx.samplers[opcode.dst[0].base.index[0].imm] =
1865
ureg_DECL_sampler(ureg,
1866
opcode.dst[0].base.index[0].imm);
1867
break;
1868
1869
case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
1870
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1871
1872
switch (opcode.specific.dcl_gs_output_primitive_topology) {
1873
case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST:
1874
ureg_property(sx.ureg,
1875
TGSI_PROPERTY_GS_OUTPUT_PRIM,
1876
PIPE_PRIM_POINTS);
1877
break;
1878
1879
case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP:
1880
ureg_property(sx.ureg,
1881
TGSI_PROPERTY_GS_OUTPUT_PRIM,
1882
PIPE_PRIM_LINE_STRIP);
1883
break;
1884
1885
case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP:
1886
ureg_property(sx.ureg,
1887
TGSI_PROPERTY_GS_OUTPUT_PRIM,
1888
PIPE_PRIM_TRIANGLE_STRIP);
1889
break;
1890
1891
default:
1892
assert(0);
1893
}
1894
break;
1895
1896
case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE:
1897
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1898
1899
/* Figure out the second dimension of GS inputs.
1900
*/
1901
switch (opcode.specific.dcl_gs_input_primitive) {
1902
case D3D10_SB_PRIMITIVE_POINT:
1903
declare_vertices_in(&sx, 1);
1904
ureg_property(sx.ureg,
1905
TGSI_PROPERTY_GS_INPUT_PRIM,
1906
PIPE_PRIM_POINTS);
1907
break;
1908
1909
case D3D10_SB_PRIMITIVE_LINE:
1910
declare_vertices_in(&sx, 2);
1911
ureg_property(sx.ureg,
1912
TGSI_PROPERTY_GS_INPUT_PRIM,
1913
PIPE_PRIM_LINES);
1914
break;
1915
1916
case D3D10_SB_PRIMITIVE_TRIANGLE:
1917
declare_vertices_in(&sx, 3);
1918
ureg_property(sx.ureg,
1919
TGSI_PROPERTY_GS_INPUT_PRIM,
1920
PIPE_PRIM_TRIANGLES);
1921
break;
1922
1923
case D3D10_SB_PRIMITIVE_LINE_ADJ:
1924
declare_vertices_in(&sx, 4);
1925
ureg_property(sx.ureg,
1926
TGSI_PROPERTY_GS_INPUT_PRIM,
1927
PIPE_PRIM_LINES_ADJACENCY);
1928
break;
1929
1930
case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ:
1931
declare_vertices_in(&sx, 6);
1932
ureg_property(sx.ureg,
1933
TGSI_PROPERTY_GS_INPUT_PRIM,
1934
PIPE_PRIM_TRIANGLES_ADJACENCY);
1935
break;
1936
1937
default:
1938
assert(0);
1939
}
1940
break;
1941
1942
case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
1943
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1944
1945
ureg_property(sx.ureg,
1946
TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES,
1947
opcode.specific.dcl_max_output_vertex_count);
1948
break;
1949
1950
case D3D10_SB_OPCODE_DCL_INPUT:
1951
if (parser.header.type == D3D10_SB_VERTEX_SHADER) {
1952
dcl_vs_input(&sx, ureg, &opcode.dst[0]);
1953
} else {
1954
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1955
dcl_gs_input(&sx, ureg, &opcode.dst[0]);
1956
}
1957
break;
1958
1959
case D3D10_SB_OPCODE_DCL_INPUT_SGV:
1960
assert(parser.header.type == D3D10_SB_VERTEX_SHADER);
1961
dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1962
break;
1963
1964
case D3D10_SB_OPCODE_DCL_INPUT_SIV:
1965
assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER);
1966
dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name);
1967
break;
1968
1969
case D3D10_SB_OPCODE_DCL_INPUT_PS:
1970
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1971
dcl_ps_input(&sx, ureg, &opcode.dst[0],
1972
opcode.specific.dcl_in_ps_interp);
1973
break;
1974
1975
case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV:
1976
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1977
dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0],
1978
opcode.dcl_siv_name);
1979
break;
1980
1981
case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV:
1982
assert(parser.header.type == D3D10_SB_PIXEL_SHADER);
1983
dcl_ps_siv_input(&sx, ureg, &opcode.dst[0],
1984
opcode.dcl_siv_name,
1985
opcode.specific.dcl_in_ps_interp);
1986
break;
1987
1988
case D3D10_SB_OPCODE_DCL_OUTPUT:
1989
if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
1990
/* Pixel shader outputs. */
1991
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
1992
/* Depth output. */
1993
assert(opcode.dst[0].base.index_dim == 0);
1994
1995
sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1);
1996
sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z);
1997
} else {
1998
/* Color outputs. */
1999
assert(opcode.dst[0].base.index_dim == 1);
2000
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2001
2002
dcl_base_output(&sx, ureg,
2003
ureg_DECL_output(ureg,
2004
TGSI_SEMANTIC_COLOR,
2005
opcode.dst[0].base.index[0].imm),
2006
&opcode.dst[0]);
2007
}
2008
} else {
2009
assert(opcode.dst[0].base.index_dim == 1);
2010
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2011
2012
if (output_mapping) {
2013
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2014
output_mapping[nr_outputs]
2015
= opcode.dst[0].base.index[0].imm;
2016
}
2017
dcl_base_output(&sx, ureg,
2018
ureg_DECL_output(ureg,
2019
TGSI_SEMANTIC_GENERIC,
2020
opcode.dst[0].base.index[0].imm),
2021
&opcode.dst[0]);
2022
}
2023
break;
2024
2025
case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
2026
assert(opcode.dst[0].base.index_dim == 1);
2027
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2028
2029
if (output_mapping) {
2030
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2031
output_mapping[nr_outputs]
2032
= opcode.dst[0].base.index[0].imm;
2033
}
2034
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
2035
opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2036
/*
2037
* FIXME: this is quite broken. gallium no longer has separate
2038
* clip/cull dists, using (max 2) combined clipdist/culldist regs
2039
* instead. Unlike d3d10 though, which is clip and which cull is
2040
* simply determined by the number of clip/cull dists (that is,
2041
* all clip dists must come first).
2042
*/
2043
unsigned numcliporcull = sx.num_clip_distances_declared +
2044
sx.num_cull_distances_declared;
2045
sx.clip_distance_mapping[numcliporcull].d3d =
2046
opcode.dst[0].base.index[0].imm;
2047
sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
2048
if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
2049
++sx.num_clip_distances_declared;
2050
/* re-emit should be safe... */
2051
ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
2052
sx.num_clip_distances_declared);
2053
} else {
2054
++sx.num_cull_distances_declared;
2055
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2056
sx.num_cull_distances_declared);
2057
}
2058
} else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
2059
sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
2060
opcode.dst[0].base.index[0].imm;
2061
sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
2062
sx.num_cull_distances_declared;
2063
++sx.num_cull_distances_declared;
2064
ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
2065
sx.num_cull_distances_declared);
2066
}
2067
2068
dcl_base_output(&sx, ureg,
2069
ureg_DECL_output_masked(
2070
ureg,
2071
translate_system_name(opcode.dcl_siv_name),
2072
translate_semantic_index(&sx, opcode.dcl_siv_name,
2073
&opcode.dst[0]),
2074
opcode.dst[0].mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
2075
0, 1),
2076
&opcode.dst[0]);
2077
break;
2078
2079
case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
2080
assert(opcode.dst[0].base.index_dim == 1);
2081
assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
2082
2083
if (output_mapping) {
2084
unsigned nr_outputs = ureg_get_nr_outputs(ureg);
2085
output_mapping[nr_outputs]
2086
= opcode.dst[0].base.index[0].imm;
2087
}
2088
dcl_base_output(&sx, ureg,
2089
ureg_DECL_output(ureg,
2090
translate_system_name(opcode.dcl_siv_name),
2091
0),
2092
&opcode.dst[0]);
2093
break;
2094
2095
case D3D10_SB_OPCODE_DCL_TEMPS:
2096
{
2097
uint i;
2098
2099
assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
2100
SHADER_MAX_TEMPS);
2101
2102
sx.temp_offset = sx.declared_temps;
2103
2104
for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
2105
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2106
}
2107
sx.declared_temps += opcode.specific.dcl_num_temps;
2108
}
2109
break;
2110
2111
case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP:
2112
{
2113
uint i;
2114
2115
/* XXX: Add true indexable temps to gallium.
2116
*/
2117
2118
assert(opcode.specific.dcl_indexable_temp.index <
2119
SHADER_MAX_INDEXABLE_TEMPS);
2120
assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
2121
SHADER_MAX_TEMPS);
2122
2123
sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
2124
sx.declared_temps;
2125
2126
for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
2127
sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
2128
}
2129
sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
2130
}
2131
break;
2132
case D3D10_SB_OPCODE_IF: {
2133
unsigned label = 0;
2134
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2135
struct ureg_src src =
2136
translate_src_operand(&sx, &opcode.src[0], OF_INT);
2137
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2138
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2139
ureg_UIF(ureg, ureg_src(src_nz), &label);
2140
ureg_release_temporary(ureg, src_nz);;
2141
} else {
2142
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2143
}
2144
}
2145
break;
2146
case D3D10_SB_OPCODE_RETC:
2147
case D3D10_SB_OPCODE_CONTINUEC:
2148
case D3D10_SB_OPCODE_CALLC:
2149
case D3D10_SB_OPCODE_DISCARD:
2150
case D3D10_SB_OPCODE_BREAKC:
2151
{
2152
unsigned label = 0;
2153
assert(operand_is_scalar(&opcode.src[0]));
2154
if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
2155
struct ureg_src src =
2156
translate_src_operand(&sx, &opcode.src[0], OF_INT);
2157
struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
2158
ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
2159
ureg_UIF(ureg, ureg_src(src_nz), &label);
2160
ureg_release_temporary(ureg, src_nz);
2161
}
2162
else {
2163
ureg_UIF(ureg, translate_src_operand(&sx, &opcode.src[0], OF_INT), &label);
2164
}
2165
switch (opcode.type) {
2166
case D3D10_SB_OPCODE_RETC:
2167
ureg_RET(ureg);
2168
break;
2169
case D3D10_SB_OPCODE_CONTINUEC:
2170
ureg_CONT(ureg);
2171
break;
2172
case D3D10_SB_OPCODE_CALLC: {
2173
unsigned label = opcode.src[1].base.index[0].imm;
2174
unsigned tgsi_token_label = 0;
2175
ureg_CAL(ureg, &tgsi_token_label);
2176
Shader_add_call(&sx, label, tgsi_token_label);
2177
}
2178
break;
2179
case D3D10_SB_OPCODE_DISCARD:
2180
ureg_KILL(ureg);
2181
break;
2182
case D3D10_SB_OPCODE_BREAKC:
2183
ureg_BRK(ureg);
2184
break;
2185
default:
2186
assert(0);
2187
break;
2188
}
2189
ureg_ENDIF(ureg);
2190
}
2191
break;
2192
case D3D10_SB_OPCODE_LABEL: {
2193
unsigned label = opcode.src[0].base.index[0].imm;
2194
unsigned tgsi_inst_no = 0;
2195
if (inside_sub) {
2196
ureg_ENDSUB(ureg);
2197
}
2198
tgsi_inst_no = ureg_get_instruction_number(ureg);
2199
ureg_BGNSUB(ureg);
2200
inside_sub = TRUE;
2201
Shader_add_label(&sx, label, tgsi_inst_no);
2202
}
2203
break;
2204
case D3D10_SB_OPCODE_CALL: {
2205
unsigned label = opcode.src[0].base.index[0].imm;
2206
unsigned tgsi_token_label = 0;
2207
ureg_CAL(ureg, &tgsi_token_label);
2208
Shader_add_call(&sx, label, tgsi_token_label);
2209
}
2210
break;
2211
case D3D10_SB_OPCODE_EMIT:
2212
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2213
break;
2214
case D3D10_SB_OPCODE_CUT:
2215
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2216
break;
2217
case D3D10_SB_OPCODE_EMITTHENCUT:
2218
ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
2219
ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
2220
break;
2221
case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
2222
case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
2223
/* Ignore */
2224
break;
2225
default:
2226
{
2227
uint i;
2228
struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
2229
struct ureg_src src[SHADER_MAX_SRC_OPERANDS];
2230
2231
assert(ox->tgsi_opcode != TGSI_EXPAND);
2232
2233
if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
2234
if (!shader_dumped) {
2235
dx10_shader_dump_tokens(code);
2236
shader_dumped = TRUE;
2237
}
2238
debug_printf("%s: unsupported opcode %i\n",
2239
__FUNCTION__, ox->type);
2240
assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
2241
}
2242
2243
/* Destination operands. */
2244
for (i = 0; i < opcode.num_dst; i++) {
2245
dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
2246
opcode.saturate);
2247
}
2248
2249
/* Source operands. */
2250
for (i = 0; i < opcode.num_src; i++) {
2251
src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
2252
}
2253
2254
/* Try to re-route output depth to Z channel. */
2255
if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
2256
LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
2257
dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
2258
src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
2259
}
2260
2261
ureg_insn(ureg,
2262
ox->tgsi_opcode,
2263
dst,
2264
opcode.num_dst,
2265
src,
2266
opcode.num_src, 0);
2267
}
2268
}
2269
2270
Shader_opcode_free(&opcode);
2271
}
2272
2273
if (inside_sub) {
2274
ureg_ENDSUB(ureg);
2275
}
2276
2277
ureg_END(ureg);
2278
2279
for (i = 0; i < sx.num_calls; ++i) {
2280
for (j = 0; j < sx.num_labels; ++j) {
2281
if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
2282
ureg_fixup_label(sx.ureg,
2283
sx.calls[i].tgsi_label_token,
2284
sx.labels[j].tgsi_insn_no);
2285
break;
2286
}
2287
}
2288
ASSERT(j < sx.num_labels);
2289
}
2290
FREE(sx.labels);
2291
FREE(sx.calls);
2292
2293
tokens = ureg_get_tokens(ureg, &nr_tokens);
2294
assert(tokens);
2295
ureg_destroy(ureg);
2296
2297
if (st_debug & ST_DEBUG_TGSI) {
2298
tgsi_dump(tokens, 0);
2299
}
2300
2301
return tokens;
2302
}
2303
2304