Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/tgsi/tgsi_lowering.c
4565 views
1
/*
2
* Copyright (C) 2014 Rob Clark <[email protected]>
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
* SOFTWARE.
22
*
23
* Authors:
24
* Rob Clark <[email protected]>
25
*/
26
27
#include "tgsi/tgsi_transform.h"
28
#include "tgsi/tgsi_scan.h"
29
#include "tgsi/tgsi_dump.h"
30
31
#include "util/compiler.h"
32
#include "util/u_debug.h"
33
#include "util/u_math.h"
34
35
#include "tgsi_lowering.h"
36
37
struct tgsi_lowering_context {
38
struct tgsi_transform_context base;
39
const struct tgsi_lowering_config *config;
40
struct tgsi_shader_info *info;
41
unsigned two_side_colors;
42
unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
43
unsigned color_base; /* base register for chosen COLOR/BCOLOR's */
44
int face_idx;
45
unsigned numtmp;
46
struct {
47
struct tgsi_full_src_register src;
48
struct tgsi_full_dst_register dst;
49
} tmp[2];
50
#define A 0
51
#define B 1
52
struct tgsi_full_src_register imm;
53
int emitted_decls;
54
unsigned saturate;
55
};
56
57
/* Downcast a transform context to the lowering context that embeds it
 * (valid because 'base' is the first member of the lowering context).
 */
static inline struct tgsi_lowering_context *
tgsi_lowering_context(struct tgsi_transform_context *tctx)
{
   struct tgsi_lowering_context *ctx = (struct tgsi_lowering_context *)tctx;

   return ctx;
}
62
63
/*
64
* Utility helpers:
65
*/
66
67
static void
68
reg_dst(struct tgsi_full_dst_register *dst,
69
const struct tgsi_full_dst_register *orig_dst, unsigned wrmask)
70
{
71
*dst = *orig_dst;
72
dst->Register.WriteMask &= wrmask;
73
assert(dst->Register.WriteMask);
74
}
75
76
static inline void
77
get_swiz(unsigned *swiz, const struct tgsi_src_register *src)
78
{
79
swiz[0] = src->SwizzleX;
80
swiz[1] = src->SwizzleY;
81
swiz[2] = src->SwizzleZ;
82
swiz[3] = src->SwizzleW;
83
}
84
85
static void
86
reg_src(struct tgsi_full_src_register *src,
87
const struct tgsi_full_src_register *orig_src,
88
unsigned sx, unsigned sy, unsigned sz, unsigned sw)
89
{
90
unsigned swiz[4];
91
get_swiz(swiz, &orig_src->Register);
92
*src = *orig_src;
93
src->Register.SwizzleX = swiz[sx];
94
src->Register.SwizzleY = swiz[sy];
95
src->Register.SwizzleZ = swiz[sz];
96
src->Register.SwizzleW = swiz[sw];
97
}
98
99
/* Selector used for swizzle slots written as '_' in SWIZ(). */
#define TGSI_SWIZZLE__ TGSI_SWIZZLE_X /* don't-care value! */

/* Expand four channel letters (X, Y, Z, W or _) into four comma-separated
 * TGSI_SWIZZLE_* values, e.g. as the trailing arguments of reg_src().
 */
#define SWIZ(x,y,z,w) TGSI_SWIZZLE_ ## x, TGSI_SWIZZLE_ ## y, \
                      TGSI_SWIZZLE_ ## z, TGSI_SWIZZLE_ ## w
102
103
/*
104
* if (dst.x aliases src.x) {
105
* MOV tmpA.x, src.x
106
* src = tmpA
107
* }
108
* COS dst.x, src.x
109
* SIN dst.y, src.x
110
* MOV dst.zw, imm{0.0, 1.0}
111
*/
112
static bool
113
aliases(const struct tgsi_full_dst_register *dst, unsigned dst_mask,
114
const struct tgsi_full_src_register *src, unsigned src_mask)
115
{
116
if ((dst->Register.File == src->Register.File) &&
117
(dst->Register.Index == src->Register.Index)) {
118
unsigned i, actual_mask = 0;
119
unsigned swiz[4];
120
get_swiz(swiz, &src->Register);
121
for (i = 0; i < 4; i++)
122
if (src_mask & (1 << i))
123
actual_mask |= (1 << swiz[i]);
124
if (actual_mask & dst_mask)
125
return true;
126
}
127
return false;
128
}
129
130
static void
131
create_mov(struct tgsi_transform_context *tctx,
132
const struct tgsi_full_dst_register *dst,
133
const struct tgsi_full_src_register *src,
134
unsigned mask, unsigned saturate)
135
{
136
struct tgsi_full_instruction new_inst;
137
138
new_inst = tgsi_default_full_instruction();
139
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
140
new_inst.Instruction.Saturate = saturate;
141
new_inst.Instruction.NumDstRegs = 1;
142
reg_dst(&new_inst.Dst[0], dst, mask);
143
new_inst.Instruction.NumSrcRegs = 1;
144
reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
145
tctx->emit_instruction(tctx, &new_inst);
146
}
147
148
/* to help calculate # of tgsi tokens for a lowering.. we assume
149
* the worst case, ie. removed instructions don't have ADDR[] or
150
* anything which increases the # of tokens per src/dst and the
151
* inserted instructions do.
152
*
153
* OINST() - old instruction
154
* 1 : instruction itself
155
* 1 : dst
156
* 1 * nargs : srcN
157
*
158
* NINST() - new instruction
159
* 1 : instruction itself
160
* 2 : dst
161
* 2 * nargs : srcN
162
*/
163
164
#define OINST(nargs) (1 /* instruction */ + 1 /* dst */ + 1 * (nargs) /* srcs */)
165
#define NINST(nargs) (1 /* instruction */ + 2 /* dst */ + 2 * (nargs) /* srcs */)
166
167
/*
168
* Lowering Translators:
169
*/
170
171
/* DST - Distance Vector
172
* dst.x = 1.0
173
* dst.y = src0.y \times src1.y
174
* dst.z = src0.z
175
* dst.w = src1.w
176
*
177
* ; note: could be more clever and use just a single temp
178
* ; if I was clever enough to re-write the swizzles.
179
* ; needs: 2 tmp, imm{1.0}
180
* if (dst.y aliases src0.z) {
181
* MOV tmpA.yz, src0.yz
182
* src0 = tmpA
183
* }
184
* if (dst.yz aliases src1.w) {
185
* MOV tmpB.yw, src1.yw
186
* src1 = tmpB
187
* }
188
* MUL dst.y, src0.y, src1.y
189
* MOV dst.z, src0.z
190
* MOV dst.w, src1.w
191
* MOV dst.x, imm{1.0}
192
*/
193
#define DST_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
194
NINST(1) + NINST(1) - OINST(2))
195
#define DST_TMP 2
196
static void
197
transform_dst(struct tgsi_transform_context *tctx,
198
struct tgsi_full_instruction *inst)
199
{
200
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
201
struct tgsi_full_dst_register *dst = &inst->Dst[0];
202
struct tgsi_full_src_register *src0 = &inst->Src[0];
203
struct tgsi_full_src_register *src1 = &inst->Src[1];
204
struct tgsi_full_instruction new_inst;
205
206
if (aliases(dst, TGSI_WRITEMASK_Y, src0, TGSI_WRITEMASK_Z)) {
207
create_mov(tctx, &ctx->tmp[A].dst, src0, TGSI_WRITEMASK_YZ, 0);
208
src0 = &ctx->tmp[A].src;
209
}
210
211
if (aliases(dst, TGSI_WRITEMASK_YZ, src1, TGSI_WRITEMASK_W)) {
212
create_mov(tctx, &ctx->tmp[B].dst, src1, TGSI_WRITEMASK_YW, 0);
213
src1 = &ctx->tmp[B].src;
214
}
215
216
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
217
/* MUL dst.y, src0.y, src1.y */
218
new_inst = tgsi_default_full_instruction();
219
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
220
new_inst.Instruction.NumDstRegs = 1;
221
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
222
new_inst.Instruction.NumSrcRegs = 2;
223
reg_src(&new_inst.Src[0], src0, SWIZ(_, Y, _, _));
224
reg_src(&new_inst.Src[1], src1, SWIZ(_, Y, _, _));
225
tctx->emit_instruction(tctx, &new_inst);
226
}
227
228
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
229
/* MOV dst.z, src0.z */
230
new_inst = tgsi_default_full_instruction();
231
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
232
new_inst.Instruction.NumDstRegs = 1;
233
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
234
new_inst.Instruction.NumSrcRegs = 1;
235
reg_src(&new_inst.Src[0], src0, SWIZ(_, _, Z, _));
236
tctx->emit_instruction(tctx, &new_inst);
237
}
238
239
if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
240
/* MOV dst.w, src1.w */
241
new_inst = tgsi_default_full_instruction();
242
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
243
new_inst.Instruction.NumDstRegs = 1;
244
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
245
new_inst.Instruction.NumSrcRegs = 1;
246
reg_src(&new_inst.Src[0], src1, SWIZ(_, _, _, W));
247
tctx->emit_instruction(tctx, &new_inst);
248
}
249
250
if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
251
/* MOV dst.x, imm{1.0} */
252
new_inst = tgsi_default_full_instruction();
253
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
254
new_inst.Instruction.NumDstRegs = 1;
255
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
256
new_inst.Instruction.NumSrcRegs = 1;
257
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, _));
258
tctx->emit_instruction(tctx, &new_inst);
259
}
260
}
261
262
/* LRP - Linear Interpolate
263
* dst.x = src0.x \times src1.x + (1.0 - src0.x) \times src2.x
264
* dst.y = src0.y \times src1.y + (1.0 - src0.y) \times src2.y
265
* dst.z = src0.z \times src1.z + (1.0 - src0.z) \times src2.z
266
* dst.w = src0.w \times src1.w + (1.0 - src0.w) \times src2.w
267
*
268
* This becomes: src0 \times src1 + src2 - src0 \times src2, which
269
* can then become: src0 \times src1 - (src0 \times src2 - src2)
270
*
271
* ; needs: 1 tmp
272
* MAD tmpA, src0, src2, -src2
273
* MAD dst, src0, src1, -tmpA
274
*/
275
#define LRP_GROW (NINST(3) + NINST(3) - OINST(3))
276
#define LRP_TMP 1
277
static void
278
transform_lrp(struct tgsi_transform_context *tctx,
279
struct tgsi_full_instruction *inst)
280
{
281
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
282
struct tgsi_full_dst_register *dst = &inst->Dst[0];
283
struct tgsi_full_src_register *src0 = &inst->Src[0];
284
struct tgsi_full_src_register *src1 = &inst->Src[1];
285
struct tgsi_full_src_register *src2 = &inst->Src[2];
286
struct tgsi_full_instruction new_inst;
287
288
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
289
/* MAD tmpA, src0, src2, -src2 */
290
new_inst = tgsi_default_full_instruction();
291
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
292
new_inst.Instruction.NumDstRegs = 1;
293
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
294
new_inst.Instruction.NumSrcRegs = 3;
295
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
296
reg_src(&new_inst.Src[1], src2, SWIZ(X, Y, Z, W));
297
reg_src(&new_inst.Src[2], src2, SWIZ(X, Y, Z, W));
298
new_inst.Src[2].Register.Negate = !new_inst.Src[2].Register.Negate;
299
tctx->emit_instruction(tctx, &new_inst);
300
301
/* MAD dst, src0, src1, -tmpA */
302
new_inst = tgsi_default_full_instruction();
303
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
304
new_inst.Instruction.NumDstRegs = 1;
305
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
306
new_inst.Instruction.NumSrcRegs = 3;
307
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
308
reg_src(&new_inst.Src[1], src1, SWIZ(X, Y, Z, W));
309
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
310
new_inst.Src[2].Register.Negate = true;
311
tctx->emit_instruction(tctx, &new_inst);
312
}
313
}
314
315
/* FRC - Fraction
316
* dst.x = src.x - \lfloor src.x\rfloor
317
* dst.y = src.y - \lfloor src.y\rfloor
318
* dst.z = src.z - \lfloor src.z\rfloor
319
* dst.w = src.w - \lfloor src.w\rfloor
320
*
321
* ; needs: 1 tmp
322
* FLR tmpA, src
323
* SUB dst, src, tmpA
324
*/
325
#define FRC_GROW (NINST(1) + NINST(2) - OINST(1))
326
#define FRC_TMP 1
327
static void
328
transform_frc(struct tgsi_transform_context *tctx,
329
struct tgsi_full_instruction *inst)
330
{
331
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
332
struct tgsi_full_dst_register *dst = &inst->Dst[0];
333
struct tgsi_full_src_register *src = &inst->Src[0];
334
struct tgsi_full_instruction new_inst;
335
336
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
337
/* FLR tmpA, src */
338
new_inst = tgsi_default_full_instruction();
339
new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
340
new_inst.Instruction.NumDstRegs = 1;
341
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
342
new_inst.Instruction.NumSrcRegs = 1;
343
reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
344
tctx->emit_instruction(tctx, &new_inst);
345
346
/* SUB dst, src, tmpA */
347
new_inst = tgsi_default_full_instruction();
348
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
349
new_inst.Instruction.NumDstRegs = 1;
350
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
351
new_inst.Instruction.NumSrcRegs = 2;
352
reg_src(&new_inst.Src[0], src, SWIZ(X, Y, Z, W));
353
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
354
new_inst.Src[1].Register.Negate = 1;
355
tctx->emit_instruction(tctx, &new_inst);
356
}
357
}
358
359
/* POW - Power
360
* dst.x = src0.x^{src1.x}
361
* dst.y = src0.x^{src1.x}
362
* dst.z = src0.x^{src1.x}
363
* dst.w = src0.x^{src1.x}
364
*
365
* ; needs: 1 tmp
366
* LG2 tmpA.x, src0.x
367
* MUL tmpA.x, src1.x, tmpA.x
368
* EX2 dst, tmpA.x
369
*/
370
#define POW_GROW (NINST(1) + NINST(2) + NINST(1) - OINST(2))
371
#define POW_TMP 1
372
static void
373
transform_pow(struct tgsi_transform_context *tctx,
374
struct tgsi_full_instruction *inst)
375
{
376
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
377
struct tgsi_full_dst_register *dst = &inst->Dst[0];
378
struct tgsi_full_src_register *src0 = &inst->Src[0];
379
struct tgsi_full_src_register *src1 = &inst->Src[1];
380
struct tgsi_full_instruction new_inst;
381
382
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
383
/* LG2 tmpA.x, src0.x */
384
new_inst = tgsi_default_full_instruction();
385
new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
386
new_inst.Instruction.NumDstRegs = 1;
387
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
388
new_inst.Instruction.NumSrcRegs = 1;
389
reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
390
tctx->emit_instruction(tctx, &new_inst);
391
392
/* MUL tmpA.x, src1.x, tmpA.x */
393
new_inst = tgsi_default_full_instruction();
394
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
395
new_inst.Instruction.NumDstRegs = 1;
396
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
397
new_inst.Instruction.NumSrcRegs = 2;
398
reg_src(&new_inst.Src[0], src1, SWIZ(X, _, _, _));
399
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
400
tctx->emit_instruction(tctx, &new_inst);
401
402
/* EX2 dst, tmpA.x */
403
new_inst = tgsi_default_full_instruction();
404
new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
405
new_inst.Instruction.NumDstRegs = 1;
406
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
407
new_inst.Instruction.NumSrcRegs = 1;
408
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
409
tctx->emit_instruction(tctx, &new_inst);
410
}
411
}
412
413
/* LIT - Light Coefficients
414
* dst.x = 1.0
415
* dst.y = max(src.x, 0.0)
416
* dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0)} : 0
417
* dst.w = 1.0
418
*
419
* ; needs: 1 tmp, imm{0.0}, imm{1.0}, imm{128.0}
420
* MAX tmpA.xy, src.xy, imm{0.0}
421
* MIN tmpA.z, src.w, imm{128.0}
* MAX tmpA.z, tmpA.z, -imm{128.0}
422
* LG2 tmpA.y, tmpA.y
423
* MUL tmpA.y, tmpA.z, tmpA.y
424
* EX2 tmpA.y, tmpA.y
425
* CMP tmpA.y, -src.x, tmpA.y, imm{0.0}
426
* MOV dst.yz, tmpA.xy
427
* MOV dst.xw, imm{1.0}
428
*/
429
#define LIT_GROW (NINST(1) + NINST(3) + NINST(1) + NINST(2) + \
430
NINST(1) + NINST(3) + NINST(1) + NINST(1) - OINST(1))
431
#define LIT_TMP 1
432
static void
433
transform_lit(struct tgsi_transform_context *tctx,
434
struct tgsi_full_instruction *inst)
435
{
436
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
437
struct tgsi_full_dst_register *dst = &inst->Dst[0];
438
struct tgsi_full_src_register *src = &inst->Src[0];
439
struct tgsi_full_instruction new_inst;
440
441
if (dst->Register.WriteMask & TGSI_WRITEMASK_YZ) {
442
/* MAX tmpA.xy, src.xy, imm{0.0} */
443
new_inst = tgsi_default_full_instruction();
444
new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
445
new_inst.Instruction.NumDstRegs = 1;
446
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XY);
447
new_inst.Instruction.NumSrcRegs = 2;
448
reg_src(&new_inst.Src[0], src, SWIZ(X, Y, _, _));
449
reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(X, X, _, _));
450
tctx->emit_instruction(tctx, &new_inst);
451
452
/* MIN tmpA.z, src.w, imm{128.0} */
453
new_inst = tgsi_default_full_instruction();
454
new_inst.Instruction.Opcode = TGSI_OPCODE_MIN;
455
new_inst.Instruction.NumDstRegs = 1;
456
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
457
new_inst.Instruction.NumSrcRegs = 2;
458
reg_src(&new_inst.Src[0], src, SWIZ(_, _, W, _));
459
reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
460
tctx->emit_instruction(tctx, &new_inst);
461
462
/* MAX tmpA.z, tmpA.z, -imm{128.0} */
463
new_inst = tgsi_default_full_instruction();
464
new_inst.Instruction.Opcode = TGSI_OPCODE_MAX;
465
new_inst.Instruction.NumDstRegs = 1;
466
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
467
new_inst.Instruction.NumSrcRegs = 2;
468
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Z, _));
469
reg_src(&new_inst.Src[1], &ctx->imm, SWIZ(_, _, Z, _));
470
new_inst.Src[1].Register.Negate = true;
471
tctx->emit_instruction(tctx, &new_inst);
472
473
/* LG2 tmpA.y, tmpA.y */
474
new_inst = tgsi_default_full_instruction();
475
new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
476
new_inst.Instruction.NumDstRegs = 1;
477
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
478
new_inst.Instruction.NumSrcRegs = 1;
479
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
480
tctx->emit_instruction(tctx, &new_inst);
481
482
/* MUL tmpA.y, tmpA.z, tmpA.y */
483
new_inst = tgsi_default_full_instruction();
484
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
485
new_inst.Instruction.NumDstRegs = 1;
486
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
487
new_inst.Instruction.NumSrcRegs = 2;
488
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
489
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
490
tctx->emit_instruction(tctx, &new_inst);
491
492
/* EX2 tmpA.y, tmpA.y */
493
new_inst = tgsi_default_full_instruction();
494
new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
495
new_inst.Instruction.NumDstRegs = 1;
496
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
497
new_inst.Instruction.NumSrcRegs = 1;
498
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
499
tctx->emit_instruction(tctx, &new_inst);
500
501
/* CMP tmpA.y, -src.x, tmpA.y, imm{0.0} */
502
new_inst = tgsi_default_full_instruction();
503
new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
504
new_inst.Instruction.NumDstRegs = 1;
505
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
506
new_inst.Instruction.NumSrcRegs = 3;
507
reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
508
new_inst.Src[0].Register.Negate = true;
509
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
510
reg_src(&new_inst.Src[2], &ctx->imm, SWIZ(_, X, _, _));
511
tctx->emit_instruction(tctx, &new_inst);
512
513
/* MOV dst.yz, tmpA.xy */
514
new_inst = tgsi_default_full_instruction();
515
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
516
new_inst.Instruction.NumDstRegs = 1;
517
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_YZ);
518
new_inst.Instruction.NumSrcRegs = 1;
519
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, Y, _));
520
tctx->emit_instruction(tctx, &new_inst);
521
}
522
523
if (dst->Register.WriteMask & TGSI_WRITEMASK_XW) {
524
/* MOV dst.xw, imm{1.0} */
525
new_inst = tgsi_default_full_instruction();
526
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
527
new_inst.Instruction.NumDstRegs = 1;
528
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XW);
529
new_inst.Instruction.NumSrcRegs = 1;
530
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(Y, _, _, Y));
531
tctx->emit_instruction(tctx, &new_inst);
532
}
533
}
534
535
/* EXP - Approximate Exponential Base 2
536
* dst.x = 2^{\lfloor src.x\rfloor}
537
* dst.y = src.x - \lfloor src.x\rfloor
538
* dst.z = 2^{src.x}
539
* dst.w = 1.0
540
*
541
* ; needs: 1 tmp, imm{1.0}
542
* if (lowering FLR) {
543
* FRC tmpA.x, src.x
544
* SUB tmpA.x, src.x, tmpA.x
545
* } else {
546
* FLR tmpA.x, src.x
547
* }
548
* EX2 tmpA.y, src.x
549
* SUB dst.y, src.x, tmpA.x
550
* EX2 dst.x, tmpA.x
551
* MOV dst.z, tmpA.y
552
* MOV dst.w, imm{1.0}
553
*/
554
#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
555
NINST(1)+ NINST(1) - OINST(1))
556
#define EXP_TMP 1
557
static void
558
transform_exp(struct tgsi_transform_context *tctx,
559
struct tgsi_full_instruction *inst)
560
{
561
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
562
struct tgsi_full_dst_register *dst = &inst->Dst[0];
563
struct tgsi_full_src_register *src = &inst->Src[0];
564
struct tgsi_full_instruction new_inst;
565
566
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
567
if (ctx->config->lower_FLR) {
568
/* FRC tmpA.x, src.x */
569
new_inst = tgsi_default_full_instruction();
570
new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
571
new_inst.Instruction.NumDstRegs = 1;
572
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
573
new_inst.Instruction.NumSrcRegs = 1;
574
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
575
tctx->emit_instruction(tctx, &new_inst);
576
577
/* SUB tmpA.x, src.x, tmpA.x */
578
new_inst = tgsi_default_full_instruction();
579
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
580
new_inst.Instruction.NumDstRegs = 1;
581
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
582
new_inst.Instruction.NumSrcRegs = 2;
583
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
584
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
585
new_inst.Src[1].Register.Negate = 1;
586
tctx->emit_instruction(tctx, &new_inst);
587
} else {
588
/* FLR tmpA.x, src.x */
589
new_inst = tgsi_default_full_instruction();
590
new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
591
new_inst.Instruction.NumDstRegs = 1;
592
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
593
new_inst.Instruction.NumSrcRegs = 1;
594
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
595
tctx->emit_instruction(tctx, &new_inst);
596
}
597
}
598
599
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
600
/* EX2 tmpA.y, src.x */
601
new_inst = tgsi_default_full_instruction();
602
new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
603
new_inst.Instruction.NumDstRegs = 1;
604
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
605
new_inst.Instruction.NumSrcRegs = 1;
606
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
607
tctx->emit_instruction(tctx, &new_inst);
608
}
609
610
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
611
/* SUB dst.y, src.x, tmpA.x */
612
new_inst = tgsi_default_full_instruction();
613
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
614
new_inst.Instruction.NumDstRegs = 1;
615
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
616
new_inst.Instruction.NumSrcRegs = 2;
617
reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
618
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, X, _, _));
619
new_inst.Src[1].Register.Negate = 1;
620
tctx->emit_instruction(tctx, &new_inst);
621
}
622
623
if (dst->Register.WriteMask & TGSI_WRITEMASK_X) {
624
/* EX2 dst.x, tmpA.x */
625
new_inst = tgsi_default_full_instruction();
626
new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
627
new_inst.Instruction.NumDstRegs = 1;
628
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_X);
629
new_inst.Instruction.NumSrcRegs = 1;
630
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
631
tctx->emit_instruction(tctx, &new_inst);
632
}
633
634
if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
635
/* MOV dst.z, tmpA.y */
636
new_inst = tgsi_default_full_instruction();
637
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
638
new_inst.Instruction.NumDstRegs = 1;
639
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Z);
640
new_inst.Instruction.NumSrcRegs = 1;
641
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, _, Y, _));
642
tctx->emit_instruction(tctx, &new_inst);
643
}
644
645
if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
646
/* MOV dst.w, imm{1.0} */
647
new_inst = tgsi_default_full_instruction();
648
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
649
new_inst.Instruction.NumDstRegs = 1;
650
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
651
new_inst.Instruction.NumSrcRegs = 1;
652
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
653
tctx->emit_instruction(tctx, &new_inst);
654
}
655
}
656
657
/* LOG - Approximate Logarithm Base 2
658
* dst.x = \lfloor\log_2{|src.x|}\rfloor
659
* dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
660
* dst.z = \log_2{|src.x|}
661
* dst.w = 1.0
662
*
663
* ; needs: 1 tmp, imm{1.0}
664
* LG2 tmpA.x, |src.x|
665
* if (lowering FLR) {
666
* FRC tmpA.y, tmpA.x
667
* SUB tmpA.y, tmpA.x, tmpA.y
668
* } else {
669
* FLR tmpA.y, tmpA.x
670
* }
671
* EX2 tmpA.z, tmpA.y
672
* RCP tmpA.z, tmpA.z
673
* MUL dst.y, |src.x|, tmpA.z
674
* MOV dst.xz, tmpA.yx
675
* MOV dst.w, imm{1.0}
676
*/
677
#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
678
NINST(2) + NINST(1) + NINST(1) - OINST(1))
679
#define LOG_TMP 1
680
static void
681
transform_log(struct tgsi_transform_context *tctx,
682
struct tgsi_full_instruction *inst)
683
{
684
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
685
struct tgsi_full_dst_register *dst = &inst->Dst[0];
686
struct tgsi_full_src_register *src = &inst->Src[0];
687
struct tgsi_full_instruction new_inst;
688
689
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZ) {
690
/* LG2 tmpA.x, |src.x| */
691
new_inst = tgsi_default_full_instruction();
692
new_inst.Instruction.Opcode = TGSI_OPCODE_LG2;
693
new_inst.Instruction.NumDstRegs = 1;
694
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
695
new_inst.Instruction.NumSrcRegs = 1;
696
reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
697
new_inst.Src[0].Register.Absolute = true;
698
tctx->emit_instruction(tctx, &new_inst);
699
}
700
701
if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
702
if (ctx->config->lower_FLR) {
703
/* FRC tmpA.y, tmpA.x */
704
new_inst = tgsi_default_full_instruction();
705
new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
706
new_inst.Instruction.NumDstRegs = 1;
707
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
708
new_inst.Instruction.NumSrcRegs = 1;
709
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
710
tctx->emit_instruction(tctx, &new_inst);
711
712
/* SUB tmpA.y, tmpA.x, tmpA.y */
713
new_inst = tgsi_default_full_instruction();
714
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
715
new_inst.Instruction.NumDstRegs = 1;
716
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
717
new_inst.Instruction.NumSrcRegs = 2;
718
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
719
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
720
new_inst.Src[1].Register.Negate = 1;
721
tctx->emit_instruction(tctx, &new_inst);
722
} else {
723
/* FLR tmpA.y, tmpA.x */
724
new_inst = tgsi_default_full_instruction();
725
new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
726
new_inst.Instruction.NumDstRegs = 1;
727
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
728
new_inst.Instruction.NumSrcRegs = 1;
729
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
730
tctx->emit_instruction(tctx, &new_inst);
731
}
732
}
733
734
if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
735
/* EX2 tmpA.z, tmpA.y */
736
new_inst = tgsi_default_full_instruction();
737
new_inst.Instruction.Opcode = TGSI_OPCODE_EX2;
738
new_inst.Instruction.NumDstRegs = 1;
739
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
740
new_inst.Instruction.NumSrcRegs = 1;
741
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, _, _));
742
tctx->emit_instruction(tctx, &new_inst);
743
744
/* RCP tmpA.z, tmpA.z */
745
new_inst = tgsi_default_full_instruction();
746
new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
747
new_inst.Instruction.NumDstRegs = 1;
748
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Z);
749
new_inst.Instruction.NumSrcRegs = 1;
750
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Z, _, _, _));
751
tctx->emit_instruction(tctx, &new_inst);
752
753
/* MUL dst.y, |src.x|, tmpA.z */
754
new_inst = tgsi_default_full_instruction();
755
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
756
new_inst.Instruction.NumDstRegs = 1;
757
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_Y);
758
new_inst.Instruction.NumSrcRegs = 2;
759
reg_src(&new_inst.Src[0], src, SWIZ(_, X, _, _));
760
new_inst.Src[0].Register.Absolute = true;
761
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Z, _, _));
762
tctx->emit_instruction(tctx, &new_inst);
763
}
764
765
if (dst->Register.WriteMask & TGSI_WRITEMASK_XZ) {
766
/* MOV dst.xz, tmpA.yx */
767
new_inst = tgsi_default_full_instruction();
768
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
769
new_inst.Instruction.NumDstRegs = 1;
770
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XZ);
771
new_inst.Instruction.NumSrcRegs = 1;
772
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(Y, _, X, _));
773
tctx->emit_instruction(tctx, &new_inst);
774
}
775
776
if (dst->Register.WriteMask & TGSI_WRITEMASK_W) {
777
/* MOV dst.w, imm{1.0} */
778
new_inst = tgsi_default_full_instruction();
779
new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
780
new_inst.Instruction.NumDstRegs = 1;
781
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_W);
782
new_inst.Instruction.NumSrcRegs = 1;
783
reg_src(&new_inst.Src[0], &ctx->imm, SWIZ(_, _, _, Y));
784
tctx->emit_instruction(tctx, &new_inst);
785
}
786
}
787
788
/* DP4 - 4-component Dot Product
789
* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z + src0.w \times src1.w
790
*
791
* DP3 - 3-component Dot Product
792
* dst = src0.x \times src1.x + src0.y \times src1.y + src0.z \times src1.z
793
*
794
* DP2 - 2-component Dot Product
795
* dst = src0.x \times src1.x + src0.y \times src1.y
796
*
797
* NOTE: these are translated into sequence of MUL/MAD(/ADD) scalar
798
* operations, which is what you'd prefer for a ISA that is natively
799
* scalar. Probably a native vector ISA would at least already have
800
* DP4/DP3 instructions, but perhaps there is room for an alternative
801
* translation for DP2 using vector instructions.
802
*
803
* ; needs: 1 tmp
804
* MUL tmpA.x, src0.x, src1.x
805
* MAD tmpA.x, src0.y, src1.y, tmpA.x
806
* if (DP3 || DP4) {
807
* MAD tmpA.x, src0.z, src1.z, tmpA.x
808
* if (DP4) {
809
* MAD tmpA.x, src0.w, src1.w, tmpA.x
810
* }
811
* }
812
* ; fixup last instruction to replicate into dst
813
*/
814
#define DP4_GROW (NINST(2) + NINST(3) + NINST(3) + NINST(3) - OINST(2))
815
#define DP3_GROW (NINST(2) + NINST(3) + NINST(3) - OINST(2))
816
#define DP2_GROW (NINST(2) + NINST(3) - OINST(2))
817
#define DOTP_TMP 1
818
static void
819
transform_dotp(struct tgsi_transform_context *tctx,
820
struct tgsi_full_instruction *inst)
821
{
822
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
823
struct tgsi_full_dst_register *dst = &inst->Dst[0];
824
struct tgsi_full_src_register *src0 = &inst->Src[0];
825
struct tgsi_full_src_register *src1 = &inst->Src[1];
826
struct tgsi_full_instruction new_inst;
827
enum tgsi_opcode opcode = inst->Instruction.Opcode;
828
829
/* NOTE: any potential last instruction must replicate src on all
830
* components (since it could be re-written to write to final dst)
831
*/
832
833
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
834
/* MUL tmpA.x, src0.x, src1.x */
835
new_inst = tgsi_default_full_instruction();
836
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
837
new_inst.Instruction.NumDstRegs = 1;
838
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
839
new_inst.Instruction.NumSrcRegs = 2;
840
reg_src(&new_inst.Src[0], src0, SWIZ(X, _, _, _));
841
reg_src(&new_inst.Src[1], src1, SWIZ(X, _, _, _));
842
tctx->emit_instruction(tctx, &new_inst);
843
844
/* MAD tmpA.x, src0.y, src1.y, tmpA.x */
845
new_inst = tgsi_default_full_instruction();
846
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
847
new_inst.Instruction.NumDstRegs = 1;
848
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
849
new_inst.Instruction.NumSrcRegs = 3;
850
reg_src(&new_inst.Src[0], src0, SWIZ(Y, Y, Y, Y));
851
reg_src(&new_inst.Src[1], src1, SWIZ(Y, Y, Y, Y));
852
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
853
854
if ((opcode == TGSI_OPCODE_DP3) ||
855
(opcode == TGSI_OPCODE_DP4)) {
856
tctx->emit_instruction(tctx, &new_inst);
857
858
/* MAD tmpA.x, src0.z, src1.z, tmpA.x */
859
new_inst = tgsi_default_full_instruction();
860
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
861
new_inst.Instruction.NumDstRegs = 1;
862
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
863
new_inst.Instruction.NumSrcRegs = 3;
864
reg_src(&new_inst.Src[0], src0, SWIZ(Z, Z, Z, Z));
865
reg_src(&new_inst.Src[1], src1, SWIZ(Z, Z, Z, Z));
866
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
867
868
if (opcode == TGSI_OPCODE_DP4) {
869
tctx->emit_instruction(tctx, &new_inst);
870
871
/* MAD tmpA.x, src0.w, src1.w, tmpA.x */
872
new_inst = tgsi_default_full_instruction();
873
new_inst.Instruction.Opcode = TGSI_OPCODE_MAD;
874
new_inst.Instruction.NumDstRegs = 1;
875
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
876
new_inst.Instruction.NumSrcRegs = 3;
877
reg_src(&new_inst.Src[0], src0, SWIZ(W, W, W, W));
878
reg_src(&new_inst.Src[1], src1, SWIZ(W, W, W, W));
879
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, X, X, X));
880
}
881
}
882
883
/* fixup last instruction to write to dst: */
884
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
885
886
tctx->emit_instruction(tctx, &new_inst);
887
}
888
}
889
890
/* FLR - floor, CEIL - ceil
891
* ; needs: 1 tmp
892
* if (CEIL) {
893
* FRC tmpA, -src
894
* ADD dst, src, tmpA
895
* } else {
896
* FRC tmpA, src
897
* SUB dst, src, tmpA
898
* }
899
*/
900
#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
901
#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
902
#define FLR_TMP 1
903
#define CEIL_TMP 1
904
static void
905
transform_flr_ceil(struct tgsi_transform_context *tctx,
906
struct tgsi_full_instruction *inst)
907
{
908
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
909
struct tgsi_full_dst_register *dst = &inst->Dst[0];
910
struct tgsi_full_src_register *src0 = &inst->Src[0];
911
struct tgsi_full_instruction new_inst;
912
enum tgsi_opcode opcode = inst->Instruction.Opcode;
913
914
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
915
/* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
916
new_inst = tgsi_default_full_instruction();
917
new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
918
new_inst.Instruction.NumDstRegs = 1;
919
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
920
new_inst.Instruction.NumSrcRegs = 1;
921
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
922
923
if (opcode == TGSI_OPCODE_CEIL)
924
new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
925
tctx->emit_instruction(tctx, &new_inst);
926
927
/* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
928
new_inst = tgsi_default_full_instruction();
929
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
930
new_inst.Instruction.NumDstRegs = 1;
931
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
932
new_inst.Instruction.NumSrcRegs = 2;
933
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
934
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
935
if (opcode == TGSI_OPCODE_FLR)
936
new_inst.Src[1].Register.Negate = 1;
937
tctx->emit_instruction(tctx, &new_inst);
938
}
939
}
940
941
/* TRUNC - truncate off fractional part
942
* dst.x = trunc(src.x)
943
* dst.y = trunc(src.y)
944
* dst.z = trunc(src.z)
945
* dst.w = trunc(src.w)
946
*
947
* ; needs: 1 tmp
948
* if (lower FLR) {
949
* FRC tmpA, |src|
950
* SUB tmpA, |src|, tmpA
951
* } else {
952
* FLR tmpA, |src|
953
* }
954
* CMP dst, src, -tmpA, tmpA
955
*/
956
#define TRUNC_GROW (NINST(1) + NINST(2) + NINST(3) - OINST(1))
957
#define TRUNC_TMP 1
958
static void
959
transform_trunc(struct tgsi_transform_context *tctx,
960
struct tgsi_full_instruction *inst)
961
{
962
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
963
struct tgsi_full_dst_register *dst = &inst->Dst[0];
964
struct tgsi_full_src_register *src0 = &inst->Src[0];
965
struct tgsi_full_instruction new_inst;
966
967
if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
968
if (ctx->config->lower_FLR) {
969
new_inst = tgsi_default_full_instruction();
970
new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
971
new_inst.Instruction.NumDstRegs = 1;
972
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
973
new_inst.Instruction.NumSrcRegs = 1;
974
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
975
new_inst.Src[0].Register.Absolute = true;
976
new_inst.Src[0].Register.Negate = false;
977
tctx->emit_instruction(tctx, &new_inst);
978
979
new_inst = tgsi_default_full_instruction();
980
new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
981
new_inst.Instruction.NumDstRegs = 1;
982
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
983
new_inst.Instruction.NumSrcRegs = 2;
984
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
985
new_inst.Src[0].Register.Absolute = true;
986
new_inst.Src[0].Register.Negate = false;
987
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
988
new_inst.Src[1].Register.Negate = 1;
989
tctx->emit_instruction(tctx, &new_inst);
990
} else {
991
new_inst = tgsi_default_full_instruction();
992
new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
993
new_inst.Instruction.NumDstRegs = 1;
994
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
995
new_inst.Instruction.NumSrcRegs = 1;
996
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
997
new_inst.Src[0].Register.Absolute = true;
998
new_inst.Src[0].Register.Negate = false;
999
tctx->emit_instruction(tctx, &new_inst);
1000
}
1001
1002
new_inst = tgsi_default_full_instruction();
1003
new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1004
new_inst.Instruction.NumDstRegs = 1;
1005
reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
1006
new_inst.Instruction.NumSrcRegs = 3;
1007
reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
1008
reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1009
new_inst.Src[1].Register.Negate = true;
1010
reg_src(&new_inst.Src[2], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1011
tctx->emit_instruction(tctx, &new_inst);
1012
}
1013
}
1014
1015
/* Inserts a MOV_SAT for the needed components of tex coord. Note that
1016
* in the case of TXP, the clamping must happen *after* projection, so
1017
* we need to lower TXP to TEX.
1018
*
1019
* MOV tmpA, src0
1020
* if (opc == TXP) {
1021
* ; do perspective division manually before clamping:
1022
* RCP tmpB, tmpA.w
1023
* MUL tmpB.<pmask>, tmpA, tmpB.xxxx
1024
* opc = TEX;
1025
* }
1026
* MOV_SAT tmpA.<mask>, tmpA ; <mask> is the clamped s/t/r coords
1027
* <opc> dst, tmpA, ...
1028
*/
1029
#define SAMP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1))
1030
#define SAMP_TMP 2
1031
static int
1032
transform_samp(struct tgsi_transform_context *tctx,
1033
struct tgsi_full_instruction *inst)
1034
{
1035
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1036
struct tgsi_full_src_register *coord = &inst->Src[0];
1037
struct tgsi_full_src_register *samp;
1038
struct tgsi_full_instruction new_inst;
1039
/* mask is clamped coords, pmask is all coords (for projection): */
1040
unsigned mask = 0, pmask = 0, smask;
1041
unsigned tex = inst->Texture.Texture;
1042
enum tgsi_opcode opcode = inst->Instruction.Opcode;
1043
bool lower_txp = (opcode == TGSI_OPCODE_TXP) &&
1044
(ctx->config->lower_TXP & (1 << tex));
1045
1046
if (opcode == TGSI_OPCODE_TXB2) {
1047
samp = &inst->Src[2];
1048
} else {
1049
samp = &inst->Src[1];
1050
}
1051
1052
/* convert sampler # to bitmask to test: */
1053
smask = 1 << samp->Register.Index;
1054
1055
/* check if we actually need to lower this one: */
1056
if (!(ctx->saturate & smask) && !lower_txp)
1057
return -1;
1058
1059
/* figure out which coordinates need saturating:
1060
* - RECT textures should not get saturated
1061
* - array index coords should not get saturated
1062
*/
1063
switch (tex) {
1064
case TGSI_TEXTURE_3D:
1065
case TGSI_TEXTURE_CUBE:
1066
case TGSI_TEXTURE_CUBE_ARRAY:
1067
case TGSI_TEXTURE_SHADOWCUBE:
1068
case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
1069
if (ctx->config->saturate_r & smask)
1070
mask |= TGSI_WRITEMASK_Z;
1071
pmask |= TGSI_WRITEMASK_Z;
1072
FALLTHROUGH;
1073
1074
case TGSI_TEXTURE_2D:
1075
case TGSI_TEXTURE_2D_ARRAY:
1076
case TGSI_TEXTURE_SHADOW2D:
1077
case TGSI_TEXTURE_SHADOW2D_ARRAY:
1078
case TGSI_TEXTURE_2D_MSAA:
1079
case TGSI_TEXTURE_2D_ARRAY_MSAA:
1080
if (ctx->config->saturate_t & smask)
1081
mask |= TGSI_WRITEMASK_Y;
1082
pmask |= TGSI_WRITEMASK_Y;
1083
FALLTHROUGH;
1084
1085
case TGSI_TEXTURE_1D:
1086
case TGSI_TEXTURE_1D_ARRAY:
1087
case TGSI_TEXTURE_SHADOW1D:
1088
case TGSI_TEXTURE_SHADOW1D_ARRAY:
1089
if (ctx->config->saturate_s & smask)
1090
mask |= TGSI_WRITEMASK_X;
1091
pmask |= TGSI_WRITEMASK_X;
1092
break;
1093
1094
case TGSI_TEXTURE_RECT:
1095
case TGSI_TEXTURE_SHADOWRECT:
1096
/* we don't saturate, but in case of lower_txp we
1097
* still need to do the perspective divide:
1098
*/
1099
pmask = TGSI_WRITEMASK_XY;
1100
break;
1101
}
1102
1103
/* sanity check.. driver could be asking to saturate a non-
1104
* existent coordinate component:
1105
*/
1106
if (!mask && !lower_txp)
1107
return -1;
1108
1109
/* MOV tmpA, src0 */
1110
create_mov(tctx, &ctx->tmp[A].dst, coord, TGSI_WRITEMASK_XYZW, 0);
1111
1112
/* This is a bit sad.. we need to clamp *after* the coords
1113
* are projected, which means lowering TXP to TEX and doing
1114
* the projection ourself. But since I haven't figured out
1115
* how to make the lowering code deliver an electric shock
1116
* to anyone using GL_CLAMP, we must do this instead:
1117
*/
1118
if (opcode == TGSI_OPCODE_TXP) {
1119
/* RCP tmpB.x tmpA.w */
1120
new_inst = tgsi_default_full_instruction();
1121
new_inst.Instruction.Opcode = TGSI_OPCODE_RCP;
1122
new_inst.Instruction.NumDstRegs = 1;
1123
reg_dst(&new_inst.Dst[0], &ctx->tmp[B].dst, TGSI_WRITEMASK_X);
1124
new_inst.Instruction.NumSrcRegs = 1;
1125
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(W, _, _, _));
1126
tctx->emit_instruction(tctx, &new_inst);
1127
1128
/* MUL tmpA.mask, tmpA, tmpB.xxxx */
1129
new_inst = tgsi_default_full_instruction();
1130
new_inst.Instruction.Opcode = TGSI_OPCODE_MUL;
1131
new_inst.Instruction.NumDstRegs = 1;
1132
reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, pmask);
1133
new_inst.Instruction.NumSrcRegs = 2;
1134
reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
1135
reg_src(&new_inst.Src[1], &ctx->tmp[B].src, SWIZ(X, X, X, X));
1136
tctx->emit_instruction(tctx, &new_inst);
1137
1138
opcode = TGSI_OPCODE_TEX;
1139
}
1140
1141
/* MOV_SAT tmpA.<mask>, tmpA */
1142
if (mask) {
1143
create_mov(tctx, &ctx->tmp[A].dst, &ctx->tmp[A].src, mask, 1);
1144
}
1145
1146
/* modify the texture samp instruction to take fixed up coord: */
1147
new_inst = *inst;
1148
new_inst.Instruction.Opcode = opcode;
1149
new_inst.Src[0] = ctx->tmp[A].src;
1150
tctx->emit_instruction(tctx, &new_inst);
1151
1152
return 0;
1153
}
1154
1155
/* Two-sided color emulation:
1156
* For each COLOR input, create a corresponding BCOLOR input, plus
1157
* CMP instruction to select front or back color based on FACE
1158
*/
1159
#define TWOSIDE_GROW(n) ( \
1160
2 + /* FACE */ \
1161
((n) * 3) + /* IN[], BCOLOR[n], <intrp> */\
1162
((n) * 1) + /* TEMP[] */ \
1163
((n) * NINST(3)) /* CMP instr */ \
1164
)
1165
1166
static void
1167
emit_twoside(struct tgsi_transform_context *tctx)
1168
{
1169
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1170
struct tgsi_shader_info *info = ctx->info;
1171
struct tgsi_full_declaration decl;
1172
struct tgsi_full_instruction new_inst;
1173
unsigned inbase, tmpbase;
1174
unsigned i;
1175
1176
inbase = info->file_max[TGSI_FILE_INPUT] + 1;
1177
tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1178
1179
/* additional inputs for BCOLOR's */
1180
for (i = 0; i < ctx->two_side_colors; i++) {
1181
unsigned in_idx = ctx->two_side_idx[i];
1182
decl = tgsi_default_full_declaration();
1183
decl.Declaration.File = TGSI_FILE_INPUT;
1184
decl.Declaration.Semantic = true;
1185
decl.Range.First = decl.Range.Last = inbase + i;
1186
decl.Semantic.Name = TGSI_SEMANTIC_BCOLOR;
1187
decl.Semantic.Index = info->input_semantic_index[in_idx];
1188
decl.Declaration.Interpolate = true;
1189
decl.Interp.Interpolate = info->input_interpolate[in_idx];
1190
decl.Interp.Location = info->input_interpolate_loc[in_idx];
1191
decl.Interp.CylindricalWrap = info->input_cylindrical_wrap[in_idx];
1192
tctx->emit_declaration(tctx, &decl);
1193
}
1194
1195
/* additional input for FACE */
1196
if (ctx->two_side_colors && (ctx->face_idx == -1)) {
1197
decl = tgsi_default_full_declaration();
1198
decl.Declaration.File = TGSI_FILE_INPUT;
1199
decl.Declaration.Semantic = true;
1200
decl.Range.First = decl.Range.Last = inbase + ctx->two_side_colors;
1201
decl.Semantic.Name = TGSI_SEMANTIC_FACE;
1202
decl.Semantic.Index = 0;
1203
tctx->emit_declaration(tctx, &decl);
1204
1205
ctx->face_idx = decl.Range.First;
1206
}
1207
1208
/* additional temps for COLOR/BCOLOR selection: */
1209
for (i = 0; i < ctx->two_side_colors; i++) {
1210
decl = tgsi_default_full_declaration();
1211
decl.Declaration.File = TGSI_FILE_TEMPORARY;
1212
decl.Range.First = decl.Range.Last = tmpbase + ctx->numtmp + i;
1213
tctx->emit_declaration(tctx, &decl);
1214
}
1215
1216
/* and finally additional instructions to select COLOR/BCOLOR: */
1217
for (i = 0; i < ctx->two_side_colors; i++) {
1218
new_inst = tgsi_default_full_instruction();
1219
new_inst.Instruction.Opcode = TGSI_OPCODE_CMP;
1220
1221
new_inst.Instruction.NumDstRegs = 1;
1222
new_inst.Dst[0].Register.File = TGSI_FILE_TEMPORARY;
1223
new_inst.Dst[0].Register.Index = tmpbase + ctx->numtmp + i;
1224
new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
1225
1226
new_inst.Instruction.NumSrcRegs = 3;
1227
new_inst.Src[0].Register.File = TGSI_FILE_INPUT;
1228
new_inst.Src[0].Register.Index = ctx->face_idx;
1229
new_inst.Src[0].Register.SwizzleX = TGSI_SWIZZLE_X;
1230
new_inst.Src[0].Register.SwizzleY = TGSI_SWIZZLE_X;
1231
new_inst.Src[0].Register.SwizzleZ = TGSI_SWIZZLE_X;
1232
new_inst.Src[0].Register.SwizzleW = TGSI_SWIZZLE_X;
1233
new_inst.Src[1].Register.File = TGSI_FILE_INPUT;
1234
new_inst.Src[1].Register.Index = inbase + i;
1235
new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
1236
new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_Y;
1237
new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1238
new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_W;
1239
new_inst.Src[2].Register.File = TGSI_FILE_INPUT;
1240
new_inst.Src[2].Register.Index = ctx->two_side_idx[i];
1241
new_inst.Src[2].Register.SwizzleX = TGSI_SWIZZLE_X;
1242
new_inst.Src[2].Register.SwizzleY = TGSI_SWIZZLE_Y;
1243
new_inst.Src[2].Register.SwizzleZ = TGSI_SWIZZLE_Z;
1244
new_inst.Src[2].Register.SwizzleW = TGSI_SWIZZLE_W;
1245
1246
tctx->emit_instruction(tctx, &new_inst);
1247
}
1248
}
1249
1250
static void
1251
emit_decls(struct tgsi_transform_context *tctx)
1252
{
1253
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1254
struct tgsi_shader_info *info = ctx->info;
1255
struct tgsi_full_declaration decl;
1256
struct tgsi_full_immediate immed;
1257
unsigned tmpbase;
1258
unsigned i;
1259
1260
tmpbase = info->file_max[TGSI_FILE_TEMPORARY] + 1;
1261
1262
ctx->color_base = tmpbase + ctx->numtmp;
1263
1264
/* declare immediate: */
1265
immed = tgsi_default_full_immediate();
1266
immed.Immediate.NrTokens = 1 + 4; /* one for the token itself */
1267
immed.u[0].Float = 0.0;
1268
immed.u[1].Float = 1.0;
1269
immed.u[2].Float = 128.0;
1270
immed.u[3].Float = 0.0;
1271
tctx->emit_immediate(tctx, &immed);
1272
1273
ctx->imm.Register.File = TGSI_FILE_IMMEDIATE;
1274
ctx->imm.Register.Index = info->immediate_count;
1275
ctx->imm.Register.SwizzleX = TGSI_SWIZZLE_X;
1276
ctx->imm.Register.SwizzleY = TGSI_SWIZZLE_Y;
1277
ctx->imm.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1278
ctx->imm.Register.SwizzleW = TGSI_SWIZZLE_W;
1279
1280
/* declare temp regs: */
1281
for (i = 0; i < ctx->numtmp; i++) {
1282
decl = tgsi_default_full_declaration();
1283
decl.Declaration.File = TGSI_FILE_TEMPORARY;
1284
decl.Range.First = decl.Range.Last = tmpbase + i;
1285
tctx->emit_declaration(tctx, &decl);
1286
1287
ctx->tmp[i].src.Register.File = TGSI_FILE_TEMPORARY;
1288
ctx->tmp[i].src.Register.Index = tmpbase + i;
1289
ctx->tmp[i].src.Register.SwizzleX = TGSI_SWIZZLE_X;
1290
ctx->tmp[i].src.Register.SwizzleY = TGSI_SWIZZLE_Y;
1291
ctx->tmp[i].src.Register.SwizzleZ = TGSI_SWIZZLE_Z;
1292
ctx->tmp[i].src.Register.SwizzleW = TGSI_SWIZZLE_W;
1293
1294
ctx->tmp[i].dst.Register.File = TGSI_FILE_TEMPORARY;
1295
ctx->tmp[i].dst.Register.Index = tmpbase + i;
1296
ctx->tmp[i].dst.Register.WriteMask = TGSI_WRITEMASK_XYZW;
1297
}
1298
1299
if (ctx->two_side_colors)
1300
emit_twoside(tctx);
1301
}
1302
1303
static void
1304
rename_color_inputs(struct tgsi_lowering_context *ctx,
1305
struct tgsi_full_instruction *inst)
1306
{
1307
unsigned i, j;
1308
for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
1309
struct tgsi_src_register *src = &inst->Src[i].Register;
1310
if (src->File == TGSI_FILE_INPUT) {
1311
for (j = 0; j < ctx->two_side_colors; j++) {
1312
if (src->Index == (int)ctx->two_side_idx[j]) {
1313
src->File = TGSI_FILE_TEMPORARY;
1314
src->Index = ctx->color_base + j;
1315
break;
1316
}
1317
}
1318
}
1319
}
1320
1321
}
1322
1323
static void
1324
transform_instr(struct tgsi_transform_context *tctx,
1325
struct tgsi_full_instruction *inst)
1326
{
1327
struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
1328
1329
if (!ctx->emitted_decls) {
1330
emit_decls(tctx);
1331
ctx->emitted_decls = 1;
1332
}
1333
1334
/* if emulating two-sided-color, we need to re-write some
1335
* src registers:
1336
*/
1337
if (ctx->two_side_colors)
1338
rename_color_inputs(ctx, inst);
1339
1340
switch (inst->Instruction.Opcode) {
1341
case TGSI_OPCODE_DST:
1342
if (!ctx->config->lower_DST)
1343
goto skip;
1344
transform_dst(tctx, inst);
1345
break;
1346
case TGSI_OPCODE_LRP:
1347
if (!ctx->config->lower_LRP)
1348
goto skip;
1349
transform_lrp(tctx, inst);
1350
break;
1351
case TGSI_OPCODE_FRC:
1352
if (!ctx->config->lower_FRC)
1353
goto skip;
1354
transform_frc(tctx, inst);
1355
break;
1356
case TGSI_OPCODE_POW:
1357
if (!ctx->config->lower_POW)
1358
goto skip;
1359
transform_pow(tctx, inst);
1360
break;
1361
case TGSI_OPCODE_LIT:
1362
if (!ctx->config->lower_LIT)
1363
goto skip;
1364
transform_lit(tctx, inst);
1365
break;
1366
case TGSI_OPCODE_EXP:
1367
if (!ctx->config->lower_EXP)
1368
goto skip;
1369
transform_exp(tctx, inst);
1370
break;
1371
case TGSI_OPCODE_LOG:
1372
if (!ctx->config->lower_LOG)
1373
goto skip;
1374
transform_log(tctx, inst);
1375
break;
1376
case TGSI_OPCODE_DP4:
1377
if (!ctx->config->lower_DP4)
1378
goto skip;
1379
transform_dotp(tctx, inst);
1380
break;
1381
case TGSI_OPCODE_DP3:
1382
if (!ctx->config->lower_DP3)
1383
goto skip;
1384
transform_dotp(tctx, inst);
1385
break;
1386
case TGSI_OPCODE_DP2:
1387
if (!ctx->config->lower_DP2)
1388
goto skip;
1389
transform_dotp(tctx, inst);
1390
break;
1391
case TGSI_OPCODE_FLR:
1392
if (!ctx->config->lower_FLR)
1393
goto skip;
1394
transform_flr_ceil(tctx, inst);
1395
break;
1396
case TGSI_OPCODE_CEIL:
1397
if (!ctx->config->lower_CEIL)
1398
goto skip;
1399
transform_flr_ceil(tctx, inst);
1400
break;
1401
case TGSI_OPCODE_TRUNC:
1402
if (!ctx->config->lower_TRUNC)
1403
goto skip;
1404
transform_trunc(tctx, inst);
1405
break;
1406
case TGSI_OPCODE_TEX:
1407
case TGSI_OPCODE_TXP:
1408
case TGSI_OPCODE_TXB:
1409
case TGSI_OPCODE_TXB2:
1410
case TGSI_OPCODE_TXL:
1411
if (transform_samp(tctx, inst))
1412
goto skip;
1413
break;
1414
default:
1415
skip:
1416
tctx->emit_instruction(tctx, inst);
1417
break;
1418
}
1419
}
1420
1421
/* returns NULL if no lowering required, else returns the new
1422
* tokens (which caller is required to free()). In either case
1423
* returns the current info.
1424
*/
1425
const struct tgsi_token *
1426
tgsi_transform_lowering(const struct tgsi_lowering_config *config,
1427
const struct tgsi_token *tokens,
1428
struct tgsi_shader_info *info)
1429
{
1430
struct tgsi_lowering_context ctx;
1431
struct tgsi_token *newtoks;
1432
int newlen, numtmp;
1433
1434
/* sanity check in case limit is ever increased: */
1435
STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
1436
1437
/* sanity check the lowering */
1438
assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
1439
assert(!(config->lower_FRC && config->lower_TRUNC));
1440
1441
memset(&ctx, 0, sizeof(ctx));
1442
ctx.base.transform_instruction = transform_instr;
1443
ctx.info = info;
1444
ctx.config = config;
1445
1446
tgsi_scan_shader(tokens, info);
1447
1448
/* if we are adding fragment shader support to emulate two-sided
1449
* color, then figure out the number of additional inputs we need
1450
* to create for BCOLOR's..
1451
*/
1452
if ((info->processor == PIPE_SHADER_FRAGMENT) &&
1453
config->color_two_side) {
1454
int i;
1455
ctx.face_idx = -1;
1456
for (i = 0; i <= info->file_max[TGSI_FILE_INPUT]; i++) {
1457
if (info->input_semantic_name[i] == TGSI_SEMANTIC_COLOR)
1458
ctx.two_side_idx[ctx.two_side_colors++] = i;
1459
if (info->input_semantic_name[i] == TGSI_SEMANTIC_FACE)
1460
ctx.face_idx = i;
1461
}
1462
}
1463
1464
ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
1465
1466
#define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
1467
/* if there are no instructions to lower, then we are done: */
1468
if (!(OPCS(DST) ||
1469
OPCS(LRP) ||
1470
OPCS(FRC) ||
1471
OPCS(POW) ||
1472
OPCS(LIT) ||
1473
OPCS(EXP) ||
1474
OPCS(LOG) ||
1475
OPCS(DP4) ||
1476
OPCS(DP3) ||
1477
OPCS(DP2) ||
1478
OPCS(FLR) ||
1479
OPCS(CEIL) ||
1480
OPCS(TRUNC) ||
1481
OPCS(TXP) ||
1482
ctx.two_side_colors ||
1483
ctx.saturate))
1484
return NULL;
1485
1486
#if 0 /* debug */
1487
_debug_printf("BEFORE:");
1488
tgsi_dump(tokens, 0);
1489
#endif
1490
1491
numtmp = 0;
1492
newlen = tgsi_num_tokens(tokens);
1493
if (OPCS(DST)) {
1494
newlen += DST_GROW * OPCS(DST);
1495
numtmp = MAX2(numtmp, DST_TMP);
1496
}
1497
if (OPCS(LRP)) {
1498
newlen += LRP_GROW * OPCS(LRP);
1499
numtmp = MAX2(numtmp, LRP_TMP);
1500
}
1501
if (OPCS(FRC)) {
1502
newlen += FRC_GROW * OPCS(FRC);
1503
numtmp = MAX2(numtmp, FRC_TMP);
1504
}
1505
if (OPCS(POW)) {
1506
newlen += POW_GROW * OPCS(POW);
1507
numtmp = MAX2(numtmp, POW_TMP);
1508
}
1509
if (OPCS(LIT)) {
1510
newlen += LIT_GROW * OPCS(LIT);
1511
numtmp = MAX2(numtmp, LIT_TMP);
1512
}
1513
if (OPCS(EXP)) {
1514
newlen += EXP_GROW * OPCS(EXP);
1515
numtmp = MAX2(numtmp, EXP_TMP);
1516
}
1517
if (OPCS(LOG)) {
1518
newlen += LOG_GROW * OPCS(LOG);
1519
numtmp = MAX2(numtmp, LOG_TMP);
1520
}
1521
if (OPCS(DP4)) {
1522
newlen += DP4_GROW * OPCS(DP4);
1523
numtmp = MAX2(numtmp, DOTP_TMP);
1524
}
1525
if (OPCS(DP3)) {
1526
newlen += DP3_GROW * OPCS(DP3);
1527
numtmp = MAX2(numtmp, DOTP_TMP);
1528
}
1529
if (OPCS(DP2)) {
1530
newlen += DP2_GROW * OPCS(DP2);
1531
numtmp = MAX2(numtmp, DOTP_TMP);
1532
}
1533
if (OPCS(FLR)) {
1534
newlen += FLR_GROW * OPCS(FLR);
1535
numtmp = MAX2(numtmp, FLR_TMP);
1536
}
1537
if (OPCS(CEIL)) {
1538
newlen += CEIL_GROW * OPCS(CEIL);
1539
numtmp = MAX2(numtmp, CEIL_TMP);
1540
}
1541
if (OPCS(TRUNC)) {
1542
newlen += TRUNC_GROW * OPCS(TRUNC);
1543
numtmp = MAX2(numtmp, TRUNC_TMP);
1544
}
1545
if (ctx.saturate || config->lower_TXP) {
1546
int n = 0;
1547
1548
if (ctx.saturate) {
1549
n = info->opcode_count[TGSI_OPCODE_TEX] +
1550
info->opcode_count[TGSI_OPCODE_TXP] +
1551
info->opcode_count[TGSI_OPCODE_TXB] +
1552
info->opcode_count[TGSI_OPCODE_TXB2] +
1553
info->opcode_count[TGSI_OPCODE_TXL];
1554
} else if (config->lower_TXP) {
1555
n = info->opcode_count[TGSI_OPCODE_TXP];
1556
}
1557
1558
newlen += SAMP_GROW * n;
1559
numtmp = MAX2(numtmp, SAMP_TMP);
1560
}
1561
1562
/* specifically don't include two_side_colors temps in the count: */
1563
ctx.numtmp = numtmp;
1564
1565
if (ctx.two_side_colors) {
1566
newlen += TWOSIDE_GROW(ctx.two_side_colors);
1567
/* note: we permanently consume temp regs, re-writing references
1568
* to IN.COLOR[n] to TEMP[m] (holding the output of of the CMP
1569
* instruction that selects which varying to use):
1570
*/
1571
numtmp += ctx.two_side_colors;
1572
}
1573
1574
newlen += 2 * numtmp;
1575
newlen += 5; /* immediate */
1576
1577
newtoks = tgsi_alloc_tokens(newlen);
1578
if (!newtoks)
1579
return NULL;
1580
1581
tgsi_transform_shader(tokens, newtoks, newlen, &ctx.base);
1582
1583
tgsi_scan_shader(newtoks, info);
1584
1585
#if 0 /* debug */
1586
_debug_printf("AFTER:");
1587
tgsi_dump(newtoks, 0);
1588
#endif
1589
1590
return newtoks;
1591
}
1592
1593