Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_fpc_translate.c
4570 views
1
/**************************************************************************
2
*
3
* Copyright 2007 VMware, Inc.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
16
* of the Software.
17
*
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
*
26
**************************************************************************/
27
28
#include <stdarg.h>
29
30
#include "i915_context.h"
31
#include "i915_debug.h"
32
#include "i915_debug_private.h"
33
#include "i915_fpc.h"
34
#include "i915_reg.h"
35
36
#include "pipe/p_shader_tokens.h"
37
#include "tgsi/tgsi_dump.h"
38
#include "tgsi/tgsi_info.h"
39
#include "tgsi/tgsi_parse.h"
40
#include "util/log.h"
41
#include "util/u_math.h"
42
#include "util/u_memory.h"
43
#include "util/u_string.h"
44
45
#include "draw/draw_vertex.h"
46
47
#ifndef M_PI
48
#define M_PI 3.14159265358979323846
49
#endif
50
51
/**
52
* Simple pass-through fragment shader to use when we don't have
53
* a real shader (or it fails to compile for some reason).
54
*/
55
static unsigned passthrough_program[] = {
56
_3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1),
57
/* move to output color:
58
*/
59
(A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL |
60
(REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)),
61
((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) |
62
(SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) |
63
(SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) |
64
(SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)),
65
0};
66
67
/* 2*pi, -(2*pi)^3/3!, (2*pi)^5/5!, -(2*pi)^7/7! */
68
static const float sin_constants[4] = {
69
2.0 * M_PI, -8.0f * M_PI *M_PI *M_PI / (3 * 2 * 1),
70
32.0f * M_PI *M_PI *M_PI *M_PI *M_PI / (5 * 4 * 3 * 2 * 1),
71
-128.0f * M_PI *M_PI *M_PI *M_PI *M_PI *M_PI *M_PI /
72
(7 * 6 * 5 * 4 * 3 * 2 * 1)};
73
74
/* 1, -(2*pi)^2/2!, (2*pi)^4/4!, -(2*pi)^6/6! */
75
static const float cos_constants[4] = {
76
1.0, -4.0f * M_PI *M_PI / (2 * 1),
77
16.0f * M_PI *M_PI *M_PI *M_PI / (4 * 3 * 2 * 1),
78
-64.0f * M_PI *M_PI *M_PI *M_PI *M_PI *M_PI / (6 * 5 * 4 * 3 * 2 * 1)};
79
80
/**
81
* component-wise negation of ureg
82
*/
83
static inline int
84
negate(int reg, int x, int y, int z, int w)
85
{
86
/* Another neat thing about the UREG representation */
87
return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) |
88
((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) |
89
((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) |
90
((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT));
91
}
92
93
/**
94
* In the event of a translation failure, we'll generate a simple color
95
* pass-through program.
96
*/
97
static void
98
i915_use_passthrough_shader(struct i915_fragment_shader *fs)
99
{
100
fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program));
101
if (fs->program) {
102
memcpy(fs->program, passthrough_program, sizeof(passthrough_program));
103
fs->program_len = ARRAY_SIZE(passthrough_program);
104
}
105
fs->num_constants = 0;
106
}
107
108
void
109
i915_program_error(struct i915_fp_compile *p, const char *msg, ...)
110
{
111
if (p->log_program_errors) {
112
va_list args;
113
114
va_start(args, msg);
115
mesa_loge_v(msg, args);
116
va_end(args);
117
}
118
119
p->error = 1;
120
}
121
122
static uint32_t
123
get_mapping(struct i915_fragment_shader *fs, int unit)
124
{
125
int i;
126
for (i = 0; i < I915_TEX_UNITS; i++) {
127
if (fs->generic_mapping[i] == -1) {
128
fs->generic_mapping[i] = unit;
129
return i;
130
}
131
if (fs->generic_mapping[i] == unit)
132
return i;
133
}
134
debug_printf("Exceeded max generics\n");
135
return 0;
136
}
137
138
/**
139
* Construct a ureg for the given source register. Will emit
140
* constants, apply swizzling and negation as needed.
141
*/
142
static uint32_t
143
src_vector(struct i915_fp_compile *p,
144
const struct i915_full_src_register *source,
145
struct i915_fragment_shader *fs)
146
{
147
uint32_t index = source->Register.Index;
148
uint32_t src = 0, sem_name, sem_ind;
149
150
switch (source->Register.File) {
151
case TGSI_FILE_TEMPORARY:
152
if (source->Register.Index >= I915_MAX_TEMPORARY) {
153
i915_program_error(p, "Exceeded max temporary reg");
154
return 0;
155
}
156
src = UREG(REG_TYPE_R, index);
157
break;
158
case TGSI_FILE_INPUT:
159
/* XXX: Packing COL1, FOGC into a single attribute works for
160
* texenv programs, but will fail for real fragment programs
161
* that use these attributes and expect them to be a full 4
162
* components wide. Could use a texcoord to pass these
163
* attributes if necessary, but that won't work in the general
164
* case.
165
*
166
* We also use a texture coordinate to pass wpos when possible.
167
*/
168
169
sem_name = p->shader->info.input_semantic_name[index];
170
sem_ind = p->shader->info.input_semantic_index[index];
171
172
switch (sem_name) {
173
case TGSI_SEMANTIC_POSITION: {
174
/* for fragcoord */
175
int real_tex_unit = get_mapping(fs, I915_SEMANTIC_POS);
176
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
177
D0_CHANNEL_ALL);
178
break;
179
}
180
case TGSI_SEMANTIC_COLOR:
181
if (sem_ind == 0) {
182
src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL);
183
} else {
184
/* secondary color */
185
assert(sem_ind == 1);
186
src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ);
187
src = swizzle(src, X, Y, Z, ONE);
188
}
189
break;
190
case TGSI_SEMANTIC_FOG:
191
src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W);
192
src = swizzle(src, W, W, W, W);
193
break;
194
case TGSI_SEMANTIC_GENERIC: {
195
int real_tex_unit = get_mapping(fs, sem_ind);
196
src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit,
197
D0_CHANNEL_ALL);
198
break;
199
}
200
case TGSI_SEMANTIC_FACE: {
201
/* for back/front faces */
202
int real_tex_unit = get_mapping(fs, I915_SEMANTIC_FACE);
203
src =
204
i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X);
205
break;
206
}
207
default:
208
i915_program_error(p, "Bad source->Index");
209
return 0;
210
}
211
break;
212
213
case TGSI_FILE_IMMEDIATE:
214
assert(index < p->num_immediates);
215
index = p->immediates_map[index];
216
FALLTHROUGH;
217
case TGSI_FILE_CONSTANT:
218
src = UREG(REG_TYPE_CONST, index);
219
break;
220
221
default:
222
i915_program_error(p, "Bad source->File");
223
return 0;
224
}
225
226
src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY,
227
source->Register.SwizzleZ, source->Register.SwizzleW);
228
229
/* No HW abs flag, so we have to max with the negation. */
230
if (source->Register.Absolute) {
231
uint32_t tmp = i915_get_utemp(p);
232
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src,
233
negate(src, 1, 1, 1, 1), 0);
234
src = tmp;
235
}
236
237
/* There's both negate-all-components and per-component negation.
238
* Try to handle both here.
239
*/
240
{
241
int n = source->Register.Negate;
242
src = negate(src, n, n, n, n);
243
}
244
245
return src;
246
}
247
248
/**
249
* Construct a ureg for a destination register.
250
*/
251
static uint32_t
252
get_result_vector(struct i915_fp_compile *p,
253
const struct i915_full_dst_register *dest)
254
{
255
switch (dest->Register.File) {
256
case TGSI_FILE_OUTPUT: {
257
uint32_t sem_name =
258
p->shader->info.output_semantic_name[dest->Register.Index];
259
switch (sem_name) {
260
case TGSI_SEMANTIC_POSITION:
261
return UREG(REG_TYPE_OD, 0);
262
case TGSI_SEMANTIC_COLOR:
263
return UREG(REG_TYPE_OC, 0);
264
default:
265
i915_program_error(p, "Bad inst->DstReg.Index/semantics");
266
return 0;
267
}
268
}
269
case TGSI_FILE_TEMPORARY:
270
return UREG(REG_TYPE_R, dest->Register.Index);
271
default:
272
i915_program_error(p, "Bad inst->DstReg.File");
273
return 0;
274
}
275
}
276
277
/**
278
* Compute flags for saturation and writemask.
279
*/
280
static uint32_t
281
get_result_flags(const struct i915_full_instruction *inst)
282
{
283
const uint32_t writeMask = inst->Dst[0].Register.WriteMask;
284
uint32_t flags = 0x0;
285
286
if (inst->Instruction.Saturate)
287
flags |= A0_DEST_SATURATE;
288
289
if (writeMask & TGSI_WRITEMASK_X)
290
flags |= A0_DEST_CHANNEL_X;
291
if (writeMask & TGSI_WRITEMASK_Y)
292
flags |= A0_DEST_CHANNEL_Y;
293
if (writeMask & TGSI_WRITEMASK_Z)
294
flags |= A0_DEST_CHANNEL_Z;
295
if (writeMask & TGSI_WRITEMASK_W)
296
flags |= A0_DEST_CHANNEL_W;
297
298
return flags;
299
}
300
301
/**
302
* Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token
303
*/
304
static uint32_t
305
translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex)
306
{
307
switch (tex) {
308
case TGSI_TEXTURE_SHADOW1D:
309
FALLTHROUGH;
310
case TGSI_TEXTURE_1D:
311
return D0_SAMPLE_TYPE_2D;
312
313
case TGSI_TEXTURE_SHADOW2D:
314
FALLTHROUGH;
315
case TGSI_TEXTURE_2D:
316
return D0_SAMPLE_TYPE_2D;
317
318
case TGSI_TEXTURE_SHADOWRECT:
319
FALLTHROUGH;
320
case TGSI_TEXTURE_RECT:
321
return D0_SAMPLE_TYPE_2D;
322
323
case TGSI_TEXTURE_3D:
324
return D0_SAMPLE_TYPE_VOLUME;
325
326
case TGSI_TEXTURE_CUBE:
327
return D0_SAMPLE_TYPE_CUBE;
328
329
default:
330
i915_program_error(p, "TexSrc type");
331
return 0;
332
}
333
}
334
335
/**
336
* Return the number of coords needed to access a given TGSI_TEXTURE_*
337
*/
338
uint32_t
339
i915_num_coords(uint32_t tex)
340
{
341
switch (tex) {
342
case TGSI_TEXTURE_SHADOW1D:
343
case TGSI_TEXTURE_1D:
344
return 1;
345
346
case TGSI_TEXTURE_SHADOW2D:
347
case TGSI_TEXTURE_2D:
348
case TGSI_TEXTURE_SHADOWRECT:
349
case TGSI_TEXTURE_RECT:
350
return 2;
351
352
case TGSI_TEXTURE_3D:
353
case TGSI_TEXTURE_CUBE:
354
return 3;
355
356
default:
357
debug_printf("Unknown texture target for num coords");
358
return 2;
359
}
360
}
361
362
/**
363
* Generate texel lookup instruction.
364
*/
365
static void
366
emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst,
367
uint32_t opcode, struct i915_fragment_shader *fs)
368
{
369
uint32_t texture = inst->Texture.Texture;
370
uint32_t unit = inst->Src[1].Register.Index;
371
uint32_t tex = translate_tex_src_target(p, texture);
372
uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex);
373
uint32_t coord = src_vector(p, &inst->Src[0], fs);
374
375
i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]),
376
get_result_flags(inst), sampler, coord, opcode,
377
i915_num_coords(texture));
378
}
379
380
/**
381
* Generate a simple arithmetic instruction
382
* \param opcode the i915 opcode
383
* \param numArgs the number of input/src arguments
384
*/
385
static void
386
emit_simple_arith(struct i915_fp_compile *p,
387
const struct i915_full_instruction *inst, uint32_t opcode,
388
uint32_t numArgs, struct i915_fragment_shader *fs)
389
{
390
uint32_t arg1, arg2, arg3;
391
392
assert(numArgs <= 3);
393
394
arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs);
395
arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs);
396
arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs);
397
398
i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]),
399
get_result_flags(inst), 0, arg1, arg2, arg3);
400
}
401
402
/** As above, but swap the first two src regs */
403
static void
404
emit_simple_arith_swap2(struct i915_fp_compile *p,
405
const struct i915_full_instruction *inst,
406
uint32_t opcode, uint32_t numArgs,
407
struct i915_fragment_shader *fs)
408
{
409
struct i915_full_instruction inst2;
410
411
assert(numArgs == 2);
412
413
/* transpose first two registers */
414
inst2 = *inst;
415
inst2.Src[0] = inst->Src[1];
416
inst2.Src[1] = inst->Src[0];
417
418
emit_simple_arith(p, &inst2, opcode, numArgs, fs);
419
}
420
421
/*
422
* Translate TGSI instruction to i915 instruction.
423
*
424
* Possible concerns:
425
*
426
* DDX, DDY -- return 0
427
* SIN, COS -- could use another taylor step?
428
* LIT -- results seem a little different to sw mesa
429
* LOG -- different to mesa on negative numbers, but this is conformant.
430
*/
431
static void
432
i915_translate_instruction(struct i915_fp_compile *p,
433
const struct i915_full_instruction *inst,
434
struct i915_fragment_shader *fs)
435
{
436
uint32_t src0, src1, src2, flags;
437
uint32_t tmp = 0;
438
439
switch (inst->Instruction.Opcode) {
440
case TGSI_OPCODE_ADD:
441
emit_simple_arith(p, inst, A0_ADD, 2, fs);
442
break;
443
444
case TGSI_OPCODE_CEIL:
445
src0 = src_vector(p, &inst->Src[0], fs);
446
tmp = i915_get_utemp(p);
447
flags = get_result_flags(inst);
448
i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0,
449
negate(src0, 1, 1, 1, 1), 0, 0);
450
i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0,
451
negate(tmp, 1, 1, 1, 1), 0, 0);
452
break;
453
454
case TGSI_OPCODE_CMP:
455
src0 = src_vector(p, &inst->Src[0], fs);
456
src1 = src_vector(p, &inst->Src[1], fs);
457
src2 = src_vector(p, &inst->Src[2], fs);
458
i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]),
459
get_result_flags(inst), 0, src0, src2,
460
src1); /* NOTE: order of src2, src1 */
461
break;
462
463
case TGSI_OPCODE_COS:
464
src0 = src_vector(p, &inst->Src[0], fs);
465
tmp = i915_get_utemp(p);
466
467
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, src0,
468
i915_emit_const1f(p, 1.0f / (float)(M_PI * 2.0)), 0);
469
470
i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
471
472
/*
473
* t0.xy = MUL x.xx11, x.x111 ; x^2, x, 1, 1
474
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, 1
475
* t0 = MUL t0.xxz1 t0.z111 ; x^6 x^4 x^2 1
476
* result = DP4 t0, cos_constants
477
*/
478
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XY, 0,
479
swizzle(tmp, X, X, ONE, ONE),
480
swizzle(tmp, X, ONE, ONE, ONE), 0);
481
482
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XYZ, 0,
483
swizzle(tmp, X, Y, X, ONE), swizzle(tmp, X, X, ONE, ONE),
484
0);
485
486
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XYZ, 0,
487
swizzle(tmp, X, X, Z, ONE),
488
swizzle(tmp, Z, ONE, ONE, ONE), 0);
489
490
i915_emit_arith(p, A0_DP4, get_result_vector(p, &inst->Dst[0]),
491
get_result_flags(inst), 0, swizzle(tmp, ONE, Z, Y, X),
492
i915_emit_const4fv(p, cos_constants), 0);
493
break;
494
495
case TGSI_OPCODE_DDX:
496
case TGSI_OPCODE_DDY:
497
/* XXX We just output 0 here */
498
debug_printf("Punting DDX/DDY\n");
499
src0 = get_result_vector(p, &inst->Dst[0]);
500
i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]),
501
get_result_flags(inst), 0,
502
swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0);
503
break;
504
505
case TGSI_OPCODE_DP2:
506
src0 = src_vector(p, &inst->Src[0], fs);
507
src1 = src_vector(p, &inst->Src[1], fs);
508
509
i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]),
510
get_result_flags(inst), 0,
511
swizzle(src0, X, Y, ZERO, ZERO), src1, 0);
512
break;
513
514
case TGSI_OPCODE_DP3:
515
emit_simple_arith(p, inst, A0_DP3, 2, fs);
516
break;
517
518
case TGSI_OPCODE_DP4:
519
emit_simple_arith(p, inst, A0_DP4, 2, fs);
520
break;
521
522
case TGSI_OPCODE_DST:
523
src0 = src_vector(p, &inst->Src[0], fs);
524
src1 = src_vector(p, &inst->Src[1], fs);
525
526
/* result[0] = 1 * 1;
527
* result[1] = a[1] * b[1];
528
* result[2] = a[2] * 1;
529
* result[3] = 1 * b[3];
530
*/
531
i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
532
get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE),
533
swizzle(src1, ONE, Y, ONE, W), 0);
534
break;
535
536
case TGSI_OPCODE_END:
537
/* no-op */
538
break;
539
540
case TGSI_OPCODE_EX2:
541
src0 = src_vector(p, &inst->Src[0], fs);
542
543
i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]),
544
get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
545
0);
546
break;
547
548
case TGSI_OPCODE_FLR:
549
emit_simple_arith(p, inst, A0_FLR, 1, fs);
550
break;
551
552
case TGSI_OPCODE_FRC:
553
emit_simple_arith(p, inst, A0_FRC, 1, fs);
554
break;
555
556
case TGSI_OPCODE_KILL_IF:
557
/* kill if src[0].x < 0 || src[0].y < 0 ... */
558
src0 = src_vector(p, &inst->Src[0], fs);
559
tmp = i915_get_utemp(p);
560
561
i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
562
A0_DEST_CHANNEL_ALL, /* dest writemask */
563
0, /* sampler */
564
src0, /* coord*/
565
T0_TEXKILL, /* opcode */
566
1); /* num_coord */
567
break;
568
569
case TGSI_OPCODE_KILL:
570
/* unconditional kill */
571
tmp = i915_get_utemp(p);
572
573
i915_emit_texld(p, tmp, /* dest reg: a dummy reg */
574
A0_DEST_CHANNEL_ALL, /* dest writemask */
575
0, /* sampler */
576
negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE),
577
1, 1, 1, 1), /* coord */
578
T0_TEXKILL, /* opcode */
579
1); /* num_coord */
580
break;
581
582
case TGSI_OPCODE_LG2:
583
src0 = src_vector(p, &inst->Src[0], fs);
584
585
i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]),
586
get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
587
0);
588
break;
589
590
case TGSI_OPCODE_LIT:
591
src0 = src_vector(p, &inst->Src[0], fs);
592
tmp = i915_get_utemp(p);
593
594
/* tmp = max( a.xyzw, a.00zw )
595
* XXX: Clamp tmp.w to -128..128
596
* tmp.y = log(tmp.y)
597
* tmp.y = tmp.w * tmp.y
598
* tmp.y = exp(tmp.y)
599
* result = cmp (a.11-x1, a.1x01, a.1xy1 )
600
*/
601
i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
602
swizzle(src0, ZERO, ZERO, Z, W), 0);
603
604
i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0,
605
swizzle(tmp, Y, Y, Y, Y), 0, 0);
606
607
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0,
608
swizzle(tmp, ZERO, Y, ZERO, ZERO),
609
swizzle(tmp, ZERO, W, ZERO, ZERO), 0);
610
611
i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0,
612
swizzle(tmp, Y, Y, Y, Y), 0, 0);
613
614
i915_emit_arith(
615
p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst),
616
0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0),
617
swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE));
618
619
break;
620
621
case TGSI_OPCODE_LRP:
622
src0 = src_vector(p, &inst->Src[0], fs);
623
src1 = src_vector(p, &inst->Src[1], fs);
624
src2 = src_vector(p, &inst->Src[2], fs);
625
flags = get_result_flags(inst);
626
tmp = i915_get_utemp(p);
627
628
/* b*a + c*(1-a)
629
*
630
* b*a + c - ca
631
*
632
* tmp = b*a + c,
633
* result = (-c)*a + tmp
634
*/
635
i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1,
636
src0, src2);
637
638
i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0,
639
negate(src2, 1, 1, 1, 1), src0, tmp);
640
break;
641
642
case TGSI_OPCODE_MAD:
643
emit_simple_arith(p, inst, A0_MAD, 3, fs);
644
break;
645
646
case TGSI_OPCODE_MAX:
647
emit_simple_arith(p, inst, A0_MAX, 2, fs);
648
break;
649
650
case TGSI_OPCODE_MIN:
651
emit_simple_arith(p, inst, A0_MIN, 2, fs);
652
break;
653
654
case TGSI_OPCODE_MOV:
655
emit_simple_arith(p, inst, A0_MOV, 1, fs);
656
break;
657
658
case TGSI_OPCODE_MUL:
659
emit_simple_arith(p, inst, A0_MUL, 2, fs);
660
break;
661
662
case TGSI_OPCODE_NOP:
663
break;
664
665
case TGSI_OPCODE_POW:
666
src0 = src_vector(p, &inst->Src[0], fs);
667
src1 = src_vector(p, &inst->Src[1], fs);
668
tmp = i915_get_utemp(p);
669
flags = get_result_flags(inst);
670
671
/* XXX: masking on intermediate values, here and elsewhere.
672
*/
673
i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0,
674
swizzle(src0, X, X, X, X), 0, 0);
675
676
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0);
677
678
i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0,
679
swizzle(tmp, X, X, X, X), 0, 0);
680
break;
681
682
case TGSI_OPCODE_RET:
683
/* XXX: no-op? */
684
break;
685
686
case TGSI_OPCODE_RCP:
687
src0 = src_vector(p, &inst->Src[0], fs);
688
689
i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]),
690
get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
691
0);
692
break;
693
694
case TGSI_OPCODE_RSQ:
695
src0 = src_vector(p, &inst->Src[0], fs);
696
697
i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]),
698
get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0,
699
0);
700
break;
701
702
case TGSI_OPCODE_SEQ:
703
/* if we're both >= and <= then we're == */
704
src0 = src_vector(p, &inst->Src[0], fs);
705
src1 = src_vector(p, &inst->Src[1], fs);
706
tmp = i915_get_utemp(p);
707
708
i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
709
710
i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]),
711
get_result_flags(inst), 0, src1, src0, 0);
712
713
i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]),
714
get_result_flags(inst), 0,
715
get_result_vector(p, &inst->Dst[0]), tmp, 0);
716
717
break;
718
719
case TGSI_OPCODE_SGE:
720
emit_simple_arith(p, inst, A0_SGE, 2, fs);
721
break;
722
723
case TGSI_OPCODE_SIN:
724
src0 = src_vector(p, &inst->Src[0], fs);
725
tmp = i915_get_utemp(p);
726
727
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, src0,
728
i915_emit_const1f(p, 1.0f / (float)(M_PI * 2.0)), 0);
729
730
i915_emit_arith(p, A0_MOD, tmp, A0_DEST_CHANNEL_X, 0, tmp, 0, 0);
731
732
/*
733
* t0.xy = MUL x.xx11, x.x1111 ; x^2, x, 1, 1
734
* t0 = MUL t0.xyxy t0.xx11 ; x^4, x^3, x^2, x
735
* t1 = MUL t0.xyyw t0.yz11 ; x^7 x^5 x^3 x
736
* result = DP4 t1.wzyx, sin_constants
737
*/
738
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_XY, 0,
739
swizzle(tmp, X, X, ONE, ONE),
740
swizzle(tmp, X, ONE, ONE, ONE), 0);
741
742
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0,
743
swizzle(tmp, X, Y, X, Y), swizzle(tmp, X, X, ONE, ONE),
744
0);
745
746
i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_ALL, 0,
747
swizzle(tmp, X, Y, Y, W), swizzle(tmp, X, Z, ONE, ONE),
748
0);
749
750
i915_emit_arith(p, A0_DP4, get_result_vector(p, &inst->Dst[0]),
751
get_result_flags(inst), 0, swizzle(tmp, W, Z, Y, X),
752
i915_emit_const4fv(p, sin_constants), 0);
753
break;
754
755
case TGSI_OPCODE_SLE:
756
/* like SGE, but swap reg0, reg1 */
757
emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs);
758
break;
759
760
case TGSI_OPCODE_SLT:
761
emit_simple_arith(p, inst, A0_SLT, 2, fs);
762
break;
763
764
case TGSI_OPCODE_SGT:
765
/* like SLT, but swap reg0, reg1 */
766
emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs);
767
break;
768
769
case TGSI_OPCODE_SNE:
770
/* if we're < or > then we're != */
771
src0 = src_vector(p, &inst->Src[0], fs);
772
src1 = src_vector(p, &inst->Src[1], fs);
773
tmp = i915_get_utemp(p);
774
775
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0);
776
777
i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
778
get_result_flags(inst), 0, src1, src0, 0);
779
780
i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]),
781
get_result_flags(inst), 0,
782
get_result_vector(p, &inst->Dst[0]), tmp, 0);
783
break;
784
785
case TGSI_OPCODE_SSG:
786
/* compute (src>0) - (src<0) */
787
src0 = src_vector(p, &inst->Src[0], fs);
788
tmp = i915_get_utemp(p);
789
790
i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0,
791
swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0);
792
793
i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]),
794
get_result_flags(inst), 0,
795
swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0);
796
797
i915_emit_arith(
798
p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 0,
799
get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0);
800
break;
801
802
case TGSI_OPCODE_TEX:
803
emit_tex(p, inst, T0_TEXLD, fs);
804
break;
805
806
case TGSI_OPCODE_TRUNC:
807
emit_simple_arith(p, inst, A0_TRC, 1, fs);
808
break;
809
810
case TGSI_OPCODE_TXB:
811
emit_tex(p, inst, T0_TEXLDB, fs);
812
break;
813
814
case TGSI_OPCODE_TXP:
815
emit_tex(p, inst, T0_TEXLDP, fs);
816
break;
817
818
default:
819
i915_program_error(p, "bad opcode %s (%d)",
820
tgsi_get_opcode_name(inst->Instruction.Opcode),
821
inst->Instruction.Opcode);
822
return;
823
}
824
825
i915_release_utemps(p);
826
}
827
828
static void
829
i915_translate_token(struct i915_fp_compile *p,
830
const union i915_full_token *token,
831
struct i915_fragment_shader *fs)
832
{
833
struct i915_fragment_shader *ifs = p->shader;
834
switch (token->Token.Type) {
835
case TGSI_TOKEN_TYPE_PROPERTY:
836
/* Ignore properties where we only support one value. */
837
assert(token->FullProperty.Property.PropertyName ==
838
TGSI_PROPERTY_FS_COORD_ORIGIN ||
839
token->FullProperty.Property.PropertyName ==
840
TGSI_PROPERTY_FS_COORD_PIXEL_CENTER ||
841
token->FullProperty.Property.PropertyName ==
842
TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS);
843
break;
844
845
case TGSI_TOKEN_TYPE_DECLARATION:
846
if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) {
847
if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) {
848
i915_program_error(p, "Exceeded %d max uniforms",
849
I915_MAX_CONSTANT);
850
} else {
851
uint32_t i;
852
for (i = token->FullDeclaration.Range.First;
853
i <= token->FullDeclaration.Range.Last; i++) {
854
ifs->constant_flags[i] = I915_CONSTFLAG_USER;
855
ifs->num_constants = MAX2(ifs->num_constants, i + 1);
856
}
857
}
858
} else if (token->FullDeclaration.Declaration.File ==
859
TGSI_FILE_TEMPORARY) {
860
if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) {
861
i915_program_error(p, "Exceeded %d max TGSI temps",
862
I915_MAX_TEMPORARY);
863
} else {
864
uint32_t i;
865
for (i = token->FullDeclaration.Range.First;
866
i <= token->FullDeclaration.Range.Last; i++) {
867
/* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */
868
p->temp_flag |= (1 << i); /* mark temp as used */
869
}
870
}
871
}
872
break;
873
874
case TGSI_TOKEN_TYPE_IMMEDIATE: {
875
const struct tgsi_full_immediate *imm = &token->FullImmediate;
876
const uint32_t pos = p->num_immediates++;
877
uint32_t j;
878
assert(imm->Immediate.NrTokens <= 4 + 1);
879
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
880
p->immediates[pos][j] = imm->u[j].Float;
881
}
882
} break;
883
884
case TGSI_TOKEN_TYPE_INSTRUCTION:
885
if (p->first_instruction) {
886
/* resolve location of immediates */
887
uint32_t i, j;
888
for (i = 0; i < p->num_immediates; i++) {
889
/* find constant slot for this immediate */
890
for (j = 0; j < I915_MAX_CONSTANT; j++) {
891
if (ifs->constant_flags[j] == 0x0) {
892
memcpy(ifs->constants[j], p->immediates[i],
893
4 * sizeof(float));
894
/*printf("immediate %d maps to const %d\n", i, j);*/
895
ifs->constant_flags[j] = 0xf; /* all four comps used */
896
p->immediates_map[i] = j;
897
ifs->num_constants = MAX2(ifs->num_constants, j + 1);
898
break;
899
}
900
}
901
if (j == I915_MAX_CONSTANT) {
902
i915_program_error(p, "Exceeded %d max uniforms and immediates.",
903
I915_MAX_CONSTANT);
904
}
905
}
906
907
p->first_instruction = false;
908
}
909
910
i915_translate_instruction(p, &token->FullInstruction, fs);
911
break;
912
913
default:
914
assert(0);
915
}
916
}
917
918
/**
919
* Translate TGSI fragment shader into i915 hardware instructions.
920
* \param p the translation state
921
* \param tokens the TGSI token array
922
*/
923
static void
924
i915_translate_instructions(struct i915_fp_compile *p,
925
const struct i915_token_list *tokens,
926
struct i915_fragment_shader *fs)
927
{
928
int i;
929
for (i = 0; i < tokens->NumTokens && !p->error; i++) {
930
i915_translate_token(p, &tokens->Tokens[i], fs);
931
}
932
}
933
934
static struct i915_fp_compile *
935
i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs)
936
{
937
struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile);
938
int i;
939
940
p->shader = ifs;
941
942
/* Put new constants at end of const buffer, growing downward.
943
* The problem is we don't know how many user-defined constants might
944
* be specified with pipe->set_constant_buffer().
945
* Should pre-scan the user's program to determine the highest-numbered
946
* constant referenced.
947
*/
948
ifs->num_constants = 0;
949
memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags));
950
951
memset(&p->register_phases, 0, sizeof(p->register_phases));
952
953
for (i = 0; i < I915_TEX_UNITS; i++)
954
ifs->generic_mapping[i] = -1;
955
956
p->log_program_errors = !i915->no_log_program_errors;
957
958
p->first_instruction = true;
959
960
p->nr_tex_indirect = 1; /* correct? */
961
p->nr_tex_insn = 0;
962
p->nr_alu_insn = 0;
963
p->nr_decl_insn = 0;
964
965
p->csr = p->program;
966
p->decl = p->declarations;
967
p->decl_s = 0;
968
p->decl_t = 0;
969
p->temp_flag = ~0x0 << I915_MAX_TEMPORARY;
970
p->utemp_flag = ~0x7;
971
972
/* initialize the first program word */
973
*(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM;
974
975
return p;
976
}
977
978
/* Copy compile results to the fragment program struct and destroy the
979
* compilation context.
980
*/
981
static void
982
i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p)
983
{
984
struct i915_fragment_shader *ifs = p->shader;
985
unsigned long program_size = (unsigned long)(p->csr - p->program);
986
unsigned long decl_size = (unsigned long)(p->decl - p->declarations);
987
988
if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT)
989
debug_printf("Exceeded max nr indirect texture lookups\n");
990
991
if (p->nr_tex_insn > I915_MAX_TEX_INSN)
992
i915_program_error(p, "Exceeded max TEX instructions");
993
994
if (p->nr_alu_insn > I915_MAX_ALU_INSN)
995
i915_program_error(p, "Exceeded max ALU instructions");
996
997
if (p->nr_decl_insn > I915_MAX_DECL_INSN)
998
i915_program_error(p, "Exceeded max DECL instructions");
999
1000
/* hw doesn't seem to like empty frag programs (num_instructions == 1 is just
1001
* TGSI_END), even when the depth write fixup gets emitted below - maybe that
1002
* one is fishy, too?
1003
*/
1004
if (ifs->info.num_instructions == 1)
1005
i915_program_error(p, "Empty fragment shader");
1006
1007
if (p->error) {
1008
p->NumNativeInstructions = 0;
1009
p->NumNativeAluInstructions = 0;
1010
p->NumNativeTexInstructions = 0;
1011
p->NumNativeTexIndirections = 0;
1012
1013
i915_use_passthrough_shader(ifs);
1014
} else {
1015
p->NumNativeInstructions =
1016
p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn;
1017
p->NumNativeAluInstructions = p->nr_alu_insn;
1018
p->NumNativeTexInstructions = p->nr_tex_insn;
1019
p->NumNativeTexIndirections = p->nr_tex_indirect;
1020
1021
/* patch in the program length */
1022
p->declarations[0] |= program_size + decl_size - 2;
1023
1024
/* Copy compilation results to fragment program struct:
1025
*/
1026
assert(!ifs->program);
1027
1028
ifs->program_len = decl_size + program_size;
1029
ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t));
1030
memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t));
1031
memcpy(&ifs->program[decl_size], p->program,
1032
program_size * sizeof(uint32_t));
1033
}
1034
1035
/* Release the compilation struct:
1036
*/
1037
FREE(p);
1038
}
1039
1040
/**
1041
* Rather than trying to intercept and jiggle depth writes during
1042
* emit, just move the value into its correct position at the end of
1043
* the program:
1044
*/
1045
static void
1046
i915_fixup_depth_write(struct i915_fp_compile *p)
1047
{
1048
for (int i = 0; i < p->shader->info.num_outputs; i++) {
1049
if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION)
1050
continue;
1051
1052
const uint32_t depth = UREG(REG_TYPE_OD, 0);
1053
1054
i915_emit_arith(p, A0_MOV, /* opcode */
1055
depth, /* dest reg */
1056
A0_DEST_CHANNEL_W, /* write mask */
1057
0, /* saturate? */
1058
swizzle(depth, X, Y, Z, Z), /* src0 */
1059
0, 0 /* src1, src2 */);
1060
}
1061
}
1062
1063
void
1064
i915_translate_fragment_program(struct i915_context *i915,
1065
struct i915_fragment_shader *fs)
1066
{
1067
struct i915_fp_compile *p;
1068
const struct tgsi_token *tokens = fs->state.tokens;
1069
struct i915_token_list *i_tokens;
1070
1071
if (I915_DBG_ON(DBG_FS)) {
1072
mesa_logi("TGSI fragment shader:");
1073
tgsi_dump(tokens, 0);
1074
}
1075
1076
p = i915_init_compile(i915, fs);
1077
1078
i_tokens = i915_optimize(tokens);
1079
i915_translate_instructions(p, i_tokens, fs);
1080
i915_fixup_depth_write(p);
1081
1082
i915_fini_compile(i915, p);
1083
i915_optimize_free(i_tokens);
1084
1085
if (I915_DBG_ON(DBG_FS)) {
1086
mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants,
1087
fs->num_constants ? ":" : "");
1088
1089
for (int i = 0; i < I915_MAX_CONSTANT; i++) {
1090
if (fs->constant_flags[i] &&
1091
fs->constant_flags[i] != I915_CONSTFLAG_USER) {
1092
mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0],
1093
fs->constants[i][1], fs->constants[i][2],
1094
fs->constants[i][3]);
1095
}
1096
}
1097
i915_disassemble_program(fs->program, fs->program_len);
1098
}
1099
}
1100
1101