Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/etnaviv/etnaviv_compiler_tgsi.c
4570 views
1
/*
2
* Copyright (c) 2012-2015 Etnaviv Project
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sub license,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the
12
* next paragraph) shall be included in all copies or substantial portions
13
* of the Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
22
*
23
* Authors:
24
* Wladimir J. van der Laan <[email protected]>
25
*/
26
27
/* TGSI->Vivante shader ISA conversion */
28
29
/* What does the compiler return (see etna_shader_object)?
30
* 1) instruction data
31
* 2) input-to-temporary mapping (fixed for ps)
32
* *) in case of ps, semantic -> varying id mapping
33
* *) for each varying: number of components used (r, rg, rgb, rgba)
34
* 3) temporary-to-output mapping (in case of vs, fixed for ps)
35
* 4) for each input/output: possible semantic (position, color, glpointcoord, ...)
36
* 5) immediates base offset, immediates data
37
* 6) used texture units (and possibly the TGSI_TEXTURE_* type); not needed to
38
* configure the hw, but useful for error checking
39
* 7) enough information to add the z=(z+w)/2.0 necessary for older chips
40
* (output reg id is enough)
41
*
42
* Empty shaders are not allowed, should always at least generate a NOP. Also
43
* if there is a label at the end of the shader, an extra NOP should be
44
* generated as jump target.
45
*
46
* TODO
47
* * Use an instruction scheduler
48
* * Indirect access to uniforms / temporaries using amode
49
*/
50
51
#include "etnaviv_compiler.h"
52
53
#include "etnaviv_asm.h"
54
#include "etnaviv_context.h"
55
#include "etnaviv_debug.h"
56
#include "etnaviv_uniforms.h"
57
#include "etnaviv_util.h"
58
59
#include "nir/tgsi_to_nir.h"
60
#include "pipe/p_shader_tokens.h"
61
#include "tgsi/tgsi_info.h"
62
#include "tgsi/tgsi_iterate.h"
63
#include "tgsi/tgsi_lowering.h"
64
#include "tgsi/tgsi_strings.h"
65
#include "tgsi/tgsi_util.h"
66
#include "util/u_math.h"
67
#include "util/u_memory.h"
68
69
#include <fcntl.h>
70
#include <stdio.h>
71
#include <sys/stat.h>
72
#include <sys/types.h>
73
74
#define ETNA_MAX_INNER_TEMPS 2
75
76
/* Constant tables consumed by the SIN/COS instruction lowering.
 * NOTE(review): the consuming code is outside this chunk — presumably row 0
 * holds polynomial/range factors and row 1 holds 1/(2*pi) plus phase
 * offsets; confirm against the trig translation code before relying on this.
 */
static const float sincos_const[2][4] = {
   {
      2., -1., 4., -4.,
   },
   {
      1. / (2. * M_PI), 0.75, 0.5, 0.0,
   },
};
84
85
/* Native register description structure */
86
/* Description of a hardware (native) register that a TGSI register
 * was mapped to. Packed into a single word via bitfields. */
struct etna_native_reg {
   unsigned valid : 1;  /* set once a native register has been assigned */
   unsigned is_tex : 1; /* is texture unit, overrides rgroup */
   unsigned rgroup : 3; /* register group (temp/internal/uniform, INST_RGROUP_*) */
   unsigned id : 9;     /* register index within the group */
};
92
93
/* Register description */
94
/* Per-register bookkeeping: liveness, semantics and the native register a
 * TGSI register eventually maps to. */
struct etna_reg_desc {
   enum tgsi_file_type file; /* IN, OUT, TEMP, ... */
   int idx; /* index into file */
   bool active; /* used in program */
   int first_use; /* instruction id of first use (scope begin) */
   int last_use; /* instruction id of last use (scope end, inclusive) */

   struct etna_native_reg native; /* native register to map to */
   unsigned usage_mask : 4; /* usage, per channel (x/y/z/w read bits) */
   bool has_semantic; /* register has associated TGSI semantic */
   struct tgsi_declaration_semantic semantic; /* TGSI semantic */
   struct tgsi_declaration_interp interp; /* Interpolation type */
};
107
108
/* Label information structure. A label starts out unplaced (inst_idx == -1)
 * and is later pointed at a concrete instruction by label_place(). */
struct etna_compile_label {
   int inst_idx; /* Instruction id that label points to, -1 if not yet placed */
};
112
113
/* Kind of control-flow construct a stack frame represents. */
enum etna_compile_frame_type {
   ETNA_COMPILE_FRAME_IF, /* IF/ELSE/ENDIF */
   ETNA_COMPILE_FRAME_LOOP, /* BGNLOOP/ENDLOOP */
};
117
118
/* nesting scope frame (LOOP, IF, ...) during compilation.
 * The lbl_* members are indices into the compiler's labels array; which of
 * them are meaningful depends on the frame type. */
struct etna_compile_frame {
   enum etna_compile_frame_type type;
   int lbl_else_idx;     /* IF: jump target for the ELSE branch */
   int lbl_endif_idx;    /* IF: jump target past ENDIF */
   int lbl_loop_bgn_idx; /* LOOP: back-edge target at loop start */
   int lbl_loop_end_idx; /* LOOP: target past ENDLOOP (for BRK) */
};
127
128
/* Register bookkeeping for one TGSI register file (IN, OUT, TEMP, ...). */
struct etna_compile_file {
   /* Number of registers in each TGSI file (max register+1) */
   size_t reg_size;
   /* Register descriptions, per register index (points into c->decl[]) */
   struct etna_reg_desc *reg;
};
134
135
/* Append val to a growable array. Relies on the naming convention that the
 * array `arr` is accompanied by `arr_count` and `arr_sz` variables in scope.
 * Grows geometrically (doubling, minimum 16 entries).
 * NOTE(review): the realloc result is assigned directly, so an allocation
 * failure would lose the old pointer; matches the surrounding code's
 * "allocation never fails" policy. */
#define array_insert(arr, val)                          \
   do {                                                 \
      if (arr##_count == arr##_sz) {                    \
         arr##_sz = MAX2(2 * arr##_sz, 16);             \
         arr = realloc(arr, arr##_sz * sizeof(arr[0])); \
      }                                                 \
      arr[arr##_count++] = val;                         \
   } while (0)
143
144
145
/* scratch area for compiling shader, freed after compilation finishes */
struct etna_compile {
   const struct tgsi_token *tokens;
   bool free_tokens; /* whether we own `tokens` and must free them */

   struct tgsi_shader_info info;

   /* Register descriptions, per TGSI file, per register index */
   struct etna_compile_file file[TGSI_FILE_COUNT];

   /* Keep track of TGSI register declarations (backing store for file[].reg) */
   struct etna_reg_desc decl[ETNA_MAX_DECL];
   uint total_decls;

   /* Bitmap of dead instructions which are removed in a separate pass */
   bool dead_inst[ETNA_MAX_TOKENS];

   /* Immediate data, allocated into the uniform bank */
   enum etna_uniform_contents imm_contents[ETNA_MAX_IMM];
   uint32_t imm_data[ETNA_MAX_IMM];
   uint32_t imm_base; /* base of immediates (in 32 bit units) */
   uint32_t imm_size; /* size of immediates (in 32 bit units) */

   /* Next free native register, for register allocation */
   uint32_t next_free_native;

   /* Temporary register for use within translated TGSI instruction,
    * only allocated when needed.
    */
   int inner_temps; /* number of inner temps used; only up to one available at
                       this point */
   struct etna_native_reg inner_temp[ETNA_MAX_INNER_TEMPS];

   /* Fields for handling nested conditionals */
   struct etna_compile_frame frame_stack[ETNA_MAX_DEPTH];
   int frame_sp; /* top of frame_stack */
   int lbl_usage[ETNA_MAX_INSTRUCTIONS]; /* per-instruction label reference, patched later */

   /* growable label array, managed via array_insert() */
   unsigned labels_count, labels_sz;
   struct etna_compile_label *labels;

   unsigned num_loops;

   /* Code generation */
   int inst_ptr; /* current instruction pointer */
   uint32_t code[ETNA_MAX_INSTRUCTIONS * ETNA_INST_SIZE];

   /* I/O */

   /* Number of varyings (PS only) */
   int num_varyings;

   /* GPU hardware specs */
   const struct etna_specs *specs;

   const struct etna_shader_key *key;
};
202
203
static struct etna_reg_desc *
204
etna_get_dst_reg(struct etna_compile *c, struct tgsi_dst_register dst)
205
{
206
return &c->file[dst.File].reg[dst.Index];
207
}
208
209
static struct etna_reg_desc *
210
etna_get_src_reg(struct etna_compile *c, struct tgsi_src_register src)
211
{
212
return &c->file[src.File].reg[src.Index];
213
}
214
215
static struct etna_native_reg
216
etna_native_temp(unsigned reg)
217
{
218
return (struct etna_native_reg) {
219
.valid = 1,
220
.rgroup = INST_RGROUP_TEMP,
221
.id = reg
222
};
223
}
224
225
static struct etna_native_reg
226
etna_native_internal(unsigned reg)
227
{
228
return (struct etna_native_reg) {
229
.valid = 1,
230
.rgroup = INST_RGROUP_INTERNAL,
231
.id = reg
232
};
233
}
234
235
/** Register allocation **/
236
/* Sort criteria for sort_registers(): which liveness endpoint to order by,
 * and in which direction. */
enum reg_sort_order {
   FIRST_USE_ASC,
   FIRST_USE_DESC,
   LAST_USE_ASC,
   LAST_USE_DESC
};
242
243
/* Augmented register description for sorting: pairs a register with the
 * precomputed sort key derived from its first/last use. */
struct sort_rec {
   struct etna_reg_desc *ptr;
   int key;
};
248
249
static int
250
sort_rec_compar(const struct sort_rec *a, const struct sort_rec *b)
251
{
252
if (a->key < b->key)
253
return -1;
254
255
if (a->key > b->key)
256
return 1;
257
258
return 0;
259
}
260
261
/* create an index on a register set based on certain criteria. */
262
static int
263
sort_registers(struct sort_rec *sorted, struct etna_compile_file *file,
264
enum reg_sort_order so)
265
{
266
struct etna_reg_desc *regs = file->reg;
267
int ptr = 0;
268
269
/* pre-populate keys from active registers */
270
for (int idx = 0; idx < file->reg_size; ++idx) {
271
/* only interested in active registers now; will only assign inactive ones
272
* if no space in active ones */
273
if (regs[idx].active) {
274
sorted[ptr].ptr = &regs[idx];
275
276
switch (so) {
277
case FIRST_USE_ASC:
278
sorted[ptr].key = regs[idx].first_use;
279
break;
280
case LAST_USE_ASC:
281
sorted[ptr].key = regs[idx].last_use;
282
break;
283
case FIRST_USE_DESC:
284
sorted[ptr].key = -regs[idx].first_use;
285
break;
286
case LAST_USE_DESC:
287
sorted[ptr].key = -regs[idx].last_use;
288
break;
289
}
290
ptr++;
291
}
292
}
293
294
/* sort index by key */
295
qsort(sorted, ptr, sizeof(struct sort_rec),
296
(int (*)(const void *, const void *))sort_rec_compar);
297
298
return ptr;
299
}
300
301
/* Allocate a new, unused, native temp register */
302
static struct etna_native_reg
303
alloc_new_native_reg(struct etna_compile *c)
304
{
305
assert(c->next_free_native < ETNA_MAX_TEMPS);
306
return etna_native_temp(c->next_free_native++);
307
}
308
309
/* assign TEMPs to native registers */
310
static void
311
assign_temporaries_to_native(struct etna_compile *c,
312
struct etna_compile_file *file)
313
{
314
struct etna_reg_desc *temps = file->reg;
315
316
for (int idx = 0; idx < file->reg_size; ++idx)
317
temps[idx].native = alloc_new_native_reg(c);
318
}
319
320
/* assign inputs and outputs to temporaries.
 * Gallium assumes that the hardware has separate registers for taking input and
 * output, however Vivante GPUs use temporaries both for passing in inputs and
 * passing back outputs.
 * Try to re-use temporary registers where possible.
 *
 * Works as a two-pointer merge over two sorted lists: for inputs, inputs
 * sorted by last use are matched against temps sorted by first use (an input
 * may share a temp whose lifetime starts after the input dies); for outputs
 * the comparison is mirrored. */
static void
assign_inouts_to_temporaries(struct etna_compile *c, uint file)
{
   bool mode_inputs = (file == TGSI_FILE_INPUT);
   int inout_ptr = 0, num_inouts;
   int temp_ptr = 0, num_temps;
   struct sort_rec inout_order[ETNA_MAX_TEMPS];
   struct sort_rec temps_order[ETNA_MAX_TEMPS];
   num_inouts = sort_registers(inout_order, &c->file[file],
                               mode_inputs ? LAST_USE_ASC : FIRST_USE_ASC);
   num_temps = sort_registers(temps_order, &c->file[TGSI_FILE_TEMPORARY],
                              mode_inputs ? FIRST_USE_ASC : LAST_USE_ASC);

   while (inout_ptr < num_inouts && temp_ptr < num_temps) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;
      struct etna_reg_desc *temp = temps_order[temp_ptr].ptr;

      if (!inout->active || inout->native.valid) { /* Skip if already a native register assigned */
         inout_ptr++;
         continue;
      }

      /* last usage of this input is before or in same instruction of first use
       * of temporary? */
      if (mode_inputs ? (inout->last_use <= temp->first_use)
                      : (inout->first_use >= temp->last_use)) {
         /* assign it and advance to next input */
         inout->native = temp->native;
         inout_ptr++;
      }

      temp_ptr++;
   }

   /* if we couldn't reuse current ones, allocate new temporaries */
   for (inout_ptr = 0; inout_ptr < num_inouts; ++inout_ptr) {
      struct etna_reg_desc *inout = inout_order[inout_ptr].ptr;

      if (inout->active && !inout->native.valid)
         inout->native = alloc_new_native_reg(c);
   }
}
367
368
/* Allocate an immediate with a certain value and return the index. If
369
* there is already an immediate with that value, return that.
370
*/
371
static struct etna_inst_src
372
alloc_imm(struct etna_compile *c, enum etna_uniform_contents contents,
373
uint32_t value)
374
{
375
int idx;
376
377
/* Could use a hash table to speed this up */
378
for (idx = 0; idx < c->imm_size; ++idx) {
379
if (c->imm_contents[idx] == contents && c->imm_data[idx] == value)
380
break;
381
}
382
383
/* look if there is an unused slot */
384
if (idx == c->imm_size) {
385
for (idx = 0; idx < c->imm_size; ++idx) {
386
if (c->imm_contents[idx] == ETNA_UNIFORM_UNUSED)
387
break;
388
}
389
}
390
391
/* allocate new immediate */
392
if (idx == c->imm_size) {
393
assert(c->imm_size < ETNA_MAX_IMM);
394
idx = c->imm_size++;
395
c->imm_data[idx] = value;
396
c->imm_contents[idx] = contents;
397
}
398
399
/* swizzle so that component with value is returned in all components */
400
idx += c->imm_base;
401
struct etna_inst_src imm_src = {
402
.use = 1,
403
.rgroup = INST_RGROUP_UNIFORM_0,
404
.reg = idx / 4,
405
.swiz = INST_SWIZ_BROADCAST(idx & 3)
406
};
407
408
return imm_src;
409
}
410
411
static struct etna_inst_src
412
alloc_imm_u32(struct etna_compile *c, uint32_t value)
413
{
414
return alloc_imm(c, ETNA_UNIFORM_CONSTANT, value);
415
}
416
417
/* Allocate four consecutive, 4-aligned immediate slots holding `values`,
 * reusing an existing identical vec4 if one is present. Returns a source
 * operand with identity swizzle covering the whole vector. */
static struct etna_inst_src
alloc_imm_vec4u(struct etna_compile *c, enum etna_uniform_contents contents,
                const uint32_t *values)
{
   struct etna_inst_src imm_src = { };
   int idx, i;

   /* scan aligned groups of four for an exact match */
   for (idx = 0; idx + 3 < c->imm_size; idx += 4) {
      /* What if we can use a uniform with a different swizzle? */
      for (i = 0; i < 4; i++)
         if (c->imm_contents[idx + i] != contents || c->imm_data[idx + i] != values[i])
            break;
      if (i == 4)
         break;
   }

   /* no match: append a new aligned vec4 */
   if (idx + 3 >= c->imm_size) {
      idx = align(c->imm_size, 4);
      assert(idx + 4 <= ETNA_MAX_IMM);

      for (i = 0; i < 4; i++) {
         c->imm_data[idx + i] = values[i];
         c->imm_contents[idx + i] = contents;
      }

      c->imm_size = idx + 4;
   }

   assert((c->imm_base & 3) == 0);
   idx += c->imm_base;
   imm_src.use = 1;
   imm_src.rgroup = INST_RGROUP_UNIFORM_0;
   imm_src.reg = idx / 4;
   imm_src.swiz = INST_SWIZ_IDENTITY;

   return imm_src;
}
454
455
static uint32_t
456
get_imm_u32(struct etna_compile *c, const struct etna_inst_src *imm,
457
unsigned swiz_idx)
458
{
459
assert(imm->use == 1 && imm->rgroup == INST_RGROUP_UNIFORM_0);
460
unsigned int idx = imm->reg * 4 + ((imm->swiz >> (swiz_idx * 2)) & 3);
461
462
return c->imm_data[idx];
463
}
464
465
/* Allocate immediate with a certain float value. If there is already an
466
* immediate with that value, return that.
467
*/
468
static struct etna_inst_src
469
alloc_imm_f32(struct etna_compile *c, float value)
470
{
471
return alloc_imm_u32(c, fui(value));
472
}
473
474
static struct etna_inst_src
475
etna_imm_vec4f(struct etna_compile *c, const float *vec4)
476
{
477
uint32_t val[4];
478
479
for (int i = 0; i < 4; i++)
480
val[i] = fui(vec4[i]);
481
482
return alloc_imm_vec4u(c, ETNA_UNIFORM_CONSTANT, val);
483
}
484
485
/* Pass -- check register file declarations and immediates.
 * Walks the token stream once and copies TGSI immediates into the
 * compiler's immediate pool; other token types are ignored here. */
static void
etna_compile_parse_declarations(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE: {
         /* immediates are handled differently from other files; they are
          * not declared explicitly, and always add four components */
         const struct tgsi_full_immediate *imm = &ctx.FullToken.FullImmediate;
         assert(c->imm_size <= (ETNA_MAX_IMM - 4));

         for (int i = 0; i < 4; ++i) {
            unsigned idx = c->imm_size++;

            c->imm_data[idx] = imm->u[i].Uint;
            c->imm_contents[idx] = ETNA_UNIFORM_CONSTANT;
         }
      }
      break;
      }
   }

   tgsi_parse_free(&ctx);
}
516
517
/* Allocate register declarations for the registers in all register files */
518
static void
519
etna_allocate_decls(struct etna_compile *c)
520
{
521
uint idx = 0;
522
523
for (int x = 0; x < TGSI_FILE_COUNT; ++x) {
524
c->file[x].reg = &c->decl[idx];
525
c->file[x].reg_size = c->info.file_max[x] + 1;
526
527
for (int sub = 0; sub < c->file[x].reg_size; ++sub) {
528
c->decl[idx].file = x;
529
c->decl[idx].idx = sub;
530
idx++;
531
}
532
}
533
534
c->total_decls = idx;
535
}
536
537
/* Pass -- check and record usage of temporaries, inputs, outputs.
 * Resets all descriptors, then walks the token stream recording, for every
 * register, the first and last instruction index where it is used. These
 * live ranges later allow temporaries to be borrowed as input/output
 * registers. Also records per-channel usage masks for source operands
 * (used to size varying allocations). */
static void
etna_compile_pass_check_usage(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   for (int idx = 0; idx < c->total_decls; ++idx) {
      c->decl[idx].active = false;
      c->decl[idx].first_use = c->decl[idx].last_use = -1;
   }

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);
      /* XXX loops need special care here: the last usage of a register inside
       * a loop means it can still be read on the next iteration, so it is only
       * really free after the loop finishes; likewise the first usage of an
       * input inside a loop does not mean it was not overwritten in a previous
       * iteration, so it is only free before the loop starts. Proper handling
       * would require dominator/post-dominator analysis (especially with
       * direct branch instructions) — not done for now. */
      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_DECLARATION: {
         /* Declaration: fill in file details */
         const struct tgsi_full_declaration *decl = &ctx.FullToken.FullDeclaration;
         struct etna_compile_file *file = &c->file[decl->Declaration.File];

         for (int idx = decl->Range.First; idx <= decl->Range.Last; ++idx) {
            file->reg[idx].usage_mask = 0; // we'll compute this ourselves
            file->reg[idx].has_semantic = decl->Declaration.Semantic;
            file->reg[idx].semantic = decl->Semantic;
            file->reg[idx].interp = decl->Interp;
         }
      } break;
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         /* Instruction: iterate over operands of instruction */
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over destination registers */
         for (int idx = 0; idx < inst->Instruction.NumDstRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Dst[idx].Register.File].reg[inst->Dst[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
         }

         /* iterate over source registers */
         for (int idx = 0; idx < inst->Instruction.NumSrcRegs; ++idx) {
            struct etna_reg_desc *reg_desc = &c->file[inst->Src[idx].Register.File].reg[inst->Src[idx].Register.Index];

            if (reg_desc->first_use == -1)
               reg_desc->first_use = inst_idx;

            reg_desc->last_use = inst_idx;
            reg_desc->active = true;
            /* accumulate usage mask for register, this is used to determine
             * how many slots for varyings should be allocated */
            reg_desc->usage_mask |= tgsi_util_get_inst_usage_mask(inst, idx);
         }
         inst_idx += 1;
      } break;
      default:
         break;
      }
   }

   tgsi_parse_free(&ctx);
}
626
627
/* assign inputs that need to be assigned to specific registers */
628
static void
629
assign_special_inputs(struct etna_compile *c)
630
{
631
if (c->info.processor == PIPE_SHADER_FRAGMENT) {
632
/* never assign t0 as it is the position output, start assigning at t1 */
633
c->next_free_native = 1;
634
635
for (int idx = 0; idx < c->total_decls; ++idx) {
636
struct etna_reg_desc *reg = &c->decl[idx];
637
638
if (!reg->active)
639
continue;
640
641
/* hardwire TGSI_SEMANTIC_POSITION (input and output) to t0 */
642
if (reg->semantic.Name == TGSI_SEMANTIC_POSITION)
643
reg->native = etna_native_temp(0);
644
645
/* hardwire TGSI_SEMANTIC_FACE to i0 */
646
if (reg->semantic.Name == TGSI_SEMANTIC_FACE)
647
reg->native = etna_native_internal(0);
648
}
649
}
650
}
651
652
/* Check that a move instruction does not swizzle any of the components
653
* that it writes.
654
*/
655
static bool
656
etna_mov_check_no_swizzle(const struct tgsi_dst_register dst,
657
const struct tgsi_src_register src)
658
{
659
return (!(dst.WriteMask & TGSI_WRITEMASK_X) || src.SwizzleX == TGSI_SWIZZLE_X) &&
660
(!(dst.WriteMask & TGSI_WRITEMASK_Y) || src.SwizzleY == TGSI_SWIZZLE_Y) &&
661
(!(dst.WriteMask & TGSI_WRITEMASK_Z) || src.SwizzleZ == TGSI_SWIZZLE_Z) &&
662
(!(dst.WriteMask & TGSI_WRITEMASK_W) || src.SwizzleW == TGSI_SWIZZLE_W);
663
}
664
665
/* Pass -- optimize outputs
 * Mesa tends to generate code like this at the end of their shaders
 *   MOV OUT[1], TEMP[2]
 *   MOV OUT[0], TEMP[0]
 *   MOV OUT[2], TEMP[1]
 * Recognize if
 * a) there is only a single assignment to an output register and
 * b) the temporary is not used after that
 * Also recognize direct assignment of IN to OUT (passthrough).
 * In both cases the MOV is marked dead and the registers share one native
 * temp instead.
 **/
static void
etna_compile_pass_optimize_outputs(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   int inst_idx = 0;
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   while (!tgsi_parse_end_of_tokens(&ctx)) {
      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION: {
         const struct tgsi_full_instruction *inst = &ctx.FullToken.FullInstruction;

         /* iterate over operands */
         switch (inst->Instruction.Opcode) {
         case TGSI_OPCODE_MOV: {
            /* We are only interested in eliminating MOVs which write to
             * the shader outputs. Test for this early. */
            if (inst->Dst[0].Register.File != TGSI_FILE_OUTPUT)
               break;
            /* Elimination of a MOV must have no visible effect on the
             * resulting shader: this means the MOV must not swizzle or
             * saturate, and its source must not have the negate or
             * absolute modifiers. */
            if (!etna_mov_check_no_swizzle(inst->Dst[0].Register, inst->Src[0].Register) ||
                inst->Instruction.Saturate || inst->Src[0].Register.Negate ||
                inst->Src[0].Register.Absolute)
               break;

            uint out_idx = inst->Dst[0].Register.Index;
            uint in_idx = inst->Src[0].Register.Index;
            /* assignment of temporary to output --
             * and the output doesn't yet have a native register assigned
             * and the last use of the temporary is this instruction
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_TEMPORARY &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_TEMPORARY].reg[in_idx].native;
               /* prevent temp from being re-used for the rest of the shader */
               c->file[TGSI_FILE_TEMPORARY].reg[in_idx].last_use = ETNA_MAX_TOKENS;
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
            /* direct assignment of input to output --
             * and the input or output doesn't yet have a native register
             * assigned
             * and the output is only used in this instruction,
             * allocate a new register, and associate both input and output to
             * it
             * and the MOV does not do a swizzle
             */
            if (inst->Src[0].Register.File == TGSI_FILE_INPUT &&
                !c->file[TGSI_FILE_INPUT].reg[in_idx].native.valid &&
                !c->file[TGSI_FILE_OUTPUT].reg[out_idx].native.valid &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].last_use == inst_idx &&
                c->file[TGSI_FILE_OUTPUT].reg[out_idx].first_use == inst_idx) {
               c->file[TGSI_FILE_OUTPUT].reg[out_idx].native =
                  c->file[TGSI_FILE_INPUT].reg[in_idx].native =
                     alloc_new_native_reg(c);
               /* mark this MOV instruction as a no-op */
               c->dead_inst[inst_idx] = true;
            }
         } break;
         default:;
         }
         inst_idx += 1;
      } break;
      }
   }

   tgsi_parse_free(&ctx);
}
752
753
/* Get a temporary to be used within one TGSI instruction.
754
* The first time that this function is called the temporary will be allocated.
755
* Each call to this function will return the same temporary.
756
*/
757
static struct etna_native_reg
758
etna_compile_get_inner_temp(struct etna_compile *c)
759
{
760
int inner_temp = c->inner_temps;
761
762
if (inner_temp < ETNA_MAX_INNER_TEMPS) {
763
if (!c->inner_temp[inner_temp].valid)
764
c->inner_temp[inner_temp] = alloc_new_native_reg(c);
765
766
/* alloc_new_native_reg() handles lack of registers */
767
c->inner_temps += 1;
768
} else {
769
BUG("Too many inner temporaries (%i) requested in one instruction",
770
inner_temp + 1);
771
}
772
773
return c->inner_temp[inner_temp];
774
}
775
776
static struct etna_inst_dst
777
etna_native_to_dst(struct etna_native_reg native, unsigned comps)
778
{
779
/* Can only assign to temporaries */
780
assert(native.valid && !native.is_tex && native.rgroup == INST_RGROUP_TEMP);
781
782
struct etna_inst_dst rv = {
783
.write_mask = comps,
784
.use = 1,
785
.reg = native.id,
786
};
787
788
return rv;
789
}
790
791
static struct etna_inst_src
792
etna_native_to_src(struct etna_native_reg native, uint32_t swizzle)
793
{
794
assert(native.valid && !native.is_tex);
795
796
struct etna_inst_src rv = {
797
.use = 1,
798
.swiz = swizzle,
799
.rgroup = native.rgroup,
800
.reg = native.id,
801
.amode = INST_AMODE_DIRECT,
802
};
803
804
return rv;
805
}
806
807
static inline struct etna_inst_src
808
negate(struct etna_inst_src src)
809
{
810
src.neg = !src.neg;
811
812
return src;
813
}
814
815
static inline struct etna_inst_src
816
absolute(struct etna_inst_src src)
817
{
818
src.abs = 1;
819
820
return src;
821
}
822
823
static inline struct etna_inst_src
824
swizzle(struct etna_inst_src src, unsigned swizzle)
825
{
826
src.swiz = inst_swiz_compose(src.swiz, swizzle);
827
828
return src;
829
}
830
831
/* Emit instruction and append it to program */
832
static void
833
emit_inst(struct etna_compile *c, struct etna_inst *inst)
834
{
835
assert(c->inst_ptr <= ETNA_MAX_INSTRUCTIONS);
836
837
/* Check for uniform conflicts (each instruction can only access one
838
* uniform),
839
* if detected, use an intermediate temporary */
840
unsigned uni_rgroup = -1;
841
unsigned uni_reg = -1;
842
843
for (int src = 0; src < ETNA_NUM_SRC; ++src) {
844
if (inst->src[src].rgroup == INST_RGROUP_INTERNAL &&
845
c->info.processor == PIPE_SHADER_FRAGMENT &&
846
c->key->front_ccw) {
847
struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
848
849
/*
850
* Set temporary register to 0.0 or 1.0 based on the gl_FrontFacing
851
* configuration (CW or CCW).
852
*/
853
etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
854
.opcode = INST_OPCODE_SET,
855
.cond = INST_CONDITION_NE,
856
.dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
857
INST_COMPS_Z | INST_COMPS_W),
858
.src[0] = inst->src[src],
859
.src[1] = alloc_imm_f32(c, 1.0f)
860
});
861
c->inst_ptr++;
862
863
/* Modify instruction to use temp register instead of uniform */
864
inst->src[src].use = 1;
865
inst->src[src].rgroup = INST_RGROUP_TEMP;
866
inst->src[src].reg = inner_temp.id;
867
inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
868
inst->src[src].neg = 0; /* negation happens on MOV */
869
inst->src[src].abs = 0; /* abs happens on MOV */
870
inst->src[src].amode = 0; /* amode effects happen on MOV */
871
} else if (etna_rgroup_is_uniform(inst->src[src].rgroup)) {
872
if (uni_reg == -1) { /* first unique uniform used */
873
uni_rgroup = inst->src[src].rgroup;
874
uni_reg = inst->src[src].reg;
875
} else { /* second or later; check that it is a re-use */
876
if (uni_rgroup != inst->src[src].rgroup ||
877
uni_reg != inst->src[src].reg) {
878
DBG_F(ETNA_DBG_COMPILER_MSGS, "perf warning: instruction that "
879
"accesses different uniforms, "
880
"need to generate extra MOV");
881
struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
882
883
/* Generate move instruction to temporary */
884
etna_assemble(&c->code[c->inst_ptr * 4], &(struct etna_inst) {
885
.opcode = INST_OPCODE_MOV,
886
.dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y |
887
INST_COMPS_Z | INST_COMPS_W),
888
.src[2] = inst->src[src]
889
});
890
891
c->inst_ptr++;
892
893
/* Modify instruction to use temp register instead of uniform */
894
inst->src[src].use = 1;
895
inst->src[src].rgroup = INST_RGROUP_TEMP;
896
inst->src[src].reg = inner_temp.id;
897
inst->src[src].swiz = INST_SWIZ_IDENTITY; /* swizzling happens on MOV */
898
inst->src[src].neg = 0; /* negation happens on MOV */
899
inst->src[src].abs = 0; /* abs happens on MOV */
900
inst->src[src].amode = 0; /* amode effects happen on MOV */
901
}
902
}
903
}
904
}
905
906
/* Finally assemble the actual instruction */
907
etna_assemble(&c->code[c->inst_ptr * 4], inst);
908
c->inst_ptr++;
909
}
910
911
static unsigned int
912
etna_amode(struct tgsi_ind_register indirect)
913
{
914
assert(indirect.File == TGSI_FILE_ADDRESS);
915
assert(indirect.Index == 0);
916
917
switch (indirect.Swizzle) {
918
case TGSI_SWIZZLE_X:
919
return INST_AMODE_ADD_A_X;
920
case TGSI_SWIZZLE_Y:
921
return INST_AMODE_ADD_A_Y;
922
case TGSI_SWIZZLE_Z:
923
return INST_AMODE_ADD_A_Z;
924
case TGSI_SWIZZLE_W:
925
return INST_AMODE_ADD_A_W;
926
default:
927
assert(!"Invalid swizzle");
928
}
929
930
unreachable("bad swizzle");
931
}
932
933
/* convert destination operand */
934
static struct etna_inst_dst
935
convert_dst(struct etna_compile *c, const struct tgsi_full_dst_register *in)
936
{
937
struct etna_inst_dst rv = {
938
/// XXX .amode
939
.write_mask = in->Register.WriteMask,
940
};
941
942
if (in->Register.File == TGSI_FILE_ADDRESS) {
943
assert(in->Register.Index == 0);
944
rv.reg = in->Register.Index;
945
rv.use = 0;
946
} else {
947
rv = etna_native_to_dst(etna_get_dst_reg(c, in->Register)->native,
948
in->Register.WriteMask);
949
}
950
951
if (in->Register.Indirect)
952
rv.amode = etna_amode(in->Indirect);
953
954
return rv;
955
}
956
957
/* convert texture operand */
958
static struct etna_inst_tex
959
convert_tex(struct etna_compile *c, const struct tgsi_full_src_register *in,
960
const struct tgsi_instruction_texture *tex)
961
{
962
struct etna_native_reg native_reg = etna_get_src_reg(c, in->Register)->native;
963
struct etna_inst_tex rv = {
964
// XXX .amode (to allow for an array of samplers?)
965
.swiz = INST_SWIZ_IDENTITY
966
};
967
968
assert(native_reg.is_tex && native_reg.valid);
969
rv.id = native_reg.id;
970
971
return rv;
972
}
973
974
/* convert source operand */
975
static struct etna_inst_src
976
etna_create_src(const struct tgsi_full_src_register *tgsi,
977
const struct etna_native_reg *native)
978
{
979
const struct tgsi_src_register *reg = &tgsi->Register;
980
struct etna_inst_src rv = {
981
.use = 1,
982
.swiz = INST_SWIZ(reg->SwizzleX, reg->SwizzleY, reg->SwizzleZ, reg->SwizzleW),
983
.neg = reg->Negate,
984
.abs = reg->Absolute,
985
.rgroup = native->rgroup,
986
.reg = native->id,
987
.amode = INST_AMODE_DIRECT,
988
};
989
990
assert(native->valid && !native->is_tex);
991
992
if (reg->Indirect)
993
rv.amode = etna_amode(tgsi->Indirect);
994
995
return rv;
996
}
997
998
static struct etna_inst_src
999
etna_mov_src_to_temp(struct etna_compile *c, struct etna_inst_src src,
1000
struct etna_native_reg temp)
1001
{
1002
struct etna_inst mov = { };
1003
1004
mov.opcode = INST_OPCODE_MOV;
1005
mov.sat = 0;
1006
mov.dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1007
INST_COMPS_Z | INST_COMPS_W);
1008
mov.src[2] = src;
1009
emit_inst(c, &mov);
1010
1011
src.swiz = INST_SWIZ_IDENTITY;
1012
src.neg = src.abs = 0;
1013
src.rgroup = temp.rgroup;
1014
src.reg = temp.id;
1015
1016
return src;
1017
}
1018
1019
static struct etna_inst_src
1020
etna_mov_src(struct etna_compile *c, struct etna_inst_src src)
1021
{
1022
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1023
1024
return etna_mov_src_to_temp(c, src, temp);
1025
}
1026
1027
static bool
1028
etna_src_uniforms_conflict(struct etna_inst_src a, struct etna_inst_src b)
1029
{
1030
return etna_rgroup_is_uniform(a.rgroup) &&
1031
etna_rgroup_is_uniform(b.rgroup) &&
1032
(a.rgroup != b.rgroup || a.reg != b.reg);
1033
}
1034
1035
/* create a new label */
1036
static unsigned int
1037
alloc_new_label(struct etna_compile *c)
1038
{
1039
struct etna_compile_label label = {
1040
.inst_idx = -1, /* start by point to no specific instruction */
1041
};
1042
1043
array_insert(c->labels, label);
1044
1045
return c->labels_count - 1;
1046
}
1047
1048
/* place label at current instruction pointer */
static void
label_place(struct etna_compile *c, struct etna_compile_label *label)
{
   /* Resolve the label: it now refers to the next instruction emitted. */
   label->inst_idx = c->inst_ptr;
}
1054
1055
/* mark label use at current instruction.
 * target of the label will be filled in in the marked instruction's
 * src2.imm slot as soon as the value becomes known.
 */
static void
label_mark_use(struct etna_compile *c, int lbl_idx)
{
   assert(c->inst_ptr < ETNA_MAX_INSTRUCTIONS);
   /* remember which label the instruction about to be emitted refers to */
   c->lbl_usage[c->inst_ptr] = lbl_idx;
}
1066
1067
/* walk the frame stack and return first frame with matching type */
1068
static struct etna_compile_frame *
1069
find_frame(struct etna_compile *c, enum etna_compile_frame_type type)
1070
{
1071
for (int sp = c->frame_sp; sp >= 0; sp--)
1072
if (c->frame_stack[sp].type == type)
1073
return &c->frame_stack[sp];
1074
1075
assert(0);
1076
return NULL;
1077
}
1078
1079
/* Table entry describing how one TGSI opcode is translated to the
 * Vivante ISA; see the translaters[] table below. */
struct instr_translater {
   /* emit callback; receives the already-converted source operands */
   void (*fxn)(const struct instr_translater *t, struct etna_compile *c,
               const struct tgsi_full_instruction *inst,
               struct etna_inst_src *src);
   unsigned tgsi_opc; /* TGSI opcode this entry handles */
   uint8_t opc;       /* target INST_OPCODE_*, when fxn uses it */

   /* tgsi src -> etna src swizzle; -1 marks a source slot that must not
    * be used for this opcode (asserted in trans_instr) */
   int src[3];

   unsigned cond;     /* INST_CONDITION_* for conditional opcodes */
};
1091
1092
/* Generic translation for opcodes that map 1:1 to a single etna
 * instruction: copy opcode/condition/saturate from the table entry,
 * convert the destination, and route each TGSI source into the etna
 * source slot given by t->src[]. */
static void
trans_instr(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst->Instruction.Opcode);
   struct etna_inst instr = { };

   instr.opcode = t->opc;
   instr.cond = t->cond;
   instr.sat = inst->Instruction.Saturate;

   assert(info->num_dst <= 1);
   if (info->num_dst)
      instr.dst = convert_dst(c, &inst->Dst[0]);

   assert(info->num_src <= ETNA_NUM_SRC);

   for (unsigned i = 0; i < info->num_src; i++) {
      /* t->src[i] is the etna source slot for TGSI source i; -1 means the
       * opcode must not have this many sources */
      int swizzle = t->src[i];

      assert(swizzle != -1);
      instr.src[swizzle] = src[i];
   }

   emit_inst(c, &instr);
}
1118
1119
static void
1120
trans_min_max(const struct instr_translater *t, struct etna_compile *c,
1121
const struct tgsi_full_instruction *inst,
1122
struct etna_inst_src *src)
1123
{
1124
emit_inst(c, &(struct etna_inst) {
1125
.opcode = INST_OPCODE_SELECT,
1126
.cond = t->cond,
1127
.sat = inst->Instruction.Saturate,
1128
.dst = convert_dst(c, &inst->Dst[0]),
1129
.src[0] = src[0],
1130
.src[1] = src[1],
1131
.src[2] = src[0],
1132
});
1133
}
1134
1135
/* IF: push an IF frame and emit a conditional branch to the (not yet
 * placed) "else" label, taken when the condition value equals 0.0. */
static void
trans_if(const struct instr_translater *t, struct etna_compile *c,
         const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
   struct etna_inst_src imm_0 = alloc_imm_f32(c, 0.0f);

   /* push IF to stack */
   f->type = ETNA_COMPILE_FRAME_IF;
   /* create "else" label */
   f->lbl_else_idx = alloc_new_label(c);
   f->lbl_endif_idx = -1; /* filled in by trans_else, if an ELSE follows */

   /* We need to avoid the emit_inst() below becoming two instructions:
    * the branch reads both src[0] and the 0.0 immediate, which may live
    * in different uniform registers. */
   if (etna_src_uniforms_conflict(src[0], imm_0))
      src[0] = etna_mov_src(c, src[0]);

   /* mark position in instruction stream of label reference so that it can be
    * filled in in next pass */
   label_mark_use(c, f->lbl_else_idx);

   /* create conditional branch to label if src0 EQ 0 */
   emit_inst(c, &(struct etna_inst){
      .opcode = INST_OPCODE_BRANCH,
      .cond = INST_CONDITION_EQ,
      .src[0] = src[0],
      .src[1] = imm_0,
      /* imm is filled in later */
   });
}
1165
1166
static void
1167
trans_else(const struct instr_translater *t, struct etna_compile *c,
1168
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1169
{
1170
assert(c->frame_sp > 0);
1171
struct etna_compile_frame *f = &c->frame_stack[c->frame_sp - 1];
1172
assert(f->type == ETNA_COMPILE_FRAME_IF);
1173
1174
/* create "endif" label, and branch to endif label */
1175
f->lbl_endif_idx = alloc_new_label(c);
1176
label_mark_use(c, f->lbl_endif_idx);
1177
emit_inst(c, &(struct etna_inst) {
1178
.opcode = INST_OPCODE_BRANCH,
1179
.cond = INST_CONDITION_TRUE,
1180
/* imm is filled in later */
1181
});
1182
1183
/* mark "else" label at this position in instruction stream */
1184
label_place(c, &c->labels[f->lbl_else_idx]);
1185
}
1186
1187
static void
1188
trans_endif(const struct instr_translater *t, struct etna_compile *c,
1189
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1190
{
1191
assert(c->frame_sp > 0);
1192
struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1193
assert(f->type == ETNA_COMPILE_FRAME_IF);
1194
1195
/* assign "endif" or "else" (if no ELSE) label to current position in
1196
* instruction stream, pop IF */
1197
if (f->lbl_endif_idx != -1)
1198
label_place(c, &c->labels[f->lbl_endif_idx]);
1199
else
1200
label_place(c, &c->labels[f->lbl_else_idx]);
1201
}
1202
1203
static void
1204
trans_loop_bgn(const struct instr_translater *t, struct etna_compile *c,
1205
const struct tgsi_full_instruction *inst,
1206
struct etna_inst_src *src)
1207
{
1208
struct etna_compile_frame *f = &c->frame_stack[c->frame_sp++];
1209
1210
/* push LOOP to stack */
1211
f->type = ETNA_COMPILE_FRAME_LOOP;
1212
f->lbl_loop_bgn_idx = alloc_new_label(c);
1213
f->lbl_loop_end_idx = alloc_new_label(c);
1214
1215
label_place(c, &c->labels[f->lbl_loop_bgn_idx]);
1216
1217
c->num_loops++;
1218
}
1219
1220
static void
1221
trans_loop_end(const struct instr_translater *t, struct etna_compile *c,
1222
const struct tgsi_full_instruction *inst,
1223
struct etna_inst_src *src)
1224
{
1225
assert(c->frame_sp > 0);
1226
struct etna_compile_frame *f = &c->frame_stack[--c->frame_sp];
1227
assert(f->type == ETNA_COMPILE_FRAME_LOOP);
1228
1229
/* mark position in instruction stream of label reference so that it can be
1230
* filled in in next pass */
1231
label_mark_use(c, f->lbl_loop_bgn_idx);
1232
1233
/* create branch to loop_bgn label */
1234
emit_inst(c, &(struct etna_inst) {
1235
.opcode = INST_OPCODE_BRANCH,
1236
.cond = INST_CONDITION_TRUE,
1237
.src[0] = src[0],
1238
/* imm is filled in later */
1239
});
1240
1241
label_place(c, &c->labels[f->lbl_loop_end_idx]);
1242
}
1243
1244
static void
1245
trans_brk(const struct instr_translater *t, struct etna_compile *c,
1246
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1247
{
1248
assert(c->frame_sp > 0);
1249
struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1250
1251
/* mark position in instruction stream of label reference so that it can be
1252
* filled in in next pass */
1253
label_mark_use(c, f->lbl_loop_end_idx);
1254
1255
/* create branch to loop_end label */
1256
emit_inst(c, &(struct etna_inst) {
1257
.opcode = INST_OPCODE_BRANCH,
1258
.cond = INST_CONDITION_TRUE,
1259
.src[0] = src[0],
1260
/* imm is filled in later */
1261
});
1262
}
1263
1264
static void
1265
trans_cont(const struct instr_translater *t, struct etna_compile *c,
1266
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1267
{
1268
assert(c->frame_sp > 0);
1269
struct etna_compile_frame *f = find_frame(c, ETNA_COMPILE_FRAME_LOOP);
1270
1271
/* mark position in instruction stream of label reference so that it can be
1272
* filled in in next pass */
1273
label_mark_use(c, f->lbl_loop_bgn_idx);
1274
1275
/* create branch to loop_end label */
1276
emit_inst(c, &(struct etna_inst) {
1277
.opcode = INST_OPCODE_BRANCH,
1278
.cond = INST_CONDITION_TRUE,
1279
.src[0] = src[0],
1280
/* imm is filled in later */
1281
});
1282
}
1283
1284
static void
1285
trans_deriv(const struct instr_translater *t, struct etna_compile *c,
1286
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1287
{
1288
emit_inst(c, &(struct etna_inst) {
1289
.opcode = t->opc,
1290
.sat = inst->Instruction.Saturate,
1291
.dst = convert_dst(c, &inst->Dst[0]),
1292
.src[0] = src[0],
1293
.src[2] = src[0],
1294
});
1295
}
1296
1297
/* ARL: load the address register with floor(src).  With native FLOOR
 * support a single FLOOR into a temp suffices; otherwise floor(x) is
 * emulated as x - frc(x) via an FRC followed by an ADD with the negated
 * fraction.  The result is then moved into the address register (MOVAR). */
static void
trans_arl(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg temp = etna_compile_get_inner_temp(c);
   struct etna_inst arl = { };
   struct etna_inst_dst dst;

   dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z |
                            INST_COMPS_W);

   if (c->specs->has_sign_floor_ceil) {
      /* temp = floor(src0) */
      struct etna_inst floor = { };

      floor.opcode = INST_OPCODE_FLOOR;
      floor.src[2] = src[0];
      floor.dst = dst;

      emit_inst(c, &floor);
   } else {
      /* temp = frc(src0); temp = src0 + (-temp)  == floor(src0) */
      struct etna_inst floor[2] = { };

      floor[0].opcode = INST_OPCODE_FRC;
      floor[0].sat = inst->Instruction.Saturate;
      floor[0].dst = dst;
      floor[0].src[2] = src[0];

      floor[1].opcode = INST_OPCODE_ADD;
      floor[1].sat = inst->Instruction.Saturate;
      floor[1].dst = dst;
      floor[1].src[0] = src[0];
      /* hand-built negated reference to the temp holding frc(src0) */
      floor[1].src[2].use = 1;
      floor[1].src[2].swiz = INST_SWIZ_IDENTITY;
      floor[1].src[2].neg = 1;
      floor[1].src[2].rgroup = temp.rgroup;
      floor[1].src[2].reg = temp.id;

      emit_inst(c, &floor[0]);
      emit_inst(c, &floor[1]);
   }

   /* move the floored value into the address register */
   arl.opcode = INST_OPCODE_MOVAR;
   arl.sat = inst->Instruction.Saturate;
   arl.dst = convert_dst(c, &inst->Dst[0]);
   arl.src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);

   emit_inst(c, &arl);
}
1345
1346
static void
1347
trans_lrp(const struct instr_translater *t, struct etna_compile *c,
1348
const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
1349
{
1350
/* dst = src0 * src1 + (1 - src0) * src2
1351
* => src0 * src1 - (src0 - 1) * src2
1352
* => src0 * src1 - (src0 * src2 - src2)
1353
* MAD tTEMP.xyzw, tSRC0.xyzw, tSRC2.xyzw, -tSRC2.xyzw
1354
* MAD tDST.xyzw, tSRC0.xyzw, tSRC1.xyzw, -tTEMP.xyzw
1355
*/
1356
struct etna_native_reg temp = etna_compile_get_inner_temp(c);
1357
if (etna_src_uniforms_conflict(src[0], src[1]) ||
1358
etna_src_uniforms_conflict(src[0], src[2])) {
1359
src[0] = etna_mov_src(c, src[0]);
1360
}
1361
1362
struct etna_inst mad[2] = { };
1363
mad[0].opcode = INST_OPCODE_MAD;
1364
mad[0].sat = 0;
1365
mad[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
1366
INST_COMPS_Z | INST_COMPS_W);
1367
mad[0].src[0] = src[0];
1368
mad[0].src[1] = src[2];
1369
mad[0].src[2] = negate(src[2]);
1370
mad[1].opcode = INST_OPCODE_MAD;
1371
mad[1].sat = inst->Instruction.Saturate;
1372
mad[1].dst = convert_dst(c, &inst->Dst[0]), mad[1].src[0] = src[0];
1373
mad[1].src[1] = src[1];
1374
mad[1].src[2] = negate(etna_native_to_src(temp, INST_SWIZ_IDENTITY));
1375
1376
emit_inst(c, &mad[0]);
1377
emit_inst(c, &mad[1]);
1378
}
1379
1380
/* LIT: lighting coefficients.  Emitted sequence (see per-step comments):
 * SELECT.LT tmp._y__, 0, src.yyyy, 0
 *   - can be eliminated if src.y is a uniform and >= 0
 * SELECT.GT tmp.___w, 128, src.wwww, 128
 * SELECT.LT tmp.___w, -128, tmp.wwww, -128
 *   - can be eliminated if src.w is a uniform and fits clamp
 * LOG tmp.x, void, void, tmp.yyyy
 * MUL tmp.x, tmp.xxxx, tmp.wwww, void
 * LITP dst, undef, src.xxxx, tmp.xxxx
 */
static void
trans_lit(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   struct etna_native_reg inner_temp = etna_compile_get_inner_temp(c);
   struct etna_inst_src src_y = { };

   /* Clamp src.y to >= 0; skipped at compile time for a uniform whose
    * value already satisfies the clamp. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_y = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_LT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_Y);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 0.0);
      ins.src[1] = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 1)) < 0)
      src_y = alloc_imm_f32(c, 0.0);
   else
      src_y = swizzle(src[0], SWIZZLE(Y, Y, Y, Y));

   struct etna_inst_src src_w = { };

   /* Clamp src.w (the specular exponent) to [-128, 128] with a GT then an
    * LT SELECT; again folded at compile time for in-range uniforms. */
   if (!etna_rgroup_is_uniform(src[0].rgroup)) {
      src_w = etna_native_to_src(inner_temp, SWIZZLE(W, W, W, W));

      struct etna_inst ins = { };
      ins.opcode = INST_OPCODE_SELECT;
      ins.cond = INST_CONDITION_GT;
      ins.dst = etna_native_to_dst(inner_temp, INST_COMPS_W);
      ins.src[0] = ins.src[2] = alloc_imm_f32(c, 128.);
      ins.src[1] = swizzle(src[0], SWIZZLE(W, W, W, W));
      emit_inst(c, &ins);
      /* reuse the instruction with flipped condition and negated bounds */
      ins.cond = INST_CONDITION_LT;
      ins.src[0].neg = !ins.src[0].neg;
      ins.src[2].neg = !ins.src[2].neg;
      ins.src[1] = src_w;
      emit_inst(c, &ins);
   } else if (uif(get_imm_u32(c, &src[0], 3)) < -128.)
      src_w = alloc_imm_f32(c, -128.);
   else if (uif(get_imm_u32(c, &src[0], 3)) > 128.)
      src_w = alloc_imm_f32(c, 128.);
   else
      src_w = swizzle(src[0], SWIZZLE(W, W, W, W));

   if (c->specs->has_new_transcendentals) { /* Alternative LOG sequence */
      /* newer chips: LOG writes x and y components which must be
       * multiplied together to get the final logarithm */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src_y,
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
         .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(inner_temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      struct etna_inst ins[3] = { };
      ins[0].opcode = INST_OPCODE_LOG;
      ins[0].dst = etna_native_to_dst(inner_temp, INST_COMPS_X);
      ins[0].src[2] = src_y;

      emit_inst(c, &ins[0]);
   }
   /* tmp.x = log(y) * w, then LITP combines src.x and the exponent */
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_MUL,
      .sat = 0,
      .dst = etna_native_to_dst(inner_temp, INST_COMPS_X),
      .src[0] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
      .src[1] = src_w,
   });
   emit_inst(c, &(struct etna_inst) {
      .opcode = INST_OPCODE_LITP,
      .sat = 0,
      .dst = convert_dst(c, &inst->Dst[0]),
      .src[0] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[1] = swizzle(src[0], SWIZZLE(X, X, X, X)),
      .src[2] = etna_native_to_src(inner_temp, SWIZZLE(X, X, X, X)),
   });
}
1472
1473
/* SSG: set sign, dst = sign(src).  Uses the native SIGN instruction when
 * the hardware has it; otherwise emulated with SET.NZ into a temp
 * followed by SELECT.LZ choosing between the temp and its negation
 * based on the sign of src. */
static void
trans_ssg(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_sign_floor_ceil) {
      emit_inst(c, &(struct etna_inst){
         .opcode = INST_OPCODE_SIGN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   } else {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      struct etna_inst ins[2] = { };

      /* temp = SET.NZ(src0) */
      ins[0].opcode = INST_OPCODE_SET;
      ins[0].cond = INST_CONDITION_NZ;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                      INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = src[0];

      /* dst = SELECT.LZ(src0, -temp, temp) */
      ins[1].opcode = INST_OPCODE_SELECT;
      ins[1].cond = INST_CONDITION_LZ;
      ins[1].sat = inst->Instruction.Saturate;
      ins[1].dst = convert_dst(c, &inst->Dst[0]);
      ins[1].src[0] = src[0];
      ins[1].src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY);
      ins[1].src[1] = negate(ins[1].src[2]);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);
   }
}
1506
1507
/* SIN/COS: three hardware paths.
 * 1) new transcendentals: input scaled by 1/pi, SIN/COS emits x*y pair.
 * 2) native SIN/COS: input scaled by 2/pi via a temp (src==dst fails on
 *    GC2000).
 * 3) no trig hardware: polynomial approximation ("Nick's fast sine"). */
static void
trans_trig(const struct instr_translater *t, struct etna_compile *c,
           const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) { /* Alternative SIN/COS */
      /* On newer chips alternative SIN/COS instructions are implemented,
       * which:
       * - Need their input scaled by 1/pi instead of 2/pi
       * - Output an x and y component, which need to be multiplied to
       *   get the result
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xyz */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_Z),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 1.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = etna_native_to_src(temp, SWIZZLE(Z, Z, Z, Z)),
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });

   } else if (c->specs->has_sin_cos_sqrt) {
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);
      /* add divide by PI/2, using a temp register. GC2000
       * fails with src==dst for the trig instruction. */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z | INST_COMPS_W),
         .src[0] = src[0], /* any swizzling happens here */
         .src[1] = alloc_imm_f32(c, 2.0f / M_PI),
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = inst->Instruction.Opcode == TGSI_OPCODE_COS
                    ? INST_OPCODE_COS
                    : INST_OPCODE_SIN,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = etna_native_to_src(temp, INST_SWIZ_IDENTITY),
      });
   } else {
      /* Implement Nick's fast sine/cosine. Taken from:
       * http://forum.devmaster.net/t/fast-and-accurate-sine-cosine/9648
       * A=(1/2*PI 0 1/2*PI 0) B=(0.75 0 0.5 0) C=(-4 4 X X)
       * MAD t.x_zw, src.xxxx, A, B
       * FRC t.x_z_, void, void, t.xwzw
       * MAD t.x_z_, t.xwzw, 2, -1
       * MUL t._y__, t.wzww, |t.wzww|, void  (for sin/scs)
       * DP3 t.x_z_, t.zyww, C, void         (for sin)
       * DP3 t.__z_, t.zyww, C, void         (for scs)
       * MUL t._y__, t.wxww, |t.wxww|, void  (for cos/scs)
       * DP3 t.x_z_, t.xyww, C, void         (for cos)
       * DP3 t.x___, t.xyww, C, void         (for scs)
       * MAD t._y_w, t,xxzz, |t.xxzz|, -t.xxzz
       * MAD dst, t.ywyw, .2225, t.xzxz
       */
      struct etna_inst *p, ins[9] = { };
      struct etna_native_reg t0 = etna_compile_get_inner_temp(c);
      struct etna_inst_src t0s = etna_native_to_src(t0, INST_SWIZ_IDENTITY);
      struct etna_inst_src sincos[3], in = src[0];
      sincos[0] = etna_imm_vec4f(c, sincos_const[0]);
      sincos[1] = etna_imm_vec4f(c, sincos_const[1]);

      /* A uniform source will cause the inner temp limit to
       * be exceeded. Explicitly deal with that scenario.
       */
      if (etna_rgroup_is_uniform(src[0].rgroup)) {
         struct etna_inst ins = { };
         ins.opcode = INST_OPCODE_MOV;
         ins.dst = etna_native_to_dst(t0, INST_COMPS_X);
         ins.src[2] = in;
         emit_inst(c, &ins);
         in = t0s;
      }

      /* range-reduce the angle into t0 */
      ins[0].opcode = INST_OPCODE_MAD;
      ins[0].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z | INST_COMPS_W);
      ins[0].src[0] = swizzle(in, SWIZZLE(X, X, X, X));
      ins[0].src[1] = swizzle(sincos[1], SWIZZLE(X, W, X, W)); /* 1/2*PI */
      ins[0].src[2] = swizzle(sincos[1], SWIZZLE(Y, W, Z, W)); /* 0.75, 0, 0.5, 0 */

      ins[1].opcode = INST_OPCODE_FRC;
      ins[1].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[1].src[2] = swizzle(t0s, SWIZZLE(X, W, Z, W));

      ins[2].opcode = INST_OPCODE_MAD;
      ins[2].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[2].src[0] = swizzle(t0s, SWIZZLE(X, W, Z, W));
      ins[2].src[1] = swizzle(sincos[0], SWIZZLE(X, X, X, X)); /* 2 */
      ins[2].src[2] = swizzle(sincos[0], SWIZZLE(Y, Y, Y, Y)); /* -1 */

      /* SIN and COS differ only in which components feed MUL and DP3 */
      unsigned mul_swiz, dp3_swiz;
      if (inst->Instruction.Opcode == TGSI_OPCODE_SIN) {
         mul_swiz = SWIZZLE(W, Z, W, W);
         dp3_swiz = SWIZZLE(Z, Y, W, W);
      } else {
         mul_swiz = SWIZZLE(W, X, W, W);
         dp3_swiz = SWIZZLE(X, Y, W, W);
      }

      ins[3].opcode = INST_OPCODE_MUL;
      ins[3].dst = etna_native_to_dst(t0, INST_COMPS_Y);
      ins[3].src[0] = swizzle(t0s, mul_swiz);
      ins[3].src[1] = absolute(ins[3].src[0]);

      ins[4].opcode = INST_OPCODE_DP3;
      ins[4].dst = etna_native_to_dst(t0, INST_COMPS_X | INST_COMPS_Z);
      ins[4].src[0] = swizzle(t0s, dp3_swiz);
      ins[4].src[1] = swizzle(sincos[0], SWIZZLE(Z, W, W, W));

      /* precision-improvement step and final combine */
      p = &ins[5];
      p->opcode = INST_OPCODE_MAD;
      p->dst = etna_native_to_dst(t0, INST_COMPS_Y | INST_COMPS_W);
      p->src[0] = swizzle(t0s, SWIZZLE(X, X, Z, Z));
      p->src[1] = absolute(p->src[0]);
      p->src[2] = negate(p->src[0]);

      p++;
      p->opcode = INST_OPCODE_MAD;
      p->sat = inst->Instruction.Saturate;
      p->dst = convert_dst(c, &inst->Dst[0]),
      p->src[0] = swizzle(t0s, SWIZZLE(Y, W, Y, W));
      p->src[1] = alloc_imm_f32(c, 0.2225);
      p->src[2] = swizzle(t0s, SWIZZLE(X, Z, X, Z));

      for (int i = 0; &ins[i] <= p; i++)
         emit_inst(c, &ins[i]);
   }
}
1652
1653
/* LG2: base-2 logarithm.  On newer chips LOG writes an x/y pair that
 * must be multiplied together; otherwise a single LOG suffices. */
static void
trans_lg2(const struct instr_translater *t, struct etna_compile *c,
          const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   if (c->specs->has_new_transcendentals) {
      /* On newer chips alternative LOG instruction is implemented,
       * which outputs an x and y component, which need to be multiplied to
       * get the result.
       */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c); /* only using .xy */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y),
         .src[2] = src[0],
         .tex = { .amode=1 }, /* Unknown bit needs to be set */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[0] = etna_native_to_src(temp, SWIZZLE(X, X, X, X)),
         .src[1] = etna_native_to_src(temp, SWIZZLE(Y, Y, Y, Y)),
      });
   } else {
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_LOG,
         .sat = inst->Instruction.Saturate,
         .dst = convert_dst(c, &inst->Dst[0]),
         .src[2] = src[0],
      });
   }
}
1686
1687
/* Texture sampling (TEX/TXB/TXL/TXP).  RECT targets first get their
 * coordinates rescaled from texel space to [0,1] via per-unit scale
 * uniforms; TXP divides xyz by w before the lookup. */
static void
trans_sampler(const struct instr_translater *t, struct etna_compile *c,
              const struct tgsi_full_instruction *inst,
              struct etna_inst_src *src)
{
   /* There is no native support for GL texture rectangle coordinates, so
    * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0, 1]). */
   if (inst->Texture.Texture == TGSI_TEXTURE_RECT) {
      uint32_t unit = inst->Src[1].Register.Index;
      struct etna_inst ins[2] = { };
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* temp.x = coord.x * 1/width, temp.y = coord.y * 1/height */
      ins[0].opcode = INST_OPCODE_MUL;
      ins[0].dst = etna_native_to_dst(temp, INST_COMPS_X);
      ins[0].src[0] = src[0];
      ins[0].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_X, unit);

      ins[1].opcode = INST_OPCODE_MUL;
      ins[1].dst = etna_native_to_dst(temp, INST_COMPS_Y);
      ins[1].src[0] = src[0];
      ins[1].src[1] = alloc_imm(c, ETNA_UNIFORM_TEXRECT_SCALE_Y, unit);

      emit_inst(c, &ins[0]);
      emit_inst(c, &ins[1]);

      src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY); /* temp.xyzw */
   }

   switch (inst->Instruction.Opcode) {
   case TGSI_OPCODE_TEX:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXB:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDB,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXL:
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLDL,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = src[0],
      });
      break;

   case TGSI_OPCODE_TXP: { /* divide src.xyz by src.w */
      struct etna_native_reg temp = etna_compile_get_inner_temp(c);

      /* tmp.w = 1 / src.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_RCP,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_W), /* tmp.w */
         .src[2] = swizzle(src[0], SWIZZLE(W, W, W, W)),
      });
      /* tmp.xyz = src.xyz * tmp.w */
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_MUL,
         .sat = 0,
         .dst = etna_native_to_dst(temp, INST_COMPS_X | INST_COMPS_Y |
                                         INST_COMPS_Z), /* tmp.xyz */
         .src[0] = etna_native_to_src(temp, SWIZZLE(W, W, W, W)),
         .src[1] = src[0], /* src.xyzw */
      });
      emit_inst(c, &(struct etna_inst) {
         .opcode = INST_OPCODE_TEXLD,
         .sat = 0,
         .dst = convert_dst(c, &inst->Dst[0]),
         .tex = convert_tex(c, &inst->Src[1], &inst->Texture),
         .src[0] = etna_native_to_src(temp, INST_SWIZ_IDENTITY), /* tmp.xyzw */
      });
   } break;

   default:
      BUG("Unhandled instruction %s",
          tgsi_get_opcode_name(inst->Instruction.Opcode));
      assert(0);
      break;
   }
}
1779
1780
/* No-op translation for TGSI opcodes that produce no machine code
 * (NOP, END). */
static void
trans_dummy(const struct instr_translater *t, struct etna_compile *c,
            const struct tgsi_full_instruction *inst, struct etna_inst_src *src)
{
   /* nothing to do */
}
1786
1787
/* TGSI -> etnaviv translation table, indexed by TGSI opcode.  .src maps
 * TGSI source slot i to the etna source slot src[i]; -1 marks a slot the
 * opcode must not use (checked in trans_instr). */
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
#define INSTR(n, f, ...) \
   [TGSI_OPCODE_##n] = {.fxn = (f), .tgsi_opc = TGSI_OPCODE_##n, ##__VA_ARGS__}

   /* plain ALU ops handled generically by trans_instr */
   INSTR(MOV, trans_instr, .opc = INST_OPCODE_MOV, .src = {2, -1, -1}),
   INSTR(RCP, trans_instr, .opc = INST_OPCODE_RCP, .src = {2, -1, -1}),
   INSTR(RSQ, trans_instr, .opc = INST_OPCODE_RSQ, .src = {2, -1, -1}),
   INSTR(MUL, trans_instr, .opc = INST_OPCODE_MUL, .src = {0, 1, -1}),
   INSTR(ADD, trans_instr, .opc = INST_OPCODE_ADD, .src = {0, 2, -1}),
   INSTR(DP2, trans_instr, .opc = INST_OPCODE_DP2, .src = {0, 1, -1}),
   INSTR(DP3, trans_instr, .opc = INST_OPCODE_DP3, .src = {0, 1, -1}),
   INSTR(DP4, trans_instr, .opc = INST_OPCODE_DP4, .src = {0, 1, -1}),
   INSTR(DST, trans_instr, .opc = INST_OPCODE_DST, .src = {0, 1, -1}),
   INSTR(MAD, trans_instr, .opc = INST_OPCODE_MAD, .src = {0, 1, 2}),
   INSTR(EX2, trans_instr, .opc = INST_OPCODE_EXP, .src = {2, -1, -1}),
   INSTR(LG2, trans_lg2),
   INSTR(SQRT, trans_instr, .opc = INST_OPCODE_SQRT, .src = {2, -1, -1}),
   INSTR(FRC, trans_instr, .opc = INST_OPCODE_FRC, .src = {2, -1, -1}),
   INSTR(CEIL, trans_instr, .opc = INST_OPCODE_CEIL, .src = {2, -1, -1}),
   INSTR(FLR, trans_instr, .opc = INST_OPCODE_FLOOR, .src = {2, -1, -1}),
   INSTR(CMP, trans_instr, .opc = INST_OPCODE_SELECT, .src = {0, 1, 2}, .cond = INST_CONDITION_LZ),

   INSTR(KILL, trans_instr, .opc = INST_OPCODE_TEXKILL),
   INSTR(KILL_IF, trans_instr, .opc = INST_OPCODE_TEXKILL, .src = {0, -1, -1}, .cond = INST_CONDITION_LZ),

   /* derivatives */
   INSTR(DDX, trans_deriv, .opc = INST_OPCODE_DSX),
   INSTR(DDY, trans_deriv, .opc = INST_OPCODE_DSY),

   /* structured control flow */
   INSTR(IF, trans_if),
   INSTR(ELSE, trans_else),
   INSTR(ENDIF, trans_endif),

   INSTR(BGNLOOP, trans_loop_bgn),
   INSTR(ENDLOOP, trans_loop_end),
   INSTR(BRK, trans_brk),
   INSTR(CONT, trans_cont),

   INSTR(MIN, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_GT),
   INSTR(MAX, trans_min_max, .opc = INST_OPCODE_SELECT, .cond = INST_CONDITION_LT),

   /* opcodes needing multi-instruction expansions */
   INSTR(ARL, trans_arl),
   INSTR(LRP, trans_lrp),
   INSTR(LIT, trans_lit),
   INSTR(SSG, trans_ssg),

   INSTR(SIN, trans_trig),
   INSTR(COS, trans_trig),

   /* comparisons map to SET with a condition */
   INSTR(SLT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LT),
   INSTR(SGE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GE),
   INSTR(SEQ, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_EQ),
   INSTR(SGT, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_GT),
   INSTR(SLE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_LE),
   INSTR(SNE, trans_instr, .opc = INST_OPCODE_SET, .src = {0, 1, -1}, .cond = INST_CONDITION_NE),

   /* texture sampling */
   INSTR(TEX, trans_sampler),
   INSTR(TXB, trans_sampler),
   INSTR(TXL, trans_sampler),
   INSTR(TXP, trans_sampler),

   INSTR(NOP, trans_dummy),
   INSTR(END, trans_dummy),
};
1850
1851
/* Pass -- compile instructions.
 * Walks all TGSI tokens, converts each live instruction's sources, and
 * dispatches to the per-opcode translater.  inst_idx tracks the TGSI
 * instruction index into the dead-instruction mask. */
static void
etna_compile_pass_generate_code(struct etna_compile *c)
{
   struct tgsi_parse_context ctx = { };
   ASSERTED unsigned status = tgsi_parse_init(&ctx, c->tokens);
   assert(status == TGSI_PARSE_OK);

   int inst_idx = 0;
   while (!tgsi_parse_end_of_tokens(&ctx)) {
      const struct tgsi_full_instruction *inst = 0;

      /* No inner temps used yet for this instruction, clear counter */
      c->inner_temps = 0;

      tgsi_parse_token(&ctx);

      switch (ctx.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_INSTRUCTION:
         /* iterate over operands */
         inst = &ctx.FullToken.FullInstruction;
         if (c->dead_inst[inst_idx]) { /* skip dead instructions */
            inst_idx++;
            continue;
         }

         /* Lookup the TGSI information and generate the source arguments */
         struct etna_inst_src src[ETNA_NUM_SRC];
         memset(src, 0, sizeof(src));

         const struct tgsi_opcode_info *tgsi = tgsi_get_opcode_info(inst->Instruction.Opcode);

         for (int i = 0; i < tgsi->num_src && i < ETNA_NUM_SRC; i++) {
            const struct tgsi_full_src_register *reg = &inst->Src[i];
            const struct etna_reg_desc *srcreg = etna_get_src_reg(c, reg->Register);
            const struct etna_native_reg *n = &srcreg->native;

            /* sources without a native register (or allocated to a
             * texture unit) are left zeroed; translaters handle those */
            if (!n->valid || n->is_tex)
               continue;

            src[i] = etna_create_src(reg, n);

            /*
             * Replace W=1.0 for point sprite coordinates, since hardware
             * can only replace X,Y and leaves Z,W=0,0 instead of Z,W=0,1
             */
            if (srcreg && srcreg->has_semantic &&
                srcreg->semantic.Name == TGSI_SEMANTIC_TEXCOORD &&
                (c->key->sprite_coord_enable & BITFIELD_BIT(srcreg->semantic.Index))) {
               emit_inst(c, &(struct etna_inst) {
                  .opcode = INST_OPCODE_SET,
                  .cond = INST_CONDITION_TRUE,
                  .dst = etna_native_to_dst(srcreg->native, INST_COMPS_W),
               });
            }
         }

         const unsigned opc = inst->Instruction.Opcode;
         const struct instr_translater *t = &translaters[opc];

         if (t->fxn) {
            t->fxn(t, c, inst, src);

            inst_idx += 1;
         } else {
            BUG("Unhandled instruction %s", tgsi_get_opcode_name(opc));
            assert(0);
         }
         break;
      }
   }
   tgsi_parse_free(&ctx);
}
1924
1925
/* Look up register by semantic */
1926
static struct etna_reg_desc *
1927
find_decl_by_semantic(struct etna_compile *c, uint file, uint name, uint index)
1928
{
1929
for (int idx = 0; idx < c->file[file].reg_size; ++idx) {
1930
struct etna_reg_desc *reg = &c->file[file].reg[idx];
1931
1932
if (reg->semantic.Name == name && reg->semantic.Index == index)
1933
return reg;
1934
}
1935
1936
return NULL; /* not found */
1937
}
1938
1939
/** Add ADD and MUL instruction to bring Z/W to 0..1 if -1..1 if needed:
1940
* - this is a vertex shader
1941
* - and this is an older GPU
1942
*/
1943
static void
1944
etna_compile_add_z_div_if_needed(struct etna_compile *c)
1945
{
1946
if (c->info.processor == PIPE_SHADER_VERTEX && c->specs->vs_need_z_div) {
1947
/* find position out */
1948
struct etna_reg_desc *pos_reg =
1949
find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_POSITION, 0);
1950
1951
if (pos_reg != NULL) {
1952
/*
1953
* ADD tX.__z_, tX.zzzz, void, tX.wwww
1954
* MUL tX.__z_, tX.zzzz, 0.5, void
1955
*/
1956
emit_inst(c, &(struct etna_inst) {
1957
.opcode = INST_OPCODE_ADD,
1958
.dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1959
.src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1960
.src[2] = etna_native_to_src(pos_reg->native, SWIZZLE(W, W, W, W)),
1961
});
1962
emit_inst(c, &(struct etna_inst) {
1963
.opcode = INST_OPCODE_MUL,
1964
.dst = etna_native_to_dst(pos_reg->native, INST_COMPS_Z),
1965
.src[0] = etna_native_to_src(pos_reg->native, SWIZZLE(Z, Z, Z, Z)),
1966
.src[1] = alloc_imm_f32(c, 0.5f),
1967
});
1968
}
1969
}
1970
}
1971
1972
static void
1973
etna_compile_frag_rb_swap(struct etna_compile *c)
1974
{
1975
if (c->info.processor == PIPE_SHADER_FRAGMENT && c->key->frag_rb_swap) {
1976
/* find color out */
1977
struct etna_reg_desc *color_reg =
1978
find_decl_by_semantic(c, TGSI_FILE_OUTPUT, TGSI_SEMANTIC_COLOR, 0);
1979
1980
emit_inst(c, &(struct etna_inst) {
1981
.opcode = INST_OPCODE_MOV,
1982
.dst = etna_native_to_dst(color_reg->native, INST_COMPS_X | INST_COMPS_Y | INST_COMPS_Z | INST_COMPS_W),
1983
.src[2] = etna_native_to_src(color_reg->native, SWIZZLE(Z, Y, X, W)),
1984
});
1985
}
1986
}
1987
1988
/** add a NOP to the shader if
1989
* a) the shader is empty
1990
* or
1991
* b) there is a label at the end of the shader
1992
*/
1993
static void
1994
etna_compile_add_nop_if_needed(struct etna_compile *c)
1995
{
1996
bool label_at_last_inst = false;
1997
1998
for (int idx = 0; idx < c->labels_count; ++idx) {
1999
if (c->labels[idx].inst_idx == c->inst_ptr)
2000
label_at_last_inst = true;
2001
2002
}
2003
2004
if (c->inst_ptr == 0 || label_at_last_inst)
2005
emit_inst(c, &(struct etna_inst){.opcode = INST_OPCODE_NOP});
2006
}
2007
2008
static void
2009
assign_uniforms(struct etna_compile_file *file, unsigned base)
2010
{
2011
for (int idx = 0; idx < file->reg_size; ++idx) {
2012
file->reg[idx].native.valid = 1;
2013
file->reg[idx].native.rgroup = INST_RGROUP_UNIFORM_0;
2014
file->reg[idx].native.id = base + idx;
2015
}
2016
}
2017
2018
/* Allocate CONST and IMM to native ETNA_RGROUP_UNIFORM(x).
2019
* CONST must be consecutive as const buffers are supposed to be consecutive,
2020
* and before IMM, as this is
2021
* more convenient because is possible for the compilation process itself to
2022
* generate extra
2023
* immediates for constants such as pi, one, zero.
2024
*/
2025
static void
2026
assign_constants_and_immediates(struct etna_compile *c)
2027
{
2028
assign_uniforms(&c->file[TGSI_FILE_CONSTANT], 0);
2029
/* immediates start after the constants */
2030
c->imm_base = c->file[TGSI_FILE_CONSTANT].reg_size * 4;
2031
assign_uniforms(&c->file[TGSI_FILE_IMMEDIATE], c->imm_base / 4);
2032
DBG_F(ETNA_DBG_COMPILER_MSGS, "imm base: %i size: %i", c->imm_base,
2033
c->imm_size);
2034
}
2035
2036
/* Assign declared samplers to native texture units */
2037
static void
2038
assign_texture_units(struct etna_compile *c)
2039
{
2040
uint tex_base = 0;
2041
2042
if (c->info.processor == PIPE_SHADER_VERTEX)
2043
tex_base = c->specs->vertex_sampler_offset;
2044
2045
for (int idx = 0; idx < c->file[TGSI_FILE_SAMPLER].reg_size; ++idx) {
2046
c->file[TGSI_FILE_SAMPLER].reg[idx].native.valid = 1;
2047
c->file[TGSI_FILE_SAMPLER].reg[idx].native.is_tex = 1; // overrides rgroup
2048
c->file[TGSI_FILE_SAMPLER].reg[idx].native.id = tex_base + idx;
2049
}
2050
}
2051
2052
/* Additional pass to fill in branch targets. This pass should be last
2053
* as no instruction reordering or removing/addition can be done anymore
2054
* once the branch targets are computed.
2055
*/
2056
static void
2057
etna_compile_fill_in_labels(struct etna_compile *c)
2058
{
2059
for (int idx = 0; idx < c->inst_ptr; ++idx) {
2060
if (c->lbl_usage[idx] != -1)
2061
etna_assemble_set_imm(&c->code[idx * 4],
2062
c->labels[c->lbl_usage[idx]].inst_idx);
2063
}
2064
}
2065
2066
/* compare two etna_native_reg structures, return true if equal */
2067
static bool
2068
cmp_etna_native_reg(const struct etna_native_reg to,
2069
const struct etna_native_reg from)
2070
{
2071
return to.valid == from.valid && to.is_tex == from.is_tex &&
2072
to.rgroup == from.rgroup && to.id == from.id;
2073
}
2074
2075
/* go through all declarations and swap native registers *to* and *from* */
2076
static void
2077
swap_native_registers(struct etna_compile *c, const struct etna_native_reg to,
2078
const struct etna_native_reg from)
2079
{
2080
if (cmp_etna_native_reg(from, to))
2081
return; /* Nothing to do */
2082
2083
for (int idx = 0; idx < c->total_decls; ++idx) {
2084
if (cmp_etna_native_reg(c->decl[idx].native, from)) {
2085
c->decl[idx].native = to;
2086
} else if (cmp_etna_native_reg(c->decl[idx].native, to)) {
2087
c->decl[idx].native = from;
2088
}
2089
}
2090
}
2091
2092
/* For PS we need to permute so that inputs are always in temporary 0..N-1.
2093
* Semantic POS is always t0. If that semantic is not used, avoid t0.
2094
*/
2095
static void
2096
permute_ps_inputs(struct etna_compile *c)
2097
{
2098
/* Special inputs:
2099
* gl_FragCoord VARYING_SLOT_POS TGSI_SEMANTIC_POSITION
2100
* gl_FrontFacing VARYING_SLOT_FACE TGSI_SEMANTIC_FACE
2101
* gl_PointCoord VARYING_SLOT_PNTC TGSI_SEMANTIC_PCOORD
2102
* gl_TexCoord VARYING_SLOT_TEX TGSI_SEMANTIC_TEXCOORD
2103
*/
2104
uint native_idx = 1;
2105
2106
for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2107
struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2108
uint input_id;
2109
assert(reg->has_semantic);
2110
2111
if (!reg->active ||
2112
reg->semantic.Name == TGSI_SEMANTIC_POSITION ||
2113
reg->semantic.Name == TGSI_SEMANTIC_FACE)
2114
continue;
2115
2116
input_id = native_idx++;
2117
swap_native_registers(c, etna_native_temp(input_id),
2118
c->file[TGSI_FILE_INPUT].reg[idx].native);
2119
}
2120
2121
c->num_varyings = native_idx - 1;
2122
2123
if (native_idx > c->next_free_native)
2124
c->next_free_native = native_idx;
2125
}
2126
2127
static inline int sem2slot(const struct tgsi_declaration_semantic *semantic)
2128
{
2129
return tgsi_varying_semantic_to_slot(semantic->Name, semantic->Index);
2130
}
2131
2132
/* fill in ps inputs into shader object */
2133
static void
2134
fill_in_ps_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2135
{
2136
struct etna_shader_io_file *sf = &sobj->infile;
2137
2138
sf->num_reg = 0;
2139
2140
for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2141
struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2142
2143
if (reg->native.id > 0) {
2144
assert(sf->num_reg < ETNA_NUM_INPUTS);
2145
sf->reg[sf->num_reg].reg = reg->native.id;
2146
sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2147
/* convert usage mask to number of components (*=wildcard)
2148
* .r (0..1) -> 1 component
2149
* .*g (2..3) -> 2 component
2150
* .**b (4..7) -> 3 components
2151
* .***a (8..15) -> 4 components
2152
*/
2153
sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2154
sf->num_reg++;
2155
}
2156
}
2157
2158
assert(sf->num_reg == c->num_varyings);
2159
sobj->input_count_unk8 = 31; /* XXX what is this */
2160
}
2161
2162
/* fill in output mapping for ps into shader object */
2163
static void
2164
fill_in_ps_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2165
{
2166
sobj->outfile.num_reg = 0;
2167
2168
for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
2169
struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
2170
2171
switch (reg->semantic.Name) {
2172
case TGSI_SEMANTIC_COLOR: /* FRAG_RESULT_COLOR */
2173
sobj->ps_color_out_reg = reg->native.id;
2174
break;
2175
case TGSI_SEMANTIC_POSITION: /* FRAG_RESULT_DEPTH */
2176
sobj->ps_depth_out_reg = reg->native.id; /* =always native reg 0, only z component should be assigned */
2177
break;
2178
default:
2179
assert(0); /* only outputs supported are COLOR and POSITION at the moment */
2180
}
2181
}
2182
}
2183
2184
/* fill in inputs for vs into shader object */
2185
static void
2186
fill_in_vs_inputs(struct etna_shader_variant *sobj, struct etna_compile *c)
2187
{
2188
struct etna_shader_io_file *sf = &sobj->infile;
2189
2190
sf->num_reg = 0;
2191
for (int idx = 0; idx < c->file[TGSI_FILE_INPUT].reg_size; ++idx) {
2192
struct etna_reg_desc *reg = &c->file[TGSI_FILE_INPUT].reg[idx];
2193
assert(sf->num_reg < ETNA_NUM_INPUTS);
2194
2195
if (!reg->native.valid)
2196
continue;
2197
2198
/* XXX exclude inputs with special semantics such as gl_frontFacing */
2199
sf->reg[sf->num_reg].reg = reg->native.id;
2200
sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
2201
sf->reg[sf->num_reg].num_components = util_last_bit(reg->usage_mask);
2202
sf->num_reg++;
2203
}
2204
2205
sobj->input_count_unk8 = (sf->num_reg + 19) / 16; /* XXX what is this */
2206
}
2207
2208
/* fill in outputs for vs into shader object */
static void
fill_in_vs_outputs(struct etna_shader_variant *sobj, struct etna_compile *c)
{
   struct etna_shader_io_file *sf = &sobj->outfile;

   sf->num_reg = 0;
   for (int idx = 0; idx < c->file[TGSI_FILE_OUTPUT].reg_size; ++idx) {
      struct etna_reg_desc *reg = &c->file[TGSI_FILE_OUTPUT].reg[idx];
      assert(sf->num_reg < ETNA_NUM_INPUTS);

      switch (reg->semantic.Name) {
      case TGSI_SEMANTIC_POSITION:
         /* position and point size get dedicated registers in the variant
          * rather than entries in the generic varying table */
         sobj->vs_pos_out_reg = reg->native.id;
         break;
      case TGSI_SEMANTIC_PSIZE:
         sobj->vs_pointsize_out_reg = reg->native.id;
         break;
      default:
         sf->reg[sf->num_reg].reg = reg->native.id;
         sf->reg[sf->num_reg].slot = sem2slot(&reg->semantic);
         sf->reg[sf->num_reg].num_components = 4; // XXX reg->num_components;
         sf->num_reg++;
      }
   }

   /* fill in "mystery meat" load balancing value. This value determines how
    * work is scheduled between VS and PS
    * in the unified shader architecture. More precisely, it is determined from
    * the number of VS outputs, as well as chip-specific
    * vertex output buffer size, vertex cache size, and the number of shader
    * cores.
    *
    * XXX this is a conservative estimate, the "optimal" value is only known for
    * sure at link time because some
    * outputs may be unused and thus unmapped. Then again, in the general use
    * case with GLSL the vertex and fragment
    * shaders are linked already before submitting to Gallium, thus all outputs
    * are used.
    */
   int half_out = (c->file[TGSI_FILE_OUTPUT].reg_size + 1) / 2;
   assert(half_out);

   /* NOTE(review): the divisor (vertex_output_buffer_size -
    * 2*half_out*vertex_cache_size) is assumed positive here — confirm the
    * specs guarantee this for all supported chips. */
   uint32_t b = ((20480 / (c->specs->vertex_output_buffer_size -
                           2 * half_out * c->specs->vertex_cache_size)) +
                 9) /
                10;
   uint32_t a = (b + 256 / (c->specs->shader_core_count * half_out)) / 2;
   sobj->vs_load_balancing = VIVS_VS_LOAD_BALANCING_A(MIN2(a, 255)) |
                             VIVS_VS_LOAD_BALANCING_B(MIN2(b, 255)) |
                             VIVS_VS_LOAD_BALANCING_C(0x3f) |
                             VIVS_VS_LOAD_BALANCING_D(0x0f);
}
2261
2262
/* Check the compiled shader against the hardware limits (instructions,
 * registers, uniforms, varyings). Returns false when a hard limit is
 * exceeded, meaning the shader cannot be used. */
static bool
etna_compile_check_limits(struct etna_compile *c)
{
   int max_uniforms = (c->info.processor == PIPE_SHADER_VERTEX)
                         ? c->specs->max_vs_uniforms
                         : c->specs->max_ps_uniforms;
   /* round up number of uniforms, including immediates, in units of four */
   int num_uniforms = c->imm_base / 4 + (c->imm_size + 3) / 4;

   /* with an instruction cache the program need not fit in instruction
    * memory, so the count limit only applies without one */
   if (!c->specs->has_icache && c->inst_ptr > c->specs->max_instructions) {
      DBG("Number of instructions (%d) exceeds maximum %d", c->inst_ptr,
          c->specs->max_instructions);
      return false;
   }

   if (c->next_free_native > c->specs->max_registers) {
      DBG("Number of registers (%d) exceeds maximum %d", c->next_free_native,
          c->specs->max_registers);
      return false;
   }

   if (num_uniforms > max_uniforms) {
      DBG("Number of uniforms (%d) exceeds maximum %d", num_uniforms,
          max_uniforms);
      return false;
   }

   if (c->num_varyings > c->specs->max_varyings) {
      DBG("Number of varyings (%d) exceeds maximum %d", c->num_varyings,
          c->specs->max_varyings);
      return false;
   }

   /* NOTE(review): unlike the checks above this one only logs and does not
    * return false — verify whether exceeding num_constants is really
    * non-fatal or whether a `return false` is missing here. */
   if (c->imm_base > c->specs->num_constants) {
      DBG("Number of constants (%d) exceeds maximum %d", c->imm_base,
          c->specs->num_constants);
   }

   return true;
}
2302
2303
/* Copy the uniform layout into the shader variant: the first imm_base
 * scalar slots cover the application-supplied uniforms, followed by
 * imm_size compiler-generated immediates. */
static void
copy_uniform_state_to_shader(struct etna_compile *c, struct etna_shader_variant *sobj)
{
   uint32_t count = c->imm_base + c->imm_size;
   struct etna_shader_uniform_info *uinfo = &sobj->uniforms;

   uinfo->count = count;

   /* NOTE(review): both mallocs below are used unchecked; an allocation
    * failure would crash. The void return gives no error path — consider
    * propagating failure to the caller. */
   uinfo->data = malloc(count * sizeof(*c->imm_data));
   /* for the uniform-backed slots the stored value is just the slot index */
   for (unsigned i = 0; i < c->imm_base; i++)
      uinfo->data[i] = i;
   /* compiler-generated immediates follow the uniforms */
   memcpy(&uinfo->data[c->imm_base], c->imm_data, c->imm_size * sizeof(*c->imm_data));

   uinfo->contents = malloc(count * sizeof(*c->imm_contents));
   for (unsigned i = 0; i < c->imm_base; i++)
      uinfo->contents[i] = ETNA_UNIFORM_UNIFORM;
   memcpy(&uinfo->contents[c->imm_base], c->imm_contents, c->imm_size * sizeof(*c->imm_contents));

   /* ensure the fresh uniform values are (re)uploaded before next use */
   etna_set_shader_uniforms_dirty_flags(sobj);
}
2323
2324
/* Compile the TGSI program of shader variant `v` into Vivante machine code,
 * filling in the variant's code, register counts, uniform layout and
 * input/output maps. Returns true on success, false when allocation fails
 * or a hardware limit is exceeded. Delegates to the NIR backend when
 * ETNA_DBG_NIR is enabled. */
bool
etna_compile_shader(struct etna_shader_variant *v)
{
   if (DBG_ENABLED(ETNA_DBG_NIR))
      return etna_compile_shader_nir(v);

   /* Create scratch space that may be too large to fit on stack
    */
   bool ret;
   struct etna_compile *c;

   if (unlikely(!v))
      return false;

   const struct etna_specs *specs = v->shader->specs;

   /* lower TGSI opcodes the hardware lacks to supported sequences */
   struct tgsi_lowering_config lconfig = {
      .lower_FLR = !specs->has_sign_floor_ceil,
      .lower_CEIL = !specs->has_sign_floor_ceil,
      .lower_POW = true,
      .lower_EXP = true,
      .lower_LOG = true,
      .lower_DP2 = !specs->has_halti2_instructions,
      .lower_TRUNC = true,
   };

   c = CALLOC_STRUCT(etna_compile);
   if (!c)
      return false;

   /* -1 marks "no label used at this instruction" for the label-fixup pass */
   memset(&c->lbl_usage, -1, sizeof(c->lbl_usage));

   const struct tgsi_token *tokens = v->shader->tokens;

   c->specs = specs;
   c->key = &v->key;
   c->tokens = tgsi_transform_lowering(&lconfig, tokens, &c->info);
   /* lowering returns a freshly allocated token stream (or NULL when
    * nothing needed lowering) — remember whether we must free it */
   c->free_tokens = !!c->tokens;
   if (!c->tokens) {
      /* no lowering */
      c->tokens = tokens;
   }

   /* Build a map from gallium register to native registers for files
    * CONST, SAMP, IMM, OUT, IN, TEMP.
    * SAMP will map as-is for fragment shaders, there will be a +8 offset for
    * vertex shaders.
    */
   /* Pass one -- check register file declarations and immediates */
   etna_compile_parse_declarations(c);

   etna_allocate_decls(c);

   /* Pass two -- check usage of temporaries, inputs, outputs */
   etna_compile_pass_check_usage(c);

   assign_special_inputs(c);

   /* Assign native temp register to TEMPs */
   assign_temporaries_to_native(c, &c->file[TGSI_FILE_TEMPORARY]);

   /* optimize outputs */
   etna_compile_pass_optimize_outputs(c);

   /* assign inputs: last usage of input should be <= first usage of temp */
   /* potential optimization case:
    *   if single MOV TEMP[y], IN[x] before which temp y is not used, and
    * after which IN[x]
    *   is not read, temp[y] can be used as input register as-is
    */
   /* sort temporaries by first use
    * sort inputs by last usage
    * iterate over inputs, temporaries
    *   if last usage of input <= first usage of temp:
    *     assign input to temp
    *     advance input, temporary pointer
    *   else
    *     advance temporary pointer
    *
    * potential problem: instruction with multiple inputs of which one is the
    * temp and the other is the input;
    *   however, as the temp is not used before this, how would this make
    * sense? uninitialized temporaries have an undefined
    *   value, so this would be ok
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_INPUT);

   /* assign outputs: first usage of output should be >= last usage of temp */
   /* potential optimization case:
    *   if single MOV OUT[x], TEMP[y] (with full write mask, or at least
    * writing all components that are used in
    *   the shader) after which temp y is no longer used temp[y] can be
    * used as output register as-is
    *
    * potential problem: instruction with multiple outputs of which one is the
    * temp and the other is the output;
    *   however, as the temp is not used after this, how would this make
    * sense? could just discard the output value
    */
   /* sort temporaries by last use
    * sort outputs by first usage
    * iterate over outputs, temporaries
    *   if first usage of output >= last usage of temp:
    *     assign output to temp
    *     advance output, temporary pointer
    *   else
    *     advance temporary pointer
    */
   assign_inouts_to_temporaries(c, TGSI_FILE_OUTPUT);

   assign_constants_and_immediates(c);
   assign_texture_units(c);

   /* list declarations */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }
   /* XXX for PS we need to permute so that inputs are always in temporary
    * 0..N-1.
    * There is no "switchboard" for varyings (AFAIK!). The output color,
    * however, can be routed
    * from an arbitrary temporary.
    */
   if (c->info.processor == PIPE_SHADER_FRAGMENT)
      permute_ps_inputs(c);


   /* list declarations (again, after the PS input permutation) */
   for (int x = 0; x < c->total_decls; ++x) {
      DBG_F(ETNA_DBG_COMPILER_MSGS, "%i: %s,%d active=%i first_use=%i "
                                    "last_use=%i native=%i usage_mask=%x "
                                    "has_semantic=%i",
            x, tgsi_file_name(c->decl[x].file), c->decl[x].idx,
            c->decl[x].active, c->decl[x].first_use, c->decl[x].last_use,
            c->decl[x].native.valid ? c->decl[x].native.id : -1,
            c->decl[x].usage_mask, c->decl[x].has_semantic);
      if (c->decl[x].has_semantic)
         DBG_F(ETNA_DBG_COMPILER_MSGS, " semantic_name=%s semantic_idx=%i",
               tgsi_semantic_names[c->decl[x].semantic.Name],
               c->decl[x].semantic.Index);
   }

   /* pass 3: generate instructions */
   etna_compile_pass_generate_code(c);
   etna_compile_add_z_div_if_needed(c);
   etna_compile_frag_rb_swap(c);
   etna_compile_add_nop_if_needed(c);

   ret = etna_compile_check_limits(c);
   if (!ret)
      goto out;

   /* must run last: instruction positions are final now */
   etna_compile_fill_in_labels(c);

   /* fill in output structure */
   v->stage = c->info.processor == PIPE_SHADER_FRAGMENT ? MESA_SHADER_FRAGMENT : MESA_SHADER_VERTEX;
   v->uses_discard = c->info.uses_kill;
   v->code_size = c->inst_ptr * 4;
   /* 16 bytes per instruction (four 32-bit words) */
   v->code = mem_dup(c->code, c->inst_ptr * 16);
   v->num_loops = c->num_loops;
   v->num_temps = c->next_free_native;
   v->vs_id_in_reg = -1;
   v->vs_pos_out_reg = -1;
   v->vs_pointsize_out_reg = -1;
   v->ps_color_out_reg = -1;
   v->ps_depth_out_reg = -1;
   v->needs_icache = c->inst_ptr > c->specs->max_instructions;
   copy_uniform_state_to_shader(c, v);

   if (c->info.processor == PIPE_SHADER_VERTEX) {
      fill_in_vs_inputs(v, c);
      fill_in_vs_outputs(v, c);
   } else if (c->info.processor == PIPE_SHADER_FRAGMENT) {
      fill_in_ps_inputs(v, c);
      fill_in_ps_outputs(v, c);
   }

out:
   /* only free the token stream if the lowering pass allocated it */
   if (c->free_tokens)
      FREE((void *)c->tokens);

   FREE(c->labels);
   FREE(c);

   return ret;
}
2520
2521
static const struct etna_shader_inout *
2522
etna_shader_vs_lookup(const struct etna_shader_variant *sobj,
2523
const struct etna_shader_inout *in)
2524
{
2525
for (int i = 0; i < sobj->outfile.num_reg; i++)
2526
if (sobj->outfile.reg[i].slot == in->slot)
2527
return &sobj->outfile.reg[i];
2528
2529
return NULL;
2530
}
2531
2532
/* Link a vertex and fragment shader variant, filling `info` with the
 * per-varying routing (VS register, component count, PA attribute config).
 * Returns true on a link error (FS input without matching VS output),
 * false on success. */
bool
etna_link_shader(struct etna_shader_link_info *info,
                 const struct etna_shader_variant *vs, const struct etna_shader_variant *fs)
{
   int comp_ofs = 0;
   /* For each fragment input we need to find the associated vertex shader
    * output, which can be found by matching on semantic name and index. A
    * binary search could be used because the vs outputs are sorted by their
    * semantic index and grouped by semantic type by fill_in_vs_outputs.
    */
   assert(fs->infile.num_reg < ETNA_NUM_INPUTS);
   info->pcoord_varying_comp_ofs = -1;

   for (int idx = 0; idx < fs->infile.num_reg; ++idx) {
      const struct etna_shader_inout *fsio = &fs->infile.reg[idx];
      const struct etna_shader_inout *vsio = etna_shader_vs_lookup(vs, fsio);
      struct etna_varying *varying;
      /* COL0/COL1 are the only slots subject to flat shading */
      bool interpolate_always = ((fsio->slot != VARYING_SLOT_COL0) &&
                                 (fsio->slot != VARYING_SLOT_COL1));

      /* FS input registers start at 1 (reg 0 is position) and index the
       * varying table 1-based */
      assert(fsio->reg > 0 && fsio->reg <= ARRAY_SIZE(info->varyings));

      if (fsio->reg > info->num_varyings)
         info->num_varyings = fsio->reg;

      varying = &info->varyings[fsio->reg - 1];
      varying->num_components = fsio->num_components;

      if (!interpolate_always) /* colors affected by flat shading */
         varying->pa_attributes = 0x200;
      else /* texture coord or other bypasses flat shading */
         varying->pa_attributes = 0x2f1;

      varying->use[0] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[1] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[2] = VARYING_COMPONENT_USE_UNUSED;
      varying->use[3] = VARYING_COMPONENT_USE_UNUSED;

      /* point/tex coord is an input to the PS without matching VS output,
       * so it gets a varying slot without being assigned a VS register.
       */
      if (util_varying_is_point_coord(fsio->slot, fs->key.sprite_coord_enable)) {
         varying->use[0] = VARYING_COMPONENT_USE_POINTCOORD_X;
         varying->use[1] = VARYING_COMPONENT_USE_POINTCOORD_Y;

         info->pcoord_varying_comp_ofs = comp_ofs;
      } else {
         if (vsio == NULL) { /* not found -- link error */
            BUG("Semantic value not found in vertex shader outputs\n");
            return true;
         }

         varying->reg = vsio->reg;
      }

      comp_ofs += varying->num_components;
   }

   assert(info->num_varyings == fs->infile.num_reg);

   return false;
}
2594
2595