/*
 * Source: GitHub repository PojavLauncherTeam/mesa
 * Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/r500_fragprog_emit.c
 */
1
/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * Copyright 2008 Corbin Simpson <[email protected]>
 * Adaptation and modification for ATI/AMD Radeon R500 GPU chipsets.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
30
31
/**
 * \file
 *
 * \author Ben Skeggs <[email protected]>
 *
 * \author Jerome Glisse <[email protected]>
 *
 * \author Corbin Simpson <[email protected]>
 *
 */
41
42
#include "r500_fragprog.h"
43
44
#include "r300_reg.h"
45
46
#include "radeon_program_pair.h"
47
48
#include "util/compiler.h"
49
50
/*
 * Declares a local `code` pointing at the R500 hardware program being built.
 * Expects a variable `c` (the fragment program compiler) in the enclosing scope.
 */
#define PROG_CODE struct r500_fragment_program_code *code = &c->code->code.r500
52
53
/*
 * Report a compiler error through rc_error(), prefixing the message with the
 * current file and function name and appending a newline.
 * Expects a variable `c` (the fragment program compiler) in the enclosing scope.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __FUNCTION__, ##__VA_ARGS__); \
	} while (0)
57
58
59
/**
 * Instruction indices recorded for one IF/ELSE/ENDIF construct.
 * Else and Endif hold -1 until the corresponding opcode has been emitted.
 */
struct branch_info {
	int If;    /* index of the IF instruction */
	int Else;  /* index of the ELSE instruction, or -1 */
	int Endif; /* index of the ENDIF instruction, or -1 */
};
64
65
/**
 * Bookkeeping for one open loop: where it began, the branch depth at that
 * point, and the instruction indices of its BRK and CONT instructions so their
 * jump addresses can be filled in when ENDLOOP is emitted.
 */
struct r500_loop_info {
	int BgnLoop;      /* index of the BGNLOOP instruction */

	int BranchDepth;  /* branch depth at the time BGNLOOP was emitted */

	int *Brks;        /* instruction indices of BRK instructions */
	int BrkCount;     /* number of entries used in Brks */
	int BrkReserved;  /* reservation counter passed to memory_pool_array_reserve */

	int *Conts;       /* instruction indices of CONT instructions */
	int ContCount;    /* number of entries used in Conts */
	int ContReserved; /* reservation counter passed to memory_pool_array_reserve */
};
77
78
/**
 * State carried across flow-control emission: the compiler, the hardware code
 * being written, and the stacks of currently open branches and loops.
 */
struct emit_state {
	struct radeon_compiler *C;
	struct r500_fragment_program_code *Code;

	/* Stack of open IF constructs. */
	struct branch_info *Branches;
	unsigned int CurrentBranchDepth;
	unsigned int BranchesReserved;

	/* Stack of open loops. */
	struct r500_loop_info *Loops;
	unsigned int CurrentLoopDepth;
	unsigned int LoopsReserved;

	/* Largest value CurrentBranchDepth has reached so far. */
	unsigned int MaxBranchDepth;
};
93
94
/**
 * Map a compiler opcode to the R500 RGB ALU opcode field.
 *
 * NOP is encoded as MAD.  Any opcode without a mapping raises a compiler
 * error and is encoded as MAD as well.
 */
static unsigned int translate_rgb_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALU_RGBA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALU_RGBA_OP_CND;
	case RC_OPCODE_DDX:
		return R500_ALU_RGBA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALU_RGBA_OP_MDV;
	case RC_OPCODE_DP3:
		return R500_ALU_RGBA_OP_DP3;
	case RC_OPCODE_DP4:
		return R500_ALU_RGBA_OP_DP4;
	case RC_OPCODE_FRC:
		return R500_ALU_RGBA_OP_FRC;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALU_RGBA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALU_RGBA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALU_RGBA_OP_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R500_ALU_RGBA_OP_SOP;
	default:
		break;
	}

	/* No mapping: report it, then fall back to MAD. */
	error("translate_rgb_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALU_RGBA_OP_MAD;
}
115
116
/**
 * Map a compiler opcode to the R500 alpha ALU opcode field.
 *
 * NOP is encoded as MAD.  Any opcode without a mapping raises a compiler
 * error and is encoded as MAD as well.
 */
static unsigned int translate_alpha_op(struct r300_fragment_program_compiler *c, rc_opcode opcode)
{
	switch (opcode) {
	case RC_OPCODE_CMP:
		return R500_ALPHA_OP_CMP;
	case RC_OPCODE_CND:
		return R500_ALPHA_OP_CND;
	case RC_OPCODE_COS:
		return R500_ALPHA_OP_COS;
	case RC_OPCODE_DDX:
		return R500_ALPHA_OP_MDH;
	case RC_OPCODE_DDY:
		return R500_ALPHA_OP_MDV;
	case RC_OPCODE_DP3:
	case RC_OPCODE_DP4:
		return R500_ALPHA_OP_DP;
	case RC_OPCODE_EX2:
		return R500_ALPHA_OP_EX2;
	case RC_OPCODE_FRC:
		return R500_ALPHA_OP_FRC;
	case RC_OPCODE_LG2:
		return R500_ALPHA_OP_LN2;
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R500_ALPHA_OP_MAD;
	case RC_OPCODE_MAX:
		return R500_ALPHA_OP_MAX;
	case RC_OPCODE_MIN:
		return R500_ALPHA_OP_MIN;
	case RC_OPCODE_RCP:
		return R500_ALPHA_OP_RCP;
	case RC_OPCODE_RSQ:
		return R500_ALPHA_OP_RSQ;
	case RC_OPCODE_SIN:
		return R500_ALPHA_OP_SIN;
	default:
		break;
	}

	/* No mapping: report it, then fall back to MAD. */
	error("translate_alpha_op: unknown opcode %s\n", rc_get_opcode_info(opcode)->Name);
	return R500_ALPHA_OP_MAD;
}
142
143
static unsigned int fix_hw_swizzle(unsigned int swz)
144
{
145
switch (swz) {
146
case RC_SWIZZLE_ZERO:
147
case RC_SWIZZLE_UNUSED:
148
swz = 4;
149
break;
150
case RC_SWIZZLE_HALF:
151
swz = 5;
152
break;
153
case RC_SWIZZLE_ONE:
154
swz = 6;
155
break;
156
}
157
158
return swz;
159
}
160
161
static unsigned int translate_arg_rgb(struct rc_pair_instruction *inst, int arg)
162
{
163
unsigned int t = inst->RGB.Arg[arg].Source;
164
int comp;
165
t |= inst->RGB.Arg[arg].Negate << 11;
166
t |= inst->RGB.Arg[arg].Abs << 12;
167
168
for(comp = 0; comp < 3; ++comp)
169
t |= fix_hw_swizzle(GET_SWZ(inst->RGB.Arg[arg].Swizzle, comp)) << (3*comp + 2);
170
171
return t;
172
}
173
174
static unsigned int translate_arg_alpha(struct rc_pair_instruction *inst, int i)
175
{
176
unsigned int t = inst->Alpha.Arg[i].Source;
177
t |= fix_hw_swizzle(GET_SWZ(inst->Alpha.Arg[i].Swizzle, 0)) << 2;
178
t |= inst->Alpha.Arg[i].Negate << 5;
179
t |= inst->Alpha.Arg[i].Abs << 6;
180
return t;
181
}
182
183
/**
 * Translate a compare function into the ALU-result op bits of inst0.
 * Only EQUAL, LESS, GEQUAL and NOTEQUAL are supported; anything else raises
 * a compiler error and yields 0.
 */
static uint32_t translate_alu_result_op(struct r300_fragment_program_compiler * c, rc_compare_func func)
{
	uint32_t result_op = 0;

	switch (func) {
	case RC_COMPARE_FUNC_EQUAL:
		result_op = R500_INST_ALU_RESULT_OP_EQ;
		break;
	case RC_COMPARE_FUNC_LESS:
		result_op = R500_INST_ALU_RESULT_OP_LT;
		break;
	case RC_COMPARE_FUNC_GEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_GE;
		break;
	case RC_COMPARE_FUNC_NOTEQUAL:
		result_op = R500_INST_ALU_RESULT_OP_NE;
		break;
	default:
		rc_error(&c->Base, "%s: unsupported compare func %i\n", __FUNCTION__, func);
		break;
	}

	return result_op;
}
195
196
static void use_temporary(struct r500_fragment_program_code* code, unsigned int index)
197
{
198
if (index > code->max_temp_idx)
199
code->max_temp_idx = index;
200
}
201
202
static unsigned int use_source(struct r500_fragment_program_code* code, struct rc_pair_instruction_source src)
203
{
204
/* From docs:
205
* Note that inline constants set the MSB of ADDR0 and clear ADDR0_CONST.
206
* MSB = 1 << 7 */
207
if (!src.Used)
208
return 1 << 7;
209
210
if (src.File == RC_FILE_CONSTANT) {
211
return src.Index | R500_RGB_ADDR0_CONST;
212
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
213
use_temporary(code, src.Index);
214
return src.Index;
215
} else if (src.File == RC_FILE_INLINE) {
216
return src.Index | (1 << 7);
217
}
218
219
return 0;
220
}
221
222
/**
223
* NOP the specified instruction if it is not a texture lookup.
224
*/
225
static void alu_nop(struct r300_fragment_program_compiler *c, int ip)
226
{
227
PROG_CODE;
228
229
if ((code->inst[ip].inst0 & 0x3) != R500_INST_TYPE_TEX) {
230
code->inst[ip].inst0 |= R500_INST_NOP;
231
}
232
}
233
234
/**
235
* Emit a paired ALU instruction.
236
*/
237
static void emit_paired(struct r300_fragment_program_compiler *c, struct rc_pair_instruction *inst)
238
{
239
int ip;
240
PROG_CODE;
241
242
if (code->inst_end >= c->Base.max_alu_insts-1) {
243
error("emit_alu: Too many instructions");
244
return;
245
}
246
247
ip = ++code->inst_end;
248
249
/* Quirk: MDH/MDV (DDX/DDY) need a NOP on previous non-TEX instructions. */
250
if (inst->RGB.Opcode == RC_OPCODE_DDX || inst->Alpha.Opcode == RC_OPCODE_DDX ||
251
inst->RGB.Opcode == RC_OPCODE_DDY || inst->Alpha.Opcode == RC_OPCODE_DDY) {
252
if (ip > 0) {
253
alu_nop(c, ip - 1);
254
}
255
}
256
257
code->inst[ip].inst5 = translate_rgb_op(c, inst->RGB.Opcode);
258
code->inst[ip].inst4 = translate_alpha_op(c, inst->Alpha.Opcode);
259
260
if (inst->RGB.OutputWriteMask || inst->Alpha.OutputWriteMask || inst->Alpha.DepthWriteMask) {
261
code->inst[ip].inst0 = R500_INST_TYPE_OUT;
262
if (inst->WriteALUResult) {
263
error("Cannot write output and ALU result at the same time");
264
return;
265
}
266
} else {
267
code->inst[ip].inst0 = R500_INST_TYPE_ALU;
268
}
269
code->inst[ip].inst0 |= (inst->SemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
270
271
code->inst[ip].inst0 |= (inst->RGB.WriteMask << 11);
272
code->inst[ip].inst0 |= inst->Alpha.WriteMask ? 1 << 14 : 0;
273
code->inst[ip].inst0 |= (inst->RGB.OutputWriteMask << 15) | (inst->Alpha.OutputWriteMask << 18);
274
if (inst->Nop) {
275
code->inst[ip].inst0 |= R500_INST_NOP;
276
}
277
if (inst->Alpha.DepthWriteMask) {
278
code->inst[ip].inst4 |= R500_ALPHA_W_OMASK;
279
c->code->writes_depth = 1;
280
}
281
282
code->inst[ip].inst4 |= R500_ALPHA_ADDRD(inst->Alpha.DestIndex);
283
code->inst[ip].inst5 |= R500_ALU_RGBA_ADDRD(inst->RGB.DestIndex);
284
use_temporary(code, inst->Alpha.DestIndex);
285
use_temporary(code, inst->RGB.DestIndex);
286
287
if (inst->RGB.Saturate)
288
code->inst[ip].inst0 |= R500_INST_RGB_CLAMP;
289
if (inst->Alpha.Saturate)
290
code->inst[ip].inst0 |= R500_INST_ALPHA_CLAMP;
291
292
/* Set the presubtract operation. */
293
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
294
case RC_PRESUB_BIAS:
295
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_2RGB0;
296
break;
297
case RC_PRESUB_SUB:
298
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_MINUS_RGB0;
299
break;
300
case RC_PRESUB_ADD:
301
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_RGB1_PLUS_RGB0;
302
break;
303
case RC_PRESUB_INV:
304
code->inst[ip].inst1 |= R500_RGB_SRCP_OP_1_MINUS_RGB0;
305
break;
306
default:
307
break;
308
}
309
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
310
case RC_PRESUB_BIAS:
311
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_2A0;
312
break;
313
case RC_PRESUB_SUB:
314
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_MINUS_A0;
315
break;
316
case RC_PRESUB_ADD:
317
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_A1_PLUS_A0;
318
break;
319
case RC_PRESUB_INV:
320
code->inst[ip].inst2 |= R500_ALPHA_SRCP_OP_1_MINUS_A0;
321
break;
322
default:
323
break;
324
}
325
326
/* Set the output modifier */
327
code->inst[ip].inst3 |= inst->RGB.Omod << R500_ALU_RGB_OMOD_SHIFT;
328
code->inst[ip].inst4 |= inst->Alpha.Omod << R500_ALPHA_OMOD_SHIFT;
329
330
code->inst[ip].inst1 |= R500_RGB_ADDR0(use_source(code, inst->RGB.Src[0]));
331
code->inst[ip].inst1 |= R500_RGB_ADDR1(use_source(code, inst->RGB.Src[1]));
332
code->inst[ip].inst1 |= R500_RGB_ADDR2(use_source(code, inst->RGB.Src[2]));
333
334
code->inst[ip].inst2 |= R500_ALPHA_ADDR0(use_source(code, inst->Alpha.Src[0]));
335
code->inst[ip].inst2 |= R500_ALPHA_ADDR1(use_source(code, inst->Alpha.Src[1]));
336
code->inst[ip].inst2 |= R500_ALPHA_ADDR2(use_source(code, inst->Alpha.Src[2]));
337
338
code->inst[ip].inst3 |= translate_arg_rgb(inst, 0) << R500_ALU_RGB_SEL_A_SHIFT;
339
code->inst[ip].inst3 |= translate_arg_rgb(inst, 1) << R500_ALU_RGB_SEL_B_SHIFT;
340
code->inst[ip].inst5 |= translate_arg_rgb(inst, 2) << R500_ALU_RGBA_SEL_C_SHIFT;
341
342
code->inst[ip].inst4 |= translate_arg_alpha(inst, 0) << R500_ALPHA_SEL_A_SHIFT;
343
code->inst[ip].inst4 |= translate_arg_alpha(inst, 1) << R500_ALPHA_SEL_B_SHIFT;
344
code->inst[ip].inst5 |= translate_arg_alpha(inst, 2) << R500_ALU_RGBA_ALPHA_SEL_C_SHIFT;
345
346
code->inst[ip].inst3 |= R500_ALU_RGB_TARGET(inst->RGB.Target);
347
code->inst[ip].inst4 |= R500_ALPHA_TARGET(inst->Alpha.Target);
348
349
if (inst->WriteALUResult) {
350
code->inst[ip].inst3 |= R500_ALU_RGB_WMASK;
351
352
if (inst->WriteALUResult == RC_ALURESULT_X)
353
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_RED;
354
else
355
code->inst[ip].inst0 |= R500_INST_ALU_RESULT_SEL_ALPHA;
356
357
code->inst[ip].inst0 |= translate_alu_result_op(c, inst->ALUResultCompare);
358
}
359
}
360
361
/**
 * Pack a four-component swizzle into 8 bits, two bits per component, with
 * component 0 in the lowest two bits.  Each selector is truncated to its
 * low two bits.
 */
static unsigned int translate_strq_swizzle(unsigned int swizzle)
{
	unsigned int packed = 0;
	int chan;

	/* Walk from the last component down so component 0 ends up lowest. */
	for (chan = 3; chan >= 0; chan--)
		packed = (packed << 2) | (GET_SWZ(swizzle, chan) & 0x3);

	return packed;
}
369
370
/**
371
* Emit a single TEX instruction
372
*/
373
static int emit_tex(struct r300_fragment_program_compiler *c, struct rc_sub_instruction *inst)
374
{
375
int ip;
376
PROG_CODE;
377
378
if (code->inst_end >= c->Base.max_alu_insts-1) {
379
error("emit_tex: Too many instructions");
380
return 0;
381
}
382
383
ip = ++code->inst_end;
384
385
code->inst[ip].inst0 = R500_INST_TYPE_TEX
386
| (inst->DstReg.WriteMask << 11)
387
| (inst->TexSemWait << R500_INST_TEX_SEM_WAIT_SHIFT);
388
code->inst[ip].inst1 = R500_TEX_ID(inst->TexSrcUnit)
389
| (inst->TexSemAcquire << R500_TEX_SEM_ACQUIRE_SHIFT);
390
391
if (inst->TexSrcTarget == RC_TEXTURE_RECT)
392
code->inst[ip].inst1 |= R500_TEX_UNSCALED;
393
394
switch (inst->Opcode) {
395
case RC_OPCODE_KIL:
396
code->inst[ip].inst1 |= R500_TEX_INST_TEXKILL;
397
break;
398
case RC_OPCODE_TEX:
399
code->inst[ip].inst1 |= R500_TEX_INST_LD;
400
break;
401
case RC_OPCODE_TXB:
402
code->inst[ip].inst1 |= R500_TEX_INST_LODBIAS;
403
break;
404
case RC_OPCODE_TXP:
405
code->inst[ip].inst1 |= R500_TEX_INST_PROJ;
406
break;
407
case RC_OPCODE_TXD:
408
code->inst[ip].inst1 |= R500_TEX_INST_DXDY;
409
break;
410
case RC_OPCODE_TXL:
411
code->inst[ip].inst1 |= R500_TEX_INST_LOD;
412
break;
413
default:
414
error("emit_tex can't handle opcode %s\n", rc_get_opcode_info(inst->Opcode)->Name);
415
}
416
417
use_temporary(code, inst->SrcReg[0].Index);
418
if (inst->Opcode != RC_OPCODE_KIL)
419
use_temporary(code, inst->DstReg.Index);
420
421
code->inst[ip].inst2 = R500_TEX_SRC_ADDR(inst->SrcReg[0].Index)
422
| (translate_strq_swizzle(inst->SrcReg[0].Swizzle) << 8)
423
| R500_TEX_DST_ADDR(inst->DstReg.Index)
424
| (GET_SWZ(inst->TexSwizzle, 0) << 24)
425
| (GET_SWZ(inst->TexSwizzle, 1) << 26)
426
| (GET_SWZ(inst->TexSwizzle, 2) << 28)
427
| (GET_SWZ(inst->TexSwizzle, 3) << 30)
428
;
429
430
if (inst->Opcode == RC_OPCODE_TXD) {
431
use_temporary(code, inst->SrcReg[1].Index);
432
use_temporary(code, inst->SrcReg[2].Index);
433
434
/* DX and DY parameters are specified in a separate register. */
435
code->inst[ip].inst3 =
436
R500_DX_ADDR(inst->SrcReg[1].Index) |
437
(translate_strq_swizzle(inst->SrcReg[1].Swizzle) << 8) |
438
R500_DY_ADDR(inst->SrcReg[2].Index) |
439
(translate_strq_swizzle(inst->SrcReg[2].Swizzle) << 24);
440
}
441
442
return 1;
443
}
444
445
static void emit_flowcontrol(struct emit_state * s, struct rc_instruction * inst)
446
{
447
unsigned int newip;
448
449
if (s->Code->inst_end >= s->C->max_alu_insts-1) {
450
rc_error(s->C, "emit_tex: Too many instructions");
451
return;
452
}
453
454
newip = ++s->Code->inst_end;
455
456
/* Currently all loops use the same integer constant to initialize
457
* the loop variables. */
458
if(!s->Code->int_constants[0]) {
459
s->Code->int_constants[0] = R500_FC_INT_CONST_KR(0xff);
460
s->Code->int_constant_count = 1;
461
}
462
s->Code->inst[newip].inst0 = R500_INST_TYPE_FC | R500_INST_ALU_WAIT;
463
464
switch(inst->U.I.Opcode){
465
struct branch_info * branch;
466
struct r500_loop_info * loop;
467
case RC_OPCODE_BGNLOOP:
468
memory_pool_array_reserve(&s->C->Pool, struct r500_loop_info,
469
s->Loops, s->CurrentLoopDepth, s->LoopsReserved, 1);
470
471
loop = &s->Loops[s->CurrentLoopDepth++];
472
memset(loop, 0, sizeof(struct r500_loop_info));
473
loop->BranchDepth = s->CurrentBranchDepth;
474
loop->BgnLoop = newip;
475
476
s->Code->inst[newip].inst2 = R500_FC_OP_LOOP
477
| R500_FC_JUMP_FUNC(0x00)
478
| R500_FC_IGNORE_UNCOVERED
479
;
480
break;
481
case RC_OPCODE_BRK:
482
loop = &s->Loops[s->CurrentLoopDepth - 1];
483
memory_pool_array_reserve(&s->C->Pool, int, loop->Brks,
484
loop->BrkCount, loop->BrkReserved, 1);
485
486
loop->Brks[loop->BrkCount++] = newip;
487
s->Code->inst[newip].inst2 = R500_FC_OP_BREAKLOOP
488
| R500_FC_JUMP_FUNC(0xff)
489
| R500_FC_B_OP1_DECR
490
| R500_FC_B_POP_CNT(
491
s->CurrentBranchDepth - loop->BranchDepth)
492
| R500_FC_IGNORE_UNCOVERED
493
;
494
break;
495
496
case RC_OPCODE_CONT:
497
loop = &s->Loops[s->CurrentLoopDepth - 1];
498
memory_pool_array_reserve(&s->C->Pool, int, loop->Conts,
499
loop->ContCount, loop->ContReserved, 1);
500
loop->Conts[loop->ContCount++] = newip;
501
s->Code->inst[newip].inst2 = R500_FC_OP_CONTINUE
502
| R500_FC_JUMP_FUNC(0xff)
503
| R500_FC_B_OP1_DECR
504
| R500_FC_B_POP_CNT(
505
s->CurrentBranchDepth - loop->BranchDepth)
506
| R500_FC_IGNORE_UNCOVERED
507
;
508
break;
509
510
case RC_OPCODE_ENDLOOP:
511
{
512
loop = &s->Loops[s->CurrentLoopDepth - 1];
513
/* Emit ENDLOOP */
514
s->Code->inst[newip].inst2 = R500_FC_OP_ENDLOOP
515
| R500_FC_JUMP_FUNC(0xff)
516
| R500_FC_JUMP_ANY
517
| R500_FC_IGNORE_UNCOVERED
518
;
519
/* The constant integer at index 0 is used by all loops. */
520
s->Code->inst[newip].inst3 = R500_FC_INT_ADDR(0)
521
| R500_FC_JUMP_ADDR(loop->BgnLoop + 1)
522
;
523
524
/* Set jump address and int constant for BGNLOOP */
525
s->Code->inst[loop->BgnLoop].inst3 = R500_FC_INT_ADDR(0)
526
| R500_FC_JUMP_ADDR(newip)
527
;
528
529
/* Set jump address for the BRK instructions. */
530
while(loop->BrkCount--) {
531
s->Code->inst[loop->Brks[loop->BrkCount]].inst3 =
532
R500_FC_JUMP_ADDR(newip + 1);
533
}
534
535
/* Set jump address for CONT instructions. */
536
while(loop->ContCount--) {
537
s->Code->inst[loop->Conts[loop->ContCount]].inst3 =
538
R500_FC_JUMP_ADDR(newip);
539
}
540
s->CurrentLoopDepth--;
541
break;
542
}
543
case RC_OPCODE_IF:
544
if ( s->CurrentBranchDepth >= R500_PFS_MAX_BRANCH_DEPTH_FULL) {
545
rc_error(s->C, "Branch depth exceeds hardware limit");
546
return;
547
}
548
memory_pool_array_reserve(&s->C->Pool, struct branch_info,
549
s->Branches, s->CurrentBranchDepth, s->BranchesReserved, 1);
550
551
branch = &s->Branches[s->CurrentBranchDepth++];
552
branch->If = newip;
553
branch->Else = -1;
554
branch->Endif = -1;
555
556
if (s->CurrentBranchDepth > s->MaxBranchDepth)
557
s->MaxBranchDepth = s->CurrentBranchDepth;
558
559
/* actual instruction is filled in at ENDIF time */
560
break;
561
562
case RC_OPCODE_ELSE:
563
if (!s->CurrentBranchDepth) {
564
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
565
return;
566
}
567
568
branch = &s->Branches[s->CurrentBranchDepth - 1];
569
branch->Else = newip;
570
571
/* actual instruction is filled in at ENDIF time */
572
break;
573
574
case RC_OPCODE_ENDIF:
575
if (!s->CurrentBranchDepth) {
576
rc_error(s->C, "%s: got ELSE outside a branch", __FUNCTION__);
577
return;
578
}
579
580
branch = &s->Branches[s->CurrentBranchDepth - 1];
581
branch->Endif = newip;
582
583
s->Code->inst[branch->Endif].inst2 = R500_FC_OP_JUMP
584
| R500_FC_A_OP_NONE /* no address stack */
585
| R500_FC_JUMP_ANY /* docs says set this, but I don't understand why */
586
| R500_FC_B_OP0_DECR /* decrement branch counter if stay */
587
| R500_FC_B_OP1_NONE /* no branch counter if stay */
588
| R500_FC_B_POP_CNT(1)
589
;
590
s->Code->inst[branch->Endif].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
591
s->Code->inst[branch->If].inst2 = R500_FC_OP_JUMP
592
| R500_FC_A_OP_NONE /* no address stack */
593
| R500_FC_JUMP_FUNC(0x0f) /* jump if ALU result is false */
594
| R500_FC_B_OP0_INCR /* increment branch counter if stay */
595
| R500_FC_IGNORE_UNCOVERED
596
;
597
598
if (branch->Else >= 0) {
599
/* increment branch counter also if jump */
600
s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_INCR;
601
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Else + 1);
602
603
s->Code->inst[branch->Else].inst2 = R500_FC_OP_JUMP
604
| R500_FC_A_OP_NONE /* no address stack */
605
| R500_FC_B_ELSE /* all active pixels want to jump */
606
| R500_FC_B_OP0_NONE /* no counter op if stay */
607
| R500_FC_B_OP1_DECR /* decrement branch counter if jump */
608
| R500_FC_B_POP_CNT(1)
609
;
610
s->Code->inst[branch->Else].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
611
} else {
612
/* don't touch branch counter on jump */
613
s->Code->inst[branch->If].inst2 |= R500_FC_B_OP1_NONE;
614
s->Code->inst[branch->If].inst3 = R500_FC_JUMP_ADDR(branch->Endif + 1);
615
}
616
617
618
s->CurrentBranchDepth--;
619
break;
620
default:
621
rc_error(s->C, "%s: unknown opcode %s\n", __FUNCTION__, rc_get_opcode_info(inst->U.I.Opcode)->Name);
622
}
623
}
624
625
void r500BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
626
{
627
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
628
struct emit_state s;
629
struct r500_fragment_program_code *code = &compiler->code->code.r500;
630
631
memset(&s, 0, sizeof(s));
632
s.C = &compiler->Base;
633
s.Code = code;
634
635
memset(code, 0, sizeof(*code));
636
code->max_temp_idx = 1;
637
code->inst_end = -1;
638
639
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
640
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
641
inst = inst->Next) {
642
if (inst->Type == RC_INSTRUCTION_NORMAL) {
643
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
644
645
if (opcode->IsFlowControl) {
646
emit_flowcontrol(&s, inst);
647
} else if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
648
continue;
649
} else {
650
emit_tex(compiler, &inst->U.I);
651
}
652
} else {
653
emit_paired(compiler, &inst->U.P);
654
}
655
}
656
657
if (code->max_temp_idx >= compiler->Base.max_temp_regs)
658
rc_error(&compiler->Base, "Too many hardware temporaries used");
659
660
if (compiler->Base.Error)
661
return;
662
663
if (code->inst_end == -1 ||
664
(code->inst[code->inst_end].inst0 & R500_INST_TYPE_MASK) != R500_INST_TYPE_OUT) {
665
int ip;
666
667
/* This may happen when dead-code elimination is disabled or
668
* when most of the fragment program logic is leading to a KIL */
669
if (code->inst_end >= compiler->Base.max_alu_insts-1) {
670
rc_error(&compiler->Base, "Introducing fake OUT: Too many instructions");
671
return;
672
}
673
674
ip = ++code->inst_end;
675
code->inst[ip].inst0 = R500_INST_TYPE_OUT | R500_INST_TEX_SEM_WAIT;
676
}
677
678
/* Make sure TEX_SEM_WAIT is set on the last instruction */
679
code->inst[code->inst_end].inst0 |= R500_INST_TEX_SEM_WAIT;
680
681
/* Enable full flow control mode if we are using loops or have if
682
* statements nested at least four deep. */
683
if (s.MaxBranchDepth >= 4 || s.LoopsReserved > 0) {
684
if (code->max_temp_idx < 1)
685
code->max_temp_idx = 1;
686
687
code->us_fc_ctrl |= R500_FC_FULL_FC_EN;
688
}
689
}
690
691