Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/r300_fragprog_emit.c
4574 views
1
/*
 * Copyright (C) 2005 Ben Skeggs.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */
27
28
/**
 * \file
 *
 * Emit the r300_fragment_program_code that can be understood by the hardware.
 * Input is a pre-transformed radeon_program.
 *
 * \author Ben Skeggs <[email protected]>
 *
 * \author Jerome Glisse <[email protected]>
 */
38
39
#include "r300_fragprog.h"
40
41
#include "r300_reg.h"
42
43
#include "radeon_program_pair.h"
44
#include "r300_fragprog_swizzle.h"
45
46
#include "util/compiler.h"
47
48
49
/**
 * Book-keeping carried through the emission of one fragment program.
 */
struct r300_emit_state {
	struct r300_fragment_program_compiler * compiler;

	/* Index of the node currently being filled.  The code in this file
	 * refuses to advance it past 3, so two bits are sufficient. */
	unsigned current_node : 2;

	/* Values of code->tex.length / code->alu.length at the moment the
	 * current node was opened.  These are plain integers rather than
	 * 8-bit fields: the r400 helpers in this file extract address bits
	 * above bit 7, and an 8-bit field would silently truncate any node
	 * that starts at instruction 256 or later. */
	unsigned int node_first_tex;
	unsigned int node_first_alu;

	/* Flag bits OR'ed into the code_addr word of the current node. */
	uint32_t node_flags;
};
57
58
/*
 * Declare the two locals most emit helpers work with: `c`, the compiler
 * taken from a variable named `emit` in the enclosing scope, and `code`,
 * the r300 code structure reached through that compiler.  The expansion
 * carries no trailing semicolon; the user of the macro supplies it.
 */
#define PROG_CODE \
	struct r300_fragment_program_compiler *c = emit->compiler; \
	struct r300_fragment_program_code *code = &emit->compiler->code->code.r300
61
62
/*
 * Report a compile error through rc_error(), prefixing the message with the
 * source file and the name of the calling function and appending a newline.
 * Requires a variable `c` (the fragment program compiler) in scope.
 */
#define error(fmt, ...) \
	do { \
		rc_error(&c->Base, "%s::%s(): " fmt "\n", \
			 __FILE__, __func__, ##__VA_ARGS__); \
	} while (0)
66
67
/**
 * Extract the three bits of an ALU instruction address or size that lie
 * directly above its low six bits (bits 6..8 of \p bits).
 */
static unsigned int get_msbs_alu(unsigned int bits)
{
	const unsigned int upper = bits >> 6;

	return upper & 0x7;
}
71
72
/**
 * Extract the bits of a texture instruction address or size that lie
 * directly above its \p lsbs low-order bits.
 *
 * The result is shifted into an "MSB" register field by the callers, so the
 * mask has to be contiguous.  The mask used to be 0x15 (binary 10101), which
 * dropped bits 1 and 3 of the result and therefore mis-encoded any value of
 * 64 or more; 0xf keeps all four bits.
 * NOTE(review): the four-bit width is presumed from the R400_TEX_*_MSB
 * fields -- confirm against the register header.
 *
 * @param bits The full address or size
 * @param lsbs The number of least significant bits
 * @return The four bits of \p bits starting at bit position \p lsbs
 */
static unsigned int get_msbs_tex(unsigned int bits, unsigned int lsbs)
{
	return (bits >> lsbs) & 0xf;
}
79
80
/* Shift x right by lsbs bits and keep the bits selected by mask.  Every
 * argument is parenthesized so that compound expressions passed as lsbs or
 * mask expand with the intended precedence. */
#define R400_EXT_GET_MSBS(x, lsbs, mask) (((x) >> (lsbs)) & (mask))
81
82
/**
83
* Mark a temporary register as used.
84
*/
85
static void use_temporary(struct r300_fragment_program_code *code, unsigned int index)
86
{
87
if (index > code->pixsize)
88
code->pixsize = index;
89
}
90
91
static unsigned int use_source(struct r300_fragment_program_code* code, struct rc_pair_instruction_source src)
92
{
93
if (!src.Used)
94
return 0;
95
96
if (src.File == RC_FILE_CONSTANT) {
97
return src.Index | (1 << 5);
98
} else if (src.File == RC_FILE_TEMPORARY || src.File == RC_FILE_INPUT) {
99
use_temporary(code, src.Index);
100
return src.Index & 0x1f;
101
}
102
103
return 0;
104
}
105
106
107
/**
 * Map a compiler opcode to the opcode bits of the RGB half of a paired ALU
 * instruction.
 *
 * RC_OPCODE_NOP is encoded as MAD.  An opcode that is not listed is
 * reported through error() and then encoded as MAD as well.
 */
static unsigned int translate_rgb_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_NOP:
	case RC_OPCODE_MAD:
		return R300_ALU_OUTC_MAD;
	case RC_OPCODE_CMP:
		return R300_ALU_OUTC_CMP;
	case RC_OPCODE_CND:
		return R300_ALU_OUTC_CND;
	case RC_OPCODE_DP3:
		return R300_ALU_OUTC_DP3;
	case RC_OPCODE_DP4:
		return R300_ALU_OUTC_DP4;
	case RC_OPCODE_FRC:
		return R300_ALU_OUTC_FRC;
	case RC_OPCODE_MAX:
		return R300_ALU_OUTC_MAX;
	case RC_OPCODE_MIN:
		return R300_ALU_OUTC_MIN;
	case RC_OPCODE_REPL_ALPHA:
		return R300_ALU_OUTC_REPL_ALPHA;
	default:
		break;
	}

	error("translate_rgb_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
	return R300_ALU_OUTC_MAD;
}
126
127
/**
 * Map a compiler opcode to the opcode bits of the alpha half of a paired
 * ALU instruction.
 *
 * RC_OPCODE_NOP is encoded as MAD.  An opcode that is not listed is
 * reported through error() and then encoded as MAD as well.  The error
 * message now names this function; it used to name translate_rgb_opcode.
 */
static unsigned int translate_alpha_opcode(struct r300_fragment_program_compiler * c, rc_opcode opcode)
{
	switch(opcode) {
	case RC_OPCODE_CMP: return R300_ALU_OUTA_CMP;
	case RC_OPCODE_CND: return R300_ALU_OUTA_CND;
	/* DP3 is encoded with the DP4 opcode on the alpha side.
	 * NOTE(review): presumably intentional -- confirm against the
	 * hardware documentation. */
	case RC_OPCODE_DP3: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_DP4: return R300_ALU_OUTA_DP4;
	case RC_OPCODE_EX2: return R300_ALU_OUTA_EX2;
	case RC_OPCODE_FRC: return R300_ALU_OUTA_FRC;
	case RC_OPCODE_LG2: return R300_ALU_OUTA_LG2;
	default:
		error("translate_alpha_opcode: Unknown opcode %s", rc_get_opcode_info(opcode)->Name);
		FALLTHROUGH;
	case RC_OPCODE_NOP:
		FALLTHROUGH;
	case RC_OPCODE_MAD: return R300_ALU_OUTA_MAD;
	case RC_OPCODE_MAX: return R300_ALU_OUTA_MAX;
	case RC_OPCODE_MIN: return R300_ALU_OUTA_MIN;
	case RC_OPCODE_RCP: return R300_ALU_OUTA_RCP;
	case RC_OPCODE_RSQ: return R300_ALU_OUTA_RSQ;
	}
}
149
150
/**
151
* Emit one paired ALU instruction.
152
*/
153
static int emit_alu(struct r300_emit_state * emit, struct rc_pair_instruction* inst)
154
{
155
int ip;
156
int j;
157
PROG_CODE;
158
159
if (code->alu.length >= c->Base.max_alu_insts) {
160
error("Too many ALU instructions");
161
return 0;
162
}
163
164
ip = code->alu.length++;
165
166
code->alu.inst[ip].rgb_inst = translate_rgb_opcode(c, inst->RGB.Opcode);
167
code->alu.inst[ip].alpha_inst = translate_alpha_opcode(c, inst->Alpha.Opcode);
168
169
for(j = 0; j < 3; ++j) {
170
/* Set the RGB address */
171
unsigned int src = use_source(code, inst->RGB.Src[j]);
172
unsigned int arg;
173
if (inst->RGB.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
174
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_RGB_MSB_BIT(j);
175
176
code->alu.inst[ip].rgb_addr |= src << (6*j);
177
178
/* Set the Alpha address */
179
src = use_source(code, inst->Alpha.Src[j]);
180
if (inst->Alpha.Src[j].Index >= R300_PFS_NUM_TEMP_REGS)
181
code->alu.inst[ip].r400_ext_addr |= R400_ADDR_EXT_A_MSB_BIT(j);
182
183
code->alu.inst[ip].alpha_addr |= src << (6*j);
184
185
arg = r300FPTranslateRGBSwizzle(inst->RGB.Arg[j].Source, inst->RGB.Arg[j].Swizzle);
186
arg |= inst->RGB.Arg[j].Abs << 6;
187
arg |= inst->RGB.Arg[j].Negate << 5;
188
code->alu.inst[ip].rgb_inst |= arg << (7*j);
189
190
arg = r300FPTranslateAlphaSwizzle(inst->Alpha.Arg[j].Source, inst->Alpha.Arg[j].Swizzle);
191
arg |= inst->Alpha.Arg[j].Abs << 6;
192
arg |= inst->Alpha.Arg[j].Negate << 5;
193
code->alu.inst[ip].alpha_inst |= arg << (7*j);
194
}
195
196
/* Presubtract */
197
if (inst->RGB.Src[RC_PAIR_PRESUB_SRC].Used) {
198
switch(inst->RGB.Src[RC_PAIR_PRESUB_SRC].Index) {
199
case RC_PRESUB_BIAS:
200
code->alu.inst[ip].rgb_inst |=
201
R300_ALU_SRCP_1_MINUS_2_SRC0;
202
break;
203
case RC_PRESUB_ADD:
204
code->alu.inst[ip].rgb_inst |=
205
R300_ALU_SRCP_SRC1_PLUS_SRC0;
206
break;
207
case RC_PRESUB_SUB:
208
code->alu.inst[ip].rgb_inst |=
209
R300_ALU_SRCP_SRC1_MINUS_SRC0;
210
break;
211
case RC_PRESUB_INV:
212
code->alu.inst[ip].rgb_inst |=
213
R300_ALU_SRCP_1_MINUS_SRC0;
214
break;
215
default:
216
break;
217
}
218
}
219
220
if (inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Used) {
221
switch(inst->Alpha.Src[RC_PAIR_PRESUB_SRC].Index) {
222
case RC_PRESUB_BIAS:
223
code->alu.inst[ip].alpha_inst |=
224
R300_ALU_SRCP_1_MINUS_2_SRC0;
225
break;
226
case RC_PRESUB_ADD:
227
code->alu.inst[ip].alpha_inst |=
228
R300_ALU_SRCP_SRC1_PLUS_SRC0;
229
break;
230
case RC_PRESUB_SUB:
231
code->alu.inst[ip].alpha_inst |=
232
R300_ALU_SRCP_SRC1_MINUS_SRC0;
233
break;
234
case RC_PRESUB_INV:
235
code->alu.inst[ip].alpha_inst |=
236
R300_ALU_SRCP_1_MINUS_SRC0;
237
break;
238
default:
239
break;
240
}
241
}
242
243
if (inst->RGB.Saturate)
244
code->alu.inst[ip].rgb_inst |= R300_ALU_OUTC_CLAMP;
245
if (inst->Alpha.Saturate)
246
code->alu.inst[ip].alpha_inst |= R300_ALU_OUTA_CLAMP;
247
248
if (inst->RGB.WriteMask) {
249
use_temporary(code, inst->RGB.DestIndex);
250
if (inst->RGB.DestIndex >= R300_PFS_NUM_TEMP_REGS)
251
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_RGB_MSB_BIT;
252
code->alu.inst[ip].rgb_addr |=
253
((inst->RGB.DestIndex & 0x1f) << R300_ALU_DSTC_SHIFT) |
254
(inst->RGB.WriteMask << R300_ALU_DSTC_REG_MASK_SHIFT);
255
}
256
if (inst->RGB.OutputWriteMask) {
257
code->alu.inst[ip].rgb_addr |=
258
(inst->RGB.OutputWriteMask << R300_ALU_DSTC_OUTPUT_MASK_SHIFT) |
259
R300_RGB_TARGET(inst->RGB.Target);
260
emit->node_flags |= R300_RGBA_OUT;
261
}
262
263
if (inst->Alpha.WriteMask) {
264
use_temporary(code, inst->Alpha.DestIndex);
265
if (inst->Alpha.DestIndex >= R300_PFS_NUM_TEMP_REGS)
266
code->alu.inst[ip].r400_ext_addr |= R400_ADDRD_EXT_A_MSB_BIT;
267
code->alu.inst[ip].alpha_addr |=
268
((inst->Alpha.DestIndex & 0x1f) << R300_ALU_DSTA_SHIFT) |
269
R300_ALU_DSTA_REG;
270
}
271
if (inst->Alpha.OutputWriteMask) {
272
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_OUTPUT |
273
R300_ALPHA_TARGET(inst->Alpha.Target);
274
emit->node_flags |= R300_RGBA_OUT;
275
}
276
if (inst->Alpha.DepthWriteMask) {
277
code->alu.inst[ip].alpha_addr |= R300_ALU_DSTA_DEPTH;
278
emit->node_flags |= R300_W_OUT;
279
c->code->writes_depth = 1;
280
}
281
if (inst->Nop)
282
code->alu.inst[ip].rgb_inst |= R300_ALU_INSERT_NOP;
283
284
/* Handle Output Modifier
285
* According to the r300 docs, there is no RC_OMOD_DISABLE for r300 */
286
if (inst->RGB.Omod) {
287
if (inst->RGB.Omod == RC_OMOD_DISABLE) {
288
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
289
}
290
code->alu.inst[ip].rgb_inst |=
291
(inst->RGB.Omod << R300_ALU_OUTC_MOD_SHIFT);
292
}
293
if (inst->Alpha.Omod) {
294
if (inst->Alpha.Omod == RC_OMOD_DISABLE) {
295
rc_error(&c->Base, "RC_OMOD_DISABLE not supported");
296
}
297
code->alu.inst[ip].alpha_inst |=
298
(inst->Alpha.Omod << R300_ALU_OUTC_MOD_SHIFT);
299
}
300
return 1;
301
}
302
303
304
/**
305
* Finish the current node without advancing to the next one.
306
*/
307
static int finish_node(struct r300_emit_state * emit)
308
{
309
struct r300_fragment_program_compiler * c = emit->compiler;
310
struct r300_fragment_program_code *code = &emit->compiler->code->code.r300;
311
unsigned alu_offset;
312
unsigned alu_end;
313
unsigned tex_offset;
314
unsigned tex_end;
315
316
unsigned int alu_offset_msbs, alu_end_msbs;
317
318
if (code->alu.length == emit->node_first_alu) {
319
/* Generate a single NOP for this node */
320
struct rc_pair_instruction inst;
321
memset(&inst, 0, sizeof(inst));
322
if (!emit_alu(emit, &inst))
323
return 0;
324
}
325
326
alu_offset = emit->node_first_alu;
327
alu_end = code->alu.length - alu_offset - 1;
328
tex_offset = emit->node_first_tex;
329
tex_end = code->tex.length - tex_offset - 1;
330
331
if (code->tex.length == emit->node_first_tex) {
332
if (emit->current_node > 0) {
333
error("Node %i has no TEX instructions", emit->current_node);
334
return 0;
335
}
336
337
tex_end = 0;
338
} else {
339
if (emit->current_node == 0)
340
code->config |= R300_PFS_CNTL_FIRST_NODE_HAS_TEX;
341
}
342
343
/* Write the config register.
344
* Note: The order in which the words for each node are written
345
* is not correct here and needs to be fixed up once we're entirely
346
* done
347
*
348
* Also note that the register specification from AMD is slightly
349
* incorrect in its description of this register. */
350
code->code_addr[emit->current_node] =
351
((alu_offset << R300_ALU_START_SHIFT)
352
& R300_ALU_START_MASK)
353
| ((alu_end << R300_ALU_SIZE_SHIFT)
354
& R300_ALU_SIZE_MASK)
355
| ((tex_offset << R300_TEX_START_SHIFT)
356
& R300_TEX_START_MASK)
357
| ((tex_end << R300_TEX_SIZE_SHIFT)
358
& R300_TEX_SIZE_MASK)
359
| emit->node_flags
360
| (get_msbs_tex(tex_offset, 5)
361
<< R400_TEX_START_MSB_SHIFT)
362
| (get_msbs_tex(tex_end, 5)
363
<< R400_TEX_SIZE_MSB_SHIFT)
364
;
365
366
/* Write r400 extended instruction fields. These will be ignored on
367
* r300 cards. */
368
alu_offset_msbs = get_msbs_alu(alu_offset);
369
alu_end_msbs = get_msbs_alu(alu_end);
370
switch(emit->current_node) {
371
case 0:
372
code->r400_code_offset_ext |=
373
alu_offset_msbs << R400_ALU_START3_MSB_SHIFT
374
| alu_end_msbs << R400_ALU_SIZE3_MSB_SHIFT;
375
break;
376
case 1:
377
code->r400_code_offset_ext |=
378
alu_offset_msbs << R400_ALU_START2_MSB_SHIFT
379
| alu_end_msbs << R400_ALU_SIZE2_MSB_SHIFT;
380
break;
381
case 2:
382
code->r400_code_offset_ext |=
383
alu_offset_msbs << R400_ALU_START1_MSB_SHIFT
384
| alu_end_msbs << R400_ALU_SIZE1_MSB_SHIFT;
385
break;
386
case 3:
387
code->r400_code_offset_ext |=
388
alu_offset_msbs << R400_ALU_START0_MSB_SHIFT
389
| alu_end_msbs << R400_ALU_SIZE0_MSB_SHIFT;
390
break;
391
}
392
return 1;
393
}
394
395
396
/**
397
* Begin a block of texture instructions.
398
* Create the necessary indirection.
399
*/
400
static int begin_tex(struct r300_emit_state * emit)
401
{
402
PROG_CODE;
403
404
if (code->alu.length == emit->node_first_alu &&
405
code->tex.length == emit->node_first_tex) {
406
return 1;
407
}
408
409
if (emit->current_node == 3) {
410
error("Too many texture indirections");
411
return 0;
412
}
413
414
if (!finish_node(emit))
415
return 0;
416
417
emit->current_node++;
418
emit->node_first_tex = code->tex.length;
419
emit->node_first_alu = code->alu.length;
420
emit->node_flags = 0;
421
return 1;
422
}
423
424
425
static int emit_tex(struct r300_emit_state * emit, struct rc_instruction * inst)
426
{
427
unsigned int unit;
428
unsigned int dest;
429
unsigned int opcode;
430
PROG_CODE;
431
432
if (code->tex.length >= emit->compiler->Base.max_tex_insts) {
433
error("Too many TEX instructions");
434
return 0;
435
}
436
437
unit = inst->U.I.TexSrcUnit;
438
dest = inst->U.I.DstReg.Index;
439
440
switch(inst->U.I.Opcode) {
441
case RC_OPCODE_KIL: opcode = R300_TEX_OP_KIL; break;
442
case RC_OPCODE_TEX: opcode = R300_TEX_OP_LD; break;
443
case RC_OPCODE_TXB: opcode = R300_TEX_OP_TXB; break;
444
case RC_OPCODE_TXP: opcode = R300_TEX_OP_TXP; break;
445
default:
446
error("Unknown texture opcode %s", rc_get_opcode_info(inst->U.I.Opcode)->Name);
447
return 0;
448
}
449
450
if (inst->U.I.Opcode == RC_OPCODE_KIL) {
451
unit = 0;
452
dest = 0;
453
} else {
454
use_temporary(code, dest);
455
}
456
457
use_temporary(code, inst->U.I.SrcReg[0].Index);
458
459
code->tex.inst[code->tex.length++] =
460
((inst->U.I.SrcReg[0].Index << R300_SRC_ADDR_SHIFT)
461
& R300_SRC_ADDR_MASK)
462
| ((dest << R300_DST_ADDR_SHIFT)
463
& R300_DST_ADDR_MASK)
464
| (unit << R300_TEX_ID_SHIFT)
465
| (opcode << R300_TEX_INST_SHIFT)
466
| (inst->U.I.SrcReg[0].Index >= R300_PFS_NUM_TEMP_REGS ?
467
R400_SRC_ADDR_EXT_BIT : 0)
468
| (dest >= R300_PFS_NUM_TEMP_REGS ?
469
R400_DST_ADDR_EXT_BIT : 0)
470
;
471
return 1;
472
}
473
474
475
/**
476
* Final compilation step: Turn the intermediate radeon_program into
477
* machine-readable instructions.
478
*/
479
void r300BuildFragmentProgramHwCode(struct radeon_compiler *c, void *user)
480
{
481
struct r300_fragment_program_compiler *compiler = (struct r300_fragment_program_compiler*)c;
482
struct r300_emit_state emit;
483
struct r300_fragment_program_code *code = &compiler->code->code.r300;
484
unsigned int tex_end;
485
486
memset(&emit, 0, sizeof(emit));
487
emit.compiler = compiler;
488
489
memset(code, 0, sizeof(struct r300_fragment_program_code));
490
491
for(struct rc_instruction * inst = compiler->Base.Program.Instructions.Next;
492
inst != &compiler->Base.Program.Instructions && !compiler->Base.Error;
493
inst = inst->Next) {
494
if (inst->Type == RC_INSTRUCTION_NORMAL) {
495
if (inst->U.I.Opcode == RC_OPCODE_BEGIN_TEX) {
496
begin_tex(&emit);
497
continue;
498
}
499
500
emit_tex(&emit, inst);
501
} else {
502
emit_alu(&emit, &inst->U.P);
503
}
504
}
505
506
if (code->pixsize >= compiler->Base.max_temp_regs)
507
rc_error(&compiler->Base, "Too many hardware temporaries used.\n");
508
509
if (compiler->Base.Error)
510
return;
511
512
/* Finish the program */
513
finish_node(&emit);
514
515
code->config |= emit.current_node; /* FIRST_NODE_HAS_TEX set by finish_node */
516
517
/* Set r400 extended instruction fields. These values will be ignored
518
* on r300 cards. */
519
code->r400_code_offset_ext |=
520
(get_msbs_alu(0)
521
<< R400_ALU_OFFSET_MSB_SHIFT)
522
| (get_msbs_alu(code->alu.length - 1)
523
<< R400_ALU_SIZE_MSB_SHIFT);
524
525
tex_end = code->tex.length ? code->tex.length - 1 : 0;
526
code->code_offset =
527
((0 << R300_PFS_CNTL_ALU_OFFSET_SHIFT)
528
& R300_PFS_CNTL_ALU_OFFSET_MASK)
529
| (((code->alu.length - 1) << R300_PFS_CNTL_ALU_END_SHIFT)
530
& R300_PFS_CNTL_ALU_END_MASK)
531
| ((0 << R300_PFS_CNTL_TEX_OFFSET_SHIFT)
532
& R300_PFS_CNTL_TEX_OFFSET_MASK)
533
| ((tex_end << R300_PFS_CNTL_TEX_END_SHIFT)
534
& R300_PFS_CNTL_TEX_END_MASK)
535
| (get_msbs_tex(0, 5) << R400_TEX_START_MSB_SHIFT)
536
| (get_msbs_tex(tex_end, 6) << R400_TEX_SIZE_MSB_SHIFT)
537
;
538
539
if (emit.current_node < 3) {
540
int shift = 3 - emit.current_node;
541
int i;
542
for(i = emit.current_node; i >= 0; --i)
543
code->code_addr[shift + i] = code->code_addr[i];
544
for(i = 0; i < shift; ++i)
545
code->code_addr[i] = 0;
546
}
547
548
if (code->pixsize >= R300_PFS_NUM_TEMP_REGS
549
|| code->alu.length > R300_PFS_MAX_ALU_INST
550
|| code->tex.length > R300_PFS_MAX_TEX_INST) {
551
552
code->r390_mode = 1;
553
}
554
}
555
556