Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/r300/compiler/radeon_optimize.c
4574 views
1
/*
2
* Copyright (C) 2009 Nicolai Haehnle.
3
* Copyright 2010 Tom Stellard <[email protected]>
4
*
5
* All Rights Reserved.
6
*
7
* Permission is hereby granted, free of charge, to any person obtaining
8
* a copy of this software and associated documentation files (the
9
* "Software"), to deal in the Software without restriction, including
10
* without limitation the rights to use, copy, modify, merge, publish,
11
* distribute, sublicense, and/or sell copies of the Software, and to
12
* permit persons to whom the Software is furnished to do so, subject to
13
* the following conditions:
14
*
15
* The above copyright notice and this permission notice (including the
16
* next paragraph) shall be included in all copies or substantial
17
* portions of the Software.
18
*
19
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
* IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
*
27
*/
28
29
#include "radeon_dataflow.h"
30
31
#include "radeon_compiler.h"
32
#include "radeon_compiler_util.h"
33
#include "radeon_list.h"
34
#include "radeon_swizzle.h"
35
#include "radeon_variable.h"
36
37
struct src_clobbered_reads_cb_data {
38
rc_register_file File;
39
unsigned int Index;
40
unsigned int Mask;
41
struct rc_reader_data * ReaderData;
42
};
43
44
/**
 * Hook used by presub_helper() to rewrite one reader of an ADD instruction.
 * Arguments, in order: the ADD being replaced, the instruction reading its
 * result, and the index of the reader's source register to rewrite.
 */
typedef void (*rc_presub_replace_fn)(struct rc_instruction * inst_add,
				     struct rc_instruction * inst_reader,
				     unsigned int src_index);
47
48
static struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner)
49
{
50
struct rc_src_register combine;
51
combine.File = inner.File;
52
combine.Index = inner.Index;
53
combine.RelAddr = inner.RelAddr;
54
if (outer.Abs) {
55
combine.Abs = 1;
56
combine.Negate = outer.Negate;
57
} else {
58
combine.Abs = inner.Abs;
59
combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate);
60
combine.Negate ^= outer.Negate;
61
}
62
combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle);
63
return combine;
64
}
65
66
static void copy_propagate_scan_read(void * data, struct rc_instruction * inst,
67
struct rc_src_register * src)
68
{
69
rc_register_file file = src->File;
70
struct rc_reader_data * reader_data = data;
71
72
if(!rc_inst_can_use_presub(inst,
73
reader_data->Writer->U.I.PreSub.Opcode,
74
rc_swizzle_to_writemask(src->Swizzle),
75
src,
76
&reader_data->Writer->U.I.PreSub.SrcReg[0],
77
&reader_data->Writer->U.I.PreSub.SrcReg[1])) {
78
reader_data->Abort = 1;
79
return;
80
}
81
82
/* XXX This could probably be handled better. */
83
if (file == RC_FILE_ADDRESS) {
84
reader_data->Abort = 1;
85
return;
86
}
87
88
/* These instructions cannot read from the constants file.
89
* see radeonTransformTEX()
90
*/
91
if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY &&
92
reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT &&
93
(inst->U.I.Opcode == RC_OPCODE_TEX ||
94
inst->U.I.Opcode == RC_OPCODE_TXB ||
95
inst->U.I.Opcode == RC_OPCODE_TXP ||
96
inst->U.I.Opcode == RC_OPCODE_TXD ||
97
inst->U.I.Opcode == RC_OPCODE_TXL ||
98
inst->U.I.Opcode == RC_OPCODE_KIL)){
99
reader_data->Abort = 1;
100
return;
101
}
102
}
103
104
static void src_clobbered_reads_cb(
105
void * data,
106
struct rc_instruction * inst,
107
struct rc_src_register * src)
108
{
109
struct src_clobbered_reads_cb_data * sc_data = data;
110
111
if (src->File == sc_data->File
112
&& src->Index == sc_data->Index
113
&& (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) {
114
115
sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
116
}
117
118
if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) {
119
sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW;
120
}
121
}
122
123
static void is_src_clobbered_scan_write(
124
void * data,
125
struct rc_instruction * inst,
126
rc_register_file file,
127
unsigned int index,
128
unsigned int mask)
129
{
130
struct src_clobbered_reads_cb_data sc_data;
131
struct rc_reader_data * reader_data = data;
132
sc_data.File = file;
133
sc_data.Index = index;
134
sc_data.Mask = mask;
135
sc_data.ReaderData = reader_data;
136
rc_for_all_reads_src(reader_data->Writer,
137
src_clobbered_reads_cb, &sc_data);
138
}
139
140
static void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov)
141
{
142
struct rc_reader_data reader_data;
143
unsigned int i;
144
145
if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY ||
146
inst_mov->U.I.WriteALUResult)
147
return;
148
149
/* Get a list of all the readers of this MOV instruction. */
150
reader_data.ExitOnAbort = 1;
151
rc_get_readers(c, inst_mov, &reader_data,
152
copy_propagate_scan_read, NULL,
153
is_src_clobbered_scan_write);
154
155
if (reader_data.Abort || reader_data.ReaderCount == 0)
156
return;
157
158
/* We can propagate SaturateMode if all the readers are MOV instructions
159
* without a presubtract operation, source negation and absolute.
160
* In that case, we just move SaturateMode to all readers. */
161
if (inst_mov->U.I.SaturateMode) {
162
for (i = 0; i < reader_data.ReaderCount; i++) {
163
struct rc_instruction * inst = reader_data.Readers[i].Inst;
164
165
if (inst->U.I.Opcode != RC_OPCODE_MOV ||
166
inst->U.I.SrcReg[0].File == RC_FILE_PRESUB ||
167
inst->U.I.SrcReg[0].Abs ||
168
inst->U.I.SrcReg[0].Negate) {
169
return;
170
}
171
}
172
}
173
174
/* Propagate the MOV instruction. */
175
for (i = 0; i < reader_data.ReaderCount; i++) {
176
struct rc_instruction * inst = reader_data.Readers[i].Inst;
177
*reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]);
178
179
if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB)
180
inst->U.I.PreSub = inst_mov->U.I.PreSub;
181
if (!inst->U.I.SaturateMode)
182
inst->U.I.SaturateMode = inst_mov->U.I.SaturateMode;
183
}
184
185
/* Finally, remove the original MOV instruction */
186
rc_remove_instruction(inst_mov);
187
}
188
189
/**
190
* Check if a source register is actually always the same
191
* swizzle constant.
192
*/
193
static int is_src_uniform_constant(struct rc_src_register src,
194
rc_swizzle * pswz, unsigned int * pnegate)
195
{
196
int have_used = 0;
197
198
if (src.File != RC_FILE_NONE) {
199
*pswz = 0;
200
return 0;
201
}
202
203
for(unsigned int chan = 0; chan < 4; ++chan) {
204
unsigned int swz = GET_SWZ(src.Swizzle, chan);
205
if (swz < 4) {
206
*pswz = 0;
207
return 0;
208
}
209
if (swz == RC_SWIZZLE_UNUSED)
210
continue;
211
212
if (!have_used) {
213
*pswz = swz;
214
*pnegate = GET_BIT(src.Negate, chan);
215
have_used = 1;
216
} else {
217
if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) {
218
*pswz = 0;
219
return 0;
220
}
221
}
222
}
223
224
return 1;
225
}
226
227
static void constant_folding_mad(struct rc_instruction * inst)
228
{
229
rc_swizzle swz = 0;
230
unsigned int negate= 0;
231
232
if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) {
233
if (swz == RC_SWIZZLE_ZERO) {
234
inst->U.I.Opcode = RC_OPCODE_MUL;
235
return;
236
}
237
}
238
239
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
240
if (swz == RC_SWIZZLE_ONE) {
241
inst->U.I.Opcode = RC_OPCODE_ADD;
242
if (negate)
243
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
244
inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2];
245
return;
246
} else if (swz == RC_SWIZZLE_ZERO) {
247
inst->U.I.Opcode = RC_OPCODE_MOV;
248
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
249
return;
250
}
251
}
252
253
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
254
if (swz == RC_SWIZZLE_ONE) {
255
inst->U.I.Opcode = RC_OPCODE_ADD;
256
if (negate)
257
inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
258
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
259
return;
260
} else if (swz == RC_SWIZZLE_ZERO) {
261
inst->U.I.Opcode = RC_OPCODE_MOV;
262
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2];
263
return;
264
}
265
}
266
}
267
268
static void constant_folding_mul(struct rc_instruction * inst)
269
{
270
rc_swizzle swz = 0;
271
unsigned int negate = 0;
272
273
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
274
if (swz == RC_SWIZZLE_ONE) {
275
inst->U.I.Opcode = RC_OPCODE_MOV;
276
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
277
if (negate)
278
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
279
return;
280
} else if (swz == RC_SWIZZLE_ZERO) {
281
inst->U.I.Opcode = RC_OPCODE_MOV;
282
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
283
return;
284
}
285
}
286
287
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
288
if (swz == RC_SWIZZLE_ONE) {
289
inst->U.I.Opcode = RC_OPCODE_MOV;
290
if (negate)
291
inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
292
return;
293
} else if (swz == RC_SWIZZLE_ZERO) {
294
inst->U.I.Opcode = RC_OPCODE_MOV;
295
inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
296
return;
297
}
298
}
299
}
300
301
static void constant_folding_add(struct rc_instruction * inst)
302
{
303
rc_swizzle swz = 0;
304
unsigned int negate = 0;
305
306
if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) {
307
if (swz == RC_SWIZZLE_ZERO) {
308
inst->U.I.Opcode = RC_OPCODE_MOV;
309
inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1];
310
return;
311
}
312
}
313
314
if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) {
315
if (swz == RC_SWIZZLE_ZERO) {
316
inst->U.I.Opcode = RC_OPCODE_MOV;
317
return;
318
}
319
}
320
}
321
322
/**
323
* Replace 0.0, 1.0 and 0.5 immediate constants by their
324
* respective swizzles. Simplify instructions like ADD dst, src, 0;
325
*/
326
static void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst)
327
{
328
const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
329
unsigned int i;
330
331
/* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */
332
for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
333
struct rc_constant * constant;
334
struct rc_src_register newsrc;
335
int have_real_reference;
336
unsigned int chan;
337
338
/* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */
339
for (chan = 0; chan < 4; ++chan)
340
if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3)
341
break;
342
if (chan == 4) {
343
inst->U.I.SrcReg[src].File = RC_FILE_NONE;
344
continue;
345
}
346
347
/* Convert immediates to swizzles. */
348
if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT ||
349
inst->U.I.SrcReg[src].RelAddr ||
350
inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count)
351
continue;
352
353
constant =
354
&c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index];
355
356
if (constant->Type != RC_CONSTANT_IMMEDIATE)
357
continue;
358
359
newsrc = inst->U.I.SrcReg[src];
360
have_real_reference = 0;
361
for (chan = 0; chan < 4; ++chan) {
362
unsigned int swz = GET_SWZ(newsrc.Swizzle, chan);
363
unsigned int newswz;
364
float imm;
365
float baseimm;
366
367
if (swz >= 4)
368
continue;
369
370
imm = constant->u.Immediate[swz];
371
baseimm = imm;
372
if (imm < 0.0)
373
baseimm = -baseimm;
374
375
if (baseimm == 0.0) {
376
newswz = RC_SWIZZLE_ZERO;
377
} else if (baseimm == 1.0) {
378
newswz = RC_SWIZZLE_ONE;
379
} else if (baseimm == 0.5 && c->has_half_swizzles) {
380
newswz = RC_SWIZZLE_HALF;
381
} else {
382
have_real_reference = 1;
383
continue;
384
}
385
386
SET_SWZ(newsrc.Swizzle, chan, newswz);
387
if (imm < 0.0 && !newsrc.Abs)
388
newsrc.Negate ^= 1 << chan;
389
}
390
391
if (!have_real_reference) {
392
newsrc.File = RC_FILE_NONE;
393
newsrc.Index = 0;
394
}
395
396
/* don't make the swizzle worse */
397
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) &&
398
c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src]))
399
continue;
400
401
inst->U.I.SrcReg[src] = newsrc;
402
}
403
404
/* Simplify instructions based on constants */
405
if (inst->U.I.Opcode == RC_OPCODE_MAD)
406
constant_folding_mad(inst);
407
408
/* note: MAD can simplify to MUL or ADD */
409
if (inst->U.I.Opcode == RC_OPCODE_MUL)
410
constant_folding_mul(inst);
411
else if (inst->U.I.Opcode == RC_OPCODE_ADD)
412
constant_folding_add(inst);
413
414
/* In case this instruction has been converted, make sure all of the
415
* registers that are no longer used are empty. */
416
opcode = rc_get_opcode_info(inst->U.I.Opcode);
417
for(i = opcode->NumSrcRegs; i < 3; i++) {
418
memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register));
419
}
420
}
421
422
/**
423
* If src and dst use the same register, this function returns a writemask that
424
* indicates which components are read by src. Otherwise zero is returned.
425
*/
426
static unsigned int src_reads_dst_mask(struct rc_src_register src,
427
struct rc_dst_register dst)
428
{
429
if (dst.File != src.File || dst.Index != src.Index) {
430
return 0;
431
}
432
return rc_swizzle_to_writemask(src.Swizzle);
433
}
434
435
/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0)
436
* in any of its channels. Return 0 otherwise. */
437
static int src_has_const_swz(struct rc_src_register src) {
438
int chan;
439
for(chan = 0; chan < 4; chan++) {
440
unsigned int swz = GET_SWZ(src.Swizzle, chan);
441
if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF
442
|| swz == RC_SWIZZLE_ONE) {
443
return 1;
444
}
445
}
446
return 0;
447
}
448
449
static void presub_scan_read(
450
void * data,
451
struct rc_instruction * inst,
452
struct rc_src_register * src)
453
{
454
struct rc_reader_data * reader_data = data;
455
rc_presubtract_op * presub_opcode = reader_data->CbData;
456
457
if (!rc_inst_can_use_presub(inst, *presub_opcode,
458
reader_data->Writer->U.I.DstReg.WriteMask,
459
src,
460
&reader_data->Writer->U.I.SrcReg[0],
461
&reader_data->Writer->U.I.SrcReg[1])) {
462
reader_data->Abort = 1;
463
return;
464
}
465
}
466
467
static int presub_helper(
468
struct radeon_compiler * c,
469
struct rc_instruction * inst_add,
470
rc_presubtract_op presub_opcode,
471
rc_presub_replace_fn presub_replace)
472
{
473
struct rc_reader_data reader_data;
474
unsigned int i;
475
rc_presubtract_op cb_op = presub_opcode;
476
477
reader_data.CbData = &cb_op;
478
reader_data.ExitOnAbort = 1;
479
rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL,
480
is_src_clobbered_scan_write);
481
482
if (reader_data.Abort || reader_data.ReaderCount == 0)
483
return 0;
484
485
for(i = 0; i < reader_data.ReaderCount; i++) {
486
unsigned int src_index;
487
struct rc_reader reader = reader_data.Readers[i];
488
const struct rc_opcode_info * info =
489
rc_get_opcode_info(reader.Inst->U.I.Opcode);
490
491
for (src_index = 0; src_index < info->NumSrcRegs; src_index++) {
492
if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src)
493
presub_replace(inst_add, reader.Inst, src_index);
494
}
495
}
496
return 1;
497
}
498
499
/* This function assumes that inst_add->U.I.SrcReg[0] and
500
* inst_add->U.I.SrcReg[1] aren't both negative. */
501
static void presub_replace_add(
502
struct rc_instruction * inst_add,
503
struct rc_instruction * inst_reader,
504
unsigned int src_index)
505
{
506
rc_presubtract_op presub_opcode;
507
if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate)
508
presub_opcode = RC_PRESUB_SUB;
509
else
510
presub_opcode = RC_PRESUB_ADD;
511
512
if (inst_add->U.I.SrcReg[1].Negate) {
513
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
514
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0];
515
} else {
516
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0];
517
inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1];
518
}
519
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
520
inst_reader->U.I.PreSub.SrcReg[1].Negate = 0;
521
inst_reader->U.I.PreSub.Opcode = presub_opcode;
522
inst_reader->U.I.SrcReg[src_index] =
523
chain_srcregs(inst_reader->U.I.SrcReg[src_index],
524
inst_reader->U.I.PreSub.SrcReg[0]);
525
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
526
inst_reader->U.I.SrcReg[src_index].Index = presub_opcode;
527
}
528
529
static int is_presub_candidate(
530
struct radeon_compiler * c,
531
struct rc_instruction * inst)
532
{
533
const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode);
534
unsigned int i;
535
unsigned int is_constant[2] = {0, 0};
536
537
assert(inst->U.I.Opcode == RC_OPCODE_ADD);
538
539
if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE
540
|| inst->U.I.SaturateMode
541
|| inst->U.I.WriteALUResult
542
|| inst->U.I.Omod) {
543
return 0;
544
}
545
546
/* If both sources use a constant swizzle, then we can't convert it to
547
* a presubtract operation. In fact for the ADD and SUB presubtract
548
* operations neither source can contain a constant swizzle. This
549
* specific case is checked in peephole_add_presub_add() when
550
* we make sure the swizzles for both sources are equal, so we
551
* don't need to worry about it here. */
552
for (i = 0; i < 2; i++) {
553
int chan;
554
for (chan = 0; chan < 4; chan++) {
555
rc_swizzle swz =
556
get_swz(inst->U.I.SrcReg[i].Swizzle, chan);
557
if (swz == RC_SWIZZLE_ONE
558
|| swz == RC_SWIZZLE_ZERO
559
|| swz == RC_SWIZZLE_HALF) {
560
is_constant[i] = 1;
561
}
562
}
563
}
564
if (is_constant[0] && is_constant[1])
565
return 0;
566
567
for(i = 0; i < info->NumSrcRegs; i++) {
568
struct rc_src_register src = inst->U.I.SrcReg[i];
569
if (src_reads_dst_mask(src, inst->U.I.DstReg))
570
return 0;
571
572
src.File = RC_FILE_PRESUB;
573
if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src))
574
return 0;
575
}
576
return 1;
577
}
578
579
static int peephole_add_presub_add(
580
struct radeon_compiler * c,
581
struct rc_instruction * inst_add)
582
{
583
unsigned dstmask = inst_add->U.I.DstReg.WriteMask;
584
unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask;
585
unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask;
586
587
if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle)
588
return 0;
589
590
/* src0 and src1 can't have absolute values */
591
if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs)
592
return 0;
593
594
/* presub_replace_add() assumes only one is negative */
595
if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate)
596
return 0;
597
598
/* if src0 is negative, at least all bits of dstmask have to be set */
599
if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask)
600
return 0;
601
602
/* if src1 is negative, at least all bits of dstmask have to be set */
603
if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask)
604
return 0;
605
606
if (!is_presub_candidate(c, inst_add))
607
return 0;
608
609
if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) {
610
rc_remove_instruction(inst_add);
611
return 1;
612
}
613
return 0;
614
}
615
616
static void presub_replace_inv(
617
struct rc_instruction * inst_add,
618
struct rc_instruction * inst_reader,
619
unsigned int src_index)
620
{
621
/* We must be careful not to modify inst_add, since it
622
* is possible it will remain part of the program.*/
623
inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1];
624
inst_reader->U.I.PreSub.SrcReg[0].Negate = 0;
625
inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV;
626
inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index],
627
inst_reader->U.I.PreSub.SrcReg[0]);
628
629
inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB;
630
inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV;
631
}
632
633
/**
634
* PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1]
635
* Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source
636
* of the add instruction must have the constatnt 1 swizzle. This function
637
* does not check const registers to see if their value is 1.0, so it should
638
* be called after the constant_folding optimization.
639
* @return
640
* 0 if the ADD instruction is still part of the program.
641
* 1 if the ADD instruction is no longer part of the program.
642
*/
643
static int peephole_add_presub_inv(
644
struct radeon_compiler * c,
645
struct rc_instruction * inst_add)
646
{
647
unsigned int i, swz;
648
649
if (!is_presub_candidate(c, inst_add))
650
return 0;
651
652
/* Check if src0 is 1. */
653
/* XXX It would be nice to use is_src_uniform_constant here, but that
654
* function only works if the register's file is RC_FILE_NONE */
655
for(i = 0; i < 4; i++ ) {
656
swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i);
657
if(((1 << i) & inst_add->U.I.DstReg.WriteMask)
658
&& swz != RC_SWIZZLE_ONE) {
659
return 0;
660
}
661
}
662
663
/* Check src1. */
664
if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) !=
665
inst_add->U.I.DstReg.WriteMask
666
|| inst_add->U.I.SrcReg[1].Abs
667
|| (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY
668
&& inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT)
669
|| src_has_const_swz(inst_add->U.I.SrcReg[1])) {
670
671
return 0;
672
}
673
674
if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) {
675
rc_remove_instruction(inst_add);
676
return 1;
677
}
678
return 0;
679
}
680
681
/**
 * State shared by the omod_filter_*_cb() callbacks while peephole_mul_omod()
 * scans the instructions between a writer and the MUL.
 */
struct peephole_mul_cb_data {
	/* Destination register of the MUL under consideration. */
	struct rc_dst_register * Writer;
	/* Set to 1 once an instruction reads or writes that destination. */
	unsigned int Clobbered;
};
685
686
static void omod_filter_reader_cb(
687
void * userdata,
688
struct rc_instruction * inst,
689
rc_register_file file,
690
unsigned int index,
691
unsigned int mask)
692
{
693
struct peephole_mul_cb_data * d = userdata;
694
if (rc_src_reads_dst_mask(file, mask, index,
695
d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) {
696
697
d->Clobbered = 1;
698
}
699
}
700
701
static void omod_filter_writer_cb(
702
void * userdata,
703
struct rc_instruction * inst,
704
rc_register_file file,
705
unsigned int index,
706
unsigned int mask)
707
{
708
struct peephole_mul_cb_data * d = userdata;
709
if (file == d->Writer->File && index == d->Writer->Index &&
710
(mask & d->Writer->WriteMask)) {
711
d->Clobbered = 1;
712
}
713
}
714
715
static int peephole_mul_omod(
716
struct radeon_compiler * c,
717
struct rc_instruction * inst_mul,
718
struct rc_list * var_list)
719
{
720
unsigned int chan = 0, swz, i;
721
int const_index = -1;
722
int temp_index = -1;
723
float const_value;
724
rc_omod_op omod_op = RC_OMOD_DISABLE;
725
struct rc_list * writer_list;
726
struct rc_variable * var;
727
struct peephole_mul_cb_data cb_data;
728
unsigned writemask_sum;
729
730
for (i = 0; i < 2; i++) {
731
unsigned int j;
732
if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT
733
&& inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) {
734
return 0;
735
}
736
if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) {
737
if (temp_index != -1) {
738
/* The instruction has two temp sources */
739
return 0;
740
} else {
741
temp_index = i;
742
continue;
743
}
744
}
745
/* If we get this far Src[i] must be a constant src */
746
if (inst_mul->U.I.SrcReg[i].Negate) {
747
return 0;
748
}
749
/* The constant src needs to read from the same swizzle */
750
swz = RC_SWIZZLE_UNUSED;
751
chan = 0;
752
for (j = 0; j < 4; j++) {
753
unsigned int j_swz =
754
GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j);
755
if (j_swz == RC_SWIZZLE_UNUSED) {
756
continue;
757
}
758
if (swz == RC_SWIZZLE_UNUSED) {
759
swz = j_swz;
760
chan = j;
761
} else if (j_swz != swz) {
762
return 0;
763
}
764
}
765
766
if (const_index != -1) {
767
/* The instruction has two constant sources */
768
return 0;
769
} else {
770
const_index = i;
771
}
772
}
773
774
if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File,
775
inst_mul->U.I.SrcReg[const_index].Index)) {
776
return 0;
777
}
778
const_value = rc_get_constant_value(c,
779
inst_mul->U.I.SrcReg[const_index].Index,
780
inst_mul->U.I.SrcReg[const_index].Swizzle,
781
inst_mul->U.I.SrcReg[const_index].Negate,
782
chan);
783
784
if (const_value == 2.0f) {
785
omod_op = RC_OMOD_MUL_2;
786
} else if (const_value == 4.0f) {
787
omod_op = RC_OMOD_MUL_4;
788
} else if (const_value == 8.0f) {
789
omod_op = RC_OMOD_MUL_8;
790
} else if (const_value == (1.0f / 2.0f)) {
791
omod_op = RC_OMOD_DIV_2;
792
} else if (const_value == (1.0f / 4.0f)) {
793
omod_op = RC_OMOD_DIV_4;
794
} else if (const_value == (1.0f / 8.0f)) {
795
omod_op = RC_OMOD_DIV_8;
796
} else {
797
return 0;
798
}
799
800
writer_list = rc_variable_list_get_writers_one_reader(var_list,
801
RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]);
802
803
if (!writer_list) {
804
return 0;
805
}
806
807
cb_data.Clobbered = 0;
808
cb_data.Writer = &inst_mul->U.I.DstReg;
809
for (var = writer_list->Item; var; var = var->Friend) {
810
struct rc_instruction * inst;
811
const struct rc_opcode_info * info = rc_get_opcode_info(
812
var->Inst->U.I.Opcode);
813
if (info->HasTexture) {
814
return 0;
815
}
816
if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) {
817
return 0;
818
}
819
for (inst = inst_mul->Prev; inst != var->Inst;
820
inst = inst->Prev) {
821
rc_for_all_reads_mask(inst, omod_filter_reader_cb,
822
&cb_data);
823
rc_for_all_writes_mask(inst, omod_filter_writer_cb,
824
&cb_data);
825
if (cb_data.Clobbered) {
826
break;
827
}
828
}
829
}
830
831
if (cb_data.Clobbered) {
832
return 0;
833
}
834
835
/* Rewrite the instructions */
836
writemask_sum = rc_variable_writemask_sum(writer_list->Item);
837
for (var = writer_list->Item; var; var = var->Friend) {
838
struct rc_variable * writer = var;
839
unsigned conversion_swizzle = rc_make_conversion_swizzle(
840
writemask_sum,
841
inst_mul->U.I.DstReg.WriteMask);
842
writer->Inst->U.I.Omod = omod_op;
843
writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File;
844
writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index;
845
rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle);
846
writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode;
847
}
848
849
rc_remove_instruction(inst_mul);
850
851
return 1;
852
}
853
854
/**
855
* @return
856
* 0 if inst is still part of the program.
857
* 1 if inst is no longer part of the program.
858
*/
859
static int peephole(struct radeon_compiler * c, struct rc_instruction * inst)
860
{
861
switch(inst->U.I.Opcode){
862
case RC_OPCODE_ADD:
863
if (c->has_presub) {
864
if(peephole_add_presub_inv(c, inst))
865
return 1;
866
if(peephole_add_presub_add(c, inst))
867
return 1;
868
}
869
break;
870
default:
871
break;
872
}
873
return 0;
874
}
875
876
void rc_optimize(struct radeon_compiler * c, void *user)
877
{
878
struct rc_instruction * inst = c->Program.Instructions.Next;
879
struct rc_list * var_list;
880
while(inst != &c->Program.Instructions) {
881
struct rc_instruction * cur = inst;
882
inst = inst->Next;
883
884
constant_folding(c, cur);
885
886
if(peephole(c, cur))
887
continue;
888
889
if (cur->U.I.Opcode == RC_OPCODE_MOV) {
890
copy_propagate(c, cur);
891
/* cur may no longer be part of the program */
892
}
893
}
894
895
if (!c->has_omod) {
896
return;
897
}
898
899
inst = c->Program.Instructions.Next;
900
while(inst != &c->Program.Instructions) {
901
struct rc_instruction * cur = inst;
902
inst = inst->Next;
903
if (cur->U.I.Opcode == RC_OPCODE_MUL) {
904
var_list = rc_get_variables(c);
905
peephole_mul_omod(c, cur, var_list);
906
}
907
}
908
}
909
910