Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/i915/i915_fpc_optimize.c
4570 views
1
/**************************************************************************
2
*
3
* Copyright 2011 The Chromium OS authors.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the
8
* "Software"), to deal in the Software without restriction, including
9
* without limitation the rights to use, copy, modify, merge, publish,
10
* distribute, sub license, and/or sell copies of the Software, and to
11
* permit persons to whom the Software is furnished to do so, subject to
12
* the following conditions:
13
*
14
* The above copyright notice and this permission notice (including the
15
* next paragraph) shall be included in all copies or substantial portions
16
* of the Software.
17
*
18
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
* IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
* ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
*
26
**************************************************************************/
27
28
#include "i915_context.h"
29
#include "i915_fpc.h"
30
#include "i915_reg.h"
31
32
#include "pipe/p_shader_tokens.h"
33
#include "tgsi/tgsi_dump.h"
34
#include "tgsi/tgsi_exec.h"
35
#include "tgsi/tgsi_parse.h"
36
#include "util/u_math.h"
37
#include "util/u_memory.h"
38
#include "util/u_string.h"
39
40
struct i915_optimize_context {
41
int first_write[TGSI_EXEC_NUM_TEMPS];
42
int last_read[TGSI_EXEC_NUM_TEMPS];
43
};
44
45
static bool
46
same_src_dst_reg(struct i915_full_src_register *s1,
47
struct i915_full_dst_register *d1)
48
{
49
return (s1->Register.File == d1->Register.File &&
50
s1->Register.Indirect == d1->Register.Indirect &&
51
s1->Register.Dimension == d1->Register.Dimension &&
52
s1->Register.Index == d1->Register.Index);
53
}
54
55
static bool
56
same_dst_reg(struct i915_full_dst_register *d1,
57
struct i915_full_dst_register *d2)
58
{
59
return (d1->Register.File == d2->Register.File &&
60
d1->Register.Indirect == d2->Register.Indirect &&
61
d1->Register.Dimension == d2->Register.Dimension &&
62
d1->Register.Index == d2->Register.Index);
63
}
64
65
static bool
66
same_src_reg(struct i915_full_src_register *d1,
67
struct i915_full_src_register *d2)
68
{
69
return (d1->Register.File == d2->Register.File &&
70
d1->Register.Indirect == d2->Register.Indirect &&
71
d1->Register.Dimension == d2->Register.Dimension &&
72
d1->Register.Index == d2->Register.Index &&
73
d1->Register.Absolute == d2->Register.Absolute &&
74
d1->Register.Negate == d2->Register.Negate);
75
}
76
77
static const struct {
78
bool is_texture;
79
bool commutes;
80
unsigned neutral_element;
81
unsigned num_dst;
82
unsigned num_src;
83
} op_table[TGSI_OPCODE_LAST] = {
84
[TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85
[TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86
[TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87
[TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88
[TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89
[TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90
[TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91
[TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92
[TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93
[TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94
[TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95
[TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96
[TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97
[TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98
[TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99
[TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100
[TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101
[TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102
[TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103
[TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104
[TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105
[TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106
[TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107
[TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108
[TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109
[TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110
[TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111
[TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112
[TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113
[TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114
[TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115
[TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116
[TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117
[TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118
[TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119
[TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120
[TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121
[TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122
[TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123
[TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124
[TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125
};
126
127
static bool
128
op_has_dst(unsigned opcode)
129
{
130
return (op_table[opcode].num_dst > 0);
131
}
132
133
static int
134
op_num_dst(unsigned opcode)
135
{
136
return op_table[opcode].num_dst;
137
}
138
139
static int
140
op_num_src(unsigned opcode)
141
{
142
return op_table[opcode].num_src;
143
}
144
145
static bool
146
op_commutes(unsigned opcode)
147
{
148
return op_table[opcode].commutes;
149
}
150
151
static unsigned
152
mask_for_unswizzled(int num_components)
153
{
154
unsigned mask = 0;
155
switch (num_components) {
156
case 4:
157
mask |= TGSI_WRITEMASK_W;
158
FALLTHROUGH;
159
case 3:
160
mask |= TGSI_WRITEMASK_Z;
161
FALLTHROUGH;
162
case 2:
163
mask |= TGSI_WRITEMASK_Y;
164
FALLTHROUGH;
165
case 1:
166
mask |= TGSI_WRITEMASK_X;
167
}
168
return mask;
169
}
170
171
static bool
172
is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
173
{
174
if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
175
return false;
176
if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
177
return false;
178
if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
179
return false;
180
if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
181
return false;
182
return true;
183
}
184
185
static bool
186
op_is_texture(unsigned opcode)
187
{
188
return op_table[opcode].is_texture;
189
}
190
191
static unsigned
192
op_neutral_element(unsigned opcode)
193
{
194
unsigned ne = op_table[opcode].neutral_element;
195
if (!ne) {
196
debug_printf("No neutral element for opcode %d\n", opcode);
197
ne = TGSI_SWIZZLE_ZERO;
198
}
199
return ne;
200
}
201
202
/*
203
* Sets the swizzle to the neutral element for the operation for the bits
204
* of writemask which are set, swizzle to identity otherwise.
205
*/
206
static void
207
set_neutral_element_swizzle(struct i915_full_src_register *r,
208
unsigned write_mask, unsigned neutral)
209
{
210
if (write_mask & TGSI_WRITEMASK_X)
211
r->Register.SwizzleX = neutral;
212
else
213
r->Register.SwizzleX = TGSI_SWIZZLE_X;
214
215
if (write_mask & TGSI_WRITEMASK_Y)
216
r->Register.SwizzleY = neutral;
217
else
218
r->Register.SwizzleY = TGSI_SWIZZLE_Y;
219
220
if (write_mask & TGSI_WRITEMASK_Z)
221
r->Register.SwizzleZ = neutral;
222
else
223
r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
224
225
if (write_mask & TGSI_WRITEMASK_W)
226
r->Register.SwizzleW = neutral;
227
else
228
r->Register.SwizzleW = TGSI_SWIZZLE_W;
229
}
230
231
static void
232
copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
233
{
234
o->File = i->File;
235
o->Indirect = i->Indirect;
236
o->Dimension = i->Dimension;
237
o->Index = i->Index;
238
o->SwizzleX = i->SwizzleX;
239
o->SwizzleY = i->SwizzleY;
240
o->SwizzleZ = i->SwizzleZ;
241
o->SwizzleW = i->SwizzleW;
242
o->Absolute = i->Absolute;
243
o->Negate = i->Negate;
244
}
245
246
static void
247
copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
248
{
249
o->File = i->File;
250
o->WriteMask = i->WriteMask;
251
o->Indirect = i->Indirect;
252
o->Dimension = i->Dimension;
253
o->Index = i->Index;
254
}
255
256
static void
257
copy_instruction(struct i915_full_instruction *o,
258
const struct tgsi_full_instruction *i)
259
{
260
memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
261
memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
262
263
copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
264
265
copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
266
copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
267
copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
268
}
269
270
static void
271
copy_token(union i915_full_token *o, union tgsi_full_token *i)
272
{
273
if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
274
memcpy(o, i, sizeof(*o));
275
else
276
copy_instruction(&o->FullInstruction, &i->FullInstruction);
277
}
278
279
static void
280
liveness_mark_written(struct i915_optimize_context *ctx,
281
struct i915_full_dst_register *dst_reg, int pos)
282
{
283
int dst_reg_index;
284
if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
285
dst_reg_index = dst_reg->Register.Index;
286
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
287
/* dead -> live transition */
288
if (ctx->first_write[dst_reg_index] != -1)
289
ctx->first_write[dst_reg_index] = pos;
290
}
291
}
292
293
static void
294
liveness_mark_read(struct i915_optimize_context *ctx,
295
struct i915_full_src_register *src_reg, int pos)
296
{
297
int src_reg_index;
298
if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
299
src_reg_index = src_reg->Register.Index;
300
assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
301
/* live -> dead transition */
302
if (ctx->last_read[src_reg_index] != -1)
303
ctx->last_read[src_reg_index] = pos;
304
}
305
}
306
307
static void
308
liveness_analysis(struct i915_optimize_context *ctx,
309
struct i915_token_list *tokens)
310
{
311
struct i915_full_dst_register *dst_reg;
312
struct i915_full_src_register *src_reg;
313
union i915_full_token *current;
314
unsigned opcode;
315
int num_dst, num_src;
316
int i = 0;
317
318
for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
319
ctx->first_write[i] = -1;
320
ctx->last_read[i] = -1;
321
}
322
323
for (i = 0; i < tokens->NumTokens; i++) {
324
current = &tokens->Tokens[i];
325
326
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
327
continue;
328
329
opcode = current->FullInstruction.Instruction.Opcode;
330
num_dst = op_num_dst(opcode);
331
332
switch (num_dst) {
333
case 1:
334
dst_reg = &current->FullInstruction.Dst[0];
335
liveness_mark_written(ctx, dst_reg, i);
336
case 0:
337
break;
338
default:
339
debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
340
break;
341
}
342
}
343
344
for (i = tokens->NumTokens - 1; i >= 0; i--) {
345
current = &tokens->Tokens[i];
346
347
if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
348
continue;
349
350
opcode = current->FullInstruction.Instruction.Opcode;
351
num_src = op_num_src(opcode);
352
353
switch (num_src) {
354
case 3:
355
src_reg = &current->FullInstruction.Src[2];
356
liveness_mark_read(ctx, src_reg, i);
357
FALLTHROUGH;
358
case 2:
359
src_reg = &current->FullInstruction.Src[1];
360
liveness_mark_read(ctx, src_reg, i);
361
FALLTHROUGH;
362
case 1:
363
src_reg = &current->FullInstruction.Src[0];
364
liveness_mark_read(ctx, src_reg, i);
365
FALLTHROUGH;
366
case 0:
367
break;
368
default:
369
debug_printf("Op %d has %d src regs\n", opcode, num_src);
370
break;
371
}
372
}
373
}
374
375
static int
376
unused_from(struct i915_optimize_context *ctx,
377
struct i915_full_dst_register *dst_reg, int from)
378
{
379
int dst_reg_index = dst_reg->Register.Index;
380
assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
381
return (from >= ctx->last_read[dst_reg_index]);
382
}
383
384
/* Returns a mask with the components used for a texture access instruction */
385
static unsigned
386
i915_tex_mask(union i915_full_token *instr)
387
{
388
unsigned mask;
389
390
/* Get the number of coords */
391
mask = mask_for_unswizzled(
392
i915_num_coords(instr->FullInstruction.Texture.Texture));
393
394
/* Add the W component if projective */
395
if (instr->FullInstruction.Instruction.Opcode == TGSI_OPCODE_TXP)
396
mask |= TGSI_WRITEMASK_W;
397
398
return mask;
399
}
400
401
static bool
402
target_is_texture2d(uint32_t tex)
403
{
404
switch (tex) {
405
case TGSI_TEXTURE_2D:
406
case TGSI_TEXTURE_RECT:
407
return true;
408
default:
409
return false;
410
}
411
}
412
413
/*
414
* Optimize away useless indirect texture reads:
415
* MOV TEMP[0].xy, IN[0].xyyy
416
* TEX TEMP[1], TEMP[0], SAMP[0], 2D
417
* into:
418
* TEX TEMP[1], IN[0], SAMP[0], 2D
419
*
420
* note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
421
*/
422
static void
423
i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
424
struct i915_token_list *tokens, int index)
425
{
426
union i915_full_token *current = &tokens->Tokens[index - 1];
427
union i915_full_token *next = &tokens->Tokens[index];
428
429
if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
430
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
431
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
432
op_is_texture(next->FullInstruction.Instruction.Opcode) &&
433
target_is_texture2d(next->FullInstruction.Texture.Texture) &&
434
same_src_dst_reg(&next->FullInstruction.Src[0],
435
&current->FullInstruction.Dst[0]) &&
436
is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
437
unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
438
memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],
439
sizeof(struct i915_src_register));
440
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
441
}
442
}
443
444
/*
445
* Optimize away things like:
446
* MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
447
* MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
448
* into:
449
* NOP
450
* MOV OUT[0].xyw, TEMP[1].xyww
451
*/
452
static void
453
i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
454
union i915_full_token *next)
455
{
456
struct i915_full_src_register *src_reg1, *src_reg2;
457
struct i915_full_dst_register *dst_reg1, *dst_reg2;
458
unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
459
460
if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
461
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
462
current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
463
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
464
current->FullInstruction.Instruction.Saturate ==
465
next->FullInstruction.Instruction.Saturate &&
466
same_dst_reg(&next->FullInstruction.Dst[0],
467
&current->FullInstruction.Dst[0]) &&
468
same_src_reg(&next->FullInstruction.Src[0],
469
&current->FullInstruction.Src[0]) &&
470
!same_src_dst_reg(&current->FullInstruction.Src[0],
471
&current->FullInstruction.Dst[0])) {
472
src_reg1 = &current->FullInstruction.Src[0];
473
dst_reg1 = &current->FullInstruction.Dst[0];
474
src_reg2 = &next->FullInstruction.Src[0];
475
dst_reg2 = &next->FullInstruction.Dst[0];
476
477
/* Start with swizzles from the first mov */
478
swizzle_x = src_reg1->Register.SwizzleX;
479
swizzle_y = src_reg1->Register.SwizzleY;
480
swizzle_z = src_reg1->Register.SwizzleZ;
481
swizzle_w = src_reg1->Register.SwizzleW;
482
483
/* Pile the second mov on top */
484
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
485
swizzle_x = src_reg2->Register.SwizzleX;
486
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
487
swizzle_y = src_reg2->Register.SwizzleY;
488
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
489
swizzle_z = src_reg2->Register.SwizzleZ;
490
if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
491
swizzle_w = src_reg2->Register.SwizzleW;
492
493
dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
494
src_reg2->Register.SwizzleX = swizzle_x;
495
src_reg2->Register.SwizzleY = swizzle_y;
496
src_reg2->Register.SwizzleZ = swizzle_z;
497
src_reg2->Register.SwizzleW = swizzle_w;
498
499
current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
500
501
return;
502
}
503
}
504
505
/*
506
* Optimize away things like:
507
* MUL OUT[0].xyz, TEMP[1], TEMP[2]
508
* MOV OUT[0].w, TEMP[2]
509
* into:
510
* MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
511
* This is useful for optimizing texenv.
512
*/
513
static void
514
i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
515
union i915_full_token *next)
516
{
517
if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
518
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
519
op_commutes(current->FullInstruction.Instruction.Opcode) &&
520
current->FullInstruction.Instruction.Saturate ==
521
next->FullInstruction.Instruction.Saturate &&
522
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
523
same_dst_reg(&next->FullInstruction.Dst[0],
524
&current->FullInstruction.Dst[0]) &&
525
same_src_reg(&next->FullInstruction.Src[0],
526
&current->FullInstruction.Src[1]) &&
527
!same_src_dst_reg(&next->FullInstruction.Src[0],
528
&current->FullInstruction.Dst[0]) &&
529
is_unswizzled(&current->FullInstruction.Src[0],
530
current->FullInstruction.Dst[0].Register.WriteMask) &&
531
is_unswizzled(&current->FullInstruction.Src[1],
532
current->FullInstruction.Dst[0].Register.WriteMask) &&
533
is_unswizzled(&next->FullInstruction.Src[0],
534
next->FullInstruction.Dst[0].Register.WriteMask)) {
535
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
536
537
set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
538
set_neutral_element_swizzle(
539
&current->FullInstruction.Src[0],
540
next->FullInstruction.Dst[0].Register.WriteMask,
541
op_neutral_element(current->FullInstruction.Instruction.Opcode));
542
543
current->FullInstruction.Dst[0].Register.WriteMask =
544
current->FullInstruction.Dst[0].Register.WriteMask |
545
next->FullInstruction.Dst[0].Register.WriteMask;
546
return;
547
}
548
549
if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
550
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
551
op_commutes(current->FullInstruction.Instruction.Opcode) &&
552
current->FullInstruction.Instruction.Saturate ==
553
next->FullInstruction.Instruction.Saturate &&
554
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
555
same_dst_reg(&next->FullInstruction.Dst[0],
556
&current->FullInstruction.Dst[0]) &&
557
same_src_reg(&next->FullInstruction.Src[0],
558
&current->FullInstruction.Src[0]) &&
559
!same_src_dst_reg(&next->FullInstruction.Src[0],
560
&current->FullInstruction.Dst[0]) &&
561
is_unswizzled(&current->FullInstruction.Src[0],
562
current->FullInstruction.Dst[0].Register.WriteMask) &&
563
is_unswizzled(&current->FullInstruction.Src[1],
564
current->FullInstruction.Dst[0].Register.WriteMask) &&
565
is_unswizzled(&next->FullInstruction.Src[0],
566
next->FullInstruction.Dst[0].Register.WriteMask)) {
567
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
568
569
set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
570
set_neutral_element_swizzle(
571
&current->FullInstruction.Src[1],
572
next->FullInstruction.Dst[0].Register.WriteMask,
573
op_neutral_element(current->FullInstruction.Instruction.Opcode));
574
575
current->FullInstruction.Dst[0].Register.WriteMask =
576
current->FullInstruction.Dst[0].Register.WriteMask |
577
next->FullInstruction.Dst[0].Register.WriteMask;
578
return;
579
}
580
}
581
582
/*
583
* Optimize away things like:
584
* MOV TEMP[0].xyz TEMP[0].xyzx
585
* into:
586
* NOP
587
*/
588
static bool
589
i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
590
{
591
union i915_full_token current;
592
copy_token(&current, tgsi_current);
593
if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
594
current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
595
op_has_dst(current.FullInstruction.Instruction.Opcode) &&
596
!current.FullInstruction.Instruction.Saturate &&
597
current.FullInstruction.Src[0].Register.Absolute == 0 &&
598
current.FullInstruction.Src[0].Register.Negate == 0 &&
599
is_unswizzled(&current.FullInstruction.Src[0],
600
current.FullInstruction.Dst[0].Register.WriteMask) &&
601
same_src_dst_reg(&current.FullInstruction.Src[0],
602
&current.FullInstruction.Dst[0])) {
603
return true;
604
}
605
return false;
606
}
607
608
/*
609
* Optimize away things like:
610
* *** TEMP[0], TEMP[1], TEMP[2]
611
* MOV OUT[0] TEMP[0]
612
* into:
613
* *** OUT[0], TEMP[1], TEMP[2]
614
*/
615
static void
616
i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
617
struct i915_token_list *tokens,
618
int index)
619
{
620
union i915_full_token *current = &tokens->Tokens[index - 1];
621
union i915_full_token *next = &tokens->Tokens[index];
622
623
// &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
624
if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
625
next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
626
next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
627
op_has_dst(current->FullInstruction.Instruction.Opcode) &&
628
!next->FullInstruction.Instruction.Saturate &&
629
next->FullInstruction.Src[0].Register.Absolute == 0 &&
630
next->FullInstruction.Src[0].Register.Negate == 0 &&
631
unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
632
current->FullInstruction.Dst[0].Register.WriteMask ==
633
TGSI_WRITEMASK_XYZW &&
634
is_unswizzled(&next->FullInstruction.Src[0],
635
next->FullInstruction.Dst[0].Register.WriteMask) &&
636
current->FullInstruction.Dst[0].Register.WriteMask ==
637
next->FullInstruction.Dst[0].Register.WriteMask &&
638
same_src_dst_reg(&next->FullInstruction.Src[0],
639
&current->FullInstruction.Dst[0])) {
640
next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
641
642
current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
643
return;
644
}
645
}
646
647
struct i915_token_list *
648
i915_optimize(const struct tgsi_token *tokens)
649
{
650
struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
651
struct tgsi_parse_context parse;
652
struct i915_optimize_context *ctx;
653
int i = 0;
654
655
ctx = malloc(sizeof(*ctx));
656
657
out_tokens->NumTokens = 0;
658
659
/* Count the tokens */
660
tgsi_parse_init(&parse, tokens);
661
while (!tgsi_parse_end_of_tokens(&parse)) {
662
tgsi_parse_token(&parse);
663
out_tokens->NumTokens++;
664
}
665
tgsi_parse_free(&parse);
666
667
/* Allocate our tokens */
668
out_tokens->Tokens =
669
MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
670
671
tgsi_parse_init(&parse, tokens);
672
while (!tgsi_parse_end_of_tokens(&parse)) {
673
tgsi_parse_token(&parse);
674
675
if (i915_fpc_useless_mov(&parse.FullToken)) {
676
out_tokens->NumTokens--;
677
continue;
678
}
679
680
copy_token(&out_tokens->Tokens[i], &parse.FullToken);
681
682
i++;
683
}
684
tgsi_parse_free(&parse);
685
686
liveness_analysis(ctx, out_tokens);
687
688
i = 1;
689
while (i < out_tokens->NumTokens) {
690
i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
691
i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
692
&out_tokens->Tokens[i]);
693
i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
694
&out_tokens->Tokens[i]);
695
i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
696
i++;
697
}
698
699
free(ctx);
700
701
return out_tokens;
702
}
703
704
void
705
i915_optimize_free(struct i915_token_list *tokens)
706
{
707
free(tokens->Tokens);
708
free(tokens);
709
}
710
711