GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/ir3/ir3_postsched.c
/*
 * Copyright (C) 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <[email protected]>
 */

#include "util/dag.h"
#include "util/u_math.h"

#include "ir3.h"
#include "ir3_compiler.h"
#include "ir3_context.h"

#ifdef DEBUG
#define SCHED_DEBUG (ir3_shader_debug & IR3_DBG_SCHEDMSGS)
#else
#define SCHED_DEBUG 0
#endif
#define d(fmt, ...) \
   do { \
      if (SCHED_DEBUG) { \
         printf("PSCHED: " fmt "\n", ##__VA_ARGS__); \
      } \
   } while (0)

#define di(instr, fmt, ...) \
   do { \
      if (SCHED_DEBUG) { \
         printf("PSCHED: " fmt ": ", ##__VA_ARGS__); \
         ir3_print_instr(instr); \
      } \
   } while (0)
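
/* Both macros are compiled out in release builds, where SCHED_DEBUG is the
 * constant 0, so scheduler tracing costs nothing in shipping drivers.
 * A minimal usage sketch (the printed values are illustrative):
 *
 *    d("delay=%u", delay);     // -> "PSCHED: delay=3"
 *    di(instr, "schedule");    // -> "PSCHED: schedule: " + instruction dump
 */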

/*
 * Post RA Instruction Scheduling
 */
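
/* Roughly: for each block we build a DAG whose edges are the register
 * dependencies between instructions (computed in both the forward and
 * reverse direction, so that read-after-write, write-after-write, and
 * write-after-read orderings are all preserved), then greedily pick from
 * the DAG heads, preferring instructions that are ready without nops and
 * that sit on the longest remaining dependency path (max_delay).
 */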

struct ir3_postsched_ctx {
   struct ir3 *ir;

   struct ir3_shader_variant *v;

   void *mem_ctx;
   struct ir3_block *block; /* the current block */
   struct dag *dag;

   struct list_head unscheduled_list; /* unscheduled instructions */

   int sfu_delay;
   int tex_delay;
};

struct ir3_postsched_node {
   struct dag_node dag; /* must be first for util_dynarray_foreach */
   struct ir3_instruction *instr;
   bool partially_evaluated_path;

   bool has_tex_src, has_sfu_src;

   unsigned delay;
   unsigned max_delay;
};

#define foreach_sched_node(__n, __list) \
   list_for_each_entry (struct ir3_postsched_node, __n, __list, dag.link)

static bool
has_tex_src(struct ir3_instruction *instr)
{
   struct ir3_postsched_node *node = instr->data;
   return node->has_tex_src;
}

static bool
has_sfu_src(struct ir3_instruction *instr)
{
   struct ir3_postsched_node *node = instr->data;
   return node->has_sfu_src;
}

static void
schedule(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   debug_assert(ctx->block == instr->block);

   /* remove from unscheduled_list:
    */
   list_delinit(&instr->node);

   di(instr, "schedule");

   list_addtail(&instr->node, &instr->block->instr_list);

   struct ir3_postsched_node *n = instr->data;
   dag_prune_head(ctx->dag, &n->dag);

   if (is_meta(instr) && (instr->opc != OPC_META_TEX_PREFETCH))
      return;

   if (is_sfu(instr)) {
      ctx->sfu_delay = 8;
   } else if (has_sfu_src(instr)) {
      ctx->sfu_delay = 0;
   } else if (ctx->sfu_delay > 0) {
      ctx->sfu_delay--;
   }

   if (is_tex_or_prefetch(instr)) {
      ctx->tex_delay = 10;
   } else if (has_tex_src(instr)) {
      ctx->tex_delay = 0;
   } else if (ctx->tex_delay > 0) {
      ctx->tex_delay--;
   }
}
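
/* For example: scheduling an SFU sets sfu_delay to 8, and each later
 * unrelated instruction decrements it.  While the counter is non-zero,
 * scheduling a consumer of the SFU result would need an (ss) that stalls,
 * which is what would_sync() below tries to avoid.  Once a consumer is
 * scheduled anyway (the sync cost is paid) or 8 instructions have passed,
 * the counter clears and consumers are no longer penalized.  tex works
 * the same way with a window of 10.
 */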

static void
dump_state(struct ir3_postsched_ctx *ctx)
{
   if (!SCHED_DEBUG)
      return;

   foreach_sched_node (n, &ctx->dag->heads) {
      di(n->instr, "maxdel=%3d ", n->max_delay);

      util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
         struct ir3_postsched_node *child =
            (struct ir3_postsched_node *)edge->child;

         di(child->instr, " -> (%d parents) ", child->dag.parent_count);
      }
   }
}

/* Determine if this is an instruction that we'd prefer not to schedule
 * yet, in order to avoid an (ss) sync.  This is limited by the sfu_delay
 * counter, ie. the more cycles it has been since the last SFU, the less
 * costly a sync would be.
 */
static bool
would_sync(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   if (ctx->sfu_delay) {
      if (has_sfu_src(instr))
         return true;
   }

   if (ctx->tex_delay) {
      if (has_tex_src(instr))
         return true;
   }

   return false;
}

/* find instruction to schedule: */
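/* The cascade below tries, in priority order: meta instructions, inputs
 * (the last bary.f frees varying storage for more VS warps), kills, and
 * expensive sfu/tex instructions that are ready without nops; then
 * instructions that avoid a sync at the cost of up to a few nops; then
 * anything soft- or hard-ready; and finally whatever leader has the
 * largest max_delay.
 */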
static struct ir3_instruction *
choose_instr(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_node *chosen = NULL;

   dump_state(ctx);

   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_meta(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (meta)");
      return chosen->instr;
   }

   /* Try to schedule inputs with a higher priority, if possible, as
    * the last bary.f unlocks varying storage to unblock more VS
    * warps.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!is_input(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "prio: chose (input)");
      return chosen->instr;
   }

   /* Next prioritize discards: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!is_kill_or_demote(n->instr))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (kill, hard ready)");
      return chosen->instr;
   }

   /* Next prioritize expensive instructions: */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!(is_sfu(n->instr) || is_tex(n->instr)))
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (sfu/tex, hard ready)");
      return chosen->instr;
   }

   /*
    * Sometimes it is better to take a nop, rather than scheduling an
    * instruction that would require an (ss) shortly after another
    * SFU.  Ie. if the last SFU was just one or two instructions ago,
    * and we could choose between taking a nop and then scheduling
    * something else, vs. scheduling the immediately-available instruction
    * that would require (ss), we are better off with the nop.
    */
   for (unsigned delay = 0; delay < 4; delay++) {
      foreach_sched_node (n, &ctx->dag->heads) {
         if (would_sync(ctx, n->instr))
            continue;

         unsigned d = ir3_delay_calc_postra(ctx->block, n->instr, true,
                                            ctx->v->mergedregs);

         if (d > delay)
            continue;

         if (!chosen || (chosen->max_delay < n->max_delay))
            chosen = n;
      }

      if (chosen) {
         di(chosen->instr, "csp: chose (soft ready, delay=%u)", delay);
         return chosen->instr;
      }
   }

   /* Next try to find a ready leader w/ soft delay (ie. including extra
    * delay for things like tex fetch, which can be synchronized w/ the
    * sync bit), since we probably do want to schedule some other
    * instructions while we wait:
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, true, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (soft ready)");
      return chosen->instr;
   }

   /* Next try to find a ready leader that can be scheduled without nop's,
    * which in the case of things that need (sy)/(ss) could result in
    * stalls.. but we've already decided there is not a better option.
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      unsigned d =
         ir3_delay_calc_postra(ctx->block, n->instr, false, ctx->v->mergedregs);

      if (d > 0)
         continue;

      if (!chosen || (chosen->max_delay < n->max_delay))
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (hard ready)");
      return chosen->instr;
   }

   /* Otherwise choose leader with maximum cost:
    *
    * TODO should we try to balance cost and delays?  I guess it is
    * a balance between now-nop's and future-nop's?
    */
   foreach_sched_node (n, &ctx->dag->heads) {
      if (!chosen || chosen->max_delay < n->max_delay)
         chosen = n;
   }

   if (chosen) {
      di(chosen->instr, "csp: chose (leader)");
      return chosen->instr;
   }

   return NULL;
}

struct ir3_postsched_deps_state {
   struct ir3_postsched_ctx *ctx;

   enum { F, R } direction;

   bool merged;

   /* Track the sched node (instruction) that last wrote a given
    * register, in whichever direction we are iterating the block.
    *
    * Note, this table is twice as big as the # of regs, to deal with
    * half-precision regs.  The approach differs depending on whether
    * the half and full precision register files are "merged" (conflicting,
    * ie. a6xx+), in which case we consider each full precision dep
    * as two half-precision dependencies, vs. older gens with separate
    * (non-conflicting) files, in which case the first half of the table
    * is used for full precision and the 2nd half for half-precision.
    */
   struct ir3_postsched_node *regs[2 * 256];
};

/* bounds checking read/write accessors, since OoB access to stuff on
 * the stack is gonna cause a bad day.
 */
#define dep_reg(state, idx) \
   *({ \
      assert((idx) < ARRAY_SIZE((state)->regs)); \
      &(state)->regs[(idx)]; \
   })

static void
add_dep(struct ir3_postsched_deps_state *state,
        struct ir3_postsched_node *before, struct ir3_postsched_node *after)
{
   if (!before || !after)
      return;

   assert(before != after);

   if (state->direction == F) {
      dag_add_edge(&before->dag, &after->dag, NULL);
   } else {
      dag_add_edge(&after->dag, &before->dag, NULL);
   }
}

static void
add_single_reg_dep(struct ir3_postsched_deps_state *state,
                   struct ir3_postsched_node *node, unsigned num, int src_n)
{
   struct ir3_postsched_node *dep = dep_reg(state, num);

   if (src_n >= 0 && dep && state->direction == F) {
      unsigned d = ir3_delayslots(dep->instr, node->instr, src_n, true);
      node->delay = MAX2(node->delay, d);
      if (is_tex_or_prefetch(dep->instr))
         node->has_tex_src = true;
      if (is_sfu(dep->instr))
         node->has_sfu_src = true;
   }

   add_dep(state, dep, node);
   if (src_n < 0) {
      dep_reg(state, num) = node;
   }
}
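
/* Note that only writes (src_n < 0) update the table: a read adds an edge
 * to the previous writer of the register, while a write both adds that
 * edge and becomes the new "last writer" that later reads and writes will
 * depend on.
 */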

/* This is where we handle full vs half-precision, and the potential
 * conflicts between half and full precision that result in additional
 * dependencies.  The 'reg' arg is really just to know half vs. full
 * precision.
 *
 * If src_n is non-negative, then this adds a dependency on a source
 * register, and src_n is the index passed into ir3_delayslots() for
 * calculating the delay.  If src_n is negative, then this is for a
 * destination register.
 */
static void
add_reg_dep(struct ir3_postsched_deps_state *state,
            struct ir3_postsched_node *node, const struct ir3_register *reg,
            unsigned num, int src_n)
{
   if (state->merged) {
      /* Make sure that special registers like a0.x that are written as
       * half-registers don't alias random full registers by pretending that
       * they're full registers:
       */
      if ((reg->flags & IR3_REG_HALF) && !is_reg_special(reg)) {
         /* single conflict in half-reg space: */
         add_single_reg_dep(state, node, num, src_n);
      } else {
         /* two conflicts in half-reg space: */
         add_single_reg_dep(state, node, 2 * num + 0, src_n);
         add_single_reg_dep(state, node, 2 * num + 1, src_n);
      }
   } else {
      if (reg->flags & IR3_REG_HALF)
         num += ARRAY_SIZE(state->regs) / 2;
      add_single_reg_dep(state, node, num, src_n);
   }
}
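
/* Worked example of the table indexing: with merged registers, a full
 * r0.x (num = 0) lands in slots 0 and 1, the same slots used by hr0.x
 * (num = 0) and hr0.y (num = 1), so the physical overlap shows up as a
 * shared table entry.  On older gens, hr0.x instead lands at slot
 * 0 + ARRAY_SIZE(regs)/2 = 256 and can never alias r0.x.
 */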

static void
calculate_deps(struct ir3_postsched_deps_state *state,
               struct ir3_postsched_node *node)
{
   /* Add dependencies on instructions that previously (or next,
    * in the reverse direction) wrote any of our src registers:
    */
   foreach_src_n (reg, i, node->instr) {
      if (reg->flags & (IR3_REG_CONST | IR3_REG_IMMED))
         continue;

      if (reg->flags & IR3_REG_RELATIV) {
         /* mark entire array as read: */
         for (unsigned j = 0; j < reg->size; j++) {
            add_reg_dep(state, node, reg, reg->array.base + j, i);
         }
      } else {
         assert(reg->wrmask >= 1);
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, i);
         }
      }
   }

   /* And then update the state for what this instruction wrote: */
   foreach_dst (reg, node->instr) {
      if (reg->wrmask == 0)
         continue;
      if (reg->flags & IR3_REG_RELATIV) {
         /* mark the entire array as written: */
         for (unsigned i = 0; i < reg->size; i++) {
            add_reg_dep(state, node, reg, reg->array.base + i, -1);
         }
      } else {
         assert(reg->wrmask >= 1);
         u_foreach_bit (b, reg->wrmask) {
            add_reg_dep(state, node, reg, reg->num + b, -1);
         }
      }
   }
}

static void
calculate_forward_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx,
      .direction = F,
      .merged = ctx->v->mergedregs,
   };

   foreach_instr (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
calculate_reverse_deps(struct ir3_postsched_ctx *ctx)
{
   struct ir3_postsched_deps_state state = {
      .ctx = ctx,
      .direction = R,
      .merged = ctx->v->mergedregs,
   };

   foreach_instr_rev (instr, &ctx->unscheduled_list) {
      calculate_deps(&state, instr->data);
   }
}

static void
sched_node_init(struct ir3_postsched_ctx *ctx, struct ir3_instruction *instr)
{
   struct ir3_postsched_node *n =
      rzalloc(ctx->mem_ctx, struct ir3_postsched_node);

   dag_init_node(ctx->dag, &n->dag);

   n->instr = instr;
   instr->data = n;
}

static void
sched_dag_max_delay_cb(struct dag_node *node, void *state)
{
   struct ir3_postsched_node *n = (struct ir3_postsched_node *)node;
   uint32_t max_delay = 0;

   util_dynarray_foreach (&n->dag.edges, struct dag_edge, edge) {
      struct ir3_postsched_node *child =
         (struct ir3_postsched_node *)edge->child;
      max_delay = MAX2(child->max_delay, max_delay);
   }

   n->max_delay = MAX2(n->max_delay, max_delay + n->delay);
}
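
/* Since the callback runs bottom-up, max_delay ends up as the accumulated
 * delay along the longest path from this node to the end of the block;
 * e.g. a node with delay 3 whose deepest child has max_delay 5 gets
 * max_delay 8.  choose_instr() uses this to keep the critical path moving.
 */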

static void
sched_dag_init(struct ir3_postsched_ctx *ctx)
{
   ctx->mem_ctx = ralloc_context(NULL);

   ctx->dag = dag_create(ctx->mem_ctx);

   foreach_instr (instr, &ctx->unscheduled_list)
      sched_node_init(ctx, instr);

   calculate_forward_deps(ctx);
   calculate_reverse_deps(ctx);

   /*
    * To keep expensive texture fetches, etc., from being moved ahead
    * of kills, track the kills we've seen so far, so we can add an
    * extra dependency on them for tex/mem instructions.
    */
   struct util_dynarray kills;
   util_dynarray_init(&kills, ctx->mem_ctx);

   /* The last bary.f with the (ei) flag must be scheduled before any kills,
    * or the hw gets angry.  Keep track of inputs here so we can add the
    * false dep on the kill instruction.
    */
   struct util_dynarray inputs;
   util_dynarray_init(&inputs, ctx->mem_ctx);

   /*
    * Normal srcs won't be in SSA at this point, those are dealt with in
    * calculate_forward_deps() and calculate_reverse_deps().  But we still
    * have the false-dep information in SSA form, so go ahead and add
    * dependencies for that here:
    */
   foreach_instr (instr, &ctx->unscheduled_list) {
      struct ir3_postsched_node *n = instr->data;

      foreach_ssa_src_n (src, i, instr) {
         /* don't consider dependencies in other blocks: */
         if (src->block != instr->block)
            continue;

         /* we can end up with unused false-deps.. just skip them: */
         if (src->flags & IR3_INSTR_UNUSED)
            continue;

         struct ir3_postsched_node *sn = src->data;

         dag_add_edge(&sn->dag, &n->dag, NULL);
      }

      if (is_input(instr)) {
         util_dynarray_append(&inputs, struct ir3_instruction *, instr);
      } else if (is_kill_or_demote(instr)) {
         util_dynarray_foreach (&inputs, struct ir3_instruction *, instrp) {
            struct ir3_instruction *input = *instrp;
            struct ir3_postsched_node *in = input->data;
            dag_add_edge(&in->dag, &n->dag, NULL);
         }
         util_dynarray_append(&kills, struct ir3_instruction *, instr);
      } else if (is_tex(instr) || is_mem(instr)) {
         util_dynarray_foreach (&kills, struct ir3_instruction *, instrp) {
            struct ir3_instruction *kill = *instrp;
            struct ir3_postsched_node *kn = kill->data;
            dag_add_edge(&kn->dag, &n->dag, NULL);
         }
      }
   }
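
   /* The extra edges added above amount to: every input precedes every
    * later kill (so the last bary.f with (ei) cannot sink below a kill),
    * and every kill precedes every later tex/mem instruction (so expensive
    * fetches cannot be hoisted above it).
    */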

   // TODO do we want to do this after reverse-dependencies?
   dag_traverse_bottom_up(ctx->dag, sched_dag_max_delay_cb, NULL);
}

static void
sched_dag_destroy(struct ir3_postsched_ctx *ctx)
{
   ralloc_free(ctx->mem_ctx);
   ctx->mem_ctx = NULL;
   ctx->dag = NULL;
}

static void
sched_block(struct ir3_postsched_ctx *ctx, struct ir3_block *block)
{
   ctx->block = block;
   ctx->tex_delay = 0;
   ctx->sfu_delay = 0;

   /* move all instructions to the unscheduled list, and
    * empty the block's instruction list (to which we will
    * be inserting).
    */
   list_replace(&block->instr_list, &ctx->unscheduled_list);
   list_inithead(&block->instr_list);

   // TODO once we are using post-sched for everything we can
   // just not stick in NOP's prior to post-sched, and drop this.
   // for now keep this, since it makes post-sched optional:
   foreach_instr_safe (instr, &ctx->unscheduled_list) {
      switch (instr->opc) {
      case OPC_NOP:
      case OPC_B:
      case OPC_JUMP:
         list_delinit(&instr->node);
         break;
      default:
         break;
      }
   }

   sched_dag_init(ctx);

   /* First schedule all meta:input instructions, followed by
    * tex-prefetch.  We want all of the instructions that load
    * values into registers before the shader starts to go
    * before any other instructions.  But in particular we
    * want inputs to come before prefetches.  This is because
    * a FS's bary_ij input may not actually be live in the
    * shader, but it should not be scheduled on top of any
    * other input (but can be overwritten by a tex prefetch).
    */
   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_INPUT)
         schedule(ctx, instr);

   foreach_instr_safe (instr, &ctx->unscheduled_list)
      if (instr->opc == OPC_META_TEX_PREFETCH)
         schedule(ctx, instr);

   while (!list_is_empty(&ctx->unscheduled_list)) {
      struct ir3_instruction *instr = choose_instr(ctx);

      unsigned delay =
         ir3_delay_calc_postra(ctx->block, instr, false, ctx->v->mergedregs);
      d("delay=%u", delay);

      /* and if we run out of instructions that can be scheduled,
       * then it is time for nop's:
       */
      debug_assert(delay <= 6);
      while (delay > 0) {
         ir3_NOP(block);
         delay--;
      }

      schedule(ctx, instr);
   }

   sched_dag_destroy(ctx);
}

static bool
is_self_mov(struct ir3_instruction *instr)
{
   if (!is_same_type_mov(instr))
      return false;

   if (instr->dsts[0]->num != instr->srcs[0]->num)
      return false;

   if (instr->dsts[0]->flags & IR3_REG_RELATIV)
      return false;

   if (instr->cat1.round != ROUND_ZERO)
      return false;

   if (instr->srcs[0]->flags &
       (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_RELATIV | IR3_REG_FNEG |
        IR3_REG_FABS | IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))
      return false;

   return true;
}

/* Sometimes we end up w/ in-place mov's, ie. mov.u32u32 r1.y, r1.y,
 * as a result of places where, before RA, we are not sure that it is
 * safe to eliminate them.  We could eliminate these earlier, but
 * sometimes they are tangled up in false-dep's, etc, so it is easier
 * just to let them exist until after RA.
 */
static void
cleanup_self_movs(struct ir3 *ir)
{
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         for (unsigned i = 0; i < instr->deps_count; i++) {
            if (instr->deps[i] && is_self_mov(instr->deps[i])) {
               instr->deps[i] = NULL;
            }
         }

         if (is_self_mov(instr))
            list_delinit(&instr->node);
      }
   }
}

bool
ir3_postsched(struct ir3 *ir, struct ir3_shader_variant *v)
{
   struct ir3_postsched_ctx ctx = {
      .ir = ir,
      .v = v,
   };

   ir3_remove_nops(ir);
   cleanup_self_movs(ir);

   foreach_block (block, &ir->block_list) {
      sched_block(&ctx, block);
   }

   return true;
}