/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23
24
#include "util/ralloc.h"
25
#include "ir3_ra.h"
26
#include "ir3_shader.h"
27
28
/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or a
 * few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns it a
 *   value. This is the lattice bottom.
 * - Values at the start get "undef", which acts like a special SSA value that
 *   indicates it is never written.
 * - "overdefined" registers are set to more than one value, depending on
 *   which path you take to get to the spot. This is the lattice top.
 *
 * Overdefined is necessary to distinguish because in some programs, like this
 * simple example, it's perfectly normal and allowed:
 *
 * if (...) {
 *    mov.u32u32 ssa_1(r1.x), ...
 *    ...
 * } else {
 *    mov.u32u32 ssa_2(r1.x), ...
 *    ...
 * }
 * // r1.x is overdefined here!
 *
 * However, if an ssa value after the if is accidentally assigned to r1.x, we
 * need to remember that it's invalid to catch the mistake. Overdef has to be
 * distinguished from undef so that the state forms a valid lattice to
 * guarantee that the analysis always terminates. We could avoid relying on
 * overdef by using liveness analysis, but not relying on liveness has the
 * benefit that we can catch bugs in liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, again
 * that would rely on the merge set calculation being correct which we don't
 * want to. Instead we treat splits/collects as transfer instructions, similar
 * to the parallelcopy instructions inserted by RA, and have them copy their
 * sources to their destinations. This means that each physreg must carry the
 * SSA def assigned to it plus an offset into that definition, and when
 * validating sources we must look through splits/collects to find the
 * "original" source for each subregister.
 */
74
75
/* Sentinel values stored in reg_state::def.  Real ir3_register pointers are
 * always valid addresses, so these small integer values cannot collide with
 * an actual def.
 */
#define UNKNOWN ((struct ir3_register *)NULL)       /* lattice bottom: not yet computed */
#define UNDEF ((struct ir3_register *)(uintptr_t)1) /* never written on this path */
#define OVERDEF ((struct ir3_register *)(uintptr_t)2) /* lattice top: conflicting defs */
78
79
/* Reaching state of one physical subregister: the SSA def occupying it (or
 * one of the sentinels above) plus the scalar offset into that def, so that
 * multi-component values can be tracked per subregister.
 */
struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};
83
84
/* Map from physreg index to reaching value, for one register file. */
struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};
87
88
/* Complete dataflow state at a program point: one file_state per file. */
struct reaching_state {
   struct file_state half, full, shared;
};
91
92
struct ra_val_ctx {
   /* Instruction currently being validated; used for error reporting only. */
   struct ir3_instruction *current_instr;

   /* Scratch state updated while walking a block's instructions. */
   struct reaching_state reaching;
   /* Per-block state at block entry, indexed by block->index. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* Sizes (in scalar subregisters) of the full/half register files. */
   unsigned full_size, half_size;

   /* When true, half registers are validated against the full file (see
    * ra_val_get_file()/get_file_size()); set from v->mergedregs.
    */
   bool merged_regs;

   /* Set by check_reaching_src() on mismatch; checked at the end so that
    * all errors are reported before aborting.
    */
   bool failed;
};
105
106
static void
107
validate_error(struct ra_val_ctx *ctx, const char *condstr)
108
{
109
fprintf(stderr, "ra validation fail: %s\n", condstr);
110
fprintf(stderr, " -> for instruction: ");
111
ir3_print_instr(ctx->current_instr);
112
abort();
113
}
114
115
/* Assert a condition about ctx->current_instr; on failure report the
 * stringized condition via validate_error() (which aborts).  The
 * do/while (0) wrapper makes the macro behave as a single statement.
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond)) {                                                           \
         validate_error(ctx, #cond);                                          \
      }                                                                        \
   } while (0)
121
122
static unsigned
123
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
124
{
125
if (reg->flags & IR3_REG_SHARED)
126
return RA_SHARED_SIZE;
127
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
128
return ctx->full_size;
129
else
130
return ctx->half_size;
131
}
132
133
/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      /* One past the highest subregister this dst touches. */
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      /* A tied destination must have been allocated to the same register
       * as the source it is tied to.
       */
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}
153
154
/* This is the lattice operator. */
155
static bool
156
merge_reg(struct reg_state *dst, const struct reg_state *src)
157
{
158
if (dst->def == UNKNOWN) {
159
*dst = *src;
160
return src->def != UNKNOWN;
161
} else if (dst->def == OVERDEF) {
162
return false;
163
} else {
164
if (src->def == UNKNOWN)
165
return false;
166
else if (src->def == OVERDEF) {
167
*dst = *src;
168
return true;
169
} else {
170
if (dst->def != src->def || dst->offset != src->offset) {
171
dst->def = OVERDEF;
172
dst->offset = 0;
173
return true;
174
} else {
175
return false;
176
}
177
}
178
}
179
}
180
181
static bool
182
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
183
{
184
bool progress = false;
185
for (unsigned i = 0; i < size; i++)
186
progress |= merge_reg(&dst->regs[i], &src->regs[i]);
187
return progress;
188
}
189
190
/* Merge "src" into "dst" for the full and half files, returning whether
 * "dst" changed.  The shared file is intentionally not merged here: shared
 * registers are propagated along *physical* CFG edges via
 * merge_state_physical() (see propagate_block()).
 */
static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   return progress;
}
199
200
/* Shared-file counterpart of merge_state(), applied across physical
 * successor edges in propagate_block().  "ctx" is unused but kept for
 * symmetry with merge_state().
 */
static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}
206
207
static struct file_state *
208
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
209
{
210
if (reg->flags & IR3_REG_SHARED)
211
return &ctx->reaching.shared;
212
else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
213
return &ctx->reaching.full;
214
else
215
return &ctx->reaching.half;
216
}
217
218
/* A normal (non-copy) instruction overwrites each subregister of each dst
 * with (dst, i): the destination def itself becomes the reaching definition
 * for every subregister it covers.
 */
static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);
      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = dst,
            .offset = i,
         };
      }
   }
}
232
233
/* Splits are treated as transfers (see the file comment): the destination
 * subregisters take on whatever reaching state the selected slice of the
 * source register currently has, rather than becoming a fresh definition.
 */
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   /* Scalar offset of the extracted element within the source value. */
   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}
247
248
/* Collects are transfers too: each destination subregister takes the
 * reaching state of the corresponding source subregister.  The state is
 * gathered into a temporary first and written afterwards, so that sources
 * which overlap the destination read the pre-collect state.
 */
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned size = reg_size(dst);
   struct reg_state srcs[size];

   for (unsigned i = 0; i < collect->srcs_count; i++) {
      struct ir3_register *src = collect->srcs[i];
      /* Scalar offset of source i within the collect destination. */
      unsigned dst_offset = i * reg_elem_size(dst);
      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
         if (!ra_reg_is_src(src)) {
            /* immed/const source: treat the collect dst itself as the def. */
            srcs[dst_offset + j] = (struct reg_state){
               .def = dst,
               .offset = dst_offset + j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[dst_offset + j] = file->regs[src_physreg + j];
         }
      }
   }

   for (unsigned i = 0; i < size; i++)
      file->regs[dst_physreg + i] = srcs[i];
}
277
278
/* A parallel copy logically reads all of its sources before writing any
 * destination, so this works in two passes: snapshot the reaching state of
 * every source into a temporary, then write the destinations.  This keeps
 * overlapping src/dst pairs correct.
 */
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   /* Pass 1: gather what currently reaches each source. */
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
            /* immed/const source: treat the pcopy dst itself as the def. */
            srcs[offset + j] = (struct reg_state){
               .def = dst,
               .offset = j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[offset + j] = file->regs[src_physreg + j];
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   /* Pass 2: write the snapshot into the destinations. */
   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}
323
324
static void
325
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
326
{
327
if (instr->opc == OPC_META_SPLIT)
328
propagate_split(ctx, instr);
329
else if (instr->opc == OPC_META_COLLECT)
330
propagate_collect(ctx, instr);
331
else if (instr->opc == OPC_META_PARALLEL_COPY)
332
propagate_parallelcopy(ctx, instr);
333
else
334
propagate_normal_instr(ctx, instr);
335
}
336
337
/* One forward-dataflow step for "block": start from the block's entry state,
 * propagate through its instructions, then merge the exit state into each
 * successor's entry state.  Returns whether any successor changed, so the
 * fixed-point loop in check_reaching_defs() knows to keep iterating.
 */
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   /* full/half state flows along logical successor edges... */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   /* ...while shared regs flow along physical successor edges. */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      if (!succ)
         continue;
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}
363
364
static void
365
chase_definition(struct reg_state *state)
366
{
367
while (true) {
368
struct ir3_instruction *instr = state->def->instr;
369
switch (instr->opc) {
370
case OPC_META_SPLIT: {
371
struct ir3_register *new_def = instr->srcs[0]->def;
372
unsigned offset = instr->split.off * reg_elem_size(new_def);
373
*state = (struct reg_state){
374
.def = new_def,
375
.offset = state->offset + offset,
376
};
377
break;
378
}
379
case OPC_META_COLLECT: {
380
unsigned src_idx = state->offset / reg_elem_size(state->def);
381
unsigned src_offset = state->offset % reg_elem_size(state->def);
382
struct ir3_register *new_def = instr->srcs[src_idx]->def;
383
if (new_def) {
384
*state = (struct reg_state){
385
.def = new_def,
386
.offset = src_offset,
387
};
388
} else {
389
/* Bail on immed/const */
390
return;
391
}
392
break;
393
}
394
case OPC_META_PARALLEL_COPY: {
395
unsigned dst_idx = ~0;
396
for (unsigned i = 0; i < instr->dsts_count; i++) {
397
if (instr->dsts[i] == state->def) {
398
dst_idx = i;
399
break;
400
}
401
}
402
assert(dst_idx != ~0);
403
404
struct ir3_register *new_def = instr->srcs[dst_idx]->def;
405
if (new_def) {
406
state->def = new_def;
407
} else {
408
/* Bail on immed/const */
409
return;
410
}
411
break;
412
}
413
default:
414
return;
415
}
416
}
417
}
418
419
static void
420
dump_reg_state(struct reg_state *state)
421
{
422
if (state->def == UNDEF) {
423
fprintf(stderr, "no reaching definition");
424
} else if (state->def == OVERDEF) {
425
fprintf(stderr,
426
"more than one reaching definition or partial definition");
427
} else {
428
/* The analysis should always remove UNKNOWN eventually. */
429
assert(state->def != UNKNOWN);
430
431
fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
432
state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
433
state->def->num / 4, "xyzw"[state->def->num % 4],
434
state -> offset);
435
}
436
}
437
438
/* Verify that, for every subregister of "src", the reaching definition in
 * the current state matches what src's SSA def (chased through copies)
 * says it should be.  On mismatch, print both sides and mark ctx->failed,
 * but keep going so all errors get reported before the final abort.
 */
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      /* What should occupy physreg + i: subregister i of src's def, chased
       * back through split/collect/pcopy to the original definition.
       */
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}
470
471
static void
472
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
473
{
474
if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
475
instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
476
return;
477
}
478
479
ra_foreach_src (src, instr) {
480
check_reaching_src(ctx, instr, src);
481
}
482
}
483
484
/* Re-walk "block" with the fixed-point entry state, checking each
 * instruction's sources before propagating it.  Successor phi sources are
 * checked here, against this block's end-of-block state, because a phi
 * logically reads its i'th source on the edge from predecessor i (which is
 * why check_reaching_instr() skips phis).
 */
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         /* Phis are assumed to be grouped at the top of the block — stop at
          * the first non-phi.
          */
         if (instr->opc != OPC_META_PHI)
            break;
         /* A phi source with no def (undef on this edge) needs no check. */
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}
508
509
/* Reaching-definitions validation driver: initialize the start block's state
 * to UNDEF (all other blocks begin at UNKNOWN, i.e. NULL, courtesy of
 * rzalloc), iterate propagate_block() to a fixed point, then re-walk every
 * block checking sources.  Aborts if any check recorded a failure.
 */
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   /* At shader entry nothing has been written yet. */
   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

   /* Iterate to a fixed point; the lattice guarantees termination. */
   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}
541
542
/* Entry point: validate the register allocation of shader variant "v".
 * full_size/half_size are the sizes of the register files RA allocated
 * from, and block_count must cover every block->index.  Runs cheap
 * per-instruction checks first, then the global reaching-defs analysis.
 * Compiled out entirely in NDEBUG builds; aborts the process on failure.
 */
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;

   /* Bounds/tied-register checks first, so the dataflow pass below can
    * index files without worrying about out-of-bounds accesses.
    */
   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}
571
572