GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/regalloc.c

/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "gpir.h"
#include "util/u_dynarray.h"

/* Per-register information */

struct reg_info {
   BITSET_WORD *conflicts;
   struct util_dynarray conflict_list;

   /* Number of conflicts that must be allocated to physical registers. */
   unsigned phys_conflicts;

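   /* Number of conflicts with nodes, which may be assigned either physical
    * or value registers (see add_interference() below for how the two
    * counters are maintained).
    */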
   unsigned node_conflicts;

   /* Number of conflicts that can be allocated to either. */
   unsigned total_conflicts;

   int assigned_color;

   bool visited;
};

struct regalloc_ctx {
   unsigned bitset_words, num_nodes_and_regs;
   struct reg_info *registers;

   /* Reusable scratch liveness array */
   BITSET_WORD *live;

   unsigned *worklist;
   unsigned worklist_start, worklist_end;

   unsigned *stack;
   unsigned stack_size;

   gpir_compiler *comp;
   void *mem_ctx;
};

/* Liveness analysis */

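/* Per-node transfer function for backward liveness: a register is killed
 * when it is written (gpir_op_store_reg) and generated when it is read
 * (gpir_op_load_reg), i.e. live_before = GEN | (live_after & ~KILL).
 */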
static void propagate_liveness_instr(gpir_node *node, BITSET_WORD *live,
                                     gpir_compiler *comp)
{
   /* KILL */
   if (node->type == gpir_node_type_store) {
      if (node->op == gpir_op_store_reg) {
         gpir_store_node *store = gpir_node_to_store(node);
         BITSET_CLEAR(live, store->reg->index);
      }
   }

   /* GEN */
   if (node->type == gpir_node_type_load) {
      if (node->op == gpir_op_load_reg) {
         gpir_load_node *load = gpir_node_to_load(node);
         BITSET_SET(live, load->reg->index);
      }
   }
}

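/* Recompute one block: live_out is the union of the successors' live_in
 * sets, and live_in is live_out filtered backwards through the block's
 * nodes. Returns whether live_in changed, so the caller can iterate to a
 * fixed point.
 */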
static bool propagate_liveness_block(gpir_block *block, struct regalloc_ctx *ctx)
{
   for (unsigned i = 0; i < 2; i++) {
      if (block->successors[i]) {
         for (unsigned j = 0; j < ctx->bitset_words; j++)
            block->live_out[j] |= block->successors[i]->live_in[j];
      }
   }

   memcpy(ctx->live, block->live_out, ctx->bitset_words * sizeof(BITSET_WORD));

   list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
      propagate_liveness_instr(node, ctx->live, block->comp);
   }

   bool changed = false;
   for (unsigned i = 0; i < ctx->bitset_words; i++) {
      changed |= (block->live_in[i] != ctx->live[i]);
      block->live_in[i] = ctx->live[i];
   }
   return changed;
}

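/* Seed def_out with the registers written in this block; calc_liveness()
 * then propagates it forward along the CFG.
 */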
static void calc_def_block(gpir_block *block)
{
   list_for_each_entry(gpir_node, node, &block->node_list, list) {
      if (node->op == gpir_op_store_reg) {
         gpir_store_node *store = gpir_node_to_store(node);
         BITSET_SET(block->def_out, store->reg->index);
      }
   }
}

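/* Two fixed-point passes: a backward one for liveness (live_in/live_out),
 * then a forward one that unions def_out into each successor, so that
 * def_out ends up holding every register defined on some path reaching the
 * end of the block.
 */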
static void calc_liveness(struct regalloc_ctx *ctx)
{
   bool changed = true;
   while (changed) {
      changed = false;
      list_for_each_entry_rev(gpir_block, block, &ctx->comp->block_list, list) {
         changed |= propagate_liveness_block(block, ctx);
      }
   }

   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      calc_def_block(block);
   }

   changed = true;
   while (changed) {
      changed = false;
      list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
         for (unsigned i = 0; i < 2; i++) {
            gpir_block *succ = block->successors[i];
            if (!succ)
               continue;

            for (unsigned j = 0; j < ctx->bitset_words; j++) {
               BITSET_WORD new = block->def_out[j] & ~succ->def_out[j];
               changed |= (new != 0);
               succ->def_out[j] |= block->def_out[j];
            }
         }
      }
   }
}

/* Interference calculation */

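/* Add a symmetric edge between i and j in the interference graph. Entries
 * [0, cur_reg) are physical registers and [cur_reg, cur_reg + cur_index)
 * are nodes, and each side's degree is tracked separately per neighbor
 * kind, since the two kinds draw from different color ranges.
 */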
static void add_interference(struct regalloc_ctx *ctx, unsigned i, unsigned j)
{
   if (i == j)
      return;

   struct reg_info *a = &ctx->registers[i];
   struct reg_info *b = &ctx->registers[j];

   if (BITSET_TEST(a->conflicts, j))
      return;

   BITSET_SET(a->conflicts, j);
   BITSET_SET(b->conflicts, i);

   a->total_conflicts++;
   b->total_conflicts++;
   if (j < ctx->comp->cur_reg)
      a->phys_conflicts++;
   else
      a->node_conflicts++;

   if (i < ctx->comp->cur_reg)
      b->phys_conflicts++;
   else
      b->node_conflicts++;

   util_dynarray_append(&a->conflict_list, unsigned, j);
   util_dynarray_append(&b->conflict_list, unsigned, i);
}

/* Make the register or node "i" interfere with all the other live registers
 * and nodes.
 */
static void add_all_interferences(struct regalloc_ctx *ctx,
                                  unsigned i,
                                  BITSET_WORD *live_nodes,
                                  BITSET_WORD *live_regs)
{
   int live_node;
   BITSET_FOREACH_SET(live_node, live_nodes, ctx->comp->cur_index) {
      add_interference(ctx, i,
                       live_node + ctx->comp->cur_reg);
   }

   /* Use the live_regs parameter (both callers pass ctx->live) and iterate
    * only over the register portion of the bitset.
    */
   int live_reg;
   BITSET_FOREACH_SET(live_reg, live_regs, ctx->comp->cur_reg) {
      add_interference(ctx, i, live_reg);
   }
}

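/* Debug helper: dump the currently live registers and nodes when
 * LIMA_DEBUG_GP is set.
 */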
static void print_liveness(struct regalloc_ctx *ctx,
                           BITSET_WORD *live_reg, BITSET_WORD *live_val)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   int live_idx;
   BITSET_FOREACH_SET(live_idx, live_reg, ctx->comp->cur_reg) {
      printf("reg%d ", live_idx);
   }
   BITSET_FOREACH_SET(live_idx, live_val, ctx->comp->cur_index) {
      printf("%d ", live_idx);
   }
   printf("\n");
}

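/* Build the interference graph with one reverse scan per block, maintaining
 * two live sets: live_nodes for SSA nodes and ctx->live for registers. Each
 * value interferes with everything live at its definition point.
 */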
static void calc_interference(struct regalloc_ctx *ctx)
{
   BITSET_WORD *live_nodes =
      rzalloc_array(ctx->mem_ctx, BITSET_WORD, ctx->comp->cur_index);

   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      /* Initialize liveness at the end of the block, but exclude values that
       * definitely aren't defined by the end. This helps out with
       * partially-defined registers, like:
       *
       * if (condition) {
       *    foo = ...;
       * }
       * if (condition) {
       *    ... = foo;
       * }
       *
       * If we naively propagated liveness backwards, foo would be live from
       * the beginning of the program, but if we're not inside a loop then
       * its value is undefined before the first if and we don't have to
       * consider it live. Mask out registers like foo here.
       */
      for (unsigned i = 0; i < ctx->bitset_words; i++) {
         ctx->live[i] = block->live_out[i] & block->def_out[i];
      }

      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         gpir_debug("processing node %d\n", node->index);
         print_liveness(ctx, ctx->live, live_nodes);
         if (node->type != gpir_node_type_store &&
             node->type != gpir_node_type_branch) {
            add_all_interferences(ctx, node->index + ctx->comp->cur_reg,
                                  live_nodes, ctx->live);

            /* KILL */
            BITSET_CLEAR(live_nodes, node->index);
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            add_all_interferences(ctx, store->reg->index,
                                  live_nodes, ctx->live);

            /* KILL */
            BITSET_CLEAR(ctx->live, store->reg->index);
         }

         /* GEN */
         if (node->type == gpir_node_type_store) {
            gpir_store_node *store = gpir_node_to_store(node);
            BITSET_SET(live_nodes, store->child->index);
         } else if (node->type == gpir_node_type_alu) {
            gpir_alu_node *alu = gpir_node_to_alu(node);
            for (int i = 0; i < alu->num_child; i++)
               BITSET_SET(live_nodes, alu->children[i]->index);
         } else if (node->type == gpir_node_type_branch) {
            gpir_branch_node *branch = gpir_node_to_branch(node);
            BITSET_SET(live_nodes, branch->cond->index);
         } else if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            BITSET_SET(ctx->live, load->reg->index);
         }
      }
   }
}

/* Register allocation */

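/* Coloring proceeds in the Chaitin-Briggs style: repeatedly "simplify"
 * (remove) any vertex whose remaining degree guarantees a free color (see
 * can_simplify() below), pushing it on a stack; when no such vertex is
 * left, optimistically push the one with the fewest remaining neighbors.
 * Popping the stack then assigns colors greedily.
 */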
static bool can_simplify(struct regalloc_ctx *ctx, unsigned i)
{
   struct reg_info *info = &ctx->registers[i];
   if (i < ctx->comp->cur_reg) {
      /* Physical regs. */
      return info->phys_conflicts + info->node_conflicts < GPIR_PHYSICAL_REG_NUM;
   } else {
      /* Nodes: if we manage to allocate all of its conflicting physical
       * registers, they will take up at most GPIR_PHYSICAL_REG_NUM colors, so
       * we can ignore any more than that.
       */
      return MIN2(info->phys_conflicts, GPIR_PHYSICAL_REG_NUM) +
         info->node_conflicts < GPIR_PHYSICAL_REG_NUM + GPIR_VALUE_REG_NUM;
   }
}

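/* Simplify: remove i from the interference graph. Each neighbor's degree
 * count drops, and any neighbor that becomes trivially colorable as a
 * result joins the worklist.
 */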
static void push_stack(struct regalloc_ctx *ctx, unsigned i)
{
   ctx->stack[ctx->stack_size++] = i;
   if (i < ctx->comp->cur_reg)
      gpir_debug("pushing reg%u\n", i);
   else
      gpir_debug("pushing %d\n", i - ctx->comp->cur_reg);

   struct reg_info *info = &ctx->registers[i];
   assert(info->visited);

   util_dynarray_foreach(&info->conflict_list, unsigned, conflict) {
      struct reg_info *conflict_info = &ctx->registers[*conflict];
      if (i < ctx->comp->cur_reg) {
         assert(conflict_info->phys_conflicts > 0);
         conflict_info->phys_conflicts--;
      } else {
         assert(conflict_info->node_conflicts > 0);
         conflict_info->node_conflicts--;
      }
      if (!ctx->registers[*conflict].visited && can_simplify(ctx, *conflict)) {
         ctx->worklist[ctx->worklist_end++] = *conflict;
         ctx->registers[*conflict].visited = true;
      }
   }
}

static bool do_regalloc(struct regalloc_ctx *ctx)
{
   ctx->worklist_start = 0;
   ctx->worklist_end = 0;
   ctx->stack_size = 0;

   /* Step 1: find the initially simplifiable registers */
   for (int i = 0; i < ctx->comp->cur_reg + ctx->comp->cur_index; i++) {
      if (can_simplify(ctx, i)) {
         ctx->worklist[ctx->worklist_end++] = i;
         ctx->registers[i].visited = true;
      }
   }

   while (true) {
      /* Step 2: push onto the stack whatever we can */
      while (ctx->worklist_start != ctx->worklist_end) {
         push_stack(ctx, ctx->worklist[ctx->worklist_start++]);
      }

      if (ctx->stack_size < ctx->num_nodes_and_regs) {
         /* Step 3: if there are still unsimplifiable nodes left, we need to
          * optimistically push a node onto the stack. Choose the one with
          * the smallest number of current neighbors, since that's the most
          * likely to succeed.
          */
         unsigned min_conflicts = UINT_MAX;
         unsigned best_reg = 0;
         for (unsigned reg = 0; reg < ctx->num_nodes_and_regs; reg++) {
            struct reg_info *info = &ctx->registers[reg];
            if (info->visited)
               continue;
            if (info->phys_conflicts + info->node_conflicts < min_conflicts) {
               best_reg = reg;
               min_conflicts = info->phys_conflicts + info->node_conflicts;
            }
         }
         gpir_debug("optimistic triggered\n");
         ctx->registers[best_reg].visited = true;
         push_stack(ctx, best_reg);
      } else {
         break;
      }
   }

   /* Step 4: pop off the stack and assign colors */
   for (int i = ctx->num_nodes_and_regs - 1; i >= 0; i--) {
      unsigned idx = ctx->stack[i];
      struct reg_info *reg = &ctx->registers[idx];

      unsigned num_available_regs;
      if (idx < ctx->comp->cur_reg) {
         num_available_regs = GPIR_PHYSICAL_REG_NUM;
      } else {
         num_available_regs = GPIR_VALUE_REG_NUM + GPIR_PHYSICAL_REG_NUM;
      }

      bool found = false;
      unsigned start = i % num_available_regs;
      for (unsigned j = 0; j < num_available_regs; j++) {
         unsigned candidate = (j + start) % num_available_regs;
         bool available = true;
         util_dynarray_foreach(&reg->conflict_list, unsigned, conflict_idx) {
            struct reg_info *conflict = &ctx->registers[*conflict_idx];
            if (conflict->assigned_color >= 0 &&
                conflict->assigned_color == (int) candidate) {
               available = false;
               break;
            }
         }

         if (available) {
            reg->assigned_color = candidate;
            found = true;
            break;
         }
      }

      /* TODO: spilling */
      if (!found) {
         gpir_error("Failed to allocate registers\n");
         return false;
      }
   }

   return true;
}

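/* Translate colors back onto the IR: node colors become value_reg, and
 * physical-register colors are split into a register index and a component
 * (four components per physical register, hence the /4 and %4). live_out_phys
 * records which colors are live across the end of each block.
 */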
static void assign_regs(struct regalloc_ctx *ctx)
{
   list_for_each_entry(gpir_block, block, &ctx->comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->index >= 0) {
            node->value_reg =
               ctx->registers[ctx->comp->cur_reg + node->index].assigned_color;
         }

         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            unsigned color = ctx->registers[load->reg->index].assigned_color;
            load->index = color / 4;
            load->component = color % 4;
         }

         if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            unsigned color = ctx->registers[store->reg->index].assigned_color;
            store->index = color / 4;
            store->component = color % 4;
            node->value_reg = color;
         }
      }

      block->live_out_phys = 0;

      int reg_idx;
      BITSET_FOREACH_SET(reg_idx, block->live_out, ctx->comp->cur_reg) {
         if (BITSET_TEST(block->def_out, reg_idx)) {
            block->live_out_phys |= (1ull << ctx->registers[reg_idx].assigned_color);
         }
      }
   }
}

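/* Debug dump of the allocation result (LIMA_DEBUG_GP): each node prints as
 * index/value_reg followed by its predecessors.
 */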
static void regalloc_print_result(gpir_compiler *comp)
{
   if (!(lima_debug & LIMA_DEBUG_GP))
      return;

   int index = 0;
   printf("======== regalloc ========\n");
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         printf("%03d: %d/%d %s ", index++, node->index, node->value_reg,
                gpir_op_infos[node->op].name);
         gpir_node_foreach_pred(node, dep) {
            gpir_node *pred = dep->pred;
            printf(" %d/%d", pred->index, pred->value_reg);
         }
         if (node->op == gpir_op_load_reg) {
            gpir_load_node *load = gpir_node_to_load(node);
            printf(" -/%d", 4 * load->index + load->component);
            printf(" (%d)", load->reg->index);
         } else if (node->op == gpir_op_store_reg) {
            gpir_store_node *store = gpir_node_to_store(node);
            printf(" (%d)", store->reg->index);
         }
         printf("\n");
      }
      printf("----------------------------\n");
   }
}

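/* Entry point: allocate scratch state, run liveness and interference
 * analysis, color the graph, and write the result back onto the IR. All
 * temporary allocations hang off mem_ctx and are freed in one go.
 */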
bool gpir_regalloc_prog(gpir_compiler *comp)
{
   struct regalloc_ctx ctx;

   ctx.mem_ctx = ralloc_context(NULL);
   ctx.num_nodes_and_regs = comp->cur_reg + comp->cur_index;
   ctx.bitset_words = BITSET_WORDS(ctx.num_nodes_and_regs);
   ctx.live = ralloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
   ctx.worklist = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
   ctx.stack = ralloc_array(ctx.mem_ctx, unsigned, ctx.num_nodes_and_regs);
   ctx.comp = comp;

   ctx.registers = rzalloc_array(ctx.mem_ctx, struct reg_info, ctx.num_nodes_and_regs);
   for (unsigned i = 0; i < ctx.num_nodes_and_regs; i++) {
      ctx.registers[i].conflicts = rzalloc_array(ctx.mem_ctx, BITSET_WORD,
                                                 ctx.bitset_words);
      util_dynarray_init(&ctx.registers[i].conflict_list, ctx.mem_ctx);
      /* Mark "no color assigned yet" so the >= 0 check in do_regalloc()
       * doesn't mistake the zero-initialized entries for color 0.
       */
      ctx.registers[i].assigned_color = -1;
   }

   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      block->live_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
      block->live_in = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
      block->def_out = rzalloc_array(ctx.mem_ctx, BITSET_WORD, ctx.bitset_words);
   }

   calc_liveness(&ctx);
   calc_interference(&ctx);
   if (!do_regalloc(&ctx)) {
      ralloc_free(ctx.mem_ctx);
      return false;
   }
   assign_regs(&ctx);

   regalloc_print_result(comp);
   ralloc_free(ctx.mem_ctx);
   return true;
}