GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/regalloc.c
/*
 * Copyright (c) 2017 Lima Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "util/ralloc.h"
#include "util/register_allocate.h"
#include "util/u_debug.h"

#include "ppir.h"
#include "lima_context.h"

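/* The PP register file is modelled as 6 vec4 registers, tracked at
 * component granularity: 6 * 4 = 24 allocatable scalar slots. */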
#define PPIR_REG_COUNT (6 * 4)

enum ppir_ra_reg_class {
   ppir_ra_reg_class_vec1,
   ppir_ra_reg_class_vec2,
   ppir_ra_reg_class_vec3,
   ppir_ra_reg_class_vec4,

   /* 4 reg classes for load/store instr regs:
    * load/store instr has no swizzle field, so the (virtual) register
    * must be allocated at the beginning of a (physical) register.
    */
   ppir_ra_reg_class_head_vec1,
   ppir_ra_reg_class_head_vec2,
   ppir_ra_reg_class_head_vec3,
   ppir_ra_reg_class_head_vec4,

   ppir_ra_reg_class_num,
};

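/* Build the ra_regs set for Mesa's common register allocator: one class per
 * vector size for values that may start at any component, plus "head"
 * classes for values that must start at component .x. */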
struct ra_regs *ppir_regalloc_init(void *mem_ctx)
{
   struct ra_regs *ret = ra_alloc_reg_set(mem_ctx, PPIR_REG_COUNT, false);
   if (!ret)
      return NULL;

   /* Classes for contiguous 1-4 channel groups anywhere within a register. */
   struct ra_class *classes[ppir_ra_reg_class_num];
   for (int i = 0; i < ppir_ra_reg_class_head_vec1; i++) {
      classes[i] = ra_alloc_contig_reg_class(ret, i + 1);

      for (int j = 0; j < PPIR_REG_COUNT; j += 4) {
         for (int swiz = 0; swiz < (4 - i); swiz++)
            ra_class_add_reg(classes[i], j + swiz);
      }
   }

   /* Classes for contiguous 1-4 channels with a start channel of .x */
   for (int i = ppir_ra_reg_class_head_vec1; i < ppir_ra_reg_class_num; i++) {
      classes[i] = ra_alloc_contig_reg_class(ret, i - ppir_ra_reg_class_head_vec1 + 1);

      for (int j = 0; j < PPIR_REG_COUNT; j += 4)
         ra_class_add_reg(classes[i], j);
   }

   ra_set_finalize(ret, NULL);
   return ret;
}

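/* Add every SSA destination to comp->reg_list so it gets a node in the
 * interference graph; end nodes, constants and unscheduled nodes are
 * skipped. */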
static void ppir_regalloc_update_reglist_ssa(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {
         if (node->is_end)
            continue;

         if (!node->instr || node->op == ppir_op_const)
            continue;

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest) {
            ppir_reg *reg = NULL;

            if (dest->type == ppir_target_ssa) {
               reg = &dest->ssa;
               list_addtail(&reg->list, &comp->reg_list);
               comp->reg_num++;
            }
         }
      }
   }
}

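/* Debug dump of the allocation result: one line per instruction, printing
 * "(node index|dest reg|src regs)" for every occupied slot. */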
static void ppir_regalloc_print_result(ppir_compiler *comp)
{
   printf("======ppir regalloc result======\n");
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         printf("%03d:", instr->index);
         for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
            ppir_node *node = instr->slots[i];
            if (!node)
               continue;

            printf(" (%d|", node->index);

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest)
               printf("%d", ppir_target_get_dest_reg_index(dest));

            printf("|");

            for (int i = 0; i < ppir_node_get_src_num(node); i++) {
               if (i)
                  printf(" ");
               printf("%d", ppir_target_get_src_reg_index(ppir_node_get_src(node, i)));
            }

            printf(")");
         }
         printf("\n");
      }
   }
   printf("--------------------------\n");
}

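/* The two helpers below create a fresh, already-scheduled instruction right
 * after (resp. before) a reference instruction and bump the seq numbers of
 * the following instructions to keep the ordering consistent. */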
static bool create_new_instr_after(ppir_block *block, ppir_instr *ref,
                                   ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   list_del(&newinstr->list);
   list_add(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq+1;
   newinstr->scheduled = true;
   return true;
}

static bool create_new_instr_before(ppir_block *block, ppir_instr *ref,
                                    ppir_node *node)
{
   ppir_instr *newinstr = ppir_instr_create(block);
   if (unlikely(!newinstr))
      return false;

   list_del(&newinstr->list);
   list_addtail(&newinstr->list, &ref->list);

   if (!ppir_instr_insert_node(newinstr, node))
      return false;

   list_for_each_entry_from(ppir_instr, instr, ref, &block->instr_list, list) {
      instr->seq++;
   }
   newinstr->seq = ref->seq-1;
   newinstr->scheduled = true;
   return true;
}

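/* Rewrite one source of 'node' that reads the spilled register: load the
 * value back from temporary memory with a load_temp, either directly into
 * the free uniform pipeline slot of the same instruction or, if that slot
 * is unusable, via a new instruction plus a mov into a fresh SSA value. */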
static bool ppir_update_spilled_src(ppir_compiler *comp, ppir_block *block,
                                    ppir_node *node, ppir_src *src,
                                    ppir_node **fill_node)
{
   /* nodes might have multiple references to the same value.
    * avoid creating unnecessary loads for the same fill by
    * saving the node resulting from the temporary load */
   if (*fill_node)
      goto update_src;

   int num_components = src->reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (!load_node)
      return false;
   list_addtail(&load_node->list, &node->list);
   comp->num_fills++;

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->state.stack_size; /* index sizes are negative */
   load->num_components = num_components;

   ppir_dest *ld_dest = &load->dest;
   ld_dest->type = ppir_target_pipeline;
   ld_dest->pipeline = ppir_pipeline_reg_uniform;
   ld_dest->write_mask = u_bit_consecutive(0, num_components);

   /* If the uniform slot is empty, we can insert the load_temp
    * there and use it directly. Exceptionally, if the node is in the
    * varying or texld slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_UNIFORM] &&
       node->instr_pos != PPIR_INSTR_SLOT_VARYING &&
       node->instr_pos != PPIR_INSTR_SLOT_TEXLD) {
      ppir_node_target_assign(src, load_node);
      *fill_node = load_node;
      return ppir_instr_insert_node(node->instr, load_node);
   }

   /* Uniform slot was taken, so fall back to a new instruction with a mov */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   ppir_dest *alu_dest = &move_alu->dest;
   alu_dest->type = ppir_target_ssa;
   alu_dest->ssa.num_components = num_components;
   alu_dest->ssa.spilled = true;
   alu_dest->write_mask = u_bit_consecutive(0, num_components);

   list_addtail(&alu_dest->ssa.list, &comp->reg_list);
   comp->reg_num++;

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   /* insert the new node as predecessor */
   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   *fill_node = move_node;

update_src:
   /* switch node src to use the fill node dest */
   ppir_node_target_assign(src, *fill_node);

   return true;
}

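/* A register destination may only be partially written, so before spilling
 * it the current contents have to be reloaded: emit a load_temp in a new
 * instruction and mov the loaded value back into the register. */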
static bool ppir_update_spilled_dest_load(ppir_compiler *comp, ppir_block *block,
                                          ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   assert(dest->type == ppir_target_register);
   ppir_reg *reg = dest->reg;
   int num_components = reg->num_components;

   /* alloc new node to load value */
   ppir_node *load_node = ppir_node_create(block, ppir_op_load_temp, -1, 0);
   if (!load_node)
      return false;
   list_addtail(&load_node->list, &node->list);
   comp->num_fills++;

   ppir_load_node *load = ppir_node_to_load(load_node);

   load->index = -comp->prog->state.stack_size; /* index sizes are negative */
   load->num_components = num_components;

   load->dest.type = ppir_target_pipeline;
   load->dest.pipeline = ppir_pipeline_reg_uniform;
   load->dest.write_mask = u_bit_consecutive(0, num_components);

   /* New instruction is needed since we're updating a dest register
    * and we can't write to the uniform pipeline reg */
   if (!create_new_instr_before(block, node->instr, load_node))
      return false;

   /* Create move node */
   ppir_node *move_node = ppir_node_create(block, ppir_op_mov, -1, 0);
   if (unlikely(!move_node))
      return false;
   list_addtail(&move_node->list, &node->list);

   ppir_alu_node *move_alu = ppir_node_to_alu(move_node);

   move_alu->num_src = 1;
   move_alu->src->type = ppir_target_pipeline;
   move_alu->src->pipeline = ppir_pipeline_reg_uniform;
   for (int i = 0; i < 4; i++)
      move_alu->src->swizzle[i] = i;

   move_alu->dest.type = ppir_target_register;
   move_alu->dest.reg = reg;
   move_alu->dest.write_mask = u_bit_consecutive(0, num_components);

   if (!ppir_instr_insert_node(load_node->instr, move_node))
      return false;

   ppir_node_foreach_pred_safe(node, dep) {
      ppir_node *pred = dep->pred;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(load_node, pred, ppir_dep_src);
   }
   ppir_node_add_dep(node, move_node, ppir_dep_src);
   ppir_node_add_dep(move_node, load_node, ppir_dep_src);

   return true;
}

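/* Spill the value produced by 'node': add a store_temp that writes the
 * result back to temporary memory, reusing the store slot of the same
 * instruction when possible and otherwise appending a new instruction. */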
static bool ppir_update_spilled_dest(ppir_compiler *comp, ppir_block *block,
                                     ppir_node *node)
{
   ppir_dest *dest = ppir_node_get_dest(node);
   assert(dest != NULL);
   ppir_reg *reg = ppir_dest_get_reg(dest);

   /* alloc new node to store value */
   ppir_node *store_node = ppir_node_create(block, ppir_op_store_temp, -1, 0);
   if (!store_node)
      return false;
   list_addtail(&store_node->list, &node->list);
   comp->num_spills++;

   ppir_store_node *store = ppir_node_to_store(store_node);

   store->index = -comp->prog->state.stack_size; /* index sizes are negative */

   ppir_node_target_assign(&store->src, node);
   store->num_components = reg->num_components;

   /* insert the new node as successor */
   ppir_node_foreach_succ_safe(node, dep) {
      ppir_node *succ = dep->succ;
      ppir_node_remove_dep(dep);
      ppir_node_add_dep(succ, store_node, ppir_dep_src);
   }
   ppir_node_add_dep(store_node, node, ppir_dep_src);

   /* If the store temp slot is empty, we can insert the store_temp
    * there and use it directly. Exceptionally, if the node is in the
    * combine slot, this doesn't work. */
   if (!node->instr->slots[PPIR_INSTR_SLOT_STORE_TEMP] &&
       node->instr_pos != PPIR_INSTR_SLOT_ALU_COMBINE)
      return ppir_instr_insert_node(node->instr, store_node);

   /* Not possible to merge store, so fall back to a new instruction */
   return create_new_instr_after(block, node->instr, store_node);
}

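/* Walk every node in the program and rewrite all defs and uses of the
 * chosen register to go through store_temp/load_temp. */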
static bool ppir_regalloc_spill_reg(ppir_compiler *comp, ppir_reg *chosen)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_node, node, &block->node_list, list) {

         ppir_dest *dest = ppir_node_get_dest(node);
         if (dest && ppir_dest_get_reg(dest) == chosen) {
            /* If dest is a register, it might be updating only some of its
             * components, so we need to load the existing value first */
            if (dest->type == ppir_target_register) {
               if (!ppir_update_spilled_dest_load(comp, block, node))
                  return false;
            }
            if (!ppir_update_spilled_dest(comp, block, node))
               return false;
         }

         ppir_node *fill_node = NULL;
         /* nodes might have multiple references to the same value.
          * avoid creating unnecessary loads for the same fill by
          * saving the node resulting from the temporary load */
         for (int i = 0; i < ppir_node_get_src_num(node); i++) {
            ppir_src *src = ppir_node_get_src(node, i);
            ppir_reg *reg = ppir_src_get_reg(src);
            if (reg == chosen) {
               if (!ppir_update_spilled_src(comp, block, node, src, &fill_node))
                  return false;
            }
         }
      }
   }

   return true;
}

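/* Pick the register to spill. The base cost favours spilling wide
 * (many-component) values; registers used in instructions whose uniform or
 * store_temp slot is already occupied get a slightly higher cost, since
 * spilling them cannot reuse those slots and needs extra instructions. */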
static ppir_reg *ppir_regalloc_choose_spill_node(ppir_compiler *comp,
                                                 struct ra_graph *g)
{
   float spill_costs[comp->reg_num];
   /* experimentally determined, it seems to be worth scaling cost of
    * regs in instructions that have used uniform/store_temp slots,
    * but not too much as to offset the num_components base cost. */
   const float slot_scale = 1.1f;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      if (reg->spilled) {
         /* not considered for spilling */
         spill_costs[reg->regalloc_index] = 0.0f;
         continue;
      }

      /* It is beneficial to spill registers with higher component number,
       * so increase the cost of spilling registers with few components */
      float spill_cost = 4.0f / (float)reg->num_components;
      spill_costs[reg->regalloc_index] = spill_cost;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         if (instr->slots[PPIR_INSTR_SLOT_UNIFORM]) {
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;
               for (int j = 0; j < ppir_node_get_src_num(node); j++) {
                  ppir_src *src = ppir_node_get_src(node, j);
                  if (!src)
                     continue;
                  ppir_reg *reg = ppir_src_get_reg(src);
                  if (!reg)
                     continue;

                  spill_costs[reg->regalloc_index] *= slot_scale;
               }
            }
         }
         if (instr->slots[PPIR_INSTR_SLOT_STORE_TEMP]) {
            for (int i = 0; i < PPIR_INSTR_SLOT_NUM; i++) {
               ppir_node *node = instr->slots[i];
               if (!node)
                  continue;
               ppir_dest *dest = ppir_node_get_dest(node);
               if (!dest)
                  continue;
               ppir_reg *reg = ppir_dest_get_reg(dest);
               if (!reg)
                  continue;

               spill_costs[reg->regalloc_index] *= slot_scale;
            }
         }
      }
   }

   for (int i = 0; i < comp->reg_num; i++)
      ra_set_node_spill_cost(g, i, spill_costs[i]);

   int r = ra_get_best_spill_node(g);
   if (r == -1)
      return NULL;

   ppir_reg *chosen = NULL;
   int i = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      if (i++ == r) {
         chosen = reg;
         break;
      }
   }
   assert(chosen);
   chosen->spilled = true;
   chosen->is_head = true; /* store_temp unable to do swizzle */

   return chosen;
}

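/* (Re)assign regalloc indices and reallocate the per-instruction liveness
 * bitsets; this is redone before every allocation attempt, since spilling
 * adds new registers. */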
static void ppir_regalloc_reset_liveness_info(ppir_compiler *comp)
{
   int idx = 0;

   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      reg->regalloc_index = idx++;
   }

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {

         if (instr->live_mask)
            ralloc_free(instr->live_mask);
         instr->live_mask = rzalloc_array(comp, uint8_t,
                                          reg_mask_size(comp->reg_num));

         if (instr->live_set)
            ralloc_free(instr->live_set);
         instr->live_set = rzalloc_array(comp, BITSET_WORD, comp->reg_num);

         if (instr->live_internal)
            ralloc_free(instr->live_internal);
         instr->live_internal = rzalloc_array(comp, BITSET_WORD, comp->reg_num);
      }
   }
}

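/* All registers live at the same point interfere pairwise; add those edges
 * to the graph, clearing the liveness bitset as we go. */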
static void ppir_all_interference(ppir_compiler *comp, struct ra_graph *g,
                                  BITSET_WORD *liveness)
{
   int i, j;
   BITSET_FOREACH_SET(i, liveness, comp->reg_num) {
      BITSET_FOREACH_SET(j, liveness, comp->reg_num) {
         ra_add_node_interference(g, i, j);
      }
      BITSET_CLEAR(liveness, i);
   }
}

int lima_ppir_force_spilling = 0;

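/* One register-allocation attempt: set up node classes, run liveness
 * analysis, build the interference graph and try to colour it. On failure
 * (or when spilling is forced for debugging) pick a register to spill,
 * rewrite its uses and report 'spilled' so the caller retries. */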
static bool ppir_regalloc_prog_try(ppir_compiler *comp, bool *spilled)
{
   ppir_regalloc_reset_liveness_info(comp);

   struct ra_graph *g = ra_alloc_interference_graph(
      comp->ra, comp->reg_num);

   int n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      int c = ppir_ra_reg_class_vec1 + (reg->num_components - 1);
      if (reg->is_head)
         c += 4;
      ra_set_node_class(g, n++, ra_get_class_from_index(comp->ra, c));
   }

   ppir_liveness_analysis(comp);

   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_instr, instr, &block->instr_list, list) {
         int i;
         BITSET_FOREACH_SET(i, instr->live_internal, comp->reg_num) {
            BITSET_SET(instr->live_set, i);
         }
         ppir_all_interference(comp, g, instr->live_set);
      }
   }

   *spilled = false;
   bool ok = ra_allocate(g);
   if (!ok || (comp->force_spilling-- > 0)) {
      ppir_reg *chosen = ppir_regalloc_choose_spill_node(comp, g);
      if (chosen) {
         /* stack_size will be used to assemble the frame reg in lima_draw.
          * It is also used in the spilling code, as negative indices
          * starting from -1, to create stack addresses. */
         comp->prog->state.stack_size++;
         if (!ppir_regalloc_spill_reg(comp, chosen))
            goto err_out;
         /* Ask the outer loop to call back in. */
         *spilled = true;

         ppir_debug("spilled register %d/%d, num_components: %d\n",
                    chosen->regalloc_index, comp->reg_num,
                    chosen->num_components);
         goto err_out;
      }

      ppir_error("regalloc fail\n");
      goto err_out;
   }

   n = 0;
   list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
      reg->index = ra_get_node_reg(g, n++);
   }

   ralloc_free(g);

   if (lima_debug & LIMA_DEBUG_PP)
      ppir_regalloc_print_result(comp);

   return true;

err_out:
   ralloc_free(g);
   return false;
}

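/* Entry point: build the register list from the SSA nodes, then keep
 * retrying allocation, spilling one register per failed attempt, until it
 * either succeeds or fails without being able to spill. */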
bool ppir_regalloc_prog(ppir_compiler *comp)
{
   bool spilled = false;
   comp->prog->state.stack_size = 0;

   /* Set from an environment variable to force spilling
    * for debugging purposes, see lima_screen.c */
   comp->force_spilling = lima_ppir_force_spilling;

   ppir_regalloc_update_reglist_ssa(comp);

   /* No registers? Probably the shader consists of a discard instruction */
   if (list_is_empty(&comp->reg_list))
      return true;

   /* this will most likely succeed on the first
    * try, except for very complicated shaders */
   while (!ppir_regalloc_prog_try(comp, &spilled))
      if (!spilled)
         return false;

   return true;
}