Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/nir.c
4574 views
1
/*
2
* Copyright (c) 2017 Lima Project
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sub license,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the
12
* next paragraph) shall be included in all copies or substantial portions
13
* of the Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
* DEALINGS IN THE SOFTWARE.
22
*
23
*/
24
25
#include <string.h>
26
27
#include "util/hash_table.h"
28
#include "util/ralloc.h"
29
#include "util/bitscan.h"
30
#include "compiler/nir/nir.h"
31
#include "pipe/p_state.h"
32
33
34
#include "ppir.h"
35
36
/*
 * Create a ppir node for `op` whose destination is the NIR SSA definition
 * `ssa`.  The destination is set up as an SSA target covering all of the
 * definition's components.  Returns NULL if the node cannot be created.
 */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_ssa_def *ssa)
{
   ppir_node *n = ppir_node_create(block, op, ssa->index, 0);
   ppir_dest *d;

   if (n == NULL)
      return NULL;

   d = ppir_node_get_dest(n);
   d->type = ppir_target_ssa;
   d->ssa.num_components = ssa->num_components;
   d->write_mask = u_bit_consecutive(0, ssa->num_components);

   /* Load and store nodes get their SSA destination flagged as "head". */
   switch (n->type) {
   case ppir_node_type_load:
   case ppir_node_type_store:
      d->ssa.is_head = true;
      break;
   default:
      break;
   }

   return n;
}
53
54
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
55
nir_register *reg, unsigned mask)
56
{
57
ppir_node *node = ppir_node_create(block, op, reg->index, mask);
58
if (!node)
59
return NULL;
60
61
ppir_dest *dest = ppir_node_get_dest(node);
62
63
list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
64
if (r->index == reg->index) {
65
dest->reg = r;
66
break;
67
}
68
}
69
70
dest->type = ppir_target_register;
71
dest->write_mask = mask;
72
73
if (node->type == ppir_node_type_load ||
74
node->type == ppir_node_type_store)
75
dest->reg->is_head = true;
76
77
return node;
78
}
79
80
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
81
nir_dest *dest, unsigned mask)
82
{
83
unsigned index = -1;
84
85
if (dest) {
86
if (dest->is_ssa)
87
return ppir_node_create_ssa(block, op, &dest->ssa);
88
else
89
return ppir_node_create_reg(block, op, dest->reg.reg, mask);
90
}
91
92
return ppir_node_create(block, op, index, 0);
93
}
94
95
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
96
ppir_src *ps, nir_src *ns, unsigned mask)
97
{
98
ppir_node *child = NULL;
99
100
if (ns->is_ssa) {
101
child = comp->var_nodes[ns->ssa->index];
102
if (child->op != ppir_op_undef)
103
ppir_node_add_dep(node, child, ppir_dep_src);
104
}
105
else {
106
nir_register *reg = ns->reg.reg;
107
while (mask) {
108
int swizzle = ps->swizzle[u_bit_scan(&mask)];
109
child = comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle];
110
/* Reg is read before it was written, create a dummy node for it */
111
if (!child) {
112
child = ppir_node_create_reg(node->block, ppir_op_dummy, reg,
113
u_bit_consecutive(0, 4));
114
comp->var_nodes[(reg->index << 2) + comp->reg_base + swizzle] = child;
115
}
116
/* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
117
if (child && node != child && child->op != ppir_op_dummy)
118
ppir_node_add_dep(node, child, ppir_dep_src);
119
}
120
}
121
122
ppir_node_target_assign(ps, child);
123
}
124
125
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
126
/* not supported */
127
[0 ... nir_last_opcode] = -1,
128
129
[nir_op_mov] = ppir_op_mov,
130
[nir_op_fmul] = ppir_op_mul,
131
[nir_op_fabs] = ppir_op_abs,
132
[nir_op_fneg] = ppir_op_neg,
133
[nir_op_fadd] = ppir_op_add,
134
[nir_op_fsum3] = ppir_op_sum3,
135
[nir_op_fsum4] = ppir_op_sum4,
136
[nir_op_frsq] = ppir_op_rsqrt,
137
[nir_op_flog2] = ppir_op_log2,
138
[nir_op_fexp2] = ppir_op_exp2,
139
[nir_op_fsqrt] = ppir_op_sqrt,
140
[nir_op_fsin] = ppir_op_sin,
141
[nir_op_fcos] = ppir_op_cos,
142
[nir_op_fmax] = ppir_op_max,
143
[nir_op_fmin] = ppir_op_min,
144
[nir_op_frcp] = ppir_op_rcp,
145
[nir_op_ffloor] = ppir_op_floor,
146
[nir_op_fceil] = ppir_op_ceil,
147
[nir_op_ffract] = ppir_op_fract,
148
[nir_op_sge] = ppir_op_ge,
149
[nir_op_slt] = ppir_op_lt,
150
[nir_op_seq] = ppir_op_eq,
151
[nir_op_sne] = ppir_op_ne,
152
[nir_op_fcsel] = ppir_op_select,
153
[nir_op_inot] = ppir_op_not,
154
[nir_op_ftrunc] = ppir_op_trunc,
155
[nir_op_fsat] = ppir_op_sat,
156
[nir_op_fddx] = ppir_op_ddx,
157
[nir_op_fddy] = ppir_op_ddy,
158
};
159
160
/*
 * Translate a NIR ALU instruction into a ppir ALU node and append it to the
 * block's node list.  Returns false when the opcode has no ppir equivalent
 * or the node cannot be created.
 */
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *alu = nir_instr_as_alu(ni);
   const int ppir_opcode = nir_to_ppir_opcodes[alu->op];

   if (ppir_opcode < 0) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[alu->op].name);
      return false;
   }

   ppir_alu_node *out = ppir_node_create_dest(block, ppir_opcode,
                                              &alu->dest.dest,
                                              alu->dest.write_mask);
   if (out == NULL)
      return false;

   if (alu->dest.saturate)
      out->dest.modifier = ppir_outmod_clamp_fraction;

   /* sum3/sum4 always read 3/4 source components regardless of how many
    * components they write; every other op reads what it writes. */
   unsigned read_mask;
   if (ppir_opcode == ppir_op_sum3)
      read_mask = 0x7;
   else if (ppir_opcode == ppir_op_sum4)
      read_mask = 0xf;
   else
      read_mask = out->dest.write_mask;

   const unsigned input_count = nir_op_infos[alu->op].num_inputs;
   out->num_src = input_count;

   for (unsigned idx = 0; idx < input_count; idx++) {
      nir_alu_src *from = &alu->src[idx];
      ppir_src *to = &out->src[idx];

      memcpy(to->swizzle, from->swizzle, sizeof(to->swizzle));
      ppir_node_add_src(block->comp, &out->node, to, &from->src, read_mask);

      to->absolute = from->abs;
      to->negate = from->negate;
   }

   list_addtail(&out->node.list, &block->node_list);
   return true;
}
209
210
static ppir_block *ppir_block_create(ppir_compiler *comp);

/*
 * Create the block that conditional discards branch to.  The new block is
 * recorded in comp->discard_block and receives a single discard node.
 * Returns false if either the block or the node cannot be created.
 */
static bool ppir_emit_discard_block(ppir_compiler *comp)
{
   ppir_block *target = ppir_block_create(comp);
   if (target == NULL)
      return false;

   comp->discard_block = target;
   target->comp = comp;

   ppir_discard_node *kill = ppir_node_create(target, ppir_op_discard, -1, 0);
   if (kill == NULL)
      return false;

   list_addtail(&kill->node.list, &target->node_list);
   return true;
}
230
231
/*
 * Build the branch node for a discard_if intrinsic: a branch into the
 * compiler's discard block whose first source is the intrinsic's condition.
 * The node is returned to the caller and is not added to any list here.
 * Returns NULL if the discard block or the branch node cannot be created.
 */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ni);
   ppir_compiler *comp = block->comp;

   /* The discard block is created the first time it is needed. */
   if (comp->discard_block == NULL) {
      if (!ppir_emit_discard_block(comp))
         return NULL;
   }

   ppir_node *br_node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (br_node == NULL)
      return NULL;

   ppir_branch_node *br = ppir_node_to_branch(br_node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, br_node, &br->src[0], &intr->src[0],
                     u_bit_consecutive(0, intr->num_components));
   br->num_src = 1;
   br->target = comp->discard_block;

   return br_node;
}
254
255
/*
 * Create an unconditional discard node in `block`.  The NIR instruction is
 * not inspected.  Returns the new node, or NULL if creation failed; the
 * caller is responsible for adding it to a node list.
 */
static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
{
   (void)ni;
   return ppir_node_create(block, ppir_op_discard, -1, 0);
}
261
262
/*
 * Translate one NIR intrinsic into ppir node(s) appended to block->node_list.
 *
 * Handled intrinsics: load_input, load_frag_coord, load_point_coord,
 * load_front_face, load_uniform, store_output, discard and discard_if.
 *
 * Returns true on success.  Returns false when the intrinsic is not
 * supported or when any ppir node could not be created.
 */
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      /* Only register destinations need an explicit write mask. */
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      /* A constant offset is folded into the index; otherwise the offset
       * becomes the load's single source. */
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face: {
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_uniform:
      if (!instr->dest.is_ssa)
         mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->dest, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      if (!block->comp->uses_discard && instr->src->is_ssa) {
         node = block->comp->var_nodes[instr->src->ssa->index];
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_const:
            break;
         default:
            node->is_end = 1;
            return true;
         }
      }

      /* Fallback: a mov into a fresh SSA destination, marked as the end. */
      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = instr->num_components;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, instr->num_components);

      alu_node->num_src = 1;

      /* Identity swizzle for the stored components. */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, instr->num_components));

      alu_node->node.is_end = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_discard:
      node = ppir_emit_discard(block, ni);
      /* Node creation can fail; do not link a NULL node into the list. */
      if (!node)
         return false;
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_discard_if:
      node = ppir_emit_discard_if(block, ni);
      /* NULL means the discard block or the branch could not be created. */
      if (!node)
         return false;
      list_addtail(&node->list, &block->node_list);
      return true;

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}
401
402
/*
 * Translate a NIR load_const into a ppir constant node holding the same
 * per-component 32-bit values, and append it to the block's node list.
 * Returns false if the node cannot be created.
 */
static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *lc = nir_instr_as_load_const(ni);
   ppir_const_node *cnode = ppir_node_create_ssa(block, ppir_op_const, &lc->def);

   if (cnode == NULL)
      return false;

   assert(lc->def.bit_size == 32);

   int c = 0;
   while (c < lc->def.num_components) {
      cnode->constant.value[c].i = lc->value[c].i32;
      c++;
   }
   cnode->constant.num = lc->def.num_components;

   list_addtail(&cnode->node.list, &block->node_list);
   return true;
}
418
419
/*
 * Translate a NIR ssa_undef into a ppir undef node whose SSA destination is
 * flagged as undefined, and append it to the block's node list.
 * Returns false if the node cannot be created.
 */
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_ssa_undef_instr *nir_undef = nir_instr_as_ssa_undef(ni);
   ppir_node *n = ppir_node_create_ssa(block, ppir_op_undef, &nir_undef->def);

   if (n == NULL)
      return false;

   ppir_node_to_alu(n)->dest.ssa.undef = true;

   list_addtail(&n->list, &block->node_list);
   return true;
}
433
434
/*
 * Translate a NIR texture instruction into a ppir load_texture node plus the
 * load_coords node that feeds it.
 *
 * Only tex/txb/txl on 2D, cube, rect and external samplers are accepted, and
 * only coord, bias and lod sources are understood.  Returns false on any
 * unsupported input or when a node cannot be created.
 */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *tex = nir_instr_as_tex(ni);

   if (tex->op != nir_texop_tex &&
       tex->op != nir_texop_txb &&
       tex->op != nir_texop_txl) {
      ppir_error("unsupported texop %d\n", tex->op);
      return false;
   }

   if (tex->sampler_dim != GLSL_SAMPLER_DIM_2D &&
       tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
       tex->sampler_dim != GLSL_SAMPLER_DIM_RECT &&
       tex->sampler_dim != GLSL_SAMPLER_DIM_EXTERNAL) {
      ppir_error("unsupported sampler dim: %d\n", tex->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned dest_mask = 0;
   if (!tex->dest.is_ssa)
      dest_mask = u_bit_consecutive(0, nir_tex_instr_dest_size(tex));

   ppir_load_texture_node *ld_tex =
      ppir_node_create_dest(block, ppir_op_load_texture, &tex->dest, dest_mask);
   if (ld_tex == NULL)
      return false;

   ld_tex->sampler = tex->texture_index;
   ld_tex->sampler_dim = tex->sampler_dim;

   /* Coordinates are read with an identity swizzle. */
   for (int c = 0; c < tex->coord_components; c++)
      ld_tex->src[0].swizzle[c] = c;

   for (int s = 0; s < tex->num_srcs; s++) {
      switch (tex->src[s].src_type) {
      case nir_tex_src_coord: {
         nir_src *coord = &tex->src[s].src;

         if (coord->is_ssa) {
            ppir_node *producer = block->comp->var_nodes[coord->ssa->index];

            if (producer->op == ppir_op_load_varying) {
               /* If the successor is load_texture, promote it to load_coords */
               nir_tex_src *as_tex_src = (nir_tex_src *)coord;

               if (as_tex_src->src_type == nir_tex_src_coord)
                  producer->op = ppir_op_load_coords;
            }
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &ld_tex->node, &ld_tex->src[0], &tex->src[s].src,
                           u_bit_consecutive(0, tex->coord_components));
         ld_tex->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         ld_tex->lod_bias_en = true;
         ld_tex->explicit_lod = (tex->src[s].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &ld_tex->node, &ld_tex->src[1], &tex->src[s].src, 1);
         ld_tex->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&ld_tex->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *coord_node = ppir_node_get_src(&ld_tex->node, 0)->node;
   ppir_load_node *coords = NULL;

   bool reuse_producer = coord_node &&
                         ppir_node_has_single_src_succ(coord_node) &&
                         (coord_node->op == ppir_op_load_coords);

   if (reuse_producer) {
      coords = ppir_node_to_load(coord_node);
   } else {
      /* Create load_coords node */
      coords = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (coords == NULL)
         return false;
      list_addtail(&coords->node.list, &block->node_list);

      coords->src = ld_tex->src[0];
      coords->num_src = 1;
      coords->num_components =
         (ld_tex->sampler_dim == GLSL_SAMPLER_DIM_CUBE) ? 3 : 2;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __FUNCTION__, coords->index, ld_tex->node.index);

      /* Move every predecessor of ld_tex onto the new node, then make
       * ld_tex depend on the new node instead. */
      ppir_node_foreach_pred_safe((&ld_tex->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&coords->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&ld_tex->node, &coords->node, ppir_dep_src);
   }

   assert(coords);

   /* Coordinates travel from the load to ld_tex through the discard
    * pipeline register. */
   coords->dest.type = ppir_target_pipeline;
   ld_tex->src[0].type = coords->dest.type;
   coords->dest.pipeline = ppir_pipeline_reg_discard;
   ld_tex->src[0].pipeline = coords->dest.pipeline;

   return true;
}
551
552
/*
 * Look up the ppir block registered for a NIR block.  Blocks are keyed by
 * the NIR block's address in comp->blocks; the result is whatever the hash
 * table search returns for that key.
 */
static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
{
   const uintptr_t key = (uintptr_t)nblock;

   return _mesa_hash_table_u64_search(comp->blocks, key);
}
558
559
/*
 * Translate a NIR break/continue into an unconditional ppir branch appended
 * to the block's node list.  A break targets the current block's only
 * successor; a continue targets the current loop's continue block.
 * Returns false for any other jump type or if the node cannot be created.
 */
static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_compiler *comp = block->comp;
   nir_jump_instr *jmp = nir_instr_as_jump(ni);
   ppir_block *dest_block;

   if (jmp->type == nir_jump_break) {
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      dest_block = comp->current_block->successors[0];
   } else if (jmp->type == nir_jump_continue) {
      dest_block = comp->loop_cont_block;
   } else {
      ppir_error("nir_jump_instr not support\n");
      return false;
   }

   assert(dest_block != NULL);

   ppir_node *n = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (n == NULL)
      return false;

   ppir_branch_node *br = ppir_node_to_branch(n);

   /* Unconditional */
   br->num_src = 0;
   br->target = dest_block;

   list_addtail(&n->list, &block->node_list);
   return true;
}
596
597
/*
 * Per-instruction-type emit handlers, indexed by nir_instr_type.  The table
 * covers the types below nir_instr_type_phi; slots without an initializer
 * stay NULL.
 */
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_jump]       = ppir_emit_jump,
   [nir_instr_type_ssa_undef]  = ppir_emit_ssa_undef,
};
605
606
/*
 * Allocate a zeroed ppir block as a ralloc child of `comp`, with empty node
 * and instruction lists and its back-pointer set to `comp`.
 * Returns NULL on allocation failure.
 */
static ppir_block *ppir_block_create(ppir_compiler *comp)
{
   ppir_block *blk = rzalloc(comp, ppir_block);

   if (blk != NULL) {
      blk->comp = comp;
      list_inithead(&blk->node_list);
      list_inithead(&blk->instr_list);
   }

   return blk;
}
619
620
/*
 * Emit ppir nodes for every instruction of a NIR block.
 *
 * The ppir block previously registered for `nblock` becomes the compiler's
 * current block and is appended to the compiler's block list; each NIR
 * instruction is then dispatched through the ppir_emit_instr table.
 *
 * Returns false as soon as an instruction fails to translate, including
 * instruction types that fall outside the dispatch table or have no handler
 * registered in it.
 */
static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
{
   ppir_block *block = ppir_get_block(comp, nblock);

   comp->current_block = block;

   list_addtail(&block->list, &comp->block_list);

   nir_foreach_instr(instr, nblock) {
      /* The table only has nir_instr_type_phi slots and some of them are
       * NULL; reject such instructions instead of indexing out of bounds
       * or calling through a NULL pointer. */
      if (instr->type >= nir_instr_type_phi ||
          !ppir_emit_instr[instr->type]) {
         ppir_error("unsupported nir_instr type %d\n", instr->type);
         return false;
      }

      if (!ppir_emit_instr[instr->type](block, instr))
         return false;
   }

   return true;
}
636
637
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
638
639
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
640
{
641
ppir_node *node;
642
ppir_branch_node *else_branch, *after_branch;
643
nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
644
bool empty_else_block =
645
(nir_else_block == nir_if_last_else_block(if_stmt) &&
646
exec_list_is_empty(&nir_else_block->instr_list));
647
ppir_block *block = comp->current_block;
648
649
node = ppir_node_create(block, ppir_op_branch, -1, 0);
650
if (!node)
651
return false;
652
else_branch = ppir_node_to_branch(node);
653
ppir_node_add_src(block->comp, node, &else_branch->src[0],
654
&if_stmt->condition, 1);
655
else_branch->num_src = 1;
656
/* Negate condition to minimize branching. We're generating following:
657
* current_block: { ...; if (!statement) branch else_block; }
658
* then_block: { ...; branch after_block; }
659
* else_block: { ... }
660
* after_block: { ... }
661
*
662
* or if else list is empty:
663
* block: { if (!statement) branch else_block; }
664
* then_block: { ... }
665
* else_block: after_block: { ... }
666
*/
667
else_branch->negate = true;
668
list_addtail(&else_branch->node.list, &block->node_list);
669
670
if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
671
return false;
672
673
if (empty_else_block) {
674
nir_block *nblock = nir_if_last_else_block(if_stmt);
675
assert(nblock->successors[0]);
676
assert(!nblock->successors[1]);
677
else_branch->target = ppir_get_block(comp, nblock->successors[0]);
678
/* Add empty else block to the list */
679
list_addtail(&block->successors[1]->list, &comp->block_list);
680
return true;
681
}
682
683
else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));
684
685
nir_block *last_then_block = nir_if_last_then_block(if_stmt);
686
assert(last_then_block->successors[0]);
687
assert(!last_then_block->successors[1]);
688
block = ppir_get_block(comp, last_then_block);
689
node = ppir_node_create(block, ppir_op_branch, -1, 0);
690
if (!node)
691
return false;
692
after_branch = ppir_node_to_branch(node);
693
/* Unconditional */
694
after_branch->num_src = 0;
695
after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
696
/* Target should be after_block, will fixup later */
697
list_addtail(&after_branch->node.list, &block->node_list);
698
699
if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
700
return false;
701
702
return true;
703
}
704
705
/*
 * Emit a NIR loop: the body is emitted with comp->loop_cont_block pointing
 * at the loop's first block, then an unconditional branch back to that block
 * is appended to the loop's last block.  On success the previous continue
 * block is restored and comp->num_loops is incremented.
 * Returns false if the body fails to emit or the branch cannot be created.
 */
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   ppir_block *outer_cont_block = comp->loop_cont_block;

   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   ppir_block *tail = ppir_get_block(comp, nir_loop_last_block(nloop));
   ppir_node *n = ppir_node_create(tail, ppir_op_branch, -1, 0);
   if (n == NULL)
      return false;

   ppir_branch_node *back_edge = ppir_node_to_branch(n);

   /* Unconditional */
   back_edge->num_src = 0;
   back_edge->target = comp->loop_cont_block;
   list_addtail(&back_edge->node.list, &tail->node_list);

   comp->loop_cont_block = outer_cont_block;
   comp->num_loops++;

   return true;
}
735
736
/*
 * Function control-flow nodes are not supported: report an error and fail.
 * Neither argument is used.
 */
static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
{
   (void)comp;
   (void)nfunc;

   ppir_error("function nir_cf_node not support\n");
   return false;
}
741
742
/*
 * Emit every control-flow node of `list` in order, dispatching on the node
 * type (block, if, loop, function).  Stops and returns false at the first
 * node that fails to emit or has an unknown type.
 */
static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, cf, node, list) {
      switch (cf->type) {
      case nir_cf_node_block:
         if (!ppir_emit_block(comp, nir_cf_node_as_block(cf)))
            return false;
         break;
      case nir_cf_node_if:
         if (!ppir_emit_if(comp, nir_cf_node_as_if(cf)))
            return false;
         break;
      case nir_cf_node_loop:
         if (!ppir_emit_loop(comp, nir_cf_node_as_loop(cf)))
            return false;
         break;
      case nir_cf_node_function:
         if (!ppir_emit_function(comp, nir_cf_node_as_function(cf)))
            return false;
         break;
      default:
         ppir_error("unknown NIR node type %d\n", cf->type);
         return false;
      }
   }

   return true;
}
771
772
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_reg, unsigned num_ssa)
773
{
774
ppir_compiler *comp = rzalloc_size(
775
prog, sizeof(*comp) + ((num_reg << 2) + num_ssa) * sizeof(ppir_node *));
776
if (!comp)
777
return NULL;
778
779
list_inithead(&comp->block_list);
780
list_inithead(&comp->reg_list);
781
comp->reg_num = 0;
782
comp->blocks = _mesa_hash_table_u64_create(prog);
783
784
comp->var_nodes = (ppir_node **)(comp + 1);
785
comp->reg_base = num_ssa;
786
comp->prog = prog;
787
return comp;
788
}
789
790
/*
 * Add sequence dependencies that preserve the relative order of nodes which
 * have no explicit data dependency on each other.
 *
 * Some intrinsics rely purely on instruction order.  Take discard_if and the
 * is_end node: without a fake dependency from is_end on discard_if the
 * scheduler may place is_end first, and since is_end terminates the shader
 * on Utgard PP the rest would never execute.  So fake dependencies are added
 * for discard/branch/store nodes.
 *
 * Each block is walked backwards.  The most recently seen "anchor" (an
 * is_end, discard, store_temp or branch node) is remembered, and every
 * earlier non-constant root node is made a sequence predecessor of it.
 *
 * TODO: scheduler should schedule discard_if as early as possible otherwise
 * we may end up with suboptimal code for cases like this:
 *
 *    s3 = s1 < s2
 *    discard_if s3
 *    s4 = s1 + s2
 *    store s4
 *
 * Here store depends on discard_if and s4, but since dependencies can be
 * scheduled in any order the result may be:
 *
 *    instr1: s3 = s1 < s2
 *    instr2: s4 = s1 + s2
 *    instr3: discard_if s3
 *    instr4: store s4
 */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      ppir_node *anchor = NULL;

      list_for_each_entry_rev(ppir_node, cur, &blk->node_list, list) {
         if (anchor && ppir_node_is_root(cur) && cur->op != ppir_op_const)
            ppir_node_add_dep(anchor, cur, ppir_dep_sequence);

         bool is_anchor = cur->is_end;
         switch (cur->op) {
         case ppir_op_discard:
         case ppir_op_store_temp:
         case ppir_op_branch:
            is_anchor = true;
            break;
         default:
            break;
         }

         if (is_anchor)
            anchor = cur;
      }
   }
}
831
832
/*
 * Report shader statistics (instruction count, loops, spills and fills).
 *
 * The summary line is printed to stderr when LIMA_DEBUG_SHADERDB is set and
 * is always forwarded to the pipe debug callback.  If the summary string
 * cannot be formatted, nothing is reported.
 */
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct pipe_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb = NULL;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);

   /* On failure the contents of shaderdb are undefined, so it must be
    * neither printed nor freed.  An assert alone would not protect
    * release builds. */
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   pipe_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
852
853
/*
 * Add write-after-read dependencies for registers.
 *
 * For every block and every register, the block's nodes are walked from last
 * to first while remembering the most recently visited node that writes the
 * register.  Any node visited afterwards (i.e. earlier in the block) that
 * reads the register becomes a write-after-read predecessor of that write.
 */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, blk, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *later_write = NULL;

         list_for_each_entry_rev(ppir_node, cur, &blk->node_list, list) {
            for (int s = 0; s < ppir_node_get_src_num(cur); s++) {
               ppir_src *src = ppir_node_get_src(cur, s);

               if (!src || src->type != ppir_target_register ||
                   src->reg != reg || !later_write)
                  continue;

               ppir_debug("Adding dep %d for write %d\n", cur->index, later_write->index);
               ppir_node_add_dep(later_write, cur, ppir_dep_write_after_read);
            }

            /* Sources are checked before the destination so a node that
             * both reads and writes the register is not paired with
             * itself. */
            ppir_dest *dst = ppir_node_get_dest(cur);
            if (dst && dst->type == ppir_target_register && dst->reg == reg)
               later_write = cur;
         }
      }
   }
}
876
877
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
878
struct ra_regs *ra,
879
struct pipe_debug_callback *debug)
880
{
881
nir_function_impl *func = nir_shader_get_entrypoint(nir);
882
ppir_compiler *comp = ppir_compiler_create(prog, func->reg_alloc, func->ssa_alloc);
883
if (!comp)
884
return false;
885
886
comp->ra = ra;
887
comp->uses_discard = nir->info.fs.uses_discard;
888
889
/* 1st pass: create ppir blocks */
890
nir_foreach_function(function, nir) {
891
if (!function->impl)
892
continue;
893
894
nir_foreach_block(nblock, function->impl) {
895
ppir_block *block = ppir_block_create(comp);
896
if (!block)
897
return false;
898
block->index = nblock->index;
899
_mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
900
}
901
}
902
903
/* 2nd pass: populate successors */
904
nir_foreach_function(function, nir) {
905
if (!function->impl)
906
continue;
907
908
nir_foreach_block(nblock, function->impl) {
909
ppir_block *block = ppir_get_block(comp, nblock);
910
assert(block);
911
912
for (int i = 0; i < 2; i++) {
913
if (nblock->successors[i])
914
block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
915
}
916
}
917
}
918
919
/* Validate outputs, we support only gl_FragColor */
920
nir_foreach_shader_out_variable(var, nir) {
921
switch (var->data.location) {
922
case FRAG_RESULT_COLOR:
923
case FRAG_RESULT_DATA0:
924
break;
925
default:
926
ppir_error("unsupported output type\n");
927
goto err_out0;
928
break;
929
}
930
}
931
932
foreach_list_typed(nir_register, reg, node, &func->registers) {
933
ppir_reg *r = rzalloc(comp, ppir_reg);
934
if (!r)
935
return false;
936
937
r->index = reg->index;
938
r->num_components = reg->num_components;
939
r->is_head = false;
940
list_addtail(&r->list, &comp->reg_list);
941
comp->reg_num++;
942
}
943
944
if (!ppir_emit_cf_list(comp, &func->body))
945
goto err_out0;
946
947
/* If we have discard block add it to the very end */
948
if (comp->discard_block)
949
list_addtail(&comp->discard_block->list, &comp->block_list);
950
951
ppir_node_print_prog(comp);
952
953
if (!ppir_lower_prog(comp))
954
goto err_out0;
955
956
ppir_add_ordering_deps(comp);
957
ppir_add_write_after_read_deps(comp);
958
959
ppir_node_print_prog(comp);
960
961
if (!ppir_node_to_instr(comp))
962
goto err_out0;
963
964
if (!ppir_schedule_prog(comp))
965
goto err_out0;
966
967
if (!ppir_regalloc_prog(comp))
968
goto err_out0;
969
970
if (!ppir_codegen_prog(comp))
971
goto err_out0;
972
973
ppir_print_shader_db(nir, comp, debug);
974
975
_mesa_hash_table_u64_destroy(comp->blocks);
976
ralloc_free(comp);
977
return true;
978
979
err_out0:
980
_mesa_hash_table_u64_destroy(comp->blocks);
981
ralloc_free(comp);
982
return false;
983
}
984
985
986