GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/ir3/ir3_nir_lower_tess.c
/*
 * Copyright © 2019 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler/nir/nir_builder.h"
#include "ir3_compiler.h"
#include "ir3_nir.h"

struct state {
   uint32_t topology;

   struct primitive_map {
      unsigned loc[32 + 4]; /* +POSITION +PSIZE +CLIP_DIST0 +CLIP_DIST1 */
      unsigned stride;
   } map;

   nir_ssa_def *header;

   nir_variable *vertex_count_var;
   nir_variable *emitted_vertex_var;
   nir_variable *vertex_flags_out;

   struct exec_list old_outputs;
   struct exec_list new_outputs;
   struct exec_list emit_outputs;

   /* tess ctrl shader on a650 gets the local primitive id at different bits: */
   unsigned local_primitive_id_start;
};

static nir_ssa_def *
bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)
{
   return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),
                   nir_imm_int(b, mask));
}

static nir_ssa_def *
build_invocation_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 11, 31);
}

static nir_ssa_def *
build_vertex_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, 6, 31);
}

static nir_ssa_def *
build_local_primitive_id(nir_builder *b, struct state *state)
{
   return bitfield_extract(b, state->header, state->local_primitive_id_start,
                           63);
}

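/* For illustration only (inferred from the extract calls above, not an
 * authoritative register description): the tess/GS header appears to pack
 *
 *    bits [5:0]   local primitive id (6 bits; starts at bit 16 instead for
 *                 the a650-style TCS case, see local_primitive_id_start)
 *    bits [10:6]  vertex id (5 bits)
 *    bits [15:11] invocation id (5 bits)
 *
 * The GS header additionally carries a local thread id at bits [25:16]
 * (see local_thread_id() below).
 */
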
static bool
is_tess_levels(gl_varying_slot slot)
{
   return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||
           slot == VARYING_SLOT_TESS_LEVEL_INNER);
}

/* Return a deterministic index for varyings. We can't rely on driver_location
 * to be correct without linking the different stages first, so we create
 * "primitive maps" where the producer decides on the location of each varying
 * slot and then exports a per-slot array to the consumer. This compacts the
 * gl_varying_slot space down a bit so that the primitive maps aren't too
 * large.
 *
 * Note: per-patch varyings are currently handled separately, without any
 * compacting.
 *
 * TODO: We could probably use the driver_locations directly in the non-SSO
 * (Vulkan) case.
 */

static unsigned
shader_io_get_unique_index(gl_varying_slot slot)
{
   if (slot == VARYING_SLOT_POS)
      return 0;
   if (slot == VARYING_SLOT_PSIZ)
      return 1;
   if (slot == VARYING_SLOT_CLIP_DIST0)
      return 2;
   if (slot == VARYING_SLOT_CLIP_DIST1)
      return 3;
   if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
      return 4 + (slot - VARYING_SLOT_VAR0);
   unreachable("illegal slot in get unique index\n");
}

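/* A hypothetical example of the compaction this gives: a VS that writes
 * gl_Position, gl_PointSize and VAR0..VAR2 gets unique indices 0, 1, 4, 5
 * and 6, so its primitive map only needs 7 entries instead of one entry per
 * gl_varying_slot.
 */
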
static nir_ssa_def *
build_local_offset(nir_builder *b, struct state *state, nir_ssa_def *vertex,
                   uint32_t location, uint32_t comp, nir_ssa_def *offset)
{
   nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);
   nir_ssa_def *primitive_offset =
      nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);
   nir_ssa_def *attr_offset;
   nir_ssa_def *vertex_stride;
   unsigned index = shader_io_get_unique_index(location);

   switch (b->shader->info.stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
      vertex_stride = nir_imm_int(b, state->map.stride * 4);
      attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_GEOMETRY:
      vertex_stride = nir_load_vs_vertex_stride_ir3(b);
      attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
                             nir_imm_int(b, comp * 4));
      break;
   default:
      unreachable("bad shader stage");
   }

   nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);

   return nir_iadd(
      b, nir_iadd(b, primitive_offset, vertex_offset),
      nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));
}

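/* Restating the arithmetic above as a sketch (not normative), the byte
 * offset into local storage is roughly
 *
 *    local_primitive_id * primitive_stride
 *       + vertex * vertex_stride
 *       + attr_offset                  (slot location + 4 * component)
 *       + (offset << 4)                (intrinsic offset, given in vec4s)
 *
 * where the producer (VS/TES) takes stride/location from its own primitive
 * map, while the consumer (TCS/GS) reads them back through the *_ir3 load
 * intrinsics, i.e. from values exported by the producer stage.
 */
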
static nir_intrinsic_instr *
replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,
                  nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1,
                  nir_ssa_def *src2)
{
   nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, op);

   new_intr->src[0] = nir_src_for_ssa(src0);
   if (src1)
      new_intr->src[1] = nir_src_for_ssa(src1);
   if (src2)
      new_intr->src[2] = nir_src_for_ssa(src2);

   new_intr->num_components = intr->num_components;

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, intr->num_components,
                        32, NULL);

   nir_builder_instr_insert(b, &new_intr->instr);

   if (nir_intrinsic_infos[op].has_dest)
      nir_ssa_def_rewrite_uses(&intr->dest.ssa, &new_intr->dest.ssa);

   nir_instr_remove(&intr->instr);

   return new_intr;
}

static void
build_primitive_map(nir_shader *shader, struct primitive_map *map)
{
   /* All interfaces except the TCS <-> TES interface use ldlw, which takes
    * an offset in bytes, so each vec4 slot is 16 bytes. TCS <-> TES uses
    * ldg, which takes an offset in dwords, but each per-vertex slot has
    * space for every vertex, and there's space at the beginning for
    * per-patch varyings.
    */
   unsigned slot_size = 16, start = 0;
   if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
      slot_size = shader->info.tess.tcs_vertices_out * 4;
      start = util_last_bit(shader->info.patch_outputs_written) * 4;
   }

   uint64_t mask = shader->info.outputs_written;
   unsigned loc = start;
   while (mask) {
      int location = u_bit_scan64(&mask);
      if (is_tess_levels(location))
         continue;

      unsigned index = shader_io_get_unique_index(location);
      map->loc[index] = loc;
      loc += slot_size;
   }

   map->stride = loc;
   /* Use units of dwords for the stride. */
   if (shader->info.stage != MESA_SHADER_TESS_CTRL)
      map->stride /= 4;
}

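/* A worked (hypothetical) example: a VS writing POS, PSIZ and VAR0 gets
 * unique indices 0, 1 and 4; with the 16-byte slot size used for the
 * ldlw-based interfaces this yields loc[0] = 0, loc[1] = 16, loc[4] = 32,
 * and a byte stride of 48, which is then converted to a stride of 12
 * dwords.  For a TCS the slot size is tcs_vertices_out * 4 dwords instead,
 * and loc[] starts after the per-patch varying space.
 */
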
/* For shader stages that receive a primitive map, calculate how big it should
 * be.
 */

static unsigned
calc_primitive_map_size(nir_shader *shader)
{
   uint64_t mask = shader->info.inputs_read;
   unsigned max_index = 0;
   while (mask) {
      int location = u_bit_scan64(&mask);

      if (is_tess_levels(location))
         continue;

      unsigned index = shader_io_get_unique_index(location);
      max_index = MAX2(max_index, index + 1);
   }

   return max_index;
}

static void
lower_block_to_explicit_output(nir_block *block, nir_builder *b,
                               struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* nir_lower_io_to_temporaries replaces all access to output
          * variables with temp variables and then emits a nir_copy_var at
          * the end of the shader. Thus, we should always get a full wrmask
          * here.
          */
         assert(
            util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         b->cursor = nir_instr_remove(&intr->instr);

         nir_ssa_def *vertex_id = build_vertex_id(b, state);
         nir_ssa_def *offset = build_local_offset(
            b, state, vertex_id, nir_intrinsic_io_semantics(intr).location,
            nir_intrinsic_component(intr), intr->src[1].ssa);

         nir_store_shared_ir3(b, intr->src[0].ssa, offset);
         break;
      }

      default:
         break;
      }
   }
}

static nir_ssa_def *
local_thread_id(nir_builder *b)
{
   return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);
}

void
ir3_nir_lower_to_explicit_output(nir_shader *shader,
                                 struct ir3_shader_variant *v,
                                 unsigned topology)
{
   struct state state = {};

   build_primitive_map(shader, &state.map);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)
      state.header = nir_load_tcs_header_ir3(&b);
   else
      state.header = nir_load_gs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_output(block, &b, &state);

   nir_metadata_preserve(impl,
                         nir_metadata_block_index | nir_metadata_dominance);

   v->output_size = state.map.stride;
}

static void
lower_block_to_explicit_input(nir_block *block, nir_builder *b,
                              struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *offset = build_local_offset(
            b, state,
            intr->src[0].ssa, // this is typically gl_InvocationID
            nir_intrinsic_io_semantics(intr).location,
            nir_intrinsic_component(intr), intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL,
                           NULL);
         break;
      }

      case nir_intrinsic_load_invocation_id: {
         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *iid = build_invocation_id(b, state);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, iid);
         nir_instr_remove(&intr->instr);
         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_to_explicit_input(nir_shader *shader,
                                struct ir3_shader_variant *v)
{
   struct state state = {};

   /* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,
    * HS uses a different primitive id, which starts at bit 16 in the header
    */
   if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
       v->shader->compiler->tess_use_shared)
      state.local_primitive_id_start = 16;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   if (shader->info.stage == MESA_SHADER_GEOMETRY)
      state.header = nir_load_gs_header_ir3(&b);
   else
      state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_block_to_explicit_input(block, &b, &state);

   v->input_size = calc_primitive_map_size(shader);
}

static nir_ssa_def *
build_tcs_out_vertices(nir_builder *b)
{
   if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)
      return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);
   else
      return nir_load_patch_vertices_in(b);
}

static nir_ssa_def *
build_per_vertex_offset(nir_builder *b, struct state *state,
                        nir_ssa_def *vertex, uint32_t location, uint32_t comp,
                        nir_ssa_def *offset)
{
   nir_ssa_def *primitive_id = nir_load_primitive_id(b);
   nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);
   nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, patch_stride);
   nir_ssa_def *attr_offset;

   if (nir_src_is_const(nir_src_for_ssa(offset))) {
      location += nir_src_as_uint(nir_src_for_ssa(offset));
      offset = nir_imm_int(b, 0);
   } else {
      /* Offset is in vec4's, but we need it in units of components for the
       * load/store_global_ir3 offset.
       */
      offset = nir_ishl(b, offset, nir_imm_int(b, 2));
   }

   nir_ssa_def *vertex_offset;
   if (vertex) {
      unsigned index = shader_io_get_unique_index(location);
      switch (b->shader->info.stage) {
      case MESA_SHADER_TESS_CTRL:
         attr_offset = nir_imm_int(b, state->map.loc[index] + comp);
         break;
      case MESA_SHADER_TESS_EVAL:
         attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),
                                nir_imm_int(b, comp));
         break;
      default:
         unreachable("bad shader state");
      }

      attr_offset = nir_iadd(b, attr_offset,
                             nir_imul24(b, offset, build_tcs_out_vertices(b)));
      vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));
   } else {
      assert(location >= VARYING_SLOT_PATCH0 &&
             location <= VARYING_SLOT_TESS_MAX);
      unsigned index = location - VARYING_SLOT_PATCH0;
      attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);
      vertex_offset = nir_imm_int(b, 0);
   }

   return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);
}

static nir_ssa_def *
build_patch_offset(nir_builder *b, struct state *state, uint32_t base,
                   uint32_t comp, nir_ssa_def *offset)
{
   return build_per_vertex_offset(b, state, NULL, base, comp, offset);
}

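/* A rough sketch of the resulting dword offset into the tess param BO
 * (just restating the arithmetic above, not normative):
 *
 *    primitive_id * patch_stride     start of this patch
 *      + attr_offset                 slot location + component (plus
 *                                    offset * vertices for indirect
 *                                    per-vertex addressing)
 *      + (vertex << 2)               vec4 of this vertex within the slot
 *
 * For per-patch varyings (vertex == NULL) the attribute offset is simply
 * (location - VARYING_SLOT_PATCH0) * 4 + component + offset, relative to
 * the start of the patch.
 */
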
static void
tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)
{
   switch (state->topology) {
   case IR3_TESS_TRIANGLES:
      *inner = 1;
      *outer = 3;
      break;
   case IR3_TESS_QUADS:
      *inner = 2;
      *outer = 4;
      break;
   case IR3_TESS_ISOLINES:
      *inner = 0;
      *outer = 2;
      break;
   default:
      unreachable("bad");
   }
}

static nir_ssa_def *
build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)
{
   uint32_t inner_levels, outer_levels;
   tess_level_components(state, &inner_levels, &outer_levels);

   const uint32_t patch_stride = 1 + inner_levels + outer_levels;

   nir_ssa_def *primitive_id = nir_load_primitive_id(b);

   nir_ssa_def *patch_offset =
      nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));

   uint32_t offset;
   switch (slot) {
   case VARYING_SLOT_TESS_LEVEL_OUTER:
      /* There's some kind of header dword, tess levels start at index 1. */
      offset = 1;
      break;
   case VARYING_SLOT_TESS_LEVEL_INNER:
      offset = 1 + outer_levels;
      break;
   default:
      unreachable("bad");
   }

   return nir_iadd(b, patch_offset, nir_imm_int(b, offset));
}

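/* For illustration, the per-patch layout in the tess factor BO that the two
 * functions above imply (a sketch, not authoritative):
 *
 *    dword 0                          header dword
 *    dwords 1 .. outer_levels         outer tess levels
 *    dwords 1+outer_levels .. end     inner tess levels
 *
 * e.g. for triangles: 1 header + 3 outer + 1 inner = 5 dwords per patch,
 * matching patch_stride = 1 + inner_levels + outer_levels.
 */
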
static void
lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_per_vertex_output: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_ssa_def *offset = build_per_vertex_offset(
            b, state, intr->src[0].ssa,
            nir_intrinsic_io_semantics(intr).location,
            nir_intrinsic_component(intr), intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
                           offset, NULL);
         break;
      }

      case nir_intrinsic_store_per_vertex_output: {
         // src[] = { value, vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(
            util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         nir_ssa_def *value = intr->src[0].ssa;
         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_ssa_def *offset = build_per_vertex_offset(
            b, state, intr->src[1].ssa,
            nir_intrinsic_io_semantics(intr).location,
            nir_intrinsic_component(intr), intr->src[2].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3, value,
                           address, offset);

         break;
      }

      case nir_intrinsic_load_output: {
         // src[] = { offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used. most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
         if (is_tess_levels(location)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, location,
                                        nir_intrinsic_component(intr),
                                        intr->src[0].ssa);
         }

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
                           offset, NULL);
         break;
      }

      case nir_intrinsic_store_output: {
         // src[] = { value, offset }.

         /* write patch output to bo */

         b->cursor = nir_before_instr(&intr->instr);

         /* sparse writemask not supported */
         assert(
            util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));

         gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
         if (is_tess_levels(location)) {
            /* The tess levels are defined as float[4] and float[2], but the
             * tess factor BO has smaller sizes for tris/isolines, so we have
             * to discard any writes beyond the number of components for
             * inner/outer levels.
             */
            uint32_t inner_levels, outer_levels, levels;
            tess_level_components(state, &inner_levels, &outer_levels);

            if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
               levels = outer_levels;
            else
               levels = inner_levels;

            assert(intr->src[0].ssa->num_components == 1);

            nir_ssa_def *offset =
               nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));

            nir_if *nif =
               nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));

            replace_intrinsic(
               b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa,
               nir_load_tess_factor_base_ir3(b),
               nir_iadd(b, offset, build_tessfactor_base(b, location, state)));

            nir_pop_if(b, nif);
         } else {
            nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
            nir_ssa_def *offset = build_patch_offset(
               b, state, location, nir_intrinsic_component(intr),
               intr->src[1].ssa);

            debug_assert(nir_intrinsic_component(intr) == 0);

            replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,
                              intr->src[0].ssa, address, offset);
         }
         break;
      }

      default:
         break;
      }
   }
}

static void
emit_tess_epilogue(nir_builder *b, struct state *state)
{
   /* Insert endpatch instruction:
    *
    * TODO we should re-work this to use normal flow control.
    */

   nir_end_patch_ir3(b);
}

void
ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,
                        unsigned topology)
{
   struct state state = {.topology = topology};

   if (shader_debug_enabled(shader->info.stage)) {
      mesa_logi("NIR (before tess lowering) for %s shader:",
                _mesa_shader_stage_to_string(shader->info.stage));
      nir_log_shaderi(shader);
   }

   build_primitive_map(shader, &state.map);
   memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));
   v->output_size = state.map.stride;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_tcs_header_ir3(&b);

   nir_foreach_block_safe (block, impl)
      lower_tess_ctrl_block(block, &b, &state);

   /* Now move the body of the TCS into a conditional:
    *
    *   if (gl_InvocationID < num_vertices)
    *      // body
    *
    */

   nir_cf_list body;
   nir_cf_extract(&body, nir_before_cf_list(&impl->body),
                  nir_after_cf_list(&impl->body));

   b.cursor = nir_after_cf_list(&impl->body);

   /* Re-emit the header, since the old one got moved into the if branch */
   state.header = nir_load_tcs_header_ir3(&b);
   nir_ssa_def *iid = build_invocation_id(&b, &state);

   const uint32_t nvertices = shader->info.tess.tcs_vertices_out;
   nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));

   nir_if *nif = nir_push_if(&b, cond);

   nir_cf_reinsert(&body, b.cursor);

   b.cursor = nir_after_cf_list(&nif->then_list);

   /* Insert conditional exit for threads with invocation id != 0 */
   nir_ssa_def *iid0_cond = nir_ieq_imm(&b, iid, 0);
   nir_cond_end_ir3(&b, iid0_cond);

   emit_tess_epilogue(&b, &state);

   nir_pop_if(&b, nif);

   nir_metadata_preserve(impl, 0);
}

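/* Roughly, the TCS ends up shaped like this after the pass (a sketch only;
 * the real IR is built with the nir_builder calls above):
 *
 *    if (gl_InvocationID < tcs_vertices_out) {
 *       ... original body, with I/O lowered by lower_tess_ctrl_block() ...
 *       cond_end(gl_InvocationID == 0);   // other invocations exit here
 *       end_patch();                      // from emit_tess_epilogue()
 *    }
 */
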
static void
lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_load_tess_coord: {
         b->cursor = nir_after_instr(&intr->instr);
         nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);
         nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);
         nir_ssa_def *z;

         if (state->topology == IR3_TESS_TRIANGLES)
            z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);
         else
            z = nir_imm_float(b, 0.0f);

         nir_ssa_def *coord = nir_vec3(b, x, y, z);

         nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, coord,
                                        b->cursor.instr);
         break;
      }

      case nir_intrinsic_load_per_vertex_input: {
         // src[] = { vertex, offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address = nir_load_tess_param_base_ir3(b);
         nir_ssa_def *offset = build_per_vertex_offset(
            b, state, intr->src[0].ssa,
            nir_intrinsic_io_semantics(intr).location,
            nir_intrinsic_component(intr), intr->src[1].ssa);

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
                           offset, NULL);
         break;
      }

      case nir_intrinsic_load_input: {
         // src[] = { offset }.

         b->cursor = nir_before_instr(&intr->instr);

         nir_ssa_def *address, *offset;

         /* note if vectorization of the tess level loads ever happens:
          * "ldg" across 16-byte boundaries can behave incorrectly if results
          * are never used. most likely some issue with (sy) not properly
          * syncing with values coming from a second memory transaction.
          */
         gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;
         if (is_tess_levels(location)) {
            assert(intr->dest.ssa.num_components == 1);
            address = nir_load_tess_factor_base_ir3(b);
            offset = build_tessfactor_base(b, location, state);
         } else {
            address = nir_load_tess_param_base_ir3(b);
            offset = build_patch_offset(b, state, location,
                                        nir_intrinsic_component(intr),
                                        intr->src[0].ssa);
         }

         offset =
            nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));

         replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,
                           offset, NULL);
         break;
      }

      default:
         break;
      }
   }
}

void
ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,
                        unsigned topology)
{
   struct state state = {.topology = topology};

   if (shader_debug_enabled(shader->info.stage)) {
      mesa_logi("NIR (before tess lowering) for %s shader:",
                _mesa_shader_stage_to_string(shader->info.stage));
      nir_log_shaderi(shader);
   }

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block_safe (block, impl)
      lower_tess_eval_block(block, &b, &state);

   v->input_size = calc_primitive_map_size(shader);

   nir_metadata_preserve(impl, 0);
}

static void
lower_gs_block(nir_block *block, nir_builder *b, struct state *state)
{
   nir_foreach_instr_safe (instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      switch (intr->intrinsic) {
      case nir_intrinsic_end_primitive: {
         /* Note: This ignores the stream, which seems to match the blob
          * behavior. I'm guessing the HW ignores any extraneous cut
          * signals from an EndPrimitive() that doesn't correspond to the
          * rasterized stream.
          */
         b->cursor = nir_before_instr(&intr->instr);
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);
         nir_instr_remove(&intr->instr);
         break;
      }

      case nir_intrinsic_emit_vertex: {
         /* Load the vertex count */
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);

         nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));

         unsigned stream = nir_intrinsic_stream_id(intr);
         /* vertex_flags_out |= stream */
         nir_store_var(b, state->vertex_flags_out,
                       nir_ior(b, nir_load_var(b, state->vertex_flags_out),
                               nir_imm_int(b, stream)),
                       0x1 /* .x */);

         foreach_two_lists (dest_node, &state->emit_outputs, src_node,
                            &state->old_outputs) {
            nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
            nir_variable *src = exec_node_data(nir_variable, src_node, node);
            nir_copy_var(b, dest, src);
         }

         nir_instr_remove(&intr->instr);

         nir_store_var(b, state->emitted_vertex_var,
                       nir_iadd(b, nir_load_var(b, state->emitted_vertex_var),
                                nir_imm_int(b, 1)),
                       0x1);

         nir_pop_if(b, NULL);

         /* Increment the vertex count by 1 */
         nir_store_var(b, state->vertex_count_var,
                       nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */
         nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);

         break;
      }

      default:
         break;
      }
   }
}

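/* After lower_gs_block(), each EmitVertex(stream) has effectively become
 * (sketch only, in pseudo-C):
 *
 *    if (vertex_count == local_thread_id) {
 *       vertex_flags |= stream;
 *       emit_outputs = shadowed outputs;   // one nir_copy_var per output
 *       emitted_vertex++;
 *    }
 *    vertex_count++;
 *    vertex_flags = 0;
 *
 * and EndPrimitive() simply becomes "vertex_flags = 4".
 */
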
void
ir3_nir_lower_gs(nir_shader *shader)
{
   struct state state = {};

   if (shader_debug_enabled(shader->info.stage)) {
      mesa_logi("NIR (before gs lowering):");
      nir_log_shaderi(shader);
   }

   /* Create an output var for vertex_flags. This will be shadowed below,
    * the same way regular outputs get shadowed, and this variable will
    * become a temporary.
    */
   state.vertex_flags_out = nir_variable_create(
      shader, nir_var_shader_out, glsl_uint_type(), "vertex_flags");
   state.vertex_flags_out->data.driver_location = shader->num_outputs++;
   state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;
   state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   assert(impl);

   nir_builder b;
   nir_builder_init(&b, impl);
   b.cursor = nir_before_cf_list(&impl->body);

   state.header = nir_load_gs_header_ir3(&b);

   /* Generate two sets of shadow vars for the output variables. The first
    * set replaces the real outputs and the second set (emit_outputs) we'll
    * assign in the emit_vertex conditionals. Then at the end of the shader
    * we copy the emit_outputs to the real outputs, so that we get
    * store_output in uniform control flow.
    */
   exec_list_make_empty(&state.old_outputs);
   nir_foreach_shader_out_variable_safe (var, shader) {
      exec_node_remove(&var->node);
      exec_list_push_tail(&state.old_outputs, &var->node);
   }
   exec_list_make_empty(&state.new_outputs);
   exec_list_make_empty(&state.emit_outputs);
   nir_foreach_variable_in_list (var, &state.old_outputs) {
      /* Create a new output var by cloning the original output var and
       * stealing the name.
       */
      nir_variable *output = nir_variable_clone(var, shader);
      exec_list_push_tail(&state.new_outputs, &output->node);

      /* Rewrite the original output to be a shadow variable. */
      var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);
      var->data.mode = nir_var_shader_temp;

      /* Clone the shadow variable to create the emit shadow variable that
       * we'll assign in the emit conditionals.
       */
      nir_variable *emit_output = nir_variable_clone(var, shader);
      emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);
      exec_list_push_tail(&state.emit_outputs, &emit_output->node);
   }

   /* During the shader we'll keep track of which vertex we're currently
    * emitting for the EmitVertex test and how many vertices we emitted, so
    * we know to discard if we didn't emit any. In most simple shaders, this
    * can all be statically determined and gets optimized away.
    */
   state.vertex_count_var =
      nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");
   state.emitted_vertex_var =
      nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");

   /* Initialize to 0. */
   b.cursor = nir_before_cf_list(&impl->body);
   nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);
   nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);

   nir_foreach_block_safe (block, impl)
      lower_gs_block(block, &b, &state);

   set_foreach (impl->end_block->predecessors, block_entry) {
      struct nir_block *block = (void *)block_entry->key;
      b.cursor = nir_after_block_before_jump(block);

      nir_ssa_def *cond =
         nir_ieq_imm(&b, nir_load_var(&b, state.emitted_vertex_var), 0);

      nir_discard_if(&b, cond);

      foreach_two_lists (dest_node, &state.new_outputs, src_node,
                         &state.emit_outputs) {
         nir_variable *dest = exec_node_data(nir_variable, dest_node, node);
         nir_variable *src = exec_node_data(nir_variable, src_node, node);
         nir_copy_var(&b, dest, src);
      }
   }

   exec_list_append(&shader->variables, &state.old_outputs);
   exec_list_append(&shader->variables, &state.emit_outputs);
   exec_list_append(&shader->variables, &state.new_outputs);

   nir_metadata_preserve(impl, 0);

   nir_lower_global_vars_to_local(shader);
   nir_split_var_copies(shader);
   nir_lower_var_copies(shader);

   nir_fixup_deref_modes(shader);

   if (shader_debug_enabled(shader->info.stage)) {
      mesa_logi("NIR (after gs lowering):");
      nir_log_shaderi(shader);
   }
}