Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/compiler/v3d_nir_lower_io.c
4564 views
1
/*
2
* Copyright © 2015 Broadcom
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "compiler/v3d_compiler.h"
25
#include "compiler/nir/nir_builder.h"
26
27
/**
28
* Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
29
* intrinsics into something amenable to the V3D architecture.
30
*
31
* Most of the work is turning the VS's store_output intrinsics from working
32
* on a base representing the gallium-level vec4 driver_location to an offset
33
* within the VPM, and emitting the header that's read by the fixed function
34
* hardware between the VS and FS.
35
*
36
* We also adjust the offsets on uniform loads to be in bytes, since that's
37
* what we need for indirect addressing with general TMU access.
38
*/
39
40
/* Per-shader state for the IO lowering pass. */
struct v3d_nir_lower_io_state {
        /* VPM slot offsets (in 32-bit slots) for the fixed-function output
         * sections; -1 means the section is not present in this layout.
         */
        int pos_vpm_offset;        /* 4 slots: saved position components */
        int vp_vpm_offset;         /* 2 slots: viewport-scaled X/Y */
        int zs_vpm_offset;         /* 1 slot: viewport-transformed Z */
        int rcp_wc_vpm_offset;     /* 1 slot: 1/Wc (frcp of pos.w) */
        int psiz_vpm_offset;       /* 1 slot: per-vertex point size */
        /* First VPM slot of the generic varyings section. */
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        /* Tracks which varying VPM slots have been written, so that slots
         * read by the FS but never stored can be zero-filled at the end.
         */
        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        /* Components of the position output, saved while lowering
         * store_output so the fixed-function outputs can be emitted later.
         */
        nir_ssa_def *pos[4];
};
67
68
static void
69
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
70
struct v3d_nir_lower_io_state *state);
71
72
static void
73
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
74
nir_ssa_def *chan)
75
{
76
if (offset) {
77
/* When generating the VIR instruction, the base and the offset
78
* are just going to get added together with an ADD instruction
79
* so we might as well do the add here at the NIR level instead
80
* and let the constant folding do its magic.
81
*/
82
offset = nir_iadd_imm(b, offset, base);
83
base = 0;
84
} else {
85
offset = nir_imm_int(b, 0);
86
}
87
88
nir_store_output(b, chan, offset, .base = base, .write_mask = 0x1, .component = 0);
89
}
90
91
/* Convert the uniform offset to bytes. If it happens to be a constant,
 * constant-folding will clean up the shift for us.
 */
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        /* On SPIR-V/Vulkan we are already getting our offsets in
         * bytes.
         */
        if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
                return;

        b->cursor = nir_before_instr(&intr->instr);

        /* Scale the vec4-granular base to bytes (16 bytes per vec4). */
        nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);

        /* Likewise scale the dynamic offset source: << 4 == * 16. */
        nir_instr_rewrite_src(&intr->instr,
                              &intr->src[0],
                              nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                       nir_imm_int(b, 4))));
}
113
114
static int
115
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location, unsigned component)
116
{
117
uint32_t num_used_outputs = 0;
118
struct v3d_varying_slot *used_outputs = NULL;
119
switch (c->s->info.stage) {
120
case MESA_SHADER_VERTEX:
121
num_used_outputs = c->vs_key->num_used_outputs;
122
used_outputs = c->vs_key->used_outputs;
123
break;
124
case MESA_SHADER_GEOMETRY:
125
num_used_outputs = c->gs_key->num_used_outputs;
126
used_outputs = c->gs_key->used_outputs;
127
break;
128
default:
129
unreachable("Unsupported shader stage");
130
}
131
132
for (int i = 0; i < num_used_outputs; i++) {
133
struct v3d_varying_slot slot = used_outputs[i];
134
135
if (v3d_slot_get_slot(slot) == location &&
136
v3d_slot_get_component(slot) == component) {
137
return i;
138
}
139
}
140
141
return -1;
142
}
143
144
/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);
        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just psiz to the position in the FF header right now. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
        }

        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                /* Clear bits 16..23 of the vertex header before inserting
                 * the new layer id there.
                 */
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation, however,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes access to the tile state. The
                 * simulator has an assert to catch this, so we play safe here
                 * and we make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location, start_comp + i);

                if (vpm_offset == -1)
                        continue;

                /* src[1] is the indirect offset; a constant offset can be
                 * folded into the slot index (x4: vec4 slots to scalars).
                 */
                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        /* The original intrinsic has been fully replaced: remove it. */
        nir_instr_remove(&intr->instr);
}
239
240
static inline void
241
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
242
{
243
const uint8_t NEW_PRIMITIVE_OFFSET = 0;
244
const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;
245
246
uint32_t vertex_data_size = state->gs.output_vertex_data_size;
247
assert((vertex_data_size & 0xffffff00) == 0);
248
249
uint32_t header;
250
header = 1 << NEW_PRIMITIVE_OFFSET;
251
header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
252
nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
253
}
254
255
/* Lowers emit_vertex: writes the fixed-function outputs and the vertex
 * header for the current vertex, then advances the data/header VPM offset
 * variables and clears the New Primitive bit for the next vertex.
 */
static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
        nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd(b, output_offset,
                         nir_imm_int(b, state->gs.output_vertex_data_size));

        header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));

        /* Reset the New Primitive bit */
        header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        /* The intrinsic has been fully lowered: remove it. */
        nir_instr_remove(&instr->instr);
}
288
289
/* Lowers end_primitive by resetting the per-vertex header, so the next
 * emitted vertex carries the New Primitive bit.
 */
static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        /* The GS prolog must have created the header variable already. */
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        /* The intrinsic has been fully lowered: remove it. */
        nir_instr_remove(&instr->instr);
}
300
301
/* Some vertex attribute formats may require to apply a swizzle but the hardware
302
* doesn't provide means to do that, so we need to apply the swizzle in the
303
* vertex shader.
304
*
305
* This is required at least in Vulkan to support madatory vertex attribute
306
* format VK_FORMAT_B8G8R8A8_UNORM.
307
*/
308
static void
309
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
310
nir_intrinsic_instr *instr)
311
{
312
assert(c->s->info.stage == MESA_SHADER_VERTEX);
313
314
if (!c->vs_key->va_swap_rb_mask)
315
return;
316
317
const uint32_t location = nir_intrinsic_io_semantics(instr).location;
318
319
if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
320
return;
321
322
assert(instr->num_components == 1);
323
const uint32_t comp = nir_intrinsic_component(instr);
324
if (comp == 0 || comp == 2)
325
nir_intrinsic_set_component(instr, (comp + 2) % 4);
326
}
327
328
static void
329
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
330
struct nir_instr *instr,
331
struct v3d_nir_lower_io_state *state)
332
{
333
if (instr->type != nir_instr_type_intrinsic)
334
return;
335
nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
336
337
switch (intr->intrinsic) {
338
case nir_intrinsic_load_input:
339
if (c->s->info.stage == MESA_SHADER_VERTEX)
340
v3d_nir_lower_vertex_input(c, b, intr);
341
break;
342
343
case nir_intrinsic_load_uniform:
344
v3d_nir_lower_uniform(c, b, intr);
345
break;
346
347
case nir_intrinsic_store_output:
348
if (c->s->info.stage == MESA_SHADER_VERTEX ||
349
c->s->info.stage == MESA_SHADER_GEOMETRY) {
350
v3d_nir_lower_vpm_output(c, b, intr, state);
351
}
352
break;
353
354
case nir_intrinsic_emit_vertex:
355
v3d_nir_lower_emit_vertex(c, b, intr, state);
356
break;
357
358
case nir_intrinsic_end_primitive:
359
v3d_nir_lower_end_primitive(c, b, intr, state);
360
break;
361
362
default:
363
break;
364
}
365
}
366
367
/* Remap the output var's .driver_location. This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_shader_out_variable_safe(var, c->s) {
                /* Position and point size live in the fixed-function part
                 * of the VPM layout when that section is present.
                 */
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                /* Everything else maps into the generic varyings section. */
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c,
                                                    var->data.location,
                                                    var->data.location_frac);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}
403
404
/* Computes the VPM output layout for a vertex shader: fixed-function
 * outputs first (only when the VS is the last geometry stage), then the
 * generic varyings.
 */
static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        /* -1 marks sections that are not present in this layout. */
        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                /* The 4-component position is only laid out for coordinate
                 * (is_coord) shaders.
                 */
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                /* Viewport-scaled X/Y always occupy two slots. */
                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        /* Reserve at least one slot even if the shader has no outputs. */
        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}
439
440
/* Computes the VPM output layout for a geometry shader: an output header
 * (one general slot plus one slot per output vertex) followed by per-vertex
 * data sections mirroring the VS layout (fixed-function outputs plus
 * varyings).
 */
static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets into a generic vertex data
         * elements. When it is time to actually write a particular vertex to
         * the VPM, we will add the offset for that vertex into the VPM output
         * to these offsets.
         *
         * If geometry shaders are present, they are always the last shader
         * stage before rasterization, so we always emit fixed function outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;

        state->varyings_vpm_offset = vpm_offset;

        /* Total per-vertex footprint: fixed-function sections + varyings. */
        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}
496
497
/* Emits the stores for the fixed-function VPM outputs (position, viewport
 * X/Y, Z, 1/Wc) from the position components saved in state->pos, plus
 * zero-fill for varyings the FS reads but this shader never stored.
 */
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        /* Any position component the shader never wrote becomes undef. */
        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_ssa_undef(b, 1, 32);
        }

        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_ssa_def *pos;
                        nir_ssa_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects XY
                         * coordinates in .8 fixed-point format, but then it
                         * will internally round it to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be noticed by
                         * some CTS tests.
                         *
                         * The correct fix for this as recommended by Broadcom
                         * is to convert to .8 fixed-point with ffloor().
                         */
                        pos = nir_f2i32(b, nir_ffloor(b, pos));
                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                /* Viewport transform: z * scale / w + offset. */
                nir_ssa_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}
586
587
/* Emits the GS prolog: creates and initializes the local variables that
 * track the current vertex's VPM data offset, header offset and header
 * contents while vertices are emitted.
 */
static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        /* Insert at the very top of the shader. */
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();

        /* Vertex data for the first vertex goes right after the header. */
        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        /* Slot 0 is the general header; per-vertex headers start at 1. */
        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}
613
614
/* Writes the general VPM output header word (slot 0) at the current cursor,
 * packing the header size with the total vertex count.
 */
static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need to
         * have a variable just to keep track of the number of vertices we
         * emitted and instead we can just compute it here from the header
         * offset variable by removing the one generic header slot that always
         * goes at the beginning of our header.
         */
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *vertex_count =
                nir_isub(b, header_offset, nir_imm_int(b, 1));
        /* Header size in the low bits, vertex count starting at bit 16. */
        nir_ssa_def *header =
                nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
                        nir_ishl(b, vertex_count,
                                 nir_imm_int(b, VERTEX_COUNT_OFFSET)));

        v3d_nir_store_output(b, 0, NULL, header);
}
638
639
/* Pass entry point. Sets up the VPM output layout for VS/GS, lowers every
 * IO intrinsic in the shader, appends the fixed-function output stores (VS)
 * or the general VPM header write (GS, emitted last because it needs the
 * final vertex count), and remaps output var driver_locations for printing.
 */
void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                /* No VPM output layout for these stages; only the uniform
                 * and input lowering below applies.
                 */
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_builder b;
                        nir_builder_init(&b, function->impl);

                        if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                                emit_gs_prolog(c, &b, function->impl, &state);

                        nir_foreach_block(block, function->impl) {
                                nir_foreach_instr_safe(instr, block)
                                        v3d_nir_lower_io_instr(c, &b, instr,
                                                               &state);
                        }

                        /* Epilogue work goes at the end of the last block. */
                        nir_block *last = nir_impl_last_block(function->impl);
                        b.cursor = nir_after_block(last);
                        if (s->info.stage == MESA_SHADER_VERTEX) {
                                v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                        } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                                emit_gs_vpm_output_header_prolog(c, &b, &state);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }
}
692
693