/* Path: blob/21.2-virgl/src/broadcom/compiler/v3d_nir_lower_io.c */
/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "compiler/v3d_compiler.h"
#include "compiler/nir/nir_builder.h"

/**
 * Walks the NIR generated by TGSI-to-NIR or GLSL-to-NIR to lower its io
 * intrinsics into something amenable to the V3D architecture.
 *
 * Most of the work is turning the VS's store_output intrinsics from working
 * on a base representing the gallium-level vec4 driver_location to an offset
 * within the VPM, and emitting the header that's read by the fixed function
 * hardware between the VS and FS.
 *
 * We also adjust the offsets on uniform loads to be in bytes, since that's
 * what we need for indirect addressing with general TMU access.
 */

/* Per-shader state for this pass: the VPM slot assigned to each
 * fixed-function output (-1 when that output is not present), plus
 * bookkeeping for geometry-shader vertex emission.
 */
struct v3d_nir_lower_io_state {
        int pos_vpm_offset;
        int vp_vpm_offset;
        int zs_vpm_offset;
        int rcp_wc_vpm_offset;
        int psiz_vpm_offset;
        int varyings_vpm_offset;

        /* Geometry shader state */
        struct {
                /* VPM offset for the current vertex data output */
                nir_variable *output_offset_var;
                /* VPM offset for the current vertex header */
                nir_variable *header_offset_var;
                /* VPM header for the current vertex */
                nir_variable *header_var;

                /* Size of the complete VPM output header */
                uint32_t output_header_size;
                /* Size of the output data for a single vertex */
                uint32_t output_vertex_data_size;
        } gs;

        /* Bitset of VPM varying slots actually written by the shader; used
         * at the end to zero-fill slots the FS reads but we never stored.
         */
        BITSET_WORD varyings_stored[BITSET_WORDS(V3D_MAX_ANY_STAGE_INPUTS)];

        /* Components of gl_Position, captured as they are stored so the
         * fixed-function outputs (clip/viewport/Z/1/W) can be derived.
         */
        nir_ssa_def *pos[4];
};

static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state);

/* Emits a single scalar store_output at VPM offset base (+ optional dynamic
 * offset, used by GS for the current-vertex VPM position).
 */
static void
v3d_nir_store_output(nir_builder *b, int base, nir_ssa_def *offset,
                     nir_ssa_def *chan)
{
        if (offset) {
                /* When generating the VIR instruction, the base and the offset
                 * are just going to get added together with an ADD instruction
                 * so we might as well do the add here at the NIR level instead
                 * and let the constant folding do its magic.
                 */
                offset = nir_iadd_imm(b, offset, base);
                base = 0;
        } else {
                offset = nir_imm_int(b, 0);
        }

        nir_store_output(b, chan, offset,
                         .base = base, .write_mask = 0x1, .component = 0);
}

/* Convert the uniform offset to bytes. If it happens to be a constant,
 * constant-folding will clean up the shift for us.
 */
static void
v3d_nir_lower_uniform(struct v3d_compile *c, nir_builder *b,
                      nir_intrinsic_instr *intr)
{
        /* On SPIR-V/Vulkan we are already getting our offsets in
         * bytes.
         */
        if (c->key->environment == V3D_ENVIRONMENT_VULKAN)
                return;

        b->cursor = nir_before_instr(&intr->instr);

        /* Scale base and offset from vec4 units to bytes (x16). */
        nir_intrinsic_set_base(intr, nir_intrinsic_base(intr) * 16);

        nir_instr_rewrite_src(&intr->instr,
                              &intr->src[0],
                              nir_src_for_ssa(nir_ishl(b, intr->src[0].ssa,
                                                       nir_imm_int(b, 4))));
}

/* Returns the index of (location, component) in the shader key's list of
 * outputs consumed by the next stage, or -1 if the next stage never reads it.
 * That index is the varying's scalar slot in the VPM.
 */
static int
v3d_varying_slot_vpm_offset(struct v3d_compile *c, unsigned location, unsigned component)
{
        uint32_t num_used_outputs = 0;
        struct v3d_varying_slot *used_outputs = NULL;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                used_outputs = c->vs_key->used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                used_outputs = c->gs_key->used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                struct v3d_varying_slot slot = used_outputs[i];

                if (v3d_slot_get_slot(slot) == location &&
                    v3d_slot_get_component(slot) == component) {
                        return i;
                }
        }

        return -1;
}

/* Lowers a store_output(gallium driver location) to a series of store_outputs
 * with a driver_location equal to the offset in the VPM.
 *
 * For geometry shaders we need to emit multiple vertices so the VPM offsets
 * need to be computed in the shader code based on the current vertex index.
 */
static void
v3d_nir_lower_vpm_output(struct v3d_compile *c, nir_builder *b,
                         nir_intrinsic_instr *intr,
                         struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&intr->instr);

        /* If this is a geometry shader we need to emit our outputs
         * to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        int start_comp = nir_intrinsic_component(intr);
        unsigned location = nir_intrinsic_io_semantics(intr).location;
        nir_ssa_def *src = nir_ssa_for_src(b, intr->src[0],
                                           intr->num_components);
        /* Save off the components of the position for the setup of VPM inputs
         * read by fixed function HW.
         */
        if (location == VARYING_SLOT_POS) {
                for (int i = 0; i < intr->num_components; i++) {
                        state->pos[start_comp + i] = nir_channel(b, src, i);
                }
        }

        /* Just psiz goes to the position in the FF header right now. */
        if (location == VARYING_SLOT_PSIZ &&
            state->psiz_vpm_offset != -1) {
                v3d_nir_store_output(b, state->psiz_vpm_offset, offset_reg, src);
        }

        if (location == VARYING_SLOT_LAYER) {
                assert(c->s->info.stage == MESA_SHADER_GEOMETRY);
                nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
                /* Clear the layer field (bits 16-23) before inserting. */
                header = nir_iand(b, header, nir_imm_int(b, 0xff00ffff));

                /* From the GLES 3.2 spec:
                 *
                 *    "When fragments are written to a layered framebuffer, the
                 *     fragment’s layer number selects an image from the array
                 *     of images at each attachment (...). If the fragment’s
                 *     layer number is negative, or greater than or equal to
                 *     the minimum number of layers of any attachment, the
                 *     effects of the fragment on the framebuffer contents are
                 *     undefined."
                 *
                 * This suggests we can just ignore that situation, however,
                 * for V3D an out-of-bounds layer index means that the binner
                 * might do out-of-bounds writes access to the tile state. The
                 * simulator has an assert to catch this, so we play safe here
                 * and we make sure that doesn't happen by setting gl_Layer
                 * to 0 in that case (we always allocate tile state for at
                 * least one layer).
                 */
                nir_ssa_def *fb_layers = nir_load_fb_layers_v3d(b, 32);
                nir_ssa_def *cond = nir_ige(b, src, fb_layers);
                nir_ssa_def *layer_id =
                        nir_bcsel(b, cond,
                                  nir_imm_int(b, 0),
                                  nir_ishl(b, src, nir_imm_int(b, 16)));
                header = nir_ior(b, header, layer_id);
                nir_store_var(b, state->gs.header_var, header, 0x1);
        }

        /* Scalarize outputs if it hasn't happened already, since we want to
         * schedule each VPM write individually. We can skip any output
         * components not read by the FS.
         */
        for (int i = 0; i < intr->num_components; i++) {
                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c, location, start_comp + i);


                if (vpm_offset == -1)
                        continue;

                /* Constant array index on the output: advance by whole vec4
                 * slots (4 scalars each).
                 */
                if (nir_src_is_const(intr->src[1]))
                        vpm_offset += nir_src_as_uint(intr->src[1]) * 4;

                BITSET_SET(state->varyings_stored, vpm_offset);

                v3d_nir_store_output(b, state->varyings_vpm_offset + vpm_offset,
                                     offset_reg, nir_channel(b, src, i));
        }

        nir_instr_remove(&intr->instr);
}

/* Initializes the per-vertex GS header: New Primitive bit set and the vertex
 * data length in bits 8+. Called at shader start and after each EndPrimitive.
 */
static inline void
reset_gs_header(nir_builder *b, struct v3d_nir_lower_io_state *state)
{
        const uint8_t NEW_PRIMITIVE_OFFSET = 0;
        const uint8_t VERTEX_DATA_LENGTH_OFFSET = 8;

        uint32_t vertex_data_size = state->gs.output_vertex_data_size;
        /* The length field is 8 bits wide, so the per-vertex data must fit. */
        assert((vertex_data_size & 0xffffff00) == 0);

        uint32_t header;
        header = 1 << NEW_PRIMITIVE_OFFSET;
        header |= vertex_data_size << VERTEX_DATA_LENGTH_OFFSET;
        nir_store_var(b, state->gs.header_var, nir_imm_int(b, header), 0x1);
}

/* Lowers emit_vertex: writes the FF outputs and the vertex header for the
 * current vertex, then advances the VPM data/header offsets for the next one.
 */
static void
v3d_nir_lower_emit_vertex(struct v3d_compile *c, nir_builder *b,
                          nir_intrinsic_instr *instr,
                          struct v3d_nir_lower_io_state *state)
{
        b->cursor = nir_before_instr(&instr->instr);

        nir_ssa_def *header = nir_load_var(b, state->gs.header_var);
        nir_ssa_def *header_offset = nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *output_offset = nir_load_var(b, state->gs.output_offset_var);

        /* Emit fixed function outputs */
        v3d_nir_emit_ff_vpm_outputs(c, b, state);

        /* Emit vertex header */
        v3d_nir_store_output(b, 0, header_offset, header);

        /* Update VPM offset for next vertex output data and header */
        output_offset =
                nir_iadd(b, output_offset,
                         nir_imm_int(b, state->gs.output_vertex_data_size));

        header_offset = nir_iadd(b, header_offset, nir_imm_int(b, 1));

        /* Reset the New Primitive bit */
        header = nir_iand(b, header, nir_imm_int(b, 0xfffffffe));

        nir_store_var(b, state->gs.output_offset_var, output_offset, 0x1);
        nir_store_var(b, state->gs.header_offset_var, header_offset, 0x1);
        nir_store_var(b, state->gs.header_var, header, 0x1);

        nir_instr_remove(&instr->instr);
}

/* Lowers end_primitive by re-arming the New Primitive bit in the GS header
 * so the next emitted vertex starts a new primitive.
 */
static void
v3d_nir_lower_end_primitive(struct v3d_compile *c, nir_builder *b,
                            nir_intrinsic_instr *instr,
                            struct v3d_nir_lower_io_state *state)
{
        assert(state->gs.header_var);
        b->cursor = nir_before_instr(&instr->instr);
        reset_gs_header(b, state);

        nir_instr_remove(&instr->instr);
}

/* Some vertex attribute formats may require to apply a swizzle but the hardware
 * doesn't provide means to do that, so we need to apply the swizzle in the
 * vertex shader.
 *
 * This is required at least in Vulkan to support mandatory vertex attribute
 * format VK_FORMAT_B8G8R8A8_UNORM.
 */
static void
v3d_nir_lower_vertex_input(struct v3d_compile *c, nir_builder *b,
                           nir_intrinsic_instr *instr)
{
        assert(c->s->info.stage == MESA_SHADER_VERTEX);

        if (!c->vs_key->va_swap_rb_mask)
                return;

        const uint32_t location = nir_intrinsic_io_semantics(instr).location;

        if (!(c->vs_key->va_swap_rb_mask & (1 << location)))
                return;

        /* Swap R (component 0) and B (component 2); G and A are untouched. */
        assert(instr->num_components == 1);
        const uint32_t comp = nir_intrinsic_component(instr);
        if (comp == 0 || comp == 2)
                nir_intrinsic_set_component(instr, (comp + 2) % 4);
}

/* Dispatches a single instruction to the appropriate lowering above. */
static void
v3d_nir_lower_io_instr(struct v3d_compile *c, nir_builder *b,
                       struct nir_instr *instr,
                       struct v3d_nir_lower_io_state *state)
{
        if (instr->type != nir_instr_type_intrinsic)
                return;
        nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

        switch (intr->intrinsic) {
        case nir_intrinsic_load_input:
                if (c->s->info.stage == MESA_SHADER_VERTEX)
                        v3d_nir_lower_vertex_input(c, b, intr);
                break;

        case nir_intrinsic_load_uniform:
                v3d_nir_lower_uniform(c, b, intr);
                break;

        case nir_intrinsic_store_output:
                if (c->s->info.stage == MESA_SHADER_VERTEX ||
                    c->s->info.stage == MESA_SHADER_GEOMETRY) {
                        v3d_nir_lower_vpm_output(c, b, intr, state);
                }
                break;

        case nir_intrinsic_emit_vertex:
                v3d_nir_lower_emit_vertex(c, b, intr, state);
                break;

        case nir_intrinsic_end_primitive:
                v3d_nir_lower_end_primitive(c, b, intr, state);
                break;

        default:
                break;
        }
}

/* Remap the output var's .driver_location. This is purely for
 * nir_print_shader() so that store_output can map back to a variable name.
 */
static void
v3d_nir_lower_io_update_output_var_base(struct v3d_compile *c,
                                        struct v3d_nir_lower_io_state *state)
{
        nir_foreach_shader_out_variable_safe(var, c->s) {
                if (var->data.location == VARYING_SLOT_POS &&
                    state->pos_vpm_offset != -1) {
                        var->data.driver_location = state->pos_vpm_offset;
                        continue;
                }

                if (var->data.location == VARYING_SLOT_PSIZ &&
                    state->psiz_vpm_offset != -1) {
                        var->data.driver_location = state->psiz_vpm_offset;
                        continue;
                }

                int vpm_offset =
                        v3d_varying_slot_vpm_offset(c,
                                                    var->data.location,
                                                    var->data.location_frac);
                if (vpm_offset != -1) {
                        var->data.driver_location =
                                state->varyings_vpm_offset + vpm_offset;
                } else {
                        /* If we couldn't find a mapping for the var, delete
                         * it so that its old .driver_location doesn't confuse
                         * nir_print_shader().
                         */
                        exec_node_remove(&var->node);
                }
        }
}

/* Lays out the VS's VPM output segment: optional fixed-function slots
 * (clip-space position, viewport XY, Z, 1/Wc, point size) followed by the
 * varyings, and records the total size in c->vpm_output_size.
 */
static void
v3d_nir_setup_vpm_layout_vs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        uint32_t vpm_offset = 0;

        state->pos_vpm_offset = -1;
        state->vp_vpm_offset = -1;
        state->zs_vpm_offset = -1;
        state->rcp_wc_vpm_offset = -1;
        state->psiz_vpm_offset = -1;

        bool needs_ff_outputs = c->vs_key->base.is_last_geometry_stage;
        if (needs_ff_outputs) {
                if (c->vs_key->is_coord) {
                        state->pos_vpm_offset = vpm_offset;
                        vpm_offset += 4;
                }

                state->vp_vpm_offset = vpm_offset;
                vpm_offset += 2;

                if (!c->vs_key->is_coord) {
                        state->zs_vpm_offset = vpm_offset++;
                        state->rcp_wc_vpm_offset = vpm_offset++;
                }

                if (c->vs_key->per_vertex_point_size)
                        state->psiz_vpm_offset = vpm_offset++;
        }

        state->varyings_vpm_offset = vpm_offset;

        c->vpm_output_size = MAX2(1, vpm_offset + c->vs_key->num_used_outputs);
}

/* Lays out the GS's VPM output segment: a global header plus one header slot
 * per vertex, followed by per-vertex data (FF outputs + varyings).
 */
static void
v3d_nir_setup_vpm_layout_gs(struct v3d_compile *c,
                            struct v3d_nir_lower_io_state *state)
{
        /* 1 header slot for number of output vertices */
        uint32_t vpm_offset = 1;

        /* 1 header slot per output vertex */
        const uint32_t num_vertices = c->s->info.gs.vertices_out;
        vpm_offset += num_vertices;

        state->gs.output_header_size = vpm_offset;

        /* Vertex data: here we only compute offsets into a generic vertex data
         * elements. When it is time to actually write a particular vertex to
         * the VPM, we will add the offset for that vertex into the VPM output
         * to these offsets.
         *
         * If geometry shaders are present, they are always the last shader
         * stage before rasterization, so we always emit fixed function outputs.
         */
        vpm_offset = 0;
        if (c->gs_key->is_coord) {
                state->pos_vpm_offset = vpm_offset;
                vpm_offset += 4;
        } else {
                state->pos_vpm_offset = -1;
        }

        state->vp_vpm_offset = vpm_offset;
        vpm_offset += 2;

        if (!c->gs_key->is_coord) {
                state->zs_vpm_offset = vpm_offset++;
                state->rcp_wc_vpm_offset = vpm_offset++;
        } else {
                state->zs_vpm_offset = -1;
                state->rcp_wc_vpm_offset = -1;
        }

        /* Mesa enables OES_geometry_shader_point_size automatically with
         * OES_geometry_shader so we always need to handle point size
         * writes if present.
         */
        if (c->gs_key->per_vertex_point_size)
                state->psiz_vpm_offset = vpm_offset++;

        state->varyings_vpm_offset = vpm_offset;

        state->gs.output_vertex_data_size =
                state->varyings_vpm_offset + c->gs_key->num_used_outputs;

        c->vpm_output_size =
                state->gs.output_header_size +
                state->gs.output_vertex_data_size * num_vertices;
}

/* Emits the fixed-function VPM outputs derived from gl_Position (clip-space
 * position, viewport-transformed XY, scaled Z, 1/Wc) and zero-fills any
 * varying slots the FS reads but the shader never wrote.
 */
static void
v3d_nir_emit_ff_vpm_outputs(struct v3d_compile *c, nir_builder *b,
                            struct v3d_nir_lower_io_state *state)
{
        /* If this is a geometry shader we need to emit our fixed function
         * outputs to the current vertex offset in the VPM.
         */
        nir_ssa_def *offset_reg =
                c->s->info.stage == MESA_SHADER_GEOMETRY ?
                        nir_load_var(b, state->gs.output_offset_var) : NULL;

        /* Position components the shader never stored become undefs. */
        for (int i = 0; i < 4; i++) {
                if (!state->pos[i])
                        state->pos[i] = nir_ssa_undef(b, 1, 32);
        }

        nir_ssa_def *rcp_wc = nir_frcp(b, state->pos[3]);

        if (state->pos_vpm_offset != -1) {
                for (int i = 0; i < 4; i++) {
                        v3d_nir_store_output(b, state->pos_vpm_offset + i,
                                             offset_reg, state->pos[i]);
                }
        }

        if (state->vp_vpm_offset != -1) {
                for (int i = 0; i < 2; i++) {
                        nir_ssa_def *pos;
                        nir_ssa_def *scale;
                        pos = state->pos[i];
                        if (i == 0)
                                scale = nir_load_viewport_x_scale(b);
                        else
                                scale = nir_load_viewport_y_scale(b);
                        pos = nir_fmul(b, pos, scale);
                        pos = nir_fmul(b, pos, rcp_wc);
                        /* Pre-V3D 4.3 hardware has a quirk where it expects XY
                         * coordinates in .8 fixed-point format, but then it
                         * will internally round it to .6 fixed-point,
                         * introducing a double rounding. The double rounding
                         * can cause very slight differences in triangle
                         * rasterization coverage that can actually be noticed by
                         * some CTS tests.
                         *
                         * The correct fix for this as recommended by Broadcom
                         * is to convert to .8 fixed-point with ffloor().
                         */
                        pos = nir_f2i32(b, nir_ffloor(b, pos));
                        v3d_nir_store_output(b, state->vp_vpm_offset + i,
                                             offset_reg, pos);
                }
        }

        if (state->zs_vpm_offset != -1) {
                nir_ssa_def *z = state->pos[2];
                z = nir_fmul(b, z, nir_load_viewport_z_scale(b));
                z = nir_fmul(b, z, rcp_wc);
                z = nir_fadd(b, z, nir_load_viewport_z_offset(b));
                v3d_nir_store_output(b, state->zs_vpm_offset, offset_reg, z);
        }

        if (state->rcp_wc_vpm_offset != -1) {
                v3d_nir_store_output(b, state->rcp_wc_vpm_offset,
                                     offset_reg, rcp_wc);
        }

        /* Store 0 to varyings requested by the FS but not stored by the
         * previous stage. This should be undefined behavior, but
         * glsl-routing seems to rely on it.
         */
        uint32_t num_used_outputs;
        switch (c->s->info.stage) {
        case MESA_SHADER_VERTEX:
                num_used_outputs = c->vs_key->num_used_outputs;
                break;
        case MESA_SHADER_GEOMETRY:
                num_used_outputs = c->gs_key->num_used_outputs;
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        for (int i = 0; i < num_used_outputs; i++) {
                if (!BITSET_TEST(state->varyings_stored, i)) {
                        v3d_nir_store_output(b, state->varyings_vpm_offset + i,
                                             offset_reg, nir_imm_int(b, 0));
                }
        }
}

/* Creates and initializes the GS bookkeeping variables (current data offset,
 * current header offset, current header value) at the top of the shader.
 */
static void
emit_gs_prolog(struct v3d_compile *c, nir_builder *b,
               nir_function_impl *impl,
               struct v3d_nir_lower_io_state *state)
{
        nir_block *first = nir_start_block(impl);
        b->cursor = nir_before_block(first);

        const struct glsl_type *uint_type = glsl_uint_type();

        assert(!state->gs.output_offset_var);
        state->gs.output_offset_var =
                nir_local_variable_create(impl, uint_type, "output_offset");
        /* Vertex data starts right after the complete output header. */
        nir_store_var(b, state->gs.output_offset_var,
                      nir_imm_int(b, state->gs.output_header_size), 0x1);

        assert(!state->gs.header_offset_var);
        state->gs.header_offset_var =
                nir_local_variable_create(impl, uint_type, "header_offset");
        /* Per-vertex headers start after the single generic header slot. */
        nir_store_var(b, state->gs.header_offset_var, nir_imm_int(b, 1), 0x1);

        assert(!state->gs.header_var);
        state->gs.header_var =
                nir_local_variable_create(impl, uint_type, "header");
        reset_gs_header(b, state);
}

/* Writes the generic GS output header (at VPM offset 0): total header size
 * plus the number of emitted vertices. Called at the end of the shader, once
 * the final vertex count is known.
 */
static void
emit_gs_vpm_output_header_prolog(struct v3d_compile *c, nir_builder *b,
                                 struct v3d_nir_lower_io_state *state)
{
        const uint8_t VERTEX_COUNT_OFFSET = 16;

        /* Our GS header has 1 generic header slot (at VPM offset 0) and then
         * one slot per output vertex after it. This means we don't need to
         * have a variable just to keep track of the number of vertices we
         * emitted and instead we can just compute it here from the header
         * offset variable by removing the one generic header slot that always
         * goes at the beginning of our header.
         */
        nir_ssa_def *header_offset =
                nir_load_var(b, state->gs.header_offset_var);
        nir_ssa_def *vertex_count =
                nir_isub(b, header_offset, nir_imm_int(b, 1));
        nir_ssa_def *header =
                nir_ior(b, nir_imm_int(b, state->gs.output_header_size),
                        nir_ishl(b, vertex_count,
                                 nir_imm_int(b, VERTEX_COUNT_OFFSET)));

        v3d_nir_store_output(b, 0, NULL, header);
}

/* Pass entry point: sets up the VPM layout for the stage, lowers all io
 * intrinsics, and appends the fixed-function epilogue (VS) or the output
 * header write (GS).
 */
void
v3d_nir_lower_io(nir_shader *s, struct v3d_compile *c)
{
        struct v3d_nir_lower_io_state state = { 0 };

        /* Set up the layout of the VPM outputs. */
        switch (s->info.stage) {
        case MESA_SHADER_VERTEX:
                v3d_nir_setup_vpm_layout_vs(c, &state);
                break;
        case MESA_SHADER_GEOMETRY:
                v3d_nir_setup_vpm_layout_gs(c, &state);
                break;
        case MESA_SHADER_FRAGMENT:
        case MESA_SHADER_COMPUTE:
                break;
        default:
                unreachable("Unsupported shader stage");
        }

        nir_foreach_function(function, s) {
                if (function->impl) {
                        nir_builder b;
                        nir_builder_init(&b, function->impl);

                        if (c->s->info.stage == MESA_SHADER_GEOMETRY)
                                emit_gs_prolog(c, &b, function->impl, &state);

                        nir_foreach_block(block, function->impl) {
                                nir_foreach_instr_safe(instr, block)
                                        v3d_nir_lower_io_instr(c, &b, instr,
                                                               &state);
                        }

                        nir_block *last = nir_impl_last_block(function->impl);
                        b.cursor = nir_after_block(last);
                        if (s->info.stage == MESA_SHADER_VERTEX) {
                                v3d_nir_emit_ff_vpm_outputs(c, &b, &state);
                        } else if (s->info.stage == MESA_SHADER_GEOMETRY) {
                                emit_gs_vpm_output_header_prolog(c, &b, &state);
                        }

                        nir_metadata_preserve(function->impl,
                                              nir_metadata_block_index |
                                              nir_metadata_dominance);
                }
        }

        if (s->info.stage == MESA_SHADER_VERTEX ||
            s->info.stage == MESA_SHADER_GEOMETRY) {
                v3d_nir_lower_io_update_output_var_base(c, &state);
        }
}