/* Path: blob/21.2-virgl/src/freedreno/ir3/ir3_nir_lower_tess.c */
/*1* Copyright © 2019 Google, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "compiler/nir/nir_builder.h"24#include "ir3_compiler.h"25#include "ir3_nir.h"2627struct state {28uint32_t topology;2930struct primitive_map {31unsigned loc[32 + 4]; /* +POSITION +PSIZE +CLIP_DIST0 +CLIP_DIST1 */32unsigned stride;33} map;3435nir_ssa_def *header;3637nir_variable *vertex_count_var;38nir_variable *emitted_vertex_var;39nir_variable *vertex_flags_out;4041struct exec_list old_outputs;42struct exec_list new_outputs;43struct exec_list emit_outputs;4445/* tess ctrl shader on a650 gets the local primitive id at different bits: */46unsigned local_primitive_id_start;47};4849static nir_ssa_def *50bitfield_extract(nir_builder *b, nir_ssa_def *v, uint32_t start, uint32_t mask)51{52return nir_iand(b, nir_ushr(b, v, nir_imm_int(b, start)),53nir_imm_int(b, mask));54}5556static nir_ssa_def *57build_invocation_id(nir_builder *b, struct 
state *state)58{59return bitfield_extract(b, state->header, 11, 31);60}6162static nir_ssa_def *63build_vertex_id(nir_builder *b, struct state *state)64{65return bitfield_extract(b, state->header, 6, 31);66}6768static nir_ssa_def *69build_local_primitive_id(nir_builder *b, struct state *state)70{71return bitfield_extract(b, state->header, state->local_primitive_id_start,7263);73}7475static bool76is_tess_levels(gl_varying_slot slot)77{78return (slot == VARYING_SLOT_TESS_LEVEL_OUTER ||79slot == VARYING_SLOT_TESS_LEVEL_INNER);80}8182/* Return a deterministic index for varyings. We can't rely on driver_location83* to be correct without linking the different stages first, so we create84* "primitive maps" where the producer decides on the location of each varying85* slot and then exports a per-slot array to the consumer. This compacts the86* gl_varying_slot space down a bit so that the primitive maps aren't too87* large.88*89* Note: per-patch varyings are currently handled separately, without any90* compacting.91*92* TODO: We could probably use the driver_location's directly in the non-SSO93* (Vulkan) case.94*/9596static unsigned97shader_io_get_unique_index(gl_varying_slot slot)98{99if (slot == VARYING_SLOT_POS)100return 0;101if (slot == VARYING_SLOT_PSIZ)102return 1;103if (slot == VARYING_SLOT_CLIP_DIST0)104return 2;105if (slot == VARYING_SLOT_CLIP_DIST1)106return 3;107if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)108return 4 + (slot - VARYING_SLOT_VAR0);109unreachable("illegal slot in get unique index\n");110}111112static nir_ssa_def *113build_local_offset(nir_builder *b, struct state *state, nir_ssa_def *vertex,114uint32_t location, uint32_t comp, nir_ssa_def *offset)115{116nir_ssa_def *primitive_stride = nir_load_vs_primitive_stride_ir3(b);117nir_ssa_def *primitive_offset =118nir_imul24(b, build_local_primitive_id(b, state), primitive_stride);119nir_ssa_def *attr_offset;120nir_ssa_def *vertex_stride;121unsigned index = 
shader_io_get_unique_index(location);122123switch (b->shader->info.stage) {124case MESA_SHADER_VERTEX:125case MESA_SHADER_TESS_EVAL:126vertex_stride = nir_imm_int(b, state->map.stride * 4);127attr_offset = nir_imm_int(b, state->map.loc[index] + 4 * comp);128break;129case MESA_SHADER_TESS_CTRL:130case MESA_SHADER_GEOMETRY:131vertex_stride = nir_load_vs_vertex_stride_ir3(b);132attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),133nir_imm_int(b, comp * 4));134break;135default:136unreachable("bad shader stage");137}138139nir_ssa_def *vertex_offset = nir_imul24(b, vertex, vertex_stride);140141return nir_iadd(142b, nir_iadd(b, primitive_offset, vertex_offset),143nir_iadd(b, attr_offset, nir_ishl(b, offset, nir_imm_int(b, 4))));144}145146static nir_intrinsic_instr *147replace_intrinsic(nir_builder *b, nir_intrinsic_instr *intr,148nir_intrinsic_op op, nir_ssa_def *src0, nir_ssa_def *src1,149nir_ssa_def *src2)150{151nir_intrinsic_instr *new_intr = nir_intrinsic_instr_create(b->shader, op);152153new_intr->src[0] = nir_src_for_ssa(src0);154if (src1)155new_intr->src[1] = nir_src_for_ssa(src1);156if (src2)157new_intr->src[2] = nir_src_for_ssa(src2);158159new_intr->num_components = intr->num_components;160161if (nir_intrinsic_infos[op].has_dest)162nir_ssa_dest_init(&new_intr->instr, &new_intr->dest, intr->num_components,16332, NULL);164165nir_builder_instr_insert(b, &new_intr->instr);166167if (nir_intrinsic_infos[op].has_dest)168nir_ssa_def_rewrite_uses(&intr->dest.ssa, &new_intr->dest.ssa);169170nir_instr_remove(&intr->instr);171172return new_intr;173}174175static void176build_primitive_map(nir_shader *shader, struct primitive_map *map)177{178/* All interfaces except the TCS <-> TES interface use ldlw, which takes179* an offset in bytes, so each vec4 slot is 16 bytes. 
TCS <-> TES uses180* ldg, which takes an offset in dwords, but each per-vertex slot has181* space for every vertex, and there's space at the beginning for182* per-patch varyings.183*/184unsigned slot_size = 16, start = 0;185if (shader->info.stage == MESA_SHADER_TESS_CTRL) {186slot_size = shader->info.tess.tcs_vertices_out * 4;187start = util_last_bit(shader->info.patch_outputs_written) * 4;188}189190uint64_t mask = shader->info.outputs_written;191unsigned loc = start;192while (mask) {193int location = u_bit_scan64(&mask);194if (is_tess_levels(location))195continue;196197unsigned index = shader_io_get_unique_index(location);198map->loc[index] = loc;199loc += slot_size;200}201202map->stride = loc;203/* Use units of dwords for the stride. */204if (shader->info.stage != MESA_SHADER_TESS_CTRL)205map->stride /= 4;206}207208/* For shader stages that receive a primitive map, calculate how big it should209* be.210*/211212static unsigned213calc_primitive_map_size(nir_shader *shader)214{215uint64_t mask = shader->info.inputs_read;216unsigned max_index = 0;217while (mask) {218int location = u_bit_scan64(&mask);219220if (is_tess_levels(location))221continue;222223unsigned index = shader_io_get_unique_index(location);224max_index = MAX2(max_index, index + 1);225}226227return max_index;228}229230static void231lower_block_to_explicit_output(nir_block *block, nir_builder *b,232struct state *state)233{234nir_foreach_instr_safe (instr, block) {235if (instr->type != nir_instr_type_intrinsic)236continue;237238nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);239240switch (intr->intrinsic) {241case nir_intrinsic_store_output: {242// src[] = { value, offset }.243244/* nir_lower_io_to_temporaries replaces all access to output245* variables with temp variables and then emits a nir_copy_var at246* the end of the shader. 
Thus, we should always get a full wrmask247* here.248*/249assert(250util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));251252b->cursor = nir_instr_remove(&intr->instr);253254nir_ssa_def *vertex_id = build_vertex_id(b, state);255nir_ssa_def *offset = build_local_offset(256b, state, vertex_id, nir_intrinsic_io_semantics(intr).location,257nir_intrinsic_component(intr), intr->src[1].ssa);258259nir_store_shared_ir3(b, intr->src[0].ssa, offset);260break;261}262263default:264break;265}266}267}268269static nir_ssa_def *270local_thread_id(nir_builder *b)271{272return bitfield_extract(b, nir_load_gs_header_ir3(b), 16, 1023);273}274275void276ir3_nir_lower_to_explicit_output(nir_shader *shader,277struct ir3_shader_variant *v,278unsigned topology)279{280struct state state = {};281282build_primitive_map(shader, &state.map);283memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));284285nir_function_impl *impl = nir_shader_get_entrypoint(shader);286assert(impl);287288nir_builder b;289nir_builder_init(&b, impl);290b.cursor = nir_before_cf_list(&impl->body);291292if (v->type == MESA_SHADER_VERTEX && topology != IR3_TESS_NONE)293state.header = nir_load_tcs_header_ir3(&b);294else295state.header = nir_load_gs_header_ir3(&b);296297nir_foreach_block_safe (block, impl)298lower_block_to_explicit_output(block, &b, &state);299300nir_metadata_preserve(impl,301nir_metadata_block_index | nir_metadata_dominance);302303v->output_size = state.map.stride;304}305306static void307lower_block_to_explicit_input(nir_block *block, nir_builder *b,308struct state *state)309{310nir_foreach_instr_safe (instr, block) {311if (instr->type != nir_instr_type_intrinsic)312continue;313314nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);315316switch (intr->intrinsic) {317case nir_intrinsic_load_per_vertex_input: {318// src[] = { vertex, offset }.319320b->cursor = nir_before_instr(&intr->instr);321322nir_ssa_def *offset = build_local_offset(323b, state,324intr->src[0].ssa, // this is 
typically gl_InvocationID325nir_intrinsic_io_semantics(intr).location,326nir_intrinsic_component(intr), intr->src[1].ssa);327328replace_intrinsic(b, intr, nir_intrinsic_load_shared_ir3, offset, NULL,329NULL);330break;331}332333case nir_intrinsic_load_invocation_id: {334b->cursor = nir_before_instr(&intr->instr);335336nir_ssa_def *iid = build_invocation_id(b, state);337nir_ssa_def_rewrite_uses(&intr->dest.ssa, iid);338nir_instr_remove(&intr->instr);339break;340}341342default:343break;344}345}346}347348void349ir3_nir_lower_to_explicit_input(nir_shader *shader,350struct ir3_shader_variant *v)351{352struct state state = {};353354/* when using stl/ldl (instead of stlw/ldlw) for linking VS and HS,355* HS uses a different primitive id, which starts at bit 16 in the header356*/357if (shader->info.stage == MESA_SHADER_TESS_CTRL &&358v->shader->compiler->tess_use_shared)359state.local_primitive_id_start = 16;360361nir_function_impl *impl = nir_shader_get_entrypoint(shader);362assert(impl);363364nir_builder b;365nir_builder_init(&b, impl);366b.cursor = nir_before_cf_list(&impl->body);367368if (shader->info.stage == MESA_SHADER_GEOMETRY)369state.header = nir_load_gs_header_ir3(&b);370else371state.header = nir_load_tcs_header_ir3(&b);372373nir_foreach_block_safe (block, impl)374lower_block_to_explicit_input(block, &b, &state);375376v->input_size = calc_primitive_map_size(shader);377}378379static nir_ssa_def *380build_tcs_out_vertices(nir_builder *b)381{382if (b->shader->info.stage == MESA_SHADER_TESS_CTRL)383return nir_imm_int(b, b->shader->info.tess.tcs_vertices_out);384else385return nir_load_patch_vertices_in(b);386}387388static nir_ssa_def *389build_per_vertex_offset(nir_builder *b, struct state *state,390nir_ssa_def *vertex, uint32_t location, uint32_t comp,391nir_ssa_def *offset)392{393nir_ssa_def *primitive_id = nir_load_primitive_id(b);394nir_ssa_def *patch_stride = nir_load_hs_patch_stride_ir3(b);395nir_ssa_def *patch_offset = nir_imul24(b, primitive_id, 
patch_stride);396nir_ssa_def *attr_offset;397398if (nir_src_is_const(nir_src_for_ssa(offset))) {399location += nir_src_as_uint(nir_src_for_ssa(offset));400offset = nir_imm_int(b, 0);401} else {402/* Offset is in vec4's, but we need it in unit of components for the403* load/store_global_ir3 offset.404*/405offset = nir_ishl(b, offset, nir_imm_int(b, 2));406}407408nir_ssa_def *vertex_offset;409if (vertex) {410unsigned index = shader_io_get_unique_index(location);411switch (b->shader->info.stage) {412case MESA_SHADER_TESS_CTRL:413attr_offset = nir_imm_int(b, state->map.loc[index] + comp);414break;415case MESA_SHADER_TESS_EVAL:416attr_offset = nir_iadd(b, nir_load_primitive_location_ir3(b, index),417nir_imm_int(b, comp));418break;419default:420unreachable("bad shader state");421}422423attr_offset = nir_iadd(b, attr_offset,424nir_imul24(b, offset, build_tcs_out_vertices(b)));425vertex_offset = nir_ishl(b, vertex, nir_imm_int(b, 2));426} else {427assert(location >= VARYING_SLOT_PATCH0 &&428location <= VARYING_SLOT_TESS_MAX);429unsigned index = location - VARYING_SLOT_PATCH0;430attr_offset = nir_iadd(b, nir_imm_int(b, index * 4 + comp), offset);431vertex_offset = nir_imm_int(b, 0);432}433434return nir_iadd(b, nir_iadd(b, patch_offset, attr_offset), vertex_offset);435}436437static nir_ssa_def *438build_patch_offset(nir_builder *b, struct state *state, uint32_t base,439uint32_t comp, nir_ssa_def *offset)440{441return build_per_vertex_offset(b, state, NULL, base, comp, offset);442}443444static void445tess_level_components(struct state *state, uint32_t *inner, uint32_t *outer)446{447switch (state->topology) {448case IR3_TESS_TRIANGLES:449*inner = 1;450*outer = 3;451break;452case IR3_TESS_QUADS:453*inner = 2;454*outer = 4;455break;456case IR3_TESS_ISOLINES:457*inner = 0;458*outer = 2;459break;460default:461unreachable("bad");462}463}464465static nir_ssa_def *466build_tessfactor_base(nir_builder *b, gl_varying_slot slot, struct state *state)467{468uint32_t inner_levels, 
outer_levels;469tess_level_components(state, &inner_levels, &outer_levels);470471const uint32_t patch_stride = 1 + inner_levels + outer_levels;472473nir_ssa_def *primitive_id = nir_load_primitive_id(b);474475nir_ssa_def *patch_offset =476nir_imul24(b, primitive_id, nir_imm_int(b, patch_stride));477478uint32_t offset;479switch (slot) {480case VARYING_SLOT_TESS_LEVEL_OUTER:481/* There's some kind of header dword, tess levels start at index 1. */482offset = 1;483break;484case VARYING_SLOT_TESS_LEVEL_INNER:485offset = 1 + outer_levels;486break;487default:488unreachable("bad");489}490491return nir_iadd(b, patch_offset, nir_imm_int(b, offset));492}493494static void495lower_tess_ctrl_block(nir_block *block, nir_builder *b, struct state *state)496{497nir_foreach_instr_safe (instr, block) {498if (instr->type != nir_instr_type_intrinsic)499continue;500501nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);502503switch (intr->intrinsic) {504case nir_intrinsic_load_per_vertex_output: {505// src[] = { vertex, offset }.506507b->cursor = nir_before_instr(&intr->instr);508509nir_ssa_def *address = nir_load_tess_param_base_ir3(b);510nir_ssa_def *offset = build_per_vertex_offset(511b, state, intr->src[0].ssa,512nir_intrinsic_io_semantics(intr).location,513nir_intrinsic_component(intr), intr->src[1].ssa);514515replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,516offset, NULL);517break;518}519520case nir_intrinsic_store_per_vertex_output: {521// src[] = { value, vertex, offset }.522523b->cursor = nir_before_instr(&intr->instr);524525/* sparse writemask not supported */526assert(527util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));528529nir_ssa_def *value = intr->src[0].ssa;530nir_ssa_def *address = nir_load_tess_param_base_ir3(b);531nir_ssa_def *offset = build_per_vertex_offset(532b, state, intr->src[1].ssa,533nir_intrinsic_io_semantics(intr).location,534nir_intrinsic_component(intr), intr->src[2].ssa);535536replace_intrinsic(b, intr, 
nir_intrinsic_store_global_ir3, value,537address, offset);538539break;540}541542case nir_intrinsic_load_output: {543// src[] = { offset }.544545b->cursor = nir_before_instr(&intr->instr);546547nir_ssa_def *address, *offset;548549/* note if vectorization of the tess level loads ever happens:550* "ldg" across 16-byte boundaries can behave incorrectly if results551* are never used. most likely some issue with (sy) not properly552* syncing with values coming from a second memory transaction.553*/554gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;555if (is_tess_levels(location)) {556assert(intr->dest.ssa.num_components == 1);557address = nir_load_tess_factor_base_ir3(b);558offset = build_tessfactor_base(b, location, state);559} else {560address = nir_load_tess_param_base_ir3(b);561offset = build_patch_offset(b, state, location,562nir_intrinsic_component(intr),563intr->src[0].ssa);564}565566replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,567offset, NULL);568break;569}570571case nir_intrinsic_store_output: {572// src[] = { value, offset }.573574/* write patch output to bo */575576b->cursor = nir_before_instr(&intr->instr);577578/* sparse writemask not supported */579assert(580util_is_power_of_two_nonzero(nir_intrinsic_write_mask(intr) + 1));581582gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;583if (is_tess_levels(location)) {584/* with tess levels are defined as float[4] and float[2],585* but tess factor BO has smaller sizes for tris/isolines,586* so we have to discard any writes beyond the number of587* components for inner/outer levels */588uint32_t inner_levels, outer_levels, levels;589tess_level_components(state, &inner_levels, &outer_levels);590591if (location == VARYING_SLOT_TESS_LEVEL_OUTER)592levels = outer_levels;593else594levels = inner_levels;595596assert(intr->src[0].ssa->num_components == 1);597598nir_ssa_def *offset =599nir_iadd_imm(b, intr->src[1].ssa, nir_intrinsic_component(intr));600601nir_if 
*nif =602nir_push_if(b, nir_ult(b, offset, nir_imm_int(b, levels)));603604replace_intrinsic(605b, intr, nir_intrinsic_store_global_ir3, intr->src[0].ssa,606nir_load_tess_factor_base_ir3(b),607nir_iadd(b, offset, build_tessfactor_base(b, location, state)));608609nir_pop_if(b, nif);610} else {611nir_ssa_def *address = nir_load_tess_param_base_ir3(b);612nir_ssa_def *offset = build_patch_offset(613b, state, location, nir_intrinsic_component(intr),614intr->src[1].ssa);615616debug_assert(nir_intrinsic_component(intr) == 0);617618replace_intrinsic(b, intr, nir_intrinsic_store_global_ir3,619intr->src[0].ssa, address, offset);620}621break;622}623624default:625break;626}627}628}629630static void631emit_tess_epilouge(nir_builder *b, struct state *state)632{633/* Insert endpatch instruction:634*635* TODO we should re-work this to use normal flow control.636*/637638nir_end_patch_ir3(b);639}640641void642ir3_nir_lower_tess_ctrl(nir_shader *shader, struct ir3_shader_variant *v,643unsigned topology)644{645struct state state = {.topology = topology};646647if (shader_debug_enabled(shader->info.stage)) {648mesa_logi("NIR (before tess lowering) for %s shader:",649_mesa_shader_stage_to_string(shader->info.stage));650nir_log_shaderi(shader);651}652653build_primitive_map(shader, &state.map);654memcpy(v->output_loc, state.map.loc, sizeof(v->output_loc));655v->output_size = state.map.stride;656657nir_function_impl *impl = nir_shader_get_entrypoint(shader);658assert(impl);659660nir_builder b;661nir_builder_init(&b, impl);662b.cursor = nir_before_cf_list(&impl->body);663664state.header = nir_load_tcs_header_ir3(&b);665666nir_foreach_block_safe (block, impl)667lower_tess_ctrl_block(block, &b, &state);668669/* Now move the body of the TCS into a conditional:670*671* if (gl_InvocationID < num_vertices)672* // body673*674*/675676nir_cf_list body;677nir_cf_extract(&body, nir_before_cf_list(&impl->body),678nir_after_cf_list(&impl->body));679680b.cursor = nir_after_cf_list(&impl->body);681682/* 
Re-emit the header, since the old one got moved into the if branch */683state.header = nir_load_tcs_header_ir3(&b);684nir_ssa_def *iid = build_invocation_id(&b, &state);685686const uint32_t nvertices = shader->info.tess.tcs_vertices_out;687nir_ssa_def *cond = nir_ult(&b, iid, nir_imm_int(&b, nvertices));688689nir_if *nif = nir_push_if(&b, cond);690691nir_cf_reinsert(&body, b.cursor);692693b.cursor = nir_after_cf_list(&nif->then_list);694695/* Insert conditional exit for threads invocation id != 0 */696nir_ssa_def *iid0_cond = nir_ieq_imm(&b, iid, 0);697nir_cond_end_ir3(&b, iid0_cond);698699emit_tess_epilouge(&b, &state);700701nir_pop_if(&b, nif);702703nir_metadata_preserve(impl, 0);704}705706static void707lower_tess_eval_block(nir_block *block, nir_builder *b, struct state *state)708{709nir_foreach_instr_safe (instr, block) {710if (instr->type != nir_instr_type_intrinsic)711continue;712713nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);714715switch (intr->intrinsic) {716case nir_intrinsic_load_tess_coord: {717b->cursor = nir_after_instr(&intr->instr);718nir_ssa_def *x = nir_channel(b, &intr->dest.ssa, 0);719nir_ssa_def *y = nir_channel(b, &intr->dest.ssa, 1);720nir_ssa_def *z;721722if (state->topology == IR3_TESS_TRIANGLES)723z = nir_fsub(b, nir_fsub(b, nir_imm_float(b, 1.0f), y), x);724else725z = nir_imm_float(b, 0.0f);726727nir_ssa_def *coord = nir_vec3(b, x, y, z);728729nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, coord,730b->cursor.instr);731break;732}733734case nir_intrinsic_load_per_vertex_input: {735// src[] = { vertex, offset }.736737b->cursor = nir_before_instr(&intr->instr);738739nir_ssa_def *address = nir_load_tess_param_base_ir3(b);740nir_ssa_def *offset = build_per_vertex_offset(741b, state, intr->src[0].ssa,742nir_intrinsic_io_semantics(intr).location,743nir_intrinsic_component(intr), intr->src[1].ssa);744745replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,746offset, NULL);747break;748}749750case nir_intrinsic_load_input: 
{751// src[] = { offset }.752753b->cursor = nir_before_instr(&intr->instr);754755nir_ssa_def *address, *offset;756757/* note if vectorization of the tess level loads ever happens:758* "ldg" across 16-byte boundaries can behave incorrectly if results759* are never used. most likely some issue with (sy) not properly760* syncing with values coming from a second memory transaction.761*/762gl_varying_slot location = nir_intrinsic_io_semantics(intr).location;763if (is_tess_levels(location)) {764assert(intr->dest.ssa.num_components == 1);765address = nir_load_tess_factor_base_ir3(b);766offset = build_tessfactor_base(b, location, state);767} else {768address = nir_load_tess_param_base_ir3(b);769offset = build_patch_offset(b, state, location,770nir_intrinsic_component(intr),771intr->src[0].ssa);772}773774offset =775nir_iadd(b, offset, nir_imm_int(b, nir_intrinsic_component(intr)));776777replace_intrinsic(b, intr, nir_intrinsic_load_global_ir3, address,778offset, NULL);779break;780}781782default:783break;784}785}786}787788void789ir3_nir_lower_tess_eval(nir_shader *shader, struct ir3_shader_variant *v,790unsigned topology)791{792struct state state = {.topology = topology};793794if (shader_debug_enabled(shader->info.stage)) {795mesa_logi("NIR (before tess lowering) for %s shader:",796_mesa_shader_stage_to_string(shader->info.stage));797nir_log_shaderi(shader);798}799800nir_function_impl *impl = nir_shader_get_entrypoint(shader);801assert(impl);802803nir_builder b;804nir_builder_init(&b, impl);805806nir_foreach_block_safe (block, impl)807lower_tess_eval_block(block, &b, &state);808809v->input_size = calc_primitive_map_size(shader);810811nir_metadata_preserve(impl, 0);812}813814static void815lower_gs_block(nir_block *block, nir_builder *b, struct state *state)816{817nir_foreach_instr_safe (instr, block) {818if (instr->type != nir_instr_type_intrinsic)819continue;820821nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);822823switch (intr->intrinsic) {824case 
nir_intrinsic_end_primitive: {825/* Note: This ignores the stream, which seems to match the blob826* behavior. I'm guessing the HW ignores any extraneous cut827* signals from an EndPrimitive() that doesn't correspond to the828* rasterized stream.829*/830b->cursor = nir_before_instr(&intr->instr);831nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 4), 0x1);832nir_instr_remove(&intr->instr);833break;834}835836case nir_intrinsic_emit_vertex: {837/* Load the vertex count */838b->cursor = nir_before_instr(&intr->instr);839nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);840841nir_push_if(b, nir_ieq(b, count, local_thread_id(b)));842843unsigned stream = nir_intrinsic_stream_id(intr);844/* vertex_flags_out |= stream */845nir_store_var(b, state->vertex_flags_out,846nir_ior(b, nir_load_var(b, state->vertex_flags_out),847nir_imm_int(b, stream)),8480x1 /* .x */);849850foreach_two_lists (dest_node, &state->emit_outputs, src_node,851&state->old_outputs) {852nir_variable *dest = exec_node_data(nir_variable, dest_node, node);853nir_variable *src = exec_node_data(nir_variable, src_node, node);854nir_copy_var(b, dest, src);855}856857nir_instr_remove(&intr->instr);858859nir_store_var(b, state->emitted_vertex_var,860nir_iadd(b, nir_load_var(b, state->emitted_vertex_var),861nir_imm_int(b, 1)),8620x1);863864nir_pop_if(b, NULL);865866/* Increment the vertex count by 1 */867nir_store_var(b, state->vertex_count_var,868nir_iadd(b, count, nir_imm_int(b, 1)), 0x1); /* .x */869nir_store_var(b, state->vertex_flags_out, nir_imm_int(b, 0), 0x1);870871break;872}873874default:875break;876}877}878}879880void881ir3_nir_lower_gs(nir_shader *shader)882{883struct state state = {};884885if (shader_debug_enabled(shader->info.stage)) {886mesa_logi("NIR (before gs lowering):");887nir_log_shaderi(shader);888}889890/* Create an output var for vertex_flags. 
This will be shadowed below,891* same way regular outputs get shadowed, and this variable will become a892* temporary.893*/894state.vertex_flags_out = nir_variable_create(895shader, nir_var_shader_out, glsl_uint_type(), "vertex_flags");896state.vertex_flags_out->data.driver_location = shader->num_outputs++;897state.vertex_flags_out->data.location = VARYING_SLOT_GS_VERTEX_FLAGS_IR3;898state.vertex_flags_out->data.interpolation = INTERP_MODE_NONE;899900nir_function_impl *impl = nir_shader_get_entrypoint(shader);901assert(impl);902903nir_builder b;904nir_builder_init(&b, impl);905b.cursor = nir_before_cf_list(&impl->body);906907state.header = nir_load_gs_header_ir3(&b);908909/* Generate two set of shadow vars for the output variables. The first910* set replaces the real outputs and the second set (emit_outputs) we'll911* assign in the emit_vertex conditionals. Then at the end of the shader912* we copy the emit_outputs to the real outputs, so that we get913* store_output in uniform control flow.914*/915exec_list_make_empty(&state.old_outputs);916nir_foreach_shader_out_variable_safe (var, shader) {917exec_node_remove(&var->node);918exec_list_push_tail(&state.old_outputs, &var->node);919}920exec_list_make_empty(&state.new_outputs);921exec_list_make_empty(&state.emit_outputs);922nir_foreach_variable_in_list (var, &state.old_outputs) {923/* Create a new output var by cloning the original output var and924* stealing the name.925*/926nir_variable *output = nir_variable_clone(var, shader);927exec_list_push_tail(&state.new_outputs, &output->node);928929/* Rewrite the original output to be a shadow variable. 
*/930var->name = ralloc_asprintf(var, "%s@gs-temp", output->name);931var->data.mode = nir_var_shader_temp;932933/* Clone the shadow variable to create the emit shadow variable that934* we'll assign in the emit conditionals.935*/936nir_variable *emit_output = nir_variable_clone(var, shader);937emit_output->name = ralloc_asprintf(var, "%s@emit-temp", output->name);938exec_list_push_tail(&state.emit_outputs, &emit_output->node);939}940941/* During the shader we'll keep track of which vertex we're currently942* emitting for the EmitVertex test and how many vertices we emitted so we943* know to discard if didn't emit any. In most simple shaders, this can944* all be statically determined and gets optimized away.945*/946state.vertex_count_var =947nir_local_variable_create(impl, glsl_uint_type(), "vertex_count");948state.emitted_vertex_var =949nir_local_variable_create(impl, glsl_uint_type(), "emitted_vertex");950951/* Initialize to 0. */952b.cursor = nir_before_cf_list(&impl->body);953nir_store_var(&b, state.vertex_count_var, nir_imm_int(&b, 0), 0x1);954nir_store_var(&b, state.emitted_vertex_var, nir_imm_int(&b, 0), 0x1);955nir_store_var(&b, state.vertex_flags_out, nir_imm_int(&b, 4), 0x1);956957nir_foreach_block_safe (block, impl)958lower_gs_block(block, &b, &state);959960set_foreach (impl->end_block->predecessors, block_entry) {961struct nir_block *block = (void *)block_entry->key;962b.cursor = nir_after_block_before_jump(block);963964nir_ssa_def *cond =965nir_ieq_imm(&b, nir_load_var(&b, state.emitted_vertex_var), 0);966967nir_discard_if(&b, cond);968969foreach_two_lists (dest_node, &state.new_outputs, src_node,970&state.emit_outputs) {971nir_variable *dest = exec_node_data(nir_variable, dest_node, node);972nir_variable *src = exec_node_data(nir_variable, src_node, node);973nir_copy_var(&b, dest, src);974}975}976977exec_list_append(&shader->variables, &state.old_outputs);978exec_list_append(&shader->variables, 
&state.emit_outputs);979exec_list_append(&shader->variables, &state.new_outputs);980981nir_metadata_preserve(impl, 0);982983nir_lower_global_vars_to_local(shader);984nir_split_var_copies(shader);985nir_lower_var_copies(shader);986987nir_fixup_deref_modes(shader);988989if (shader_debug_enabled(shader->info.stage)) {990mesa_logi("NIR (after gs lowering):");991nir_log_shaderi(shader);992}993}994995996