GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/zink/zink_compiler.c
/*
 * Copyright 2018 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "zink_context.h"
#include "zink_compiler.h"
#include "zink_program.h"
#include "zink_screen.h"
#include "nir_to_spirv/nir_to_spirv.h"

#include "pipe/p_state.h"

#include "nir.h"
#include "compiler/nir/nir_builder.h"

#include "nir/tgsi_to_nir.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_from_mesa.h"

#include "util/u_memory.h"

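/* Note (inferred from the offsetof() uses in this file): struct
 * zink_gfx_push_constant, mirrored by the helpers below, carries
 * draw_mode_is_indexed and draw_id for the vertex stage plus the
 * default_inner_level/default_outer_level values consumed by the generated
 * passthrough tcs at the bottom of this file.
 */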
static void
create_vs_pushconst(nir_shader *nir)
{
   nir_variable *vs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 2);
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "draw_mode_is_indexed");
   fields[0].offset = offsetof(struct zink_gfx_push_constant, draw_mode_is_indexed);
   fields[1].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[1].name = ralloc_asprintf(nir, "draw_id");
   fields[1].offset = offsetof(struct zink_gfx_push_constant, draw_id);
   vs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 2, "struct", false), "vs_pushconst");
   vs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static void
create_cs_pushconst(nir_shader *nir)
{
   nir_variable *cs_pushconst;
   /* create compatible layout for the ntv push constant loader */
   struct glsl_struct_field *fields = rzalloc_size(nir, 1 * sizeof(struct glsl_struct_field));
   fields[0].type = glsl_array_type(glsl_uint_type(), 1, 0);
   fields[0].name = ralloc_asprintf(nir, "work_dim");
   fields[0].offset = 0;
   cs_pushconst = nir_variable_create(nir, nir_var_mem_push_const,
                                      glsl_struct_type(fields, 1, "struct", false), "cs_pushconst");
   cs_pushconst->data.location = INT_MAX; //doesn't really matter
}

static bool
reads_work_dim(nir_shader *shader)
{
   return BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_WORK_DIM);
}

static bool
lower_discard_if_instr(nir_intrinsic_instr *instr, nir_builder *b)
{
   if (instr->intrinsic == nir_intrinsic_discard_if) {
      b->cursor = nir_before_instr(&instr->instr);

      nir_if *if_stmt = nir_push_if(b, nir_ssa_for_src(b, instr->src[0], 1));
      nir_discard(b);
      nir_pop_if(b, if_stmt);
      nir_instr_remove(&instr->instr);
      return true;
   }
   /* a shader like this (shaders@glsl-fs-discard-04):

      uniform int j, k;

      void main()
      {
         for (int i = 0; i < j; i++) {
            if (i > k)
               continue;
            discard;
         }
         gl_FragColor = vec4(0.0, 1.0, 0.0, 0.0);
      }

      will generate nir like:

      loop {
         //snip
         if ssa_11 {
            block block_5:
            / preds: block_4 /
            vec1 32 ssa_17 = iadd ssa_50, ssa_31
            / succs: block_7 /
         } else {
            block block_6:
            / preds: block_4 /
            intrinsic discard () () <-- not last instruction
            vec1 32 ssa_23 = iadd ssa_50, ssa_31 <-- dead code loop itr increment
            / succs: block_7 /
         }
         //snip
      }

      which means that we can't assert like this:

      assert(instr->intrinsic != nir_intrinsic_discard ||
             nir_block_last_instr(instr->instr.block) == &instr->instr);

      and it's unnecessary anyway since post-vtn optimizing will dce the instructions following the discard
    */

   return false;
}

static bool
lower_discard_if(nir_shader *shader)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_builder builder;
         nir_builder_init(&builder, function->impl);
         nir_foreach_block(block, function->impl) {
            nir_foreach_instr_safe(instr, block) {
               if (instr->type == nir_instr_type_intrinsic)
                  progress |= lower_discard_if_instr(
                                 nir_instr_as_intrinsic(instr),
                                 &builder);
            }
         }

         nir_metadata_preserve(function->impl, nir_metadata_dominance);
      }
   }

   return progress;
}
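
/* The lowering above exists because SPIR-V's OpKill is unconditional (there
 * is no conditional-kill opcode), so a discard_if has to become real control
 * flow before nir_to_spirv runs.
 */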

static bool
lower_work_dim_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_work_dim)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 3 * sizeof(uint32_t));
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "work_dim");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_work_dim(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_KERNEL)
      return false;

   if (!reads_work_dim(shader))
      return false;

   return nir_shader_instructions_pass(shader, lower_work_dim_instr, nir_metadata_dominance, NULL);
}
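
/* The push constant load above (offset 0) matches the "work_dim" field laid
 * out by create_cs_pushconst(); only OpenCL-style kernels ever read
 * SYSTEM_VALUE_WORK_DIM, hence the MESA_SHADER_KERNEL check.
 */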

static bool
lower_64bit_vertex_attribs_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_deref)
      return false;
   nir_deref_instr *deref = nir_instr_as_deref(instr);
   if (deref->deref_type != nir_deref_type_var)
      return false;
   nir_variable *var = nir_deref_instr_get_variable(deref);
   if (var->data.mode != nir_var_shader_in)
      return false;
   if (!glsl_type_is_64bit(var->type) || !glsl_type_is_vector(var->type) || glsl_get_vector_elements(var->type) < 3)
      return false;

   /* create second variable for the split */
   nir_variable *var2 = nir_variable_clone(var, b->shader);
   /* split new variable into second slot */
   var2->data.driver_location++;
   nir_shader_add_variable(b->shader, var2);

   unsigned total_num_components = glsl_get_vector_elements(var->type);
   /* new variable is the second half of the dvec */
   var2->type = glsl_vector_type(glsl_get_base_type(var->type), glsl_get_vector_elements(var->type) - 2);
   /* clamp original variable to a dvec2 */
   deref->type = var->type = glsl_vector_type(glsl_get_base_type(var->type), 2);

   /* create deref instr for new variable */
   b->cursor = nir_after_instr(instr);
   nir_deref_instr *deref2 = nir_build_deref_var(b, var2);

   nir_foreach_use_safe(use_src, &deref->dest.ssa) {
      nir_instr *use_instr = use_src->parent_instr;
      assert(use_instr->type == nir_instr_type_intrinsic &&
             nir_instr_as_intrinsic(use_instr)->intrinsic == nir_intrinsic_load_deref);

      /* this is a load instruction for the deref, and we need to split it into two instructions that we can
       * then zip back into a single ssa def */
      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(use_instr);
      /* clamp the first load to 2 64bit components */
      intr->num_components = intr->dest.ssa.num_components = 2;
      b->cursor = nir_after_instr(use_instr);
      /* this is the second load instruction for the second half of the dvec3/4 components */
      nir_intrinsic_instr *intr2 = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_deref);
      intr2->src[0] = nir_src_for_ssa(&deref2->dest.ssa);
      intr2->num_components = total_num_components - 2;
      nir_ssa_dest_init(&intr2->instr, &intr2->dest, intr2->num_components, 64, NULL);
      nir_builder_instr_insert(b, &intr2->instr);

      nir_ssa_def *def[4];
      /* create a new dvec3/4 comprised of all the loaded components from both variables */
      def[0] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 0));
      def[1] = nir_vector_extract(b, &intr->dest.ssa, nir_imm_int(b, 1));
      def[2] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 0));
      if (total_num_components == 4)
         def[3] = nir_vector_extract(b, &intr2->dest.ssa, nir_imm_int(b, 1));
      nir_ssa_def *new_vec = nir_vec(b, def, total_num_components);
      /* use the assembled dvec3/4 for all other uses of the load */
      nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, new_vec,
                                     new_vec->parent_instr);
   }

   return true;
}

/* "64-bit three- and four-component vectors consume two consecutive locations."
 *  - 14.1.4. Location Assignment
 *
 * this pass splits dvec3 and dvec4 vertex inputs into a dvec2 and a double/dvec2 which
 * are assigned to consecutive locations, loaded separately, and then assembled back into a
 * composite value that's used in place of the original loaded ssa src
 */
static bool
lower_64bit_vertex_attribs(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   return nir_shader_instructions_pass(shader, lower_64bit_vertex_attribs_instr, nir_metadata_dominance, NULL);
}
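
/* Example of the transform above (illustrative): for
 *
 *    in dvec4 attr;   // consumes two Vulkan locations
 *
 * the variable is clamped to a dvec2 holding .xy at driver_location N, a
 * cloned dvec2 holding .zw lands at driver_location N+1, and every
 * load_deref is split into two loads whose results are recombined with
 * nir_vec() so downstream code still sees a single dvec4 def.
 */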

static bool
lower_basevertex_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_base_vertex)
      return false;

   b->cursor = nir_after_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_mode_is_indexed");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def *composite = nir_build_alu(b, nir_op_bcsel,
                                          nir_build_alu(b, nir_op_ieq, &load->dest.ssa, nir_imm_int(b, 1), NULL, NULL),
                                          &instr->dest.ssa,
                                          nir_imm_int(b, 0),
                                          NULL);

   nir_ssa_def_rewrite_uses_after(&instr->dest.ssa, composite,
                                  composite->parent_instr);
   return true;
}

static bool
lower_basevertex(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_BASE_VERTEX))
      return false;

   return nir_shader_instructions_pass(shader, lower_basevertex_instr, nir_metadata_dominance, NULL);
}
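
/* Rationale: GL's gl_BaseVertex must read as zero for non-indexed draws,
 * while the Vulkan base vertex is always applied, so the bcsel above selects
 * between the real base vertex and 0 based on the draw_mode_is_indexed push
 * constant (set by the driver when recording the draw).
 */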


static bool
lower_drawid_instr(nir_builder *b, nir_instr *in, void *data)
{
   if (in->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(in);
   if (instr->intrinsic != nir_intrinsic_load_draw_id)
      return false;

   b->cursor = nir_before_instr(&instr->instr);
   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_push_constant);
   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 1));
   nir_intrinsic_set_range(load, 4);
   load->num_components = 1;
   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, "draw_id");
   nir_builder_instr_insert(b, &load->instr);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, &load->dest.ssa);

   return true;
}

static bool
lower_drawid(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;

   if (!BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_DRAW_ID))
      return false;

   return nir_shader_instructions_pass(shader, lower_drawid_instr, nir_metadata_dominance, NULL);
}
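
/* gl_DrawID is likewise sourced from the "draw_id" field of the vs_pushconst
 * block created above; the push_drawid shader-key bit checked in
 * zink_shader_compile() decides when this pass runs.
 */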

static bool
lower_dual_blend(nir_shader *shader)
{
   bool progress = false;
   nir_variable *var = nir_find_variable_with_location(shader, nir_var_shader_out, FRAG_RESULT_DATA1);
   if (var) {
      var->data.location = FRAG_RESULT_DATA0;
      var->data.index = 1;
      progress = true;
   }
   nir_shader_preserve_all_metadata(shader);
   return progress;
}
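
/* Vulkan expresses dual-source blending as a single output location 0 with
 * index 0/1 (VK_BLEND_FACTOR_SRC1_*), so FRAG_RESULT_DATA1 is remapped onto
 * FRAG_RESULT_DATA0 with index 1 rather than staying a second color output.
 */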

void
zink_screen_init_compiler(struct zink_screen *screen)
{
   static const struct nir_shader_compiler_options
   default_options = {
      .lower_ffma16 = true,
      .lower_ffma32 = true,
      .lower_ffma64 = true,
      .lower_scmp = true,
      .lower_fdph = true,
      .lower_flrp32 = true,
      .lower_fpow = true,
      .lower_fsat = true,
      .lower_extract_byte = true,
      .lower_extract_word = true,
      .lower_insert_byte = true,
      .lower_insert_word = true,
      .lower_mul_high = true,
      .lower_rotate = true,
      .lower_uadd_carry = true,
      .lower_pack_64_2x32_split = true,
      .lower_unpack_64_2x32_split = true,
      .lower_vector_cmp = true,
      .lower_int64_options = 0,
      .lower_doubles_options = ~nir_lower_fp64_full_software,
      .lower_uniforms_to_ubo = true,
      .has_fsub = true,
      .has_isub = true,
      .lower_mul_2x32_64 = true,
      .support_16bit_alu = true, /* not quite what it sounds like */
   };

   screen->nir_options = default_options;

   if (!screen->info.feats.features.shaderInt64)
      screen->nir_options.lower_int64_options = ~0;

   if (!screen->info.feats.features.shaderFloat64) {
      screen->nir_options.lower_doubles_options = ~0;
      screen->nir_options.lower_flrp64 = true;
      screen->nir_options.lower_ffma64 = true;
   }
}

const void *
zink_get_compiler_options(struct pipe_screen *pscreen,
                          enum pipe_shader_ir ir,
                          enum pipe_shader_type shader)
{
   assert(ir == PIPE_SHADER_IR_NIR);
   return &zink_screen(pscreen)->nir_options;
}

struct nir_shader *
zink_tgsi_to_nir(struct pipe_screen *screen, const struct tgsi_token *tokens)
{
   if (zink_debug & ZINK_DEBUG_TGSI) {
      fprintf(stderr, "TGSI shader:\n---8<---\n");
      tgsi_dump_to_file(tokens, 0, stderr);
      fprintf(stderr, "---8<---\n\n");
   }

   return tgsi_to_nir(tokens, screen, false);
}

static void
optimize_nir(struct nir_shader *s)
{
   bool progress;
   do {
      progress = false;
      NIR_PASS_V(s, nir_lower_vars_to_ssa);
      NIR_PASS(progress, s, nir_copy_prop);
      NIR_PASS(progress, s, nir_opt_remove_phis);
      NIR_PASS(progress, s, nir_opt_dce);
      NIR_PASS(progress, s, nir_opt_dead_cf);
      NIR_PASS(progress, s, nir_opt_cse);
      NIR_PASS(progress, s, nir_opt_peephole_select, 8, true, true);
      NIR_PASS(progress, s, nir_opt_algebraic);
      NIR_PASS(progress, s, nir_opt_constant_folding);
      NIR_PASS(progress, s, nir_opt_undef);
      NIR_PASS(progress, s, zink_nir_lower_b2b);
   } while (progress);

   do {
      progress = false;
      NIR_PASS(progress, s, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(s, nir_copy_prop);
         NIR_PASS_V(s, nir_opt_dce);
         NIR_PASS_V(s, nir_opt_cse);
      }
   } while (progress);
}
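
/* Note: both loops above iterate to a fixed point -- each NIR_PASS records
 * whether it made progress and the pipeline reruns until nothing changes;
 * nir_opt_algebraic_late runs in its own loop since its rewrites can
 * re-enable the cheaper cleanup passes.
 */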

/* check for a genuine gl_PointSize output vs one from nir_lower_point_size_mov */
static bool
check_psiz(struct nir_shader *s)
{
   nir_foreach_shader_out_variable(var, s) {
      if (var->data.location == VARYING_SLOT_PSIZ) {
         /* genuine PSIZ outputs will have this set */
         return !!var->data.explicit_location;
      }
   }
   return false;
}

static void
update_so_info(struct zink_shader *zs, const struct pipe_stream_output_info *so_info,
               uint64_t outputs_written, bool have_psiz)
{
   uint8_t reverse_map[64] = {0};
   unsigned slot = 0;
   /* semi-copied from iris */
   while (outputs_written) {
      int bit = u_bit_scan64(&outputs_written);
      /* PSIZ from nir_lower_point_size_mov breaks stream output, so always skip it */
      if (bit == VARYING_SLOT_PSIZ && !have_psiz)
         continue;
      reverse_map[slot++] = bit;
   }

   nir_foreach_shader_out_variable(var, zs->nir)
      var->data.explicit_xfb_buffer = 0;

   bool inlined[64] = {0};
   for (unsigned i = 0; i < so_info->num_outputs; i++) {
      const struct pipe_stream_output *output = &so_info->output[i];
      unsigned slot = reverse_map[output->register_index];
      /* always set stride to be used during draw */
      zs->streamout.so_info.stride[output->output_buffer] = so_info->stride[output->output_buffer];
      if ((zs->nir->info.stage != MESA_SHADER_GEOMETRY || util_bitcount(zs->nir->info.gs.active_stream_mask) == 1) &&
          !output->start_component) {
         nir_variable *var = NULL;
         while (!var)
            var = nir_find_variable_with_location(zs->nir, nir_var_shader_out, slot--);
         slot++;
         if (inlined[slot])
            continue;
         assert(var && var->data.location == slot);
         /* if this is the entire variable, try to blast it out during the initial declaration */
         if (glsl_get_components(var->type) == output->num_components) {
            var->data.explicit_xfb_buffer = 1;
            var->data.xfb.buffer = output->output_buffer;
            var->data.xfb.stride = so_info->stride[output->output_buffer] * 4;
            var->data.offset = output->dst_offset * 4;
            var->data.stream = output->stream;
            inlined[slot] = true;
            continue;
         }
      }
      zs->streamout.so_info.output[zs->streamout.so_info.num_outputs] = *output;
      /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */
      zs->streamout.so_info_slots[zs->streamout.so_info.num_outputs++] = reverse_map[output->register_index];
   }
   zs->streamout.have_xfb = !!zs->streamout.so_info.num_outputs;
}
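
/* Summary of the fast path above: when a stream output covers an entire
 * variable, the xfb buffer/stride/offset are written straight onto the
 * nir_variable ("inlined") so they can be emitted as decorations; everything
 * else is appended to zs->streamout.so_info for manual emission, with
 * so_info_slots mapping each entry back to its VARYING_SLOT_*.
 */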

static void
assign_producer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   unsigned slot = var->data.location;
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;

   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         slot = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 var->data.mode == nir_var_shader_in &&
                 stage == MESA_SHADER_TESS_EVAL) {
         slot = var->data.location - VARYING_SLOT_VAR0;
      } else {
         if (slot_map[var->data.location] == 0xff) {
            assert(*reserved < MAX_VARYING);
            slot_map[var->data.location] = *reserved;
            *reserved += glsl_count_vec4_slots(var->type, false, false);
         }
         slot = slot_map[var->data.location];
         assert(slot < MAX_VARYING);
      }
      var->data.driver_location = slot;
   }
}

ALWAYS_INLINE static bool
is_texcoord(gl_shader_stage stage, const nir_variable *var)
{
   if (stage != MESA_SHADER_FRAGMENT)
      return false;
   return var->data.location >= VARYING_SLOT_TEX0 &&
          var->data.location <= VARYING_SLOT_TEX7;
}

static bool
assign_consumer_var_io(gl_shader_stage stage, nir_variable *var, unsigned *reserved, unsigned char *slot_map)
{
   switch (var->data.location) {
   case VARYING_SLOT_POS:
   case VARYING_SLOT_PNTC:
   case VARYING_SLOT_PSIZ:
   case VARYING_SLOT_LAYER:
   case VARYING_SLOT_PRIMITIVE_ID:
   case VARYING_SLOT_CLIP_DIST0:
   case VARYING_SLOT_CULL_DIST0:
   case VARYING_SLOT_VIEWPORT:
   case VARYING_SLOT_FACE:
   case VARYING_SLOT_TESS_LEVEL_OUTER:
   case VARYING_SLOT_TESS_LEVEL_INNER:
      /* use a sentinel value to avoid counting later */
      var->data.driver_location = UINT_MAX;
      break;
   default:
      if (var->data.patch) {
         assert(var->data.location >= VARYING_SLOT_PATCH0);
         var->data.driver_location = var->data.location - VARYING_SLOT_PATCH0;
      } else if (var->data.location >= VARYING_SLOT_VAR0 &&
                 stage == MESA_SHADER_TESS_CTRL &&
                 var->data.mode == nir_var_shader_out)
         var->data.driver_location = var->data.location - VARYING_SLOT_VAR0;
      else {
         if (slot_map[var->data.location] == (unsigned char)-1) {
            if (!is_texcoord(stage, var))
               /* dead io */
               return false;
            /* texcoords can't be eliminated in fs due to GL_COORD_REPLACE */
            slot_map[var->data.location] = (*reserved)++;
         }
         var->data.driver_location = slot_map[var->data.location];
      }
   }
   return true;
}


static bool
rewrite_and_discard_read(nir_builder *b, nir_instr *instr, void *data)
{
   nir_variable *var = data;
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;
   nir_variable *deref_var = nir_intrinsic_get_var(intr, 0);
   if (deref_var != var)
      return false;
   nir_ssa_def *undef = nir_ssa_undef(b, nir_dest_num_components(intr->dest), nir_dest_bit_size(intr->dest));
   nir_ssa_def_rewrite_uses(&intr->dest.ssa, undef);
   return true;
}

void
zink_compiler_assign_io(nir_shader *producer, nir_shader *consumer)
{
   unsigned reserved = 0;
   unsigned char slot_map[VARYING_SLOT_MAX];
   memset(slot_map, -1, sizeof(slot_map));
   bool do_fixup = false;
   nir_shader *nir = producer->info.stage == MESA_SHADER_TESS_CTRL ? producer : consumer;
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      /* never assign from tcs -> tes, always invert */
      nir_foreach_variable_with_modes(var, consumer, nir_var_shader_in)
         assign_producer_var_io(consumer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, producer, nir_var_shader_out) {
         if (!assign_consumer_var_io(producer->info.stage, var, &reserved, slot_map))
            /* this is an output, nothing more needs to be done for it to be dropped */
            do_fixup = true;
      }
   } else {
      nir_foreach_variable_with_modes(var, producer, nir_var_shader_out)
         assign_producer_var_io(producer->info.stage, var, &reserved, slot_map);
      nir_foreach_variable_with_modes_safe(var, consumer, nir_var_shader_in) {
         if (!assign_consumer_var_io(consumer->info.stage, var, &reserved, slot_map)) {
            do_fixup = true;
            /* input needs to be rewritten as an undef to ensure the entire deref chain is deleted */
            nir_shader_instructions_pass(consumer, rewrite_and_discard_read, nir_metadata_dominance, var);
         }
      }
   }
   if (!do_fixup)
      return;
   nir_fixup_deref_modes(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(nir);
}

VkShaderModule
zink_shader_compile(struct zink_screen *screen, struct zink_shader *zs, nir_shader *base_nir, struct zink_shader_key *key)
{
   VkShaderModule mod = VK_NULL_HANDLE;
   void *streamout = NULL;
   nir_shader *nir = nir_shader_clone(NULL, base_nir);

   if (key) {
      if (key->inline_uniforms) {
         NIR_PASS_V(nir, nir_inline_uniforms,
                    nir->info.num_inlinable_uniforms,
                    key->base.inlined_uniform_values,
                    nir->info.inlinable_uniform_dw_offsets);

         optimize_nir(nir);

         /* this must be run again after inlining: folding the inlined
          * uniform values can expose new constant offsets */
         NIR_PASS_V(nir, nir_io_add_const_offset_to_base, nir_var_shader_in |
                                                          nir_var_shader_out);
      }
   }

   /* TODO: use a separate mem ctx here for ralloc */
   if (zs->nir->info.stage < MESA_SHADER_FRAGMENT) {
      if (zink_vs_key(key)->last_vertex_stage) {
         if (zs->streamout.have_xfb)
            streamout = &zs->streamout;

         if (!zink_vs_key(key)->clip_halfz) {
            NIR_PASS_V(nir, nir_lower_clip_halfz);
         }
         if (zink_vs_key(key)->push_drawid) {
            NIR_PASS_V(nir, lower_drawid);
         }
      }
   } else if (zs->nir->info.stage == MESA_SHADER_FRAGMENT) {
      if (!zink_fs_key(key)->samples &&
          nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK)) {
         /* VK will always use gl_SampleMask[] values even if sample count is 0,
          * so we need to skip this write here to mimic GL's behavior of ignoring it
          */
         nir_foreach_shader_out_variable(var, nir) {
            if (var->data.location == FRAG_RESULT_SAMPLE_MASK)
               var->data.mode = nir_var_shader_temp;
         }
         nir_fixup_deref_modes(nir);
         NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);
         optimize_nir(nir);
      }
      if (zink_fs_key(key)->force_dual_color_blend && nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA1)) {
         NIR_PASS_V(nir, lower_dual_blend);
      }
      if (zink_fs_key(key)->coord_replace_bits) {
         NIR_PASS_V(nir, nir_lower_texcoord_replace, zink_fs_key(key)->coord_replace_bits,
                    false, zink_fs_key(key)->coord_replace_yinvert);
      }
   }
   NIR_PASS_V(nir, nir_convert_from_ssa, true);

   struct spirv_shader *spirv = nir_to_spirv(nir, streamout, screen->spirv_version);
   if (!spirv)
      goto done;

   if (zink_debug & ZINK_DEBUG_SPIRV) {
      char buf[256];
      static int i;
      snprintf(buf, sizeof(buf), "dump%02d.spv", i++);
      FILE *fp = fopen(buf, "wb");
      if (fp) {
         fwrite(spirv->words, sizeof(uint32_t), spirv->num_words, fp);
         fclose(fp);
         fprintf(stderr, "wrote '%s'...\n", buf);
      }
   }

   VkShaderModuleCreateInfo smci = {0};
   smci.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
   smci.codeSize = spirv->num_words * sizeof(uint32_t);
   smci.pCode = spirv->words;

   if (vkCreateShaderModule(screen->dev, &smci, NULL, &mod) != VK_SUCCESS)
      mod = VK_NULL_HANDLE;

done:
   ralloc_free(nir);

   /* TODO: determine if there's any reason to cache spirv output? */
   ralloc_free(spirv);
   return mod;
}

static bool
lower_baseinstance_instr(nir_builder *b, nir_instr *instr, void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;
   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
   if (intr->intrinsic != nir_intrinsic_load_instance_id)
      return false;
   b->cursor = nir_after_instr(instr);
   nir_ssa_def *def = nir_isub(b, &intr->dest.ssa, nir_load_base_instance(b));
   nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, def, def->parent_instr);
   return true;
}

static bool
lower_baseinstance(nir_shader *shader)
{
   if (shader->info.stage != MESA_SHADER_VERTEX)
      return false;
   return nir_shader_instructions_pass(shader, lower_baseinstance_instr, nir_metadata_dominance, NULL);
}
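
/* Vulkan's InstanceIndex builtin includes the draw's firstInstance, while
 * GL's gl_InstanceID does not, so the instance id is rewritten above as
 * InstanceIndex - BaseInstance.
 */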

bool nir_lower_dynamic_bo_access(nir_shader *shader);

/* gl_nir_lower_buffers makes variables unusable for all UBO/SSBO access
 * so instead we delete all those broken variables and just make new ones
 */
static bool
unbreak_bos(nir_shader *shader)
{
   uint32_t ssbo_used = 0;
   uint32_t ubo_used = 0;
   uint64_t max_ssbo_size = 0;
   uint64_t max_ubo_size = 0;
   bool ssbo_sizes[PIPE_MAX_SHADER_BUFFERS] = {false};

   if (!shader->info.num_ssbos && !shader->info.num_ubos && !shader->num_uniforms)
      return false;
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_store_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[1]));
            break;

         case nir_intrinsic_get_ssbo_size: {
            uint32_t slot = nir_src_as_uint(intrin->src[0]);
            ssbo_used |= BITFIELD_BIT(slot);
            ssbo_sizes[slot] = true;
            break;
         }
         case nir_intrinsic_ssbo_atomic_add:
         case nir_intrinsic_ssbo_atomic_imin:
         case nir_intrinsic_ssbo_atomic_umin:
         case nir_intrinsic_ssbo_atomic_imax:
         case nir_intrinsic_ssbo_atomic_umax:
         case nir_intrinsic_ssbo_atomic_and:
         case nir_intrinsic_ssbo_atomic_or:
         case nir_intrinsic_ssbo_atomic_xor:
         case nir_intrinsic_ssbo_atomic_exchange:
         case nir_intrinsic_ssbo_atomic_comp_swap:
         case nir_intrinsic_ssbo_atomic_fmin:
         case nir_intrinsic_ssbo_atomic_fmax:
         case nir_intrinsic_ssbo_atomic_fcomp_swap:
         case nir_intrinsic_load_ssbo:
            ssbo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         case nir_intrinsic_load_ubo:
         case nir_intrinsic_load_ubo_vec4:
            ubo_used |= BITFIELD_BIT(nir_src_as_uint(intrin->src[0]));
            break;
         default:
            break;
         }
      }
   }

   nir_foreach_variable_with_modes(var, shader, nir_var_mem_ssbo | nir_var_mem_ubo) {
      const struct glsl_type *type = glsl_without_array(var->type);
      if (type_is_counter(type))
         continue;
      unsigned size = glsl_count_attribute_slots(glsl_type_is_array(var->type) ? var->type : type, false);
      if (var->data.mode == nir_var_mem_ubo)
         max_ubo_size = MAX2(max_ubo_size, size);
      else
         max_ssbo_size = MAX2(max_ssbo_size, size);
      var->data.mode = nir_var_shader_temp;
   }
   nir_fixup_deref_modes(shader);
   NIR_PASS_V(shader, nir_remove_dead_variables, nir_var_shader_temp, NULL);
   optimize_nir(shader);

   if (!ssbo_used && !ubo_used)
      return false;

   struct glsl_struct_field *fields = rzalloc_array(shader, struct glsl_struct_field, 2);
   fields[0].name = ralloc_strdup(shader, "base");
   fields[1].name = ralloc_strdup(shader, "unsized");
   if (ubo_used) {
      const struct glsl_type *ubo_type = glsl_array_type(glsl_uint_type(), max_ubo_size * 4, 4);
      fields[0].type = ubo_type;
      u_foreach_bit(slot, ubo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ubo_slot_%u", slot);
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ubo, glsl_struct_type(fields, 1, "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   if (ssbo_used) {
      const struct glsl_type *ssbo_type = glsl_array_type(glsl_uint_type(), max_ssbo_size * 4, 4);
      const struct glsl_type *unsized = glsl_array_type(glsl_uint_type(), 0, 4);
      fields[0].type = ssbo_type;
      u_foreach_bit(slot, ssbo_used) {
         char buf[64];
         snprintf(buf, sizeof(buf), "ssbo_slot_%u", slot);
         if (ssbo_sizes[slot])
            fields[1].type = unsized;
         else
            fields[1].type = NULL;
         nir_variable *var = nir_variable_create(shader, nir_var_mem_ssbo,
                                                 glsl_struct_type(fields, 1 + !!ssbo_sizes[slot], "struct", false), buf);
         var->interface_type = var->type;
         var->data.driver_location = slot;
      }
   }
   return true;
}
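
/* Rough shape of the replacement variables (sketch): each used slot becomes
 *
 *    struct { uint base[4 * max_size]; uint unsized[]; } ssbo_slot_N;
 *
 * i.e. a uint array sized for the largest buffer seen plus, for ssbos that
 * were queried with get_ssbo_size, a trailing runtime array that the SPIR-V
 * backend can presumably use for OpArrayLength-style length queries.
 */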

static uint32_t
zink_binding(gl_shader_stage stage, VkDescriptorType type, int index)
{
   if (stage == MESA_SHADER_NONE) {
      unreachable("not supported");
   } else {
      switch (type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         assert(index < PIPE_MAX_CONSTANT_BUFFERS);
         return (stage * PIPE_MAX_CONSTANT_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SAMPLERS);
         return (stage * PIPE_MAX_SAMPLERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
         assert(index < PIPE_MAX_SHADER_BUFFERS);
         return (stage * PIPE_MAX_SHADER_BUFFERS) + index;

      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
         assert(index < PIPE_MAX_SHADER_IMAGES);
         return (stage * PIPE_MAX_SHADER_IMAGES) + index;

      default:
         unreachable("unexpected type");
      }
   }
}
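
/* Worked example (values assume gallium's PIPE_MAX_CONSTANT_BUFFERS == 32 and
 * MESA_SHADER_FRAGMENT == 4): a fragment-stage uniform buffer at index 2 maps
 * to binding 4 * 32 + 2 == 130. Striding by stage per descriptor type keeps
 * bindings of different stages sharing a set from colliding.
 */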

struct zink_shader *
zink_shader_create(struct zink_screen *screen, struct nir_shader *nir,
                   const struct pipe_stream_output_info *so_info)
{
   struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
   bool have_psiz = false;

   ret->programs = _mesa_pointer_set_create(NULL);

   nir_variable_mode indirect_derefs_modes = nir_var_function_temp;
   if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
       nir->info.stage == MESA_SHADER_TESS_EVAL)
      indirect_derefs_modes |= nir_var_shader_in | nir_var_shader_out;

   NIR_PASS_V(nir, nir_lower_indirect_derefs, indirect_derefs_modes,
              UINT32_MAX);

   if (nir->info.stage == MESA_SHADER_VERTEX)
      create_vs_pushconst(nir);
   else if (nir->info.stage == MESA_SHADER_TESS_CTRL ||
            nir->info.stage == MESA_SHADER_TESS_EVAL)
      NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);
   else if (nir->info.stage == MESA_SHADER_KERNEL)
      create_cs_pushconst(nir);

   if (nir->info.stage < MESA_SHADER_FRAGMENT)
      have_psiz = check_psiz(nir);
   NIR_PASS_V(nir, lower_basevertex);
   NIR_PASS_V(nir, lower_work_dim);
   NIR_PASS_V(nir, nir_lower_regs_to_ssa);
   NIR_PASS_V(nir, lower_baseinstance);
   optimize_nir(nir);
   NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
   NIR_PASS_V(nir, lower_discard_if);
   NIR_PASS_V(nir, nir_lower_fragcolor,
              nir->info.fs.color_is_dual_source ? 1 : 8);
   NIR_PASS_V(nir, lower_64bit_vertex_attribs);
   NIR_PASS_V(nir, unbreak_bos);

   if (zink_debug & ZINK_DEBUG_NIR) {
      fprintf(stderr, "NIR shader:\n---8<---\n");
      nir_print_shader(nir, stderr);
      fprintf(stderr, "---8<---\n");
   }

   foreach_list_typed_reverse(nir_variable, var, node, &nir->variables) {
      if (_nir_shader_variable_has_mode(var, nir_var_uniform |
                                             nir_var_mem_ubo |
                                             nir_var_mem_ssbo)) {
         enum zink_descriptor_type ztype;
         const struct glsl_type *type = glsl_without_array(var->type);
         if (var->data.mode == nir_var_mem_ubo) {
            ztype = ZINK_DESCRIPTOR_TYPE_UBO;
            /* buffer 0 is a push descriptor */
            var->data.descriptor_set = !!var->data.driver_location;
            var->data.binding = !var->data.driver_location ? nir->info.stage :
                                zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
                                             var->data.driver_location);
            assert(var->data.driver_location || var->data.binding < 10);
            VkDescriptorType vktype = !var->data.driver_location ? VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC : VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
            int binding = var->data.binding;

            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->ubos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->num_bindings[ztype]++;
         } else if (var->data.mode == nir_var_mem_ssbo) {
            ztype = ZINK_DESCRIPTOR_TYPE_SSBO;
            var->data.descriptor_set = ztype + 1;
            var->data.binding = zink_binding(nir->info.stage,
                                             VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                             var->data.driver_location);
            ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
            ret->ssbos_used |= (1 << ret->bindings[ztype][ret->num_bindings[ztype]].index);
            ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
            ret->bindings[ztype][ret->num_bindings[ztype]].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
            ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
            ret->num_bindings[ztype]++;
         } else {
            assert(var->data.mode == nir_var_uniform);
            if (glsl_type_is_sampler(type) || glsl_type_is_image(type)) {
               VkDescriptorType vktype = glsl_type_is_image(type) ? zink_image_type(type) : zink_sampler_type(type);
               if (vktype == VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER)
                  ret->num_texel_buffers++;
               ztype = zink_desc_type_from_vktype(vktype);
               var->data.driver_location = var->data.binding;
               var->data.descriptor_set = ztype + 1;
               var->data.binding = zink_binding(nir->info.stage, vktype, var->data.driver_location);
               ret->bindings[ztype][ret->num_bindings[ztype]].index = var->data.driver_location;
               ret->bindings[ztype][ret->num_bindings[ztype]].binding = var->data.binding;
               ret->bindings[ztype][ret->num_bindings[ztype]].type = vktype;
               if (glsl_type_is_array(var->type))
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = glsl_get_aoa_size(var->type);
               else
                  ret->bindings[ztype][ret->num_bindings[ztype]].size = 1;
               ret->num_bindings[ztype]++;
            }
         }
      }
   }

   ret->nir = nir;
   if (so_info && nir->info.outputs_written && nir->info.has_transform_feedback_varyings)
      update_so_info(ret, so_info, nir->info.outputs_written, have_psiz);

   return ret;
}

void
zink_shader_finalize(struct pipe_screen *pscreen, void *nirptr, bool optimize)
{
   struct zink_screen *screen = zink_screen(pscreen);
   nir_shader *nir = nirptr;

   if (!screen->info.feats.features.shaderImageGatherExtended) {
      nir_lower_tex_options tex_opts = {0};
      tex_opts.lower_tg4_offsets = true;
      NIR_PASS_V(nir, nir_lower_tex, &tex_opts);
   }
   NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, true, false);
   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      NIR_PASS_V(nir, nir_lower_gs_intrinsics, nir_lower_gs_intrinsics_per_stream);
   optimize_nir(nir);
   if (nir->info.num_ubos || nir->info.num_ssbos)
      NIR_PASS_V(nir, nir_lower_dynamic_bo_access);
   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
   if (screen->driconf.inline_uniforms)
      nir_find_inlinable_uniforms(nir);
}

void
zink_shader_free(struct zink_context *ctx, struct zink_shader *shader)
{
   struct zink_screen *screen = zink_screen(ctx->base.screen);
   set_foreach(shader->programs, entry) {
      if (shader->nir->info.stage == MESA_SHADER_COMPUTE) {
         struct zink_compute_program *comp = (void*)entry->key;
         _mesa_hash_table_remove_key(ctx->compute_program_cache, comp->shader);
         comp->shader = NULL;
         bool in_use = comp == ctx->curr_compute;
         if (in_use)
            ctx->compute_stage = NULL;
         if (zink_compute_program_reference(screen, &comp, NULL) && in_use)
            ctx->curr_compute = NULL;
      } else {
         struct zink_gfx_program *prog = (void*)entry->key;
         enum pipe_shader_type pstage = pipe_shader_type_from_mesa(shader->nir->info.stage);
         assert(pstage < ZINK_SHADER_COUNT);
         bool in_use = prog == ctx->curr_program;
         if (shader->nir->info.stage != MESA_SHADER_TESS_CTRL || !shader->is_generated)
            _mesa_hash_table_remove_key(ctx->program_cache, prog->shaders);
         prog->shaders[pstage] = NULL;
         if (shader->nir->info.stage == MESA_SHADER_TESS_EVAL && shader->generated)
            /* automatically destroy generated tcs shaders when tes is destroyed */
            zink_shader_free(ctx, shader->generated);
         if (in_use) {
            ctx->gfx_pipeline_state.modules[pstage] = VK_NULL_HANDLE;
            ctx->gfx_stages[pstage] = NULL;
         }
         if (zink_gfx_program_reference(screen, &prog, NULL) && in_use)
            ctx->curr_program = NULL;
      }
   }
   _mesa_set_destroy(shader->programs, NULL);
   ralloc_free(shader->nir);
   FREE(shader);
}
1097
1098
1099
/* creating a passthrough tcs shader that's roughly:
1100
1101
#version 150
1102
#extension GL_ARB_tessellation_shader : require
1103
1104
in vec4 some_var[gl_MaxPatchVertices];
1105
out vec4 some_var_out;
1106
1107
layout(push_constant) uniform tcsPushConstants {
1108
layout(offset = 0) float TessLevelInner[2];
1109
layout(offset = 8) float TessLevelOuter[4];
1110
} u_tcsPushConstants;
1111
layout(vertices = $vertices_per_patch) out;
1112
void main()
1113
{
1114
gl_TessLevelInner = u_tcsPushConstants.TessLevelInner;
1115
gl_TessLevelOuter = u_tcsPushConstants.TessLevelOuter;
1116
some_var_out = some_var[gl_InvocationID];
1117
}
1118
1119
*/
1120
struct zink_shader *
1121
zink_shader_tcs_create(struct zink_context *ctx, struct zink_shader *vs)
1122
{
1123
unsigned vertices_per_patch = ctx->gfx_pipeline_state.vertices_per_patch;
1124
struct zink_shader *ret = CALLOC_STRUCT(zink_shader);
1125
ret->programs = _mesa_pointer_set_create(NULL);
1126
1127
nir_shader *nir = nir_shader_create(NULL, MESA_SHADER_TESS_CTRL, &zink_screen(ctx->base.screen)->nir_options, NULL);
1128
nir_function *fn = nir_function_create(nir, "main");
1129
fn->is_entrypoint = true;
1130
nir_function_impl *impl = nir_function_impl_create(fn);
1131
1132
nir_builder b;
1133
nir_builder_init(&b, impl);
1134
b.cursor = nir_before_block(nir_start_block(impl));
1135
1136
nir_ssa_def *invocation_id = nir_load_invocation_id(&b);
1137
1138
nir_foreach_shader_out_variable(var, vs->nir) {
1139
const struct glsl_type *type = var->type;
1140
const struct glsl_type *in_type = var->type;
1141
const struct glsl_type *out_type = var->type;
1142
char buf[1024];
1143
snprintf(buf, sizeof(buf), "%s_out", var->name);
1144
in_type = glsl_array_type(type, 32 /* MAX_PATCH_VERTICES */, 0);
1145
out_type = glsl_array_type(type, vertices_per_patch, 0);
1146
1147
nir_variable *in = nir_variable_create(nir, nir_var_shader_in, in_type, var->name);
1148
nir_variable *out = nir_variable_create(nir, nir_var_shader_out, out_type, buf);
1149
out->data.location = in->data.location = var->data.location;
1150
out->data.location_frac = in->data.location_frac = var->data.location_frac;
1151
1152
/* gl_in[] receives values from equivalent built-in output
1153
variables written by the vertex shader (section 2.14.7). Each array
1154
element of gl_in[] is a structure holding values for a specific vertex of
1155
the input patch. The length of gl_in[] is equal to the
1156
implementation-dependent maximum patch size (gl_MaxPatchVertices).
1157
- ARB_tessellation_shader
1158
*/
1159
for (unsigned i = 0; i < vertices_per_patch; i++) {
1160
/* we need to load the invocation-specific value of the vertex output and then store it to the per-patch output */
1161
nir_if *start_block = nir_push_if(&b, nir_ieq(&b, invocation_id, nir_imm_int(&b, i)));
1162
nir_deref_instr *in_array_var = nir_build_deref_array(&b, nir_build_deref_var(&b, in), invocation_id);
1163
nir_ssa_def *load = nir_load_deref(&b, in_array_var);
1164
nir_deref_instr *out_array_var = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, out), i);
1165
nir_store_deref(&b, out_array_var, load, 0xff);
1166
nir_pop_if(&b, start_block);
1167
}
1168
}
1169
nir_variable *gl_TessLevelInner = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 2, 0), "gl_TessLevelInner");
1170
gl_TessLevelInner->data.location = VARYING_SLOT_TESS_LEVEL_INNER;
1171
gl_TessLevelInner->data.patch = 1;
1172
nir_variable *gl_TessLevelOuter = nir_variable_create(nir, nir_var_shader_out, glsl_array_type(glsl_float_type(), 4, 0), "gl_TessLevelOuter");
1173
gl_TessLevelOuter->data.location = VARYING_SLOT_TESS_LEVEL_OUTER;
1174
gl_TessLevelOuter->data.patch = 1;
1175
1176
/* hacks so we can size these right for now */
1177
struct glsl_struct_field *fields = rzalloc_array(nir, struct glsl_struct_field, 3);
1178
/* just use a single blob for padding here because it's easier */
1179
fields[0].type = glsl_array_type(glsl_uint_type(), offsetof(struct zink_gfx_push_constant, default_inner_level) / 4, 0);
1180
fields[0].name = ralloc_asprintf(nir, "padding");
1181
fields[0].offset = 0;
1182
fields[1].type = glsl_array_type(glsl_uint_type(), 2, 0);
1183
fields[1].name = ralloc_asprintf(nir, "gl_TessLevelInner");
1184
fields[1].offset = offsetof(struct zink_gfx_push_constant, default_inner_level);
1185
fields[2].type = glsl_array_type(glsl_uint_type(), 4, 0);
1186
fields[2].name = ralloc_asprintf(nir, "gl_TessLevelOuter");
1187
fields[2].offset = offsetof(struct zink_gfx_push_constant, default_outer_level);
1188
nir_variable *pushconst = nir_variable_create(nir, nir_var_mem_push_const,
1189
glsl_struct_type(fields, 3, "struct", false), "pushconst");
1190
pushconst->data.location = VARYING_SLOT_VAR0;
1191
1192
nir_ssa_def *load_inner = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 1), .base = 1, .range = 8);
1193
nir_ssa_def *load_outer = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 2), .base = 2, .range = 16);
1194
1195
for (unsigned i = 0; i < 2; i++) {
1196
nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelInner), i);
1197
nir_store_deref(&b, store_idx, nir_channel(&b, load_inner, i), 0xff);
1198
}
1199
for (unsigned i = 0; i < 4; i++) {
1200
nir_deref_instr *store_idx = nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gl_TessLevelOuter), i);
1201
nir_store_deref(&b, store_idx, nir_channel(&b, load_outer, i), 0xff);
1202
}
1203
1204
nir->info.tess.tcs_vertices_out = vertices_per_patch;
1205
nir_validate_shader(nir, "created");
1206
1207
NIR_PASS_V(nir, nir_lower_regs_to_ssa);
1208
optimize_nir(nir);
1209
NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);
1210
NIR_PASS_V(nir, lower_discard_if);
1211
NIR_PASS_V(nir, nir_convert_from_ssa, true);
1212
1213
ret->nir = nir;
1214
ret->is_generated = true;
1215
return ret;
1216
}