/* Source: GitHub repository PojavLauncherTeam/mesa, blob/21.2-virgl/src/amd/vulkan/radv_shader.h */
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef RADV_SHADER_H
#define RADV_SHADER_H

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#include "ac_binary.h"
#include "ac_shader_util.h"

#include "amd_family.h"
#include "radv_constants.h"

#include "nir/nir.h"
#include "vulkan/util/vk_object.h"
#include "vulkan/util/vk_shader_module.h"
#include "vulkan/vulkan.h"

/* Number of vertex attribute slots tracked per vertex shader: the larger of
 * VERT_ATTRIB_MAX and VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS.
 * Used to size radv_shader_info::vs.input_usage_mask. */
#define RADV_VERT_ATTRIB_MAX MAX2(VERT_ATTRIB_MAX, VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS)

/* Forward declarations: only pointers to these types are used in this header. */
struct radv_device;
struct radv_pipeline;
struct radv_pipeline_cache;
struct radv_pipeline_key;

/* One-bit output flags shared by the vertex and tessellation-evaluation
 * variant keys. This struct is the first member of both radv_vs_variant_key
 * and radv_tes_variant_key, and is aliased as
 * radv_shader_variant_key::vs_common_out. */
struct radv_vs_out_key {
   uint32_t as_es : 1;
   uint32_t as_ls : 1;
   uint32_t as_ngg : 1;
   uint32_t as_ngg_passthrough : 1;
   uint32_t export_prim_id : 1;
   uint32_t export_layer_id : 1;
   uint32_t export_clip_dists : 1;
   uint32_t export_viewport_index : 1;
};

struct radv_vs_variant_key {
61
struct radv_vs_out_key out;
62
63
uint32_t instance_rate_inputs;
64
uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS];
65
uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS];
66
uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS];
67
uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS];
68
uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS];
69
uint8_t vertex_binding_align[MAX_VBS];
70
71
/* For 2_10_10_10 formats the alpha is handled as unsigned by pre-vega HW.
72
* so we may need to fix it up. */
73
enum ac_fetch_format alpha_adjust[MAX_VERTEX_ATTRIBS];
74
75
/* For some formats the channels have to be shuffled. */
76
uint32_t post_shuffle;
77
78
/* Output primitive type. */
79
uint8_t outprim;
80
81
/* Provoking vertex mode. */
82
bool provoking_vtx_last;
83
};
84
85
struct radv_tes_variant_key {
86
struct radv_vs_out_key out;
87
};
88
89
struct radv_tcs_variant_key {
90
struct radv_vs_variant_key vs_key;
91
unsigned primitive_mode;
92
unsigned input_vertices;
93
};
94
95
/* Variant key for a fragment shader.
 * NOTE(review): col_format, is_int8 and is_int10 look like packed
 * per-color-attachment values/bitmasks -- confirm against the code that
 * fills this key. */
struct radv_fs_variant_key {
   uint32_t col_format;
   uint8_t log2_ps_iter_samples;
   uint8_t num_samples;
   uint32_t is_int8;
   uint32_t is_int10;
};

/* Variant key for a compute shader. */
struct radv_cs_variant_key {
   uint8_t subgroup_size;
};

struct radv_shader_variant_key {
108
union {
109
struct radv_vs_variant_key vs;
110
struct radv_fs_variant_key fs;
111
struct radv_tes_variant_key tes;
112
struct radv_tcs_variant_key tcs;
113
struct radv_cs_variant_key cs;
114
115
/* A common prefix of the vs and tes keys. */
116
struct radv_vs_out_key vs_common_out;
117
};
118
bool has_multiview_view_index;
119
};
120
121
/* Severity passed to the debug callback in radv_nir_compiler_options. */
enum radv_compiler_debug_level {
   RADV_COMPILER_DEBUG_LEVEL_PERFWARN,
   RADV_COMPILER_DEBUG_LEVEL_ERROR,
};

struct radv_nir_compiler_options {
127
struct radv_pipeline_layout *layout;
128
struct radv_shader_variant_key key;
129
bool explicit_scratch_args;
130
bool clamp_shadow_reference;
131
bool robust_buffer_access;
132
bool adjust_frag_coord_z;
133
bool dump_shader;
134
bool dump_preoptir;
135
bool record_ir;
136
bool record_stats;
137
bool check_ir;
138
bool has_ls_vgpr_init_bug;
139
bool has_image_load_dcc_bug;
140
bool use_ngg_streamout;
141
bool enable_mrt_output_nan_fixup;
142
bool disable_optimizations; /* only used by ACO */
143
bool wgp_mode;
144
enum radeon_family family;
145
enum chip_class chip_class;
146
const struct radeon_info *info;
147
uint32_t tess_offchip_block_dw_size;
148
uint32_t address32_hi;
149
uint8_t force_vrs_rates;
150
151
struct {
152
void (*func)(void *private_data, enum radv_compiler_debug_level level, const char *message);
153
void *private_data;
154
} debug;
155
};
156
157
/* Indices of user-data entries; used to size and index
 * radv_userdata_locations::shader_data.
 *
 * Entries 0..8 are given explicit values. Stage-specific entries restart at
 * AC_UD_SHADER_START, so the VS and CS ranges deliberately overlap. The
 * numeric values in the trailing comments follow from C enumeration rules. */
enum radv_ud_index {
   AC_UD_SCRATCH_RING_OFFSETS = 0,
   AC_UD_PUSH_CONSTANTS = 1,
   AC_UD_INLINE_PUSH_CONSTANTS = 2,
   AC_UD_INDIRECT_DESCRIPTOR_SETS = 3,
   AC_UD_VIEW_INDEX = 4,
   AC_UD_STREAMOUT_BUFFERS = 5,
   AC_UD_NGG_GS_STATE = 6,
   AC_UD_NGG_CULLING_SETTINGS = 7,
   AC_UD_NGG_VIEWPORT = 8,
   AC_UD_SHADER_START = 9,
   AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START,
   AC_UD_VS_BASE_VERTEX_START_INSTANCE, /* 10 */
   AC_UD_VS_MAX_UD,                     /* 11 */
   AC_UD_PS_MAX_UD,                     /* 12 */
   AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START,
   AC_UD_CS_SBT_DESCRIPTORS, /* 10 */
   AC_UD_CS_MAX_UD,          /* 11 */
   AC_UD_GS_MAX_UD,          /* 12 */
   AC_UD_TCS_MAX_UD,         /* 13 */
   AC_UD_TES_MAX_UD,         /* 14 */
   /* Note: equals AC_UD_TCS_MAX_UD (13), not the larger AC_UD_TES_MAX_UD. */
   AC_UD_MAX_UD = AC_UD_TCS_MAX_UD,
};

/* Description of a single stream-output (transform feedback) output; stored
 * in radv_streamout_info::outputs. */
struct radv_stream_output {
   uint8_t location;
   uint8_t buffer;
   uint16_t offset;
   uint8_t component_mask;
   uint8_t stream;
};

struct radv_streamout_info {
190
uint16_t num_outputs;
191
struct radv_stream_output outputs[MAX_SO_OUTPUTS];
192
uint16_t strides[MAX_SO_BUFFERS];
193
uint32_t enabled_stream_buffers_mask;
194
};
195
196
/* Location of one user-data entry in SGPRs.
 * sgpr_idx is signed; NOTE(review): a negative value presumably marks the
 * entry as unused -- confirm against the code that fills it in. */
struct radv_userdata_info {
   int8_t sgpr_idx;
   uint8_t num_sgprs;
};

struct radv_userdata_locations {
202
struct radv_userdata_info descriptor_sets[MAX_SETS];
203
struct radv_userdata_info shader_data[AC_UD_MAX_UD];
204
uint32_t descriptor_sets_enabled;
205
};
206
207
struct radv_vs_output_info {
208
uint8_t vs_output_param_offset[VARYING_SLOT_MAX];
209
uint8_t clip_dist_mask;
210
uint8_t cull_dist_mask;
211
uint8_t param_exports;
212
bool writes_pointsize;
213
bool writes_layer;
214
bool writes_viewport_index;
215
bool writes_primitive_shading_rate;
216
bool export_prim_id;
217
unsigned pos_exports;
218
};
219
220
/* ES-stage output information; embedded as `es_info` in both the vs and tes
 * sections of radv_shader_info. */
struct radv_es_output_info {
   uint32_t esgs_itemsize;
};

/* GFX9 geometry-shader ring information; stored as
 * radv_shader_info::gs_ring_info.
 * NOTE(review): the vgt_* names suggest hardware register values -- confirm. */
struct gfx9_gs_info {
   uint32_t vgt_gs_onchip_cntl;
   uint32_t vgt_gs_max_prims_per_subgroup;
   uint32_t vgt_esgs_ring_itemsize;
   uint32_t lds_size;
};

/* GFX10 NGG information; stored as radv_shader_info::ngg_info. */
struct gfx10_ngg_info {
   uint16_t ngg_emit_size; /* in dwords */
   uint32_t hw_max_esverts;
   uint32_t max_gsprims;
   uint32_t max_out_verts;
   uint32_t prim_amp_factor;
   uint32_t vgt_esgs_ring_itemsize;
   uint32_t esgs_ring_size;
   bool max_vert_out_per_gs_instance;
   bool enable_vertex_grouping;
};

struct radv_shader_info {
244
bool loads_push_constants;
245
bool loads_dynamic_offsets;
246
uint8_t min_push_constant_used;
247
uint8_t max_push_constant_used;
248
bool has_only_32bit_push_constants;
249
bool has_indirect_push_constants;
250
uint8_t num_inline_push_consts;
251
uint8_t base_inline_push_consts;
252
uint32_t desc_set_used_mask;
253
bool needs_multiview_view_index;
254
bool uses_invocation_id;
255
bool uses_prim_id;
256
uint8_t wave_size;
257
uint8_t ballot_bit_size;
258
struct radv_userdata_locations user_sgprs_locs;
259
unsigned num_user_sgprs;
260
unsigned num_input_sgprs;
261
unsigned num_input_vgprs;
262
unsigned private_mem_vgprs;
263
bool need_indirect_descriptor_sets;
264
bool is_ngg;
265
bool is_ngg_passthrough;
266
bool has_ngg_culling;
267
bool has_ngg_early_prim_export;
268
uint32_t num_lds_blocks_when_not_culling;
269
uint32_t num_tess_patches;
270
struct {
271
uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX];
272
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
273
bool needs_draw_id;
274
bool needs_instance_id;
275
struct radv_vs_output_info outinfo;
276
struct radv_es_output_info es_info;
277
bool as_es;
278
bool as_ls;
279
bool export_prim_id;
280
bool tcs_in_out_eq;
281
uint64_t tcs_temp_only_input_mask;
282
uint8_t num_linked_outputs;
283
bool needs_base_instance;
284
bool use_per_attribute_vb_descs;
285
uint32_t vb_desc_usage_mask;
286
} vs;
287
struct {
288
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
289
uint8_t num_stream_output_components[4];
290
uint8_t output_streams[VARYING_SLOT_VAR31 + 1];
291
uint8_t max_stream;
292
unsigned gsvs_vertex_size;
293
unsigned max_gsvs_emit_size;
294
unsigned vertices_in;
295
unsigned vertices_out;
296
unsigned output_prim;
297
unsigned invocations;
298
unsigned es_type; /* GFX9: VS or TES */
299
uint8_t num_linked_inputs;
300
} gs;
301
struct {
302
uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1];
303
struct radv_vs_output_info outinfo;
304
struct radv_es_output_info es_info;
305
bool as_es;
306
unsigned primitive_mode;
307
enum gl_tess_spacing spacing;
308
bool ccw;
309
bool point_mode;
310
bool export_prim_id;
311
uint8_t num_linked_inputs;
312
uint8_t num_linked_patch_inputs;
313
uint8_t num_linked_outputs;
314
} tes;
315
struct {
316
bool uses_sample_shading;
317
bool needs_sample_positions;
318
bool writes_memory;
319
bool writes_z;
320
bool writes_stencil;
321
bool writes_sample_mask;
322
bool has_pcoord;
323
bool prim_id_input;
324
bool layer_input;
325
bool viewport_index_input;
326
uint8_t num_input_clips_culls;
327
uint32_t input_mask;
328
uint32_t flat_shaded_mask;
329
uint32_t explicit_shaded_mask;
330
uint32_t float16_shaded_mask;
331
uint32_t num_interp;
332
bool can_discard;
333
bool early_fragment_test;
334
bool post_depth_coverage;
335
bool reads_sample_mask_in;
336
uint8_t depth_layout;
337
bool uses_persp_or_linear_interp;
338
bool allow_flat_shading;
339
} ps;
340
struct {
341
bool uses_grid_size;
342
bool uses_block_id[3];
343
bool uses_thread_id[3];
344
bool uses_local_invocation_idx;
345
unsigned block_size[3];
346
347
bool uses_sbt;
348
} cs;
349
struct {
350
uint64_t tes_inputs_read;
351
uint64_t tes_patch_inputs_read;
352
unsigned tcs_vertices_out;
353
uint32_t num_lds_blocks;
354
uint8_t num_linked_inputs;
355
uint8_t num_linked_outputs;
356
uint8_t num_linked_patch_outputs;
357
bool tes_reads_tess_factors : 1;
358
} tcs;
359
360
struct radv_streamout_info so;
361
362
struct gfx9_gs_info gs_ring_info;
363
struct gfx10_ngg_info ngg_info;
364
365
unsigned float_controls_mode;
366
};
367
368
/* Discriminator stored in radv_shader_binary::type. The two values correspond
 * to the radv_shader_binary_legacy and radv_shader_binary_rtld containers. */
enum radv_shader_binary_type {
   RADV_BINARY_TYPE_LEGACY,
   RADV_BINARY_TYPE_RTLD,
};

struct radv_shader_binary {
371
enum radv_shader_binary_type type;
372
gl_shader_stage stage;
373
bool is_gs_copy_shader;
374
375
struct radv_shader_info info;
376
377
/* Self-referential size so we avoid consistency issues. */
378
uint32_t total_size;
379
};
380
381
struct radv_shader_binary_legacy {
382
struct radv_shader_binary base;
383
struct ac_shader_config config;
384
unsigned code_size;
385
unsigned exec_size;
386
unsigned ir_size;
387
unsigned disasm_size;
388
unsigned stats_size;
389
390
/* data has size of stats_size + code_size + ir_size + disasm_size + 2,
391
* where the +2 is for 0 of the ir strings. */
392
uint8_t data[0];
393
};
394
395
struct radv_shader_binary_rtld {
396
struct radv_shader_binary base;
397
unsigned elf_size;
398
unsigned llvm_ir_size;
399
uint8_t data[0];
400
};
401
402
struct radv_shader_variant {
403
uint32_t ref_count;
404
405
struct radeon_winsys_bo *bo;
406
uint64_t bo_offset;
407
struct ac_shader_config config;
408
uint8_t *code_ptr;
409
uint32_t code_size;
410
uint32_t exec_size;
411
struct radv_shader_info info;
412
413
/* debug only */
414
char *spirv;
415
uint32_t spirv_size;
416
char *nir_string;
417
char *disasm_string;
418
char *ir_string;
419
uint32_t *statistics;
420
421
struct list_head slab_list;
422
};
423
424
struct radv_shader_slab {
425
struct list_head slabs;
426
struct list_head shaders;
427
struct radeon_winsys_bo *bo;
428
uint64_t size;
429
char *ptr;
430
};
431
432
void radv_optimize_nir(const struct radv_device *device, struct nir_shader *shader,
433
bool optimize_conservatively, bool allow_copies);
434
void radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets);
435
bool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout);
436
437
nir_shader *radv_shader_compile_to_nir(struct radv_device *device, struct vk_shader_module *module,
438
const char *entrypoint_name, gl_shader_stage stage,
439
const VkSpecializationInfo *spec_info,
440
const VkPipelineCreateFlags flags,
441
const struct radv_pipeline_layout *layout,
442
const struct radv_pipeline_key *key);
443
444
void radv_destroy_shader_slabs(struct radv_device *device);
445
446
VkResult radv_create_shaders(struct radv_pipeline *pipeline, struct radv_device *device,
447
struct radv_pipeline_cache *cache, const struct radv_pipeline_key *key,
448
const VkPipelineShaderStageCreateInfo **pStages,
449
const VkPipelineCreateFlags flags,
450
VkPipelineCreationFeedbackEXT *pipeline_feedback,
451
VkPipelineCreationFeedbackEXT **stage_feedbacks);
452
453
struct radv_shader_variant *radv_shader_variant_create(struct radv_device *device,
454
const struct radv_shader_binary *binary,
455
bool keep_shader_info);
456
struct radv_shader_variant *radv_shader_variant_compile(
457
struct radv_device *device, struct vk_shader_module *module, struct nir_shader *const *shaders,
458
int shader_count, struct radv_pipeline_layout *layout, const struct radv_shader_variant_key *key,
459
struct radv_shader_info *info, bool keep_shader_info, bool keep_statistic_info,
460
bool disable_optimizations, struct radv_shader_binary **binary_out);
461
462
struct radv_shader_variant *
463
radv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir,
464
struct radv_shader_info *info, struct radv_shader_binary **binary_out,
465
bool multiview, bool keep_shader_info, bool keep_statistic_info,
466
bool disable_optimizations);
467
468
struct radv_shader_variant *radv_create_trap_handler_shader(struct radv_device *device);
469
470
void radv_shader_variant_destroy(struct radv_device *device, struct radv_shader_variant *variant);
471
472
unsigned radv_get_max_waves(struct radv_device *device, struct radv_shader_variant *variant,
473
gl_shader_stage stage);
474
475
unsigned radv_get_max_workgroup_size(enum chip_class chip_class, gl_shader_stage stage,
476
const unsigned *sizes);
477
478
const char *radv_get_shader_name(struct radv_shader_info *info, gl_shader_stage stage);
479
480
bool radv_can_dump_shader(struct radv_device *device, struct vk_shader_module *module,
481
bool meta_shader);
482
483
bool radv_can_dump_shader_stats(struct radv_device *device, struct vk_shader_module *module);
484
485
VkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline,
486
gl_shader_stage stage, FILE *output);
487
488
static inline unsigned
489
calculate_tess_lds_size(enum chip_class chip_class, unsigned tcs_num_input_vertices,
490
unsigned tcs_num_output_vertices, unsigned tcs_num_inputs,
491
unsigned tcs_num_patches, unsigned tcs_num_outputs,
492
unsigned tcs_num_patch_outputs)
493
{
494
unsigned input_vertex_size = tcs_num_inputs * 16;
495
unsigned output_vertex_size = tcs_num_outputs * 16;
496
497
unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size;
498
499
unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
500
unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
501
502
unsigned output_patch0_offset = input_patch_size * tcs_num_patches;
503
504
unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches;
505
506
if (chip_class >= GFX7) {
507
assert(lds_size <= 65536);
508
lds_size = align(lds_size, 512) / 512;
509
} else {
510
assert(lds_size <= 32768);
511
lds_size = align(lds_size, 256) / 256;
512
}
513
514
return lds_size;
515
}
516
517
static inline unsigned
518
get_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices,
519
unsigned tcs_num_inputs, unsigned tcs_num_outputs,
520
unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size,
521
enum chip_class chip_class, enum radeon_family family)
522
{
523
uint32_t input_vertex_size = tcs_num_inputs * 16;
524
uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size;
525
uint32_t output_vertex_size = tcs_num_outputs * 16;
526
uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size;
527
uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16;
528
529
/* Ensure that we only need one wave per SIMD so we don't need to check
530
* resource usage. Also ensures that the number of tcs in and out
531
* vertices per threadgroup are at most 256.
532
*/
533
unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4;
534
/* Make sure that the data fits in LDS. This assumes the shaders only
535
* use LDS for the inputs and outputs.
536
*/
537
unsigned hardware_lds_size = 32768;
538
539
/* Looks like STONEY hangs if we use more than 32 KiB LDS in a single
540
* threadgroup, even though there is more than 32 KiB LDS.
541
*
542
* Test: dEQP-VK.tessellation.shader_input_output.barrier
543
*/
544
if (chip_class >= GFX7 && family != CHIP_STONEY)
545
hardware_lds_size = 65536;
546
547
if (input_patch_size + output_patch_size)
548
num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
549
/* Make sure the output data fits in the offchip buffer */
550
if (output_patch_size)
551
num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
552
/* Not necessary for correctness, but improves performance. The
553
* specific value is taken from the proprietary driver.
554
*/
555
num_patches = MIN2(num_patches, 40);
556
557
/* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */
558
if (chip_class == GFX6) {
559
unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices);
560
num_patches = MIN2(num_patches, one_wave);
561
}
562
return num_patches;
563
}
564
565
/* NIR lowering entry points. Only the declarations are visible in this
 * header; the implementations live elsewhere. */

void radv_lower_io(struct radv_device *device, nir_shader *nir);

bool radv_lower_io_to_mem(struct radv_device *device, struct nir_shader *nir,
                          struct radv_shader_info *info, const struct radv_pipeline_key *pl_key);

void radv_lower_ngg(struct radv_device *device, struct nir_shader *nir,
                    struct radv_shader_info *info,
                    const struct radv_pipeline_key *pl_key,
                    struct radv_shader_variant_key *key,
                    bool consider_culling);

bool radv_consider_culling(struct radv_device *device, struct nir_shader *nir,
                           uint64_t ps_inputs_read);

#endif /* RADV_SHADER_H */