Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shader_llvm_tess.c
4570 views
1
/*
2
* Copyright 2020 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25
#include "si_pipe.h"
26
#include "si_shader_internal.h"
27
#include "sid.h"
28
29
static LLVMValueRef get_rel_patch_id(struct si_shader_context *ctx)
30
{
31
switch (ctx->stage) {
32
case MESA_SHADER_TESS_CTRL:
33
return si_unpack_param(ctx, ctx->args.tcs_rel_ids, 0, 8);
34
35
case MESA_SHADER_TESS_EVAL:
36
return ac_get_arg(&ctx->ac, ctx->args.tes_rel_patch_id);
37
38
default:
39
assert(0);
40
return NULL;
41
}
42
}
43
44
/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2             = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0            = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0  = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2            = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2  = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
64
65
static LLVMValueRef get_tcs_in_patch_stride(struct si_shader_context *ctx)
66
{
67
return si_unpack_param(ctx, ctx->vs_state_bits, 11, 13);
68
}
69
70
static unsigned get_tcs_out_vertex_dw_stride_constant(struct si_shader_context *ctx)
71
{
72
assert(ctx->stage == MESA_SHADER_TESS_CTRL);
73
74
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
75
return util_last_bit64(ctx->shader->key.mono.u.ff_tcs_inputs_to_copy) * 4;
76
77
return util_last_bit64(ctx->shader->selector->outputs_written) * 4;
78
}
79
80
static LLVMValueRef get_tcs_out_vertex_dw_stride(struct si_shader_context *ctx)
81
{
82
unsigned stride = get_tcs_out_vertex_dw_stride_constant(ctx);
83
84
return LLVMConstInt(ctx->ac.i32, stride, 0);
85
}
86
87
static LLVMValueRef get_tcs_out_patch_stride(struct si_shader_context *ctx)
88
{
89
if (ctx->shader->key.mono.u.ff_tcs_inputs_to_copy)
90
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 0, 13);
91
92
const struct si_shader_info *info = &ctx->shader->selector->info;
93
unsigned tcs_out_vertices = info->base.tess.tcs_vertices_out;
94
unsigned vertex_dw_stride = get_tcs_out_vertex_dw_stride_constant(ctx);
95
unsigned num_patch_outputs = util_last_bit64(ctx->shader->selector->patch_outputs_written);
96
unsigned patch_dw_stride = tcs_out_vertices * vertex_dw_stride + num_patch_outputs * 4;
97
return LLVMConstInt(ctx->ac.i32, patch_dw_stride, 0);
98
}
99
100
static LLVMValueRef get_tcs_out_patch0_offset(struct si_shader_context *ctx)
101
{
102
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 0, 16),
103
LLVMConstInt(ctx->ac.i32, 4, 0), "");
104
}
105
106
static LLVMValueRef get_tcs_out_patch0_patch_data_offset(struct si_shader_context *ctx)
107
{
108
return LLVMBuildMul(ctx->ac.builder, si_unpack_param(ctx, ctx->tcs_out_lds_offsets, 16, 16),
109
LLVMConstInt(ctx->ac.i32, 4, 0), "");
110
}
111
112
static LLVMValueRef get_tcs_in_current_patch_offset(struct si_shader_context *ctx)
113
{
114
LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
115
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
116
117
return LLVMBuildMul(ctx->ac.builder, patch_stride, rel_patch_id, "");
118
}
119
120
static LLVMValueRef get_tcs_out_current_patch_offset(struct si_shader_context *ctx)
121
{
122
LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
123
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
124
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
125
126
return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_offset);
127
}
128
129
static LLVMValueRef get_tcs_out_current_patch_data_offset(struct si_shader_context *ctx)
130
{
131
LLVMValueRef patch0_patch_data_offset = get_tcs_out_patch0_patch_data_offset(ctx);
132
LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
133
LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
134
135
return ac_build_imad(&ctx->ac, patch_stride, rel_patch_id, patch0_patch_data_offset);
136
}
137
138
static LLVMValueRef get_num_tcs_out_vertices(struct si_shader_context *ctx)
139
{
140
unsigned tcs_out_vertices =
141
ctx->shader->selector ? ctx->shader->selector->info.base.tess.tcs_vertices_out
142
: 0;
143
144
/* If !tcs_out_vertices, it's either the fixed-func TCS or the TCS epilog. */
145
if (ctx->stage == MESA_SHADER_TESS_CTRL && tcs_out_vertices)
146
return LLVMConstInt(ctx->ac.i32, tcs_out_vertices, 0);
147
148
return LLVMBuildAdd(ctx->ac.builder,
149
si_unpack_param(ctx, ctx->tcs_offchip_layout, 6, 5), ctx->ac.i32_1, "");
150
}
151
152
static LLVMValueRef get_tcs_in_vertex_dw_stride(struct si_shader_context *ctx)
153
{
154
unsigned stride;
155
156
switch (ctx->stage) {
157
case MESA_SHADER_VERTEX:
158
stride = ctx->shader->selector->lshs_vertex_stride / 4;
159
return LLVMConstInt(ctx->ac.i32, stride, 0);
160
161
case MESA_SHADER_TESS_CTRL:
162
if (ctx->screen->info.chip_class >= GFX9 && ctx->shader->is_monolithic) {
163
stride = ctx->shader->key.part.tcs.ls->lshs_vertex_stride / 4;
164
return LLVMConstInt(ctx->ac.i32, stride, 0);
165
}
166
return si_unpack_param(ctx, ctx->vs_state_bits, 24, 8);
167
168
default:
169
assert(0);
170
return NULL;
171
}
172
}
173
174
static LLVMValueRef
175
get_dw_address_from_generic_indices(struct si_shader_context *ctx, LLVMValueRef vertex_dw_stride,
176
LLVMValueRef base_addr, LLVMValueRef vertex_index,
177
LLVMValueRef param_index, ubyte name)
178
{
179
if (vertex_dw_stride) {
180
base_addr = ac_build_imad(&ctx->ac, vertex_index, vertex_dw_stride, base_addr);
181
}
182
183
if (param_index) {
184
base_addr = ac_build_imad(&ctx->ac, param_index, LLVMConstInt(ctx->ac.i32, 4, 0), base_addr);
185
}
186
187
int param = name >= VARYING_SLOT_PATCH0 ||
188
name == VARYING_SLOT_TESS_LEVEL_INNER ||
189
name == VARYING_SLOT_TESS_LEVEL_OUTER
190
? si_shader_io_get_unique_index_patch(name)
191
: si_shader_io_get_unique_index(name, false);
192
193
/* Add the base address of the element. */
194
return LLVMBuildAdd(ctx->ac.builder, base_addr, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");
195
}
196
197
/* The offchip buffer layout for TCS->TES is
 *
 * - attribute 0 of patch 0 vertex 0
 * - attribute 0 of patch 0 vertex 1
 * - attribute 0 of patch 0 vertex 2
 *   ...
 * - attribute 0 of patch 1 vertex 0
 * - attribute 0 of patch 1 vertex 1
 *   ...
 * - attribute 1 of patch 0 vertex 0
 * - attribute 1 of patch 0 vertex 1
 *   ...
 * - per patch attribute 0 of patch 0
 * - per patch attribute 0 of patch 1
 *   ...
 *
 * Note that every attribute has 4 components.
 */
215
/**
 * Compute the address of an attribute in the TCS->TES off-chip buffer.
 *
 * \param rel_patch_id  relative patch ID
 * \param vertex_index  vertex within the patch, or NULL for a per-patch
 *                      attribute
 * \param param_index   attribute slot index
 *
 * Each attribute slot is 16 units wide (the element index is multiplied by
 * 16). Per-patch attributes are additionally offset by the field unpacked
 * from ctx->tcs_offchip_layout with arguments (11, 21).
 */
static LLVMValueRef get_tcs_tes_buffer_address(struct si_shader_context *ctx,
                                               LLVMValueRef rel_patch_id, LLVMValueRef vertex_index,
                                               LLVMValueRef param_index)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   const bool per_vertex = vertex_index != NULL;

   LLVMValueRef verts_per_patch = get_num_tcs_out_vertices(ctx);
   LLVMValueRef patch_count = si_unpack_param(ctx, ctx->tcs_offchip_layout, 0, 6);
   patch_count = LLVMBuildAdd(builder, patch_count, ctx->ac.i32_1, "");
   LLVMValueRef vertex_count = LLVMBuildMul(builder, verts_per_patch, patch_count, "");

   /* Element index within one attribute, and the number of elements per attribute. */
   LLVMValueRef elem = per_vertex
                          ? ac_build_imad(&ctx->ac, rel_patch_id, verts_per_patch, vertex_index)
                          : rel_patch_id;
   LLVMValueRef elems_per_param = per_vertex ? vertex_count : patch_count;

   elem = ac_build_imad(&ctx->ac, param_index, elems_per_param, elem);

   LLVMValueRef addr = LLVMBuildMul(builder, elem, LLVMConstInt(ctx->ac.i32, 16, 0), "");

   if (per_vertex)
      return addr;

   /* Per-patch attributes live after all per-vertex attributes. */
   LLVMValueRef patch_data_offset = si_unpack_param(ctx, ctx->tcs_offchip_layout, 11, 21);

   return LLVMBuildAdd(builder, addr, patch_data_offset, "");
}
246
247
static LLVMValueRef get_tcs_tes_buffer_address_from_generic_indices(struct si_shader_context *ctx,
248
LLVMValueRef vertex_index,
249
LLVMValueRef param_index,
250
ubyte name)
251
{
252
unsigned param_index_base;
253
254
param_index_base = name >= VARYING_SLOT_PATCH0 ||
255
name == VARYING_SLOT_TESS_LEVEL_INNER ||
256
name == VARYING_SLOT_TESS_LEVEL_OUTER
257
? si_shader_io_get_unique_index_patch(name)
258
: si_shader_io_get_unique_index(name, false);
259
260
if (param_index) {
261
param_index = LLVMBuildAdd(ctx->ac.builder, param_index,
262
LLVMConstInt(ctx->ac.i32, param_index_base, 0), "");
263
} else {
264
param_index = LLVMConstInt(ctx->ac.i32, param_index_base, 0);
265
}
266
267
return get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), vertex_index, param_index);
268
}
269
270
/**
 * Load a 4-channel value from a buffer and return either the whole vector
 * or a single channel.
 *
 * \param type     element type of the result
 * \param swizzle  channel to extract (0..3), or ~0 to return the whole vec4
 * \param buffer   buffer resource descriptor
 * \param offset   passed to ac_build_buffer_load after \p base
 * \param base     passed to ac_build_buffer_load before \p offset
 *
 * NOTE(review): the parameter names do not obviously match how callers pass
 * them (callers pass the off-chip offset argument as \p offset and the
 * computed address as \p base) - confirm against ac_build_buffer_load.
 */
static LLVMValueRef buffer_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
                                LLVMValueRef buffer, LLVMValueRef offset, LLVMValueRef base,
                                bool can_speculate)
{
   LLVMTypeRef vec4_type = LLVMVectorType(type, 4);
   LLVMValueRef loaded = ac_build_buffer_load(&ctx->ac, buffer, 4, NULL, base, offset, 0, type,
                                              ac_glc, can_speculate, false);
   LLVMValueRef vec4 = LLVMBuildBitCast(ctx->ac.builder, loaded, vec4_type, "");

   if (swizzle == ~0u)
      return vec4;

   LLVMValueRef chan = LLVMConstInt(ctx->ac.i32, swizzle, 0);

   return LLVMBuildExtractElement(ctx->ac.builder, vec4, chan, "");
}
291
292
/**
293
* Load from LSHS LDS storage.
294
*
295
* \param type output value type
296
* \param swizzle offset (typically 0..3); it can be ~0, which loads a vec4
297
* \param dw_addr address in dwords
298
*/
299
static LLVMValueRef lshs_lds_load(struct si_shader_context *ctx, LLVMTypeRef type, unsigned swizzle,
300
LLVMValueRef dw_addr)
301
{
302
LLVMValueRef value;
303
304
if (swizzle == ~0) {
305
LLVMValueRef values[4];
306
307
for (unsigned chan = 0; chan < 4; chan++)
308
values[chan] = lshs_lds_load(ctx, type, chan, dw_addr);
309
310
return ac_build_gather_values(&ctx->ac, values, 4);
311
}
312
313
dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, swizzle, 0), "");
314
value = ac_lds_load(&ctx->ac, dw_addr);
315
return LLVMBuildBitCast(ctx->ac.builder, value, type, "");
316
}
317
318
/**
319
* Store to LSHS LDS storage.
320
*
321
* \param swizzle offset (typically 0..3)
322
* \param dw_addr address in dwords
323
* \param value value to store
324
*/
325
static void lshs_lds_store(struct si_shader_context *ctx, unsigned dw_offset_imm,
326
LLVMValueRef dw_addr, LLVMValueRef value)
327
{
328
dw_addr =
329
LLVMBuildAdd(ctx->ac.builder, dw_addr, LLVMConstInt(ctx->ac.i32, dw_offset_imm, 0), "");
330
331
ac_lds_store(&ctx->ac, dw_addr, value);
332
}
333
334
/* Selects which ring get_tess_ring_descriptor() builds a descriptor for. */
enum si_tess_ring
{
   /* Tess factor ring, addressed from the TCS. */
   TCS_FACTOR_RING = 0,
   /* Off-chip ring as addressed from the TCS. */
   TESS_OFFCHIP_RING_TCS = 1,
   /* Off-chip ring as addressed from the TES. */
   TESS_OFFCHIP_RING_TES = 2,
};
340
341
static LLVMValueRef get_tess_ring_descriptor(struct si_shader_context *ctx, enum si_tess_ring ring)
342
{
343
LLVMBuilderRef builder = ctx->ac.builder;
344
LLVMValueRef addr = ac_get_arg(
345
&ctx->ac, ring == TESS_OFFCHIP_RING_TES ? ctx->tes_offchip_addr : ctx->tcs_out_lds_layout);
346
347
/* TCS only receives high 13 bits of the address. */
348
if (ring == TESS_OFFCHIP_RING_TCS || ring == TCS_FACTOR_RING) {
349
addr = LLVMBuildAnd(builder, addr, LLVMConstInt(ctx->ac.i32, 0xfff80000, 0), "");
350
}
351
352
if (ring == TCS_FACTOR_RING) {
353
unsigned tf_offset = ctx->screen->tess_offchip_ring_size;
354
addr = LLVMBuildAdd(builder, addr, LLVMConstInt(ctx->ac.i32, tf_offset, 0), "");
355
}
356
357
uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
358
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
359
360
if (ctx->screen->info.chip_class >= GFX10)
361
rsrc3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
362
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
363
else
364
rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
365
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
366
367
LLVMValueRef desc[4];
368
desc[0] = addr;
369
desc[1] = LLVMConstInt(ctx->ac.i32, S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);
370
desc[2] = LLVMConstInt(ctx->ac.i32, 0xffffffff, 0);
371
desc[3] = LLVMConstInt(ctx->ac.i32, rsrc3, false);
372
373
return ac_build_gather_values(&ctx->ac, desc, 4);
374
}
375
376
void si_llvm_preload_tes_rings(struct si_shader_context *ctx)
377
{
378
ctx->tess_offchip_ring = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TES);
379
}
380
381
/**
 * ABI callback: load a TCS input or read back a TCS output.
 *
 * \param type             element type of the result
 * \param vertex_index     vertex within the patch, or NULL for per-patch data
 * \param param_index      optional indirect slot index, or NULL
 * \param driver_location  index into info->input_semantic / output_semantic
 * \param component        first channel to load
 * \param num_components   number of consecutive channels to load
 * \param load_input       true to load a TCS input, false to load a TCS output
 * \param vertex_index_is_invoc_id  true if \p vertex_index is the invocation ID
 *
 * Values are read from LDS, except for the same_patch_vertices fast path
 * below, which takes inputs straight from function parameters.
 */
static LLVMValueRef si_nir_load_tcs_varyings(struct ac_shader_abi *abi, LLVMTypeRef type,
                                             LLVMValueRef vertex_index, LLVMValueRef param_index,
                                             unsigned driver_location, unsigned component,
                                             unsigned num_components, bool load_input,
                                             bool vertex_index_is_invoc_id)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   const unsigned end = component + num_components;
   const ubyte semantic = load_input ? info->input_semantic[driver_location]
                                     : info->output_semantic[driver_location];
   LLVMValueRef chans[4];

   /* Load the TCS input from a VGPR if possible. */
   if (ctx->shader->key.opt.same_patch_vertices &&
       load_input && vertex_index_is_invoc_id && !param_index) {
      /* Input VGPRs follow tcs_rel_ids, 4 parameters per unique slot. */
      unsigned first_param = ctx->args.tcs_rel_ids.arg_index + 1 +
                             si_shader_io_get_unique_index(semantic, false) * 4;

      for (unsigned c = component; c < end; c++) {
         LLVMValueRef arg = LLVMGetParam(ctx->main_fn, first_param + c);

         chans[c] = LLVMBuildBitCast(ctx->ac.builder, arg, type, "");
      }

      return ac_build_varying_gather_values(&ctx->ac, chans, num_components, component);
   }

   const bool is_patch = vertex_index == NULL;
   assert((semantic >= VARYING_SLOT_PATCH0 ||
           semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
           semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);

   LLVMValueRef vertex_stride, lds_addr;

   if (load_input) {
      vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
      lds_addr = get_tcs_in_current_patch_offset(ctx);
   } else if (is_patch) {
      vertex_stride = NULL;
      lds_addr = get_tcs_out_current_patch_data_offset(ctx);
   } else {
      vertex_stride = get_tcs_out_vertex_dw_stride(ctx);
      lds_addr = get_tcs_out_current_patch_offset(ctx);
   }

   lds_addr = get_dw_address_from_generic_indices(ctx, vertex_stride, lds_addr, vertex_index,
                                                  param_index, semantic);

   for (unsigned c = component; c < end; c++)
      chans[c] = lshs_lds_load(ctx, type, c, lds_addr);

   return ac_build_varying_gather_values(&ctx->ac, chans, num_components, component);
}
440
441
/**
 * ABI callback: load a TES input from the off-chip tessellation ring.
 *
 * \param type             element type of the result
 * \param vertex_index     vertex within the patch, or NULL for per-patch data
 * \param param_index      optional indirect slot index, or NULL
 * \param driver_location  index into info->input_semantic
 * \param component        first channel to load
 * \param num_components   number of consecutive channels to load
 *
 * \p load_input and \p vertex_index_is_invoc_id are not used here.
 */
static LLVMValueRef si_nir_load_input_tes(struct ac_shader_abi *abi, LLVMTypeRef type,
                                          LLVMValueRef vertex_index, LLVMValueRef param_index,
                                          unsigned driver_location, unsigned component,
                                          unsigned num_components,
                                          bool load_input, bool vertex_index_is_invoc_id)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   const ubyte semantic = info->input_semantic[driver_location];
   const bool is_patch = vertex_index == NULL;

   assert((semantic >= VARYING_SLOT_PATCH0 ||
           semantic == VARYING_SLOT_TESS_LEVEL_INNER ||
           semantic == VARYING_SLOT_TESS_LEVEL_OUTER) == is_patch);

   LLVMValueRef ring_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
   LLVMValueRef attr_addr =
      get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);

   /* TODO: This will generate rather ordinary llvm code, although it
    * should be easy for the optimizer to fix up. In future we might want
    * to refactor buffer_load().
    */
   LLVMValueRef chans[4];
   const unsigned end = component + num_components;

   for (unsigned c = component; c < end; c++)
      chans[c] = buffer_load(ctx, type, c, ctx->tess_offchip_ring, ring_offset, attr_addr, true);

   return ac_build_varying_gather_values(&ctx->ac, chans, num_components, component);
}
472
473
/**
 * ABI callback: store a TCS output.
 *
 * Each written channel may go to up to three places:
 * - LDS, if the shader reads this output back, or if it is a tess factor
 *   and the factors are not defined in all invocations;
 * - the off-chip ring (everything except tess factors);
 * - ctx->invoc0_tess_factors, for tess factors defined in all invocations.
 *
 * \param vertex_index     vertex within the patch, or NULL for per-patch data
 * \param param_index      optional indirect slot index, or NULL
 * \param src              value to store; element 0 corresponds to \p component
 * \param writemask        channels to write, as absolute channel bits
 * \param component        first channel covered by \p src
 * \param location         unused here
 * \param driver_location  index into info->output_semantic / output_readmask
 */
static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
                                    LLVMValueRef vertex_index, LLVMValueRef param_index,
                                    LLVMValueRef src, unsigned writemask,
                                    unsigned component, unsigned location, unsigned driver_location)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_info *info = &ctx->shader->selector->info;
   LLVMValueRef dw_addr, stride;
   LLVMValueRef buffer, base, addr;
   LLVMValueRef values[4]; /* only channels 0..3 are ever written */

   const ubyte semantic = info->output_semantic[driver_location];
   const bool is_patch = vertex_index == NULL;
   const bool is_tess_inner = semantic == VARYING_SLOT_TESS_LEVEL_INNER;
   const bool is_tess_level = is_tess_inner || semantic == VARYING_SLOT_TESS_LEVEL_OUTER;

   /* Invalid SPIR-V can cause this. */
   if ((semantic >= VARYING_SLOT_PATCH0 || is_tess_level) != is_patch)
      return;

   /* Tess factors get special handling only when they are not indirectly
    * indexed. Always write tess factors into LDS for the TCS epilog.
    */
   const bool is_tess_factor = is_patch && !param_index && is_tess_level;

   if (is_patch) {
      stride = NULL;
      dw_addr = get_tcs_out_current_patch_data_offset(ctx);
   } else {
      stride = get_tcs_out_vertex_dw_stride(ctx);
      dw_addr = get_tcs_out_current_patch_offset(ctx);
   }
   dw_addr = get_dw_address_from_generic_indices(ctx, stride, dw_addr, vertex_index, param_index,
                                                 semantic);

   buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);

   base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);

   addr =
      get_tcs_tes_buffer_address_from_generic_indices(ctx, vertex_index, param_index, semantic);

   for (unsigned chan = component; chan < 4; chan++) {
      if (!(writemask & (1 << chan)))
         continue;
      LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);

      /* Skip LDS stores if there is no LDS read of this output. */
      if (info->output_readmask[driver_location] & (1 << chan) ||
          /* The epilog reads LDS if invocation 0 doesn't define tess factors. */
          (is_tess_factor && !info->tessfactors_are_def_in_all_invocs))
         lshs_lds_store(ctx, chan, dw_addr, value);

      value = ac_to_integer(&ctx->ac, value);
      values[chan] = value;

      /* Partial writes go out one dword at a time; a full vec4 is stored
       * once after the loop.
       */
      if (writemask != 0xF && !is_tess_factor) {
         ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1, addr, base,
                                     4 * chan, ac_glc);
      }

      /* Write tess factors into VGPRs for the epilog. */
      if (is_tess_factor && info->tessfactors_are_def_in_all_invocs) {
         if (!is_tess_inner) {
            LLVMBuildStore(ctx->ac.builder, value, /* outer */
                           ctx->invoc0_tess_factors[chan]);
         } else if (chan < 2) {
            LLVMBuildStore(ctx->ac.builder, value, /* inner */
                           ctx->invoc0_tess_factors[4 + chan]);
         }
      }
   }

   if (writemask == 0xF && !is_tess_factor) {
      LLVMValueRef value = ac_build_gather_values(&ctx->ac, values, 4);
      ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr, base, 0, ac_glc);
   }
}
561
562
static LLVMValueRef si_load_tess_coord(struct ac_shader_abi *abi)
563
{
564
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
565
LLVMValueRef coord[4] = {ac_get_arg(&ctx->ac, ctx->args.tes_u),
566
ac_get_arg(&ctx->ac, ctx->args.tes_v),
567
ctx->ac.f32_0, ctx->ac.f32_0};
568
569
/* For triangles, the vector should be (u, v, 1-u-v). */
570
if (ctx->shader->selector->info.base.tess.primitive_mode == GL_TRIANGLES) {
571
coord[2] = LLVMBuildFSub(ctx->ac.builder, ctx->ac.f32_1,
572
LLVMBuildFAdd(ctx->ac.builder, coord[0], coord[1], ""), "");
573
}
574
return ac_build_gather_values(&ctx->ac, coord, 4);
575
}
576
577
static LLVMValueRef load_tess_level(struct si_shader_context *ctx, unsigned semantic)
578
{
579
LLVMValueRef base, addr;
580
581
int param = si_shader_io_get_unique_index_patch(semantic);
582
583
base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
584
addr = get_tcs_tes_buffer_address(ctx, get_rel_patch_id(ctx), NULL,
585
LLVMConstInt(ctx->ac.i32, param, 0));
586
587
return buffer_load(ctx, ctx->ac.f32, ~0, ctx->tess_offchip_ring, base, addr, true);
588
}
589
590
static LLVMValueRef load_tess_level_default(struct si_shader_context *ctx, unsigned sysval)
591
{
592
LLVMValueRef buf, slot, val[4];
593
int i, offset;
594
595
slot = LLVMConstInt(ctx->ac.i32, SI_HS_CONST_DEFAULT_TESS_LEVELS, 0);
596
buf = ac_get_arg(&ctx->ac, ctx->internal_bindings);
597
buf = ac_build_load_to_sgpr(&ctx->ac, buf, slot);
598
offset = sysval == SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT ? 4 : 0;
599
600
for (i = 0; i < 4; i++)
601
val[i] = si_buffer_load_const(ctx, buf, LLVMConstInt(ctx->ac.i32, (offset + i) * 4, 0));
602
return ac_build_gather_values(&ctx->ac, val, 4);
603
}
604
605
static LLVMValueRef si_load_tess_level(struct ac_shader_abi *abi, unsigned varying_id,
606
bool load_default_state)
607
{
608
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
609
unsigned semantic;
610
611
if (load_default_state) {
612
switch (varying_id) {
613
case VARYING_SLOT_TESS_LEVEL_INNER:
614
semantic = SYSTEM_VALUE_TESS_LEVEL_INNER_DEFAULT;
615
break;
616
case VARYING_SLOT_TESS_LEVEL_OUTER:
617
semantic = SYSTEM_VALUE_TESS_LEVEL_OUTER_DEFAULT;
618
break;
619
default:
620
unreachable("unknown tess level");
621
}
622
return load_tess_level_default(ctx, semantic);
623
}
624
625
switch (varying_id) {
626
case VARYING_SLOT_TESS_LEVEL_INNER:
627
semantic = VARYING_SLOT_TESS_LEVEL_INNER;
628
break;
629
case VARYING_SLOT_TESS_LEVEL_OUTER:
630
semantic = VARYING_SLOT_TESS_LEVEL_OUTER;
631
break;
632
default:
633
unreachable("unknown tess level");
634
}
635
636
return load_tess_level(ctx, semantic);
637
}
638
639
static LLVMValueRef si_load_patch_vertices_in(struct ac_shader_abi *abi)
640
{
641
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
642
if (ctx->stage == MESA_SHADER_TESS_CTRL)
643
return si_unpack_param(ctx, ctx->tcs_out_lds_layout, 13, 6);
644
else if (ctx->stage == MESA_SHADER_TESS_EVAL)
645
return get_num_tcs_out_vertices(ctx);
646
else
647
unreachable("invalid shader stage for VERTICESIN");
648
}
649
650
/**
651
* Forward all outputs from the vertex shader to the TES. This is only used
652
* for the fixed function TCS.
653
*/
654
static void si_copy_tcs_inputs(struct si_shader_context *ctx)
655
{
656
LLVMValueRef invocation_id, buffer, buffer_offset;
657
LLVMValueRef lds_vertex_stride, lds_base;
658
uint64_t inputs;
659
660
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
661
buffer = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
662
buffer_offset = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);
663
664
lds_vertex_stride = get_tcs_in_vertex_dw_stride(ctx);
665
lds_base = get_tcs_in_current_patch_offset(ctx);
666
lds_base = ac_build_imad(&ctx->ac, invocation_id, lds_vertex_stride, lds_base);
667
668
inputs = ctx->shader->key.mono.u.ff_tcs_inputs_to_copy;
669
while (inputs) {
670
unsigned i = u_bit_scan64(&inputs);
671
672
LLVMValueRef lds_ptr =
673
LLVMBuildAdd(ctx->ac.builder, lds_base, LLVMConstInt(ctx->ac.i32, 4 * i, 0), "");
674
675
LLVMValueRef buffer_addr = get_tcs_tes_buffer_address(
676
ctx, get_rel_patch_id(ctx), invocation_id, LLVMConstInt(ctx->ac.i32, i, 0));
677
678
LLVMValueRef value = lshs_lds_load(ctx, ctx->ac.i32, ~0, lds_ptr);
679
680
ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr, buffer_offset, 0,
681
ac_glc);
682
}
683
}
684
685
/**
 * Write the tessellation factors of one patch to the tess factor ring and,
 * if the TES reads them, also to the off-chip ring.
 *
 * \param rel_patch_id    relative patch ID
 * \param invocation_id   TCS invocation ID; only invocation 0 performs the
 *                        stores
 * \param tcs_out_current_patch_data_offset  LDS dword address of the current
 *                        patch's per-patch outputs (used when the factors
 *                        must be read back from LDS)
 * \param invoc0_tf_outer outer factors of invocation 0 (used when the epilog
 *                        key says invocation 0 defines them)
 * \param invoc0_tf_inner inner factors of invocation 0 (same condition)
 */
static void si_write_tess_factors(struct si_shader_context *ctx, LLVMValueRef rel_patch_id,
                                  LLVMValueRef invocation_id,
                                  LLVMValueRef tcs_out_current_patch_data_offset,
                                  LLVMValueRef invoc0_tf_outer[4], LLVMValueRef invoc0_tf_inner[2])
{
   struct si_shader *shader = ctx->shader;
   unsigned tess_inner_index, tess_outer_index;
   LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
   LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
   unsigned stride, outer_comps, inner_comps, i, offset;

   /* Determine the layout of one tess factor element in the buffer.
    *
    * This is done before emitting any code so that the early return for an
    * unknown primitive mode cannot leave an if-block open in the builder.
    */
   switch (shader->key.part.tcs.epilog.prim_mode) {
   case GL_LINES:
      stride = 2; /* 2 dwords, 1 vec2 store */
      outer_comps = 2;
      inner_comps = 0;
      break;
   case GL_TRIANGLES:
      stride = 4; /* 4 dwords, 1 vec4 store */
      outer_comps = 3;
      inner_comps = 1;
      break;
   case GL_QUADS:
      stride = 6; /* 6 dwords, 2 stores (vec4 + vec2) */
      outer_comps = 4;
      inner_comps = 2;
      break;
   default:
      assert(0);
      return;
   }

   /* Add a barrier before loading tess factors from LDS. */
   if (!shader->key.part.tcs.epilog.invoc0_tess_factors_are_def)
      si_llvm_emit_barrier(ctx);

   /* Do this only for invocation 0, because the tess levels are per-patch,
    * not per-vertex.
    *
    * This can't jump, because invocation 0 executes this. It should
    * at least mask out the loads and stores for other invocations.
    */
   ac_build_ifcc(&ctx->ac,
                 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, invocation_id, ctx->ac.i32_0, ""), 6503);

   /* Unused channels stay undef so the gathers below are always well-formed. */
   for (i = 0; i < 4; i++) {
      inner[i] = LLVMGetUndef(ctx->ac.i32);
      outer[i] = LLVMGetUndef(ctx->ac.i32);
   }

   if (shader->key.part.tcs.epilog.invoc0_tess_factors_are_def) {
      /* Tess factors are in VGPRs. */
      for (i = 0; i < outer_comps; i++)
         outer[i] = out[i] = invoc0_tf_outer[i];
      for (i = 0; i < inner_comps; i++)
         inner[i] = out[outer_comps + i] = invoc0_tf_inner[i];
   } else {
      /* Load tess_inner and tess_outer from LDS.
       * Any invocation can write them, so we can't get them from a temporary.
       */
      tess_inner_index = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
      tess_outer_index = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);

      lds_base = tcs_out_current_patch_data_offset;
      lds_inner = LLVMBuildAdd(ctx->ac.builder, lds_base,
                               LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, 0), "");
      lds_outer = LLVMBuildAdd(ctx->ac.builder, lds_base,
                               LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, 0), "");

      for (i = 0; i < outer_comps; i++) {
         outer[i] = out[i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_outer);
      }
      for (i = 0; i < inner_comps; i++) {
         inner[i] = out[outer_comps + i] = lshs_lds_load(ctx, ctx->ac.i32, i, lds_inner);
      }
   }

   if (shader->key.part.tcs.epilog.prim_mode == GL_LINES) {
      /* For isolines, the hardware expects tess factors in the
       * reverse order from what NIR specifies.
       */
      LLVMValueRef tmp = out[0];
      out[0] = out[1];
      out[1] = tmp;
   }

   /* Convert the outputs to vectors for stores. */
   vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
   vec1 = NULL;

   if (stride > 4)
      vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);

   /* Get the buffer. */
   buffer = get_tess_ring_descriptor(ctx, TCS_FACTOR_RING);

   /* Get the offset. */
   tf_base = ac_get_arg(&ctx->ac, ctx->args.tcs_factor_offset);
   byteoffset =
      LLVMBuildMul(ctx->ac.builder, rel_patch_id, LLVMConstInt(ctx->ac.i32, 4 * stride, 0), "");
   offset = 0;

   /* Store the dynamic HS control word. */
   if (ctx->screen->info.chip_class <= GFX8) {
      /* Only patch 0 writes the control word; the factors follow it. */
      ac_build_ifcc(&ctx->ac,
                    LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, rel_patch_id, ctx->ac.i32_0, ""), 6504);
      ac_build_buffer_store_dword(&ctx->ac, buffer, LLVMConstInt(ctx->ac.i32, 0x80000000, 0), 1,
                                  ctx->ac.i32_0, tf_base, offset, ac_glc);
      ac_build_endif(&ctx->ac, 6504);
      offset += 4;
   }

   /* Store the tessellation factors. */
   ac_build_buffer_store_dword(&ctx->ac, buffer, vec0, MIN2(stride, 4), byteoffset, tf_base, offset,
                               ac_glc);
   offset += 16;
   if (vec1)
      ac_build_buffer_store_dword(&ctx->ac, buffer, vec1, stride - 4, byteoffset, tf_base, offset,
                                  ac_glc);

   /* Store the tess factors into the offchip buffer if TES reads them. */
   if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
      LLVMValueRef buf, base, inner_vec, outer_vec, tf_outer_offset;
      LLVMValueRef tf_inner_offset;
      unsigned param_outer, param_inner;

      buf = get_tess_ring_descriptor(ctx, TESS_OFFCHIP_RING_TCS);
      base = ac_get_arg(&ctx->ac, ctx->args.tess_offchip_offset);

      param_outer = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_OUTER);
      tf_outer_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                                   LLVMConstInt(ctx->ac.i32, param_outer, 0));

      /* Without vec3 support the vector is padded to a power of two; the
       * padding channel is the undef written above.
       */
      unsigned outer_vec_size = ac_has_vec3_support(ctx->screen->info.chip_class, false)
                                   ? outer_comps
                                   : util_next_power_of_two(outer_comps);
      outer_vec = ac_build_gather_values(&ctx->ac, outer, outer_vec_size);

      ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec, outer_comps, tf_outer_offset, base, 0,
                                  ac_glc);
      if (inner_comps) {
         param_inner = si_shader_io_get_unique_index_patch(VARYING_SLOT_TESS_LEVEL_INNER);
         tf_inner_offset = get_tcs_tes_buffer_address(ctx, rel_patch_id, NULL,
                                                      LLVMConstInt(ctx->ac.i32, param_inner, 0));

         /* A single inner factor is stored as a scalar, not a 1-element vector. */
         inner_vec =
            inner_comps == 1 ? inner[0] : ac_build_gather_values(&ctx->ac, inner, inner_comps);
         ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec, inner_comps, tf_inner_offset, base,
                                     0, ac_glc);
      }
   }

   ac_build_endif(&ctx->ac, 6503);
}
840
841
/* This only writes the tessellation factor levels. */
842
static void si_llvm_emit_tcs_epilogue(struct ac_shader_abi *abi, unsigned max_outputs,
843
LLVMValueRef *addrs)
844
{
845
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
846
LLVMBuilderRef builder = ctx->ac.builder;
847
LLVMValueRef rel_patch_id, invocation_id, tf_lds_offset;
848
849
si_copy_tcs_inputs(ctx);
850
851
rel_patch_id = get_rel_patch_id(ctx);
852
invocation_id = si_unpack_param(ctx, ctx->args.tcs_rel_ids, 8, 5);
853
tf_lds_offset = get_tcs_out_current_patch_data_offset(ctx);
854
855
if (ctx->screen->info.chip_class >= GFX9 && !ctx->shader->is_monolithic) {
856
LLVMBasicBlockRef blocks[2] = {LLVMGetInsertBlock(builder), ctx->merged_wrap_if_entry_block};
857
LLVMValueRef values[2];
858
859
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
860
861
values[0] = rel_patch_id;
862
values[1] = LLVMGetUndef(ctx->ac.i32);
863
rel_patch_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
864
865
values[0] = tf_lds_offset;
866
values[1] = LLVMGetUndef(ctx->ac.i32);
867
tf_lds_offset = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
868
869
values[0] = invocation_id;
870
values[1] = ctx->ac.i32_1; /* cause the epilog to skip threads */
871
invocation_id = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, values, blocks);
872
}
873
874
/* Return epilog parameters from this function. */
875
LLVMValueRef ret = ctx->return_value;
876
unsigned vgpr;
877
878
if (ctx->screen->info.chip_class >= GFX9) {
879
ret =
880
si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
881
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
882
/* Tess offchip and tess factor offsets are at the beginning. */
883
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
884
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
885
vgpr = 8 + GFX9_SGPR_TCS_OUT_LAYOUT + 1;
886
} else {
887
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, GFX6_SGPR_TCS_OFFCHIP_LAYOUT);
888
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, GFX6_SGPR_TCS_OUT_LAYOUT);
889
/* Tess offchip and tess factor offsets are after user SGPRs. */
890
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, GFX6_TCS_NUM_USER_SGPR);
891
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, GFX6_TCS_NUM_USER_SGPR + 1);
892
vgpr = GFX6_TCS_NUM_USER_SGPR + 2;
893
}
894
895
/* VGPRs */
896
rel_patch_id = ac_to_float(&ctx->ac, rel_patch_id);
897
invocation_id = ac_to_float(&ctx->ac, invocation_id);
898
tf_lds_offset = ac_to_float(&ctx->ac, tf_lds_offset);
899
900
/* Leave a hole corresponding to the two input VGPRs. This ensures that
901
* the invocation_id output does not alias the tcs_rel_ids input,
902
* which saves a V_MOV on gfx9.
903
*/
904
vgpr += 2;
905
906
ret = LLVMBuildInsertValue(builder, ret, rel_patch_id, vgpr++, "");
907
ret = LLVMBuildInsertValue(builder, ret, invocation_id, vgpr++, "");
908
909
if (ctx->shader->selector->info.tessfactors_are_def_in_all_invocs) {
910
vgpr++; /* skip the tess factor LDS offset */
911
for (unsigned i = 0; i < 6; i++) {
912
LLVMValueRef value = LLVMBuildLoad(builder, ctx->invoc0_tess_factors[i], "");
913
value = ac_to_float(&ctx->ac, value);
914
ret = LLVMBuildInsertValue(builder, ret, value, vgpr++, "");
915
}
916
} else {
917
ret = LLVMBuildInsertValue(builder, ret, tf_lds_offset, vgpr++, "");
918
}
919
ctx->return_value = ret;
920
}
921
922
/* Pass TCS inputs from LS to TCS on GFX9. */
923
static void si_set_ls_return_value_for_tcs(struct si_shader_context *ctx)
924
{
925
if (!ctx->shader->is_monolithic)
926
ac_build_endif(&ctx->ac, ctx->merged_wrap_if_label);
927
928
LLVMValueRef ret = ctx->return_value;
929
930
ret = si_insert_input_ptr(ctx, ret, ctx->other_const_and_shader_buffers, 0);
931
ret = si_insert_input_ptr(ctx, ret, ctx->other_samplers_and_images, 1);
932
ret = si_insert_input_ret(ctx, ret, ctx->args.tess_offchip_offset, 2);
933
ret = si_insert_input_ret(ctx, ret, ctx->args.merged_wave_info, 3);
934
ret = si_insert_input_ret(ctx, ret, ctx->args.tcs_factor_offset, 4);
935
ret = si_insert_input_ret(ctx, ret, ctx->args.scratch_offset, 5);
936
937
ret = si_insert_input_ptr(ctx, ret, ctx->internal_bindings, 8 + SI_SGPR_INTERNAL_BINDINGS);
938
ret = si_insert_input_ptr(ctx, ret, ctx->bindless_samplers_and_images,
939
8 + SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES);
940
941
ret = si_insert_input_ret(ctx, ret, ctx->vs_state_bits, 8 + SI_SGPR_VS_STATE_BITS);
942
943
ret = si_insert_input_ret(ctx, ret, ctx->tcs_offchip_layout, 8 + GFX9_SGPR_TCS_OFFCHIP_LAYOUT);
944
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_offsets, 8 + GFX9_SGPR_TCS_OUT_OFFSETS);
945
ret = si_insert_input_ret(ctx, ret, ctx->tcs_out_lds_layout, 8 + GFX9_SGPR_TCS_OUT_LAYOUT);
946
947
unsigned vgpr = 8 + GFX9_TCS_NUM_USER_SGPR;
948
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
949
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id)),
950
vgpr++, "");
951
ret = LLVMBuildInsertValue(ctx->ac.builder, ret,
952
ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.tcs_rel_ids)),
953
vgpr++, "");
954
ctx->return_value = ret;
955
}
956
957
/**
 * LS epilogue: write the shader outputs to LDS, where the next shader
 * (TCS aka HS) reads its inputs.
 *
 * When key.opt.same_patch_vertices is set, every written channel is also
 * inserted into ctx->return_value, and the LDS store is skipped for outputs
 * flagged in the next shader's tcs_vgpr_only_inputs mask.
 *
 * \param abi          shader ABI embedded in the si_shader_context
 * \param max_outputs  unused
 * \param addrs        output variable addresses, indexed as 4 * output + channel
 */
void si_llvm_emit_ls_epilogue(struct ac_shader_abi *abi, unsigned max_outputs, LLVMValueRef *addrs)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader *shader = ctx->shader;
   struct si_shader_info *info = &shader->selector->info;
   LLVMBuilderRef builder = ctx->ac.builder;

   /* LDS dword address of this vertex: vertex index * per-vertex dword stride. */
   LLVMValueRef vertex_id = ac_get_arg(&ctx->ac, ctx->args.vs_rel_patch_id);
   LLVMValueRef vertex_dw_stride = get_tcs_in_vertex_dw_stride(ctx);
   LLVMValueRef vertex_dw_base = LLVMBuildMul(builder, vertex_id, vertex_dw_stride, "");

   /* First return-value slot used for outputs passed in registers. */
   const unsigned ret_base = 8 + GFX9_TCS_NUM_USER_SGPR + 2;

   for (unsigned out = 0; out < info->num_outputs; out++) {
      const unsigned semantic = info->output_semantic[out];

      /* The ARB_shader_viewport_layer_array spec resolves that the value
       * of gl_ViewportIndex / gl_Layer written by the last vertex
       * processing stage (vertex, tessellation evaluation or geometry) is
       * the one that is used, and that index or layer zero is assumed if
       * that stage does not statically assign them.
       *
       * So writes to those outputs in VS-as-LS are simply ignored.
       */
      if (semantic == VARYING_SLOT_LAYER || semantic == VARYING_SLOT_VIEWPORT)
         continue;

      int param = si_shader_io_get_unique_index(semantic, false);
      LLVMValueRef out_dw_addr =
         LLVMBuildAdd(builder, vertex_dw_base, LLVMConstInt(ctx->ac.i32, param * 4, 0), "");

      for (unsigned chan = 0; chan < 4; chan++) {
         const bool chan_written = (info->output_usagemask[out] & (1 << chan)) != 0;
         if (!chan_written)
            continue;

         LLVMValueRef value = LLVMBuildLoad(builder, addrs[4 * out + chan], "");

         /* next_shader_sel is only looked at when same_patch_vertices is set. */
         const bool vgpr_only =
            shader->key.opt.same_patch_vertices &&
            (ctx->next_shader_sel->tcs_vgpr_only_inputs & (1ull << semantic)) != 0;

         if (!vgpr_only)
            lshs_lds_store(ctx, chan, out_dw_addr, value);

         if (shader->key.opt.same_patch_vertices) {
            ctx->return_value = LLVMBuildInsertValue(builder, ctx->return_value, value,
                                                     ret_base + param * 4 + chan, "");
         }
      }
   }

   if (ctx->screen->info.chip_class >= GFX9)
      si_set_ls_return_value_for_tcs(ctx);
}
1015
1016
/**
1017
* Compile the TCS epilog function. This writes tesselation factors to memory
1018
* based on the output primitive type of the tesselator (determined by TES).
1019
*/
1020
void si_llvm_build_tcs_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
1021
{
1022
memset(&ctx->args, 0, sizeof(ctx->args));
1023
1024
if (ctx->screen->info.chip_class >= GFX9) {
1025
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1026
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1027
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
1028
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); /* wave info */
1029
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
1030
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1031
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1032
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1033
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1034
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1035
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1036
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1037
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1038
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1039
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1040
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1041
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
1042
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1043
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
1044
} else {
1045
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1046
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1047
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1048
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1049
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_offchip_layout);
1050
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1051
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->tcs_out_lds_layout);
1052
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
1053
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tess_offchip_offset);
1054
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->args.tcs_factor_offset);
1055
}
1056
1057
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1058
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, NULL); /* VGPR gap */
1059
struct ac_arg rel_patch_id; /* patch index within the wave (REL_PATCH_ID) */
1060
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &rel_patch_id);
1061
struct ac_arg invocation_id; /* invocation ID within the patch */
1062
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &invocation_id);
1063
struct ac_arg
1064
tcs_out_current_patch_data_offset; /* LDS offset where tess factors should be loaded from */
1065
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tcs_out_current_patch_data_offset);
1066
1067
struct ac_arg tess_factors[6];
1068
for (unsigned i = 0; i < 6; i++)
1069
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_INT, &tess_factors[i]);
1070
1071
/* Create the function. */
1072
si_llvm_create_func(ctx, "tcs_epilog", NULL, 0, ctx->screen->info.chip_class >= GFX7 ? 128 : 0);
1073
ac_declare_lds_as_pointer(&ctx->ac);
1074
1075
LLVMValueRef invoc0_tess_factors[6];
1076
for (unsigned i = 0; i < 6; i++)
1077
invoc0_tess_factors[i] = ac_get_arg(&ctx->ac, tess_factors[i]);
1078
1079
si_write_tess_factors(ctx, ac_get_arg(&ctx->ac, rel_patch_id),
1080
ac_get_arg(&ctx->ac, invocation_id),
1081
ac_get_arg(&ctx->ac, tcs_out_current_patch_data_offset),
1082
invoc0_tess_factors, invoc0_tess_factors + 4);
1083
1084
LLVMBuildRetVoid(ctx->ac.builder);
1085
}
1086
1087
void si_llvm_init_tcs_callbacks(struct si_shader_context *ctx)
1088
{
1089
ctx->abi.load_tess_varyings = si_nir_load_tcs_varyings;
1090
ctx->abi.load_tess_level = si_load_tess_level;
1091
ctx->abi.store_tcs_outputs = si_nir_store_output_tcs;
1092
ctx->abi.emit_outputs = si_llvm_emit_tcs_epilogue;
1093
ctx->abi.load_patch_vertices_in = si_load_patch_vertices_in;
1094
}
1095
1096
/**
 * Install the TES implementations of the shader ABI callbacks on \p ctx.
 *
 * The emit_outputs hook is chosen in this priority order: ES epilogue when
 * key.as_es is set, NGG culling epilogue when \p ngg_cull_shader is true,
 * NGG epilogue when key.as_ngg is set, and the VS epilogue otherwise.
 */
void si_llvm_init_tes_callbacks(struct si_shader_context *ctx, bool ngg_cull_shader)
{
   struct ac_shader_abi *abi = &ctx->abi;

   abi->load_tess_varyings = si_nir_load_input_tes;
   abi->load_tess_coord = si_load_tess_coord;
   abi->load_tess_level = si_load_tess_level;
   abi->load_patch_vertices_in = si_load_patch_vertices_in;

   if (ctx->shader->key.as_es) {
      abi->emit_outputs = si_llvm_emit_es_epilogue;
      return;
   }

   if (ngg_cull_shader) {
      abi->emit_outputs = gfx10_emit_ngg_culling_epilogue;
      return;
   }

   if (ctx->shader->key.as_ngg) {
      abi->emit_outputs = gfx10_emit_ngg_epilogue;
      return;
   }

   abi->emit_outputs = si_llvm_emit_vs_epilogue;
}
1112
1113