Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_shader_llvm_ps.c
4570 views
1
/*
2
* Copyright 2020 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25
#include "si_pipe.h"
26
#include "si_shader_internal.h"
27
#include "sid.h"
28
29
LLVMValueRef si_get_sample_id(struct si_shader_context *ctx)
30
{
31
return si_unpack_param(ctx, ctx->args.ancillary, 8, 4);
32
}
33
34
static LLVMValueRef load_sample_mask_in(struct ac_shader_abi *abi)
35
{
36
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
37
return ac_to_integer(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args.sample_coverage));
38
}
39
40
/**
 * Load the position of one sample from the SI_PS_CONST_SAMPLE_POSITIONS
 * internal buffer.
 *
 * @param abi        shader ABI, used to look up the shader context
 * @param sample_id  index of the sample whose position is wanted
 * @return           4-component vector (x, y, 0, 0)
 */
static LLVMValueRef load_sample_position(struct ac_shader_abi *abi, LLVMValueRef sample_id)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMBuilderRef builder = ctx->ac.builder;

   /* Fetch the descriptor of the sample position buffer. */
   LLVMValueRef bindings = ac_get_arg(&ctx->ac, ctx->internal_bindings);
   LLVMValueRef slot = LLVMConstInt(ctx->ac.i32, SI_PS_CONST_SAMPLE_POSITIONS, 0);
   LLVMValueRef buffer = ac_build_load_to_sgpr(&ctx->ac, bindings, slot);

   /* Every sample takes 8 bytes: two floats holding samplepos.xy. */
   LLVMValueRef x_offset =
      LLVMBuildMul(builder, sample_id, LLVMConstInt(ctx->ac.i32, 8, 0), "");
   LLVMValueRef y_offset =
      LLVMBuildAdd(builder, x_offset, LLVMConstInt(ctx->ac.i32, 4, 0), "");

   LLVMValueRef pos[4];
   pos[0] = si_buffer_load_const(ctx, buffer, x_offset);
   pos[1] = si_buffer_load_const(ctx, buffer, y_offset);
   pos[2] = LLVMConstReal(ctx->ac.f32, 0);
   pos[3] = LLVMConstReal(ctx->ac.f32, 0);

   return ac_build_gather_values(&ctx->ac, pos, 4);
}
59
60
static LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
61
{
62
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
63
struct ac_image_args args = {};
64
LLVMValueRef ptr, image, fmask;
65
66
/* Ignore src0, because KHR_blend_func_extended disallows multiple render
67
* targets.
68
*/
69
70
/* Load the image descriptor. */
71
STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
72
ptr = ac_get_arg(&ctx->ac, ctx->internal_bindings);
73
ptr =
74
LLVMBuildPointerCast(ctx->ac.builder, ptr, ac_array_in_const32_addr_space(ctx->ac.v8i32), "");
75
image =
76
ac_build_load_to_sgpr(&ctx->ac, ptr, LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
77
78
unsigned chan = 0;
79
80
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
81
82
if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
83
args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
84
85
/* Get the current render target layer index. */
86
if (ctx->shader->key.mono.u.ps.fbfetch_layered)
87
args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
88
89
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
90
args.coords[chan++] = si_get_sample_id(ctx);
91
92
if (ctx->shader->key.mono.u.ps.fbfetch_msaa && !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
93
fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
94
LLVMConstInt(ctx->ac.i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
95
96
ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
97
ctx->shader->key.mono.u.ps.fbfetch_layered);
98
}
99
100
args.opcode = ac_image_load;
101
args.resource = image;
102
args.dmask = 0xf;
103
args.attributes = AC_FUNC_ATTR_READNONE;
104
105
if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
106
args.dim =
107
ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darraymsaa : ac_image_2dmsaa;
108
else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
109
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_1darray : ac_image_1d;
110
else
111
args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ? ac_image_2darray : ac_image_2d;
112
113
return ac_build_image_opcode(&ctx->ac, &args);
114
}
115
116
/**
 * Build the fetch of one channel of a fragment shader input attribute.
 *
 * When at least one of the interpolation weights (i, j) is given, the value
 * is interpolated with ac_build_fs_interp. When both are NULL,
 * ac_build_fs_interp_mov is used with parameter selector 2 (P0).
 */
static LLVMValueRef si_build_fs_interp(struct si_shader_context *ctx, unsigned attr_index,
                                       unsigned chan, LLVMValueRef prim_mask, LLVMValueRef i,
                                       LLVMValueRef j)
{
   LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, 0);
   LLVMValueRef llvm_attr = LLVMConstInt(ctx->ac.i32, attr_index, 0);

   if (!i && !j) {
      LLVMValueRef p0 = LLVMConstInt(ctx->ac.i32, 2, 0); /* P0 */

      return ac_build_fs_interp_mov(&ctx->ac, p0, llvm_chan, llvm_attr, prim_mask);
   }

   return ac_build_fs_interp(&ctx->ac, llvm_chan, llvm_attr, prim_mask, i, j);
}
128
129
/**
130
* Interpolate a fragment shader input.
131
*
132
* @param ctx context
133
* @param input_index index of the input in hardware
134
* @param semantic_index semantic index
135
* @param num_interp_inputs number of all interpolated inputs (= BCOLOR offset)
136
* @param colors_read_mask color components read (4 bits for each color, 8 bits in total)
137
* @param interp_param interpolation weights (i,j)
138
* @param prim_mask SI_PARAM_PRIM_MASK
139
* @param face SI_PARAM_FRONT_FACE
140
* @param result the return value (4 components)
141
*/
142
static void interp_fs_color(struct si_shader_context *ctx, unsigned input_index,
143
unsigned semantic_index, unsigned num_interp_inputs,
144
unsigned colors_read_mask, LLVMValueRef interp_param,
145
LLVMValueRef prim_mask, LLVMValueRef face, LLVMValueRef result[4])
146
{
147
LLVMValueRef i = NULL, j = NULL;
148
unsigned chan;
149
150
/* fs.constant returns the param from the middle vertex, so it's not
151
* really useful for flat shading. It's meant to be used for custom
152
* interpolation (but the intrinsic can't fetch from the other two
153
* vertices).
154
*
155
* Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
156
* to do the right thing. The only reason we use fs.constant is that
157
* fs.interp cannot be used on integers, because they can be equal
158
* to NaN.
159
*
160
* When interp is false we will use fs.constant or for newer llvm,
161
* amdgcn.interp.mov.
162
*/
163
bool interp = interp_param != NULL;
164
165
if (interp) {
166
interp_param =
167
LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
168
169
i = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
170
j = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
171
}
172
173
if (ctx->shader->key.part.ps.prolog.color_two_side) {
174
LLVMValueRef is_face_positive;
175
176
/* If BCOLOR0 is used, BCOLOR1 is at offset "num_inputs + 1",
177
* otherwise it's at offset "num_inputs".
178
*/
179
unsigned back_attr_offset = num_interp_inputs;
180
if (semantic_index == 1 && colors_read_mask & 0xf)
181
back_attr_offset += 1;
182
183
is_face_positive = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, face, ctx->ac.i32_0, "");
184
185
for (chan = 0; chan < 4; chan++) {
186
LLVMValueRef front, back;
187
188
front = si_build_fs_interp(ctx, input_index, chan, prim_mask, i, j);
189
back = si_build_fs_interp(ctx, back_attr_offset, chan, prim_mask, i, j);
190
191
result[chan] = LLVMBuildSelect(ctx->ac.builder, is_face_positive, front, back, "");
192
}
193
} else {
194
for (chan = 0; chan < 4; chan++) {
195
result[chan] = si_build_fs_interp(ctx, input_index, chan, prim_mask, i, j);
196
}
197
}
198
}
199
200
/**
 * Emit the alpha test selected by the epilog key.
 *
 * The fragment is killed when "alpha <func> alpha_ref" is false, where
 * alpha_ref is the SI_PARAM_ALPHA_REF parameter. PIPE_FUNC_NEVER kills
 * unconditionally. The caller is expected not to call this for
 * PIPE_FUNC_ALWAYS, which has no entry in the predicate table.
 */
static void si_alpha_test(struct si_shader_context *ctx, LLVMValueRef alpha)
{
   const unsigned alpha_func = ctx->shader->key.part.ps.epilog.alpha_func;

   if (alpha_func == PIPE_FUNC_NEVER) {
      ac_build_kill_if_false(&ctx->ac, ctx->ac.i1false);
      return;
   }

   /* Ordered float predicate for each comparing PIPE_FUNC_* value. */
   static const LLVMRealPredicate cond_map[PIPE_FUNC_ALWAYS + 1] = {
      [PIPE_FUNC_LESS] = LLVMRealOLT,
      [PIPE_FUNC_EQUAL] = LLVMRealOEQ,
      [PIPE_FUNC_LEQUAL] = LLVMRealOLE,
      [PIPE_FUNC_GREATER] = LLVMRealOGT,
      [PIPE_FUNC_NOTEQUAL] = LLVMRealONE,
      [PIPE_FUNC_GEQUAL] = LLVMRealOGE,
   };
   LLVMRealPredicate pred = cond_map[alpha_func];
   assert(pred);

   /* Compare at the precision of the incoming alpha value. */
   LLVMValueRef ref = LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF);
   if (LLVMTypeOf(alpha) == ctx->ac.f16)
      ref = LLVMBuildFPTrunc(ctx->ac.builder, ref, ctx->ac.f16, "");

   LLVMValueRef passed = LLVMBuildFCmp(ctx->ac.builder, pred, alpha, ref, "");
   ac_build_kill_if_false(&ctx->ac, passed);
}
221
222
/**
 * Scale alpha by the fraction of covered samples:
 *
 *    alpha * popcount(coverage) / SI_NUM_SMOOTH_AA_SAMPLES
 *
 * @param ctx               context
 * @param alpha             alpha value (f32 or f16)
 * @param samplemask_param  index of the function parameter holding the coverage mask
 * @return                  the scaled alpha, in the same type as the input alpha
 */
static LLVMValueRef si_scale_alpha_by_sample_mask(struct si_shader_context *ctx, LLVMValueRef alpha,
                                                  unsigned samplemask_param)
{
   LLVMBuilderRef builder = ctx->ac.builder;

   LLVMValueRef mask = ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn, samplemask_param));
   LLVMValueRef num_covered = ac_build_bit_count(&ctx->ac, mask);

   LLVMValueRef scale = LLVMBuildUIToFP(builder, num_covered, ctx->ac.f32, "");
   scale = LLVMBuildFMul(builder, scale,
                         LLVMConstReal(ctx->ac.f32, 1.0 / SI_NUM_SMOOTH_AA_SAMPLES), "");

   /* Match the precision of a 16-bit alpha before multiplying. */
   if (LLVMTypeOf(alpha) == ctx->ac.f16)
      scale = LLVMBuildFPTrunc(builder, scale, ctx->ac.f16, "");

   return LLVMBuildFMul(builder, alpha, scale, "");
}
240
241
struct si_ps_exports {
242
unsigned num;
243
struct ac_export_args args[10];
244
};
245
246
/* Gather two values into a 2-element vector and bitcast it to v2f16. */
static LLVMValueRef pack_two_16bit(struct ac_llvm_context *ctx, LLVMValueRef args[2])
{
   LLVMValueRef pair = ac_build_gather_values(ctx, args, 2);

   return LLVMBuildBitCast(ctx->builder, pair, ctx->v2f16, "");
}
251
252
/**
 * Widen a color channel to 32 bits according to its type.
 *
 * @param ctx         context
 * @param color_type  one of SI_TYPE_ANY32, SI_TYPE_FLOAT16, SI_TYPE_INT16, SI_TYPE_UINT16
 * @param value       the channel value
 * @return            SI_TYPE_ANY32: the value unchanged; SI_TYPE_FLOAT16: the
 *                    value extended to f32; SI_TYPE_INT16 / SI_TYPE_UINT16: the
 *                    value sign- / zero-extended to i32 and passed through
 *                    ac_to_float; NULL for any other type
 */
static LLVMValueRef get_color_32bit(struct si_shader_context *ctx, unsigned color_type,
                                    LLVMValueRef value)
{
   LLVMBuilderRef builder = ctx->ac.builder;

   if (color_type == SI_TYPE_ANY32)
      return value;

   if (color_type == SI_TYPE_FLOAT16)
      return LLVMBuildFPExt(builder, value, ctx->ac.f32, "");

   if (color_type == SI_TYPE_INT16 || color_type == SI_TYPE_UINT16) {
      LLVMValueRef bits = ac_to_integer(&ctx->ac, value);

      if (color_type == SI_TYPE_INT16)
         bits = LLVMBuildSExt(builder, bits, ctx->ac.i32, "");
      else
         bits = LLVMBuildZExt(builder, bits, ctx->ac.i32, "");

      return ac_to_float(&ctx->ac, bits);
   }

   return NULL;
}
271
272
/* Initialize arguments for the shader export intrinsic */
273
static void si_llvm_init_ps_export_args(struct si_shader_context *ctx, LLVMValueRef *values,
274
unsigned cbuf, unsigned compacted_mrt_index,
275
unsigned color_type, struct ac_export_args *args)
276
{
277
const struct si_shader_key *key = &ctx->shader->key;
278
unsigned col_formats = key->part.ps.epilog.spi_shader_col_format;
279
LLVMValueRef f32undef = LLVMGetUndef(ctx->ac.f32);
280
unsigned spi_shader_col_format;
281
unsigned chan;
282
bool is_int8, is_int10;
283
284
assert(cbuf < 8);
285
286
spi_shader_col_format = (col_formats >> (cbuf * 4)) & 0xf;
287
is_int8 = (key->part.ps.epilog.color_is_int8 >> cbuf) & 0x1;
288
is_int10 = (key->part.ps.epilog.color_is_int10 >> cbuf) & 0x1;
289
290
/* Default is 0xf. Adjusted below depending on the format. */
291
args->enabled_channels = 0xf; /* writemask */
292
293
/* Specify whether the EXEC mask represents the valid mask */
294
args->valid_mask = 0;
295
296
/* Specify whether this is the last export */
297
args->done = 0;
298
299
/* Specify the target we are exporting */
300
args->target = V_008DFC_SQ_EXP_MRT + compacted_mrt_index;
301
302
args->compr = false;
303
args->out[0] = f32undef;
304
args->out[1] = f32undef;
305
args->out[2] = f32undef;
306
args->out[3] = f32undef;
307
308
LLVMValueRef (*packf)(struct ac_llvm_context * ctx, LLVMValueRef args[2]) = NULL;
309
LLVMValueRef (*packi)(struct ac_llvm_context * ctx, LLVMValueRef args[2], unsigned bits,
310
bool hi) = NULL;
311
312
switch (spi_shader_col_format) {
313
case V_028714_SPI_SHADER_ZERO:
314
args->enabled_channels = 0; /* writemask */
315
args->target = V_008DFC_SQ_EXP_NULL;
316
break;
317
318
case V_028714_SPI_SHADER_32_R:
319
args->enabled_channels = 1; /* writemask */
320
args->out[0] = get_color_32bit(ctx, color_type, values[0]);
321
break;
322
323
case V_028714_SPI_SHADER_32_GR:
324
args->enabled_channels = 0x3; /* writemask */
325
args->out[0] = get_color_32bit(ctx, color_type, values[0]);
326
args->out[1] = get_color_32bit(ctx, color_type, values[1]);
327
break;
328
329
case V_028714_SPI_SHADER_32_AR:
330
if (ctx->screen->info.chip_class >= GFX10) {
331
args->enabled_channels = 0x3; /* writemask */
332
args->out[0] = get_color_32bit(ctx, color_type, values[0]);
333
args->out[1] = get_color_32bit(ctx, color_type, values[3]);
334
} else {
335
args->enabled_channels = 0x9; /* writemask */
336
args->out[0] = get_color_32bit(ctx, color_type, values[0]);
337
args->out[3] = get_color_32bit(ctx, color_type, values[3]);
338
}
339
break;
340
341
case V_028714_SPI_SHADER_FP16_ABGR:
342
if (color_type != SI_TYPE_ANY32)
343
packf = pack_two_16bit;
344
else
345
packf = ac_build_cvt_pkrtz_f16;
346
break;
347
348
case V_028714_SPI_SHADER_UNORM16_ABGR:
349
if (color_type != SI_TYPE_ANY32)
350
packf = ac_build_cvt_pknorm_u16_f16;
351
else
352
packf = ac_build_cvt_pknorm_u16;
353
break;
354
355
case V_028714_SPI_SHADER_SNORM16_ABGR:
356
if (color_type != SI_TYPE_ANY32)
357
packf = ac_build_cvt_pknorm_i16_f16;
358
else
359
packf = ac_build_cvt_pknorm_i16;
360
break;
361
362
case V_028714_SPI_SHADER_UINT16_ABGR:
363
if (color_type != SI_TYPE_ANY32)
364
packf = pack_two_16bit;
365
else
366
packi = ac_build_cvt_pk_u16;
367
break;
368
369
case V_028714_SPI_SHADER_SINT16_ABGR:
370
if (color_type != SI_TYPE_ANY32)
371
packf = pack_two_16bit;
372
else
373
packi = ac_build_cvt_pk_i16;
374
break;
375
376
case V_028714_SPI_SHADER_32_ABGR:
377
for (unsigned i = 0; i < 4; i++)
378
args->out[i] = get_color_32bit(ctx, color_type, values[i]);
379
break;
380
}
381
382
/* Pack f16 or norm_i16/u16. */
383
if (packf) {
384
for (chan = 0; chan < 2; chan++) {
385
LLVMValueRef pack_args[2] = {values[2 * chan], values[2 * chan + 1]};
386
LLVMValueRef packed;
387
388
packed = packf(&ctx->ac, pack_args);
389
args->out[chan] = ac_to_float(&ctx->ac, packed);
390
}
391
args->compr = 1; /* COMPR flag */
392
}
393
/* Pack i16/u16. */
394
if (packi) {
395
for (chan = 0; chan < 2; chan++) {
396
LLVMValueRef pack_args[2] = {ac_to_integer(&ctx->ac, values[2 * chan]),
397
ac_to_integer(&ctx->ac, values[2 * chan + 1])};
398
LLVMValueRef packed;
399
400
packed = packi(&ctx->ac, pack_args, is_int8 ? 8 : is_int10 ? 10 : 16, chan == 1);
401
args->out[chan] = ac_to_float(&ctx->ac, packed);
402
}
403
args->compr = 1; /* COMPR flag */
404
}
405
}
406
407
/**
 * Apply the epilog color states to one color output and queue its export(s).
 *
 * Processing order: color clamp, alpha-to-one, alpha test (output 0 only),
 * line/polygon smoothing. When last_cbuf > 0 in the epilog key, the color is
 * exported to every color buffer in 0..last_cbuf that has channels enabled.
 *
 * @param ctx                  context
 * @param color                the 4 channels of the color; modified in place
 * @param index                color output index
 * @param compacted_mrt_index  MRT export slot for this color
 * @param samplemask_param     parameter index of the coverage mask (for smoothing)
 * @param is_last              whether the last queued export gets DONE/valid_mask
 * @param color_type           SI_TYPE_* of the color channels
 * @param exp                  export list to append to
 * @return                     false if nothing was queued, true otherwise
 */
static bool si_export_mrt_color(struct si_shader_context *ctx, LLVMValueRef *color, unsigned index,
                                unsigned compacted_mrt_index, unsigned samplemask_param,
                                bool is_last, unsigned color_type, struct si_ps_exports *exp)
{
   const struct si_shader_key *key = &ctx->shader->key;
   const int last_cbuf = key->part.ps.epilog.last_cbuf;

   /* Clamp color */
   if (key->part.ps.epilog.clamp_color) {
      for (int chan = 0; chan < 4; chan++)
         color[chan] = ac_build_clamp(&ctx->ac, color[chan]);
   }

   /* Alpha to one */
   if (key->part.ps.epilog.alpha_to_one)
      color[3] = LLVMConstReal(LLVMTypeOf(color[0]), 1);

   /* Alpha test */
   if (index == 0 && key->part.ps.epilog.alpha_func != PIPE_FUNC_ALWAYS)
      si_alpha_test(ctx, color[3]);

   /* Line & polygon smoothing */
   if (key->part.ps.epilog.poly_line_smoothing)
      color[3] = si_scale_alpha_by_sample_mask(ctx, color[3], samplemask_param);

   /* Common case: one export for this color. */
   if (last_cbuf <= 0) {
      struct ac_export_args single;

      si_llvm_init_ps_export_args(ctx, color, index, compacted_mrt_index, color_type, &single);

      if (is_last) {
         single.valid_mask = 1; /* whether the EXEC mask is valid */
         single.done = 1;       /* DONE bit */
      } else if (!single.enabled_channels) {
         return false; /* unnecessary NULL export */
      }

      exp->args[exp->num++] = single;
      return true;
   }

   /* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
   struct ac_export_args per_cbuf[8];
   int last_enabled = -1;

   assert(compacted_mrt_index == 0);

   /* Get the export arguments, also find out what the last one is. */
   for (int c = 0; c <= last_cbuf; c++) {
      si_llvm_init_ps_export_args(ctx, color, c, compacted_mrt_index, color_type, &per_cbuf[c]);

      if (per_cbuf[c].enabled_channels) {
         compacted_mrt_index++;
         last_enabled = c;
      }
   }

   if (last_enabled == -1)
      return false;

   /* Emit all exports. */
   for (int c = 0; c <= last_cbuf; c++) {
      if (is_last && c == last_enabled) {
         per_cbuf[c].valid_mask = 1; /* whether the EXEC mask is valid */
         per_cbuf[c].done = 1;       /* DONE bit */
      } else if (!per_cbuf[c].enabled_channels) {
         continue; /* unnecessary NULL export */
      }

      exp->args[exp->num++] = per_cbuf[c];
   }

   return true;
}
475
476
/**
477
* Return PS outputs in this order:
478
*
479
* v[0:3] = color0.xyzw
480
* v[4:7] = color1.xyzw
481
* ...
482
* vN+0 = Depth
483
* vN+1 = Stencil
484
* vN+2 = SampleMask
485
* vN+3 = SampleMaskIn (used for OpenGL smoothing)
486
*
487
* The alpha-ref SGPR is returned via its original location.
488
*/
489
static void si_llvm_return_fs_outputs(struct ac_shader_abi *abi, unsigned max_outputs,
490
LLVMValueRef *addrs)
491
{
492
struct si_shader_context *ctx = si_shader_context_from_abi(abi);
493
struct si_shader *shader = ctx->shader;
494
struct si_shader_info *info = &shader->selector->info;
495
LLVMBuilderRef builder = ctx->ac.builder;
496
unsigned i, j, first_vgpr, vgpr;
497
498
LLVMValueRef color[8][4] = {};
499
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
500
LLVMValueRef ret;
501
502
/* Read the output values. */
503
for (i = 0; i < info->num_outputs; i++) {
504
unsigned semantic = info->output_semantic[i];
505
506
switch (semantic) {
507
case FRAG_RESULT_DEPTH:
508
depth = LLVMBuildLoad(builder, addrs[4 * i + 0], "");
509
break;
510
case FRAG_RESULT_STENCIL:
511
stencil = LLVMBuildLoad(builder, addrs[4 * i + 0], "");
512
break;
513
case FRAG_RESULT_SAMPLE_MASK:
514
samplemask = LLVMBuildLoad(builder, addrs[4 * i + 0], "");
515
break;
516
default:
517
if (semantic >= FRAG_RESULT_DATA0 && semantic <= FRAG_RESULT_DATA7) {
518
unsigned index = semantic - FRAG_RESULT_DATA0;
519
520
for (j = 0; j < 4; j++) {
521
LLVMValueRef ptr = addrs[4 * i + j];
522
LLVMValueRef result = LLVMBuildLoad(builder, ptr, "");
523
color[index][j] = result;
524
}
525
} else {
526
fprintf(stderr, "Warning: Unhandled fs output type:%d\n", semantic);
527
}
528
break;
529
}
530
}
531
532
/* Fill the return structure. */
533
ret = ctx->return_value;
534
535
/* Set SGPRs. */
536
ret = LLVMBuildInsertValue(
537
builder, ret, ac_to_integer(&ctx->ac, LLVMGetParam(ctx->main_fn, SI_PARAM_ALPHA_REF)),
538
SI_SGPR_ALPHA_REF, "");
539
540
/* Set VGPRs */
541
first_vgpr = vgpr = SI_SGPR_ALPHA_REF + 1;
542
for (i = 0; i < ARRAY_SIZE(color); i++) {
543
if (!color[i][0])
544
continue;
545
546
if (LLVMTypeOf(color[i][0]) == ctx->ac.f16) {
547
for (j = 0; j < 2; j++) {
548
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, &color[i][j * 2], 2);
549
tmp = LLVMBuildBitCast(builder, tmp, ctx->ac.f32, "");
550
ret = LLVMBuildInsertValue(builder, ret, tmp, vgpr++, "");
551
}
552
vgpr += 2;
553
} else {
554
for (j = 0; j < 4; j++)
555
ret = LLVMBuildInsertValue(builder, ret, color[i][j], vgpr++, "");
556
}
557
}
558
if (depth)
559
ret = LLVMBuildInsertValue(builder, ret, depth, vgpr++, "");
560
if (stencil)
561
ret = LLVMBuildInsertValue(builder, ret, stencil, vgpr++, "");
562
if (samplemask)
563
ret = LLVMBuildInsertValue(builder, ret, samplemask, vgpr++, "");
564
565
/* Add the input sample mask for smoothing at the end. */
566
if (vgpr < first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC)
567
vgpr = first_vgpr + PS_EPILOG_SAMPLEMASK_MIN_LOC;
568
ret = LLVMBuildInsertValue(builder, ret, LLVMGetParam(ctx->main_fn, SI_PARAM_SAMPLE_COVERAGE),
569
vgpr++, "");
570
571
ctx->return_value = ret;
572
}
573
574
static void si_llvm_emit_polygon_stipple(struct si_shader_context *ctx,
575
LLVMValueRef param_internal_bindings,
576
struct ac_arg param_pos_fixed_pt)
577
{
578
LLVMBuilderRef builder = ctx->ac.builder;
579
LLVMValueRef slot, desc, offset, row, bit, address[2];
580
581
/* Use the fixed-point gl_FragCoord input.
582
* Since the stipple pattern is 32x32 and it repeats, just get 5 bits
583
* per coordinate to get the repeating effect.
584
*/
585
address[0] = si_unpack_param(ctx, param_pos_fixed_pt, 0, 5);
586
address[1] = si_unpack_param(ctx, param_pos_fixed_pt, 16, 5);
587
588
/* Load the buffer descriptor. */
589
slot = LLVMConstInt(ctx->ac.i32, SI_PS_CONST_POLY_STIPPLE, 0);
590
desc = ac_build_load_to_sgpr(&ctx->ac, param_internal_bindings, slot);
591
592
/* The stipple pattern is 32x32, each row has 32 bits. */
593
offset = LLVMBuildMul(builder, address[1], LLVMConstInt(ctx->ac.i32, 4, 0), "");
594
row = si_buffer_load_const(ctx, desc, offset);
595
row = ac_to_integer(&ctx->ac, row);
596
bit = LLVMBuildLShr(builder, row, address[0], "");
597
bit = LLVMBuildTrunc(builder, bit, ctx->ac.i1, "");
598
ac_build_kill_if_false(&ctx->ac, bit);
599
}
600
601
/**
602
* Build the pixel shader prolog function. This handles:
603
* - two-side color selection and interpolation
604
* - overriding interpolation parameters for the API PS
605
* - polygon stippling
606
*
607
* All preloaded SGPRs and VGPRs are passed through unmodified unless they are
608
* overriden by other states. (e.g. per-sample interpolation)
609
* Interpolated colors are stored after the preloaded VGPRs.
610
*/
611
void si_llvm_build_ps_prolog(struct si_shader_context *ctx, union si_shader_part_key *key)
612
{
613
LLVMValueRef ret, func;
614
int num_returns, i, num_color_channels;
615
616
memset(&ctx->args, 0, sizeof(ctx->args));
617
618
/* Declare inputs. */
619
LLVMTypeRef return_types[AC_MAX_ARGS];
620
num_returns = 0;
621
num_color_channels = util_bitcount(key->ps_prolog.colors_read);
622
assert(key->ps_prolog.num_input_sgprs + key->ps_prolog.num_input_vgprs + num_color_channels <=
623
AC_MAX_ARGS);
624
for (i = 0; i < key->ps_prolog.num_input_sgprs; i++) {
625
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, NULL);
626
return_types[num_returns++] = ctx->ac.i32;
627
}
628
629
struct ac_arg pos_fixed_pt;
630
struct ac_arg ancillary;
631
struct ac_arg param_sample_mask;
632
for (i = 0; i < key->ps_prolog.num_input_vgprs; i++) {
633
struct ac_arg *arg = NULL;
634
if (i == key->ps_prolog.ancillary_vgpr_index) {
635
arg = &ancillary;
636
} else if (i == key->ps_prolog.ancillary_vgpr_index + 1) {
637
arg = &param_sample_mask;
638
} else if (i == key->ps_prolog.num_input_vgprs - 1) {
639
/* POS_FIXED_PT is always last. */
640
arg = &pos_fixed_pt;
641
}
642
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, arg);
643
return_types[num_returns++] = ctx->ac.f32;
644
}
645
646
/* Declare outputs (same as inputs + add colors if needed) */
647
for (i = 0; i < num_color_channels; i++)
648
return_types[num_returns++] = ctx->ac.f32;
649
650
/* Create the function. */
651
si_llvm_create_func(ctx, "ps_prolog", return_types, num_returns, 0);
652
func = ctx->main_fn;
653
654
/* Copy inputs to outputs. This should be no-op, as the registers match,
655
* but it will prevent the compiler from overwriting them unintentionally.
656
*/
657
ret = ctx->return_value;
658
for (i = 0; i < ctx->args.arg_count; i++) {
659
LLVMValueRef p = LLVMGetParam(func, i);
660
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, p, i, "");
661
}
662
663
/* Polygon stippling. */
664
if (key->ps_prolog.states.poly_stipple) {
665
LLVMValueRef list = si_prolog_get_internal_bindings(ctx);
666
667
si_llvm_emit_polygon_stipple(ctx, list, pos_fixed_pt);
668
}
669
670
if (key->ps_prolog.states.bc_optimize_for_persp ||
671
key->ps_prolog.states.bc_optimize_for_linear) {
672
unsigned i, base = key->ps_prolog.num_input_sgprs;
673
LLVMValueRef center[2], centroid[2], tmp, bc_optimize;
674
675
/* The shader should do: if (PRIM_MASK[31]) CENTROID = CENTER;
676
* The hw doesn't compute CENTROID if the whole wave only
677
* contains fully-covered quads.
678
*
679
* PRIM_MASK is after user SGPRs.
680
*/
681
bc_optimize = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
682
bc_optimize =
683
LLVMBuildLShr(ctx->ac.builder, bc_optimize, LLVMConstInt(ctx->ac.i32, 31, 0), "");
684
bc_optimize = LLVMBuildTrunc(ctx->ac.builder, bc_optimize, ctx->ac.i1, "");
685
686
if (key->ps_prolog.states.bc_optimize_for_persp) {
687
/* Read PERSP_CENTER. */
688
for (i = 0; i < 2; i++)
689
center[i] = LLVMGetParam(func, base + 2 + i);
690
/* Read PERSP_CENTROID. */
691
for (i = 0; i < 2; i++)
692
centroid[i] = LLVMGetParam(func, base + 4 + i);
693
/* Select PERSP_CENTROID. */
694
for (i = 0; i < 2; i++) {
695
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], centroid[i], "");
696
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 4 + i, "");
697
}
698
}
699
if (key->ps_prolog.states.bc_optimize_for_linear) {
700
/* Read LINEAR_CENTER. */
701
for (i = 0; i < 2; i++)
702
center[i] = LLVMGetParam(func, base + 8 + i);
703
/* Read LINEAR_CENTROID. */
704
for (i = 0; i < 2; i++)
705
centroid[i] = LLVMGetParam(func, base + 10 + i);
706
/* Select LINEAR_CENTROID. */
707
for (i = 0; i < 2; i++) {
708
tmp = LLVMBuildSelect(ctx->ac.builder, bc_optimize, center[i], centroid[i], "");
709
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, tmp, base + 10 + i, "");
710
}
711
}
712
}
713
714
/* Force per-sample interpolation. */
715
if (key->ps_prolog.states.force_persp_sample_interp) {
716
unsigned i, base = key->ps_prolog.num_input_sgprs;
717
LLVMValueRef persp_sample[2];
718
719
/* Read PERSP_SAMPLE. */
720
for (i = 0; i < 2; i++)
721
persp_sample[i] = LLVMGetParam(func, base + i);
722
/* Overwrite PERSP_CENTER. */
723
for (i = 0; i < 2; i++)
724
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], base + 2 + i, "");
725
/* Overwrite PERSP_CENTROID. */
726
for (i = 0; i < 2; i++)
727
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_sample[i], base + 4 + i, "");
728
}
729
if (key->ps_prolog.states.force_linear_sample_interp) {
730
unsigned i, base = key->ps_prolog.num_input_sgprs;
731
LLVMValueRef linear_sample[2];
732
733
/* Read LINEAR_SAMPLE. */
734
for (i = 0; i < 2; i++)
735
linear_sample[i] = LLVMGetParam(func, base + 6 + i);
736
/* Overwrite LINEAR_CENTER. */
737
for (i = 0; i < 2; i++)
738
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 8 + i, "");
739
/* Overwrite LINEAR_CENTROID. */
740
for (i = 0; i < 2; i++)
741
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_sample[i], base + 10 + i, "");
742
}
743
744
/* Force center interpolation. */
745
if (key->ps_prolog.states.force_persp_center_interp) {
746
unsigned i, base = key->ps_prolog.num_input_sgprs;
747
LLVMValueRef persp_center[2];
748
749
/* Read PERSP_CENTER. */
750
for (i = 0; i < 2; i++)
751
persp_center[i] = LLVMGetParam(func, base + 2 + i);
752
/* Overwrite PERSP_SAMPLE. */
753
for (i = 0; i < 2; i++)
754
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], base + i, "");
755
/* Overwrite PERSP_CENTROID. */
756
for (i = 0; i < 2; i++)
757
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, persp_center[i], base + 4 + i, "");
758
}
759
if (key->ps_prolog.states.force_linear_center_interp) {
760
unsigned i, base = key->ps_prolog.num_input_sgprs;
761
LLVMValueRef linear_center[2];
762
763
/* Read LINEAR_CENTER. */
764
for (i = 0; i < 2; i++)
765
linear_center[i] = LLVMGetParam(func, base + 8 + i);
766
/* Overwrite LINEAR_SAMPLE. */
767
for (i = 0; i < 2; i++)
768
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], base + 6 + i, "");
769
/* Overwrite LINEAR_CENTROID. */
770
for (i = 0; i < 2; i++)
771
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, linear_center[i], base + 10 + i, "");
772
}
773
774
/* Interpolate colors. */
775
unsigned color_out_idx = 0;
776
for (i = 0; i < 2; i++) {
777
unsigned writemask = (key->ps_prolog.colors_read >> (i * 4)) & 0xf;
778
unsigned face_vgpr = key->ps_prolog.num_input_sgprs + key->ps_prolog.face_vgpr_index;
779
LLVMValueRef interp[2], color[4];
780
LLVMValueRef interp_ij = NULL, prim_mask = NULL, face = NULL;
781
782
if (!writemask)
783
continue;
784
785
/* If the interpolation qualifier is not CONSTANT (-1). */
786
if (key->ps_prolog.color_interp_vgpr_index[i] != -1) {
787
unsigned interp_vgpr =
788
key->ps_prolog.num_input_sgprs + key->ps_prolog.color_interp_vgpr_index[i];
789
790
/* Get the (i,j) updated by bc_optimize handling. */
791
interp[0] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr, "");
792
interp[1] = LLVMBuildExtractValue(ctx->ac.builder, ret, interp_vgpr + 1, "");
793
interp_ij = ac_build_gather_values(&ctx->ac, interp, 2);
794
}
795
796
/* Use the absolute location of the input. */
797
prim_mask = LLVMGetParam(func, SI_PS_NUM_USER_SGPR);
798
799
if (key->ps_prolog.states.color_two_side) {
800
face = LLVMGetParam(func, face_vgpr);
801
face = ac_to_integer(&ctx->ac, face);
802
}
803
804
interp_fs_color(ctx, key->ps_prolog.color_attr_index[i], i, key->ps_prolog.num_interp_inputs,
805
key->ps_prolog.colors_read, interp_ij, prim_mask, face, color);
806
807
while (writemask) {
808
unsigned chan = u_bit_scan(&writemask);
809
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, color[chan],
810
ctx->args.arg_count + color_out_idx++, "");
811
}
812
}
813
814
/* Section 15.2.2 (Shader Inputs) of the OpenGL 4.5 (Core Profile) spec
815
* says:
816
*
817
* "When per-sample shading is active due to the use of a fragment
818
* input qualified by sample or due to the use of the gl_SampleID
819
* or gl_SamplePosition variables, only the bit for the current
820
* sample is set in gl_SampleMaskIn. When state specifies multiple
821
* fragment shader invocations for a given fragment, the sample
822
* mask for any single fragment shader invocation may specify a
823
* subset of the covered samples for the fragment. In this case,
824
* the bit corresponding to each covered sample will be set in
825
* exactly one fragment shader invocation."
826
*
827
* The samplemask loaded by hardware is always the coverage of the
828
* entire pixel/fragment, so mask bits out based on the sample ID.
829
*/
830
if (key->ps_prolog.states.samplemask_log_ps_iter) {
831
/* The bit pattern matches that used by fixed function fragment
832
* processing. */
833
static const uint16_t ps_iter_masks[] = {
834
0xffff, /* not used */
835
0x5555, 0x1111, 0x0101, 0x0001,
836
};
837
assert(key->ps_prolog.states.samplemask_log_ps_iter < ARRAY_SIZE(ps_iter_masks));
838
839
uint32_t ps_iter_mask = ps_iter_masks[key->ps_prolog.states.samplemask_log_ps_iter];
840
LLVMValueRef sampleid = si_unpack_param(ctx, ancillary, 8, 4);
841
LLVMValueRef samplemask = ac_get_arg(&ctx->ac, param_sample_mask);
842
843
samplemask = ac_to_integer(&ctx->ac, samplemask);
844
samplemask =
845
LLVMBuildAnd(ctx->ac.builder, samplemask,
846
LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false),
847
sampleid, ""),
848
"");
849
samplemask = ac_to_float(&ctx->ac, samplemask);
850
851
ret = LLVMBuildInsertValue(ctx->ac.builder, ret, samplemask, param_sample_mask.arg_index, "");
852
}
853
854
/* Tell LLVM to insert WQM instruction sequence when needed. */
855
if (key->ps_prolog.wqm) {
856
LLVMAddTargetDependentFunctionAttr(func, "amdgpu-ps-wqm-outputs", "");
857
}
858
859
si_llvm_build_ret(ctx, ret);
860
}
861
862
/**
863
* Build the pixel shader epilog function. This handles everything that must be
864
* emulated for pixel shader exports. (alpha-test, format conversions, etc)
865
*/
866
void si_llvm_build_ps_epilog(struct si_shader_context *ctx, union si_shader_part_key *key)
867
{
868
LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
869
int i;
870
struct si_ps_exports exp = {};
871
872
memset(&ctx->args, 0, sizeof(ctx->args));
873
874
/* Declare input SGPRs. */
875
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->internal_bindings);
876
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->bindless_samplers_and_images);
877
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->const_and_shader_buffers);
878
ac_add_arg(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_INT, &ctx->samplers_and_images);
879
si_add_arg_checked(&ctx->args, AC_ARG_SGPR, 1, AC_ARG_FLOAT, NULL, SI_PARAM_ALPHA_REF);
880
881
/* Declare input VGPRs. */
882
unsigned required_num_params =
883
ctx->args.num_sgprs_used + util_bitcount(key->ps_epilog.colors_written) * 4 +
884
key->ps_epilog.writes_z + key->ps_epilog.writes_stencil + key->ps_epilog.writes_samplemask;
885
886
required_num_params =
887
MAX2(required_num_params, ctx->args.num_sgprs_used + PS_EPILOG_SAMPLEMASK_MIN_LOC + 1);
888
889
while (ctx->args.arg_count < required_num_params)
890
ac_add_arg(&ctx->args, AC_ARG_VGPR, 1, AC_ARG_FLOAT, NULL);
891
892
/* Create the function. */
893
si_llvm_create_func(ctx, "ps_epilog", NULL, 0, 0);
894
/* Disable elimination of unused inputs. */
895
ac_llvm_add_target_dep_function_attr(ctx->main_fn, "InitialPSInputAddr", 0xffffff);
896
897
/* Process colors. */
898
unsigned vgpr = ctx->args.num_sgprs_used;
899
unsigned colors_written = key->ps_epilog.colors_written;
900
int last_color_export = -1;
901
902
/* Find the last color export. */
903
if (!key->ps_epilog.writes_z && !key->ps_epilog.writes_stencil &&
904
!key->ps_epilog.writes_samplemask) {
905
unsigned spi_format = key->ps_epilog.states.spi_shader_col_format;
906
907
/* If last_cbuf > 0, FS_COLOR0_WRITES_ALL_CBUFS is true. */
908
if (colors_written == 0x1 && key->ps_epilog.states.last_cbuf > 0) {
909
/* Just set this if any of the colorbuffers are enabled. */
910
if (spi_format & ((1ull << (4 * (key->ps_epilog.states.last_cbuf + 1))) - 1))
911
last_color_export = 0;
912
} else {
913
for (i = 0; i < 8; i++)
914
if (colors_written & (1 << i) && (spi_format >> (i * 4)) & 0xf)
915
last_color_export = i;
916
}
917
}
918
919
unsigned num_compacted_mrts = 0;
920
while (colors_written) {
921
LLVMValueRef color[4];
922
int output_index = u_bit_scan(&colors_written);
923
unsigned color_type = (key->ps_epilog.color_types >> (output_index * 2)) & 0x3;
924
925
if (color_type != SI_TYPE_ANY32) {
926
for (i = 0; i < 4; i++) {
927
color[i] = LLVMGetParam(ctx->main_fn, vgpr + i / 2);
928
color[i] = LLVMBuildBitCast(ctx->ac.builder, color[i], ctx->ac.v2f16, "");
929
color[i] = ac_llvm_extract_elem(&ctx->ac, color[i], i % 2);
930
}
931
vgpr += 4;
932
} else {
933
for (i = 0; i < 4; i++)
934
color[i] = LLVMGetParam(ctx->main_fn, vgpr++);
935
}
936
937
if (si_export_mrt_color(ctx, color, output_index, num_compacted_mrts,
938
ctx->args.arg_count - 1,
939
output_index == last_color_export, color_type, &exp))
940
num_compacted_mrts++;
941
}
942
943
/* Process depth, stencil, samplemask. */
944
if (key->ps_epilog.writes_z)
945
depth = LLVMGetParam(ctx->main_fn, vgpr++);
946
if (key->ps_epilog.writes_stencil)
947
stencil = LLVMGetParam(ctx->main_fn, vgpr++);
948
if (key->ps_epilog.writes_samplemask)
949
samplemask = LLVMGetParam(ctx->main_fn, vgpr++);
950
951
if (depth || stencil || samplemask)
952
ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &exp.args[exp.num++]);
953
else if (last_color_export == -1)
954
ac_build_export_null(&ctx->ac);
955
956
if (exp.num) {
957
for (unsigned i = 0; i < exp.num; i++)
958
ac_build_export(&ctx->ac, &exp.args[i]);
959
}
960
961
/* Compile. */
962
LLVMBuildRetVoid(ctx->ac.builder);
963
}
964
965
void si_llvm_build_monolithic_ps(struct si_shader_context *ctx, struct si_shader *shader)
966
{
967
LLVMValueRef parts[3];
968
unsigned num_parts = 0, main_index;
969
LLVMValueRef main_fn = ctx->main_fn;
970
971
union si_shader_part_key prolog_key;
972
si_get_ps_prolog_key(shader, &prolog_key, false);
973
974
if (si_need_ps_prolog(&prolog_key)) {
975
si_llvm_build_ps_prolog(ctx, &prolog_key);
976
parts[num_parts++] = ctx->main_fn;
977
}
978
979
main_index = num_parts;
980
parts[num_parts++] = main_fn;
981
982
union si_shader_part_key epilog_key;
983
si_get_ps_epilog_key(shader, &epilog_key);
984
si_llvm_build_ps_epilog(ctx, &epilog_key);
985
parts[num_parts++] = ctx->main_fn;
986
987
si_build_wrapper_function(ctx, parts, num_parts, main_index, 0, false);
988
}
989
990
void si_llvm_init_ps_callbacks(struct si_shader_context *ctx)
991
{
992
ctx->abi.emit_outputs = si_llvm_return_fs_outputs;
993
ctx->abi.load_sample_position = load_sample_position;
994
ctx->abi.load_sample_mask_in = load_sample_mask_in;
995
ctx->abi.emit_fbfetch = si_nir_emit_fbfetch;
996
}
997
998