GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/llvm/ac_nir_to_llvm.c
1
/*
2
* Copyright © 2016 Bas Nieuwenhuizen
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include "ac_nir_to_llvm.h"
25
#include "ac_gpu_info.h"
26
#include "ac_binary.h"
27
#include "ac_llvm_build.h"
28
#include "ac_llvm_util.h"
29
#include "ac_shader_abi.h"
30
#include "ac_shader_util.h"
31
#include "nir/nir.h"
32
#include "nir/nir_deref.h"
33
#include "sid.h"
34
#include "util/bitscan.h"
35
#include "util/u_math.h"
36
#include <llvm/Config/llvm-config.h>
37
38
struct ac_nir_context {
39
struct ac_llvm_context ac;
40
struct ac_shader_abi *abi;
41
const struct ac_shader_args *args;
42
43
gl_shader_stage stage;
44
shader_info *info;
45
46
LLVMValueRef *ssa_defs;
47
48
LLVMValueRef scratch;
49
LLVMValueRef constant_data;
50
51
struct hash_table *defs;
52
struct hash_table *phis;
53
struct hash_table *vars;
54
struct hash_table *verified_interp;
55
56
LLVMValueRef main_function;
57
LLVMBasicBlockRef continue_block;
58
LLVMBasicBlockRef break_block;
59
};
60
61
static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
62
const nir_instr *instr, bool image);
63
64
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
65
enum ac_descriptor_type desc_type, const nir_instr *instr,
66
LLVMValueRef index, bool image, bool write);
67
68
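/* Build the LLVM integer (or integer-vector) type matching a NIR SSA def's
 * bit size and component count.
 */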
static LLVMTypeRef get_def_type(struct ac_nir_context *ctx, const nir_ssa_def *def)
69
{
70
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
71
if (def->num_components > 1) {
72
type = LLVMVectorType(type, def->num_components);
73
}
74
return type;
75
}
76
77
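/* Return the LLVM value previously recorded for a NIR SSA source. */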
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
78
{
79
assert(src.is_ssa);
80
return nir->ssa_defs[src.ssa->index];
81
}
82
83
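/* Compute a typed pointer into LDS (shared memory): add the constant byte
 * offset to the dynamic offset, index into the byte-addressed LDS base and
 * cast the result to a pointer of the requested bit size.
 */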
static LLVMValueRef get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size, unsigned c_off)
84
{
85
LLVMValueRef ptr = get_src(ctx, src);
86
LLVMValueRef lds_i8 = ctx->ac.lds;
87
if (ctx->stage != MESA_SHADER_COMPUTE)
88
lds_i8 = LLVMBuildBitCast(ctx->ac.builder, ctx->ac.lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
89
90
ptr = LLVMBuildAdd(ctx->ac.builder, ptr, LLVMConstInt(ctx->ac.i32, c_off, 0), "");
91
ptr = LLVMBuildGEP(ctx->ac.builder, lds_i8, &ptr, 1, "");
92
int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
93
94
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
95
96
return LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(type, addr_space), "");
97
}
98
99
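/* Look up the LLVM basic block previously created for a NIR block. */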
static LLVMBasicBlockRef get_block(struct ac_nir_context *nir, const struct nir_block *b)
100
{
101
struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
102
return (LLVMBasicBlockRef)entry->data;
103
}
104
105
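/* Fetch an ALU source and apply its swizzle: extract a single component,
 * broadcast a scalar, or shuffle to the requested number of components.
 */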
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx, nir_alu_src src,
106
unsigned num_components)
107
{
108
LLVMValueRef value = get_src(ctx, src.src);
109
bool need_swizzle = false;
110
111
assert(value);
112
unsigned src_components = ac_get_llvm_num_components(value);
113
for (unsigned i = 0; i < num_components; ++i) {
114
assert(src.swizzle[i] < src_components);
115
if (src.swizzle[i] != i)
116
need_swizzle = true;
117
}
118
119
if (need_swizzle || num_components != src_components) {
120
LLVMValueRef masks[] = {LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
121
LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
122
LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
123
LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
124
125
if (src_components > 1 && num_components == 1) {
126
value = LLVMBuildExtractElement(ctx->ac.builder, value, masks[0], "");
127
} else if (src_components == 1 && num_components > 1) {
128
LLVMValueRef values[] = {value, value, value, value};
129
value = ac_build_gather_values(&ctx->ac, values, num_components);
130
} else {
131
LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
132
value = LLVMBuildShuffleVector(ctx->ac.builder, value, value, swizzle, "");
133
}
134
}
135
assert(!src.negate);
136
assert(!src.abs);
137
return value;
138
}
139
140
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx, LLVMIntPredicate pred,
141
LLVMValueRef src0, LLVMValueRef src1)
142
{
143
src0 = ac_to_integer(ctx, src0);
144
src1 = ac_to_integer(ctx, src1);
145
return LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
146
}
147
148
static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx, LLVMRealPredicate pred,
149
LLVMValueRef src0, LLVMValueRef src1)
150
{
151
src0 = ac_to_float(ctx, src0);
152
src1 = ac_to_float(ctx, src1);
153
return LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
154
}
155
156
static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx, const char *intrin,
157
LLVMTypeRef result_type, LLVMValueRef src0)
158
{
159
char name[64], type[64];
160
LLVMValueRef params[] = {
161
ac_to_float(ctx, src0),
162
};
163
164
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
165
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
166
assert(length < sizeof(name));
167
return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
168
}
169
170
static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx, const char *intrin,
171
LLVMTypeRef result_type, LLVMValueRef src0)
172
{
173
if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
174
return emit_intrin_1f_param(ctx, intrin, result_type, src0);
175
176
LLVMTypeRef elem_type = LLVMGetElementType(result_type);
177
LLVMValueRef ret = LLVMGetUndef(result_type);
178
179
/* Scalarize the intrinsic, because vectors are not supported. */
180
for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
181
char name[64], type[64];
182
LLVMValueRef params[] = {
183
ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
184
};
185
186
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
187
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
188
assert(length < sizeof(name));
189
ret = LLVMBuildInsertElement(
190
ctx->builder, ret,
191
ac_build_intrinsic(ctx, name, elem_type, params, 1, AC_FUNC_ATTR_READNONE),
192
LLVMConstInt(ctx->i32, i, 0), "");
193
}
194
return ret;
195
}
196
197
static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx, const char *intrin,
198
LLVMTypeRef result_type, LLVMValueRef src0,
199
LLVMValueRef src1)
200
{
201
char name[64], type[64];
202
LLVMValueRef params[] = {
203
ac_to_float(ctx, src0),
204
ac_to_float(ctx, src1),
205
};
206
207
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
208
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
209
assert(length < sizeof(name));
210
return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
211
}
212
213
static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx, const char *intrin,
214
LLVMTypeRef result_type, LLVMValueRef src0,
215
LLVMValueRef src1, LLVMValueRef src2)
216
{
217
char name[64], type[64];
218
LLVMValueRef params[] = {
219
ac_to_float(ctx, src0),
220
ac_to_float(ctx, src1),
221
ac_to_float(ctx, src2),
222
};
223
224
ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
225
ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
226
assert(length < sizeof(name));
227
return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
228
}
229
230
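/* nir_op_bcsel: an LLVM select, with int<->pointer casts inserted so that
 * both sources have compatible types.
 */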
static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx, LLVMValueRef src0, LLVMValueRef src1,
231
LLVMValueRef src2)
232
{
233
LLVMTypeRef src1_type = LLVMTypeOf(src1);
234
LLVMTypeRef src2_type = LLVMTypeOf(src2);
235
236
if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
237
LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
238
src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
239
} else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
240
LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
241
src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
242
}
243
244
return LLVMBuildSelect(ctx->builder, src0, ac_to_integer_or_pointer(ctx, src1),
245
ac_to_integer_or_pointer(ctx, src2), "");
246
}
247
248
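/* iabs(x) = max(x, -x) */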
static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx, LLVMValueRef src0)
249
{
250
return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
251
}
252
253
static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx, const char *intrin,
254
LLVMValueRef src0, LLVMValueRef src1)
255
{
256
LLVMTypeRef ret_type;
257
LLVMTypeRef types[] = {ctx->i32, ctx->i1};
258
LLVMValueRef res;
259
LLVMValueRef params[] = {src0, src1};
260
ret_type = LLVMStructTypeInContext(ctx->context, types, 2, true);
261
262
res = ac_build_intrinsic(ctx, intrin, ret_type, params, 2, AC_FUNC_ATTR_READNONE);
263
264
res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
265
res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
266
return res;
267
}
268
269
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
270
{
271
assert(ac_get_elem_bits(ctx, LLVMTypeOf(src0)) == 1);
272
273
switch (bitsize) {
274
case 16:
275
if (LLVMGetTypeKind(LLVMTypeOf(src0)) == LLVMVectorTypeKind) {
276
assert(LLVMGetVectorSize(LLVMTypeOf(src0)) == 2);
277
LLVMValueRef f[] = {
278
LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 0),
279
ctx->f16_1, ctx->f16_0, ""),
280
LLVMBuildSelect(ctx->builder, ac_llvm_extract_elem(ctx, src0, 1),
281
ctx->f16_1, ctx->f16_0, ""),
282
};
283
return ac_build_gather_values(ctx, f, 2);
284
}
285
return LLVMBuildSelect(ctx->builder, src0, ctx->f16_1, ctx->f16_0, "");
286
case 32:
287
return LLVMBuildSelect(ctx->builder, src0, ctx->f32_1, ctx->f32_0, "");
288
case 64:
289
return LLVMBuildSelect(ctx->builder, src0, ctx->f64_1, ctx->f64_0, "");
290
default:
291
unreachable("Unsupported bit size.");
292
}
293
}
294
295
static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
296
{
297
src0 = ac_to_float(ctx, src0);
298
LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
299
return LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, "");
300
}
301
302
static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx, LLVMValueRef src0, unsigned bitsize)
303
{
304
switch (bitsize) {
305
case 8:
306
return LLVMBuildSelect(ctx->builder, src0, ctx->i8_1, ctx->i8_0, "");
307
case 16:
308
return LLVMBuildSelect(ctx->builder, src0, ctx->i16_1, ctx->i16_0, "");
309
case 32:
310
return LLVMBuildSelect(ctx->builder, src0, ctx->i32_1, ctx->i32_0, "");
311
case 64:
312
return LLVMBuildSelect(ctx->builder, src0, ctx->i64_1, ctx->i64_0, "");
313
default:
314
unreachable("Unsupported bit size.");
315
}
316
}
317
318
static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx, LLVMValueRef src0)
319
{
320
LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
321
return LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, "");
322
}
323
324
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx, LLVMValueRef src0)
325
{
326
LLVMValueRef result;
327
LLVMValueRef cond = NULL;
328
329
src0 = ac_to_float(ctx, src0);
330
result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
331
332
if (ctx->chip_class >= GFX8) {
333
LLVMValueRef args[2];
334
/* Check if the result is a denormal - and flush to 0 if so. */
335
args[0] = result;
336
args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
337
cond =
338
ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
339
}
340
341
/* need to convert back up to f32 */
342
result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
343
344
if (ctx->chip_class >= GFX8)
345
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
346
else {
347
/* for GFX6-GFX7 */
348
* 0x38800000 is the smallest normal half-float value (2^-14) as a 32-bit float,
349
* so compare the result and flush to 0 if it's smaller.
350
*/
351
LLVMValueRef temp, cond2;
352
temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
353
cond = LLVMBuildFCmp(
354
ctx->builder, LLVMRealOGT,
355
LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
356
temp, "");
357
cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE, temp, ctx->f32_0, "");
358
cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
359
result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
360
}
361
return result;
362
}
363
364
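/* Return the high 32 bits of the unsigned 64-bit product of two i32 values. */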
static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
365
LLVMValueRef src1)
366
{
367
LLVMValueRef dst64, result;
368
src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
369
src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
370
371
dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
372
dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
373
result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
374
return result;
375
}
376
377
static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx, LLVMValueRef src0,
378
LLVMValueRef src1)
379
{
380
LLVMValueRef dst64, result;
381
src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
382
src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
383
384
dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
385
dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
386
result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
387
return result;
388
}
389
390
static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx, LLVMValueRef bits, LLVMValueRef offset)
391
{
392
/* mask = ((1 << bits) - 1) << offset */
393
return LLVMBuildShl(
394
ctx->builder,
395
LLVMBuildSub(ctx->builder, LLVMBuildShl(ctx->builder, ctx->i32_1, bits, ""), ctx->i32_1, ""),
396
offset, "");
397
}
398
399
static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx, LLVMValueRef mask,
400
LLVMValueRef insert, LLVMValueRef base)
401
{
402
/* Calculate:
403
* (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
404
* Use the right-hand side, which the LLVM backend can convert to V_BFI.
405
*/
406
return LLVMBuildXor(
407
ctx->builder, base,
408
LLVMBuildAnd(ctx->builder, mask, LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
409
}
410
411
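/* Pack the two components of a vec2 into a single i32 using the given
 * conversion helper (e.g. ac_build_cvt_pkrtz_f16).
 */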
static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0,
412
LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
413
LLVMValueRef args[2]))
414
{
415
LLVMValueRef comp[2];
416
417
src0 = ac_to_float(ctx, src0);
418
comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
419
comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
420
421
return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
422
}
423
424
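/* Unpack two half floats stored in an i32: extract each 16-bit half, bitcast
 * it to f16 and extend it to f32.
 */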
static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx, LLVMValueRef src0)
425
{
426
LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
427
LLVMValueRef temps[2], val;
428
int i;
429
430
for (i = 0; i < 2; i++) {
431
val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
432
val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
433
val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
434
temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
435
}
436
return ac_build_gather_values(ctx, temps, 2);
437
}
438
439
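/* Emit screen-space derivatives: pick the thread-ID mask (fine vs. coarse)
 * and the neighbor index (1 for DDX, 2 for DDY), then defer to ac_build_ddxy.
 */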
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx, nir_op op, LLVMValueRef src0)
440
{
441
unsigned mask;
442
int idx;
443
LLVMValueRef result;
444
445
if (op == nir_op_fddx_fine)
446
mask = AC_TID_MASK_LEFT;
447
else if (op == nir_op_fddy_fine)
448
mask = AC_TID_MASK_TOP;
449
else
450
mask = AC_TID_MASK_TOP_LEFT;
451
452
/* For DDX we want the next X pixel, for DDY the next Y pixel. */
453
if (op == nir_op_fddx_fine || op == nir_op_fddx_coarse || op == nir_op_fddx)
454
idx = 1;
455
else
456
idx = 2;
457
458
result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
459
return result;
460
}
461
462
struct waterfall_context {
463
LLVMBasicBlockRef phi_bb[2];
464
bool use_waterfall;
465
};
466
467
/* To deal with divergent descriptors we can create a loop that handles all
468
* lanes with the same descriptor on a given iteration (henceforth a
469
* waterfall loop).
470
*
471
* These helpers create the begin and end of the loop, leaving the caller
472
* to implement the body.
473
*
474
* params:
475
* - ctx is the usual nir context
476
* - wctx is a temporary struct containing some loop info. Can be left uninitialized.
477
* - value is the possibly divergent value for which we built the loop
478
* - divergent is whether value is actually divergent. If false we just pass
479
* things through.
480
*/
481
static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
482
LLVMValueRef value, bool divergent)
483
{
484
/* If the app claims the value is divergent but it is constant we can
485
* end up with a dynamic index of NULL. */
486
if (!value)
487
divergent = false;
488
489
wctx->use_waterfall = divergent;
490
if (!divergent)
491
return value;
492
493
ac_build_bgnloop(&ctx->ac, 6000);
494
495
LLVMValueRef active = LLVMConstInt(ctx->ac.i1, 1, false);
496
LLVMValueRef scalar_value[NIR_MAX_VEC_COMPONENTS];
497
498
for (unsigned i = 0; i < ac_get_llvm_num_components(value); i++) {
499
LLVMValueRef comp = ac_llvm_extract_elem(&ctx->ac, value, i);
500
scalar_value[i] = ac_build_readlane(&ctx->ac, comp, NULL);
501
active = LLVMBuildAnd(ctx->ac.builder, active,
502
LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, comp, scalar_value[i], ""), "");
503
}
504
505
wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
506
ac_build_ifcc(&ctx->ac, active, 6001);
507
508
return ac_build_gather_values(&ctx->ac, scalar_value, ac_get_llvm_num_components(value));
509
}
510
511
static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx, struct waterfall_context *wctx,
512
LLVMValueRef value)
513
{
514
LLVMValueRef ret = NULL;
515
LLVMValueRef phi_src[2];
516
LLVMValueRef cc_phi_src[2] = {
517
LLVMConstInt(ctx->ac.i32, 0, false),
518
LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
519
};
520
521
if (!wctx->use_waterfall)
522
return value;
523
524
wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
525
526
ac_build_endif(&ctx->ac, 6001);
527
528
if (value) {
529
phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
530
phi_src[1] = value;
531
532
ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
533
}
534
535
/*
536
* By using the optimization barrier on the exit decision, we decouple
537
* the operations from the break, and hence avoid LLVM hoisting the
538
* operation into the break block.
539
*/
540
LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
541
ac_build_optimization_barrier(&ctx->ac, &cc, false);
542
543
LLVMValueRef active =
544
LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
545
ac_build_ifcc(&ctx->ac, active, 6002);
546
ac_build_break(&ctx->ac);
547
ac_build_endif(&ctx->ac, 6002);
548
549
ac_build_endloop(&ctx->ac, 6000);
550
return ret;
551
}
552
553
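/* Translate a single NIR ALU instruction into LLVM IR: fetch and swizzle the
 * sources, emit the operation and record the result in ssa_defs.
 */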
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
554
{
555
LLVMValueRef src[4], result = NULL;
556
unsigned num_components = instr->dest.dest.ssa.num_components;
557
unsigned src_components;
558
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
559
560
assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
561
switch (instr->op) {
562
case nir_op_vec2:
563
case nir_op_vec3:
564
case nir_op_vec4:
565
case nir_op_vec5:
566
case nir_op_unpack_32_2x16:
567
case nir_op_unpack_64_2x32:
568
case nir_op_unpack_64_4x16:
569
src_components = 1;
570
break;
571
case nir_op_pack_half_2x16:
572
case nir_op_pack_snorm_2x16:
573
case nir_op_pack_unorm_2x16:
574
case nir_op_pack_32_2x16:
575
case nir_op_pack_64_2x32:
576
src_components = 2;
577
break;
578
case nir_op_unpack_half_2x16:
579
src_components = 1;
580
break;
581
case nir_op_cube_face_coord_amd:
582
case nir_op_cube_face_index_amd:
583
src_components = 3;
584
break;
585
case nir_op_pack_64_4x16:
586
src_components = 4;
587
break;
588
default:
589
src_components = num_components;
590
break;
591
}
592
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
593
src[i] = get_alu_src(ctx, instr->src[i], src_components);
594
595
switch (instr->op) {
596
case nir_op_mov:
597
result = src[0];
598
break;
599
case nir_op_fneg:
600
src[0] = ac_to_float(&ctx->ac, src[0]);
601
result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
602
if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
603
/* fneg will be optimized by the backend compiler with the sign
604
* bit removed via XOR. This is probably an LLVM bug.
605
*/
606
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
607
}
608
break;
609
case nir_op_ineg:
610
if (instr->no_unsigned_wrap)
611
result = LLVMBuildNUWNeg(ctx->ac.builder, src[0], "");
612
else if (instr->no_signed_wrap)
613
result = LLVMBuildNSWNeg(ctx->ac.builder, src[0], "");
614
else
615
result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
616
break;
617
case nir_op_inot:
618
result = LLVMBuildNot(ctx->ac.builder, src[0], "");
619
break;
620
case nir_op_iadd:
621
if (instr->no_unsigned_wrap)
622
result = LLVMBuildNUWAdd(ctx->ac.builder, src[0], src[1], "");
623
else if (instr->no_signed_wrap)
624
result = LLVMBuildNSWAdd(ctx->ac.builder, src[0], src[1], "");
625
else
626
result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
627
break;
628
case nir_op_fadd:
629
src[0] = ac_to_float(&ctx->ac, src[0]);
630
src[1] = ac_to_float(&ctx->ac, src[1]);
631
result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
632
break;
633
case nir_op_fsub:
634
src[0] = ac_to_float(&ctx->ac, src[0]);
635
src[1] = ac_to_float(&ctx->ac, src[1]);
636
result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
637
break;
638
case nir_op_isub:
639
if (instr->no_unsigned_wrap)
640
result = LLVMBuildNUWSub(ctx->ac.builder, src[0], src[1], "");
641
else if (instr->no_signed_wrap)
642
result = LLVMBuildNSWSub(ctx->ac.builder, src[0], src[1], "");
643
else
644
result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
645
break;
646
case nir_op_imul:
647
if (instr->no_unsigned_wrap)
648
result = LLVMBuildNUWMul(ctx->ac.builder, src[0], src[1], "");
649
else if (instr->no_signed_wrap)
650
result = LLVMBuildNSWMul(ctx->ac.builder, src[0], src[1], "");
651
else
652
result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
653
break;
654
case nir_op_imod:
655
result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
656
break;
657
case nir_op_umod:
658
result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
659
break;
660
case nir_op_irem:
661
result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
662
break;
663
case nir_op_idiv:
664
result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
665
break;
666
case nir_op_udiv:
667
result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
668
break;
669
case nir_op_fmul:
670
src[0] = ac_to_float(&ctx->ac, src[0]);
671
src[1] = ac_to_float(&ctx->ac, src[1]);
672
result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
673
break;
674
case nir_op_frcp:
675
/* For doubles, we need precise division to pass GLCTS. */
676
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL && ac_get_type_size(def_type) == 8) {
677
result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1, ac_to_float(&ctx->ac, src[0]), "");
678
} else {
679
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
680
ac_to_float_type(&ctx->ac, def_type), src[0]);
681
}
682
if (ctx->abi->clamp_div_by_zero)
683
result = ac_build_fmin(&ctx->ac, result,
684
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
685
break;
686
case nir_op_iand:
687
result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
688
break;
689
case nir_op_ior:
690
result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
691
break;
692
case nir_op_ixor:
693
result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
694
break;
695
case nir_op_ishl:
696
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
697
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
698
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
699
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
700
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
701
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
702
result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
703
break;
704
case nir_op_ishr:
705
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
706
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
707
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
708
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
709
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
710
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
711
result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
712
break;
713
case nir_op_ushr:
714
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) <
715
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
716
src[1] = LLVMBuildZExt(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
717
else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) >
718
ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
719
src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1], LLVMTypeOf(src[0]), "");
720
result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
721
break;
722
case nir_op_ilt:
723
result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
724
break;
725
case nir_op_ine:
726
result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
727
break;
728
case nir_op_ieq:
729
result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
730
break;
731
case nir_op_ige:
732
result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
733
break;
734
case nir_op_ult:
735
result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
736
break;
737
case nir_op_uge:
738
result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
739
break;
740
case nir_op_feq:
741
result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
742
break;
743
case nir_op_fneu:
744
result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
745
break;
746
case nir_op_flt:
747
result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
748
break;
749
case nir_op_fge:
750
result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
751
break;
752
case nir_op_fabs:
753
result =
754
emit_intrin_1f_param(&ctx->ac, "llvm.fabs", ac_to_float_type(&ctx->ac, def_type), src[0]);
755
if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
756
/* fabs will be optimized by the backend compiler with the sign
757
* bit removed via AND.
758
*/
759
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
760
}
761
break;
762
case nir_op_fsat:
763
src[0] = ac_to_float(&ctx->ac, src[0]);
764
result = ac_build_fsat(&ctx->ac, src[0],
765
ac_to_float_type(&ctx->ac, def_type));
766
break;
767
case nir_op_iabs:
768
result = emit_iabs(&ctx->ac, src[0]);
769
break;
770
case nir_op_imax:
771
result = ac_build_imax(&ctx->ac, src[0], src[1]);
772
break;
773
case nir_op_imin:
774
result = ac_build_imin(&ctx->ac, src[0], src[1]);
775
break;
776
case nir_op_umax:
777
result = ac_build_umax(&ctx->ac, src[0], src[1]);
778
break;
779
case nir_op_umin:
780
result = ac_build_umin(&ctx->ac, src[0], src[1]);
781
break;
782
case nir_op_isign:
783
result = ac_build_isign(&ctx->ac, src[0]);
784
break;
785
case nir_op_fsign:
786
src[0] = ac_to_float(&ctx->ac, src[0]);
787
result = ac_build_fsign(&ctx->ac, src[0]);
788
break;
789
case nir_op_ffloor:
790
result =
791
emit_intrin_1f_param(&ctx->ac, "llvm.floor", ac_to_float_type(&ctx->ac, def_type), src[0]);
792
break;
793
case nir_op_ftrunc:
794
result =
795
emit_intrin_1f_param(&ctx->ac, "llvm.trunc", ac_to_float_type(&ctx->ac, def_type), src[0]);
796
break;
797
case nir_op_fceil:
798
result =
799
emit_intrin_1f_param(&ctx->ac, "llvm.ceil", ac_to_float_type(&ctx->ac, def_type), src[0]);
800
break;
801
case nir_op_fround_even:
802
result =
803
emit_intrin_1f_param(&ctx->ac, "llvm.rint", ac_to_float_type(&ctx->ac, def_type), src[0]);
804
break;
805
case nir_op_ffract:
806
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
807
ac_to_float_type(&ctx->ac, def_type), src[0]);
808
break;
809
case nir_op_fsin:
810
result =
811
emit_intrin_1f_param(&ctx->ac, "llvm.sin", ac_to_float_type(&ctx->ac, def_type), src[0]);
812
break;
813
case nir_op_fcos:
814
result =
815
emit_intrin_1f_param(&ctx->ac, "llvm.cos", ac_to_float_type(&ctx->ac, def_type), src[0]);
816
break;
817
case nir_op_fsqrt:
818
result =
819
emit_intrin_1f_param(&ctx->ac, "llvm.sqrt", ac_to_float_type(&ctx->ac, def_type), src[0]);
820
break;
821
case nir_op_fexp2:
822
result =
823
emit_intrin_1f_param(&ctx->ac, "llvm.exp2", ac_to_float_type(&ctx->ac, def_type), src[0]);
824
break;
825
case nir_op_flog2:
826
result =
827
emit_intrin_1f_param(&ctx->ac, "llvm.log2", ac_to_float_type(&ctx->ac, def_type), src[0]);
828
break;
829
case nir_op_frsq:
830
result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
831
ac_to_float_type(&ctx->ac, def_type), src[0]);
832
if (ctx->abi->clamp_div_by_zero)
833
result = ac_build_fmin(&ctx->ac, result,
834
LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
835
break;
836
case nir_op_frexp_exp:
837
src[0] = ac_to_float(&ctx->ac, src[0]);
838
result = ac_build_frexp_exp(&ctx->ac, src[0], ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
839
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
840
result = LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
841
break;
842
case nir_op_frexp_sig:
843
src[0] = ac_to_float(&ctx->ac, src[0]);
844
result = ac_build_frexp_mant(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
845
break;
846
case nir_op_fpow:
847
if (instr->dest.dest.ssa.bit_size != 32) {
848
/* 16 and 64 bits */
849
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
850
ac_to_float_type(&ctx->ac, def_type), src[0]);
851
result = LLVMBuildFMul(ctx->ac.builder, result, ac_to_float(&ctx->ac, src[1]), "");
852
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
853
ac_to_float_type(&ctx->ac, def_type), result);
854
break;
855
}
856
if (LLVM_VERSION_MAJOR >= 12) {
857
result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
858
ac_to_float_type(&ctx->ac, def_type), src[0]);
859
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.fmul.legacy", ctx->ac.f32,
860
(LLVMValueRef[]){result, ac_to_float(&ctx->ac, src[1])},
861
2, AC_FUNC_ATTR_READNONE);
862
result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
863
ac_to_float_type(&ctx->ac, def_type), result);
864
break;
865
}
866
/* Older LLVM doesn't have fmul.legacy. */
867
result = emit_intrin_2f_param(&ctx->ac, "llvm.pow", ac_to_float_type(&ctx->ac, def_type),
868
src[0], src[1]);
869
break;
870
case nir_op_fmax:
871
result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum", ac_to_float_type(&ctx->ac, def_type),
872
src[0], src[1]);
873
if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
874
/* Only pre-GFX9 chips do not flush denorms. */
875
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
876
}
877
break;
878
case nir_op_fmin:
879
result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum", ac_to_float_type(&ctx->ac, def_type),
880
src[0], src[1]);
881
if (ctx->ac.chip_class < GFX9 && instr->dest.dest.ssa.bit_size == 32) {
882
/* Only pre-GFX9 chips do not flush denorms. */
883
result = ac_build_canonicalize(&ctx->ac, result, instr->dest.dest.ssa.bit_size);
884
}
885
break;
886
case nir_op_ffma:
887
/* FMA is slow on gfx6-8, so it shouldn't be used. */
888
assert(instr->dest.dest.ssa.bit_size != 32 || ctx->ac.chip_class >= GFX9);
889
result = emit_intrin_3f_param(&ctx->ac, "llvm.fma", ac_to_float_type(&ctx->ac, def_type),
890
src[0], src[1], src[2]);
891
break;
892
case nir_op_ldexp:
893
src[0] = ac_to_float(&ctx->ac, src[0]);
894
if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
895
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2,
896
AC_FUNC_ATTR_READNONE);
897
else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
898
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2,
899
AC_FUNC_ATTR_READNONE);
900
else
901
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2,
902
AC_FUNC_ATTR_READNONE);
903
break;
904
case nir_op_bfm:
905
result = emit_bfm(&ctx->ac, src[0], src[1]);
906
break;
907
case nir_op_bitfield_select:
908
result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
909
break;
910
case nir_op_ubfe:
911
result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
912
break;
913
case nir_op_ibfe:
914
result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
915
break;
916
case nir_op_bitfield_reverse:
917
result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
918
break;
919
case nir_op_bit_count:
920
result = ac_build_bit_count(&ctx->ac, src[0]);
921
break;
922
case nir_op_vec2:
923
case nir_op_vec3:
924
case nir_op_vec4:
925
case nir_op_vec5:
926
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
927
src[i] = ac_to_integer(&ctx->ac, src[i]);
928
result = ac_build_gather_values(&ctx->ac, src, num_components);
929
break;
930
case nir_op_f2i8:
931
case nir_op_f2i16:
932
case nir_op_f2imp:
933
case nir_op_f2i32:
934
case nir_op_f2i64:
935
src[0] = ac_to_float(&ctx->ac, src[0]);
936
result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
937
break;
938
case nir_op_f2u8:
939
case nir_op_f2u16:
940
case nir_op_f2ump:
941
case nir_op_f2u32:
942
case nir_op_f2u64:
943
src[0] = ac_to_float(&ctx->ac, src[0]);
944
result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
945
break;
946
case nir_op_i2f16:
947
case nir_op_i2fmp:
948
case nir_op_i2f32:
949
case nir_op_i2f64:
950
result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
951
break;
952
case nir_op_u2f16:
953
case nir_op_u2fmp:
954
case nir_op_u2f32:
955
case nir_op_u2f64:
956
result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
957
break;
958
case nir_op_f2f16_rtz:
959
case nir_op_f2f16:
960
case nir_op_f2fmp:
961
src[0] = ac_to_float(&ctx->ac, src[0]);
962
963
/* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
964
* all f32->f16 conversions have to round towards zero, because both scalar
965
* and vec2 down-conversions have to round equally.
966
*/
967
if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL || instr->op == nir_op_f2f16_rtz) {
968
src[0] = ac_to_float(&ctx->ac, src[0]);
969
970
if (LLVMTypeOf(src[0]) == ctx->ac.f64)
971
src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
972
973
/* Fast path conversion. This only works if NIR is vectorized
974
* to vec2 of 16-bit values.
975
*/
976
if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
977
LLVMValueRef args[] = {
978
ac_llvm_extract_elem(&ctx->ac, src[0], 0),
979
ac_llvm_extract_elem(&ctx->ac, src[0], 1),
980
};
981
result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
982
break;
983
}
984
985
assert(ac_get_llvm_num_components(src[0]) == 1);
986
LLVMValueRef param[2] = {src[0], LLVMGetUndef(ctx->ac.f32)};
987
result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
988
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
989
} else {
990
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
991
result =
992
LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
993
else
994
result =
995
LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
996
}
997
break;
998
case nir_op_f2f16_rtne:
999
case nir_op_f2f32:
1000
case nir_op_f2f64:
1001
src[0] = ac_to_float(&ctx->ac, src[0]);
1002
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1003
result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1004
else
1005
result =
1006
LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1007
break;
1008
case nir_op_u2u8:
1009
case nir_op_u2u16:
1010
case nir_op_u2u32:
1011
case nir_op_u2u64:
1012
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1013
result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1014
else
1015
result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1016
break;
1017
case nir_op_i2i8:
1018
case nir_op_i2i16:
1019
case nir_op_i2imp:
1020
case nir_op_i2i32:
1021
case nir_op_i2i64:
1022
if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1023
result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1024
else
1025
result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1026
break;
1027
case nir_op_bcsel:
1028
result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1029
break;
1030
case nir_op_find_lsb:
1031
result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
1032
break;
1033
case nir_op_ufind_msb:
1034
result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
1035
break;
1036
case nir_op_ifind_msb:
1037
result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
1038
break;
1039
case nir_op_uadd_carry:
1040
result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1041
break;
1042
case nir_op_usub_borrow:
1043
result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1044
break;
1045
case nir_op_b2f16:
1046
case nir_op_b2f32:
1047
case nir_op_b2f64:
1048
result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1049
break;
1050
case nir_op_f2b1:
1051
result = emit_f2b(&ctx->ac, src[0]);
1052
break;
1053
case nir_op_b2i8:
1054
case nir_op_b2i16:
1055
case nir_op_b2i32:
1056
case nir_op_b2i64:
1057
result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1058
break;
1059
case nir_op_i2b1:
1060
case nir_op_b2b1: /* after loads */
1061
result = emit_i2b(&ctx->ac, src[0]);
1062
break;
1063
case nir_op_b2b16: /* before stores */
1064
result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i16, "");
1065
break;
1066
case nir_op_b2b32: /* before stores */
1067
result = LLVMBuildZExt(ctx->ac.builder, src[0], ctx->ac.i32, "");
1068
break;
1069
case nir_op_fquantize2f16:
1070
result = emit_f2f16(&ctx->ac, src[0]);
1071
break;
1072
case nir_op_umul_high:
1073
result = emit_umul_high(&ctx->ac, src[0], src[1]);
1074
break;
1075
case nir_op_imul_high:
1076
result = emit_imul_high(&ctx->ac, src[0], src[1]);
1077
break;
1078
case nir_op_pack_half_2x16:
1079
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
1080
break;
1081
case nir_op_pack_half_2x16_split:
1082
src[0] = ac_to_float(&ctx->ac, src[0]);
1083
src[1] = ac_to_float(&ctx->ac, src[1]);
1084
result = LLVMBuildBitCast(ctx->ac.builder,
1085
ac_build_cvt_pkrtz_f16(&ctx->ac, src),
1086
ctx->ac.i32, "");
1087
break;
1088
case nir_op_pack_snorm_2x16:
1089
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
1090
break;
1091
case nir_op_pack_unorm_2x16:
1092
result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
1093
break;
1094
case nir_op_unpack_half_2x16:
1095
result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1096
break;
1097
case nir_op_unpack_half_2x16_split_x: {
1098
assert(ac_get_llvm_num_components(src[0]) == 1);
1099
LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1100
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1101
break;
1102
}
1103
case nir_op_unpack_half_2x16_split_y: {
1104
assert(ac_get_llvm_num_components(src[0]) == 1);
1105
LLVMValueRef tmp = emit_unpack_half_2x16(&ctx->ac, src[0]);
1106
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1107
break;
1108
}
1109
case nir_op_fddx:
1110
case nir_op_fddy:
1111
case nir_op_fddx_fine:
1112
case nir_op_fddy_fine:
1113
case nir_op_fddx_coarse:
1114
case nir_op_fddy_coarse:
1115
result = emit_ddxy(ctx, instr->op, src[0]);
1116
break;
1117
1118
case nir_op_unpack_64_4x16: {
1119
result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v4i16, "");
1120
break;
1121
}
1122
case nir_op_pack_64_4x16: {
1123
result = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.i64, "");
1124
break;
1125
}
1126
1127
case nir_op_unpack_64_2x32: {
1128
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1129
ctx->ac.v2i32, "");
1130
break;
1131
}
1132
case nir_op_unpack_64_2x32_split_x: {
1133
assert(ac_get_llvm_num_components(src[0]) == 1);
1134
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1135
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1136
break;
1137
}
1138
case nir_op_unpack_64_2x32_split_y: {
1139
assert(ac_get_llvm_num_components(src[0]) == 1);
1140
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i32, "");
1141
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1142
break;
1143
}
1144
1145
case nir_op_pack_64_2x32: {
1146
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1147
ctx->ac.i64, "");
1148
break;
1149
}
1150
case nir_op_pack_64_2x32_split: {
1151
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1152
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1153
break;
1154
}
1155
1156
case nir_op_pack_32_2x16: {
1157
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1158
ctx->ac.i32, "");
1159
break;
1160
}
1161
case nir_op_pack_32_2x16_split: {
1162
LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1163
result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
1164
break;
1165
}
1166
1167
case nir_op_unpack_32_2x16: {
1168
result = LLVMBuildBitCast(ctx->ac.builder, src[0],
1169
ctx->ac.v2i16, "");
1170
break;
1171
}
1172
case nir_op_unpack_32_2x16_split_x: {
1173
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1174
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_0, "");
1175
break;
1176
}
1177
case nir_op_unpack_32_2x16_split_y: {
1178
LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0], ctx->ac.v2i16, "");
1179
result = LLVMBuildExtractElement(ctx->ac.builder, tmp, ctx->ac.i32_1, "");
1180
break;
1181
}
1182
1183
case nir_op_cube_face_coord_amd: {
1184
src[0] = ac_to_float(&ctx->ac, src[0]);
1185
LLVMValueRef results[2];
1186
LLVMValueRef in[3];
1187
for (unsigned chan = 0; chan < 3; chan++)
1188
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1189
results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc", ctx->ac.f32, in, 3,
1190
AC_FUNC_ATTR_READNONE);
1191
results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc", ctx->ac.f32, in, 3,
1192
AC_FUNC_ATTR_READNONE);
1193
LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema", ctx->ac.f32, in, 3,
1194
AC_FUNC_ATTR_READNONE);
1195
results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
1196
results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
1197
LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
1198
results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
1199
results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
1200
result = ac_build_gather_values(&ctx->ac, results, 2);
1201
break;
1202
}
1203
1204
case nir_op_cube_face_index_amd: {
1205
src[0] = ac_to_float(&ctx->ac, src[0]);
1206
LLVMValueRef in[3];
1207
for (unsigned chan = 0; chan < 3; chan++)
1208
in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1209
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid", ctx->ac.f32, in, 3,
1210
AC_FUNC_ATTR_READNONE);
1211
break;
1212
}
1213
1214
case nir_op_extract_u8:
1215
case nir_op_extract_i8:
1216
case nir_op_extract_u16:
1217
case nir_op_extract_i16: {
1218
bool is_signed = instr->op == nir_op_extract_i16 || instr->op == nir_op_extract_i8;
1219
unsigned size = instr->op == nir_op_extract_u8 || instr->op == nir_op_extract_i8 ? 8 : 16;
1220
LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
1221
result = LLVMBuildLShr(ctx->ac.builder, src[0], offset, "");
1222
result = LLVMBuildTrunc(ctx->ac.builder, result, LLVMIntTypeInContext(ctx->ac.context, size), "");
1223
if (is_signed)
1224
result = LLVMBuildSExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
1225
else
1226
result = LLVMBuildZExt(ctx->ac.builder, result, LLVMTypeOf(src[0]), "");
1227
break;
1228
}
1229
1230
case nir_op_insert_u8:
1231
case nir_op_insert_u16: {
1232
unsigned size = instr->op == nir_op_insert_u8 ? 8 : 16;
1233
LLVMValueRef offset = LLVMConstInt(LLVMTypeOf(src[0]), nir_src_as_uint(instr->src[1].src) * size, false);
1234
LLVMValueRef mask = LLVMConstInt(LLVMTypeOf(src[0]), u_bit_consecutive(0, size), false);
1235
result = LLVMBuildShl(ctx->ac.builder, LLVMBuildAnd(ctx->ac.builder, src[0], mask, ""), offset, "");
1236
break;
1237
}
1238
1239
default:
1240
fprintf(stderr, "Unknown NIR alu instr: ");
1241
nir_print_instr(&instr->instr, stderr);
1242
fprintf(stderr, "\n");
1243
abort();
1244
}
1245
1246
if (result) {
1247
assert(instr->dest.dest.is_ssa);
1248
result = ac_to_integer_or_pointer(&ctx->ac, result);
1249
ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
1250
}
1251
}
1252
1253
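/* Materialize a NIR load_const as LLVM constants, gathered into a constant
 * vector when there is more than one component.
 */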
static void visit_load_const(struct ac_nir_context *ctx, const nir_load_const_instr *instr)
1254
{
1255
LLVMValueRef values[4], value = NULL;
1256
LLVMTypeRef element_type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1257
1258
for (unsigned i = 0; i < instr->def.num_components; ++i) {
1259
switch (instr->def.bit_size) {
1260
case 1:
1261
values[i] = LLVMConstInt(element_type, instr->value[i].b, false);
1262
break;
1263
case 8:
1264
values[i] = LLVMConstInt(element_type, instr->value[i].u8, false);
1265
break;
1266
case 16:
1267
values[i] = LLVMConstInt(element_type, instr->value[i].u16, false);
1268
break;
1269
case 32:
1270
values[i] = LLVMConstInt(element_type, instr->value[i].u32, false);
1271
break;
1272
case 64:
1273
values[i] = LLVMConstInt(element_type, instr->value[i].u64, false);
1274
break;
1275
default:
1276
fprintf(stderr, "unsupported nir load_const bit_size: %d\n", instr->def.bit_size);
1277
abort();
1278
}
1279
}
1280
if (instr->def.num_components > 1) {
1281
value = LLVMConstVector(values, instr->def.num_components);
1282
} else
1283
value = values[0];
1284
1285
ctx->ssa_defs[instr->def.index] = value;
1286
}
1287
1288
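/* Read the buffer size from the descriptor; on GFX8 optionally convert it
 * from bytes to elements by dividing by the descriptor's stride.
 */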
static LLVMValueRef get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor,
1289
bool in_elements)
1290
{
1291
LLVMValueRef size =
1292
LLVMBuildExtractElement(ctx->ac.builder, descriptor, LLVMConstInt(ctx->ac.i32, 2, false), "");
1293
1294
/* GFX8 only */
1295
if (ctx->ac.chip_class == GFX8 && in_elements) {
1296
/* On GFX8, the descriptor contains the size in bytes,
1297
* but TXQ must return the size in elements.
1298
* The stride is always non-zero for resources using TXQ.
1299
*/
1300
LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, descriptor, ctx->ac.i32_1, "");
1301
stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, false), "");
1302
stride = LLVMBuildAnd(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
1303
1304
size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
1305
}
1306
return size;
1307
}
1308
1309
/* Gather4 should follow the same rules as bilinear filtering, but the hardware
1310
* incorrectly forces nearest filtering if the texture format is integer.
1311
* The only effect it has on Gather4, which always returns 4 texels for
1312
* bilinear filtering, is that the final coordinates are off by 0.5 of
1313
* the texel size.
1314
*
1315
* The workaround is to subtract 0.5 from the unnormalized coordinates,
1316
* or (0.5 / size) from the normalized coordinates.
1317
*
1318
* However, cube textures with 8_8_8_8 data formats require a different
1319
* workaround of overriding the num format to USCALED/SSCALED. This would lose
1320
* precision in 32-bit data formats, so it needs to be applied dynamically at
1321
* runtime. In this case, return an i1 value that indicates whether the
1322
* descriptor was overridden (and hence a fixup of the sampler result is needed).
1323
*/
1324
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx, nir_variable *var,
1325
struct ac_image_args *args, const nir_tex_instr *instr)
1326
{
1327
const struct glsl_type *type = glsl_without_array(var->type);
1328
enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1329
LLVMValueRef wa_8888 = NULL;
1330
LLVMValueRef half_texel[2];
1331
LLVMValueRef result;
1332
1333
assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
1334
1335
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1336
LLVMValueRef formats;
1337
LLVMValueRef data_format;
1338
LLVMValueRef wa_formats;
1339
1340
formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
1341
1342
data_format = LLVMBuildLShr(ctx->builder, formats, LLVMConstInt(ctx->i32, 20, false), "");
1343
data_format =
1344
LLVMBuildAnd(ctx->builder, data_format, LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
1345
wa_8888 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, data_format,
1346
LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
1347
1348
uint32_t wa_num_format = stype == GLSL_TYPE_UINT
1349
? S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED)
1350
: S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
1351
wa_formats = LLVMBuildAnd(ctx->builder, formats,
1352
LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false), "");
1353
wa_formats =
1354
LLVMBuildOr(ctx->builder, wa_formats, LLVMConstInt(ctx->i32, wa_num_format, false), "");
1355
1356
formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
1357
args->resource =
1358
LLVMBuildInsertElement(ctx->builder, args->resource, formats, ctx->i32_1, "");
1359
}
1360
1361
if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1362
assert(!wa_8888);
1363
half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
1364
} else {
1365
struct ac_image_args resinfo = {0};
1366
LLVMBasicBlockRef bbs[2];
1367
1368
LLVMValueRef unnorm = NULL;
1369
LLVMValueRef default_offset = ctx->f32_0;
1370
if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D && !instr->is_array) {
1371
/* In Vulkan, whether the sampler uses unnormalized
1372
* coordinates or not is a dynamic property of the
1373
* sampler. Hence, to figure out whether or not we
1374
* need to divide by the texture size, we need to test
1375
* the sampler at runtime. This tests the bit set by
1376
* radv_init_sampler().
1377
*/
1378
LLVMValueRef sampler0 =
1379
LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
1380
sampler0 = LLVMBuildLShr(ctx->builder, sampler0, LLVMConstInt(ctx->i32, 15, false), "");
1381
sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
1382
unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
1383
default_offset = LLVMConstReal(ctx->f32, -0.5);
1384
}
1385
1386
bbs[0] = LLVMGetInsertBlock(ctx->builder);
1387
if (wa_8888 || unnorm) {
1388
assert(!(wa_8888 && unnorm));
1389
LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
1390
/* Skip the texture size query entirely if we don't need it. */
1391
ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
1392
bbs[1] = LLVMGetInsertBlock(ctx->builder);
1393
}
1394
1395
/* Query the texture size. */
1396
resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
1397
resinfo.opcode = ac_image_get_resinfo;
1398
resinfo.dmask = 0xf;
1399
resinfo.lod = ctx->i32_0;
1400
resinfo.resource = args->resource;
1401
resinfo.attributes = AC_FUNC_ATTR_READNONE;
1402
LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
1403
1404
/* Compute -0.5 / size. */
1405
for (unsigned c = 0; c < 2; c++) {
1406
half_texel[c] =
1407
LLVMBuildExtractElement(ctx->builder, size, LLVMConstInt(ctx->i32, c, 0), "");
1408
half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1409
half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
1410
half_texel[c] =
1411
LLVMBuildFMul(ctx->builder, half_texel[c], LLVMConstReal(ctx->f32, -0.5), "");
1412
}
1413
1414
if (wa_8888 || unnorm) {
1415
ac_build_endif(ctx, 2000);
1416
1417
for (unsigned c = 0; c < 2; c++) {
1418
LLVMValueRef values[2] = {default_offset, half_texel[c]};
1419
half_texel[c] = ac_build_phi(ctx, ctx->f32, 2, values, bbs);
1420
}
1421
}
1422
}
1423
1424
for (unsigned c = 0; c < 2; c++) {
1425
LLVMValueRef tmp;
1426
tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
1427
args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1428
}
1429
1430
args->attributes = AC_FUNC_ATTR_READNONE;
1431
result = ac_build_image_opcode(ctx, args);
1432
1433
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1434
LLVMValueRef tmp, tmp2;
1435
1436
/* if the cube workaround is in place, f2i the result. */
1437
for (unsigned c = 0; c < 4; c++) {
1438
tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
1439
if (stype == GLSL_TYPE_UINT)
1440
tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
1441
else
1442
tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
1443
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1444
tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
1445
tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
1446
tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1447
result =
1448
LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
1449
}
1450
}
1451
return result;
1452
}
1453
1454
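/* Return the nir_tex_src_texture_deref source of a texture instruction, or
 * NULL if it has none.
 */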
static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
1455
{
1456
nir_deref_instr *texture_deref_instr = NULL;
1457
1458
for (unsigned i = 0; i < instr->num_srcs; i++) {
1459
switch (instr->src[i].src_type) {
1460
case nir_tex_src_texture_deref:
1461
texture_deref_instr = nir_src_as_deref(instr->src[i].src);
1462
break;
1463
default:
1464
break;
1465
}
1466
}
1467
return texture_deref_instr;
1468
}
1469
1470
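/* Emit the image intrinsic for a NIR texture instruction: handle buffer
 * fetches separately, select the image opcode per texop, and apply the
 * chip-specific workarounds (integer gather4 on GFX8 and older, 1D-as-2D on
 * GFX9) before building the opcode.
 */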
static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx, const nir_tex_instr *instr,
1471
struct ac_image_args *args)
1472
{
1473
assert((!args->tfe || !args->d16) && "unsupported");
1474
1475
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
1476
unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
1477
1478
assert(instr->dest.is_ssa);
1479
1480
/* Buffers don't support A16. */
1481
if (args->a16)
1482
args->coords[0] = LLVMBuildZExt(ctx->ac.builder, args->coords[0], ctx->ac.i32, "");
1483
1484
return ac_build_buffer_load_format(&ctx->ac, args->resource, args->coords[0], ctx->ac.i32_0,
1485
util_last_bit(mask), 0, true,
1486
instr->dest.ssa.bit_size == 16,
1487
args->tfe);
1488
}
1489
1490
args->opcode = ac_image_sample;
1491
1492
switch (instr->op) {
1493
case nir_texop_txf:
1494
case nir_texop_txf_ms:
1495
case nir_texop_samples_identical:
1496
args->opcode = args->level_zero || instr->sampler_dim == GLSL_SAMPLER_DIM_MS
1497
? ac_image_load
1498
: ac_image_load_mip;
1499
args->level_zero = false;
1500
break;
1501
case nir_texop_txs:
1502
case nir_texop_query_levels:
1503
args->opcode = ac_image_get_resinfo;
1504
if (!args->lod)
1505
args->lod = ctx->ac.i32_0;
1506
args->level_zero = false;
1507
break;
1508
case nir_texop_tex:
1509
if (ctx->stage != MESA_SHADER_FRAGMENT &&
1510
(ctx->stage != MESA_SHADER_COMPUTE ||
1511
ctx->info->cs.derivative_group == DERIVATIVE_GROUP_NONE)) {
1512
assert(!args->lod);
1513
args->level_zero = true;
1514
}
1515
break;
1516
case nir_texop_tg4:
1517
args->opcode = ac_image_gather4;
1518
if (!args->lod && !args->bias)
1519
args->level_zero = true;
1520
break;
1521
case nir_texop_lod:
1522
args->opcode = ac_image_get_lod;
1523
break;
1524
case nir_texop_fragment_fetch:
1525
case nir_texop_fragment_mask_fetch:
1526
args->opcode = ac_image_load;
1527
args->level_zero = false;
1528
break;
1529
default:
1530
break;
1531
}
1532
1533
/* Aldebaran doesn't have image_sample_lz, but image_sample behaves like lz. */
1534
if (!ctx->ac.info->has_3d_cube_border_color_mipmap)
1535
args->level_zero = false;
1536
1537
if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
1538
nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
1539
nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
1540
const struct glsl_type *type = glsl_without_array(var->type);
1541
enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1542
if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1543
return lower_gather4_integer(&ctx->ac, var, args, instr);
1544
}
1545
}
1546
1547
/* Fixup for GFX9 which allocates 1D textures as 2D. */
1548
if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
1549
if ((args->dim == ac_image_2darray || args->dim == ac_image_2d) && !args->coords[1]) {
1550
args->coords[1] = ctx->ac.i32_0;
1551
}
1552
}
1553
1554
args->attributes = AC_FUNC_ATTR_READNONE;
1555
bool cs_derivs =
1556
ctx->stage == MESA_SHADER_COMPUTE && ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
1557
if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
1558
/* Prevent texture instructions with implicit derivatives from being sunk into branches. */
1560
switch (instr->op) {
1561
case nir_texop_tex:
1562
case nir_texop_txb:
1563
case nir_texop_lod:
1564
args->attributes |= AC_FUNC_ATTR_CONVERGENT;
1565
break;
1566
default:
1567
break;
1568
}
1569
}
1570
1571
return ac_build_image_opcode(&ctx->ac, args);
1572
}
1573
1574
static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1575
{
1576
LLVMValueRef ptr, addr;
1577
LLVMValueRef src0 = get_src(ctx, instr->src[0]);
1578
unsigned index = nir_intrinsic_base(instr);
1579
1580
addr = LLVMConstInt(ctx->ac.i32, index, 0);
1581
addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
1582
1583
/* Load constant values from user SGPRs when possible; otherwise fall back to the default path that loads directly from memory. */
1586
if (LLVMIsConstant(src0) && instr->dest.ssa.bit_size == 32) {
1587
unsigned count = instr->dest.ssa.num_components;
1588
unsigned offset = index;
1589
1590
offset += LLVMConstIntGetZExtValue(src0);
1591
offset /= 4;
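/* offset is now a dword index; the checks below test whether the whole load falls inside the inline (user SGPR) push-constant window. */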
1592
1593
offset -= ctx->args->base_inline_push_consts;
1594
1595
unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
1596
if (offset + count <= num_inline_push_consts) {
1597
LLVMValueRef *const push_constants = alloca(num_inline_push_consts * sizeof(LLVMValueRef));
1598
for (unsigned i = 0; i < num_inline_push_consts; i++)
1599
push_constants[i] = ac_get_arg(&ctx->ac, ctx->args->inline_push_consts[i]);
1600
return ac_build_gather_values(&ctx->ac, push_constants + offset, count);
1601
}
1602
}
1603
1604
ptr =
1605
LLVMBuildGEP(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
1606
1607
if (instr->dest.ssa.bit_size == 8) {
1608
unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
1609
LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
1610
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1611
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1612
1613
LLVMValueRef params[3];
1614
if (load_dwords > 1) {
1615
LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
1616
params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
1617
LLVMConstInt(ctx->ac.i32, 1, false), "");
1618
params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec,
1619
LLVMConstInt(ctx->ac.i32, 0, false), "");
1620
} else {
1621
res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
1622
params[0] = ctx->ac.i32_0;
1623
params[1] = res;
1624
}
1625
params[2] = addr;
1626
res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
1627
1628
res = LLVMBuildTrunc(
1629
ctx->ac.builder, res,
1630
LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
1631
if (instr->dest.ssa.num_components > 1)
1632
res = LLVMBuildBitCast(ctx->ac.builder, res,
1633
LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
1634
return res;
1635
} else if (instr->dest.ssa.bit_size == 16) {
1636
unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
1637
LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
1638
ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1639
LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1640
res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
1641
LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
1642
cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
1643
LLVMValueRef mask[] = {
1644
LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
1645
LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
1646
LLVMConstInt(ctx->ac.i32, 4, false)};
1647
LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
1648
LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
1649
LLVMValueRef shuffle_aligned =
1650
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
1651
LLVMValueRef shuffle_unaligned =
1652
LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
1653
res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
1654
return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
1655
}
1656
1657
ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
1658
1659
return LLVMBuildLoad(ctx->ac.builder, ptr, "");
1660
}
1661
1662
static LLVMValueRef visit_get_ssbo_size(struct ac_nir_context *ctx,
const nir_intrinsic_instr *instr)
{
bool non_uniform = nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM;
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, get_src(ctx, instr->src[0]), false, non_uniform);
return get_buffer_size(ctx, rsrc, false);
}
1669
1670
static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
unsigned start, unsigned count)
{
LLVMValueRef mask[] = {ctx->i32_0, ctx->i32_1, LLVMConstInt(ctx->i32, 2, false),
LLVMConstInt(ctx->i32, 3, false)};

unsigned src_elements = ac_get_llvm_num_components(src);

if (count == src_elements) {
assert(start == 0);
return src;
} else if (count == 1) {
assert(start < src_elements);
return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
} else {
assert(start + count <= src_elements);
assert(count <= 4);
LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
}
}
1691
1692
static unsigned get_cache_policy(struct ac_nir_context *ctx, enum gl_access_qualifier access,
bool may_store_unaligned, bool writeonly_memory)
{
unsigned cache_policy = 0;

/* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
* store opcodes not aligned to a dword are affected. The only way to
* get unaligned stores is through shader images.
*/
if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
/* If this is write-only, don't keep data in L1 to prevent
* evicting L1 cache lines that may be needed by other
* instructions.
*/
writeonly_memory || access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
cache_policy |= ac_glc;
}

if (access & ACCESS_STREAM_CACHE_POLICY)
cache_policy |= ac_slc | ac_glc;

return cache_policy;
}
1715
1716
static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
const nir_intrinsic_instr *instr, nir_src src)
{
return enter_waterfall(ctx, wctx, get_src(ctx, src),
nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
1722
1723
static void visit_store_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1724
{
1725
if (ctx->ac.postponed_kill) {
1726
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
1727
ac_build_ifcc(&ctx->ac, cond, 7000);
1728
}
1729
1730
LLVMValueRef src_data = get_src(ctx, instr->src[0]);
1731
int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
1732
unsigned writemask = nir_intrinsic_write_mask(instr);
1733
enum gl_access_qualifier access = nir_intrinsic_access(instr);
1734
bool writeonly_memory = access & ACCESS_NON_READABLE;
1735
unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
1736
1737
struct waterfall_context wctx;
1738
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
1739
1740
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
1741
LLVMValueRef base_data = src_data;
1742
base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
1743
LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
1744
1745
while (writemask) {
1746
int start, count;
1747
LLVMValueRef data, offset;
1748
LLVMTypeRef data_type;
1749
1750
u_bit_scan_consecutive_range(&writemask, &start, &count);
1751
1752
if (count == 3 && (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
1753
writemask |= 1 << (start + 2);
1754
count = 2;
1755
}
1756
int num_bytes = count * elem_size_bytes; /* count in bytes */
1757
1758
/* We can only store 4 dwords at a time; larger stores can only happen
* with 64-bit vectors, so split them. */
1760
if (num_bytes > 16) {
1761
writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
1762
count = 2;
1763
num_bytes = 16;
1764
}
1765
1766
/* check alignment of 16 Bit stores */
1767
if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
1768
writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1769
count = 1;
1770
num_bytes = 2;
1771
}
1772
1773
/* Due to alignment issues, split stores of 8-bit/16-bit
1774
* vectors.
1775
*/
1776
if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
1777
writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1778
count = 1;
1779
num_bytes = elem_size_bytes;
1780
}
1781
1782
data = extract_vector_range(&ctx->ac, base_data, start, count);
1783
1784
offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
1785
LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
1786
1787
if (num_bytes == 1) {
1788
ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
1789
} else if (num_bytes == 2) {
1790
ac_build_tbuffer_store_short(&ctx->ac, rsrc, data, offset, ctx->ac.i32_0, cache_policy);
1791
} else {
1792
int num_channels = num_bytes / 4;
1793
1794
switch (num_bytes) {
1795
case 16: /* v4f32 */
1796
data_type = ctx->ac.v4f32;
1797
break;
1798
case 12: /* v3f32 */
1799
data_type = ctx->ac.v3f32;
1800
break;
1801
case 8: /* v2f32 */
1802
data_type = ctx->ac.v2f32;
1803
break;
1804
case 4: /* f32 */
1805
data_type = ctx->ac.f32;
1806
break;
1807
default:
1808
unreachable("Malformed vector store.");
1809
}
1810
data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
1811
1812
ac_build_buffer_store_dword(&ctx->ac, rsrc, data, num_channels, offset, ctx->ac.i32_0, 0,
1813
cache_policy);
1814
}
1815
}
1816
1817
exit_waterfall(ctx, &wctx, NULL);
1818
1819
if (ctx->ac.postponed_kill)
1820
ac_build_endif(&ctx->ac, 7000);
1821
}
1822
1823
static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx, LLVMValueRef descriptor,
1824
LLVMValueRef offset, LLVMValueRef compare,
1825
LLVMValueRef exchange, bool image)
1826
{
1827
LLVMBasicBlockRef start_block = NULL, then_block = NULL;
1828
if (ctx->abi->robust_buffer_access || image) {
1829
LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
1830
1831
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
1832
start_block = LLVMGetInsertBlock(ctx->ac.builder);
1833
1834
ac_build_ifcc(&ctx->ac, cond, -1);
1835
1836
then_block = LLVMGetInsertBlock(ctx->ac.builder);
1837
}
1838
1839
if (image)
1840
offset = LLVMBuildMul(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, 8, false), "");
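/* Reconstruct the 64-bit base address from the first two descriptor dwords: dword 1 carries the top 16 address bits, which are sign-extended below before the byte offset is added. */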
1841
1842
LLVMValueRef ptr_parts[2] = {
1843
ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
1844
LLVMBuildAnd(ctx->ac.builder, ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
1845
LLVMConstInt(ctx->ac.i32, 65535, 0), "")};
1846
1847
ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
1848
ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
1849
1850
offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
1851
1852
LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
1853
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
1854
ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
1855
ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL),
1856
"");
1857
1858
LLVMValueRef result =
1859
ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
1860
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
1861
1862
if (ctx->abi->robust_buffer_access || image) {
1863
ac_build_endif(&ctx->ac, -1);
1864
1865
LLVMBasicBlockRef incoming_blocks[2] = {
1866
start_block,
1867
then_block,
1868
};
1869
1870
LLVMValueRef incoming_values[2] = {
1871
LLVMConstInt(ctx->ac.i64, 0, 0),
1872
result,
1873
};
1874
LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
1875
LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
1876
return ret;
1877
} else {
1878
return result;
1879
}
1880
}
1881
1882
static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1883
{
1884
if (ctx->ac.postponed_kill) {
1885
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
1886
ac_build_ifcc(&ctx->ac, cond, 7001);
1887
}
1888
1889
LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
1890
const char *op;
1891
char name[64], type[8];
1892
LLVMValueRef params[6], descriptor;
1893
LLVMValueRef result;
1894
int arg_count = 0;
1895
1896
struct waterfall_context wctx;
1897
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1898
1899
switch (instr->intrinsic) {
1900
case nir_intrinsic_ssbo_atomic_add:
1901
op = "add";
1902
break;
1903
case nir_intrinsic_ssbo_atomic_imin:
1904
op = "smin";
1905
break;
1906
case nir_intrinsic_ssbo_atomic_umin:
1907
op = "umin";
1908
break;
1909
case nir_intrinsic_ssbo_atomic_imax:
1910
op = "smax";
1911
break;
1912
case nir_intrinsic_ssbo_atomic_umax:
1913
op = "umax";
1914
break;
1915
case nir_intrinsic_ssbo_atomic_and:
1916
op = "and";
1917
break;
1918
case nir_intrinsic_ssbo_atomic_or:
1919
op = "or";
1920
break;
1921
case nir_intrinsic_ssbo_atomic_xor:
1922
op = "xor";
1923
break;
1924
case nir_intrinsic_ssbo_atomic_exchange:
1925
op = "swap";
1926
break;
1927
case nir_intrinsic_ssbo_atomic_comp_swap:
1928
op = "cmpswap";
1929
break;
1930
default:
1931
abort();
1932
}
1933
1934
descriptor = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true, false);
1935
1936
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap && return_type == ctx->ac.i64) {
1937
result = emit_ssbo_comp_swap_64(ctx, descriptor, get_src(ctx, instr->src[1]),
1938
get_src(ctx, instr->src[2]), get_src(ctx, instr->src[3]), false);
1939
} else {
1940
if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1941
params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
1942
}
1943
params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
1944
params[arg_count++] = descriptor;
1945
params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1946
params[arg_count++] = ctx->ac.i32_0; /* soffset */
1947
params[arg_count++] = ctx->ac.i32_0; /* slc */
1948
1949
ac_build_type_name_for_intr(return_type, type, sizeof(type));
1950
snprintf(name, sizeof(name), "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
1951
1952
result = ac_build_intrinsic(&ctx->ac, name, return_type, params, arg_count, 0);
1953
}
1954
1955
result = exit_waterfall(ctx, &wctx, result);
1956
if (ctx->ac.postponed_kill)
1957
ac_build_endif(&ctx->ac, 7001);
1958
return result;
1959
}
1960
1961
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
1962
{
1963
struct waterfall_context wctx;
1964
LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1965
1966
int elem_size_bytes = instr->dest.ssa.bit_size / 8;
1967
int num_components = instr->num_components;
1968
enum gl_access_qualifier access = nir_intrinsic_access(instr);
1969
unsigned cache_policy = get_cache_policy(ctx, access, false, false);
1970
1971
LLVMValueRef offset = get_src(ctx, instr->src[1]);
1972
LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false, false);
1973
LLVMValueRef vindex = ctx->ac.i32_0;
1974
1975
LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
1976
LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
1977
1978
LLVMValueRef results[4];
1979
for (int i = 0; i < num_components;) {
1980
int num_elems = num_components - i;
1981
if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
1982
num_elems = 1;
1983
if (num_elems * elem_size_bytes > 16)
1984
num_elems = 16 / elem_size_bytes;
1985
int load_bytes = num_elems * elem_size_bytes;
1986
1987
LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
1988
1989
LLVMValueRef ret;
1990
1991
if (load_bytes == 1) {
1992
ret = ac_build_tbuffer_load_byte(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
1993
cache_policy);
1994
} else if (load_bytes == 2) {
1995
ret = ac_build_tbuffer_load_short(&ctx->ac, rsrc, offset, ctx->ac.i32_0, immoffset,
1996
cache_policy);
1997
} else {
1998
int num_channels = util_next_power_of_two(load_bytes) / 4;
1999
bool can_speculate = access & ACCESS_CAN_REORDER;
2000
2001
ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels, vindex, offset, immoffset, 0,
2002
ctx->ac.f32, cache_policy, can_speculate, false);
2003
}
2004
2005
LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
2006
ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
2007
ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
2008
2009
LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
2010
ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
2011
2012
for (unsigned j = 0; j < num_elems; j++) {
2013
results[i + j] =
2014
LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
2015
}
2016
i += num_elems;
2017
}
2018
2019
LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
2020
return exit_waterfall(ctx, &wctx, ret);
2021
}
2022
2023
static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx, struct waterfall_context *wctx,
const nir_intrinsic_instr *instr)
{
return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
2029
2030
static LLVMValueRef visit_load_global(struct ac_nir_context *ctx,
2031
nir_intrinsic_instr *instr)
2032
{
2033
LLVMValueRef addr = get_src(ctx, instr->src[0]);
2034
LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
2035
LLVMValueRef val;
2036
2037
LLVMTypeRef ptr_type = LLVMPointerType(result_type, AC_ADDR_SPACE_GLOBAL);
2038
2039
addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2040
2041
val = LLVMBuildLoad(ctx->ac.builder, addr, "");
2042
2043
if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
2044
LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
2045
LLVMSetAlignment(val, ac_get_type_size(result_type));
2046
}
2047
2048
return val;
2049
}
2050
2051
static void visit_store_global(struct ac_nir_context *ctx,
2052
nir_intrinsic_instr *instr)
2053
{
2054
if (ctx->ac.postponed_kill) {
2055
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2056
ac_build_ifcc(&ctx->ac, cond, 7002);
2057
}
2058
2059
LLVMValueRef data = get_src(ctx, instr->src[0]);
2060
LLVMValueRef addr = get_src(ctx, instr->src[1]);
2061
LLVMTypeRef type = LLVMTypeOf(data);
2062
LLVMValueRef val;
2063
2064
LLVMTypeRef ptr_type = LLVMPointerType(type, AC_ADDR_SPACE_GLOBAL);
2065
2066
addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2067
2068
val = LLVMBuildStore(ctx->ac.builder, data, addr);
2069
2070
if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE)) {
2071
LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
2072
LLVMSetAlignment(val, ac_get_type_size(type));
2073
}
2074
2075
if (ctx->ac.postponed_kill)
2076
ac_build_endif(&ctx->ac, 7002);
2077
}
2078
2079
static LLVMValueRef visit_global_atomic(struct ac_nir_context *ctx,
2080
nir_intrinsic_instr *instr)
2081
{
2082
if (ctx->ac.postponed_kill) {
2083
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2084
ac_build_ifcc(&ctx->ac, cond, 7002);
2085
}
2086
2087
LLVMValueRef addr = get_src(ctx, instr->src[0]);
2088
LLVMValueRef data = get_src(ctx, instr->src[1]);
2089
LLVMAtomicRMWBinOp op;
2090
LLVMValueRef result;
2091
2092
/* use "singlethread" sync scope to implement relaxed ordering */
2093
const char *sync_scope = "singlethread-one-as";
2094
2095
LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(data), AC_ADDR_SPACE_GLOBAL);
2096
2097
addr = LLVMBuildIntToPtr(ctx->ac.builder, addr, ptr_type, "");
2098
2099
if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap) {
2100
LLVMValueRef data1 = get_src(ctx, instr->src[2]);
2101
result = ac_build_atomic_cmp_xchg(&ctx->ac, addr, data, data1, sync_scope);
2102
result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
2103
} else {
2104
switch (instr->intrinsic) {
2105
case nir_intrinsic_global_atomic_add:
2106
op = LLVMAtomicRMWBinOpAdd;
2107
break;
2108
case nir_intrinsic_global_atomic_umin:
2109
op = LLVMAtomicRMWBinOpUMin;
2110
break;
2111
case nir_intrinsic_global_atomic_umax:
2112
op = LLVMAtomicRMWBinOpUMax;
2113
break;
2114
case nir_intrinsic_global_atomic_imin:
2115
op = LLVMAtomicRMWBinOpMin;
2116
break;
2117
case nir_intrinsic_global_atomic_imax:
2118
op = LLVMAtomicRMWBinOpMax;
2119
break;
2120
case nir_intrinsic_global_atomic_and:
2121
op = LLVMAtomicRMWBinOpAnd;
2122
break;
2123
case nir_intrinsic_global_atomic_or:
2124
op = LLVMAtomicRMWBinOpOr;
2125
break;
2126
case nir_intrinsic_global_atomic_xor:
2127
op = LLVMAtomicRMWBinOpXor;
2128
break;
2129
case nir_intrinsic_global_atomic_exchange:
2130
op = LLVMAtomicRMWBinOpXchg;
2131
break;
2132
default:
2133
unreachable("Invalid global atomic operation");
2134
}
2135
2136
result = ac_build_atomic_rmw(&ctx->ac, op, addr, ac_to_integer(&ctx->ac, data), sync_scope);
2137
}
2138
2139
if (ctx->ac.postponed_kill)
2140
ac_build_endif(&ctx->ac, 7002);
2141
2142
return result;
2143
}
2144
2145
static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2146
{
2147
struct waterfall_context wctx;
2148
LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
2149
2150
LLVMValueRef ret;
2151
LLVMValueRef rsrc = rsrc_base;
2152
LLVMValueRef offset = get_src(ctx, instr->src[1]);
2153
int num_components = instr->num_components;
2154
2155
if (ctx->abi->load_ubo) {
2156
nir_binding binding = nir_chase_binding(instr->src[0]);
2157
rsrc = ctx->abi->load_ubo(ctx->abi, binding.desc_set, binding.binding, binding.success, rsrc);
2158
}
2159
2160
/* Convert to a scalar 32-bit load. */
2161
if (instr->dest.ssa.bit_size == 64)
2162
num_components *= 2;
2163
else if (instr->dest.ssa.bit_size == 16)
2164
num_components = DIV_ROUND_UP(num_components, 2);
2165
else if (instr->dest.ssa.bit_size == 8)
2166
num_components = DIV_ROUND_UP(num_components, 4);
2167
2168
ret =
2169
ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset, NULL, 0,
2170
ctx->ac.f32, 0, true, true);
2171
2172
/* Convert to the original type. */
2173
if (instr->dest.ssa.bit_size == 64) {
2174
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2175
LLVMVectorType(ctx->ac.i64, num_components / 2), "");
2176
} else if (instr->dest.ssa.bit_size == 16) {
2177
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2178
LLVMVectorType(ctx->ac.i16, num_components * 2), "");
2179
} else if (instr->dest.ssa.bit_size == 8) {
2180
ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2181
LLVMVectorType(ctx->ac.i8, num_components * 4), "");
2182
}
2183
2184
ret = ac_trim_vector(&ctx->ac, ret, instr->num_components);
2185
ret = LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2186
2187
return exit_waterfall(ctx, &wctx, ret);
2188
}
2189
2190
static unsigned type_scalar_size_bytes(const struct glsl_type *type)
{
assert(glsl_type_is_vector_or_scalar(type) || glsl_type_is_matrix(type));
return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
}
2195
2196
static void visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2197
{
2198
if (ctx->ac.postponed_kill) {
2199
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2200
ac_build_ifcc(&ctx->ac, cond, 7002);
2201
}
2202
2203
unsigned base = nir_intrinsic_base(instr);
2204
unsigned writemask = nir_intrinsic_write_mask(instr);
2205
unsigned component = nir_intrinsic_component(instr);
2206
LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2207
nir_src offset = *nir_get_io_offset_src(instr);
2208
LLVMValueRef indir_index = NULL;
2209
2210
if (nir_src_is_const(offset))
2211
assert(nir_src_as_uint(offset) == 0);
2212
else
2213
indir_index = get_src(ctx, offset);
2214
2215
switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
2216
case 16:
2217
case 32:
2218
break;
2219
case 64:
2220
unreachable("64-bit IO should have been lowered to 32 bits");
2221
return;
2222
default:
2223
unreachable("unhandled store_output bit size");
2224
return;
2225
}
2226
2227
writemask <<= component;
2228
2229
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2230
nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
2231
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
2232
unsigned location = nir_intrinsic_io_semantics(instr).location;
2233
2234
ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index, src,
2235
writemask, component, location, base);
2236
return;
2237
}
2238
2239
/* No indirect indexing is allowed after this point. */
2240
assert(!indir_index);
2241
2242
for (unsigned chan = 0; chan < 8; chan++) {
2243
if (!(writemask & (1 << chan)))
2244
continue;
2245
2246
LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
2247
LLVMValueRef output_addr = ctx->abi->outputs[base * 4 + chan];
2248
2249
if (LLVMGetElementType(LLVMTypeOf(output_addr)) == ctx->ac.f32 &&
2250
LLVMTypeOf(value) == ctx->ac.f16) {
2251
LLVMValueRef output, index;
2252
2253
/* Insert the 16-bit value into the low or high bits of the 32-bit output
2254
* using read-modify-write.
2255
*/
2256
index = LLVMConstInt(ctx->ac.i32, nir_intrinsic_io_semantics(instr).high_16bits, 0);
2257
output = LLVMBuildLoad(ctx->ac.builder, output_addr, "");
2258
output = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.v2f16, "");
2259
output = LLVMBuildInsertElement(ctx->ac.builder, output, value, index, "");
2260
value = LLVMBuildBitCast(ctx->ac.builder, output, ctx->ac.f32, "");
2261
}
2262
LLVMBuildStore(ctx->ac.builder, value, output_addr);
2263
}
2264
2265
if (ctx->ac.postponed_kill)
2266
ac_build_endif(&ctx->ac, 7002);
2267
}
2268
2269
static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
{
switch (dim) {
case GLSL_SAMPLER_DIM_BUF:
return 1;
case GLSL_SAMPLER_DIM_1D:
return array ? 2 : 1;
case GLSL_SAMPLER_DIM_2D:
return array ? 3 : 2;
case GLSL_SAMPLER_DIM_MS:
return array ? 4 : 3;
case GLSL_SAMPLER_DIM_3D:
case GLSL_SAMPLER_DIM_CUBE:
return 3;
case GLSL_SAMPLER_DIM_RECT:
case GLSL_SAMPLER_DIM_SUBPASS:
return 2;
case GLSL_SAMPLER_DIM_SUBPASS_MS:
return 3;
default:
break;
}
return 0;
}
2293
2294
static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
LLVMValueRef coord_x, LLVMValueRef coord_y,
LLVMValueRef coord_z, LLVMValueRef sample_index,
LLVMValueRef fmask_desc_ptr)
{
unsigned sample_chan = coord_z ? 3 : 2;
LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
addr[sample_chan] = sample_index;

ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
return addr[sample_chan];
}
2306
2307
static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
{
assert(instr->src[0].is_ssa);
return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
}
2312
2313
static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
2314
const nir_intrinsic_instr *instr,
2315
LLVMValueRef dynamic_index,
2316
enum ac_descriptor_type desc_type, bool write)
2317
{
2318
nir_deref_instr *deref_instr = instr->src[0].ssa->parent_instr->type == nir_instr_type_deref
2319
? nir_instr_as_deref(instr->src[0].ssa->parent_instr)
2320
: NULL;
2321
2322
return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
2323
}
2324
2325
static void get_image_coords(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2326
LLVMValueRef dynamic_desc_index, struct ac_image_args *args,
2327
enum glsl_sampler_dim dim, bool is_array)
2328
{
2329
LLVMValueRef src0 = get_src(ctx, instr->src[1]);
2330
LLVMValueRef masks[] = {
2331
LLVMConstInt(ctx->ac.i32, 0, false),
2332
LLVMConstInt(ctx->ac.i32, 1, false),
2333
LLVMConstInt(ctx->ac.i32, 2, false),
2334
LLVMConstInt(ctx->ac.i32, 3, false),
2335
};
2336
LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2337
2338
int count;
2339
ASSERTED bool add_frag_pos =
2340
(dim == GLSL_SAMPLER_DIM_SUBPASS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2341
bool is_ms = (dim == GLSL_SAMPLER_DIM_MS || dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2342
bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
2343
assert(!add_frag_pos && "Input attachments should be lowered by this point.");
2344
count = image_type_to_components_count(dim, is_array);
2345
2346
if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
2347
instr->intrinsic == nir_intrinsic_bindless_image_load ||
2348
instr->intrinsic == nir_intrinsic_image_deref_sparse_load ||
2349
instr->intrinsic == nir_intrinsic_bindless_image_sparse_load)) {
2350
LLVMValueRef fmask_load_address[3];
2351
2352
fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2353
fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
2354
if (is_array)
2355
fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
2356
else
2357
fmask_load_address[2] = NULL;
2358
2359
sample_index = adjust_sample_index_using_fmask(
2360
&ctx->ac, fmask_load_address[0], fmask_load_address[1], fmask_load_address[2],
2361
sample_index,
2362
get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), AC_DESC_FMASK,
2363
&instr->instr, dynamic_desc_index, true, false));
2364
}
2365
if (count == 1 && !gfx9_1d) {
2366
if (instr->src[1].ssa->num_components)
2367
args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2368
else
2369
args->coords[0] = src0;
2370
} else {
2371
int chan;
2372
if (is_ms)
2373
count--;
2374
for (chan = 0; chan < count; ++chan) {
2375
args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
2376
}
2377
2378
if (gfx9_1d) {
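/* GFX9 addresses 1D textures as 2D: insert a zero Y coordinate and, for arrays, move the layer index to the third slot. */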
2379
if (is_array) {
2380
args->coords[2] = args->coords[1];
2381
args->coords[1] = ctx->ac.i32_0;
2382
} else
2383
args->coords[1] = ctx->ac.i32_0;
2384
count++;
2385
}
2386
if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_2D && !is_array) {
2387
/* The hw can't bind a slice of a 3D image as a 2D
2388
* image, because it ignores BASE_ARRAY if the target
2389
* is 3D. The workaround is to read BASE_ARRAY and set
2390
* it as the 3rd address operand for all 2D images.
2391
*/
2392
LLVMValueRef first_layer, const5, mask;
2393
2394
const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
2395
mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
2396
first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
2397
first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
2398
2399
args->coords[count] = first_layer;
2400
count++;
2401
}
2402
2403
if (is_ms) {
2404
args->coords[count] = sample_index;
2405
count++;
2406
}
2407
}
2408
}
2409
2410
static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
struct waterfall_context *wctx,
const nir_intrinsic_instr *instr)
{
nir_deref_instr *deref_instr = NULL;

if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);

LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
}
2422
2423
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2424
bool bindless)
2425
{
2426
LLVMValueRef res;
2427
2428
enum glsl_sampler_dim dim;
2429
enum gl_access_qualifier access = nir_intrinsic_access(instr);
2430
bool is_array;
2431
if (bindless) {
2432
dim = nir_intrinsic_image_dim(instr);
2433
is_array = nir_intrinsic_image_array(instr);
2434
} else {
2435
const nir_deref_instr *image_deref = get_image_deref(instr);
2436
const struct glsl_type *type = image_deref->type;
2437
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2438
dim = glsl_get_sampler_dim(type);
2439
access |= var->data.access;
2440
is_array = glsl_sampler_type_is_array(type);
2441
}
2442
2443
struct waterfall_context wctx;
2444
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2445
2446
struct ac_image_args args = {0};
2447
2448
args.cache_policy = get_cache_policy(ctx, access, false, false);
2449
args.tfe = instr->intrinsic == nir_intrinsic_image_deref_sparse_load;
2450
2451
if (dim == GLSL_SAMPLER_DIM_BUF) {
2452
unsigned num_channels = util_last_bit(nir_ssa_def_components_read(&instr->dest.ssa));
2453
if (instr->dest.ssa.bit_size == 64)
2454
num_channels = num_channels < 4 ? 2 : 4;
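/* 64-bit components are fetched as pairs of 32-bit channels and repacked into i64 values further down. */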
2455
LLVMValueRef rsrc, vindex;
2456
2457
rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false);
2458
vindex =
2459
LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
2460
2461
assert(instr->dest.is_ssa);
2462
bool can_speculate = access & ACCESS_CAN_REORDER;
2463
res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex, ctx->ac.i32_0, num_channels,
2464
args.cache_policy, can_speculate,
2465
instr->dest.ssa.bit_size == 16,
2466
args.tfe);
2467
res = ac_build_expand(&ctx->ac, res, num_channels, args.tfe ? 5 : 4);
2468
2469
res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
2470
res = ac_to_integer(&ctx->ac, res);
2471
} else {
2472
bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
2473
2474
args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
2475
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2476
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2477
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2478
if (!level_zero)
2479
args.lod = get_src(ctx, instr->src[3]);
2480
args.dmask = 15;
2481
args.attributes = AC_FUNC_ATTR_READONLY;
2482
2483
assert(instr->dest.is_ssa);
2484
args.d16 = instr->dest.ssa.bit_size == 16;
2485
2486
res = ac_build_image_opcode(&ctx->ac, &args);
2487
}
2488
2489
if (instr->dest.ssa.bit_size == 64) {
2490
LLVMValueRef code = NULL;
2491
if (args.tfe) {
2492
code = ac_llvm_extract_elem(&ctx->ac, res, 4);
2493
res = ac_trim_vector(&ctx->ac, res, 4);
2494
}
2495
2496
res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i64, 2), "");
2497
LLVMValueRef x = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_0, "");
2498
LLVMValueRef w = LLVMBuildExtractElement(ctx->ac.builder, res, ctx->ac.i32_1, "");
2499
2500
if (code)
2501
code = LLVMBuildZExt(ctx->ac.builder, code, ctx->ac.i64, "");
2502
LLVMValueRef values[5] = {x, ctx->ac.i64_0, ctx->ac.i64_0, w, code};
2503
res = ac_build_gather_values(&ctx->ac, values, 4 + args.tfe);
2504
}
2505
2506
return exit_waterfall(ctx, &wctx, res);
2507
}
2508
2509
static void visit_image_store(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2510
bool bindless)
2511
{
2512
if (ctx->ac.postponed_kill) {
2513
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2514
ac_build_ifcc(&ctx->ac, cond, 7003);
2515
}
2516
2517
enum glsl_sampler_dim dim;
2518
enum gl_access_qualifier access = nir_intrinsic_access(instr);
2519
bool is_array;
2520
2521
if (bindless) {
2522
dim = nir_intrinsic_image_dim(instr);
2523
is_array = nir_intrinsic_image_array(instr);
2524
} else {
2525
const nir_deref_instr *image_deref = get_image_deref(instr);
2526
const struct glsl_type *type = image_deref->type;
2527
const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2528
dim = glsl_get_sampler_dim(type);
2529
access |= var->data.access;
2530
is_array = glsl_sampler_type_is_array(type);
2531
}
2532
2533
struct waterfall_context wctx;
2534
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2535
2536
bool writeonly_memory = access & ACCESS_NON_READABLE;
2537
struct ac_image_args args = {0};
2538
2539
args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
2540
2541
LLVMValueRef src = get_src(ctx, instr->src[3]);
2542
if (instr->src[3].ssa->bit_size == 64) {
2543
/* only R64_UINT and R64_SINT supported */
2544
src = ac_llvm_extract_elem(&ctx->ac, src, 0);
2545
src = LLVMBuildBitCast(ctx->ac.builder, src, ctx->ac.v2f32, "");
2546
} else {
2547
src = ac_to_float(&ctx->ac, src);
2548
}
2549
2550
if (dim == GLSL_SAMPLER_DIM_BUF) {
2551
LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);
2552
unsigned src_channels = ac_get_llvm_num_components(src);
2553
LLVMValueRef vindex;
2554
2555
if (src_channels == 3)
2556
src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
2557
2558
vindex =
2559
LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]), ctx->ac.i32_0, "");
2560
2561
ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex, ctx->ac.i32_0, args.cache_policy);
2562
} else {
2563
bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
2564
2565
args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
2566
args.data[0] = src;
2567
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
2568
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2569
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2570
if (!level_zero)
2571
args.lod = get_src(ctx, instr->src[4]);
2572
args.dmask = 15;
2573
args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
2574
2575
ac_build_image_opcode(&ctx->ac, &args);
2576
}
2577
2578
exit_waterfall(ctx, &wctx, NULL);
2579
if (ctx->ac.postponed_kill)
2580
ac_build_endif(&ctx->ac, 7003);
2581
}
2582
2583
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2584
bool bindless)
2585
{
2586
if (ctx->ac.postponed_kill) {
2587
LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2588
ac_build_ifcc(&ctx->ac, cond, 7004);
2589
}
2590
2591
LLVMValueRef params[7];
2592
int param_count = 0;
2593
2594
bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2595
instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
2596
const char *atomic_name;
2597
char intrinsic_name[64];
2598
enum ac_atomic_op atomic_subop;
2599
ASSERTED int length;
2600
2601
enum glsl_sampler_dim dim;
2602
bool is_array;
2603
if (bindless) {
2604
dim = nir_intrinsic_image_dim(instr);
2605
is_array = nir_intrinsic_image_array(instr);
2606
} else {
2607
const struct glsl_type *type = get_image_deref(instr)->type;
2608
dim = glsl_get_sampler_dim(type);
2609
is_array = glsl_sampler_type_is_array(type);
2610
}
2611
2612
struct waterfall_context wctx;
2613
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2614
2615
switch (instr->intrinsic) {
2616
case nir_intrinsic_bindless_image_atomic_add:
2617
case nir_intrinsic_image_deref_atomic_add:
2618
atomic_name = "add";
2619
atomic_subop = ac_atomic_add;
2620
break;
2621
case nir_intrinsic_bindless_image_atomic_imin:
2622
case nir_intrinsic_image_deref_atomic_imin:
2623
atomic_name = "smin";
2624
atomic_subop = ac_atomic_smin;
2625
break;
2626
case nir_intrinsic_bindless_image_atomic_umin:
2627
case nir_intrinsic_image_deref_atomic_umin:
2628
atomic_name = "umin";
2629
atomic_subop = ac_atomic_umin;
2630
break;
2631
case nir_intrinsic_bindless_image_atomic_imax:
2632
case nir_intrinsic_image_deref_atomic_imax:
2633
atomic_name = "smax";
2634
atomic_subop = ac_atomic_smax;
2635
break;
2636
case nir_intrinsic_bindless_image_atomic_umax:
2637
case nir_intrinsic_image_deref_atomic_umax:
2638
atomic_name = "umax";
2639
atomic_subop = ac_atomic_umax;
2640
break;
2641
case nir_intrinsic_bindless_image_atomic_and:
2642
case nir_intrinsic_image_deref_atomic_and:
2643
atomic_name = "and";
2644
atomic_subop = ac_atomic_and;
2645
break;
2646
case nir_intrinsic_bindless_image_atomic_or:
2647
case nir_intrinsic_image_deref_atomic_or:
2648
atomic_name = "or";
2649
atomic_subop = ac_atomic_or;
2650
break;
2651
case nir_intrinsic_bindless_image_atomic_xor:
2652
case nir_intrinsic_image_deref_atomic_xor:
2653
atomic_name = "xor";
2654
atomic_subop = ac_atomic_xor;
2655
break;
2656
case nir_intrinsic_bindless_image_atomic_exchange:
2657
case nir_intrinsic_image_deref_atomic_exchange:
2658
atomic_name = "swap";
2659
atomic_subop = ac_atomic_swap;
2660
break;
2661
case nir_intrinsic_bindless_image_atomic_comp_swap:
2662
case nir_intrinsic_image_deref_atomic_comp_swap:
2663
atomic_name = "cmpswap";
2664
atomic_subop = 0; /* not used */
2665
break;
2666
case nir_intrinsic_bindless_image_atomic_inc_wrap:
2667
case nir_intrinsic_image_deref_atomic_inc_wrap: {
2668
atomic_name = "inc";
2669
atomic_subop = ac_atomic_inc_wrap;
2670
break;
2671
}
2672
case nir_intrinsic_bindless_image_atomic_dec_wrap:
2673
case nir_intrinsic_image_deref_atomic_dec_wrap:
2674
atomic_name = "dec";
2675
atomic_subop = ac_atomic_dec_wrap;
2676
break;
2677
default:
2678
abort();
2679
}
2680
2681
if (cmpswap)
2682
params[param_count++] = get_src(ctx, instr->src[4]);
2683
params[param_count++] = get_src(ctx, instr->src[3]);
2684
2685
LLVMValueRef result;
2686
if (dim == GLSL_SAMPLER_DIM_BUF) {
2687
params[param_count++] = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, true);
2688
params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
2689
ctx->ac.i32_0, ""); /* vindex */
2690
params[param_count++] = ctx->ac.i32_0; /* voffset */
2691
if (cmpswap && instr->dest.ssa.bit_size == 64) {
2692
result = emit_ssbo_comp_swap_64(ctx, params[2], params[3], params[1], params[0], true);
2693
} else {
2694
params[param_count++] = ctx->ac.i32_0; /* soffset */
2695
params[param_count++] = ctx->ac.i32_0; /* slc */
2696
2697
length = snprintf(intrinsic_name, sizeof(intrinsic_name),
2698
"llvm.amdgcn.struct.buffer.atomic.%s.%s", atomic_name,
2699
instr->dest.ssa.bit_size == 64 ? "i64" : "i32");
2700
2701
assert(length < sizeof(intrinsic_name));
2702
result = ac_build_intrinsic(&ctx->ac, intrinsic_name, LLVMTypeOf(params[0]), params, param_count, 0);
2703
}
2704
} else {
2705
struct ac_image_args args = {0};
2706
args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
2707
args.atomic = atomic_subop;
2708
args.data[0] = params[0];
2709
if (cmpswap)
2710
args.data[1] = params[1];
2711
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
2712
get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2713
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2714
2715
result = ac_build_image_opcode(&ctx->ac, &args);
2716
}
2717
2718
result = exit_waterfall(ctx, &wctx, result);
2719
if (ctx->ac.postponed_kill)
2720
ac_build_endif(&ctx->ac, 7004);
2721
return result;
2722
}
2723
2724
static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
{
struct waterfall_context wctx;
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);

LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);

return exit_waterfall(ctx, &wctx, ret);
}
2734
2735
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
2736
bool bindless)
2737
{
2738
LLVMValueRef res;
2739
2740
enum glsl_sampler_dim dim;
2741
bool is_array;
2742
if (bindless) {
2743
dim = nir_intrinsic_image_dim(instr);
2744
is_array = nir_intrinsic_image_array(instr);
2745
} else {
2746
const struct glsl_type *type = get_image_deref(instr)->type;
2747
dim = glsl_get_sampler_dim(type);
2748
is_array = glsl_sampler_type_is_array(type);
2749
}
2750
2751
struct waterfall_context wctx;
2752
LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2753
2754
if (dim == GLSL_SAMPLER_DIM_BUF) {
2755
res = get_buffer_size(
2756
ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
2757
} else {
2758
2759
struct ac_image_args args = {0};
2760
2761
args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2762
args.dmask = 0xf;
2763
args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2764
args.opcode = ac_image_get_resinfo;
2765
assert(nir_src_as_uint(instr->src[1]) == 0);
2766
args.lod = ctx->ac.i32_0;
2767
args.attributes = AC_FUNC_ATTR_READNONE;
2768
2769
res = ac_build_image_opcode(&ctx->ac, &args);
2770
2771
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
2772
2773
if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
2774
LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
2775
LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2776
z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
2777
res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
2778
}
2779
2780
if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
2781
LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2782
res = LLVMBuildInsertElement(ctx->ac.builder, res, layers, ctx->ac.i32_1, "");
2783
}
2784
}
2785
return exit_waterfall(ctx, &wctx, res);
2786
}
2787
2788
static void emit_membar(struct ac_llvm_context *ac, const nir_intrinsic_instr *instr)
2789
{
2790
unsigned wait_flags = 0;
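/* Map the NIR barrier to hardware wait flags: LGKM covers LDS/scalar memory, VLOAD/VSTORE cover VMEM loads and stores. */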
2791
2792
switch (instr->intrinsic) {
2793
case nir_intrinsic_memory_barrier:
2794
case nir_intrinsic_group_memory_barrier:
2795
wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2796
break;
2797
case nir_intrinsic_memory_barrier_buffer:
2798
case nir_intrinsic_memory_barrier_image:
2799
wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
2800
break;
2801
case nir_intrinsic_memory_barrier_shared:
2802
wait_flags = AC_WAIT_LGKM;
2803
break;
2804
default:
2805
break;
2806
}
2807
2808
ac_build_waitcnt(ac, wait_flags);
2809
}
2810
2811
void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
{
/* GFX6 only (thanks to a hw bug workaround):
* The real barrier instruction isn't needed, because an entire patch
* always fits into a single wave.
*/
if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
return;
}
ac_build_s_barrier(ac);
}
2823
2824
static void emit_discard(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
{
LLVMValueRef cond;

if (instr->intrinsic == nir_intrinsic_discard_if ||
instr->intrinsic == nir_intrinsic_terminate_if) {
cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
} else {
assert(instr->intrinsic == nir_intrinsic_discard);
cond = ctx->ac.i1false;
}

ac_build_kill_if_false(&ctx->ac, cond);
}
2838
2839
static void emit_demote(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2840
{
2841
LLVMValueRef cond;
2842
2843
if (instr->intrinsic == nir_intrinsic_demote_if) {
2844
cond = LLVMBuildNot(ctx->ac.builder, get_src(ctx, instr->src[0]), "");
2845
} else {
2846
assert(instr->intrinsic == nir_intrinsic_demote);
2847
cond = ctx->ac.i1false;
2848
}
2849
2850
if (LLVM_VERSION_MAJOR >= 13) {
2851
/* This demotes the pixel if the condition is false. */
2852
ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.wqm.demote", ctx->ac.voidt, &cond, 1, 0);
2853
return;
2854
}
2855
2856
LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
2857
mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
2858
LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
2859
2860
if (!ctx->info->fs.needs_all_helper_invocations) {
2861
/* This is an optional optimization that only kills whole inactive quads.
2862
* It's not used when subgroup operations can possibly use all helper
2863
* invocations.
2864
*/
2865
if (ctx->ac.flow->depth == 0) {
2866
ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
2867
} else {
2868
/* amdgcn.wqm.vote doesn't work inside conditional blocks. Here's why.
2869
*
2870
* The problem is that kill(wqm.vote(0)) kills all active threads within
2871
* the block, which breaks the whole quad mode outside the block if
2872
* the conditional block has partially active quads (2x2 pixel blocks).
2873
* E.g. threads 0-3 are active outside the block, but only thread 0 is
2874
* active inside the block. Thread 0 shouldn't be killed by demote,
2875
* because threads 1-3 are still active outside the block.
2876
*
2877
* The fix for amdgcn.wqm.vote would be to return S_WQM((live & ~exec) | cond)
2878
* instead of S_WQM(cond).
2879
*
2880
* The less efficient workaround we do here is to save the kill condition
2881
* to a temporary (postponed_kill) and do kill(wqm.vote(cond)) after we
2882
* exit the conditional block.
2883
*/
2884
ctx->ac.conditional_demote_seen = true;
2885
}
2886
}
2887
}
2888
2889
static LLVMValueRef visit_load_local_invocation_index(struct ac_nir_context *ctx)
2890
{
2891
if (ctx->args->vs_rel_patch_id.used) {
2892
return ac_get_arg(&ctx->ac, ctx->args->vs_rel_patch_id);
2893
} else if (ctx->args->merged_wave_info.used) {
2894
/* Thread ID in threadgroup in merged ESGS. */
2895
LLVMValueRef wave_id = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->merged_wave_info), 24, 4);
2896
LLVMValueRef wave_size = LLVMConstInt(ctx->ac.i32, ctx->ac.wave_size, false);
2897
LLVMValueRef threads_before = LLVMBuildMul(ctx->ac.builder, wave_id, wave_size, "");
2898
return LLVMBuildAdd(ctx->ac.builder, threads_before, ac_get_thread_id(&ctx->ac), "");
2899
}
2900
2901
LLVMValueRef result;
2902
LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
2903
result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
2904
LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
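/* tg_size is assumed to pack the wave index within the threadgroup in bits [11:6], so the mask above yields wave_id * 64; for wave32 it is halved below before adding the lane id. */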
2905
2906
if (ctx->ac.wave_size == 32)
2907
result = LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 1, false), "");
2908
2909
return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
2910
}
2911
2912
static LLVMValueRef visit_load_subgroup_id(struct ac_nir_context *ctx)
{
if (ctx->stage == MESA_SHADER_COMPUTE) {
LLVMValueRef result;
result = LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
} else {
return LLVMConstInt(ctx->ac.i32, 0, false);
}
}
2923
2924
static LLVMValueRef visit_load_num_subgroups(struct ac_nir_context *ctx)
{
if (ctx->stage == MESA_SHADER_COMPUTE) {
return LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->tg_size),
LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
} else {
return LLVMConstInt(ctx->ac.i32, 1, false);
}
}
2933
2934
static LLVMValueRef visit_first_invocation(struct ac_nir_context *ctx)
2935
{
2936
LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
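/* A ballot of a non-zero value returns the mask of active lanes; cttz of that mask is the index of the lowest active lane. */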
2937
const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
2938
2939
/* The second argument is whether cttz(0) should be defined, but we do not care. */
2940
LLVMValueRef args[] = {active_set, ctx->ac.i1false};
2941
LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr, ctx->ac.iN_wavemask, args, 2,
2942
AC_FUNC_ATTR_NOUNWIND | AC_FUNC_ATTR_READNONE);
2943
2944
return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
2945
}
2946
2947
static LLVMValueRef visit_load_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2948
{
2949
unsigned alignment = nir_intrinsic_align(instr);
2950
unsigned const_off = nir_intrinsic_base(instr);
2951
2952
LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->dest.ssa.bit_size, const_off);
2953
LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
2954
int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2955
LLVMValueRef derived_ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(result_type, addr_space), "");
2956
LLVMValueRef ret = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
2957
LLVMSetAlignment(ret, alignment);
2958
2959
return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2960
}
2961
2962
static void visit_store_shared(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr)
2963
{
2964
LLVMValueRef derived_ptr, data, index;
2965
LLVMBuilderRef builder = ctx->ac.builder;
2966
2967
unsigned const_off = nir_intrinsic_base(instr);
2968
LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1], instr->src[0].ssa->bit_size, const_off);
2969
LLVMValueRef src = get_src(ctx, instr->src[0]);
2970
2971
int writemask = nir_intrinsic_write_mask(instr);
2972
for (int chan = 0; chan < 4; chan++) {
2973
if (!(writemask & (1 << chan))) {
2974
continue;
2975
}
2976
data = ac_llvm_extract_elem(&ctx->ac, src, chan);
2977
index = LLVMConstInt(ctx->ac.i32, chan, 0);
2978
derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
2979
LLVMBuildStore(builder, data, derived_ptr);
2980
}
2981
}
2982
2983
static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx, const nir_intrinsic_instr *instr,
                                     LLVMValueRef ptr, int src_idx)
{
   if (ctx->ac.postponed_kill) {
      LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
      ac_build_ifcc(&ctx->ac, cond, 7005);
   }

   LLVMValueRef result;
   LLVMValueRef src = get_src(ctx, instr->src[src_idx]);

   const char *sync_scope = "workgroup-one-as";

   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) {
      LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
      result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
      result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;
      switch (instr->intrinsic) {
      case nir_intrinsic_shared_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_shared_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_shared_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_shared_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_shared_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      case nir_intrinsic_shared_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_shared_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_shared_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_shared_atomic_exchange:
         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_shared_atomic_fadd:
         op = LLVMAtomicRMWBinOpFAdd;
         break;
      default:
         return NULL;
      }

      LLVMValueRef val;

      if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd) {
         val = ac_to_float(&ctx->ac, src);

         LLVMTypeRef ptr_type =
            LLVMPointerType(LLVMTypeOf(val), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
         ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
      } else {
         val = ac_to_integer(&ctx->ac, src);
      }

      result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);

      if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
          instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
         result = ac_to_integer(&ctx->ac, result);
      }
   }

   if (ctx->ac.postponed_kill)
      ac_build_endif(&ctx->ac, 7005);
   return result;
}

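/* Sample position within the pixel, i.e. the fractional part of the fragment position. */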
static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
{
   LLVMValueRef values[2];
   LLVMValueRef pos[2];

   pos[0] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
   pos[1] = ac_to_float(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));

   values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
   values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
   return ac_build_gather_values(&ctx->ac, values, 2);
}

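/* Select the (i, j) interpolation coefficients matching the interpolation mode and location;
 * flat inputs have no interpolant, so NULL is returned.
 */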
static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx, enum glsl_interp_mode interp,
3076
unsigned location)
3077
{
3078
switch (interp) {
3079
case INTERP_MODE_FLAT:
3080
default:
3081
return NULL;
3082
case INTERP_MODE_SMOOTH:
3083
case INTERP_MODE_NONE:
3084
if (location == INTERP_CENTER)
3085
return ac_get_arg(&ctx->ac, ctx->args->persp_center);
3086
else if (location == INTERP_CENTROID)
3087
return ctx->abi->persp_centroid;
3088
else if (location == INTERP_SAMPLE)
3089
return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
3090
break;
3091
case INTERP_MODE_NOPERSPECTIVE:
3092
if (location == INTERP_CENTER)
3093
return ac_get_arg(&ctx->ac, ctx->args->linear_center);
3094
else if (location == INTERP_CENTROID)
3095
return ctx->abi->linear_centroid;
3096
else if (location == INTERP_SAMPLE)
3097
return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
3098
break;
3099
}
3100
return NULL;
3101
}
3102
3103
static LLVMValueRef barycentric_center(struct ac_nir_context *ctx, unsigned mode)
3104
{
3105
LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3106
return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3107
}
3108
3109
static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx, unsigned mode,
3110
LLVMValueRef offset)
3111
{
3112
LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3113
LLVMValueRef src_c0 =
3114
ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
3115
LLVMValueRef src_c1 =
3116
ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
3117
3118
LLVMValueRef ij_out[2];
3119
LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
3120
3121
/*
3122
* take the I then J parameters, and the DDX/Y for it, and
3123
* calculate the IJ inputs for the interpolator.
3124
* temp1 = ddx * offset/sample.x + I;
3125
* interp_param.I = ddy * offset/sample.y + temp1;
3126
* temp1 = ddx * offset/sample.x + J;
3127
* interp_param.J = ddy * offset/sample.y + temp1;
3128
*/
3129
for (unsigned i = 0; i < 2; i++) {
3130
LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
3131
LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
3132
LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, ix_ll, "");
3133
LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder, ddxy_out, iy_ll, "");
3134
LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder, interp_param, ix_ll, "");
3135
LLVMValueRef temp1, temp2;
3136
3137
interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el, ctx->ac.f32, "");
3138
3139
temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
3140
temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
3141
3142
ij_out[i] = LLVMBuildBitCast(ctx->ac.builder, temp2, ctx->ac.i32, "");
3143
}
3144
interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3145
return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3146
}
3147
3148
static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx, unsigned mode)
3149
{
3150
LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
3151
return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3152
}
3153
3154
static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx, unsigned mode,
3155
LLVMValueRef sample_id)
3156
{
3157
if (ctx->abi->interp_at_sample_force_center)
3158
return barycentric_center(ctx, mode);
3159
3160
LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
3161
3162
/* fetch the sample position for the given sample ID */
3163
LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
3164
3165
LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
3166
src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
3167
LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
3168
src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
3169
LLVMValueRef coords[] = {src_c0, src_c1};
3170
LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
3171
3172
return barycentric_offset(ctx, mode, offset);
3173
}
3174
3175
static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx, unsigned mode)
3176
{
3177
LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
3178
return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3179
}
3180
3181
static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
3182
{
3183
return LLVMBuildBitCast(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->pull_model),
3184
ctx->ac.v3i32, "");
3185
}
3186
3187
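/* Interpolate a fragment input with the given (i, j) barycentric coefficients, one channel
 * at a time.
 */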
static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx, LLVMValueRef interp_param,
3188
unsigned index, unsigned comp_start,
3189
unsigned num_components, unsigned bitsize,
3190
bool high_16bits)
3191
{
3192
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
3193
LLVMValueRef interp_param_f;
3194
3195
interp_param_f = LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2f32, "");
3196
LLVMValueRef i = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
3197
LLVMValueRef j = LLVMBuildExtractElement(ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
3198
3199
/* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
3200
if (ctx->verified_interp && !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
3201
LLVMValueRef args[2];
3202
args[0] = i;
3203
args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
3204
LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1, args, 2,
3205
AC_FUNC_ATTR_READNONE);
3206
ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
3207
_mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
3208
}
3209
3210
LLVMValueRef values[4];
3211
assert(bitsize == 16 || bitsize == 32);
3212
for (unsigned comp = 0; comp < num_components; comp++) {
3213
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
3214
if (bitsize == 16) {
3215
values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
3216
ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j,
3217
high_16bits);
3218
} else {
3219
values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
3220
ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
3221
}
3222
}
3223
3224
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
3225
}
3226
3227
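/* Handle load_input / load_output and the per-vertex variants for all shader stages. */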
static LLVMValueRef visit_load(struct ac_nir_context *ctx, nir_intrinsic_instr *instr,
3228
bool is_output)
3229
{
3230
LLVMValueRef values[8];
3231
LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
3232
LLVMTypeRef component_type;
3233
unsigned base = nir_intrinsic_base(instr);
3234
unsigned component = nir_intrinsic_component(instr);
3235
unsigned count = instr->dest.ssa.num_components;
3236
nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
3237
LLVMValueRef vertex_index = vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
3238
nir_src offset = *nir_get_io_offset_src(instr);
3239
LLVMValueRef indir_index = NULL;
3240
3241
switch (instr->dest.ssa.bit_size) {
3242
case 16:
3243
case 32:
3244
break;
3245
case 64:
3246
unreachable("64-bit IO should have been lowered");
3247
return NULL;
3248
default:
3249
unreachable("unhandled load type");
3250
return NULL;
3251
}
3252
3253
if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
3254
component_type = LLVMGetElementType(dest_type);
3255
else
3256
component_type = dest_type;
3257
3258
if (nir_src_is_const(offset))
3259
assert(nir_src_as_uint(offset) == 0);
3260
else
3261
indir_index = get_src(ctx, offset);
3262
3263
if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3264
(ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
3265
bool vertex_index_is_invoc_id =
3266
vertex_index_src &&
3267
vertex_index_src->ssa->parent_instr->type == nir_instr_type_intrinsic &&
3268
nir_instr_as_intrinsic(vertex_index_src->ssa->parent_instr)->intrinsic ==
3269
nir_intrinsic_load_invocation_id;
3270
3271
LLVMValueRef result = ctx->abi->load_tess_varyings(ctx->abi, component_type,
3272
vertex_index, indir_index,
3273
base, component,
3274
count, !is_output,
3275
vertex_index_is_invoc_id);
3276
if (instr->dest.ssa.bit_size == 16) {
3277
result = ac_to_integer(&ctx->ac, result);
3278
result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
3279
}
3280
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3281
}
3282
3283
/* No indirect indexing is allowed after this point. */
3284
assert(!indir_index);
3285
3286
if (ctx->stage == MESA_SHADER_GEOMETRY) {
3287
assert(nir_src_is_const(*vertex_index_src));
3288
3289
return ctx->abi->load_inputs(ctx->abi, base, component, count,
3290
nir_src_as_uint(*vertex_index_src), component_type);
3291
}
3292
3293
if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
3294
nir_intrinsic_io_semantics(instr).fb_fetch_output)
3295
return ctx->abi->emit_fbfetch(ctx->abi);
3296
3297
/* Other non-fragment cases have inputs and outputs in temporaries. */
3298
if (ctx->stage != MESA_SHADER_FRAGMENT) {
3299
for (unsigned chan = component; chan < count + component; chan++) {
3300
if (is_output) {
3301
values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->abi->outputs[base * 4 + chan], "");
3302
} else {
3303
values[chan] = ctx->abi->inputs[base * 4 + chan];
3304
if (!values[chan])
3305
values[chan] = LLVMGetUndef(ctx->ac.i32);
3306
}
3307
}
3308
LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
3309
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3310
}
3311
3312
/* Fragment shader inputs. */
3313
unsigned vertex_id = 2; /* P0 */
3314
3315
if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
3316
nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
3317
3318
switch (src0[0].i32) {
3319
case 0:
3320
vertex_id = 2;
3321
break;
3322
case 1:
3323
vertex_id = 0;
3324
break;
3325
case 2:
3326
vertex_id = 1;
3327
break;
3328
default:
3329
unreachable("Invalid vertex index");
3330
}
3331
}
3332
3333
LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
3334
3335
for (unsigned chan = 0; chan < count; chan++) {
3336
LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
3337
values[chan] =
3338
ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, vertex_id, false), llvm_chan,
3339
attr_number, ac_get_arg(&ctx->ac, ctx->args->prim_mask));
3340
values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
3341
if (instr->dest.ssa.bit_size == 16 &&
3342
nir_intrinsic_io_semantics(instr).high_16bits)
3343
values[chan] = LLVMBuildLShr(ctx->ac.builder, values[chan], LLVMConstInt(ctx->ac.i32, 16, 0), "");
3344
values[chan] =
3345
LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
3346
instr->dest.ssa.bit_size == 16 ? ctx->ac.i16 : ctx->ac.i32, "");
3347
}
3348
3349
LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
3350
return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3351
}
3352
3353
static LLVMValueRef
3354
emit_load_frag_shading_rate(struct ac_nir_context *ctx)
3355
{
3356
LLVMValueRef x_rate, y_rate, cond;
3357
3358
/* VRS Rate X = Ancillary[2:3]
3359
* VRS Rate Y = Ancillary[4:5]
3360
*/
3361
x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);
3362
y_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 4, 2);
3363
3364
/* xRate = xRate == 0x1 ? Horizontal2Pixels : None. */
3365
cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
3366
x_rate = LLVMBuildSelect(ctx->ac.builder, cond,
3367
LLVMConstInt(ctx->ac.i32, 4, false), ctx->ac.i32_0, "");
3368
3369
/* yRate = yRate == 0x1 ? Vertical2Pixels : None. */
3370
cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, y_rate, ctx->ac.i32_1, "");
3371
y_rate = LLVMBuildSelect(ctx->ac.builder, cond,
3372
LLVMConstInt(ctx->ac.i32, 1, false), ctx->ac.i32_0, "");
3373
3374
return LLVMBuildOr(ctx->ac.builder, x_rate, y_rate, "");
3375
}
3376
3377
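/* gl_FragCoord: x, y, z and 1/w from the fragment position args, with a VRS-related
 * z adjustment on some GFX10.3 chips.
 */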
static LLVMValueRef
3378
emit_load_frag_coord(struct ac_nir_context *ctx)
3379
{
3380
LLVMValueRef values[4] = {
3381
ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]), ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
3382
ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
3383
ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))};
3384
3385
if (ctx->abi->adjust_frag_coord_z) {
3386
/* Adjust gl_FragCoord.z for VRS due to a hw bug on some GFX10.3 chips. */
3387
LLVMValueRef frag_z = values[2];
3388
3389
/* dFdx fine */
3390
LLVMValueRef adjusted_frag_z = emit_ddxy(ctx, nir_op_fddx_fine, frag_z);
3391
3392
/* adjusted_frag_z * 0.0625 + frag_z */
3393
adjusted_frag_z = LLVMBuildFAdd(ctx->ac.builder, frag_z,
3394
LLVMBuildFMul(ctx->ac.builder, adjusted_frag_z,
3395
LLVMConstReal(ctx->ac.f32, 0.0625), ""), "");
3396
3397
/* VRS Rate X = Ancillary[2:3] */
3398
LLVMValueRef x_rate = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 2, 2);
3399
3400
/* xRate = xRate == 0x1 ? adjusted_frag_z : frag_z. */
3401
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, x_rate, ctx->ac.i32_1, "");
3402
values[2] = LLVMBuildSelect(ctx->ac.builder, cond, adjusted_frag_z, frag_z, "");
3403
}
3404
3405
return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
3406
}
3407
3408
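/* Translate one NIR intrinsic to LLVM IR; if it produces a value, store it as the
 * instruction's SSA def.
 */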
static void visit_intrinsic(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
3409
{
3410
LLVMValueRef result = NULL;
3411
3412
switch (instr->intrinsic) {
3413
case nir_intrinsic_ballot:
3414
result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
3415
if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
3416
result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
3417
break;
3418
case nir_intrinsic_read_invocation:
3419
result =
3420
ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3421
break;
3422
case nir_intrinsic_read_first_invocation:
3423
result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
3424
break;
3425
case nir_intrinsic_load_subgroup_invocation:
3426
result = ac_get_thread_id(&ctx->ac);
3427
break;
3428
case nir_intrinsic_load_workgroup_id: {
3429
LLVMValueRef values[3];
3430
3431
for (int i = 0; i < 3; i++) {
3432
values[i] = ctx->args->workgroup_ids[i].used
3433
? ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i])
3434
: ctx->ac.i32_0;
3435
}
3436
3437
result = ac_build_gather_values(&ctx->ac, values, 3);
3438
break;
3439
}
3440
case nir_intrinsic_load_base_vertex:
3441
case nir_intrinsic_load_first_vertex:
3442
result = ctx->abi->load_base_vertex(ctx->abi,
3443
instr->intrinsic == nir_intrinsic_load_base_vertex);
3444
break;
3445
case nir_intrinsic_load_workgroup_size:
3446
result = ctx->abi->load_local_group_size(ctx->abi);
3447
break;
3448
case nir_intrinsic_load_vertex_id:
3449
result = LLVMBuildAdd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->vertex_id),
3450
ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
3451
break;
3452
case nir_intrinsic_load_vertex_id_zero_base: {
3453
result = ctx->abi->vertex_id;
3454
break;
3455
}
3456
case nir_intrinsic_load_local_invocation_id: {
3457
LLVMValueRef ids = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
3458
3459
if (LLVMGetTypeKind(LLVMTypeOf(ids)) == LLVMIntegerTypeKind) {
3460
/* Thread IDs are packed in VGPR0, 10 bits per component. */
3461
LLVMValueRef id[3];
3462
3463
for (unsigned i = 0; i < 3; i++)
3464
id[i] = ac_unpack_param(&ctx->ac, ids, i * 10, 10);
3465
3466
result = ac_build_gather_values(&ctx->ac, id, 3);
3467
} else {
3468
result = ids;
3469
}
3470
break;
3471
}
3472
case nir_intrinsic_load_base_instance:
3473
result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
3474
break;
3475
case nir_intrinsic_load_draw_id:
3476
result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
3477
break;
3478
case nir_intrinsic_load_view_index:
3479
result = ac_get_arg(&ctx->ac, ctx->args->view_index);
3480
break;
3481
case nir_intrinsic_load_invocation_id:
3482
if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3483
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 8, 5);
3484
} else {
3485
if (ctx->ac.chip_class >= GFX10) {
3486
result =
3487
LLVMBuildAnd(ctx->ac.builder, ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
3488
LLVMConstInt(ctx->ac.i32, 127, 0), "");
3489
} else {
3490
result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
3491
}
3492
}
3493
break;
3494
case nir_intrinsic_load_primitive_id:
3495
if (ctx->stage == MESA_SHADER_GEOMETRY) {
3496
result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
3497
} else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3498
result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
3499
} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
3500
result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
3501
} else
3502
fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
3503
break;
3504
case nir_intrinsic_load_sample_id:
3505
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->ancillary), 8, 4);
3506
break;
3507
case nir_intrinsic_load_sample_pos:
3508
result = load_sample_pos(ctx);
3509
break;
3510
case nir_intrinsic_load_sample_mask_in:
3511
result = ctx->abi->load_sample_mask_in(ctx->abi);
3512
break;
3513
case nir_intrinsic_load_frag_coord:
3514
result = emit_load_frag_coord(ctx);
3515
break;
3516
case nir_intrinsic_load_frag_shading_rate:
3517
result = emit_load_frag_shading_rate(ctx);
3518
break;
3519
case nir_intrinsic_load_layer_id:
3520
result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
3521
break;
3522
case nir_intrinsic_load_front_face:
3523
result = emit_i2b(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->front_face));
3524
break;
3525
case nir_intrinsic_load_helper_invocation:
3526
result = ac_build_load_helper_invocation(&ctx->ac);
3527
break;
3528
case nir_intrinsic_is_helper_invocation:
3529
result = ac_build_is_helper_invocation(&ctx->ac);
3530
break;
3531
case nir_intrinsic_load_color0:
3532
result = ctx->abi->color0;
3533
break;
3534
case nir_intrinsic_load_color1:
3535
result = ctx->abi->color1;
3536
break;
3537
case nir_intrinsic_load_user_data_amd:
3538
assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
3539
result = ctx->abi->user_data;
3540
break;
3541
case nir_intrinsic_load_instance_id:
3542
result = ctx->abi->instance_id;
3543
break;
3544
case nir_intrinsic_load_num_workgroups:
3545
result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
3546
break;
3547
case nir_intrinsic_load_local_invocation_index:
3548
result = visit_load_local_invocation_index(ctx);
3549
break;
3550
case nir_intrinsic_load_subgroup_id:
3551
result = visit_load_subgroup_id(ctx);
3552
break;
3553
case nir_intrinsic_load_num_subgroups:
3554
result = visit_load_num_subgroups(ctx);
3555
break;
3556
case nir_intrinsic_first_invocation:
3557
result = visit_first_invocation(ctx);
3558
break;
3559
case nir_intrinsic_load_push_constant:
3560
result = visit_load_push_constant(ctx, instr);
3561
break;
3562
case nir_intrinsic_vulkan_resource_index: {
3563
LLVMValueRef index = get_src(ctx, instr->src[0]);
3564
unsigned desc_set = nir_intrinsic_desc_set(instr);
3565
unsigned binding = nir_intrinsic_binding(instr);
3566
3567
result = ctx->abi->load_resource(ctx->abi, index, desc_set, binding);
3568
break;
3569
}
3570
case nir_intrinsic_store_ssbo:
3571
visit_store_ssbo(ctx, instr);
3572
break;
3573
case nir_intrinsic_load_ssbo:
3574
result = visit_load_buffer(ctx, instr);
3575
break;
3576
case nir_intrinsic_load_global:
3577
result = visit_load_global(ctx, instr);
3578
break;
3579
case nir_intrinsic_store_global:
3580
visit_store_global(ctx, instr);
3581
break;
3582
case nir_intrinsic_global_atomic_add:
3583
case nir_intrinsic_global_atomic_imin:
3584
case nir_intrinsic_global_atomic_umin:
3585
case nir_intrinsic_global_atomic_imax:
3586
case nir_intrinsic_global_atomic_umax:
3587
case nir_intrinsic_global_atomic_and:
3588
case nir_intrinsic_global_atomic_or:
3589
case nir_intrinsic_global_atomic_xor:
3590
case nir_intrinsic_global_atomic_exchange:
3591
case nir_intrinsic_global_atomic_comp_swap:
3592
result = visit_global_atomic(ctx, instr);
3593
break;
3594
case nir_intrinsic_ssbo_atomic_add:
3595
case nir_intrinsic_ssbo_atomic_imin:
3596
case nir_intrinsic_ssbo_atomic_umin:
3597
case nir_intrinsic_ssbo_atomic_imax:
3598
case nir_intrinsic_ssbo_atomic_umax:
3599
case nir_intrinsic_ssbo_atomic_and:
3600
case nir_intrinsic_ssbo_atomic_or:
3601
case nir_intrinsic_ssbo_atomic_xor:
3602
case nir_intrinsic_ssbo_atomic_exchange:
3603
case nir_intrinsic_ssbo_atomic_comp_swap:
3604
result = visit_atomic_ssbo(ctx, instr);
3605
break;
3606
case nir_intrinsic_load_ubo:
3607
result = visit_load_ubo_buffer(ctx, instr);
3608
break;
3609
case nir_intrinsic_get_ssbo_size:
3610
result = visit_get_ssbo_size(ctx, instr);
3611
break;
3612
case nir_intrinsic_load_input:
3613
case nir_intrinsic_load_input_vertex:
3614
case nir_intrinsic_load_per_vertex_input:
3615
result = visit_load(ctx, instr, false);
3616
break;
3617
case nir_intrinsic_load_output:
3618
case nir_intrinsic_load_per_vertex_output:
3619
result = visit_load(ctx, instr, true);
3620
break;
3621
case nir_intrinsic_store_output:
3622
case nir_intrinsic_store_per_vertex_output:
3623
visit_store_output(ctx, instr);
3624
break;
3625
case nir_intrinsic_load_shared:
3626
result = visit_load_shared(ctx, instr);
3627
break;
3628
case nir_intrinsic_store_shared:
3629
visit_store_shared(ctx, instr);
3630
break;
3631
case nir_intrinsic_bindless_image_samples:
3632
case nir_intrinsic_image_deref_samples:
3633
result = visit_image_samples(ctx, instr);
3634
break;
3635
case nir_intrinsic_bindless_image_load:
3636
result = visit_image_load(ctx, instr, true);
3637
break;
3638
case nir_intrinsic_image_deref_load:
3639
case nir_intrinsic_image_deref_sparse_load:
3640
result = visit_image_load(ctx, instr, false);
3641
break;
3642
case nir_intrinsic_bindless_image_store:
3643
visit_image_store(ctx, instr, true);
3644
break;
3645
case nir_intrinsic_image_deref_store:
3646
visit_image_store(ctx, instr, false);
3647
break;
3648
case nir_intrinsic_bindless_image_atomic_add:
3649
case nir_intrinsic_bindless_image_atomic_imin:
3650
case nir_intrinsic_bindless_image_atomic_umin:
3651
case nir_intrinsic_bindless_image_atomic_imax:
3652
case nir_intrinsic_bindless_image_atomic_umax:
3653
case nir_intrinsic_bindless_image_atomic_and:
3654
case nir_intrinsic_bindless_image_atomic_or:
3655
case nir_intrinsic_bindless_image_atomic_xor:
3656
case nir_intrinsic_bindless_image_atomic_exchange:
3657
case nir_intrinsic_bindless_image_atomic_comp_swap:
3658
case nir_intrinsic_bindless_image_atomic_inc_wrap:
3659
case nir_intrinsic_bindless_image_atomic_dec_wrap:
3660
result = visit_image_atomic(ctx, instr, true);
3661
break;
3662
case nir_intrinsic_image_deref_atomic_add:
3663
case nir_intrinsic_image_deref_atomic_imin:
3664
case nir_intrinsic_image_deref_atomic_umin:
3665
case nir_intrinsic_image_deref_atomic_imax:
3666
case nir_intrinsic_image_deref_atomic_umax:
3667
case nir_intrinsic_image_deref_atomic_and:
3668
case nir_intrinsic_image_deref_atomic_or:
3669
case nir_intrinsic_image_deref_atomic_xor:
3670
case nir_intrinsic_image_deref_atomic_exchange:
3671
case nir_intrinsic_image_deref_atomic_comp_swap:
3672
case nir_intrinsic_image_deref_atomic_inc_wrap:
3673
case nir_intrinsic_image_deref_atomic_dec_wrap:
3674
result = visit_image_atomic(ctx, instr, false);
3675
break;
3676
case nir_intrinsic_bindless_image_size:
3677
result = visit_image_size(ctx, instr, true);
3678
break;
3679
case nir_intrinsic_image_deref_size:
3680
result = visit_image_size(ctx, instr, false);
3681
break;
3682
case nir_intrinsic_shader_clock:
3683
result = ac_build_shader_clock(&ctx->ac, nir_intrinsic_memory_scope(instr));
3684
break;
3685
case nir_intrinsic_discard:
3686
case nir_intrinsic_discard_if:
3687
case nir_intrinsic_terminate:
3688
case nir_intrinsic_terminate_if:
3689
emit_discard(ctx, instr);
3690
break;
3691
case nir_intrinsic_demote:
3692
case nir_intrinsic_demote_if:
3693
emit_demote(ctx, instr);
3694
break;
3695
case nir_intrinsic_memory_barrier:
3696
case nir_intrinsic_group_memory_barrier:
3697
case nir_intrinsic_memory_barrier_buffer:
3698
case nir_intrinsic_memory_barrier_image:
3699
case nir_intrinsic_memory_barrier_shared:
3700
emit_membar(&ctx->ac, instr);
3701
break;
3702
case nir_intrinsic_scoped_barrier: {
3703
assert(!(nir_intrinsic_memory_semantics(instr) &
3704
(NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
3705
3706
nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
3707
3708
unsigned wait_flags = 0;
3709
if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
3710
wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
3711
if (modes & nir_var_mem_shared)
3712
wait_flags |= AC_WAIT_LGKM;
3713
3714
if (wait_flags)
3715
ac_build_waitcnt(&ctx->ac, wait_flags);
3716
3717
if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
3718
ac_emit_barrier(&ctx->ac, ctx->stage);
3719
break;
3720
}
3721
case nir_intrinsic_memory_barrier_tcs_patch:
3722
break;
3723
case nir_intrinsic_control_barrier:
3724
ac_emit_barrier(&ctx->ac, ctx->stage);
3725
break;
3726
case nir_intrinsic_shared_atomic_add:
3727
case nir_intrinsic_shared_atomic_imin:
3728
case nir_intrinsic_shared_atomic_umin:
3729
case nir_intrinsic_shared_atomic_imax:
3730
case nir_intrinsic_shared_atomic_umax:
3731
case nir_intrinsic_shared_atomic_and:
3732
case nir_intrinsic_shared_atomic_or:
3733
case nir_intrinsic_shared_atomic_xor:
3734
case nir_intrinsic_shared_atomic_exchange:
3735
case nir_intrinsic_shared_atomic_comp_swap:
3736
case nir_intrinsic_shared_atomic_fadd: {
3737
LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0], instr->src[1].ssa->bit_size, 0);
3738
result = visit_var_atomic(ctx, instr, ptr, 1);
3739
break;
3740
}
3741
case nir_intrinsic_deref_atomic_add:
3742
case nir_intrinsic_deref_atomic_imin:
3743
case nir_intrinsic_deref_atomic_umin:
3744
case nir_intrinsic_deref_atomic_imax:
3745
case nir_intrinsic_deref_atomic_umax:
3746
case nir_intrinsic_deref_atomic_and:
3747
case nir_intrinsic_deref_atomic_or:
3748
case nir_intrinsic_deref_atomic_xor:
3749
case nir_intrinsic_deref_atomic_exchange:
3750
case nir_intrinsic_deref_atomic_comp_swap:
3751
case nir_intrinsic_deref_atomic_fadd: {
3752
LLVMValueRef ptr = get_src(ctx, instr->src[0]);
3753
result = visit_var_atomic(ctx, instr, ptr, 1);
3754
break;
3755
}
3756
case nir_intrinsic_load_barycentric_pixel:
3757
result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
3758
break;
3759
case nir_intrinsic_load_barycentric_centroid:
3760
result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
3761
break;
3762
case nir_intrinsic_load_barycentric_sample:
3763
result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
3764
break;
3765
case nir_intrinsic_load_barycentric_model:
3766
result = barycentric_model(ctx);
3767
break;
3768
case nir_intrinsic_load_barycentric_at_offset: {
3769
LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3770
result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
3771
break;
3772
}
3773
case nir_intrinsic_load_barycentric_at_sample: {
3774
LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
3775
result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
3776
break;
3777
}
3778
case nir_intrinsic_load_interpolated_input: {
3779
/* We assume any indirect loads have been lowered away */
3780
ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
3781
assert(offset);
3782
assert(offset[0].i32 == 0);
3783
3784
LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
3785
unsigned index = nir_intrinsic_base(instr);
3786
unsigned component = nir_intrinsic_component(instr);
3787
result = load_interpolated_input(ctx, interp_param, index, component,
3788
instr->dest.ssa.num_components, instr->dest.ssa.bit_size,
3789
nir_intrinsic_io_semantics(instr).high_16bits);
3790
break;
3791
}
3792
case nir_intrinsic_emit_vertex:
3793
ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
3794
break;
3795
case nir_intrinsic_emit_vertex_with_counter: {
3796
unsigned stream = nir_intrinsic_stream_id(instr);
3797
LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
3798
ctx->abi->emit_vertex_with_counter(ctx->abi, stream, next_vertex, ctx->abi->outputs);
3799
break;
3800
}
3801
case nir_intrinsic_end_primitive:
3802
case nir_intrinsic_end_primitive_with_counter:
3803
ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
3804
break;
3805
case nir_intrinsic_load_tess_coord:
3806
result = ctx->abi->load_tess_coord(ctx->abi);
3807
break;
3808
case nir_intrinsic_load_tess_level_outer:
3809
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
3810
break;
3811
case nir_intrinsic_load_tess_level_inner:
3812
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
3813
break;
3814
case nir_intrinsic_load_tess_level_outer_default:
3815
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
3816
break;
3817
case nir_intrinsic_load_tess_level_inner_default:
3818
result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
3819
break;
3820
case nir_intrinsic_load_patch_vertices_in:
3821
result = ctx->abi->load_patch_vertices_in(ctx->abi);
3822
break;
3823
case nir_intrinsic_load_tess_rel_patch_id_amd:
3824
if (ctx->stage == MESA_SHADER_TESS_CTRL)
3825
result = ac_unpack_param(&ctx->ac, ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids), 0, 8);
3826
else if (ctx->stage == MESA_SHADER_TESS_EVAL)
3827
result = ac_get_arg(&ctx->ac, ctx->args->tes_rel_patch_id);
3828
else
3829
unreachable("tess_rel_patch_id_amd is only supported by tessellation shaders");
3830
break;
3831
case nir_intrinsic_load_ring_tess_factors_amd:
3832
result = ctx->abi->load_ring_tess_factors(ctx->abi);
3833
break;
3834
case nir_intrinsic_load_ring_tess_factors_offset_amd:
3835
result = ac_get_arg(&ctx->ac, ctx->args->tcs_factor_offset);
3836
break;
3837
case nir_intrinsic_load_ring_tess_offchip_amd:
3838
result = ctx->abi->load_ring_tess_offchip(ctx->abi);
3839
break;
3840
case nir_intrinsic_load_ring_tess_offchip_offset_amd:
3841
result = ac_get_arg(&ctx->ac, ctx->args->tess_offchip_offset);
3842
break;
3843
case nir_intrinsic_load_ring_esgs_amd:
3844
result = ctx->abi->load_ring_esgs(ctx->abi);
3845
break;
3846
case nir_intrinsic_load_ring_es2gs_offset_amd:
3847
result = ac_get_arg(&ctx->ac, ctx->args->es2gs_offset);
3848
break;
3849
case nir_intrinsic_load_gs_vertex_offset_amd:
3850
result = ac_get_arg(&ctx->ac, ctx->args->gs_vtx_offset[nir_intrinsic_base(instr)]);
3851
break;
3852
case nir_intrinsic_vote_all: {
3853
result = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
3854
break;
3855
}
3856
case nir_intrinsic_vote_any: {
3857
result = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
3858
break;
3859
}
3860
case nir_intrinsic_shuffle:
3861
if (ctx->ac.chip_class == GFX8 || ctx->ac.chip_class == GFX9 ||
3862
(ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
3863
result =
3864
ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3865
} else {
3866
LLVMValueRef src = get_src(ctx, instr->src[0]);
3867
LLVMValueRef index = get_src(ctx, instr->src[1]);
3868
LLVMTypeRef type = LLVMTypeOf(src);
3869
struct waterfall_context wctx;
3870
LLVMValueRef index_val;
3871
3872
index_val = enter_waterfall(ctx, &wctx, index, true);
3873
3874
src = LLVMBuildZExt(ctx->ac.builder, src, ctx->ac.i32, "");
3875
3876
result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane", ctx->ac.i32,
3877
(LLVMValueRef[]){src, index_val}, 2,
3878
AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_CONVERGENT);
3879
3880
result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
3881
3882
result = exit_waterfall(ctx, &wctx, result);
3883
}
3884
break;
3885
case nir_intrinsic_reduce:
3886
result = ac_build_reduce(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0],
3887
instr->const_index[1]);
3888
break;
3889
case nir_intrinsic_inclusive_scan:
3890
result =
3891
ac_build_inclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
3892
break;
3893
case nir_intrinsic_exclusive_scan:
3894
result =
3895
ac_build_exclusive_scan(&ctx->ac, get_src(ctx, instr->src[0]), instr->const_index[0]);
3896
break;
3897
case nir_intrinsic_quad_broadcast: {
3898
unsigned lane = nir_src_as_uint(instr->src[1]);
3899
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), lane, lane, lane, lane);
3900
break;
3901
}
3902
case nir_intrinsic_quad_swap_horizontal:
3903
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
3904
break;
3905
case nir_intrinsic_quad_swap_vertical:
3906
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
3907
break;
3908
case nir_intrinsic_quad_swap_diagonal:
3909
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
3910
break;
3911
case nir_intrinsic_quad_swizzle_amd: {
3912
uint32_t mask = nir_intrinsic_swizzle_mask(instr);
3913
result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask & 0x3,
3914
(mask >> 2) & 0x3, (mask >> 4) & 0x3, (mask >> 6) & 0x3);
3915
break;
3916
}
3917
case nir_intrinsic_masked_swizzle_amd: {
3918
uint32_t mask = nir_intrinsic_swizzle_mask(instr);
3919
result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
3920
break;
3921
}
3922
case nir_intrinsic_write_invocation_amd:
3923
result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
3924
get_src(ctx, instr->src[1]), get_src(ctx, instr->src[2]));
3925
break;
3926
case nir_intrinsic_mbcnt_amd:
3927
result = ac_build_mbcnt_add(&ctx->ac, get_src(ctx, instr->src[0]), get_src(ctx, instr->src[1]));
3928
break;
3929
case nir_intrinsic_load_scratch: {
3930
LLVMValueRef offset = get_src(ctx, instr->src[0]);
3931
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
3932
LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
3933
LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
3934
? comp_type
3935
: LLVMVectorType(comp_type, instr->dest.ssa.num_components);
3936
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3937
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
3938
result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
3939
break;
3940
}
3941
case nir_intrinsic_store_scratch: {
3942
LLVMValueRef offset = get_src(ctx, instr->src[1]);
3943
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch, offset);
3944
LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
3945
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3946
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(comp_type, addr_space), "");
3947
LLVMValueRef src = get_src(ctx, instr->src[0]);
3948
unsigned wrmask = nir_intrinsic_write_mask(instr);
3949
while (wrmask) {
3950
int start, count;
3951
u_bit_scan_consecutive_range(&wrmask, &start, &count);
3952
3953
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
3954
LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
3955
LLVMTypeRef vec_type = count == 1 ? comp_type : LLVMVectorType(comp_type, count);
3956
offset_ptr = LLVMBuildBitCast(ctx->ac.builder, offset_ptr,
3957
LLVMPointerType(vec_type, addr_space), "");
3958
LLVMValueRef offset_src = ac_extract_components(&ctx->ac, src, start, count);
3959
LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
3960
}
3961
break;
3962
}
3963
case nir_intrinsic_load_constant: {
3964
unsigned base = nir_intrinsic_base(instr);
3965
unsigned range = nir_intrinsic_range(instr);
3966
3967
LLVMValueRef offset = get_src(ctx, instr->src[0]);
3968
offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, base, false), "");
3969
3970
/* Clamp the offset to avoid out-of-bounds accesses, because global
 * instructions can't handle them.
 */
3973
LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
3974
LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
3975
offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
3976
3977
LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data, offset);
3978
LLVMTypeRef comp_type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
3979
LLVMTypeRef vec_type = instr->dest.ssa.num_components == 1
3980
? comp_type
3981
: LLVMVectorType(comp_type, instr->dest.ssa.num_components);
3982
unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
3983
ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, LLVMPointerType(vec_type, addr_space), "");
3984
result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
3985
break;
3986
}
3987
case nir_intrinsic_set_vertex_and_primitive_count:
3988
/* Currently ignored. */
3989
break;
3990
case nir_intrinsic_load_buffer_amd: {
3991
LLVMValueRef descriptor = get_src(ctx, instr->src[0]);
3992
LLVMValueRef addr_voffset = get_src(ctx, instr->src[1]);
3993
LLVMValueRef addr_soffset = get_src(ctx, instr->src[2]);
3994
unsigned num_components = instr->dest.ssa.num_components;
3995
unsigned const_offset = nir_intrinsic_base(instr);
3996
bool swizzled = nir_intrinsic_is_swizzled(instr);
3997
bool reorder = nir_intrinsic_can_reorder(instr);
3998
bool slc = nir_intrinsic_slc_amd(instr);
3999
4000
enum ac_image_cache_policy cache_policy = ac_glc;
4001
if (swizzled)
4002
cache_policy |= ac_swizzled;
4003
if (slc)
4004
cache_policy |= ac_slc;
4005
if (ctx->ac.chip_class >= GFX10)
4006
cache_policy |= ac_dlc;
4007
4008
LLVMTypeRef channel_type;
4009
if (instr->dest.ssa.bit_size == 8)
4010
channel_type = ctx->ac.i8;
4011
else if (instr->dest.ssa.bit_size == 16)
4012
channel_type = ctx->ac.i16;
4013
else if (instr->dest.ssa.bit_size == 32)
4014
channel_type = ctx->ac.i32;
4015
else if (instr->dest.ssa.bit_size == 64)
4016
channel_type = ctx->ac.i64;
4017
else if (instr->dest.ssa.bit_size == 128)
4018
channel_type = ctx->ac.i128;
4019
else
4020
unreachable("Unsupported channel type for load_buffer_amd");
4021
4022
result = ac_build_buffer_load(&ctx->ac, descriptor, num_components, NULL,
4023
addr_voffset, addr_soffset, const_offset,
4024
channel_type, cache_policy, reorder, false);
4025
result = ac_to_integer(&ctx->ac, ac_trim_vector(&ctx->ac, result, num_components));
4026
break;
4027
}
4028
case nir_intrinsic_store_buffer_amd: {
4029
LLVMValueRef store_data = get_src(ctx, instr->src[0]);
4030
LLVMValueRef descriptor = get_src(ctx, instr->src[1]);
4031
LLVMValueRef addr_voffset = get_src(ctx, instr->src[2]);
4032
LLVMValueRef addr_soffset = get_src(ctx, instr->src[3]);
4033
unsigned num_components = instr->src[0].ssa->num_components;
4034
unsigned const_offset = nir_intrinsic_base(instr);
4035
bool swizzled = nir_intrinsic_is_swizzled(instr);
4036
bool slc = nir_intrinsic_slc_amd(instr);
4037
4038
enum ac_image_cache_policy cache_policy = ac_glc;
4039
if (swizzled)
4040
cache_policy |= ac_swizzled;
4041
if (slc)
4042
cache_policy |= ac_slc;
4043
4044
ac_build_buffer_store_dword(&ctx->ac, descriptor, store_data, num_components,
4045
addr_voffset, addr_soffset, const_offset,
4046
cache_policy);
4047
break;
4048
}
4049
default:
4050
fprintf(stderr, "Unknown intrinsic: ");
4051
nir_print_instr(&instr->instr, stderr);
4052
fprintf(stderr, "\n");
4053
abort();
4054
break;
4055
}
4056
if (result) {
4057
ctx->ssa_defs[instr->dest.ssa.index] = result;
4058
}
4059
}
4060
4061
static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx, unsigned base_index,
4062
unsigned constant_index,
4063
LLVMValueRef dynamic_index)
4064
{
4065
LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
4066
LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
4067
LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
4068
4069
/* Bindless uniforms are 64-bit, so multiply the index by 8. */
4070
index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
4071
offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
4072
4073
LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, 0, 0, false, ctx->ac.i32_0);
4074
4075
LLVMValueRef ret =
4076
ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset, NULL, 0, ctx->ac.f32, 0, true, true);
4077
4078
return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
4079
}
4080
4081
struct sampler_desc_address {
4082
unsigned descriptor_set;
4083
unsigned base_index; /* binding in Vulkan */
4084
unsigned constant_index;
4085
LLVMValueRef dynamic_index;
4086
bool image;
4087
bool bindless;
4088
};
4089
4090
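/* Walk a (possibly NULL) deref chain or bindless handle and compute the descriptor set,
 * base binding, constant offset and dynamic index of a sampler or image.
 */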
static struct sampler_desc_address get_sampler_desc_internal(struct ac_nir_context *ctx,
4091
nir_deref_instr *deref_instr,
4092
const nir_instr *instr, bool image)
4093
{
4094
LLVMValueRef index = NULL;
4095
unsigned constant_index = 0;
4096
unsigned descriptor_set;
4097
unsigned base_index;
4098
bool bindless = false;
4099
4100
if (!deref_instr) {
4101
descriptor_set = 0;
4102
if (image) {
4103
nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
4104
base_index = 0;
4105
bindless = true;
4106
index = get_src(ctx, img_instr->src[0]);
4107
} else {
4108
nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
4109
int sampSrcIdx = nir_tex_instr_src_index(tex_instr, nir_tex_src_sampler_handle);
4110
if (sampSrcIdx != -1) {
4111
base_index = 0;
4112
bindless = true;
4113
index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
4114
} else {
4115
assert(tex_instr && !image);
4116
base_index = tex_instr->sampler_index;
4117
}
4118
}
4119
} else {
4120
while (deref_instr->deref_type != nir_deref_type_var) {
4121
if (deref_instr->deref_type == nir_deref_type_array) {
4122
unsigned array_size = glsl_get_aoa_size(deref_instr->type);
4123
if (!array_size)
4124
array_size = 1;
4125
4126
if (nir_src_is_const(deref_instr->arr.index)) {
4127
constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
4128
} else {
4129
LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
4130
4131
indirect = LLVMBuildMul(ctx->ac.builder, indirect,
4132
LLVMConstInt(ctx->ac.i32, array_size, false), "");
4133
4134
if (!index)
4135
index = indirect;
4136
else
4137
index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
4138
}
4139
4140
deref_instr = nir_src_as_deref(deref_instr->parent);
4141
} else if (deref_instr->deref_type == nir_deref_type_struct) {
4142
unsigned sidx = deref_instr->strct.index;
4143
deref_instr = nir_src_as_deref(deref_instr->parent);
4144
constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
4145
} else {
4146
unreachable("Unsupported deref type");
4147
}
4148
}
4149
descriptor_set = deref_instr->var->data.descriptor_set;
4150
4151
if (deref_instr->var->data.bindless) {
4152
/* For now just assert on unhandled variable types */
4153
assert(deref_instr->var->data.mode == nir_var_uniform);
4154
4155
base_index = deref_instr->var->data.driver_location;
4156
bindless = true;
4157
4158
index = index ? index : ctx->ac.i32_0;
4159
index = get_bindless_index_from_uniform(ctx, base_index, constant_index, index);
4160
} else
4161
base_index = deref_instr->var->data.binding;
4162
}
4163
return (struct sampler_desc_address){
4164
.descriptor_set = descriptor_set,
4165
.base_index = base_index,
4166
.constant_index = constant_index,
4167
.dynamic_index = index,
4168
.image = image,
4169
.bindless = bindless,
4170
};
4171
}
4172
4173
/* Extract any possibly divergent index into a separate value that can be fed
4174
* into get_sampler_desc with the same arguments. */
4175
static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
4176
const nir_instr *instr, bool image)
4177
{
4178
struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
4179
return addr.dynamic_index;
4180
}
4181
4182
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx, nir_deref_instr *deref_instr,
4183
enum ac_descriptor_type desc_type, const nir_instr *instr,
4184
LLVMValueRef index, bool image, bool write)
4185
{
4186
struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
4187
return ctx->abi->load_sampler_desc(ctx->abi, addr.descriptor_set, addr.base_index,
4188
addr.constant_index, index, desc_type, addr.image, write,
4189
addr.bindless);
4190
}
4191
4192
/* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4193
*
4194
* GFX6-GFX7:
4195
* If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4196
* filtering manually. The driver sets img7 to a mask clearing
4197
* MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4198
* s_and_b32 samp0, samp0, img7
4199
*
4200
* GFX8:
4201
* The ANISO_OVERRIDE sampler field enables this fix in TA.
4202
*/
4203
static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx, LLVMValueRef res,
4204
LLVMValueRef samp)
4205
{
4206
LLVMBuilderRef builder = ctx->ac.builder;
4207
LLVMValueRef img7, samp0;
4208
4209
if (ctx->ac.chip_class >= GFX8)
4210
return samp;
4211
4212
img7 = LLVMBuildExtractElement(builder, res, LLVMConstInt(ctx->ac.i32, 7, 0), "");
4213
samp0 = LLVMBuildExtractElement(builder, samp, LLVMConstInt(ctx->ac.i32, 0, 0), "");
4214
samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4215
return LLVMBuildInsertElement(builder, samp, samp0, LLVMConstInt(ctx->ac.i32, 0, 0), "");
4216
}
4217
4218
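/* Fetch the resource, sampler and optional FMASK descriptors for a texture instruction,
 * entering waterfall loops for non-uniform indices.
 */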
static void tex_fetch_ptrs(struct ac_nir_context *ctx, nir_tex_instr *instr,
4219
struct waterfall_context *wctx, LLVMValueRef *res_ptr,
4220
LLVMValueRef *samp_ptr, LLVMValueRef *fmask_ptr)
4221
{
4222
nir_deref_instr *texture_deref_instr = NULL;
4223
nir_deref_instr *sampler_deref_instr = NULL;
4224
int plane = -1;
4225
4226
for (unsigned i = 0; i < instr->num_srcs; i++) {
4227
switch (instr->src[i].src_type) {
4228
case nir_tex_src_texture_deref:
4229
texture_deref_instr = nir_src_as_deref(instr->src[i].src);
4230
break;
4231
case nir_tex_src_sampler_deref:
4232
sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
4233
break;
4234
case nir_tex_src_plane:
4235
plane = nir_src_as_int(instr->src[i].src);
4236
break;
4237
default:
4238
break;
4239
}
4240
}
4241
4242
LLVMValueRef texture_dynamic_index =
4243
get_sampler_desc_index(ctx, texture_deref_instr, &instr->instr, false);
4244
if (!sampler_deref_instr)
4245
sampler_deref_instr = texture_deref_instr;
4246
4247
LLVMValueRef sampler_dynamic_index =
4248
get_sampler_desc_index(ctx, sampler_deref_instr, &instr->instr, false);
4249
if (instr->texture_non_uniform)
4250
texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
4251
4252
if (instr->sampler_non_uniform)
4253
sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
4254
4255
enum ac_descriptor_type main_descriptor =
4256
instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
4257
4258
if (plane >= 0) {
4259
assert(instr->op != nir_texop_txf_ms && instr->op != nir_texop_samples_identical);
4260
assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
4261
4262
main_descriptor = AC_DESC_PLANE_0 + plane;
4263
}
4264
4265
if (instr->op == nir_texop_fragment_mask_fetch) {
4266
/* The fragment mask is fetched from the compressed
4267
* multisampled surface.
4268
*/
4269
main_descriptor = AC_DESC_FMASK;
4270
}
4271
4272
*res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
4273
texture_dynamic_index, false, false);
4274
4275
if (samp_ptr) {
4276
*samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
4277
sampler_dynamic_index, false, false);
4278
if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4279
*samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4280
}
4281
if (fmask_ptr && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_samples_identical))
4282
*fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, &instr->instr,
4283
texture_dynamic_index, false, false);
4284
}
4285
4286
static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx, LLVMValueRef coord)
4287
{
4288
coord = ac_to_float(ctx, coord);
4289
coord = ac_build_round(ctx, coord);
4290
coord = ac_to_integer(ctx, coord);
4291
return coord;
4292
}
4293
4294
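/* Translate a NIR texture instruction: gather its sources into ac_image_args, apply
 * coordinate and derivative fixups, and emit the image intrinsic.
 */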
static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4295
{
4296
LLVMValueRef result = NULL;
4297
struct ac_image_args args = {0};
4298
LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
4299
LLVMValueRef ddx = NULL, ddy = NULL;
4300
unsigned offset_src = 0;
4301
struct waterfall_context wctx[2] = {{{0}}};
4302
4303
tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
4304
4305
for (unsigned i = 0; i < instr->num_srcs; i++) {
4306
switch (instr->src[i].src_type) {
4307
case nir_tex_src_coord: {
4308
LLVMValueRef coord = get_src(ctx, instr->src[i].src);
4309
args.a16 = instr->src[i].src.ssa->bit_size == 16;
4310
for (unsigned chan = 0; chan < instr->coord_components; ++chan)
4311
args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4312
break;
4313
}
4314
case nir_tex_src_projector:
4315
break;
4316
case nir_tex_src_comparator:
4317
if (instr->is_shadow) {
4318
args.compare = get_src(ctx, instr->src[i].src);
4319
args.compare = ac_to_float(&ctx->ac, args.compare);
4320
assert(instr->src[i].src.ssa->bit_size == 32);
4321
}
4322
break;
4323
case nir_tex_src_offset:
4324
args.offset = get_src(ctx, instr->src[i].src);
4325
offset_src = i;
4326
/* We pack it with bit shifts, so we need it to be 32-bit. */
4327
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.offset)) == 32);
4328
break;
4329
case nir_tex_src_bias:
4330
args.bias = get_src(ctx, instr->src[i].src);
4331
assert(ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.bias)) == 32);
4332
break;
4333
case nir_tex_src_lod:
4334
if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
4335
args.level_zero = true;
4336
else
4337
args.lod = get_src(ctx, instr->src[i].src);
4338
break;
4339
case nir_tex_src_ms_index:
4340
sample_index = get_src(ctx, instr->src[i].src);
4341
break;
4342
case nir_tex_src_ms_mcs:
4343
break;
4344
case nir_tex_src_ddx:
4345
ddx = get_src(ctx, instr->src[i].src);
4346
args.g16 = instr->src[i].src.ssa->bit_size == 16;
4347
break;
4348
case nir_tex_src_ddy:
4349
ddy = get_src(ctx, instr->src[i].src);
4350
assert(LLVMTypeOf(ddy) == LLVMTypeOf(ddx));
4351
break;
4352
case nir_tex_src_min_lod:
4353
args.min_lod = get_src(ctx, instr->src[i].src);
4354
break;
4355
case nir_tex_src_texture_offset:
4356
case nir_tex_src_sampler_offset:
4357
case nir_tex_src_plane:
4358
default:
4359
break;
4360
}
4361
}
4362
4363
if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4364
result = get_buffer_size(ctx, args.resource, true);
4365
goto write_result;
4366
}
4367
4368
if (instr->op == nir_texop_texture_samples) {
4369
LLVMValueRef res, samples, is_msaa;
4370
LLVMValueRef default_sample;
4371
4372
res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
4373
samples =
4374
LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 3, false), "");
4375
is_msaa = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 28, false), "");
4376
is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa, LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4377
is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4378
LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4379
4380
samples = LLVMBuildLShr(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 16, false), "");
4381
samples = LLVMBuildAnd(ctx->ac.builder, samples, LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4382
samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1, samples, "");
4383
4384
if (ctx->abi->robust_buffer_access) {
4385
LLVMValueRef dword1, is_null_descriptor;
4386
4387
/* Extract the second dword of the descriptor; if it's
 * all zero, then it's a null descriptor.
 */
4390
dword1 =
4391
LLVMBuildExtractElement(ctx->ac.builder, res, LLVMConstInt(ctx->ac.i32, 1, false), "");
4392
is_null_descriptor = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
4393
LLVMConstInt(ctx->ac.i32, 0, false), "");
4394
default_sample =
4395
LLVMBuildSelect(ctx->ac.builder, is_null_descriptor, ctx->ac.i32_0, ctx->ac.i32_1, "");
4396
} else {
4397
default_sample = ctx->ac.i32_1;
4398
}
4399
4400
samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples, default_sample, "");
4401
result = samples;
4402
goto write_result;
4403
}
4404
4405
if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
4406
LLVMValueRef offset[3], pack;
4407
for (unsigned chan = 0; chan < 3; ++chan)
4408
offset[chan] = ctx->ac.i32_0;
4409
4410
unsigned num_components = ac_get_llvm_num_components(args.offset);
4411
for (unsigned chan = 0; chan < num_components; chan++) {
4412
offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
4413
offset[chan] =
4414
LLVMBuildAnd(ctx->ac.builder, offset[chan], LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4415
if (chan)
4416
offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4417
LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4418
}
4419
pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4420
pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4421
args.offset = pack;
4422
}
4423
4424
/* Section 8.23.1 (Depth Texture Comparison Mode) of the
4425
* OpenGL 4.5 spec says:
4426
*
4427
* "If the texture’s internal format indicates a fixed-point
4428
* depth texture, then D_t and D_ref are clamped to the
4429
* range [0, 1]; otherwise no clamping is performed."
4430
*
4431
* TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
4432
* so the depth comparison value isn't clamped for Z16 and
4433
* Z24 anymore. Do it manually here for GFX8-9; GFX10 has
4434
* an explicitly clamped 32-bit float format.
4435
*/
4436
if (args.compare && ctx->ac.chip_class >= GFX8 && ctx->ac.chip_class <= GFX9 &&
4437
ctx->abi->clamp_shadow_reference) {
4438
LLVMValueRef upgraded, clamped;
4439
4440
upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
4441
LLVMConstInt(ctx->ac.i32, 3, false), "");
4442
upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded, LLVMConstInt(ctx->ac.i32, 29, false), "");
4443
upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
4444
clamped = ac_build_clamp(&ctx->ac, args.compare);
4445
args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped, args.compare, "");
4446
}
4447
4448
/* pack derivatives */
4449
if (ddx || ddy) {
4450
int num_src_deriv_channels, num_dest_deriv_channels;
4451
switch (instr->sampler_dim) {
4452
case GLSL_SAMPLER_DIM_3D:
4453
case GLSL_SAMPLER_DIM_CUBE:
4454
num_src_deriv_channels = 3;
4455
num_dest_deriv_channels = 3;
4456
break;
4457
case GLSL_SAMPLER_DIM_2D:
4458
default:
4459
num_src_deriv_channels = 2;
4460
num_dest_deriv_channels = 2;
4461
break;
4462
case GLSL_SAMPLER_DIM_1D:
4463
num_src_deriv_channels = 1;
4464
if (ctx->ac.chip_class == GFX9) {
4465
num_dest_deriv_channels = 2;
4466
} else {
4467
num_dest_deriv_channels = 1;
4468
}
4469
break;
4470
}
4471
4472
for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4473
args.derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4474
args.derivs[num_dest_deriv_channels + i] =
4475
ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4476
}
4477
for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4478
LLVMValueRef zero = args.g16 ? ctx->ac.f16_0 : ctx->ac.f32_0;
4479
args.derivs[i] = zero;
4480
args.derivs[num_dest_deriv_channels + i] = zero;
4481
}
4482
}
4483
4484
if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
4485
for (unsigned chan = 0; chan < instr->coord_components; chan++)
4486
args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
4487
if (instr->coord_components == 3)
4488
args.coords[3] = LLVMGetUndef(args.a16 ? ctx->ac.f16 : ctx->ac.f32);
4489
ac_prepare_cube_coords(&ctx->ac, instr->op == nir_texop_txd, instr->is_array,
4490
instr->op == nir_texop_lod, args.coords, args.derivs);
4491
}
4492
4493
/* Texture coordinate fixups */
4494
if (instr->coord_components > 1 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4495
instr->is_array && instr->op != nir_texop_txf) {
4496
args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
4497
}
4498
4499
if (instr->coord_components > 2 &&
4500
(instr->sampler_dim == GLSL_SAMPLER_DIM_2D || instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
4501
instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
4502
instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
4503
instr->is_array && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms &&
4504
instr->op != nir_texop_fragment_fetch && instr->op != nir_texop_fragment_mask_fetch) {
4505
args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
4506
}
4507
4508
if (ctx->ac.chip_class == GFX9 && instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4509
instr->op != nir_texop_lod) {
4510
LLVMValueRef filler;
4511
if (instr->op == nir_texop_txf)
4512
filler = args.a16 ? ctx->ac.i16_0 : ctx->ac.i32_0;
4513
else
4514
filler = LLVMConstReal(args.a16 ? ctx->ac.f16 : ctx->ac.f32, 0.5);
4515
4516
if (instr->is_array)
4517
args.coords[2] = args.coords[1];
4518
args.coords[1] = filler;
4519
}
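/* GFX9 has no native 1D image type, so 1D textures are stored as 2D with a
* height of 1. txf uses integer coordinates, hence the filler row index 0;
* filtered opcodes use 0.5 to sample the center of the single texel row.
*/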
4520
4521
/* Pack sample index */
4522
if (sample_index && (instr->op == nir_texop_txf_ms || instr->op == nir_texop_fragment_fetch))
4523
args.coords[instr->coord_components] = sample_index;
4524
4525
if (instr->op == nir_texop_samples_identical) {
4526
struct ac_image_args txf_args = {0};
4527
memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
4528
4529
txf_args.dmask = 0xf;
4530
txf_args.resource = fmask_ptr;
4531
txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
4532
result = build_tex_intrinsic(ctx, instr, &txf_args);
4533
4534
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4535
result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
4536
goto write_result;
4537
}
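/* samples_identical is implemented by reading the pixel's FMASK value with a
* plain txf: an FMASK of 0 is assumed to mean every sample still points at
* fragment 0, i.e. all samples hold identical data.
*/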
4538
4539
if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
4540
instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
4541
instr->op != nir_texop_txs && instr->op != nir_texop_fragment_fetch &&
4542
instr->op != nir_texop_fragment_mask_fetch) {
4543
unsigned sample_chan = instr->is_array ? 3 : 2;
4544
args.coords[sample_chan] = adjust_sample_index_using_fmask(
4545
&ctx->ac, args.coords[0], args.coords[1], instr->is_array ? args.coords[2] : NULL,
4546
args.coords[sample_chan], fmask_ptr);
4547
}
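/* For compressed MSAA surfaces, FMASK maps each logical sample to the
* physical fragment slot that stores it; the helper above replaces the
* incoming sample index with that fragment index before the fetch.
*/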
4548
4549
if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
4550
int num_offsets = instr->src[offset_src].src.ssa->num_components;
4551
num_offsets = MIN2(num_offsets, instr->coord_components);
4552
for (unsigned i = 0; i < num_offsets; ++i) {
4553
LLVMValueRef off = ac_llvm_extract_elem(&ctx->ac, args.offset, i);
4554
if (args.a16)
4555
off = LLVMBuildTrunc(ctx->ac.builder, off, ctx->ac.i16, "");
4556
args.coords[i] = LLVMBuildAdd(ctx->ac.builder, args.coords[i], off, "");
4557
}
4558
args.offset = NULL;
4559
}
4560
4561
/* DMASK was repurposed for GATHER4. 4 components are always
4562
* returned and DMASK works like a swizzle - it selects
4563
* the component to fetch. The only valid DMASK values are
4564
* 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4565
* (red,red,red,red) etc.) The ISA document doesn't mention
4566
* this.
4567
*/
4568
args.dmask = 0xf;
4569
if (instr->op == nir_texop_tg4) {
4570
if (instr->is_shadow)
4571
args.dmask = 1;
4572
else
4573
args.dmask = 1 << instr->component;
4574
}
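/* Example: gathering component 2 (blue) yields dmask = 1 << 2 = 0x4, so all
* four returned texels are taken from the blue channel.
*/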
4575
4576
if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
4577
args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
4578
args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
4579
}
4580
4581
/* Adjust the number of coordinates because we only need (x,y) for 2D
4582
* multisampled images and (x,y,layer) for 2D multisampled layered
4583
* images or for multisampled input attachments.
4584
*/
4585
if (instr->op == nir_texop_fragment_mask_fetch) {
4586
if (args.dim == ac_image_2dmsaa) {
4587
args.dim = ac_image_2d;
4588
} else {
4589
assert(args.dim == ac_image_2darraymsaa);
4590
args.dim = ac_image_2darray;
4591
}
4592
}
4593
4594
/* Set TRUNC_COORD=0 for textureGather(). */
4595
if (instr->op == nir_texop_tg4) {
4596
LLVMValueRef dword0 = LLVMBuildExtractElement(ctx->ac.builder, args.sampler, ctx->ac.i32_0, "");
4597
dword0 = LLVMBuildAnd(ctx->ac.builder, dword0, LLVMConstInt(ctx->ac.i32, C_008F30_TRUNC_COORD, 0), "");
4598
args.sampler = LLVMBuildInsertElement(ctx->ac.builder, args.sampler, dword0, ctx->ac.i32_0, "");
4599
}
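/* Following the usual sid.h convention, C_008F30_TRUNC_COORD is assumed to
* be the complement ("clear") mask of the field, so the AND above simply
* clears the TRUNC_COORD bit in sampler dword 0.
*/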
4600
4601
assert(instr->dest.is_ssa);
4602
args.d16 = instr->dest.ssa.bit_size == 16;
4603
args.tfe = instr->is_sparse;
4604
4605
result = build_tex_intrinsic(ctx, instr, &args);
4606
4607
LLVMValueRef code = NULL;
4608
if (instr->is_sparse) {
4609
code = ac_llvm_extract_elem(&ctx->ac, result, 4);
4610
result = ac_trim_vector(&ctx->ac, result, 4);
4611
}
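/* With TFE (sparse residency) enabled, the image instruction is assumed to
* return one extra dword holding the residency code; it is split off here
* and re-appended after the result vector is trimmed below.
*/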
4612
4613
if (instr->op == nir_texop_query_levels)
4614
result =
4615
LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
4616
else if (instr->is_shadow && instr->is_new_style_shadow && instr->op != nir_texop_txs &&
4617
instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4618
result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4619
else if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4620
instr->is_array) {
4621
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4622
LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
4623
LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4624
z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
4625
result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
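/* txs on a cube array reports the total number of layer-faces in Z
* (layers * 6); dividing by 6 recovers the per-cube layer count that the
* API expects.
*/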
4626
} else if (ctx->ac.chip_class == GFX9 && instr->op == nir_texop_txs &&
4627
instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array) {
4628
LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4629
LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4630
result = LLVMBuildInsertElement(ctx->ac.builder, result, layers, ctx->ac.i32_1, "");
4631
} else if (nir_tex_instr_result_size(instr) != 4)
4632
result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
4633
4634
if (instr->is_sparse)
4635
result = ac_build_concat(&ctx->ac, result, code);
4636
4637
write_result:
4638
if (result) {
4639
assert(instr->dest.is_ssa);
4640
result = ac_to_integer(&ctx->ac, result);
4641
4642
for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
4643
result = exit_waterfall(ctx, wctx + i, result);
4644
}
4645
4646
ctx->ssa_defs[instr->dest.ssa.index] = result;
4647
}
4648
}
4649
4650
static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
4651
{
4652
LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4653
LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
4654
4655
ctx->ssa_defs[instr->dest.ssa.index] = result;
4656
_mesa_hash_table_insert(ctx->phis, instr, result);
4657
}
4658
4659
static void visit_post_phi(struct ac_nir_context *ctx, nir_phi_instr *instr, LLVMValueRef llvm_phi)
4660
{
4661
nir_foreach_phi_src (src, instr) {
4662
LLVMBasicBlockRef block = get_block(ctx, src->pred);
4663
LLVMValueRef llvm_src = get_src(ctx, src->src);
4664
4665
LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4666
}
4667
}
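/* Phi handling is split into two passes: visit_phi creates the (empty) LLVM
* phi nodes while blocks are being translated, and phi_post_pass below fills
* in the incoming value/block pairs once every predecessor block and SSA
* value is guaranteed to exist.
*/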
4668
4669
static void phi_post_pass(struct ac_nir_context *ctx)
4670
{
4671
hash_table_foreach(ctx->phis, entry)
4672
{
4673
visit_post_phi(ctx, (nir_phi_instr *)entry->key, (LLVMValueRef)entry->data);
4674
}
4675
}
4676
4677
static bool is_def_used_in_an_export(const nir_ssa_def *def)
4678
{
4679
nir_foreach_use (use_src, def) {
4680
if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
4681
nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
4682
if (instr->intrinsic == nir_intrinsic_store_deref)
4683
return true;
4684
} else if (use_src->parent_instr->type == nir_instr_type_alu) {
4685
nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
4686
if (instr->op == nir_op_vec4 && is_def_used_in_an_export(&instr->dest.dest.ssa)) {
4687
return true;
4688
}
4689
}
4690
}
4691
return false;
4692
}
4693
4694
static void visit_ssa_undef(struct ac_nir_context *ctx, const nir_ssa_undef_instr *instr)
4695
{
4696
unsigned num_components = instr->def.num_components;
4697
LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
4698
4699
if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
4700
LLVMValueRef undef;
4701
4702
if (num_components == 1)
4703
undef = LLVMGetUndef(type);
4704
else {
4705
undef = LLVMGetUndef(LLVMVectorType(type, num_components));
4706
}
4707
ctx->ssa_defs[instr->def.index] = undef;
4708
} else {
4709
LLVMValueRef zero = LLVMConstInt(type, 0, false);
4710
if (num_components > 1) {
4711
zero = ac_build_gather_values_extended(&ctx->ac, &zero, 4, 0, false, false);
4712
}
4713
ctx->ssa_defs[instr->def.index] = zero;
4714
}
4715
}
4716
4717
static void visit_jump(struct ac_llvm_context *ctx, const nir_jump_instr *instr)
4718
{
4719
switch (instr->type) {
4720
case nir_jump_break:
4721
ac_build_break(ctx);
4722
break;
4723
case nir_jump_continue:
4724
ac_build_continue(ctx);
4725
break;
4726
default:
4727
fprintf(stderr, "Unknown NIR jump instr: ");
4728
nir_print_instr(&instr->instr, stderr);
4729
fprintf(stderr, "\n");
4730
abort();
4731
}
4732
}
4733
4734
static LLVMTypeRef glsl_base_to_llvm_type(struct ac_llvm_context *ac, enum glsl_base_type type)
4735
{
4736
switch (type) {
4737
case GLSL_TYPE_INT:
4738
case GLSL_TYPE_UINT:
4739
case GLSL_TYPE_BOOL:
4740
case GLSL_TYPE_SUBROUTINE:
4741
return ac->i32;
4742
case GLSL_TYPE_INT8:
4743
case GLSL_TYPE_UINT8:
4744
return ac->i8;
4745
case GLSL_TYPE_INT16:
4746
case GLSL_TYPE_UINT16:
4747
return ac->i16;
4748
case GLSL_TYPE_FLOAT:
4749
return ac->f32;
4750
case GLSL_TYPE_FLOAT16:
4751
return ac->f16;
4752
case GLSL_TYPE_INT64:
4753
case GLSL_TYPE_UINT64:
4754
return ac->i64;
4755
case GLSL_TYPE_DOUBLE:
4756
return ac->f64;
4757
default:
4758
unreachable("unknown GLSL type");
4759
}
4760
}
4761
4762
static LLVMTypeRef glsl_to_llvm_type(struct ac_llvm_context *ac, const struct glsl_type *type)
4763
{
4764
if (glsl_type_is_scalar(type)) {
4765
return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
4766
}
4767
4768
if (glsl_type_is_vector(type)) {
4769
return LLVMVectorType(glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
4770
glsl_get_vector_elements(type));
4771
}
4772
4773
if (glsl_type_is_matrix(type)) {
4774
return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_column_type(type)),
4775
glsl_get_matrix_columns(type));
4776
}
4777
4778
if (glsl_type_is_array(type)) {
4779
return LLVMArrayType(glsl_to_llvm_type(ac, glsl_get_array_element(type)),
4780
glsl_get_length(type));
4781
}
4782
4783
assert(glsl_type_is_struct_or_ifc(type));
4784
4785
LLVMTypeRef *const member_types = alloca(glsl_get_length(type) * sizeof(LLVMTypeRef));
4786
4787
for (unsigned i = 0; i < glsl_get_length(type); i++) {
4788
member_types[i] = glsl_to_llvm_type(ac, glsl_get_struct_field(type, i));
4789
}
4790
4791
return LLVMStructTypeInContext(ac->context, member_types, glsl_get_length(type), false);
4792
}
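/* Illustrative mappings produced by the helpers above: a GLSL vec4 becomes
* <4 x float>, a mat3 becomes [3 x <3 x float>] (an array of column
* vectors), and float[8] becomes [8 x float].
*/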
4793
4794
static void visit_deref(struct ac_nir_context *ctx, nir_deref_instr *instr)
4795
{
4796
if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared | nir_var_mem_global))
4797
return;
4798
4799
LLVMValueRef result = NULL;
4800
switch (instr->deref_type) {
4801
case nir_deref_type_var: {
4802
struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
4803
result = entry->data;
4804
break;
4805
}
4806
case nir_deref_type_struct:
4807
if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4808
nir_deref_instr *parent = nir_deref_instr_parent(instr);
4809
uint64_t offset = glsl_get_struct_field_offset(parent->type, instr->strct.index);
4810
result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
4811
LLVMConstInt(ctx->ac.i32, offset, 0));
4812
} else {
4813
result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
4814
LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
4815
}
4816
break;
4817
case nir_deref_type_array:
4818
if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4819
nir_deref_instr *parent = nir_deref_instr_parent(instr);
4820
unsigned stride = glsl_get_explicit_stride(parent->type);
4821
4822
if ((glsl_type_is_matrix(parent->type) && glsl_matrix_type_is_row_major(parent->type)) ||
4823
(glsl_type_is_vector(parent->type) && stride == 0))
4824
stride = type_scalar_size_bytes(parent->type);
4825
4826
assert(stride > 0);
4827
LLVMValueRef index = get_src(ctx, instr->arr.index);
4828
if (LLVMTypeOf(index) != ctx->ac.i64)
4829
index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
4830
4831
LLVMValueRef offset =
4832
LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
4833
4834
result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
4835
} else {
4836
result =
4837
ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
4838
}
4839
break;
4840
case nir_deref_type_ptr_as_array:
4841
if (nir_deref_mode_is(instr, nir_var_mem_global)) {
4842
unsigned stride = nir_deref_instr_array_stride(instr);
4843
4844
LLVMValueRef index = get_src(ctx, instr->arr.index);
4845
if (LLVMTypeOf(index) != ctx->ac.i64)
4846
index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
4847
4848
LLVMValueRef offset =
4849
LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
4850
4851
result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
4852
} else {
4853
result =
4854
ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), get_src(ctx, instr->arr.index));
4855
}
4856
break;
4857
case nir_deref_type_cast: {
4858
result = get_src(ctx, instr->parent);
4859
4860
/* We can't use the structs from LLVM because the shader
4861
* specifies its own offsets. */
4862
LLVMTypeRef pointee_type = ctx->ac.i8;
4863
if (nir_deref_mode_is(instr, nir_var_mem_shared))
4864
pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
4865
4866
unsigned address_space;
4867
4868
switch (instr->modes) {
4869
case nir_var_mem_shared:
4870
address_space = AC_ADDR_SPACE_LDS;
4871
break;
4872
case nir_var_mem_global:
4873
address_space = AC_ADDR_SPACE_GLOBAL;
4874
break;
4875
default:
4876
unreachable("Unhandled address space");
4877
}
4878
4879
LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
4880
4881
if (LLVMTypeOf(result) != type) {
4882
if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
4883
result = LLVMBuildBitCast(ctx->ac.builder, result, type, "");
4884
} else {
4885
result = LLVMBuildIntToPtr(ctx->ac.builder, result, type, "");
4886
}
4887
}
4888
break;
4889
}
4890
default:
4891
unreachable("Unhandled deref_instr deref type");
4892
}
4893
4894
ctx->ssa_defs[instr->dest.ssa.index] = result;
4895
}
4896
4897
static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list);
4898
4899
static void visit_block(struct ac_nir_context *ctx, nir_block *block)
4900
{
4901
LLVMBasicBlockRef blockref = LLVMGetInsertBlock(ctx->ac.builder);
4902
LLVMValueRef first = LLVMGetFirstInstruction(blockref);
4903
if (first) {
4904
/* ac_branch_exited() might have already inserted non-phis */
4905
LLVMPositionBuilderBefore(ctx->ac.builder, LLVMGetFirstInstruction(blockref));
4906
}
4907
4908
nir_foreach_instr(instr, block) {
4909
if (instr->type != nir_instr_type_phi)
4910
break;
4911
visit_phi(ctx, nir_instr_as_phi(instr));
4912
}
4913
4914
LLVMPositionBuilderAtEnd(ctx->ac.builder, blockref);
4915
4916
nir_foreach_instr (instr, block) {
4917
switch (instr->type) {
4918
case nir_instr_type_alu:
4919
visit_alu(ctx, nir_instr_as_alu(instr));
4920
break;
4921
case nir_instr_type_load_const:
4922
visit_load_const(ctx, nir_instr_as_load_const(instr));
4923
break;
4924
case nir_instr_type_intrinsic:
4925
visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4926
break;
4927
case nir_instr_type_tex:
4928
visit_tex(ctx, nir_instr_as_tex(instr));
4929
break;
4930
case nir_instr_type_phi:
4931
break;
4932
case nir_instr_type_ssa_undef:
4933
visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4934
break;
4935
case nir_instr_type_jump:
4936
visit_jump(&ctx->ac, nir_instr_as_jump(instr));
4937
break;
4938
case nir_instr_type_deref:
4939
visit_deref(ctx, nir_instr_as_deref(instr));
4940
break;
4941
default:
4942
fprintf(stderr, "Unknown NIR instr type: ");
4943
nir_print_instr(instr, stderr);
4944
fprintf(stderr, "\n");
4945
abort();
4946
}
4947
}
4948
4949
_mesa_hash_table_insert(ctx->defs, block, LLVMGetInsertBlock(ctx->ac.builder));
4950
}
4951
4952
static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
4953
{
4954
LLVMValueRef value = get_src(ctx, if_stmt->condition);
4955
4956
nir_block *then_block = (nir_block *)exec_list_get_head(&if_stmt->then_list);
4957
4958
ac_build_ifcc(&ctx->ac, value, then_block->index);
4959
4960
visit_cf_list(ctx, &if_stmt->then_list);
4961
4962
if (!exec_list_is_empty(&if_stmt->else_list)) {
4963
nir_block *else_block = (nir_block *)exec_list_get_head(&if_stmt->else_list);
4964
4965
ac_build_else(&ctx->ac, else_block->index);
4966
visit_cf_list(ctx, &if_stmt->else_list);
4967
}
4968
4969
ac_build_endif(&ctx->ac, then_block->index);
4970
}
4971
4972
static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
4973
{
4974
nir_block *first_loop_block = (nir_block *)exec_list_get_head(&loop->body);
4975
4976
ac_build_bgnloop(&ctx->ac, first_loop_block->index);
4977
4978
visit_cf_list(ctx, &loop->body);
4979
4980
ac_build_endloop(&ctx->ac, first_loop_block->index);
4981
}
4982
4983
static void visit_cf_list(struct ac_nir_context *ctx, struct exec_list *list)
4984
{
4985
foreach_list_typed(nir_cf_node, node, node, list)
4986
{
4987
switch (node->type) {
4988
case nir_cf_node_block:
4989
visit_block(ctx, nir_cf_node_as_block(node));
4990
break;
4991
4992
case nir_cf_node_if:
4993
visit_if(ctx, nir_cf_node_as_if(node));
4994
break;
4995
4996
case nir_cf_node_loop:
4997
visit_loop(ctx, nir_cf_node_as_loop(node));
4998
break;
4999
5000
default:
5001
assert(0);
5002
}
5003
}
5004
}
5005
5006
void ac_handle_shader_output_decl(struct ac_llvm_context *ctx, struct ac_shader_abi *abi,
5007
struct nir_shader *nir, struct nir_variable *variable,
5008
gl_shader_stage stage)
5009
{
5010
unsigned output_loc = variable->data.driver_location;
5011
unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5012
5013
/* tess ctrl has its own load/store paths for outputs */
5014
if (stage == MESA_SHADER_TESS_CTRL)
5015
return;
5016
5017
if (stage == MESA_SHADER_VERTEX || stage == MESA_SHADER_TESS_EVAL ||
5018
stage == MESA_SHADER_GEOMETRY) {
5019
int idx = variable->data.location + variable->data.index;
5020
if (idx == VARYING_SLOT_CLIP_DIST0) {
5021
int length = nir->info.clip_distance_array_size + nir->info.cull_distance_array_size;
5022
5023
if (length > 4)
5024
attrib_count = 2;
5025
else
5026
attrib_count = 1;
5027
}
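/* Example: with 6 clip distances and 2 cull distances the combined array
* holds 8 floats, so it needs two vec4 slots (CLIP_DIST0 and CLIP_DIST1),
* hence attrib_count = 2.
*/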
5028
}
5029
5030
bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
5031
LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
5032
for (unsigned i = 0; i < attrib_count; ++i) {
5033
for (unsigned chan = 0; chan < 4; chan++) {
5034
abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
5035
ac_build_alloca_undef(ctx, type, "");
5036
}
5037
}
5038
}
5039
5040
static void setup_scratch(struct ac_nir_context *ctx, struct nir_shader *shader)
5041
{
5042
if (shader->scratch_size == 0)
5043
return;
5044
5045
ctx->scratch =
5046
ac_build_alloca_undef(&ctx->ac, LLVMArrayType(ctx->ac.i8, shader->scratch_size), "scratch");
5047
}
5048
5049
static void setup_constant_data(struct ac_nir_context *ctx, struct nir_shader *shader)
5050
{
5051
if (!shader->constant_data)
5052
return;
5053
5054
LLVMValueRef data = LLVMConstStringInContext(ctx->ac.context, shader->constant_data,
5055
shader->constant_data_size, true);
5056
LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
5057
LLVMValueRef global =
5058
LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "const_data", AC_ADDR_SPACE_CONST);
5059
5060
LLVMSetInitializer(global, data);
5061
LLVMSetGlobalConstant(global, true);
5062
LLVMSetVisibility(global, LLVMHiddenVisibility);
5063
ctx->constant_data = global;
5064
}
5065
5066
static void setup_shared(struct ac_nir_context *ctx, struct nir_shader *nir)
5067
{
5068
if (ctx->ac.lds)
5069
return;
5070
5071
LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, nir->info.shared_size);
5072
5073
LLVMValueRef lds =
5074
LLVMAddGlobalInAddressSpace(ctx->ac.module, type, "compute_lds", AC_ADDR_SPACE_LDS);
5075
LLVMSetAlignment(lds, 64 * 1024);
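/* The 64 KiB alignment presumably forces LLVM to place this global at
* offset 0 of the LDS aperture, so NIR's absolute shared-memory byte
* offsets remain valid.
*/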
5076
5077
ctx->ac.lds =
5078
LLVMBuildBitCast(ctx->ac.builder, lds, LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS), "");
5079
}
5080
5081
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
5082
const struct ac_shader_args *args, struct nir_shader *nir)
5083
{
5084
struct ac_nir_context ctx = {0};
5085
struct nir_function *func;
5086
5087
ctx.ac = *ac;
5088
ctx.abi = abi;
5089
ctx.args = args;
5090
5091
ctx.stage = nir->info.stage;
5092
ctx.info = &nir->info;
5093
5094
ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
5095
5096
/* TODO: remove this after RADV switches to lowered IO */
5097
if (!nir->info.io_lowered) {
5098
nir_foreach_shader_out_variable(variable, nir)
5099
{
5100
ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable, ctx.stage);
5101
}
5102
}
5103
5104
ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
5105
ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
5106
ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
5107
5108
if (ctx.abi->kill_ps_if_inf_interp)
5109
ctx.verified_interp =
5110
_mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
5111
5112
func = (struct nir_function *)exec_list_get_head(&nir->functions);
5113
5114
nir_index_ssa_defs(func->impl);
5115
ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
5116
5117
setup_scratch(&ctx, nir);
5118
setup_constant_data(&ctx, nir);
5119
5120
if (gl_shader_stage_is_compute(nir->info.stage))
5121
setup_shared(&ctx, nir);
5122
5123
if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote &&
5124
LLVM_VERSION_MAJOR < 13) {
5125
/* true = don't kill. */
5126
ctx.ac.postponed_kill = ac_build_alloca_init(&ctx.ac, ctx.ac.i1true, "");
5127
}
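/* Sketch of the emulation used here for older LLVM: each lane keeps an
* "alive" flag that starts out true, demote clears it, and a single kill of
* all flagged lanes is issued at the end of the shader via
* ac_build_kill_if_false below.
*/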
5128
5129
visit_cf_list(&ctx, &func->impl->body);
5130
phi_post_pass(&ctx);
5131
5132
if (ctx.ac.postponed_kill)
5133
ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder, ctx.ac.postponed_kill, ""));
5134
5135
if (!gl_shader_stage_is_compute(nir->info.stage))
5136
ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS, ctx.abi->outputs);
5137
5138
free(ctx.ssa_defs);
5139
ralloc_free(ctx.defs);
5140
ralloc_free(ctx.phis);
5141
ralloc_free(ctx.vars);
5142
if (ctx.abi->kill_ps_if_inf_interp)
5143
ralloc_free(ctx.verified_interp);
5144
}
5145
5146
static unsigned get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
5147
{
5148
if (intrin->intrinsic != nir_intrinsic_store_output)
5149
return 0;
5150
5151
unsigned writemask = nir_intrinsic_write_mask(intrin) << nir_intrinsic_component(intrin);
5152
unsigned location = nir_intrinsic_io_semantics(intrin).location;
5153
5154
if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
5155
return writemask << 4;
5156
else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
5157
return writemask;
5158
5159
return 0;
5160
}
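/* The writemask packs inner tess levels into bits 0-3 and outer levels into
* bits 4-7; e.g. a store to gl_TessLevelOuter.xy (write mask 0x3) yields
* 0x3 << 4 = 0x30.
*/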
5161
5162
static void scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
5163
unsigned *cond_block_tf_writemask,
5164
bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
5165
{
5166
switch (cf_node->type) {
5167
case nir_cf_node_block: {
5168
nir_block *block = nir_cf_node_as_block(cf_node);
5169
nir_foreach_instr (instr, block) {
5170
if (instr->type != nir_instr_type_intrinsic)
5171
continue;
5172
5173
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
5174
if (intrin->intrinsic == nir_intrinsic_control_barrier) {
5175
5176
/* If we find a barrier in nested control flow, put this in the
5177
* too-hard basket. In GLSL this is not possible, but it is in
5178
* SPIR-V.
5179
*/
5180
if (is_nested_cf) {
5181
*tessfactors_are_def_in_all_invocs = false;
5182
return;
5183
}
5184
5185
/* The following case must be prevented:
5186
* gl_TessLevelInner = ...;
5187
* barrier();
5188
* if (gl_InvocationID == 1)
5189
* gl_TessLevelInner = ...;
5190
*
5191
* If you consider disjoint code segments separated by barriers, each
5192
* such segment that writes tess factor channels should write the same
5193
* channels in all codepaths within that segment.
5194
*/
5195
if (*upper_block_tf_writemask || *cond_block_tf_writemask) {
5196
/* Accumulate the result: */
5197
*tessfactors_are_def_in_all_invocs &=
5198
!(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
5199
5200
/* Analyze the next code segment from scratch. */
5201
*upper_block_tf_writemask = 0;
5202
*cond_block_tf_writemask = 0;
5203
}
5204
} else
5205
*upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
5206
}
5207
5208
break;
5209
}
5210
case nir_cf_node_if: {
5211
unsigned then_tessfactor_writemask = 0;
5212
unsigned else_tessfactor_writemask = 0;
5213
5214
nir_if *if_stmt = nir_cf_node_as_if(cf_node);
5215
foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
5216
{
5217
scan_tess_ctrl(nested_node, &then_tessfactor_writemask, cond_block_tf_writemask,
5218
tessfactors_are_def_in_all_invocs, true);
5219
}
5220
5221
foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
5222
{
5223
scan_tess_ctrl(nested_node, &else_tessfactor_writemask, cond_block_tf_writemask,
5224
tessfactors_are_def_in_all_invocs, true);
5225
}
5226
5227
if (then_tessfactor_writemask || else_tessfactor_writemask) {
5228
/* If both statements write the same tess factor channels,
5229
* we can say that the upper block writes them too.
5230
*/
5231
*upper_block_tf_writemask |= then_tessfactor_writemask & else_tessfactor_writemask;
5232
*cond_block_tf_writemask |= then_tessfactor_writemask | else_tessfactor_writemask;
5233
}
5234
5235
break;
5236
}
5237
case nir_cf_node_loop: {
5238
nir_loop *loop = nir_cf_node_as_loop(cf_node);
5239
foreach_list_typed(nir_cf_node, nested_node, node, &loop->body)
5240
{
5241
scan_tess_ctrl(nested_node, cond_block_tf_writemask, cond_block_tf_writemask,
5242
tessfactors_are_def_in_all_invocs, true);
5243
}
5244
5245
break;
5246
}
5247
default:
5248
unreachable("unknown cf node type");
5249
}
5250
}
5251
5252
bool ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
5253
{
5254
assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
5255
5256
/* The pass works as follows:
5257
* If all codepaths write tess factors, we can say that all
5258
* invocations define tess factors.
5259
*
5260
* Each tess factor channel is tracked separately.
5261
*/
5262
unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
5263
unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
5264
5265
/* Initial value = true. Here the pass will accumulate results from
5266
* multiple segments surrounded by barriers. If tess factors aren't
5267
* written at all, it's a shader bug and we don't care if this will be
5268
* true.
5269
*/
5270
bool tessfactors_are_def_in_all_invocs = true;
5271
5272
nir_foreach_function (function, nir) {
5273
if (function->impl) {
5274
foreach_list_typed(nir_cf_node, node, node, &function->impl->body)
5275
{
5276
scan_tess_ctrl(node, &main_block_tf_writemask, &cond_block_tf_writemask,
5277
&tessfactors_are_def_in_all_invocs, false);
5278
}
5279
}
5280
}
5281
5282
/* Accumulate the result for the last code segment separated by a
5283
* barrier.
5284
*/
5285
if (main_block_tf_writemask || cond_block_tf_writemask) {
5286
tessfactors_are_def_in_all_invocs &= !(cond_block_tf_writemask & ~main_block_tf_writemask);
5287
}
5288
5289
return tessfactors_are_def_in_all_invocs;
5290
}
5291
5292