GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_nir.c
/**************************************************************************
 *
 * Copyright 2019 Red Hat.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **************************************************************************/

#include "lp_bld_nir.h"
#include "lp_bld_arit.h"
#include "lp_bld_bitarit.h"
#include "lp_bld_const.h"
#include "lp_bld_gather.h"
#include "lp_bld_logic.h"
#include "lp_bld_quad.h"
#include "lp_bld_flow.h"
#include "lp_bld_struct.h"
#include "lp_bld_debug.h"
#include "lp_bld_printf.h"
#include "nir_deref.h"
#include "nir_search_helpers.h"

static void visit_cf_list(struct lp_build_nir_context *bld_base,
                          struct exec_list *list);

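/*
 * Reinterpret (bitcast) a per-lane vector value as the LLVM vector type
 * matching the given NIR alu type and bit size.  1-bit booleans share the
 * 32-bit unsigned path; types the translator does not handle are returned
 * unchanged.
 */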
static LLVMValueRef cast_type(struct lp_build_nir_context *bld_base, LLVMValueRef val,
                              nir_alu_type alu_type, unsigned bit_size)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   switch (alu_type) {
   case nir_type_float:
      switch (bit_size) {
      case 16:
         return LLVMBuildBitCast(builder, val, LLVMVectorType(LLVMHalfTypeInContext(bld_base->base.gallivm->context), bld_base->base.type.length), "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->base.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->dbl_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_int:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->int8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->int16_bld.vec_type, "");
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->int_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->int64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint:
      switch (bit_size) {
      case 8:
         return LLVMBuildBitCast(builder, val, bld_base->uint8_bld.vec_type, "");
      case 16:
         return LLVMBuildBitCast(builder, val, bld_base->uint16_bld.vec_type, "");
      case 1:
      case 32:
         return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
      case 64:
         return LLVMBuildBitCast(builder, val, bld_base->uint64_bld.vec_type, "");
      default:
         assert(0);
         break;
      }
      break;
   case nir_type_uint32:
      return LLVMBuildBitCast(builder, val, bld_base->uint_bld.vec_type, "");
   default:
      return val;
   }
   return NULL;
}


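/*
 * Map a GLSL sampler dimension (plus arrayness) to the corresponding
 * pipe_texture_target.  Subpass inputs are treated as 2D arrays; unknown
 * dimensions fall back to PIPE_BUFFER.
 */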
static unsigned glsl_sampler_to_pipe(int sampler_dim, bool is_array)
{
   unsigned pipe_target = PIPE_BUFFER;
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
      pipe_target = is_array ? PIPE_TEXTURE_1D_ARRAY : PIPE_TEXTURE_1D;
      break;
   case GLSL_SAMPLER_DIM_2D:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_SUBPASS:
   case GLSL_SAMPLER_DIM_SUBPASS_MS:
      pipe_target = PIPE_TEXTURE_2D_ARRAY;
      break;
   case GLSL_SAMPLER_DIM_3D:
      pipe_target = PIPE_TEXTURE_3D;
      break;
   case GLSL_SAMPLER_DIM_MS:
      pipe_target = is_array ? PIPE_TEXTURE_2D_ARRAY : PIPE_TEXTURE_2D;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      pipe_target = is_array ? PIPE_TEXTURE_CUBE_ARRAY : PIPE_TEXTURE_CUBE;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      pipe_target = PIPE_TEXTURE_RECT;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      pipe_target = PIPE_BUFFER;
      break;
   default:
      break;
   }
   return pipe_target;
}

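/*
 * Source fetch helpers: a NIR source is either an SSA def (looked up in
 * the ssa_defs array) or a register (loaded through the backend's
 * load_reg hook from the storage tracked in bld_base->regs, with an
 * optional indirect index).
 */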
static LLVMValueRef get_ssa_src(struct lp_build_nir_context *bld_base, nir_ssa_def *ssa)
{
   return bld_base->ssa_defs[ssa->index];
}

static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src);

static LLVMValueRef get_reg_src(struct lp_build_nir_context *bld_base, nir_reg_src src)
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, src.reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, src.reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (src.indirect)
      indir_src = get_src(bld_base, *src.indirect);
   return bld_base->load_reg(bld_base, reg_bld, &src, indir_src, reg_storage);
}

static LLVMValueRef get_src(struct lp_build_nir_context *bld_base, nir_src src)
{
   if (src.is_ssa)
      return get_ssa_src(bld_base, src.ssa);
   else
      return get_reg_src(bld_base, src.reg);
}

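/*
 * Destination write helpers: an SSA destination just records the value
 * (multi-component values are gathered into an LLVM array), while a
 * register destination goes through the backend's store_reg hook,
 * honoring the write mask (0xf when none is given).
 */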
static void assign_ssa(struct lp_build_nir_context *bld_base, int idx, LLVMValueRef ptr)
{
   bld_base->ssa_defs[idx] = ptr;
}

static void assign_ssa_dest(struct lp_build_nir_context *bld_base, const nir_ssa_def *ssa,
                            LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   assign_ssa(bld_base, ssa->index, ssa->num_components == 1 ? vals[0] : lp_nir_array_build_gather_values(bld_base->base.gallivm->builder, vals, ssa->num_components));
}

static void assign_reg(struct lp_build_nir_context *bld_base, const nir_reg_dest *reg,
                       unsigned write_mask,
                       LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   struct hash_entry *entry = _mesa_hash_table_search(bld_base->regs, reg->reg);
   LLVMValueRef reg_storage = (LLVMValueRef)entry->data;
   struct lp_build_context *reg_bld = get_int_bld(bld_base, true, reg->reg->bit_size);
   LLVMValueRef indir_src = NULL;
   if (reg->indirect)
      indir_src = get_src(bld_base, *reg->indirect);
   bld_base->store_reg(bld_base, reg_bld, reg, write_mask ? write_mask : 0xf, indir_src, reg_storage, vals);
}

static void assign_dest(struct lp_build_nir_context *bld_base, const nir_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->is_ssa)
      assign_ssa_dest(bld_base, &dest->ssa, vals);
   else
      assign_reg(bld_base, &dest->reg, 0, vals);
}

static void assign_alu_dest(struct lp_build_nir_context *bld_base, const nir_alu_dest *dest, LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS])
{
   if (dest->dest.is_ssa)
      assign_ssa_dest(bld_base, &dest->dest.ssa, vals);
   else
      assign_reg(bld_base, &dest->dest.reg, dest->write_mask, vals);
}

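/*
 * Boolean/comparison helpers.  NIR 32-bit booleans are all-ones/all-zero
 * lane masks, so results computed at other bit widths are sign-extended
 * or truncated back to the 32-bit mask type.
 */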
static LLVMValueRef int_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  bool is_unsigned,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_compare(bld_base->base.gallivm, int_bld->type, PIPE_FUNC_NOTEQUAL, val, int_bld->zero);
   if (src_bit_size == 16)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

static LLVMValueRef flt_to_bool32(struct lp_build_nir_context *bld_base,
                                  uint32_t src_bit_size,
                                  LLVMValueRef val)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result = lp_build_cmp(flt_bld, PIPE_FUNC_NOTEQUAL, val, flt_bld->zero);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

static LLVMValueRef fcmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size);
   LLVMValueRef result;

   if (compare != PIPE_FUNC_NOTEQUAL)
      result = lp_build_cmp_ordered(flt_bld, compare, src[0], src[1]);
   else
      result = lp_build_cmp(flt_bld, compare, src[0], src[1]);
   if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

static LLVMValueRef icmp32(struct lp_build_nir_context *bld_base,
                           enum pipe_compare_func compare,
                           bool is_unsigned,
                           uint32_t src_bit_size,
                           LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct lp_build_context *i_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef result = lp_build_cmp(i_bld, compare, src[0], src[1]);
   if (src_bit_size < 32)
      result = LLVMBuildSExt(builder, result, bld_base->int_bld.vec_type, "");
   else if (src_bit_size == 64)
      result = LLVMBuildTrunc(builder, result, bld_base->int_bld.vec_type, "");
   return result;
}

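/*
 * Fetch one ALU source and apply its swizzle.  Multi-component values are
 * LLVM arrays of per-channel vectors; scalar extraction, splatting and
 * general swizzles are handled by rebuilding that array.  Source modifiers
 * (negate/abs) are expected to have been lowered before this point.
 */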
static LLVMValueRef get_alu_src(struct lp_build_nir_context *bld_base,
                                nir_alu_src src,
                                unsigned num_components)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef value = get_src(bld_base, src.src);
   bool need_swizzle = false;

   assert(value);
   unsigned src_components = nir_src_num_components(src.src);
   for (unsigned i = 0; i < num_components; ++i) {
      assert(src.swizzle[i] < src_components);
      if (src.swizzle[i] != i)
         need_swizzle = true;
   }

   if (need_swizzle || num_components != src_components) {
      if (src_components > 1 && num_components == 1) {
         value = LLVMBuildExtractValue(gallivm->builder, value,
                                       src.swizzle[0], "");
      } else if (src_components == 1 && num_components > 1) {
         LLVMValueRef values[] = {value, value, value, value, value, value, value, value, value, value, value, value, value, value, value, value};
         value = lp_nir_array_build_gather_values(builder, values, num_components);
      } else {
         LLVMValueRef arr = LLVMGetUndef(LLVMArrayType(LLVMTypeOf(LLVMBuildExtractValue(builder, value, 0, "")), num_components));
         for (unsigned i = 0; i < num_components; i++)
            arr = LLVMBuildInsertValue(builder, arr, LLVMBuildExtractValue(builder, value, src.swizzle[i], ""), i, "");
         value = arr;
      }
   }
   assert(!src.negate);
   assert(!src.abs);
   return value;
}

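/*
 * Boolean conversions: AND the 32-bit boolean mask with the bit pattern
 * of 1.0f (b2f) or the integer 1 (b2i), then widen or narrow the result
 * to the destination bit size.
 */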
static LLVMValueRef emit_b2f(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      LLVMBuildBitCast(builder, lp_build_const_vec(bld_base->base.gallivm, bld_base->base.type,
                                                                                   1.0), bld_base->int_bld.vec_type, ""),
                                      "");
   result = LLVMBuildBitCast(builder, result, bld_base->base.vec_type, "");
   switch (bitsize) {
   case 32:
      break;
   case 64:
      result = LLVMBuildFPExt(builder, result, bld_base->dbl_bld.vec_type, "");
      break;
   default:
      unreachable("unsupported bit size.");
   }
   return result;
}

static LLVMValueRef emit_b2i(struct lp_build_nir_context *bld_base,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef result = LLVMBuildAnd(builder, cast_type(bld_base, src0, nir_type_int, 32),
                                      lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, 1), "");
   switch (bitsize) {
   case 8:
      return LLVMBuildTrunc(builder, result, bld_base->int8_bld.vec_type, "");
   case 16:
      return LLVMBuildTrunc(builder, result, bld_base->int16_bld.vec_type, "");
   case 32:
      return result;
   case 64:
      return LLVMBuildZExt(builder, result, bld_base->int64_bld.vec_type, "");
   default:
      unreachable("unsupported bit size.");
   }
}

static LLVMValueRef emit_b32csel(struct lp_build_nir_context *bld_base,
                                 unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                                 LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef sel = cast_type(bld_base, src[0], nir_type_int, 32);
   LLVMValueRef v = lp_build_compare(bld_base->base.gallivm, bld_base->int_bld.type, PIPE_FUNC_NOTEQUAL, sel, bld_base->int_bld.zero);
   struct lp_build_context *bld = get_int_bld(bld_base, false, src_bit_size[1]);
   return lp_build_select(bld, v, src[1], src[2]);
}

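/*
 * Pack/unpack helpers built on vector shuffles: split_* extracts the low
 * or high halves of each wide lane, merge_* interleaves two narrow
 * vectors into one wide one.  The shuffle indices depend on endianness.
 */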
static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}

static LLVMValueRef
merge_64bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}

static LLVMValueRef split_16bit(struct lp_build_nir_context *bld_base,
                                LLVMValueRef src,
                                bool hi)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
   int len = bld_base->base.type.length * 2;
   for (unsigned i = 0; i < bld_base->base.type.length; i++) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
#else
      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
#endif
   }

   src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt16TypeInContext(gallivm->context), len), "");
   return LLVMBuildShuffleVector(gallivm->builder, src,
                                 LLVMGetUndef(LLVMTypeOf(src)),
                                 LLVMConstVector(hi ? shuffles2 : shuffles,
                                                 bld_base->base.type.length),
                                 "");
}

static LLVMValueRef
merge_16bit(struct lp_build_nir_context *bld_base,
            LLVMValueRef input,
            LLVMValueRef input2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->int16_bld.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   for (i = 0; i < bld_base->int_bld.type.length * 2; i+=2) {
#if UTIL_ARCH_LITTLE_ENDIAN
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
#else
      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
#endif
   }
   return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
}

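/*
 * Integer division with defined divide-by-zero behavior: lanes with a
 * zero divisor are patched to divide by all-ones instead of faulting,
 * then the result is forced to 0 (signed) or all-ones (unsigned/mod),
 * matching the d3d10 rules mentioned below.
 */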
static LLVMValueRef
do_int_divide(struct lp_build_nir_context *bld_base,
              bool is_unsigned, unsigned src_bit_size,
              LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   struct lp_build_context *mask_bld = get_int_bld(bld_base, true, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(mask_bld, PIPE_FUNC_EQUAL, src2,
                                        mask_bld->zero);

   if (!is_unsigned) {
      /* INT_MIN (0x80000000) / -1 (0xffffffff) causes sigfpe, seen with blender. */
      div_mask = LLVMBuildAnd(builder, div_mask, lp_build_const_int_vec(gallivm, int_bld->type, 0x7fffffff), "");
   }
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   LLVMValueRef result = lp_build_div(int_bld, src, divisor);

   if (!is_unsigned) {
      LLVMValueRef not_div_mask = LLVMBuildNot(builder, div_mask, "");
      return LLVMBuildAnd(builder, not_div_mask, result, "");
   } else
      /* udiv by zero is guaranteed to return 0xffffffff at least with d3d10
       * may as well do same for idiv */
      return LLVMBuildOr(builder, div_mask, result, "");
}

static LLVMValueRef
do_int_mod(struct lp_build_nir_context *bld_base,
           bool is_unsigned, unsigned src_bit_size,
           LLVMValueRef src, LLVMValueRef src2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *int_bld = get_int_bld(bld_base, is_unsigned, src_bit_size);
   LLVMValueRef div_mask = lp_build_cmp(int_bld, PIPE_FUNC_EQUAL, src2,
                                        int_bld->zero);
   LLVMValueRef divisor = LLVMBuildOr(builder,
                                      div_mask,
                                      src2, "");
   LLVMValueRef result = lp_build_mod(int_bld, src, divisor);
   return LLVMBuildOr(builder, div_mask, result, "");
}

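/*
 * fquantize2f16: round-trip the value through half precision, then flush
 * results whose magnitude is below the smallest normal half (2^-14,
 * bit pattern 0x38800000) to zero, leaving exact zeros and NaNs alone.
 */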
static LLVMValueRef
do_quantize_to_f16(struct lp_build_nir_context *bld_base,
                   LLVMValueRef src)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result, cond, cond2, temp;

   result = LLVMBuildFPTrunc(builder, src, LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
   result = LLVMBuildFPExt(builder, result, bld_base->base.vec_type, "");

   temp = lp_build_abs(get_flt_bld(bld_base, 32), result);
   cond = LLVMBuildFCmp(builder, LLVMRealOGT,
                        LLVMBuildBitCast(builder, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, 0x38800000), bld_base->base.vec_type, ""),
                        temp, "");
   cond2 = LLVMBuildFCmp(builder, LLVMRealONE, temp, bld_base->base.zero, "");
   cond = LLVMBuildAnd(builder, cond, cond2, "");
   result = LLVMBuildSelect(builder, cond, bld_base->base.zero, result, "");
   return result;
}

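/*
 * Translate a single (per-channel) NIR ALU opcode into gallivm/LLVM IR.
 * Sources arrive already cast to the types the opcode expects; opcodes
 * not handled here trip the assert at the bottom.
 */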
static LLVMValueRef do_alu_action(struct lp_build_nir_context *bld_base,
                                  const nir_alu_instr *instr,
                                  unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS],
                                  LLVMValueRef src[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef result;

   switch (instr->op) {
   case nir_op_b2f32:
      result = emit_b2f(bld_base, src[0], 32);
      break;
   case nir_op_b2f64:
      result = emit_b2f(bld_base, src[0], 64);
      break;
   case nir_op_b2i8:
      result = emit_b2i(bld_base, src[0], 8);
      break;
   case nir_op_b2i16:
      result = emit_b2i(bld_base, src[0], 16);
      break;
   case nir_op_b2i32:
      result = emit_b2i(bld_base, src[0], 32);
      break;
   case nir_op_b2i64:
      result = emit_b2i(bld_base, src[0], 64);
      break;
   case nir_op_b32csel:
      result = emit_b32csel(bld_base, src_bit_size, src);
      break;
   case nir_op_bit_count:
      result = lp_build_popcount(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_bitfield_select:
      result = lp_build_xor(&bld_base->uint_bld, src[2], lp_build_and(&bld_base->uint_bld, src[0], lp_build_xor(&bld_base->uint_bld, src[1], src[2])));
      break;
   case nir_op_bitfield_reverse:
      result = lp_build_bitfield_reverse(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_f2b32:
      result = flt_to_bool32(bld_base, src_bit_size[0], src[0]);
      break;
   case nir_op_f2f16:
      if (src_bit_size[0] == 64)
         src[0] = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      result = LLVMBuildFPTrunc(builder, src[0],
                                LLVMVectorType(LLVMHalfTypeInContext(gallivm->context), bld_base->base.type.length), "");
      break;
   case nir_op_f2f32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildFPExt(builder, src[0],
                                 bld_base->base.vec_type, "");
      else
         result = LLVMBuildFPTrunc(builder, src[0],
                                   bld_base->base.vec_type, "");
      break;
   case nir_op_f2f64:
      result = LLVMBuildFPExt(builder, src[0],
                              bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_f2i8:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2i16:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2i32:
      result = LLVMBuildFPToSI(builder, src[0], bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2u8:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_f2u16:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_f2u32:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->base.int_vec_type, "");
      break;
   case nir_op_f2i64:
      result = LLVMBuildFPToSI(builder,
                               src[0],
                               bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_f2u64:
      result = LLVMBuildFPToUI(builder,
                               src[0],
                               bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_fabs:
      result = lp_build_abs(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fadd:
      result = lp_build_add(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fceil:
      result = lp_build_ceil(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fcos:
      result = lp_build_cos(&bld_base->base, src[0]);
      break;
   case nir_op_fddx:
   case nir_op_fddx_coarse:
   case nir_op_fddx_fine:
      result = lp_build_ddx(&bld_base->base, src[0]);
      break;
   case nir_op_fddy:
   case nir_op_fddy_coarse:
   case nir_op_fddy_fine:
      result = lp_build_ddy(&bld_base->base, src[0]);
      break;
   case nir_op_fdiv:
      result = lp_build_div(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_feq32:
      result = fcmp32(bld_base, PIPE_FUNC_EQUAL, src_bit_size[0], src);
      break;
   case nir_op_fexp2:
      result = lp_build_exp2(&bld_base->base, src[0]);
      break;
   case nir_op_ffloor:
      result = lp_build_floor(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ffma:
      result = lp_build_fmuladd(builder, src[0], src[1], src[2]);
      break;
   case nir_op_ffract: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      LLVMValueRef tmp = lp_build_floor(flt_bld, src[0]);
      result = lp_build_sub(flt_bld, src[0], tmp);
      break;
   }
   case nir_op_fge32:
      result = fcmp32(bld_base, PIPE_FUNC_GEQUAL, src_bit_size[0], src);
      break;
   case nir_op_find_lsb: {
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      result = lp_build_cttz(int_bld, src[0]);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else if (src_bit_size[0] > 32)
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_flog2:
      result = lp_build_log2_safe(&bld_base->base, src[0]);
      break;
   case nir_op_flt:
   case nir_op_flt32:
      result = fcmp32(bld_base, PIPE_FUNC_LESS, src_bit_size[0], src);
      break;
   case nir_op_fmax:
   case nir_op_fmin: {
      enum gallivm_nan_behavior minmax_nan;
      int first = 0;

      /* If one of the sources is known to be a number (i.e., not NaN), then
       * better code can be generated by passing that information along.
       */
      if (is_a_number(bld_base->range_ht, instr, 1,
                      0 /* unused num_components */,
                      NULL /* unused swizzle */)) {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else if (is_a_number(bld_base->range_ht, instr, 0,
                             0 /* unused num_components */,
                             NULL /* unused swizzle */)) {
         first = 1;
         minmax_nan = GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN;
      } else {
         minmax_nan = GALLIVM_NAN_RETURN_OTHER;
      }

      if (instr->op == nir_op_fmin) {
         result = lp_build_min_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      } else {
         result = lp_build_max_ext(get_flt_bld(bld_base, src_bit_size[0]),
                                   src[first], src[1 - first], minmax_nan);
      }
      break;
   }
   case nir_op_fmod: {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, src_bit_size[0]);
      result = lp_build_div(flt_bld, src[0], src[1]);
      result = lp_build_floor(flt_bld, result);
      result = lp_build_mul(flt_bld, src[1], result);
      result = lp_build_sub(flt_bld, src[0], result);
      break;
   }
   case nir_op_fmul:
      result = lp_build_mul(get_flt_bld(bld_base, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_fneu32:
      result = fcmp32(bld_base, PIPE_FUNC_NOTEQUAL, src_bit_size[0], src);
      break;
   case nir_op_fneg:
      result = lp_build_negate(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fpow:
      result = lp_build_pow(&bld_base->base, src[0], src[1]);
      break;
   case nir_op_fquantize2f16:
      result = do_quantize_to_f16(bld_base, src[0]);
      break;
   case nir_op_frcp:
      result = lp_build_rcp(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fround_even:
      result = lp_build_round(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_frsq:
      result = lp_build_rsqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsat:
      result = lp_build_clamp_zero_one_nanzero(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsign:
      result = lp_build_sgn(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_fsin:
      result = lp_build_sin(&bld_base->base, src[0]);
      break;
   case nir_op_fsqrt:
      result = lp_build_sqrt(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_ftrunc:
      result = lp_build_trunc(get_flt_bld(bld_base, src_bit_size[0]), src[0]);
      break;
   case nir_op_i2b32:
      result = int_to_bool32(bld_base, src_bit_size[0], false, src[0]);
      break;
   case nir_op_i2f32:
      result = lp_build_int_to_float(&bld_base->base, src[0]);
      break;
   case nir_op_i2f64:
      result = lp_build_int_to_float(&bld_base->dbl_bld, src[0]);
      break;
   case nir_op_i2i8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->int8_bld.vec_type, "");
      break;
   case nir_op_i2i16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildSExt(builder, src[0], bld_base->int16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int16_bld.vec_type, "");
      break;
   case nir_op_i2i32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildSExt(builder, src[0], bld_base->int_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->int_bld.vec_type, "");
      break;
   case nir_op_i2i64:
      result = LLVMBuildSExt(builder, src[0], bld_base->int64_bld.vec_type, "");
      break;
   case nir_op_iabs:
      result = lp_build_abs(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_iadd:
      result = lp_build_add(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_iand:
      result = lp_build_and(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_idiv:
      result = do_int_divide(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ieq32:
      result = icmp32(bld_base, PIPE_FUNC_EQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ige32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ilt32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, false, src_bit_size[0], src);
      break;
   case nir_op_imax:
      result = lp_build_max(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imin:
      result = lp_build_min(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_imul:
   case nir_op_imul24:
      result = lp_build_mul(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_imul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, false, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ine32:
      result = icmp32(bld_base, PIPE_FUNC_NOTEQUAL, false, src_bit_size[0], src);
      break;
   case nir_op_ineg:
      result = lp_build_negate(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_inot:
      result = lp_build_not(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_ior:
      result = lp_build_or(get_int_bld(bld_base, false, src_bit_size[0]),
                           src[0], src[1]);
      break;
   case nir_op_imod:
   case nir_op_irem:
      result = do_int_mod(bld_base, false, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ishl: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shl(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_ishr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      struct lp_build_context *int_bld = get_int_bld(bld_base, false, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(int_bld, src[0], src[1]);
      break;
   }
   case nir_op_isign:
      result = lp_build_sgn(get_int_bld(bld_base, false, src_bit_size[0]), src[0]);
      break;
   case nir_op_isub:
      result = lp_build_sub(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_ixor:
      result = lp_build_xor(get_int_bld(bld_base, false, src_bit_size[0]),
                            src[0], src[1]);
      break;
   case nir_op_mov:
      result = src[0];
      break;
   case nir_op_unpack_64_2x32_split_x:
      result = split_64bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_64_2x32_split_y:
      result = split_64bit(bld_base, src[0], true);
      break;

   case nir_op_pack_32_2x16_split: {
      LLVMValueRef tmp = merge_16bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->base.vec_type, "");
      break;
   }
   case nir_op_unpack_32_2x16_split_x:
      result = split_16bit(bld_base, src[0], false);
      break;
   case nir_op_unpack_32_2x16_split_y:
      result = split_16bit(bld_base, src[0], true);
      break;
   case nir_op_pack_64_2x32_split: {
      LLVMValueRef tmp = merge_64bit(bld_base, src[0], src[1]);
      result = LLVMBuildBitCast(builder, tmp, bld_base->uint64_bld.vec_type, "");
      break;
   }
   case nir_op_u2f32:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->base.vec_type, "");
      break;
   case nir_op_u2f64:
      result = LLVMBuildUIToFP(builder, src[0], bld_base->dbl_bld.vec_type, "");
      break;
   case nir_op_u2u8:
      result = LLVMBuildTrunc(builder, src[0], bld_base->uint8_bld.vec_type, "");
      break;
   case nir_op_u2u16:
      if (src_bit_size[0] < 16)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint16_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint16_bld.vec_type, "");
      break;
   case nir_op_u2u32:
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, src[0], bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, src[0], bld_base->uint_bld.vec_type, "");
      break;
   case nir_op_u2u64:
      result = LLVMBuildZExt(builder, src[0], bld_base->uint64_bld.vec_type, "");
      break;
   case nir_op_udiv:
      result = do_int_divide(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_ufind_msb: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      result = lp_build_ctlz(uint_bld, src[0]);
      result = lp_build_sub(uint_bld, lp_build_const_int_vec(gallivm, uint_bld->type, src_bit_size[0] - 1), result);
      if (src_bit_size[0] < 32)
         result = LLVMBuildZExt(builder, result, bld_base->uint_bld.vec_type, "");
      else
         result = LLVMBuildTrunc(builder, result, bld_base->uint_bld.vec_type, "");
      break;
   }
   case nir_op_uge32:
      result = icmp32(bld_base, PIPE_FUNC_GEQUAL, true, src_bit_size[0], src);
      break;
   case nir_op_ult32:
      result = icmp32(bld_base, PIPE_FUNC_LESS, true, src_bit_size[0], src);
      break;
   case nir_op_umax:
      result = lp_build_max(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umin:
      result = lp_build_min(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1]);
      break;
   case nir_op_umod:
      result = do_int_mod(bld_base, true, src_bit_size[0], src[0], src[1]);
      break;
   case nir_op_umul_high: {
      LLVMValueRef hi_bits;
      lp_build_mul_32_lohi(get_int_bld(bld_base, true, src_bit_size[0]), src[0], src[1], &hi_bits);
      result = hi_bits;
      break;
   }
   case nir_op_ushr: {
      struct lp_build_context *uint_bld = get_int_bld(bld_base, true, src_bit_size[0]);
      if (src_bit_size[0] == 64)
         src[1] = LLVMBuildZExt(builder, src[1], uint_bld->vec_type, "");
      if (src_bit_size[0] < 32)
         src[1] = LLVMBuildTrunc(builder, src[1], uint_bld->vec_type, "");
      src[1] = lp_build_and(uint_bld, src[1], lp_build_const_int_vec(gallivm, uint_bld->type, (src_bit_size[0] - 1)));
      result = lp_build_shr(uint_bld, src[0], src[1]);
      break;
   }
   default:
      assert(0);
      break;
   }
   return result;
}

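/*
 * Visit one NIR ALU instruction: fetch and swizzle the sources (vecN
 * constructors consume scalars, fsumN reduces a whole vector, everything
 * else is per-channel), run do_alu_action on each channel, and cast the
 * results to the destination type before storing them.
 */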
static void visit_alu(struct lp_build_nir_context *bld_base, const nir_alu_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef src[NIR_MAX_VEC_COMPONENTS];
   unsigned src_bit_size[NIR_MAX_VEC_COMPONENTS];
   unsigned num_components = nir_dest_num_components(instr->dest.dest);
   unsigned src_components;
   switch (instr->op) {
   case nir_op_vec2:
   case nir_op_vec3:
   case nir_op_vec4:
   case nir_op_vec8:
   case nir_op_vec16:
      src_components = 1;
      break;
   case nir_op_pack_half_2x16:
      src_components = 2;
      break;
   case nir_op_unpack_half_2x16:
      src_components = 1;
      break;
   case nir_op_cube_face_coord_amd:
   case nir_op_cube_face_index_amd:
      src_components = 3;
      break;
   case nir_op_fsum2:
   case nir_op_fsum3:
   case nir_op_fsum4:
      src_components = nir_op_infos[instr->op].input_sizes[0];
      break;
   default:
      src_components = num_components;
      break;
   }
   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
      src[i] = get_alu_src(bld_base, instr->src[i], src_components);
      src_bit_size[i] = nir_src_bit_size(instr->src[i].src);
   }

   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   if (instr->op == nir_op_vec4 || instr->op == nir_op_vec3 || instr->op == nir_op_vec2 || instr->op == nir_op_vec8 || instr->op == nir_op_vec16) {
      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
         result[i] = cast_type(bld_base, src[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
      }
   } else if (instr->op == nir_op_fsum4 || instr->op == nir_op_fsum3 || instr->op == nir_op_fsum2) {
      for (unsigned c = 0; c < nir_op_infos[instr->op].input_sizes[0]; c++) {
         LLVMValueRef temp_chan = LLVMBuildExtractValue(gallivm->builder,
                                                        src[0], c, "");
         temp_chan = cast_type(bld_base, temp_chan, nir_op_infos[instr->op].input_types[0], src_bit_size[0]);
         result[0] = (c == 0) ? temp_chan : lp_build_add(get_flt_bld(bld_base, src_bit_size[0]), result[0], temp_chan);
      }
   } else {
      for (unsigned c = 0; c < num_components; c++) {
         LLVMValueRef src_chan[NIR_MAX_VEC_COMPONENTS];

         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
            if (num_components > 1) {
               src_chan[i] = LLVMBuildExtractValue(gallivm->builder,
                                                   src[i], c, "");
            } else
               src_chan[i] = src[i];
            src_chan[i] = cast_type(bld_base, src_chan[i], nir_op_infos[instr->op].input_types[i], src_bit_size[i]);
         }
         result[c] = do_alu_action(bld_base, instr, src_bit_size, src_chan);
         result[c] = cast_type(bld_base, result[c], nir_op_infos[instr->op].output_type, nir_dest_bit_size(instr->dest.dest));
      }
   }
   assign_alu_dest(bld_base, &instr->dest, result);
}

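/*
 * Materialize a load_const as splatted integer vector constants, one per
 * component; unused trailing components are zeroed out.
 */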
static void visit_load_const(struct lp_build_nir_context *bld_base,
                             const nir_load_const_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
   for (unsigned i = 0; i < instr->def.num_components; i++)
      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
   memset(&result[instr->def.num_components], 0,
          (NIR_MAX_VEC_COMPONENTS - instr->def.num_components) * sizeof(LLVMValueRef));
   assign_ssa_dest(bld_base, &instr->def, result);
}

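/*
 * Walk a deref chain and split it into a constant slot offset plus an
 * optional per-lane indirect offset, counted in attribute slots.  For
 * per-vertex IO the leading array index is returned separately as the
 * vertex index (constant or indirect).
 */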
static void
get_deref_offset(struct lp_build_nir_context *bld_base, nir_deref_instr *instr,
                 bool vs_in, unsigned *vertex_index_out,
                 LLVMValueRef *vertex_index_ref,
                 unsigned *const_out, LLVMValueRef *indir_out)
{
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   nir_variable *var = nir_deref_instr_get_variable(instr);
   nir_deref_path path;
   unsigned idx_lvl = 1;

   nir_deref_path_init(&path, instr, NULL);

   if (vertex_index_out != NULL || vertex_index_ref != NULL) {
      if (vertex_index_ref) {
         *vertex_index_ref = get_src(bld_base, path.path[idx_lvl]->arr.index);
         if (vertex_index_out)
            *vertex_index_out = 0;
      } else {
         *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
      }
      ++idx_lvl;
   }

   uint32_t const_offset = 0;
   LLVMValueRef offset = NULL;

   if (var->data.compact && nir_src_is_const(instr->arr.index)) {
      assert(instr->deref_type == nir_deref_type_array);
      const_offset = nir_src_as_uint(instr->arr.index);
      goto out;
   }

   for (; path.path[idx_lvl]; ++idx_lvl) {
      const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
      if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
         unsigned index = path.path[idx_lvl]->strct.index;

         for (unsigned i = 0; i < index; i++) {
            const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
            const_offset += glsl_count_attribute_slots(ft, vs_in);
         }
      } else if (path.path[idx_lvl]->deref_type == nir_deref_type_array) {
         unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
         if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
            const_offset += nir_src_comp_as_int(path.path[idx_lvl]->arr.index, 0) * size;
         } else {
            LLVMValueRef idx_src = get_src(bld_base, path.path[idx_lvl]->arr.index);
            idx_src = cast_type(bld_base, idx_src, nir_type_uint, 32);
            LLVMValueRef array_off = lp_build_mul(&bld_base->uint_bld, lp_build_const_int_vec(bld_base->base.gallivm, bld_base->base.type, size),
                                                  idx_src);
            if (offset)
               offset = lp_build_add(&bld_base->uint_bld, offset, array_off);
            else
               offset = array_off;
         }
      } else
         unreachable("Unhandled deref type in get_deref_offset");
   }

out:
   nir_deref_path_finish(&path);

   if (const_offset && offset)
      offset = LLVMBuildAdd(builder, offset,
                            lp_build_const_int_vec(bld_base->base.gallivm, bld_base->uint_bld.type, const_offset),
                            "");
   *const_out = const_offset;
   *indir_out = offset;
}

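/*
 * Lowered-IO path: rebuild a throwaway nir_variable from the intrinsic's
 * IO semantics so the backend's load_var/store_var hooks can be shared
 * with the deref-based path.  Only an offset of 0 or a fully indirect
 * offset is expected here.
 */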
static void
visit_load_input(struct lp_build_nir_context *bld_base,
                 nir_intrinsic_instr *instr,
                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   bld_base->load_var(bld_base, nir_var_shader_in, nc, bit_size, &var, 0, NULL, 0, indir_index, result);
}

static void
visit_store_output(struct lp_build_nir_context *bld_base,
                   nir_intrinsic_instr *instr)
{
   nir_variable var = {0};
   var.data.location = nir_intrinsic_io_semantics(instr).location;
   var.data.driver_location = nir_intrinsic_base(instr);
   var.data.location_frac = nir_intrinsic_component(instr);

   unsigned mask = nir_intrinsic_write_mask(instr);

   unsigned bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef src = get_src(bld_base, instr->src[0]);

   nir_src offset = *nir_get_io_offset_src(instr);
   bool indirect = !nir_src_is_const(offset);
   if (!indirect)
      assert(nir_src_as_uint(offset) == 0);
   LLVMValueRef indir_index = indirect ? get_src(bld_base, offset) : NULL;

   if (mask == 0x1 && LLVMGetTypeKind(LLVMTypeOf(src)) == LLVMArrayTypeKind) {
      src = LLVMBuildExtractValue(bld_base->base.gallivm->builder,
                                  src, 0, "");
   }

   bld_base->store_var(bld_base, nir_var_shader_out, util_last_bit(mask),
                       bit_size, &var, mask, NULL, 0, indir_index, src);
}

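/*
 * Deref-based variable IO: derive the stage-specific addressing (vertex
 * index for GS inputs, indirect vertex index for tessellation IO) before
 * handing off to the backend.
 */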
static void visit_load_var(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef indir_vertex_index = NULL;
   unsigned vertex_index = 0;
   unsigned nc = nir_dest_num_components(instr->dest);
   unsigned bit_size = nir_dest_bit_size(instr->dest);
   if (var) {
      bool vs_in = bld_base->shader->info.stage == MESA_SHADER_VERTEX &&
         var->data.mode == nir_var_shader_in;
      bool gs_in = bld_base->shader->info.stage == MESA_SHADER_GEOMETRY &&
         var->data.mode == nir_var_shader_in;
      bool tcs_in = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_in;
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      bool tes_in = bld_base->shader->info.stage == MESA_SHADER_TESS_EVAL &&
         var->data.mode == nir_var_shader_in && !var->data.patch;

      mode = var->data.mode;

      get_deref_offset(bld_base, deref, vs_in, gs_in ? &vertex_index : NULL, (tcs_in || tcs_out || tes_in) ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->load_var(bld_base, mode, nc, bit_size, var, vertex_index, indir_vertex_index, const_index, indir_index, result);
}

static void
visit_store_var(struct lp_build_nir_context *bld_base,
                nir_intrinsic_instr *instr)
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   assert(util_bitcount(deref->modes) == 1);
   nir_variable_mode mode = deref->modes;
   int writemask = instr->const_index[0];
   unsigned bit_size = nir_src_bit_size(instr->src[1]);
   LLVMValueRef src = get_src(bld_base, instr->src[1]);
   unsigned const_index = 0;
   LLVMValueRef indir_index, indir_vertex_index = NULL;
   if (var) {
      bool tcs_out = bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL &&
         var->data.mode == nir_var_shader_out && !var->data.patch;
      get_deref_offset(bld_base, deref, false, NULL, tcs_out ? &indir_vertex_index : NULL,
                       &const_index, &indir_index);
   }
   bld_base->store_var(bld_base, mode, instr->num_components, bit_size, var, writemask, indir_vertex_index, const_index, indir_index, src);
}

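/*
 * UBO and push-constant loads share the backend's load_ubo hook; the
 * block index is scalarized by taking lane 0 (it is assumed to be
 * dynamically uniform), and push constants are read as buffer 0.
 */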
static void visit_load_ubo(struct lp_build_nir_context *bld_base,
                           nir_intrinsic_instr *instr,
                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);

   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[1]);
   idx = LLVMBuildExtractElement(builder, idx, lp_build_const_int32(gallivm, 0), "");
   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      offset_is_uniform, idx, offset, result);
}

static void visit_load_push_constant(struct lp_build_nir_context *bld_base,
                                     nir_intrinsic_instr *instr,
                                     LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = lp_build_const_int32(gallivm, 0);
   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);

   bld_base->load_ubo(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      offset_is_uniform, idx, offset, result);
}


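/*
 * SSBO access: loads, masked stores, size queries and atomics all funnel
 * into the backend's memory hooks, keyed by the buffer index vector.
 */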
static void visit_load_ssbo(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr,
                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      idx, offset, result);
}

static void visit_store_ssbo(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef idx = get_src(bld_base, instr->src[1]);
   LLVMValueRef offset = get_src(bld_base, instr->src[2]);
   int writemask = instr->const_index[0];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, idx, offset, val);
}

static void visit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
   result[0] = bld_base->get_ssbo_size(bld_base, idx);
}

static void visit_ssbo_atomic(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef idx = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   LLVMValueRef val = get_src(bld_base, instr->src[2]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[3]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, idx, offset, val, val2, &result[0]);
}

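/*
 * Image intrinsics: resolve the image deref to a binding (plus optional
 * indirect offset), map the GLSL dimension to a pipe target, fix up
 * 1D-array coordinates (the layer moves from .y to .z), and dispatch via
 * lp_img_params.
 */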
static void visit_load_image(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];

   params.coords = coords;
   params.outdata = result;
   params.img_op = LP_IMG_LOAD;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS || glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS)
      params.ms_index = cast_type(bld_base, get_src(bld_base, instr->src[2]), nir_type_uint, 32);
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;
   bld_base->image_op(bld_base, &params);
}

static void visit_store_image(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   struct lp_img_params params;
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;

   for (unsigned i = 0; i < 4; i++) {
      params.indata[i] = LLVMBuildExtractValue(builder, in_val, i, "");
      params.indata[i] = LLVMBuildBitCast(builder, params.indata[i], bld_base->base.vec_type, "");
   }
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   params.img_op = LP_IMG_STORE;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   bld_base->image_op(bld_base, &params);
}

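/*
 * Image atomics: pick the matching LLVMAtomicRMWBinOp; compare-and-swap
 * carries its comparator in indata2 and uses LP_IMG_ATOMIC_CAS instead.
 */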
static void visit_atomic_image(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_img_params params;
   LLVMValueRef coord_val = get_src(bld_base, instr->src[1]);
   LLVMValueRef in_val = get_src(bld_base, instr->src[3]);
   LLVMValueRef coords[5];
   const struct glsl_type *type = glsl_without_array(var->type);
   unsigned const_index;
   LLVMValueRef indir_index;
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   memset(&params, 0, sizeof(params));

   switch (instr->intrinsic) {
   case nir_intrinsic_image_deref_atomic_add:
      params.op = LLVMAtomicRMWBinOpAdd;
      break;
   case nir_intrinsic_image_deref_atomic_exchange:
      params.op = LLVMAtomicRMWBinOpXchg;
      break;
   case nir_intrinsic_image_deref_atomic_and:
      params.op = LLVMAtomicRMWBinOpAnd;
      break;
   case nir_intrinsic_image_deref_atomic_or:
      params.op = LLVMAtomicRMWBinOpOr;
      break;
   case nir_intrinsic_image_deref_atomic_xor:
      params.op = LLVMAtomicRMWBinOpXor;
      break;
   case nir_intrinsic_image_deref_atomic_umin:
      params.op = LLVMAtomicRMWBinOpUMin;
      break;
   case nir_intrinsic_image_deref_atomic_umax:
      params.op = LLVMAtomicRMWBinOpUMax;
      break;
   case nir_intrinsic_image_deref_atomic_imin:
      params.op = LLVMAtomicRMWBinOpMin;
      break;
   case nir_intrinsic_image_deref_atomic_imax:
      params.op = LLVMAtomicRMWBinOpMax;
      break;
   default:
      break;
   }

   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   for (unsigned i = 0; i < 4; i++)
      coords[i] = LLVMBuildExtractValue(builder, coord_val, i, "");
   if (params.target == PIPE_TEXTURE_1D_ARRAY)
      coords[2] = coords[1];
   params.coords = coords;
   if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_MS)
      params.ms_index = get_src(bld_base, instr->src[2]);
   if (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) {
      LLVMValueRef cas_val = get_src(bld_base, instr->src[4]);
      params.indata[0] = in_val;
      params.indata2[0] = cas_val;
   } else
      params.indata[0] = in_val;

   params.outdata = result;
   params.img_op = (instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
   params.image_index = var->data.binding + (indir_index ? 0 : const_index);
   params.image_index_offset = indir_index;

   bld_base->image_op(bld_base, &params);
}


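/*
 * Image size/sample-count queries share lp_sampler_size_query_params;
 * samples_only selects the sample-count variant.
 */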
static void visit_image_size(struct lp_build_nir_context *bld_base,
                             nir_intrinsic_instr *instr,
                             LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;

   bld_base->image_size(bld_base, &params);
}

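/* imageSamples(): same size query as above, but with samples_only set so
 * only the MSAA sample count is returned.
 */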
static void visit_image_samples(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   struct lp_sampler_size_query_params params = { 0 };
   unsigned const_index;
   LLVMValueRef indir_index;
   const struct glsl_type *type = glsl_without_array(var->type);
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);

   params.texture_unit = var->data.binding + (indir_index ? 0 : const_index);
   params.texture_unit_offset = indir_index;
   params.target = glsl_sampler_to_pipe(glsl_get_sampler_dim(type), glsl_sampler_type_is_array(type));
   params.sizes_out = result;
   params.samples_only = true;

   bld_base->image_size(bld_base, &params);
}

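/* Workgroup-shared memory: loads and stores go through the load_mem /
 * store_mem callbacks with a NULL buffer index, which the backends treat
 * as addressing the shared-memory area rather than an SSBO.
 */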
static void visit_shared_load(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr,
                              LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   bld_base->load_mem(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                      NULL, offset, result);
}

static void visit_shared_store(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   int writemask = instr->const_index[1];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_mem(bld_base, writemask, nc, bitsize, NULL, offset, val);
}

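/* Shared-memory atomics: comp_swap carries its compare value in src[2];
 * the intrinsic enum itself selects the operation in the backend.
 */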
static void visit_shared_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_mem(bld_base, instr->intrinsic, bitsize, NULL, offset, val, val2, &result[0]);
}

static void visit_barrier(struct lp_build_nir_context *bld_base)
{
   bld_base->barrier(bld_base);
}

static void visit_discard(struct lp_build_nir_context *bld_base,
                          nir_intrinsic_instr *instr)
{
   LLVMValueRef cond = NULL;
   if (instr->intrinsic == nir_intrinsic_discard_if) {
      cond = get_src(bld_base, instr->src[0]);
      cond = cast_type(bld_base, cond, nir_type_int, 32);
   }
   bld_base->discard(bld_base, cond);
}

static void visit_load_kernel_input(struct lp_build_nir_context *bld_base,
                                    nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bool offset_is_uniform = nir_src_is_dynamically_uniform(instr->src[0]);
   bld_base->load_kernel_arg(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                             nir_src_bit_size(instr->src[0]),
                             offset_is_uniform, offset, result);
}

static void visit_load_global(struct lp_build_nir_context *bld_base,
                              nir_intrinsic_instr *instr, LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   bld_base->load_global(bld_base, nir_dest_num_components(instr->dest), nir_dest_bit_size(instr->dest),
                         nir_src_bit_size(instr->src[0]),
                         addr, result);
}

static void visit_store_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   LLVMValueRef addr = get_src(bld_base, instr->src[1]);
   int addr_bitsize = nir_src_bit_size(instr->src[1]);
   int writemask = instr->const_index[0];
   bld_base->store_global(bld_base, writemask, nc, bitsize, addr_bitsize, addr, val);
}

static void visit_global_atomic(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr,
                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef addr = get_src(bld_base, instr->src[0]);
   LLVMValueRef val = get_src(bld_base, instr->src[1]);
   LLVMValueRef val2 = NULL;
   int addr_bitsize = nir_src_bit_size(instr->src[0]);
   int val_bitsize = nir_src_bit_size(instr->src[1]);
   if (instr->intrinsic == nir_intrinsic_global_atomic_comp_swap)
      val2 = get_src(bld_base, instr->src[2]);

   bld_base->atomic_global(bld_base, instr->intrinsic, addr_bitsize,
                           val_bitsize, addr, val, val2, &result[0]);
}

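/* interpolateAt*() on fragment inputs: at_offset extracts a float2 offset
 * from src[1], at_sample passes the sample index instead, and at_centroid
 * just sets the centroid flag for the interp_at callback.
 */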
static void visit_interp(struct lp_build_nir_context *bld_base,
                         nir_intrinsic_instr *instr,
                         LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
   unsigned num_components = nir_dest_num_components(instr->dest);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   unsigned const_index;
   LLVMValueRef indir_index;
   LLVMValueRef offsets[2] = { NULL, NULL };
   get_deref_offset(bld_base, deref, false, NULL, NULL,
                    &const_index, &indir_index);
   bool centroid = instr->intrinsic == nir_intrinsic_interp_deref_at_centroid;
   bool sample = false;
   if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
      for (unsigned i = 0; i < 2; i++) {
         offsets[i] = LLVMBuildExtractValue(builder, get_src(bld_base, instr->src[1]), i, "");
         offsets[i] = cast_type(bld_base, offsets[i], nir_type_float, 32);
      }
   } else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
      offsets[0] = get_src(bld_base, instr->src[1]);
      offsets[0] = cast_type(bld_base, offsets[0], nir_type_int, 32);
      sample = true;
   }
   bld_base->interp_at(bld_base, num_components, var, centroid, sample, const_index, indir_index, offsets, result);
}

static void visit_load_scratch(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   LLVMValueRef offset = get_src(bld_base, instr->src[0]);

   bld_base->load_scratch(bld_base, nir_dest_num_components(instr->dest),
                          nir_dest_bit_size(instr->dest), offset, result);
}

static void visit_store_scratch(struct lp_build_nir_context *bld_base,
                                nir_intrinsic_instr *instr)
{
   LLVMValueRef val = get_src(bld_base, instr->src[0]);
   LLVMValueRef offset = get_src(bld_base, instr->src[1]);
   int writemask = instr->const_index[2];
   int nc = nir_src_num_components(instr->src[0]);
   int bitsize = nir_src_bit_size(instr->src[0]);
   bld_base->store_scratch(bld_base, writemask, nc, bitsize, offset, val);
}


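/* Central intrinsic dispatch: every NIR intrinsic the gallivm backend
 * supports is routed to its visit_* helper or bld_base callback here.
 * Any result written to result[0] is then assigned to the destination.
 */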
static void visit_intrinsic(struct lp_build_nir_context *bld_base,
                            nir_intrinsic_instr *instr)
{
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS] = {0};
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
      visit_load_input(bld_base, instr, result);
      break;
   case nir_intrinsic_store_output:
      visit_store_output(bld_base, instr);
      break;
   case nir_intrinsic_load_deref:
      visit_load_var(bld_base, instr, result);
      break;
   case nir_intrinsic_store_deref:
      visit_store_var(bld_base, instr);
      break;
   case nir_intrinsic_load_ubo:
      visit_load_ubo(bld_base, instr, result);
      break;
   case nir_intrinsic_load_push_constant:
      visit_load_push_constant(bld_base, instr, result);
      break;
   case nir_intrinsic_load_ssbo:
      visit_load_ssbo(bld_base, instr, result);
      break;
   case nir_intrinsic_store_ssbo:
      visit_store_ssbo(bld_base, instr);
      break;
   case nir_intrinsic_get_ssbo_size:
      visit_get_ssbo_size(bld_base, instr, result);
      break;
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
      bld_base->sysval_intrin(bld_base, instr, result);
      break;
   case nir_intrinsic_load_helper_invocation:
      bld_base->helper_invocation(bld_base, &result[0]);
      break;
   case nir_intrinsic_discard_if:
   case nir_intrinsic_discard:
      visit_discard(bld_base, instr);
      break;
   case nir_intrinsic_emit_vertex:
      bld_base->emit_vertex(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_end_primitive:
      bld_base->end_primitive(bld_base, nir_intrinsic_stream_id(instr));
      break;
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
      visit_ssbo_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_load:
      visit_load_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_store:
      visit_store_image(bld_base, instr);
      break;
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
      visit_atomic_image(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_size:
      visit_image_size(bld_base, instr, result);
      break;
   case nir_intrinsic_image_deref_samples:
      visit_image_samples(bld_base, instr, result);
      break;
   case nir_intrinsic_load_shared:
      visit_shared_load(bld_base, instr, result);
      break;
   case nir_intrinsic_store_shared:
      visit_shared_store(bld_base, instr);
      break;
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      visit_shared_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_control_barrier:
      visit_barrier(bld_base);
      break;
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_tcs_patch:
      break;
   case nir_intrinsic_load_kernel_input:
      visit_load_kernel_input(bld_base, instr, result);
      break;
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      visit_load_global(bld_base, instr, result);
      break;
   case nir_intrinsic_store_global:
      visit_store_global(bld_base, instr);
      break;
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
      visit_global_atomic(bld_base, instr, result);
      break;
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_vote_feq:
      bld_base->vote(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_elect:
      bld_base->elect(bld_base, result);
      break;
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
      bld_base->reduce(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, nir_src_bit_size(instr->src[0])), instr, result);
      break;
   case nir_intrinsic_ballot:
      bld_base->ballot(bld_base, cast_type(bld_base, get_src(bld_base, instr->src[0]), nir_type_int, 32), instr, result);
      break;
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation: {
      LLVMValueRef src1 = NULL;

      if (instr->intrinsic == nir_intrinsic_read_invocation)
         src1 = cast_type(bld_base, get_src(bld_base, instr->src[1]), nir_type_int, 32);
      bld_base->read_invocation(bld_base, get_src(bld_base, instr->src[0]), nir_src_bit_size(instr->src[0]), src1, result);
      break;
   }
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
      visit_interp(bld_base, instr, result);
      break;
   case nir_intrinsic_load_scratch:
      visit_load_scratch(bld_base, instr, result);
      break;
   case nir_intrinsic_store_scratch:
      visit_store_scratch(bld_base, instr);
      break;
   default:
      fprintf(stderr, "Unsupported intrinsic: ");
      nir_print_instr(&instr->instr, stderr);
      fprintf(stderr, "\n");
      assert(0);
      break;
   }
   if (result[0]) {
      assign_dest(bld_base, &instr->dest, result);
   }
}

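/* textureSize()/textureQueryLevels()/textureSamples(): resolved via the
 * tex_size callback. query_levels reuses the size query with LOD 0 and
 * reads the level count from component 3 of the result.
 */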
static void visit_txs(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct lp_sampler_size_query_params params = { 0 };
   LLVMValueRef sizes_out[NIR_MAX_VEC_COMPONENTS];
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_lod:
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;
      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      default:
         break;
      }
   }

   params.target = glsl_sampler_to_pipe(instr->sampler_dim, instr->is_array);
   params.texture_unit = instr->texture_index;
   params.explicit_lod = explicit_lod;
   params.is_sviewinfo = TRUE;
   params.sizes_out = sizes_out;
   params.samples_only = (instr->op == nir_texop_texture_samples);
   params.texture_unit_offset = texture_unit_offset;

   if (instr->op == nir_texop_query_levels)
      params.explicit_lod = bld_base->uint_bld.zero;
   bld_base->tex_size(bld_base, &params);
   assign_dest(bld_base, &instr->dest, &sizes_out[instr->op == nir_texop_query_levels ? 3 : 0]);
}

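/* Pick how the LOD varies across the SIMD vector: a dynamically uniform
 * LOD can be computed once (scalar); otherwise fragment shaders use
 * per-quad LOD (unless GALLIVM_PERF_NO_QUAD_LOD forces per-element) and
 * all other stages fall back to per-element.
 */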
static enum lp_sampler_lod_property lp_build_nir_lod_property(struct lp_build_nir_context *bld_base,
                                                              nir_src lod_src)
{
   enum lp_sampler_lod_property lod_property;

   if (nir_src_is_dynamically_uniform(lod_src))
      lod_property = LP_SAMPLER_LOD_SCALAR;
   else if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      else
         lod_property = LP_SAMPLER_LOD_PER_QUAD;
   }
   else
      lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   return lod_property;
}

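/* Translate a NIR texture instruction into an lp_sampler_params sample
 * key plus coords/derivatives/offsets, then hand it to the tex callback.
 * Size/level/sample-count queries are diverted to visit_txs() up front.
 */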
static void visit_tex(struct lp_build_nir_context *bld_base, nir_tex_instr *instr)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   LLVMValueRef explicit_lod = NULL, projector = NULL, ms_index = NULL;
   struct lp_sampler_params params;
   struct lp_derivatives derivs;
   unsigned sample_key = 0;
   nir_deref_instr *texture_deref_instr = NULL;
   nir_deref_instr *sampler_deref_instr = NULL;
   LLVMValueRef texture_unit_offset = NULL;
   LLVMValueRef texel[NIR_MAX_VEC_COMPONENTS];
   unsigned lod_src = 0;
   LLVMValueRef coord_undef = LLVMGetUndef(bld_base->base.int_vec_type);

   memset(&params, 0, sizeof(params));
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   if (instr->op == nir_texop_txs || instr->op == nir_texop_query_levels || instr->op == nir_texop_texture_samples) {
      visit_txs(bld_base, instr);
      return;
   }
   if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      sample_key |= LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
   else if (instr->op == nir_texop_tg4) {
      sample_key |= LP_SAMPLER_OP_GATHER << LP_SAMPLER_OP_TYPE_SHIFT;
      sample_key |= (instr->component << LP_SAMPLER_GATHER_COMP_SHIFT);
   } else if (instr->op == nir_texop_lod)
      sample_key |= LP_SAMPLER_OP_LODQ << LP_SAMPLER_OP_TYPE_SHIFT;
   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_coord: {
         LLVMValueRef coord = get_src(bld_base, instr->src[i].src);
         if (instr->coord_components == 1)
            coords[0] = coord;
         else {
            for (unsigned chan = 0; chan < instr->coord_components; ++chan)
               coords[chan] = LLVMBuildExtractValue(builder, coord,
                                                    chan, "");
         }
         for (unsigned chan = instr->coord_components; chan < 5; chan++)
            coords[chan] = coord_undef;

         break;
      }
      case nir_tex_src_texture_deref:
         texture_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_sampler_deref:
         sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
         break;
      case nir_tex_src_projector:
         projector = lp_build_rcp(&bld_base->base, cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32));
         break;
      case nir_tex_src_comparator:
         sample_key |= LP_SAMPLER_SHADOW;
         coords[4] = get_src(bld_base, instr->src[i].src);
         coords[4] = cast_type(bld_base, coords[4], nir_type_float, 32);
         break;
      case nir_tex_src_bias:
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_lod:
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
         lod_src = i;
         if (instr->op == nir_texop_txf)
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         else
            explicit_lod = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_float, 32);
         break;
      case nir_tex_src_ddx: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddx[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddx[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddx[chan] = cast_type(bld_base, derivs.ddx[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_ddy: {
         int deriv_cnt = instr->coord_components;
         if (instr->is_array)
            deriv_cnt--;
         LLVMValueRef deriv_val = get_src(bld_base, instr->src[i].src);
         if (deriv_cnt == 1)
            derivs.ddy[0] = deriv_val;
         else
            for (unsigned chan = 0; chan < deriv_cnt; ++chan)
               derivs.ddy[chan] = LLVMBuildExtractValue(builder, deriv_val,
                                                        chan, "");
         for (unsigned chan = 0; chan < deriv_cnt; ++chan)
            derivs.ddy[chan] = cast_type(bld_base, derivs.ddy[chan], nir_type_float, 32);
         break;
      }
      case nir_tex_src_offset: {
         int offset_cnt = instr->coord_components;
         if (instr->is_array)
            offset_cnt--;
         LLVMValueRef offset_val = get_src(bld_base, instr->src[i].src);
         sample_key |= LP_SAMPLER_OFFSETS;
         if (offset_cnt == 1)
            offsets[0] = cast_type(bld_base, offset_val, nir_type_int, 32);
         else {
            for (unsigned chan = 0; chan < offset_cnt; ++chan) {
               offsets[chan] = LLVMBuildExtractValue(builder, offset_val,
                                                     chan, "");
               offsets[chan] = cast_type(bld_base, offsets[chan], nir_type_int, 32);
            }
         }
         break;
      }
      case nir_tex_src_ms_index:
         sample_key |= LP_SAMPLER_FETCH_MS;
         ms_index = cast_type(bld_base, get_src(bld_base, instr->src[i].src), nir_type_int, 32);
         break;

      case nir_tex_src_texture_offset:
         texture_unit_offset = get_src(bld_base, instr->src[i].src);
         break;
      case nir_tex_src_sampler_offset:
         break;
      default:
         assert(0);
         break;
      }
   }
   if (!sampler_deref_instr)
      sampler_deref_instr = texture_deref_instr;

   if (explicit_lod)
      lod_property = lp_build_nir_lod_property(bld_base, instr->src[lod_src].src);

   if (instr->op == nir_texop_tex || instr->op == nir_texop_tg4 || instr->op == nir_texop_txb ||
       instr->op == nir_texop_txl || instr->op == nir_texop_txd || instr->op == nir_texop_lod)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_float, 32);
   else if (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = cast_type(bld_base, coords[chan], nir_type_int, 32);

   if (instr->is_array && instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
      /* move layer coord for 1d arrays. */
      coords[2] = coords[1];
      coords[1] = coord_undef;
   }

   if (projector) {
      for (unsigned chan = 0; chan < instr->coord_components; ++chan)
         coords[chan] = lp_build_mul(&bld_base->base, coords[chan], projector);
      if (sample_key & LP_SAMPLER_SHADOW)
         coords[4] = lp_build_mul(&bld_base->base, coords[4], projector);
   }

   uint32_t samp_base_index = 0, tex_base_index = 0;
   if (!sampler_deref_instr) {
      int samp_src_index = nir_tex_instr_src_index(instr, nir_tex_src_sampler_handle);
      if (samp_src_index == -1) {
         samp_base_index = instr->sampler_index;
      }
   }
   if (!texture_deref_instr) {
      int tex_src_index = nir_tex_instr_src_index(instr, nir_tex_src_texture_handle);
      if (tex_src_index == -1) {
         tex_base_index = instr->texture_index;
      }
   }

   if (instr->op == nir_texop_txd) {
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      params.derivs = &derivs;
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD)
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         else
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
      } else
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
   }

   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
   params.sample_key = sample_key;
   params.offsets = offsets;
   params.texture_index = tex_base_index;
   params.texture_index_offset = texture_unit_offset;
   params.sampler_index = samp_base_index;
   params.coords = coords;
   params.texel = texel;
   params.lod = explicit_lod;
   params.ms_index = ms_index;
   bld_base->tex(bld_base, &params);
   assign_dest(bld_base, &instr->dest, texel);
}

static void visit_ssa_undef(struct lp_build_nir_context *bld_base,
                            const nir_ssa_undef_instr *instr)
{
   unsigned num_components = instr->def.num_components;
   LLVMValueRef undef[NIR_MAX_VEC_COMPONENTS];
   struct lp_build_context *undef_bld = get_int_bld(bld_base, true, instr->def.bit_size);
   for (unsigned i = 0; i < num_components; i++)
      undef[i] = LLVMGetUndef(undef_bld->vec_type);
   memset(&undef[num_components], 0, NIR_MAX_VEC_COMPONENTS - num_components);
   assign_ssa_dest(bld_base, &instr->def, undef);
}

static void visit_jump(struct lp_build_nir_context *bld_base,
                       const nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break:
      bld_base->break_stmt(bld_base);
      break;
   case nir_jump_continue:
      bld_base->continue_stmt(bld_base);
      break;
   default:
      unreachable("Unknown jump instr\n");
   }
}

static void visit_deref(struct lp_build_nir_context *bld_base,
                        nir_deref_instr *instr)
{
   if (!nir_deref_mode_is_one_of(instr, nir_var_mem_shared |
                                        nir_var_mem_global))
      return;
   LLVMValueRef result = NULL;
   switch(instr->deref_type) {
   case nir_deref_type_var: {
      struct hash_entry *entry = _mesa_hash_table_search(bld_base->vars, instr->var);
      result = entry->data;
      break;
   }
   default:
      unreachable("Unhandled deref_instr deref type");
   }

   assign_ssa(bld_base, instr->dest.ssa.index, result);
}

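/* Walk every instruction in a NIR block and dispatch on its type.
 * Phis are not expected here: nir_convert_from_ssa() has already turned
 * them into register moves before translation starts.
 */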
static void visit_block(struct lp_build_nir_context *bld_base, nir_block *block)
{
   nir_foreach_instr(instr, block)
   {
      switch (instr->type) {
      case nir_instr_type_alu:
         visit_alu(bld_base, nir_instr_as_alu(instr));
         break;
      case nir_instr_type_load_const:
         visit_load_const(bld_base, nir_instr_as_load_const(instr));
         break;
      case nir_instr_type_intrinsic:
         visit_intrinsic(bld_base, nir_instr_as_intrinsic(instr));
         break;
      case nir_instr_type_tex:
         visit_tex(bld_base, nir_instr_as_tex(instr));
         break;
      case nir_instr_type_phi:
         assert(0);
         break;
      case nir_instr_type_ssa_undef:
         visit_ssa_undef(bld_base, nir_instr_as_ssa_undef(instr));
         break;
      case nir_instr_type_jump:
         visit_jump(bld_base, nir_instr_as_jump(instr));
         break;
      case nir_instr_type_deref:
         visit_deref(bld_base, nir_instr_as_deref(instr));
         break;
      default:
         fprintf(stderr, "Unknown NIR instr type: ");
         nir_print_instr(instr, stderr);
         fprintf(stderr, "\n");
         abort();
      }
   }
}

static void visit_if(struct lp_build_nir_context *bld_base, nir_if *if_stmt)
{
   LLVMValueRef cond = get_src(bld_base, if_stmt->condition);

   bld_base->if_cond(bld_base, cond);
   visit_cf_list(bld_base, &if_stmt->then_list);

   if (!exec_list_is_empty(&if_stmt->else_list)) {
      bld_base->else_stmt(bld_base);
      visit_cf_list(bld_base, &if_stmt->else_list);
   }
   bld_base->endif_stmt(bld_base);
}

static void visit_loop(struct lp_build_nir_context *bld_base, nir_loop *loop)
{
   bld_base->bgnloop(bld_base);
   visit_cf_list(bld_base, &loop->body);
   bld_base->endloop(bld_base);
}

static void visit_cf_list(struct lp_build_nir_context *bld_base,
                          struct exec_list *list)
{
   foreach_list_typed(nir_cf_node, node, node, list)
   {
      switch (node->type) {
      case nir_cf_node_block:
         visit_block(bld_base, nir_cf_node_as_block(node));
         break;

      case nir_cf_node_if:
         visit_if(bld_base, nir_cf_node_as_if(node));
         break;

      case nir_cf_node_loop:
         visit_loop(bld_base, nir_cf_node_as_loop(node));
         break;

      default:
         assert(0);
      }
   }
}

static void
handle_shader_output_decl(struct lp_build_nir_context *bld_base,
                          struct nir_shader *nir,
                          struct nir_variable *variable)
{
   bld_base->emit_var_decl(bld_base, variable);
}

/* Vector registers are stored as arrays on the LLVM side so we can GEP
 * into them: exec-mask stores need to address a single component at a
 * time. The arrays are laid out component-major:
 *    0.x, 1.x, 2.x, 3.x
 *    0.y, 1.y, 2.y, 3.y
 *    ....
 */
static LLVMTypeRef get_register_type(struct lp_build_nir_context *bld_base,
                                     nir_register *reg)
{
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, reg->bit_size == 1 ? 32 : reg->bit_size);

   LLVMTypeRef type = int_bld->vec_type;
   if (reg->num_array_elems)
      type = LLVMArrayType(type, reg->num_array_elems);
   if (reg->num_components > 1)
      type = LLVMArrayType(type, reg->num_components);

   return type;
}


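/* Entry point: lower the NIR shader out of SSA, declare outputs and
 * registers, then translate the single function's CF list into LLVM IR
 * via the bld_base callbacks.
 */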
bool lp_build_nir_llvm(
   struct lp_build_nir_context *bld_base,
   struct nir_shader *nir)
{
   struct nir_function *func;

   nir_convert_from_ssa(nir, true);
   nir_lower_locals_to_regs(nir);
   nir_remove_dead_derefs(nir);
   nir_remove_dead_variables(nir, nir_var_function_temp, NULL);

   nir_foreach_shader_out_variable(variable, nir)
      handle_shader_output_decl(bld_base, nir, variable);

   if (nir->info.io_lowered) {
      uint64_t outputs_written = nir->info.outputs_written;

      while (outputs_written) {
         unsigned location = u_bit_scan64(&outputs_written);
         nir_variable var = {0};

         var.type = glsl_vec4_type();
         var.data.mode = nir_var_shader_out;
         var.data.location = location;
         var.data.driver_location = util_bitcount64(nir->info.outputs_written &
                                                    BITFIELD64_MASK(location));
         bld_base->emit_var_decl(bld_base, &var);
      }
   }

   bld_base->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                            _mesa_key_pointer_equal);
   bld_base->range_ht = _mesa_pointer_hash_table_create(NULL);

   func = (struct nir_function *)exec_list_get_head(&nir->functions);

   nir_foreach_register(reg, &func->impl->registers) {
      LLVMTypeRef type = get_register_type(bld_base, reg);
      LLVMValueRef reg_alloc = lp_build_alloca(bld_base->base.gallivm,
                                               type, "reg");
      _mesa_hash_table_insert(bld_base->regs, reg, reg_alloc);
   }
   nir_index_ssa_defs(func->impl);
   bld_base->ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
   visit_cf_list(bld_base, &func->impl->body);

   free(bld_base->ssa_defs);
   ralloc_free(bld_base->vars);
   ralloc_free(bld_base->regs);
   ralloc_free(bld_base->range_ht);
   return true;
}

/* Run some basic NIR lowering and optimization passes to remove
 * constructs the gallivm backend doesn't want to see. */
void lp_build_opt_nir(struct nir_shader *nir)
{
   bool progress;

   static const struct nir_lower_tex_options lower_tex_options = {
      .lower_tg4_offsets = true,
   };
   NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options);
   NIR_PASS_V(nir, nir_lower_frexp);

   NIR_PASS_V(nir, nir_lower_flrp, 16|32|64, true);
   NIR_PASS_V(nir, nir_lower_fp16_casts);
   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_constant_folding);
      NIR_PASS(progress, nir, nir_opt_algebraic);
      NIR_PASS(progress, nir, nir_lower_pack);

      nir_lower_tex_options options = { 0, };
      NIR_PASS_V(nir, nir_lower_tex, &options);

      const nir_lower_subgroups_options subgroups_options = {
         .subgroup_size = lp_native_vector_width / 32,
         .ballot_bit_size = 32,
         .ballot_components = 1,
         .lower_to_scalar = true,
         .lower_subgroup_masks = true,
      };
      NIR_PASS_V(nir, nir_lower_subgroups, &subgroups_options);

   } while (progress);
   nir_lower_bool_to_int32(nir);

   do {
      progress = false;
      NIR_PASS(progress, nir, nir_opt_algebraic_late);
      if (progress) {
         NIR_PASS_V(nir, nir_copy_prop);
         NIR_PASS_V(nir, nir_opt_dce);
         NIR_PASS_V(nir, nir_opt_cse);
      }
   } while (progress);
}