Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c
4565 views
1
/**************************************************************************
2
*
3
* Copyright 2019 Red Hat.
4
* All Rights Reserved.
5
*
6
* Permission is hereby granted, free of charge, to any person obtaining a
7
* copy of this software and associated documentation files (the "Software"),
8
* to deal in the Software without restriction, including without limitation
9
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
* and/or sell copies of the Software, and to permit persons to whom the
11
* Software is furnished to do so, subject to the following conditions:
12
*
13
* The above copyright notice and this permission notice shall be included
14
* in all copies or substantial portions of the Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22
* SOFTWARE.
23
*
24
**************************************************************************/
25
26
#include "lp_bld_nir.h"
27
#include "lp_bld_init.h"
28
#include "lp_bld_flow.h"
29
#include "lp_bld_logic.h"
30
#include "lp_bld_gather.h"
31
#include "lp_bld_const.h"
32
#include "lp_bld_struct.h"
33
#include "lp_bld_arit.h"
34
#include "lp_bld_bitarit.h"
35
#include "lp_bld_coro.h"
36
#include "lp_bld_printf.h"
37
#include "util/u_math.h"
38
39
/*
 * Map a bit size (8/16/32/64) to the log2 of its byte width, i.e. the
 * shift amount that converts a byte offset into an element index.
 * Unrecognized sizes are treated as 32-bit.
 */
static int bit_size_to_shift_size(int bit_size)
{
   if (bit_size == 64)
      return 3;
   if (bit_size == 16)
      return 1;
   if (bit_size == 8)
      return 0;
   /* 32-bit, and the default for anything unexpected. */
   return 2;
}
53
54
/*
55
* combine the execution mask if there is one with the current mask.
56
*/
57
static LLVMValueRef
58
mask_vec(struct lp_build_nir_context *bld_base)
59
{
60
struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
61
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
62
struct lp_exec_mask *exec_mask = &bld->exec_mask;
63
LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
64
if (!exec_mask->has_mask) {
65
return bld_mask;
66
}
67
if (!bld_mask)
68
return exec_mask->exec_mask;
69
return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
70
exec_mask->exec_mask, "");
71
}
72
73
static LLVMValueRef
74
emit_fetch_64bit(
75
struct lp_build_nir_context * bld_base,
76
LLVMValueRef input,
77
LLVMValueRef input2)
78
{
79
struct gallivm_state *gallivm = bld_base->base.gallivm;
80
LLVMBuilderRef builder = gallivm->builder;
81
LLVMValueRef res;
82
int i;
83
LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
84
int len = bld_base->base.type.length * 2;
85
assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
86
87
for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
88
#if UTIL_ARCH_LITTLE_ENDIAN
89
shuffles[i] = lp_build_const_int32(gallivm, i / 2);
90
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
91
#else
92
shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
93
shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
94
#endif
95
}
96
res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
97
98
return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
99
}
100
101
static void
102
emit_store_64bit_split(struct lp_build_nir_context *bld_base,
103
LLVMValueRef value,
104
LLVMValueRef split_values[2])
105
{
106
struct gallivm_state *gallivm = bld_base->base.gallivm;
107
LLVMBuilderRef builder = gallivm->builder;
108
unsigned i;
109
LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
110
LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
111
int len = bld_base->base.type.length * 2;
112
113
value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
114
for (i = 0; i < bld_base->base.type.length; i++) {
115
#if UTIL_ARCH_LITTLE_ENDIAN
116
shuffles[i] = lp_build_const_int32(gallivm, i * 2);
117
shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
118
#else
119
shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
120
shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
121
#endif
122
}
123
124
split_values[0] = LLVMBuildShuffleVector(builder, value,
125
LLVMGetUndef(LLVMTypeOf(value)),
126
LLVMConstVector(shuffles,
127
bld_base->base.type.length),
128
"");
129
split_values[1] = LLVMBuildShuffleVector(builder, value,
130
LLVMGetUndef(LLVMTypeOf(value)),
131
LLVMConstVector(shuffles2,
132
bld_base->base.type.length),
133
"");
134
}
135
136
static void
137
emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
138
LLVMValueRef chan_ptr,
139
LLVMValueRef chan_ptr2,
140
LLVMValueRef value)
141
{
142
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
143
struct lp_build_context *float_bld = &bld_base->base;
144
LLVMValueRef split_vals[2];
145
146
emit_store_64bit_split(bld_base, value, split_vals);
147
148
lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
149
lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
150
}
151
152
/*
 * Compute per-lane element offsets into a register array stored in SoA
 * layout (num_components channels per register, type.length lanes per
 * channel).  When need_perelement_offset is set, each lane additionally
 * gets its own lane index added so lane i addresses its own slot.
 */
static LLVMValueRef
get_soa_array_offsets(struct lp_build_context *uint_bld,
                      LLVMValueRef indirect_index,
                      int num_components,
                      unsigned chan_index,
                      bool need_perelement_offset)
{
   struct gallivm_state *gallivm = uint_bld->gallivm;
   LLVMValueRef chan_vec =
      lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
   LLVMValueRef length_vec =
      lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
   LLVMValueRef index_vec;

   /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
   index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
   index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
   index_vec = lp_build_mul(uint_bld, index_vec, length_vec);

   if (need_perelement_offset) {
      LLVMValueRef pixel_offsets;
      unsigned i;
      /* build pixel offset vector: {0, 1, 2, 3, ...} */
      pixel_offsets = uint_bld->undef;
      for (i = 0; i < uint_bld->type.length; i++) {
         LLVMValueRef ii = lp_build_const_int32(gallivm, i);
         pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
                                                ii, ii, "");
      }
      index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
   }
   return index_vec;
}
185
186
/*
 * Gather scalars base_ptr[indexes[i]] into a result vector.
 *
 * If indexes2 is non-NULL, the result is a double-length float vector built
 * from both index vectors interleaved: even result lanes come from
 * 'indexes', odd lanes from 'indexes2' (used for 64-bit loads, where the
 * caller later bitcasts the pair to the 64-bit vector type).
 *
 * overflow_mask, if given, marks lanes whose index was out of bounds; those
 * lanes are redirected to index 0 for the fetch and zeroed afterwards.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di: destination lane; si: source lane in the index vector(s)
       * (halved in the interleaved case, since each index vector supplies
       * every other result lane). */
      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }
      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      if (indexes2) {
         /* 64-bit case: widen the mask to the double type before selecting
          * zero for overflowed lanes. */
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
269
270
/**
271
* Scatter/store vector.
272
*/
273
/**
 * Scatter/store vector.
 *
 * Stores each lane of 'values' to base_ptr[indexes[lane]].  When an
 * execution mask is active, masked-off lanes are preserved by a
 * load/select/store sequence rather than skipped with control flow.
 */
static void
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Debug aid, normally compiled out. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Predicated lane: read the destination and keep the old value
          * where the predicate is off. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->uint_elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
311
312
static void emit_load_var(struct lp_build_nir_context *bld_base,
313
nir_variable_mode deref_mode,
314
unsigned num_components,
315
unsigned bit_size,
316
nir_variable *var,
317
unsigned vertex_index,
318
LLVMValueRef indir_vertex_index,
319
unsigned const_index,
320
LLVMValueRef indir_index,
321
LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
322
{
323
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
324
struct gallivm_state *gallivm = bld_base->base.gallivm;
325
int dmul = bit_size == 64 ? 2 : 1;
326
unsigned location = var->data.driver_location;
327
unsigned location_frac = var->data.location_frac;
328
329
if (!var->data.compact && !indir_index)
330
location += const_index;
331
else if (var->data.compact) {
332
location += const_index / 4;
333
location_frac += const_index % 4;
334
const_index = 0;
335
}
336
switch (deref_mode) {
337
case nir_var_shader_in:
338
for (unsigned i = 0; i < num_components; i++) {
339
int idx = (i * dmul) + location_frac;
340
int comp_loc = location;
341
342
if (bit_size == 64 && idx >= 4) {
343
comp_loc++;
344
idx = idx % 4;
345
}
346
347
if (bld->gs_iface) {
348
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
349
LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
350
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
351
LLVMValueRef result2;
352
353
result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
354
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
355
if (bit_size == 64) {
356
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
357
result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
358
false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
359
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
360
}
361
} else if (bld->tes_iface) {
362
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
363
LLVMValueRef attrib_index_val;
364
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
365
LLVMValueRef result2;
366
367
if (indir_index) {
368
if (var->data.compact) {
369
swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
370
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
371
} else
372
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
373
} else
374
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
375
376
if (var->data.patch) {
377
result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
378
indir_index ? true : false, attrib_index_val, swizzle_index_val);
379
if (bit_size == 64) {
380
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
381
result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
382
indir_index ? true : false, attrib_index_val, swizzle_index_val);
383
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
384
}
385
}
386
else {
387
result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
388
indir_vertex_index ? true : false,
389
indir_vertex_index ? indir_vertex_index : vertex_index_val,
390
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
391
(indir_index && var->data.compact) ? true : false, swizzle_index_val);
392
if (bit_size == 64) {
393
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
394
result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
395
indir_vertex_index ? true : false,
396
indir_vertex_index ? indir_vertex_index : vertex_index_val,
397
indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
398
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
399
}
400
}
401
} else if (bld->tcs_iface) {
402
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
403
LLVMValueRef attrib_index_val;
404
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
405
406
if (indir_index) {
407
if (var->data.compact) {
408
swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
409
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
410
} else
411
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
412
} else
413
attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
414
result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
415
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
416
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
417
(indir_index && var->data.compact) ? true : false, swizzle_index_val);
418
if (bit_size == 64) {
419
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
420
LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
421
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
422
indir_index ? true : false, attrib_index_val,
423
false, swizzle_index_val);
424
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
425
}
426
} else {
427
if (indir_index) {
428
LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
429
LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
430
attrib_index_val, 4, idx,
431
TRUE);
432
LLVMValueRef index_vec2 = NULL;
433
LLVMTypeRef fptr_type;
434
LLVMValueRef inputs_array;
435
fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
436
inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, fptr_type, "");
437
438
if (bit_size == 64)
439
index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
440
indir_index, 4, idx + 1, TRUE);
441
442
/* Gather values from the input register array */
443
result[i] = build_gather(bld_base, &bld_base->base, inputs_array, index_vec, NULL, index_vec2);
444
} else {
445
if (bld->indirects & nir_var_shader_in) {
446
LLVMValueRef lindex = lp_build_const_int32(gallivm,
447
comp_loc * 4 + idx);
448
LLVMValueRef input_ptr = lp_build_pointer_get(gallivm->builder,
449
bld->inputs_array, lindex);
450
if (bit_size == 64) {
451
LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
452
comp_loc * 4 + (idx + 1));
453
LLVMValueRef input_ptr2 = lp_build_pointer_get(gallivm->builder,
454
bld->inputs_array, lindex2);
455
result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
456
} else {
457
result[i] = input_ptr;
458
}
459
} else {
460
if (bit_size == 64) {
461
LLVMValueRef tmp[2];
462
tmp[0] = bld->inputs[comp_loc][idx];
463
tmp[1] = bld->inputs[comp_loc][idx + 1];
464
result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
465
} else {
466
result[i] = bld->inputs[comp_loc][idx];
467
}
468
}
469
}
470
}
471
}
472
break;
473
case nir_var_shader_out:
474
if (bld->fs_iface && bld->fs_iface->fb_fetch) {
475
bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.driver_location, result);
476
return;
477
}
478
for (unsigned i = 0; i < num_components; i++) {
479
int idx = (i * dmul) + location_frac;
480
if (bld->tcs_iface) {
481
LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
482
LLVMValueRef attrib_index_val;
483
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
484
485
if (indir_index)
486
attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
487
else
488
attrib_index_val = lp_build_const_int32(gallivm, location);
489
490
result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
491
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
492
(indir_index && !var->data.compact) ? true : false, attrib_index_val,
493
(indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
494
if (bit_size == 64) {
495
LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
496
LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
497
indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
498
indir_index ? true : false, attrib_index_val,
499
false, swizzle_index_val, 0);
500
result[i] = emit_fetch_64bit(bld_base, result[i], result2);
501
}
502
}
503
}
504
break;
505
default:
506
break;
507
}
508
}
509
510
/*
 * Store one channel of a (non-TCS) shader output.  64-bit values are split
 * across two consecutive 32-bit output slots, spilling into the next
 * location when the channel index crosses the vec4 boundary; 32-bit values
 * are bitcast to float and stored under the execution mask.
 */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size == 64) {
      /* Each 64-bit channel occupies two 32-bit slots. */
      chan *= 2;
      chan += comp;
      if (chan >= 4) {
         chan -= 4;
         location++;
      }
      emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
                            bld->outputs[location][chan + 1], dst);
   } else {
      dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
                         bld->outputs[location][chan + comp]);
   }
}
536
537
/*
 * Store one channel of a tessellation-control-shader output through the
 * TCS interface.  Handles the constant/indirect attribute index split
 * (for compact variables the indirect applies to the swizzle instead),
 * and splits 64-bit values into two masked 32-bit stores.
 */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* 64-bit channels take two 32-bit slots; wrap into next location. */
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact variable: indirect addresses the component. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   if (bit_size == 64) {
      /* Two masked stores, one per 32-bit half. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val,
                                        false, swizzle_index_val,
                                        split_vals[0], mask_vec(bld_base));
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index ? true : false,
                                        attrib_index_val,
                                        false, swizzle_index_val2,
                                        split_vals[1], mask_vec(bld_base));
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                        indir_vertex_index ? true : false,
                                        indir_vertex_index,
                                        indir_index && !is_compact ? true : false,
                                        attrib_index_val,
                                        indir_index && is_compact ? true : false,
                                        swizzle_index_val,
                                        chan_val, mask_vec(bld_base));
   }
}
602
603
/*
 * Store up to num_components channels of a shader output variable,
 * honoring 'writemask'.  Only nir_var_shader_out is handled; each written
 * channel is dispatched to the TCS path or the generic channel store.
 */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   switch (deref_mode) {
   case nir_var_shader_out: {
      unsigned location = var->data.driver_location;
      unsigned comp = var->data.location_frac;
      /* FS depth/stencil results live at fixed component slots. */
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (var->data.location == FRAG_RESULT_STENCIL)
            comp = 1;
         else if (var->data.location == FRAG_RESULT_DEPTH)
            comp = 2;
      }

      /* Fold the constant offset of compact variables into location/comp. */
      if (var->data.compact) {
         location += const_index / 4;
         comp += const_index % 4;
         const_index = 0;
      }

      for (unsigned chan = 0; chan < num_components; chan++) {
         if (writemask & (1u << chan)) {
            /* A single-component value is the value itself, not a struct. */
            LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
            if (bld->tcs_iface) {
               emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
            } else
               emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
         }
      }
      break;
   }
   default:
      break;
   }
}
648
649
/*
 * Load a NIR register (possibly an array register) from its alloca
 * storage.  Array registers are read with a per-lane gather, clamping the
 * (base + indirect) index to the last array element; plain registers are
 * simple vector loads per component.  Returns a single value for
 * one-component registers, otherwise an aggregate of the components.
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_reg_src *reg,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   int nc = reg->reg->num_components;
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   if (reg->reg->num_array_elems) {
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp the index so out-of-bounds indirects stay in the array. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         vals[i] = build_gather(bld_base, reg_bld, reg_storage, indirect_offset, NULL, NULL);
      }
   } else {
      for (unsigned i = 0; i < nc; i++) {
         LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                    lp_build_const_int32(gallivm, i));
         vals[i] = LLVMBuildLoad(builder, this_storage, "");
      }
   }
   return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
}
681
682
/*
 * Store the written components of dst[] into a NIR register's alloca
 * storage, honoring 'writemask' and the execution mask.  Array registers
 * use a per-lane masked scatter with the (base + indirect) index clamped
 * to the array bounds; plain registers use masked vector stores.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_reg_dest *reg,
                           unsigned writemask,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   int nc = reg->reg->num_components;
   if (reg->reg->num_array_elems > 0) {
      LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, reg->base_offset);
      if (reg->indirect) {
         /* Clamp the index so out-of-bounds indirects stay in the array. */
         LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, reg->reg->num_array_elems - 1);
         indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
         indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
      }
      reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
      for (unsigned i = 0; i < nc; i++) {
         if (!(writemask & (1 << i)))
            continue;
         LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, TRUE);
         dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
         emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
      }
      return;
   }

   for (unsigned i = 0; i < nc; i++) {
      LLVMValueRef this_storage = nc == 1 ? reg_storage : lp_build_array_get_ptr(gallivm, reg_storage,
                                                                                 lp_build_const_int32(gallivm, i));
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i], this_storage);
   }
}
720
721
/*
 * Load nc components of a compute kernel argument from the kernel args
 * buffer.  The byte offset is scaled down to an element index via the bit
 * size, then each component is loaded as a scalar and broadcast to all
 * lanes.
 *
 * NOTE(review): only the offset_is_uniform path writes result[]; a
 * non-uniform offset leaves the outputs untouched — presumably callers
 * guarantee uniform offsets here; confirm against the caller.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
   /* Convert the byte offset to an element index (no-op for 8-bit). */
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* All lanes share the offset: take lane 0, load scalars, broadcast. */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, kernel_args_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   }
}
753
754
/*
 * Convert an integer global-memory address into a typed pointer whose
 * element width matches bit_size (8/16/32/64; anything else is treated
 * as 32-bit).  Refactored: select the element type in the switch and emit
 * a single IntToPtr, instead of four duplicated build calls.
 */
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }
   return LLVMBuildIntToPtr(builder, addr_ptr,
                            LLVMPointerType(elem_type, 0), "");
}
774
775
/*
 * Load nc components from per-lane global-memory addresses in 'addr'.
 * Emits a runtime loop over the vector lanes; lanes whose execution mask
 * bit is clear skip the load entirely, so invalid addresses in inactive
 * lanes are never dereferenced.  Result lanes for inactive lanes keep the
 * alloca's initial contents.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld;

   res_bld = get_int_bld(bld_base, true, bit_size);

   for (unsigned c = 0; c < nc; c++) {
      /* Accumulate the gathered lanes through a temporary alloca. */
      LLVMValueRef result = lp_build_alloca(gallivm, res_bld->vec_type, "");
      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      /* Only load in lanes where the execution mask is set. */
      struct lp_build_if_state ifthen;
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);

      LLVMValueRef value_ptr = lp_build_pointer_get(builder, addr_ptr, lp_build_const_int32(gallivm, c));

      LLVMValueRef temp_res;
      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, value_ptr, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(builder, result, "");
   }
}
816
817
/* Store up to 'nc' components to global memory for each active SIMD lane.
 *
 * Mirrors emit_load_global(): the per-lane addresses force scalarization,
 * so a runtime loop walks the lanes and each enabled lane stores one scalar
 * per write-enabled component.
 *
 * writemask     - bit c set => component c is written
 * nc            - number of components in 'dst'
 * bit_size      - bit width of each stored component
 * addr_bit_size - bit width of the address operand; NOTE(review): unused
 *                 here, global_addr_to_ptr() is keyed off bit_size
 * addr          - integer addresses, one vector element per lane
 * dst           - value(s) to store; a bare vector when nc == 1, otherwise
 *                 an aggregate of per-component vectors
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");

      LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                      loop_state.counter, "");
      addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
      /* reinterpret the extracted lane value as an integer of the store
       * width so the pointer write below has matching types */
      switch (bit_size) {
      case 8:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt8TypeInContext(gallivm->context), "");
         break;
      case 16:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt16TypeInContext(gallivm->context), "");
         break;
      case 32:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt32TypeInContext(gallivm->context), "");
         break;
      case 64:
         value_ptr = LLVMBuildBitCast(builder, value_ptr, LLVMInt64TypeInContext(gallivm->context), "");
         break;
      default:
         break;
      }
      struct lp_build_if_state ifthen;

      /* only store for lanes whose exec-mask bit is set */
      LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);
      lp_build_pointer_set(builder, addr_ptr, lp_build_const_int32(gallivm, c), value_ptr);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
869
870
/* Emit a global-memory atomic (RMW or compare-and-swap) per active lane.
 *
 * Scalarized like the other global accessors: each enabled lane converts
 * its address and issues one LLVM atomic instruction; disabled lanes
 * contribute a zero of the value type instead, so *result is fully
 * defined for every lane.
 *
 * nir_op        - which nir_intrinsic_global_atomic_* to emit
 * addr_bit_size - bit width of the address operand; NOTE(review): unused,
 *                 the address is converted as 32-bit then re-bitcast to
 *                 the value's pointer type below, so the pointee width
 *                 still ends up correct
 * val_bit_size  - bit width of the operand/result values
 * addr          - integer addresses, one vector element per lane
 * val           - first atomic operand (SoA vector)
 * val2          - second operand, only used for comp_swap (swap value)
 * result        - receives the SoA vector of pre-op memory values
 */
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_op nir_op,
                               unsigned addr_bit_size,
                               unsigned val_bit_size,
                               LLVMValueRef addr,
                               LLVMValueRef val, LLVMValueRef val2,
                               LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *atom_bld = get_int_bld(bld_base, true, val_bit_size);
   /* stack slot accumulating the per-lane atomic results */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           LLVMTypeOf(val), "");
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");

   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                   loop_state.counter, "");
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   /* only emit the atomic for lanes whose exec-mask bit is set */
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   /* retype the pointer so the atomic operates on the value's type */
   addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
   if (nir_op == nir_intrinsic_global_atomic_comp_swap) {
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg returns {old value, success}; keep the old value */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      LLVMAtomicRMWBinOp op;
      switch (nir_op) {
      case nir_intrinsic_global_atomic_add:
         op = LLVMAtomicRMWBinOpAdd;
         break;
      case nir_intrinsic_global_atomic_exchange:

         op = LLVMAtomicRMWBinOpXchg;
         break;
      case nir_intrinsic_global_atomic_and:
         op = LLVMAtomicRMWBinOpAnd;
         break;
      case nir_intrinsic_global_atomic_or:
         op = LLVMAtomicRMWBinOpOr;
         break;
      case nir_intrinsic_global_atomic_xor:
         op = LLVMAtomicRMWBinOpXor;
         break;
      case nir_intrinsic_global_atomic_umin:
         op = LLVMAtomicRMWBinOpUMin;
         break;
      case nir_intrinsic_global_atomic_umax:
         op = LLVMAtomicRMWBinOpUMax;
         break;
      case nir_intrinsic_global_atomic_imin:
         op = LLVMAtomicRMWBinOpMin;
         break;
      case nir_intrinsic_global_atomic_imax:
         op = LLVMAtomicRMWBinOpMax;
         break;
      default:
         unreachable("unknown atomic op");
      }

      scalar = LLVMBuildAtomicRMW(builder, op,
                                  addr_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* disabled lanes get a typed zero so the result vector is well defined */
   temp_res = LLVMBuildLoad(builder, atom_res, "");
   /* NOTE(review): detects "float" by comparing against the 32-bit float
    * vec type only; a 64-bit float vector would take the integer path --
    * confirm whether float64 atomics can reach here */
   bool is_float = LLVMTypeOf(val) == bld_base->base.vec_type;
   LLVMValueRef zero_val;
   if (is_float) {
      if (val_bit_size == 64)
         zero_val = lp_build_const_double(gallivm, 0);
      else
         zero_val = lp_build_const_float(gallivm, 0);
   } else {
      if (val_bit_size == 64)
         zero_val = lp_build_const_int64(gallivm, 0);
      else
         zero_val = lp_build_const_int32(gallivm, 0);
   }

   temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad(builder, atom_res, "");
}
978
979
/* Load 'nc' components from a UBO (constant buffer).
 *
 * The byte offset is first scaled down to an element index of the target
 * bit width.  Two paths:
 *  - uniform offset: one scalar load per component, broadcast to all lanes;
 *  - divergent offset: a masked gather per component, where lanes whose
 *    index is past the buffer's element count are flagged in overflow_mask
 *    (build_gather handles what out-of-bounds lanes return).
 *
 * nc                - number of components to load
 * bit_size          - bit width of each component
 * offset_is_uniform - true when all lanes share the same offset
 * index             - constant-buffer index (scalar)
 * offset            - per-lane byte offsets (SoA vector)
 * result            - receives one SoA vector per component
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef consts_ptr = lp_build_array_get(gallivm, bld->consts_ptr, index);
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   /* byte offset -> element index for the load width */
   if (size_shift)
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* all lanes agree: load once per component and splat */
      offset = LLVMBuildExtractElement(builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar = lp_build_pointer_get(builder, consts_ptr, this_offset);
         result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
      }
   } else {
      LLVMValueRef overflow_mask;
      LLVMValueRef num_consts = lp_build_array_get(gallivm, bld->const_sizes_ptr, index);

      /* rescale the buffer's element count (stored for 32-bit units) to
       * units of the load width */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      if (bit_size == 64)
         num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
      else if (bit_size == 16)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
      else if (bit_size == 8)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
         /* mark lanes reading past the end of the buffer */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          this_offset, num_consts);
         result[c] = build_gather(bld_base, bld_broad, consts_ptr, this_offset, overflow_mask, NULL);
      }
   }
}
1029
1030
1031
/* Load 'nc' components from an SSBO (index != NULL) or from shared memory
 * (index == NULL, using bld->shared_ptr).
 *
 * The byte offset is scaled to an element index, then a runtime loop over
 * the SIMD lanes loads one scalar per enabled lane.  For SSBOs the exec
 * mask is additionally ANDed with a bounds check against the buffer's
 * element count; out-of-bounds or disabled lanes receive a typed zero.
 *
 * nc       - number of components to load
 * bit_size - bit width of each component
 * index    - SSBO index vector (lane 0 used; assumed uniform) or NULL
 * offset   - per-lane byte offsets (SoA vector)
 * outval   - receives one SoA vector per component
 */
static void emit_load_mem(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr = NULL;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   struct lp_build_context *load_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);

   load_bld = get_int_bld(bld_base, true, bit_size);

   if (index) {
      /* buffer size in bytes -> element count for the load width */
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);

      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   } else
      ssbo_ptr = bld->shared_ptr;

   /* byte offset -> element index */
   offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef exec_mask = mask_vec(bld_base);

      /* disable lanes that would read past the end of the buffer */
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      /* stack slot accumulating per-lane scalars for component c */
      LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");

      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

      lp_build_if(&ifthen, gallivm, cond);
      LLVMValueRef scalar;
      /* the backing pointer is a 32-bit element array; retype it for
       * other widths */
      if (bit_size != 32) {
         LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
         scalar = lp_build_pointer_get(builder, ssbo_ptr2, loop_index);
      } else
         scalar = lp_build_pointer_get(builder, ssbo_ptr, loop_index);

      temp_res = LLVMBuildLoad(builder, result, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_else(&ifthen);
      /* disabled / out-of-bounds lanes read back zero */
      temp_res = LLVMBuildLoad(builder, result, "");
      LLVMValueRef zero;
      if (bit_size == 64)
         zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 16)
         zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
      else if (bit_size == 8)
         zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
      else
         zero = lp_build_const_int32(gallivm, 0);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, result);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
   }
}
1111
1112
/* Store up to 'nc' components to an SSBO (index != NULL) or to shared
 * memory (index == NULL, using bld->shared_ptr).
 *
 * Counterpart of emit_load_mem(): the byte offset is scaled to an element
 * index, and a runtime loop over the SIMD lanes stores one scalar per
 * enabled lane.  For SSBOs the exec mask is ANDed with a bounds check so
 * out-of-bounds lanes write nothing.
 *
 * writemask - bit c set => component c is written
 * nc        - number of components in 'dst'
 * bit_size  - bit width of each stored component
 * index     - SSBO index vector (lane 0 used; assumed uniform) or NULL
 * offset    - per-lane byte offsets (SoA vector)
 * dst       - value(s) to store; bare vector when nc == 1, otherwise an
 *             aggregate of per-component vectors
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef ssbo_ptr;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef ssbo_limit = NULL;
   struct lp_build_context *store_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   if (index) {
      /* buffer size in bytes -> element count for the store width */
      LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
      ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
      ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
   } else
      ssbo_ptr = bld->shared_ptr;

   /* byte offset -> element index */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;
      LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
      LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");

      LLVMValueRef exec_mask = mask_vec(bld_base);
      /* disable lanes that would write past the end of the buffer */
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       loop_state.counter, "");
      /* reinterpret the lane value as an integer of the store width */
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
      struct lp_build_if_state ifthen;
      LLVMValueRef cond;

      loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                           loop_state.counter, "");
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);
      /* the backing pointer is a 32-bit element array; retype it for
       * other widths */
      if (bit_size != 32) {
         LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
         lp_build_pointer_set(builder, ssbo_ptr2, loop_index, value_ptr);
      } else
         lp_build_pointer_set(builder, ssbo_ptr, loop_index, value_ptr);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
   }
}
1174
1175
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1176
nir_intrinsic_op nir_op,
1177
uint32_t bit_size,
1178
LLVMValueRef index, LLVMValueRef offset,
1179
LLVMValueRef val, LLVMValueRef val2,
1180
LLVMValueRef *result)
1181
{
1182
struct gallivm_state *gallivm = bld_base->base.gallivm;
1183
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1184
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1185
LLVMValueRef ssbo_ptr;
1186
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1187
LLVMValueRef ssbo_limit = NULL;
1188
uint32_t shift_val = bit_size_to_shift_size(bit_size);
1189
struct lp_build_context *atomic_bld = get_int_bld(bld_base, true, bit_size);
1190
if (index) {
1191
LLVMValueRef ssbo_size_ptr = lp_build_array_get(gallivm, bld->ssbo_sizes_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1192
ssbo_limit = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, 2), "");
1193
ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
1194
ssbo_ptr = lp_build_array_get(gallivm, bld->ssbo_ptr, LLVMBuildExtractElement(builder, index, lp_build_const_int32(gallivm, 0), ""));
1195
} else
1196
ssbo_ptr = bld->shared_ptr;
1197
1198
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1199
LLVMValueRef atom_res = lp_build_alloca(gallivm,
1200
atomic_bld->vec_type, "");
1201
1202
LLVMValueRef exec_mask = mask_vec(bld_base);
1203
if (ssbo_limit) {
1204
LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, offset, ssbo_limit);
1205
exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
1206
}
1207
1208
struct lp_build_loop_state loop_state;
1209
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1210
1211
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1212
loop_state.counter, "");
1213
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1214
1215
offset = LLVMBuildExtractElement(gallivm->builder, offset,
1216
loop_state.counter, "");
1217
1218
LLVMValueRef scalar_ptr;
1219
if (bit_size != 32) {
1220
LLVMValueRef ssbo_ptr2 = LLVMBuildBitCast(builder, ssbo_ptr, LLVMPointerType(atomic_bld->elem_type, 0), "");
1221
scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr2, &offset, 1, "");
1222
} else
1223
scalar_ptr = LLVMBuildGEP(builder, ssbo_ptr, &offset, 1, "");
1224
1225
struct lp_build_if_state ifthen;
1226
LLVMValueRef cond, temp_res;
1227
LLVMValueRef scalar;
1228
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1229
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1230
lp_build_if(&ifthen, gallivm, cond);
1231
1232
if (nir_op == nir_intrinsic_ssbo_atomic_comp_swap || nir_op == nir_intrinsic_shared_atomic_comp_swap) {
1233
LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1234
loop_state.counter, "");
1235
cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1236
scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1237
cas_src_ptr,
1238
LLVMAtomicOrderingSequentiallyConsistent,
1239
LLVMAtomicOrderingSequentiallyConsistent,
1240
false);
1241
scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1242
} else {
1243
LLVMAtomicRMWBinOp op;
1244
1245
switch (nir_op) {
1246
case nir_intrinsic_shared_atomic_add:
1247
case nir_intrinsic_ssbo_atomic_add:
1248
op = LLVMAtomicRMWBinOpAdd;
1249
break;
1250
case nir_intrinsic_shared_atomic_exchange:
1251
case nir_intrinsic_ssbo_atomic_exchange:
1252
op = LLVMAtomicRMWBinOpXchg;
1253
break;
1254
case nir_intrinsic_shared_atomic_and:
1255
case nir_intrinsic_ssbo_atomic_and:
1256
op = LLVMAtomicRMWBinOpAnd;
1257
break;
1258
case nir_intrinsic_shared_atomic_or:
1259
case nir_intrinsic_ssbo_atomic_or:
1260
op = LLVMAtomicRMWBinOpOr;
1261
break;
1262
case nir_intrinsic_shared_atomic_xor:
1263
case nir_intrinsic_ssbo_atomic_xor:
1264
op = LLVMAtomicRMWBinOpXor;
1265
break;
1266
case nir_intrinsic_shared_atomic_umin:
1267
case nir_intrinsic_ssbo_atomic_umin:
1268
op = LLVMAtomicRMWBinOpUMin;
1269
break;
1270
case nir_intrinsic_shared_atomic_umax:
1271
case nir_intrinsic_ssbo_atomic_umax:
1272
op = LLVMAtomicRMWBinOpUMax;
1273
break;
1274
case nir_intrinsic_ssbo_atomic_imin:
1275
case nir_intrinsic_shared_atomic_imin:
1276
op = LLVMAtomicRMWBinOpMin;
1277
break;
1278
case nir_intrinsic_ssbo_atomic_imax:
1279
case nir_intrinsic_shared_atomic_imax:
1280
op = LLVMAtomicRMWBinOpMax;
1281
break;
1282
default:
1283
unreachable("unknown atomic op");
1284
}
1285
scalar = LLVMBuildAtomicRMW(builder, op,
1286
scalar_ptr, value_ptr,
1287
LLVMAtomicOrderingSequentiallyConsistent,
1288
false);
1289
}
1290
temp_res = LLVMBuildLoad(builder, atom_res, "");
1291
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1292
LLVMBuildStore(builder, temp_res, atom_res);
1293
lp_build_else(&ifthen);
1294
temp_res = LLVMBuildLoad(builder, atom_res, "");
1295
LLVMValueRef zero = bit_size == 64 ? lp_build_const_int64(gallivm, 0) : lp_build_const_int32(gallivm, 0);
1296
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
1297
LLVMBuildStore(builder, temp_res, atom_res);
1298
lp_build_endif(&ifthen);
1299
1300
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1301
NULL, LLVMIntUGE);
1302
*result = LLVMBuildLoad(builder, atom_res, "");
1303
}
1304
1305
static void emit_barrier(struct lp_build_nir_context *bld_base)
1306
{
1307
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1308
struct gallivm_state * gallivm = bld_base->base.gallivm;
1309
1310
LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1311
1312
lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1313
LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1314
}
1315
1316
/* Return the size of the selected SSBO, splatted across a uint vector.
 * The buffer index is taken from lane 0 of 'index'. */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   struct lp_build_nir_soa_context *soa_bld =
      (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   LLVMValueRef first_index =
      LLVMBuildExtractElement(builder, index,
                              lp_build_const_int32(gallivm, 0), "");
   LLVMValueRef size_val =
      lp_build_array_get(gallivm, soa_bld->ssbo_sizes_ptr, first_index);

   return lp_build_broadcast_scalar(&bld_base->uint_bld, size_val);
}
1327
1328
static void emit_image_op(struct lp_build_nir_context *bld_base,
1329
struct lp_img_params *params)
1330
{
1331
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1332
struct gallivm_state *gallivm = bld_base->base.gallivm;
1333
1334
params->type = bld_base->base.type;
1335
params->context_ptr = bld->context_ptr;
1336
params->thread_data_ptr = bld->thread_data_ptr;
1337
params->exec_mask = mask_vec(bld_base);
1338
1339
if (params->image_index_offset)
1340
params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1341
lp_build_const_int32(gallivm, 0), "");
1342
1343
bld->image->emit_op(bld->image,
1344
bld->bld_base.base.gallivm,
1345
params);
1346
1347
}
1348
1349
static void emit_image_size(struct lp_build_nir_context *bld_base,
1350
struct lp_sampler_size_query_params *params)
1351
{
1352
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1353
struct gallivm_state *gallivm = bld_base->base.gallivm;
1354
1355
params->int_type = bld_base->int_bld.type;
1356
params->context_ptr = bld->context_ptr;
1357
1358
if (params->texture_unit_offset)
1359
params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1360
lp_build_const_int32(gallivm, 0), "");
1361
bld->image->emit_size_query(bld->image,
1362
bld->bld_base.base.gallivm,
1363
params);
1364
1365
}
1366
1367
static void init_var_slots(struct lp_build_nir_context *bld_base,
1368
nir_variable *var, unsigned sc)
1369
{
1370
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1371
unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1372
1373
if (!bld->outputs)
1374
return;
1375
for (unsigned comp = sc; comp < slots + sc; comp++) {
1376
unsigned this_loc = var->data.driver_location + (comp / 4);
1377
unsigned this_chan = comp % 4;
1378
1379
if (!bld->outputs[this_loc][this_chan])
1380
bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1381
bld_base->base.vec_type, "output");
1382
}
1383
}
1384
1385
static void emit_var_decl(struct lp_build_nir_context *bld_base,
1386
nir_variable *var)
1387
{
1388
unsigned sc = var->data.location_frac;
1389
switch (var->data.mode) {
1390
case nir_var_shader_out: {
1391
if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1392
if (var->data.location == FRAG_RESULT_STENCIL)
1393
sc = 1;
1394
else if (var->data.location == FRAG_RESULT_DEPTH)
1395
sc = 2;
1396
}
1397
init_var_slots(bld_base, var, sc);
1398
break;
1399
}
1400
default:
1401
break;
1402
}
1403
}
1404
1405
/* Forward a texture sample to the sampler backend.
 *
 * Fast path: the whole SIMD vector is sampled at once (any dynamic texture
 * index reduced to lane 0).  Slow path: outside fragment shaders a dynamic
 * texture index may genuinely diverge, so the sample is scalarized -- one
 * backend call per lane with scalar coords/lod/index -- and the per-lane
 * texels are reassembled into SoA result vectors.
 *
 * NOTE: the slow path mutates 'params' in place (coords, lod, texel,
 * texture_index_offset, type) each iteration; the original values are
 * saved up front, and the statement order matters.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   params->type = bld_base->base.type;
   params->context_ptr = bld->context_ptr;
   params->thread_data_ptr = bld->thread_data_ptr;

   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
      /* this is horrible but this can be dynamic */
      LLVMValueRef coords[5];
      LLVMValueRef *orig_texel_ptr;
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type) };
      LLVMValueRef texel[4], orig_offset, orig_lod;
      unsigned i;
      /* save the fields the per-lane loop overwrites */
      orig_texel_ptr = params->texel;
      orig_lod = params->lod;
      for (i = 0; i < 5; i++) {
         coords[i] = params->coords[i];
      }
      orig_offset = params->texture_index_offset;

      /* one backend call per SIMD lane */
      for (unsigned v = 0; v < uint_bld->type.length; v++) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
         LLVMValueRef new_coords[5];
         for (i = 0; i < 5; i++) {
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
                                                    coords[i], idx, "");
         }
         params->coords = new_coords;
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
                                                                orig_offset,
                                                                idx, "");
         /* the backend is invoked with the scalar element type */
         params->type = lp_elem_type(bld_base->base.type);

         if (orig_lod)
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
         params->texel = texel;
         bld->sampler->emit_tex_sample(bld->sampler,
                                       gallivm,
                                       params);

         /* scatter the lane's 4 texel channels back into the results */
         for (i = 0; i < 4; i++) {
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
         }
      }
      for (i = 0; i < 4; i++) {
         orig_texel_ptr[i] = result[i];
      }
      return;
   }

   /* uniform (or fragment-shader) dynamic index: use lane 0 */
   if (params->texture_index_offset)
      params->texture_index_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
                                                             params->texture_index_offset,
                                                             lp_build_const_int32(bld_base->base.gallivm, 0), "");

   params->type = bld_base->base.type;
   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1473
1474
static void emit_tex_size(struct lp_build_nir_context *bld_base,
1475
struct lp_sampler_size_query_params *params)
1476
{
1477
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1478
1479
params->int_type = bld_base->int_bld.type;
1480
params->context_ptr = bld->context_ptr;
1481
1482
if (params->texture_unit_offset)
1483
params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1484
params->texture_unit_offset,
1485
lp_build_const_int32(bld_base->base.gallivm, 0), "");
1486
bld->sampler->emit_size_query(bld->sampler,
1487
bld->bld_base.base.gallivm,
1488
params);
1489
}
1490
1491
/* Materialize a system-value intrinsic into result[].
 *
 * Scalar system values are broadcast across the SIMD vector; values that
 * are already per-lane vectors (vertex_id, prim_id, ...) are passed
 * through.  For workgroup id / num_workgroups the components are zero-
 * extended to 64 bits first when the destination asks for it.
 *
 * NOTE(review): the 'default: break;' label sits in the MIDDLE of the
 * switch -- legal C (the cases after it still match normally), but easy
 * to misread; unknown intrinsics simply leave result[] untouched.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* integer context matching the destination bit width, for broadcasts */
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->dest.ssa.bit_size);
   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      result[0] = bld->system_values.basevertex;
      break;
   case nir_intrinsic_load_first_vertex:
      result[0] = bld->system_values.firstvertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = bld->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = bld->system_values.prim_id;
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_id, lp_build_const_int32(gallivm, i), "");
         if (instr->dest.ssa.bit_size == 64)
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      /* thread_id is already a per-lane vector per component */
      for (unsigned i = 0; i < 3; i++)
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.thread_id, i, "");
      break;
   case nir_intrinsic_load_num_workgroups: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = LLVMBuildExtractElement(gallivm->builder, bld->system_values.grid_size, lp_build_const_int32(gallivm, i), "");
         if (instr->dest.ssa.bit_size == 64)
            tmp[i] = LLVMBuildZExt(gallivm->builder, tmp[i], bld_base->uint64_bld.elem_type, "");
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_invocation_id:
      /* already a vector in tess control; scalar elsewhere */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = bld->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      break;
   default:
      break;
   case nir_intrinsic_load_workgroup_size:
      for (unsigned i = 0; i < 3; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildExtractElement(gallivm->builder, bld->system_values.block_size, lp_build_const_int32(gallivm, i), ""));
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned i = 0; i < 3; i++) {
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
      }
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned i = 0; i < 4; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned i = 0; i < 2; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = bld->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos is a flat float array of (x, y) pairs indexed by
       * sample id */
      for (unsigned i = 0; i < 2; i++) {
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
         LLVMValueRef val = lp_build_array_get(gallivm, bld->system_values.sample_pos, idx);
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = bld->system_values.sample_mask_in;
      break;
   case nir_intrinsic_load_view_index:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
      break;
   case nir_intrinsic_load_subgroup_invocation: {
      /* lane index within the SIMD vector: the constant <0, 1, 2, ...> */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
         elems[i] = lp_build_const_int32(gallivm, i);
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
      break;
   }
   case nir_intrinsic_load_subgroup_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
      break;
   case nir_intrinsic_load_num_subgroups:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
      break;
   }
}
1610
1611
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
1612
LLVMValueRef *dst)
1613
{
1614
struct gallivm_state *gallivm = bld_base->base.gallivm;
1615
struct lp_build_context *uint_bld = &bld_base->uint_bld;
1616
*dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
1617
}
1618
1619
static void bgnloop(struct lp_build_nir_context *bld_base)
1620
{
1621
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1622
lp_exec_bgnloop(&bld->exec_mask, true);
1623
}
1624
1625
static void endloop(struct lp_build_nir_context *bld_base)
1626
{
1627
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1628
lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask);
1629
}
1630
1631
/* Begin a NIR if: push the (bitcast-to-int) condition vector as a new
 * conditional execution mask. */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *soa =
      (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef cond_mask =
      LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");

   lp_exec_mask_cond_push(&soa->exec_mask, cond_mask);
}
1637
1638
static void else_stmt(struct lp_build_nir_context *bld_base)
1639
{
1640
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1641
lp_exec_mask_cond_invert(&bld->exec_mask);
1642
}
1643
1644
static void endif_stmt(struct lp_build_nir_context *bld_base)
1645
{
1646
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1647
lp_exec_mask_cond_pop(&bld->exec_mask);
1648
}
1649
1650
static void break_stmt(struct lp_build_nir_context *bld_base)
1651
{
1652
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1653
1654
lp_exec_break(&bld->exec_mask, NULL, false);
1655
}
1656
1657
static void continue_stmt(struct lp_build_nir_context *bld_base)
1658
{
1659
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1660
lp_exec_continue(&bld->exec_mask);
1661
}
1662
1663
/* Fragment discard (kill).
 *
 * cond == NULL is an unconditional discard: every lane currently active
 * under the exec mask is killed.  With a condition, only the active lanes
 * where cond is true are killed.  The surviving lanes are folded into the
 * fragment live mask (bld->mask); a zero lane there means "dead".
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef mask;

   if (!cond) {
      if (bld->exec_mask.has_mask) {
         /* Survivors are exactly the lanes that were NOT active. */
         mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
      } else {
         /* No exec mask => all lanes active => nothing survives. */
         mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
      }
   } else {
      /* Lanes where the discard condition is false survive... */
      mask = LLVMBuildNot(builder, cond, "");
      if (bld->exec_mask.has_mask) {
         /* ...as do lanes that were inactive to begin with. */
         LLVMValueRef invmask;
         invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
         mask = LLVMBuildOr(builder, mask, invmask, "");
      }
   }
   lp_build_mask_update(bld->mask, mask);
}
1685
1686
static void
1687
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
1688
LLVMValueRef ptr,
1689
LLVMValueRef mask)
1690
{
1691
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1692
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1693
1694
current_vec = LLVMBuildSub(builder, current_vec, mask, "");
1695
1696
LLVMBuildStore(builder, current_vec, ptr);
1697
}
1698
1699
static void
1700
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
1701
LLVMValueRef ptr,
1702
LLVMValueRef mask)
1703
{
1704
LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1705
LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, "");
1706
1707
current_vec = lp_build_select(&bld_base->uint_bld,
1708
mask,
1709
bld_base->uint_bld.zero,
1710
current_vec);
1711
1712
LLVMBuildStore(builder, current_vec, ptr);
1713
}
1714
1715
static LLVMValueRef
1716
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
1717
LLVMValueRef current_mask_vec,
1718
LLVMValueRef total_emitted_vertices_vec)
1719
{
1720
LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1721
struct lp_build_context *int_bld = &bld->bld_base.int_bld;
1722
LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
1723
total_emitted_vertices_vec,
1724
bld->max_output_vertices_vec);
1725
1726
return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
1727
}
1728
1729
/* Emit one geometry-shader vertex on the given stream for all active
 * lanes, then bump the per-lane vertex counters.  Lanes that already
 * reached the shader's declared maximum vertex count are masked off. */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Ignore streams the shader doesn't actually use. */
   if (stream_id >= bld->gs_vertex_streams)
      return;
   assert(bld->gs_iface->emit_vertex);
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef mask = mask_vec(bld_base);
   /* Drop lanes that already emitted max_output_vertices. */
   mask = clamp_mask_to_max_output_vertices(bld, mask,
                                            total_emitted_vertices_vec);
   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted_vertices_vec,
                              mask,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));

   /* Per-lane counters; subtracting the ~0 mask increments active lanes. */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                             mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
                             mask);
}
1753
1754
/* Finish the current primitive on a stream, restricted to the lanes in
 * mask.  Lanes that have emitted no vertices since the last primitive
 * are excluded (they have nothing to end).  On completion the per-lane
 * primitive count is bumped and the per-primitive vertex count reset. */
static void
end_primitive_masked(struct lp_build_nir_context * bld_base,
                     LLVMValueRef mask, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Ignore streams the shader doesn't actually use. */
   if (stream_id >= bld->gs_vertex_streams)
      return;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr[stream_id], "");
   LLVMValueRef emitted_prims_vec =
      LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr[stream_id], "");
   LLVMValueRef total_emitted_vertices_vec =
      LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr[stream_id], "");

   /* Only lanes that emitted at least one vertex end a primitive. */
   LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
                                            PIPE_FUNC_NOTEQUAL,
                                            emitted_vertices_vec,
                                            uint_bld->zero);
   mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
   bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                total_emitted_vertices_vec,
                                emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
   increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
                             mask);
   /* Reset the per-primitive vertex counter for the lanes that ended. */
   clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
                                mask);
}
1784
1785
/* NIR end_primitive: end the primitive on all currently-active lanes. */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *soa =
      (struct lp_build_nir_soa_context *)bld_base;

   assert(soa->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
1794
1795
/* Shader prologue: when fragment-shader inputs are addressed indirectly
 * (and no GS/TCS/TES interface supplies its own input fetch path), spill
 * all declared inputs into an alloca'd array so they can be indexed
 * dynamically with GEPs at run time. */
static void
emit_prologue(struct lp_build_nir_soa_context *bld)
{
   struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
   if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
      uint32_t num_inputs = util_bitcount64(bld->bld_base.shader->info.inputs_read);
      unsigned index, chan;
      LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
      /* 4 channels (xyzw) per input slot. */
      LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                                vec_type, array_size,
                                                "input_array");

      /* Copy every populated input channel into the array. */
      for (index = 0; index < num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }
}
1822
1823
/* Subgroup vote_any / vote_all / vote_ieq / vote_feq.
 *
 * The vote has to honour the exec mask, so the vector is scalarized with
 * a per-lane loop; inactive lanes are skipped.  The scalar verdict is
 * accumulated in res_store (0/-1) and broadcast to all lanes at the end.
 *
 * NOTE(review): for ieq/feq, if no lane is active, res_store is read
 * without ever being written — presumably callers guarantee at least one
 * active lane here; confirm.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Per-lane "is this lane active" predicate vector. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
       instr->intrinsic == nir_intrinsic_vote_feq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      /* Remember this lane's value as the reference, start verdict at true.
       * Later active lanes overwrite eq_store, so the reference ends up
       * being the LAST active lane's value — equality is symmetric, so
       * the final verdict is the same. */
      LLVMBuildStore(builder, value_ptr, eq_store);
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad(builder, eq_store, "");
   } else {
      /* any: start false (0); all: start true (-1). */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   /* Fold every active lane's value into the verdict. */
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad(builder, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_feq) {
      /* Compare as floats of the source bit size (unordered-equal). */
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* All lanes see the same verdict. */
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1888
1889
/* Subgroup ballot: build a scalar bitmask with bit i set when lane i is
 * active AND its source value is true, then broadcast it to all lanes.
 * Scalarized with a per-lane loop because of the exec mask. */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Inactive lanes contribute 0. */
   src = LLVMBuildAnd(builder, src, exec_mask, "");
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   res = LLVMBuildLoad(builder, res_store, "");
   /* OR in bit <lane> when the (all-ones) lane value has that bit set. */
   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
   LLVMBuildStore(builder, res, res_store);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, LLVMBuildLoad(builder, res_store, ""));
}
1911
1912
/* Subgroup elect: return a vector that is true (-1) only in the first
 * active lane and zero everywhere else.  A per-lane loop finds the
 * lowest-indexed active lane (found_store latches the first hit). */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* Lane is active (mask lane == ~0)... */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...and no earlier lane has been chosen yet. */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad(builder, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");

   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   /* All-zero vector with -1 inserted at the elected lane's index. */
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad(builder, idx_store, ""),
                                      "");
}
1947
1948
/* Subgroup reduce / inclusive_scan / exclusive_scan.
 *
 * Scalarized with a per-lane loop: scan_store holds the running scalar
 * accumulator (always stored as an integer of the source bit size; float
 * ops bitcast in and out), and res_store holds the per-lane result vector
 * for scans.  Inactive lanes are skipped entirely and, for scans, keep
 * whatever was previously in the result vector at their position.
 */
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   /* can't use llvm reduction intrinsics because of exec_mask */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);

   LLVMValueRef res_store = NULL;
   LLVMValueRef scan_store;
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   /* Scans need a per-lane result vector; a plain reduce only needs the
    * scalar accumulator. */
   if (instr->intrinsic != nir_intrinsic_reduce)
      res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");

   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");

   struct lp_build_context elem_bld;
   bool is_flt = reduction_op == nir_op_fadd ||
      reduction_op == nir_op_fmul ||
      reduction_op == nir_op_fmin ||
      reduction_op == nir_op_fmax;
   bool is_unsigned = reduction_op == nir_op_umin ||
      reduction_op == nir_op_umax;

   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
      get_int_bld(bld_base, is_unsigned, bit_size);

   /* Scalar (length-1) build context matching the element type. */
   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));

   LLVMValueRef store_val = NULL;
   /*
    * Put the identity value for the operation into the storage
    */
   switch (reduction_op) {
   case nir_op_fmin: {
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
         lp_build_const_float(gallivm, INFINITY);
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmax: {
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
         lp_build_const_float(gallivm, -INFINITY);
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmul: {
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
         lp_build_const_float(gallivm, 1.0);
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
      break;
   }
   /* NOTE(review): the integer identities below are built as 32-bit
    * constants regardless of bit_size, unlike the float cases which
    * handle 64-bit — presumably only 32-bit integer reductions reach
    * here; confirm against the NIR lowering options. */
   case nir_op_umin:
      store_val = lp_build_const_int32(gallivm, UINT_MAX);
      break;
   case nir_op_imin:
      store_val = lp_build_const_int32(gallivm, INT_MAX);
      break;
   case nir_op_imax:
      store_val = lp_build_const_int32(gallivm, INT_MIN);
      break;
   case nir_op_imul:
      store_val = lp_build_const_int32(gallivm, 1);
      break;
   case nir_op_iand:
      store_val = lp_build_const_int32(gallivm, 0xffffffff);
      break;
   default:
      /* iadd/ior/ixor/umax: identity is 0, which the alloca's initial
       * store below handles via store_val == NULL (no store emitted). */
      break;
   }
   if (store_val)
      LLVMBuildStore(builder, store_val, scan_store);

   /* Per-lane "is this lane active" predicate vector. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, if_cond);
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");

   LLVMValueRef res = NULL;
   LLVMValueRef scan_val = LLVMBuildLoad(gallivm->builder, scan_store, "");
   if (instr->intrinsic != nir_intrinsic_reduce)
      res = LLVMBuildLoad(gallivm->builder, res_store, "");

   /* Exclusive scan: this lane's result is the accumulator BEFORE its
    * own value is folded in. */
   if (instr->intrinsic == nir_intrinsic_exclusive_scan)
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");

   if (is_flt) {
      scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
      value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
   }
   /* Fold the lane's value into the accumulator. */
   switch (reduction_op) {
   case nir_op_fadd:
   case nir_op_iadd:
      scan_val = lp_build_add(&elem_bld, value, scan_val);
      break;
   case nir_op_fmul:
   case nir_op_imul:
      scan_val = lp_build_mul(&elem_bld, value, scan_val);
      break;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      scan_val = lp_build_min(&elem_bld, value, scan_val);
      break;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      scan_val = lp_build_max(&elem_bld, value, scan_val);
      break;
   case nir_op_iand:
      scan_val = lp_build_and(&elem_bld, value, scan_val);
      break;
   case nir_op_ior:
      scan_val = lp_build_or(&elem_bld, value, scan_val);
      break;
   case nir_op_ixor:
      scan_val = lp_build_xor(&elem_bld, value, scan_val);
      break;
   default:
      assert(0);
      break;
   }
   if (is_flt)
      scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
   LLVMBuildStore(builder, scan_val, scan_store);

   /* Inclusive scan: this lane's result includes its own value. */
   if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
   }

   if (instr->intrinsic != nir_intrinsic_reduce)
      LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   if (instr->intrinsic == nir_intrinsic_reduce)
      result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad(builder, scan_store, ""));
   else
      result[0] = LLVMBuildLoad(builder, res_store, "");
}
2097
2098
/* read_invocation / read_first_invocation.
 *
 * With invoc supplied (read_invocation), lane 0 of invoc picks the source
 * lane.  Without it (read_first_invocation), a countdown loop scans the
 * exec mask so that the store from the lowest active lane wins.  The
 * chosen lane's value is broadcast to every lane. */
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef src,
                                 unsigned bit_size,
                                 LLVMValueRef invoc,
                                 LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef idx;
   struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
   if (invoc) {
      idx = invoc;
      idx = LLVMBuildExtractElement(gallivm->builder, idx, lp_build_const_int32(gallivm, 0), "");
   } else {
      /* have to find the first active invocation */
      LLVMValueRef exec_mask = mask_vec(bld_base);
      struct lp_build_loop_state loop_state;
      LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
      LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
      /* Counts DOWN from length to 0 (step -1, stop at -1), so later
       * stores come from lower lane indices and the last store is the
       * first active lane.
       * NOTE(review): the first iteration uses counter == type.length as
       * an extract index, which is out of range for the vector — confirm
       * intended lp_build_loop semantics / whether this should start at
       * length - 1. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length));

      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
      struct lp_build_if_state ifthen;

      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMBuildStore(builder, loop_state.counter, res_store);
      lp_build_endif(&ifthen);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, -1),
                             lp_build_const_int32(gallivm, -1), LLVMIntEQ);
      idx = LLVMBuildLoad(builder, res_store, "");
   }

   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
                                                src, idx, "");
   result[0] = lp_build_broadcast_scalar(uint_bld, value);
}
2135
2136
static void
2137
emit_interp_at(struct lp_build_nir_context *bld_base,
2138
unsigned num_components,
2139
nir_variable *var,
2140
bool centroid,
2141
bool sample,
2142
unsigned const_index,
2143
LLVMValueRef indir_index,
2144
LLVMValueRef offsets[2],
2145
LLVMValueRef dst[4])
2146
{
2147
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2148
2149
for (unsigned i = 0; i < num_components; i++) {
2150
dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2151
const_index + var->data.driver_location, i + var->data.location_frac,
2152
centroid, sample, indir_index, offsets);
2153
}
2154
}
2155
2156
static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2157
struct lp_type type,
2158
unsigned scratch_size)
2159
{
2160
LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2161
LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2162
unsigned i;
2163
2164
if (type.length == 1)
2165
return LLVMConstInt(elem_type, 0, 0);
2166
2167
for (i = 0; i < type.length; ++i)
2168
elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2169
2170
return LLVMConstVector(elems, type.length);
2171
}
2172
2173
static void
2174
emit_load_scratch(struct lp_build_nir_context *bld_base,
2175
unsigned nc, unsigned bit_size,
2176
LLVMValueRef offset,
2177
LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2178
{
2179
struct gallivm_state * gallivm = bld_base->base.gallivm;
2180
LLVMBuilderRef builder = gallivm->builder;
2181
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2182
struct lp_build_context *uint_bld = &bld_base->uint_bld;
2183
struct lp_build_context *load_bld;
2184
LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2185
uint32_t shift_val = bit_size_to_shift_size(bit_size);
2186
2187
load_bld = get_int_bld(bld_base, true, bit_size);
2188
2189
offset = lp_build_add(uint_bld, offset, thread_offsets);
2190
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2191
for (unsigned c = 0; c < nc; c++) {
2192
LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2193
LLVMValueRef exec_mask = mask_vec(bld_base);
2194
2195
LLVMValueRef result = lp_build_alloca(gallivm, load_bld->vec_type, "");
2196
struct lp_build_loop_state loop_state;
2197
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2198
2199
struct lp_build_if_state ifthen;
2200
LLVMValueRef cond, temp_res;
2201
2202
loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2203
loop_state.counter, "");
2204
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2205
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2206
2207
lp_build_if(&ifthen, gallivm, cond);
2208
LLVMValueRef scalar;
2209
LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(load_bld->elem_type, 0), "");
2210
scalar = lp_build_pointer_get(builder, ptr2, loop_index);
2211
2212
temp_res = LLVMBuildLoad(builder, result, "");
2213
temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
2214
LLVMBuildStore(builder, temp_res, result);
2215
lp_build_else(&ifthen);
2216
temp_res = LLVMBuildLoad(builder, result, "");
2217
LLVMValueRef zero;
2218
if (bit_size == 64)
2219
zero = LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
2220
else if (bit_size == 16)
2221
zero = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
2222
else if (bit_size == 8)
2223
zero = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
2224
else
2225
zero = lp_build_const_int32(gallivm, 0);
2226
temp_res = LLVMBuildInsertElement(builder, temp_res, zero, loop_state.counter, "");
2227
LLVMBuildStore(builder, temp_res, result);
2228
lp_build_endif(&ifthen);
2229
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2230
NULL, LLVMIntUGE);
2231
outval[c] = LLVMBuildLoad(gallivm->builder, result, "");
2232
}
2233
}
2234
2235
static void
2236
emit_store_scratch(struct lp_build_nir_context *bld_base,
2237
unsigned writemask, unsigned nc,
2238
unsigned bit_size, LLVMValueRef offset,
2239
LLVMValueRef dst)
2240
{
2241
struct gallivm_state * gallivm = bld_base->base.gallivm;
2242
LLVMBuilderRef builder = gallivm->builder;
2243
struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2244
struct lp_build_context *uint_bld = &bld_base->uint_bld;
2245
struct lp_build_context *store_bld;
2246
LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);;
2247
uint32_t shift_val = bit_size_to_shift_size(bit_size);
2248
store_bld = get_int_bld(bld_base, true, bit_size);
2249
2250
LLVMValueRef exec_mask = mask_vec(bld_base);
2251
offset = lp_build_add(uint_bld, offset, thread_offsets);
2252
offset = lp_build_shr_imm(uint_bld, offset, shift_val);
2253
2254
for (unsigned c = 0; c < nc; c++) {
2255
if (!(writemask & (1u << c)))
2256
continue;
2257
LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2258
LLVMValueRef loop_index = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
2259
2260
struct lp_build_loop_state loop_state;
2261
lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2262
2263
LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
2264
loop_state.counter, "");
2265
value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
2266
2267
struct lp_build_if_state ifthen;
2268
LLVMValueRef cond;
2269
2270
loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
2271
loop_state.counter, "");
2272
2273
cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
2274
cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
2275
lp_build_if(&ifthen, gallivm, cond);
2276
2277
LLVMValueRef ptr2 = LLVMBuildBitCast(builder, bld->scratch_ptr, LLVMPointerType(store_bld->elem_type, 0), "");
2278
lp_build_pointer_set(builder, ptr2, loop_index, value_ptr);
2279
2280
lp_build_endif(&ifthen);
2281
lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
2282
NULL, LLVMIntUGE);
2283
}
2284
}
2285
2286
void lp_build_nir_soa(struct gallivm_state *gallivm,
2287
struct nir_shader *shader,
2288
const struct lp_build_tgsi_params *params,
2289
LLVMValueRef (*outputs)[4])
2290
{
2291
struct lp_build_nir_soa_context bld;
2292
struct lp_type type = params->type;
2293
struct lp_type res_type;
2294
2295
assert(type.length <= LP_MAX_VECTOR_LENGTH);
2296
memset(&res_type, 0, sizeof res_type);
2297
res_type.width = type.width;
2298
res_type.length = type.length;
2299
res_type.sign = 1;
2300
2301
/* Setup build context */
2302
memset(&bld, 0, sizeof bld);
2303
lp_build_context_init(&bld.bld_base.base, gallivm, type);
2304
lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2305
lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2306
lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2307
lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2308
{
2309
struct lp_type dbl_type;
2310
dbl_type = type;
2311
dbl_type.width *= 2;
2312
lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2313
}
2314
{
2315
struct lp_type uint64_type;
2316
uint64_type = lp_uint_type(type);
2317
uint64_type.width *= 2;
2318
lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2319
}
2320
{
2321
struct lp_type int64_type;
2322
int64_type = lp_int_type(type);
2323
int64_type.width *= 2;
2324
lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2325
}
2326
{
2327
struct lp_type uint16_type;
2328
uint16_type = lp_uint_type(type);
2329
uint16_type.width /= 2;
2330
lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2331
}
2332
{
2333
struct lp_type int16_type;
2334
int16_type = lp_int_type(type);
2335
int16_type.width /= 2;
2336
lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
2337
}
2338
{
2339
struct lp_type uint8_type;
2340
uint8_type = lp_uint_type(type);
2341
uint8_type.width /= 4;
2342
lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
2343
}
2344
{
2345
struct lp_type int8_type;
2346
int8_type = lp_int_type(type);
2347
int8_type.width /= 4;
2348
lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
2349
}
2350
bld.bld_base.load_var = emit_load_var;
2351
bld.bld_base.store_var = emit_store_var;
2352
bld.bld_base.load_reg = emit_load_reg;
2353
bld.bld_base.store_reg = emit_store_reg;
2354
bld.bld_base.emit_var_decl = emit_var_decl;
2355
bld.bld_base.load_ubo = emit_load_ubo;
2356
bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
2357
bld.bld_base.load_global = emit_load_global;
2358
bld.bld_base.store_global = emit_store_global;
2359
bld.bld_base.atomic_global = emit_atomic_global;
2360
bld.bld_base.tex = emit_tex;
2361
bld.bld_base.tex_size = emit_tex_size;
2362
bld.bld_base.bgnloop = bgnloop;
2363
bld.bld_base.endloop = endloop;
2364
bld.bld_base.if_cond = if_cond;
2365
bld.bld_base.else_stmt = else_stmt;
2366
bld.bld_base.endif_stmt = endif_stmt;
2367
bld.bld_base.break_stmt = break_stmt;
2368
bld.bld_base.continue_stmt = continue_stmt;
2369
bld.bld_base.sysval_intrin = emit_sysval_intrin;
2370
bld.bld_base.discard = discard;
2371
bld.bld_base.emit_vertex = emit_vertex;
2372
bld.bld_base.end_primitive = end_primitive;
2373
bld.bld_base.load_mem = emit_load_mem;
2374
bld.bld_base.store_mem = emit_store_mem;
2375
bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
2376
bld.bld_base.atomic_mem = emit_atomic_mem;
2377
bld.bld_base.barrier = emit_barrier;
2378
bld.bld_base.image_op = emit_image_op;
2379
bld.bld_base.image_size = emit_image_size;
2380
bld.bld_base.vote = emit_vote;
2381
bld.bld_base.elect = emit_elect;
2382
bld.bld_base.reduce = emit_reduce;
2383
bld.bld_base.ballot = emit_ballot;
2384
bld.bld_base.read_invocation = emit_read_invocation;
2385
bld.bld_base.helper_invocation = emit_helper_invocation;
2386
bld.bld_base.interp_at = emit_interp_at;
2387
bld.bld_base.load_scratch = emit_load_scratch;
2388
bld.bld_base.store_scratch = emit_store_scratch;
2389
2390
bld.mask = params->mask;
2391
bld.inputs = params->inputs;
2392
bld.outputs = outputs;
2393
bld.consts_ptr = params->consts_ptr;
2394
bld.const_sizes_ptr = params->const_sizes_ptr;
2395
bld.ssbo_ptr = params->ssbo_ptr;
2396
bld.ssbo_sizes_ptr = params->ssbo_sizes_ptr;
2397
bld.sampler = params->sampler;
2398
// bld.bld_base.info = params->info;
2399
2400
bld.context_ptr = params->context_ptr;
2401
bld.thread_data_ptr = params->thread_data_ptr;
2402
bld.image = params->image;
2403
bld.shared_ptr = params->shared_ptr;
2404
bld.coro = params->coro;
2405
bld.kernel_args_ptr = params->kernel_args;
2406
bld.indirects = 0;
2407
if (params->info->indirect_files & (1 << TGSI_FILE_INPUT))
2408
bld.indirects |= nir_var_shader_in;
2409
2410
bld.gs_iface = params->gs_iface;
2411
bld.tcs_iface = params->tcs_iface;
2412
bld.tes_iface = params->tes_iface;
2413
bld.fs_iface = params->fs_iface;
2414
if (bld.gs_iface) {
2415
struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
2416
2417
bld.gs_vertex_streams = params->gs_vertex_streams;
2418
bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
2419
shader->info.gs.vertices_out);
2420
for (int i = 0; i < params->gs_vertex_streams; i++) {
2421
bld.emitted_prims_vec_ptr[i] =
2422
lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
2423
bld.emitted_vertices_vec_ptr[i] =
2424
lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
2425
bld.total_emitted_vertices_vec_ptr[i] =
2426
lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
2427
}
2428
}
2429
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
2430
2431
bld.system_values = *params->system_values;
2432
2433
bld.bld_base.shader = shader;
2434
2435
if (shader->scratch_size) {
2436
bld.scratch_ptr = lp_build_array_alloca(gallivm,
2437
LLVMInt8TypeInContext(gallivm->context),
2438
lp_build_const_int32(gallivm, shader->scratch_size * type.length),
2439
"scratch");
2440
}
2441
bld.scratch_size = shader->scratch_size;
2442
emit_prologue(&bld);
2443
lp_build_nir_llvm(&bld.bld_base, shader);
2444
2445
if (bld.gs_iface) {
2446
LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
2447
LLVMValueRef total_emitted_vertices_vec;
2448
LLVMValueRef emitted_prims_vec;
2449
2450
for (int i = 0; i < params->gs_vertex_streams; i++) {
2451
end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
2452
2453
total_emitted_vertices_vec =
2454
LLVMBuildLoad(builder, bld.total_emitted_vertices_vec_ptr[i], "");
2455
2456
emitted_prims_vec =
2457
LLVMBuildLoad(builder, bld.emitted_prims_vec_ptr[i], "");
2458
bld.gs_iface->gs_epilogue(bld.gs_iface,
2459
total_emitted_vertices_vec,
2460
emitted_prims_vec, i);
2461
}
2462
}
2463
lp_exec_mask_fini(&bld.exec_mask);
2464
}
2465
2466