GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/intel/vulkan/anv_nir_apply_pipeline_layout.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_nir.h"
#include "program/prog_parameter.h"
#include "nir/nir_builder.h"
#include "compiler/brw_nir.h"
#include "util/mesa-sha1.h"
#include "util/set.h"

/* Sampler tables don't actually have a maximum size but we pick one just so
 * that we don't end up emitting too much state on-the-fly.
 */
#define MAX_SAMPLER_TABLE_SIZE 128
#define BINDLESS_OFFSET 255

struct apply_pipeline_layout_state {
   const struct anv_physical_device *pdevice;

   const struct anv_pipeline_layout *layout;
   bool add_bounds_checks;
   nir_address_format desc_addr_format;
   nir_address_format ssbo_addr_format;
   nir_address_format ubo_addr_format;

   /* Place to flag lowered instructions so we don't lower them twice */
   struct set *lowered_instrs;

   bool uses_constants;
   bool has_dynamic_buffers;
   uint8_t constants_offset;
   struct {
      bool desc_buffer_used;
      uint8_t desc_offset;

      uint8_t *use_count;
      uint8_t *surface_offsets;
      uint8_t *sampler_offsets;
   } set[MAX_SETS];
};
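
/* The per-set tables above are indexed by binding number within the set:
 * use_count[] is filled in by get_used_bindings() below, while desc_offset,
 * surface_offsets[] and sampler_offsets[] receive either a binding-table /
 * sampler-table slot or BINDLESS_OFFSET once anv_nir_apply_pipeline_layout()
 * has decided how each binding will be accessed.
 */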

static nir_address_format
addr_format_for_desc_type(VkDescriptorType desc_type,
                          struct apply_pipeline_layout_state *state)
{
   switch (desc_type) {
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      return state->ssbo_addr_format;

   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
      return state->ubo_addr_format;

   case VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT:
      return state->desc_addr_format;

   default:
      unreachable("Unsupported descriptor type");
   }
}

static void
add_binding(struct apply_pipeline_layout_state *state,
            uint32_t set, uint32_t binding)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if (state->set[set].use_count[binding] < UINT8_MAX)
      state->set[set].use_count[binding]++;

   /* Only flag the descriptor buffer as used if there's actually data for
    * this binding. This lets us be lazy and call this function constantly
    * without worrying about unnecessarily enabling the buffer.
    */
   if (anv_descriptor_size(bind_layout))
      state->set[set].desc_buffer_used = true;
}

static void
add_deref_src_binding(struct apply_pipeline_layout_state *state, nir_src src)
{
   nir_deref_instr *deref = nir_src_as_deref(src);
   nir_variable *var = nir_deref_instr_get_variable(deref);
   add_binding(state, var->data.descriptor_set, var->data.binding);
}

static void
add_tex_src_binding(struct apply_pipeline_layout_state *state,
                    nir_tex_instr *tex, nir_tex_src_type deref_src_type)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   add_deref_src_binding(state, tex->src[deref_src_idx].src);
}

static bool
get_used_bindings(UNUSED nir_builder *_b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         add_binding(state, nir_intrinsic_desc_set(intrin),
                     nir_intrinsic_binding(intrin));
         break;

      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic_add:
      case nir_intrinsic_image_deref_atomic_imin:
      case nir_intrinsic_image_deref_atomic_umin:
      case nir_intrinsic_image_deref_atomic_imax:
      case nir_intrinsic_image_deref_atomic_umax:
      case nir_intrinsic_image_deref_atomic_and:
      case nir_intrinsic_image_deref_atomic_or:
      case nir_intrinsic_image_deref_atomic_xor:
      case nir_intrinsic_image_deref_atomic_exchange:
      case nir_intrinsic_image_deref_atomic_comp_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         add_deref_src_binding(state, intrin->src[0]);
         break;

      case nir_intrinsic_load_constant:
         state->uses_constants = true;
         break;

      default:
         break;
      }
      break;
   }
   case nir_instr_type_tex: {
      nir_tex_instr *tex = nir_instr_as_tex(instr);
      add_tex_src_binding(state, tex, nir_tex_src_texture_deref);
      add_tex_src_binding(state, tex, nir_tex_src_sampler_deref);
      break;
   }
   default:
      break;
   }

   return false;
}

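/* Walks a vulkan_resource_reindex chain back to the vulkan_resource_index
 * intrinsic that roots it. Returns NULL when the chain cannot be traced back
 * to one.
 */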
static nir_intrinsic_instr *
find_descriptor_for_index_src(nir_src src,
                              struct apply_pipeline_layout_state *state)
{
   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(src);

   while (intrin && intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex)
      intrin = nir_src_as_intrinsic(intrin->src[0]);

   if (!intrin || intrin->intrinsic != nir_intrinsic_vulkan_resource_index)
      return NULL;

   return intrin;
}

static bool
descriptor_has_bti(nir_intrinsic_instr *intrin,
                   struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t surface_index;
   if (bind_layout->data & ANV_DESCRIPTOR_INLINE_UNIFORM)
      surface_index = state->set[set].desc_offset;
   else
      surface_index = state->set[set].surface_offsets[binding];

   /* Only lower to a BTI message if we have a valid binding table index. */
   return surface_index < MAX_BINDING_TABLE_SIZE;
}

static nir_address_format
descriptor_address_format(nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_index);

   uint32_t set = nir_intrinsic_desc_set(intrin);
   uint32_t binding = nir_intrinsic_binding(intrin);
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   return addr_format_for_desc_type(bind_layout->type, state);
}

static nir_intrinsic_instr *
nir_deref_find_descriptor(nir_deref_instr *deref,
                          struct apply_pipeline_layout_state *state)
{
   while (1) {
      /* Nothing we will use this on has a variable */
      assert(deref->deref_type != nir_deref_type_var);

      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
      if (!parent)
         break;

      deref = parent;
   }
   assert(deref->deref_type == nir_deref_type_cast);

   nir_intrinsic_instr *intrin = nir_src_as_intrinsic(deref->parent);
   if (!intrin || intrin->intrinsic != nir_intrinsic_load_vulkan_descriptor)
      return false;

   return find_descriptor_for_index_src(intrin->src[0], state);
}

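/* Loads num_components x bit_size of raw descriptor data at desc_offset from
 * the descriptor buffer identified by desc_addr. Depending on
 * state->desc_addr_format the descriptor set is reached either through a
 * 64-bit address (A64 constant load) or through its own binding-table
 * surface (UBO load).
 */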
static nir_ssa_def *
build_load_descriptor_mem(nir_builder *b,
                          nir_ssa_def *desc_addr, unsigned desc_offset,
                          unsigned num_components, unsigned bit_size,
                          struct apply_pipeline_layout_state *state)

{
   switch (state->desc_addr_format) {
   case nir_address_format_64bit_global_32bit_offset: {
      nir_ssa_def *base_addr =
         nir_pack_64_2x32(b, nir_channels(b, desc_addr, 0x3));
      nir_ssa_def *offset32 =
         nir_iadd_imm(b, nir_channel(b, desc_addr, 3), desc_offset);

      return nir_load_global_constant_offset(b, num_components, bit_size,
                                             base_addr, offset32,
                                             .align_mul = 8,
                                             .align_offset = desc_offset % 8);
   }

   case nir_address_format_32bit_index_offset: {
      nir_ssa_def *surface_index = nir_channel(b, desc_addr, 0);
      nir_ssa_def *offset32 =
         nir_iadd_imm(b, nir_channel(b, desc_addr, 1), desc_offset);

      return nir_load_ubo(b, num_components, bit_size,
                          surface_index, offset32,
                          .align_mul = 8,
                          .align_offset = desc_offset % 8,
                          .range_base = 0,
                          .range = ~0);
   }

   default:
      unreachable("Unsupported address format");
   }
}

/** Build a Vulkan resource index
 *
 * A "resource index" is the term used by our SPIR-V parser and the relevant
 * NIR intrinsics for a reference into a descriptor set. It acts much like a
 * deref in NIR except that it accesses opaque descriptors instead of memory.
 *
 * Coming out of SPIR-V, both the resource indices (in the form of
 * vulkan_resource_[re]index intrinsics) and the memory derefs (in the form
 * of nir_deref_instr) use the same vector component/bit size. The meaning
 * of those values for memory derefs (nir_deref_instr) is given by the
 * nir_address_format associated with the descriptor type. For resource
 * indices, it's an encoding entirely internal to ANV which describes, in
 * some sense, the address of the descriptor. Thanks to the NIR/SPIR-V rules,
 * it must be packed into the same size SSA values as a memory address. For
 * this reason, the actual encoding may depend both on the address format for
 * memory derefs and the descriptor address format.
 *
 * The load_vulkan_descriptor intrinsic exists to provide a transition point
 * between these two forms of derefs: descriptor and memory.
 */
static nir_ssa_def *
build_res_index(nir_builder *b, uint32_t set, uint32_t binding,
                nir_ssa_def *array_index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   uint32_t array_size = bind_layout->array_size;

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      uint32_t set_idx;
      switch (state->desc_addr_format) {
      case nir_address_format_64bit_global_32bit_offset:
         set_idx = set;
         break;

      case nir_address_format_32bit_index_offset:
         assert(state->set[set].desc_offset < MAX_BINDING_TABLE_SIZE);
         set_idx = state->set[set].desc_offset;
         break;

      default:
         unreachable("Unsupported address format");
      }

      assert(bind_layout->dynamic_offset_index < MAX_DYNAMIC_BUFFERS);
      uint32_t dynamic_offset_index = 0xff; /* No dynamic offset */
      if (bind_layout->dynamic_offset_index >= 0) {
         dynamic_offset_index =
            state->layout->set[set].dynamic_offset_start +
            bind_layout->dynamic_offset_index;
      }

      const uint32_t packed = (set_idx << 16) | dynamic_offset_index;

      return nir_vec4(b, nir_imm_int(b, packed),
                      nir_imm_int(b, bind_layout->descriptor_offset),
                      nir_imm_int(b, array_size - 1),
                      array_index);
   }

   case nir_address_format_32bit_index_offset: {
      assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
      if (bind_layout->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
         uint32_t surface_index = state->set[set].desc_offset;
         return nir_imm_ivec2(b, surface_index,
                              bind_layout->descriptor_offset);
      } else {
         uint32_t surface_index = state->set[set].surface_offsets[binding];
         assert(array_size > 0 && array_size <= UINT16_MAX);
         assert(surface_index <= UINT16_MAX);
         uint32_t packed = ((array_size - 1) << 16) | surface_index;
         return nir_vec2(b, array_index, nir_imm_int(b, packed));
      }
   }

   default:
      unreachable("Unsupported address format");
   }
}

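/* For the 64-bit global formats, build_res_index() above packs the resource
 * index as a vec4:
 *
 *    comp 0: (set index or descriptor-buffer BTI) << 16 | dynamic offset base
 *    comp 1: descriptor offset of the binding within the set
 *    comp 2: array size - 1
 *    comp 3: array index
 *
 * unpack_res_index() below reverses that packing; clamping the array index
 * with umin(comp 2, comp 3) is what keeps out-of-bounds descriptor array
 * access in range.
 */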
struct res_index_defs {
   nir_ssa_def *set_idx;
   nir_ssa_def *dyn_offset_base;
   nir_ssa_def *desc_offset_base;
   nir_ssa_def *array_index;
};

static struct res_index_defs
unpack_res_index(nir_builder *b, nir_ssa_def *index)
{
   struct res_index_defs defs;

   nir_ssa_def *packed = nir_channel(b, index, 0);
   defs.set_idx = nir_extract_u16(b, packed, nir_imm_int(b, 1));
   defs.dyn_offset_base = nir_extract_u16(b, packed, nir_imm_int(b, 0));

   defs.desc_offset_base = nir_channel(b, index, 1);
   defs.array_index = nir_umin(b, nir_channel(b, index, 2),
                               nir_channel(b, index, 3));

   return defs;
}

/** Adjust a Vulkan resource index
 *
 * This is the equivalent of nir_deref_type_ptr_as_array for resource indices.
 * For array descriptors, it allows us to adjust the array index. Thanks to
 * variable pointers, we cannot always fold this re-index operation into the
 * vulkan_resource_index intrinsic and we have to do it based on nothing but
 * the address format.
 */
static nir_ssa_def *
build_res_reindex(nir_builder *b, nir_ssa_def *orig, nir_ssa_def *delta,
                  nir_address_format addr_format)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global:
      return nir_vec4(b, nir_channel(b, orig, 0),
                      nir_channel(b, orig, 1),
                      nir_channel(b, orig, 2),
                      nir_iadd(b, nir_channel(b, orig, 3), delta));

   case nir_address_format_32bit_index_offset:
      return nir_vec2(b, nir_iadd(b, nir_channel(b, orig, 0), delta),
                      nir_channel(b, orig, 1));

   default:
      unreachable("Unhandled address format");
   }
}

/** Get the address for a descriptor given its resource index
 *
 * Because of the re-indexing operations, we can't bounds check descriptor
 * array access until we have the final index. That means we end up doing the
 * bounds check here, if needed. See unpack_res_index() for more details.
 *
 * This function takes both a bind_layout and a desc_type which are used to
 * determine the descriptor stride for array descriptors. The bind_layout is
 * optional for buffer descriptor types.
 */
static nir_ssa_def *
build_desc_addr(nir_builder *b,
                const struct anv_descriptor_set_binding_layout *bind_layout,
                const VkDescriptorType desc_type,
                nir_ssa_def *index, nir_address_format addr_format,
                struct apply_pipeline_layout_state *state)
{
   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      struct res_index_defs res = unpack_res_index(b, index);

      nir_ssa_def *desc_offset = res.desc_offset_base;
      if (desc_type != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
         /* Compute the actual descriptor offset. For inline uniform blocks,
          * the array index is ignored as they are only allowed to be a single
          * descriptor (not an array) and there is no concept of a "stride".
          *
          * We use the bind_layout, if available, because it provides a more
          * accurate descriptor size.
          */
         const unsigned stride = bind_layout ?
            anv_descriptor_size(bind_layout) :
            anv_descriptor_type_size(state->pdevice, desc_type);

         desc_offset =
            nir_iadd(b, desc_offset, nir_imul_imm(b, res.array_index, stride));
      }

      switch (state->desc_addr_format) {
      case nir_address_format_64bit_global_32bit_offset: {
         nir_ssa_def *base_addr =
            nir_load_desc_set_address_intel(b, res.set_idx);
         return nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_addr),
                         nir_unpack_64_2x32_split_y(b, base_addr),
                         nir_imm_int(b, UINT32_MAX),
                         desc_offset);
      }

      case nir_address_format_32bit_index_offset:
         return nir_vec2(b, res.set_idx, desc_offset);

      default:
         unreachable("Unhandled address format");
      }
   }

   case nir_address_format_32bit_index_offset:
      assert(desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT);
      assert(state->desc_addr_format == nir_address_format_32bit_index_offset);
      return index;

   default:
      unreachable("Unhandled address format");
   }
}

/** Convert a Vulkan resource index into a buffer address
 *
 * In some cases, this does a memory load from the descriptor set and, in
 * others, it simply converts from one form to another.
 *
 * See build_res_index for details about each resource index format.
 */
static nir_ssa_def *
build_buffer_addr_for_res_index(nir_builder *b,
                                const VkDescriptorType desc_type,
                                nir_ssa_def *res_index,
                                nir_address_format addr_format,
                                struct apply_pipeline_layout_state *state)
{
   if (desc_type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT) {
      assert(addr_format == state->desc_addr_format);
      return build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);
   } else if (addr_format == nir_address_format_32bit_index_offset) {
      nir_ssa_def *array_index = nir_channel(b, res_index, 0);
      nir_ssa_def *packed = nir_channel(b, res_index, 1);
      nir_ssa_def *array_max = nir_extract_u16(b, packed, nir_imm_int(b, 1));
      nir_ssa_def *surface_index = nir_extract_u16(b, packed, nir_imm_int(b, 0));

      if (state->add_bounds_checks)
         array_index = nir_umin(b, array_index, array_max);

      return nir_vec2(b, nir_iadd(b, surface_index, array_index),
                      nir_imm_int(b, 0));
   }

   nir_ssa_def *desc_addr =
      build_desc_addr(b, NULL, desc_type, res_index, addr_format, state);

   nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 4, 32, state);

   if (state->has_dynamic_buffers) {
      struct res_index_defs res = unpack_res_index(b, res_index);

      /* This shader has dynamic offsets and we have no way of knowing
       * (save for the dynamic offset base index) if this buffer has a
       * dynamic offset.
       */
      nir_ssa_def *dyn_offset_idx =
         nir_iadd(b, res.dyn_offset_base, res.array_index);
      if (state->add_bounds_checks) {
         dyn_offset_idx = nir_umin(b, dyn_offset_idx,
                                   nir_imm_int(b, MAX_DYNAMIC_BUFFERS));
      }

      nir_ssa_def *dyn_load =
         nir_load_push_constant(b, 1, 32, nir_imul_imm(b, dyn_offset_idx, 4),
                                .base = offsetof(struct anv_push_constants, dynamic_offsets),
                                .range = MAX_DYNAMIC_BUFFERS * 4);

      nir_ssa_def *dynamic_offset =
         nir_bcsel(b, nir_ieq_imm(b, res.dyn_offset_base, 0xff),
                   nir_imm_int(b, 0), dyn_load);

      /* The dynamic offset gets added to the base pointer so that we
       * have a sliding window range.
       */
      nir_ssa_def *base_ptr =
         nir_pack_64_2x32(b, nir_channels(b, desc, 0x3));
      base_ptr = nir_iadd(b, base_ptr, nir_u2u64(b, dynamic_offset));
      desc = nir_vec4(b, nir_unpack_64_2x32_split_x(b, base_ptr),
                      nir_unpack_64_2x32_split_y(b, base_ptr),
                      nir_channel(b, desc, 2),
                      nir_channel(b, desc, 3));
   }

   /* The last element of the vec4 is always zero.
    *
    * See also struct anv_address_range_descriptor
    */
   return nir_vec4(b, nir_channel(b, desc, 0),
                   nir_channel(b, desc, 1),
                   nir_channel(b, desc, 2),
                   nir_imm_int(b, 0));
}

/** Loads descriptor memory for a variable-based deref chain
 *
 * The deref chain has to terminate at a variable with a descriptor_set and
 * binding set. This is used for images, textures, and samplers.
 */
static nir_ssa_def *
build_load_var_deref_descriptor_mem(nir_builder *b, nir_deref_instr *deref,
                                    unsigned desc_offset,
                                    unsigned num_components, unsigned bit_size,
                                    struct apply_pipeline_layout_state *state)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   const uint32_t set = var->data.descriptor_set;
   const uint32_t binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   nir_ssa_def *array_index;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      assert(nir_deref_instr_parent(deref)->deref_type == nir_deref_type_var);
      assert(deref->arr.index.is_ssa);
      array_index = deref->arr.index.ssa;
   } else {
      array_index = nir_imm_int(b, 0);
   }

   /* It doesn't really matter what address format we choose as everything
    * will constant-fold nicely. Choose one that uses the actual descriptor
    * buffer so we don't run into issues with index/offset assumptions.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   nir_ssa_def *res_index =
      build_res_index(b, set, binding, array_index, addr_format, state);

   nir_ssa_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   return build_load_descriptor_mem(b, desc_addr, desc_offset,
                                    num_components, bit_size, state);
}

/** A recursive form of build_res_index()
 *
 * This recursively walks a resource [re]index chain and builds the resource
 * index. It places the new code with the resource [re]index operation in the
 * hopes of better CSE. This means the cursor is not where you left it when
 * this function returns.
 */
static nir_ssa_def *
build_res_index_for_chain(nir_builder *b, nir_intrinsic_instr *intrin,
                          nir_address_format addr_format,
                          uint32_t *set, uint32_t *binding,
                          struct apply_pipeline_layout_state *state)
{
   if (intrin->intrinsic == nir_intrinsic_vulkan_resource_index) {
      b->cursor = nir_before_instr(&intrin->instr);
      assert(intrin->src[0].is_ssa);
      *set = nir_intrinsic_desc_set(intrin);
      *binding = nir_intrinsic_binding(intrin);
      return build_res_index(b, *set, *binding, intrin->src[0].ssa,
                             addr_format, state);
   } else {
      assert(intrin->intrinsic == nir_intrinsic_vulkan_resource_reindex);
      nir_intrinsic_instr *parent = nir_src_as_intrinsic(intrin->src[0]);
      nir_ssa_def *index =
         build_res_index_for_chain(b, parent, addr_format,
                                   set, binding, state);

      b->cursor = nir_before_instr(&intrin->instr);

      assert(intrin->src[1].is_ssa);
      return build_res_reindex(b, index, intrin->src[1].ssa, addr_format);
   }
}

/** Builds a buffer address for a given vulkan [re]index intrinsic
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_ssa_def *
build_buffer_addr_for_idx_intrin(nir_builder *b,
                                 nir_intrinsic_instr *idx_intrin,
                                 nir_address_format addr_format,
                                 struct apply_pipeline_layout_state *state)
{
   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_ssa_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   return build_buffer_addr_for_res_index(b, bind_layout->type,
                                          res_index, addr_format, state);
}

/** Builds a buffer address for a deref chain
 *
 * This assumes that you can chase the chain all the way back to the original
 * vulkan_resource_index intrinsic.
 *
 * The cursor is not where you left it when this function returns.
 */
static nir_ssa_def *
build_buffer_addr_for_deref(nir_builder *b, nir_deref_instr *deref,
                            nir_address_format addr_format,
                            struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent) {
      nir_ssa_def *addr =
         build_buffer_addr_for_deref(b, parent, addr_format, state);

      b->cursor = nir_before_instr(&deref->instr);
      return nir_explicit_io_address_from_deref(b, deref, addr, addr_format);
   }

   nir_intrinsic_instr *load_desc = nir_src_as_intrinsic(deref->parent);
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   b->cursor = nir_before_instr(&deref->instr);

   return build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);
}

static bool
try_lower_direct_buffer_intrinsic(nir_builder *b,
                                  nir_intrinsic_instr *intrin, bool is_atomic,
                                  struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   if (!nir_deref_mode_is_one_of(deref, nir_var_mem_ubo | nir_var_mem_ssbo))
      return false;

   nir_intrinsic_instr *desc = nir_deref_find_descriptor(deref, state);
   if (desc == NULL) {
      /* We should always be able to find the descriptor for UBO access. */
      assert(nir_deref_mode_is_one_of(deref, nir_var_mem_ssbo));
      return false;
   }

   nir_address_format addr_format = descriptor_address_format(desc, state);

   if (nir_deref_mode_is(deref, nir_var_mem_ssbo)) {
      /* 64-bit atomics only support A64 messages so we can't lower them to
       * the index+offset model.
       */
      if (is_atomic && nir_dest_bit_size(intrin->dest) == 64)
         return false;

      /* Normal binding table-based messages can't handle non-uniform access
       * so we have to fall back to A64.
       */
      if (nir_intrinsic_access(intrin) & ACCESS_NON_UNIFORM)
         return false;

      if (!descriptor_has_bti(desc, state))
         return false;

      /* Rewrite to 32bit_index_offset whenever we can */
      addr_format = nir_address_format_32bit_index_offset;
   } else {
      assert(nir_deref_mode_is(deref, nir_var_mem_ubo));

      /* Rewrite to 32bit_index_offset whenever we can */
      if (descriptor_has_bti(desc, state))
         addr_format = nir_address_format_32bit_index_offset;
   }

   nir_ssa_def *addr =
      build_buffer_addr_for_deref(b, deref, addr_format, state);

   b->cursor = nir_before_instr(&intrin->instr);
   nir_lower_explicit_io_instr(b, intrin, addr, addr_format);

   return true;
}

static bool
lower_load_accel_struct_desc(nir_builder *b,
                             nir_intrinsic_instr *load_desc,
                             struct apply_pipeline_layout_state *state)
{
   assert(load_desc->intrinsic == nir_intrinsic_load_vulkan_descriptor);

   nir_intrinsic_instr *idx_intrin = nir_src_as_intrinsic(load_desc->src[0]);

   /* It doesn't really matter what address format we choose as
    * everything will constant-fold nicely. Choose one that uses the
    * actual descriptor buffer.
    */
   const nir_address_format addr_format =
      nir_address_format_64bit_bounded_global;

   uint32_t set = UINT32_MAX, binding = UINT32_MAX;
   nir_ssa_def *res_index =
      build_res_index_for_chain(b, idx_intrin, addr_format,
                                &set, &binding, state);

   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   b->cursor = nir_before_instr(&load_desc->instr);

   nir_ssa_def *desc_addr =
      build_desc_addr(b, bind_layout, bind_layout->type,
                      res_index, addr_format, state);

   /* Acceleration structure descriptors are always uint64_t */
   nir_ssa_def *desc = build_load_descriptor_mem(b, desc_addr, 0, 1, 64, state);

   assert(load_desc->dest.is_ssa);
   assert(load_desc->dest.ssa.bit_size == 64);
   assert(load_desc->dest.ssa.num_components == 1);
   nir_ssa_def_rewrite_uses(&load_desc->dest.ssa, desc);
   nir_instr_remove(&load_desc->instr);

   return true;
}

static bool
lower_direct_buffer_instr(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:
      return try_lower_direct_buffer_intrinsic(b, intrin, false, state);

   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      return try_lower_direct_buffer_intrinsic(b, intrin, true, state);

   case nir_intrinsic_get_ssbo_size: {
      /* The get_ssbo_size intrinsic always just takes an
       * index/reindex intrinsic.
       */
      nir_intrinsic_instr *idx_intrin =
         find_descriptor_for_index_src(intrin->src[0], state);
      if (idx_intrin == NULL || !descriptor_has_bti(idx_intrin, state))
         return false;

      b->cursor = nir_before_instr(&intrin->instr);

      /* We just checked that this is a BTI descriptor */
      const nir_address_format addr_format =
         nir_address_format_32bit_index_offset;

      nir_ssa_def *buffer_addr =
         build_buffer_addr_for_idx_intrin(b, idx_intrin, addr_format, state);

      b->cursor = nir_before_instr(&intrin->instr);
      nir_ssa_def *bti = nir_channel(b, buffer_addr, 0);

      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(bti));
      _mesa_set_add(state->lowered_instrs, intrin);
      return true;
   }

   case nir_intrinsic_load_vulkan_descriptor:
      if (nir_intrinsic_desc_type(intrin) ==
          VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR)
         return lower_load_accel_struct_desc(b, intrin, state);
      return false;

   default:
      return false;
   }
}

static bool
lower_res_index_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                          struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *index =
      build_res_index(b, nir_intrinsic_desc_set(intrin),
                      nir_intrinsic_binding(intrin),
                      intrin->src[0].ssa,
                      addr_format, state);

   assert(intrin->dest.is_ssa);
   assert(intrin->dest.ssa.bit_size == index->bit_size);
   assert(intrin->dest.ssa.num_components == index->num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index);
   nir_instr_remove(&intrin->instr);

   return true;
}

static bool
lower_res_reindex_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                            struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(nir_intrinsic_desc_type(intrin), state);

   assert(intrin->src[0].is_ssa && intrin->src[1].is_ssa);
   nir_ssa_def *index =
      build_res_reindex(b, intrin->src[0].ssa,
                        intrin->src[1].ssa,
                        addr_format);

   assert(intrin->dest.is_ssa);
   assert(intrin->dest.ssa.bit_size == index->bit_size);
   assert(intrin->dest.ssa.num_components == index->num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, index);
   nir_instr_remove(&intrin->instr);

   return true;
}

static bool
lower_load_vulkan_descriptor(nir_builder *b, nir_intrinsic_instr *intrin,
                             struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_before_instr(&intrin->instr);

   const VkDescriptorType desc_type = nir_intrinsic_desc_type(intrin);
   nir_address_format addr_format = addr_format_for_desc_type(desc_type, state);

   assert(intrin->dest.is_ssa);
   nir_foreach_use(src, &intrin->dest.ssa) {
      if (src->parent_instr->type != nir_instr_type_deref)
         continue;

      nir_deref_instr *cast = nir_instr_as_deref(src->parent_instr);
      assert(cast->deref_type == nir_deref_type_cast);
      switch (desc_type) {
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
         cast->cast.align_mul = ANV_UBO_ALIGNMENT;
         cast->cast.align_offset = 0;
         break;

      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
         cast->cast.align_mul = ANV_SSBO_ALIGNMENT;
         cast->cast.align_offset = 0;
         break;

      default:
         break;
      }
   }

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *desc =
      build_buffer_addr_for_res_index(b, desc_type, intrin->src[0].ssa,
                                      addr_format, state);

   assert(intrin->dest.is_ssa);
   assert(intrin->dest.ssa.bit_size == desc->bit_size);
   assert(intrin->dest.ssa.num_components == desc->num_components);
   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
   nir_instr_remove(&intrin->instr);

   return true;
}

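/* get_ssbo_size intrinsics that could use a binding-table index were already
 * rewritten by lower_direct_buffer_instr() and recorded in
 * state->lowered_instrs, so they are skipped here. For the remaining ones the
 * size either comes from component 2 of the A64 address vector (see struct
 * anv_address_range_descriptor) or, in the 32bit_index_offset case, the
 * intrinsic simply gets a scalar binding-table index source.
 */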
static bool
lower_get_ssbo_size(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   if (_mesa_set_search(state->lowered_instrs, intrin))
      return false;

   b->cursor = nir_before_instr(&intrin->instr);

   nir_address_format addr_format =
      addr_format_for_desc_type(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, state);

   assert(intrin->src[0].is_ssa);
   nir_ssa_def *desc =
      build_buffer_addr_for_res_index(b, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
                                      intrin->src[0].ssa, addr_format, state);

   switch (addr_format) {
   case nir_address_format_64bit_global_32bit_offset:
   case nir_address_format_64bit_bounded_global: {
      nir_ssa_def *size = nir_channel(b, desc, 2);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, size);
      nir_instr_remove(&intrin->instr);
      break;
   }

   case nir_address_format_32bit_index_offset:
      /* The binding table index is the first component of the address. The
       * back-end wants a scalar binding table index source.
       */
      nir_instr_rewrite_src(&intrin->instr, &intrin->src[0],
                            nir_src_for_ssa(nir_channel(b, desc, 0)));
      break;

   default:
      unreachable("Unsupported address format");
   }

   return true;
}

static bool
lower_image_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin,
                      struct apply_pipeline_layout_state *state)
{
   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned binding_offset = state->set[set].surface_offsets[binding];

   b->cursor = nir_before_instr(&intrin->instr);

   ASSERTED const bool use_bindless = state->pdevice->has_bindless_images;

   if (intrin->intrinsic == nir_intrinsic_image_deref_load_param_intel) {
      b->cursor = nir_instr_remove(&intrin->instr);

      assert(!use_bindless); /* Otherwise our offsets would be wrong */
      const unsigned param = nir_intrinsic_base(intrin);

      nir_ssa_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, param * 16,
                                             intrin->dest.ssa.num_components,
                                             intrin->dest.ssa.bit_size, state);

      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, desc);
   } else if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const bool write_only =
         (var->data.access & ACCESS_NON_READABLE) != 0;
      nir_ssa_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, 0, 2, 32, state);
      nir_ssa_def *handle = nir_channel(b, desc, write_only ? 1 : 0);
      nir_rewrite_image_intrinsic(intrin, handle, true);
   } else {
      unsigned array_size =
         state->layout->set[set].layout->binding[binding].array_size;

      nir_ssa_def *index = NULL;
      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);
         index = nir_ssa_for_src(b, deref->arr.index, 1);
         if (state->add_bounds_checks)
            index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
      } else {
         index = nir_imm_int(b, 0);
      }

      index = nir_iadd_imm(b, index, binding_offset);
      nir_rewrite_image_intrinsic(intrin, index, false);
   }

   return true;
}

static bool
lower_load_constant(nir_builder *b, nir_intrinsic_instr *intrin,
                    struct apply_pipeline_layout_state *state)
{
   b->cursor = nir_instr_remove(&intrin->instr);

   /* Any constant-offset load_constant instructions should have been removed
    * by constant folding.
    */
   assert(!nir_src_is_const(intrin->src[0]));
   nir_ssa_def *offset = nir_iadd_imm(b, nir_ssa_for_src(b, intrin->src[0], 1),
                                      nir_intrinsic_base(intrin));

   nir_ssa_def *data;
   if (state->pdevice->use_softpin) {
      unsigned load_size = intrin->dest.ssa.num_components *
                           intrin->dest.ssa.bit_size / 8;
      unsigned load_align = intrin->dest.ssa.bit_size / 8;

      assert(load_size < b->shader->constant_data_size);
      unsigned max_offset = b->shader->constant_data_size - load_size;
      offset = nir_umin(b, offset, nir_imm_int(b, max_offset));

      nir_ssa_def *const_data_base_addr = nir_pack_64_2x32_split(b,
         nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_LOW),
         nir_load_reloc_const_intel(b, BRW_SHADER_RELOC_CONST_DATA_ADDR_HIGH));

      data = nir_load_global_constant(b, nir_iadd(b, const_data_base_addr,
                                                  nir_u2u64(b, offset)),
                                      load_align,
                                      intrin->dest.ssa.num_components,
                                      intrin->dest.ssa.bit_size);
   } else {
      nir_ssa_def *index = nir_imm_int(b, state->constants_offset);

      data = nir_load_ubo(b, intrin->num_components, intrin->dest.ssa.bit_size,
                          index, offset,
                          .align_mul = intrin->dest.ssa.bit_size / 8,
                          .align_offset = 0,
                          .range_base = nir_intrinsic_base(intrin),
                          .range = nir_intrinsic_range(intrin));
   }

   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, data);

   return true;
}

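/* Rewrites the texture or sampler deref source of a tex instruction. Bindings
 * that did not get a binding-table slot are turned into bindless handles
 * loaded from the descriptor buffer; otherwise the binding-table offset is
 * folded into texture_index/sampler_index, with a residual offset source left
 * behind when the array index is not constant.
 */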
static void
lower_tex_deref(nir_builder *b, nir_tex_instr *tex,
                nir_tex_src_type deref_src_type,
                unsigned *base_index, unsigned plane,
                struct apply_pipeline_layout_state *state)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, deref_src_type);
   if (deref_src_idx < 0)
      return;

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   unsigned array_size =
      state->layout->set[set].layout->binding[binding].array_size;

   unsigned binding_offset;
   if (deref_src_type == nir_tex_src_texture_deref) {
      binding_offset = state->set[set].surface_offsets[binding];
   } else {
      assert(deref_src_type == nir_tex_src_sampler_deref);
      binding_offset = state->set[set].sampler_offsets[binding];
   }

   nir_tex_src_type offset_src_type;
   nir_ssa_def *index = NULL;
   if (binding_offset > MAX_BINDING_TABLE_SIZE) {
      const unsigned plane_offset =
         plane * sizeof(struct anv_sampled_image_descriptor);

      nir_ssa_def *desc =
         build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                             2, 32, state);

      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_handle;
         index = nir_channel(b, desc, 0);
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_handle;
         index = nir_channel(b, desc, 1);
      }
   } else {
      if (deref_src_type == nir_tex_src_texture_deref) {
         offset_src_type = nir_tex_src_texture_offset;
      } else {
         assert(deref_src_type == nir_tex_src_sampler_deref);
         offset_src_type = nir_tex_src_sampler_offset;
      }

      *base_index = binding_offset + plane;

      if (deref->deref_type != nir_deref_type_var) {
         assert(deref->deref_type == nir_deref_type_array);

         if (nir_src_is_const(deref->arr.index)) {
            unsigned arr_index = MIN2(nir_src_as_uint(deref->arr.index), array_size - 1);
            struct anv_sampler **immutable_samplers =
               state->layout->set[set].layout->binding[binding].immutable_samplers;
            if (immutable_samplers) {
               /* Arrays of YCbCr samplers are tightly packed in the binding
                * tables, so compute the offset of an element in the array by
                * adding the number of planes of all preceding elements.
                */
               unsigned desc_arr_index = 0;
               for (int i = 0; i < arr_index; i++)
                  desc_arr_index += immutable_samplers[i]->n_planes;
               *base_index += desc_arr_index;
            } else {
               *base_index += arr_index;
            }
         } else {
            /* From VK_KHR_sampler_ycbcr_conversion:
             *
             *    If sampler Y’CBCR conversion is enabled, the combined image
             *    sampler must be indexed only by constant integral expressions
             *    when aggregated into arrays in shader code, irrespective of
             *    the shaderSampledImageArrayDynamicIndexing feature.
             */
            assert(nir_tex_instr_src_index(tex, nir_tex_src_plane) == -1);

            index = nir_ssa_for_src(b, deref->arr.index, 1);

            if (state->add_bounds_checks)
               index = nir_umin(b, index, nir_imm_int(b, array_size - 1));
         }
      }
   }

   if (index) {
      nir_instr_rewrite_src(&tex->instr, &tex->src[deref_src_idx].src,
                            nir_src_for_ssa(index));
      tex->src[deref_src_idx].src_type = offset_src_type;
   } else {
      nir_tex_instr_remove_src(tex, deref_src_idx);
   }
}

static uint32_t
tex_instr_get_and_remove_plane_src(nir_tex_instr *tex)
{
   int plane_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_plane);
   if (plane_src_idx < 0)
      return 0;

   unsigned plane = nir_src_as_uint(tex->src[plane_src_idx].src);

   nir_tex_instr_remove_src(tex, plane_src_idx);

   return plane;
}

static nir_ssa_def *
build_def_array_select(nir_builder *b, nir_ssa_def **srcs, nir_ssa_def *idx,
                       unsigned start, unsigned end)
{
   if (start == end - 1) {
      return srcs[start];
   } else {
      unsigned mid = start + (end - start) / 2;
      return nir_bcsel(b, nir_ilt(b, idx, nir_imm_int(b, mid)),
                       build_def_array_select(b, srcs, idx, start, mid),
                       build_def_array_select(b, srcs, idx, mid, end));
   }
}

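/* Ivy Bridge and Bay Trail (verx10 == 70) cannot apply the texture swizzle
 * for us, so it is emulated here after the tex instruction: the ISL channel
 * selects are read from the anv_texture_swizzle_descriptor in the descriptor
 * buffer and applied with the bcsel tree produced by build_def_array_select()
 * above.
 */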
static void
lower_gfx7_tex_swizzle(nir_builder *b, nir_tex_instr *tex, unsigned plane,
                       struct apply_pipeline_layout_state *state)
{
   assert(state->pdevice->info.verx10 == 70);
   if (tex->sampler_dim == GLSL_SAMPLER_DIM_BUF ||
       nir_tex_instr_is_query(tex) ||
       tex->op == nir_texop_tg4 || /* We can't swizzle TG4 */
       (tex->is_shadow && tex->is_new_style_shadow))
      return;

   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);

   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct anv_descriptor_set_binding_layout *bind_layout =
      &state->layout->set[set].layout->binding[binding];

   if ((bind_layout->data & ANV_DESCRIPTOR_TEXTURE_SWIZZLE) == 0)
      return;

   b->cursor = nir_before_instr(&tex->instr);

   const unsigned plane_offset =
      plane * sizeof(struct anv_texture_swizzle_descriptor);
   nir_ssa_def *swiz =
      build_load_var_deref_descriptor_mem(b, deref, plane_offset,
                                          1, 32, state);

   b->cursor = nir_after_instr(&tex->instr);

   assert(tex->dest.ssa.bit_size == 32);
   assert(tex->dest.ssa.num_components == 4);

   /* Initializing to undef is ok; nir_opt_undef will clean it up. */
   nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
   nir_ssa_def *comps[8];
   for (unsigned i = 0; i < ARRAY_SIZE(comps); i++)
      comps[i] = undef;

   comps[ISL_CHANNEL_SELECT_ZERO] = nir_imm_int(b, 0);
   if (nir_alu_type_get_base_type(tex->dest_type) == nir_type_float)
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_float(b, 1);
   else
      comps[ISL_CHANNEL_SELECT_ONE] = nir_imm_int(b, 1);
   comps[ISL_CHANNEL_SELECT_RED] = nir_channel(b, &tex->dest.ssa, 0);
   comps[ISL_CHANNEL_SELECT_GREEN] = nir_channel(b, &tex->dest.ssa, 1);
   comps[ISL_CHANNEL_SELECT_BLUE] = nir_channel(b, &tex->dest.ssa, 2);
   comps[ISL_CHANNEL_SELECT_ALPHA] = nir_channel(b, &tex->dest.ssa, 3);

   nir_ssa_def *swiz_comps[4];
   for (unsigned i = 0; i < 4; i++) {
      nir_ssa_def *comp_swiz = nir_extract_u8(b, swiz, nir_imm_int(b, i));
      swiz_comps[i] = build_def_array_select(b, comps, comp_swiz, 0, 8);
   }
   nir_ssa_def *swiz_tex_res = nir_vec(b, swiz_comps, 4);

   /* Rewrite uses before we insert so we don't rewrite this use */
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa,
                                  swiz_tex_res,
                                  swiz_tex_res->parent_instr);
}

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct apply_pipeline_layout_state *state)
{
   unsigned plane = tex_instr_get_and_remove_plane_src(tex);

   /* On Ivy Bridge and Bay Trail, we have to swizzle in the shader. Do this
    * before we lower the derefs away so we can still find the descriptor.
    */
   if (state->pdevice->info.verx10 == 70)
      lower_gfx7_tex_swizzle(b, tex, plane, state);

   b->cursor = nir_before_instr(&tex->instr);

   lower_tex_deref(b, tex, nir_tex_src_texture_deref,
                   &tex->texture_index, plane, state);

   lower_tex_deref(b, tex, nir_tex_src_sampler_deref,
                   &tex->sampler_index, plane, state);

   return true;
}

static bool
apply_pipeline_layout(nir_builder *b, nir_instr *instr, void *_state)
{
   struct apply_pipeline_layout_state *state = _state;

   switch (instr->type) {
   case nir_instr_type_intrinsic: {
      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_vulkan_resource_index:
         return lower_res_index_intrinsic(b, intrin, state);
      case nir_intrinsic_vulkan_resource_reindex:
         return lower_res_reindex_intrinsic(b, intrin, state);
      case nir_intrinsic_load_vulkan_descriptor:
         return lower_load_vulkan_descriptor(b, intrin, state);
      case nir_intrinsic_get_ssbo_size:
         return lower_get_ssbo_size(b, intrin, state);
      case nir_intrinsic_image_deref_load:
      case nir_intrinsic_image_deref_store:
      case nir_intrinsic_image_deref_atomic_add:
      case nir_intrinsic_image_deref_atomic_imin:
      case nir_intrinsic_image_deref_atomic_umin:
      case nir_intrinsic_image_deref_atomic_imax:
      case nir_intrinsic_image_deref_atomic_umax:
      case nir_intrinsic_image_deref_atomic_and:
      case nir_intrinsic_image_deref_atomic_or:
      case nir_intrinsic_image_deref_atomic_xor:
      case nir_intrinsic_image_deref_atomic_exchange:
      case nir_intrinsic_image_deref_atomic_comp_swap:
      case nir_intrinsic_image_deref_size:
      case nir_intrinsic_image_deref_samples:
      case nir_intrinsic_image_deref_load_param_intel:
      case nir_intrinsic_image_deref_load_raw_intel:
      case nir_intrinsic_image_deref_store_raw_intel:
         return lower_image_intrinsic(b, intrin, state);
      case nir_intrinsic_load_constant:
         return lower_load_constant(b, intrin, state);
      default:
         return false;
      }
      break;
   }
   case nir_instr_type_tex:
      return lower_tex(b, nir_instr_as_tex(instr), state);
   default:
      return false;
   }
}

struct binding_info {
   uint32_t binding;
   uint8_t set;
   uint16_t score;
};

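/* Sorts bindings by descending score so the most heavily used bindings (and
 * any binding that cannot go bindless, which gets the top score bit set) grab
 * binding-table slots first; ties fall back to (set, binding) order to keep
 * the result deterministic.
 */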
static int
compare_binding_infos(const void *_a, const void *_b)
{
   const struct binding_info *a = _a, *b = _b;
   if (a->score != b->score)
      return b->score - a->score;

   if (a->set != b->set)
      return a->set - b->set;

   return a->binding - b->binding;
}

void
anv_nir_apply_pipeline_layout(const struct anv_physical_device *pdevice,
                              bool robust_buffer_access,
                              const struct anv_pipeline_layout *layout,
                              nir_shader *shader,
                              struct anv_pipeline_bind_map *map)
{
   void *mem_ctx = ralloc_context(NULL);

   struct apply_pipeline_layout_state state = {
      .pdevice = pdevice,
      .layout = layout,
      .add_bounds_checks = robust_buffer_access,
      .desc_addr_format = brw_shader_stage_is_bindless(shader->info.stage) ?
                          nir_address_format_64bit_global_32bit_offset :
                          nir_address_format_32bit_index_offset,
      .ssbo_addr_format = anv_nir_ssbo_addr_format(pdevice, robust_buffer_access),
      .ubo_addr_format = anv_nir_ubo_addr_format(pdevice, robust_buffer_access),
      .lowered_instrs = _mesa_pointer_set_create(mem_ctx),
   };

   for (unsigned s = 0; s < layout->num_sets; s++) {
      const unsigned count = layout->set[s].layout->binding_count;
      state.set[s].use_count = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].surface_offsets = rzalloc_array(mem_ctx, uint8_t, count);
      state.set[s].sampler_offsets = rzalloc_array(mem_ctx, uint8_t, count);
   }

   nir_shader_instructions_pass(shader, get_used_bindings,
                                nir_metadata_all, &state);

   for (unsigned s = 0; s < layout->num_sets; s++) {
      if (state.desc_addr_format != nir_address_format_32bit_index_offset) {
         state.set[s].desc_offset = BINDLESS_OFFSET;
      } else if (state.set[s].desc_buffer_used) {
         map->surface_to_descriptor[map->surface_count] =
            (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_DESCRIPTORS,
               .index = s,
            };
         state.set[s].desc_offset = map->surface_count;
         map->surface_count++;
      }
   }

   if (state.uses_constants && !pdevice->use_softpin) {
      state.constants_offset = map->surface_count;
      map->surface_to_descriptor[map->surface_count].set =
         ANV_DESCRIPTOR_SET_SHADER_CONSTANTS;
      map->surface_count++;
   }

   unsigned used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         used_binding_count++;
      }
   }

   struct binding_info *infos =
      rzalloc_array(mem_ctx, struct binding_info, used_binding_count);
   used_binding_count = 0;
   for (uint32_t set = 0; set < layout->num_sets; set++) {
      const struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
      for (unsigned b = 0; b < set_layout->binding_count; b++) {
         if (state.set[set].use_count[b] == 0)
            continue;

         const struct anv_descriptor_set_binding_layout *binding =
            &layout->set[set].layout->binding[b];

         /* Do a fixed-point calculation to generate a score based on the
          * number of uses and the binding array size. We shift by 7 instead
          * of 8 because we're going to use the top bit below to make
          * everything which does not support bindless much higher priority
          * than things which do.
          */
         uint16_t score = ((uint16_t)state.set[set].use_count[b] << 7) /
                          binding->array_size;

         /* If the descriptor type doesn't support bindless then put it at the
          * beginning so we guarantee it gets a slot.
          */
         if (!anv_descriptor_supports_bindless(pdevice, binding, true) ||
             !anv_descriptor_supports_bindless(pdevice, binding, false))
            score |= 1 << 15;

         infos[used_binding_count++] = (struct binding_info) {
            .set = set,
            .binding = b,
            .score = score,
         };
      }
   }

   /* Order the binding infos based on score with highest scores first. If
    * scores are equal we then order by set and binding.
    */
   qsort(infos, used_binding_count, sizeof(struct binding_info),
         compare_binding_infos);

   for (unsigned i = 0; i < used_binding_count; i++) {
      unsigned set = infos[i].set, b = infos[i].binding;
      const struct anv_descriptor_set_binding_layout *binding =
         &layout->set[set].layout->binding[b];

      const uint32_t array_size = binding->array_size;

      if (binding->dynamic_offset_index >= 0)
         state.has_dynamic_buffers = true;

      if (binding->data & ANV_DESCRIPTOR_SURFACE_STATE) {
         if (map->surface_count + array_size > MAX_BINDING_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, false) ||
             brw_shader_stage_is_bindless(shader->info.stage)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, false));
            state.set[set].surface_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].surface_offsets[b] = map->surface_count;
            if (binding->dynamic_offset_index < 0) {
               struct anv_sampler **samplers = binding->immutable_samplers;
               for (unsigned i = 0; i < binding->array_size; i++) {
                  uint8_t planes = samplers ? samplers[i]->n_planes : 1;
                  for (uint8_t p = 0; p < planes; p++) {
                     map->surface_to_descriptor[map->surface_count++] =
                        (struct anv_pipeline_binding) {
                           .set = set,
                           .index = binding->descriptor_index + i,
                           .plane = p,
                        };
                  }
               }
            } else {
               for (unsigned i = 0; i < binding->array_size; i++) {
                  map->surface_to_descriptor[map->surface_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .dynamic_offset_index =
                           layout->set[set].dynamic_offset_start +
                           binding->dynamic_offset_index + i,
                     };
               }
            }
         }
         assert(map->surface_count <= MAX_BINDING_TABLE_SIZE);
      }

      if (binding->data & ANV_DESCRIPTOR_SAMPLER_STATE) {
         if (map->sampler_count + array_size > MAX_SAMPLER_TABLE_SIZE ||
             anv_descriptor_requires_bindless(pdevice, binding, true) ||
             brw_shader_stage_is_bindless(shader->info.stage)) {
            /* If this descriptor doesn't fit in the binding table or if it
             * requires bindless for some reason, flag it as bindless.
             *
             * We also make large sampler arrays bindless because we can avoid
             * using indirect sends thanks to bindless samplers being packed
             * less tightly than the sampler table.
             */
            assert(anv_descriptor_supports_bindless(pdevice, binding, true));
            state.set[set].sampler_offsets[b] = BINDLESS_OFFSET;
         } else {
            state.set[set].sampler_offsets[b] = map->sampler_count;
            struct anv_sampler **samplers = binding->immutable_samplers;
            for (unsigned i = 0; i < binding->array_size; i++) {
               uint8_t planes = samplers ? samplers[i]->n_planes : 1;
               for (uint8_t p = 0; p < planes; p++) {
                  map->sampler_to_descriptor[map->sampler_count++] =
                     (struct anv_pipeline_binding) {
                        .set = set,
                        .index = binding->descriptor_index + i,
                        .plane = p,
                     };
               }
            }
         }
      }
   }

   nir_foreach_uniform_variable(var, shader) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);

      if (!glsl_type_is_image(glsl_type))
         continue;

      enum glsl_sampler_dim dim = glsl_get_sampler_dim(glsl_type);

      const uint32_t set = var->data.descriptor_set;
      const uint32_t binding = var->data.binding;
      const struct anv_descriptor_set_binding_layout *bind_layout =
         &layout->set[set].layout->binding[binding];
      const uint32_t array_size = bind_layout->array_size;

      if (state.set[set].use_count[binding] == 0)
         continue;

      if (state.set[set].surface_offsets[binding] >= MAX_BINDING_TABLE_SIZE)
         continue;

      struct anv_pipeline_binding *pipe_binding =
         &map->surface_to_descriptor[state.set[set].surface_offsets[binding]];
      for (unsigned i = 0; i < array_size; i++) {
         assert(pipe_binding[i].set == set);
         assert(pipe_binding[i].index == bind_layout->descriptor_index + i);

         if (dim == GLSL_SAMPLER_DIM_SUBPASS ||
             dim == GLSL_SAMPLER_DIM_SUBPASS_MS)
            pipe_binding[i].input_attachment_index = var->data.index + i;

         /* NOTE: This is a uint8_t so we really do need to != 0 here */
         pipe_binding[i].write_only =
            (var->data.access & ACCESS_NON_READABLE) != 0;
      }
   }

   /* Before we do the normal lowering, we look for any SSBO operations
    * that we can lower to the BTI model and lower them up-front. The BTI
    * model can perform better than the A64 model for a couple reasons:
    *
    * 1. 48-bit address calculations are potentially expensive and using
    *    the BTI model lets us simply compute 32-bit offsets and the
    *    hardware adds the 64-bit surface base address.
    *
    * 2. The BTI messages, because they use surface states, do bounds
    *    checking for us. With the A64 model, we have to do our own
    *    bounds checking and this means wider pointers and extra
    *    calculations and branching in the shader.
    *
    * The solution to both of these is to convert things to the BTI model
    * opportunistically. We need to do this as a pre-pass for two reasons:
    *
    * 1. The BTI model requires nir_address_format_32bit_index_offset
    *    pointers which are not the same type as the pointers needed for
    *    the A64 model. Because all our derefs are set up for the A64
    *    model (in case we have variable pointers), we have to crawl all
    *    the way back to the vulkan_resource_index intrinsic and build a
    *    completely fresh index+offset calculation.
    *
    * 2. Because the variable-pointers-capable lowering that we do as part
    *    of apply_pipeline_layout_block is destructive (It really has to
    *    be to handle variable pointers properly), we've lost the deref
    *    information by the time we get to the load/store/atomic
    *    intrinsics in that pass.
    */
   nir_shader_instructions_pass(shader, lower_direct_buffer_instr,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);

   /* We just got rid of all the direct access. Delete it so it's not in the
    * way when we do our indirect lowering.
    */
   nir_opt_dce(shader);

   nir_shader_instructions_pass(shader, apply_pipeline_layout,
                                nir_metadata_block_index |
                                nir_metadata_dominance,
                                &state);

   ralloc_free(mem_ctx);

   if (brw_shader_stage_is_bindless(shader->info.stage)) {
      assert(map->surface_count == 0);
      assert(map->sampler_count == 0);
   }

   /* Now that we're done computing the surface and sampler portions of the
    * bind map, hash them. This lets us quickly determine if the actual
    * mapping has changed and not just a no-op pipeline change.
    */
   _mesa_sha1_compute(map->surface_to_descriptor,
                      map->surface_count * sizeof(struct anv_pipeline_binding),
                      map->surface_sha1);
   _mesa_sha1_compute(map->sampler_to_descriptor,
                      map->sampler_count * sizeof(struct anv_pipeline_binding),
                      map->sampler_sha1);
}