GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_descriptors.c
1
/*
2
* Copyright 2013 Advanced Micro Devices, Inc.
3
* All Rights Reserved.
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* on the rights to use, copy, modify, merge, publish, distribute, sub
9
* license, and/or sell copies of the Software, and to permit persons to whom
10
* the Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
* USE OR OTHER DEALINGS IN THE SOFTWARE.
23
*/
24
25
/* Resource binding slots and sampler states (each described with 8 or
26
* 4 dwords) are stored in lists in memory which is accessed by shaders
27
* using scalar load instructions.
28
*
29
* This file is responsible for managing such lists. It keeps a copy of all
30
* descriptors in CPU memory and re-uploads a whole list if some slots have
31
* been changed.
32
*
33
* This code is also responsible for updating shader pointers to those lists.
34
*
35
* Note that CP DMA can't be used for updating the lists, because a GPU hang
36
* could leave the list in a mid-IB state and the next IB would get wrong
37
* descriptors and the whole context would be unusable at that point.
38
* (Note: register shadowing can't be used for the same reason)
39
*
40
* Also, uploading descriptors to newly allocated memory doesn't require
41
* a KCACHE flush.
42
*
43
*
44
* Possible scenarios for one 16 dword image+sampler slot:
45
*
46
* | Image | w/ FMASK | Buffer | NULL
47
* [ 0: 3] Image[0:3] | Image[0:3] | Null[0:3] | Null[0:3]
48
* [ 4: 7] Image[4:7] | Image[4:7] | Buffer[0:3] | 0
49
* [ 8:11] Null[0:3] | Fmask[0:3] | Null[0:3] | Null[0:3]
50
* [12:15] Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
51
*
52
* FMASK implies MSAA, therefore no sampler state.
53
* Sampler states are never unbound except when FMASK is bound.
54
*/
55
56
#include "si_pipe.h"
57
#include "si_compute.h"
58
#include "si_build_pm4.h"
59
#include "sid.h"
60
#include "util/format/u_format.h"
61
#include "util/hash_table.h"
62
#include "util/u_idalloc.h"
63
#include "util/u_memory.h"
64
#include "util/u_upload_mgr.h"
65
66
/* NULL image and buffer descriptor for textures (alpha = 1) and images
67
* (alpha = 0).
68
*
69
* For images, all fields must be zero except for the swizzle, which
70
* supports arbitrary combinations of 0s and 1s. The texture type must be
71
* any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
72
*
73
* For buffers, all fields must be zero. If they are not, the hw hangs.
74
*
75
* This is the only reason why the buffer descriptor must be in words [4:7].
76
*/
77
static uint32_t null_texture_descriptor[8] = {
78
0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
79
/* the rest must contain zeros, which is also used by the buffer
80
* descriptor */
81
};
82
83
static uint32_t null_image_descriptor[8] = {
84
0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
85
/* the rest must contain zeros, which is also used by the buffer
86
* descriptor */
87
};
88
89
static uint64_t si_desc_extract_buffer_address(const uint32_t *desc)
90
{
91
uint64_t va = desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
92
93
/* Sign-extend the 48-bit address. */
94
va <<= 16;
95
va = (int64_t)va >> 16;
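/* Illustrative example (values assumed, not from the original source): for a
 * 48-bit address with bit 47 set, e.g. 0x0000_8000_0000_0000, the shift left
 * by 16 moves bit 47 into bit 63 and the arithmetic shift right replicates it,
 * giving 0xFFFF_8000_0000_0000. Addresses with bit 47 clear pass through
 * unchanged. */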
96
return va;
97
}
98
99
static void si_init_descriptor_list(uint32_t *desc_list, unsigned element_dw_size,
100
unsigned num_elements, const uint32_t *null_descriptor)
101
{
102
int i;
103
104
/* Initialize the array to NULL descriptors if the element size is 8. */
105
if (null_descriptor) {
106
assert(element_dw_size % 8 == 0);
107
for (i = 0; i < num_elements * element_dw_size / 8; i++)
108
memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
109
}
110
}
111
112
static void si_init_descriptors(struct si_descriptors *desc, short shader_userdata_rel_index,
113
unsigned element_dw_size, unsigned num_elements)
114
{
115
desc->list = CALLOC(num_elements, element_dw_size * 4);
116
desc->element_dw_size = element_dw_size;
117
desc->num_elements = num_elements;
118
desc->shader_userdata_offset = shader_userdata_rel_index * 4;
119
desc->slot_index_to_bind_directly = -1;
120
}
121
122
static void si_release_descriptors(struct si_descriptors *desc)
123
{
124
si_resource_reference(&desc->buffer, NULL);
125
FREE(desc->list);
126
}
127
128
static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc)
129
{
130
unsigned slot_size = desc->element_dw_size * 4;
131
unsigned first_slot_offset = desc->first_active_slot * slot_size;
132
unsigned upload_size = desc->num_active_slots * slot_size;
133
134
/* Skip the upload if no shader is using the descriptors. dirty_mask
135
* will stay dirty and the descriptors will be uploaded when there is
136
* a shader using them.
137
*/
138
if (!upload_size)
139
return true;
140
141
/* If there is just one active descriptor, bind it directly. */
142
if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
143
desc->num_active_slots == 1) {
144
uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * desc->element_dw_size];
145
146
/* The buffer is already in the buffer list. */
147
si_resource_reference(&desc->buffer, NULL);
148
desc->gpu_list = NULL;
149
desc->gpu_address = si_desc_extract_buffer_address(descriptor);
150
return true;
151
}
152
153
uint32_t *ptr;
154
unsigned buffer_offset;
155
u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size,
156
si_optimal_tcc_alignment(sctx, upload_size), &buffer_offset,
157
(struct pipe_resource **)&desc->buffer, (void **)&ptr);
158
if (!desc->buffer) {
159
desc->gpu_address = 0;
160
return false; /* skip the draw call */
161
}
162
163
util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size);
164
desc->gpu_list = ptr - first_slot_offset / 4;
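/* Note: gpu_list is deliberately biased backwards by first_slot_offset/4
 * dwords, so it can still be indexed with absolute slot offsets even though
 * only the active slots were uploaded. */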
165
166
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ,
167
RADEON_PRIO_DESCRIPTORS);
168
169
/* The shader pointer should point to slot 0. */
170
buffer_offset -= first_slot_offset;
171
desc->gpu_address = desc->buffer->gpu_address + buffer_offset;
172
173
assert(desc->buffer->flags & RADEON_FLAG_32BIT);
174
assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi);
175
assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi);
176
return true;
177
}
178
179
static void
180
si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc)
181
{
182
if (!desc->buffer)
183
return;
184
185
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ,
186
RADEON_PRIO_DESCRIPTORS);
187
}
188
189
/* SAMPLER VIEWS */
190
191
static inline enum radeon_bo_priority si_get_sampler_view_priority(struct si_resource *res)
192
{
193
if (res->b.b.target == PIPE_BUFFER)
194
return RADEON_PRIO_SAMPLER_BUFFER;
195
196
if (res->b.b.nr_samples > 1)
197
return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;
198
199
return RADEON_PRIO_SAMPLER_TEXTURE;
200
}
201
202
static struct si_descriptors *si_sampler_and_image_descriptors(struct si_context *sctx,
203
unsigned shader)
204
{
205
return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
206
}
207
208
static void si_release_sampler_views(struct si_samplers *samplers)
209
{
210
int i;
211
212
for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
213
pipe_sampler_view_reference(&samplers->views[i], NULL);
214
}
215
}
216
217
static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource,
218
enum radeon_bo_usage usage, bool is_stencil_sampler,
219
bool check_mem)
220
{
221
struct si_texture *tex = (struct si_texture *)resource;
222
enum radeon_bo_priority priority;
223
224
if (!resource)
225
return;
226
227
/* Use the flushed depth texture if direct sampling is unsupported. */
228
if (resource->target != PIPE_BUFFER && tex->is_depth &&
229
!si_can_sample_zs(tex, is_stencil_sampler))
230
tex = tex->flushed_depth_texture;
231
232
priority = si_get_sampler_view_priority(&tex->buffer);
233
radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage, priority, check_mem);
234
}
235
236
static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers)
237
{
238
unsigned mask = samplers->enabled_mask;
239
240
/* Add buffers to the CS. */
241
while (mask) {
242
int i = u_bit_scan(&mask);
243
struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
244
245
si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
246
sview->is_stencil_sampler, false);
247
}
248
}
249
250
static bool si_sampler_views_check_encrypted(struct si_context *sctx, struct si_samplers *samplers,
251
unsigned samplers_declared)
252
{
253
unsigned mask = samplers->enabled_mask & samplers_declared;
254
255
/* Check whether any bound sampler uses an encrypted resource. */
256
while (mask) {
257
int i = u_bit_scan(&mask);
258
struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];
259
260
struct si_resource *res = si_resource(sview->base.texture);
261
if (res->flags & RADEON_FLAG_ENCRYPTED)
262
return true;
263
}
264
return false;
265
}
266
267
/* Set buffer descriptor fields that can be changed by reallocations. */
268
static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state)
269
{
270
uint64_t va = buf->gpu_address + offset;
271
272
state[0] = va;
273
state[1] &= C_008F04_BASE_ADDRESS_HI;
274
state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
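/* The buffer descriptor carries the full 64-bit VA: the low 32 bits go into
 * state[0] and the upper bits into the BASE_ADDRESS_HI field of state[1]. */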
275
}
276
277
/* Set texture descriptor fields that can be changed by reallocations.
278
*
279
* \param tex texture
280
* \param base_level_info information of the level of BASE_ADDRESS
281
* \param base_level the level of BASE_ADDRESS
282
* \param first_level pipe_sampler_view.u.tex.first_level
283
* \param block_width util_format_get_blockwidth()
284
* \param is_stencil select between separate Z & Stencil
285
* \param state descriptor to update
286
*/
287
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
288
const struct legacy_surf_level *base_level_info,
289
unsigned base_level, unsigned first_level, unsigned block_width,
290
/* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
291
bool is_stencil, uint16_t access, uint32_t * restrict state)
292
{
293
uint64_t va, meta_va = 0;
294
295
if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
296
tex = tex->flushed_depth_texture;
297
is_stencil = false;
298
}
299
300
va = tex->buffer.gpu_address;
301
302
if (sscreen->info.chip_class >= GFX9) {
303
/* Only stencil_offset needs to be added here. */
304
if (is_stencil)
305
va += tex->surface.u.gfx9.zs.stencil_offset;
306
else
307
va += tex->surface.u.gfx9.surf_offset;
308
} else {
309
va += (uint64_t)base_level_info->offset_256B * 256;
310
}
311
312
state[0] = va >> 8;
313
state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);
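/* The image descriptor stores the address with 256-byte granularity:
 * state[0] holds VA bits [39:8] and BASE_ADDRESS_HI receives bits above
 * bit 39 (va >> 40). */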
314
315
/* Only macrotiled modes can set tile swizzle.
316
* GFX9 doesn't use (legacy) base_level_info.
317
*/
318
if (sscreen->info.chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
319
state[0] |= tex->surface.tile_swizzle;
320
321
if (sscreen->info.chip_class >= GFX8) {
322
if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) {
323
meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;
324
325
if (sscreen->info.chip_class == GFX8) {
326
meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
327
assert(base_level_info->mode == RADEON_SURF_MODE_2D);
328
}
329
330
unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8;
331
dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1;
332
meta_va |= dcc_tile_swizzle;
333
} else if (vi_tc_compat_htile_enabled(tex, first_level,
334
is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) {
335
meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;
336
}
337
338
if (meta_va)
339
state[6] |= S_008F28_COMPRESSION_EN(1);
340
}
341
342
if (sscreen->info.chip_class >= GFX8 && sscreen->info.chip_class <= GFX9)
343
state[7] = meta_va >> 8;
344
345
if (sscreen->info.chip_class >= GFX10) {
346
if (is_stencil) {
347
state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
348
} else {
349
state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
350
}
351
352
if (meta_va) {
353
struct gfx9_surf_meta_flags meta = {
354
.rb_aligned = 1,
355
.pipe_aligned = 1,
356
};
357
358
if (!tex->is_depth && tex->surface.meta_offset)
359
meta = tex->surface.u.gfx9.color.dcc;
360
361
state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
362
S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) |
363
S_00A018_WRITE_COMPRESS_ENABLE((access & SI_IMAGE_ACCESS_DCC_WRITE) != 0);
364
}
365
366
state[7] = meta_va >> 16;
367
} else if (sscreen->info.chip_class == GFX9) {
368
if (is_stencil) {
369
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
370
state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
371
} else {
372
uint16_t epitch = tex->surface.u.gfx9.epitch;
373
if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
374
block_width == 1) {
375
/* epitch is patched in ac_surface for sdma/vcn blocks to get
376
* a value expressed in element units.
377
* But here the texture is used with block_width == 1 so we
378
* need epitch in pixel units.
379
*/
380
epitch = (epitch + 1) / tex->surface.blk_w - 1;
381
}
382
state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
383
state[4] |= S_008F20_PITCH(epitch);
384
}
385
386
state[5] &=
387
C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
388
if (meta_va) {
389
struct gfx9_surf_meta_flags meta = {
390
.rb_aligned = 1,
391
.pipe_aligned = 1,
392
};
393
394
if (!tex->is_depth && tex->surface.meta_offset)
395
meta = tex->surface.u.gfx9.color.dcc;
396
397
state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
398
S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
399
S_008F24_META_RB_ALIGNED(meta.rb_aligned);
400
}
401
} else {
402
/* GFX6-GFX8 */
403
unsigned pitch = base_level_info->nblk_x * block_width;
404
unsigned index = si_tile_mode_index(tex, base_level, is_stencil);
405
406
state[3] |= S_008F1C_TILING_INDEX(index);
407
state[4] |= S_008F20_PITCH(pitch - 1);
408
}
409
410
if (tex->swap_rgb_to_bgr) {
411
unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]);
412
unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]);
413
414
state[3] &= C_008F1C_DST_SEL_X;
415
state[3] |= S_008F1C_DST_SEL_X(swizzle_z);
416
state[3] &= C_008F1C_DST_SEL_Z;
417
state[3] |= S_008F1C_DST_SEL_Z(swizzle_x);
418
}
419
}
420
421
static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
422
struct si_sampler_view *sview, struct si_texture *tex,
423
uint32_t *desc)
424
{
425
if (tex && tex->upgraded_depth && sview && !sview->is_stencil_sampler)
426
memcpy(desc, sstate->upgraded_depth_val, 4 * 4);
427
else
428
memcpy(desc, sstate->val, 4 * 4);
429
}
430
431
static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_view *sview,
432
struct si_sampler_state *sstate,
433
/* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
434
uint32_t * restrict desc)
435
{
436
struct pipe_sampler_view *view = &sview->base;
437
struct si_texture *tex = (struct si_texture *)view->texture;
438
439
assert(tex); /* views with texture == NULL aren't supported */
440
441
if (tex->buffer.b.b.target == PIPE_BUFFER) {
442
memcpy(desc, sview->state, 8 * 4);
443
memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. */
444
si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4);
445
return;
446
}
447
448
if (unlikely(sview->dcc_incompatible)) {
449
if (vi_dcc_enabled(tex, view->u.tex.first_level))
450
if (!si_texture_disable_dcc(sctx, tex))
451
si_decompress_dcc(sctx, tex);
452
453
sview->dcc_incompatible = false;
454
}
455
456
bool is_separate_stencil = tex->db_compatible && sview->is_stencil_sampler;
457
458
memcpy(desc, sview->state, 8 * 4);
459
si_set_mutable_tex_desc_fields(sctx->screen, tex, sview->base_level_info, sview->base_level,
460
sview->base.u.tex.first_level, sview->block_width,
461
is_separate_stencil, 0, desc);
462
463
if (tex->surface.fmask_size) {
464
memcpy(desc + 8, sview->fmask_state, 8 * 4);
465
} else {
466
/* Disable FMASK and bind sampler state in [12:15]. */
467
memcpy(desc + 8, null_texture_descriptor, 4 * 4);
468
469
if (sstate)
470
si_set_sampler_state_desc(sstate, sview, tex, desc + 12);
471
}
472
}
473
474
static bool color_needs_decompression(struct si_texture *tex)
475
{
476
if (tex->is_depth)
477
return false;
478
479
return tex->surface.fmask_size ||
480
(tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset));
481
}
482
483
static bool depth_needs_decompression(struct si_texture *tex)
484
{
485
/* If the depth/stencil texture is TC-compatible, no decompression
486
* will be done. The decompression function will only flush DB caches
487
* to make it coherent with shaders. That's necessary because the driver
488
* doesn't flush DB caches in any other case.
489
*/
490
return tex->db_compatible;
491
}
492
493
static void si_reset_sampler_view_slot(struct si_samplers *samplers, unsigned slot,
494
uint32_t * restrict desc)
495
{
496
pipe_sampler_view_reference(&samplers->views[slot], NULL);
497
memcpy(desc, null_texture_descriptor, 8 * 4);
498
/* Only clear the lower dwords of FMASK. */
499
memcpy(desc + 8, null_texture_descriptor, 4 * 4);
500
/* Re-set the sampler state if we are transitioning from FMASK. */
501
if (samplers->sampler_states[slot])
502
si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, desc + 12);
503
}
504
505
static void si_set_sampler_views(struct si_context *sctx, unsigned shader,
506
unsigned start_slot, unsigned count,
507
unsigned unbind_num_trailing_slots,
508
struct pipe_sampler_view **views,
509
bool disallow_early_out)
510
{
511
struct si_samplers *samplers = &sctx->samplers[shader];
512
struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
513
uint32_t unbound_mask = 0;
514
515
if (views) {
516
for (unsigned i = 0; i < count; i++) {
517
unsigned slot = start_slot + i;
518
struct si_sampler_view *sview = (struct si_sampler_view *)views[i];
519
unsigned desc_slot = si_get_sampler_slot(slot);
520
/* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
521
uint32_t *restrict desc = descs->list + desc_slot * 16;
522
523
if (samplers->views[slot] == &sview->base && !disallow_early_out)
524
continue;
525
526
if (sview) {
527
struct si_texture *tex = (struct si_texture *)sview->base.texture;
528
529
si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc);
530
531
if (tex->buffer.b.b.target == PIPE_BUFFER) {
532
tex->buffer.bind_history |= PIPE_BIND_SAMPLER_VIEW;
533
samplers->needs_depth_decompress_mask &= ~(1u << slot);
534
samplers->needs_color_decompress_mask &= ~(1u << slot);
535
} else {
536
if (depth_needs_decompression(tex)) {
537
samplers->needs_depth_decompress_mask |= 1u << slot;
538
} else {
539
samplers->needs_depth_decompress_mask &= ~(1u << slot);
540
}
541
if (color_needs_decompression(tex)) {
542
samplers->needs_color_decompress_mask |= 1u << slot;
543
} else {
544
samplers->needs_color_decompress_mask &= ~(1u << slot);
545
}
546
547
if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
548
p_atomic_read(&tex->framebuffers_bound))
549
sctx->need_check_render_feedback = true;
550
}
551
552
pipe_sampler_view_reference(&samplers->views[slot], &sview->base);
553
samplers->enabled_mask |= 1u << slot;
554
555
/* Since this can flush, it must be done after enabled_mask is
556
* updated. */
557
si_sampler_view_add_buffer(sctx, &tex->buffer.b.b, RADEON_USAGE_READ,
558
sview->is_stencil_sampler, true);
559
} else {
560
si_reset_sampler_view_slot(samplers, slot, desc);
561
unbound_mask |= 1u << slot;
562
}
563
}
564
} else {
565
unbind_num_trailing_slots += count;
566
count = 0;
567
}
568
569
for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
570
unsigned slot = start_slot + count + i;
571
unsigned desc_slot = si_get_sampler_slot(slot);
572
uint32_t * restrict desc = descs->list + desc_slot * 16;
573
574
if (samplers->views[slot])
575
si_reset_sampler_view_slot(samplers, slot, desc);
576
}
577
578
unbound_mask |= BITFIELD_RANGE(start_slot + count, unbind_num_trailing_slots);
579
samplers->enabled_mask &= ~unbound_mask;
580
samplers->needs_depth_decompress_mask &= ~unbound_mask;
581
samplers->needs_color_decompress_mask &= ~unbound_mask;
582
583
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
584
}
585
586
static void si_update_shader_needs_decompress_mask(struct si_context *sctx, unsigned shader)
587
{
588
struct si_samplers *samplers = &sctx->samplers[shader];
589
unsigned shader_bit = 1 << shader;
590
591
if (samplers->needs_depth_decompress_mask || samplers->needs_color_decompress_mask ||
592
sctx->images[shader].needs_color_decompress_mask)
593
sctx->shader_needs_decompress_mask |= shader_bit;
594
else
595
sctx->shader_needs_decompress_mask &= ~shader_bit;
596
}
597
598
static void si_pipe_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader,
599
unsigned start, unsigned count,
600
unsigned unbind_num_trailing_slots,
601
struct pipe_sampler_view **views)
602
{
603
struct si_context *sctx = (struct si_context *)ctx;
604
605
if ((!count && !unbind_num_trailing_slots) || shader >= SI_NUM_SHADERS)
606
return;
607
608
si_set_sampler_views(sctx, shader, start, count, unbind_num_trailing_slots,
609
views, false);
610
si_update_shader_needs_decompress_mask(sctx, shader);
611
}
612
613
static void si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
614
{
615
unsigned mask = samplers->enabled_mask;
616
617
while (mask) {
618
int i = u_bit_scan(&mask);
619
struct pipe_resource *res = samplers->views[i]->texture;
620
621
if (res && res->target != PIPE_BUFFER) {
622
struct si_texture *tex = (struct si_texture *)res;
623
624
if (color_needs_decompression(tex)) {
625
samplers->needs_color_decompress_mask |= 1u << i;
626
} else {
627
samplers->needs_color_decompress_mask &= ~(1u << i);
628
}
629
}
630
}
631
}
632
633
/* IMAGE VIEWS */
634
635
static void si_release_image_views(struct si_images *images)
636
{
637
unsigned i;
638
639
for (i = 0; i < SI_NUM_IMAGES; ++i) {
640
struct pipe_image_view *view = &images->views[i];
641
642
pipe_resource_reference(&view->resource, NULL);
643
}
644
}
645
646
static void si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
647
{
648
uint mask = images->enabled_mask;
649
650
/* Add buffers to the CS. */
651
while (mask) {
652
int i = u_bit_scan(&mask);
653
struct pipe_image_view *view = &images->views[i];
654
655
assert(view->resource);
656
657
si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false);
658
}
659
}
660
661
static bool si_image_views_check_encrypted(struct si_context *sctx, struct si_images *images,
662
unsigned images_declared)
663
{
664
uint mask = images->enabled_mask & images_declared;
665
666
while (mask) {
667
int i = u_bit_scan(&mask);
668
struct pipe_image_view *view = &images->views[i];
669
670
assert(view->resource);
671
672
struct si_texture *tex = (struct si_texture *)view->resource;
673
if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
674
return true;
675
}
676
return false;
677
}
678
679
static void si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
680
{
681
struct si_images *images = &ctx->images[shader];
682
683
if (images->enabled_mask & (1u << slot)) {
684
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
685
unsigned desc_slot = si_get_image_slot(slot);
686
687
pipe_resource_reference(&images->views[slot].resource, NULL);
688
images->needs_color_decompress_mask &= ~(1 << slot);
689
690
memcpy(descs->list + desc_slot * 8, null_image_descriptor, 8 * 4);
691
images->enabled_mask &= ~(1u << slot);
692
images->display_dcc_store_mask &= ~(1u << slot);
693
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
694
}
695
}
696
697
static void si_mark_image_range_valid(const struct pipe_image_view *view)
698
{
699
struct si_resource *res = si_resource(view->resource);
700
701
if (res->b.b.target != PIPE_BUFFER)
702
return;
703
704
util_range_add(&res->b.b, &res->valid_buffer_range, view->u.buf.offset,
705
view->u.buf.offset + view->u.buf.size);
706
}
707
708
static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view,
709
bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc)
710
{
711
struct si_screen *screen = ctx->screen;
712
struct si_resource *res;
713
714
res = si_resource(view->resource);
715
716
if (res->b.b.target == PIPE_BUFFER) {
717
if (view->access & PIPE_IMAGE_ACCESS_WRITE)
718
si_mark_image_range_valid(view);
719
720
si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, view->u.buf.size,
721
desc);
722
si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
723
} else {
724
static const unsigned char swizzle[4] = {0, 1, 2, 3};
725
struct si_texture *tex = (struct si_texture *)res;
726
unsigned level = view->u.tex.level;
727
unsigned width, height, depth, hw_level;
728
bool uses_dcc = vi_dcc_enabled(tex, level);
729
unsigned access = view->access;
730
731
assert(!tex->is_depth);
732
assert(fmask_desc || tex->surface.fmask_offset == 0);
733
734
if (uses_dcc && !skip_decompress &&
735
!(access & SI_IMAGE_ACCESS_DCC_OFF) &&
736
((!(access & SI_IMAGE_ACCESS_DCC_WRITE) && (access & PIPE_IMAGE_ACCESS_WRITE)) ||
737
!vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) {
738
/* If DCC can't be disabled, at least decompress it.
739
* The decompression is relatively cheap if the surface
740
* has been decompressed already.
741
*/
742
if (!si_texture_disable_dcc(ctx, tex))
743
si_decompress_dcc(ctx, tex);
744
}
745
746
if (ctx->chip_class >= GFX9) {
747
/* Always set the base address. The swizzle modes don't
748
* allow setting mipmap level offsets as the base.
749
*/
750
width = res->b.b.width0;
751
height = res->b.b.height0;
752
depth = res->b.b.depth0;
753
hw_level = level;
754
} else {
755
/* Always force the base level to the selected level.
756
*
757
* This is required for 3D textures, where otherwise
758
* selecting a single slice for non-layered bindings
759
* fails. It doesn't hurt the other targets.
760
*/
761
width = u_minify(res->b.b.width0, level);
762
height = u_minify(res->b.b.height0, level);
763
depth = u_minify(res->b.b.depth0, level);
764
hw_level = 0;
765
}
766
767
screen->make_texture_descriptor(
768
screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level,
769
view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc);
770
si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level,
771
util_format_get_blockwidth(view->format),
772
false, view->access, desc);
773
}
774
}
775
776
static void si_set_shader_image(struct si_context *ctx, unsigned shader, unsigned slot,
777
const struct pipe_image_view *view, bool skip_decompress)
778
{
779
struct si_images *images = &ctx->images[shader];
780
struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
781
struct si_resource *res;
782
783
if (!view || !view->resource) {
784
si_disable_shader_image(ctx, shader, slot);
785
return;
786
}
787
788
res = si_resource(view->resource);
789
790
si_set_shader_image_desc(ctx, view, skip_decompress, descs->list + si_get_image_slot(slot) * 8,
791
descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);
792
793
if (&images->views[slot] != view)
794
util_copy_image_view(&images->views[slot], view);
795
796
if (res->b.b.target == PIPE_BUFFER) {
797
images->needs_color_decompress_mask &= ~(1 << slot);
798
images->display_dcc_store_mask &= ~(1u << slot);
799
res->bind_history |= PIPE_BIND_SHADER_IMAGE;
800
} else {
801
struct si_texture *tex = (struct si_texture *)res;
802
unsigned level = view->u.tex.level;
803
804
if (color_needs_decompression(tex)) {
805
images->needs_color_decompress_mask |= 1 << slot;
806
} else {
807
images->needs_color_decompress_mask &= ~(1 << slot);
808
}
809
810
if (tex->surface.display_dcc_offset && view->access & PIPE_IMAGE_ACCESS_WRITE)
811
images->display_dcc_store_mask |= 1u << slot;
812
else
813
images->display_dcc_store_mask &= ~(1u << slot);
814
815
if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
816
ctx->need_check_render_feedback = true;
817
}
818
819
images->enabled_mask |= 1u << slot;
820
ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
821
822
/* Since this can flush, it must be done after enabled_mask is updated. */
823
si_sampler_view_add_buffer(
824
ctx, &res->b.b,
825
(view->access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false,
826
true);
827
}
828
829
static void si_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader,
830
unsigned start_slot, unsigned count,
831
unsigned unbind_num_trailing_slots,
832
const struct pipe_image_view *views)
833
{
834
struct si_context *ctx = (struct si_context *)pipe;
835
unsigned i, slot;
836
837
assert(shader < SI_NUM_SHADERS);
838
839
if (!count && !unbind_num_trailing_slots)
840
return;
841
842
assert(start_slot + count + unbind_num_trailing_slots <= SI_NUM_IMAGES);
843
844
if (views) {
845
for (i = 0, slot = start_slot; i < count; ++i, ++slot)
846
si_set_shader_image(ctx, shader, slot, &views[i], false);
847
} else {
848
for (i = 0, slot = start_slot; i < count; ++i, ++slot)
849
si_set_shader_image(ctx, shader, slot, NULL, false);
850
}
851
852
for (i = 0; i < unbind_num_trailing_slots; ++i, ++slot)
853
si_set_shader_image(ctx, shader, slot, NULL, false);
854
855
if (shader == PIPE_SHADER_COMPUTE &&
856
ctx->cs_shader_state.program &&
857
start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs)
858
ctx->compute_image_sgprs_dirty = true;
859
860
si_update_shader_needs_decompress_mask(ctx, shader);
861
}
862
863
static void si_images_update_needs_color_decompress_mask(struct si_images *images)
864
{
865
unsigned mask = images->enabled_mask;
866
867
while (mask) {
868
int i = u_bit_scan(&mask);
869
struct pipe_resource *res = images->views[i].resource;
870
871
if (res && res->target != PIPE_BUFFER) {
872
struct si_texture *tex = (struct si_texture *)res;
873
874
if (color_needs_decompression(tex)) {
875
images->needs_color_decompress_mask |= 1 << i;
876
} else {
877
images->needs_color_decompress_mask &= ~(1 << i);
878
}
879
}
880
}
881
}
882
883
void si_update_ps_colorbuf0_slot(struct si_context *sctx)
884
{
885
struct si_buffer_resources *buffers = &sctx->internal_bindings;
886
struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
887
unsigned slot = SI_PS_IMAGE_COLORBUF0;
888
struct pipe_surface *surf = NULL;
889
890
/* si_texture_disable_dcc can get us here again. */
891
if (sctx->blitter_running)
892
return;
893
894
/* See whether FBFETCH is used and color buffer 0 is set. */
895
if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output &&
896
sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0])
897
surf = sctx->framebuffer.state.cbufs[0];
898
899
/* Return if FBFETCH transitions from disabled to disabled. */
900
if (!buffers->buffers[slot] && !surf)
901
return;
902
903
sctx->ps_uses_fbfetch = surf != NULL;
904
si_update_ps_iter_samples(sctx);
905
906
if (surf) {
907
struct si_texture *tex = (struct si_texture *)surf->texture;
908
struct pipe_image_view view = {0};
909
910
assert(tex);
911
assert(!tex->is_depth);
912
913
/* Disable DCC, because the texture is used as both a sampler
914
* and color buffer.
915
*/
916
si_texture_disable_dcc(sctx, tex);
917
918
if (tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer) {
919
/* Disable CMASK. */
920
assert(tex->cmask_buffer != &tex->buffer);
921
si_eliminate_fast_color_clear(sctx, tex, NULL);
922
si_texture_discard_cmask(sctx->screen, tex);
923
}
924
925
view.resource = surf->texture;
926
view.format = surf->format;
927
view.access = PIPE_IMAGE_ACCESS_READ;
928
view.u.tex.first_layer = surf->u.tex.first_layer;
929
view.u.tex.last_layer = surf->u.tex.last_layer;
930
view.u.tex.level = surf->u.tex.level;
931
932
/* Set the descriptor. */
933
uint32_t *desc = descs->list + slot * 4;
934
memset(desc, 0, 16 * 4);
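/* Note that 16 dwords are cleared here, i.e. four consecutive 4-dword
 * elements of the internal list: the image descriptor is written into the
 * first 8 dwords and the FMASK part into the next 8 (via desc + 8 below). */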
935
si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);
936
937
pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
938
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ,
939
RADEON_PRIO_SHADER_RW_IMAGE);
940
buffers->enabled_mask |= 1llu << slot;
941
} else {
942
/* Clear the descriptor. */
943
memset(descs->list + slot * 4, 0, 8 * 4);
944
pipe_resource_reference(&buffers->buffers[slot], NULL);
945
buffers->enabled_mask &= ~(1llu << slot);
946
}
947
948
sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
949
}
950
951
/* SAMPLER STATES */
952
953
static void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader,
954
unsigned start, unsigned count, void **states)
955
{
956
struct si_context *sctx = (struct si_context *)ctx;
957
struct si_samplers *samplers = &sctx->samplers[shader];
958
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
959
struct si_sampler_state **sstates = (struct si_sampler_state **)states;
960
int i;
961
962
if (!count || shader >= SI_NUM_SHADERS || !sstates)
963
return;
964
965
for (i = 0; i < count; i++) {
966
unsigned slot = start + i;
967
unsigned desc_slot = si_get_sampler_slot(slot);
968
969
if (!sstates[i] || sstates[i] == samplers->sampler_states[slot])
970
continue;
971
972
#ifndef NDEBUG
973
assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
974
#endif
975
samplers->sampler_states[slot] = sstates[i];
976
977
/* If FMASK is bound, don't overwrite it.
978
* The sampler state will be set after FMASK is unbound.
979
*/
980
struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[slot];
981
982
struct si_texture *tex = NULL;
983
984
if (sview && sview->base.texture && sview->base.texture->target != PIPE_BUFFER)
985
tex = (struct si_texture *)sview->base.texture;
986
987
if (tex && tex->surface.fmask_size)
988
continue;
989
990
si_set_sampler_state_desc(sstates[i], sview, tex, desc->list + desc_slot * 16 + 12);
991
992
sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
993
}
994
}
995
996
/* BUFFER RESOURCES */
997
998
static void si_init_buffer_resources(struct si_context *sctx,
999
struct si_buffer_resources *buffers,
1000
struct si_descriptors *descs, unsigned num_buffers,
1001
short shader_userdata_rel_index,
1002
enum radeon_bo_priority priority,
1003
enum radeon_bo_priority priority_constbuf)
1004
{
1005
buffers->priority = priority;
1006
buffers->priority_constbuf = priority_constbuf;
1007
buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource *));
1008
buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0]));
1009
1010
si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);
1011
1012
/* Initialize buffer descriptors, so that we don't have to do it at bind time. */
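/* Only dwords 0-2 of a buffer descriptor change when (re)binding; dword 3 set
 * below stays constant, which is why the bind/clear paths later in this file
 * only touch the first three dwords. */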
1013
for (unsigned i = 0; i < num_buffers; i++) {
1014
uint32_t *desc = descs->list + i * 4;
1015
1016
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1017
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
1018
1019
if (sctx->chip_class >= GFX10) {
1020
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
1021
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
1022
} else {
1023
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1024
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1025
}
1026
}
1027
}
1028
1029
static void si_release_buffer_resources(struct si_buffer_resources *buffers,
1030
struct si_descriptors *descs)
1031
{
1032
int i;
1033
1034
for (i = 0; i < descs->num_elements; i++) {
1035
pipe_resource_reference(&buffers->buffers[i], NULL);
1036
}
1037
1038
FREE(buffers->buffers);
1039
FREE(buffers->offsets);
1040
}
1041
1042
static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
1043
struct si_buffer_resources *buffers)
1044
{
1045
uint64_t mask = buffers->enabled_mask;
1046
1047
/* Add buffers to the CS. */
1048
while (mask) {
1049
int i = u_bit_scan64(&mask);
1050
1051
radeon_add_to_buffer_list(
1052
sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]),
1053
buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
1054
i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf);
1055
}
1056
}
1057
1058
static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
1059
struct si_buffer_resources *buffers)
1060
{
1061
uint64_t mask = buffers->enabled_mask;
1062
1063
while (mask) {
1064
int i = u_bit_scan64(&mask);
1065
1066
if (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED)
1067
return true;
1068
}
1069
1070
return false;
1071
}
1072
1073
static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
1074
struct si_descriptors *descs, unsigned idx,
1075
struct pipe_resource **buf, unsigned *offset,
1076
unsigned *size)
1077
{
1078
pipe_resource_reference(buf, buffers->buffers[idx]);
1079
if (*buf) {
1080
struct si_resource *res = si_resource(*buf);
1081
const uint32_t *desc = descs->list + idx * 4;
1082
uint64_t va;
1083
1084
*size = desc[2];
1085
1086
assert(G_008F04_STRIDE(desc[1]) == 0);
1087
va = si_desc_extract_buffer_address(desc);
1088
1089
assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
1090
*offset = va - res->gpu_address;
1091
}
1092
}
1093
1094
/* VERTEX BUFFERS */
1095
1096
static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
1097
{
1098
int count = sctx->num_vertex_elements;
1099
int i;
1100
1101
for (i = 0; i < count; i++) {
1102
int vb = sctx->vertex_elements->vertex_buffer_index[i];
1103
1104
if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
1105
continue;
1106
if (!sctx->vertex_buffer[vb].buffer.resource)
1107
continue;
1108
1109
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs,
1110
si_resource(sctx->vertex_buffer[vb].buffer.resource),
1111
RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
1112
}
1113
1114
if (!sctx->vb_descriptors_buffer)
1115
return;
1116
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
1117
RADEON_PRIO_DESCRIPTORS);
1118
}
1119
1120
/* CONSTANT BUFFERS */
1121
1122
static struct si_descriptors *si_const_and_shader_buffer_descriptors(struct si_context *sctx,
1123
unsigned shader)
1124
{
1125
return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
1126
}
1127
1128
static void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf,
1129
const uint8_t *ptr, unsigned size, uint32_t *const_offset)
1130
{
1131
void *tmp;
1132
1133
u_upload_alloc(sctx->b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size),
1134
const_offset, (struct pipe_resource **)buf, &tmp);
1135
if (*buf)
1136
util_memcpy_cpu_to_le32(tmp, ptr, size);
1137
}
1138
1139
static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
1140
unsigned descriptors_idx, uint slot, bool take_ownership,
1141
const struct pipe_constant_buffer *input)
1142
{
1143
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1144
assert(slot < descs->num_elements);
1145
pipe_resource_reference(&buffers->buffers[slot], NULL);
1146
1147
/* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
1148
* with a NULL buffer). We need to use a dummy buffer instead. */
1149
if (sctx->chip_class == GFX7 && (!input || (!input->buffer && !input->user_buffer)))
1150
input = &sctx->null_const_buf;
1151
1152
if (input && (input->buffer || input->user_buffer)) {
1153
struct pipe_resource *buffer = NULL;
1154
uint64_t va;
1155
unsigned buffer_offset;
1156
1157
/* Upload the user buffer if needed. */
1158
if (input->user_buffer) {
1159
si_upload_const_buffer(sctx, (struct si_resource **)&buffer, input->user_buffer,
1160
input->buffer_size, &buffer_offset);
1161
if (!buffer) {
1162
/* Just unbind on failure. */
1163
si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, false, NULL);
1164
return;
1165
}
1166
} else {
1167
if (take_ownership) {
1168
buffer = input->buffer;
1169
} else {
1170
pipe_resource_reference(&buffer, input->buffer);
1171
}
1172
buffer_offset = input->buffer_offset;
1173
}
1174
1175
va = si_resource(buffer)->gpu_address + buffer_offset;
1176
1177
/* Set the descriptor. */
1178
uint32_t *desc = descs->list + slot * 4;
1179
desc[0] = va;
1180
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
1181
desc[2] = input->buffer_size;
1182
1183
buffers->buffers[slot] = buffer;
1184
buffers->offsets[slot] = buffer_offset;
1185
radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
1186
buffers->priority_constbuf, true);
1187
buffers->enabled_mask |= 1llu << slot;
1188
} else {
1189
/* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
1190
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 3);
1191
buffers->enabled_mask &= ~(1llu << slot);
1192
}
1193
1194
sctx->descriptors_dirty |= 1u << descriptors_idx;
1195
}
1196
1197
static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader,
1198
uint slot, bool take_ownership,
1199
const struct pipe_constant_buffer *input)
1200
{
1201
struct si_context *sctx = (struct si_context *)ctx;
1202
1203
if (shader >= SI_NUM_SHADERS)
1204
return;
1205
1206
if (input) {
1207
if (input->buffer) {
1208
if (slot == 0 &&
1209
!(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) {
1210
assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader");
1211
return;
1212
}
1213
si_resource(input->buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
1214
}
1215
1216
if (slot == 0) {
1217
/* Invalidate current inlinable uniforms. */
1218
sctx->inlinable_uniforms_valid_mask &= ~(1 << shader);
1219
}
1220
}
1221
1222
slot = si_get_constbuf_slot(slot);
1223
si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
1224
si_const_and_shader_buffer_descriptors_idx(shader), slot,
1225
take_ownership, input);
1226
}
1227
1228
static void si_set_inlinable_constants(struct pipe_context *ctx,
1229
enum pipe_shader_type shader,
1230
uint num_values, uint32_t *values)
1231
{
1232
struct si_context *sctx = (struct si_context *)ctx;
1233
1234
if (!(sctx->inlinable_uniforms_valid_mask & BITFIELD_BIT(shader))) {
1235
/* It's the first time we set the constants. Always update shaders. */
1236
memcpy(sctx->inlinable_uniforms[shader], values, num_values * 4);
1237
sctx->inlinable_uniforms_valid_mask |= BITFIELD_BIT(shader);
1238
sctx->do_update_shaders = true;
1239
return;
1240
}
1241
1242
/* We have already set inlinable constants for this shader. Update the shader only if
1243
* the constants are being changed so as not to update shaders needlessly.
1244
*/
1245
if (memcmp(sctx->inlinable_uniforms[shader], values, num_values * 4)) {
1246
memcpy(sctx->inlinable_uniforms[shader], values, num_values * 4);
1247
sctx->do_update_shaders = true;
1248
}
1249
}
1250
1251
void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot,
1252
struct pipe_constant_buffer *cbuf)
1253
{
1254
cbuf->user_buffer = NULL;
1255
si_get_buffer_from_descriptors(
1256
&sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors(sctx, shader),
1257
si_get_constbuf_slot(slot), &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
1258
}
1259
1260
/* SHADER BUFFERS */
1261
1262
static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
1263
unsigned descriptors_idx, uint slot,
1264
const struct pipe_shader_buffer *sbuffer, bool writable,
1265
enum radeon_bo_priority priority)
1266
{
1267
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1268
uint32_t *desc = descs->list + slot * 4;
1269
1270
if (!sbuffer || !sbuffer->buffer) {
1271
pipe_resource_reference(&buffers->buffers[slot], NULL);
1272
/* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
1273
memset(desc, 0, sizeof(uint32_t) * 3);
1274
buffers->enabled_mask &= ~(1llu << slot);
1275
buffers->writable_mask &= ~(1llu << slot);
1276
sctx->descriptors_dirty |= 1u << descriptors_idx;
1277
return;
1278
}
1279
1280
struct si_resource *buf = si_resource(sbuffer->buffer);
1281
uint64_t va = buf->gpu_address + sbuffer->buffer_offset;
1282
1283
desc[0] = va;
1284
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
1285
desc[2] = sbuffer->buffer_size;
1286
1287
pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
1288
buffers->offsets[slot] = sbuffer->buffer_offset;
1289
radeon_add_to_gfx_buffer_list_check_mem(
1290
sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true);
1291
if (writable)
1292
buffers->writable_mask |= 1llu << slot;
1293
else
1294
buffers->writable_mask &= ~(1llu << slot);
1295
1296
buffers->enabled_mask |= 1llu << slot;
1297
sctx->descriptors_dirty |= 1lu << descriptors_idx;
1298
1299
util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
1300
sbuffer->buffer_offset + sbuffer->buffer_size);
1301
}
1302
1303
static void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader,
1304
unsigned start_slot, unsigned count,
1305
const struct pipe_shader_buffer *sbuffers,
1306
unsigned writable_bitmask)
1307
{
1308
struct si_context *sctx = (struct si_context *)ctx;
1309
struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1310
unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader);
1311
unsigned i;
1312
1313
assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);
1314
1315
if (shader == PIPE_SHADER_COMPUTE &&
1316
sctx->cs_shader_state.program &&
1317
start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs)
1318
sctx->compute_shaderbuf_sgprs_dirty = true;
1319
1320
for (i = 0; i < count; ++i) {
1321
const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
1322
unsigned slot = si_get_shaderbuf_slot(start_slot + i);
1323
1324
if (sbuffer && sbuffer->buffer)
1325
si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER;
1326
1327
si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer,
1328
!!(writable_bitmask & (1u << i)), buffers->priority);
1329
}
1330
}
1331
1332
void si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot,
1333
uint count, struct pipe_shader_buffer *sbuf)
1334
{
1335
struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
1336
struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);
1337
1338
for (unsigned i = 0; i < count; ++i) {
1339
si_get_buffer_from_descriptors(buffers, descs, si_get_shaderbuf_slot(start_slot + i),
1340
&sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size);
1341
}
1342
}
1343
1344
/* RING BUFFERS */
1345
1346
void si_set_internal_const_buffer(struct si_context *sctx, uint slot,
1347
const struct pipe_constant_buffer *input)
1348
{
1349
si_set_constant_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, false, input);
1350
}
1351
1352
void si_set_internal_shader_buffer(struct si_context *sctx, uint slot,
1353
const struct pipe_shader_buffer *sbuffer)
1354
{
1355
si_set_shader_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, sbuffer, true,
1356
RADEON_PRIO_SHADER_RW_BUFFER);
1357
}
1358
1359
void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer,
1360
unsigned stride, unsigned num_records, bool add_tid, bool swizzle,
1361
unsigned element_size, unsigned index_stride, uint64_t offset)
1362
{
1363
struct si_buffer_resources *buffers = &sctx->internal_bindings;
1364
struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
1365
1366
/* The stride field in the resource descriptor has 14 bits */
1367
assert(stride < (1 << 14));
1368
1369
assert(slot < descs->num_elements);
1370
pipe_resource_reference(&buffers->buffers[slot], NULL);
1371
1372
if (buffer) {
1373
uint64_t va;
1374
1375
va = si_resource(buffer)->gpu_address + offset;
1376
1377
switch (element_size) {
1378
default:
1379
assert(!"Unsupported ring buffer element size");
1380
case 0:
1381
case 2:
1382
element_size = 0;
1383
break;
1384
case 4:
1385
element_size = 1;
1386
break;
1387
case 8:
1388
element_size = 2;
1389
break;
1390
case 16:
1391
element_size = 3;
1392
break;
1393
}
1394
1395
switch (index_stride) {
1396
default:
1397
assert(!"Unsupported ring buffer index stride");
1398
case 0:
1399
case 8:
1400
index_stride = 0;
1401
break;
1402
case 16:
1403
index_stride = 1;
1404
break;
1405
case 32:
1406
index_stride = 2;
1407
break;
1408
case 64:
1409
index_stride = 3;
1410
break;
1411
}
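/* The two switches above map byte sizes onto the encoded descriptor fields:
 * element_size 2/4/8/16 bytes -> 0/1/2/3 and index_stride 8/16/32/64 bytes
 * -> 0/1/2/3. */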
1412
1413
if (sctx->chip_class >= GFX8 && stride)
1414
num_records *= stride;
1415
1416
/* Set the descriptor. */
1417
uint32_t *desc = descs->list + slot * 4;
1418
desc[0] = va;
1419
desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride) |
1420
S_008F04_SWIZZLE_ENABLE(swizzle);
1421
desc[2] = num_records;
1422
desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
1423
S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
1424
S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid);
1425
1426
if (sctx->chip_class >= GFX9)
1427
assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
1428
else
1429
desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);
1430
1431
if (sctx->chip_class >= GFX10) {
1432
desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
1433
S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
1434
} else {
1435
desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
1436
S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
1437
}
1438
1439
pipe_resource_reference(&buffers->buffers[slot], buffer);
1440
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE,
1441
buffers->priority);
1442
buffers->enabled_mask |= 1llu << slot;
1443
} else {
1444
/* Clear the descriptor. */
1445
memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
1446
buffers->enabled_mask &= ~(1llu << slot);
1447
}
1448
1449
sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
1450
}
1451
1452
/* INTERNAL CONST BUFFERS */
1453
1454
static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state)
1455
{
1456
struct si_context *sctx = (struct si_context *)ctx;
1457
struct pipe_constant_buffer cb = {};
1458
unsigned stipple[32];
1459
int i;
1460
1461
for (i = 0; i < 32; i++)
1462
stipple[i] = util_bitreverse(state->stipple[i]);
1463
1464
cb.user_buffer = stipple;
1465
cb.buffer_size = sizeof(stipple);
1466
1467
si_set_internal_const_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
1468
}
1469
1470
/* TEXTURE METADATA ENABLE/DISABLE */
1471
1472
static void si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
1473
{
1474
util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
1475
util_dynarray_clear(&sctx->resident_img_needs_color_decompress);
1476
1477
util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
1478
struct pipe_resource *res = (*tex_handle)->view->texture;
1479
struct si_texture *tex;
1480
1481
if (!res || res->target == PIPE_BUFFER)
1482
continue;
1483
1484
tex = (struct si_texture *)res;
1485
if (!color_needs_decompression(tex))
1486
continue;
1487
1488
util_dynarray_append(&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *,
1489
*tex_handle);
1490
}
1491
1492
util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
1493
struct pipe_image_view *view = &(*img_handle)->view;
1494
struct pipe_resource *res = view->resource;
1495
struct si_texture *tex;
1496
1497
if (!res || res->target == PIPE_BUFFER)
1498
continue;
1499
1500
tex = (struct si_texture *)res;
1501
if (!color_needs_decompression(tex))
1502
continue;
1503
1504
util_dynarray_append(&sctx->resident_img_needs_color_decompress, struct si_image_handle *,
1505
*img_handle);
1506
}
1507
}
1508
1509
/* CMASK can be enabled (for fast clear) and disabled (for texture export)
1510
* while the texture is bound, possibly by a different context. In that case,
1511
* call this function to update needs_*_decompress_masks.
1512
*/
1513
void si_update_needs_color_decompress_masks(struct si_context *sctx)
1514
{
1515
for (int i = 0; i < SI_NUM_SHADERS; ++i) {
1516
si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
1517
si_images_update_needs_color_decompress_mask(&sctx->images[i]);
1518
si_update_shader_needs_decompress_mask(sctx, i);
1519
}
1520
1521
si_resident_handles_update_needs_color_decompress(sctx);
1522
}
1523
1524
/* BUFFER DISCARD/INVALIDATION */
1525
1526
/* Reset descriptors of buffer resources after \p buf has been invalidated.
1527
* If buf == NULL, reset all descriptors.
1528
*/
1529
static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
1530
unsigned descriptors_idx, uint64_t slot_mask,
1531
struct pipe_resource *buf, enum radeon_bo_priority priority)
1532
{
1533
struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
1534
bool noop = true;
1535
uint64_t mask = buffers->enabled_mask & slot_mask;
1536
1537
while (mask) {
1538
unsigned i = u_bit_scan64(&mask);
1539
struct pipe_resource *buffer = buffers->buffers[i];
1540
1541
if (buffer && (!buf || buffer == buf)) {
1542
si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
1543
sctx->descriptors_dirty |= 1u << descriptors_idx;
1544
1545
radeon_add_to_gfx_buffer_list_check_mem(
1546
sctx, si_resource(buffer),
1547
buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
1548
priority, true);
1549
noop = false;
1550
}
1551
}
1552
return !noop;
1553
}
1554
1555
/* Update all buffer bindings where the buffer is bound, including
1556
* all resource descriptors. This is invalidate_buffer without
1557
* the invalidation.
1558
*
1559
* If buf == NULL, update all buffer bindings.
1560
*/
1561
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
1562
{
1563
struct si_resource *buffer = si_resource(buf);
1564
unsigned i, shader;
1565
unsigned num_elems = sctx->num_vertex_elements;
1566
1567
/* We changed the buffer, now we need to bind it where the old one
1568
* was bound. This consists of 2 things:
1569
* 1) Updating the resource descriptor and dirtying it.
1570
* 2) Adding a relocation to the CS, so that it's usable.
1571
*/

   /* Vertex buffers. */
   if (!buffer) {
      sctx->vertex_buffers_dirty = num_elems > 0;
   } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
      for (i = 0; i < num_elems; i++) {
         int vb = sctx->vertex_elements->vertex_buffer_index[i];

         if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
            continue;
         if (!sctx->vertex_buffer[vb].buffer.resource)
            continue;

         if (sctx->vertex_buffer[vb].buffer.resource == buf) {
            sctx->vertex_buffers_dirty = num_elems > 0;
            break;
         }
      }
   }

   /* Streamout buffers. (other internal buffers can't be invalidated) */
   if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
      for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
         struct si_buffer_resources *buffers = &sctx->internal_bindings;
         struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
         struct pipe_resource *buffer = buffers->buffers[i];

         if (!buffer || (buf && buffer != buf))
            continue;

         si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
         sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;

         radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_WRITE,
                                                 RADEON_PRIO_SHADER_RW_BUFFER, true);

         /* Update the streamout state. */
         if (sctx->streamout.begin_emitted)
            si_emit_streamout_end(sctx);
         sctx->streamout.append_bitmask = sctx->streamout.enabled_mask;
         si_streamout_buffers_dirty(sctx);
      }
   }

   /* Constant and shader buffers. */
   if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
      for (shader = 0; shader < SI_NUM_SHADERS; shader++)
         si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
                                   si_const_and_shader_buffer_descriptors_idx(shader),
                                   u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
                                   buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
   }

   if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
      for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
         if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
                                       si_const_and_shader_buffer_descriptors_idx(shader),
                                       u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
                                       sctx->const_and_shader_buffers[shader].priority) &&
             shader == PIPE_SHADER_COMPUTE) {
            sctx->compute_shaderbuf_sgprs_dirty = true;
         }
      }
   }

   if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
      /* Texture buffers - update bindings. */
      for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
         struct si_samplers *samplers = &sctx->samplers[shader];
         struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
         unsigned mask = samplers->enabled_mask;

         while (mask) {
            unsigned i = u_bit_scan(&mask);
            struct pipe_resource *buffer = samplers->views[i]->texture;

            if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
               unsigned desc_slot = si_get_sampler_slot(i);

               si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset,
                                       descs->list + desc_slot * 16 + 4);
               sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);

               radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
                                                       RADEON_PRIO_SAMPLER_BUFFER, true);
            }
         }
      }
   }

   /* Shader images */
   if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
      for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
         struct si_images *images = &sctx->images[shader];
         struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
         unsigned mask = images->enabled_mask;

         while (mask) {
            unsigned i = u_bit_scan(&mask);
            struct pipe_resource *buffer = images->views[i].resource;

            if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
               unsigned desc_slot = si_get_image_slot(i);

               if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
                  si_mark_image_range_valid(&images->views[i]);

               si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset,
                                       descs->list + desc_slot * 8 + 4);
               sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);

               radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer),
                                                       RADEON_USAGE_READWRITE,
                                                       RADEON_PRIO_SAMPLER_BUFFER, true);

               if (shader == PIPE_SHADER_COMPUTE)
                  sctx->compute_image_sgprs_dirty = true;
            }
         }
      }
   }

   /* Bindless texture handles */
   if (!buffer || buffer->texture_handle_allocated) {
      struct si_descriptors *descs = &sctx->bindless_descriptors;

      util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
         struct pipe_sampler_view *view = (*tex_handle)->view;
         unsigned desc_slot = (*tex_handle)->desc_slot;
         struct pipe_resource *buffer = view->texture;

         if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
            si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
                                    descs->list + desc_slot * 16 + 4);

            (*tex_handle)->desc_dirty = true;
            sctx->bindless_descriptors_dirty = true;

            radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
                                                    RADEON_PRIO_SAMPLER_BUFFER, true);
         }
      }
   }

   /* Bindless image handles */
   if (!buffer || buffer->image_handle_allocated) {
      struct si_descriptors *descs = &sctx->bindless_descriptors;

      util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
         struct pipe_image_view *view = &(*img_handle)->view;
         unsigned desc_slot = (*img_handle)->desc_slot;
         struct pipe_resource *buffer = view->resource;

         if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               si_mark_image_range_valid(view);

            si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
                                    descs->list + desc_slot * 16 + 4);

            (*img_handle)->desc_dirty = true;
            sctx->bindless_descriptors_dirty = true;

            radeon_add_to_gfx_buffer_list_check_mem(
               sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true);
         }
      }
   }

   if (buffer) {
      /* Do the same for other contexts. They will invoke this function
       * with buffer == NULL.
       */
      unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);

      /* Skip the update for the current context, because we have already updated
       * the buffer bindings.
       */
      if (new_counter == sctx->last_dirty_buf_counter + 1)
         sctx->last_dirty_buf_counter = new_counter;
   }
}
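
/* Illustrative sketch (assumption, not code from this file): other contexts
 * pick up the bumped dirty_buf_counter lazily, typically before a draw or
 * dispatch, and then rebind everything by calling si_rebind_buffer with
 * buf == NULL, as the comments above describe.
 */
#if 0
   unsigned counter = p_atomic_read(&sctx->screen->dirty_buf_counter);

   if (counter != sctx->last_dirty_buf_counter) {
      sctx->last_dirty_buf_counter = counter;
      si_rebind_buffer(sctx, NULL);
   }
#endif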

static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot,
                                          unsigned num_dwords)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = desc_slot * 16;
   uint32_t *data;
   uint64_t va;

   data = desc->list + desc_slot_offset;
   va = desc->gpu_address + desc_slot_offset * 4;

   si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4, V_370_TC_L2,
                    V_370_ME, data);
}
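
/* Worked example (illustrative only): each bindless slot is 16 dwords, so for
 * desc_slot = 3 the CPU copy starts at desc->list + 48 and the GPU copy at
 * desc->gpu_address + 48 * 4 = gpu_address + 192 bytes. A texture handle
 * rewrites all 16 dwords (num_dwords = 16), while an image handle only
 * rewrites the first 8 (num_dwords = 8), matching the callers below.
 */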

static void si_upload_bindless_descriptors(struct si_context *sctx)
{
   if (!sctx->bindless_descriptors_dirty)
      return;

   /* Wait for graphics/compute to be idle before updating the resident
    * descriptors directly in memory, in case the GPU is using them.
    */
   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
   sctx->emit_cache_flush(sctx, &sctx->gfx_cs);

   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      unsigned desc_slot = (*tex_handle)->desc_slot;

      if (!(*tex_handle)->desc_dirty)
         continue;

      si_upload_bindless_descriptor(sctx, desc_slot, 16);
      (*tex_handle)->desc_dirty = false;
   }

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      unsigned desc_slot = (*img_handle)->desc_slot;

      if (!(*img_handle)->desc_dirty)
         continue;

      si_upload_bindless_descriptor(sctx, desc_slot, 8);
      (*img_handle)->desc_dirty = false;
   }

   /* Invalidate scalar L0 because the cache doesn't know that L2 changed. */
   sctx->flags |= SI_CONTEXT_INV_SCACHE;
   sctx->bindless_descriptors_dirty = false;
}

/* Update mutable image descriptor fields of all resident textures. */
static void si_update_bindless_texture_descriptor(struct si_context *sctx,
                                                  struct si_texture_handle *tex_handle)
{
   struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = tex_handle->desc_slot * 16;
   uint32_t desc_list[16];

   if (sview->base.texture->target == PIPE_BUFFER)
      return;

   memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
   si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, desc->list + desc_slot_offset);

   if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) {
      tex_handle->desc_dirty = true;
      sctx->bindless_descriptors_dirty = true;
   }
}

static void si_update_bindless_image_descriptor(struct si_context *sctx,
                                                struct si_image_handle *img_handle)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = img_handle->desc_slot * 16;
   struct pipe_image_view *view = &img_handle->view;
   struct pipe_resource *res = view->resource;
   uint32_t image_desc[16];
   unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;

   if (res->target == PIPE_BUFFER)
      return;

   memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
   si_set_shader_image_desc(sctx, view, true, desc->list + desc_slot_offset,
                            desc->list + desc_slot_offset + 8);

   if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
      img_handle->desc_dirty = true;
      sctx->bindless_descriptors_dirty = true;
   }
}

static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
{
   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      si_update_bindless_texture_descriptor(sctx, *tex_handle);
   }

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      si_update_bindless_image_descriptor(sctx, *img_handle);
   }

   si_upload_bindless_descriptors(sctx);
}

/* Update mutable image descriptor fields of all bound textures. */
void si_update_all_texture_descriptors(struct si_context *sctx)
{
   unsigned shader;

   for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
      struct si_samplers *samplers = &sctx->samplers[shader];
      struct si_images *images = &sctx->images[shader];
      unsigned mask;

      /* Images. */
      mask = images->enabled_mask;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         struct pipe_image_view *view = &images->views[i];

         if (!view->resource || view->resource->target == PIPE_BUFFER)
            continue;

         si_set_shader_image(sctx, shader, i, view, true);
      }

      /* Sampler views. */
      mask = samplers->enabled_mask;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         struct pipe_sampler_view *view = samplers->views[i];

         if (!view || !view->texture || view->texture->target == PIPE_BUFFER)
            continue;

         si_set_sampler_views(sctx, shader, i, 1, 0, &samplers->views[i], true);
      }

      si_update_shader_needs_decompress_mask(sctx, shader);
   }

   si_update_all_resident_texture_descriptors(sctx);
   si_update_ps_colorbuf0_slot(sctx);
}

/* SHADER USER DATA */

static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader)
{
   sctx->shader_pointers_dirty |=
      u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS, SI_NUM_SHADER_DESCS);

   if (shader == PIPE_SHADER_VERTEX) {
      sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
                                          sctx->num_vertex_elements >
                                          sctx->screen->num_vbos_in_user_sgprs;
      sctx->vertex_buffer_user_sgprs_dirty =
         sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
   }

   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
}

void si_shader_pointers_mark_dirty(struct si_context *sctx)
{
   sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
   sctx->vertex_buffer_pointer_dirty = sctx->vb_descriptors_buffer != NULL &&
                                       sctx->num_vertex_elements >
                                       sctx->screen->num_vbos_in_user_sgprs;
   sctx->vertex_buffer_user_sgprs_dirty =
      sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
   sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
   sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
   sctx->compute_shaderbuf_sgprs_dirty = true;
   sctx->compute_image_sgprs_dirty = true;
}

/* Set a base register address for user data constants in the given shader.
 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
 */
static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base)
{
   uint32_t *base = &sctx->shader_pointers.sh_base[shader];

   if (*base != new_base) {
      *base = new_base;

      if (new_base)
         si_mark_shader_pointers_dirty(sctx, shader);

      /* Any change in enabled shader stages requires re-emitting
       * the VS state SGPR, because it contains the clamp_vertex_color
       * state, which can be done in VS, TES, and GS.
       */
      sctx->last_vs_state = ~0;
   }
}

/* This must be called when these are changed between enabled and disabled
 * - geometry shader
 * - tessellation evaluation shader
 * - NGG
 */
void si_shader_change_notify(struct si_context *sctx)
{
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->chip_class,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_VERTEX));

   si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
                         si_get_user_data_base(sctx->chip_class,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_TESS_EVAL));
}

#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \
   unsigned sh_reg_base = (sh_base); \
   if (sh_reg_base) { \
      unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \
      \
      while (mask) { \
         int start, count; \
         u_bit_scan_consecutive_range(&mask, &start, &count); \
         \
         struct si_descriptors *descs = &sctx->descriptors[start]; \
         unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
         \
         radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, count); \
         for (int i = 0; i < count; i++) \
            radeon_emit_32bit_pointer(sctx->screen, cs, descs[i].gpu_address); \
      } \
   } \
} while (0)
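
/* Worked example (illustrative only): if pointer_mask selects descriptor list
 * indices 3..5 and only bits 3 and 4 are set in shader_pointers_dirty, then
 * u_bit_scan_consecutive_range() returns start = 3, count = 2, so a single
 * radeon_set_sh_reg_seq() packet updates both descriptor pointers of that
 * stage back-to-back instead of emitting one packet per descriptor list.
 */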

static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
{
   radeon_begin(&sctx->gfx_cs);

   if (sctx->chip_class >= GFX10) {
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      /* HW VS stage only used in non-NGG mode. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
      /* We can't use the COMMON registers with register shadowing. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9) {
      /* Broadcast it to all shader stages. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
      radeon_end();
      return;
   }

   radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
   radeon_end();
}

void si_emit_graphics_shader_pointers(struct si_context *sctx)
{
   uint32_t *sh_base = sctx->shader_pointers.sh_base;

   if (sctx->shader_pointers_dirty & (1 << SI_DESCS_INTERNAL)) {
      si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]);
   }

   radeon_begin(&sctx->gfx_cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
                                       sh_base[PIPE_SHADER_VERTEX]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
                                       sh_base[PIPE_SHADER_TESS_EVAL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
                                       sh_base[PIPE_SHADER_FRAGMENT]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
                                       sh_base[PIPE_SHADER_TESS_CTRL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
                                       sh_base[PIPE_SHADER_GEOMETRY]);
   radeon_end();

   sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_INTERNAL, SI_DESCS_FIRST_COMPUTE);

   if (sctx->graphics_bindless_pointer_dirty) {
      si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
      sctx->graphics_bindless_pointer_dirty = false;
   }
}

void si_emit_compute_shader_pointers(struct si_context *sctx)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
   unsigned base = R_00B900_COMPUTE_USER_DATA_0;

   radeon_begin(cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
                                       R_00B900_COMPUTE_USER_DATA_0);
   sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);

   if (sctx->compute_bindless_pointer_dirty) {
      radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base);
      sctx->compute_bindless_pointer_dirty = false;
   }

   /* Set shader buffer descriptors in user SGPRs. */
   unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
   if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
      struct si_descriptors *desc = si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);

      radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                            shader->cs_shaderbufs_sgpr_index * 4,
                            num_shaderbufs * 4);

      for (unsigned i = 0; i < num_shaderbufs; i++)
         radeon_emit_array(cs, &desc->list[si_get_shaderbuf_slot(i) * 4], 4);

      sctx->compute_shaderbuf_sgprs_dirty = false;
   }

   /* Set image descriptors in user SGPRs. */
   unsigned num_images = shader->cs_num_images_in_user_sgprs;
   if (num_images && sctx->compute_image_sgprs_dirty) {
      struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);

      radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 +
                            shader->cs_images_sgpr_index * 4,
                            shader->cs_images_num_sgprs);

      for (unsigned i = 0; i < num_images; i++) {
         unsigned desc_offset = si_get_image_slot(i) * 8;
         unsigned num_sgprs = 8;

         /* Image buffers are in desc[4..7]. */
         if (shader->info.base.image_buffers & (1 << i)) {
            desc_offset += 4;
            num_sgprs = 4;
         }

         radeon_emit_array(cs, &desc->list[desc_offset], num_sgprs);
      }

      sctx->compute_image_sgprs_dirty = false;
   }
   radeon_end();
}
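
/* Worked example (illustrative only): with two images promoted to user SGPRs
 * where image 1 is a buffer image (its image_buffers bit is set), image 0
 * consumes the full 8 dwords of its descriptor and image 1 only the 4 buffer
 * dwords at desc[4..7], so cs_images_num_sgprs would cover 8 + 4 = 12 user
 * SGPRs starting at cs_images_sgpr_index.
 */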

/* BINDLESS */

static void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc,
                                         short shader_userdata_rel_index, unsigned num_elements)
{
   ASSERTED unsigned desc_slot;

   si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements);
   sctx->bindless_descriptors.num_active_slots = num_elements;

   /* The first bindless descriptor is stored at slot 1, because 0 is not
    * considered to be a valid handle.
    */
   sctx->num_bindless_descriptors = 1;

   /* Track which bindless slots are used (or not). */
   util_idalloc_init(&sctx->bindless_used_slots, num_elements);

   /* Reserve slot 0 because it's an invalid handle for bindless. */
   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
   assert(desc_slot == 0);
}

static void si_release_bindless_descriptors(struct si_context *sctx)
{
   si_release_descriptors(&sctx->bindless_descriptors);
   util_idalloc_fini(&sctx->bindless_used_slots);
}

static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot;

   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
   if (desc_slot >= desc->num_elements) {
      /* The array of bindless descriptors is full, resize it. */
      unsigned slot_size = desc->element_dw_size * 4;
      unsigned new_num_elements = desc->num_elements * 2;

      desc->list =
         REALLOC(desc->list, desc->num_elements * slot_size, new_num_elements * slot_size);
      desc->num_elements = new_num_elements;
      desc->num_active_slots = new_num_elements;
   }

   assert(desc_slot);
   return desc_slot;
}
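
/* Worked example (illustrative only): the list starts with 1024 elements of
 * 16 dwords each (see si_init_all_descriptors below), i.e. 1024 * 64 bytes =
 * 64 KiB of CPU storage. Once util_idalloc hands out a slot index >= 1024,
 * the array doubles to 2048 elements (128 KiB); the enlarged list is then
 * re-uploaded as a whole by si_create_bindless_descriptor.
 */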

static unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
                                              unsigned size)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot, desc_slot_offset;

   /* Find a free slot. */
   desc_slot = si_get_first_free_bindless_slot(sctx);

   /* For simplicity, sampler and image bindless descriptors use fixed
    * 16-dword slots for now. Image descriptors only need 8 dwords, but this
    * doesn't really matter because no real apps use image handles.
    */
   desc_slot_offset = desc_slot * 16;

   /* Copy the descriptor into the array. */
   memcpy(desc->list + desc_slot_offset, desc_list, size);

   /* Re-upload the whole array of bindless descriptors into a new buffer.
    */
   if (!si_upload_descriptors(sctx, desc))
      return 0;

   /* Make sure to re-emit the shader pointers for all stages. */
   sctx->graphics_bindless_pointer_dirty = true;
   sctx->compute_bindless_pointer_dirty = true;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);

   return desc_slot;
}

static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsigned desc_slot,
                                                 struct pipe_resource *resource, uint64_t offset,
                                                 bool *desc_dirty)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   struct si_resource *buf = si_resource(resource);
   unsigned desc_slot_offset = desc_slot * 16;
   uint32_t *desc_list = desc->list + desc_slot_offset + 4;
   uint64_t old_desc_va;

   assert(resource->target == PIPE_BUFFER);

   /* Retrieve the old buffer address from the descriptor. */
   old_desc_va = si_desc_extract_buffer_address(desc_list);

   if (old_desc_va != buf->gpu_address + offset) {
      /* The buffer has been invalidated while the handle wasn't
       * resident; update the descriptor and the dirty flag.
       */
      si_set_buf_desc_address(buf, offset, &desc_list[0]);

      *desc_dirty = true;
   }
}

static uint64_t si_create_texture_handle(struct pipe_context *ctx, struct pipe_sampler_view *view,
                                         const struct pipe_sampler_state *state)
{
   struct si_sampler_view *sview = (struct si_sampler_view *)view;
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct si_sampler_state *sstate;
   uint32_t desc_list[16];
   uint64_t handle;

   tex_handle = CALLOC_STRUCT(si_texture_handle);
   if (!tex_handle)
      return 0;

   memset(desc_list, 0, sizeof(desc_list));
   si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);

   sstate = ctx->create_sampler_state(ctx, state);
   if (!sstate) {
      FREE(tex_handle);
      return 0;
   }

   si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
   memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
   ctx->delete_sampler_state(ctx, sstate);

   tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
   if (!tex_handle->desc_slot) {
      FREE(tex_handle);
      return 0;
   }

   handle = tex_handle->desc_slot;

   if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)(uintptr_t)handle, tex_handle)) {
      FREE(tex_handle);
      return 0;
   }

   pipe_sampler_view_reference(&tex_handle->view, view);

   si_resource(sview->base.texture)->texture_handle_allocated = true;

   return handle;
}
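
/* Illustrative usage sketch (assumption, not code from this file): a state
 * tracker drives these pipe_context hooks roughly like this; "view" and
 * "sampler_state" are placeholders. The returned handle is the bindless
 * descriptor slot, so 0 always means failure.
 */
#if 0
   uint64_t handle = ctx->create_texture_handle(ctx, view, &sampler_state);

   if (handle) {
      ctx->make_texture_handle_resident(ctx, handle, true);
      /* ... draw using the handle in a shader ... */
      ctx->make_texture_handle_resident(ctx, handle, false);
      ctx->delete_texture_handle(ctx, handle);
   }
#endif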

static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   tex_handle = (struct si_texture_handle *)entry->data;

   /* Allow this descriptor slot to be re-used. */
   util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);

   pipe_sampler_view_reference(&tex_handle->view, NULL);
   _mesa_hash_table_remove(sctx->tex_handles, entry);
   FREE(tex_handle);
}

static void si_make_texture_handle_resident(struct pipe_context *ctx, uint64_t handle,
                                            bool resident)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct si_sampler_view *sview;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   tex_handle = (struct si_texture_handle *)entry->data;
   sview = (struct si_sampler_view *)tex_handle->view;

   if (resident) {
      if (sview->base.texture->target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)sview->base.texture;

         if (depth_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_tex_needs_depth_decompress,
                                 struct si_texture_handle *, tex_handle);
         }

         if (color_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
                                 struct si_texture_handle *, tex_handle);
         }

         if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
             p_atomic_read(&tex->framebuffers_bound))
            sctx->need_check_render_feedback = true;

         si_update_bindless_texture_descriptor(sctx, tex_handle);
      } else {
         si_update_bindless_buffer_descriptor(sctx, tex_handle->desc_slot, sview->base.texture,
                                              sview->base.u.buf.offset, &tex_handle->desc_dirty);
      }

      /* Re-upload the descriptor if it has been updated while it
       * wasn't resident.
       */
      if (tex_handle->desc_dirty)
         sctx->bindless_descriptors_dirty = true;

      /* Add the texture handle to the per-context list. */
      util_dynarray_append(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle);

      /* Add the buffers to the current CS in case si_begin_new_cs()
       * is not going to be called.
       */
      si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
                                 sview->is_stencil_sampler, false);
   } else {
      /* Remove the texture handle from the per-context list. */
      util_dynarray_delete_unordered(&sctx->resident_tex_handles, struct si_texture_handle *,
                                     tex_handle);

      if (sview->base.texture->target != PIPE_BUFFER) {
         util_dynarray_delete_unordered(&sctx->resident_tex_needs_depth_decompress,
                                        struct si_texture_handle *, tex_handle);

         util_dynarray_delete_unordered(&sctx->resident_tex_needs_color_decompress,
                                        struct si_texture_handle *, tex_handle);
      }
   }
}

static uint64_t si_create_image_handle(struct pipe_context *ctx, const struct pipe_image_view *view)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   uint32_t desc_list[16];
   uint64_t handle;

   if (!view || !view->resource)
      return 0;

   img_handle = CALLOC_STRUCT(si_image_handle);
   if (!img_handle)
      return 0;

   memset(desc_list, 0, sizeof(desc_list));
   si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);

   si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);

   img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
   if (!img_handle->desc_slot) {
      FREE(img_handle);
      return 0;
   }

   handle = img_handle->desc_slot;

   if (!_mesa_hash_table_insert(sctx->img_handles, (void *)(uintptr_t)handle, img_handle)) {
      FREE(img_handle);
      return 0;
   }

   util_copy_image_view(&img_handle->view, view);

   si_resource(view->resource)->image_handle_allocated = true;

   return handle;
}

static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   img_handle = (struct si_image_handle *)entry->data;

   util_copy_image_view(&img_handle->view, NULL);
   _mesa_hash_table_remove(sctx->img_handles, entry);
   FREE(img_handle);
}

static void si_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle,
                                          unsigned access, bool resident)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   struct pipe_image_view *view;
   struct si_resource *res;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   img_handle = (struct si_image_handle *)entry->data;
   view = &img_handle->view;
   res = si_resource(view->resource);

   if (resident) {
      if (res->b.b.target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)res;
         unsigned level = view->u.tex.level;

         if (color_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_img_needs_color_decompress,
                                 struct si_image_handle *, img_handle);
         }

         if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
            sctx->need_check_render_feedback = true;

         si_update_bindless_image_descriptor(sctx, img_handle);
      } else {
         si_update_bindless_buffer_descriptor(sctx, img_handle->desc_slot, view->resource,
                                              view->u.buf.offset, &img_handle->desc_dirty);
      }

      /* Re-upload the descriptor if it has been updated while it
       * wasn't resident.
       */
      if (img_handle->desc_dirty)
         sctx->bindless_descriptors_dirty = true;

      /* Add the image handle to the per-context list. */
      util_dynarray_append(&sctx->resident_img_handles, struct si_image_handle *, img_handle);

      /* Add the buffers to the current CS in case si_begin_new_cs()
       * is not going to be called.
       */
      si_sampler_view_add_buffer(
         sctx, view->resource,
         (access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false,
         false);
   } else {
      /* Remove the image handle from the per-context list. */
      util_dynarray_delete_unordered(&sctx->resident_img_handles, struct si_image_handle *,
                                     img_handle);

      if (res->b.b.target != PIPE_BUFFER) {
         util_dynarray_delete_unordered(&sctx->resident_img_needs_color_decompress,
                                        struct si_image_handle *, img_handle);
      }
   }
}

static void si_resident_buffers_add_all_to_bo_list(struct si_context *sctx)
{
   unsigned num_resident_tex_handles, num_resident_img_handles;

   num_resident_tex_handles = sctx->resident_tex_handles.size / sizeof(struct si_texture_handle *);
   num_resident_img_handles = sctx->resident_img_handles.size / sizeof(struct si_image_handle *);

   /* Add all resident texture handles. */
   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      struct si_sampler_view *sview = (struct si_sampler_view *)(*tex_handle)->view;

      si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
                                 sview->is_stencil_sampler, false);
   }

   /* Add all resident image handles. */
   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      struct pipe_image_view *view = &(*img_handle)->view;

      si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false);
   }

   sctx->num_resident_handles += num_resident_tex_handles + num_resident_img_handles;
   assert(sctx->bo_list_add_all_resident_resources);
   sctx->bo_list_add_all_resident_resources = false;
}

/* INIT/DEINIT/UPLOAD */

void si_init_all_descriptors(struct si_context *sctx)
{
   int i;
   unsigned first_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;

   for (i = first_shader; i < SI_NUM_SHADERS; i++) {
      bool is_2nd =
         sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY);
      unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
      unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
      int rel_dw_offset;
      struct si_descriptors *desc;

      if (is_2nd) {
         if (i == PIPE_SHADER_TESS_CTRL) {
            rel_dw_offset =
               (R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
         } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
            rel_dw_offset =
               (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
         } else {
            rel_dw_offset =
               (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
         }
      } else {
         rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS;
      }
      desc = si_const_and_shader_buffer_descriptors(sctx, i);
      si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i], desc, num_buffer_slots,
                               rel_dw_offset, RADEON_PRIO_SHADER_RW_BUFFER,
                               RADEON_PRIO_CONST_BUFFER);
      desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);

      if (is_2nd) {
         if (i == PIPE_SHADER_TESS_CTRL) {
            rel_dw_offset =
               (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
         } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
            rel_dw_offset =
               (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
         } else {
            rel_dw_offset =
               (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
         }
      } else {
         rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
      }

      desc = si_sampler_and_image_descriptors(sctx, i);
      si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);

      int j;
      for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
         memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
      for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
         memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
   }

   si_init_buffer_resources(sctx, &sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL],
                            SI_NUM_INTERNAL_BINDINGS, SI_SGPR_INTERNAL_BINDINGS,
                            /* The second priority is used by
                             * const buffers in RW buffer slots. */
                            RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
   sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots = SI_NUM_INTERNAL_BINDINGS;

   /* Initialize an array of 1024 bindless descriptors; when the limit is
    * reached, just make it larger and re-upload the whole array.
    */
   si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
                                SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024);

   sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

   /* Set pipe_context functions. */
   sctx->b.bind_sampler_states = si_bind_sampler_states;
   sctx->b.set_shader_images = si_set_shader_images;
   sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
   sctx->b.set_inlinable_constants = si_set_inlinable_constants;
   sctx->b.set_shader_buffers = si_set_shader_buffers;
   sctx->b.set_sampler_views = si_pipe_set_sampler_views;
   sctx->b.create_texture_handle = si_create_texture_handle;
   sctx->b.delete_texture_handle = si_delete_texture_handle;
   sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
   sctx->b.create_image_handle = si_create_image_handle;
   sctx->b.delete_image_handle = si_delete_image_handle;
   sctx->b.make_image_handle_resident = si_make_image_handle_resident;

   if (!sctx->has_graphics)
      return;

   sctx->b.set_polygon_stipple = si_set_polygon_stipple;

   /* Shader user data. */
   sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;

   /* Set default and immutable mappings. */
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               sctx->ngg, PIPE_SHADER_VERTEX));
   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_TESS_CTRL));
   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_GEOMETRY));
   si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
{
   unsigned dirty = sctx->descriptors_dirty & mask;

   if (dirty) {
      unsigned iter_mask = dirty;

      do {
         if (!si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&iter_mask)]))
            return false;
      } while (iter_mask);

      sctx->descriptors_dirty &= ~dirty;
      sctx->shader_pointers_dirty |= dirty;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
   }

   si_upload_bindless_descriptors(sctx);
   return true;
}

bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
   const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
   return si_upload_shader_descriptors(sctx, mask);
}

bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
   /* This does not update internal bindings because that is not needed for
    * compute shaders and the input buffer uses the same SGPRs anyway.
    */
   const unsigned mask =
      u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
   return si_upload_shader_descriptors(sctx, mask);
}

void si_release_all_descriptors(struct si_context *sctx)
{
   int i;

   for (i = 0; i < SI_NUM_SHADERS; i++) {
      si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
                                  si_const_and_shader_buffer_descriptors(sctx, i));
      si_release_sampler_views(&sctx->samplers[i]);
      si_release_image_views(&sctx->images[i]);
   }
   si_release_buffer_resources(&sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL]);
   for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
      pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);

   for (i = 0; i < SI_NUM_DESCS; ++i)
      si_release_descriptors(&sctx->descriptors[i]);

   si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
   sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */

   si_release_bindless_descriptors(sctx);
}

bool si_gfx_resources_check_encrypted(struct si_context *sctx)
{
   bool use_encrypted_bo = false;

   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS && !use_encrypted_bo; i++) {
      struct si_shader_ctx_state *current_shader = &sctx->shaders[i];
      if (!current_shader->cso)
         continue;

      use_encrypted_bo |=
         si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]);
      use_encrypted_bo |=
         si_sampler_views_check_encrypted(sctx, &sctx->samplers[i],
                                          current_shader->cso->info.base.textures_used[0]);
      use_encrypted_bo |= si_image_views_check_encrypted(sctx, &sctx->images[i],
                             u_bit_consecutive(0, current_shader->cso->info.base.num_images));
   }
   use_encrypted_bo |= si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);

   struct si_state_blend *blend = sctx->queued.named.blend;
   for (int i = 0; i < sctx->framebuffer.state.nr_cbufs && !use_encrypted_bo; i++) {
      struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
      if (surf && surf->texture) {
         struct si_texture *tex = (struct si_texture *)surf->texture;
         if (!(tex->buffer.flags & RADEON_FLAG_ENCRYPTED))
            continue;

         /* Are we reading from this framebuffer */
         if (((blend->blend_enable_4bit >> (4 * i)) & 0xf) ||
             vi_dcc_enabled(tex, 0)) {
            use_encrypted_bo = true;
         }
      }
   }

   if (sctx->framebuffer.state.zsbuf) {
      struct si_texture *zs = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
      if (zs &&
          (zs->buffer.flags & RADEON_FLAG_ENCRYPTED)) {
         /* TODO: This isn't needed if depth.func is PIPE_FUNC_NEVER or PIPE_FUNC_ALWAYS */
         use_encrypted_bo = true;
      }
   }

#ifndef NDEBUG
   if (use_encrypted_bo) {
      /* Verify that color buffers are encrypted */
      for (int i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
         struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
         if (!surf)
            continue;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
      /* Verify that depth/stencil buffer is encrypted */
      if (sctx->framebuffer.state.zsbuf) {
         struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
   }
#endif

   return use_encrypted_bo;
}

void si_gfx_resources_add_all_to_bo_list(struct si_context *sctx)
{
   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
      si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
      si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
      si_image_views_begin_new_cs(sctx, &sctx->images[i]);
   }
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);
   si_vertex_buffers_begin_new_cs(sctx);

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);

   assert(sctx->bo_list_add_all_gfx_resources);
   sctx->bo_list_add_all_gfx_resources = false;
}

bool si_compute_resources_check_encrypted(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;

   /* TODO: we should assert that either use_encrypted_bo is false,
    * or all writable buffers are encrypted.
    */
   return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) ||
          si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh], info->base.textures_used[0]) ||
          si_image_views_check_encrypted(sctx, &sctx->images[sh], u_bit_consecutive(0, info->base.num_images)) ||
          si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);
}

void si_compute_resources_add_all_to_bo_list(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[sh]);
   si_sampler_views_begin_new_cs(sctx, &sctx->samplers[sh]);
   si_image_views_begin_new_cs(sctx, &sctx->images[sh]);
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);

   assert(sctx->bo_list_add_all_compute_resources);
   sctx->bo_list_add_all_compute_resources = false;
}

void si_add_all_descriptors_to_bo_list(struct si_context *sctx)
{
   for (unsigned i = 0; i < SI_NUM_DESCS; ++i)
      si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]);
   si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors);

   sctx->bo_list_add_all_resident_resources = true;
   sctx->bo_list_add_all_gfx_resources = true;
   sctx->bo_list_add_all_compute_resources = true;
}

void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask)
{
   struct si_descriptors *desc = &sctx->descriptors[desc_idx];

   /* Ignore no-op updates and updates that disable all slots. */
   if (!new_active_mask ||
       new_active_mask == u_bit_consecutive64(desc->first_active_slot, desc->num_active_slots))
      return;

   int first, count;
   u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
   assert(new_active_mask == 0);

   /* Upload/dump descriptors if slots are being enabled. */
   if (first < desc->first_active_slot ||
       first + count > desc->first_active_slot + desc->num_active_slots)
      sctx->descriptors_dirty |= 1u << desc_idx;

   desc->first_active_slot = first;
   desc->num_active_slots = count;
}
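
/* Worked example (illustrative only): if a shader's active mask covers slots
 * 4..9, u_bit_scan_consecutive_range64() yields first = 4 and count = 6. The
 * descriptor list is only marked dirty when that window extends beyond the
 * currently active [first_active_slot, first_active_slot + num_active_slots)
 * range, i.e. shrinking the window never forces a re-upload by itself.
 */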

void si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel)
{
   if (!sel)
      return;

   si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index,
                             sel->active_const_and_shader_buffers);
   si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index,
                             sel->active_samplers_and_images);
}