/* Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_descriptors.c */

/*
 * Copyright 2013 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/* Resource binding slots and sampler states (each described with 8 or
 * 4 dwords) are stored in lists in memory which is accessed by shaders
 * using scalar load instructions.
 *
 * This file is responsible for managing such lists. It keeps a copy of all
 * descriptors in CPU memory and re-uploads a whole list if some slots have
 * been changed.
 *
 * This code is also responsible for updating shader pointers to those lists.
 *
 * Note that CP DMA can't be used for updating the lists, because a GPU hang
 * could leave the list in a mid-IB state and the next IB would get wrong
 * descriptors and the whole context would be unusable at that point.
 * (Note: The register shadowing can't be used due to the same reason)
 *
 * Also, uploading descriptors to newly allocated memory doesn't require
 * a KCACHE flush.
 *
 *
 * Possible scenarios for one 16 dword image+sampler slot:
 *
 *         | Image        | w/ FMASK   | Buffer       | NULL
 * [ 0: 3] | Image[0:3]   | Image[0:3] | Null[0:3]    | Null[0:3]
 * [ 4: 7] | Image[4:7]   | Image[4:7] | Buffer[0:3]  | 0
 * [ 8:11] | Null[0:3]    | Fmask[0:3] | Null[0:3]    | Null[0:3]
 * [12:15] | Sampler[0:3] | Fmask[4:7] | Sampler[0:3] | Sampler[0:3]
 *
 * FMASK implies MSAA, therefore no sampler state.
 * Sampler states are never unbound except when FMASK is bound.
 */

#include "si_pipe.h"
#include "si_compute.h"
#include "si_build_pm4.h"
#include "sid.h"
#include "util/format/u_format.h"
#include "util/hash_table.h"
#include "util/u_idalloc.h"
#include "util/u_memory.h"
#include "util/u_upload_mgr.h"

/* NULL image and buffer descriptor for textures (alpha = 1) and images
 * (alpha = 0).
 *
 * For images, all fields must be zero except for the swizzle, which
 * supports arbitrary combinations of 0s and 1s. The texture type must be
 * any valid type (e.g. 1D). If the texture type isn't set, the hw hangs.
 *
 * For buffers, all fields must be zero. If they are not, the hw hangs.
 *
 * This is the only reason why the buffer descriptor must be in words [4:7].
 */
static uint32_t null_texture_descriptor[8] = {
   0, 0, 0, S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_1) | S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
   /* the rest must contain zeros, which is also used by the buffer
    * descriptor */
};

static uint32_t null_image_descriptor[8] = {
   0, 0, 0, S_008F1C_TYPE(V_008F1C_SQ_RSRC_IMG_1D)
   /* the rest must contain zeros, which is also used by the buffer
    * descriptor */
};

static uint64_t si_desc_extract_buffer_address(const uint32_t *desc)
{
   uint64_t va = desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);

   /* Sign-extend the 48-bit address. */
   va <<= 16;
   va = (int64_t)va >> 16;
   return va;
}
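
/* Illustrative worked example (values are hypothetical, not from the driver):
 * with desc[0] = 0xfffff000 and the BASE_ADDRESS_HI field of desc[1] equal
 * to 0xffff, the raw 48-bit address is 0x0000fffffffff000. The shift left by
 * 16 gives 0xfffffffff0000000, and the arithmetic shift right by 16 then
 * replicates the sign bit, yielding the canonical 64-bit address
 * 0xfffffffffffff000.
 */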

static void si_init_descriptor_list(uint32_t *desc_list, unsigned element_dw_size,
                                    unsigned num_elements, const uint32_t *null_descriptor)
{
   int i;

   /* Initialize the array to NULL descriptors if the element size is 8. */
   if (null_descriptor) {
      assert(element_dw_size % 8 == 0);
      for (i = 0; i < num_elements * element_dw_size / 8; i++)
         memcpy(desc_list + i * 8, null_descriptor, 8 * 4);
   }
}

static void si_init_descriptors(struct si_descriptors *desc, short shader_userdata_rel_index,
                                unsigned element_dw_size, unsigned num_elements)
{
   desc->list = CALLOC(num_elements, element_dw_size * 4);
   desc->element_dw_size = element_dw_size;
   desc->num_elements = num_elements;
   desc->shader_userdata_offset = shader_userdata_rel_index * 4;
   desc->slot_index_to_bind_directly = -1;
}

static void si_release_descriptors(struct si_descriptors *desc)
{
   si_resource_reference(&desc->buffer, NULL);
   FREE(desc->list);
}

static bool si_upload_descriptors(struct si_context *sctx, struct si_descriptors *desc)
{
   unsigned slot_size = desc->element_dw_size * 4;
   unsigned first_slot_offset = desc->first_active_slot * slot_size;
   unsigned upload_size = desc->num_active_slots * slot_size;

   /* Skip the upload if no shader is using the descriptors. dirty_mask
    * will stay dirty and the descriptors will be uploaded when there is
    * a shader using them.
    */
   if (!upload_size)
      return true;

   /* If there is just one active descriptor, bind it directly. */
   if ((int)desc->first_active_slot == desc->slot_index_to_bind_directly &&
       desc->num_active_slots == 1) {
      uint32_t *descriptor = &desc->list[desc->slot_index_to_bind_directly * desc->element_dw_size];

      /* The buffer is already in the buffer list. */
      si_resource_reference(&desc->buffer, NULL);
      desc->gpu_list = NULL;
      desc->gpu_address = si_desc_extract_buffer_address(descriptor);
      return true;
   }

   uint32_t *ptr;
   unsigned buffer_offset;
   u_upload_alloc(sctx->b.const_uploader, first_slot_offset, upload_size,
                  si_optimal_tcc_alignment(sctx, upload_size), &buffer_offset,
                  (struct pipe_resource **)&desc->buffer, (void **)&ptr);
   if (!desc->buffer) {
      desc->gpu_address = 0;
      return false; /* skip the draw call */
   }

   util_memcpy_cpu_to_le32(ptr, (char *)desc->list + first_slot_offset, upload_size);
   desc->gpu_list = ptr - first_slot_offset / 4;

   radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ,
                             RADEON_PRIO_DESCRIPTORS);

   /* The shader pointer should point to slot 0. */
   buffer_offset -= first_slot_offset;
   desc->gpu_address = desc->buffer->gpu_address + buffer_offset;

   assert(desc->buffer->flags & RADEON_FLAG_32BIT);
   assert((desc->buffer->gpu_address >> 32) == sctx->screen->info.address32_hi);
   assert((desc->gpu_address >> 32) == sctx->screen->info.address32_hi);
   return true;
}
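
/* Worked example of the offset bookkeeping above (hypothetical numbers):
 * with 16-dword elements (slot_size = 64), first_active_slot = 3 and
 * num_active_slots = 5, only bytes [192, 512) of the CPU list are uploaded.
 * gpu_list is rewound by 192 / 4 = 48 dwords and buffer_offset by 192 bytes,
 * so both the CPU-side mirror and the shader pointer keep addressing the
 * list as if slot 0 were present.
 */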

static void
si_add_descriptors_to_bo_list(struct si_context *sctx, struct si_descriptors *desc)
{
   if (!desc->buffer)
      return;

   radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, desc->buffer, RADEON_USAGE_READ,
                             RADEON_PRIO_DESCRIPTORS);
}

/* SAMPLER VIEWS */

static inline enum radeon_bo_priority si_get_sampler_view_priority(struct si_resource *res)
{
   if (res->b.b.target == PIPE_BUFFER)
      return RADEON_PRIO_SAMPLER_BUFFER;

   if (res->b.b.nr_samples > 1)
      return RADEON_PRIO_SAMPLER_TEXTURE_MSAA;

   return RADEON_PRIO_SAMPLER_TEXTURE;
}

static struct si_descriptors *si_sampler_and_image_descriptors(struct si_context *sctx,
                                                               unsigned shader)
{
   return &sctx->descriptors[si_sampler_and_image_descriptors_idx(shader)];
}

static void si_release_sampler_views(struct si_samplers *samplers)
{
   int i;

   for (i = 0; i < ARRAY_SIZE(samplers->views); i++) {
      pipe_sampler_view_reference(&samplers->views[i], NULL);
   }
}

static void si_sampler_view_add_buffer(struct si_context *sctx, struct pipe_resource *resource,
                                       enum radeon_bo_usage usage, bool is_stencil_sampler,
                                       bool check_mem)
{
   struct si_texture *tex = (struct si_texture *)resource;
   enum radeon_bo_priority priority;

   if (!resource)
      return;

   /* Use the flushed depth texture if direct sampling is unsupported. */
   if (resource->target != PIPE_BUFFER && tex->is_depth &&
       !si_can_sample_zs(tex, is_stencil_sampler))
      tex = tex->flushed_depth_texture;

   priority = si_get_sampler_view_priority(&tex->buffer);
   radeon_add_to_gfx_buffer_list_check_mem(sctx, &tex->buffer, usage, priority, check_mem);
}

static void si_sampler_views_begin_new_cs(struct si_context *sctx, struct si_samplers *samplers)
{
   unsigned mask = samplers->enabled_mask;

   /* Add buffers to the CS. */
   while (mask) {
      int i = u_bit_scan(&mask);
      struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];

      si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
                                 sview->is_stencil_sampler, false);
   }
}

static bool si_sampler_views_check_encrypted(struct si_context *sctx, struct si_samplers *samplers,
                                             unsigned samplers_declared)
{
   unsigned mask = samplers->enabled_mask & samplers_declared;

   /* Verify if any sampler uses an encrypted resource */
   while (mask) {
      int i = u_bit_scan(&mask);
      struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[i];

      struct si_resource *res = si_resource(sview->base.texture);
      if (res->flags & RADEON_FLAG_ENCRYPTED)
         return true;
   }
   return false;
}

/* Set buffer descriptor fields that can be changed by reallocations. */
static void si_set_buf_desc_address(struct si_resource *buf, uint64_t offset, uint32_t *state)
{
   uint64_t va = buf->gpu_address + offset;

   state[0] = va;
   state[1] &= C_008F04_BASE_ADDRESS_HI;
   state[1] |= S_008F04_BASE_ADDRESS_HI(va >> 32);
}
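
/* Example with hypothetical values: if a buffer bound at offset 256 moves
 * from GPU address 0x100000000 to 0x200000000, only state[0] (low 32 bits,
 * here 0x100) and the BASE_ADDRESS_HI field of state[1] (0x1 -> 0x2) are
 * rewritten; the stride, num_records and the format dword written at bind
 * time stay untouched.
 */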

/* Set texture descriptor fields that can be changed by reallocations.
 *
 * \param tex              texture
 * \param base_level_info  information of the level of BASE_ADDRESS
 * \param base_level       the level of BASE_ADDRESS
 * \param first_level      pipe_sampler_view.u.tex.first_level
 * \param block_width      util_format_get_blockwidth()
 * \param is_stencil       select between separate Z & Stencil
 * \param state            descriptor to update
 */
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen, struct si_texture *tex,
                                    const struct legacy_surf_level *base_level_info,
                                    unsigned base_level, unsigned first_level, unsigned block_width,
                                    /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
                                    bool is_stencil, uint16_t access, uint32_t * restrict state)
{
   uint64_t va, meta_va = 0;

   if (tex->is_depth && !si_can_sample_zs(tex, is_stencil)) {
      tex = tex->flushed_depth_texture;
      is_stencil = false;
   }

   va = tex->buffer.gpu_address;

   if (sscreen->info.chip_class >= GFX9) {
      /* Only stencil_offset needs to be added here. */
      if (is_stencil)
         va += tex->surface.u.gfx9.zs.stencil_offset;
      else
         va += tex->surface.u.gfx9.surf_offset;
   } else {
      va += (uint64_t)base_level_info->offset_256B * 256;
   }

   state[0] = va >> 8;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   /* Only macrotiled modes can set tile swizzle.
    * GFX9 doesn't use (legacy) base_level_info.
    */
   if (sscreen->info.chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= tex->surface.tile_swizzle;

   if (sscreen->info.chip_class >= GFX8) {
      if (!(access & SI_IMAGE_ACCESS_DCC_OFF) && vi_dcc_enabled(tex, first_level)) {
         meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;

         if (sscreen->info.chip_class == GFX8) {
            meta_va += tex->surface.u.legacy.color.dcc_level[base_level].dcc_offset;
            assert(base_level_info->mode == RADEON_SURF_MODE_2D);
         }

         unsigned dcc_tile_swizzle = tex->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << tex->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (vi_tc_compat_htile_enabled(tex, first_level,
                                            is_stencil ? PIPE_MASK_S : PIPE_MASK_Z)) {
         meta_va = tex->buffer.gpu_address + tex->surface.meta_offset;
      }

      if (meta_va)
         state[6] |= S_008F28_COMPRESSION_EN(1);
   }

   if (sscreen->info.chip_class >= GFX8 && sscreen->info.chip_class <= GFX9)
      state[7] = meta_va >> 8;

   if (sscreen->info.chip_class >= GFX10) {
      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
      }

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!tex->is_depth && tex->surface.meta_offset)
            meta = tex->surface.u.gfx9.color.dcc;

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8) |
                     S_00A018_WRITE_COMPRESS_ENABLE((access & SI_IMAGE_ACCESS_DCC_WRITE) != 0);
      }

      state[7] = meta_va >> 16;
   } else if (sscreen->info.chip_class == GFX9) {
      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(tex->surface.u.gfx9.zs.stencil_epitch);
      } else {
         uint16_t epitch = tex->surface.u.gfx9.epitch;
         if (tex->buffer.b.b.format == PIPE_FORMAT_R8G8_R8B8_UNORM &&
             block_width == 1) {
            /* epitch is patched in ac_surface for sdma/vcn blocks to get
             * a value expressed in element units.
             * But here the texture is used with block_width == 1 so we
             * need epitch in pixel units.
             */
            epitch = (epitch + 1) / tex->surface.blk_w - 1;
         }
         state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!tex->is_depth && tex->surface.meta_offset)
            meta = tex->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(tex, base_level, is_stencil);

      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] |= S_008F20_PITCH(pitch - 1);
   }

   if (tex->swap_rgb_to_bgr) {
      unsigned swizzle_x = G_008F1C_DST_SEL_X(state[3]);
      unsigned swizzle_z = G_008F1C_DST_SEL_Z(state[3]);

      state[3] &= C_008F1C_DST_SEL_X;
      state[3] |= S_008F1C_DST_SEL_X(swizzle_z);
      state[3] &= C_008F1C_DST_SEL_Z;
      state[3] |= S_008F1C_DST_SEL_Z(swizzle_x);
   }
}

static void si_set_sampler_state_desc(struct si_sampler_state *sstate,
                                      struct si_sampler_view *sview, struct si_texture *tex,
                                      uint32_t *desc)
{
   if (tex && tex->upgraded_depth && sview && !sview->is_stencil_sampler)
      memcpy(desc, sstate->upgraded_depth_val, 4 * 4);
   else
      memcpy(desc, sstate->val, 4 * 4);
}

static void si_set_sampler_view_desc(struct si_context *sctx, struct si_sampler_view *sview,
                                     struct si_sampler_state *sstate,
                                     /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
                                     uint32_t * restrict desc)
{
   struct pipe_sampler_view *view = &sview->base;
   struct si_texture *tex = (struct si_texture *)view->texture;

   assert(tex); /* views with texture == NULL aren't supported */

   if (tex->buffer.b.b.target == PIPE_BUFFER) {
      memcpy(desc, sview->state, 8 * 4);
      memcpy(desc + 8, null_texture_descriptor, 4 * 4); /* Disable FMASK. */
      si_set_buf_desc_address(&tex->buffer, sview->base.u.buf.offset, desc + 4);
      return;
   }

   if (unlikely(sview->dcc_incompatible)) {
      if (vi_dcc_enabled(tex, view->u.tex.first_level))
         if (!si_texture_disable_dcc(sctx, tex))
            si_decompress_dcc(sctx, tex);

      sview->dcc_incompatible = false;
   }

   bool is_separate_stencil = tex->db_compatible && sview->is_stencil_sampler;

   memcpy(desc, sview->state, 8 * 4);
   si_set_mutable_tex_desc_fields(sctx->screen, tex, sview->base_level_info, sview->base_level,
                                  sview->base.u.tex.first_level, sview->block_width,
                                  is_separate_stencil, 0, desc);

   if (tex->surface.fmask_size) {
      memcpy(desc + 8, sview->fmask_state, 8 * 4);
   } else {
      /* Disable FMASK and bind sampler state in [12:15]. */
      memcpy(desc + 8, null_texture_descriptor, 4 * 4);

      if (sstate)
         si_set_sampler_state_desc(sstate, sview, tex, desc + 12);
   }
}

static bool color_needs_decompression(struct si_texture *tex)
{
   if (tex->is_depth)
      return false;

   return tex->surface.fmask_size ||
          (tex->dirty_level_mask && (tex->cmask_buffer || tex->surface.meta_offset));
}

static bool depth_needs_decompression(struct si_texture *tex)
{
   /* If the depth/stencil texture is TC-compatible, no decompression
    * will be done. The decompression function will only flush DB caches
    * to make it coherent with shaders. That's necessary because the driver
    * doesn't flush DB caches in any other case.
    */
   return tex->db_compatible;
}

static void si_reset_sampler_view_slot(struct si_samplers *samplers, unsigned slot,
                                       uint32_t * restrict desc)
{
   pipe_sampler_view_reference(&samplers->views[slot], NULL);
   memcpy(desc, null_texture_descriptor, 8 * 4);
   /* Only clear the lower dwords of FMASK. */
   memcpy(desc + 8, null_texture_descriptor, 4 * 4);
   /* Re-set the sampler state if we are transitioning from FMASK. */
   if (samplers->sampler_states[slot])
      si_set_sampler_state_desc(samplers->sampler_states[slot], NULL, NULL, desc + 12);
}

static void si_set_sampler_views(struct si_context *sctx, unsigned shader,
                                 unsigned start_slot, unsigned count,
                                 unsigned unbind_num_trailing_slots,
                                 struct pipe_sampler_view **views,
                                 bool disallow_early_out)
{
   struct si_samplers *samplers = &sctx->samplers[shader];
   struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
   uint32_t unbound_mask = 0;

   if (views) {
      for (unsigned i = 0; i < count; i++) {
         unsigned slot = start_slot + i;
         struct si_sampler_view *sview = (struct si_sampler_view *)views[i];
         unsigned desc_slot = si_get_sampler_slot(slot);
         /* restrict decreases overhead of si_set_sampler_view_desc ~8x. */
         uint32_t *restrict desc = descs->list + desc_slot * 16;

         if (samplers->views[slot] == &sview->base && !disallow_early_out)
            continue;

         if (sview) {
            struct si_texture *tex = (struct si_texture *)sview->base.texture;

            si_set_sampler_view_desc(sctx, sview, samplers->sampler_states[slot], desc);

            if (tex->buffer.b.b.target == PIPE_BUFFER) {
               tex->buffer.bind_history |= PIPE_BIND_SAMPLER_VIEW;
               samplers->needs_depth_decompress_mask &= ~(1u << slot);
               samplers->needs_color_decompress_mask &= ~(1u << slot);
            } else {
               if (depth_needs_decompression(tex)) {
                  samplers->needs_depth_decompress_mask |= 1u << slot;
               } else {
                  samplers->needs_depth_decompress_mask &= ~(1u << slot);
               }
               if (color_needs_decompression(tex)) {
                  samplers->needs_color_decompress_mask |= 1u << slot;
               } else {
                  samplers->needs_color_decompress_mask &= ~(1u << slot);
               }

               if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
                   p_atomic_read(&tex->framebuffers_bound))
                  sctx->need_check_render_feedback = true;
            }

            pipe_sampler_view_reference(&samplers->views[slot], &sview->base);
            samplers->enabled_mask |= 1u << slot;

            /* Since this can flush, it must be done after enabled_mask is
             * updated. */
            si_sampler_view_add_buffer(sctx, &tex->buffer.b.b, RADEON_USAGE_READ,
                                       sview->is_stencil_sampler, true);
         } else {
            si_reset_sampler_view_slot(samplers, slot, desc);
            unbound_mask |= 1u << slot;
         }
      }
   } else {
      unbind_num_trailing_slots += count;
      count = 0;
   }

   for (unsigned i = 0; i < unbind_num_trailing_slots; i++) {
      unsigned slot = start_slot + count + i;
      unsigned desc_slot = si_get_sampler_slot(slot);
      uint32_t * restrict desc = descs->list + desc_slot * 16;

      if (samplers->views[slot])
         si_reset_sampler_view_slot(samplers, slot, desc);
   }

   unbound_mask |= BITFIELD_RANGE(start_slot + count, unbind_num_trailing_slots);
   samplers->enabled_mask &= ~unbound_mask;
   samplers->needs_depth_decompress_mask &= ~unbound_mask;
   samplers->needs_color_decompress_mask &= ~unbound_mask;

   sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
}

static void si_update_shader_needs_decompress_mask(struct si_context *sctx, unsigned shader)
{
   struct si_samplers *samplers = &sctx->samplers[shader];
   unsigned shader_bit = 1 << shader;

   if (samplers->needs_depth_decompress_mask || samplers->needs_color_decompress_mask ||
       sctx->images[shader].needs_color_decompress_mask)
      sctx->shader_needs_decompress_mask |= shader_bit;
   else
      sctx->shader_needs_decompress_mask &= ~shader_bit;
}

static void si_pipe_set_sampler_views(struct pipe_context *ctx, enum pipe_shader_type shader,
                                      unsigned start, unsigned count,
                                      unsigned unbind_num_trailing_slots,
                                      struct pipe_sampler_view **views)
{
   struct si_context *sctx = (struct si_context *)ctx;

   if ((!count && !unbind_num_trailing_slots) || shader >= SI_NUM_SHADERS)
      return;

   si_set_sampler_views(sctx, shader, start, count, unbind_num_trailing_slots,
                        views, false);
   si_update_shader_needs_decompress_mask(sctx, shader);
}
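
/* Usage sketch from a state tracker's perspective (illustrative only, not
 * compiled into the driver; the template values are hypothetical): this is
 * the call chain that lands in si_pipe_set_sampler_views above.
 */
#if 0
static void example_bind_one_sampler_view(struct pipe_context *pctx, struct pipe_resource *tex)
{
   struct pipe_sampler_view templ = {0};
   templ.format = tex->format;
   templ.target = tex->target;
   templ.swizzle_r = PIPE_SWIZZLE_X;
   templ.swizzle_g = PIPE_SWIZZLE_Y;
   templ.swizzle_b = PIPE_SWIZZLE_Z;
   templ.swizzle_a = PIPE_SWIZZLE_W;
   templ.u.tex.last_level = tex->last_level;

   struct pipe_sampler_view *view = pctx->create_sampler_view(pctx, tex, &templ);
   /* Bind to slot 0 of the fragment shader; unbind no trailing slots. */
   pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 1, 0, &view);
}
#endif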

static void si_samplers_update_needs_color_decompress_mask(struct si_samplers *samplers)
{
   unsigned mask = samplers->enabled_mask;

   while (mask) {
      int i = u_bit_scan(&mask);
      struct pipe_resource *res = samplers->views[i]->texture;

      if (res && res->target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)res;

         if (color_needs_decompression(tex)) {
            samplers->needs_color_decompress_mask |= 1u << i;
         } else {
            samplers->needs_color_decompress_mask &= ~(1u << i);
         }
      }
   }
}

/* IMAGE VIEWS */

static void si_release_image_views(struct si_images *images)
{
   unsigned i;

   for (i = 0; i < SI_NUM_IMAGES; ++i) {
      struct pipe_image_view *view = &images->views[i];

      pipe_resource_reference(&view->resource, NULL);
   }
}

static void si_image_views_begin_new_cs(struct si_context *sctx, struct si_images *images)
{
   uint mask = images->enabled_mask;

   /* Add buffers to the CS. */
   while (mask) {
      int i = u_bit_scan(&mask);
      struct pipe_image_view *view = &images->views[i];

      assert(view->resource);

      si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false);
   }
}

static bool si_image_views_check_encrypted(struct si_context *sctx, struct si_images *images,
                                           unsigned images_declared)
{
   uint mask = images->enabled_mask & images_declared;

   while (mask) {
      int i = u_bit_scan(&mask);
      struct pipe_image_view *view = &images->views[i];

      assert(view->resource);

      struct si_texture *tex = (struct si_texture *)view->resource;
      if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
         return true;
   }
   return false;
}

static void si_disable_shader_image(struct si_context *ctx, unsigned shader, unsigned slot)
{
   struct si_images *images = &ctx->images[shader];

   if (images->enabled_mask & (1u << slot)) {
      struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
      unsigned desc_slot = si_get_image_slot(slot);

      pipe_resource_reference(&images->views[slot].resource, NULL);
      images->needs_color_decompress_mask &= ~(1 << slot);

      memcpy(descs->list + desc_slot * 8, null_image_descriptor, 8 * 4);
      images->enabled_mask &= ~(1u << slot);
      images->display_dcc_store_mask &= ~(1u << slot);
      ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
   }
}

static void si_mark_image_range_valid(const struct pipe_image_view *view)
{
   struct si_resource *res = si_resource(view->resource);

   if (res->b.b.target != PIPE_BUFFER)
      return;

   util_range_add(&res->b.b, &res->valid_buffer_range, view->u.buf.offset,
                  view->u.buf.offset + view->u.buf.size);
}

static void si_set_shader_image_desc(struct si_context *ctx, const struct pipe_image_view *view,
                                     bool skip_decompress, uint32_t *desc, uint32_t *fmask_desc)
{
   struct si_screen *screen = ctx->screen;
   struct si_resource *res;

   res = si_resource(view->resource);

   if (res->b.b.target == PIPE_BUFFER) {
      if (view->access & PIPE_IMAGE_ACCESS_WRITE)
         si_mark_image_range_valid(view);

      si_make_buffer_descriptor(screen, res, view->format, view->u.buf.offset, view->u.buf.size,
                                desc);
      si_set_buf_desc_address(res, view->u.buf.offset, desc + 4);
   } else {
      static const unsigned char swizzle[4] = {0, 1, 2, 3};
      struct si_texture *tex = (struct si_texture *)res;
      unsigned level = view->u.tex.level;
      unsigned width, height, depth, hw_level;
      bool uses_dcc = vi_dcc_enabled(tex, level);
      unsigned access = view->access;

      assert(!tex->is_depth);
      assert(fmask_desc || tex->surface.fmask_offset == 0);

      if (uses_dcc && !skip_decompress &&
          !(access & SI_IMAGE_ACCESS_DCC_OFF) &&
          ((!(access & SI_IMAGE_ACCESS_DCC_WRITE) && (access & PIPE_IMAGE_ACCESS_WRITE)) ||
           !vi_dcc_formats_compatible(screen, res->b.b.format, view->format))) {
         /* If DCC can't be disabled, at least decompress it.
          * The decompression is relatively cheap if the surface
          * has been decompressed already.
          */
         if (!si_texture_disable_dcc(ctx, tex))
            si_decompress_dcc(ctx, tex);
      }

      if (ctx->chip_class >= GFX9) {
         /* Always set the base address. The swizzle modes don't
          * allow setting mipmap level offsets as the base.
          */
         width = res->b.b.width0;
         height = res->b.b.height0;
         depth = res->b.b.depth0;
         hw_level = level;
      } else {
         /* Always force the base level to the selected level.
          *
          * This is required for 3D textures, where otherwise
          * selecting a single slice for non-layered bindings
          * fails. It doesn't hurt the other targets.
          */
         width = u_minify(res->b.b.width0, level);
         height = u_minify(res->b.b.height0, level);
         depth = u_minify(res->b.b.depth0, level);
         hw_level = 0;
      }

      screen->make_texture_descriptor(
         screen, tex, false, res->b.b.target, view->format, swizzle, hw_level, hw_level,
         view->u.tex.first_layer, view->u.tex.last_layer, width, height, depth, desc, fmask_desc);
      si_set_mutable_tex_desc_fields(screen, tex, &tex->surface.u.legacy.level[level], level, level,
                                     util_format_get_blockwidth(view->format),
                                     false, view->access, desc);
   }
}
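
/* Worked example of the two paths above (hypothetical 1024x1024 texture with
 * level 2 bound): on GFX9+ the descriptor keeps width0/height0 (1024x1024)
 * with hw_level = 2, because the swizzle modes address mips from the base.
 * On GFX6-GFX8 the descriptor is rebased to the selected level instead:
 * u_minify(1024, 2) = MAX2(1024 >> 2, 1) = 256, i.e. a 256x256 single-level
 * view with hw_level = 0.
 */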

static void si_set_shader_image(struct si_context *ctx, unsigned shader, unsigned slot,
                                const struct pipe_image_view *view, bool skip_decompress)
{
   struct si_images *images = &ctx->images[shader];
   struct si_descriptors *descs = si_sampler_and_image_descriptors(ctx, shader);
   struct si_resource *res;

   if (!view || !view->resource) {
      si_disable_shader_image(ctx, shader, slot);
      return;
   }

   res = si_resource(view->resource);

   si_set_shader_image_desc(ctx, view, skip_decompress, descs->list + si_get_image_slot(slot) * 8,
                            descs->list + si_get_image_slot(slot + SI_NUM_IMAGES) * 8);

   if (&images->views[slot] != view)
      util_copy_image_view(&images->views[slot], view);

   if (res->b.b.target == PIPE_BUFFER) {
      images->needs_color_decompress_mask &= ~(1 << slot);
      images->display_dcc_store_mask &= ~(1u << slot);
      res->bind_history |= PIPE_BIND_SHADER_IMAGE;
   } else {
      struct si_texture *tex = (struct si_texture *)res;
      unsigned level = view->u.tex.level;

      if (color_needs_decompression(tex)) {
         images->needs_color_decompress_mask |= 1 << slot;
      } else {
         images->needs_color_decompress_mask &= ~(1 << slot);
      }

      if (tex->surface.display_dcc_offset && view->access & PIPE_IMAGE_ACCESS_WRITE)
         images->display_dcc_store_mask |= 1u << slot;
      else
         images->display_dcc_store_mask &= ~(1u << slot);

      if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
         ctx->need_check_render_feedback = true;
   }

   images->enabled_mask |= 1u << slot;
   ctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);

   /* Since this can flush, it must be done after enabled_mask is updated. */
   si_sampler_view_add_buffer(
      ctx, &res->b.b,
      (view->access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false,
      true);
}

static void si_set_shader_images(struct pipe_context *pipe, enum pipe_shader_type shader,
                                 unsigned start_slot, unsigned count,
                                 unsigned unbind_num_trailing_slots,
                                 const struct pipe_image_view *views)
{
   struct si_context *ctx = (struct si_context *)pipe;
   unsigned i, slot;

   assert(shader < SI_NUM_SHADERS);

   if (!count && !unbind_num_trailing_slots)
      return;

   assert(start_slot + count + unbind_num_trailing_slots <= SI_NUM_IMAGES);

   if (views) {
      for (i = 0, slot = start_slot; i < count; ++i, ++slot)
         si_set_shader_image(ctx, shader, slot, &views[i], false);
   } else {
      for (i = 0, slot = start_slot; i < count; ++i, ++slot)
         si_set_shader_image(ctx, shader, slot, NULL, false);
   }

   for (i = 0; i < unbind_num_trailing_slots; ++i, ++slot)
      si_set_shader_image(ctx, shader, slot, NULL, false);

   if (shader == PIPE_SHADER_COMPUTE &&
       ctx->cs_shader_state.program &&
       start_slot < ctx->cs_shader_state.program->sel.cs_num_images_in_user_sgprs)
      ctx->compute_image_sgprs_dirty = true;

   si_update_shader_needs_decompress_mask(ctx, shader);
}

static void si_images_update_needs_color_decompress_mask(struct si_images *images)
{
   unsigned mask = images->enabled_mask;

   while (mask) {
      int i = u_bit_scan(&mask);
      struct pipe_resource *res = images->views[i].resource;

      if (res && res->target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)res;

         if (color_needs_decompression(tex)) {
            images->needs_color_decompress_mask |= 1 << i;
         } else {
            images->needs_color_decompress_mask &= ~(1 << i);
         }
      }
   }
}
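
/* Usage sketch from a state tracker's perspective (illustrative only, not
 * compiled into the driver; the slot and format are hypothetical): binds a
 * writable level-0 storage image, the path that reaches si_set_shader_images
 * above.
 */
#if 0
static void example_bind_storage_image(struct pipe_context *pctx, struct pipe_resource *tex)
{
   struct pipe_image_view view = {0};
   view.resource = tex;
   view.format = PIPE_FORMAT_R8G8B8A8_UNORM;
   view.access = PIPE_IMAGE_ACCESS_READ_WRITE;
   view.u.tex.level = 0;
   view.u.tex.first_layer = 0;
   view.u.tex.last_layer = 0;

   pctx->set_shader_images(pctx, PIPE_SHADER_COMPUTE, 0, 1, 0, &view);
}
#endif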

void si_update_ps_colorbuf0_slot(struct si_context *sctx)
{
   struct si_buffer_resources *buffers = &sctx->internal_bindings;
   struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
   unsigned slot = SI_PS_IMAGE_COLORBUF0;
   struct pipe_surface *surf = NULL;

   /* si_texture_disable_dcc can get us here again. */
   if (sctx->blitter_running)
      return;

   /* See whether FBFETCH is used and color buffer 0 is set. */
   if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.fs.uses_fbfetch_output &&
       sctx->framebuffer.state.nr_cbufs && sctx->framebuffer.state.cbufs[0])
      surf = sctx->framebuffer.state.cbufs[0];

   /* Return if FBFETCH transitions from disabled to disabled. */
   if (!buffers->buffers[slot] && !surf)
      return;

   sctx->ps_uses_fbfetch = surf != NULL;
   si_update_ps_iter_samples(sctx);

   if (surf) {
      struct si_texture *tex = (struct si_texture *)surf->texture;
      struct pipe_image_view view = {0};

      assert(tex);
      assert(!tex->is_depth);

      /* Disable DCC, because the texture is used as both a sampler
       * and color buffer.
       */
      si_texture_disable_dcc(sctx, tex);

      if (tex->buffer.b.b.nr_samples <= 1 && tex->cmask_buffer) {
         /* Disable CMASK. */
         assert(tex->cmask_buffer != &tex->buffer);
         si_eliminate_fast_color_clear(sctx, tex, NULL);
         si_texture_discard_cmask(sctx->screen, tex);
      }

      view.resource = surf->texture;
      view.format = surf->format;
      view.access = PIPE_IMAGE_ACCESS_READ;
      view.u.tex.first_layer = surf->u.tex.first_layer;
      view.u.tex.last_layer = surf->u.tex.last_layer;
      view.u.tex.level = surf->u.tex.level;

      /* Set the descriptor. */
      uint32_t *desc = descs->list + slot * 4;
      memset(desc, 0, 16 * 4);
      si_set_shader_image_desc(sctx, &view, true, desc, desc + 8);

      pipe_resource_reference(&buffers->buffers[slot], &tex->buffer.b.b);
      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READ,
                                RADEON_PRIO_SHADER_RW_IMAGE);
      buffers->enabled_mask |= 1llu << slot;
   } else {
      /* Clear the descriptor. */
      memset(descs->list + slot * 4, 0, 8 * 4);
      pipe_resource_reference(&buffers->buffers[slot], NULL);
      buffers->enabled_mask &= ~(1llu << slot);
   }

   sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
}

/* SAMPLER STATES */

static void si_bind_sampler_states(struct pipe_context *ctx, enum pipe_shader_type shader,
                                   unsigned start, unsigned count, void **states)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_samplers *samplers = &sctx->samplers[shader];
   struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
   struct si_sampler_state **sstates = (struct si_sampler_state **)states;
   int i;

   if (!count || shader >= SI_NUM_SHADERS || !sstates)
      return;

   for (i = 0; i < count; i++) {
      unsigned slot = start + i;
      unsigned desc_slot = si_get_sampler_slot(slot);

      if (!sstates[i] || sstates[i] == samplers->sampler_states[slot])
         continue;

#ifndef NDEBUG
      assert(sstates[i]->magic == SI_SAMPLER_STATE_MAGIC);
#endif
      samplers->sampler_states[slot] = sstates[i];

      /* If FMASK is bound, don't overwrite it.
       * The sampler state will be set after FMASK is unbound.
       */
      struct si_sampler_view *sview = (struct si_sampler_view *)samplers->views[slot];

      struct si_texture *tex = NULL;

      if (sview && sview->base.texture && sview->base.texture->target != PIPE_BUFFER)
         tex = (struct si_texture *)sview->base.texture;

      if (tex && tex->surface.fmask_size)
         continue;

      si_set_sampler_state_desc(sstates[i], sview, tex, desc->list + desc_slot * 16 + 12);

      sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);
   }
}

/* BUFFER RESOURCES */

static void si_init_buffer_resources(struct si_context *sctx,
                                     struct si_buffer_resources *buffers,
                                     struct si_descriptors *descs, unsigned num_buffers,
                                     short shader_userdata_rel_index,
                                     enum radeon_bo_priority priority,
                                     enum radeon_bo_priority priority_constbuf)
{
   buffers->priority = priority;
   buffers->priority_constbuf = priority_constbuf;
   buffers->buffers = CALLOC(num_buffers, sizeof(struct pipe_resource *));
   buffers->offsets = CALLOC(num_buffers, sizeof(buffers->offsets[0]));

   si_init_descriptors(descs, shader_userdata_rel_index, 4, num_buffers);

   /* Initialize buffer descriptors, so that we don't have to do it at bind time. */
   for (unsigned i = 0; i < num_buffers; i++) {
      uint32_t *desc = descs->list + i * 4;

      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

      if (sctx->chip_class >= GFX10) {
         desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }
   }
}

static void si_release_buffer_resources(struct si_buffer_resources *buffers,
                                        struct si_descriptors *descs)
{
   int i;

   for (i = 0; i < descs->num_elements; i++) {
      pipe_resource_reference(&buffers->buffers[i], NULL);
   }

   FREE(buffers->buffers);
   FREE(buffers->offsets);
}

static void si_buffer_resources_begin_new_cs(struct si_context *sctx,
                                             struct si_buffer_resources *buffers)
{
   uint64_t mask = buffers->enabled_mask;

   /* Add buffers to the CS. */
   while (mask) {
      int i = u_bit_scan64(&mask);

      radeon_add_to_buffer_list(
         sctx, &sctx->gfx_cs, si_resource(buffers->buffers[i]),
         buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
         i < SI_NUM_SHADER_BUFFERS ? buffers->priority : buffers->priority_constbuf);
   }
}

static bool si_buffer_resources_check_encrypted(struct si_context *sctx,
                                                struct si_buffer_resources *buffers)
{
   uint64_t mask = buffers->enabled_mask;

   while (mask) {
      int i = u_bit_scan64(&mask);

      if (si_resource(buffers->buffers[i])->flags & RADEON_FLAG_ENCRYPTED)
         return true;
   }

   return false;
}

static void si_get_buffer_from_descriptors(struct si_buffer_resources *buffers,
                                           struct si_descriptors *descs, unsigned idx,
                                           struct pipe_resource **buf, unsigned *offset,
                                           unsigned *size)
{
   pipe_resource_reference(buf, buffers->buffers[idx]);
   if (*buf) {
      struct si_resource *res = si_resource(*buf);
      const uint32_t *desc = descs->list + idx * 4;
      uint64_t va;

      *size = desc[2];

      assert(G_008F04_STRIDE(desc[1]) == 0);
      va = si_desc_extract_buffer_address(desc);

      assert(va >= res->gpu_address && va + *size <= res->gpu_address + res->bo_size);
      *offset = va - res->gpu_address;
   }
}

/* VERTEX BUFFERS */

static void si_vertex_buffers_begin_new_cs(struct si_context *sctx)
{
   int count = sctx->num_vertex_elements;
   int i;

   for (i = 0; i < count; i++) {
      int vb = sctx->vertex_elements->vertex_buffer_index[i];

      if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
         continue;
      if (!sctx->vertex_buffer[vb].buffer.resource)
         continue;

      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs,
                                si_resource(sctx->vertex_buffer[vb].buffer.resource),
                                RADEON_USAGE_READ, RADEON_PRIO_VERTEX_BUFFER);
   }

   if (!sctx->vb_descriptors_buffer)
      return;
   radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, sctx->vb_descriptors_buffer, RADEON_USAGE_READ,
                             RADEON_PRIO_DESCRIPTORS);
}

/* CONSTANT BUFFERS */

static struct si_descriptors *si_const_and_shader_buffer_descriptors(struct si_context *sctx,
                                                                     unsigned shader)
{
   return &sctx->descriptors[si_const_and_shader_buffer_descriptors_idx(shader)];
}

static void si_upload_const_buffer(struct si_context *sctx, struct si_resource **buf,
                                   const uint8_t *ptr, unsigned size, uint32_t *const_offset)
{
   void *tmp;

   u_upload_alloc(sctx->b.const_uploader, 0, size, si_optimal_tcc_alignment(sctx, size),
                  const_offset, (struct pipe_resource **)buf, &tmp);
   if (*buf)
      util_memcpy_cpu_to_le32(tmp, ptr, size);
}

static void si_set_constant_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
                                   unsigned descriptors_idx, uint slot, bool take_ownership,
                                   const struct pipe_constant_buffer *input)
{
   struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
   assert(slot < descs->num_elements);
   pipe_resource_reference(&buffers->buffers[slot], NULL);

   /* GFX7 cannot unbind a constant buffer (S_BUFFER_LOAD is buggy
    * with a NULL buffer). We need to use a dummy buffer instead. */
   if (sctx->chip_class == GFX7 && (!input || (!input->buffer && !input->user_buffer)))
      input = &sctx->null_const_buf;

   if (input && (input->buffer || input->user_buffer)) {
      struct pipe_resource *buffer = NULL;
      uint64_t va;
      unsigned buffer_offset;

      /* Upload the user buffer if needed. */
      if (input->user_buffer) {
         si_upload_const_buffer(sctx, (struct si_resource **)&buffer, input->user_buffer,
                                input->buffer_size, &buffer_offset);
         if (!buffer) {
            /* Just unbind on failure. */
            si_set_constant_buffer(sctx, buffers, descriptors_idx, slot, false, NULL);
            return;
         }
      } else {
         if (take_ownership) {
            buffer = input->buffer;
         } else {
            pipe_resource_reference(&buffer, input->buffer);
         }
         buffer_offset = input->buffer_offset;
      }

      va = si_resource(buffer)->gpu_address + buffer_offset;

      /* Set the descriptor. */
      uint32_t *desc = descs->list + slot * 4;
      desc[0] = va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
      desc[2] = input->buffer_size;

      buffers->buffers[slot] = buffer;
      buffers->offsets[slot] = buffer_offset;
      radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
                                              buffers->priority_constbuf, true);
      buffers->enabled_mask |= 1llu << slot;
   } else {
      /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
      memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 3);
      buffers->enabled_mask &= ~(1llu << slot);
   }

   sctx->descriptors_dirty |= 1u << descriptors_idx;
}

static void si_pipe_set_constant_buffer(struct pipe_context *ctx, enum pipe_shader_type shader,
                                        uint slot, bool take_ownership,
                                        const struct pipe_constant_buffer *input)
{
   struct si_context *sctx = (struct si_context *)ctx;

   if (shader >= SI_NUM_SHADERS)
      return;

   if (input) {
      if (input->buffer) {
         if (slot == 0 &&
             !(si_resource(input->buffer)->flags & RADEON_FLAG_32BIT)) {
            assert(!"constant buffer 0 must have a 32-bit VM address, use const_uploader");
            return;
         }
         si_resource(input->buffer)->bind_history |= PIPE_BIND_CONSTANT_BUFFER;
      }

      if (slot == 0) {
         /* Invalidate current inlinable uniforms. */
         sctx->inlinable_uniforms_valid_mask &= ~(1 << shader);
      }
   }

   slot = si_get_constbuf_slot(slot);
   si_set_constant_buffer(sctx, &sctx->const_and_shader_buffers[shader],
                          si_const_and_shader_buffer_descriptors_idx(shader), slot,
                          take_ownership, input);
}

static void si_set_inlinable_constants(struct pipe_context *ctx,
                                       enum pipe_shader_type shader,
                                       uint num_values, uint32_t *values)
{
   struct si_context *sctx = (struct si_context *)ctx;

   if (!(sctx->inlinable_uniforms_valid_mask & BITFIELD_BIT(shader))) {
      /* It's the first time we set the constants. Always update shaders. */
      memcpy(sctx->inlinable_uniforms[shader], values, num_values * 4);
      sctx->inlinable_uniforms_valid_mask |= BITFIELD_BIT(shader);
      sctx->do_update_shaders = true;
      return;
   }

   /* We have already set inlinable constants for this shader. Update the shader only if
    * the constants are being changed so as not to update shaders needlessly.
    */
   if (memcmp(sctx->inlinable_uniforms[shader], values, num_values * 4)) {
      memcpy(sctx->inlinable_uniforms[shader], values, num_values * 4);
      sctx->do_update_shaders = true;
   }
}

void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader, uint slot,
                                 struct pipe_constant_buffer *cbuf)
{
   cbuf->user_buffer = NULL;
   si_get_buffer_from_descriptors(
      &sctx->const_and_shader_buffers[shader], si_const_and_shader_buffer_descriptors(sctx, shader),
      si_get_constbuf_slot(slot), &cbuf->buffer, &cbuf->buffer_offset, &cbuf->buffer_size);
}
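
/* Usage sketch from a state tracker's perspective (illustrative only, not
 * compiled into the driver; the slot and data are hypothetical): user_buffer
 * constants take the si_upload_const_buffer path in si_set_constant_buffer
 * above.
 */
#if 0
static void example_set_user_constants(struct pipe_context *pctx)
{
   float data[4] = {0.0f, 0.25f, 0.5f, 1.0f};
   struct pipe_constant_buffer cb = {0};
   cb.user_buffer = data;
   cb.buffer_size = sizeof(data);

   /* take_ownership = false: the driver copies/uploads the data itself. */
   pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb);
}
#endif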

/* SHADER BUFFERS */

static void si_set_shader_buffer(struct si_context *sctx, struct si_buffer_resources *buffers,
                                 unsigned descriptors_idx, uint slot,
                                 const struct pipe_shader_buffer *sbuffer, bool writable,
                                 enum radeon_bo_priority priority)
{
   struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
   uint32_t *desc = descs->list + slot * 4;

   if (!sbuffer || !sbuffer->buffer) {
      pipe_resource_reference(&buffers->buffers[slot], NULL);
      /* Clear the descriptor. Only 3 dwords are cleared. The 4th dword is immutable. */
      memset(desc, 0, sizeof(uint32_t) * 3);
      buffers->enabled_mask &= ~(1llu << slot);
      buffers->writable_mask &= ~(1llu << slot);
      sctx->descriptors_dirty |= 1u << descriptors_idx;
      return;
   }

   struct si_resource *buf = si_resource(sbuffer->buffer);
   uint64_t va = buf->gpu_address + sbuffer->buffer_offset;

   desc[0] = va;
   desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(0);
   desc[2] = sbuffer->buffer_size;

   pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
   buffers->offsets[slot] = sbuffer->buffer_offset;
   radeon_add_to_gfx_buffer_list_check_mem(
      sctx, buf, writable ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, priority, true);
   if (writable)
      buffers->writable_mask |= 1llu << slot;
   else
      buffers->writable_mask &= ~(1llu << slot);

   buffers->enabled_mask |= 1llu << slot;
   sctx->descriptors_dirty |= 1lu << descriptors_idx;

   util_range_add(&buf->b.b, &buf->valid_buffer_range, sbuffer->buffer_offset,
                  sbuffer->buffer_offset + sbuffer->buffer_size);
}

static void si_set_shader_buffers(struct pipe_context *ctx, enum pipe_shader_type shader,
                                  unsigned start_slot, unsigned count,
                                  const struct pipe_shader_buffer *sbuffers,
                                  unsigned writable_bitmask)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
   unsigned descriptors_idx = si_const_and_shader_buffer_descriptors_idx(shader);
   unsigned i;

   assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);

   if (shader == PIPE_SHADER_COMPUTE &&
       sctx->cs_shader_state.program &&
       start_slot < sctx->cs_shader_state.program->sel.cs_num_shaderbufs_in_user_sgprs)
      sctx->compute_shaderbuf_sgprs_dirty = true;

   for (i = 0; i < count; ++i) {
      const struct pipe_shader_buffer *sbuffer = sbuffers ? &sbuffers[i] : NULL;
      unsigned slot = si_get_shaderbuf_slot(start_slot + i);

      if (sbuffer && sbuffer->buffer)
         si_resource(sbuffer->buffer)->bind_history |= PIPE_BIND_SHADER_BUFFER;

      si_set_shader_buffer(sctx, buffers, descriptors_idx, slot, sbuffer,
                           !!(writable_bitmask & (1u << i)), buffers->priority);
   }
}

void si_get_shader_buffers(struct si_context *sctx, enum pipe_shader_type shader, uint start_slot,
                           uint count, struct pipe_shader_buffer *sbuf)
{
   struct si_buffer_resources *buffers = &sctx->const_and_shader_buffers[shader];
   struct si_descriptors *descs = si_const_and_shader_buffer_descriptors(sctx, shader);

   for (unsigned i = 0; i < count; ++i) {
      si_get_buffer_from_descriptors(buffers, descs, si_get_shaderbuf_slot(start_slot + i),
                                     &sbuf[i].buffer, &sbuf[i].buffer_offset, &sbuf[i].buffer_size);
   }
}

/* RING BUFFERS */

void si_set_internal_const_buffer(struct si_context *sctx, uint slot,
                                  const struct pipe_constant_buffer *input)
{
   si_set_constant_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, false, input);
}

void si_set_internal_shader_buffer(struct si_context *sctx, uint slot,
                                   const struct pipe_shader_buffer *sbuffer)
{
   si_set_shader_buffer(sctx, &sctx->internal_bindings, SI_DESCS_INTERNAL, slot, sbuffer, true,
                        RADEON_PRIO_SHADER_RW_BUFFER);
}

void si_set_ring_buffer(struct si_context *sctx, uint slot, struct pipe_resource *buffer,
                        unsigned stride, unsigned num_records, bool add_tid, bool swizzle,
                        unsigned element_size, unsigned index_stride, uint64_t offset)
{
   struct si_buffer_resources *buffers = &sctx->internal_bindings;
   struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];

   /* The stride field in the resource descriptor has 14 bits */
   assert(stride < (1 << 14));

   assert(slot < descs->num_elements);
   pipe_resource_reference(&buffers->buffers[slot], NULL);

   if (buffer) {
      uint64_t va;

      va = si_resource(buffer)->gpu_address + offset;

      switch (element_size) {
      default:
         assert(!"Unsupported ring buffer element size");
      case 0:
      case 2:
         element_size = 0;
         break;
      case 4:
         element_size = 1;
         break;
      case 8:
         element_size = 2;
         break;
      case 16:
         element_size = 3;
         break;
      }

      switch (index_stride) {
      default:
         assert(!"Unsupported ring buffer index stride");
      case 0:
      case 8:
         index_stride = 0;
         break;
      case 16:
         index_stride = 1;
         break;
      case 32:
         index_stride = 2;
         break;
      case 64:
         index_stride = 3;
         break;
      }

      if (sctx->chip_class >= GFX8 && stride)
         num_records *= stride;

      /* Set the descriptor. */
      uint32_t *desc = descs->list + slot * 4;
      desc[0] = va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride) |
                S_008F04_SWIZZLE_ENABLE(swizzle);
      desc[2] = num_records;
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                S_008F0C_INDEX_STRIDE(index_stride) | S_008F0C_ADD_TID_ENABLE(add_tid);

      if (sctx->chip_class >= GFX9)
         assert(!swizzle || element_size == 1); /* always 4 bytes on GFX9 */
      else
         desc[3] |= S_008F0C_ELEMENT_SIZE(element_size);

      if (sctx->chip_class >= GFX10) {
         desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }

      pipe_resource_reference(&buffers->buffers[slot], buffer);
      radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(buffer), RADEON_USAGE_READWRITE,
                                buffers->priority);
      buffers->enabled_mask |= 1llu << slot;
   } else {
      /* Clear the descriptor. */
      memset(descs->list + slot * 4, 0, sizeof(uint32_t) * 4);
      buffers->enabled_mask &= ~(1llu << slot);
   }

   sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;
}
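
/* Worked example of the encodings above (hypothetical ring parameters):
 * si_set_ring_buffer(..., stride = 4, num_records = 1024, element_size = 4,
 * index_stride = 16, ...) stores ELEMENT_SIZE = 1 and INDEX_STRIDE = 1 in
 * the descriptor, and on GFX8+ writes desc[2] = 1024 * 4 = 4096 because
 * num_records is multiplied by the stride there.
 */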

/* INTERNAL CONST BUFFERS */

static void si_set_polygon_stipple(struct pipe_context *ctx, const struct pipe_poly_stipple *state)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct pipe_constant_buffer cb = {};
   unsigned stipple[32];
   int i;

   for (i = 0; i < 32; i++)
      stipple[i] = util_bitreverse(state->stipple[i]);

   cb.user_buffer = stipple;
   cb.buffer_size = sizeof(stipple);

   si_set_internal_const_buffer(sctx, SI_PS_CONST_POLY_STIPPLE, &cb);
}

/* TEXTURE METADATA ENABLE/DISABLE */

static void si_resident_handles_update_needs_color_decompress(struct si_context *sctx)
{
   util_dynarray_clear(&sctx->resident_tex_needs_color_decompress);
   util_dynarray_clear(&sctx->resident_img_needs_color_decompress);

   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      struct pipe_resource *res = (*tex_handle)->view->texture;
      struct si_texture *tex;

      if (!res || res->target == PIPE_BUFFER)
         continue;

      tex = (struct si_texture *)res;
      if (!color_needs_decompression(tex))
         continue;

      util_dynarray_append(&sctx->resident_tex_needs_color_decompress, struct si_texture_handle *,
                           *tex_handle);
   }

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      struct pipe_image_view *view = &(*img_handle)->view;
      struct pipe_resource *res = view->resource;
      struct si_texture *tex;

      if (!res || res->target == PIPE_BUFFER)
         continue;

      tex = (struct si_texture *)res;
      if (!color_needs_decompression(tex))
         continue;

      util_dynarray_append(&sctx->resident_img_needs_color_decompress, struct si_image_handle *,
                           *img_handle);
   }
}

/* CMASK can be enabled (for fast clear) and disabled (for texture export)
 * while the texture is bound, possibly by a different context. In that case,
 * call this function to update needs_*_decompress_masks.
 */
void si_update_needs_color_decompress_masks(struct si_context *sctx)
{
   for (int i = 0; i < SI_NUM_SHADERS; ++i) {
      si_samplers_update_needs_color_decompress_mask(&sctx->samplers[i]);
      si_images_update_needs_color_decompress_mask(&sctx->images[i]);
      si_update_shader_needs_decompress_mask(sctx, i);
   }

   si_resident_handles_update_needs_color_decompress(sctx);
}

/* BUFFER DISCARD/INVALIDATION */

/* Reset descriptors of buffer resources after \p buf has been invalidated.
 * If buf == NULL, reset all descriptors.
 */
static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
                                      unsigned descriptors_idx, uint64_t slot_mask,
                                      struct pipe_resource *buf, enum radeon_bo_priority priority)
{
   struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
   bool noop = true;
   uint64_t mask = buffers->enabled_mask & slot_mask;

   while (mask) {
      unsigned i = u_bit_scan64(&mask);
      struct pipe_resource *buffer = buffers->buffers[i];

      if (buffer && (!buf || buffer == buf)) {
         si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
         sctx->descriptors_dirty |= 1u << descriptors_idx;

         radeon_add_to_gfx_buffer_list_check_mem(
            sctx, si_resource(buffer),
            buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
            priority, true);
         noop = false;
      }
   }
   return !noop;
}

/* Update all buffer bindings where the buffer is bound, including
 * all resource descriptors. This is invalidate_buffer without
 * the invalidation.
 *
 * If buf == NULL, update all buffer bindings.
 */
void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
{
   struct si_resource *buffer = si_resource(buf);
   unsigned i, shader;
   unsigned num_elems = sctx->num_vertex_elements;

   /* We changed the buffer, now we need to bind it where the old one
    * was bound. This consists of 2 things:
    *   1) Updating the resource descriptor and dirtying it.
    *   2) Adding a relocation to the CS, so that it's usable.
    */

   /* Vertex buffers. */
   if (!buffer) {
      sctx->vertex_buffers_dirty = num_elems > 0;
   } else if (buffer->bind_history & PIPE_BIND_VERTEX_BUFFER) {
      for (i = 0; i < num_elems; i++) {
         int vb = sctx->vertex_elements->vertex_buffer_index[i];

         if (vb >= ARRAY_SIZE(sctx->vertex_buffer))
            continue;
         if (!sctx->vertex_buffer[vb].buffer.resource)
            continue;

         if (sctx->vertex_buffer[vb].buffer.resource == buf) {
            sctx->vertex_buffers_dirty = num_elems > 0;
            break;
         }
      }
   }

   /* Streamout buffers. (other internal buffers can't be invalidated) */
   if (!buffer || buffer->bind_history & PIPE_BIND_STREAM_OUTPUT) {
      for (i = SI_VS_STREAMOUT_BUF0; i <= SI_VS_STREAMOUT_BUF3; i++) {
         struct si_buffer_resources *buffers = &sctx->internal_bindings;
         struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_INTERNAL];
         struct pipe_resource *buffer = buffers->buffers[i];

         if (!buffer || (buf && buffer != buf))
            continue;

         si_set_buf_desc_address(si_resource(buffer), buffers->offsets[i], descs->list + i * 4);
         sctx->descriptors_dirty |= 1u << SI_DESCS_INTERNAL;

         radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_WRITE,
                                                 RADEON_PRIO_SHADER_RW_BUFFER, true);

         /* Update the streamout state. */
         if (sctx->streamout.begin_emitted)
            si_emit_streamout_end(sctx);
         sctx->streamout.append_bitmask = sctx->streamout.enabled_mask;
         si_streamout_buffers_dirty(sctx);
      }
   }

   /* Constant and shader buffers. */
   if (!buffer || buffer->bind_history & PIPE_BIND_CONSTANT_BUFFER) {
      for (shader = 0; shader < SI_NUM_SHADERS; shader++)
         si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
                                   si_const_and_shader_buffer_descriptors_idx(shader),
                                   u_bit_consecutive64(SI_NUM_SHADER_BUFFERS, SI_NUM_CONST_BUFFERS),
                                   buf, sctx->const_and_shader_buffers[shader].priority_constbuf);
   }

   if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
      for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
         if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
                                       si_const_and_shader_buffer_descriptors_idx(shader),
                                       u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
                                       sctx->const_and_shader_buffers[shader].priority) &&
             shader == PIPE_SHADER_COMPUTE) {
            sctx->compute_shaderbuf_sgprs_dirty = true;
         }
      }
   }

   if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
      /* Texture buffers - update bindings. */
      for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
         struct si_samplers *samplers = &sctx->samplers[shader];
         struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
         unsigned mask = samplers->enabled_mask;

         while (mask) {
            unsigned i = u_bit_scan(&mask);
            struct pipe_resource *buffer = samplers->views[i]->texture;

            if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
               unsigned desc_slot = si_get_sampler_slot(i);

               si_set_buf_desc_address(si_resource(buffer), samplers->views[i]->u.buf.offset,
                                       descs->list + desc_slot * 16 + 4);
               sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);

               radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
                                                       RADEON_PRIO_SAMPLER_BUFFER, true);
            }
         }
      }
   }

   /* Shader images */
   if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_IMAGE) {
      for (shader = 0; shader < SI_NUM_SHADERS; ++shader) {
         struct si_images *images = &sctx->images[shader];
         struct si_descriptors *descs = si_sampler_and_image_descriptors(sctx, shader);
         unsigned mask = images->enabled_mask;

         while (mask) {
            unsigned i = u_bit_scan(&mask);
            struct pipe_resource *buffer = images->views[i].resource;

            if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
               unsigned desc_slot = si_get_image_slot(i);

               if (images->views[i].access & PIPE_IMAGE_ACCESS_WRITE)
                  si_mark_image_range_valid(&images->views[i]);

               si_set_buf_desc_address(si_resource(buffer), images->views[i].u.buf.offset,
                                       descs->list + desc_slot * 8 + 4);
               sctx->descriptors_dirty |= 1u << si_sampler_and_image_descriptors_idx(shader);

               radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer),
                                                       RADEON_USAGE_READWRITE,
                                                       RADEON_PRIO_SAMPLER_BUFFER, true);

               if (shader == PIPE_SHADER_COMPUTE)
                  sctx->compute_image_sgprs_dirty = true;
            }
         }
      }
   }

   /* Bindless texture handles */
   if (!buffer || buffer->texture_handle_allocated) {
      struct si_descriptors *descs = &sctx->bindless_descriptors;

      util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
         struct pipe_sampler_view *view = (*tex_handle)->view;
         unsigned desc_slot = (*tex_handle)->desc_slot;
         struct pipe_resource *buffer = view->texture;

         if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
            si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
                                    descs->list + desc_slot * 16 + 4);

            (*tex_handle)->desc_dirty = true;
            sctx->bindless_descriptors_dirty = true;

            radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer), RADEON_USAGE_READ,
                                                    RADEON_PRIO_SAMPLER_BUFFER, true);
         }
      }
   }

   /* Bindless image handles */
   if (!buffer || buffer->image_handle_allocated) {
      struct si_descriptors *descs = &sctx->bindless_descriptors;

      util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
         struct pipe_image_view *view = &(*img_handle)->view;
         unsigned desc_slot = (*img_handle)->desc_slot;
         struct pipe_resource *buffer = view->resource;

         if (buffer && buffer->target == PIPE_BUFFER && (!buf || buffer == buf)) {
            if (view->access & PIPE_IMAGE_ACCESS_WRITE)
               si_mark_image_range_valid(view);

            si_set_buf_desc_address(si_resource(buffer), view->u.buf.offset,
                                    descs->list + desc_slot * 16 + 4);

            (*img_handle)->desc_dirty = true;
            sctx->bindless_descriptors_dirty = true;

            radeon_add_to_gfx_buffer_list_check_mem(
               sctx, si_resource(buffer), RADEON_USAGE_READWRITE, RADEON_PRIO_SAMPLER_BUFFER, true);
         }
      }
   }

   if (buffer) {
      /* Do the same for other contexts. They will invoke this function
       * with buffer == NULL.
       */
      unsigned new_counter = p_atomic_inc_return(&sctx->screen->dirty_buf_counter);

      /* Skip the update for the current context, because we have already updated
       * the buffer bindings.
       */
      if (new_counter == sctx->last_dirty_buf_counter + 1)
         sctx->last_dirty_buf_counter = new_counter;
   }
}

static void si_upload_bindless_descriptor(struct si_context *sctx, unsigned desc_slot,
                                          unsigned num_dwords)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = desc_slot * 16;
   uint32_t *data;
   uint64_t va;

   data = desc->list + desc_slot_offset;
   va = desc->gpu_address + desc_slot_offset * 4;

   si_cp_write_data(sctx, desc->buffer, va - desc->buffer->gpu_address, num_dwords * 4,
                    V_370_TC_L2, V_370_ME, data);
}

static void si_upload_bindless_descriptors(struct si_context *sctx)
{
   if (!sctx->bindless_descriptors_dirty)
      return;

   /* Wait for graphics/compute to be idle before updating the resident
    * descriptors directly in memory, in case the GPU is using them.
    */
   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH;
   sctx->emit_cache_flush(sctx, &sctx->gfx_cs);

   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      unsigned desc_slot = (*tex_handle)->desc_slot;

      if (!(*tex_handle)->desc_dirty)
         continue;

      si_upload_bindless_descriptor(sctx, desc_slot, 16);
      (*tex_handle)->desc_dirty = false;
   }

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      unsigned desc_slot = (*img_handle)->desc_slot;

      if (!(*img_handle)->desc_dirty)
         continue;

      si_upload_bindless_descriptor(sctx, desc_slot, 8);
      (*img_handle)->desc_dirty = false;
   }

   /* Invalidate scalar L0 because the cache doesn't know that L2 changed. */
   sctx->flags |= SI_CONTEXT_INV_SCACHE;
   sctx->bindless_descriptors_dirty = false;
}
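
/* Worked example of the addressing above (the numbers follow directly from
 * the code): bindless slots are 16 dwords wide, so for desc_slot == 5 the
 * CPU copy lives at desc->list + 80 and the GPU copy at gpu_address + 320
 * bytes. si_cp_write_data() patches just those 16 (or 8) dwords in place,
 * which is why the scalar cache must be invalidated afterwards.
 */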

/* Update mutable image descriptor fields of all resident textures. */
static void si_update_bindless_texture_descriptor(struct si_context *sctx,
                                                  struct si_texture_handle *tex_handle)
{
   struct si_sampler_view *sview = (struct si_sampler_view *)tex_handle->view;
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = tex_handle->desc_slot * 16;
   uint32_t desc_list[16];

   if (sview->base.texture->target == PIPE_BUFFER)
      return;

   memcpy(desc_list, desc->list + desc_slot_offset, sizeof(desc_list));
   si_set_sampler_view_desc(sctx, sview, &tex_handle->sstate, desc->list + desc_slot_offset);

   if (memcmp(desc_list, desc->list + desc_slot_offset, sizeof(desc_list))) {
      tex_handle->desc_dirty = true;
      sctx->bindless_descriptors_dirty = true;
   }
}

static void si_update_bindless_image_descriptor(struct si_context *sctx,
                                                struct si_image_handle *img_handle)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot_offset = img_handle->desc_slot * 16;
   struct pipe_image_view *view = &img_handle->view;
   struct pipe_resource *res = view->resource;
   uint32_t image_desc[16];
   unsigned desc_size = (res->nr_samples >= 2 ? 16 : 8) * 4;

   if (res->target == PIPE_BUFFER)
      return;

   memcpy(image_desc, desc->list + desc_slot_offset, desc_size);
   si_set_shader_image_desc(sctx, view, true, desc->list + desc_slot_offset,
                            desc->list + desc_slot_offset + 8);

   if (memcmp(image_desc, desc->list + desc_slot_offset, desc_size)) {
      img_handle->desc_dirty = true;
      sctx->bindless_descriptors_dirty = true;
   }
}

static void si_update_all_resident_texture_descriptors(struct si_context *sctx)
{
   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      si_update_bindless_texture_descriptor(sctx, *tex_handle);
   }

   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      si_update_bindless_image_descriptor(sctx, *img_handle);
   }

   si_upload_bindless_descriptors(sctx);
}

/* Update mutable image descriptor fields of all bound textures. */
void si_update_all_texture_descriptors(struct si_context *sctx)
{
   unsigned shader;

   for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
      struct si_samplers *samplers = &sctx->samplers[shader];
      struct si_images *images = &sctx->images[shader];
      unsigned mask;

      /* Images. */
      mask = images->enabled_mask;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         struct pipe_image_view *view = &images->views[i];

         if (!view->resource || view->resource->target == PIPE_BUFFER)
            continue;

         si_set_shader_image(sctx, shader, i, view, true);
      }
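
      /* (u_bit_scan pops the lowest set bit of the mask, so e.g. an
       * enabled_mask of 0b1010 visits i = 1 and then i = 3.) */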

      /* Sampler views. */
      mask = samplers->enabled_mask;
      while (mask) {
         unsigned i = u_bit_scan(&mask);
         struct pipe_sampler_view *view = samplers->views[i];

         if (!view || !view->texture || view->texture->target == PIPE_BUFFER)
            continue;

         si_set_sampler_views(sctx, shader, i, 1, 0, &samplers->views[i], true);
      }

      si_update_shader_needs_decompress_mask(sctx, shader);
   }

   si_update_all_resident_texture_descriptors(sctx);
   si_update_ps_colorbuf0_slot(sctx);
}

/* SHADER USER DATA */

static void si_mark_shader_pointers_dirty(struct si_context *sctx, unsigned shader)
{
   sctx->shader_pointers_dirty |=
      u_bit_consecutive(SI_DESCS_FIRST_SHADER + shader * SI_NUM_SHADER_DESCS,
                        SI_NUM_SHADER_DESCS);

   if (shader == PIPE_SHADER_VERTEX) {
      sctx->vertex_buffer_pointer_dirty =
         sctx->vb_descriptors_buffer != NULL &&
         sctx->num_vertex_elements > sctx->screen->num_vbos_in_user_sgprs;
      sctx->vertex_buffer_user_sgprs_dirty =
         sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
   }

   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
}

void si_shader_pointers_mark_dirty(struct si_context *sctx)
{
   sctx->shader_pointers_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
   sctx->vertex_buffer_pointer_dirty =
      sctx->vb_descriptors_buffer != NULL &&
      sctx->num_vertex_elements > sctx->screen->num_vbos_in_user_sgprs;
   sctx->vertex_buffer_user_sgprs_dirty =
      sctx->num_vertex_elements > 0 && sctx->screen->num_vbos_in_user_sgprs;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
   sctx->graphics_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
   sctx->compute_bindless_pointer_dirty = sctx->bindless_descriptors.buffer != NULL;
   sctx->compute_shaderbuf_sgprs_dirty = true;
   sctx->compute_image_sgprs_dirty = true;
}

/* Set a base register address for user data constants in the given shader.
 * This assigns a mapping from PIPE_SHADER_* to SPI_SHADER_USER_DATA_*.
 */
static void si_set_user_data_base(struct si_context *sctx, unsigned shader, uint32_t new_base)
{
   uint32_t *base = &sctx->shader_pointers.sh_base[shader];

   if (*base != new_base) {
      *base = new_base;

      if (new_base)
         si_mark_shader_pointers_dirty(sctx, shader);

      /* Any change in enabled shader stages requires re-emitting
       * the VS state SGPR, because it contains the clamp_vertex_color
       * state, which can be done in VS, TES, and GS.
       */
      sctx->last_vs_state = ~0;
   }
}
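
/* A rough picture of what the remapping below means in practice: on GFX9+
 * the HW merges shader stages, so e.g. with tessellation enabled the API
 * vertex shader runs as part of the LS/HS stage and its user data SGPRs
 * move to the R_00B430_* register range; si_get_user_data_base() picks the
 * right base for each TESS/GS/NGG combination.
 */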

/* This must be called when these are changed between enabled and disabled:
 * - geometry shader
 * - tessellation evaluation shader
 * - NGG
 */
void si_shader_change_notify(struct si_context *sctx)
{
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->chip_class,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_VERTEX));

   si_set_user_data_base(sctx, PIPE_SHADER_TESS_EVAL,
                         si_get_user_data_base(sctx->chip_class,
                                               sctx->shader.tes.cso ? TESS_ON : TESS_OFF,
                                               sctx->shader.gs.cso ? GS_ON : GS_OFF,
                                               sctx->ngg ? NGG_ON : NGG_OFF,
                                               PIPE_SHADER_TESS_EVAL));
}

#define si_emit_consecutive_shader_pointers(sctx, pointer_mask, sh_base) do { \
   unsigned sh_reg_base = (sh_base); \
   if (sh_reg_base) { \
      unsigned mask = sctx->shader_pointers_dirty & (pointer_mask); \
      \
      while (mask) { \
         int start, count; \
         u_bit_scan_consecutive_range(&mask, &start, &count); \
         \
         struct si_descriptors *descs = &sctx->descriptors[start]; \
         unsigned sh_offset = sh_reg_base + descs->shader_userdata_offset; \
         \
         radeon_set_sh_reg_seq(&sctx->gfx_cs, sh_offset, count); \
         for (int i = 0; i < count; i++) \
            radeon_emit_32bit_pointer(sctx->screen, &sctx->gfx_cs, descs[i].gpu_address); \
      } \
   } \
} while (0)
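
/* Sketch of the macro above in action (hypothetical numbers): if
 * pointer_mask covers descriptor sets 3..5, all three are dirty, and their
 * shader_userdata_offsets are consecutive, then one
 * u_bit_scan_consecutive_range() pass emits a single
 * radeon_set_sh_reg_seq() packet followed by three pointers instead of
 * three separate packets.
 */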

static void si_emit_global_shader_pointers(struct si_context *sctx, struct si_descriptors *descs)
{
   radeon_begin(&sctx->gfx_cs);

   if (sctx->chip_class >= GFX10) {
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      /* HW VS stage only used in non-NGG mode. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9 && sctx->shadowed_regs) {
      /* We can't use the COMMON registers with register shadowing. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_LS_0);
      radeon_end();
      return;
   } else if (sctx->chip_class == GFX9) {
      /* Broadcast it to all shader stages. */
      radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_COMMON_0);
      radeon_end();
      return;
   }

   radeon_emit_one_32bit_pointer(sctx, descs, R_00B030_SPI_SHADER_USER_DATA_PS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B130_SPI_SHADER_USER_DATA_VS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B330_SPI_SHADER_USER_DATA_ES_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B230_SPI_SHADER_USER_DATA_GS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B430_SPI_SHADER_USER_DATA_HS_0);
   radeon_emit_one_32bit_pointer(sctx, descs, R_00B530_SPI_SHADER_USER_DATA_LS_0);
   radeon_end();
}

void si_emit_graphics_shader_pointers(struct si_context *sctx)
{
   uint32_t *sh_base = sctx->shader_pointers.sh_base;

   if (sctx->shader_pointers_dirty & (1 << SI_DESCS_INTERNAL)) {
      si_emit_global_shader_pointers(sctx, &sctx->descriptors[SI_DESCS_INTERNAL]);
   }

   radeon_begin(&sctx->gfx_cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(VERTEX),
                                       sh_base[PIPE_SHADER_VERTEX]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_EVAL),
                                       sh_base[PIPE_SHADER_TESS_EVAL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(FRAGMENT),
                                       sh_base[PIPE_SHADER_FRAGMENT]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(TESS_CTRL),
                                       sh_base[PIPE_SHADER_TESS_CTRL]);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(GEOMETRY),
                                       sh_base[PIPE_SHADER_GEOMETRY]);
   radeon_end();

   sctx->shader_pointers_dirty &= ~u_bit_consecutive(SI_DESCS_INTERNAL, SI_DESCS_FIRST_COMPUTE);

   if (sctx->graphics_bindless_pointer_dirty) {
      si_emit_global_shader_pointers(sctx, &sctx->bindless_descriptors);
      sctx->graphics_bindless_pointer_dirty = false;
   }
}

void si_emit_compute_shader_pointers(struct si_context *sctx)
{
   struct radeon_cmdbuf *cs = &sctx->gfx_cs;
   struct si_shader_selector *shader = &sctx->cs_shader_state.program->sel;
   unsigned base = R_00B900_COMPUTE_USER_DATA_0;

   radeon_begin(cs);
   si_emit_consecutive_shader_pointers(sctx, SI_DESCS_SHADER_MASK(COMPUTE),
                                       R_00B900_COMPUTE_USER_DATA_0);
   sctx->shader_pointers_dirty &= ~SI_DESCS_SHADER_MASK(COMPUTE);

   if (sctx->compute_bindless_pointer_dirty) {
      radeon_emit_one_32bit_pointer(sctx, &sctx->bindless_descriptors, base);
      sctx->compute_bindless_pointer_dirty = false;
   }

   /* Set shader buffer descriptors in user SGPRs. */
   unsigned num_shaderbufs = shader->cs_num_shaderbufs_in_user_sgprs;
   if (num_shaderbufs && sctx->compute_shaderbuf_sgprs_dirty) {
      struct si_descriptors *desc =
         si_const_and_shader_buffer_descriptors(sctx, PIPE_SHADER_COMPUTE);

      radeon_set_sh_reg_seq(cs,
                            R_00B900_COMPUTE_USER_DATA_0 + shader->cs_shaderbufs_sgpr_index * 4,
                            num_shaderbufs * 4);

      for (unsigned i = 0; i < num_shaderbufs; i++)
         radeon_emit_array(cs, &desc->list[si_get_shaderbuf_slot(i) * 4], 4);

      sctx->compute_shaderbuf_sgprs_dirty = false;
   }
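
   /* (A buffer descriptor is 4 dwords, so the block above consumes
    * num_shaderbufs * 4 user SGPRs; e.g. two inlined shader buffers cost
    * 8 SGPRs starting at cs_shaderbufs_sgpr_index.) */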

   /* Set image descriptors in user SGPRs. */
   unsigned num_images = shader->cs_num_images_in_user_sgprs;
   if (num_images && sctx->compute_image_sgprs_dirty) {
      struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, PIPE_SHADER_COMPUTE);

      radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + shader->cs_images_sgpr_index * 4,
                            shader->cs_images_num_sgprs);

      for (unsigned i = 0; i < num_images; i++) {
         unsigned desc_offset = si_get_image_slot(i) * 8;
         unsigned num_sgprs = 8;

         /* Image buffers are in desc[4..7]. */
         if (shader->info.base.image_buffers & (1 << i)) {
            desc_offset += 4;
            num_sgprs = 4;
         }

         radeon_emit_array(cs, &desc->list[desc_offset], num_sgprs);
      }

      sctx->compute_image_sgprs_dirty = false;
   }
   radeon_end();
}

/* BINDLESS */

static void si_init_bindless_descriptors(struct si_context *sctx, struct si_descriptors *desc,
                                         short shader_userdata_rel_index, unsigned num_elements)
{
   ASSERTED unsigned desc_slot;

   si_init_descriptors(desc, shader_userdata_rel_index, 16, num_elements);
   sctx->bindless_descriptors.num_active_slots = num_elements;

   /* The first bindless descriptor is stored at slot 1, because 0 is not
    * considered to be a valid handle.
    */
   sctx->num_bindless_descriptors = 1;

   /* Track which bindless slots are used (or not). */
   util_idalloc_init(&sctx->bindless_used_slots, num_elements);

   /* Reserve slot 0 because it's an invalid handle for bindless. */
   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
   assert(desc_slot == 0);
}

static void si_release_bindless_descriptors(struct si_context *sctx)
{
   si_release_descriptors(&sctx->bindless_descriptors);
   util_idalloc_fini(&sctx->bindless_used_slots);
}

static unsigned si_get_first_free_bindless_slot(struct si_context *sctx)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot;

   desc_slot = util_idalloc_alloc(&sctx->bindless_used_slots);
   if (desc_slot >= desc->num_elements) {
      /* The array of bindless descriptors is full, resize it. */
      unsigned slot_size = desc->element_dw_size * 4;
      unsigned new_num_elements = desc->num_elements * 2;

      desc->list =
         REALLOC(desc->list, desc->num_elements * slot_size, new_num_elements * slot_size);
      desc->num_elements = new_num_elements;
      desc->num_active_slots = new_num_elements;
   }

   assert(desc_slot);
   return desc_slot;
}
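
/* Growth sketch: the bindless array starts out with 1024 slots (see
 * si_init_all_descriptors below) and doubles whenever util_idalloc hands
 * out a slot past the end; REALLOC preserves the existing descriptors and
 * the next si_upload_descriptors() call uploads the enlarged list.
 */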

static unsigned si_create_bindless_descriptor(struct si_context *sctx, uint32_t *desc_list,
                                              unsigned size)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   unsigned desc_slot, desc_slot_offset;

   /* Find a free slot. */
   desc_slot = si_get_first_free_bindless_slot(sctx);

   /* For simplicity, sampler and image bindless descriptors use fixed
    * 16-dword slots for now. Image descriptors only need 8 dwords, but this
    * doesn't really matter because no real apps use image handles.
    */
   desc_slot_offset = desc_slot * 16;

   /* Copy the descriptor into the array. */
   memcpy(desc->list + desc_slot_offset, desc_list, size);

   /* Re-upload the whole array of bindless descriptors into a new buffer. */
   if (!si_upload_descriptors(sctx, desc))
      return 0;

   /* Make sure to re-emit the shader pointers for all stages. */
   sctx->graphics_bindless_pointer_dirty = true;
   sctx->compute_bindless_pointer_dirty = true;
   si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);

   return desc_slot;
}

static void si_update_bindless_buffer_descriptor(struct si_context *sctx, unsigned desc_slot,
                                                 struct pipe_resource *resource, uint64_t offset,
                                                 bool *desc_dirty)
{
   struct si_descriptors *desc = &sctx->bindless_descriptors;
   struct si_resource *buf = si_resource(resource);
   unsigned desc_slot_offset = desc_slot * 16;
   uint32_t *desc_list = desc->list + desc_slot_offset + 4;
   uint64_t old_desc_va;

   assert(resource->target == PIPE_BUFFER);

   /* Retrieve the old buffer address from the descriptor. */
   old_desc_va = si_desc_extract_buffer_address(desc_list);

   if (old_desc_va != buf->gpu_address + offset) {
      /* The buffer has been invalidated while the handle wasn't
       * resident; update the descriptor and the dirty flag.
       */
      si_set_buf_desc_address(buf, offset, &desc_list[0]);

      *desc_dirty = true;
   }
}

static uint64_t si_create_texture_handle(struct pipe_context *ctx, struct pipe_sampler_view *view,
                                         const struct pipe_sampler_state *state)
{
   struct si_sampler_view *sview = (struct si_sampler_view *)view;
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct si_sampler_state *sstate;
   uint32_t desc_list[16];
   uint64_t handle;

   tex_handle = CALLOC_STRUCT(si_texture_handle);
   if (!tex_handle)
      return 0;

   memset(desc_list, 0, sizeof(desc_list));
   si_init_descriptor_list(&desc_list[0], 16, 1, null_texture_descriptor);

   sstate = ctx->create_sampler_state(ctx, state);
   if (!sstate) {
      FREE(tex_handle);
      return 0;
   }

   si_set_sampler_view_desc(sctx, sview, sstate, &desc_list[0]);
   memcpy(&tex_handle->sstate, sstate, sizeof(*sstate));
   ctx->delete_sampler_state(ctx, sstate);

   tex_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
   if (!tex_handle->desc_slot) {
      FREE(tex_handle);
      return 0;
   }

   handle = tex_handle->desc_slot;

   if (!_mesa_hash_table_insert(sctx->tex_handles, (void *)(uintptr_t)handle, tex_handle)) {
      FREE(tex_handle);
      return 0;
   }

   pipe_sampler_view_reference(&tex_handle->view, view);

   si_resource(sview->base.texture)->texture_handle_allocated = true;

   return handle;
}
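
/* (si_create_texture_handle keeps a by-value copy of the sampler state in
 * tex_handle->sstate, which is why the pipe object can be deleted right
 * away; that copy is what si_update_bindless_texture_descriptor() uses
 * later. The returned 64-bit handle is simply the descriptor slot index,
 * and slot 0 stays reserved so that 0 can mean "failure".)
 */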

static void si_delete_texture_handle(struct pipe_context *ctx, uint64_t handle)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   tex_handle = (struct si_texture_handle *)entry->data;

   /* Allow this descriptor slot to be re-used. */
   util_idalloc_free(&sctx->bindless_used_slots, tex_handle->desc_slot);

   pipe_sampler_view_reference(&tex_handle->view, NULL);
   _mesa_hash_table_remove(sctx->tex_handles, entry);
   FREE(tex_handle);
}

static void si_make_texture_handle_resident(struct pipe_context *ctx, uint64_t handle,
                                            bool resident)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_texture_handle *tex_handle;
   struct si_sampler_view *sview;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->tex_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   tex_handle = (struct si_texture_handle *)entry->data;
   sview = (struct si_sampler_view *)tex_handle->view;

   if (resident) {
      if (sview->base.texture->target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)sview->base.texture;

         if (depth_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_tex_needs_depth_decompress,
                                 struct si_texture_handle *, tex_handle);
         }

         if (color_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_tex_needs_color_decompress,
                                 struct si_texture_handle *, tex_handle);
         }

         if (vi_dcc_enabled(tex, sview->base.u.tex.first_level) &&
             p_atomic_read(&tex->framebuffers_bound))
            sctx->need_check_render_feedback = true;

         si_update_bindless_texture_descriptor(sctx, tex_handle);
      } else {
         si_update_bindless_buffer_descriptor(sctx, tex_handle->desc_slot, sview->base.texture,
                                              sview->base.u.buf.offset, &tex_handle->desc_dirty);
      }

      /* Re-upload the descriptor if it has been updated while it
       * wasn't resident.
       */
      if (tex_handle->desc_dirty)
         sctx->bindless_descriptors_dirty = true;

      /* Add the texture handle to the per-context list. */
      util_dynarray_append(&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle);

      /* Add the buffers to the current CS in case si_begin_new_cs()
       * is not going to be called.
       */
      si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
                                 sview->is_stencil_sampler, false);
   } else {
      /* Remove the texture handle from the per-context list. */
      util_dynarray_delete_unordered(&sctx->resident_tex_handles, struct si_texture_handle *,
                                     tex_handle);

      if (sview->base.texture->target != PIPE_BUFFER) {
         util_dynarray_delete_unordered(&sctx->resident_tex_needs_depth_decompress,
                                        struct si_texture_handle *, tex_handle);

         util_dynarray_delete_unordered(&sctx->resident_tex_needs_color_decompress,
                                        struct si_texture_handle *, tex_handle);
      }
   }
}

static uint64_t si_create_image_handle(struct pipe_context *ctx, const struct pipe_image_view *view)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   uint32_t desc_list[16];
   uint64_t handle;

   if (!view || !view->resource)
      return 0;

   img_handle = CALLOC_STRUCT(si_image_handle);
   if (!img_handle)
      return 0;

   memset(desc_list, 0, sizeof(desc_list));
   si_init_descriptor_list(&desc_list[0], 8, 2, null_image_descriptor);

   si_set_shader_image_desc(sctx, view, false, &desc_list[0], &desc_list[8]);

   img_handle->desc_slot = si_create_bindless_descriptor(sctx, desc_list, sizeof(desc_list));
   if (!img_handle->desc_slot) {
      FREE(img_handle);
      return 0;
   }

   handle = img_handle->desc_slot;

   if (!_mesa_hash_table_insert(sctx->img_handles, (void *)(uintptr_t)handle, img_handle)) {
      FREE(img_handle);
      return 0;
   }

   util_copy_image_view(&img_handle->view, view);

   si_resource(view->resource)->image_handle_allocated = true;

   return handle;
}

static void si_delete_image_handle(struct pipe_context *ctx, uint64_t handle)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   img_handle = (struct si_image_handle *)entry->data;

   util_copy_image_view(&img_handle->view, NULL);
   _mesa_hash_table_remove(sctx->img_handles, entry);
   FREE(img_handle);
}

static void si_make_image_handle_resident(struct pipe_context *ctx, uint64_t handle,
                                          unsigned access, bool resident)
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct si_image_handle *img_handle;
   struct pipe_image_view *view;
   struct si_resource *res;
   struct hash_entry *entry;

   entry = _mesa_hash_table_search(sctx->img_handles, (void *)(uintptr_t)handle);
   if (!entry)
      return;

   img_handle = (struct si_image_handle *)entry->data;
   view = &img_handle->view;
   res = si_resource(view->resource);

   if (resident) {
      if (res->b.b.target != PIPE_BUFFER) {
         struct si_texture *tex = (struct si_texture *)res;
         unsigned level = view->u.tex.level;

         if (color_needs_decompression(tex)) {
            util_dynarray_append(&sctx->resident_img_needs_color_decompress,
                                 struct si_image_handle *, img_handle);
         }

         if (vi_dcc_enabled(tex, level) && p_atomic_read(&tex->framebuffers_bound))
            sctx->need_check_render_feedback = true;

         si_update_bindless_image_descriptor(sctx, img_handle);
      } else {
         si_update_bindless_buffer_descriptor(sctx, img_handle->desc_slot, view->resource,
                                              view->u.buf.offset, &img_handle->desc_dirty);
      }

      /* Re-upload the descriptor if it has been updated while it
       * wasn't resident.
       */
      if (img_handle->desc_dirty)
         sctx->bindless_descriptors_dirty = true;

      /* Add the image handle to the per-context list. */
      util_dynarray_append(&sctx->resident_img_handles, struct si_image_handle *, img_handle);

      /* Add the buffers to the current CS in case si_begin_new_cs()
       * is not going to be called.
       */
      si_sampler_view_add_buffer(
         sctx, view->resource,
         (access & PIPE_IMAGE_ACCESS_WRITE) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ, false,
         false);
   } else {
      /* Remove the image handle from the per-context list. */
      util_dynarray_delete_unordered(&sctx->resident_img_handles, struct si_image_handle *,
                                     img_handle);

      if (res->b.b.target != PIPE_BUFFER) {
         util_dynarray_delete_unordered(&sctx->resident_img_needs_color_decompress,
                                        struct si_image_handle *, img_handle);
      }
   }
}

static void si_resident_buffers_add_all_to_bo_list(struct si_context *sctx)
{
   unsigned num_resident_tex_handles, num_resident_img_handles;

   num_resident_tex_handles = sctx->resident_tex_handles.size / sizeof(struct si_texture_handle *);
   num_resident_img_handles = sctx->resident_img_handles.size / sizeof(struct si_image_handle *);

   /* Add all resident texture handles. */
   util_dynarray_foreach (&sctx->resident_tex_handles, struct si_texture_handle *, tex_handle) {
      struct si_sampler_view *sview = (struct si_sampler_view *)(*tex_handle)->view;

      si_sampler_view_add_buffer(sctx, sview->base.texture, RADEON_USAGE_READ,
                                 sview->is_stencil_sampler, false);
   }

   /* Add all resident image handles. */
   util_dynarray_foreach (&sctx->resident_img_handles, struct si_image_handle *, img_handle) {
      struct pipe_image_view *view = &(*img_handle)->view;

      si_sampler_view_add_buffer(sctx, view->resource, RADEON_USAGE_READWRITE, false, false);
   }

   sctx->num_resident_handles += num_resident_tex_handles + num_resident_img_handles;
   assert(sctx->bo_list_add_all_resident_resources);
   sctx->bo_list_add_all_resident_resources = false;
}

/* INIT/DEINIT/UPLOAD */
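
/* Overview of the descriptor sets built below (a sketch; the exact sizes
 * come from the SI_NUM_* constants): every shader stage gets one combined
 * shader/constant-buffer list and one combined sampler/image list, and the
 * context additionally owns the global internal-bindings list and the
 * bindless list.
 */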

void si_init_all_descriptors(struct si_context *sctx)
{
   int i;
   unsigned first_shader = sctx->has_graphics ? 0 : PIPE_SHADER_COMPUTE;

   for (i = first_shader; i < SI_NUM_SHADERS; i++) {
      bool is_2nd =
         sctx->chip_class >= GFX9 && (i == PIPE_SHADER_TESS_CTRL || i == PIPE_SHADER_GEOMETRY);
      unsigned num_sampler_slots = SI_NUM_IMAGE_SLOTS / 2 + SI_NUM_SAMPLERS;
      unsigned num_buffer_slots = SI_NUM_SHADER_BUFFERS + SI_NUM_CONST_BUFFERS;
      int rel_dw_offset;
      struct si_descriptors *desc;

      if (is_2nd) {
         if (i == PIPE_SHADER_TESS_CTRL) {
            rel_dw_offset =
               (R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
         } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
            rel_dw_offset =
               (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
         } else {
            rel_dw_offset =
               (R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
         }
      } else {
         rel_dw_offset = SI_SGPR_CONST_AND_SHADER_BUFFERS;
      }
      desc = si_const_and_shader_buffer_descriptors(sctx, i);
      si_init_buffer_resources(sctx, &sctx->const_and_shader_buffers[i], desc, num_buffer_slots,
                               rel_dw_offset, RADEON_PRIO_SHADER_RW_BUFFER,
                               RADEON_PRIO_CONST_BUFFER);
      desc->slot_index_to_bind_directly = si_get_constbuf_slot(0);

      if (is_2nd) {
         if (i == PIPE_SHADER_TESS_CTRL) {
            rel_dw_offset =
               (R_00B40C_SPI_SHADER_USER_DATA_ADDR_HI_HS - R_00B430_SPI_SHADER_USER_DATA_LS_0) / 4;
         } else if (sctx->chip_class >= GFX10) { /* PIPE_SHADER_GEOMETRY */
            rel_dw_offset =
               (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B230_SPI_SHADER_USER_DATA_GS_0) / 4;
         } else {
            rel_dw_offset =
               (R_00B20C_SPI_SHADER_USER_DATA_ADDR_HI_GS - R_00B330_SPI_SHADER_USER_DATA_ES_0) / 4;
         }
      } else {
         rel_dw_offset = SI_SGPR_SAMPLERS_AND_IMAGES;
      }

      desc = si_sampler_and_image_descriptors(sctx, i);
      si_init_descriptors(desc, rel_dw_offset, 16, num_sampler_slots);

      int j;
      for (j = 0; j < SI_NUM_IMAGE_SLOTS; j++)
         memcpy(desc->list + j * 8, null_image_descriptor, 8 * 4);
      for (; j < SI_NUM_IMAGE_SLOTS + SI_NUM_SAMPLERS * 2; j++)
         memcpy(desc->list + j * 8, null_texture_descriptor, 8 * 4);
   }

   si_init_buffer_resources(sctx, &sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL],
                            SI_NUM_INTERNAL_BINDINGS, SI_SGPR_INTERNAL_BINDINGS,
                            /* The second priority is used by
                             * const buffers in RW buffer slots. */
                            RADEON_PRIO_SHADER_RINGS, RADEON_PRIO_CONST_BUFFER);
   sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots = SI_NUM_INTERNAL_BINDINGS;

   /* Initialize an array of 1024 bindless descriptors; when the limit is
    * reached, just make it larger and re-upload the whole array.
    */
   si_init_bindless_descriptors(sctx, &sctx->bindless_descriptors,
                                SI_SGPR_BINDLESS_SAMPLERS_AND_IMAGES, 1024);

   sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);

   /* Set pipe_context functions. */
   sctx->b.bind_sampler_states = si_bind_sampler_states;
   sctx->b.set_shader_images = si_set_shader_images;
   sctx->b.set_constant_buffer = si_pipe_set_constant_buffer;
   sctx->b.set_inlinable_constants = si_set_inlinable_constants;
   sctx->b.set_shader_buffers = si_set_shader_buffers;
   sctx->b.set_sampler_views = si_pipe_set_sampler_views;
   sctx->b.create_texture_handle = si_create_texture_handle;
   sctx->b.delete_texture_handle = si_delete_texture_handle;
   sctx->b.make_texture_handle_resident = si_make_texture_handle_resident;
   sctx->b.create_image_handle = si_create_image_handle;
   sctx->b.delete_image_handle = si_delete_image_handle;
   sctx->b.make_image_handle_resident = si_make_image_handle_resident;

   if (!sctx->has_graphics)
      return;

   sctx->b.set_polygon_stipple = si_set_polygon_stipple;

   /* Shader user data. */
   sctx->atoms.s.shader_pointers.emit = si_emit_graphics_shader_pointers;

   /* Set default and immutable mappings. */
   si_set_user_data_base(sctx, PIPE_SHADER_VERTEX,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               sctx->ngg, PIPE_SHADER_VERTEX));
   si_set_user_data_base(sctx, PIPE_SHADER_TESS_CTRL,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_TESS_CTRL));
   si_set_user_data_base(sctx, PIPE_SHADER_GEOMETRY,
                         si_get_user_data_base(sctx->chip_class, TESS_OFF, GS_OFF,
                                               NGG_OFF, PIPE_SHADER_GEOMETRY));
   si_set_user_data_base(sctx, PIPE_SHADER_FRAGMENT, R_00B030_SPI_SHADER_USER_DATA_PS_0);
}

static bool si_upload_shader_descriptors(struct si_context *sctx, unsigned mask)
{
   unsigned dirty = sctx->descriptors_dirty & mask;

   if (dirty) {
      unsigned iter_mask = dirty;

      do {
         if (!si_upload_descriptors(sctx, &sctx->descriptors[u_bit_scan(&iter_mask)]))
            return false;
      } while (iter_mask);

      sctx->descriptors_dirty &= ~dirty;
      sctx->shader_pointers_dirty |= dirty;
      si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
   }

   si_upload_bindless_descriptors(sctx);
   return true;
}

bool si_upload_graphics_shader_descriptors(struct si_context *sctx)
{
   const unsigned mask = u_bit_consecutive(0, SI_DESCS_FIRST_COMPUTE);
   return si_upload_shader_descriptors(sctx, mask);
}

bool si_upload_compute_shader_descriptors(struct si_context *sctx)
{
   /* This does not update internal bindings, as that is not needed for
    * compute shaders and the input buffer is using the same SGPRs anyway.
    */
   const unsigned mask =
      u_bit_consecutive(SI_DESCS_FIRST_COMPUTE, SI_NUM_DESCS - SI_DESCS_FIRST_COMPUTE);
   return si_upload_shader_descriptors(sctx, mask);
}

void si_release_all_descriptors(struct si_context *sctx)
{
   int i;

   for (i = 0; i < SI_NUM_SHADERS; i++) {
      si_release_buffer_resources(&sctx->const_and_shader_buffers[i],
                                  si_const_and_shader_buffer_descriptors(sctx, i));
      si_release_sampler_views(&sctx->samplers[i]);
      si_release_image_views(&sctx->images[i]);
   }
   si_release_buffer_resources(&sctx->internal_bindings, &sctx->descriptors[SI_DESCS_INTERNAL]);
   for (i = 0; i < SI_NUM_VERTEX_BUFFERS; i++)
      pipe_vertex_buffer_unreference(&sctx->vertex_buffer[i]);

   for (i = 0; i < SI_NUM_DESCS; ++i)
      si_release_descriptors(&sctx->descriptors[i]);

   si_resource_reference(&sctx->vb_descriptors_buffer, NULL);
   sctx->vb_descriptors_gpu_list = NULL; /* points into a mapped buffer */

   si_release_bindless_descriptors(sctx);
}
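
/* Sketch of the TMZ (encrypted memory) policy implemented below: a job has
 * to take the secure submission path if any resource it may read or write
 * is an encrypted BO, so the checks walk every binding category and bail
 * out at the first encrypted hit.
 */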

bool si_gfx_resources_check_encrypted(struct si_context *sctx)
{
   bool use_encrypted_bo = false;

   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS && !use_encrypted_bo; i++) {
      struct si_shader_ctx_state *current_shader = &sctx->shaders[i];
      if (!current_shader->cso)
         continue;

      use_encrypted_bo |=
         si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[i]);
      use_encrypted_bo |=
         si_sampler_views_check_encrypted(sctx, &sctx->samplers[i],
                                          current_shader->cso->info.base.textures_used[0]);
      use_encrypted_bo |= si_image_views_check_encrypted(
         sctx, &sctx->images[i], u_bit_consecutive(0, current_shader->cso->info.base.num_images));
   }
   use_encrypted_bo |= si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);

   struct si_state_blend *blend = sctx->queued.named.blend;
   for (int i = 0; i < sctx->framebuffer.state.nr_cbufs && !use_encrypted_bo; i++) {
      struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
      if (surf && surf->texture) {
         struct si_texture *tex = (struct si_texture *)surf->texture;
         if (!(tex->buffer.flags & RADEON_FLAG_ENCRYPTED))
            continue;

         /* Are we reading from this framebuffer? */
         if (((blend->blend_enable_4bit >> (4 * i)) & 0xf) ||
             vi_dcc_enabled(tex, 0)) {
            use_encrypted_bo = true;
         }
      }
   }

   if (sctx->framebuffer.state.zsbuf) {
      struct si_texture *zs = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture;
      if (zs && (zs->buffer.flags & RADEON_FLAG_ENCRYPTED)) {
         /* TODO: This isn't needed if depth.func is PIPE_FUNC_NEVER or PIPE_FUNC_ALWAYS. */
         use_encrypted_bo = true;
      }
   }

#ifndef NDEBUG
   if (use_encrypted_bo) {
      /* Verify that the color buffers are encrypted. */
      for (int i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) {
         struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i];
         if (!surf)
            continue;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
      /* Verify that the depth/stencil buffer is encrypted. */
      if (sctx->framebuffer.state.zsbuf) {
         struct pipe_surface *surf = sctx->framebuffer.state.zsbuf;
         struct si_texture *tex = (struct si_texture *)surf->texture;
         assert(!surf->texture || (tex->buffer.flags & RADEON_FLAG_ENCRYPTED));
      }
   }
#endif

   return use_encrypted_bo;
}

void si_gfx_resources_add_all_to_bo_list(struct si_context *sctx)
{
   for (unsigned i = 0; i < SI_NUM_GRAPHICS_SHADERS; i++) {
      si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[i]);
      si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i]);
      si_image_views_begin_new_cs(sctx, &sctx->images[i]);
   }
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);
   si_vertex_buffers_begin_new_cs(sctx);

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);

   assert(sctx->bo_list_add_all_gfx_resources);
   sctx->bo_list_add_all_gfx_resources = false;
}

bool si_compute_resources_check_encrypted(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   struct si_shader_info *info = &sctx->cs_shader_state.program->sel.info;

   /* TODO: we should assert that either use_encrypted_bo is false,
    * or all writable buffers are encrypted.
    */
   return si_buffer_resources_check_encrypted(sctx, &sctx->const_and_shader_buffers[sh]) ||
          si_sampler_views_check_encrypted(sctx, &sctx->samplers[sh],
                                           info->base.textures_used[0]) ||
          si_image_views_check_encrypted(sctx, &sctx->images[sh],
                                         u_bit_consecutive(0, info->base.num_images)) ||
          si_buffer_resources_check_encrypted(sctx, &sctx->internal_bindings);
}

void si_compute_resources_add_all_to_bo_list(struct si_context *sctx)
{
   unsigned sh = PIPE_SHADER_COMPUTE;

   si_buffer_resources_begin_new_cs(sctx, &sctx->const_and_shader_buffers[sh]);
   si_sampler_views_begin_new_cs(sctx, &sctx->samplers[sh]);
   si_image_views_begin_new_cs(sctx, &sctx->images[sh]);
   si_buffer_resources_begin_new_cs(sctx, &sctx->internal_bindings);

   if (sctx->bo_list_add_all_resident_resources)
      si_resident_buffers_add_all_to_bo_list(sctx);

   assert(sctx->bo_list_add_all_compute_resources);
   sctx->bo_list_add_all_compute_resources = false;
}

void si_add_all_descriptors_to_bo_list(struct si_context *sctx)
{
   for (unsigned i = 0; i < SI_NUM_DESCS; ++i)
      si_add_descriptors_to_bo_list(sctx, &sctx->descriptors[i]);
   si_add_descriptors_to_bo_list(sctx, &sctx->bindless_descriptors);

   sctx->bo_list_add_all_resident_resources = true;
   sctx->bo_list_add_all_gfx_resources = true;
   sctx->bo_list_add_all_compute_resources = true;
}

void si_set_active_descriptors(struct si_context *sctx, unsigned desc_idx, uint64_t new_active_mask)
{
   struct si_descriptors *desc = &sctx->descriptors[desc_idx];

   /* Ignore no-op updates and updates that disable all slots. */
   if (!new_active_mask ||
       new_active_mask == u_bit_consecutive64(desc->first_active_slot, desc->num_active_slots))
      return;

   int first, count;
   u_bit_scan_consecutive_range64(&new_active_mask, &first, &count);
   assert(new_active_mask == 0);

   /* Upload/dump descriptors if slots are being enabled. */
   if (first < desc->first_active_slot ||
       first + count > desc->first_active_slot + desc->num_active_slots)
      sctx->descriptors_dirty |= 1u << desc_idx;

   desc->first_active_slot = first;
   desc->num_active_slots = count;
}

void si_set_active_descriptors_for_shader(struct si_context *sctx, struct si_shader_selector *sel)
{
   if (!sel)
      return;

   si_set_active_descriptors(sctx, sel->const_and_shader_buf_descriptors_index,
                             sel->active_const_and_shader_buffers);
   si_set_active_descriptors(sctx, sel->sampler_and_images_descriptors_index,
                             sel->active_samplers_and_images);
}
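
/* Usage sketch for si_set_active_descriptors() (hypothetical mask): a
 * shader whose active mask is 0x3ull << 32 (two consecutive active slots
 * starting at slot 32) ends up with first_active_slot == 32 and
 * num_active_slots == 2, and the list is only marked dirty when the new
 * range reaches outside the old one, i.e. when slots are being enabled.
 */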