Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_pipe.h
4570 views
/*1* Copyright 2010 Jerome Glisse <[email protected]>2* Copyright 2018 Advanced Micro Devices, Inc.3* All Rights Reserved.4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the "Software"),7* to deal in the Software without restriction, including without limitation8* on the rights to use, copy, modify, merge, publish, distribute, sub9* license, and/or sell copies of the Software, and to permit persons to whom10* the Software is furnished to do so, subject to the following conditions:11*12* The above copyright notice and this permission notice (including the next13* paragraph) shall be included in all copies or substantial portions of the14* Software.15*16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR17* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,18* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL19* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,20* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR21* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE22* USE OR OTHER DEALINGS IN THE SOFTWARE.23*/24#ifndef SI_PIPE_H25#define SI_PIPE_H2627#include "si_shader.h"28#include "si_state.h"29#include "util/u_dynarray.h"30#include "util/u_idalloc.h"31#include "util/u_suballoc.h"32#include "util/u_threaded_context.h"33#include "ac_sqtt.h"3435#ifdef __cplusplus36extern "C" {37#endif3839#if UTIL_ARCH_BIG_ENDIAN40#define SI_BIG_ENDIAN 141#else42#define SI_BIG_ENDIAN 043#endif4445#define ATI_VENDOR_ID 0x100246#define SI_PRIM_DISCARD_DEBUG 047#define SI_NOT_QUERY 0xffffffff4849/* The base vertex and primitive restart can be any number, but we must pick50* one which will mean "unknown" for the purpose of state tracking and51* the number shouldn't be a commonly-used one. 
*/52#define SI_BASE_VERTEX_UNKNOWN INT_MIN53#define SI_START_INSTANCE_UNKNOWN ((unsigned)INT_MIN)54#define SI_DRAW_ID_UNKNOWN ((unsigned)INT_MIN)55#define SI_RESTART_INDEX_UNKNOWN ((unsigned)INT_MIN)56#define SI_INSTANCE_COUNT_UNKNOWN ((unsigned)INT_MIN)57#define SI_NUM_SMOOTH_AA_SAMPLES 458#define SI_MAX_POINT_SIZE 204859#define SI_GS_PER_ES 12860/* Alignment for optimal CP DMA performance. */61#define SI_CPDMA_ALIGNMENT 326263/* Tunables for compute-based clear_buffer and copy_buffer: */64#define SI_COMPUTE_CLEAR_DW_PER_THREAD 465#define SI_COMPUTE_COPY_DW_PER_THREAD 466/* L2 LRU is recommended because the compute shader can finish sooner due to fewer L2 evictions. */67#define SI_COMPUTE_DST_CACHE_POLICY L2_LRU6869/* Pipeline & streamout query controls. */70#define SI_CONTEXT_START_PIPELINE_STATS (1 << 0)71#define SI_CONTEXT_STOP_PIPELINE_STATS (1 << 1)72#define SI_CONTEXT_FLUSH_FOR_RENDER_COND (1 << 2)73/* Instruction cache. */74#define SI_CONTEXT_INV_ICACHE (1 << 3)75/* Scalar cache. (GFX6-9: scalar L1; GFX10: scalar L0)76* GFX10: This also invalidates the L1 shader array cache. */77#define SI_CONTEXT_INV_SCACHE (1 << 4)78/* Vector cache. (GFX6-9: vector L1; GFX10: vector L0)79* GFX10: This also invalidates the L1 shader array cache. */80#define SI_CONTEXT_INV_VCACHE (1 << 5)81/* L2 cache + L2 metadata cache writeback & invalidate.82* GFX6-8: Used by shaders only. GFX9-10: Used by everything. */83#define SI_CONTEXT_INV_L2 (1 << 6)84/* L2 writeback (write dirty L2 lines to memory for non-L2 clients).85* Only used for coherency with non-L2 clients like CB, DB, CP on GFX6-8.86* GFX6-7 will do complete invalidation, because the writeback is unsupported. */87#define SI_CONTEXT_WB_L2 (1 << 7)88/* Writeback & invalidate the L2 metadata cache only. It can only be coupled with89* a CB or DB flush. */90#define SI_CONTEXT_INV_L2_METADATA (1 << 8)91/* Framebuffer caches. 
*/92#define SI_CONTEXT_FLUSH_AND_INV_DB (1 << 9)93#define SI_CONTEXT_FLUSH_AND_INV_DB_META (1 << 10)94#define SI_CONTEXT_FLUSH_AND_INV_CB (1 << 11)95/* Engine synchronization. */96#define SI_CONTEXT_VS_PARTIAL_FLUSH (1 << 12)97#define SI_CONTEXT_PS_PARTIAL_FLUSH (1 << 13)98#define SI_CONTEXT_CS_PARTIAL_FLUSH (1 << 14)99#define SI_CONTEXT_VGT_FLUSH (1 << 15)100#define SI_CONTEXT_VGT_STREAMOUT_SYNC (1 << 16)101/* PFP waits for ME to finish. Used to sync for index and indirect buffers and render102* condition. It's typically set when doing a VS/PS/CS partial flush for buffers. */103#define SI_CONTEXT_PFP_SYNC_ME (1 << 17)104105#define SI_PREFETCH_LS (1 << 1)106#define SI_PREFETCH_HS (1 << 2)107#define SI_PREFETCH_ES (1 << 3)108#define SI_PREFETCH_GS (1 << 4)109#define SI_PREFETCH_VS (1 << 5)110#define SI_PREFETCH_PS (1 << 6)111112#define SI_MAX_BORDER_COLORS 4096113#define SI_MAX_VIEWPORTS 16114#define SIX_BITS 0x3F115#define SI_MAP_BUFFER_ALIGNMENT 64116/* We only support the minimum allowed value (512), so that we can pack a 3D block size117* in 1 SGPR. 
*/118#define SI_MAX_VARIABLE_THREADS_PER_BLOCK 512119120#define SI_CONTEXT_FLAG_AUX (1u << 31)121122#define SI_RESOURCE_FLAG_FORCE_LINEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 0)123#define SI_RESOURCE_FLAG_FLUSHED_DEPTH (PIPE_RESOURCE_FLAG_DRV_PRIV << 1)124#define SI_RESOURCE_FLAG_FORCE_MSAA_TILING (PIPE_RESOURCE_FLAG_DRV_PRIV << 2)125#define SI_RESOURCE_FLAG_DISABLE_DCC (PIPE_RESOURCE_FLAG_DRV_PRIV << 3)126#define SI_RESOURCE_FLAG_UNMAPPABLE (PIPE_RESOURCE_FLAG_DRV_PRIV << 4)127#define SI_RESOURCE_FLAG_READ_ONLY (PIPE_RESOURCE_FLAG_DRV_PRIV << 5)128#define SI_RESOURCE_FLAG_32BIT (PIPE_RESOURCE_FLAG_DRV_PRIV << 6)129#define SI_RESOURCE_FLAG_CLEAR (PIPE_RESOURCE_FLAG_DRV_PRIV << 7)130/* gap */131/* Set a micro tile mode: */132#define SI_RESOURCE_FLAG_FORCE_MICRO_TILE_MODE (PIPE_RESOURCE_FLAG_DRV_PRIV << 9)133#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT (util_logbase2(PIPE_RESOURCE_FLAG_DRV_PRIV) + 10)134#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_SET(x) \135(((x)&0x3) << SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT)136#define SI_RESOURCE_FLAG_MICRO_TILE_MODE_GET(x) \137(((x) >> SI_RESOURCE_FLAG_MICRO_TILE_MODE_SHIFT) & 0x3)138#define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)139#define SI_RESOURCE_FLAG_DRIVER_INTERNAL (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)140#define SI_RESOURCE_AUX_PLANE (PIPE_RESOURCE_FLAG_DRV_PRIV << 14)141142enum si_has_gs {143GS_OFF,144GS_ON,145};146147enum si_has_tess {148TESS_OFF,149TESS_ON,150};151152enum si_has_ngg {153NGG_OFF,154NGG_ON,155};156157enum si_has_prim_discard_cs {158PRIM_DISCARD_CS_OFF,159PRIM_DISCARD_CS_ON,160};161162enum si_clear_code163{164DCC_CLEAR_COLOR_0000 = 0x00000000,165DCC_CLEAR_COLOR_0001 = 0x40404040,166DCC_CLEAR_COLOR_1110 = 0x80808080,167DCC_CLEAR_COLOR_1111 = 0xC0C0C0C0,168DCC_CLEAR_COLOR_REG = 0x20202020,169DCC_UNCOMPRESSED = 0xFFFFFFFF,170};171172#define SI_IMAGE_ACCESS_DCC_OFF (1 << 8)173#define SI_IMAGE_ACCESS_DCC_WRITE (1 << 9)174175/* Debug flags. 
*/176enum177{178/* Shader logging options: */179DBG_VS = MESA_SHADER_VERTEX,180DBG_TCS = MESA_SHADER_TESS_CTRL,181DBG_TES = MESA_SHADER_TESS_EVAL,182DBG_GS = MESA_SHADER_GEOMETRY,183DBG_PS = MESA_SHADER_FRAGMENT,184DBG_CS = MESA_SHADER_COMPUTE,185DBG_NO_IR,186DBG_NO_NIR,187DBG_NO_ASM,188DBG_PREOPT_IR,189190/* Shader compiler options the shader cache should be aware of: */191DBG_FS_CORRECT_DERIVS_AFTER_KILL,192DBG_GISEL,193DBG_W32_GE,194DBG_W32_PS,195DBG_W32_CS,196DBG_W64_GE,197DBG_W64_PS,198DBG_W64_CS,199200/* Shader compiler options (with no effect on the shader cache): */201DBG_CHECK_IR,202DBG_MONOLITHIC_SHADERS,203DBG_NO_OPT_VARIANT,204205/* Information logging options: */206DBG_INFO,207DBG_TEX,208DBG_COMPUTE,209DBG_VM,210DBG_CACHE_STATS,211212/* Driver options: */213DBG_NO_WC,214DBG_CHECK_VM,215DBG_RESERVE_VMID,216DBG_SHADOW_REGS,217218/* 3D engine options: */219DBG_NO_GFX,220DBG_NO_NGG,221DBG_ALWAYS_NGG_CULLING_ALL,222DBG_ALWAYS_NGG_CULLING_TESS,223DBG_NO_NGG_CULLING,224DBG_NO_FAST_LAUNCH,225DBG_ALWAYS_PD,226DBG_PD,227DBG_NO_PD,228DBG_SWITCH_ON_EOP,229DBG_NO_OUT_OF_ORDER,230DBG_NO_DPBB,231DBG_DPBB,232DBG_NO_HYPERZ,233DBG_NO_2D_TILING,234DBG_NO_TILING,235DBG_NO_DISPLAY_TILING,236DBG_NO_DISPLAY_DCC,237DBG_NO_DCC,238DBG_NO_DCC_CLEAR,239DBG_NO_DCC_FB,240DBG_NO_DCC_MSAA,241DBG_NO_FMASK,242243DBG_TMZ,244DBG_SQTT,245246DBG_COUNT247};248249enum250{251/* Tests: */252DBG_TEST_BLIT,253DBG_TEST_VMFAULT_CP,254DBG_TEST_VMFAULT_SHADER,255DBG_TEST_DMA_PERF,256DBG_TEST_GDS,257DBG_TEST_GDS_MM,258DBG_TEST_GDS_OA_MM,259};260261#define DBG_ALL_SHADERS (((1 << (DBG_CS + 1)) - 1))262#define DBG(name) (1ull << DBG_##name)263264enum si_cache_policy265{266L2_BYPASS,267L2_STREAM, /* same as SLC=1 */268L2_LRU, /* same as SLC=0 */269};270271enum si_coherency272{273SI_COHERENCY_NONE, /* no cache flushes needed */274SI_COHERENCY_SHADER,275SI_COHERENCY_CB_META,276SI_COHERENCY_DB_META,277SI_COHERENCY_CP,278};279280struct si_compute;281struct si_shader_context;282struct hash_table;283284/* 
Only 32-bit buffer allocations are supported, gallium doesn't support more285* at the moment.286*/287struct si_resource {288struct threaded_resource b;289290/* Winsys objects. */291struct pb_buffer *buf;292uint64_t gpu_address;293/* Memory usage if the buffer placement is optimal. */294uint32_t vram_usage_kb;295uint32_t gart_usage_kb;296297/* Resource properties. */298uint64_t bo_size;299uint8_t bo_alignment_log2;300enum radeon_bo_domain domains:8;301enum radeon_bo_flag flags:16;302unsigned bind_history;303int max_forced_staging_uploads;304305/* The buffer range which is initialized (with a write transfer,306* streamout, DMA, or as a random access target). The rest of307* the buffer is considered invalid and can be mapped unsynchronized.308*309* This allows unsynchronized mapping of a buffer range which hasn't310* been used yet. It's for applications which forget to use311* the unsynchronized map flag and expect the driver to figure it out.312*/313struct util_range valid_buffer_range;314315/* For buffers only. This indicates that a write operation has been316* performed by TC L2, but the cache hasn't been flushed.317* Any hw block which doesn't use or bypasses TC L2 should check this318* flag and flush the cache before using the buffer.319*320* For example, TC L2 must be flushed if a buffer which has been321* modified by a shader store instruction is about to be used as322* an index buffer. The reason is that VGT DMA index fetching doesn't323* use TC L2.324*/325bool TC_L2_dirty;326327/* Whether this resource is referenced by bindless handles. */328bool texture_handle_allocated;329bool image_handle_allocated;330331/* Whether the resource has been exported via resource_get_handle. 
*/332uint8_t external_usage; /* PIPE_HANDLE_USAGE_* */333};334335struct si_transfer {336struct threaded_transfer b;337struct si_resource *staging;338};339340struct si_texture {341struct si_resource buffer;342343struct radeon_surf surface;344struct si_texture *flushed_depth_texture;345346/* One texture allocation can contain these buffers:347* - image (pixel data)348* - FMASK buffer (MSAA compression)349* - CMASK buffer (MSAA compression and/or legacy fast color clear)350* - HTILE buffer (Z/S compression and fast Z/S clear)351* - DCC buffer (color compression and new fast color clear)352* - displayable DCC buffer (if the DCC buffer is not displayable)353*/354uint64_t cmask_base_address_reg;355struct si_resource *cmask_buffer;356unsigned cb_color_info; /* fast clear enable bit */357unsigned color_clear_value[2];358unsigned last_msaa_resolve_target_micro_mode;359bool swap_rgb_to_bgr_on_next_clear;360bool swap_rgb_to_bgr;361unsigned num_level0_transfers;362unsigned plane_index; /* other planes are different pipe_resources */363unsigned num_planes;364365/* Depth buffer compression and fast clear. 
*/366float depth_clear_value[RADEON_SURF_MAX_LEVELS];367uint8_t stencil_clear_value[RADEON_SURF_MAX_LEVELS];368uint16_t depth_cleared_level_mask_once; /* if it was cleared at least once */369uint16_t depth_cleared_level_mask; /* track if it was cleared (not 100% accurate) */370uint16_t stencil_cleared_level_mask; /* if it was cleared at least once */371uint16_t dirty_level_mask; /* each bit says if that mipmap is compressed */372uint16_t stencil_dirty_level_mask; /* each bit says if that mipmap is compressed */373enum pipe_format db_render_format : 16;374bool fmask_is_identity : 1;375bool tc_compatible_htile : 1;376bool enable_tc_compatible_htile_next_clear : 1;377bool htile_stencil_disabled : 1;378bool upgraded_depth : 1; /* upgraded from unorm to Z32_FLOAT */379bool is_depth : 1;380bool db_compatible : 1;381bool can_sample_z : 1;382bool can_sample_s : 1;383384/* We need to track DCC dirtiness, because st/dri usually calls385* flush_resource twice per frame (not a bug) and we don't wanna386* decompress DCC twice.387*/388bool displayable_dcc_dirty : 1;389390/* Counter that should be non-zero if the texture is bound to a391* framebuffer.392*/393unsigned framebuffers_bound;394};395396/* State trackers create separate textures in a next-chain for extra planes397* even if those are planes created purely for modifiers. Because the linking398* of the chain happens outside of the driver, and NULL is interpreted as399* failure, let's create some dummy texture structs. 
We could use these400* later to use the offsets for linking if we really wanted to.401*402* For now just create a dummy struct and completely ignore it.403*404* Potentially in the future we could store stride/offset and use it during405* creation, though we might want to change how linking is done first.406*/407struct si_auxiliary_texture {408struct threaded_resource b;409struct pb_buffer *buffer;410uint32_t offset;411uint32_t stride;412};413414struct si_surface {415struct pipe_surface base;416417/* These can vary with block-compressed textures. */418uint16_t width0;419uint16_t height0;420421bool color_initialized : 1;422bool depth_initialized : 1;423424/* Misc. color flags. */425bool color_is_int8 : 1;426bool color_is_int10 : 1;427bool dcc_incompatible : 1;428429/* Color registers. */430unsigned cb_color_info;431unsigned cb_color_view;432unsigned cb_color_attrib;433unsigned cb_color_attrib2; /* GFX9 and later */434unsigned cb_color_attrib3; /* GFX10 and later */435unsigned cb_dcc_control; /* GFX8 and later */436unsigned spi_shader_col_format : 8; /* no blending, no alpha-to-coverage. */437unsigned spi_shader_col_format_alpha : 8; /* alpha-to-coverage */438unsigned spi_shader_col_format_blend : 8; /* blending without alpha. */439unsigned spi_shader_col_format_blend_alpha : 8; /* blending with alpha. */440441/* DB registers. */442uint64_t db_depth_base; /* DB_Z_READ/WRITE_BASE */443uint64_t db_stencil_base;444uint64_t db_htile_data_base;445unsigned db_depth_info;446unsigned db_z_info;447unsigned db_z_info2; /* GFX9 only */448unsigned db_depth_view;449unsigned db_depth_size;450unsigned db_depth_slice;451unsigned db_stencil_info;452unsigned db_stencil_info2; /* GFX9 only */453unsigned db_htile_surface;454};455456struct si_mmio_counter {457unsigned busy;458unsigned idle;459};460461union si_mmio_counters {462struct si_mmio_counters_named {463/* For global GPU load including SDMA. 
*/464struct si_mmio_counter gpu;465466/* GRBM_STATUS */467struct si_mmio_counter spi;468struct si_mmio_counter gui;469struct si_mmio_counter ta;470struct si_mmio_counter gds;471struct si_mmio_counter vgt;472struct si_mmio_counter ia;473struct si_mmio_counter sx;474struct si_mmio_counter wd;475struct si_mmio_counter bci;476struct si_mmio_counter sc;477struct si_mmio_counter pa;478struct si_mmio_counter db;479struct si_mmio_counter cp;480struct si_mmio_counter cb;481482/* SRBM_STATUS2 */483struct si_mmio_counter sdma;484485/* CP_STAT */486struct si_mmio_counter pfp;487struct si_mmio_counter meq;488struct si_mmio_counter me;489struct si_mmio_counter surf_sync;490struct si_mmio_counter cp_dma;491struct si_mmio_counter scratch_ram;492} named;493494unsigned array[sizeof(struct si_mmio_counters_named) / sizeof(unsigned)];495};496497struct si_memory_object {498struct pipe_memory_object b;499struct pb_buffer *buf;500uint32_t stride;501};502503/* Saved CS data for debugging features. */504struct radeon_saved_cs {505uint32_t *ib;506unsigned num_dw;507508struct radeon_bo_list_item *bo_list;509unsigned bo_count;510};511512struct si_screen {513struct pipe_screen b;514struct radeon_winsys *ws;515struct disk_cache *disk_shader_cache;516517struct radeon_info info;518struct nir_shader_compiler_options nir_options;519uint64_t debug_flags;520char renderer_string[183];521522void (*make_texture_descriptor)(struct si_screen *screen, struct si_texture *tex, bool sampler,523enum pipe_texture_target target, enum pipe_format pipe_format,524const unsigned char state_swizzle[4], unsigned first_level,525unsigned last_level, unsigned first_layer, unsigned last_layer,526unsigned width, unsigned height, unsigned depth, uint32_t *state,527uint32_t *fmask_state);528529unsigned num_vbos_in_user_sgprs;530unsigned pa_sc_raster_config;531unsigned pa_sc_raster_config_1;532unsigned se_tile_repeat;533unsigned gs_table_depth;534unsigned tess_offchip_block_dw_size;535unsigned 
tess_offchip_ring_size;536unsigned tess_factor_ring_size;537unsigned vgt_hs_offchip_param;538unsigned eqaa_force_coverage_samples;539unsigned eqaa_force_z_samples;540unsigned eqaa_force_color_samples;541unsigned pbb_context_states_per_bin;542unsigned pbb_persistent_states_per_bin;543bool has_draw_indirect_multi;544bool has_out_of_order_rast;545bool assume_no_z_fights;546bool commutative_blend_add;547bool allow_draw_out_of_order;548bool dpbb_allowed;549bool use_ngg;550bool use_ngg_culling;551bool use_ngg_streamout;552bool allow_dcc_msaa_clear_to_reg_for_bpp[5]; /* indexed by log2(Bpp) */553554struct {555#define OPT_BOOL(name, dflt, description) bool name : 1;556#include "si_debug_options.h"557} options;558559/* Whether shaders are monolithic (1-part) or separate (3-part). */560bool use_monolithic_shaders;561bool record_llvm_ir;562563struct slab_parent_pool pool_transfers;564565/* Texture filter settings. */566int force_aniso; /* -1 = disabled */567568/* Auxiliary context. Mainly used to initialize resources.569* It must be locked prior to using and flushed before unlocking. */570struct pipe_context *aux_context;571simple_mtx_t aux_context_lock;572573/* This must be in the screen, because UE4 uses one context for574* compilation and another one for rendering.575*/576unsigned num_compilations;577/* Along with ST_DEBUG=precompile, this should show if applications578* are loading shaders on demand. This is a monotonic counter.579*/580unsigned num_shaders_created;581unsigned num_memory_shader_cache_hits;582unsigned num_memory_shader_cache_misses;583unsigned num_disk_shader_cache_hits;584unsigned num_disk_shader_cache_misses;585586/* GPU load thread. */587simple_mtx_t gpu_load_mutex;588thrd_t gpu_load_thread;589union si_mmio_counters mmio_counters;590volatile unsigned gpu_load_stop_thread; /* bool */591592/* Performance counters. 
*/593struct si_perfcounters *perfcounters;594595/* If pipe_screen wants to recompute and re-emit the framebuffer,596* sampler, and image states of all contexts, it should atomically597* increment this.598*599* Each context will compare this with its own last known value of600* the counter before drawing and re-emit the states accordingly.601*/602unsigned dirty_tex_counter;603unsigned dirty_buf_counter;604605/* Atomically increment this counter when an existing texture's606* metadata is enabled or disabled in a way that requires changing607* contexts' compressed texture binding masks.608*/609unsigned compressed_colortex_counter;610611struct {612/* Context flags to set so that all writes from earlier jobs613* in the CP are seen by L2 clients.614*/615unsigned cp_to_L2;616617/* Context flags to set so that all writes from earlier jobs618* that end in L2 are seen by CP.619*/620unsigned L2_to_cp;621} barrier_flags;622623simple_mtx_t shader_parts_mutex;624struct si_shader_part *vs_prologs;625struct si_shader_part *tcs_epilogs;626struct si_shader_part *gs_prologs;627struct si_shader_part *ps_prologs;628struct si_shader_part *ps_epilogs;629630/* Shader cache in memory.631*632* Design & limitations:633* - The shader cache is per screen (= per process), never saved to634* disk, and skips redundant shader compilations from NIR to bytecode.635* - It can only be used with one-variant-per-shader support, in which636* case only the main (typically middle) part of shaders is cached.637* - Only VS, TCS, TES, PS are cached, out of which only the hw VS638* variants of VS and TES are cached, so LS and ES aren't.639* - GS and CS aren't cached, but it's certainly possible to cache640* those as well.641*/642simple_mtx_t shader_cache_mutex;643struct hash_table *shader_cache;644/* Maximum and current size */645uint32_t shader_cache_size;646uint32_t shader_cache_max_size;647648/* Shader cache of live shaders. 
*/649struct util_live_shader_cache live_shader_cache;650651/* Shader compiler queue for multithreaded compilation. */652struct util_queue shader_compiler_queue;653/* Use at most 3 normal compiler threads on quadcore and better.654* Hyperthreaded CPUs report the number of threads, but we want655* the number of cores. We only need this many threads for shader-db. */656struct ac_llvm_compiler compiler[24]; /* used by the queue only */657658struct util_queue shader_compiler_queue_low_priority;659/* Use at most 2 low priority threads on quadcore and better.660* We want to minimize the impact on multithreaded Mesa. */661struct ac_llvm_compiler compiler_lowp[10];662663unsigned compute_wave_size;664unsigned ps_wave_size;665unsigned ge_wave_size;666unsigned ngg_subgroup_size;667668struct util_idalloc_mt buffer_ids;669};670671struct si_sampler_view {672struct pipe_sampler_view base;673/* [0..7] = image descriptor674* [4..7] = buffer descriptor */675uint32_t state[8];676uint32_t fmask_state[8];677const struct legacy_surf_level *base_level_info;678ubyte base_level;679ubyte block_width;680bool is_stencil_sampler;681bool dcc_incompatible;682};683684#define SI_SAMPLER_STATE_MAGIC 0x34f1c35a685686struct si_sampler_state {687#ifndef NDEBUG688unsigned magic;689#endif690uint32_t val[4];691uint32_t upgraded_depth_val[4];692};693694struct si_cs_shader_state {695struct si_compute *program;696struct si_compute *emitted_program;697unsigned offset;698bool initialized;699bool uses_scratch;700};701702struct si_samplers {703struct pipe_sampler_view *views[SI_NUM_SAMPLERS];704struct si_sampler_state *sampler_states[SI_NUM_SAMPLERS];705706/* The i-th bit is set if that element is enabled (non-NULL resource). 
*/707unsigned enabled_mask;708uint32_t needs_depth_decompress_mask;709uint32_t needs_color_decompress_mask;710};711712struct si_images {713struct pipe_image_view views[SI_NUM_IMAGES];714uint32_t needs_color_decompress_mask;715unsigned enabled_mask;716unsigned display_dcc_store_mask;717};718719struct si_framebuffer {720struct pipe_framebuffer_state state;721unsigned colorbuf_enabled_4bit;722unsigned spi_shader_col_format;723unsigned spi_shader_col_format_alpha;724unsigned spi_shader_col_format_blend;725unsigned spi_shader_col_format_blend_alpha;726ubyte nr_samples : 5; /* at most 16xAA */727ubyte log_samples : 3; /* at most 4 = 16xAA */728ubyte nr_color_samples; /* at most 8xAA */729ubyte compressed_cb_mask;730ubyte uncompressed_cb_mask;731ubyte color_is_int8;732ubyte color_is_int10;733ubyte dirty_cbufs;734ubyte dcc_overwrite_combiner_watermark;735ubyte min_bytes_per_pixel;736bool dirty_zsbuf;737bool any_dst_linear;738bool CB_has_shader_readable_metadata;739bool DB_has_shader_readable_metadata;740bool all_DCC_pipe_aligned;741bool has_dcc_msaa;742};743744enum si_quant_mode745{746/* This is the list we want to support. */747SI_QUANT_MODE_16_8_FIXED_POINT_1_256TH,748SI_QUANT_MODE_14_10_FIXED_POINT_1_1024TH,749SI_QUANT_MODE_12_12_FIXED_POINT_1_4096TH,750};751752struct si_signed_scissor {753int minx;754int miny;755int maxx;756int maxy;757enum si_quant_mode quant_mode;758};759760struct si_viewports {761struct pipe_viewport_state states[SI_MAX_VIEWPORTS];762struct si_signed_scissor as_scissor[SI_MAX_VIEWPORTS];763};764765struct si_streamout_target {766struct pipe_stream_output_target b;767768/* The buffer where BUFFER_FILLED_SIZE is stored. 
*/769struct si_resource *buf_filled_size;770unsigned buf_filled_size_offset;771bool buf_filled_size_valid;772773unsigned stride_in_dw;774};775776struct si_streamout {777bool begin_emitted;778779unsigned enabled_mask;780unsigned num_targets;781struct si_streamout_target *targets[PIPE_MAX_SO_BUFFERS];782783unsigned append_bitmask;784bool suspended;785786/* External state which comes from the vertex shader,787* it must be set explicitly when binding a shader. */788uint16_t *stride_in_dw;789unsigned enabled_stream_buffers_mask; /* stream0 buffers0-3 in 4 LSB */790791/* The state of VGT_STRMOUT_BUFFER_(CONFIG|EN). */792unsigned hw_enabled_mask;793794/* The state of VGT_STRMOUT_(CONFIG|EN). */795bool streamout_enabled;796bool prims_gen_query_enabled;797int num_prims_gen_queries;798};799800/* A shader state consists of the shader selector, which is a constant state801* object shared by multiple contexts and shouldn't be modified, and802* the current shader variant selected for this context.803*/804struct si_shader_ctx_state {805struct si_shader_selector *cso;806struct si_shader *current;807};808809#define SI_NUM_VGT_PARAM_KEY_BITS 12810#define SI_NUM_VGT_PARAM_STATES (1 << SI_NUM_VGT_PARAM_KEY_BITS)811812/* The IA_MULTI_VGT_PARAM key used to index the table of precomputed values.813* Some fields are set by state-change calls, most are set by draw_vbo.814*/815union si_vgt_param_key {816struct {817#if UTIL_ARCH_LITTLE_ENDIAN818uint16_t prim : 4;819uint16_t uses_instancing : 1;820uint16_t multi_instances_smaller_than_primgroup : 1;821uint16_t primitive_restart : 1;822uint16_t count_from_stream_output : 1;823uint16_t line_stipple_enabled : 1;824uint16_t uses_tess : 1;825uint16_t tess_uses_prim_id : 1;826uint16_t uses_gs : 1;827uint16_t _pad : 16 - SI_NUM_VGT_PARAM_KEY_BITS;828#else /* UTIL_ARCH_BIG_ENDIAN */829uint16_t _pad : 16 - SI_NUM_VGT_PARAM_KEY_BITS;830uint16_t uses_gs : 1;831uint16_t tess_uses_prim_id : 1;832uint16_t uses_tess : 1;833uint16_t line_stipple_enabled : 
1;834uint16_t count_from_stream_output : 1;835uint16_t primitive_restart : 1;836uint16_t multi_instances_smaller_than_primgroup : 1;837uint16_t uses_instancing : 1;838uint16_t prim : 4;839#endif840} u;841uint16_t index;842};843844#define SI_NUM_VGT_STAGES_KEY_BITS 6845#define SI_NUM_VGT_STAGES_STATES (1 << SI_NUM_VGT_STAGES_KEY_BITS)846847/* The VGT_SHADER_STAGES key used to index the table of precomputed values.848* Some fields are set by state-change calls, most are set by draw_vbo.849*/850union si_vgt_stages_key {851struct {852#if UTIL_ARCH_LITTLE_ENDIAN853uint8_t tess : 1;854uint8_t gs : 1;855uint8_t ngg_gs_fast_launch : 1;856uint8_t ngg_passthrough : 1;857uint8_t ngg : 1; /* gfx10+ */858uint8_t streamout : 1; /* only used with NGG */859uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;860#else /* UTIL_ARCH_BIG_ENDIAN */861uint8_t _pad : 8 - SI_NUM_VGT_STAGES_KEY_BITS;862uint8_t streamout : 1;863uint8_t ngg : 1;864uint8_t ngg_passthrough : 1;865uint8_t ngg_gs_fast_launch : 1;866uint8_t gs : 1;867uint8_t tess : 1;868#endif869} u;870uint8_t index;871};872873struct si_texture_handle {874unsigned desc_slot;875bool desc_dirty;876struct pipe_sampler_view *view;877struct si_sampler_state sstate;878};879880struct si_image_handle {881unsigned desc_slot;882bool desc_dirty;883struct pipe_image_view view;884};885886struct si_saved_cs {887struct pipe_reference reference;888struct si_context *ctx;889struct radeon_saved_cs gfx;890struct radeon_saved_cs compute;891struct si_resource *trace_buf;892unsigned trace_id;893894unsigned gfx_last_dw;895unsigned compute_last_dw;896bool flushed;897int64_t time_flush;898};899900struct si_small_prim_cull_info {901float scale[2], translate[2];902float small_prim_precision;903};904905typedef void (*pipe_draw_vbo_func)(struct pipe_context *pipe,906const struct pipe_draw_info *info,907unsigned drawid_offset,908const struct pipe_draw_indirect_info *indirect,909const struct pipe_draw_start_count_bias *draws,910unsigned num_draws);911912struct 
si_context {913struct pipe_context b; /* base class */914915enum radeon_family family;916enum chip_class chip_class;917918struct radeon_winsys *ws;919struct radeon_winsys_ctx *ctx;920struct radeon_cmdbuf gfx_cs; /* compute IB if graphics is disabled */921struct pipe_fence_handle *last_gfx_fence;922struct si_resource *eop_bug_scratch;923struct si_resource *eop_bug_scratch_tmz;924struct u_upload_mgr *cached_gtt_allocator;925struct threaded_context *tc;926struct u_suballocator allocator_zeroed_memory;927struct slab_child_pool pool_transfers;928struct slab_child_pool pool_transfers_unsync; /* for threaded_context */929struct pipe_device_reset_callback device_reset_callback;930struct u_log_context *log;931void *query_result_shader;932void *sh_query_result_shader;933struct si_resource *shadowed_regs;934935void (*emit_cache_flush)(struct si_context *ctx, struct radeon_cmdbuf *cs);936937struct blitter_context *blitter;938void *noop_blend;939void *noop_dsa;940void *no_velems_state;941void *discard_rasterizer_state;942void *custom_dsa_flush;943void *custom_blend_resolve;944void *custom_blend_fmask_decompress;945void *custom_blend_eliminate_fastclear;946void *custom_blend_dcc_decompress;947void *vs_blit_pos;948void *vs_blit_pos_layered;949void *vs_blit_color;950void *vs_blit_color_layered;951void *vs_blit_texcoord;952void *cs_clear_buffer;953void *cs_clear_buffer_rmw;954void *cs_copy_buffer;955void *cs_copy_image;956void *cs_copy_image_1d_array;957void *cs_clear_render_target;958void *cs_clear_render_target_1d_array;959void *cs_clear_12bytes_buffer;960void *cs_dcc_decompress;961void *cs_dcc_retile;962void *cs_fmask_expand[3][2]; /* [log2(samples)-1][is_array] */963struct si_screen *screen;964struct pipe_debug_callback debug;965struct ac_llvm_compiler compiler; /* only non-threaded compilation */966struct si_shader_ctx_state fixed_func_tcs_shader;967/* Offset 0: EOP flush number; Offset 4: GDS prim restart counter */968struct si_resource *wait_mem_scratch;969struct si_resource 
*wait_mem_scratch_tmz;970unsigned wait_mem_number;971uint16_t prefetch_L2_mask;972973bool blitter_running;974bool is_noop:1;975bool has_graphics:1;976bool gfx_flush_in_progress : 1;977bool gfx_last_ib_is_busy : 1;978bool compute_is_busy : 1;979int8_t pipeline_stats_enabled; /* -1 = unknown, 0 = disabled, 1 = enabled */980981unsigned num_gfx_cs_flushes;982unsigned initial_gfx_cs_size;983unsigned last_dirty_tex_counter;984unsigned last_dirty_buf_counter;985unsigned last_compressed_colortex_counter;986unsigned last_num_draw_calls;987unsigned flags; /* flush flags */988/* Current unaccounted memory usage. */989uint32_t vram_kb;990uint32_t gtt_kb;991992/* NGG streamout. */993struct pb_buffer *gds;994struct pb_buffer *gds_oa;995/* Compute-based primitive discard. */996unsigned prim_discard_vertex_count_threshold;997struct radeon_cmdbuf prim_discard_compute_cs;998struct si_shader *compute_ib_last_shader;999uint32_t compute_rewind_va;1000unsigned compute_num_prims_in_batch;1001/* index_ring is divided into 2 halves for doublebuffering. */1002struct si_resource *index_ring;1003unsigned index_ring_base; /* offset of a per-IB portion */1004unsigned index_ring_offset; /* offset within a per-IB portion */1005unsigned index_ring_size_per_ib; /* max available size per IB */1006bool prim_discard_compute_ib_initialized;1007/* For tracking the last execution barrier - it can be either1008* a WRITE_DATA packet or a fence. */1009uint32_t *last_pkt3_write_data;1010struct si_resource *barrier_buf;1011unsigned barrier_buf_offset;1012struct pipe_fence_handle *last_ib_barrier_fence;1013struct si_resource *last_ib_barrier_buf;1014unsigned last_ib_barrier_buf_offset;10151016/* Atoms (direct states). */1017union si_state_atoms atoms;1018unsigned dirty_atoms; /* mask */1019/* PM4 states (precomputed immutable states) */1020unsigned dirty_states;1021union si_state queued;1022union si_state emitted;10231024/* Atom declarations. 
*/1025struct si_framebuffer framebuffer;1026unsigned sample_locs_num_samples;1027uint16_t sample_mask;1028unsigned last_cb_target_mask;1029struct pipe_blend_color blend_color;1030struct pipe_clip_state clip_state;1031struct si_shader_data shader_pointers;1032struct si_stencil_ref stencil_ref;1033bool blend_color_any_nonzeros:1;1034bool clip_state_any_nonzeros:1;1035bool viewport0_y_inverted;1036struct pipe_scissor_state scissors[SI_MAX_VIEWPORTS];1037struct si_streamout streamout;1038struct si_viewports viewports;1039unsigned num_window_rectangles;1040bool window_rectangles_include;1041struct pipe_scissor_state window_rectangles[4];10421043/* Precomputed states. */1044struct si_pm4_state *cs_preamble_state;1045struct si_pm4_state *cs_preamble_tess_rings;1046struct si_pm4_state *cs_preamble_tess_rings_tmz;1047struct si_pm4_state *cs_preamble_gs_rings;1048bool cs_preamble_has_vgt_flush;1049struct si_pm4_state *vgt_shader_config[SI_NUM_VGT_STAGES_STATES];10501051/* shaders */1052union {1053struct {1054struct si_shader_ctx_state vs;1055struct si_shader_ctx_state ps;1056struct si_shader_ctx_state gs;1057struct si_shader_ctx_state tcs;1058struct si_shader_ctx_state tes;1059} shader;1060/* indexed access using pipe_shader_type (not by MESA_SHADER_*) */1061struct si_shader_ctx_state shaders[SI_NUM_GRAPHICS_SHADERS];1062};1063struct si_shader_ctx_state cs_prim_discard_state;1064struct si_cs_shader_state cs_shader_state;10651066/* shader information */1067struct si_vertex_elements *vertex_elements;1068unsigned num_vertex_elements;1069unsigned sprite_coord_enable;1070unsigned cs_max_waves_per_sh;1071bool flatshade;1072bool do_update_shaders;1073bool compute_shaderbuf_sgprs_dirty;1074bool compute_image_sgprs_dirty;1075bool vs_uses_base_instance;1076bool vs_uses_draw_id;10771078/* shader descriptors */1079struct si_descriptors descriptors[SI_NUM_DESCS];1080unsigned descriptors_dirty;1081unsigned shader_pointers_dirty;1082unsigned shader_needs_decompress_mask;1083unsigned 
inlinable_uniforms_valid_mask;1084uint32_t inlinable_uniforms[SI_NUM_SHADERS][MAX_INLINABLE_UNIFORMS];1085struct si_buffer_resources internal_bindings;1086struct si_buffer_resources const_and_shader_buffers[SI_NUM_SHADERS];1087struct si_samplers samplers[SI_NUM_SHADERS];1088struct si_images images[SI_NUM_SHADERS];1089bool bo_list_add_all_resident_resources;1090bool bo_list_add_all_gfx_resources;1091bool bo_list_add_all_compute_resources;10921093/* other shader resources */1094struct pipe_constant_buffer null_const_buf; /* used for set_constant_buffer(NULL) on GFX7 */1095struct pipe_resource *esgs_ring;1096struct pipe_resource *gsvs_ring;1097struct pipe_resource *tess_rings;1098struct pipe_resource *tess_rings_tmz;1099union pipe_color_union *border_color_table; /* in CPU memory, any endian */1100struct si_resource *border_color_buffer;1101union pipe_color_union *border_color_map; /* in VRAM (slow access), little endian */1102unsigned border_color_count;1103unsigned num_vs_blit_sgprs;1104uint32_t vs_blit_sh_data[SI_VS_BLIT_SGPRS_POS_TEXCOORD];1105uint32_t cs_user_data[4];11061107/* Vertex buffers. */1108bool vertex_buffers_dirty;1109bool vertex_buffer_pointer_dirty;1110bool vertex_buffer_user_sgprs_dirty;1111struct pipe_vertex_buffer vertex_buffer[SI_NUM_VERTEX_BUFFERS];1112uint16_t vertex_buffer_unaligned; /* bitmask of not dword-aligned buffers */1113uint32_t *vb_descriptors_gpu_list;1114struct si_resource *vb_descriptors_buffer;1115unsigned vb_descriptors_offset;1116unsigned vb_descriptor_user_sgprs[5 * 4];11171118/* MSAA config state. */1119int ps_iter_samples;1120bool ps_uses_fbfetch;1121bool smoothing_enabled;11221123/* DB render state. 
*/1124unsigned ps_db_shader_control;1125unsigned dbcb_copy_sample;1126bool dbcb_depth_copy_enabled : 1;1127bool dbcb_stencil_copy_enabled : 1;1128bool db_flush_depth_inplace : 1;1129bool db_flush_stencil_inplace : 1;1130bool db_depth_clear : 1;1131bool db_depth_disable_expclear : 1;1132bool db_stencil_clear : 1;1133bool db_stencil_disable_expclear : 1;1134bool occlusion_queries_disabled : 1;1135bool generate_mipmap_for_depth : 1;1136bool allow_flat_shading : 1;11371138/* Emitted draw state. */1139bool gs_tri_strip_adj_fix : 1;1140bool ls_vgpr_fix : 1;1141bool prim_discard_cs_instancing : 1;1142bool ngg : 1;1143bool same_patch_vertices : 1;1144uint8_t ngg_culling;1145unsigned last_index_size;1146int last_base_vertex;1147unsigned last_start_instance;1148unsigned last_instance_count;1149unsigned last_drawid;1150unsigned last_sh_base_reg;1151int last_primitive_restart_en;1152unsigned last_restart_index;1153unsigned last_prim;1154unsigned last_multi_vgt_param;1155unsigned last_gs_out_prim;1156int last_binning_enabled;1157unsigned current_vs_state;1158unsigned last_vs_state;1159enum pipe_prim_type current_rast_prim; /* primitive type after TES, GS */11601161struct si_small_prim_cull_info last_small_prim_cull_info;1162struct si_resource *small_prim_cull_info_buf;1163uint64_t small_prim_cull_info_address;11641165/* Scratch buffer */1166struct si_resource *scratch_buffer;1167unsigned scratch_waves;1168unsigned spi_tmpring_size;1169unsigned max_seen_scratch_bytes_per_wave;1170unsigned max_seen_compute_scratch_bytes_per_wave;11711172struct si_resource *compute_scratch_buffer;11731174/* Emitted derived tessellation state. */1175/* Local shader (VS), or HS if LS-HS are merged. */1176struct si_shader *last_ls;1177struct si_shader_selector *last_tcs;1178unsigned last_num_tcs_input_cp;1179unsigned last_tes_sh_base;1180bool last_tess_uses_primid;1181unsigned last_num_patches;1182unsigned last_ls_hs_config;11831184/* Debug state. 
*/1185bool is_debug;1186struct si_saved_cs *current_saved_cs;1187uint64_t dmesg_timestamp;1188unsigned apitrace_call_number;11891190/* Other state */1191bool need_check_render_feedback;1192bool decompression_enabled;1193bool dpbb_force_off;1194bool vs_writes_viewport_index;1195bool vs_disables_clipping_viewport;11961197/* Precomputed IA_MULTI_VGT_PARAM */1198union si_vgt_param_key ia_multi_vgt_param_key;1199unsigned ia_multi_vgt_param[SI_NUM_VGT_PARAM_STATES];12001201/* Bindless descriptors. */1202struct si_descriptors bindless_descriptors;1203struct util_idalloc bindless_used_slots;1204unsigned num_bindless_descriptors;1205bool bindless_descriptors_dirty;1206bool graphics_bindless_pointer_dirty;1207bool compute_bindless_pointer_dirty;12081209/* Allocated bindless handles */1210struct hash_table *tex_handles;1211struct hash_table *img_handles;12121213/* Resident bindless handles */1214struct util_dynarray resident_tex_handles;1215struct util_dynarray resident_img_handles;12161217/* Resident bindless handles which need decompression */1218struct util_dynarray resident_tex_needs_color_decompress;1219struct util_dynarray resident_img_needs_color_decompress;1220struct util_dynarray resident_tex_needs_depth_decompress;12211222/* Bindless state */1223bool uses_bindless_samplers;1224bool uses_bindless_images;12251226/* MSAA sample locations.1227* The first index is the sample index.1228* The second index is the coordinate: X, Y. */1229struct {1230float x1[1][2];1231float x2[2][2];1232float x4[4][2];1233float x8[8][2];1234float x16[16][2];1235} sample_positions;1236struct pipe_resource *sample_pos_buffer;12371238/* Misc stats. 
*/1239unsigned num_draw_calls;1240unsigned num_decompress_calls;1241unsigned num_prim_restart_calls;1242unsigned num_compute_calls;1243unsigned num_cp_dma_calls;1244unsigned num_vs_flushes;1245unsigned num_ps_flushes;1246unsigned num_cs_flushes;1247unsigned num_cb_cache_flushes;1248unsigned num_db_cache_flushes;1249unsigned num_L2_invalidates;1250unsigned num_L2_writebacks;1251unsigned num_resident_handles;1252uint64_t num_alloc_tex_transfer_bytes;1253unsigned last_tex_ps_draw_ratio; /* for query */1254unsigned compute_num_verts_accepted;1255unsigned compute_num_verts_rejected;1256unsigned compute_num_verts_ineligible; /* due to low vertex count */1257unsigned context_roll;12581259/* Queries. */1260/* Maintain the list of active queries for pausing between IBs. */1261int num_occlusion_queries;1262int num_perfect_occlusion_queries;1263int num_pipeline_stat_queries;1264struct list_head active_queries;1265unsigned num_cs_dw_queries_suspend;12661267/* Render condition. */1268struct pipe_query *render_cond;1269unsigned render_cond_mode;1270bool render_cond_invert;1271bool render_cond_enabled; /* for u_blitter */12721273/* Shader-based queries. */1274struct list_head shader_query_buffers;1275unsigned num_active_shader_queries;12761277bool force_cb_shader_coherent;12781279struct si_tracked_regs tracked_regs;12801281/* Resources that need to be flushed, but will not get an explicit1282* flush_resource from the frontend and that will need to get flushed during1283* a context flush.1284*/1285struct hash_table *dirty_implicit_resources;12861287pipe_draw_vbo_func draw_vbo[2][2][2][2];1288/* When b.draw_vbo is a wrapper, real_draw_vbo is the real draw_vbo function */1289pipe_draw_vbo_func real_draw_vbo;12901291/* SQTT */1292struct ac_thread_trace_data *thread_trace;1293struct pipe_fence_handle *last_sqtt_fence;1294enum rgp_sqtt_marker_event_type sqtt_next_event;1295bool thread_trace_enabled;12961297unsigned context_flags;12981299/* Shaders. 
*/1300/* TODO: move other shaders here too */1301/* Only used for DCC MSAA clears with 4-8 fragments and 4-16 samples. */1302void *cs_clear_dcc_msaa[32][5][2][3][2]; /* [swizzle_mode][log2(bpe)][fragments == 8][log2(samples)-2][is_array] */1303};13041305/* si_blit.c */1306enum si_blitter_op /* bitmask */1307{1308SI_SAVE_TEXTURES = 1,1309SI_SAVE_FRAMEBUFFER = 2,1310SI_SAVE_FRAGMENT_STATE = 4,1311SI_DISABLE_RENDER_COND = 8,1312};13131314void si_blitter_begin(struct si_context *sctx, enum si_blitter_op op);1315void si_blitter_end(struct si_context *sctx);1316void si_init_blit_functions(struct si_context *sctx);1317void si_decompress_textures(struct si_context *sctx, unsigned shader_mask);1318void si_decompress_subresource(struct pipe_context *ctx, struct pipe_resource *tex, unsigned planes,1319unsigned level, unsigned first_layer, unsigned last_layer);1320void si_resource_copy_region(struct pipe_context *ctx, struct pipe_resource *dst,1321unsigned dst_level, unsigned dstx, unsigned dsty, unsigned dstz,1322struct pipe_resource *src, unsigned src_level,1323const struct pipe_box *src_box);1324void si_decompress_dcc(struct si_context *sctx, struct si_texture *tex);1325void si_flush_implicit_resources(struct si_context *sctx);13261327/* si_nir_optim.c */1328bool si_nir_is_output_const_if_tex_is_const(nir_shader *shader, float *in, float *out, int *texunit);13291330/* si_buffer.c */1331bool si_cs_is_buffer_referenced(struct si_context *sctx, struct pb_buffer *buf,1332enum radeon_bo_usage usage);1333void *si_buffer_map(struct si_context *sctx, struct si_resource *resource,1334unsigned usage);1335void si_init_resource_fields(struct si_screen *sscreen, struct si_resource *res, uint64_t size,1336unsigned alignment);1337bool si_alloc_resource(struct si_screen *sscreen, struct si_resource *res);1338struct pipe_resource *pipe_aligned_buffer_create(struct pipe_screen *screen, unsigned flags,1339unsigned usage, unsigned size, unsigned alignment);1340struct si_resource 
*si_aligned_buffer_create(struct pipe_screen *screen, unsigned flags,1341unsigned usage, unsigned size, unsigned alignment);1342void si_replace_buffer_storage(struct pipe_context *ctx, struct pipe_resource *dst,1343struct pipe_resource *src, unsigned num_rebinds,1344uint32_t rebind_mask, uint32_t delete_buffer_id);1345void si_init_screen_buffer_functions(struct si_screen *sscreen);1346void si_init_buffer_functions(struct si_context *sctx);13471348/* si_clear.c */1349#define SI_CLEAR_TYPE_CMASK (1 << 0)1350#define SI_CLEAR_TYPE_DCC (1 << 1)1351#define SI_CLEAR_TYPE_HTILE (1 << 2)13521353struct si_clear_info {1354struct pipe_resource *resource;1355uint64_t offset;1356uint32_t size;1357uint32_t clear_value;1358uint32_t writemask;1359bool is_dcc_msaa; /* Clear it as a DCC MSAA image. */1360};13611362enum pipe_format si_simplify_cb_format(enum pipe_format format);1363bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format);1364bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,1365unsigned clear_value, struct si_clear_info *out);1366void si_init_buffer_clear(struct si_clear_info *info,1367struct pipe_resource *resource, uint64_t offset,1368uint32_t size, uint32_t clear_value);1369void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,1370unsigned num_clears, unsigned types);1371void si_init_clear_functions(struct si_context *sctx);13721373/* si_compute_blit.c */1374#define SI_OP_SYNC_CS_BEFORE (1 << 0)1375#define SI_OP_SYNC_PS_BEFORE (1 << 1)1376#define SI_OP_SYNC_CPDMA_BEFORE (1 << 2) /* only affects CP DMA calls */1377#define SI_OP_SYNC_BEFORE (SI_OP_SYNC_CS_BEFORE | SI_OP_SYNC_PS_BEFORE | SI_OP_SYNC_CPDMA_BEFORE)1378#define SI_OP_SYNC_AFTER (1 << 3)1379#define SI_OP_SYNC_BEFORE_AFTER (SI_OP_SYNC_BEFORE | SI_OP_SYNC_AFTER)1380#define SI_OP_SKIP_CACHE_INV_BEFORE (1 << 4) /* don't invalidate caches */1381#define SI_OP_CS_IMAGE (1 << 5)1382#define SI_OP_CS_RENDER_COND_ENABLE (1 << 
6)1383#define SI_OP_CPDMA_SKIP_CHECK_CS_SPACE (1 << 7) /* don't call need_cs_space */13841385unsigned si_get_flush_flags(struct si_context *sctx, enum si_coherency coher,1386enum si_cache_policy cache_policy);1387void si_launch_grid_internal(struct si_context *sctx, struct pipe_grid_info *info,1388void *shader, unsigned flags);1389void si_launch_grid_internal_ssbos(struct si_context *sctx, struct pipe_grid_info *info,1390void *shader, unsigned flags, enum si_coherency coher,1391unsigned num_buffers, const struct pipe_shader_buffer *buffers,1392unsigned writeable_bitmask);1393enum si_clear_method {1394SI_CP_DMA_CLEAR_METHOD,1395SI_COMPUTE_CLEAR_METHOD,1396SI_AUTO_SELECT_CLEAR_METHOD1397};1398void si_clear_buffer(struct si_context *sctx, struct pipe_resource *dst,1399uint64_t offset, uint64_t size, uint32_t *clear_value,1400uint32_t clear_value_size, unsigned flags,1401enum si_coherency coher, enum si_clear_method method);1402void si_compute_clear_buffer_rmw(struct si_context *sctx, struct pipe_resource *dst,1403unsigned dst_offset, unsigned size,1404uint32_t clear_value, uint32_t writebitmask,1405unsigned flags, enum si_coherency coher);1406void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst, uint64_t offset,1407uint64_t size, unsigned value, unsigned flags);1408void si_copy_buffer(struct si_context *sctx, struct pipe_resource *dst, struct pipe_resource *src,1409uint64_t dst_offset, uint64_t src_offset, unsigned size, unsigned flags);1410void si_compute_copy_image(struct si_context *sctx, struct pipe_resource *dst, unsigned dst_level,1411struct pipe_resource *src, unsigned src_level, unsigned dstx,1412unsigned dsty, unsigned dstz, const struct pipe_box *src_box,1413bool is_dcc_decompress, unsigned flags);1414void si_compute_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dstsurf,1415const union pipe_color_union *color, unsigned dstx,1416unsigned dsty, unsigned width, unsigned height,1417bool 
render_condition_enabled);1418void si_retile_dcc(struct si_context *sctx, struct si_texture *tex);1419void gfx9_clear_dcc_msaa(struct si_context *sctx, struct pipe_resource *res, uint32_t clear_value,1420unsigned flags, enum si_coherency coher);1421void si_compute_expand_fmask(struct pipe_context *ctx, struct pipe_resource *tex);1422void si_init_compute_blit_functions(struct si_context *sctx);14231424/* si_cp_dma.c */1425void si_cp_dma_wait_for_idle(struct si_context *sctx, struct radeon_cmdbuf *cs);1426void si_cp_dma_clear_buffer(struct si_context *sctx, struct radeon_cmdbuf *cs,1427struct pipe_resource *dst, uint64_t offset, uint64_t size,1428unsigned value, unsigned user_flags, enum si_coherency coher,1429enum si_cache_policy cache_policy);1430void si_cp_dma_copy_buffer(struct si_context *sctx, struct pipe_resource *dst,1431struct pipe_resource *src, uint64_t dst_offset, uint64_t src_offset,1432unsigned size, unsigned user_flags, enum si_coherency coher,1433enum si_cache_policy cache_policy);1434void si_cp_dma_prefetch(struct si_context *sctx, struct pipe_resource *buf,1435unsigned offset, unsigned size);1436void si_test_gds(struct si_context *sctx);1437void si_cp_write_data(struct si_context *sctx, struct si_resource *buf, unsigned offset,1438unsigned size, unsigned dst_sel, unsigned engine, const void *data);1439void si_cp_copy_data(struct si_context *sctx, struct radeon_cmdbuf *cs, unsigned dst_sel,1440struct si_resource *dst, unsigned dst_offset, unsigned src_sel,1441struct si_resource *src, unsigned src_offset);14421443/* si_cp_reg_shadowing.c */1444void si_init_cp_reg_shadowing(struct si_context *sctx);14451446/* si_debug.c */1447void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved,1448bool get_buffer_list);1449void si_clear_saved_cs(struct radeon_saved_cs *saved);1450void si_destroy_saved_cs(struct si_saved_cs *scs);1451void si_auto_log_cs(void *data, struct u_log_context *log);1452void si_log_hw_flush(struct 
si_context *sctx);1453void si_log_draw_state(struct si_context *sctx, struct u_log_context *log);1454void si_log_compute_state(struct si_context *sctx, struct u_log_context *log);1455void si_init_debug_functions(struct si_context *sctx);1456void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved,1457enum ring_type ring);1458bool si_replace_shader(unsigned num, struct si_shader_binary *binary);14591460/* si_fence.c */1461void si_cp_release_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, unsigned event,1462unsigned event_flags, unsigned dst_sel, unsigned int_sel, unsigned data_sel,1463struct si_resource *buf, uint64_t va, uint32_t new_fence,1464unsigned query_type);1465unsigned si_cp_write_fence_dwords(struct si_screen *screen);1466void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs, uint64_t va, uint32_t ref,1467uint32_t mask, unsigned flags);1468void si_init_fence_functions(struct si_context *ctx);1469void si_init_screen_fence_functions(struct si_screen *screen);1470struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,1471struct tc_unflushed_batch_token *tc_token);14721473/* si_get.c */1474void si_init_screen_get_functions(struct si_screen *sscreen);14751476/* si_gfx_cs.c */1477void si_flush_gfx_cs(struct si_context *ctx, unsigned flags, struct pipe_fence_handle **fence);1478void si_allocate_gds(struct si_context *ctx);1479void si_set_tracked_regs_to_clear_state(struct si_context *ctx);1480void si_begin_new_gfx_cs(struct si_context *ctx, bool first_cs);1481void si_need_gfx_cs_space(struct si_context *ctx, unsigned num_draws);1482void si_trace_emit(struct si_context *sctx);1483void si_prim_discard_signal_next_compute_ib_start(struct si_context *sctx);1484void si_emit_surface_sync(struct si_context *sctx, struct radeon_cmdbuf *cs,1485unsigned cp_coher_cntl);1486void gfx10_emit_cache_flush(struct si_context *sctx, struct radeon_cmdbuf *cs);1487void si_emit_cache_flush(struct si_context *sctx, struct 
radeon_cmdbuf *cs);1488/* Replace the sctx->b.draw_vbo function with a wrapper. This can be used to implement1489* optimizations without affecting the normal draw_vbo functions perf.1490*/1491void si_install_draw_wrapper(struct si_context *sctx, pipe_draw_vbo_func wrapper);14921493/* si_gpu_load.c */1494void si_gpu_load_kill_thread(struct si_screen *sscreen);1495uint64_t si_begin_counter(struct si_screen *sscreen, unsigned type);1496unsigned si_end_counter(struct si_screen *sscreen, unsigned type, uint64_t begin);14971498/* si_compute.c */1499void si_emit_initial_compute_regs(struct si_context *sctx, struct radeon_cmdbuf *cs);1500void si_init_compute_functions(struct si_context *sctx);15011502/* si_compute_prim_discard.c */1503enum si_prim_discard_outcome1504{1505SI_PRIM_DISCARD_ENABLED,1506SI_PRIM_DISCARD_DISABLED,1507SI_PRIM_DISCARD_DRAW_SPLIT,1508SI_PRIM_DISCARD_MULTI_DRAW_SPLIT,1509};15101511void si_build_prim_discard_compute_shader(struct si_shader_context *ctx);1512enum si_prim_discard_outcome1513si_prepare_prim_discard_or_split_draw(struct si_context *sctx, const struct pipe_draw_info *info,1514unsigned drawid_offset,1515const struct pipe_draw_start_count_bias *draws,1516unsigned num_draws, unsigned total_count);1517void si_compute_signal_gfx(struct si_context *sctx);1518void si_dispatch_prim_discard_cs_and_draw(struct si_context *sctx,1519const struct pipe_draw_info *info,1520const struct pipe_draw_start_count_bias *draws,1521unsigned num_draws, unsigned index_size,1522unsigned total_count, uint64_t input_indexbuf_va,1523unsigned index_max_size);1524void si_initialize_prim_discard_tunables(struct si_screen *sscreen, bool is_aux_context,1525unsigned *prim_discard_vertex_count_threshold,1526unsigned *index_ring_size_per_ib);15271528/* si_pipe.c */1529void si_init_compiler(struct si_screen *sscreen, struct ac_llvm_compiler *compiler);15301531/* si_perfcounters.c */1532void si_init_perfcounters(struct si_screen *screen);1533void si_destroy_perfcounters(struct 
si_screen *screen);1534void si_inhibit_clockgating(struct si_context *sctx, struct radeon_cmdbuf *cs, bool inhibit);15351536/* si_query.c */1537void si_init_screen_query_functions(struct si_screen *sscreen);1538void si_init_query_functions(struct si_context *sctx);1539void si_suspend_queries(struct si_context *sctx);1540void si_resume_queries(struct si_context *sctx);15411542/* si_shaderlib_nir.c */1543void *si_create_dcc_retile_cs(struct si_context *sctx, struct radeon_surf *surf);1544void *gfx9_create_clear_dcc_msaa_cs(struct si_context *sctx, struct si_texture *tex);15451546/* si_shaderlib_tgsi.c */1547void *si_get_blitter_vs(struct si_context *sctx, enum blitter_attrib_type type,1548unsigned num_layers);1549void *si_create_fixed_func_tcs(struct si_context *sctx);1550void *si_create_dma_compute_shader(struct pipe_context *ctx, unsigned num_dwords_per_thread,1551bool dst_stream_cache_policy, bool is_copy);1552void *si_create_clear_buffer_rmw_cs(struct pipe_context *ctx);1553void *si_create_copy_image_compute_shader(struct pipe_context *ctx);1554void *si_create_copy_image_compute_shader_1d_array(struct pipe_context *ctx);1555void *si_create_dcc_decompress_cs(struct pipe_context *ctx);1556void *si_clear_render_target_shader(struct pipe_context *ctx);1557void *si_clear_render_target_shader_1d_array(struct pipe_context *ctx);1558void *si_clear_12bytes_buffer_shader(struct pipe_context *ctx);1559void *si_create_fmask_expand_cs(struct pipe_context *ctx, unsigned num_samples, bool is_array);1560void *si_create_query_result_cs(struct si_context *sctx);1561void *gfx10_create_sh_query_result_cs(struct si_context *sctx);15621563/* gfx10_query.c */1564void gfx10_init_query(struct si_context *sctx);1565void gfx10_destroy_query(struct si_context *sctx);15661567/* si_test_blit.c */1568void si_test_blit(struct si_screen *sscreen);15691570/* si_test_clearbuffer.c */1571void si_test_dma_perf(struct si_screen *sscreen);15721573/* si_uvd.c */1574struct pipe_video_codec 
*si_uvd_create_decoder(struct pipe_context *context,1575const struct pipe_video_codec *templ);15761577struct pipe_video_buffer *si_video_buffer_create(struct pipe_context *pipe,1578const struct pipe_video_buffer *tmpl);1579struct pipe_video_buffer *si_video_buffer_create_with_modifiers(struct pipe_context *pipe,1580const struct pipe_video_buffer *tmpl,1581const uint64_t *modifiers,1582unsigned int modifiers_count);15831584/* si_viewport.c */1585void si_get_small_prim_cull_info(struct si_context *sctx, struct si_small_prim_cull_info *out);1586void si_update_vs_viewport_state(struct si_context *ctx);1587void si_init_viewport_functions(struct si_context *ctx);15881589/* si_texture.c */1590void si_eliminate_fast_color_clear(struct si_context *sctx, struct si_texture *tex,1591bool *ctx_flushed);1592void si_texture_discard_cmask(struct si_screen *sscreen, struct si_texture *tex);1593bool si_init_flushed_depth_texture(struct pipe_context *ctx, struct pipe_resource *texture);1594void si_print_texture_info(struct si_screen *sscreen, struct si_texture *tex,1595struct u_log_context *log);1596struct pipe_resource *si_texture_create(struct pipe_screen *screen,1597const struct pipe_resource *templ);1598bool vi_dcc_formats_compatible(struct si_screen *sscreen, enum pipe_format format1,1599enum pipe_format format2);1600bool vi_dcc_formats_are_incompatible(struct pipe_resource *tex, unsigned level,1601enum pipe_format view_format);1602void vi_disable_dcc_if_incompatible_format(struct si_context *sctx, struct pipe_resource *tex,1603unsigned level, enum pipe_format view_format);1604struct pipe_surface *si_create_surface_custom(struct pipe_context *pipe,1605struct pipe_resource *texture,1606const struct pipe_surface *templ, unsigned width0,1607unsigned height0, unsigned width, unsigned height);1608unsigned si_translate_colorswap(enum pipe_format format, bool do_endian_swap);1609bool si_texture_disable_dcc(struct si_context *sctx, struct si_texture *tex);1610void 
si_init_screen_texture_functions(struct si_screen *sscreen);1611void si_init_context_texture_functions(struct si_context *sctx);16121613/* si_sqtt.c */1614void si_sqtt_write_event_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,1615enum rgp_sqtt_marker_event_type api_type,1616uint32_t vertex_offset_user_data,1617uint32_t instance_offset_user_data,1618uint32_t draw_index_user_data);1619bool si_sqtt_register_pipeline(struct si_context* sctx, uint64_t pipeline_hash, uint64_t base_address, bool is_compute);1620bool si_sqtt_pipeline_is_registered(struct ac_thread_trace_data *thread_trace_data,1621uint64_t pipeline_hash);1622void si_sqtt_describe_pipeline_bind(struct si_context* sctx, uint64_t pipeline_hash, int bind_point);1623void1624si_write_event_with_dims_marker(struct si_context* sctx, struct radeon_cmdbuf *rcs,1625enum rgp_sqtt_marker_event_type api_type,1626uint32_t x, uint32_t y, uint32_t z);1627void1628si_write_user_event(struct si_context* sctx, struct radeon_cmdbuf *rcs,1629enum rgp_sqtt_marker_user_event_type type,1630const char *str, int len);1631void1632si_sqtt_describe_barrier_start(struct si_context* sctx, struct radeon_cmdbuf *rcs);1633void1634si_sqtt_describe_barrier_end(struct si_context* sctx, struct radeon_cmdbuf *rcs, unsigned flags);1635bool si_init_thread_trace(struct si_context *sctx);1636void si_destroy_thread_trace(struct si_context *sctx);1637void si_handle_thread_trace(struct si_context *sctx, struct radeon_cmdbuf *rcs);16381639/*1640* common helpers1641*/16421643static inline struct si_resource *si_resource(struct pipe_resource *r)1644{1645return (struct si_resource *)r;1646}16471648static inline void si_resource_reference(struct si_resource **ptr, struct si_resource *res)1649{1650pipe_resource_reference((struct pipe_resource **)ptr, (struct pipe_resource *)res);1651}16521653static inline void si_texture_reference(struct si_texture **ptr, struct si_texture *res)1654{1655pipe_resource_reference((struct pipe_resource **)ptr, 
&res->buffer.b.b);1656}16571658static inline void1659si_shader_selector_reference(struct si_context *sctx, /* sctx can optionally be NULL */1660struct si_shader_selector **dst, struct si_shader_selector *src)1661{1662if (*dst == src)1663return;16641665struct si_screen *sscreen = src ? src->screen : (*dst)->screen;1666util_shader_reference(&sctx->b, &sscreen->live_shader_cache, (void **)dst, src);1667}16681669static inline bool vi_dcc_enabled(struct si_texture *tex, unsigned level)1670{1671return !tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels;1672}16731674static inline unsigned si_tile_mode_index(struct si_texture *tex, unsigned level, bool stencil)1675{1676if (stencil)1677return tex->surface.u.legacy.zs.stencil_tiling_index[level];1678else1679return tex->surface.u.legacy.tiling_index[level];1680}16811682static inline unsigned si_get_minimum_num_gfx_cs_dwords(struct si_context *sctx,1683unsigned num_draws)1684{1685/* Don't count the needed CS space exactly and just use an upper bound.1686*1687* Also reserve space for stopping queries at the end of IB, because1688* the number of active queries is unlimited in theory.1689*/1690return 2048 + sctx->num_cs_dw_queries_suspend + num_draws * 10;1691}16921693static inline void si_context_add_resource_size(struct si_context *sctx, struct pipe_resource *r)1694{1695if (r) {1696/* Add memory usage for need_gfx_cs_space */1697sctx->vram_kb += si_resource(r)->vram_usage_kb;1698sctx->gtt_kb += si_resource(r)->gart_usage_kb;1699}1700}17011702static inline void si_invalidate_draw_sh_constants(struct si_context *sctx)1703{1704sctx->last_base_vertex = SI_BASE_VERTEX_UNKNOWN;1705sctx->last_start_instance = SI_START_INSTANCE_UNKNOWN;1706sctx->last_drawid = SI_DRAW_ID_UNKNOWN;1707}17081709static inline void si_invalidate_draw_constants(struct si_context *sctx)1710{1711si_invalidate_draw_sh_constants(sctx);1712sctx->last_instance_count = SI_INSTANCE_COUNT_UNKNOWN;1713}17141715static inline unsigned 
si_get_atom_bit(struct si_context *sctx, struct si_atom *atom)1716{1717return 1 << (atom - sctx->atoms.array);1718}17191720static inline void si_set_atom_dirty(struct si_context *sctx, struct si_atom *atom, bool dirty)1721{1722unsigned bit = si_get_atom_bit(sctx, atom);17231724if (dirty)1725sctx->dirty_atoms |= bit;1726else1727sctx->dirty_atoms &= ~bit;1728}17291730static inline bool si_is_atom_dirty(struct si_context *sctx, struct si_atom *atom)1731{1732return (sctx->dirty_atoms & si_get_atom_bit(sctx, atom)) != 0;1733}17341735static inline void si_mark_atom_dirty(struct si_context *sctx, struct si_atom *atom)1736{1737si_set_atom_dirty(sctx, atom, true);1738}17391740/* This should be evaluated at compile time if all parameters except sctx are constants. */1741static ALWAYS_INLINE struct si_shader_ctx_state *1742si_get_vs_inline(struct si_context *sctx, enum si_has_tess has_tess, enum si_has_gs has_gs)1743{1744if (has_gs)1745return &sctx->shader.gs;1746if (has_tess)1747return &sctx->shader.tes;17481749return &sctx->shader.vs;1750}17511752static inline struct si_shader_ctx_state *si_get_vs(struct si_context *sctx)1753{1754return si_get_vs_inline(sctx, sctx->shader.tes.cso ? TESS_ON : TESS_OFF,1755sctx->shader.gs.cso ? GS_ON : GS_OFF);1756}17571758static inline struct si_shader_info *si_get_vs_info(struct si_context *sctx)1759{1760struct si_shader_ctx_state *vs = si_get_vs(sctx);17611762return vs->cso ? &vs->cso->info : NULL;1763}17641765static inline bool si_can_dump_shader(struct si_screen *sscreen, gl_shader_stage stage)1766{1767return sscreen->debug_flags & (1 << stage);1768}17691770static inline bool si_get_strmout_en(struct si_context *sctx)1771{1772return sctx->streamout.streamout_enabled || sctx->streamout.prims_gen_query_enabled;1773}17741775static inline unsigned si_optimal_tcc_alignment(struct si_context *sctx, unsigned upload_size)1776{1777unsigned alignment, tcc_cache_line_size;17781779/* If the upload size is less than the cache line size (e.g. 
16, 32),1780* the whole thing will fit into a cache line if we align it to its size.1781* The idea is that multiple small uploads can share a cache line.1782* If the upload size is greater, align it to the cache line size.1783*/1784alignment = util_next_power_of_two(upload_size);1785tcc_cache_line_size = sctx->screen->info.tcc_cache_line_size;1786return MIN2(alignment, tcc_cache_line_size);1787}17881789static inline void si_saved_cs_reference(struct si_saved_cs **dst, struct si_saved_cs *src)1790{1791if (pipe_reference(&(*dst)->reference, &src->reference))1792si_destroy_saved_cs(*dst);17931794*dst = src;1795}17961797static inline void si_make_CB_shader_coherent(struct si_context *sctx, unsigned num_samples,1798bool shaders_read_metadata, bool dcc_pipe_aligned)1799{1800sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB | SI_CONTEXT_INV_VCACHE;1801sctx->force_cb_shader_coherent = false;18021803if (sctx->chip_class >= GFX10) {1804if (sctx->screen->info.tcc_rb_non_coherent)1805sctx->flags |= SI_CONTEXT_INV_L2;1806else if (shaders_read_metadata)1807sctx->flags |= SI_CONTEXT_INV_L2_METADATA;1808} else if (sctx->chip_class == GFX9) {1809/* Single-sample color is coherent with shaders on GFX9, but1810* L2 metadata must be flushed if shaders read metadata.1811* (DCC, CMASK).1812*/1813if (num_samples >= 2 || (shaders_read_metadata && !dcc_pipe_aligned))1814sctx->flags |= SI_CONTEXT_INV_L2;1815else if (shaders_read_metadata)1816sctx->flags |= SI_CONTEXT_INV_L2_METADATA;1817} else {1818/* GFX6-GFX8 */1819sctx->flags |= SI_CONTEXT_INV_L2;1820}1821}18221823static inline void si_make_DB_shader_coherent(struct si_context *sctx, unsigned num_samples,1824bool include_stencil, bool shaders_read_metadata)1825{1826sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB | SI_CONTEXT_INV_VCACHE;18271828if (sctx->chip_class >= GFX10) {1829if (sctx->screen->info.tcc_rb_non_coherent)1830sctx->flags |= SI_CONTEXT_INV_L2;1831else if (shaders_read_metadata)1832sctx->flags |= SI_CONTEXT_INV_L2_METADATA;1833} else 
if (sctx->chip_class == GFX9) {1834/* Single-sample depth (not stencil) is coherent with shaders1835* on GFX9, but L2 metadata must be flushed if shaders read1836* metadata.1837*/1838if (num_samples >= 2 || include_stencil)1839sctx->flags |= SI_CONTEXT_INV_L2;1840else if (shaders_read_metadata)1841sctx->flags |= SI_CONTEXT_INV_L2_METADATA;1842} else {1843/* GFX6-GFX8 */1844sctx->flags |= SI_CONTEXT_INV_L2;1845}1846}18471848static inline bool si_can_sample_zs(struct si_texture *tex, bool stencil_sampler)1849{1850return (stencil_sampler && tex->can_sample_s) || (!stencil_sampler && tex->can_sample_z);1851}18521853static inline bool si_htile_enabled(struct si_texture *tex, unsigned level, unsigned zs_mask)1854{1855if (zs_mask == PIPE_MASK_S && (tex->htile_stencil_disabled || !tex->surface.has_stencil))1856return false;18571858return tex->is_depth && tex->surface.meta_offset && level < tex->surface.num_meta_levels;1859}18601861static inline bool vi_tc_compat_htile_enabled(struct si_texture *tex, unsigned level,1862unsigned zs_mask)1863{1864assert(!tex->tc_compatible_htile || tex->surface.meta_offset);1865return tex->tc_compatible_htile && si_htile_enabled(tex, level, zs_mask);1866}18671868static inline unsigned si_get_ps_iter_samples(struct si_context *sctx)1869{1870if (sctx->ps_uses_fbfetch)1871return sctx->framebuffer.nr_color_samples;18721873return MIN2(sctx->ps_iter_samples, sctx->framebuffer.nr_color_samples);1874}18751876static inline unsigned si_get_total_colormask(struct si_context *sctx)1877{1878if (sctx->queued.named.rasterizer->rasterizer_discard)1879return 0;18801881struct si_shader_selector *ps = sctx->shader.ps.cso;1882if (!ps)1883return 0;18841885unsigned colormask =1886sctx->framebuffer.colorbuf_enabled_4bit & sctx->queued.named.blend->cb_target_mask;18871888if (!ps->info.color0_writes_all_cbufs)1889colormask &= ps->colors_written_4bit;1890else if (!ps->colors_written_4bit)1891colormask = 0; /* color0 writes all cbufs, but it's not written 
*/18921893return colormask;1894}18951896#define UTIL_ALL_PRIM_LINE_MODES \1897((1 << PIPE_PRIM_LINES) | (1 << PIPE_PRIM_LINE_LOOP) | (1 << PIPE_PRIM_LINE_STRIP) | \1898(1 << PIPE_PRIM_LINES_ADJACENCY) | (1 << PIPE_PRIM_LINE_STRIP_ADJACENCY))18991900static inline bool util_prim_is_lines(unsigned prim)1901{1902return ((1 << prim) & UTIL_ALL_PRIM_LINE_MODES) != 0;1903}19041905static inline bool util_prim_is_points_or_lines(unsigned prim)1906{1907return ((1 << prim) & (UTIL_ALL_PRIM_LINE_MODES | (1 << PIPE_PRIM_POINTS))) != 0;1908}19091910static inline bool util_rast_prim_is_triangles(unsigned prim)1911{1912return ((1 << prim) &1913((1 << PIPE_PRIM_TRIANGLES) | (1 << PIPE_PRIM_TRIANGLE_STRIP) |1914(1 << PIPE_PRIM_TRIANGLE_FAN) | (1 << PIPE_PRIM_QUADS) | (1 << PIPE_PRIM_QUAD_STRIP) |1915(1 << PIPE_PRIM_POLYGON) | (1 << PIPE_PRIM_TRIANGLES_ADJACENCY) |1916(1 << PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY)));1917}19181919/**1920* Return true if there is enough memory in VRAM and GTT for the buffers1921* added so far.1922*1923* \param vram VRAM memory size not added to the buffer list yet1924* \param gtt GTT memory size not added to the buffer list yet1925*/1926static inline bool radeon_cs_memory_below_limit(struct si_screen *screen, struct radeon_cmdbuf *cs,1927uint32_t vram_kb, uint32_t gtt_kb)1928{1929vram_kb += cs->used_vram_kb;1930gtt_kb += cs->used_gart_kb;19311932/* Anything that goes above the VRAM size should go to GTT. */1933if (vram_kb > screen->info.vram_size_kb)1934gtt_kb += vram_kb - screen->info.vram_size_kb;19351936/* Now we just need to check if we have enough GTT (the limit is 75% of max). */1937return gtt_kb < screen->info.gart_size_kb / 4 * 3;1938}19391940/**1941* Add a buffer to the buffer list for the given command stream (CS).1942*1943* All buffers used by a CS must be added to the list. This tells the kernel1944* driver which buffers are used by GPU commands. 
Other buffers can1945* be swapped out (not accessible) during execution.1946*1947* The buffer list becomes empty after every context flush and must be1948* rebuilt.1949*/1950static inline void radeon_add_to_buffer_list(struct si_context *sctx, struct radeon_cmdbuf *cs,1951struct si_resource *bo, enum radeon_bo_usage usage,1952enum radeon_bo_priority priority)1953{1954assert(usage);1955sctx->ws->cs_add_buffer(cs, bo->buf, (enum radeon_bo_usage)(usage | RADEON_USAGE_SYNCHRONIZED),1956bo->domains, priority);1957}19581959/**1960* Same as above, but also checks memory usage and flushes the context1961* accordingly.1962*1963* When this SHOULD NOT be used:1964*1965* - if si_context_add_resource_size has been called for the buffer1966* followed by *_need_cs_space for checking the memory usage1967*1968* - when emitting state packets and draw packets (because preceding packets1969* can't be re-emitted at that point)1970*1971* - if shader resource "enabled_mask" is not up-to-date or there is1972* a different constraint disallowing a context flush1973*/1974static inline void radeon_add_to_gfx_buffer_list_check_mem(struct si_context *sctx,1975struct si_resource *bo,1976enum radeon_bo_usage usage,1977enum radeon_bo_priority priority,1978bool check_mem)1979{1980if (check_mem &&1981!radeon_cs_memory_below_limit(sctx->screen, &sctx->gfx_cs, sctx->vram_kb + bo->vram_usage_kb,1982sctx->gtt_kb + bo->gart_usage_kb))1983si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW, NULL);19841985radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, bo, usage, priority);1986}19871988static inline bool si_compute_prim_discard_enabled(struct si_context *sctx)1989{1990return sctx->prim_discard_vertex_count_threshold != UINT_MAX;1991}19921993static inline unsigned si_get_wave_size(struct si_screen *sscreen,1994gl_shader_stage stage, bool ngg, bool es,1995bool gs_fast_launch, bool prim_discard_cs)1996{1997if (stage == MESA_SHADER_COMPUTE)1998return sscreen->compute_wave_size;1999else if (stage == 
MESA_SHADER_FRAGMENT)2000return sscreen->ps_wave_size;2001else if (gs_fast_launch)2002return 32; /* GS fast launch hangs with Wave64, so always use Wave32. */2003else if ((stage == MESA_SHADER_VERTEX && prim_discard_cs) || /* only Wave64 implemented */2004(stage == MESA_SHADER_VERTEX && es && !ngg) ||2005(stage == MESA_SHADER_TESS_EVAL && es && !ngg) ||2006(stage == MESA_SHADER_GEOMETRY && !ngg)) /* legacy GS only supports Wave64 */2007return 64;2008else2009return sscreen->ge_wave_size;2010}20112012static inline unsigned si_get_shader_wave_size(struct si_shader *shader)2013{2014return si_get_wave_size(shader->selector->screen, shader->selector->info.stage,2015shader->key.as_ngg,2016shader->key.as_es,2017shader->key.opt.ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL,2018shader->key.opt.vs_as_prim_discard_cs);2019}20202021static inline void si_select_draw_vbo(struct si_context *sctx)2022{2023bool has_prim_discard_cs = si_compute_prim_discard_enabled(sctx) &&2024!sctx->shader.tes.cso && !sctx->shader.gs.cso;2025pipe_draw_vbo_func draw_vbo = sctx->draw_vbo[!!sctx->shader.tes.cso]2026[!!sctx->shader.gs.cso]2027[sctx->ngg]2028[has_prim_discard_cs];2029assert(draw_vbo);2030if (unlikely(sctx->real_draw_vbo))2031sctx->real_draw_vbo = draw_vbo;2032else2033sctx->b.draw_vbo = draw_vbo;20342035if (!has_prim_discard_cs) {2036/* Reset this to false if prim discard CS is disabled because draw_vbo doesn't reset it. */2037if (sctx->prim_discard_cs_instancing) {2038sctx->do_update_shaders = true;2039sctx->prim_discard_cs_instancing = false;2040}2041}2042}20432044/* Return the number of samples that the rasterizer uses. */2045static inline unsigned si_get_num_coverage_samples(struct si_context *sctx)2046{2047if (sctx->framebuffer.nr_samples > 1 &&2048sctx->queued.named.rasterizer->multisample_enable)2049return sctx->framebuffer.nr_samples;20502051/* Note that smoothing_enabled is set by si_update_shaders. 
*/2052if (sctx->smoothing_enabled)2053return SI_NUM_SMOOTH_AA_SAMPLES;20542055return 1;2056}20572058#define PRINT_ERR(fmt, args...) \2059fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)20602061struct pipe_resource *si_buffer_from_winsys_buffer(struct pipe_screen *screen,2062const struct pipe_resource *templ,2063struct pb_buffer *imported_buf,2064bool dedicated);20652066#ifdef __cplusplus2067}2068#endif20692070#endif207120722073