Path: blob/21.2-virgl/src/intel/vulkan/anv_private.h
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef ANV_PRIVATE_H
#define ANV_PRIVATE_H

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <pthread.h>
#include <assert.h>
#include <stdint.h>
#include "drm-uapi/i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#ifndef NDEBUG
#define __gen_validate_value(x) VALGRIND_CHECK_MEM_IS_DEFINED(&(x), sizeof(x))
#endif
#else
#define VG(x) ((void)0)
#endif

#include "common/intel_clflush.h"
#include "common/intel_decoder.h"
#include "common/intel_gem.h"
#include "common/intel_l3_config.h"
#include "common/intel_measure.h"
#include "dev/intel_device_info.h"
#include "blorp/blorp.h"
#include "compiler/brw_compiler.h"
#include "compiler/brw_rt.h"
#include "util/bitset.h"
#include "util/bitscan.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/sparse_array.h"
#include "util/u_atomic.h"
#include "util/u_vector.h"
#include "util/u_math.h"
#include "util/vma.h"
#include "util/xmlconfig.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "vk_util.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

struct anv_batch;
struct anv_buffer;
struct anv_buffer_view;
struct anv_image_view;
struct anv_acceleration_structure;
struct anv_instance;

struct intel_aux_map_context;
struct intel_perf_config;
struct intel_perf_counter_pass;
struct intel_perf_query_result;

#include <vulkan/vulkan.h>
#include <vulkan/vk_icd.h>

#include "anv_android.h"
#include "anv_entrypoints.h"
#include "isl/isl.h"

#include "dev/intel_debug.h"
#undef MESA_LOG_TAG
#define MESA_LOG_TAG "MESA-INTEL"
#include "util/log.h"
#include "wsi_common.h"

#define NSEC_PER_SEC 1000000000ull

/* anv Virtual Memory Layout
 * =========================
 *
 * When the anv driver is determining the virtual graphics addresses of memory
 * objects itself using the softpin mechanism, the following memory ranges
 * will be used.
 *
 * Three special considerations to notice:
 *
 * (1) the dynamic state pool is located within the same 4 GiB as the low
 *     heap. This is to work around a VF cache issue described in a comment
 *     in anv_physical_device_init_heaps.
 *
 * (2) the binding table pool is located at lower addresses than the surface
 *     state pool, within a 4 GiB range. This allows surface state base
 *     addresses to cover both binding tables (16 bit offsets) and surface
 *     states (32 bit offsets).
 *
 * (3) the last 4 GiB of the address space is withheld from the high heap.
 *     Various hardware units will read past the end of an object for
 *     various reasons. This healthy margin prevents reads from wrapping
 *     around 48-bit addresses.
 */
#define GENERAL_STATE_POOL_MIN_ADDRESS     0x000000010000ULL /* 64 KiB */
#define GENERAL_STATE_POOL_MAX_ADDRESS     0x00003fffffffULL
#define LOW_HEAP_MIN_ADDRESS               0x000040000000ULL /* 1 GiB */
#define LOW_HEAP_MAX_ADDRESS               0x00007fffffffULL
#define DYNAMIC_STATE_POOL_MIN_ADDRESS     0x0000c0000000ULL /* 3 GiB */
#define DYNAMIC_STATE_POOL_MAX_ADDRESS     0x0000ffffffffULL
#define BINDING_TABLE_POOL_MIN_ADDRESS     0x000100000000ULL /* 4 GiB */
#define BINDING_TABLE_POOL_MAX_ADDRESS     0x00013fffffffULL
#define SURFACE_STATE_POOL_MIN_ADDRESS     0x000140000000ULL /* 5 GiB */
#define SURFACE_STATE_POOL_MAX_ADDRESS     0x00017fffffffULL
#define INSTRUCTION_STATE_POOL_MIN_ADDRESS 0x000180000000ULL /* 6 GiB */
#define INSTRUCTION_STATE_POOL_MAX_ADDRESS 0x0001bfffffffULL
#define CLIENT_VISIBLE_HEAP_MIN_ADDRESS    0x0001c0000000ULL /* 7 GiB */
#define CLIENT_VISIBLE_HEAP_MAX_ADDRESS    0x0002bfffffffULL
#define HIGH_HEAP_MIN_ADDRESS              0x0002c0000000ULL /* 11 GiB */

#define GENERAL_STATE_POOL_SIZE     \
   (GENERAL_STATE_POOL_MAX_ADDRESS - GENERAL_STATE_POOL_MIN_ADDRESS + 1)
#define LOW_HEAP_SIZE               \
   (LOW_HEAP_MAX_ADDRESS - LOW_HEAP_MIN_ADDRESS + 1)
#define DYNAMIC_STATE_POOL_SIZE     \
   (DYNAMIC_STATE_POOL_MAX_ADDRESS - DYNAMIC_STATE_POOL_MIN_ADDRESS + 1)
#define BINDING_TABLE_POOL_SIZE     \
   (BINDING_TABLE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS + 1)
#define SURFACE_STATE_POOL_SIZE     \
   (SURFACE_STATE_POOL_MAX_ADDRESS - SURFACE_STATE_POOL_MIN_ADDRESS + 1)
#define INSTRUCTION_STATE_POOL_SIZE \
   (INSTRUCTION_STATE_POOL_MAX_ADDRESS - INSTRUCTION_STATE_POOL_MIN_ADDRESS + 1)
#define CLIENT_VISIBLE_HEAP_SIZE    \
   (CLIENT_VISIBLE_HEAP_MAX_ADDRESS - CLIENT_VISIBLE_HEAP_MIN_ADDRESS + 1)
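
/* Illustrative compile-time checks (not part of the original header) that
 * the layout above matches the considerations in the comment: the dynamic
 * state pool stays in the same low 4 GiB as the low heap, and the binding
 * table pool sits immediately below the surface state pool so both fit in
 * one 4 GiB range covered by a single surface state base address.
 */
_Static_assert(DYNAMIC_STATE_POOL_MAX_ADDRESS < (1ull << 32),
               "dynamic state pool must stay in the low 4 GiB");
_Static_assert(BINDING_TABLE_POOL_MAX_ADDRESS + 1 == SURFACE_STATE_POOL_MIN_ADDRESS,
               "binding tables must sit directly below surface states");
_Static_assert(SURFACE_STATE_POOL_MAX_ADDRESS - BINDING_TABLE_POOL_MIN_ADDRESS < (1ull << 32),
               "binding table and surface state pools must share a 4 GiB range");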
/* Allowing different clear colors requires us to perform a depth resolve at
 * the end of certain render passes. This is because while slow clears store
 * the clear color in the HiZ buffer, fast clears (without a resolve) don't.
 * See the PRMs for examples describing when additional resolves would be
 * necessary. To enable fast clears without requiring extra resolves, we set
 * the clear value to a globally-defined one. We could allow different values
 * if the user doesn't expect coherent data during or after a render pass
 * (VK_ATTACHMENT_STORE_OP_DONT_CARE), but such users (aside from the CTS)
 * don't seem to exist yet. In almost all Vulkan applications tested thus
 * far, 1.0f seems to be the only value used. The only application that
 * doesn't set this value does so through the usage of a seemingly
 * uninitialized clear value.
 */
#define ANV_HZ_FC_VAL 1.0f

#define MAX_VBS          28
#define MAX_XFB_BUFFERS   4
#define MAX_XFB_STREAMS   4
#define MAX_SETS          8
#define MAX_RTS           8
#define MAX_VIEWPORTS    16
#define MAX_SCISSORS     16
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_DYNAMIC_BUFFERS 16
#define MAX_IMAGES 64
#define MAX_PUSH_DESCRIPTORS 32 /* Minimum requirement */
#define MAX_INLINE_UNIFORM_BLOCK_SIZE 4096
#define MAX_INLINE_UNIFORM_BLOCK_DESCRIPTORS 32
/* We need 16 for UBO block reads to work and 32 for push UBOs. However, we
 * use 64 here to avoid cache issues. This could most likely bring it back
 * to 32 if we had different virtual addresses for the different views on a
 * given GEM object.
 */
#define ANV_UBO_ALIGNMENT 64
#define ANV_SSBO_ALIGNMENT 4
#define ANV_SSBO_BOUNDS_CHECK_ALIGNMENT 4
#define MAX_VIEWS_FOR_PRIMITIVE_REPLICATION 16
#define MAX_SAMPLE_LOCATIONS 16

/* From the Skylake PRM Vol. 7 "Binding Table Surface State Model":
 *
 *    "The surface state model is used when a Binding Table Index (specified
 *    in the message descriptor) of less than 240 is specified. In this
 *    model, the Binding Table Index is used to index into the binding
 *    table, and the binding table entry contains a pointer to the
 *    SURFACE_STATE."
 *
 * Binding table values above 240 are used for various things in the
 * hardware such as stateless, stateless with incoherent cache, SLM, and
 * bindless.
 */
#define MAX_BINDING_TABLE_SIZE 240

/* The kernel relocation API has a limitation of a 32-bit delta value
 * applied to the address before it is written which, in spite of it being
 * unsigned, is treated as signed. Because of the way that this maps to
 * the Vulkan API, we cannot handle an offset into a buffer that does not
 * fit into a signed 32 bits. The only mechanism we have for dealing with
 * this at the moment is to limit all VkDeviceMemory objects to a maximum
 * of 2GB each. The Vulkan spec allows us to do this:
 *
 *    "Some platforms may have a limit on the maximum size of a single
 *    allocation. For example, certain systems may fail to create
 *    allocations with a size greater than or equal to 4GB. Such a limit is
 *    implementation-dependent, and if such a failure occurs then the error
 *    VK_ERROR_OUT_OF_DEVICE_MEMORY should be returned."
 *
 * We don't use vk_error here because it's not an error so much as an
 * indication to the application that the allocation is too large.
 */
#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 31)
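
/* Illustrative compile-time check (an assumption spelled out, not original
 * code): with the 2GB cap above, any byte offset into a VkDeviceMemory fits
 * in the signed 32-bit relocation delta described in the comment.
 */
_Static_assert(MAX_MEMORY_ALLOCATION_SIZE - 1 <= INT32_MAX,
               "memory allocation offsets must fit in a signed 32-bit delta");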
#define ANV_SVGS_VB_INDEX   MAX_VBS
#define ANV_DRAWID_VB_INDEX (MAX_VBS + 1)

/* We reserve this MI ALU register for the purpose of handling predication.
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PREDICATE_RESULT_REG 0x2678 /* MI_ALU_REG15 */

/* We reserve this MI ALU register to pass around an offset computed from
 * VkPerformanceQuerySubmitInfoKHR::counterPassIndex (VK_KHR_performance_query).
 * Other code which uses the MI ALU should leave it alone.
 */
#define ANV_PERF_QUERY_OFFSET_REG 0x2670 /* MI_ALU_REG14 */

/* For gfx12 we set the streamout buffers using 4 separate commands
 * (3DSTATE_SO_BUFFER_INDEX_*) instead of 3DSTATE_SO_BUFFER. However the
 * layout of the 3DSTATE_SO_BUFFER_INDEX_* commands is identical to that of
 * 3DSTATE_SO_BUFFER apart from the SOBufferIndex field, so for now we use
 * the 3DSTATE_SO_BUFFER command, but change the 3DCommandSubOpcode.
 * SO_BUFFER_INDEX_0_CMD is actually the 3DCommandSubOpcode for
 * 3DSTATE_SO_BUFFER_INDEX_0.
 */
#define SO_BUFFER_INDEX_0_CMD 0x60

#define anv_printflike(a, b) __attribute__((__format__(__printf__, a, b)))

static inline uint32_t
align_down_npot_u32(uint32_t v, uint32_t a)
{
   return v - (v % a);
}

static inline uint32_t
align_down_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}

static inline uint32_t
align_u32(uint32_t v, uint32_t a)
{
   assert(a != 0 && a == (a & -a));
   return align_down_u32(v + a - 1, a);
}

static inline uint64_t
align_down_u64(uint64_t v, uint64_t a)
{
   assert(a != 0 && a == (a & -a));
   return v & ~(a - 1);
}

static inline uint64_t
align_u64(uint64_t v, uint64_t a)
{
   return align_down_u64(v + a - 1, a);
}

static inline int32_t
align_i32(int32_t v, int32_t a)
{
   assert(a != 0 && a == (a & -a));
   return (v + a - 1) & ~(a - 1);
}

/** Alignment must be a power of 2. */
static inline bool
anv_is_aligned(uintmax_t n, uintmax_t a)
{
   assert(a == (a & -a));
   return (n & (a - 1)) == 0;
}

static inline uint32_t
anv_minify(uint32_t n, uint32_t levels)
{
   if (unlikely(n == 0))
      return 0;
   else
      return MAX2(n >> levels, 1);
}

static inline float
anv_clamp_f(float f, float min, float max)
{
   assert(min < max);

   if (f > max)
      return max;
   else if (f < min)
      return min;
   else
      return f;
}

static inline bool
anv_clear_mask(uint32_t *inout_mask, uint32_t clear_mask)
{
   if (*inout_mask & clear_mask) {
      *inout_mask &= ~clear_mask;
      return true;
   } else {
      return false;
   }
}

static inline union isl_color_value
vk_to_isl_color(VkClearColorValue color)
{
   return (union isl_color_value) {
      .u32 = {
         color.uint32[0],
         color.uint32[1],
         color.uint32[2],
         color.uint32[3],
      },
   };
}

static inline void *
anv_unpack_ptr(uintptr_t ptr, int bits, int *flags)
{
   uintptr_t mask = (1ull << bits) - 1;
   *flags = ptr & mask;
   return (void *) (ptr & ~mask);
}

static inline uintptr_t
anv_pack_ptr(void *ptr, int bits, int flags)
{
   uintptr_t value = (uintptr_t) ptr;
   uintptr_t mask = (1ull << bits) - 1;
   return value | (mask & flags);
}
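
/* Illustrative round trip (not part of the original header): these helpers
 * smuggle flags into the low bits of an aligned pointer; the submit code
 * uses this for anv_queue_submit::fence_bos later in this file. Assuming
 * the pointer is at least 2-byte aligned, bit 0 is free for a flag:
 */
static inline void *
anv_unpack_flagged_ptr_sketch(uintptr_t packed, bool *flag_out)
{
   int flags;
   void *ptr = anv_unpack_ptr(packed, 1, &flags);
   *flag_out = (flags & 1) != 0;
   return ptr; /* anv_pack_ptr(ptr, 1, flags) reproduces "packed" */
}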
/* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
 */

VkResult __vk_errorv(struct anv_instance *instance,
                     const struct vk_object_base *object, VkResult error,
                     const char *file, int line, const char *format,
                     va_list args);

VkResult __vk_errorf(struct anv_instance *instance,
                     const struct vk_object_base *object, VkResult error,
                     const char *file, int line, const char *format, ...)
   anv_printflike(6, 7);

#ifdef DEBUG
#define vk_error(error) __vk_errorf(NULL, NULL, error, __FILE__, __LINE__, NULL)
#define vk_errorfi(instance, obj, error, format, ...) \
   __vk_errorf(instance, obj, error, \
               __FILE__, __LINE__, format, ## __VA_ARGS__)
#define vk_errorf(device, obj, error, format, ...) \
   vk_errorfi(anv_device_instance_or_null(device), \
              obj, error, format, ## __VA_ARGS__)
#else

static inline VkResult
__dummy_vk_error(VkResult error, UNUSED const void *ignored)
{
   return error;
}

#define vk_error(error) __dummy_vk_error(error, NULL)
#define vk_errorfi(instance, obj, error, format, ...) __dummy_vk_error(error, instance)
#define vk_errorf(device, obj, error, format, ...) __dummy_vk_error(error, device)
#endif

/**
 * Warn on ignored extension structs.
 *
 * The Vulkan spec requires us to ignore unsupported or unknown structs in
 * a pNext chain. In debug mode, emitting warnings for ignored structs may
 * help us discover structs that we should not have ignored.
 *
 * From the Vulkan 1.0.38 spec:
 *
 *    Any component of the implementation (the loader, any enabled layers,
 *    and drivers) must skip over, without processing (other than reading the
 *    sType and pNext members) any chained structures with sType values not
 *    defined by extensions supported by that component.
 */
#define anv_debug_ignored_stype(sType) \
   mesa_logd("%s: ignored VkStructureType %u\n", __func__, (sType))

void __anv_perf_warn(struct anv_device *device,
                     const struct vk_object_base *object,
                     const char *file, int line, const char *format, ...)
   anv_printflike(5, 6);
void anv_loge(const char *format, ...) anv_printflike(1, 2);
void anv_loge_v(const char *format, va_list va);

/**
 * Print a FINISHME message, including its source location.
 */
#define anv_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         mesa_logw("%s:%d: FINISHME: " format, __FILE__, __LINE__, \
                   ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

/**
 * Print a perf warning message. Set INTEL_DEBUG=perf to see these.
 */
#define anv_perf_warn(instance, obj, format, ...) \
   do { \
      static bool reported = false; \
      if (!reported && (INTEL_DEBUG & DEBUG_PERF)) { \
         __anv_perf_warn(instance, obj, __FILE__, __LINE__, \
                         format, ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

/* A non-fatal assert. Useful for debugging. */
#ifdef DEBUG
#define anv_assert(x) ({ \
   if (unlikely(!(x))) \
      mesa_loge("%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \
})
#else
#define anv_assert(x)
#endif
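
/* Example call site (illustrative, with hypothetical values): in debug
 * builds the macros above attach the file and line of the error site:
 *
 *    return vk_errorf(device, NULL, VK_ERROR_OUT_OF_DEVICE_MEMORY,
 *                     "failed to allocate %"PRIu64" bytes", size);
 */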
struct anv_bo {
   const char *name;

   uint32_t gem_handle;

   uint32_t refcount;

   /* Index into the current validation list. This is used by the
    * validation list building algorithm to track which buffers are already
    * in the validation list so that we can ensure uniqueness.
    */
   uint32_t index;

   /* Index for use with util_sparse_array_free_list */
   uint32_t free_index;

   /* Last known offset. This value is provided by the kernel when we
    * execbuf and is used as the presumed offset for the next bunch of
    * relocations.
    */
   uint64_t offset;

   /** Size of the buffer not including implicit aux */
   uint64_t size;

   /* Map for internally mapped BOs.
    *
    * If ANV_BO_WRAPPER is set in flags, map points to the wrapped BO.
    */
   void *map;

   /** Size of the implicit CCS range at the end of the buffer
    *
    * On Gfx12, CCS data is always a direct 1/256 scale-down. A single 64K
    * page of main surface data maps to a 256B chunk of CCS data and that
    * mapping is provided on TGL-LP by the AUX table which maps virtual
    * memory addresses in the main surface to virtual memory addresses for
    * CCS data.
    *
    * Because we can't change these maps around easily and because Vulkan
    * allows two VkImages to be bound to overlapping memory regions (as long
    * as the app is careful), it's not feasible to make this mapping part of
    * the image. (On Gfx11 and earlier, the mapping was provided via
    * RENDER_SURFACE_STATE so each image had its own main -> CCS mapping.)
    * Instead, we attach the CCS data directly to the buffer object and
    * setup the AUX table mapping at BO creation time.
    *
    * This field is for internal tracking use by the BO allocator only and
    * should not be touched by other parts of the code. If something wants
    * to know if a BO has implicit CCS data, it should instead look at the
    * has_implicit_ccs boolean below.
    *
    * This data is not included in maps of this buffer.
    */
   uint32_t _ccs_size;

   /** Flags to pass to the kernel through drm_i915_exec_object2::flags */
   uint32_t flags;

   /** True if this BO may be shared with other processes */
   bool is_external:1;

   /** True if this BO is a wrapper
    *
    * When set to true, none of the fields in this BO are meaningful except
    * for anv_bo::is_wrapper and anv_bo::map which points to the actual BO.
    * See also anv_bo_unwrap(). Wrapper BOs are not allowed when use_softpin
    * is set in the physical device.
    */
   bool is_wrapper:1;

   /** See also ANV_BO_ALLOC_FIXED_ADDRESS */
   bool has_fixed_address:1;

   /** True if this BO wraps a host pointer */
   bool from_host_ptr:1;

   /** See also ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS */
   bool has_client_visible_address:1;

   /** True if this BO has implicit CCS data attached to it */
   bool has_implicit_ccs:1;
};

static inline struct anv_bo *
anv_bo_ref(struct anv_bo *bo)
{
   p_atomic_inc(&bo->refcount);
   return bo;
}

static inline struct anv_bo *
anv_bo_unwrap(struct anv_bo *bo)
{
   while (bo->is_wrapper)
      bo = bo->map;
   return bo;
}

/* Represents a lock-free linked list of "free" things. This is used by
 * both the block pool and the state pools. Unfortunately, in order to
 * solve the ABA problem, we can't use a single uint32_t head.
 */
union anv_free_list {
   struct {
      uint32_t offset;

      /* A simple count that is incremented every time the head changes. */
      uint32_t count;
   };
   /* Make sure it's aligned to 64 bits. This will make atomic operations
    * faster on 32 bit platforms.
    */
   uint64_t u64 __attribute__((aligned(8)));
};

#define ANV_FREE_LIST_EMPTY ((union anv_free_list) { { UINT32_MAX, 0 } })
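
/* Illustrative sketch (not the driver's implementation, which lives in
 * anv_allocator.c and resolves offsets through an anv_state_table): the
 * 64-bit compare-and-swap below shows how the (offset, count) pair defeats
 * the ABA problem. Even if a stale head's offset is recycled and observed
 * again, the CAS still fails because the count has moved on.
 */
static inline void
anv_free_list_push_sketch(union anv_free_list *list, uint32_t offset,
                          uint32_t *link_slot)
{
   union anv_free_list current, old, new_head;

   current.u64 = list->u64;
   do {
      old = current;
      /* Point the pushed entry's link at the current head. */
      *link_slot = old.offset;
      new_head.offset = offset;
      /* Bumping the generation count is what defeats ABA. */
      new_head.count = old.count + 1;
      current.u64 = p_atomic_cmpxchg(&list->u64, old.u64, new_head.u64);
   } while (current.u64 != old.u64);
}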
struct anv_block_state {
   union {
      struct {
         uint32_t next;
         uint32_t end;
      };
      /* Make sure it's aligned to 64 bits. This will make atomic operations
       * faster on 32 bit platforms.
       */
      uint64_t u64 __attribute__((aligned(8)));
   };
};

#define anv_block_pool_foreach_bo(bo, pool)                           \
   for (struct anv_bo **_pp_bo = (pool)->bos, *bo;                    \
        _pp_bo != &(pool)->bos[(pool)->nbos] && (bo = *_pp_bo, true); \
        _pp_bo++)

#define ANV_MAX_BLOCK_POOL_BOS 20

struct anv_block_pool {
   const char *name;

   struct anv_device *device;
   bool use_softpin;

   /* Wrapper BO for use in relocation lists. This BO is simply a wrapper
    * around the actual BO so that we grow the pool after the wrapper BO has
    * been put in a relocation list. This is only used in the non-softpin
    * case.
    */
   struct anv_bo wrapper_bo;

   struct anv_bo *bos[ANV_MAX_BLOCK_POOL_BOS];
   struct anv_bo *bo;
   uint32_t nbos;

   uint64_t size;

   /* The address where the start of the pool is pinned. The various bos
    * that are created as the pool grows will have addresses in the range
    * [start_address, start_address + BLOCK_POOL_MEMFD_SIZE).
    */
   uint64_t start_address;

   /* The offset from the start of the bo to the "center" of the block
    * pool. Pointers to allocated blocks are given by
    * bo.map + center_bo_offset + offsets.
    */
   uint32_t center_bo_offset;

   /* Current memory map of the block pool. This pointer may or may not
    * point to the actual beginning of the block pool memory. If
    * anv_block_pool_alloc_back has ever been called, then this pointer
    * will point to the "center" position of the buffer and all offsets
    * (negative or positive) given out by the block pool alloc functions
    * will be valid relative to this pointer.
    *
    * In particular, map == bo.map + center_offset
    *
    * DO NOT access this pointer directly. Use anv_block_pool_map() instead,
    * since it will handle the softpin case as well, where this points to
    * NULL.
    */
   void *map;
   int fd;

   /**
    * Array of mmaps and gem handles owned by the block pool, reclaimed when
    * the block pool is destroyed.
    */
   struct u_vector mmap_cleanups;

   struct anv_block_state state;

   struct anv_block_state back_state;
};

/* Block pools are backed by a fixed-size 1GB memfd */
#define BLOCK_POOL_MEMFD_SIZE (1ul << 30)
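
/* Illustrative helper (not part of the original header): walking every BO
 * currently backing a block pool with the iterator macro above.
 */
static inline uint64_t
anv_block_pool_bo_size_sketch(struct anv_block_pool *pool)
{
   uint64_t total = 0;
   anv_block_pool_foreach_bo(bo, pool)
      total += bo->size;
   return total;
}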
/* The center of the block pool is also the middle of the memfd. This may
 * change in the future if we decide differently for some reason.
 */
#define BLOCK_POOL_MEMFD_CENTER (BLOCK_POOL_MEMFD_SIZE / 2)

static inline uint32_t
anv_block_pool_size(struct anv_block_pool *pool)
{
   return pool->state.end + pool->back_state.end;
}

struct anv_state {
   int32_t offset;
   uint32_t alloc_size;
   void *map;
   uint32_t idx;
};

#define ANV_STATE_NULL ((struct anv_state) { .alloc_size = 0 })

struct anv_fixed_size_state_pool {
   union anv_free_list free_list;
   struct anv_block_state block;
};

#define ANV_MIN_STATE_SIZE_LOG2 6
#define ANV_MAX_STATE_SIZE_LOG2 21

#define ANV_STATE_BUCKETS (ANV_MAX_STATE_SIZE_LOG2 - ANV_MIN_STATE_SIZE_LOG2 + 1)

struct anv_free_entry {
   uint32_t next;
   struct anv_state state;
};

struct anv_state_table {
   struct anv_device *device;
   int fd;
   struct anv_free_entry *map;
   uint32_t size;
   struct anv_block_state state;
   struct u_vector cleanups;
};

struct anv_state_pool {
   struct anv_block_pool block_pool;

   /* Offset into the relevant state base address where the state pool
    * starts allocating memory.
    */
   int32_t start_offset;

   struct anv_state_table table;

   /* The size of blocks which will be allocated from the block pool */
   uint32_t block_size;

   /** Free list for "back" allocations */
   union anv_free_list back_alloc_free_list;

   struct anv_fixed_size_state_pool buckets[ANV_STATE_BUCKETS];
};

struct anv_state_reserved_pool {
   struct anv_state_pool *pool;
   union anv_free_list reserved_blocks;
   uint32_t count;
};

struct anv_state_stream {
   struct anv_state_pool *state_pool;

   /* The size of blocks to allocate from the state pool */
   uint32_t block_size;

   /* Current block we're allocating from */
   struct anv_state block;

   /* Offset into the current block at which to allocate the next state */
   uint32_t next;

   /* List of all blocks allocated from this pool */
   struct util_dynarray all_blocks;
};
/* The block_pool functions are exported for testing only. The block pool
 * should only be used via a state pool (see below).
 */
VkResult anv_block_pool_init(struct anv_block_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t start_address,
                             uint32_t initial_size);
void anv_block_pool_finish(struct anv_block_pool *pool);
int32_t anv_block_pool_alloc(struct anv_block_pool *pool,
                             uint32_t block_size, uint32_t *padding);
int32_t anv_block_pool_alloc_back(struct anv_block_pool *pool,
                                  uint32_t block_size);
void *anv_block_pool_map(struct anv_block_pool *pool, int32_t offset,
                         uint32_t size);

VkResult anv_state_pool_init(struct anv_state_pool *pool,
                             struct anv_device *device,
                             const char *name,
                             uint64_t base_address,
                             int32_t start_offset,
                             uint32_t block_size);
void anv_state_pool_finish(struct anv_state_pool *pool);
struct anv_state anv_state_pool_alloc(struct anv_state_pool *pool,
                                      uint32_t state_size, uint32_t alignment);
struct anv_state anv_state_pool_alloc_back(struct anv_state_pool *pool);
void anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state);
void anv_state_stream_init(struct anv_state_stream *stream,
                           struct anv_state_pool *state_pool,
                           uint32_t block_size);
void anv_state_stream_finish(struct anv_state_stream *stream);
struct anv_state anv_state_stream_alloc(struct anv_state_stream *stream,
                                        uint32_t size, uint32_t alignment);

void anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
                                  struct anv_state_pool *parent,
                                  uint32_t count, uint32_t size,
                                  uint32_t alignment);
void anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool);
struct anv_state anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool);
void anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
                                  struct anv_state state);

VkResult anv_state_table_init(struct anv_state_table *table,
                              struct anv_device *device,
                              uint32_t initial_entries);
void anv_state_table_finish(struct anv_state_table *table);
VkResult anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
                             uint32_t count);
void anv_free_list_push(union anv_free_list *list,
                        struct anv_state_table *table,
                        uint32_t idx, uint32_t count);
struct anv_state *anv_free_list_pop(union anv_free_list *list,
                                    struct anv_state_table *table);

static inline struct anv_state *
anv_state_table_get(struct anv_state_table *table, uint32_t idx)
{
   return &table->map[idx].state;
}
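
/* Illustrative usage (an assumption, mirroring how command buffers use
 * streams): a stream hands out transient states from its backing state
 * pool; everything is released at once by anv_state_stream_finish(). The
 * 64-byte size/alignment here is chosen for the example only; it matches
 * the smallest state bucket (1 << ANV_MIN_STATE_SIZE_LOG2).
 */
static inline struct anv_state
anv_transient_state_alloc_sketch(struct anv_state_stream *stream)
{
   return anv_state_stream_alloc(stream, 64, 64);
}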
/**
 * Implements a pool of re-usable BOs. The interface is identical to that
 * of block_pool except that each block is its own BO.
 */
struct anv_bo_pool {
   const char *name;

   struct anv_device *device;

   struct util_sparse_array_free_list free_list[16];
};

void anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                      const char *name);
void anv_bo_pool_finish(struct anv_bo_pool *pool);
VkResult anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
                           struct anv_bo **bo_out);
void anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo);

struct anv_scratch_pool {
   /* Indexed by Per-Thread Scratch Space number (the hardware value) and stage */
   struct anv_bo *bos[16][MESA_SHADER_STAGES];
   uint32_t surfs[16];
   struct anv_state surf_states[16];
};

void anv_scratch_pool_init(struct anv_device *device,
                           struct anv_scratch_pool *pool);
void anv_scratch_pool_finish(struct anv_device *device,
                             struct anv_scratch_pool *pool);
struct anv_bo *anv_scratch_pool_alloc(struct anv_device *device,
                                      struct anv_scratch_pool *pool,
                                      gl_shader_stage stage,
                                      unsigned per_thread_scratch);
uint32_t anv_scratch_pool_get_surf(struct anv_device *device,
                                   struct anv_scratch_pool *pool,
                                   unsigned per_thread_scratch);

/** Implements a BO cache that ensures a 1-1 mapping of GEM BOs to anv_bos */
struct anv_bo_cache {
   struct util_sparse_array bo_map;
   pthread_mutex_t mutex;
};

VkResult anv_bo_cache_init(struct anv_bo_cache *cache);
void anv_bo_cache_finish(struct anv_bo_cache *cache);

struct anv_queue_family {
   /* Standard bits passed on to the client */
   VkQueueFlags queueFlags;
   uint32_t queueCount;

   /* Driver internal information */
   enum drm_i915_gem_engine_class engine_class;
};

#define ANV_MAX_QUEUE_FAMILIES 3

struct anv_memory_type {
   /* Standard bits passed on to the client */
   VkMemoryPropertyFlags propertyFlags;
   uint32_t heapIndex;
};

struct anv_memory_heap {
   /* Standard bits passed on to the client */
   VkDeviceSize size;
   VkMemoryHeapFlags flags;

   /** Driver-internal book-keeping.
    *
    * Align it to 64 bits to make atomic operations faster on 32 bit
    * platforms.
    */
   VkDeviceSize used __attribute__((aligned(8)));

   bool is_local_mem;
};

struct anv_memregion {
   struct drm_i915_gem_memory_class_instance region;
   uint64_t size;
};

struct anv_physical_device {
   struct vk_physical_device vk;

   /* Link in anv_instance::physical_devices */
   struct list_head link;

   struct anv_instance *instance;
   bool no_hw;
   char path[20];
   const char *name;
   struct {
      uint16_t domain;
      uint8_t bus;
      uint8_t device;
      uint8_t function;
   } pci_info;
   struct intel_device_info info;
   /** Amount of "GPU memory" we want to advertise
    *
    * Clearly, this value is bogus since Intel is a UMA architecture. On
    * gfx7 platforms, we are limited by GTT size unless we want to implement
    * fine-grained tracking and GTT splitting. On Broadwell and above we are
    * practically unlimited.
    * However, we will never report more than 3/4 of the total system RAM
    * to try and avoid running out of RAM.
    */
   bool supports_48bit_addresses;
   struct brw_compiler *compiler;
   struct isl_device isl_dev;
   struct intel_perf_config *perf;
   /*
    * Number of commands required to implement a performance query begin +
    * end.
    */
   uint32_t n_perf_query_commands;
   int cmd_parser_version;
   bool has_exec_async;
   bool has_exec_capture;
   bool has_exec_fence;
   bool has_syncobj;
   bool has_syncobj_wait;
   bool has_syncobj_wait_available;
   bool has_context_priority;
   bool has_context_isolation;
   bool has_thread_submit;
   bool has_mem_available;
   bool has_mmap_offset;
   uint64_t gtt_size;

   bool use_softpin;
   bool always_use_bindless;
   bool use_call_secondary;

   /** True if we can access buffers using A64 messages */
   bool has_a64_buffer_access;
   /** True if we can use bindless access for images */
   bool has_bindless_images;
   /** True if we can use bindless access for samplers */
   bool has_bindless_samplers;
   /** True if we can use timeline semaphores through execbuf */
   bool has_exec_timeline;

   /** True if we can read the GPU timestamp register
    *
    * When running in a virtual context, the timestamp register is
    * unreadable on Gfx12+.
    */
   bool has_reg_timestamp;

   /** True if this device has implicit AUX
    *
    * If true, CCS is handled as an implicit attachment to the BO rather
    * than as an explicitly bound surface.
    */
   bool has_implicit_ccs;

   bool always_flush_cache;

   uint32_t eu_total;
   uint32_t subslice_total;

   struct {
      uint32_t family_count;
      struct anv_queue_family families[ANV_MAX_QUEUE_FAMILIES];
   } queue;

   struct {
      uint32_t type_count;
      struct anv_memory_type types[VK_MAX_MEMORY_TYPES];
      uint32_t heap_count;
      struct anv_memory_heap heaps[VK_MAX_MEMORY_HEAPS];
      bool need_clflush;
   } memory;

   struct anv_memregion vram;
   struct anv_memregion sys;
   uint8_t driver_build_sha1[20];
   uint8_t pipeline_cache_uuid[VK_UUID_SIZE];
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];

   struct disk_cache *disk_cache;

   struct wsi_device wsi_device;
   int local_fd;
   bool has_local;
   int64_t local_major;
   int64_t local_minor;
   int master_fd;
   bool has_master;
   int64_t master_major;
   int64_t master_minor;
   struct drm_i915_query_engine_info *engine_info;

   void (*cmd_emit_timestamp)(struct anv_batch *, struct anv_bo *, uint32_t);
   struct intel_measure_device measure_device;
};

struct anv_app_info {
   const char *app_name;
   uint32_t app_version;
   const char *engine_name;
   uint32_t engine_version;
   uint32_t api_version;
};

struct anv_instance {
   struct vk_instance vk;

   bool physical_devices_enumerated;
   struct list_head physical_devices;

   bool pipeline_cache_enabled;

   struct driOptionCache dri_options;
   struct driOptionCache available_dri_options;
};

VkResult anv_init_wsi(struct anv_physical_device *physical_device);
void anv_finish_wsi(struct anv_physical_device *physical_device);

struct anv_queue_submit {
   struct anv_cmd_buffer **cmd_buffers;
   uint32_t cmd_buffer_count;
   uint32_t cmd_buffer_array_length;

   uint32_t fence_count;
   uint32_t fence_array_length;
   struct drm_i915_gem_exec_fence *fences;
   uint64_t *fence_values;

   uint32_t temporary_semaphore_count;
   uint32_t temporary_semaphore_array_length;
   struct anv_semaphore_impl *temporary_semaphores;
   /* Semaphores to be signaled with a SYNC_FD. */
   struct anv_semaphore **sync_fd_semaphores;
   uint32_t sync_fd_semaphore_count;
   uint32_t sync_fd_semaphore_array_length;

   /* Allocated only with non-shareable timelines. */
   union {
      struct anv_timeline **wait_timelines;
      uint32_t *wait_timeline_syncobjs;
   };
   uint32_t wait_timeline_count;
   uint32_t wait_timeline_array_length;
   uint64_t *wait_timeline_values;

   struct anv_timeline **signal_timelines;
   uint32_t signal_timeline_count;
   uint32_t signal_timeline_array_length;
   uint64_t *signal_timeline_values;

   int in_fence;
   bool need_out_fence;
   int out_fence;

   uint32_t fence_bo_count;
   uint32_t fence_bo_array_length;
   /* An array of struct anv_bo pointers with lower bit used as a flag to
    * signal we will wait on that BO (see anv_(un)pack_ptr).
    */
   uintptr_t *fence_bos;

   int perf_query_pass;
   struct anv_query_pool *perf_query_pool;

   const VkAllocationCallbacks *alloc;
   VkSystemAllocationScope alloc_scope;

   struct anv_bo *simple_bo;
   uint32_t simple_bo_size;

   struct list_head link;
};

struct anv_queue {
   struct vk_object_base base;

   struct anv_device *device;

   VkDeviceQueueCreateFlags flags;
   const struct anv_queue_family *family;

   uint32_t exec_flags;

   /* Set once from the device api calls. */
   bool lost_signaled;

   /* Only set once atomically by the queue */
   int lost;
   int error_line;
   const char *error_file;
   char error_msg[80];

   /*
    * This mutex protects the variables below.
    */
   pthread_mutex_t mutex;

   pthread_t thread;
   pthread_cond_t cond;

   /*
    * A list of struct anv_queue_submit to be submitted to i915.
    */
   struct list_head queued_submits;

   /* Set to true to stop the submission thread */
   bool quit;
};

struct anv_pipeline_cache {
   struct vk_object_base base;
   struct anv_device *device;
   pthread_mutex_t mutex;

   struct hash_table *nir_cache;

   struct hash_table *cache;

   bool external_sync;
};

struct nir_xfb_info;
struct anv_pipeline_bind_map;

void anv_pipeline_cache_init(struct anv_pipeline_cache *cache,
                             struct anv_device *device,
                             bool cache_enabled,
                             bool external_sync);
void anv_pipeline_cache_finish(struct anv_pipeline_cache *cache);

struct anv_shader_bin *
anv_pipeline_cache_search(struct anv_pipeline_cache *cache,
                          const void *key, uint32_t key_size);
struct anv_shader_bin *
anv_pipeline_cache_upload_kernel(struct anv_pipeline_cache *cache,
                                 gl_shader_stage stage,
                                 const void *key_data, uint32_t key_size,
                                 const void *kernel_data, uint32_t kernel_size,
                                 const struct brw_stage_prog_data *prog_data,
                                 uint32_t prog_data_size,
                                 const struct brw_compile_stats *stats,
                                 uint32_t num_stats,
                                 const struct nir_xfb_info *xfb_info,
                                 const struct anv_pipeline_bind_map *bind_map);

struct anv_shader_bin *
anv_device_search_for_kernel(struct anv_device *device,
                             struct anv_pipeline_cache *cache,
                             const void *key_data, uint32_t key_size,
                             bool *user_cache_bit);

struct anv_shader_bin *
anv_device_upload_kernel(struct anv_device *device,
                         struct anv_pipeline_cache *cache,
                         gl_shader_stage stage,
                         const void *key_data, uint32_t key_size,
                         const void *kernel_data, uint32_t kernel_size,
                         const struct brw_stage_prog_data *prog_data,
                         uint32_t prog_data_size,
                         const struct brw_compile_stats *stats,
                         uint32_t num_stats,
                         const struct nir_xfb_info *xfb_info,
                         const struct anv_pipeline_bind_map *bind_map);

struct nir_shader;
struct nir_shader_compiler_options;

struct nir_shader *
anv_device_search_for_nir(struct anv_device *device,
                          struct anv_pipeline_cache *cache,
                          const struct nir_shader_compiler_options *nir_options,
                          unsigned char sha1_key[20],
                          void *mem_ctx);

void
anv_device_upload_nir(struct anv_device *device,
                      struct anv_pipeline_cache *cache,
                      const struct nir_shader *nir,
                      unsigned char sha1_key[20]);

struct anv_address {
   struct anv_bo *bo;
   int64_t offset;
};

struct anv_device {
   struct vk_device vk;

   struct anv_physical_device *physical;
   bool no_hw;
   struct intel_device_info info;
   struct isl_device isl_dev;
   int context_id;
   int fd;
   bool can_chain_batches;
   bool robust_buffer_access;
   bool has_thread_submit;

   pthread_mutex_t vma_mutex;
   struct util_vma_heap vma_lo;
   struct util_vma_heap vma_cva;
   struct util_vma_heap vma_hi;

   /** List of all anv_device_memory objects */
   struct list_head memory_objects;

   struct anv_bo_pool batch_bo_pool;

   struct anv_bo_cache bo_cache;

   struct anv_state_pool general_state_pool;
   struct anv_state_pool dynamic_state_pool;
   struct anv_state_pool instruction_state_pool;
   struct anv_state_pool binding_table_pool;
   struct anv_state_pool surface_state_pool;

   struct anv_state_reserved_pool custom_border_colors;

   /** BO used for various workarounds
    *
    * There are a number of workarounds on our hardware which require
    * writing data somewhere and it doesn't really matter where. For that,
    * we use this BO and just write to the first dword or so.
    *
    * We also need to be able to handle NULL buffers bound as pushed UBOs.
    * For that, we use the high bytes (>= 1024) of the workaround BO.
    */
   struct anv_bo *workaround_bo;
   struct anv_address workaround_address;

   struct anv_bo *trivial_batch_bo;
   struct anv_state null_surface_state;

   struct anv_pipeline_cache default_pipeline_cache;
   struct blorp_context blorp;

   struct anv_state border_colors;

   struct anv_state slice_hash;

   uint32_t queue_count;
   struct anv_queue *queues;

   struct anv_scratch_pool scratch_pool;
   struct anv_bo *rt_scratch_bos[16];

   struct anv_shader_bin *rt_trampoline;
   struct anv_shader_bin *rt_trivial_return;

   pthread_mutex_t mutex;
   pthread_cond_t queue_submit;
   int _lost;
   int lost_reported;

   struct intel_batch_decode_ctx decoder_ctx;
   /*
    * When decoding an anv_cmd_buffer, we might need to search for BOs
    * through the cmd_buffer's list.
    */
   struct anv_cmd_buffer *cmd_buffer_being_decoded;

   int perf_fd;          /* -1 if not opened */
   uint64_t perf_metric; /* 0 if unset */

   struct intel_aux_map_context *aux_map_ctx;

   const struct intel_l3_config *l3_config;

   struct intel_debug_block_frame *debug_frame_desc;
};

#if defined(GFX_VERx10) && GFX_VERx10 >= 90
#define ANV_ALWAYS_SOFTPIN true
#else
#define ANV_ALWAYS_SOFTPIN false
#endif

static inline bool
anv_use_softpin(const struct anv_physical_device *pdevice)
{
#if defined(GFX_VERx10) && GFX_VERx10 >= 90
   /* Sky Lake and later always use softpin */
   assert(pdevice->use_softpin);
   return true;
#elif defined(GFX_VERx10) && GFX_VERx10 < 80
   /* Haswell and earlier never use softpin */
   assert(!pdevice->use_softpin);
   return false;
#else
   /* If we don't have a GFX_VERx10 #define, we need to look at the physical
    * device. Also, for GFX version 8, we need to look at the physical
    * device because Broadwell softpins but Cherryview doesn't.
    */
   return pdevice->use_softpin;
#endif
}

static inline struct anv_instance *
anv_device_instance_or_null(const struct anv_device *device)
{
   return device ? device->physical->instance : NULL;
}

static inline struct anv_state_pool *
anv_binding_table_pool(struct anv_device *device)
{
   if (anv_use_softpin(device->physical))
      return &device->binding_table_pool;
   else
      return &device->surface_state_pool;
}

static inline struct anv_state
anv_binding_table_pool_alloc(struct anv_device *device)
{
   if (anv_use_softpin(device->physical))
      return anv_state_pool_alloc(&device->binding_table_pool,
                                  device->binding_table_pool.block_size, 0);
   else
      return anv_state_pool_alloc_back(&device->surface_state_pool);
}

static inline void
anv_binding_table_pool_free(struct anv_device *device, struct anv_state state)
{
   anv_state_pool_free(anv_binding_table_pool(device), state);
}

static inline uint32_t
anv_mocs(const struct anv_device *device,
         const struct anv_bo *bo,
         isl_surf_usage_flags_t usage)
{
   return isl_mocs(&device->isl_dev, usage, bo && bo->is_external);
}
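
/* Illustrative usage (an assumption, not original code): MOCS (memory
 * object cache control) settings are derived from the ISL device and the
 * intended usage, with externally shared BOs getting the variant that is
 * safe for cross-process sharing. E.g. for a BO bound as a vertex buffer:
 */
static inline uint32_t
anv_vertex_buffer_mocs_sketch(const struct anv_device *device,
                              const struct anv_bo *bo)
{
   return anv_mocs(device, bo, ISL_SURF_USAGE_VERTEX_BUFFER_BIT);
}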
void anv_device_init_blorp(struct anv_device *device);
void anv_device_finish_blorp(struct anv_device *device);

void _anv_device_report_lost(struct anv_device *device);
VkResult _anv_device_set_lost(struct anv_device *device,
                              const char *file, int line,
                              const char *msg, ...)
   anv_printflike(4, 5);
VkResult _anv_queue_set_lost(struct anv_queue *queue,
                             const char *file, int line,
                             const char *msg, ...)
   anv_printflike(4, 5);
#define anv_device_set_lost(dev, ...) \
   _anv_device_set_lost(dev, __FILE__, __LINE__, __VA_ARGS__)
#define anv_queue_set_lost(queue, ...) \
   (queue)->device->has_thread_submit ? \
   _anv_queue_set_lost(queue, __FILE__, __LINE__, __VA_ARGS__) : \
   _anv_device_set_lost(queue->device, __FILE__, __LINE__, __VA_ARGS__)

static inline bool
anv_device_is_lost(struct anv_device *device)
{
   int lost = p_atomic_read(&device->_lost);
   if (unlikely(lost && !device->lost_reported))
      _anv_device_report_lost(device);
   return lost;
}

VkResult anv_device_query_status(struct anv_device *device);

enum anv_bo_alloc_flags {
   /** Specifies that the BO must have a 32-bit address
    *
    * This is the opposite of EXEC_OBJECT_SUPPORTS_48B_ADDRESS.
    */
   ANV_BO_ALLOC_32BIT_ADDRESS = (1 << 0),

   /** Specifies that the BO may be shared externally */
   ANV_BO_ALLOC_EXTERNAL = (1 << 1),

   /** Specifies that the BO should be mapped */
   ANV_BO_ALLOC_MAPPED = (1 << 2),

   /** Specifies that the BO should be snooped so we get coherency */
   ANV_BO_ALLOC_SNOOPED = (1 << 3),

   /** Specifies that the BO should be captured in error states */
   ANV_BO_ALLOC_CAPTURE = (1 << 4),

   /** Specifies that the BO will have an address assigned by the caller
    *
    * Such BOs do not exist in any VMA heap.
    */
   ANV_BO_ALLOC_FIXED_ADDRESS = (1 << 5),

   /** Enables implicit synchronization on the BO
    *
    * This is the opposite of EXEC_OBJECT_ASYNC.
    */
   ANV_BO_ALLOC_IMPLICIT_SYNC = (1 << 6),

   /** Enables implicit write synchronization on the BO
    *
    * This is equivalent to EXEC_OBJECT_WRITE.
    */
   ANV_BO_ALLOC_IMPLICIT_WRITE = (1 << 7),

   /** Has an address which is visible to the client */
   ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS = (1 << 8),

   /** This buffer has implicit CCS data attached to it */
   ANV_BO_ALLOC_IMPLICIT_CCS = (1 << 9),

   /** This buffer is allocated from local memory */
   ANV_BO_ALLOC_LOCAL_MEM = (1 << 10),
};

VkResult anv_device_alloc_bo(struct anv_device *device,
                             const char *name, uint64_t size,
                             enum anv_bo_alloc_flags alloc_flags,
                             uint64_t explicit_address,
                             struct anv_bo **bo);
VkResult anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                            void *host_ptr, uint32_t size,
                                            enum anv_bo_alloc_flags alloc_flags,
                                            uint64_t client_address,
                                            struct anv_bo **bo_out);
VkResult anv_device_import_bo(struct anv_device *device, int fd,
                              enum anv_bo_alloc_flags alloc_flags,
                              uint64_t client_address,
                              struct anv_bo **bo);
VkResult anv_device_export_bo(struct anv_device *device,
                              struct anv_bo *bo, int *fd_out);
void anv_device_release_bo(struct anv_device *device,
                           struct anv_bo *bo);

static inline struct anv_bo *
anv_device_lookup_bo(struct anv_device *device, uint32_t gem_handle)
{
   return util_sparse_array_get(&device->bo_cache.bo_map, gem_handle);
}

VkResult anv_device_bo_busy(struct anv_device *device, struct anv_bo *bo);
VkResult anv_device_wait(struct anv_device *device, struct anv_bo *bo,
                         int64_t timeout);

VkResult anv_queue_init(struct anv_device *device, struct anv_queue *queue,
                        uint32_t exec_flags,
                        const VkDeviceQueueCreateInfo *pCreateInfo);
void anv_queue_finish(struct anv_queue *queue);

VkResult anv_queue_execbuf_locked(struct anv_queue *queue,
                                  struct anv_queue_submit *submit);
VkResult anv_queue_submit_simple_batch(struct anv_queue *queue,
                                       struct anv_batch *batch);

uint64_t anv_gettime_ns(void);
uint64_t anv_get_absolute_timeout(uint64_t timeout);
void *anv_gem_mmap(struct anv_device *device,
                   uint32_t gem_handle, uint64_t offset, uint64_t size,
                   uint32_t flags);
void anv_gem_munmap(struct anv_device *device, void *p, uint64_t size);
uint32_t anv_gem_create(struct anv_device *device, uint64_t size);
void anv_gem_close(struct anv_device *device, uint32_t gem_handle);
uint32_t anv_gem_create_regions(struct anv_device *device, uint64_t anv_bo_size,
                                uint32_t num_regions,
                                struct drm_i915_gem_memory_class_instance *regions);
uint32_t anv_gem_userptr(struct anv_device *device, void *mem, size_t size);
int anv_gem_busy(struct anv_device *device, uint32_t gem_handle);
int anv_gem_wait(struct anv_device *device, uint32_t gem_handle,
                 int64_t *timeout_ns);
int anv_gem_execbuffer(struct anv_device *device,
                       struct drm_i915_gem_execbuffer2 *execbuf);
int anv_gem_set_tiling(struct anv_device *device, uint32_t gem_handle,
                       uint32_t stride, uint32_t tiling);
int anv_gem_create_context(struct anv_device *device);
int anv_gem_create_context_engines(struct anv_device *device,
                                   const struct drm_i915_query_engine_info *info,
                                   int num_engines,
                                   uint16_t *engine_classes);
bool anv_gem_has_context_priority(int fd);
int anv_gem_destroy_context(struct anv_device *device, int context);
int anv_gem_set_context_param(int fd, int context, uint32_t param,
                              uint64_t value);
int anv_gem_get_context_param(int fd, int context, uint32_t param,
                              uint64_t *value);
int anv_gem_get_param(int fd, uint32_t param);
uint64_t anv_gem_get_drm_cap(int fd, uint32_t capability);
int anv_gem_get_tiling(struct anv_device *device, uint32_t gem_handle);
bool anv_gem_get_bit6_swizzle(int fd, uint32_t tiling);
int anv_gem_context_get_reset_stats(int fd, int context,
                                    uint32_t *active, uint32_t *pending);
int anv_gem_handle_to_fd(struct anv_device *device, uint32_t gem_handle);
int anv_gem_reg_read(int fd, uint32_t offset, uint64_t *result);
uint32_t anv_gem_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_set_caching(struct anv_device *device, uint32_t gem_handle,
                        uint32_t caching);
int anv_gem_set_domain(struct anv_device *device, uint32_t gem_handle,
                       uint32_t read_domains, uint32_t write_domain);
int anv_gem_sync_file_merge(struct anv_device *device, int fd1, int fd2);
uint32_t anv_gem_syncobj_create(struct anv_device *device, uint32_t flags);
void anv_gem_syncobj_destroy(struct anv_device *device, uint32_t handle);
int anv_gem_syncobj_handle_to_fd(struct anv_device *device, uint32_t handle);
uint32_t anv_gem_syncobj_fd_to_handle(struct anv_device *device, int fd);
int anv_gem_syncobj_export_sync_file(struct anv_device *device,
                                     uint32_t handle);
int anv_gem_syncobj_import_sync_file(struct anv_device *device,
                                     uint32_t handle, int fd);
void anv_gem_syncobj_reset(struct anv_device *device, uint32_t handle);
bool anv_gem_supports_syncobj_wait(int fd);
int anv_gem_syncobj_wait(struct anv_device *device,
                         const uint32_t *handles, uint32_t num_handles,
                         int64_t abs_timeout_ns, bool wait_all);
int anv_gem_syncobj_timeline_wait(struct anv_device *device,
                                  const uint32_t *handles, const uint64_t *points,
                                  uint32_t num_items, int64_t abs_timeout_ns,
                                  bool wait_all, bool wait_materialize);
int anv_gem_syncobj_timeline_signal(struct anv_device *device,
                                    const uint32_t *handles, const uint64_t *points,
                                    uint32_t num_items);
int anv_gem_syncobj_timeline_query(struct anv_device *device,
                                   const uint32_t *handles, uint64_t *points,
                                   uint32_t num_items);
int anv_i915_query(int fd, uint64_t query_id, void *buffer,
                   int32_t *buffer_len);
struct drm_i915_query_engine_info *anv_gem_get_engine_info(int fd);
int anv_gem_count_engines(const struct drm_i915_query_engine_info *info,
                          uint16_t engine_class);

uint64_t anv_vma_alloc(struct anv_device *device,
                       uint64_t size, uint64_t align,
                       enum anv_bo_alloc_flags alloc_flags,
                       uint64_t client_address);
void anv_vma_free(struct anv_device *device,
                  uint64_t address, uint64_t size);

struct anv_reloc_list {
   uint32_t num_relocs;
   uint32_t array_length;
   struct drm_i915_gem_relocation_entry *relocs;
   struct anv_bo **reloc_bos;
   uint32_t dep_words;
   BITSET_WORD *deps;
};

VkResult anv_reloc_list_init(struct anv_reloc_list *list,
                             const VkAllocationCallbacks *alloc);
void anv_reloc_list_finish(struct anv_reloc_list *list,
                           const VkAllocationCallbacks *alloc);

VkResult anv_reloc_list_add(struct anv_reloc_list *list,
                            const VkAllocationCallbacks *alloc,
                            uint32_t offset, struct anv_bo *target_bo,
                            uint32_t delta, uint64_t *address_u64_out);

VkResult anv_reloc_list_add_bo(struct anv_reloc_list *list,
                               const VkAllocationCallbacks *alloc,
                               struct anv_bo *target_bo);

struct anv_batch_bo {
   /* Link in the anv_cmd_buffer.owned_batch_bos list */
   struct list_head link;

   struct anv_bo *bo;

   /* Bytes actually consumed in this batch BO */
   uint32_t length;

   /* When this batch BO is used as part of a primary batch buffer, this
    * tracks whether it is chained to another primary batch buffer.
    *
    * If this is the case, the relocation list's last entry points to the
    * location of the MI_BATCH_BUFFER_START chaining to the next batch.
    */
   bool chained;

   struct anv_reloc_list relocs;
};

struct anv_batch {
   const VkAllocationCallbacks *alloc;

   struct anv_address start_addr;

   void *start;
   void *end;
   void *next;

   struct anv_reloc_list *relocs;

   /* This callback is called (with the associated user data) in the event
    * that the batch runs out of space.
    */
   VkResult (*extend_cb)(struct anv_batch *, void *);
   void *user_data;

   /**
    * Current error status of the command buffer. Used to track inconsistent
    * or incomplete command buffer states that are the consequence of
    * run-time errors such as out of memory scenarios.
    * We want to track this in the batch because the command buffer object
    * is not visible to some parts of the driver.
    */
   VkResult status;
};

void *anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords);
void anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other);
struct anv_address anv_batch_address(struct anv_batch *batch,
                                     void *batch_location);

static inline void
anv_batch_set_storage(struct anv_batch *batch, struct anv_address addr,
                      void *map, size_t size)
{
   batch->start_addr = addr;
   batch->next = batch->start = map;
   batch->end = map + size;
}

static inline VkResult
anv_batch_set_error(struct anv_batch *batch, VkResult error)
{
   assert(error != VK_SUCCESS);
   if (batch->status == VK_SUCCESS)
      batch->status = error;
   return batch->status;
}

static inline bool
anv_batch_has_error(struct anv_batch *batch)
{
   return batch->status != VK_SUCCESS;
}

static inline uint64_t
anv_batch_emit_reloc(struct anv_batch *batch,
                     void *location, struct anv_bo *bo, uint32_t delta)
{
   uint64_t address_u64 = 0;
   VkResult result;

   if (ANV_ALWAYS_SOFTPIN) {
      address_u64 = bo->offset + delta;
      result = anv_reloc_list_add_bo(batch->relocs, batch->alloc, bo);
   } else {
      result = anv_reloc_list_add(batch->relocs, batch->alloc,
                                  location - batch->start, bo, delta,
                                  &address_u64);
   }
   if (unlikely(result != VK_SUCCESS)) {
      anv_batch_set_error(batch, result);
      return 0;
   }

   return address_u64;
}

#define ANV_NULL_ADDRESS ((struct anv_address) { NULL, 0 })

static inline struct anv_address
anv_address_from_u64(uint64_t addr_u64)
{
   assert(addr_u64 == intel_canonical_address(addr_u64));
   return (struct anv_address) {
      .bo = NULL,
      .offset = addr_u64,
   };
}

static inline bool
anv_address_is_null(struct anv_address addr)
{
   return addr.bo == NULL && addr.offset == 0;
}

static inline uint64_t
anv_address_physical(struct anv_address addr)
{
   if (addr.bo && (ANV_ALWAYS_SOFTPIN ||
                   (addr.bo->flags & EXEC_OBJECT_PINNED))) {
      assert(addr.bo->flags & EXEC_OBJECT_PINNED);
      return intel_canonical_address(addr.bo->offset + addr.offset);
   } else {
      return intel_canonical_address(addr.offset);
   }
}

static inline struct anv_address
anv_address_add(struct anv_address addr, uint64_t offset)
{
   addr.offset += offset;
   return addr;
}

static inline void
write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
{
   unsigned reloc_size = 0;
   if (device->info.ver >= 8) {
      reloc_size = sizeof(uint64_t);
      *(uint64_t *)p = intel_canonical_address(v);
   } else {
      reloc_size = sizeof(uint32_t);
      *(uint32_t *)p = v;
   }

   if (flush && !device->info.has_llc)
      intel_flush_range(p, reloc_size);
}
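
/* Illustrative helper (not part of the original header): composing an
 * address a given number of bytes into a BO and resolving it to a canonical
 * 48-bit GPU address. Only meaningful once the BO has an assigned
 * (pinned/softpin) offset; anv_address_physical() asserts this.
 */
static inline uint64_t
anv_bo_gpu_address_sketch(struct anv_bo *bo, uint64_t byte_offset)
{
   struct anv_address addr = { .bo = bo, .offset = 0 };
   return anv_address_physical(anv_address_add(addr, byte_offset));
}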
static inline uint64_t
_anv_combine_address(struct anv_batch *batch, void *location,
                     const struct anv_address address, uint32_t delta)
{
   if (address.bo == NULL) {
      return address.offset + delta;
   } else if (batch == NULL) {
      assert(address.bo->flags & EXEC_OBJECT_PINNED);
      return anv_address_physical(anv_address_add(address, delta));
   } else {
      assert(batch->start <= location && location < batch->end);
      /* i915 relocations are signed. */
      assert(INT32_MIN <= address.offset && address.offset <= INT32_MAX);
      return anv_batch_emit_reloc(batch, location, address.bo,
                                  address.offset + delta);
   }
}

#define __gen_address_type struct anv_address
#define __gen_user_data struct anv_batch
#define __gen_combine_address _anv_combine_address

/* Wrapper macros needed to work around preprocessor argument issues. In
 * particular, arguments don't get pre-evaluated if they are concatenated.
 * This means that, if you pass GENX(3DSTATE_PS) into the emit macro, the
 * GENX macro won't get evaluated if the emit macro contains "cmd ## foo".
 * We can work around this easily enough with these helpers.
 */
#define __anv_cmd_length(cmd) cmd ## _length
#define __anv_cmd_length_bias(cmd) cmd ## _length_bias
#define __anv_cmd_header(cmd) cmd ## _header
#define __anv_cmd_pack(cmd) cmd ## _pack
#define __anv_reg_num(reg) reg ## _num

#define anv_pack_struct(dst, struc, ...) do {                              \
      struct struc __template = {                                          \
         __VA_ARGS__                                                       \
      };                                                                   \
      __anv_cmd_pack(struc)(NULL, dst, &__template);                       \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dst, __anv_cmd_length(struc) * 4)); \
   } while (0)

#define anv_batch_emitn(batch, n, cmd, ...) ({             \
      void *__dst = anv_batch_emit_dwords(batch, n);       \
      if (__dst) {                                         \
         struct cmd __template = {                         \
            __anv_cmd_header(cmd),                         \
            .DWordLength = n - __anv_cmd_length_bias(cmd), \
            __VA_ARGS__                                    \
         };                                                \
         __anv_cmd_pack(cmd)(batch, __dst, &__template);   \
      }                                                    \
      __dst;                                               \
   })

#define anv_batch_emit_merge(batch, dwords0, dwords1)                 \
   do {                                                               \
      uint32_t *dw;                                                   \
                                                                      \
      STATIC_ASSERT(ARRAY_SIZE(dwords0) == ARRAY_SIZE(dwords1));      \
      dw = anv_batch_emit_dwords((batch), ARRAY_SIZE(dwords0));       \
      if (!dw)                                                        \
         break;                                                       \
      for (uint32_t i = 0; i < ARRAY_SIZE(dwords0); i++)              \
         dw[i] = (dwords0)[i] | (dwords1)[i];                         \
      VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, ARRAY_SIZE(dwords0) * 4)); \
   } while (0)

#define anv_batch_emit(batch, cmd, name)                                       \
   for (struct cmd name = { __anv_cmd_header(cmd) },                           \
        *_dst = anv_batch_emit_dwords(batch, __anv_cmd_length(cmd));           \
        __builtin_expect(_dst != NULL, 1);                                     \
        ({ __anv_cmd_pack(cmd)(batch, _dst, &name);                            \
           VG(VALGRIND_CHECK_MEM_IS_DEFINED(_dst, __anv_cmd_length(cmd) * 4)); \
           _dst = NULL;                                                        \
         }))

#define anv_batch_write_reg(batch, reg, name)                           \
   for (struct reg name = {}, *_cont = (struct reg *)1; _cont != NULL;  \
        ({                                                              \
            uint32_t _dw[__anv_cmd_length(reg)];                        \
            __anv_cmd_pack(reg)(NULL, _dw, &name);                      \
            for (unsigned i = 0; i < __anv_cmd_length(reg); i++) {      \
               anv_batch_emit(batch, GENX(MI_LOAD_REGISTER_IMM), lri) { \
                  lri.RegisterOffset   = __anv_reg_num(reg);            \
                  lri.DataDWord        = _dw[i];                        \
               }                                                        \
            }                                                           \
            _cont = NULL;                                               \
         }))

/* #define __gen_get_batch_dwords anv_batch_emit_dwords */
/* #define __gen_get_batch_address anv_batch_address */
/* #define __gen_address_value anv_address_physical */
/* #define __gen_address_offset anv_address_add */
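
/* Example usage of anv_batch_emit() (illustrative; the command fields come
 * from the per-gen genxml definitions and fb_width/fb_height are
 * hypothetical locals, so this lives in a comment rather than compiled
 * code). The struct is filled in the loop body and packed into the batch
 * when the body closes:
 *
 *    anv_batch_emit(&batch, GENX(3DSTATE_DRAWING_RECTANGLE), rect) {
 *       rect.ClippedDrawingRectangleXMax = fb_width - 1;
 *       rect.ClippedDrawingRectangleYMax = fb_height - 1;
 *    }
 */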
struct anv_device_memory {
   struct vk_object_base base;

   struct list_head link;

   struct anv_bo * bo;
   const struct anv_memory_type * type;
   VkDeviceSize map_size;
   void * map;

   /* If set, we are holding a reference to an AHardwareBuffer
    * which we must release when the memory is freed.
    */
   struct AHardwareBuffer * ahw;

   /* If set, this memory comes from a host pointer. */
   void * host_ptr;
};

/**
 * Header for Vertex URB Entry (VUE)
 */
struct anv_vue_header {
   uint32_t Reserved;
   uint32_t RTAIndex; /* RenderTargetArrayIndex */
   uint32_t ViewportIndex;
   float PointWidth;
};

/** Struct representing a sampled image descriptor
 *
 * This descriptor layout is used for sampled images, bare samplers, and
 * combined image/sampler descriptors.
 */
struct anv_sampled_image_descriptor {
   /** Bindless image handle
    *
    * This is expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t image;

   /** Bindless sampler handle
    *
    * This is assumed to be a 32B-aligned SAMPLER_STATE pointer relative
    * to the dynamic state base address.
    */
   uint32_t sampler;
};

struct anv_texture_swizzle_descriptor {
   /** Texture swizzle
    *
    * See also nir_intrinsic_channel_select_intel
    */
   uint8_t swizzle[4];

   /** Unused padding to ensure the struct is a multiple of 64 bits */
   uint32_t _pad;
};

/** Struct representing a storage image descriptor */
struct anv_storage_image_descriptor {
   /** Bindless image handles
    *
    * These are expected to already be shifted such that the 20-bit
    * SURFACE_STATE table index is in the top 20 bits.
    */
   uint32_t read_write;
   uint32_t write_only;
};

/** Struct representing an address/range descriptor
 *
 * The fields of this struct correspond directly to the data layout of
 * nir_address_format_64bit_bounded_global addresses. The last field is the
 * offset in the NIR address, so it must be zero so that when you load the
 * descriptor you get a pointer to the start of the range.
 */
struct anv_address_range_descriptor {
   uint64_t address;
   uint32_t range;
   uint32_t zero;
};

enum anv_descriptor_data {
   /** The descriptor contains a BTI reference to a surface state */
   ANV_DESCRIPTOR_SURFACE_STATE = (1 << 0),
   /** The descriptor contains a BTI reference to a sampler state */
   ANV_DESCRIPTOR_SAMPLER_STATE = (1 << 1),
   /** The descriptor contains an actual buffer view */
   ANV_DESCRIPTOR_BUFFER_VIEW = (1 << 2),
   /** The descriptor contains auxiliary image layout data */
   ANV_DESCRIPTOR_IMAGE_PARAM = (1 << 3),
   /** The descriptor contains inline uniform data */
   ANV_DESCRIPTOR_INLINE_UNIFORM = (1 << 4),
   /** anv_address_range_descriptor with a buffer address and range */
   ANV_DESCRIPTOR_ADDRESS_RANGE = (1 << 5),
   /** Bindless surface handle */
   ANV_DESCRIPTOR_SAMPLED_IMAGE = (1 << 6),
   /** Storage image handles */
   ANV_DESCRIPTOR_STORAGE_IMAGE = (1 << 7),
   /** Texture swizzle data */
   ANV_DESCRIPTOR_TEXTURE_SWIZZLE = (1 << 8),
};

struct anv_descriptor_set_binding_layout {
   /* The type of the descriptors in this binding */
   VkDescriptorType type;

   /* Flags provided when this binding was created */
   VkDescriptorBindingFlagsEXT flags;

   /* Bitfield representing the type of data this descriptor contains */
   enum anv_descriptor_data data;

   /* Maximum number of YCbCr texture/sampler planes */
   uint8_t max_plane_count;

   /* Number of array elements in this binding (or size in bytes for inline
    * uniform data)
    */
   uint32_t array_size;

   /* Index into the flattened descriptor set */
   uint32_t descriptor_index;

   /* Index into the dynamic state array for a dynamic buffer */
   int16_t dynamic_offset_index;

   /* Index into the descriptor set buffer views */
   int32_t buffer_view_index;

   /* Offset into the descriptor buffer where this descriptor lives */
   uint32_t descriptor_offset;

   /* Immutable samplers (or NULL if no immutable samplers) */
   struct anv_sampler **immutable_samplers;
};

unsigned anv_descriptor_size(const struct anv_descriptor_set_binding_layout *layout);

unsigned anv_descriptor_type_size(const struct anv_physical_device *pdevice,
                                  VkDescriptorType type);

bool anv_descriptor_supports_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);

bool anv_descriptor_requires_bindless(const struct anv_physical_device *pdevice,
                                      const struct anv_descriptor_set_binding_layout *binding,
                                      bool sampler);

struct anv_descriptor_set_layout {
   struct vk_object_base base;

   /* Descriptor set layouts can be destroyed at almost any time */
   uint32_t ref_cnt;

   /* Number of bindings in this descriptor set */
   uint32_t binding_count;

   /* Total number of descriptors */
   uint32_t descriptor_count;

   /* Shader stages affected by this descriptor set */
   uint16_t shader_stages;

   /* Number of buffer views in this descriptor set */
   uint32_t buffer_view_count;

   /* Number of dynamic offsets used by this descriptor set */
   uint16_t dynamic_offset_count;

   /* For each dynamic buffer, which VkShaderStageFlagBits stages are using
    * this buffer
    */
   VkShaderStageFlags dynamic_offset_stages[MAX_DYNAMIC_BUFFERS];

   /* Size of the descriptor buffer for this descriptor set */
   uint32_t descriptor_buffer_size;

   /* Bindings in this descriptor set */
   struct anv_descriptor_set_binding_layout binding[0];
};

void anv_descriptor_set_layout_destroy(struct anv_device *device,
                                       struct anv_descriptor_set_layout *layout);

static inline void
anv_descriptor_set_layout_ref(struct anv_descriptor_set_layout *layout)
{
   assert(layout && layout->ref_cnt >= 1);
   p_atomic_inc(&layout->ref_cnt);
}

static inline void
anv_descriptor_set_layout_unref(struct anv_device *device,
                                struct anv_descriptor_set_layout *layout)
{
   assert(layout && layout->ref_cnt >= 1);
   if (p_atomic_dec_zero(&layout->ref_cnt))
      anv_descriptor_set_layout_destroy(device, layout);
}
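/* Usage sketch (illustrative): objects that outlive the client-visible
 * layout, e.g. descriptor sets, take their own reference:
 *
 *    anv_descriptor_set_layout_ref(layout);
 *    set->layout = layout;
 *    ...
 *    anv_descriptor_set_layout_unref(device, set->layout);  // on destroy
 *
 * The last unref, wherever it happens, frees the layout.
 */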
struct anv_descriptor {
   VkDescriptorType type;

   union {
      struct {
         VkImageLayout layout;
         struct anv_image_view *image_view;
         struct anv_sampler *sampler;
      };

      struct {
         struct anv_buffer *buffer;
         uint64_t offset;
         uint64_t range;
      };

      struct anv_buffer_view *buffer_view;
   };
};

struct anv_descriptor_set {
   struct vk_object_base base;

   struct anv_descriptor_pool *pool;
   struct anv_descriptor_set_layout *layout;

   /* Amount of space occupied in the pool by this descriptor set. It can
    * be larger than the size of the descriptor set.
    */
   uint32_t size;

   /* State relative to anv_descriptor_pool::bo */
   struct anv_state desc_mem;
   /* Surface state for the descriptor buffer */
   struct anv_state desc_surface_state;

   /* Descriptor set address. */
   struct anv_address desc_addr;

   uint32_t buffer_view_count;
   struct anv_buffer_view *buffer_views;

   /* Link to the descriptor pool's desc_sets list. */
   struct list_head pool_link;

   uint32_t descriptor_count;
   struct anv_descriptor descriptors[0];
};

static inline bool
anv_descriptor_set_is_push(struct anv_descriptor_set *set)
{
   return set->pool == NULL;
}

struct anv_buffer_view {
   struct vk_object_base base;

   enum isl_format format; /**< VkBufferViewCreateInfo::format */
   uint64_t range; /**< VkBufferViewCreateInfo::range */

   struct anv_address address;

   struct anv_state surface_state;
   struct anv_state storage_surface_state;
   struct anv_state writeonly_storage_surface_state;

   struct brw_image_param storage_image_param;
};

struct anv_push_descriptor_set {
   struct anv_descriptor_set set;

   /* Put this field right behind anv_descriptor_set so it fills up the
    * descriptors[0] field. */
   struct anv_descriptor descriptors[MAX_PUSH_DESCRIPTORS];

   /** True if the descriptor set buffer has been referenced by a draw or
    * dispatch command.
    */
   bool set_used_on_gpu;

   struct anv_buffer_view buffer_views[MAX_PUSH_DESCRIPTORS];
};

static inline struct anv_address
anv_descriptor_set_address(struct anv_descriptor_set *set)
{
   if (anv_descriptor_set_is_push(set)) {
      /* We have to flag the push descriptor set as used on the GPU so
       * that the next time we push descriptors, we grab new memory.
       */
      struct anv_push_descriptor_set *push_set =
         (struct anv_push_descriptor_set *)set;
      push_set->set_used_on_gpu = true;
   }

   return set->desc_addr;
}
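/* Usage sketch (illustrative): taking the address of a push descriptor set
 * marks it used, so the next vkCmdPushDescriptorSetKHR() allocates fresh
 * memory instead of overwriting data the GPU may still be reading:
 *
 *    struct anv_address addr = anv_descriptor_set_address(set);
 *    push->desc_sets[set_index] = anv_address_physical(addr);
 *
 * Here "push" stands for the push-constant block holding per-set base
 * addresses (see anv_push_constants::desc_sets below).
 */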
struct anv_descriptor_pool {
   struct vk_object_base base;

   uint32_t size;
   uint32_t next;
   uint32_t free_list;

   struct anv_bo *bo;
   struct util_vma_heap bo_heap;

   struct anv_state_stream surface_state_stream;
   void *surface_state_free_list;

   struct list_head desc_sets;

   char data[0];
};

enum anv_descriptor_template_entry_type {
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_IMAGE,
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER,
   ANV_DESCRIPTOR_TEMPLATE_ENTRY_TYPE_BUFFER_VIEW
};

struct anv_descriptor_template_entry {
   /* The type of descriptor in this entry */
   VkDescriptorType type;

   /* Binding in the descriptor set */
   uint32_t binding;

   /* Offset at which to write into the descriptor set binding */
   uint32_t array_element;

   /* Number of elements to write into the descriptor set binding */
   uint32_t array_count;

   /* Offset into the user provided data */
   size_t offset;

   /* Stride between elements in the user provided data */
   size_t stride;
};

struct anv_descriptor_update_template {
   struct vk_object_base base;

   VkPipelineBindPoint bind_point;

   /* The descriptor set this template corresponds to. This value is only
    * valid if the template was created with the templateType
    * VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.
    */
   uint8_t set;

   /* Number of entries in this template */
   uint32_t entry_count;

   /* Entries of the template */
   struct anv_descriptor_template_entry entries[0];
};

size_t
anv_descriptor_set_layout_size(const struct anv_descriptor_set_layout *layout,
                               uint32_t var_desc_count);

uint32_t
anv_descriptor_set_layout_descriptor_buffer_size(const struct anv_descriptor_set_layout *set_layout,
                                                 uint32_t var_desc_count);

void
anv_descriptor_set_write_image_view(struct anv_device *device,
                                    struct anv_descriptor_set *set,
                                    const VkDescriptorImageInfo * const info,
                                    VkDescriptorType type,
                                    uint32_t binding,
                                    uint32_t element);

void
anv_descriptor_set_write_buffer_view(struct anv_device *device,
                                     struct anv_descriptor_set *set,
                                     VkDescriptorType type,
                                     struct anv_buffer_view *buffer_view,
                                     uint32_t binding,
                                     uint32_t element);

void
anv_descriptor_set_write_buffer(struct anv_device *device,
                                struct anv_descriptor_set *set,
                                struct anv_state_stream *alloc_stream,
                                VkDescriptorType type,
                                struct anv_buffer *buffer,
                                uint32_t binding,
                                uint32_t element,
                                VkDeviceSize offset,
                                VkDeviceSize range);

void
anv_descriptor_set_write_acceleration_structure(struct anv_device *device,
                                                struct anv_descriptor_set *set,
                                                struct anv_acceleration_structure *accel,
                                                uint32_t binding,
                                                uint32_t element);

void
anv_descriptor_set_write_inline_uniform_data(struct anv_device *device,
                                             struct anv_descriptor_set *set,
                                             uint32_t binding,
                                             const void *data,
                                             size_t offset,
                                             size_t size);

void
anv_descriptor_set_write_template(struct anv_device *device,
                                  struct anv_descriptor_set *set,
                                  struct anv_state_stream *alloc_stream,
                                  const struct anv_descriptor_update_template *template,
                                  const void *data);

VkResult
anv_descriptor_set_create(struct anv_device *device,
                          struct anv_descriptor_pool *pool,
                          struct anv_descriptor_set_layout *layout,
                          uint32_t var_desc_count,
                          struct anv_descriptor_set **out_set);

void
anv_descriptor_set_destroy(struct anv_device *device,
                           struct anv_descriptor_pool *pool,
                           struct anv_descriptor_set *set);

#define ANV_DESCRIPTOR_SET_NULL (UINT8_MAX - 5)
#define ANV_DESCRIPTOR_SET_PUSH_CONSTANTS (UINT8_MAX - 4)
#define ANV_DESCRIPTOR_SET_DESCRIPTORS (UINT8_MAX - 3)
#define ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS (UINT8_MAX - 2)
#define ANV_DESCRIPTOR_SET_SHADER_CONSTANTS (UINT8_MAX - 1)
#define ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS UINT8_MAX

struct anv_pipeline_binding {
   /** Index in the descriptor set
    *
    * This is a flattened index; the descriptor set layout is already taken
    * into account.
    */
   uint32_t index;

   /** The descriptor set this surface corresponds to.
    *
    * The special ANV_DESCRIPTOR_SET_* values above indicate that this
    * binding is not a normal descriptor set but something else.
    */
   uint8_t set;

   union {
      /** Plane in the binding index for images */
      uint8_t plane;

      /** Input attachment index (relative to the subpass) */
      uint8_t input_attachment_index;

      /** Dynamic offset index (for dynamic UBOs and SSBOs) */
      uint8_t dynamic_offset_index;
   };

   /** For a storage image, whether it is write-only */
   uint8_t write_only;

   /** Pad to 64 bits so that there are no holes and we can safely memcmp
    * assuming POD zero-initialization.
    */
   uint8_t pad;
};

struct anv_push_range {
   /** Index in the descriptor set */
   uint32_t index;

   /** Descriptor set index */
   uint8_t set;

   /** Dynamic offset index (for dynamic UBOs) */
   uint8_t dynamic_offset_index;

   /** Start offset in units of 32B */
   uint8_t start;

   /** Range in units of 32B */
   uint8_t length;
};

struct anv_pipeline_layout {
   struct vk_object_base base;

   struct {
      struct anv_descriptor_set_layout *layout;
      uint32_t dynamic_offset_start;
   } set[MAX_SETS];

   uint32_t num_sets;

   unsigned char sha1[20];
};

struct anv_buffer {
   struct vk_object_base base;

   struct anv_device * device;
   VkDeviceSize size;

   VkBufferCreateFlags create_flags;
   VkBufferUsageFlags usage;

   /* Set when bound */
   struct anv_address address;
};

static inline uint64_t
anv_buffer_get_range(struct anv_buffer *buffer, uint64_t offset, uint64_t range)
{
   assert(offset <= buffer->size);
   if (range == VK_WHOLE_SIZE) {
      return buffer->size - offset;
   } else {
      assert(range + offset >= range);
      assert(range + offset <= buffer->size);
      return range;
   }
}
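/* Worked example (illustrative): for a 4096-byte buffer,
 *
 *    anv_buffer_get_range(buf, 256, VK_WHOLE_SIZE)  -> 3840 (size - offset)
 *    anv_buffer_get_range(buf, 256, 1024)           -> 1024 (range as given)
 *
 * The first assert in the else branch catches offset + range wrapping past
 * UINT64_MAX.
 */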
enum anv_cmd_dirty_bits {
   ANV_CMD_DIRTY_DYNAMIC_VIEWPORT = 1 << 0, /* VK_DYNAMIC_STATE_VIEWPORT */
   ANV_CMD_DIRTY_DYNAMIC_SCISSOR = 1 << 1, /* VK_DYNAMIC_STATE_SCISSOR */
   ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH = 1 << 2, /* VK_DYNAMIC_STATE_LINE_WIDTH */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS = 1 << 3, /* VK_DYNAMIC_STATE_DEPTH_BIAS */
   ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS = 1 << 4, /* VK_DYNAMIC_STATE_BLEND_CONSTANTS */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS = 1 << 5, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 6, /* VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK = 1 << 7, /* VK_DYNAMIC_STATE_STENCIL_WRITE_MASK */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE = 1 << 8, /* VK_DYNAMIC_STATE_STENCIL_REFERENCE */
   ANV_CMD_DIRTY_PIPELINE = 1 << 9,
   ANV_CMD_DIRTY_INDEX_BUFFER = 1 << 10,
   ANV_CMD_DIRTY_RENDER_TARGETS = 1 << 11,
   ANV_CMD_DIRTY_XFB_ENABLE = 1 << 12,
   ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE = 1 << 13, /* VK_DYNAMIC_STATE_LINE_STIPPLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_CULL_MODE = 1 << 14, /* VK_DYNAMIC_STATE_CULL_MODE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE = 1 << 15, /* VK_DYNAMIC_STATE_FRONT_FACE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY = 1 << 16, /* VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT */
   ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE = 1 << 17, /* VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE = 1 << 18, /* VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE = 1 << 19, /* VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP = 1 << 20, /* VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE = 1 << 21, /* VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE = 1 << 22, /* VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP = 1 << 23, /* VK_DYNAMIC_STATE_STENCIL_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS = 1 << 24, /* VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT */
   ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE = 1 << 25, /* VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE = 1 << 26, /* VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR */
   ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE = 1 << 27, /* VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE = 1 << 28, /* VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT */
   ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP = 1 << 29, /* VK_DYNAMIC_STATE_LOGIC_OP_EXT */
   ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE = 1 << 30, /* VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT */
};
typedef uint32_t anv_cmd_dirty_mask_t;

#define ANV_CMD_DIRTY_DYNAMIC_ALL                       \
   (ANV_CMD_DIRTY_DYNAMIC_VIEWPORT |                    \
    ANV_CMD_DIRTY_DYNAMIC_SCISSOR |                     \
    ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH |                  \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |                  \
    ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |             \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |                \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |        \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |          \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE |           \
    ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE |                \
    ANV_CMD_DIRTY_DYNAMIC_CULL_MODE |                   \
    ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE |                  \
    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |          \
    ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE |           \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |          \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP |            \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |    \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE |         \
    ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP |                  \
    ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |            \
    ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |           \
    ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |                \
    ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |   \
    ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE |           \
    ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |                    \
    ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)

static inline enum anv_cmd_dirty_bits
anv_cmd_dirty_bit_for_vk_dynamic_state(VkDynamicState vk_state)
{
   switch (vk_state) {
   case VK_DYNAMIC_STATE_VIEWPORT:
   case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_VIEWPORT;
   case VK_DYNAMIC_STATE_SCISSOR:
   case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_SCISSOR;
   case VK_DYNAMIC_STATE_LINE_WIDTH:
      return ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
   case VK_DYNAMIC_STATE_DEPTH_BIAS:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
   case VK_DYNAMIC_STATE_BLEND_CONSTANTS:
      return ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
   case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
   case VK_DYNAMIC_STATE_STENCIL_REFERENCE:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
   case VK_DYNAMIC_STATE_LINE_STIPPLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
   case VK_DYNAMIC_STATE_CULL_MODE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_CULL_MODE;
   case VK_DYNAMIC_STATE_FRONT_FACE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
   case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
   case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE;
   case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
   case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
   case VK_DYNAMIC_STATE_STENCIL_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
   case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
   case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE;
   case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR:
      return ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE;
   case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
   case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE;
   case VK_DYNAMIC_STATE_LOGIC_OP_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
   case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE_EXT:
      return ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
   default:
      assert(!"Unsupported dynamic state");
      return 0;
   }
}
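/* Usage sketch (illustrative): at pipeline creation the dynamic-state list
 * from VkPipelineDynamicStateCreateInfo can be folded into a single dirty
 * mask:
 *
 *    anv_cmd_dirty_mask_t states = 0;
 *    for (uint32_t s = 0; s < info->dynamicStateCount; s++)
 *       states |= anv_cmd_dirty_bit_for_vk_dynamic_state(info->pDynamicStates[s]);
 */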
enum anv_pipe_bits {
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT = (1 << 0),
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT = (1 << 1),
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT = (1 << 2),
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT = (1 << 3),
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT = (1 << 4),
   ANV_PIPE_DATA_CACHE_FLUSH_BIT = (1 << 5),
   ANV_PIPE_TILE_CACHE_FLUSH_BIT = (1 << 6),
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT = (1 << 10),
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT = (1 << 11),
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT = (1 << 12),
   ANV_PIPE_DEPTH_STALL_BIT = (1 << 13),

   /* ANV_PIPE_HDC_PIPELINE_FLUSH_BIT is a precise way to ensure prior data
    * cache work has completed. Available on Gfx12+. For earlier Gfx we
    * must reinterpret this flush as ANV_PIPE_DATA_CACHE_FLUSH_BIT.
    */
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT = (1 << 14),
   ANV_PIPE_CS_STALL_BIT = (1 << 20),
   ANV_PIPE_END_OF_PIPE_SYNC_BIT = (1 << 21),

   /* This bit does not exist directly in PIPE_CONTROL. Instead it means that
    * a flush has happened but not a CS stall. The next time we do any sort
    * of invalidation we need to insert a CS stall at that time. Otherwise,
    * we would have to CS stall on every flush which could be bad.
    */
   ANV_PIPE_NEEDS_END_OF_PIPE_SYNC_BIT = (1 << 22),

   /* This bit does not exist directly in PIPE_CONTROL. It means that render
    * target operations related to transfer commands with VkBuffer as
    * destination are ongoing. Some operations like copies on the command
    * streamer might need to be aware of this to trigger the appropriate stall
    * before they can proceed with the copy.
    */
   ANV_PIPE_RENDER_TARGET_BUFFER_WRITES = (1 << 23),

   /* This bit does not exist directly in PIPE_CONTROL. It means that Gfx12
    * AUX-TT data has changed and we need to invalidate AUX-TT data. This is
    * done by writing the AUX-TT register.
    */
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT = (1 << 24),

   /* This bit does not exist directly in PIPE_CONTROL. It means that a
    * PIPE_CONTROL with a post-sync operation will follow. This is used to
    * implement a workaround for Gfx9.
    */
   ANV_PIPE_POST_SYNC_BIT = (1 << 25),
};

#define ANV_PIPE_FLUSH_BITS ( \
   ANV_PIPE_DEPTH_CACHE_FLUSH_BIT | \
   ANV_PIPE_DATA_CACHE_FLUSH_BIT | \
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
   ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | \
   ANV_PIPE_TILE_CACHE_FLUSH_BIT)

#define ANV_PIPE_STALL_BITS ( \
   ANV_PIPE_STALL_AT_SCOREBOARD_BIT | \
   ANV_PIPE_DEPTH_STALL_BIT | \
   ANV_PIPE_CS_STALL_BIT)

#define ANV_PIPE_INVALIDATE_BITS ( \
   ANV_PIPE_STATE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_VF_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_HDC_PIPELINE_FLUSH_BIT | \
   ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_INSTRUCTION_CACHE_INVALIDATE_BIT | \
   ANV_PIPE_AUX_TABLE_INVALIDATE_BIT)

static inline enum anv_pipe_bits
anv_pipe_flush_bits_for_access_flags(struct anv_device *device,
                                     VkAccessFlags flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit(b, flags) {
      switch ((VkAccessFlagBits)(1 << b)) {
      case VK_ACCESS_SHADER_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a write
          * destination through the data port. To make its content available
          * to future operations, flush the HDC pipeline.
          */
         pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a render
          * target. To make its content available to future operations, flush
          * the render target cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a depth
          * buffer. To make its content available to future operations, flush
          * the depth cache.
          */
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_TRANSFER_WRITE_BIT:
         /* We're transitioning a buffer that was previously used as a
          * transfer write destination. Generic write operations include
          * color & depth operations as well as buffer operations like:
          * - vkCmdClearColorImage()
          * - vkCmdClearDepthStencilImage()
          * - vkCmdBlitImage()
          * - vkCmdCopy*(), vkCmdUpdate*(), vkCmdFill*()
          *
          * Most of these operations are implemented using Blorp which writes
          * through the render target, so flush that cache to make it visible
          * to future operations. For depth related operations we also need
          * to flush the depth cache.
          */
         pipe_bits |= ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DEPTH_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_MEMORY_WRITE_BIT:
         /* We're transitioning a buffer for generic write operations. Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_HOST_WRITE_BIT:
         /* We're transitioning a buffer for access by the CPU. Invalidate
          * all the caches. Since the data and tile caches don't have an
          * invalidate operation, we are forced to flush those as well.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}

static inline enum anv_pipe_bits
anv_pipe_invalidate_bits_for_access_flags(struct anv_device *device,
                                          VkAccessFlags flags)
{
   enum anv_pipe_bits pipe_bits = 0;

   u_foreach_bit(b, flags) {
      switch ((VkAccessFlagBits)(1 << b)) {
      case VK_ACCESS_INDIRECT_COMMAND_READ_BIT:
         /* Indirect draw commands take a buffer as input that we're going to
          * read from the command streamer to load some of the HW registers
          * (see genX_cmd_buffer.c:load_indirect_parameters). This requires a
          * command streamer stall so that all the cache flushes have
          * completed before the command streamer loads from memory.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         /* Indirect draw commands also set gl_BaseVertex & gl_BaseIndex
          * through a vertex buffer, so invalidate that cache.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         /* For CmdDispatchIndirect, we also load gl_NumWorkGroups through a
          * UBO from the buffer, so we need to invalidate the constant cache.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         /* A tile cache flush is needed for CmdDispatchIndirect because the
          * command streamer and vertex fetch aren't L3 coherent.
          */
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_INDEX_READ_BIT:
      case VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT:
         /* We're transitioning a buffer to be used as input for vkCmdDraw*
          * commands, so we invalidate the VF cache to make sure there is no
          * stale data when we start rendering.
          */
         pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_UNIFORM_READ_BIT:
         /* We're transitioning a buffer to be used as uniform data. Because
          * uniform data is accessed through the data port & sampler, we need
          * to invalidate the texture cache (sampler) & constant cache (data
          * port) to avoid stale data.
          */
         pipe_bits |= ANV_PIPE_CONSTANT_CACHE_INVALIDATE_BIT;
         if (device->physical->compiler->indirect_ubos_use_sampler)
            pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         else
            pipe_bits |= ANV_PIPE_HDC_PIPELINE_FLUSH_BIT;
         break;
      case VK_ACCESS_SHADER_READ_BIT:
      case VK_ACCESS_INPUT_ATTACHMENT_READ_BIT:
      case VK_ACCESS_TRANSFER_READ_BIT:
         /* We're transitioning a buffer to be read through the sampler, so
          * invalidate the texture cache; we don't want any stale data.
          */
         pipe_bits |= ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT;
         break;
      case VK_ACCESS_MEMORY_READ_BIT:
         /* We're transitioning a buffer for generic read; invalidate all the
          * caches.
          */
         pipe_bits |= ANV_PIPE_INVALIDATE_BITS;
         break;
      case VK_ACCESS_MEMORY_WRITE_BIT:
         /* Generic write, make sure all previously written things land in
          * memory.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      case VK_ACCESS_CONDITIONAL_RENDERING_READ_BIT_EXT:
         /* We're transitioning a buffer for conditional rendering. We'll
          * load the content of this buffer into HW registers using the
          * command streamer, so we need to stall the command streamer to
          * make sure any in-flight flush operations have completed. We also
          * need a tile cache and data cache flush because the command
          * streamer isn't L3 coherent yet.
          */
         pipe_bits |= ANV_PIPE_CS_STALL_BIT;
         pipe_bits |= ANV_PIPE_TILE_CACHE_FLUSH_BIT;
         pipe_bits |= ANV_PIPE_DATA_CACHE_FLUSH_BIT;
         break;
      case VK_ACCESS_HOST_READ_BIT:
         /* We're transitioning a buffer that was written by the CPU. Flush
          * all the caches.
          */
         pipe_bits |= ANV_PIPE_FLUSH_BITS;
         break;
      default:
         break; /* Nothing to do */
      }
   }

   return pipe_bits;
}
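/* Usage sketch (illustrative): a memory barrier translates its src/dst
 * access masks into deferred PIPE_CONTROL work by combining both helpers:
 *
 *    cmd_buffer->state.pending_pipe_bits |=
 *       anv_pipe_flush_bits_for_access_flags(device, src_access) |
 *       anv_pipe_invalidate_bits_for_access_flags(device, dst_access);
 */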
#define VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV ( \
   VK_IMAGE_ASPECT_COLOR_BIT | \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)
#define VK_IMAGE_ASPECT_PLANES_BITS_ANV ( \
   VK_IMAGE_ASPECT_PLANE_0_BIT | \
   VK_IMAGE_ASPECT_PLANE_1_BIT | \
   VK_IMAGE_ASPECT_PLANE_2_BIT)

struct anv_vertex_binding {
   struct anv_buffer * buffer;
   VkDeviceSize offset;
   VkDeviceSize stride;
   VkDeviceSize size;
};

struct anv_xfb_binding {
   struct anv_buffer * buffer;
   VkDeviceSize offset;
   VkDeviceSize size;
};

struct anv_push_constants {
   /** Push constant data provided by the client through vkPushConstants */
   uint8_t client_data[MAX_PUSH_CONSTANTS_SIZE];

   /** Dynamic offsets for dynamic UBOs and SSBOs */
   uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];

   /* Robust access pushed registers. */
   uint64_t push_reg_mask[MESA_SHADER_STAGES];

   /** Pad out to a multiple of 32 bytes */
   uint32_t pad[2];

   /* Base addresses for descriptor sets */
   uint64_t desc_sets[MAX_SETS];

   struct {
      /** Base workgroup ID
       *
       * Used for vkCmdDispatchBase.
       */
      uint32_t base_work_group_id[3];

      /** Subgroup ID
       *
       * This is never set by software but is implicitly filled out when
       * uploading the push constants for compute shaders.
       */
      uint32_t subgroup_id;
   } cs;
};

struct anv_dynamic_state {
   struct {
      uint32_t count;
      VkViewport viewports[MAX_VIEWPORTS];
   } viewport;

   struct {
      uint32_t count;
      VkRect2D scissors[MAX_SCISSORS];
   } scissor;

   float line_width;

   struct {
      float bias;
      float clamp;
      float slope;
   } depth_bias;

   float blend_constants[4];

   struct {
      float min;
      float max;
   } depth_bounds;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_compare_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_write_mask;

   struct {
      uint32_t front;
      uint32_t back;
   } stencil_reference;

   struct {
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } front;
      struct {
         VkStencilOp fail_op;
         VkStencilOp pass_op;
         VkStencilOp depth_fail_op;
         VkCompareOp compare_op;
      } back;
   } stencil_op;

   struct {
      uint32_t factor;
      uint16_t pattern;
   } line_stipple;

   struct {
      uint32_t samples;
      VkSampleLocationEXT locations[MAX_SAMPLE_LOCATIONS];
   } sample_locations;

   VkExtent2D fragment_shading_rate;

   VkCullModeFlags cull_mode;
   VkFrontFace front_face;
   VkPrimitiveTopology primitive_topology;
   bool depth_test_enable;
   bool depth_write_enable;
   VkCompareOp depth_compare_op;
   bool depth_bounds_test_enable;
   bool stencil_test_enable;
   bool raster_discard;
   bool depth_bias_enable;
   bool primitive_restart_enable;
   VkLogicOp logic_op;
   bool dyn_vbo_stride;
   bool dyn_vbo_size;

   /* Bitfield, one bit per render target */
   uint8_t color_writes;
};

extern const struct anv_dynamic_state default_dynamic_state;

uint32_t anv_dynamic_state_copy(struct anv_dynamic_state *dest,
                                const struct anv_dynamic_state *src,
                                uint32_t copy_mask);

struct anv_surface_state {
   struct anv_state state;
   /** Address of the surface referred to by this state
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address address;
   /* Address of the aux surface, if any
    *
    * This field is ANV_NULL_ADDRESS if and only if no aux surface exists.
    *
    * With the exception of gfx8, the bottom 12 bits of this address' offset
    * include extra aux information.
    */
   struct anv_address aux_address;
   /* Address of the clear color, if any
    *
    * This address is relative to the start of the BO.
    */
   struct anv_address clear_address;
};

/**
 * Attachment state when recording a renderpass instance.
 *
 * The clear value is valid only if there exists a pending clear.
 */
struct anv_attachment_state {
   enum isl_aux_usage aux_usage;
   struct anv_surface_state color;
   struct anv_surface_state input;

   VkImageLayout current_layout;
   VkImageLayout current_stencil_layout;
   VkImageAspectFlags pending_clear_aspects;
   VkImageAspectFlags pending_load_aspects;
   bool fast_clear;
   VkClearValue clear_value;

   /* When multiview is active, attachments with a renderpass clear
    * operation have their respective layers cleared on the first
    * subpass that uses them, and only in that subpass. We keep track
    * of this using a bitfield to indicate which layers of an attachment
    * have not been cleared yet when multiview is active.
    */
   uint32_t pending_clear_views;
   struct anv_image_view * image_view;
};

/** State tracking for vertex buffer flushes
 *
 * On Gfx8-9, the VF cache only considers the bottom 32 bits of memory
 * addresses. If you happen to have two vertex buffers which get placed
 * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
 * collisions. In order to solve this problem, we track vertex address
 * ranges which are live in the cache and invalidate the cache if one ever
 * exceeds 32 bits.
 */
struct anv_vb_cache_range {
   /* Virtual address at which the live vertex buffer cache range starts for
    * this vertex buffer index.
    */
   uint64_t start;

   /* Virtual address of the byte after where the vertex buffer cache range
    * ends. This is exclusive such that end - start is the size of the range.
    */
   uint64_t end;
};
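/* Sketch of the check this enables (illustrative): before a draw the driver
 * can test whether a live range crosses a 32-bit boundary and, if so,
 * schedule a VF cache invalidate:
 *
 *    if ((range->start >> 32) != (range->end >> 32))
 *       cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_VF_CACHE_INVALIDATE_BIT;
 */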
/** State tracking for a particular pipeline bind point
 *
 * This struct is the base struct for anv_cmd_graphics_state and
 * anv_cmd_compute_state. These are used to track state which is bound to a
 * particular type of pipeline. Generic state that applies per-stage such as
 * binding table offsets and push constants is tracked generically with a
 * per-stage array in anv_cmd_state.
 */
struct anv_cmd_pipeline_state {
   struct anv_descriptor_set *descriptors[MAX_SETS];
   struct anv_push_descriptor_set *push_descriptors[MAX_SETS];

   struct anv_push_constants push_constants;

   /* Push constant state allocated when flushing push constants. */
   struct anv_state push_constants_state;
};

/** State tracking for graphics pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a graphics pipeline. Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is graphics-specific.
 */
struct anv_cmd_graphics_state {
   struct anv_cmd_pipeline_state base;

   struct anv_graphics_pipeline *pipeline;

   anv_cmd_dirty_mask_t dirty;
   uint32_t vb_dirty;

   struct anv_vb_cache_range ib_bound_range;
   struct anv_vb_cache_range ib_dirty_range;
   struct anv_vb_cache_range vb_bound_ranges[33];
   struct anv_vb_cache_range vb_dirty_ranges[33];

   VkShaderStageFlags push_constant_stages;

   struct anv_dynamic_state dynamic;

   uint32_t primitive_topology;

   struct {
      struct anv_buffer *index_buffer;
      uint32_t index_type; /**< 3DSTATE_INDEX_BUFFER.IndexFormat */
      uint32_t index_offset;
   } gfx7;
};

/** State tracking for compute pipeline
 *
 * This has anv_cmd_pipeline_state as a base struct to track things which get
 * bound to a compute pipeline. Along with general pipeline bind point state
 * which is in the anv_cmd_pipeline_state base struct, it also contains other
 * state which is compute-specific.
 */
struct anv_cmd_compute_state {
   struct anv_cmd_pipeline_state base;

   struct anv_compute_pipeline *pipeline;

   bool pipeline_dirty;

   struct anv_state push_data;

   struct anv_address num_workgroups;
};

struct anv_cmd_ray_tracing_state {
   struct anv_cmd_pipeline_state base;

   struct anv_ray_tracing_pipeline *pipeline;

   bool pipeline_dirty;

   struct {
      struct anv_bo *bo;
      struct brw_rt_scratch_layout layout;
   } scratch;
};

/** State required while building a command buffer */
struct anv_cmd_state {
   /* PIPELINE_SELECT.PipelineSelection */
   uint32_t current_pipeline;
   const struct intel_l3_config * current_l3_config;
   uint32_t last_aux_map_state;

   struct anv_cmd_graphics_state gfx;
   struct anv_cmd_compute_state compute;
   struct anv_cmd_ray_tracing_state rt;

   enum anv_pipe_bits pending_pipe_bits;
   VkShaderStageFlags descriptors_dirty;
   VkShaderStageFlags push_constants_dirty;

   struct anv_framebuffer * framebuffer;
   struct anv_render_pass * pass;
   struct anv_subpass * subpass;
   VkRect2D render_area;
   uint32_t restart_index;
   struct anv_vertex_binding vertex_bindings[MAX_VBS];
   bool xfb_enabled;
   struct anv_xfb_binding xfb_bindings[MAX_XFB_BUFFERS];
   struct anv_state binding_tables[MESA_VULKAN_SHADER_STAGES];
   struct anv_state samplers[MESA_VULKAN_SHADER_STAGES];

   unsigned char sampler_sha1s[MESA_SHADER_STAGES][20];
   unsigned char surface_sha1s[MESA_SHADER_STAGES][20];
   unsigned char push_sha1s[MESA_SHADER_STAGES][20];

   /**
    * Whether or not the gfx8 PMA fix is enabled. We ensure that, at the top
    * of any command buffer, it is disabled by disabling it in
    * EndCommandBuffer and before invoking the secondary in ExecuteCommands.
    */
   bool pma_fix_enabled;

   /**
    * Whether or not we know for certain that HiZ is enabled for the current
    * subpass. If, for whatever reason, we are unsure as to whether HiZ is
    * enabled or not, this will be false.
    */
   bool hiz_enabled;

   bool conditional_render_enabled;

   /**
    * Last rendering scale argument provided to
    * genX(cmd_buffer_emit_hashing_mode)().
    */
   unsigned current_hash_scale;

   /**
    * Array length is anv_cmd_state::pass::attachment_count. Array content
    * is valid only when recording a render pass instance.
    */
   struct anv_attachment_state * attachments;

   /**
    * Surface states for color render targets. These are stored in a single
    * flat array. For depth-stencil attachments, the surface state is simply
    * left blank.
    */
   struct anv_state attachment_states;

   /**
    * A null surface state of the right size to match the framebuffer. This
    * is one of the states in attachment_states.
    */
   struct anv_state null_surface_state;
};

struct anv_cmd_pool {
   struct vk_object_base base;
   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;

   VkCommandPoolCreateFlags flags;
};

#define ANV_MIN_CMD_BUFFER_BATCH_SIZE 8192
#define ANV_MAX_CMD_BUFFER_BATCH_SIZE (16 * 1024 * 1024)

enum anv_cmd_buffer_exec_mode {
   ANV_CMD_BUFFER_EXEC_MODE_PRIMARY,
   ANV_CMD_BUFFER_EXEC_MODE_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT,
   ANV_CMD_BUFFER_EXEC_MODE_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN,
   ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN,
};

struct anv_measure_batch;

struct anv_cmd_buffer {
   struct vk_object_base base;

   struct anv_device * device;

   struct anv_cmd_pool * pool;
   struct list_head pool_link;

   struct anv_batch batch;

   /* Pointer to the location in the batch where MI_BATCH_BUFFER_END was
    * recorded upon calling vkEndCommandBuffer(). This is useful if we need
    * to rewrite the end to chain multiple batches together at
    * vkQueueSubmit().
    */
   void * batch_end;

   /* Fields required for the actual chain of anv_batch_bo's.
    *
    * These fields are initialized by anv_cmd_buffer_init_batch_bo_chain().
    */
   struct list_head batch_bos;
   enum anv_cmd_buffer_exec_mode exec_mode;

   /* A vector of anv_batch_bo pointers for every batch or surface buffer
    * referenced by this command buffer
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector seen_bbos;

   /* A vector of int32_t's for every block of binding tables.
    *
    * initialized by anv_cmd_buffer_init_batch_bo_chain()
    */
   struct u_vector bt_block_states;
   struct anv_state bt_next;

   struct anv_reloc_list surface_relocs;
   /** Last seen surface state block pool center bo offset */
   uint32_t last_ss_pool_center;

   /* Serial for tracking buffer completion */
   uint32_t serial;

   /* Stream objects for storing temporary data */
   struct anv_state_stream surface_state_stream;
   struct anv_state_stream dynamic_state_stream;
   struct anv_state_stream general_state_stream;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;

   struct anv_query_pool *perf_query_pool;

   struct anv_cmd_state state;

   struct anv_address return_addr;

   /* Set by SetPerformanceMarkerINTEL, written into queries by CmdBeginQuery */
   uint64_t intel_perf_marker;

   struct anv_measure_batch *measure;

   /**
    * KHR_performance_query requires self modifying command buffers and this
    * array has the location of modifying commands to the query begin and end
    * instructions storing performance counters. The array length is
    * anv_physical_device::n_perf_query_commands.
    */
   struct mi_address_token *self_mod_locations;

   /**
    * Index tracking which of the self_mod_locations items have already been
    * used.
    */
   uint32_t perf_reloc_idx;

   /**
    * Sum of all the anv_batch_bo sizes allocated for this command buffer.
    * Used to increase the allocation size for long command buffers.
    */
   uint32_t total_batch_size;
};

/* Determine whether we can chain a given cmd_buffer to another one. We need
 * softpin and we also need to make sure that we can edit the end of the
 * batch to point to the next one, which requires the command buffer to not
 * be used simultaneously.
 */
static inline bool
anv_cmd_buffer_is_chainable(struct anv_cmd_buffer *cmd_buffer)
{
   return anv_use_softpin(cmd_buffer->device->physical) &&
      !(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT);
}

VkResult anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer);
void anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                                  struct anv_cmd_buffer *secondary);
void anv_cmd_buffer_prepare_execbuf(struct anv_cmd_buffer *cmd_buffer);
VkResult anv_cmd_buffer_execbuf(struct anv_queue *queue,
                                struct anv_cmd_buffer *cmd_buffer,
                                const VkSemaphore *in_semaphores,
                                const uint64_t *in_wait_values,
                                uint32_t num_in_semaphores,
                                const VkSemaphore *out_semaphores,
                                const uint64_t *out_signal_values,
                                uint32_t num_out_semaphores,
                                VkFence fence,
                                int perf_query_pass);

VkResult anv_cmd_buffer_reset(struct anv_cmd_buffer *cmd_buffer);

struct anv_state anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
                                             const void *data, uint32_t size, uint32_t alignment);
struct anv_state anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
                                              uint32_t *a, uint32_t *b,
                                              uint32_t dwords, uint32_t alignment);

struct anv_address
anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset);
struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment);

VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer);

void gfx8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer);
void gfx8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
                                         bool depth_clamp_enable);
void gfx7_cmd_buffer_emit_scissor(struct anv_cmd_buffer *cmd_buffer);

void anv_cmd_buffer_setup_attachments(struct anv_cmd_buffer *cmd_buffer,
                                      struct anv_render_pass *pass,
                                      struct anv_framebuffer *framebuffer,
                                      const VkClearValue *clear_values);

void anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer);

struct anv_state
anv_cmd_buffer_gfx_push_constants(struct anv_cmd_buffer *cmd_buffer);
struct anv_state
anv_cmd_buffer_cs_push_constants(struct anv_cmd_buffer *cmd_buffer);

const struct anv_image_view *
anv_cmd_buffer_get_depth_stencil_view(const struct anv_cmd_buffer *cmd_buffer);
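/* Usage sketch (illustrative): anv_cmd_buffer_emit_dynamic() copies a blob
 * of CPU data into the dynamic state stream and returns its state slot,
 * e.g. for packets built on the fly:
 *
 *    struct anv_state state =
 *       anv_cmd_buffer_emit_dynamic(cmd_buffer, dwords, size, 32);
 *
 * The returned offset can then be referenced from the relevant 3DSTATE
 * packet.
 */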
VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state);

void anv_cmd_buffer_dump(struct anv_cmd_buffer *cmd_buffer);

void anv_cmd_emit_conditional_render_predicate(struct anv_cmd_buffer *cmd_buffer);

enum anv_fence_type {
   ANV_FENCE_TYPE_NONE = 0,
   ANV_FENCE_TYPE_BO,
   ANV_FENCE_TYPE_WSI_BO,
   ANV_FENCE_TYPE_SYNCOBJ,
   ANV_FENCE_TYPE_WSI,
};

enum anv_bo_fence_state {
   /** Indicates that this is a new (or newly reset) fence */
   ANV_BO_FENCE_STATE_RESET,

   /** Indicates that this fence has been submitted to the GPU but is still
    * (as far as we know) in use by the GPU.
    */
   ANV_BO_FENCE_STATE_SUBMITTED,

   ANV_BO_FENCE_STATE_SIGNALED,
};

struct anv_fence_impl {
   enum anv_fence_type type;

   union {
      /** Fence implementation for BO fences
       *
       * These fences use a BO and a set of CPU-tracked state flags. The BO
       * is added to the object list of the last execbuf call in a
       * QueueSubmit and is marked EXEC_WRITE. The state flags track when
       * the BO has been submitted to the kernel. We need to do this because
       * Vulkan lets you wait on a fence that has not yet been submitted and
       * I915_GEM_BUSY will say it's idle in this case.
       */
      struct {
         struct anv_bo *bo;
         enum anv_bo_fence_state state;
      } bo;

      /** DRM syncobj handle for syncobj-based fences */
      uint32_t syncobj;

      /** WSI fence */
      struct wsi_fence *fence_wsi;
   };
};

struct anv_fence {
   struct vk_object_base base;

   /* Permanent fence state. Every fence has some form of permanent state
    * (type != ANV_FENCE_TYPE_NONE). This may be a BO to fence on (for
    * cross-process fences) or it could just be a dummy for use internally.
    */
   struct anv_fence_impl permanent;

   /* Temporary fence state. A fence *may* have temporary state. That state
    * is added to the fence by an import operation and is reset back to
    * ANV_FENCE_TYPE_NONE when the fence is reset. A fence with temporary
    * state cannot be signaled because the fence must already be signaled
    * before the temporary state can be exported from the fence in the other
    * process and imported here.
    */
   struct anv_fence_impl temporary;
};

void anv_fence_reset_temporary(struct anv_device *device,
                               struct anv_fence *fence);
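/* Usage sketch (illustrative): operations on a fence act on the temporary
 * state when it is present, falling back to the permanent state otherwise:
 *
 *    struct anv_fence_impl *impl =
 *       fence->temporary.type != ANV_FENCE_TYPE_NONE ?
 *       &fence->temporary : &fence->permanent;
 */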
struct anv_event {
   struct vk_object_base base;
   uint64_t semaphore;
   struct anv_state state;
};

enum anv_semaphore_type {
   ANV_SEMAPHORE_TYPE_NONE = 0,
   ANV_SEMAPHORE_TYPE_DUMMY,
   ANV_SEMAPHORE_TYPE_BO,
   ANV_SEMAPHORE_TYPE_WSI_BO,
   ANV_SEMAPHORE_TYPE_SYNC_FILE,
   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ,
   ANV_SEMAPHORE_TYPE_TIMELINE,
   ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ_TIMELINE,
};

struct anv_timeline_point {
   struct list_head link;

   uint64_t serial;

   /* Number of waiters on this point; when > 0 the point should not be
    * garbage collected.
    */
   int waiting;

   /* BO used for synchronization. */
   struct anv_bo *bo;
};

struct anv_timeline {
   pthread_mutex_t mutex;
   pthread_cond_t cond;

   uint64_t highest_past;
   uint64_t highest_pending;

   struct list_head points;
   struct list_head free_points;
};

struct anv_semaphore_impl {
   enum anv_semaphore_type type;

   union {
      /* A BO representing this semaphore when type == ANV_SEMAPHORE_TYPE_BO
       * or type == ANV_SEMAPHORE_TYPE_WSI_BO. This BO will be added to the
       * object list on any execbuf2 calls for which this semaphore is used
       * as a wait or signal fence. When used as a signal fence or when
       * type == ANV_SEMAPHORE_TYPE_WSI_BO, the EXEC_OBJECT_WRITE flag will
       * be set.
       */
      struct anv_bo *bo;

      /* The sync file descriptor when type == ANV_SEMAPHORE_TYPE_SYNC_FILE.
       * If the semaphore is in the unsignaled state due to either just being
       * created or because it has been used for a wait, fd will be -1.
       */
      int fd;

      /* Sync object handle when type == ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ.
       * Unlike GEM BOs, DRM sync objects aren't deduplicated by the kernel
       * on import, so we don't need to bother with a userspace cache.
       */
      uint32_t syncobj;

      /* Non-shareable timeline semaphore
       *
       * Used when the kernel doesn't have support for timeline semaphores.
       */
      struct anv_timeline timeline;
   };
};

struct anv_semaphore {
   struct vk_object_base base;

   uint32_t refcount;

   /* Permanent semaphore state. Every semaphore has some form of permanent
    * state (type != ANV_SEMAPHORE_TYPE_NONE). This may be a BO to fence on
    * (for cross-process semaphores) or it could just be a dummy for use
    * internally.
    */
   struct anv_semaphore_impl permanent;

   /* Temporary semaphore state. A semaphore *may* have temporary state.
    * That state is added to the semaphore by an import operation and is
    * reset back to ANV_SEMAPHORE_TYPE_NONE when the semaphore is waited on.
    * A semaphore with temporary state cannot be signaled because the
    * semaphore must already be signaled before the temporary state can be
    * exported from the semaphore in the other process and imported here.
    */
   struct anv_semaphore_impl temporary;
};

void anv_semaphore_reset_temporary(struct anv_device *device,
                                   struct anv_semaphore *semaphore);

#define ANV_STAGE_MASK ((1 << MESA_VULKAN_SHADER_STAGES) - 1)

#define anv_foreach_stage(stage, stage_bits)                      \
   for (gl_shader_stage stage,                                    \
        __tmp = (gl_shader_stage)((stage_bits) & ANV_STAGE_MASK); \
        stage = __builtin_ffs(__tmp) - 1, __tmp;                  \
        __tmp &= ~(1 << (stage)))

struct anv_pipeline_bind_map {
   unsigned char surface_sha1[20];
   unsigned char sampler_sha1[20];
   unsigned char push_sha1[20];

   uint32_t surface_count;
   uint32_t sampler_count;

   struct anv_pipeline_binding * surface_to_descriptor;
   struct anv_pipeline_binding * sampler_to_descriptor;

   struct anv_push_range push_ranges[4];
};

struct anv_shader_bin_key {
   uint32_t size;
   uint8_t data[0];
};

struct anv_shader_bin {
   uint32_t ref_cnt;

   gl_shader_stage stage;

   const struct anv_shader_bin_key *key;

   struct anv_state kernel;
   uint32_t kernel_size;

   const struct brw_stage_prog_data *prog_data;
   uint32_t prog_data_size;

   struct brw_compile_stats stats[3];
   uint32_t num_stats;

   struct nir_xfb_info *xfb_info;

   struct anv_pipeline_bind_map bind_map;
};

struct anv_shader_bin *
anv_shader_bin_create(struct anv_device *device,
                      gl_shader_stage stage,
                      const void *key, uint32_t key_size,
                      const void *kernel, uint32_t kernel_size,
                      const struct brw_stage_prog_data *prog_data,
                      uint32_t prog_data_size,
                      const struct brw_compile_stats *stats, uint32_t num_stats,
                      const struct nir_xfb_info *xfb_info,
                      const struct anv_pipeline_bind_map *bind_map);

void
anv_shader_bin_destroy(struct anv_device *device, struct anv_shader_bin *shader);

static inline void
anv_shader_bin_ref(struct anv_shader_bin *shader)
{
   assert(shader && shader->ref_cnt >= 1);
   p_atomic_inc(&shader->ref_cnt);
}

static inline void
anv_shader_bin_unref(struct anv_device *device, struct anv_shader_bin *shader)
{
   assert(shader && shader->ref_cnt >= 1);
   if (p_atomic_dec_zero(&shader->ref_cnt))
      anv_shader_bin_destroy(device, shader);
}
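/* Example use of the anv_foreach_stage iterator above (illustrative):
 *
 *    anv_foreach_stage(s, pipeline->active_stages) {
 *       if (pipeline->shaders[s])
 *          anv_shader_bin_unref(device, pipeline->shaders[s]);
 *    }
 *
 * The mask is consumed lowest-bit-first, so stages are visited in order.
 */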
intel_l3_config * l3_config;3555};35563557struct anv_graphics_pipeline {3558struct anv_pipeline base;35593560uint32_t batch_data[512];35613562/* States that are part of batch_data and should be not emitted3563* dynamically.3564*/3565anv_cmd_dirty_mask_t static_state_mask;35663567/* States that need to be reemitted in cmd_buffer_flush_dynamic_state().3568* This might cover more than the dynamic states specified at pipeline3569* creation.3570*/3571anv_cmd_dirty_mask_t dynamic_state_mask;35723573struct anv_dynamic_state dynamic_state;35743575/* States declared dynamic at pipeline creation. */3576anv_cmd_dirty_mask_t dynamic_states;35773578uint32_t topology;35793580/* These fields are required with dynamic primitive topology,3581* rasterization_samples used only with gen < 8.3582*/3583VkLineRasterizationModeEXT line_mode;3584VkPolygonMode polygon_mode;3585uint32_t rasterization_samples;35863587struct anv_subpass * subpass;35883589struct anv_shader_bin * shaders[MESA_SHADER_STAGES];35903591VkShaderStageFlags active_stages;35923593bool writes_depth;3594bool depth_test_enable;3595bool writes_stencil;3596bool stencil_test_enable;3597bool depth_clamp_enable;3598bool depth_clip_enable;3599bool sample_shading_enable;3600bool kill_pixel;3601bool depth_bounds_test_enable;3602bool force_fragment_thread_dispatch;36033604/* When primitive replication is used, subpass->view_mask will describe what3605* views to replicate.3606*/3607bool use_primitive_replication;36083609struct anv_state blend_state;36103611struct anv_state cps_state;36123613uint32_t vb_used;3614struct anv_pipeline_vertex_binding {3615uint32_t stride;3616bool instanced;3617uint32_t instance_divisor;3618} vb[MAX_VBS];36193620struct {3621uint32_t sf[7];3622uint32_t depth_stencil_state[3];3623uint32_t clip[4];3624uint32_t xfb_bo_pitch[4];3625uint32_t wm[3];3626uint32_t blend_state[MAX_RTS * 2];3627uint32_t streamout_state[3];3628} gfx7;36293630struct {3631uint32_t sf[4];3632uint32_t raster[5];3633uint32_t wm_depth_stencil[3];3634uint32_t wm[2];3635uint32_t ps_blend[2];3636uint32_t blend_state[1 + MAX_RTS * 2];3637uint32_t streamout_state[5];3638} gfx8;36393640struct {3641uint32_t wm_depth_stencil[4];3642} gfx9;3643};36443645struct anv_compute_pipeline {3646struct anv_pipeline base;36473648struct anv_shader_bin * cs;3649uint32_t batch_data[9];3650uint32_t interface_descriptor_data[8];3651};36523653struct anv_rt_shader_group {3654VkRayTracingShaderGroupTypeKHR type;36553656struct anv_shader_bin *general;3657struct anv_shader_bin *closest_hit;3658struct anv_shader_bin *any_hit;3659struct anv_shader_bin *intersection;36603661/* VK_KHR_ray_tracing requires shaderGroupHandleSize == 32 */3662uint32_t handle[8];3663};36643665struct anv_ray_tracing_pipeline {3666struct anv_pipeline base;36673668/* All shaders in the pipeline */3669struct util_dynarray shaders;36703671uint32_t group_count;3672struct anv_rt_shader_group * groups;36733674/* If non-zero, this is the default computed stack size as per the stack3675* size computation in the Vulkan spec. 
#define ANV_DECL_PIPELINE_DOWNCAST(pipe_type, pipe_enum)             \
   static inline struct anv_##pipe_type##_pipeline *                 \
   anv_pipeline_to_##pipe_type(struct anv_pipeline *pipeline)        \
   {                                                                 \
      assert(pipeline->type == pipe_enum);                           \
      return (struct anv_##pipe_type##_pipeline *) pipeline;         \
   }

ANV_DECL_PIPELINE_DOWNCAST(graphics, ANV_PIPELINE_GRAPHICS)
ANV_DECL_PIPELINE_DOWNCAST(compute, ANV_PIPELINE_COMPUTE)
ANV_DECL_PIPELINE_DOWNCAST(ray_tracing, ANV_PIPELINE_RAY_TRACING)

static inline bool
anv_pipeline_has_stage(const struct anv_graphics_pipeline *pipeline,
                       gl_shader_stage stage)
{
   return (pipeline->active_stages & mesa_to_vk_shader_stage(stage)) != 0;
}

#define ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(prefix, stage)               \
   static inline const struct brw_##prefix##_prog_data *                  \
   get_##prefix##_prog_data(const struct anv_graphics_pipeline *pipeline) \
   {                                                                      \
      if (anv_pipeline_has_stage(pipeline, stage)) {                      \
         return (const struct brw_##prefix##_prog_data *)                 \
                pipeline->shaders[stage]->prog_data;                      \
      } else {                                                            \
         return NULL;                                                     \
      }                                                                   \
   }

ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(vs, MESA_SHADER_VERTEX)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tcs, MESA_SHADER_TESS_CTRL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(tes, MESA_SHADER_TESS_EVAL)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(gs, MESA_SHADER_GEOMETRY)
ANV_DECL_GET_GRAPHICS_PROG_DATA_FUNC(wm, MESA_SHADER_FRAGMENT)

static inline const struct brw_cs_prog_data *
get_cs_prog_data(const struct anv_compute_pipeline *pipeline)
{
   assert(pipeline->cs);
   return (const struct brw_cs_prog_data *) pipeline->cs->prog_data;
}

static inline const struct brw_vue_prog_data *
anv_pipeline_get_last_vue_prog_data(const struct anv_graphics_pipeline *pipeline)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      return &get_gs_prog_data(pipeline)->base;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      return &get_tes_prog_data(pipeline)->base;
   else
      return &get_vs_prog_data(pipeline)->base;
}

VkResult
anv_device_init_rt_shaders(struct anv_device *device);

void
anv_device_finish_rt_shaders(struct anv_device *device);

VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator);

void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator);

VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
                           struct anv_device *device,
                           struct anv_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *alloc);

VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info,
                        const struct vk_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info);

VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct anv_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc);
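/* Illustrative usage sketch (hypothetical caller): the stage-specific
 * prog-data getters generated above return NULL when the stage is absent,
 * so callers can branch on presence, e.g.:
 *
 *    const struct brw_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);
 *    if (gs_prog_data != NULL)
 *       ... geometry-stage-specific setup ...
 *
 * anv_pipeline_get_last_vue_prog_data(), by contrast, always has a stage to
 * return, since every graphics pipeline includes a vertex shader.
 */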
struct anv_format_plane {
   enum isl_format isl_format:16;
   struct isl_swizzle swizzle;

   /* Whether this plane contains chroma channels */
   bool has_chroma;

   /* For downscaling of YUV planes */
   uint8_t denominator_scales[2];

   /* How to map sampled ycbcr planes to a single 4-component element. */
   struct isl_swizzle ycbcr_swizzle;

   /* What aspect is associated with this plane */
   VkImageAspectFlags aspect;
};

struct anv_format {
   struct anv_format_plane planes[3];
   VkFormat vk_format;
   uint8_t n_planes;
   bool can_ycbcr;
};

/**
 * Return the aspect's _format_ plane, not its _memory_ plane (using the
 * vocabulary of VK_EXT_image_drm_format_modifier). As a consequence, \a
 * aspect_mask may contain VK_IMAGE_ASPECT_PLANE_*, but must not contain
 * VK_IMAGE_ASPECT_MEMORY_PLANE_*.
 */
static inline uint32_t
anv_image_aspect_to_plane(VkImageAspectFlags image_aspects,
                          VkImageAspectFlags aspect_mask)
{
   switch (aspect_mask) {
   case VK_IMAGE_ASPECT_COLOR_BIT:
   case VK_IMAGE_ASPECT_DEPTH_BIT:
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return 0;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      if ((image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
         return 0;
      FALLTHROUGH;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return 2;
   default:
      /* Purposefully assert with depth/stencil aspects. */
      unreachable("invalid image aspect");
   }
}

static inline VkImageAspectFlags
anv_plane_to_aspect(VkImageAspectFlags image_aspects,
                    uint32_t plane)
{
   if (image_aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
      if (util_bitcount(image_aspects) > 1)
         return VK_IMAGE_ASPECT_PLANE_0_BIT << plane;
      return VK_IMAGE_ASPECT_COLOR_BIT;
   }
   if (image_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
      return VK_IMAGE_ASPECT_DEPTH_BIT << plane;
   assert(image_aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
   return VK_IMAGE_ASPECT_STENCIL_BIT;
}

#define anv_foreach_image_aspect_bit(b, image, aspects) \
   u_foreach_bit(b, anv_image_expand_aspects(image, aspects))

const struct anv_format *
anv_get_format(VkFormat format);

static inline uint32_t
anv_get_format_planes(VkFormat vk_format)
{
   const struct anv_format *format = anv_get_format(vk_format);

   return format != NULL ? format->n_planes : 0;
}

struct anv_format_plane
anv_get_format_plane(const struct intel_device_info *devinfo,
                     VkFormat vk_format,
                     VkImageAspectFlagBits aspect, VkImageTiling tiling);

static inline enum isl_format
anv_get_isl_format(const struct intel_device_info *devinfo, VkFormat vk_format,
                   VkImageAspectFlags aspect, VkImageTiling tiling)
{
   return anv_get_format_plane(devinfo, vk_format, aspect, tiling).isl_format;
}

bool anv_formats_ccs_e_compatible(const struct intel_device_info *devinfo,
                                  VkImageCreateFlags create_flags,
                                  VkFormat vk_format,
                                  VkImageTiling vk_tiling,
                                  const VkImageFormatListCreateInfoKHR *fmt_list);

extern VkFormat
vk_format_from_android(unsigned android_format, unsigned android_usage);
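/* Worked examples for anv_image_aspect_to_plane() (illustrative only):
 *
 *    anv_image_aspect_to_plane(VK_IMAGE_ASPECT_DEPTH_BIT |
 *                              VK_IMAGE_ASPECT_STENCIL_BIT,
 *                              VK_IMAGE_ASPECT_STENCIL_BIT)  == 1
 *    anv_image_aspect_to_plane(VK_IMAGE_ASPECT_STENCIL_BIT,
 *                              VK_IMAGE_ASPECT_STENCIL_BIT)  == 0
 *
 * i.e. stencil lands on plane 1 only when the image also has depth.
 */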
static inline struct isl_swizzle
anv_swizzle_for_render(struct isl_swizzle swizzle)
{
   /* Sometimes the swizzle will map alpha to one. We do this to fake RGB as
    * RGBA for texturing.
    */
   assert(swizzle.a == ISL_CHANNEL_SELECT_ONE ||
          swizzle.a == ISL_CHANNEL_SELECT_ALPHA);

   /* But it doesn't matter what we render to that channel */
   swizzle.a = ISL_CHANNEL_SELECT_ALPHA;

   return swizzle;
}

void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm);

/**
 * Describes how each part of anv_image will be bound to memory.
 */
struct anv_image_memory_range {
   /**
    * Disjoint bindings into which each portion of the image will be bound.
    *
    * Binding images to memory can be complicated and involve binding
    * different portions of the image to different memory objects or
    * regions. For most images, everything lives in the MAIN binding and
    * gets bound by vkBindImageMemory. For disjoint multi-planar images,
    * each plane has a unique, disjoint binding and gets bound by
    * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo. There may also
    * exist bits of memory which are implicit or driver-managed and live in
    * special-case bindings.
    */
   enum anv_image_memory_binding {
      /**
       * Used if and only if the image is not multi-planar disjoint. Bound
       * by vkBindImageMemory2 without VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_MAIN,

      /**
       * Used if and only if the image is multi-planar disjoint. Bound by
       * vkBindImageMemory2 with VkBindImagePlaneMemoryInfo.
       */
      ANV_IMAGE_MEMORY_BINDING_PLANE_0,
      ANV_IMAGE_MEMORY_BINDING_PLANE_1,
      ANV_IMAGE_MEMORY_BINDING_PLANE_2,

      /**
       * Driver-private bo. In special cases we may store the aux surface
       * and/or aux state in this binding.
       */
      ANV_IMAGE_MEMORY_BINDING_PRIVATE,

      /** Sentinel */
      ANV_IMAGE_MEMORY_BINDING_END,
   } binding;

   /**
    * Offset is relative to the start of the binding created by
    * vkBindImageMemory, not to the start of the bo.
    */
   uint64_t offset;

   uint64_t size;
   uint32_t alignment;
};

/**
 * Subsurface of an anv_image.
 */
struct anv_surface {
   struct isl_surf isl;
   struct anv_image_memory_range memory_range;
};

static inline bool MUST_CHECK
anv_surface_is_valid(const struct anv_surface *surface)
{
   return surface->isl.size_B > 0 && surface->memory_range.size > 0;
}
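/* Illustrative sketch (an assumption based on the enum layout above, not a
 * helper defined here): a disjoint multi-planar image is expected to place
 * plane p in binding ANV_IMAGE_MEMORY_BINDING_PLANE_0 + p, while a
 * non-disjoint image places everything in ANV_IMAGE_MEMORY_BINDING_MAIN:
 *
 *    enum anv_image_memory_binding binding = disjoint ?
 *       ANV_IMAGE_MEMORY_BINDING_PLANE_0 + plane :
 *       ANV_IMAGE_MEMORY_BINDING_MAIN;
 */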
struct anv_image {
   struct vk_object_base base;

   VkImageType type; /**< VkImageCreateInfo::imageType */
   /* The original VkFormat provided by the client. This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   const struct anv_format *format;

   VkImageAspectFlags aspects;
   VkExtent3D extent;
   uint32_t levels;
   uint32_t array_size;
   uint32_t samples; /**< VkImageCreateInfo::samples */
   uint32_t n_planes;
   VkImageUsageFlags usage; /**< VkImageCreateInfo::usage */
   VkImageUsageFlags stencil_usage;
   VkImageCreateFlags create_flags; /**< Flags used when creating the image. */
   VkImageTiling tiling; /**< VkImageCreateInfo::tiling */

   /** True if this image needs to be bound to an appropriately tiled BO.
    *
    * When not using modifiers, consumers such as X11, Wayland, and KMS need
    * the tiling passed via I915_GEM_SET_TILING. When exporting these
    * buffers we require a dedicated allocation so that we know to allocate
    * a tiled buffer.
    */
   bool needs_set_tiling;

   /**
    * Must be DRM_FORMAT_MOD_INVALID unless tiling is
    * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT.
    */
   uint64_t drm_format_mod;

   /**
    * Image has a multi-planar format and was created with
    * VK_IMAGE_CREATE_DISJOINT_BIT.
    */
   bool disjoint;

   /* Image was created with an external format. */
   bool external_format;

   /**
    * Image was imported from gralloc with VkNativeBufferANDROID. The
    * gralloc bo must be released when the image is destroyed.
    */
   bool from_gralloc;

   /**
    * The memory bindings created by vkCreateImage and vkBindImageMemory.
    *
    * For details on the image's memory layout, see check_memory_bindings().
    *
    * vkCreateImage constructs the `memory_range` for each
    * anv_image_memory_binding. After vkCreateImage, each binding is valid
    * if and only if `memory_range::size > 0`.
    *
    * vkBindImageMemory binds each valid `memory_range` to an `address`.
    * Usually, the app will provide the address via the parameters of
    * vkBindImageMemory. However, special-case bindings may be bound to
    * driver-private memory.
    */
   struct anv_image_binding {
      struct anv_image_memory_range memory_range;
      struct anv_address address;
   } bindings[ANV_IMAGE_MEMORY_BINDING_END];

   /**
    * Image subsurfaces
    *
    * anv_image::planes[p] is valid if and only if anv_image::aspects
    * contains the aspect that maps to plane p. Refer to
    * anv_image_aspect_to_plane() for the mapping from aspect to plane
    * number.
    *
    * The hardware requires that the depth buffer and stencil buffer be
    * separate surfaces. From Vulkan's perspective, though, depth and
    * stencil reside in the same VkImage. To satisfy both the hardware and
    * Vulkan, we allocate the depth and stencil buffers as separate surfaces
    * in the same bo.
    */
   struct anv_image_plane {
      struct anv_surface primary_surface;

      /**
       * A surface which shadows the main surface and may have different
       * tiling. This is used for sampling using a tiling that isn't
       * supported for other operations.
       */
      struct anv_surface shadow_surface;

      /**
       * The base aux usage for this image. For color images, this can be
       * either CCS_E or CCS_D depending on whether or not we can reliably
       * leave CCS on all the time.
       */
      enum isl_aux_usage aux_usage;

      struct anv_surface aux_surface;

      /** Location of the fast clear state. */
      struct anv_image_memory_range fast_clear_memory_range;
   } planes[3];
};

/* The ordering of this enum is important */
enum anv_fast_clear_type {
   /** Image does not have/support any fast-clear blocks */
   ANV_FAST_CLEAR_NONE = 0,
   /** Image has/supports fast-clear but only to the default value */
   ANV_FAST_CLEAR_DEFAULT_VALUE = 1,
   /** Image has/supports fast-clear with an arbitrary fast-clear value */
   ANV_FAST_CLEAR_ANY = 2,
};

/* Returns the number of auxiliary buffer levels attached to an image. */
static inline uint8_t
anv_image_aux_levels(const struct anv_image * const image,
                     VkImageAspectFlagBits aspect)
{
   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   if (image->planes[plane].aux_usage == ISL_AUX_USAGE_NONE)
      return 0;

   return image->levels;
}
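/* Illustrative usage (hypothetical caller): anv_image_aux_levels() returns 0
 * when the plane has no auxiliary surface, so per-level loops over aux data
 * degenerate naturally:
 *
 *    for (uint32_t l = 0; l < anv_image_aux_levels(image, aspect); l++)
 *       ... set up aux state for level l ...
 */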
/* Returns the number of auxiliary buffer layers attached to an image. */
static inline uint32_t
anv_image_aux_layers(const struct anv_image * const image,
                     VkImageAspectFlagBits aspect,
                     const uint8_t miplevel)
{
   assert(image);

   /* The miplevel must exist in the main buffer. */
   assert(miplevel < image->levels);

   if (miplevel >= anv_image_aux_levels(image, aspect)) {
      /* There are no layers with auxiliary data because the miplevel has no
       * auxiliary data.
       */
      return 0;
   }

   return MAX2(image->array_size, image->extent.depth >> miplevel);
}

static inline struct anv_address MUST_CHECK
anv_image_address(const struct anv_image *image,
                  const struct anv_image_memory_range *mem_range)
{
   const struct anv_image_binding *binding =
      &image->bindings[mem_range->binding];
   assert(binding->memory_range.offset == 0);

   if (mem_range->size == 0)
      return ANV_NULL_ADDRESS;

   return anv_address_add(binding->address, mem_range->offset);
}

static inline struct anv_address
anv_image_get_clear_color_addr(UNUSED const struct anv_device *device,
                               const struct anv_image *image,
                               VkImageAspectFlagBits aspect)
{
   assert(image->aspects & (VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV |
                            VK_IMAGE_ASPECT_DEPTH_BIT));

   uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   const struct anv_image_memory_range *mem_range =
      &image->planes[plane].fast_clear_memory_range;

   return anv_image_address(image, mem_range);
}

static inline struct anv_address
anv_image_get_fast_clear_type_addr(const struct anv_device *device,
                                   const struct anv_image *image,
                                   VkImageAspectFlagBits aspect)
{
   struct anv_address addr =
      anv_image_get_clear_color_addr(device, image, aspect);

   const unsigned clear_color_state_size = device->info.ver >= 10 ?
      device->isl_dev.ss.clear_color_state_size :
      device->isl_dev.ss.clear_value_size;
   return anv_address_add(addr, clear_color_state_size);
}

static inline struct anv_address
anv_image_get_compression_state_addr(const struct anv_device *device,
                                     const struct anv_image *image,
                                     VkImageAspectFlagBits aspect,
                                     uint32_t level, uint32_t array_layer)
{
   assert(level < anv_image_aux_levels(image, aspect));
   assert(array_layer < anv_image_aux_layers(image, aspect, level));
   UNUSED uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
   assert(image->planes[plane].aux_usage == ISL_AUX_USAGE_CCS_E);

   /* Relative to start of the plane's fast clear memory range */
   uint32_t offset;

   offset = 4; /* Go past the fast clear type */

   if (image->type == VK_IMAGE_TYPE_3D) {
      for (uint32_t l = 0; l < level; l++)
         offset += anv_minify(image->extent.depth, l) * 4;
   } else {
      offset += level * image->array_size * 4;
   }

   offset += array_layer * 4;

   assert(offset < image->planes[plane].fast_clear_memory_range.size);

   return anv_address_add(
      anv_image_get_fast_clear_type_addr(device, image, aspect),
      offset);
}
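/* Worked example (illustrative) for the compression-state offset computed in
 * anv_image_get_compression_state_addr() above. For a 2D image with
 * array_size == 4, the dword for (level == 2, array_layer == 1) lives at:
 *
 *    4             (skip the fast-clear-type dword)
 *  + 2 * 4 * 4     (levels 0 and 1: 4 layers each, 4 bytes per layer)
 *  + 1 * 4         (layer 1 within level 2)
 *  = 40 bytes past anv_image_get_fast_clear_type_addr().
 */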
/* Returns true if a HiZ-enabled depth buffer can be sampled from. */
static inline bool
anv_can_sample_with_hiz(const struct intel_device_info * const devinfo,
                        const struct anv_image *image)
{
   if (!(image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      return false;

   /* For Gfx8-11, there are some restrictions around sampling from HiZ.
    * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode
    * say:
    *
    *    "If this field is set to AUX_HIZ, Number of Multisamples must
    *    be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D."
    */
   if (image->type == VK_IMAGE_TYPE_3D)
      return false;

   /* Allow this feature on BDW even though it is disabled in the BDW devinfo
    * struct. There's documentation which suggests that this feature actually
    * reduces performance on BDW, but it has only been observed to help so
    * far. Sampling fast-cleared blocks on BDW must also be handled with care
    * (see depth_stencil_attachment_compute_aux_usage() for more info).
    */
   if (devinfo->ver != 8 && !devinfo->has_sample_with_hiz)
      return false;

   return image->samples == 1;
}

/* Returns true if an MCS-enabled buffer can be sampled from. */
static inline bool
anv_can_sample_mcs_with_clear(const struct intel_device_info * const devinfo,
                              const struct anv_image *image)
{
   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   const uint32_t plane =
      anv_image_aspect_to_plane(image->aspects, VK_IMAGE_ASPECT_COLOR_BIT);

   assert(isl_aux_usage_has_mcs(image->planes[plane].aux_usage));

   const struct anv_surface *anv_surf = &image->planes[plane].primary_surface;

   /* On TGL, the sampler has an issue with some 8 and 16bpp MSAA fast
    * clears. See HSD 1707282275, wa_14013111325. Due to the use of
    * format-reinterpretation, a simplified workaround is implemented.
    */
   if (devinfo->ver >= 12 &&
       isl_format_get_layout(anv_surf->isl.format)->bpb <= 16) {
      return false;
   }

   return true;
}

static inline bool
anv_image_plane_uses_aux_map(const struct anv_device *device,
                             const struct anv_image *image,
                             uint32_t plane)
{
   return device->info.has_aux_map &&
      isl_aux_usage_has_ccs(image->planes[plane].aux_usage);
}

void
anv_cmd_buffer_mark_image_written(struct anv_cmd_buffer *cmd_buffer,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  enum isl_aux_usage aux_usage,
                                  uint32_t level,
                                  uint32_t base_layer,
                                  uint32_t layer_count);

void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color);
void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value);
void
anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
                       const struct anv_image *src_image,
                       enum isl_aux_usage src_aux_usage,
                       uint32_t src_level, uint32_t src_base_layer,
                       const struct anv_image *dst_image,
                       enum isl_aux_usage dst_aux_usage,
                       uint32_t dst_level, uint32_t dst_base_layer,
                       VkImageAspectFlagBits aspect,
                       uint32_t src_x, uint32_t src_y,
                       uint32_t dst_x, uint32_t dst_y,
                       uint32_t width, uint32_t height,
                       uint32_t layer_count,
                       enum blorp_filter filter);
void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op);
void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value);
void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate);
void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate);

void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count);

enum isl_aux_state ATTRIBUTE_PURE
anv_layout_to_aux_state(const struct intel_device_info * const devinfo,
                        const struct anv_image *image,
                        const VkImageAspectFlagBits aspect,
                        const VkImageLayout layout);

enum isl_aux_usage ATTRIBUTE_PURE
anv_layout_to_aux_usage(const struct intel_device_info * const devinfo,
                        const struct anv_image *image,
                        const VkImageAspectFlagBits aspect,
                        const VkImageUsageFlagBits usage,
                        const VkImageLayout layout);

enum anv_fast_clear_type ATTRIBUTE_PURE
anv_layout_to_fast_clear_type(const struct intel_device_info * const devinfo,
                              const struct anv_image * const image,
                              const VkImageAspectFlagBits aspect,
                              const VkImageLayout layout);

/* This is defined as a macro so that it works for both
 * VkImageSubresourceRange and VkImageSubresourceLayers
 */
#define anv_get_layerCount(_image, _range) \
   ((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \
    (_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)

static inline uint32_t
anv_get_levelCount(const struct anv_image *image,
                   const VkImageSubresourceRange *range)
{
   return range->levelCount == VK_REMAINING_MIP_LEVELS ?
          image->levels - range->baseMipLevel : range->levelCount;
}

static inline VkImageAspectFlags
anv_image_expand_aspects(const struct anv_image *image,
                         VkImageAspectFlags aspects)
{
   /* If the underlying image has color plane aspects and
    * VK_IMAGE_ASPECT_COLOR_BIT has been requested, then return the aspects
    * of the underlying image.
    */
   if ((image->aspects & VK_IMAGE_ASPECT_PLANES_BITS_ANV) != 0 &&
       aspects == VK_IMAGE_ASPECT_COLOR_BIT)
      return image->aspects;

   return aspects;
}
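/* Illustrative examples for anv_image_expand_aspects():
 *
 *    image->aspects == PLANE_0 | PLANE_1, aspects == COLOR
 *       -> returns PLANE_0 | PLANE_1
 *    image->aspects == COLOR, aspects == COLOR
 *       -> returns COLOR
 *
 * (VK_IMAGE_ASPECT_ prefixes omitted for brevity.)
 */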
static inline bool
anv_image_aspects_compatible(VkImageAspectFlags aspects1,
                             VkImageAspectFlags aspects2)
{
   if (aspects1 == aspects2)
      return true;

   /* Color aspects are compatible if they have the same number of color
    * planes.
    */
   if ((aspects1 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
       (aspects2 & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) != 0 &&
       util_bitcount(aspects1) == util_bitcount(aspects2))
      return true;

   return false;
}

struct anv_image_view {
   struct vk_object_base base;

   const struct anv_image *image; /**< VkImageViewCreateInfo::image */

   VkImageAspectFlags aspect_mask;
   VkFormat vk_format;
   VkExtent3D extent; /**< Extent of VkImageViewCreateInfo::baseMipLevel. */

   unsigned n_planes;
   struct {
      uint32_t image_plane;

      struct isl_view isl;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of SHADER_READ_ONLY_OPTIMAL or
       * DEPTH_STENCIL_READ_ONLY_OPTIMAL.
       */
      struct anv_surface_state optimal_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a sampler surface with an
       * image layout of GENERAL.
       */
      struct anv_surface_state general_sampler_surface_state;

      /**
       * RENDER_SURFACE_STATE when using image as a storage image. Separate
       * states for write-only and readable, using the real format for
       * write-only and the lowered format for readable.
       */
      struct anv_surface_state storage_surface_state;
      struct anv_surface_state writeonly_storage_surface_state;

      struct brw_image_param storage_image_param;
   } planes[3];
};

enum anv_image_view_state_flags {
   ANV_IMAGE_VIEW_STATE_STORAGE_WRITE_ONLY = (1 << 0),
   ANV_IMAGE_VIEW_STATE_TEXTURE_OPTIMAL    = (1 << 1),
};

void anv_image_fill_surface_state(struct anv_device *device,
                                  const struct anv_image *image,
                                  VkImageAspectFlagBits aspect,
                                  const struct isl_view *view,
                                  isl_surf_usage_flags_t view_usage,
                                  enum isl_aux_usage aux_usage,
                                  const union isl_color_value *clear_color,
                                  enum anv_image_view_state_flags flags,
                                  struct anv_surface_state *state_inout,
                                  struct brw_image_param *image_param_out);
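/* Illustrative sketch (hypothetical helper, not part of the driver): picking
 * one of the per-plane sampler surface states above based on whether the
 * image is in an optimal read-only layout:
 *
 *    static inline struct anv_surface_state
 *    example_sampler_state(const struct anv_image_view *iview,
 *                          unsigned plane, bool optimal)
 *    {
 *       return optimal ? iview->planes[plane].optimal_sampler_surface_state
 *                      : iview->planes[plane].general_sampler_surface_state;
 *    }
 */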
struct anv_image_create_info {
   const VkImageCreateInfo *vk_info;

   /** An opt-in bitmask which filters an ISL-mapping of the Vulkan tiling. */
   isl_tiling_flags_t isl_tiling_flags;

   /** These flags will be added to any derived from VkImageCreateInfo. */
   isl_surf_usage_flags_t isl_extra_usage_flags;

   bool external_format;
};

VkResult anv_image_create(VkDevice _device,
                          const struct anv_image_create_info *info,
                          const VkAllocationCallbacks *alloc,
                          VkImage *pImage);

enum isl_format
anv_isl_format_for_descriptor_type(const struct anv_device *device,
                                   VkDescriptorType type);

static inline VkExtent3D
anv_sanitize_image_extent(const VkImageType imageType,
                          const VkExtent3D imageExtent)
{
   switch (imageType) {
   case VK_IMAGE_TYPE_1D:
      return (VkExtent3D) { imageExtent.width, 1, 1 };
   case VK_IMAGE_TYPE_2D:
      return (VkExtent3D) { imageExtent.width, imageExtent.height, 1 };
   case VK_IMAGE_TYPE_3D:
      return imageExtent;
   default:
      unreachable("invalid image type");
   }
}

static inline VkOffset3D
anv_sanitize_image_offset(const VkImageType imageType,
                          const VkOffset3D imageOffset)
{
   switch (imageType) {
   case VK_IMAGE_TYPE_1D:
      return (VkOffset3D) { imageOffset.x, 0, 0 };
   case VK_IMAGE_TYPE_2D:
      return (VkOffset3D) { imageOffset.x, imageOffset.y, 0 };
   case VK_IMAGE_TYPE_3D:
      return imageOffset;
   default:
      unreachable("invalid image type");
   }
}

static inline uint32_t
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
                          VkLineRasterizationModeEXT line_mode)
{
   return raster_mode == VK_POLYGON_MODE_LINE &&
          line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;
}

VkFormatFeatureFlags
anv_get_image_format_features(const struct intel_device_info *devinfo,
                              VkFormat vk_format,
                              const struct anv_format *anv_format,
                              VkImageTiling vk_tiling,
                              const struct isl_drm_modifier_info *isl_mod_info);

void anv_fill_buffer_surface_state(struct anv_device *device,
                                   struct anv_state state,
                                   enum isl_format format,
                                   isl_surf_usage_flags_t usage,
                                   struct anv_address address,
                                   uint32_t range, uint32_t stride);

static inline void
anv_clear_color_from_att_state(union isl_color_value *clear_color,
                               const struct anv_attachment_state *att_state,
                               const struct anv_image_view *iview)
{
   const struct isl_format_layout *view_fmtl =
      isl_format_get_layout(iview->planes[0].isl.format);

#define COPY_CLEAR_COLOR_CHANNEL(c, i) \
   if (view_fmtl->channels.c.bits) \
      clear_color->u32[i] = att_state->clear_value.color.uint32[i]

   COPY_CLEAR_COLOR_CHANNEL(r, 0);
   COPY_CLEAR_COLOR_CHANNEL(g, 1);
   COPY_CLEAR_COLOR_CHANNEL(b, 2);
   COPY_CLEAR_COLOR_CHANNEL(a, 3);

#undef COPY_CLEAR_COLOR_CHANNEL
}
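/* Worked examples (illustrative) for the sanitize helpers above; components
 * that don't exist for the image type are forced to their identity values:
 *
 *    anv_sanitize_image_extent(VK_IMAGE_TYPE_1D, (VkExtent3D){64, 7, 3})
 *       -> {64, 1, 1}
 *    anv_sanitize_image_offset(VK_IMAGE_TYPE_2D, (VkOffset3D){8, 8, 5})
 *       -> {8, 8, 0}
 */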
/* Haswell border color is a bit of a disaster. Float and unorm formats use a
 * straightforward 32-bit float color in the first 64 bytes. Instead of using
 * a nice float/integer union like Gfx8+, Haswell specifies the integer border
 * color as a separate entry /after/ the float color. The layout of this entry
 * also depends on the format's bpp (with extra hacks for RG32), and overlaps.
 *
 * Since we don't know the format/bpp, we can't make any of the border colors
 * containing '1' work for all formats, as it would be in the wrong place for
 * some of them. We opt to make 32-bit integers work as this seems like the
 * most common option. Fortunately, transparent black works regardless, as
 * all zeroes is the same in every bit-size.
 */
struct hsw_border_color {
   float float32[4];
   uint32_t _pad0[12];
   uint32_t uint32[4];
   uint32_t _pad1[108];
};

struct gfx8_border_color {
   union {
      float float32[4];
      uint32_t uint32[4];
   };
   /* Pad out to 64 bytes */
   uint32_t _pad[12];
};

struct anv_ycbcr_conversion {
   struct vk_object_base base;

   const struct anv_format *format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentSwizzle mapping[4];
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
   bool chroma_reconstruction;
};

struct anv_sampler {
   struct vk_object_base base;

   uint32_t state[3][4];
   uint32_t n_planes;
   struct anv_ycbcr_conversion *conversion;

   /* Blob of sampler state data which is guaranteed to be 32-byte aligned
    * and with a 32-byte stride for use as bindless samplers.
    */
   struct anv_state bindless_state;

   struct anv_state custom_border_color;
};

struct anv_framebuffer {
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   uint32_t attachment_count;
   struct anv_image_view *attachments[0];
};

struct anv_subpass_attachment {
   VkImageUsageFlagBits usage;
   uint32_t attachment;
   VkImageLayout layout;

   /* Used only with attachments containing stencil data. */
   VkImageLayout stencil_layout;
};

struct anv_subpass {
   uint32_t attachment_count;

   /**
    * A pointer to all attachment references used in this subpass.
    * Only valid if ::attachment_count > 0.
    */
   struct anv_subpass_attachment *attachments;
   uint32_t input_count;
   struct anv_subpass_attachment *input_attachments;
   uint32_t color_count;
   struct anv_subpass_attachment *color_attachments;
   struct anv_subpass_attachment *resolve_attachments;

   struct anv_subpass_attachment *depth_stencil_attachment;
   struct anv_subpass_attachment *ds_resolve_attachment;
   VkResolveModeFlagBitsKHR depth_resolve_mode;
   VkResolveModeFlagBitsKHR stencil_resolve_mode;

   uint32_t view_mask;

   /** Subpass has a depth/stencil self-dependency */
   bool has_ds_self_dep;

   /** Subpass has at least one color resolve attachment */
   bool has_color_resolve;
};

static inline unsigned
anv_subpass_view_count(const struct anv_subpass *subpass)
{
   return MAX2(1, util_bitcount(subpass->view_mask));
}
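/* Illustrative: anv_subpass_view_count() yields the number of views in a
 * multiview subpass and 1 when multiview is disabled:
 *
 *    view_mask == 0x0 -> 1 (non-multiview)
 *    view_mask == 0x5 -> 2 (views 0 and 2)
 */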
struct anv_render_pass_attachment {
   /* TODO: Consider using VkAttachmentDescription instead of storing each of
    * its members individually.
    */
   VkFormat format;
   uint32_t samples;
   VkImageUsageFlags usage;
   VkAttachmentLoadOp load_op;
   VkAttachmentStoreOp store_op;
   VkAttachmentLoadOp stencil_load_op;
   VkImageLayout initial_layout;
   VkImageLayout final_layout;
   VkImageLayout first_subpass_layout;

   VkImageLayout stencil_initial_layout;
   VkImageLayout stencil_final_layout;

   /* The subpass id in which the attachment will be used last. */
   uint32_t last_subpass_idx;
};

struct anv_render_pass {
   struct vk_object_base base;

   uint32_t attachment_count;
   uint32_t subpass_count;
   /* An array of subpass_count+1 flushes, one per subpass boundary */
   enum anv_pipe_bits *subpass_flushes;
   struct anv_render_pass_attachment *attachments;
   struct anv_subpass subpasses[0];
};

#define ANV_PIPELINE_STATISTICS_MASK 0x000007ff

struct anv_query_pool {
   struct vk_object_base base;

   VkQueryType type;
   VkQueryPipelineStatisticFlags pipeline_statistics;
   /** Stride between slots, in bytes */
   uint32_t stride;
   /** Number of slots in this query pool */
   uint32_t slots;
   struct anv_bo *bo;

   /* KHR perf queries: */
   uint32_t pass_size;
   uint32_t data_offset;
   uint32_t snapshot_size;
   uint32_t n_counters;
   struct intel_perf_counter_pass *counter_pass;
   uint32_t n_passes;
   struct intel_perf_query_info **pass_query;
};

static inline uint32_t
khr_perf_query_preamble_offset(const struct anv_query_pool *pool,
                               uint32_t pass)
{
   return pool->pass_size * pass + 8;
}

struct anv_acceleration_structure {
   struct vk_object_base base;

   VkDeviceSize size;
   struct anv_address address;
};

int anv_get_instance_entrypoint_index(const char *name);
int anv_get_device_entrypoint_index(const char *name);
int anv_get_physical_device_entrypoint_index(const char *name);

const char *anv_get_instance_entry_name(int index);
const char *anv_get_physical_device_entry_name(int index);
const char *anv_get_device_entry_name(int index);

bool
anv_instance_entrypoint_is_enabled(int index, uint32_t core_version,
                                   const struct vk_instance_extension_table *instance);
bool
anv_physical_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                          const struct vk_instance_extension_table *instance);
bool
anv_device_entrypoint_is_enabled(int index, uint32_t core_version,
                                 const struct vk_instance_extension_table *instance,
                                 const struct vk_device_extension_table *device);

const struct vk_device_dispatch_table *
anv_get_device_dispatch_table(const struct intel_device_info *devinfo);

void
anv_dump_pipe_bits(enum anv_pipe_bits bits);

static inline void
anv_add_pending_pipe_bits(struct anv_cmd_buffer *cmd_buffer,
                          enum anv_pipe_bits bits,
                          const char *reason)
{
   cmd_buffer->state.pending_pipe_bits |= bits;
   if (unlikely(INTEL_DEBUG & DEBUG_PIPE_CONTROL) && bits) {
      fputs("pc: add ", stderr);
      anv_dump_pipe_bits(bits);
      fprintf(stderr, "reason: %s\n", reason);
   }
}
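/* Illustrative usage (hypothetical call site; the flush bit shown is assumed
 * from the anv_pipe_bits enum defined earlier in this header): callers
 * accumulate flush bits together with a human-readable reason string for
 * INTEL_DEBUG pipe-control logging:
 *
 *    anv_add_pending_pipe_bits(cmd_buffer,
 *                              ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
 *                              "change render target");
 */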
static inline uint32_t
anv_get_subpass_id(const struct anv_cmd_state * const cmd_state)
{
   /* This function must be called from within a subpass. */
   assert(cmd_state->pass && cmd_state->subpass);

   const uint32_t subpass_id = cmd_state->subpass - cmd_state->pass->subpasses;

   /* The id of this subpass shouldn't exceed the number of subpasses in
    * this render pass minus 1.
    */
   assert(subpass_id < cmd_state->pass->subpass_count);
   return subpass_id;
}

struct anv_performance_configuration_intel {
   struct vk_object_base base;

   struct intel_perf_registers *register_config;

   uint64_t config_id;
};

void anv_physical_device_init_perf(struct anv_physical_device *device, int fd);
void anv_device_perf_init(struct anv_device *device);
void anv_perf_write_pass_results(struct intel_perf_config *perf,
                                 struct anv_query_pool *pool, uint32_t pass,
                                 const struct intel_perf_query_result *accumulated_results,
                                 union VkPerformanceCounterResultKHR *results);

#define ANV_FROM_HANDLE(__anv_type, __name, __handle) \
   VK_FROM_HANDLE(__anv_type, __name, __handle)
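/* Illustrative usage (hypothetical entrypoint): together with the handle
 * casts declared below, ANV_FROM_HANDLE converts a Vulkan handle parameter
 * into the corresponding driver struct pointer:
 *
 *    ANV_FROM_HANDLE(anv_image, image, _image);
 *    ANV_FROM_HANDLE(anv_device, device, _device);
 */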
VK_DEFINE_HANDLE_CASTS(anv_cmd_buffer, base, VkCommandBuffer,
                       VK_OBJECT_TYPE_COMMAND_BUFFER)
VK_DEFINE_HANDLE_CASTS(anv_device, vk.base, VkDevice, VK_OBJECT_TYPE_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_instance, vk.base, VkInstance, VK_OBJECT_TYPE_INSTANCE)
VK_DEFINE_HANDLE_CASTS(anv_physical_device, vk.base, VkPhysicalDevice,
                       VK_OBJECT_TYPE_PHYSICAL_DEVICE)
VK_DEFINE_HANDLE_CASTS(anv_queue, base, VkQueue, VK_OBJECT_TYPE_QUEUE)

VK_DEFINE_NONDISP_HANDLE_CASTS(anv_acceleration_structure, base,
                               VkAccelerationStructureKHR,
                               VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_cmd_pool, base, VkCommandPool,
                               VK_OBJECT_TYPE_COMMAND_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer, base, VkBuffer,
                               VK_OBJECT_TYPE_BUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_buffer_view, base, VkBufferView,
                               VK_OBJECT_TYPE_BUFFER_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_pool, base, VkDescriptorPool,
                               VK_OBJECT_TYPE_DESCRIPTOR_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set, base, VkDescriptorSet,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_set_layout, base,
                               VkDescriptorSetLayout,
                               VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_descriptor_update_template, base,
                               VkDescriptorUpdateTemplate,
                               VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_device_memory, base, VkDeviceMemory,
                               VK_OBJECT_TYPE_DEVICE_MEMORY)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_fence, base, VkFence, VK_OBJECT_TYPE_FENCE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_event, base, VkEvent, VK_OBJECT_TYPE_EVENT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_framebuffer, base, VkFramebuffer,
                               VK_OBJECT_TYPE_FRAMEBUFFER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image, base, VkImage, VK_OBJECT_TYPE_IMAGE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_image_view, base, VkImageView,
                               VK_OBJECT_TYPE_IMAGE_VIEW)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_cache, base, VkPipelineCache,
                               VK_OBJECT_TYPE_PIPELINE_CACHE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline, base, VkPipeline,
                               VK_OBJECT_TYPE_PIPELINE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_pipeline_layout, base, VkPipelineLayout,
                               VK_OBJECT_TYPE_PIPELINE_LAYOUT)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_query_pool, base, VkQueryPool,
                               VK_OBJECT_TYPE_QUERY_POOL)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_render_pass, base, VkRenderPass,
                               VK_OBJECT_TYPE_RENDER_PASS)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_sampler, base, VkSampler,
                               VK_OBJECT_TYPE_SAMPLER)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_semaphore, base, VkSemaphore,
                               VK_OBJECT_TYPE_SEMAPHORE)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_ycbcr_conversion, base,
                               VkSamplerYcbcrConversion,
                               VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION)
VK_DEFINE_NONDISP_HANDLE_CASTS(anv_performance_configuration_intel, base,
                               VkPerformanceConfigurationINTEL,
                               VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL)

#define anv_genX(devinfo, thing) ({                       \
   __typeof(&gfx9_##thing) genX_thing;                    \
   switch ((devinfo)->verx10) {                           \
   case 70:                                               \
      genX_thing = &gfx7_##thing;                         \
      break;                                              \
   case 75:                                               \
      genX_thing = &gfx75_##thing;                        \
      break;                                              \
   case 80:                                               \
      genX_thing = &gfx8_##thing;                         \
      break;                                              \
   case 90:                                               \
      genX_thing = &gfx9_##thing;                         \
      break;                                              \
   case 110:                                              \
      genX_thing = &gfx11_##thing;                        \
      break;                                              \
   case 120:                                              \
      genX_thing = &gfx12_##thing;                        \
      break;                                              \
   case 125:                                              \
      genX_thing = &gfx125_##thing;                       \
      break;                                              \
   default:                                               \
      unreachable("Unknown hardware generation");         \
   }                                                      \
   genX_thing;                                            \
})

/* Gen-specific function declarations */
#ifdef genX
#  include "anv_genX.h"
#else
#  define genX(x) gfx7_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx75_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx8_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx9_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx11_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx12_##x
#  include "anv_genX.h"
#  undef genX
#  define genX(x) gfx125_##x
#  include "anv_genX.h"
#  undef genX
#endif

#endif /* ANV_PRIVATE_H */