Path: blob/21.2-virgl/src/freedreno/vulkan/tu_private.h
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef TU_PRIVATE_H
#define TU_PRIVATE_H

#include <assert.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef HAVE_VALGRIND
#include <memcheck.h>
#include <valgrind.h>
#define VG(x) x
#else
#define VG(x) ((void)0)
#endif

#define MESA_LOG_TAG "TU"

#include "c11/threads.h"
#include "main/macros.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_dynarray.h"
#include "vk_alloc.h"
#include "vk_debug_report.h"
#include "vk_device.h"
#include "vk_dispatch_table.h"
#include "vk_extensions.h"
#include "vk_instance.h"
#include "vk_physical_device.h"
#include "vk_shader_module.h"
#include "wsi_common.h"

#include "ir3/ir3_compiler.h"
#include "ir3/ir3_shader.h"

#include "adreno_common.xml.h"
#include "adreno_pm4.xml.h"
#include "a6xx.xml.h"
#include "fdl/freedreno_layout.h"
#include "common/freedreno_dev_info.h"
#include "perfcntrs/freedreno_perfcntr.h"

#include "tu_descriptor_set.h"
#include "tu_util.h"

/* Pre-declarations needed for WSI entrypoints */
struct wl_surface;
struct wl_display;
typedef struct xcb_connection_t xcb_connection_t;
typedef uint32_t xcb_visualid_t;
typedef uint32_t xcb_window_t;

#include <vulkan/vk_android_native_buffer.h>
#include <vulkan/vk_icd.h>
#include <vulkan/vulkan.h>

#include "tu_entrypoints.h"

#include "vk_format.h"

#define MAX_VBS 32
#define MAX_VERTEX_ATTRIBS 32
#define MAX_RTS 8
#define MAX_VSC_PIPES 32
#define MAX_VIEWPORTS 16
#define MAX_SCISSORS 16
#define MAX_DISCARD_RECTANGLES 4
#define MAX_PUSH_CONSTANTS_SIZE 128
#define MAX_PUSH_DESCRIPTORS 32
#define MAX_DYNAMIC_UNIFORM_BUFFERS 16
#define MAX_DYNAMIC_STORAGE_BUFFERS 8
#define MAX_DYNAMIC_BUFFERS \
   (MAX_DYNAMIC_UNIFORM_BUFFERS + MAX_DYNAMIC_STORAGE_BUFFERS)
#define TU_MAX_DRM_DEVICES 8
#define MAX_VIEWS 16
#define MAX_BIND_POINTS 2 /* compute + graphics */
/* The Qualcomm driver exposes 0x20000058 */
#define MAX_STORAGE_BUFFER_RANGE 0x20000000
/* We use ldc for uniform buffer loads, just like the Qualcomm driver, so
 * expose the same maximum range.
 * TODO: The SIZE bitfield is 15 bits, and in 4-dword units, so the actual
 * range might be higher.
 */
#define MAX_UNIFORM_BUFFER_RANGE 0x10000

#define A6XX_TEX_CONST_DWORDS 16
#define A6XX_TEX_SAMP_DWORDS 4

#define COND(bool, val) ((bool) ? (val) : 0)
#define BIT(bit) (1u << (bit))

/* Whenever we generate an error, pass it through this function. Useful for
 * debugging, where we can break on it. Only call at error site, not when
 * propagating errors. Might be useful to plug in a stack trace here.
 */

struct tu_instance;

VkResult
__vk_errorf(struct tu_instance *instance,
            VkResult error,
            bool force_print,
            const char *file,
            int line,
            const char *format,
            ...) PRINTFLIKE(6, 7);

#define vk_error(instance, error) \
   __vk_errorf(instance, error, false, __FILE__, __LINE__, NULL);
#define vk_errorf(instance, error, format, ...) \
   __vk_errorf(instance, error, false, __FILE__, __LINE__, format, ##__VA_ARGS__);

/* Prints startup errors if TU_DEBUG=startup is set or on a debug driver
 * build.
 */
#define vk_startup_errorf(instance, error, format, ...) \
   __vk_errorf(instance, error, instance->debug_flags & TU_DEBUG_STARTUP, \
               __FILE__, __LINE__, format, ##__VA_ARGS__)

void
__tu_finishme(const char *file, int line, const char *format, ...)
   PRINTFLIKE(3, 4);

/**
 * Print a FINISHME message, including its source location.
 */
#define tu_finishme(format, ...) \
   do { \
      static bool reported = false; \
      if (!reported) { \
         __tu_finishme(__FILE__, __LINE__, format, ##__VA_ARGS__); \
         reported = true; \
      } \
   } while (0)

#define tu_stub() \
   do { \
      tu_finishme("stub %s", __func__); \
   } while (0)
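/* Usage sketch (illustrative only; tu_frob() is a hypothetical helper, not
 * part of the driver): errors are reported with vk_error()/vk_errorf() at
 * the point of failure, while tu_finishme()/tu_stub() flag unimplemented
 * paths exactly once per call site:
 *
 *    static VkResult
 *    tu_frob(struct tu_instance *instance, void *data)
 *    {
 *       if (!data)
 *          return vk_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
 *                           "failed to allocate frob data");
 *       tu_finishme("frob fast path");
 *       return VK_SUCCESS;
 *    }
 */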
struct tu_memory_heap {
   /* Standard bits passed on to the client */
   VkDeviceSize size;
   VkMemoryHeapFlags flags;

   /** Copied from ANV:
    *
    * Driver-internal book-keeping.
    *
    * Align it to 64 bits to make atomic operations faster on 32 bit platforms.
    */
   VkDeviceSize used __attribute__ ((aligned (8)));
};

uint64_t
tu_get_system_heap_size(void);
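/* Illustrative sketch (an assumption about usage, not defined in this
 * header): because `used` is aligned for atomics, heap accounting can use
 * the p_atomic_* helpers from util/u_atomic.h without a lock:
 *
 *    p_atomic_add(&heap->used, bo_size);   // on allocation
 *
 * with the same amount subtracted again when the memory is freed.
 */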
struct tu_physical_device
{
   struct vk_physical_device vk;

   struct tu_instance *instance;

   const char *name;
   uint8_t driver_uuid[VK_UUID_SIZE];
   uint8_t device_uuid[VK_UUID_SIZE];
   uint8_t cache_uuid[VK_UUID_SIZE];

   struct wsi_device wsi_device;

   int local_fd;
   int master_fd;

   unsigned gpu_id;
   uint32_t gmem_size;
   uint64_t gmem_base;
   uint32_t ccu_offset_gmem;
   uint32_t ccu_offset_bypass;

   const struct fd_dev_info *info;

   int msm_major_version;
   int msm_minor_version;

   /* This is the driver's on-disk cache, used as a fallback as opposed to
    * the pipeline cache defined by apps.
    */
   struct disk_cache *disk_cache;

   struct tu_memory_heap heap;
};

enum tu_debug_flags
{
   TU_DEBUG_STARTUP = 1 << 0,
   TU_DEBUG_NIR = 1 << 1,
   TU_DEBUG_NOBIN = 1 << 3,
   TU_DEBUG_SYSMEM = 1 << 4,
   TU_DEBUG_FORCEBIN = 1 << 5,
   TU_DEBUG_NOUBWC = 1 << 6,
   TU_DEBUG_NOMULTIPOS = 1 << 7,
   TU_DEBUG_NOLRZ = 1 << 8,
   TU_DEBUG_PERFC = 1 << 9,
};

struct tu_instance
{
   struct vk_instance vk;

   uint32_t api_version;
   int physical_device_count;
   struct tu_physical_device physical_devices[TU_MAX_DRM_DEVICES];

   enum tu_debug_flags debug_flags;
};

VkResult
tu_wsi_init(struct tu_physical_device *physical_device);
void
tu_wsi_finish(struct tu_physical_device *physical_device);

bool
tu_instance_extension_supported(const char *name);
uint32_t
tu_physical_device_api_version(struct tu_physical_device *dev);
bool
tu_physical_device_extension_supported(struct tu_physical_device *dev,
                                       const char *name);

struct cache_entry;

struct tu_pipeline_cache
{
   struct vk_object_base base;

   struct tu_device *device;
   pthread_mutex_t mutex;

   uint32_t total_size;
   uint32_t table_size;
   uint32_t kernel_count;
   struct cache_entry **hash_table;
   bool modified;

   VkAllocationCallbacks alloc;
};

struct tu_pipeline_key
{
};

/* queue types */
#define TU_QUEUE_GENERAL 0

#define TU_MAX_QUEUE_FAMILIES 1

struct tu_syncobj;

struct tu_queue
{
   struct vk_object_base base;

   struct tu_device *device;
   uint32_t queue_family_index;
   int queue_idx;
   VkDeviceQueueCreateFlags flags;

   uint32_t msm_queue_id;
   int fence;

   /* Queue containing deferred submits */
   struct list_head queued_submits;
};

struct tu_bo
{
   uint32_t gem_handle;
   uint64_t size;
   uint64_t iova;
   void *map;
};

enum global_shader {
   GLOBAL_SH_VS,
   GLOBAL_SH_FS_BLIT,
   GLOBAL_SH_FS_BLIT_ZSCALE,
   GLOBAL_SH_FS_CLEAR0,
   GLOBAL_SH_FS_CLEAR_MAX = GLOBAL_SH_FS_CLEAR0 + MAX_RTS,
   GLOBAL_SH_COUNT,
};

#define TU_BORDER_COLOR_COUNT 4096
#define TU_BORDER_COLOR_BUILTIN 6

/* This struct defines the layout of the global_bo */
struct tu6_global
{
   /* clear/blit shaders, all <= 16 instrs (16 instr = 1 instrlen unit) */
   instr_t shaders[GLOBAL_SH_COUNT][16];

   uint32_t seqno_dummy; /* dummy seqno for CP_EVENT_WRITE */
   uint32_t _pad0;
   volatile uint32_t vsc_draw_overflow;
   uint32_t _pad1;
   volatile uint32_t vsc_prim_overflow;
   uint32_t _pad2;
   uint64_t predicate;

   /* scratch space for VPC_SO[i].FLUSH_BASE_LO/HI, starts on a 32 byte boundary. */
   struct {
      uint32_t offset;
      uint32_t pad[7];
   } flush_base[4];

   ALIGN16 uint32_t cs_indirect_xyz[3];

   /* note: larger global bo will be used for customBorderColors */
   struct bcolor_entry bcolor_builtin[TU_BORDER_COLOR_BUILTIN], bcolor[];
};
#define gb_offset(member) offsetof(struct tu6_global, member)
#define global_iova(cmd, member) ((cmd)->device->global_bo.iova + gb_offset(member))

void tu_init_clear_blit_shaders(struct tu6_global *global);
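/* Example (follows directly from the layout above): the global BO is
 * addressed via gb_offset()/global_iova(), e.g.
 *
 *    uint64_t iova = global_iova(cmd, cs_indirect_xyz);
 *
 * which evaluates to the device's global_bo.iova plus
 * offsetof(struct tu6_global, cs_indirect_xyz).
 */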
/* extra space in vsc draw/prim streams */
#define VSC_PAD 0x40

struct tu_device
{
   struct vk_device vk;
   struct tu_instance *instance;

   struct tu_queue *queues[TU_MAX_QUEUE_FAMILIES];
   int queue_count[TU_MAX_QUEUE_FAMILIES];

   struct tu_physical_device *physical_device;
   int fd;
   int _lost;

   struct ir3_compiler *compiler;

   /* Backup in-memory cache to be used if the app doesn't provide one */
   struct tu_pipeline_cache *mem_cache;

#define MIN_SCRATCH_BO_SIZE_LOG2 12 /* A page */

   /* Currently the kernel driver uses a 32-bit GPU address space, but it
    * should be impossible to go beyond 48 bits.
    */
   struct {
      struct tu_bo bo;
      mtx_t construct_mtx;
      bool initialized;
   } scratch_bos[48 - MIN_SCRATCH_BO_SIZE_LOG2];

   struct tu_bo global_bo;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
   BITSET_DECLARE(custom_border_color, TU_BORDER_COLOR_COUNT);
   mtx_t mutex;

   /* bo list for submits: */
   struct drm_msm_gem_submit_bo *bo_list;
   /* map bo handles to bo list index: */
   uint32_t *bo_idx;
   uint32_t bo_count, bo_list_size, bo_idx_size;
   mtx_t bo_mutex;

   /* Command streams to set pass index to a scratch reg */
   struct tu_cs *perfcntrs_pass_cs;
   struct tu_cs_entry *perfcntrs_pass_cs_entries;

   /* Condition variable for timeline semaphore to notify waiters when a
    * new submit is executed. */
   pthread_cond_t timeline_cond;
   pthread_mutex_t submit_mutex;

#ifdef ANDROID
   const void *gralloc;
   enum {
      TU_GRALLOC_UNKNOWN,
      TU_GRALLOC_CROS,
      TU_GRALLOC_OTHER,
   } gralloc_type;
#endif
};

VkResult _tu_device_set_lost(struct tu_device *device,
                             const char *msg, ...) PRINTFLIKE(2, 3);
#define tu_device_set_lost(dev, ...) \
   _tu_device_set_lost(dev, __VA_ARGS__)

static inline bool
tu_device_is_lost(struct tu_device *device)
{
   return unlikely(p_atomic_read(&device->_lost));
}

VkResult
tu_device_submit_deferred_locked(struct tu_device *dev);

enum tu_bo_alloc_flags
{
   TU_BO_ALLOC_NO_FLAGS = 0,
   TU_BO_ALLOC_ALLOW_DUMP = 1 << 0,
   TU_BO_ALLOC_GPU_READ_ONLY = 1 << 1,
};

VkResult
tu_bo_init_new(struct tu_device *dev, struct tu_bo *bo, uint64_t size,
               enum tu_bo_alloc_flags flags);
VkResult
tu_bo_init_dmabuf(struct tu_device *dev,
                  struct tu_bo *bo,
                  uint64_t size,
                  int fd);
int
tu_bo_export_dmabuf(struct tu_device *dev, struct tu_bo *bo);
void
tu_bo_finish(struct tu_device *dev, struct tu_bo *bo);
VkResult
tu_bo_map(struct tu_device *dev, struct tu_bo *bo);

/* Get a scratch bo for use inside a command buffer. This will always return
 * the same bo given the same size or similar sizes, so only one scratch bo
 * can be used at the same time. It's meant for short-lived things where we
 * need to write to some piece of memory, read from it, and then immediately
 * discard it.
 */
VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo);
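/* Usage sketch (illustrative): per the comment above, the returned BO is
 * shared, so it must only be used for short-lived scratch data within the
 * current command stream:
 *
 *    struct tu_bo *scratch;
 *    VkResult result = tu_get_scratch_bo(dev, 64, &scratch);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    // ... emit packets that write to and read from scratch->iova ...
 */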
struct tu_cs_entry
{
   /* No ownership */
   const struct tu_bo *bo;

   uint32_t size;
   uint32_t offset;
};

struct tu_cs_memory {
   uint32_t *map;
   uint64_t iova;
};

struct tu_draw_state {
   uint64_t iova : 48;
   uint32_t size : 16;
};

enum tu_dynamic_state
{
   /* re-use VK_DYNAMIC_STATE_ enums for non-extended dynamic states */
   TU_DYNAMIC_STATE_SAMPLE_LOCATIONS = VK_DYNAMIC_STATE_STENCIL_REFERENCE + 1,
   TU_DYNAMIC_STATE_RB_DEPTH_CNTL,
   TU_DYNAMIC_STATE_RB_STENCIL_CNTL,
   TU_DYNAMIC_STATE_VB_STRIDE,
   TU_DYNAMIC_STATE_COUNT,
   /* no associated draw state: */
   TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY = TU_DYNAMIC_STATE_COUNT,
   /* re-use the line width enum as it uses GRAS_SU_CNTL: */
   TU_DYNAMIC_STATE_GRAS_SU_CNTL = VK_DYNAMIC_STATE_LINE_WIDTH,
};

enum tu_draw_state_group_id
{
   TU_DRAW_STATE_PROGRAM_CONFIG,
   TU_DRAW_STATE_PROGRAM,
   TU_DRAW_STATE_PROGRAM_BINNING,
   TU_DRAW_STATE_TESS,
   TU_DRAW_STATE_VB,
   TU_DRAW_STATE_VI,
   TU_DRAW_STATE_VI_BINNING,
   TU_DRAW_STATE_RAST,
   TU_DRAW_STATE_BLEND,
   TU_DRAW_STATE_SHADER_GEOM_CONST,
   TU_DRAW_STATE_FS_CONST,
   TU_DRAW_STATE_DESC_SETS,
   TU_DRAW_STATE_DESC_SETS_LOAD,
   TU_DRAW_STATE_VS_PARAMS,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM,
   TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM,
   TU_DRAW_STATE_LRZ,
   TU_DRAW_STATE_DEPTH_PLANE,

   /* dynamic state related draw states */
   TU_DRAW_STATE_DYNAMIC,
   TU_DRAW_STATE_COUNT = TU_DRAW_STATE_DYNAMIC + TU_DYNAMIC_STATE_COUNT,
};

enum tu_cs_mode
{
   /*
    * A command stream in TU_CS_MODE_GROW mode grows automatically whenever it
    * is full. tu_cs_begin must be called before command packet emission and
    * tu_cs_end must be called after.
    *
    * This mode may create multiple entries internally. The entries must be
    * submitted together.
    */
   TU_CS_MODE_GROW,

   /*
    * A command stream in TU_CS_MODE_EXTERNAL mode wraps an external,
    * fixed-size buffer. tu_cs_begin and tu_cs_end are optional and have no
    * effect on it.
    *
    * This mode does not create any entry or any BO.
    */
   TU_CS_MODE_EXTERNAL,

   /*
    * A command stream in TU_CS_MODE_SUB_STREAM mode does not support direct
    * command packet emission. tu_cs_begin_sub_stream must be called to get a
    * sub-stream to emit command packets to. When done with the sub-stream,
    * tu_cs_end_sub_stream must be called.
    *
    * This mode does not create any entry internally.
    */
   TU_CS_MODE_SUB_STREAM,
};

struct tu_cs
{
   uint32_t *start;
   uint32_t *cur;
   uint32_t *reserved_end;
   uint32_t *end;

   struct tu_device *device;
   enum tu_cs_mode mode;
   uint32_t next_bo_size;

   struct tu_cs_entry *entries;
   uint32_t entry_count;
   uint32_t entry_capacity;

   struct tu_bo **bos;
   uint32_t bo_count;
   uint32_t bo_capacity;

   /* state for cond_exec_start/cond_exec_end */
   uint32_t cond_flags;
   uint32_t *cond_dwords;
};
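/* Usage sketch (illustrative; the tu_cs entry points live in tu_cs.h, this
 * only shows the shape implied by the mode descriptions above): a
 * TU_CS_MODE_GROW stream brackets packet emission with tu_cs_begin()/
 * tu_cs_end(), while TU_CS_MODE_SUB_STREAM hands out sub-streams via
 * tu_cs_begin_sub_stream()/tu_cs_end_sub_stream():
 *
 *    tu_cs_begin(cs);
 *    // ... emit command packets ...
 *    tu_cs_end(cs);
 */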
struct tu_device_memory
{
   struct vk_object_base base;

   struct tu_bo bo;
};

struct tu_descriptor_range
{
   uint64_t va;
   uint32_t size;
};

struct tu_descriptor_set
{
   struct vk_object_base base;

   const struct tu_descriptor_set_layout *layout;
   struct tu_descriptor_pool *pool;
   uint32_t size;

   uint64_t va;
   uint32_t *mapped_ptr;

   uint32_t *dynamic_descriptors;
};

struct tu_descriptor_pool_entry
{
   uint32_t offset;
   uint32_t size;
   struct tu_descriptor_set *set;
};

struct tu_descriptor_pool
{
   struct vk_object_base base;

   struct tu_bo bo;
   uint64_t current_offset;
   uint64_t size;

   uint8_t *host_memory_base;
   uint8_t *host_memory_ptr;
   uint8_t *host_memory_end;

   uint32_t entry_count;
   uint32_t max_entry_count;
   struct tu_descriptor_pool_entry entries[0];
};

struct tu_descriptor_update_template_entry
{
   VkDescriptorType descriptor_type;

   /* The number of descriptors to update */
   uint32_t descriptor_count;

   /* Into mapped_ptr or dynamic_descriptors, in units of the respective array */
   uint32_t dst_offset;

   /* In dwords. Not valid/used for dynamic descriptors */
   uint32_t dst_stride;

   uint32_t buffer_offset;

   /* Only valid for combined image samplers and samplers */
   uint16_t has_sampler;

   /* In bytes */
   size_t src_offset;
   size_t src_stride;

   /* For push descriptors */
   const struct tu_sampler *immutable_samplers;
};

struct tu_descriptor_update_template
{
   struct vk_object_base base;

   uint32_t entry_count;
   VkPipelineBindPoint bind_point;
   struct tu_descriptor_update_template_entry entry[0];
};

struct tu_buffer
{
   struct vk_object_base base;

   VkDeviceSize size;

   VkBufferUsageFlags usage;
   VkBufferCreateFlags flags;

   struct tu_bo *bo;
   VkDeviceSize bo_offset;
};

static inline uint64_t
tu_buffer_iova(struct tu_buffer *buffer)
{
   return buffer->bo->iova + buffer->bo_offset;
}

const char *
tu_get_debug_option_name(int id);

const char *
tu_get_perftest_option_name(int id);

struct tu_descriptor_state
{
   struct tu_descriptor_set *sets[MAX_SETS];
   struct tu_descriptor_set push_set;
   uint32_t dynamic_descriptors[MAX_DYNAMIC_BUFFERS * A6XX_TEX_CONST_DWORDS];
};

enum tu_cmd_dirty_bits
{
   TU_CMD_DIRTY_VERTEX_BUFFERS = BIT(0),
   TU_CMD_DIRTY_VB_STRIDE = BIT(1),
   TU_CMD_DIRTY_GRAS_SU_CNTL = BIT(2),
   TU_CMD_DIRTY_RB_DEPTH_CNTL = BIT(3),
   TU_CMD_DIRTY_RB_STENCIL_CNTL = BIT(4),
   TU_CMD_DIRTY_DESC_SETS_LOAD = BIT(5),
   TU_CMD_DIRTY_COMPUTE_DESC_SETS_LOAD = BIT(6),
   TU_CMD_DIRTY_SHADER_CONSTS = BIT(7),
   TU_CMD_DIRTY_LRZ = BIT(8),
   TU_CMD_DIRTY_VS_PARAMS = BIT(9),
   /* all draw states were disabled and need to be re-enabled: */
   TU_CMD_DIRTY_DRAW_STATE = BIT(10)
};

/* There are only three cache domains we have to care about: the CCU, or
 * color cache unit, which is used for color and depth/stencil attachments
 * and copy/blit destinations, and is split conceptually into color and depth,
 * and the universal cache or UCHE which is used for pretty much everything
 * else, except for the CP (uncached) and host. We need to flush whenever data
 * crosses these boundaries.
 */
enum tu_cmd_access_mask {
   TU_ACCESS_UCHE_READ = 1 << 0,
   TU_ACCESS_UCHE_WRITE = 1 << 1,
   TU_ACCESS_CCU_COLOR_READ = 1 << 2,
   TU_ACCESS_CCU_COLOR_WRITE = 1 << 3,
   TU_ACCESS_CCU_DEPTH_READ = 1 << 4,
   TU_ACCESS_CCU_DEPTH_WRITE = 1 << 5,

   /* Experiments have shown that while it's safe to avoid flushing the CCU
    * after each blit/renderpass, it's not safe to assume that subsequent
    * lookups with a different attachment state will hit unflushed cache
    * entries. That is, the CCU needs to be flushed and possibly invalidated
    * when accessing memory with a different attachment state. Writing to an
    * attachment under the following conditions after clearing using the
    * normal 2d engine path is known to have issues:
    *
    * - It isn't the 0'th layer.
    * - There is more than one attachment, and this isn't the 0'th attachment
    *   (this seems to also depend on the cpp of the attachments).
    *
    * Our best guess is that the layer/MRT state is used when computing
    * the location of a cache entry in CCU, to avoid conflicts. We assume that
    * any access in a renderpass after or before an access by a transfer needs
    * a flush/invalidate, and use the _INCOHERENT variants to represent access
    * by a transfer.
    */
   TU_ACCESS_CCU_COLOR_INCOHERENT_READ = 1 << 6,
   TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE = 1 << 7,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_READ = 1 << 8,
   TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE = 1 << 9,

   /* Accesses by the host */
   TU_ACCESS_HOST_READ = 1 << 10,
   TU_ACCESS_HOST_WRITE = 1 << 11,

   /* Accesses by a GPU engine which bypasses any cache. e.g. writes via
    * CP_EVENT_WRITE::BLIT and the CP are SYSMEM_WRITE.
    */
   TU_ACCESS_SYSMEM_READ = 1 << 12,
   TU_ACCESS_SYSMEM_WRITE = 1 << 13,

   /* Set if a WFI is required. This can be required for:
    * - 2D engine which (on some models) doesn't wait for flushes to complete
    *   before starting
    * - CP draw indirect opcodes, where we need to wait for any flushes to
    *   complete but the CP implicitly waits for WFI's to complete and
    *   therefore we only need a WFI after the flushes.
    */
   TU_ACCESS_WFI_READ = 1 << 14,

   /* Set if a CP_WAIT_FOR_ME is required due to the data being read by the CP
    * without it waiting for any WFI.
    */
   TU_ACCESS_WFM_READ = 1 << 15,

   /* Memory writes from the CP start in-order with draws and event writes,
    * but execute asynchronously and hence need a CP_WAIT_MEM_WRITES if read.
    */
   TU_ACCESS_CP_WRITE = 1 << 16,

   TU_ACCESS_READ =
      TU_ACCESS_UCHE_READ |
      TU_ACCESS_CCU_COLOR_READ |
      TU_ACCESS_CCU_DEPTH_READ |
      TU_ACCESS_CCU_COLOR_INCOHERENT_READ |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_READ |
      TU_ACCESS_HOST_READ |
      TU_ACCESS_SYSMEM_READ |
      TU_ACCESS_WFI_READ |
      TU_ACCESS_WFM_READ,

   TU_ACCESS_WRITE =
      TU_ACCESS_UCHE_WRITE |
      TU_ACCESS_CCU_COLOR_WRITE |
      TU_ACCESS_CCU_COLOR_INCOHERENT_WRITE |
      TU_ACCESS_CCU_DEPTH_WRITE |
      TU_ACCESS_CCU_DEPTH_INCOHERENT_WRITE |
      TU_ACCESS_HOST_WRITE |
      TU_ACCESS_SYSMEM_WRITE |
      TU_ACCESS_CP_WRITE,

   TU_ACCESS_ALL =
      TU_ACCESS_READ |
      TU_ACCESS_WRITE,
};

enum tu_cmd_flush_bits {
   TU_CMD_FLAG_CCU_FLUSH_DEPTH = 1 << 0,
   TU_CMD_FLAG_CCU_FLUSH_COLOR = 1 << 1,
   TU_CMD_FLAG_CCU_INVALIDATE_DEPTH = 1 << 2,
   TU_CMD_FLAG_CCU_INVALIDATE_COLOR = 1 << 3,
   TU_CMD_FLAG_CACHE_FLUSH = 1 << 4,
   TU_CMD_FLAG_CACHE_INVALIDATE = 1 << 5,
   TU_CMD_FLAG_WAIT_MEM_WRITES = 1 << 6,
   TU_CMD_FLAG_WAIT_FOR_IDLE = 1 << 7,
   TU_CMD_FLAG_WAIT_FOR_ME = 1 << 8,

   TU_CMD_FLAG_ALL_FLUSH =
      TU_CMD_FLAG_CCU_FLUSH_DEPTH |
      TU_CMD_FLAG_CCU_FLUSH_COLOR |
      TU_CMD_FLAG_CACHE_FLUSH |
      /* Treat the CP as a sort of "cache" which may need to be "flushed" via
       * waiting for writes to land with WAIT_FOR_MEM_WRITES.
       */
      TU_CMD_FLAG_WAIT_MEM_WRITES,

   TU_CMD_FLAG_GPU_INVALIDATE =
      TU_CMD_FLAG_CCU_INVALIDATE_DEPTH |
      TU_CMD_FLAG_CCU_INVALIDATE_COLOR |
      TU_CMD_FLAG_CACHE_INVALIDATE,

   TU_CMD_FLAG_ALL_INVALIDATE =
      TU_CMD_FLAG_GPU_INVALIDATE |
      /* Treat the CP as a sort of "cache" which may need to be "invalidated"
       * via waiting for UCHE/CCU flushes to land with WFI/WFM.
       */
      TU_CMD_FLAG_WAIT_FOR_IDLE |
      TU_CMD_FLAG_WAIT_FOR_ME,
};
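/* Illustrative example (the real barrier translation lives in the command
 * buffer code; this only shows how the bits above are meant to compose): a
 * color attachment write is TU_ACCESS_CCU_COLOR_WRITE, and a later 2D-engine
 * access to the same memory with different attachment state is an
 * _INCOHERENT access, so the CCU color cache has to be flushed in between:
 *
 *    enum tu_cmd_flush_bits flush = 0;
 *    if ((src_access & TU_ACCESS_CCU_COLOR_WRITE) &&
 *        (dst_access & TU_ACCESS_CCU_COLOR_INCOHERENT_READ))
 *       flush |= TU_CMD_FLAG_CCU_FLUSH_COLOR;
 */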
/* Changing the CCU from sysmem mode to gmem mode or vice-versa is pretty
 * heavy, involving a CCU cache flush/invalidate and a WFI in order to change
 * which part of the gmem is used by the CCU. Here we keep track of the
 * current state of the CCU.
 */
enum tu_cmd_ccu_state {
   TU_CMD_CCU_SYSMEM,
   TU_CMD_CCU_GMEM,
   TU_CMD_CCU_UNKNOWN,
};

struct tu_cache_state {
   /* Caches which must be made available (flushed) eventually if there are
    * any users outside that cache domain, and caches which must be
    * invalidated eventually if there are any reads.
    */
   enum tu_cmd_flush_bits pending_flush_bits;
   /* Pending flushes */
   enum tu_cmd_flush_bits flush_bits;
};

enum tu_lrz_force_disable_mask {
   TU_LRZ_FORCE_DISABLE_LRZ = 1 << 0,
   TU_LRZ_FORCE_DISABLE_WRITE = 1 << 1,
};

enum tu_lrz_direction {
   TU_LRZ_UNKNOWN,
   /* Depth func less/less-than: */
   TU_LRZ_LESS,
   /* Depth func greater/greater-than: */
   TU_LRZ_GREATER,
};

struct tu_lrz_pipeline
{
   uint32_t force_disable_mask;
   bool fs_has_kill;
   bool force_late_z;
   bool early_fragment_tests;
};

struct tu_lrz_state
{
   /* Depth/Stencil image currently in use for LRZ */
   struct tu_image *image;
   bool valid : 1;
   struct tu_draw_state state;
   enum tu_lrz_direction prev_direction;
};

struct tu_vs_params {
   uint32_t params_offset;
   uint32_t vertex_offset;
   uint32_t first_instance;
};
struct tu_cmd_state
{
   uint32_t dirty;

   struct tu_pipeline *pipeline;
   struct tu_pipeline *compute_pipeline;

   /* Vertex buffers, viewports, and scissors:
    * the states for these can be updated partially, so we need to save them
    * to be able to emit a complete draw state
    */
   struct {
      uint64_t base;
      uint32_t size;
      uint32_t stride;
   } vb[MAX_VBS];
   VkViewport viewport[MAX_VIEWPORTS];
   VkRect2D scissor[MAX_SCISSORS];
   uint32_t max_viewport, max_scissor;

   /* for dynamic states that can't be emitted directly */
   uint32_t dynamic_stencil_mask;
   uint32_t dynamic_stencil_wrmask;
   uint32_t dynamic_stencil_ref;

   uint32_t gras_su_cntl, rb_depth_cntl, rb_stencil_cntl;
   enum pc_di_primtype primtype;

   /* saved states to re-emit in TU_CMD_DIRTY_DRAW_STATE case */
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];
   struct tu_draw_state vertex_buffers;
   struct tu_draw_state shader_const[2];
   struct tu_draw_state desc_sets;

   struct tu_draw_state vs_params;

   /* Index buffer */
   uint64_t index_va;
   uint32_t max_index_count;
   uint8_t index_size;

   /* because streamout base has to be 32-byte aligned
    * there is an extra offset to deal with when it is
    * unaligned
    */
   uint8_t streamout_offset[IR3_MAX_SO_BUFFERS];

   /* Renderpasses are tricky, because we may need to flush differently if
    * using sysmem vs. gmem and therefore we have to delay any flushing that
    * happens before a renderpass. So we have to have two copies of the flush
    * state, one for intra-renderpass flushes (i.e. renderpass dependencies)
    * and one for outside a renderpass.
    */
   struct tu_cache_state cache;
   struct tu_cache_state renderpass_cache;

   enum tu_cmd_ccu_state ccu_state;

   const struct tu_render_pass *pass;
   const struct tu_subpass *subpass;
   const struct tu_framebuffer *framebuffer;
   VkRect2D render_area;

   struct tu_cs_entry tile_store_ib;

   bool xfb_used;
   bool has_tess;
   bool has_subpass_predication;
   bool predication_active;

   struct tu_lrz_state lrz;

   struct tu_draw_state depth_plane_state;

   struct tu_vs_params last_vs_params;
};

struct tu_cmd_pool
{
   struct vk_object_base base;

   VkAllocationCallbacks alloc;
   struct list_head cmd_buffers;
   struct list_head free_cmd_buffers;
   uint32_t queue_family_index;
};

enum tu_cmd_buffer_status
{
   TU_CMD_BUFFER_STATUS_INVALID,
   TU_CMD_BUFFER_STATUS_INITIAL,
   TU_CMD_BUFFER_STATUS_RECORDING,
   TU_CMD_BUFFER_STATUS_EXECUTABLE,
   TU_CMD_BUFFER_STATUS_PENDING,
};

struct tu_cmd_buffer
{
   struct vk_object_base base;

   struct tu_device *device;

   struct tu_cmd_pool *pool;
   struct list_head pool_link;

   VkCommandBufferUsageFlags usage_flags;
   VkCommandBufferLevel level;
   enum tu_cmd_buffer_status status;

   struct tu_cmd_state state;
   uint32_t queue_family_index;

   uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];
   VkShaderStageFlags push_constant_stages;
   struct tu_descriptor_set meta_push_descriptors;

   struct tu_descriptor_state descriptors[MAX_BIND_POINTS];

   VkResult record_result;

   struct tu_cs cs;
   struct tu_cs draw_cs;
   struct tu_cs draw_epilogue_cs;
   struct tu_cs sub_cs;

   uint32_t vsc_draw_strm_pitch;
   uint32_t vsc_prim_strm_pitch;
};

/* Temporary struct for tracking a register state to be written, used by
 * a6xx-pack.h and tu_cs_emit_regs()
 */
struct tu_reg_value {
   uint32_t reg;
   uint64_t value;
   bool is_address;
   struct tu_bo *bo;
   bool bo_write;
   uint32_t bo_offset;
   uint32_t bo_shift;
};

void tu_emit_cache_flush_renderpass(struct tu_cmd_buffer *cmd_buffer,
                                    struct tu_cs *cs);

void tu_emit_cache_flush_ccu(struct tu_cmd_buffer *cmd_buffer,
                             struct tu_cs *cs,
                             enum tu_cmd_ccu_state ccu_state);

void
tu6_emit_event_write(struct tu_cmd_buffer *cmd,
                     struct tu_cs *cs,
                     enum vgt_event_type event);

static inline struct tu_descriptor_state *
tu_get_descriptors_state(struct tu_cmd_buffer *cmd_buffer,
                         VkPipelineBindPoint bind_point)
{
   return &cmd_buffer->descriptors[bind_point];
}
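/* Illustrative note (not driver code): bind points index straight into
 * descriptors[] because VK_PIPELINE_BIND_POINT_GRAPHICS == 0 and
 * VK_PIPELINE_BIND_POINT_COMPUTE == 1, which is what MAX_BIND_POINTS == 2
 * accounts for:
 *
 *    struct tu_descriptor_state *ds =
 *       tu_get_descriptors_state(cmd, VK_PIPELINE_BIND_POINT_COMPUTE);
 */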
struct tu_event
{
   struct vk_object_base base;
   struct tu_bo bo;
};

struct tu_push_constant_range
{
   uint32_t lo;
   uint32_t count;
};

struct tu_shader
{
   struct ir3_shader *ir3_shader;

   struct tu_push_constant_range push_consts;
   uint8_t active_desc_sets;
   bool multi_pos_output;
};

bool
tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,
                       struct tu_device *dev);

nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
                const VkPipelineShaderStageCreateInfo *stage_info,
                gl_shader_stage stage);

struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 nir_shader *nir,
                 unsigned multiview_mask,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc);

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc);

struct tu_program_descriptor_linkage
{
   struct ir3_const_state const_state;

   uint32_t constlen;

   struct tu_push_constant_range push_consts;
};

struct tu_pipeline_executable {
   gl_shader_stage stage;

   struct ir3_info stats;
   bool is_binning;

   char *nir_from_spirv;
   char *nir_final;
   char *disasm;
};

struct tu_pipeline
{
   struct vk_object_base base;

   struct tu_cs cs;

   /* Separate BO for private memory since it should be GPU-writable */
   struct tu_bo pvtmem_bo;

   struct tu_pipeline_layout *layout;

   bool need_indirect_descriptor_sets;
   VkShaderStageFlags active_stages;
   uint32_t active_desc_sets;

   /* mask of enabled dynamic states
    * if BIT(i) is set, pipeline->dynamic_state[i] is *NOT* used
    */
   uint32_t dynamic_state_mask;
   struct tu_draw_state dynamic_state[TU_DYNAMIC_STATE_COUNT];

   /* for dynamic states which use the same register: */
   uint32_t gras_su_cntl, gras_su_cntl_mask;
   uint32_t rb_depth_cntl, rb_depth_cntl_mask;
   uint32_t rb_stencil_cntl, rb_stencil_cntl_mask;
   uint32_t stencil_wrmask;

   bool rb_depth_cntl_disable;

   /* draw states for the pipeline */
   struct tu_draw_state load_state, rast_state, blend_state;

   /* for vertex buffers state */
   uint32_t num_vbs;

   struct
   {
      struct tu_draw_state config_state;
      struct tu_draw_state state;
      struct tu_draw_state binning_state;

      struct tu_program_descriptor_linkage link[MESA_SHADER_STAGES];
   } program;

   struct
   {
      struct tu_draw_state state;
      struct tu_draw_state binning_state;
   } vi;

   struct
   {
      enum pc_di_primtype primtype;
      bool primitive_restart;
   } ia;

   struct
   {
      uint32_t patch_type;
      uint32_t param_stride;
      uint32_t hs_bo_regid;
      uint32_t ds_bo_regid;
      bool upper_left_domain_origin;
   } tess;

   struct
   {
      uint32_t local_size[3];
      uint32_t subgroup_size;
   } compute;

   bool provoking_vertex_last;

   struct tu_lrz_pipeline lrz;

   void *executables_mem_ctx;
   /* tu_pipeline_executable */
   struct util_dynarray executables;
};

void
tu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewport, uint32_t num_viewport);

void
tu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scs, uint32_t scissor_count);

void
tu6_clear_lrz(struct tu_cmd_buffer *cmd, struct tu_cs *cs, struct tu_image *image, const VkClearValue *value);

void
tu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc);

void
tu6_emit_depth_bias(struct tu_cs *cs,
                    float constant_factor,
                    float clamp,
                    float slope_factor);

void tu6_emit_msaa(struct tu_cs *cs, VkSampleCountFlagBits samples);

void tu6_emit_window_scissor(struct tu_cs *cs, uint32_t x1, uint32_t y1, uint32_t x2, uint32_t y2);

void tu6_emit_window_offset(struct tu_cs *cs, uint32_t x1, uint32_t y1);

struct tu_pvtmem_config {
   uint64_t iova;
   uint32_t per_fiber_size;
   uint32_t per_sp_size;
   bool per_wave;
};

void
tu6_emit_xs_config(struct tu_cs *cs,
                   gl_shader_stage stage,
                   const struct ir3_shader_variant *xs);

void
tu6_emit_xs(struct tu_cs *cs,
            gl_shader_stage stage,
            const struct ir3_shader_variant *xs,
            const struct tu_pvtmem_config *pvtmem,
            uint64_t binary_iova);
void
tu6_emit_vpc(struct tu_cs *cs,
             const struct ir3_shader_variant *vs,
             const struct ir3_shader_variant *hs,
             const struct ir3_shader_variant *ds,
             const struct ir3_shader_variant *gs,
             const struct ir3_shader_variant *fs,
             uint32_t patch_control_points);

void
tu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs);

struct tu_image_view;

void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                  struct tu_cs *cs,
                  struct tu_image_view *src,
                  struct tu_image_view *dst,
                  uint32_t layer_mask,
                  uint32_t layers,
                  const VkRect2D *rect);

void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info);

void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info);

void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t a,
                        bool force_load);

/* expose this function to be able to emit load without checking LOAD_OP */
void
tu_emit_load_gmem_attachment(struct tu_cmd_buffer *cmd, struct tu_cs *cs, uint32_t a);

/* note: gmem store can also resolve */
void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a);

enum tu_supported_formats {
   FMT_VERTEX = 1,
   FMT_TEXTURE = 2,
   FMT_COLOR = 4,
};

struct tu_native_format
{
   enum a6xx_format fmt : 8;
   enum a3xx_color_swap swap : 8;
   enum a6xx_tile_mode tile_mode : 8;
   enum tu_supported_formats supported : 8;
};

struct tu_native_format tu6_format_vtx(VkFormat format);
struct tu_native_format tu6_format_color(VkFormat format, enum a6xx_tile_mode tile_mode);
struct tu_native_format tu6_format_texture(VkFormat format, enum a6xx_tile_mode tile_mode);

static inline enum a6xx_format
tu6_base_format(VkFormat format)
{
   /* note: tu6_format_color doesn't care about tiling for .fmt field */
   return tu6_format_color(format, TILE6_LINEAR).fmt;
}

struct tu_image
{
   struct vk_object_base base;

   /* The original VkFormat provided by the client. This may not match any
    * of the actual surface formats.
    */
   VkFormat vk_format;
   uint32_t level_count;
   uint32_t layer_count;

   struct fdl_layout layout[3];
   uint32_t total_size;

#ifdef ANDROID
   /* For VK_ANDROID_native_buffer, the WSI image owns the memory. */
   VkDeviceMemory owned_memory;
#endif

   /* Set when bound */
   struct tu_bo *bo;
   VkDeviceSize bo_offset;

   uint32_t lrz_height;
   uint32_t lrz_pitch;
   uint32_t lrz_offset;

   bool shareable;
};

static inline uint32_t
tu_get_layerCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
{
   return range->layerCount == VK_REMAINING_ARRAY_LAYERS
             ? image->layer_count - range->baseArrayLayer
             : range->layerCount;
}

static inline uint32_t
tu_get_levelCount(const struct tu_image *image,
                  const VkImageSubresourceRange *range)
{
   return range->levelCount == VK_REMAINING_MIP_LEVELS
             ? image->level_count - range->baseMipLevel
             : range->levelCount;
}
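/* Worked example: for an image with level_count == 10, a range of
 * { .baseMipLevel = 2, .levelCount = VK_REMAINING_MIP_LEVELS } makes
 *
 *    uint32_t levels = tu_get_levelCount(image, range);
 *
 * return 10 - 2 = 8, while an explicit levelCount is returned unchanged;
 * tu_get_layerCount() behaves the same way for array layers.
 */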
struct tu_image_view
{
   struct vk_object_base base;

   struct tu_image *image; /**< VkImageViewCreateInfo::image */

   uint64_t base_addr;
   uint64_t ubwc_addr;
   uint32_t layer_size;
   uint32_t ubwc_layer_size;

   /* used to determine if fast gmem store path can be used */
   VkExtent2D extent;
   bool need_y2_align;

   bool ubwc_enabled;

   uint32_t descriptor[A6XX_TEX_CONST_DWORDS];

   /* Descriptor for use as a storage image as opposed to a sampled image.
    * This has a few differences for cube maps (e.g. type).
    */
   uint32_t storage_descriptor[A6XX_TEX_CONST_DWORDS];

   /* pre-filled register values */
   uint32_t PITCH;
   uint32_t FLAG_BUFFER_PITCH;

   uint32_t RB_MRT_BUF_INFO;
   uint32_t SP_FS_MRT_REG;

   uint32_t SP_PS_2D_SRC_INFO;
   uint32_t SP_PS_2D_SRC_SIZE;

   uint32_t RB_2D_DST_INFO;

   uint32_t RB_BLIT_DST_INFO;

   /* for d32s8 separate stencil */
   uint64_t stencil_base_addr;
   uint32_t stencil_layer_size;
   uint32_t stencil_PITCH;
};

struct tu_sampler_ycbcr_conversion {
   struct vk_object_base base;

   VkFormat format;
   VkSamplerYcbcrModelConversion ycbcr_model;
   VkSamplerYcbcrRange ycbcr_range;
   VkComponentMapping components;
   VkChromaLocation chroma_offsets[2];
   VkFilter chroma_filter;
};

struct tu_sampler {
   struct vk_object_base base;

   uint32_t descriptor[A6XX_TEX_SAMP_DWORDS];
   struct tu_sampler_ycbcr_conversion *ycbcr_sampler;
};

void
tu_cs_image_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

void
tu_cs_image_ref_2d(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer, bool src);

void
tu_cs_image_flag_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

void
tu_cs_image_stencil_ref(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);

#define tu_image_view_stencil(iview, x) \
   ((iview->x & ~A6XX_##x##_COLOR_FORMAT__MASK) | A6XX_##x##_COLOR_FORMAT(FMT6_8_UINT))
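/* Example expansion (mechanical, from the macro definition above):
 *
 *    tu_image_view_stencil(iview, RB_2D_DST_INFO)
 *
 * becomes
 *
 *    ((iview->RB_2D_DST_INFO & ~A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK) |
 *     A6XX_RB_2D_DST_INFO_COLOR_FORMAT(FMT6_8_UINT))
 *
 * i.e. the pre-filled register value with its color format overridden to
 * 8-bit UINT for accessing the stencil plane of D32S8.
 */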
VkResult
tu_gralloc_info(struct tu_device *device,
                const VkNativeBufferANDROID *gralloc_info,
                int *dma_buf,
                uint64_t *modifier);

VkResult
tu_import_memory_from_gralloc_handle(VkDevice device_h,
                                     int dma_buf,
                                     const VkAllocationCallbacks *alloc,
                                     VkImage image_h);

void
tu_image_view_init(struct tu_image_view *iview,
                   const VkImageViewCreateInfo *pCreateInfo,
                   bool limited_z24s8);

bool
ubwc_possible(VkFormat format, VkImageType type, VkImageUsageFlags usage, VkImageUsageFlags stencil_usage,
              const struct fd_dev_info *info, VkSampleCountFlagBits samples);

struct tu_buffer_view
{
   struct vk_object_base base;

   uint32_t descriptor[A6XX_TEX_CONST_DWORDS];

   struct tu_buffer *buffer;
};
void
tu_buffer_view_init(struct tu_buffer_view *view,
                    struct tu_device *device,
                    const VkBufferViewCreateInfo *pCreateInfo);

struct tu_attachment_info
{
   struct tu_image_view *attachment;
};

struct tu_framebuffer
{
   struct vk_object_base base;

   uint32_t width;
   uint32_t height;
   uint32_t layers;

   /* size of the first tile */
   VkExtent2D tile0;
   /* number of tiles */
   VkExtent2D tile_count;

   /* size of the first VSC pipe */
   VkExtent2D pipe0;
   /* number of VSC pipes */
   VkExtent2D pipe_count;

   /* pipe register values */
   uint32_t pipe_config[MAX_VSC_PIPES];
   uint32_t pipe_sizes[MAX_VSC_PIPES];

   uint32_t attachment_count;
   struct tu_attachment_info attachments[0];
};

void
tu_framebuffer_tiling_config(struct tu_framebuffer *fb,
                             const struct tu_device *device,
                             const struct tu_render_pass *pass);

struct tu_subpass_barrier {
   VkPipelineStageFlags src_stage_mask;
   VkAccessFlags src_access_mask;
   VkAccessFlags dst_access_mask;
   bool incoherent_ccu_color, incoherent_ccu_depth;
};

struct tu_subpass_attachment
{
   uint32_t attachment;
};

struct tu_subpass
{
   uint32_t input_count;
   uint32_t color_count;
   uint32_t resolve_count;
   bool resolve_depth_stencil;
   struct tu_subpass_attachment *input_attachments;
   struct tu_subpass_attachment *color_attachments;
   struct tu_subpass_attachment *resolve_attachments;
   struct tu_subpass_attachment depth_stencil_attachment;

   VkSampleCountFlagBits samples;

   uint32_t srgb_cntl;
   uint32_t multiview_mask;

   struct tu_subpass_barrier start_barrier;
};

struct tu_render_pass_attachment
{
   VkFormat format;
   uint32_t samples;
   uint32_t cpp;
   VkImageAspectFlags clear_mask;
   uint32_t clear_views;
   bool load;
   bool store;
   int32_t gmem_offset;
   /* for D32S8 separate stencil: */
   bool load_stencil;
   bool store_stencil;
   int32_t gmem_offset_stencil;
};

struct tu_render_pass
{
   struct vk_object_base base;

   uint32_t attachment_count;
   uint32_t subpass_count;
   uint32_t gmem_pixels;
   uint32_t tile_align_w;
   struct tu_subpass_attachment *subpass_attachments;
   struct tu_render_pass_attachment *attachments;
   struct tu_subpass_barrier end_barrier;
   struct tu_subpass subpasses[0];
};

#define PERF_CNTRS_REG 4

struct tu_perf_query_data
{
   uint32_t gid;      /* group-id */
   uint32_t cid;      /* countable-id within the group */
   uint32_t cntr_reg; /* counter register within the group */
   uint32_t pass;     /* pass index in which the countable can be requested */
   uint32_t app_idx;  /* index provided by apps */
};

struct tu_query_pool
{
   struct vk_object_base base;

   VkQueryType type;
   uint32_t stride;
   uint64_t size;
   uint32_t pipeline_statistics;
   struct tu_bo bo;

   /* For performance query */
   const struct fd_perfcntr_group *perf_group;
   uint32_t perf_group_count;
   uint32_t counter_index_count;
   struct tu_perf_query_data perf_query_data[0];
};

uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index);

void
tu_update_descriptor_sets(const struct tu_device *device,
                          VkDescriptorSet overrideSet,
                          uint32_t descriptorWriteCount,
                          const VkWriteDescriptorSet *pDescriptorWrites,
                          uint32_t descriptorCopyCount,
                          const VkCopyDescriptorSet *pDescriptorCopies);

void
tu_update_descriptor_set_with_template(
   const struct tu_device *device,
   struct tu_descriptor_set *set,
   VkDescriptorUpdateTemplate descriptorUpdateTemplate,
   const void *pData);

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance);
VkResult
tu_enumerate_devices(struct tu_instance *instance);

int
tu_drm_submitqueue_new(const struct tu_device *dev,
                       int priority,
                       uint32_t *queue_id);

void
tu_drm_submitqueue_close(const struct tu_device *dev, uint32_t queue_id);

int
tu_signal_fences(struct tu_device *device, struct tu_syncobj *fence1, struct tu_syncobj *fence2);

int
tu_syncobj_to_fd(struct tu_device *device, struct tu_syncobj *sync);

#define TU_DEFINE_HANDLE_CASTS(__tu_type, __VkType) \
\
   static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
   { \
      return (struct __tu_type *) _handle; \
   } \
\
   static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
   { \
      return (__VkType) _obj; \
   }

#define TU_DEFINE_NONDISP_HANDLE_CASTS(__tu_type, __VkType) \
\
   static inline struct __tu_type *__tu_type##_from_handle(__VkType _handle) \
   { \
      return (struct __tu_type *) (uintptr_t) _handle; \
   } \
\
   static inline __VkType __tu_type##_to_handle(struct __tu_type *_obj) \
   { \
      return (__VkType)(uintptr_t) _obj; \
   }

#define TU_FROM_HANDLE(__tu_type, __name, __handle) \
   struct __tu_type *__name = __tu_type##_from_handle(__handle)
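/* Usage sketch (illustrative): entrypoints unwrap Vulkan handles with
 * TU_FROM_HANDLE, using the casts instantiated below, e.g.
 *
 *    TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
 *
 * declares `struct tu_cmd_buffer *cmd` initialized from the
 * VkCommandBuffer handle.
 */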
TU_DEFINE_HANDLE_CASTS(tu_cmd_buffer, VkCommandBuffer)
TU_DEFINE_HANDLE_CASTS(tu_device, VkDevice)
TU_DEFINE_HANDLE_CASTS(tu_instance, VkInstance)
TU_DEFINE_HANDLE_CASTS(tu_physical_device, VkPhysicalDevice)
TU_DEFINE_HANDLE_CASTS(tu_queue, VkQueue)

TU_DEFINE_NONDISP_HANDLE_CASTS(tu_cmd_pool, VkCommandPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer, VkBuffer)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_buffer_view, VkBufferView)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_pool, VkDescriptorPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set, VkDescriptorSet)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_set_layout,
                               VkDescriptorSetLayout)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_descriptor_update_template,
                               VkDescriptorUpdateTemplate)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_device_memory, VkDeviceMemory)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_event, VkEvent)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_framebuffer, VkFramebuffer)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image, VkImage)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_image_view, VkImageView)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_cache, VkPipelineCache)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline, VkPipeline)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_pipeline_layout, VkPipelineLayout)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_query_pool, VkQueryPool)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_render_pass, VkRenderPass)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler, VkSampler)
TU_DEFINE_NONDISP_HANDLE_CASTS(tu_sampler_ycbcr_conversion, VkSamplerYcbcrConversion)

/* for TU_FROM_HANDLE with both VkFence and VkSemaphore: */
#define tu_syncobj_from_handle(x) ((struct tu_syncobj *) (uintptr_t) (x))

void
update_stencil_mask(uint32_t *value, VkStencilFaceFlags face, uint32_t mask);

#endif /* TU_PRIVATE_H */