/* Source: blob/21.2-virgl/src/broadcom/vulkan/v3dv_private.h */
/*1* Copyright © 2019 Raspberry Pi2*3* based in part on anv driver which is:4* Copyright © 2015 Intel Corporation5*6* based in part on radv driver which is:7* Copyright © 2016 Red Hat.8* Copyright © 2016 Bas Nieuwenhuizen9*10* Permission is hereby granted, free of charge, to any person obtaining a11* copy of this software and associated documentation files (the "Software"),12* to deal in the Software without restriction, including without limitation13* the rights to use, copy, modify, merge, publish, distribute, sublicense,14* and/or sell copies of the Software, and to permit persons to whom the15* Software is furnished to do so, subject to the following conditions:16*17* The above copyright notice and this permission notice (including the next18* paragraph) shall be included in all copies or substantial portions of the19* Software.20*21* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR22* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,23* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL24* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER25* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING26* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS27* IN THE SOFTWARE.28*/29#ifndef V3DV_PRIVATE_H30#define V3DV_PRIVATE_H3132#include <stdio.h>33#include <string.h>34#include <vulkan/vulkan.h>35#include <vulkan/vk_icd.h>36#include <vk_enum_to_str.h>3738#include "vk_device.h"39#include "vk_instance.h"40#include "vk_physical_device.h"41#include "vk_shader_module.h"42#include "vk_util.h"4344#include <xf86drm.h>4546#ifdef HAVE_VALGRIND47#include <valgrind.h>48#include <memcheck.h>49#define VG(x) x50#else51#define VG(x) ((void)0)52#endif5354#include "v3dv_limits.h"5556#include "common/v3d_device_info.h"57#include "common/v3d_limits.h"58#include "common/v3d_tiling.h"59#include "common/v3d_util.h"6061#include "compiler/shader_enums.h"62#include "compiler/spirv/nir_spirv.h"6364#include "compiler/v3d_compiler.h"6566#include "vk_debug_report.h"67#include "util/set.h"68#include "util/hash_table.h"69#include "util/xmlconfig.h"70#include "u_atomic.h"7172#include "v3dv_entrypoints.h"73#include "v3dv_bo.h"7475#include "drm-uapi/v3d_drm.h"7677#include "vk_alloc.h"78#include "simulator/v3d_simulator.h"7980#include "v3dv_cl.h"8182#include "wsi_common.h"8384/* A non-fatal assert. Useful for debugging. */85#ifdef DEBUG86#define v3dv_assert(x) ({ \87if (unlikely(!(x))) \88fprintf(stderr, "%s:%d ASSERT: %s", __FILE__, __LINE__, #x); \89})90#else91#define v3dv_assert(x)92#endif9394#define perf_debug(...) 
do { \95if (unlikely(V3D_DEBUG & V3D_DEBUG_PERF)) \96fprintf(stderr, __VA_ARGS__); \97} while (0)9899struct v3dv_instance;100101#ifdef USE_V3D_SIMULATOR102#define using_v3d_simulator true103#else104#define using_v3d_simulator false105#endif106107struct v3d_simulator_file;108109/* Minimum required by the Vulkan 1.1 spec */110#define MAX_MEMORY_ALLOCATION_SIZE (1ull << 30)111112struct v3dv_physical_device {113struct vk_physical_device vk;114115char *name;116int32_t render_fd;117int32_t display_fd;118int32_t master_fd;119120uint8_t driver_build_sha1[20];121uint8_t pipeline_cache_uuid[VK_UUID_SIZE];122uint8_t device_uuid[VK_UUID_SIZE];123uint8_t driver_uuid[VK_UUID_SIZE];124125struct disk_cache *disk_cache;126127mtx_t mutex;128129struct wsi_device wsi_device;130131VkPhysicalDeviceMemoryProperties memory;132133struct v3d_device_info devinfo;134135struct v3d_simulator_file *sim_file;136137const struct v3d_compiler *compiler;138uint32_t next_program_id;139140struct {141bool merge_jobs;142} options;143};144145VkResult v3dv_physical_device_acquire_display(struct v3dv_instance *instance,146struct v3dv_physical_device *pdevice,147VkIcdSurfaceBase *surface);148149VkResult v3dv_wsi_init(struct v3dv_physical_device *physical_device);150void v3dv_wsi_finish(struct v3dv_physical_device *physical_device);151struct v3dv_image *v3dv_wsi_get_image_from_swapchain(VkSwapchainKHR swapchain,152uint32_t index);153154void v3dv_meta_clear_init(struct v3dv_device *device);155void v3dv_meta_clear_finish(struct v3dv_device *device);156157void v3dv_meta_blit_init(struct v3dv_device *device);158void v3dv_meta_blit_finish(struct v3dv_device *device);159160void v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device);161void v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device);162163struct v3dv_instance {164struct vk_instance vk;165166int physicalDeviceCount;167struct v3dv_physical_device physicalDevice;168169bool pipeline_cache_enabled;170bool 
default_pipeline_cache_enabled;171};172173/* Tracks wait threads spawned from a single vkQueueSubmit call */174struct v3dv_queue_submit_wait_info {175/* struct vk_object_base base; ?*/176struct list_head list_link;177178struct v3dv_device *device;179180/* List of wait threads spawned for any command buffers in a particular181* call to vkQueueSubmit.182*/183uint32_t wait_thread_count;184struct {185pthread_t thread;186bool finished;187} wait_threads[16];188189/* The master wait thread for the entire submit. This will wait for all190* other threads in this submit to complete before processing signal191* semaphores and fences.192*/193pthread_t master_wait_thread;194195/* List of semaphores (and fence) to signal after all wait threads completed196* and all command buffer jobs in the submission have been sent to the GPU.197*/198uint32_t signal_semaphore_count;199VkSemaphore *signal_semaphores;200VkFence fence;201};202203struct v3dv_queue {204struct vk_object_base base;205206struct v3dv_device *device;207VkDeviceQueueCreateFlags flags;208209/* A list of active v3dv_queue_submit_wait_info */210struct list_head submit_wait_list;211212/* A mutex to prevent concurrent access to the list of wait threads */213mtx_t mutex;214215struct v3dv_job *noop_job;216};217218#define V3DV_META_BLIT_CACHE_KEY_SIZE (4 * sizeof(uint32_t))219#define V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE (3 * sizeof(uint32_t) + \220sizeof(VkComponentMapping))221222struct v3dv_meta_color_clear_pipeline {223VkPipeline pipeline;224VkRenderPass pass;225bool cached;226uint64_t key;227};228229struct v3dv_meta_depth_clear_pipeline {230VkPipeline pipeline;231uint64_t key;232};233234struct v3dv_meta_blit_pipeline {235VkPipeline pipeline;236VkRenderPass pass;237VkRenderPass pass_no_load;238uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];239};240241struct v3dv_meta_texel_buffer_copy_pipeline {242VkPipeline pipeline;243VkRenderPass pass;244VkRenderPass pass_no_load;245uint8_t 
key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];246};247248struct v3dv_pipeline_key {249bool robust_buffer_access;250uint8_t topology;251uint8_t logicop_func;252bool msaa;253bool sample_coverage;254bool sample_alpha_to_coverage;255bool sample_alpha_to_one;256uint8_t cbufs;257struct {258enum pipe_format format;259const uint8_t *swizzle;260} color_fmt[V3D_MAX_DRAW_BUFFERS];261uint8_t f32_color_rb;262uint32_t va_swap_rb_mask;263};264265struct v3dv_pipeline_cache_stats {266uint32_t miss;267uint32_t hit;268uint32_t count;269};270271/* Equivalent to gl_shader_stage, but including the coordinate shaders272*273* FIXME: perhaps move to common274*/275enum broadcom_shader_stage {276BROADCOM_SHADER_VERTEX,277BROADCOM_SHADER_VERTEX_BIN,278BROADCOM_SHADER_GEOMETRY,279BROADCOM_SHADER_GEOMETRY_BIN,280BROADCOM_SHADER_FRAGMENT,281BROADCOM_SHADER_COMPUTE,282};283284#define BROADCOM_SHADER_STAGES (BROADCOM_SHADER_COMPUTE + 1)285286/* Assumes that coordinate shaders will be custom-handled by the caller */287static inline enum broadcom_shader_stage288gl_shader_stage_to_broadcom(gl_shader_stage stage)289{290switch (stage) {291case MESA_SHADER_VERTEX:292return BROADCOM_SHADER_VERTEX;293case MESA_SHADER_GEOMETRY:294return BROADCOM_SHADER_GEOMETRY;295case MESA_SHADER_FRAGMENT:296return BROADCOM_SHADER_FRAGMENT;297case MESA_SHADER_COMPUTE:298return BROADCOM_SHADER_COMPUTE;299default:300unreachable("Unknown gl shader stage");301}302}303304static inline gl_shader_stage305broadcom_shader_stage_to_gl(enum broadcom_shader_stage stage)306{307switch (stage) {308case BROADCOM_SHADER_VERTEX:309case BROADCOM_SHADER_VERTEX_BIN:310return MESA_SHADER_VERTEX;311case BROADCOM_SHADER_GEOMETRY:312case BROADCOM_SHADER_GEOMETRY_BIN:313return MESA_SHADER_GEOMETRY;314case BROADCOM_SHADER_FRAGMENT:315return MESA_SHADER_FRAGMENT;316case BROADCOM_SHADER_COMPUTE:317return MESA_SHADER_COMPUTE;318default:319unreachable("Unknown broadcom shader stage");320}321}322323static inline 
bool324broadcom_shader_stage_is_binning(enum broadcom_shader_stage stage)325{326switch (stage) {327case BROADCOM_SHADER_VERTEX_BIN:328case BROADCOM_SHADER_GEOMETRY_BIN:329return true;330default:331return false;332}333}334335static inline bool336broadcom_shader_stage_is_render_with_binning(enum broadcom_shader_stage stage)337{338switch (stage) {339case BROADCOM_SHADER_VERTEX:340case BROADCOM_SHADER_GEOMETRY:341return true;342default:343return false;344}345}346347static inline enum broadcom_shader_stage348broadcom_binning_shader_stage_for_render_stage(enum broadcom_shader_stage stage)349{350switch (stage) {351case BROADCOM_SHADER_VERTEX:352return BROADCOM_SHADER_VERTEX_BIN;353case BROADCOM_SHADER_GEOMETRY:354return BROADCOM_SHADER_GEOMETRY_BIN;355default:356unreachable("Invalid shader stage");357}358}359360static inline const char *361broadcom_shader_stage_name(enum broadcom_shader_stage stage)362{363switch(stage) {364case BROADCOM_SHADER_VERTEX_BIN:365return "MESA_SHADER_VERTEX_BIN";366case BROADCOM_SHADER_GEOMETRY_BIN:367return "MESA_SHADER_GEOMETRY_BIN";368default:369return gl_shader_stage_name(broadcom_shader_stage_to_gl(stage));370}371}372373struct v3dv_pipeline_cache {374struct vk_object_base base;375376struct v3dv_device *device;377mtx_t mutex;378379struct hash_table *nir_cache;380struct v3dv_pipeline_cache_stats nir_stats;381382struct hash_table *cache;383struct v3dv_pipeline_cache_stats stats;384};385386struct v3dv_device {387struct vk_device vk;388389struct v3dv_instance *instance;390struct v3dv_physical_device *pdevice;391392struct v3d_device_info devinfo;393struct v3dv_queue queue;394395/* A sync object to track the last job submitted to the GPU. 
*/396uint32_t last_job_sync;397398/* A mutex to prevent concurrent access to last_job_sync from the queue */399mtx_t mutex;400401/* Resources used for meta operations */402struct {403mtx_t mtx;404struct {405VkPipelineLayout p_layout;406struct hash_table *cache; /* v3dv_meta_color_clear_pipeline */407} color_clear;408struct {409VkPipelineLayout p_layout;410struct hash_table *cache; /* v3dv_meta_depth_clear_pipeline */411} depth_clear;412struct {413VkDescriptorSetLayout ds_layout;414VkPipelineLayout p_layout;415struct hash_table *cache[3]; /* v3dv_meta_blit_pipeline for 1d, 2d, 3d */416} blit;417struct {418VkDescriptorSetLayout ds_layout;419VkPipelineLayout p_layout;420struct hash_table *cache[3]; /* v3dv_meta_texel_buffer_copy_pipeline for 1d, 2d, 3d */421} texel_buffer_copy;422} meta;423424struct v3dv_bo_cache {425/** List of struct v3d_bo freed, by age. */426struct list_head time_list;427/** List of struct v3d_bo freed, per size, by age. */428struct list_head *size_list;429uint32_t size_list_size;430431mtx_t lock;432433uint32_t cache_size;434uint32_t cache_count;435uint32_t max_cache_size;436} bo_cache;437438uint32_t bo_size;439uint32_t bo_count;440441struct v3dv_pipeline_cache default_pipeline_cache;442443/* GL_SHADER_STATE_RECORD needs to speficy default attribute values. The444* following covers the most common case, that is all attributes format445* being float being float, allowing us to reuse the same BO for all446* pipelines matching this requirement. 
Pipelines that need integer447* attributes will create their own BO.448*/449struct v3dv_bo *default_attribute_float;450VkPhysicalDeviceFeatures features;451};452453struct v3dv_device_memory {454struct vk_object_base base;455456struct v3dv_bo *bo;457const VkMemoryType *type;458bool has_bo_ownership;459bool is_for_wsi;460};461462#define V3D_OUTPUT_IMAGE_FORMAT_NO 255463#define TEXTURE_DATA_FORMAT_NO 255464465struct v3dv_format {466bool supported;467468/* One of V3D33_OUTPUT_IMAGE_FORMAT_*, or OUTPUT_IMAGE_FORMAT_NO */469uint8_t rt_type;470471/* One of V3D33_TEXTURE_DATA_FORMAT_*. */472uint8_t tex_type;473474/* Swizzle to apply to the RGBA shader output for storing to the tile475* buffer, to the RGBA tile buffer to produce shader input (for476* blending), and for turning the rgba8888 texture sampler return477* value into shader rgba values.478*/479uint8_t swizzle[4];480481/* Whether the return value is 16F/I/UI or 32F/I/UI. */482uint8_t return_size;483484/* If the format supports (linear) filtering when texturing. */485bool supports_filtering;486};487488struct v3d_resource_slice {489uint32_t offset;490uint32_t stride;491uint32_t padded_height;492/* Size of a single pane of the slice. 
For 3D textures, there will be493* a number of panes equal to the minified, power-of-two-aligned494* depth.495*/496uint32_t size;497uint8_t ub_pad;498enum v3d_tiling_mode tiling;499uint32_t padded_height_of_output_image_in_uif_blocks;500};501502struct v3dv_image {503struct vk_object_base base;504505VkImageType type;506VkImageAspectFlags aspects;507508VkExtent3D extent;509uint32_t levels;510uint32_t array_size;511uint32_t samples;512VkImageUsageFlags usage;513VkImageCreateFlags flags;514VkImageTiling tiling;515516VkFormat vk_format;517const struct v3dv_format *format;518519uint32_t cpp;520521uint64_t drm_format_mod;522bool tiled;523bool external;524525struct v3d_resource_slice slices[V3D_MAX_MIP_LEVELS];526uint64_t size; /* Total size in bytes */527uint32_t cube_map_stride;528uint32_t alignment;529530struct v3dv_device_memory *mem;531VkDeviceSize mem_offset;532};533534VkImageViewType v3dv_image_type_to_view_type(VkImageType type);535536/* Pre-generating packets needs to consider changes in packet sizes across hw537* versions. Keep things simple and allocate enough space for any supported538* version. 
We ensure the size is large enough through static asserts.539*/540#define V3DV_TEXTURE_SHADER_STATE_LENGTH 32541#define V3DV_SAMPLER_STATE_LENGTH 24542#define V3DV_BLEND_CFG_LENGTH 5543#define V3DV_CFG_BITS_LENGTH 4544#define V3DV_GL_SHADER_STATE_RECORD_LENGTH 36545#define V3DV_VCM_CACHE_SIZE_LENGTH 2546#define V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH 16547#define V3DV_STENCIL_CFG_LENGTH 6548549struct v3dv_image_view {550struct vk_object_base base;551552struct v3dv_image *image;553VkImageAspectFlags aspects;554VkExtent3D extent;555VkImageViewType type;556557VkFormat vk_format;558const struct v3dv_format *format;559bool swap_rb;560uint32_t internal_bpp;561uint32_t internal_type;562563uint32_t base_level;564uint32_t max_level;565uint32_t first_layer;566uint32_t last_layer;567uint32_t offset;568569/* Precomputed (composed from createinfo->components and formar swizzle)570* swizzles to pass in to the shader key.571*572* This could be also included on the descriptor bo, but the shader state573* packet doesn't need it on a bo, so we can just avoid a memory copy574*/575uint8_t swizzle[4];576577/* Prepacked TEXTURE_SHADER_STATE. 
It will be copied to the descriptor info578* during UpdateDescriptorSets.579*580* Empirical tests show that cube arrays need a different shader state581* depending on whether they are used with a sampler or not, so for these582* we generate two states and select the one to use based on the descriptor583* type.584*/585uint8_t texture_shader_state[2][V3DV_TEXTURE_SHADER_STATE_LENGTH];586};587588uint32_t v3dv_layer_offset(const struct v3dv_image *image, uint32_t level, uint32_t layer);589590struct v3dv_buffer {591struct vk_object_base base;592593VkDeviceSize size;594VkBufferUsageFlags usage;595uint32_t alignment;596597struct v3dv_device_memory *mem;598VkDeviceSize mem_offset;599};600601struct v3dv_buffer_view {602struct vk_object_base base;603604struct v3dv_buffer *buffer;605606VkFormat vk_format;607const struct v3dv_format *format;608uint32_t internal_bpp;609uint32_t internal_type;610611uint32_t offset;612uint32_t size;613uint32_t num_elements;614615/* Prepacked TEXTURE_SHADER_STATE. */616uint8_t texture_shader_state[V3DV_TEXTURE_SHADER_STATE_LENGTH];617};618619struct v3dv_subpass_attachment {620uint32_t attachment;621VkImageLayout layout;622};623624struct v3dv_subpass {625uint32_t input_count;626struct v3dv_subpass_attachment *input_attachments;627628uint32_t color_count;629struct v3dv_subpass_attachment *color_attachments;630struct v3dv_subpass_attachment *resolve_attachments;631632struct v3dv_subpass_attachment ds_attachment;633634/* If we need to emit the clear of the depth/stencil attachment using a635* a draw call instead of using the TLB (GFXH-1461).636*/637bool do_depth_clear_with_draw;638bool do_stencil_clear_with_draw;639};640641struct v3dv_render_pass_attachment {642VkAttachmentDescription desc;643uint32_t first_subpass;644uint32_t last_subpass;645646/* If this is a multismapled attachment that is going to be resolved,647* whether we can use the TLB resolve on store.648*/649bool use_tlb_resolve;650};651652struct v3dv_render_pass {653struct vk_object_base 
base;654655uint32_t attachment_count;656struct v3dv_render_pass_attachment *attachments;657658uint32_t subpass_count;659struct v3dv_subpass *subpasses;660661struct v3dv_subpass_attachment *subpass_attachments;662};663664struct v3dv_framebuffer {665struct vk_object_base base;666667uint32_t width;668uint32_t height;669uint32_t layers;670671/* Typically, edge tiles in the framebuffer have padding depending on the672* underlying tiling layout. One consequnce of this is that when the673* framebuffer dimensions are not aligned to tile boundaries, tile stores674* would still write full tiles on the edges and write to the padded area.675* If the framebuffer is aliasing a smaller region of a larger image, then676* we need to be careful with this though, as we won't have padding on the677* edge tiles (which typically means that we need to load the tile buffer678* before we store).679*/680bool has_edge_padding;681682uint32_t attachment_count;683uint32_t color_attachment_count;684struct v3dv_image_view *attachments[0];685};686687struct v3dv_frame_tiling {688uint32_t width;689uint32_t height;690uint32_t layers;691uint32_t render_target_count;692uint32_t internal_bpp;693bool msaa;694uint32_t tile_width;695uint32_t tile_height;696uint32_t draw_tiles_x;697uint32_t draw_tiles_y;698uint32_t supertile_width;699uint32_t supertile_height;700uint32_t frame_width_in_supertiles;701uint32_t frame_height_in_supertiles;702};703704void v3dv_framebuffer_compute_internal_bpp_msaa(const struct v3dv_framebuffer *framebuffer,705const struct v3dv_subpass *subpass,706uint8_t *max_bpp, bool *msaa);707708bool v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,709const VkRect2D *area,710struct v3dv_framebuffer *fb,711struct v3dv_render_pass *pass,712uint32_t subpass_idx);713714struct v3dv_cmd_pool {715struct vk_object_base base;716717VkAllocationCallbacks alloc;718struct list_head cmd_buffers;719};720721enum v3dv_cmd_buffer_status {722V3DV_CMD_BUFFER_STATUS_NEW = 
0,723V3DV_CMD_BUFFER_STATUS_INITIALIZED = 1,724V3DV_CMD_BUFFER_STATUS_RECORDING = 2,725V3DV_CMD_BUFFER_STATUS_EXECUTABLE = 3726};727728union v3dv_clear_value {729uint32_t color[4];730struct {731float z;732uint8_t s;733};734};735736struct v3dv_cmd_buffer_attachment_state {737/* The original clear value as provided by the Vulkan API */738VkClearValue vk_clear_value;739740/* The hardware clear value */741union v3dv_clear_value clear_value;742};743744struct v3dv_viewport_state {745uint32_t count;746VkViewport viewports[MAX_VIEWPORTS];747float translate[MAX_VIEWPORTS][3];748float scale[MAX_VIEWPORTS][3];749};750751struct v3dv_scissor_state {752uint32_t count;753VkRect2D scissors[MAX_SCISSORS];754};755756/* Mostly a v3dv mapping of VkDynamicState, used to track which data as757* defined as dynamic758*/759enum v3dv_dynamic_state_bits {760V3DV_DYNAMIC_VIEWPORT = 1 << 0,761V3DV_DYNAMIC_SCISSOR = 1 << 1,762V3DV_DYNAMIC_STENCIL_COMPARE_MASK = 1 << 2,763V3DV_DYNAMIC_STENCIL_WRITE_MASK = 1 << 3,764V3DV_DYNAMIC_STENCIL_REFERENCE = 1 << 4,765V3DV_DYNAMIC_BLEND_CONSTANTS = 1 << 5,766V3DV_DYNAMIC_DEPTH_BIAS = 1 << 6,767V3DV_DYNAMIC_LINE_WIDTH = 1 << 7,768V3DV_DYNAMIC_ALL = (1 << 8) - 1,769};770771/* Flags for dirty pipeline state.772*/773enum v3dv_cmd_dirty_bits {774V3DV_CMD_DIRTY_VIEWPORT = 1 << 0,775V3DV_CMD_DIRTY_SCISSOR = 1 << 1,776V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK = 1 << 2,777V3DV_CMD_DIRTY_STENCIL_WRITE_MASK = 1 << 3,778V3DV_CMD_DIRTY_STENCIL_REFERENCE = 1 << 4,779V3DV_CMD_DIRTY_PIPELINE = 1 << 5,780V3DV_CMD_DIRTY_COMPUTE_PIPELINE = 1 << 6,781V3DV_CMD_DIRTY_VERTEX_BUFFER = 1 << 7,782V3DV_CMD_DIRTY_INDEX_BUFFER = 1 << 8,783V3DV_CMD_DIRTY_DESCRIPTOR_SETS = 1 << 9,784V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS = 1 << 10,785V3DV_CMD_DIRTY_PUSH_CONSTANTS = 1 << 11,786V3DV_CMD_DIRTY_BLEND_CONSTANTS = 1 << 12,787V3DV_CMD_DIRTY_OCCLUSION_QUERY = 1 << 13,788V3DV_CMD_DIRTY_DEPTH_BIAS = 1 << 14,789V3DV_CMD_DIRTY_LINE_WIDTH = 1 << 15,790};791792struct v3dv_dynamic_state {793/**794* Bitmask 
of (1 << VK_DYNAMIC_STATE_*).795* Defines the set of saved dynamic state.796*/797uint32_t mask;798799struct v3dv_viewport_state viewport;800801struct v3dv_scissor_state scissor;802803struct {804uint32_t front;805uint32_t back;806} stencil_compare_mask;807808struct {809uint32_t front;810uint32_t back;811} stencil_write_mask;812813struct {814uint32_t front;815uint32_t back;816} stencil_reference;817818float blend_constants[4];819820struct {821float constant_factor;822float depth_bias_clamp;823float slope_factor;824} depth_bias;825826float line_width;827};828829extern const struct v3dv_dynamic_state default_dynamic_state;830831void v3dv_viewport_compute_xform(const VkViewport *viewport,832float scale[3],833float translate[3]);834835enum v3dv_ez_state {836V3D_EZ_UNDECIDED = 0,837V3D_EZ_GT_GE,838V3D_EZ_LT_LE,839V3D_EZ_DISABLED,840};841842enum v3dv_job_type {843V3DV_JOB_TYPE_GPU_CL = 0,844V3DV_JOB_TYPE_GPU_CL_SECONDARY,845V3DV_JOB_TYPE_GPU_TFU,846V3DV_JOB_TYPE_GPU_CSD,847V3DV_JOB_TYPE_CPU_RESET_QUERIES,848V3DV_JOB_TYPE_CPU_END_QUERY,849V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS,850V3DV_JOB_TYPE_CPU_SET_EVENT,851V3DV_JOB_TYPE_CPU_WAIT_EVENTS,852V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,853V3DV_JOB_TYPE_CPU_CSD_INDIRECT,854V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY,855};856857struct v3dv_reset_query_cpu_job_info {858struct v3dv_query_pool *pool;859uint32_t first;860uint32_t count;861};862863struct v3dv_end_query_cpu_job_info {864struct v3dv_query_pool *pool;865uint32_t query;866};867868struct v3dv_copy_query_results_cpu_job_info {869struct v3dv_query_pool *pool;870uint32_t first;871uint32_t count;872struct v3dv_buffer *dst;873uint32_t offset;874uint32_t stride;875VkQueryResultFlags flags;876};877878struct v3dv_event_set_cpu_job_info {879struct v3dv_event *event;880int state;881};882883struct v3dv_event_wait_cpu_job_info {884/* List of events to wait on */885uint32_t event_count;886struct v3dv_event **events;887888/* Whether any postponed jobs after the wait should wait on semaphores 
*/889bool sem_wait;890};891892struct v3dv_copy_buffer_to_image_cpu_job_info {893struct v3dv_image *image;894struct v3dv_buffer *buffer;895uint32_t buffer_offset;896uint32_t buffer_stride;897uint32_t buffer_layer_stride;898VkOffset3D image_offset;899VkExtent3D image_extent;900uint32_t mip_level;901uint32_t base_layer;902uint32_t layer_count;903};904905struct v3dv_csd_indirect_cpu_job_info {906struct v3dv_buffer *buffer;907uint32_t offset;908struct v3dv_job *csd_job;909uint32_t wg_size;910uint32_t *wg_uniform_offsets[3];911bool needs_wg_uniform_rewrite;912};913914struct v3dv_timestamp_query_cpu_job_info {915struct v3dv_query_pool *pool;916uint32_t query;917};918919struct v3dv_job {920struct list_head list_link;921922/* We only create job clones when executing secondary command buffers into923* primaries. These clones don't make deep copies of the original object924* so we want to flag them to avoid freeing resources they don't own.925*/926bool is_clone;927928enum v3dv_job_type type;929930struct v3dv_device *device;931932struct v3dv_cmd_buffer *cmd_buffer;933934struct v3dv_cl bcl;935struct v3dv_cl rcl;936struct v3dv_cl indirect;937938/* Set of all BOs referenced by the job. This will be used for making939* the list of BOs that the kernel will need to have paged in to940* execute our job.941*/942struct set *bos;943uint32_t bo_count;944uint64_t bo_handle_mask;945946struct v3dv_bo *tile_alloc;947struct v3dv_bo *tile_state;948949bool tmu_dirty_rcl;950951uint32_t first_subpass;952953/* When the current subpass is split into multiple jobs, this flag is set954* to true for any jobs after the first in the same subpass.955*/956bool is_subpass_continue;957958/* If this job is the last job emitted for a subpass. 
*/959bool is_subpass_finish;960961struct v3dv_frame_tiling frame_tiling;962963enum v3dv_ez_state ez_state;964enum v3dv_ez_state first_ez_state;965966/* If we have already decided if we need to disable Early Z/S completely967* for this job.968*/969bool decided_global_ez_enable;970971/* If this job has been configured to use early Z/S clear */972bool early_zs_clear;973974/* Number of draw calls recorded into the job */975uint32_t draw_count;976977/* A flag indicating whether we want to flush every draw separately. This978* can be used for debugging, or for cases where special circumstances979* require this behavior.980*/981bool always_flush;982983/* Whether we need to serialize this job in our command stream */984bool serialize;985986/* If this is a CL job, whether we should sync before binning */987bool needs_bcl_sync;988989/* Job specs for CPU jobs */990union {991struct v3dv_reset_query_cpu_job_info query_reset;992struct v3dv_end_query_cpu_job_info query_end;993struct v3dv_copy_query_results_cpu_job_info query_copy_results;994struct v3dv_event_set_cpu_job_info event_set;995struct v3dv_event_wait_cpu_job_info event_wait;996struct v3dv_copy_buffer_to_image_cpu_job_info copy_buffer_to_image;997struct v3dv_csd_indirect_cpu_job_info csd_indirect;998struct v3dv_timestamp_query_cpu_job_info query_timestamp;999} cpu;10001001/* Job specs for TFU jobs */1002struct drm_v3d_submit_tfu tfu;10031004/* Job specs for CSD jobs */1005struct {1006struct v3dv_bo *shared_memory;1007uint32_t wg_count[3];1008uint32_t wg_base[3];1009struct drm_v3d_submit_csd submit;1010} csd;1011};10121013void v3dv_job_init(struct v3dv_job *job,1014enum v3dv_job_type type,1015struct v3dv_device *device,1016struct v3dv_cmd_buffer *cmd_buffer,1017int32_t subpass_idx);1018void v3dv_job_destroy(struct v3dv_job *job);10191020void v3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo);1021void v3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo);10221023void v3dv_job_start_frame(struct 
v3dv_job *job,1024uint32_t width,1025uint32_t height,1026uint32_t layers,1027uint32_t render_target_count,1028uint8_t max_internal_bpp,1029bool msaa);10301031struct v3dv_job *1032v3dv_job_clone_in_cmd_buffer(struct v3dv_job *job,1033struct v3dv_cmd_buffer *cmd_buffer);10341035struct v3dv_job *v3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device,1036enum v3dv_job_type type,1037struct v3dv_cmd_buffer *cmd_buffer,1038uint32_t subpass_idx);10391040void1041v3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer,1042uint32_t slot_size,1043uint32_t used_count,1044uint32_t *alloc_count,1045void **ptr);10461047void v3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer);10481049/* FIXME: only used on v3dv_cmd_buffer and v3dvx_cmd_buffer, perhaps move to a1050* cmd_buffer specific header?1051*/1052struct v3dv_draw_info {1053uint32_t vertex_count;1054uint32_t instance_count;1055uint32_t first_vertex;1056uint32_t first_instance;1057};10581059struct v3dv_vertex_binding {1060struct v3dv_buffer *buffer;1061VkDeviceSize offset;1062};10631064struct v3dv_descriptor_state {1065struct v3dv_descriptor_set *descriptor_sets[MAX_SETS];1066uint32_t valid;1067uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];1068};10691070struct v3dv_cmd_pipeline_state {1071struct v3dv_pipeline *pipeline;10721073struct v3dv_descriptor_state descriptor_state;1074};10751076struct v3dv_cmd_buffer_state {1077struct v3dv_render_pass *pass;1078struct v3dv_framebuffer *framebuffer;1079VkRect2D render_area;10801081/* Current job being recorded */1082struct v3dv_job *job;10831084uint32_t subpass_idx;10851086struct v3dv_cmd_pipeline_state gfx;1087struct v3dv_cmd_pipeline_state compute;10881089struct v3dv_dynamic_state dynamic;10901091uint32_t dirty;1092VkShaderStageFlagBits dirty_descriptor_stages;1093VkShaderStageFlagBits dirty_push_constants_stages;10941095/* Current clip window. 
We use this to check whether we have an active1096* scissor, since in that case we can't use TLB clears and need to fallback1097* to drawing rects.1098*/1099VkRect2D clip_window;11001101/* Whether our render area is aligned to tile boundaries. If this is false1102* then we have tiles that are only partially covered by the render area,1103* and therefore, we need to be careful with our loads and stores so we don't1104* modify pixels for the tile area that is not covered by the render area.1105* This means, for example, that we can't use the TLB to clear, since that1106* always clears full tiles.1107*/1108bool tile_aligned_render_area;11091110uint32_t attachment_alloc_count;1111struct v3dv_cmd_buffer_attachment_state *attachments;11121113struct v3dv_vertex_binding vertex_bindings[MAX_VBS];11141115struct {1116VkBuffer buffer;1117VkDeviceSize offset;1118uint8_t index_size;1119} index_buffer;11201121/* Current uniforms */1122struct {1123struct v3dv_cl_reloc vs_bin;1124struct v3dv_cl_reloc vs;1125struct v3dv_cl_reloc gs_bin;1126struct v3dv_cl_reloc gs;1127struct v3dv_cl_reloc fs;1128} uniforms;11291130/* Used to flag OOM conditions during command buffer recording */1131bool oom;11321133/* Whether we have recorded a pipeline barrier that we still need to1134* process.1135*/1136bool has_barrier;1137bool has_bcl_barrier;11381139/* Secondary command buffer state */1140struct {1141bool occlusion_query_enable;1142} inheritance;11431144/* Command buffer state saved during a meta operation */1145struct {1146uint32_t subpass_idx;1147VkRenderPass pass;1148VkFramebuffer framebuffer;11491150uint32_t attachment_alloc_count;1151uint32_t attachment_count;1152struct v3dv_cmd_buffer_attachment_state *attachments;11531154bool tile_aligned_render_area;1155VkRect2D render_area;11561157struct v3dv_dynamic_state dynamic;11581159struct v3dv_cmd_pipeline_state gfx;1160bool has_descriptor_state;11611162uint32_t push_constants[MAX_PUSH_CONSTANTS_SIZE / 4];1163} meta;11641165/* Command buffer 
state for queries */1166struct {1167/* A list of vkCmdQueryEnd commands recorded in the command buffer during1168* a render pass. We queue these here and then schedule the corresponding1169* CPU jobs for them at the time we finish the GPU job in which they have1170* been recorded.1171*/1172struct {1173uint32_t used_count;1174uint32_t alloc_count;1175struct v3dv_end_query_cpu_job_info *states;1176} end;11771178/* This BO is not NULL if we have an active query, that is, we have1179* called vkCmdBeginQuery but not vkCmdEndQuery.1180*/1181struct {1182struct v3dv_bo *bo;1183uint32_t offset;1184} active_query;1185} query;1186};11871188/* The following struct represents the info from a descriptor that we store on1189* the host memory. They are mostly links to other existing vulkan objects,1190* like the image_view in order to access to swizzle info, or the buffer used1191* for a UBO/SSBO, for example.1192*1193* FIXME: revisit if makes sense to just move everything that would be needed1194* from a descriptor to the bo.1195*/1196struct v3dv_descriptor {1197VkDescriptorType type;11981199union {1200struct {1201struct v3dv_image_view *image_view;1202struct v3dv_sampler *sampler;1203};12041205struct {1206struct v3dv_buffer *buffer;1207uint32_t offset;1208uint32_t range;1209};12101211struct v3dv_buffer_view *buffer_view;1212};1213};12141215struct v3dv_query {1216bool maybe_available;1217union {1218/* Used by GPU queries (occlusion) */1219struct {1220struct v3dv_bo *bo;1221uint32_t offset;1222};1223/* Used by CPU queries (timestamp) */1224uint64_t value;1225};1226};12271228struct v3dv_query_pool {1229struct vk_object_base base;12301231struct v3dv_bo *bo; /* Only used with GPU queries (occlusion) */12321233VkQueryType query_type;1234uint32_t query_count;1235struct v3dv_query *queries;1236};12371238VkResult v3dv_get_query_pool_results_cpu(struct v3dv_device *device,1239struct v3dv_query_pool *pool,1240uint32_t first,1241uint32_t count,1242void *data,1243VkDeviceSize 
stride,1244VkQueryResultFlags flags);12451246typedef void (*v3dv_cmd_buffer_private_obj_destroy_cb)(VkDevice device,1247uint64_t pobj,1248VkAllocationCallbacks *alloc);1249struct v3dv_cmd_buffer_private_obj {1250struct list_head list_link;1251uint64_t obj;1252v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb;1253};12541255struct v3dv_cmd_buffer {1256struct vk_object_base base;12571258struct v3dv_device *device;12591260struct v3dv_cmd_pool *pool;1261struct list_head pool_link;12621263/* Used at submit time to link command buffers in the submission that have1264* spawned wait threads, so we can then wait on all of them to complete1265* before we process any signal sempahores or fences.1266*/1267struct list_head list_link;12681269VkCommandBufferUsageFlags usage_flags;1270VkCommandBufferLevel level;12711272enum v3dv_cmd_buffer_status status;12731274struct v3dv_cmd_buffer_state state;12751276/* FIXME: we have just one client-side and bo for the push constants,1277* independently of the stageFlags in vkCmdPushConstants, and the1278* pipelineBindPoint in vkCmdBindPipeline. We could probably do more stage1279* tunning in the future if it makes sense.1280*/1281uint32_t push_constants_data[MAX_PUSH_CONSTANTS_SIZE / 4];1282struct v3dv_cl_reloc push_constants_resource;12831284/* Collection of Vulkan objects created internally by the driver (typically1285* during recording of meta operations) that are part of the command buffer1286* and should be destroyed with it.1287*/1288struct list_head private_objs; /* v3dv_cmd_buffer_private_obj */12891290/* Per-command buffer resources for meta operations. */1291struct {1292struct {1293/* The current descriptor pool for blit sources */1294VkDescriptorPool dspool;1295} blit;1296struct {1297/* The current descriptor pool for texel buffer copy sources */1298VkDescriptorPool dspool;1299} texel_buffer_copy;1300} meta;13011302/* List of jobs in the command buffer. 
For primary command buffers it1303* represents the jobs we want to submit to the GPU. For secondary command1304* buffers it represents jobs that will be merged into a primary command1305* buffer via vkCmdExecuteCommands.1306*/1307struct list_head jobs;1308};13091310struct v3dv_job *v3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer,1311int32_t subpass_idx,1312enum v3dv_job_type type);1313void v3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer);13141315struct v3dv_job *v3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer,1316uint32_t subpass_idx);1317struct v3dv_job *v3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer,1318uint32_t subpass_idx);13191320void v3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer);13211322void v3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer,1323bool push_descriptor_state);1324void v3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer,1325uint32_t dirty_dynamic_state,1326bool needs_subpass_resume);13271328void v3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer,1329struct v3dv_query_pool *pool,1330uint32_t first,1331uint32_t count);13321333void v3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer,1334struct v3dv_query_pool *pool,1335uint32_t query,1336VkQueryControlFlags flags);13371338void v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer,1339struct v3dv_query_pool *pool,1340uint32_t query);13411342void v3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer,1343struct v3dv_query_pool *pool,1344uint32_t first,1345uint32_t count,1346struct v3dv_buffer *dst,1347uint32_t offset,1348uint32_t stride,1349VkQueryResultFlags flags);13501351void v3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer,1352struct drm_v3d_submit_tfu *tfu);13531354void v3dv_cmd_buffer_rewrite_indirect_csd_job(struct v3dv_csd_indirect_cpu_job_info *info,1355const uint32_t *wg_counts);13561357void 
v3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer,1358uint64_t obj,1359v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb);13601361struct v3dv_semaphore {1362struct vk_object_base base;13631364/* A syncobject handle associated with this semaphore */1365uint32_t sync;13661367/* A temporary syncobject handle produced from a vkImportSemaphoreFd. */1368uint32_t temp_sync;1369};13701371struct v3dv_fence {1372struct vk_object_base base;13731374/* A syncobject handle associated with this fence */1375uint32_t sync;13761377/* A temporary syncobject handle produced from a vkImportFenceFd. */1378uint32_t temp_sync;1379};13801381struct v3dv_event {1382struct vk_object_base base;1383int state;1384};13851386struct v3dv_shader_variant {1387enum broadcom_shader_stage stage;13881389union {1390struct v3d_prog_data *base;1391struct v3d_vs_prog_data *vs;1392struct v3d_gs_prog_data *gs;1393struct v3d_fs_prog_data *fs;1394struct v3d_compute_prog_data *cs;1395} prog_data;13961397/* We explicitly save the prog_data_size as it would make easier to1398* serialize1399*/1400uint32_t prog_data_size;14011402/* The assembly for this variant will be uploaded to a BO shared with all1403* other shader stages in that pipeline. This is the offset in that BO.1404*/1405uint32_t assembly_offset;14061407/* Note: it is really likely that qpu_insts would be NULL, as it will be1408* used only temporarily, to upload it to the shared bo, as we compile the1409* different stages individually.1410*/1411uint64_t *qpu_insts;1412uint32_t qpu_insts_size;1413};14141415/*1416* Per-stage info for each stage, useful so shader_module_compile_to_nir and1417* other methods doesn't have so many parameters.1418*1419* FIXME: for the case of the coordinate shader and the vertex shader, module,1420* entrypoint, spec_info and nir are the same. There are also info only1421* relevant to some stages. But seemed too much a hassle to create a new1422* struct only to handle that. 
Revisit if such kind of info starts to grow.1423*/1424struct v3dv_pipeline_stage {1425struct v3dv_pipeline *pipeline;14261427enum broadcom_shader_stage stage;14281429const struct vk_shader_module *module;1430const char *entrypoint;1431const VkSpecializationInfo *spec_info;14321433nir_shader *nir;14341435/* The following is the combined hash of module+entrypoint+spec_info+nir */1436unsigned char shader_sha1[20];14371438/** A name for this program, so you can track it in shader-db output. */1439uint32_t program_id;1440};14411442/* We are using the descriptor pool entry for two things:1443* * Track the allocated sets, so we can properly free it if needed1444* * Track the suballocated pool bo regions, so if some descriptor set is1445* freed, the gap could be reallocated later.1446*1447* Those only make sense if the pool was not created with the flag1448* VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT1449*/1450struct v3dv_descriptor_pool_entry1451{1452struct v3dv_descriptor_set *set;1453/* Offset and size of the subregion allocated for this entry from the1454* pool->bo1455*/1456uint32_t offset;1457uint32_t size;1458};14591460struct v3dv_descriptor_pool {1461struct vk_object_base base;14621463/* If this descriptor pool has been allocated for the driver for internal1464* use, typically to implement meta operations.1465*/1466bool is_driver_internal;14671468struct v3dv_bo *bo;1469/* Current offset at the descriptor bo. 0 means that we didn't use it for1470* any descriptor. If the descriptor bo is NULL, current offset is1471* meaningless1472*/1473uint32_t current_offset;14741475/* If VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT is not set the1476* descriptor sets are handled as a whole as pool memory and handled by the1477* following pointers. 
If set, they are not used, and individually1478* descriptor sets are allocated/freed.1479*/1480uint8_t *host_memory_base;1481uint8_t *host_memory_ptr;1482uint8_t *host_memory_end;14831484uint32_t entry_count;1485uint32_t max_entry_count;1486struct v3dv_descriptor_pool_entry entries[0];1487};14881489struct v3dv_descriptor_set {1490struct vk_object_base base;14911492struct v3dv_descriptor_pool *pool;14931494const struct v3dv_descriptor_set_layout *layout;14951496/* Offset relative to the descriptor pool bo for this set */1497uint32_t base_offset;14981499/* The descriptors below can be indexed (set/binding) using the set_layout1500*/1501struct v3dv_descriptor descriptors[0];1502};15031504struct v3dv_descriptor_set_binding_layout {1505VkDescriptorType type;15061507/* Number of array elements in this binding */1508uint32_t array_size;15091510/* Index into the flattend descriptor set */1511uint32_t descriptor_index;15121513uint32_t dynamic_offset_count;1514uint32_t dynamic_offset_index;15151516/* Offset into the descriptor set where this descriptor lives (final offset1517* on the descriptor bo need to take into account set->base_offset)1518*/1519uint32_t descriptor_offset;15201521/* Offset in the v3dv_descriptor_set_layout of the immutable samplers, or 01522* if there are no immutable samplers.1523*/1524uint32_t immutable_samplers_offset;1525};15261527struct v3dv_descriptor_set_layout {1528struct vk_object_base base;15291530VkDescriptorSetLayoutCreateFlags flags;15311532/* Number of bindings in this descriptor set */1533uint32_t binding_count;15341535/* Total bo size needed for this descriptor set1536*/1537uint32_t bo_size;15381539/* Shader stages affected by this descriptor set */1540uint16_t shader_stages;15411542/* Number of descriptors in this descriptor set */1543uint32_t descriptor_count;15441545/* Number of dynamic offsets used by this descriptor set */1546uint16_t dynamic_offset_count;15471548/* Bindings in this descriptor set */1549struct 
v3dv_descriptor_set_binding_layout binding[0];1550};15511552struct v3dv_pipeline_layout {1553struct vk_object_base base;15541555struct {1556struct v3dv_descriptor_set_layout *layout;1557uint32_t dynamic_offset_start;1558} set[MAX_SETS];15591560uint32_t num_sets;15611562/* Shader stages that are declared to use descriptors from this layout */1563uint32_t shader_stages;15641565uint32_t dynamic_offset_count;1566uint32_t push_constant_size;1567};15681569/*1570* We are using descriptor maps for ubo/ssbo and texture/samplers, so we need1571* it to be big enough to include the max value for all of them.1572*1573* FIXME: one alternative would be to allocate the map as big as you need for1574* each descriptor type. That would means more individual allocations.1575*/1576#define DESCRIPTOR_MAP_SIZE MAX3(V3D_MAX_TEXTURE_SAMPLERS, \1577MAX_UNIFORM_BUFFERS, \1578MAX_STORAGE_BUFFERS)157915801581struct v3dv_descriptor_map {1582/* TODO: avoid fixed size array/justify the size */1583unsigned num_desc; /* Number of descriptors */1584int set[DESCRIPTOR_MAP_SIZE];1585int binding[DESCRIPTOR_MAP_SIZE];1586int array_index[DESCRIPTOR_MAP_SIZE];1587int array_size[DESCRIPTOR_MAP_SIZE];15881589/* NOTE: the following is only for sampler, but this is the easier place to1590* put it.1591*/1592uint8_t return_size[DESCRIPTOR_MAP_SIZE];1593};15941595struct v3dv_sampler {1596struct vk_object_base base;15971598bool compare_enable;1599bool unnormalized_coordinates;1600bool clamp_to_transparent_black_border;16011602/* Prepacked SAMPLER_STATE, that is referenced as part of the tmu1603* configuration. 
If needed it will be copied to the descriptor info during1604* UpdateDescriptorSets1605*/1606uint8_t sampler_state[V3DV_SAMPLER_STATE_LENGTH];1607};16081609struct v3dv_descriptor_template_entry {1610/* The type of descriptor in this entry */1611VkDescriptorType type;16121613/* Binding in the descriptor set */1614uint32_t binding;16151616/* Offset at which to write into the descriptor set binding */1617uint32_t array_element;16181619/* Number of elements to write into the descriptor set binding */1620uint32_t array_count;16211622/* Offset into the user provided data */1623size_t offset;16241625/* Stride between elements into the user provided data */1626size_t stride;1627};16281629struct v3dv_descriptor_update_template {1630struct vk_object_base base;16311632VkPipelineBindPoint bind_point;16331634/* The descriptor set this template corresponds to. This value is only1635* valid if the template was created with the templateType1636* VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET.1637*/1638uint8_t set;16391640/* Number of entries in this template */1641uint32_t entry_count;16421643/* Entries of the template */1644struct v3dv_descriptor_template_entry entries[0];1645};164616471648/* We keep two special values for the sampler idx that represents exactly when a1649* sampler is not needed/provided. The main use is that even if we don't have1650* sampler, we still need to do the output unpacking (through1651* nir_lower_tex). The easier way to do this is to add those special "no1652* sampler" in the sampler_map, and then use the proper unpacking for that1653* case.1654*1655* We have one when we want a 16bit output size, and other when we want a1656* 32bit output size. 
We use the info coming from the RelaxedPrecision1657* decoration to decide between one and the other.1658*/1659#define V3DV_NO_SAMPLER_16BIT_IDX 01660#define V3DV_NO_SAMPLER_32BIT_IDX 116611662/*1663* Following two methods are using on the combined to/from texture/sampler1664* indices maps at v3dv_pipeline.1665*/1666static inline uint32_t1667v3dv_pipeline_combined_index_key_create(uint32_t texture_index,1668uint32_t sampler_index)1669{1670return texture_index << 24 | sampler_index;1671}16721673static inline void1674v3dv_pipeline_combined_index_key_unpack(uint32_t combined_index_key,1675uint32_t *texture_index,1676uint32_t *sampler_index)1677{1678uint32_t texture = combined_index_key >> 24;1679uint32_t sampler = combined_index_key & 0xffffff;16801681if (texture_index)1682*texture_index = texture;16831684if (sampler_index)1685*sampler_index = sampler;1686}16871688struct v3dv_descriptor_maps {1689struct v3dv_descriptor_map ubo_map;1690struct v3dv_descriptor_map ssbo_map;1691struct v3dv_descriptor_map sampler_map;1692struct v3dv_descriptor_map texture_map;1693};16941695/* The structure represents data shared between different objects, like the1696* pipeline and the pipeline cache, so we ref count it to know when it should1697* be freed.1698*/1699struct v3dv_pipeline_shared_data {1700uint32_t ref_cnt;17011702unsigned char sha1_key[20];17031704struct v3dv_descriptor_maps *maps[BROADCOM_SHADER_STAGES];1705struct v3dv_shader_variant *variants[BROADCOM_SHADER_STAGES];17061707struct v3dv_bo *assembly_bo;1708};17091710struct v3dv_pipeline {1711struct vk_object_base base;17121713struct v3dv_device *device;17141715VkShaderStageFlags active_stages;17161717struct v3dv_render_pass *pass;1718struct v3dv_subpass *subpass;17191720/* Note: We can't use just a MESA_SHADER_STAGES array because we also need1721* to track binning shaders. 
Note these will be freed once the pipeline1722* has been compiled.1723*/1724struct v3dv_pipeline_stage *vs;1725struct v3dv_pipeline_stage *vs_bin;1726struct v3dv_pipeline_stage *gs;1727struct v3dv_pipeline_stage *gs_bin;1728struct v3dv_pipeline_stage *fs;1729struct v3dv_pipeline_stage *cs;17301731/* Flags for whether optional pipeline stages are present, for convenience */1732bool has_gs;17331734/* Spilling memory requirements */1735struct {1736struct v3dv_bo *bo;1737uint32_t size_per_thread;1738} spill;17391740struct v3dv_dynamic_state dynamic_state;17411742struct v3dv_pipeline_layout *layout;17431744/* Whether this pipeline enables depth writes */1745bool z_updates_enable;17461747enum v3dv_ez_state ez_state;17481749bool msaa;1750bool sample_rate_shading;1751uint32_t sample_mask;17521753bool primitive_restart;17541755/* Accessed by binding. So vb[binding]->stride is the stride of the vertex1756* array with such binding1757*/1758struct v3dv_pipeline_vertex_binding {1759uint32_t stride;1760uint32_t instance_divisor;1761} vb[MAX_VBS];1762uint32_t vb_count;17631764/* Note that a lot of info from VkVertexInputAttributeDescription is1765* already prepacked, so here we are only storing those that need recheck1766* later. The array must be indexed by driver location, since that is the1767* order in which we need to emit the attributes.1768*/1769struct v3dv_pipeline_vertex_attrib {1770uint32_t binding;1771uint32_t offset;1772VkFormat vk_format;1773} va[MAX_VERTEX_ATTRIBS];1774uint32_t va_count;17751776enum pipe_prim_type topology;17771778struct v3dv_pipeline_shared_data *shared_data;17791780/* In general we can reuse v3dv_device->default_attribute_float, so note1781* that the following can be NULL.1782*1783* FIXME: the content of this BO will be small, so it could be improved to1784* be uploaded to a common BO. 
But as in most cases it will be NULL, it is1785* not a priority.1786*/1787struct v3dv_bo *default_attribute_values;17881789struct vpm_config vpm_cfg;1790struct vpm_config vpm_cfg_bin;17911792/* If the pipeline should emit any of the stencil configuration packets */1793bool emit_stencil_cfg[2];17941795/* Blend state */1796struct {1797/* Per-RT bit mask with blend enables */1798uint8_t enables;1799/* Per-RT prepacked blend config packets */1800uint8_t cfg[V3D_MAX_DRAW_BUFFERS][V3DV_BLEND_CFG_LENGTH];1801/* Flag indicating whether the blend factors in use require1802* color constants.1803*/1804bool needs_color_constants;1805/* Mask with enabled color channels for each RT (4 bits per RT) */1806uint32_t color_write_masks;1807} blend;18081809/* Depth bias */1810struct {1811bool enabled;1812bool is_z16;1813} depth_bias;18141815/* Packets prepacked during pipeline creation1816*/1817uint8_t cfg_bits[V3DV_CFG_BITS_LENGTH];1818uint8_t shader_state_record[V3DV_GL_SHADER_STATE_RECORD_LENGTH];1819uint8_t vcm_cache_size[V3DV_VCM_CACHE_SIZE_LENGTH];1820uint8_t vertex_attrs[V3DV_GL_SHADER_STATE_ATTRIBUTE_RECORD_LENGTH *1821MAX_VERTEX_ATTRIBS];1822uint8_t stencil_cfg[2][V3DV_STENCIL_CFG_LENGTH];1823};18241825static inline VkPipelineBindPoint1826v3dv_pipeline_get_binding_point(struct v3dv_pipeline *pipeline)1827{1828assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ||1829!(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT));1830return pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT ?1831VK_PIPELINE_BIND_POINT_COMPUTE : VK_PIPELINE_BIND_POINT_GRAPHICS;1832}18331834static inline struct v3dv_descriptor_state*1835v3dv_cmd_buffer_get_descriptor_state(struct v3dv_cmd_buffer *cmd_buffer,1836struct v3dv_pipeline *pipeline)1837{1838if (v3dv_pipeline_get_binding_point(pipeline) == VK_PIPELINE_BIND_POINT_COMPUTE)1839return &cmd_buffer->state.compute.descriptor_state;1840else1841return &cmd_buffer->state.gfx.descriptor_state;1842}18431844const nir_shader_compiler_options 
*v3dv_pipeline_get_nir_options(void);18451846uint32_t v3dv_physical_device_vendor_id(struct v3dv_physical_device *dev);1847uint32_t v3dv_physical_device_device_id(struct v3dv_physical_device *dev);18481849VkResult __vk_errorf(struct v3dv_instance *instance, VkResult error,1850const char *file, int line,1851const char *format, ...);18521853#define vk_error(instance, error) __vk_errorf(instance, error, __FILE__, __LINE__, NULL);1854#define vk_errorf(instance, error, format, ...) __vk_errorf(instance, error, __FILE__, __LINE__, format, ## __VA_ARGS__);18551856#ifdef DEBUG1857#define v3dv_debug_ignored_stype(sType) \1858fprintf(stderr, "%s: ignored VkStructureType %u:%s\n\n", __func__, (sType), vk_StructureType_to_str(sType))1859#else1860#define v3dv_debug_ignored_stype(sType)1861#endif18621863const uint8_t *v3dv_get_format_swizzle(struct v3dv_device *device, VkFormat f);1864uint8_t v3dv_get_tex_return_size(const struct v3dv_format *vf, bool compare_enable);1865const struct v3dv_format *1866v3dv_get_compatible_tfu_format(struct v3dv_device *device,1867uint32_t bpp, VkFormat *out_vk_format);1868bool v3dv_buffer_format_supports_features(struct v3dv_device *device,1869VkFormat vk_format,1870VkFormatFeatureFlags features);18711872struct v3dv_cl_reloc v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer,1873struct v3dv_pipeline *pipeline,1874struct v3dv_shader_variant *variant);18751876struct v3dv_cl_reloc v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,1877struct v3dv_pipeline *pipeline,1878struct v3dv_shader_variant *variant,1879uint32_t **wg_count_offsets);18801881struct v3dv_shader_variant *1882v3dv_get_shader_variant(struct v3dv_pipeline_stage *p_stage,1883struct v3dv_pipeline_cache *cache,1884struct v3d_key *key,1885size_t key_size,1886const VkAllocationCallbacks *pAllocator,1887VkResult *out_vk_result);18881889struct v3dv_shader_variant *1890v3dv_shader_variant_create(struct v3dv_device *device,1891enum broadcom_shader_stage stage,1892struct 
v3d_prog_data *prog_data,1893uint32_t prog_data_size,1894uint32_t assembly_offset,1895uint64_t *qpu_insts,1896uint32_t qpu_insts_size,1897VkResult *out_vk_result);18981899void1900v3dv_shader_variant_destroy(struct v3dv_device *device,1901struct v3dv_shader_variant *variant);19021903static inline void1904v3dv_pipeline_shared_data_ref(struct v3dv_pipeline_shared_data *shared_data)1905{1906assert(shared_data && shared_data->ref_cnt >= 1);1907p_atomic_inc(&shared_data->ref_cnt);1908}19091910void1911v3dv_pipeline_shared_data_destroy(struct v3dv_device *device,1912struct v3dv_pipeline_shared_data *shared_data);19131914static inline void1915v3dv_pipeline_shared_data_unref(struct v3dv_device *device,1916struct v3dv_pipeline_shared_data *shared_data)1917{1918assert(shared_data && shared_data->ref_cnt >= 1);1919if (p_atomic_dec_zero(&shared_data->ref_cnt))1920v3dv_pipeline_shared_data_destroy(device, shared_data);1921}19221923struct v3dv_descriptor *1924v3dv_descriptor_map_get_descriptor(struct v3dv_descriptor_state *descriptor_state,1925struct v3dv_descriptor_map *map,1926struct v3dv_pipeline_layout *pipeline_layout,1927uint32_t index,1928uint32_t *dynamic_offset);19291930const struct v3dv_sampler *1931v3dv_descriptor_map_get_sampler(struct v3dv_descriptor_state *descriptor_state,1932struct v3dv_descriptor_map *map,1933struct v3dv_pipeline_layout *pipeline_layout,1934uint32_t index);19351936struct v3dv_cl_reloc1937v3dv_descriptor_map_get_sampler_state(struct v3dv_device *device,1938struct v3dv_descriptor_state *descriptor_state,1939struct v3dv_descriptor_map *map,1940struct v3dv_pipeline_layout *pipeline_layout,1941uint32_t index);19421943struct v3dv_cl_reloc1944v3dv_descriptor_map_get_texture_shader_state(struct v3dv_device *device,1945struct v3dv_descriptor_state *descriptor_state,1946struct v3dv_descriptor_map *map,1947struct v3dv_pipeline_layout *pipeline_layout,1948uint32_t index);19491950const struct v3dv_format*1951v3dv_descriptor_map_get_texture_format(struct 
v3dv_descriptor_state *descriptor_state,1952struct v3dv_descriptor_map *map,1953struct v3dv_pipeline_layout *pipeline_layout,1954uint32_t index,1955VkFormat *out_vk_format);19561957struct v3dv_bo*1958v3dv_descriptor_map_get_texture_bo(struct v3dv_descriptor_state *descriptor_state,1959struct v3dv_descriptor_map *map,1960struct v3dv_pipeline_layout *pipeline_layout,1961uint32_t index);19621963static inline const struct v3dv_sampler *1964v3dv_immutable_samplers(const struct v3dv_descriptor_set_layout *set,1965const struct v3dv_descriptor_set_binding_layout *binding)1966{1967assert(binding->immutable_samplers_offset);1968return (const struct v3dv_sampler *) ((const char *) set + binding->immutable_samplers_offset);1969}19701971void v3dv_pipeline_cache_init(struct v3dv_pipeline_cache *cache,1972struct v3dv_device *device,1973bool cache_enabled);19741975void v3dv_pipeline_cache_finish(struct v3dv_pipeline_cache *cache);19761977void v3dv_pipeline_cache_upload_nir(struct v3dv_pipeline *pipeline,1978struct v3dv_pipeline_cache *cache,1979nir_shader *nir,1980unsigned char sha1_key[20]);19811982nir_shader* v3dv_pipeline_cache_search_for_nir(struct v3dv_pipeline *pipeline,1983struct v3dv_pipeline_cache *cache,1984const nir_shader_compiler_options *nir_options,1985unsigned char sha1_key[20]);19861987struct v3dv_pipeline_shared_data *1988v3dv_pipeline_cache_search_for_pipeline(struct v3dv_pipeline_cache *cache,1989unsigned char sha1_key[20]);19901991void1992v3dv_pipeline_cache_upload_pipeline(struct v3dv_pipeline *pipeline,1993struct v3dv_pipeline_cache *cache);19941995struct v3dv_bo *1996v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,1997struct v3dv_pipeline *pipeline);19981999void v3dv_shader_module_internal_init(struct v3dv_device *device,2000struct vk_shader_module *module,2001nir_shader *nir);20022003#define V3DV_DEFINE_HANDLE_CASTS(__v3dv_type, __VkType) \2004\2005static inline struct __v3dv_type * \2006__v3dv_type ## _from_handle(__VkType 
_handle) \2007{ \2008return (struct __v3dv_type *) _handle; \2009} \2010\2011static inline __VkType \2012__v3dv_type ## _to_handle(struct __v3dv_type *_obj) \2013{ \2014return (__VkType) _obj; \2015}20162017#define V3DV_DEFINE_NONDISP_HANDLE_CASTS(__v3dv_type, __VkType) \2018\2019static inline struct __v3dv_type * \2020__v3dv_type ## _from_handle(__VkType _handle) \2021{ \2022return (struct __v3dv_type *)(uintptr_t) _handle; \2023} \2024\2025static inline __VkType \2026__v3dv_type ## _to_handle(struct __v3dv_type *_obj) \2027{ \2028return (__VkType)(uintptr_t) _obj; \2029}20302031#define V3DV_FROM_HANDLE(__v3dv_type, __name, __handle) \2032struct __v3dv_type *__name = __v3dv_type ## _from_handle(__handle)20332034V3DV_DEFINE_HANDLE_CASTS(v3dv_cmd_buffer, VkCommandBuffer)2035V3DV_DEFINE_HANDLE_CASTS(v3dv_device, VkDevice)2036V3DV_DEFINE_HANDLE_CASTS(v3dv_instance, VkInstance)2037V3DV_DEFINE_HANDLE_CASTS(v3dv_physical_device, VkPhysicalDevice)2038V3DV_DEFINE_HANDLE_CASTS(v3dv_queue, VkQueue)20392040V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_cmd_pool, VkCommandPool)2041V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer, VkBuffer)2042V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_buffer_view, VkBufferView)2043V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_device_memory, VkDeviceMemory)2044V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_pool, VkDescriptorPool)2045V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set, VkDescriptorSet)2046V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_set_layout, VkDescriptorSetLayout)2047V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_descriptor_update_template, VkDescriptorUpdateTemplate)2048V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_event, VkEvent)2049V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_fence, VkFence)2050V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_framebuffer, VkFramebuffer)2051V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image, VkImage)2052V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_image_view, VkImageView)2053V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline, 
VkPipeline)2054V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_cache, VkPipelineCache)2055V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_pipeline_layout, VkPipelineLayout)2056V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_query_pool, VkQueryPool)2057V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_render_pass, VkRenderPass)2058V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_sampler, VkSampler)2059V3DV_DEFINE_NONDISP_HANDLE_CASTS(v3dv_semaphore, VkSemaphore)20602061/* This is defined as a macro so that it works for both2062* VkImageSubresourceRange and VkImageSubresourceLayers2063*/2064#define v3dv_layer_count(_image, _range) \2065((_range)->layerCount == VK_REMAINING_ARRAY_LAYERS ? \2066(_image)->array_size - (_range)->baseArrayLayer : (_range)->layerCount)20672068#define v3dv_level_count(_image, _range) \2069((_range)->levelCount == VK_REMAINING_MIP_LEVELS ? \2070(_image)->levels - (_range)->baseMipLevel : (_range)->levelCount)20712072static inline int2073v3dv_ioctl(int fd, unsigned long request, void *arg)2074{2075if (using_v3d_simulator)2076return v3d_simulator_ioctl(fd, request, arg);2077else2078return drmIoctl(fd, request, arg);2079}20802081/* Flags OOM conditions in command buffer state.2082*2083* Note: notice that no-op jobs don't have a command buffer reference.2084*/2085static inline void2086v3dv_flag_oom(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job)2087{2088if (cmd_buffer) {2089cmd_buffer->state.oom = true;2090} else {2091assert(job);2092if (job->cmd_buffer)2093job->cmd_buffer->state.oom = true;2094}2095}20962097#define v3dv_return_if_oom(_cmd_buffer, _job) do { \2098const struct v3dv_cmd_buffer *__cmd_buffer = _cmd_buffer; \2099if (__cmd_buffer && __cmd_buffer->state.oom) \2100return; \2101const struct v3dv_job *__job = _job; \2102if (__job && __job->cmd_buffer && __job->cmd_buffer->state.oom) \2103return; \2104} while(0) \21052106static inline uint32_t2107u64_hash(const void *key)2108{2109return _mesa_hash_data(key, sizeof(uint64_t));2110}21112112static inline 
bool2113u64_compare(const void *key1, const void *key2)2114{2115return memcmp(key1, key2, sizeof(uint64_t)) == 0;2116}21172118/* Helper to call hw ver speficic functions */2119#define v3dv_X(device, thing) ({ \2120__typeof(&v3d42_##thing) v3d_X_thing; \2121switch (device->devinfo.ver) { \2122case 42: \2123v3d_X_thing = &v3d42_##thing; \2124break; \2125default: \2126unreachable("Unsupported hardware generation"); \2127} \2128v3d_X_thing; \2129})213021312132/* v3d_macros from common requires v3dX and V3DX definitions. Below we need to2133* define v3dX for each version supported, because when we compile code that2134* is not version-specific, all version-specific macros need to be already2135* defined.2136*/2137#ifdef v3dX2138# include "v3dvx_private.h"2139#else2140# define v3dX(x) v3d42_##x2141# include "v3dvx_private.h"2142# undef v3dX2143#endif21442145#endif /* V3DV_PRIVATE_H */214621472148