CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/GPU/Vulkan/VulkanQueueRunner.h
Views: 1401
#pragma once12#include <cstdint>3#include <mutex>4#include <condition_variable>56#include "Common/Thread/Promise.h"7#include "Common/Data/Collections/Hashmaps.h"8#include "Common/Data/Collections/FastVec.h"9#include "Common/GPU/Vulkan/VulkanContext.h"10#include "Common/GPU/Vulkan/VulkanBarrier.h"11#include "Common/GPU/Vulkan/VulkanFrameData.h"12#include "Common/GPU/Vulkan/VulkanFramebuffer.h"13#include "Common/Data/Convert/SmallDataConvert.h"14#include "Common/Data/Collections/TinySet.h"15#include "Common/GPU/DataFormat.h"1617class VKRFramebuffer;18struct VKRGraphicsPipeline;19struct VKRComputePipeline;20struct VKRImage;21struct VKRPipelineLayout;22struct FrameData;2324enum {25QUEUE_HACK_MGS2_ACID = 1,26QUEUE_HACK_SONIC = 2,27QUEUE_HACK_RENDERPASS_MERGE = 8,28};2930enum class VKRRenderCommand : uint8_t {31REMOVED,32BIND_GRAPHICS_PIPELINE, // async33STENCIL,34BLEND,35VIEWPORT,36SCISSOR,37CLEAR,38DRAW,39DRAW_INDEXED,40PUSH_CONSTANTS,41DEBUG_ANNOTATION,42NUM_RENDER_COMMANDS,43};4445enum class PipelineFlags : u8 {46NONE = 0,47USES_BLEND_CONSTANT = (1 << 1),48USES_DEPTH_STENCIL = (1 << 2), // Reads or writes the depth or stencil buffers.49USES_GEOMETRY_SHADER = (1 << 3),50USES_MULTIVIEW = (1 << 4), // Inherited from the render pass it was created with.51USES_DISCARD = (1 << 5),52USES_FLAT_SHADING = (1 << 6),53};54ENUM_CLASS_BITOPS(PipelineFlags);5556struct VkRenderData {57VKRRenderCommand cmd;58union {59struct {60VKRGraphicsPipeline *pipeline;61VKRPipelineLayout *pipelineLayout;62} graphics_pipeline;63struct {64uint32_t descSetIndex;65int numUboOffsets;66uint32_t uboOffsets[3];67VkBuffer vbuffer;68VkDeviceSize voffset;69uint32_t count;70uint32_t offset;71} draw;72struct {73uint32_t descSetIndex;74uint32_t uboOffsets[3];75uint16_t numUboOffsets;76uint16_t instances;77VkBuffer vbuffer;78VkBuffer ibuffer;79uint32_t voffset;80uint32_t ioffset;81uint32_t count;82} drawIndexed;83struct {84uint32_t clearColor;85float clearZ;86int clearStencil;87int clearMask; // VK_IMAGE_ASPECT_COLOR_BIT etc88} clear;89struct {90VkViewport vp;91} viewport;92struct {93VkRect2D scissor;94} scissor;95struct {96uint8_t stencilWriteMask;97uint8_t stencilCompareMask;98uint8_t stencilRef;99} stencil;100struct {101uint32_t color;102} blendColor;103struct {104VkShaderStageFlags stages;105uint8_t offset;106uint8_t size;107uint8_t data[40]; // Should be enough for now.108} push;109struct {110const char *annotation;111} debugAnnotation;112struct {113int setIndex;114} bindDescSet;115};116};117118enum class VKRStepType : uint8_t {119RENDER,120RENDER_SKIP,121COPY,122BLIT,123READBACK,124READBACK_IMAGE,125};126127struct TransitionRequest {128VKRFramebuffer *fb;129VkImageAspectFlags aspect; // COLOR or DEPTH130VkImageLayout targetLayout;131132bool operator == (const TransitionRequest &other) const {133return fb == other.fb && aspect == other.aspect && targetLayout == other.targetLayout;134}135};136137class VKRRenderPass;138139struct VKRStep {140VKRStep(VKRStepType _type) : stepType(_type) {}141~VKRStep() {}142143VKRStepType stepType;144FastVec<VkRenderData> commands;145TinySet<TransitionRequest, 4> preTransitions;146TinySet<VKRFramebuffer *, 8> dependencies;147const char *tag;148union {149struct {150VKRFramebuffer *framebuffer;151VKRRenderPassLoadAction colorLoad;152VKRRenderPassLoadAction depthLoad;153VKRRenderPassLoadAction stencilLoad;154VKRRenderPassStoreAction colorStore;155VKRRenderPassStoreAction depthStore;156VKRRenderPassStoreAction stencilStore;157uint32_t clearColor;158float clearDepth;159u8 clearStencil;160int numDraws;161// Downloads and textures from this pass.162int numReads;163VkImageLayout finalColorLayout;164VkImageLayout finalDepthStencilLayout;165PipelineFlags pipelineFlags; // contains the self dependency flag, in the form of USES_INPUT_ATTACHMENT166VkRect2D renderArea;167// Render pass type. Deduced after finishing recording the pass, from the used pipelines.168// NOTE: Storing the render pass here doesn't do much good, we change the compatible parameters (load/store ops) during step optimization.169RenderPassType renderPassType;170} render;171struct {172VKRFramebuffer *src;173VKRFramebuffer *dst;174VkRect2D srcRect;175VkOffset2D dstPos;176VkImageAspectFlags aspectMask;177} copy;178struct {179VKRFramebuffer *src;180VKRFramebuffer *dst;181VkRect2D srcRect;182VkRect2D dstRect;183VkImageAspectFlags aspectMask;184VkFilter filter;185} blit;186struct {187VKRFramebuffer *src;188VkRect2D srcRect;189VkImageAspectFlags aspectMask;190bool delayed;191} readback;192struct {193VkImage image;194VkRect2D srcRect;195int mipLevel;196} readback_image;197};198};199200// These are enqueued from the main thread,201// and the render thread pops them off202struct VKRRenderThreadTask {203VKRRenderThreadTask(VKRRunType _runType) : runType(_runType) {}204std::vector<VKRStep *> steps;205int frame = -1;206VKRRunType runType;207208// Avoid copying these by accident.209VKRRenderThreadTask(VKRRenderThreadTask &) = delete;210VKRRenderThreadTask &operator =(VKRRenderThreadTask &) = delete;211};212213class VulkanQueueRunner {214public:215VulkanQueueRunner(VulkanContext *vulkan) : vulkan_(vulkan), renderPasses_(16) {}216217void SetBackbuffer(VkFramebuffer fb, VkImage img) {218backbuffer_ = fb;219backbufferImage_ = img;220}221222void PreprocessSteps(std::vector<VKRStep *> &steps);223void RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps = false);224void LogSteps(const std::vector<VKRStep *> &steps, bool verbose);225226static std::string StepToString(VulkanContext *vulkan, const VKRStep &step);227228void CreateDeviceObjects();229void DestroyDeviceObjects();230231// Swapchain232void DestroyBackBuffers();233bool CreateSwapchain(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers);234235bool HasBackbuffers() const {236return !framebuffers_.empty();237}238239// Get a render pass that's compatible with all our framebuffers.240// Note that it's precached, cannot look up in the map as this might be on another thread.241VKRRenderPass *GetCompatibleRenderPass() const {242return compatibleRenderPass_;243}244245inline int RPIndex(VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth) {246return (int)depth * 3 + (int)color;247}248249// src == 0 means to copy from the sync readback buffer.250bool CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels);251252VKRRenderPass *GetRenderPass(const RPKey &key);253254bool GetRenderPassKey(VKRRenderPass *passToFind, RPKey *outKey) const {255bool found = false;256renderPasses_.Iterate([passToFind, &found, outKey](const RPKey &rpkey, const VKRRenderPass *pass) {257if (pass == passToFind) {258found = true;259*outKey = rpkey;260}261});262return found;263}264265void EnableHacks(uint32_t hacks) {266hacksEnabled_ = hacks;267}268269private:270bool InitBackbufferFramebuffers(int width, int height);271bool InitDepthStencilBuffer(VkCommandBuffer cmd, VulkanBarrierBatch *barriers); // Used for non-buffered rendering.272273VKRRenderPass *PerformBindFramebufferAsRenderTarget(const VKRStep &pass, VkCommandBuffer cmd);274void PerformRenderPass(const VKRStep &pass, VkCommandBuffer cmd, int curFrame, QueueProfileContext &profile);275void PerformCopy(const VKRStep &pass, VkCommandBuffer cmd);276void PerformBlit(const VKRStep &pass, VkCommandBuffer cmd);277void PerformReadback(const VKRStep &pass, VkCommandBuffer cmd, FrameData &frameData);278void PerformReadbackImage(const VKRStep &pass, VkCommandBuffer cmd);279280void LogRenderPass(const VKRStep &pass, bool verbose);281void LogCopy(const VKRStep &pass);282void LogBlit(const VKRStep &pass);283void LogReadback(const VKRStep &pass);284void LogReadbackImage(const VKRStep &pass);285286void ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize);287288static void ApplyMGSHack(std::vector<VKRStep *> &steps);289static void ApplySonicHack(std::vector<VKRStep *> &steps);290static void ApplyRenderPassMerge(std::vector<VKRStep *> &steps);291292static void SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrierBatch *recordBarrier);293294VulkanContext *vulkan_;295296VkFramebuffer backbuffer_ = VK_NULL_HANDLE;297VkImage backbufferImage_ = VK_NULL_HANDLE;298299// The "Compatible" render pass. Should be able to get rid of this soon.300VKRRenderPass *compatibleRenderPass_ = nullptr;301302// Renderpasses, all combinations of preserving or clearing or dont-care-ing fb contents.303// Each VKRRenderPass contains all compatibility classes (which attachments they have, etc).304DenseHashMap<RPKey, VKRRenderPass *> renderPasses_;305306// Readback buffer. Currently we only support synchronous readback, so we only really need one.307// We size it generously.308CachedReadback syncReadback_{};309310// TODO: Enable based on compat.ini.311uint32_t hacksEnabled_ = 0;312313// Image barrier helper used during command buffer record (PerformRenderPass etc).314// Stored here to help reuse the allocation.315316VulkanBarrierBatch recordBarrier_;317318// Swap chain management319struct SwapchainImageData {320VkImage image;321VkImageView view;322};323std::vector<VkFramebuffer> framebuffers_;324std::vector<SwapchainImageData> swapchainImages_;325uint32_t swapchainImageCount_ = 0;326struct DepthBufferInfo {327VkFormat format = VK_FORMAT_UNDEFINED;328VkImage image = VK_NULL_HANDLE;329VmaAllocation alloc = VK_NULL_HANDLE;330VkImageView view = VK_NULL_HANDLE;331};332DepthBufferInfo depth_;333};334335const char *VKRRenderCommandToString(VKRRenderCommand cmd);336337338