Path: blob/master/Common/GPU/Vulkan/VulkanRenderManager.cpp
5669 views
#include <cstdint>12#include <map>3#include <sstream>45#include "Common/Log.h"6#include "Common/StringUtils.h"7#include "Common/TimeUtil.h"89#include "Common/GPU/Vulkan/VulkanAlloc.h"10#include "Common/GPU/Vulkan/VulkanContext.h"11#include "Common/GPU/Vulkan/VulkanRenderManager.h"1213#include "Common/LogReporting.h"14#include "Common/Thread/ThreadUtil.h"1516#if 0 // def _DEBUG17#define VLOG(...) NOTICE_LOG(Log::G3D, __VA_ARGS__)18#else19#define VLOG(...)20#endif2122#ifndef UINT64_MAX23#define UINT64_MAX 0xFFFFFFFFFFFFFFFFULL24#endif2526using namespace PPSSPP_VK;2728// renderPass is an example of the "compatibility class" or RenderPassType type.29bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {30// Good torture test to test the shutdown-while-precompiling-shaders issue on PC where it's normally31// hard to catch because shaders compile so fast.32// sleep_ms(200);3334bool multisample = RenderPassTypeHasMultisample(rpType);35if (multisample) {36if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {37_assert_(sampleCount == sampleCount_);38} else {39sampleCount_ = sampleCount;40}41}4243// Sanity check.44// Seen in crash reports from PowerVR GE8320, presumably we failed creating some shader modules.45if (!desc->vertexShader || !desc->fragmentShader) {46ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing vs/fs shader module pointers!");47pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);48return false;49}5051// Fill in the last part of the desc since now it's time to block.52VkShaderModule vs = desc->vertexShader->BlockUntilReady();53VkShaderModule fs = desc->fragmentShader->BlockUntilReady();54VkShaderModule gs = desc->geometryShader ? desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE;5556if (!vs || !fs || (!gs && desc->geometryShader)) {57ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing shader modules");58pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);59return false;60}6162if (!compatibleRenderPass) {63ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - compatible render pass was nullptr");64pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);65return false;66}6768uint32_t stageCount = 2;69VkPipelineShaderStageCreateInfo ss[3]{};70ss[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;71ss[0].stage = VK_SHADER_STAGE_VERTEX_BIT;72ss[0].pSpecializationInfo = nullptr;73ss[0].module = vs;74ss[0].pName = "main";75ss[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;76ss[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;77ss[1].pSpecializationInfo = nullptr;78ss[1].module = fs;79ss[1].pName = "main";80if (gs) {81stageCount++;82ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;83ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;84ss[2].pSpecializationInfo = nullptr;85ss[2].module = gs;86ss[2].pName = "main";87}8889VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };90pipe.pStages = ss;91pipe.stageCount = stageCount;92pipe.renderPass = compatibleRenderPass;93pipe.basePipelineIndex = 0;94pipe.pColorBlendState = &desc->cbs;95pipe.pDepthStencilState = &desc->dss;96pipe.pRasterizationState = &desc->rs;9798VkPipelineMultisampleStateCreateInfo ms{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };99ms.rasterizationSamples = multisample ? sampleCount : VK_SAMPLE_COUNT_1_BIT;100if (multisample && (flags_ & PipelineFlags::USES_DISCARD)) {101// Extreme quality102ms.sampleShadingEnable = true;103ms.minSampleShading = 1.0f;104}105106VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };107inputAssembly.topology = desc->topology;108109// We will use dynamic viewport state.110pipe.pVertexInputState = &desc->vis;111pipe.pViewportState = &desc->views;112pipe.pTessellationState = nullptr;113pipe.pDynamicState = &desc->ds;114pipe.pInputAssemblyState = &inputAssembly;115pipe.pMultisampleState = &ms;116pipe.layout = desc->pipelineLayout->pipelineLayout;117pipe.basePipelineHandle = VK_NULL_HANDLE;118pipe.basePipelineIndex = 0;119pipe.subpass = 0;120121double start = time_now_d();122VkPipeline vkpipeline;123VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);124125double now = time_now_d();126double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;127double taken_ms = (now - start) * 1000.0;128129#ifndef _DEBUG130if (taken_ms < 0.1) {131DEBUG_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",132countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());133} else {134INFO_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling rpType: %04x sampleBits: %d (%s)",135countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());136}137#endif138139bool success = true;140if (result == VK_INCOMPLETE) {141// Bad (disallowed by spec) return value seen on Adreno in Burnout :( Try to ignore?142// Would really like to log more here, we could probably attach more info to desc.143//144// At least create a null placeholder to avoid creating over and over if something is broken.145pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);146ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! VK_INCOMPLETE");147LogCreationFailure();148success = false;149} else if (result != VK_SUCCESS) {150pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);151ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! result='%s'", VulkanResultToString(result));152LogCreationFailure();153success = false;154} else {155// Success!156if (!tag_.empty()) {157vulkan->SetDebugName(vkpipeline, VK_OBJECT_TYPE_PIPELINE, tag_.c_str());158}159pipeline[(size_t)rpType]->Post(vkpipeline);160}161162return success;163}164165void VKRGraphicsPipeline::DestroyVariants(VulkanContext *vulkan, bool msaaOnly) {166for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {167if (!this->pipeline[i])168continue;169if (msaaOnly && (i & (int)RenderPassType::MULTISAMPLE) == 0)170continue;171172VkPipeline pipeline = this->pipeline[i]->BlockUntilReady();173// pipeline can be nullptr here, if it failed to compile before.174if (pipeline) {175vulkan->Delete().QueueDeletePipeline(pipeline);176}177this->pipeline[i] = nullptr;178}179sampleCount_ = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;180}181182void VKRGraphicsPipeline::DestroyVariantsInstant(VkDevice device) {183for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {184if (pipeline[i]) {185vkDestroyPipeline(device, pipeline[i]->BlockUntilReady(), nullptr);186delete pipeline[i];187pipeline[i] = nullptr;188}189}190}191192VKRGraphicsPipeline::~VKRGraphicsPipeline() {193// This is called from the callbacked queued in QueueForDeletion.194// When we reach here, we should already be empty, so let's assert on that.195for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {196_assert_(!pipeline[i]);197}198if (desc)199desc->Release();200}201202void VKRGraphicsPipeline::BlockUntilCompiled() {203for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {204if (pipeline[i]) {205pipeline[i]->BlockUntilReady();206}207}208}209210void VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {211// Can't destroy variants here, the pipeline still lives for a while.212vulkan->Delete().QueueCallback([](VulkanContext *vulkan, void *p) {213VKRGraphicsPipeline *pipeline = (VKRGraphicsPipeline *)p;214pipeline->DestroyVariantsInstant(vulkan->GetDevice());215delete pipeline;216}, this);217}218219u32 VKRGraphicsPipeline::GetVariantsBitmask() const {220u32 bitmask = 0;221for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {222if (pipeline[i]) {223bitmask |= 1 << i;224}225}226return bitmask;227}228229void VKRGraphicsPipeline::LogCreationFailure() const {230ERROR_LOG(Log::G3D, "vs: %s\n[END VS]", desc->vertexShaderSource.c_str());231ERROR_LOG(Log::G3D, "fs: %s\n[END FS]", desc->fragmentShaderSource.c_str());232if (desc->geometryShader) {233ERROR_LOG(Log::G3D, "gs: %s\n[END GS]", desc->geometryShaderSource.c_str());234}235// TODO: Maybe log various other state?236ERROR_LOG(Log::G3D, "======== END OF PIPELINE ==========");237}238239struct SinglePipelineTask {240VKRGraphicsPipeline *pipeline;241VkRenderPass compatibleRenderPass;242RenderPassType rpType;243VkSampleCountFlagBits sampleCount;244double scheduleTime;245int countToCompile;246};247248class CreateMultiPipelinesTask : public Task {249public:250CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(std::move(tasks)) {251tasksInFlight_.fetch_add(1);252}253~CreateMultiPipelinesTask() = default;254255TaskType Type() const override {256return TaskType::CPU_COMPUTE;257}258259TaskPriority Priority() const override {260return TaskPriority::HIGH;261}262263void Run() override {264for (auto &task : tasks_) {265task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);266}267tasksInFlight_.fetch_sub(1);268}269270VulkanContext *vulkan_;271std::vector<SinglePipelineTask> tasks_;272273// Use during shutdown to make sure there aren't any leftover tasks sitting queued.274// Could probably be done more elegantly. Like waiting for all tasks of a type, or saving pointers to them, or something...275// Returns the maximum value of tasks in flight seen during the wait.276static int WaitForAll();277static std::atomic<int> tasksInFlight_;278};279280int CreateMultiPipelinesTask::WaitForAll() {281int inFlight = 0;282int maxInFlight = 0;283while ((inFlight = tasksInFlight_.load()) > 0) {284if (inFlight > maxInFlight) {285maxInFlight = inFlight;286}287sleep_ms(2, "create-multi-pipelines-wait");288}289return maxInFlight;290}291292std::atomic<int> CreateMultiPipelinesTask::tasksInFlight_;293294VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread, HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory)295: vulkan_(vulkan), queueRunner_(vulkan),296initTimeMs_("initTimeMs"),297totalGPUTimeMs_("totalGPUTimeMs"),298renderCPUTimeMs_("renderCPUTimeMs"),299descUpdateTimeMs_("descUpdateCPUTimeMs"),300useRenderThread_(useThread),301frameTimeHistory_(frameTimeHistory)302{303inflightFramesAtStart_ = vulkan_->GetInflightFrames();304305// For present timing experiments. Disabled for now.306measurePresentTime_ = false;307308frameDataShared_.Init(vulkan, useThread, measurePresentTime_);309310for (int i = 0; i < inflightFramesAtStart_; i++) {311frameData_[i].Init(vulkan, i);312}313314queueRunner_.CreateDeviceObjects();315}316317bool VulkanRenderManager::CreateBackbuffers() {318if (!vulkan_->IsSwapchainInited()) {319ERROR_LOG(Log::G3D, "No swapchain - can't create backbuffers");320return false;321}322323VkCommandBuffer cmdInit = GetInitCmd();324325if (vulkan_->HasRealSwapchain()) {326if (!CreateSwapchainViewsAndDepth(cmdInit, &postInitBarrier_, frameDataShared_)) {327return false;328}329}330331curWidthRaw_ = -1;332curHeightRaw_ = -1;333334if (newInflightFrames_ != -1) {335INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);336vulkan_->UpdateInflightFrames(newInflightFrames_);337newInflightFrames_ = -1;338}339340outOfDateFrames_ = 0;341342for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {343auto &frameData = frameData_[i];344frameData.readyForFence = true; // Just in case.345}346347// Start the thread(s).348StartThreads();349return true;350}351352bool VulkanRenderManager::CreateSwapchainViewsAndDepth(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers, FrameDataShared &frameDataShared) {353VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &frameDataShared.swapchainImageCount_, nullptr);354_dbg_assert_(res == VK_SUCCESS);355356VkImage *swapchainImages = new VkImage[frameDataShared.swapchainImageCount_];357res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &frameDataShared.swapchainImageCount_, swapchainImages);358if (res != VK_SUCCESS) {359ERROR_LOG(Log::G3D, "vkGetSwapchainImagesKHR failed");360delete[] swapchainImages;361return false;362}363364static const VkSemaphoreCreateInfo semaphoreCreateInfo = { VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO };365for (uint32_t i = 0; i < frameDataShared.swapchainImageCount_; i++) {366SwapchainImageData sc_buffer{};367sc_buffer.image = swapchainImages[i];368res = vkCreateSemaphore(vulkan_->GetDevice(), &semaphoreCreateInfo, nullptr, &sc_buffer.renderingCompleteSemaphore);369_dbg_assert_(res == VK_SUCCESS);370371VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };372color_image_view.format = vulkan_->GetSwapchainFormat();373color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;374color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;375color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;376color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;377color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;378color_image_view.subresourceRange.baseMipLevel = 0;379color_image_view.subresourceRange.levelCount = 1;380color_image_view.subresourceRange.baseArrayLayer = 0;381color_image_view.subresourceRange.layerCount = 1; // TODO: Investigate hw-assisted stereo.382color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;383color_image_view.flags = 0;384color_image_view.image = sc_buffer.image;385386// We leave the images as UNDEFINED, there's no need to pre-transition them as387// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.388// Also, turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.389390res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);391vulkan_->SetDebugName(sc_buffer.view, VK_OBJECT_TYPE_IMAGE_VIEW, "swapchain_view");392frameDataShared.swapchainImages_.push_back(sc_buffer);393_dbg_assert_(res == VK_SUCCESS);394}395delete[] swapchainImages;396397// Must be before InitBackbufferRenderPass.398if (queueRunner_.InitDepthStencilBuffer(cmdInit, barriers)) {399queueRunner_.InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight(), frameDataShared);400}401return true;402}403404void VulkanRenderManager::StartThreads() {405{406std::unique_lock<std::mutex> lock(compileQueueMutex_);407_assert_(compileQueue_.empty());408}409410runCompileThread_ = true; // For controlling the compiler thread's exit411412if (useRenderThread_) {413INFO_LOG(Log::G3D, "Starting Vulkan submission thread");414renderThread_ = std::thread(&VulkanRenderManager::RenderThreadFunc, this);415}416INFO_LOG(Log::G3D, "Starting Vulkan compiler thread");417compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this);418419if (measurePresentTime_ && vulkan_->Extensions().KHR_present_wait && vulkan_->GetPresentMode() == VK_PRESENT_MODE_FIFO_KHR) {420INFO_LOG(Log::G3D, "Starting Vulkan present wait thread");421presentWaitThread_ = std::thread(&VulkanRenderManager::PresentWaitThreadFunc, this);422}423}424425// MUST be called from emuthread!426void VulkanRenderManager::StopThreads() {427INFO_LOG(Log::G3D, "VulkanRenderManager::StopThreads");428// Make sure we don't have an open non-backbuffer render pass429if (curRenderStep_ && curRenderStep_->render.framebuffer != nullptr) {430EndCurRenderStep();431}432// Not sure this is a sensible check - should be ok even if not.433// _dbg_assert_(steps_.empty());434435if (useRenderThread_) {436_dbg_assert_(renderThread_.joinable());437// Tell the render thread to quit when it's done.438VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);439task->frame = vulkan_->GetCurFrame();440{441std::unique_lock<std::mutex> lock(pushMutex_);442renderThreadQueue_.push(task);443}444pushCondVar_.notify_one();445// Once the render thread encounters the above exit task, it'll exit.446renderThread_.join();447INFO_LOG(Log::G3D, "Vulkan submission thread joined. Frame=%d", vulkan_->GetCurFrame());448}449450for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {451auto &frameData = frameData_[i];452// Zero the queries so we don't try to pull them later.453frameData.profile.timestampDescriptions.clear();454}455456{457std::unique_lock<std::mutex> lock(compileQueueMutex_);458runCompileThread_ = false; // Compiler and present thread both look at this bool.459_assert_(compileThread_.joinable());460compileCond_.notify_one();461}462compileThread_.join();463464if (presentWaitThread_.joinable()) {465presentWaitThread_.join();466}467468INFO_LOG(Log::G3D, "Vulkan compiler thread joined. Now wait for any straggling compile tasks. runCompileThread_ = %d", (int)runCompileThread_);469CreateMultiPipelinesTask::WaitForAll();470471{472std::unique_lock<std::mutex> lock(compileQueueMutex_);473_assert_(compileQueue_.empty());474}475}476477void VulkanRenderManager::DestroyBackbuffers() {478StopThreads();479vulkan_->WaitUntilQueueIdle();480481for (auto &image : frameDataShared_.swapchainImages_) {482vulkan_->Delete().QueueDeleteImageView(image.view);483vkDestroySemaphore(vulkan_->GetDevice(), image.renderingCompleteSemaphore, nullptr);484}485frameDataShared_.swapchainImages_.clear();486frameDataShared_.swapchainImageCount_ = 0;487488queueRunner_.DestroyBackBuffers();489}490491// Hm, I'm finding the occasional report of these asserts.492void VulkanRenderManager::CheckNothingPending() {493_assert_(pipelinesToCheck_.empty());494{495std::unique_lock<std::mutex> lock(compileQueueMutex_);496_assert_(compileQueue_.empty());497}498}499500VulkanRenderManager::~VulkanRenderManager() {501INFO_LOG(Log::G3D, "VulkanRenderManager destructor");502503{504std::unique_lock<std::mutex> lock(compileQueueMutex_);505_assert_(compileQueue_.empty());506}507508if (useRenderThread_) {509_dbg_assert_(!renderThread_.joinable());510}511512_dbg_assert_(!runCompileThread_); // StopThread should already have been called from DestroyBackbuffers.513514vulkan_->WaitUntilQueueIdle();515516_dbg_assert_(pipelineLayouts_.empty());517518VkDevice device = vulkan_->GetDevice();519frameDataShared_.Destroy(vulkan_);520for (int i = 0; i < inflightFramesAtStart_; i++) {521frameData_[i].Destroy(vulkan_);522}523queueRunner_.DestroyDeviceObjects();524}525526void VulkanRenderManager::CompileThreadFunc() {527SetCurrentThreadName("ShaderCompile");528while (true) {529bool exitAfterCompile = false;530std::vector<CompileQueueEntry> toCompile;531{532std::unique_lock<std::mutex> lock(compileQueueMutex_);533while (compileQueue_.empty() && runCompileThread_) {534compileCond_.wait(lock);535}536toCompile = std::move(compileQueue_);537compileQueue_.clear();538if (!runCompileThread_) {539exitAfterCompile = true;540}541}542543int countToCompile = (int)toCompile.size();544545// Here we sort the pending pipelines by vertex and fragment shaders,546std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;547548double scheduleTime = time_now_d();549550// Here we sort pending graphics pipelines by vertex and fragment shaders, and split up further.551// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.552// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,553// so we might want a different splitting algorithm there.554for (auto &entry : toCompile) {555switch (entry.type) {556case CompileQueueEntry::Type::GRAPHICS:557{558map[std::make_pair(entry.graphics->desc->vertexShader, entry.graphics->desc->fragmentShader)].push_back(559SinglePipelineTask{560entry.graphics,561entry.compatibleRenderPass,562entry.renderPassType,563entry.sampleCount,564scheduleTime, // these two are for logging purposes.565countToCompile,566}567);568break;569}570}571}572573for (const auto &iter : map) {574auto &shaders = iter.first;575auto &entries = iter.second;576577// NOTICE_LOG(Log::G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());578579Task *task = new CreateMultiPipelinesTask(vulkan_, entries);580g_threadManager.EnqueueTask(task);581}582583if (exitAfterCompile) {584break;585}586587// Hold off just a bit before we check again, to allow bunches of pipelines to collect.588sleep_ms(1, "pipeline-collect");589}590591std::unique_lock<std::mutex> lock(compileQueueMutex_);592_assert_(compileQueue_.empty());593}594595void VulkanRenderManager::RenderThreadFunc() {596SetCurrentThreadName("VulkanRenderMan");597while (true) {598_dbg_assert_(useRenderThread_);599600// Pop a task of the queue and execute it.601VKRRenderThreadTask *task = nullptr;602{603std::unique_lock<std::mutex> lock(pushMutex_);604while (renderThreadQueue_.empty()) {605pushCondVar_.wait(lock);606}607task = renderThreadQueue_.front();608renderThreadQueue_.pop();609}610611// Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to612// push more work when it feels like it, and just start working.613if (task->runType == VKRRunType::EXIT) {614// Oh, host wanted out. Let's leave.615delete task;616// In this case, there should be no more tasks.617break;618}619620Run(*task);621delete task;622}623624// Wait for the device to be done with everything, before tearing stuff down.625// TODO: Do we really need this? It's probably a good idea, though.626vkDeviceWaitIdle(vulkan_->GetDevice());627VLOG("PULL: Quitting");628}629630void VulkanRenderManager::PresentWaitThreadFunc() {631SetCurrentThreadName("PresentWait");632633#if !PPSSPP_PLATFORM(IOS_APP_STORE)634_dbg_assert_(vkWaitForPresentKHR != nullptr);635636uint64_t waitedId = frameIdGen_;637while (runCompileThread_) {638const uint64_t timeout = 1000000000ULL; // 1 sec639if (VK_SUCCESS == vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout)) {640frameTimeHistory_[waitedId].actualPresent = time_now_d();641frameTimeHistory_[waitedId].waitCount++;642waitedId++;643} else {644// We caught up somehow, which is a bad sign (we should have blocked, right?). Maybe we should break out of the loop?645sleep_ms(1, "present-wait-problem");646frameTimeHistory_[waitedId].waitCount++;647}648_dbg_assert_(waitedId <= frameIdGen_);649}650#endif651652INFO_LOG(Log::G3D, "Leaving PresentWaitThreadFunc()");653}654655void VulkanRenderManager::PollPresentTiming() {656// For VK_GOOGLE_display_timing, we need to poll.657658// Poll for information about completed frames.659// NOTE: We seem to get the information pretty late! Like after 6 frames, which is quite weird.660// Tested on POCO F4.661// TODO: Getting validation errors that this should be called from the thread doing the presenting.662// Probably a fair point. For now, we turn it off.663if (measurePresentTime_ && vulkan_->Extensions().GOOGLE_display_timing) {664uint32_t count = 0;665vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, nullptr);666if (count > 0) {667VkPastPresentationTimingGOOGLE *timings = new VkPastPresentationTimingGOOGLE[count];668vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, timings);669for (uint32_t i = 0; i < count; i++) {670uint64_t presentId = timings[i].presentID;671frameTimeHistory_[presentId].actualPresent = from_time_raw(timings[i].actualPresentTime);672frameTimeHistory_[presentId].desiredPresentTime = from_time_raw(timings[i].desiredPresentTime);673frameTimeHistory_[presentId].earliestPresentTime = from_time_raw(timings[i].earliestPresentTime);674double presentMargin = from_time_raw_relative(timings[i].presentMargin);675frameTimeHistory_[presentId].presentMargin = presentMargin;676}677delete[] timings;678}679}680}681682void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) {683double frameBeginTime = time_now_d()684VLOG("BeginFrame");685VkDevice device = vulkan_->GetDevice();686687int curFrame = vulkan_->GetCurFrame();688FrameData &frameData = frameData_[curFrame];689VLOG("PUSH: Fencing %d", curFrame);690691// Makes sure the submission from the previous time around has happened. Otherwise692// we are not allowed to wait from another thread here..693if (useRenderThread_) {694std::unique_lock<std::mutex> lock(frameData.fenceMutex);695while (!frameData.readyForFence) {696frameData.fenceCondVar.wait(lock);697}698frameData.readyForFence = false;699}700701// This must be the very first Vulkan call we do in a new frame.702// Makes sure the very last command buffer from the frame before the previous has been fully executed.703if (vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX) == VK_ERROR_DEVICE_LOST) {704_assert_msg_(false, "Device lost in vkWaitForFences");705}706vkResetFences(device, 1, &frameData.fence);707708uint64_t frameId = frameIdGen_++;709710PollPresentTiming();711712ResetDescriptorLists(curFrame);713714int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;715716FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameId);717frameTimeData.frameId = frameId;718frameTimeData.frameBegin = frameBeginTime;719frameTimeData.afterFenceWait = time_now_d();720721// Can't set this until after the fence.722frameData.profile.enabled = enableProfiling;723frameData.profile.timestampsEnabled = enableProfiling && validBits > 0;724frameData.frameId = frameId;725726uint64_t queryResults[MAX_TIMESTAMP_QUERIES];727728if (enableProfiling) {729// Pull the profiling results from last time and produce a summary!730if (!frameData.profile.timestampDescriptions.empty() && frameData.profile.timestampsEnabled) {731int numQueries = (int)frameData.profile.timestampDescriptions.size();732VkResult res = vkGetQueryPoolResults(733vulkan_->GetDevice(),734frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),735VK_QUERY_RESULT_64_BIT);736if (res == VK_SUCCESS) {737double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);738uint64_t timestampDiffMask = validBits == 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);739std::stringstream str;740741char line[256];742totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));743totalGPUTimeMs_.Format(line, sizeof(line));744str << line;745renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);746renderCPUTimeMs_.Format(line, sizeof(line));747str << line;748descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);749descUpdateTimeMs_.Format(line, sizeof(line));750str << line;751snprintf(line, sizeof(line), "Descriptors written: %d (dedup: %d)\n", frameData.profile.descriptorsWritten, frameData.profile.descriptorsDeduped);752str << line;753snprintf(line, sizeof(line), "Resource deletions: %d\n", vulkan_->GetLastDeleteCount());754str << line;755for (int i = 0; i < numQueries - 1; i++) {756uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;757double milliseconds = (double)diff * timestampConversionFactor;758759// Can't use SimpleStat for these very easily since these are dynamic per frame.760// Only the first one is static, the initCmd.761// Could try some hashtable tracking for the rest, later.762if (i == 0) {763initTimeMs_.Update(milliseconds);764initTimeMs_.Format(line, sizeof(line));765} else {766snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);767}768str << line;769}770frameData.profile.profileSummary = str.str();771} else {772frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";773}774} else {775std::stringstream str;776char line[256];777renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);778renderCPUTimeMs_.Format(line, sizeof(line));779str << line;780descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);781descUpdateTimeMs_.Format(line, sizeof(line));782str << line;783snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten);784str << line;785frameData.profile.profileSummary = str.str();786}787788#ifdef _DEBUG789std::string cmdString;790for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {791if (frameData.profile.commandCounts[i] > 0) {792cmdString += StringFromFormat("%s: %d\n", VKRRenderCommandToString((VKRRenderCommand)i), frameData.profile.commandCounts[i]);793}794}795memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));796frameData.profile.profileSummary += cmdString;797#endif798}799800frameData.profile.descriptorsWritten = 0;801frameData.profile.descriptorsDeduped = 0;802803// Must be after the fence - this performs deletes.804VLOG("PUSH: BeginFrame %d", curFrame);805806insideFrame_ = true;807vulkan_->BeginFrame(enableLogProfiler ? GetInitCmd() : VK_NULL_HANDLE);808809frameData.profile.timestampDescriptions.clear();810if (frameData.profile.timestampsEnabled) {811// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,812// unless we want to limit ourselves to only measure the main cmd buffer.813// Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.814// Reserve the first two queries for initCmd.815frameData.profile.timestampDescriptions.emplace_back("initCmd Begin");816frameData.profile.timestampDescriptions.emplace_back("initCmd");817VkCommandBuffer initCmd = GetInitCmd();818}819}820821VkCommandBuffer VulkanRenderManager::GetInitCmd() {822int curFrame = vulkan_->GetCurFrame();823return frameData_[curFrame].GetInitCmd(vulkan_);824}825826void VulkanRenderManager::ReportBadStateForDraw() {827const char *cause1 = "";828char cause2[256];829cause2[0] = '\0';830if (!curRenderStep_) {831cause1 = "No current render step";832}833if (curRenderStep_ && curRenderStep_->stepType != VKRStepType::RENDER) {834cause1 = "Not a render step: ";835std::string str = VulkanQueueRunner::StepToString(vulkan_, *curRenderStep_);836truncate_cpy(cause2, str);837}838ERROR_LOG_REPORT_ONCE(baddraw, Log::G3D, "Can't draw: %s%s. Step count: %d", cause1, cause2, (int)steps_.size());839}840841int VulkanRenderManager::WaitForPipelines() {842return CreateMultiPipelinesTask::WaitForAll();843}844845VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, bool cacheLoad, const char *tag) {846if (!desc->vertexShader || !desc->fragmentShader) {847ERROR_LOG(Log::G3D, "Can't create graphics pipeline with missing vs/ps: %p %p", desc->vertexShader, desc->fragmentShader);848return nullptr;849}850851VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);852pipeline->desc = desc;853pipeline->desc->AddRef();854if (curRenderStep_ && !cacheLoad) {855// The common case during gameplay.856pipelinesToCheck_.push_back(pipeline);857} else {858if (!variantBitmask) {859WARN_LOG(Log::G3D, "WARNING: Will not compile any variants of pipeline, not in renderpass and empty variantBitmask");860}861// Presumably we're in initialization, loading the shader cache.862// Look at variantBitmask to see what variants we should queue up.863RPKey key{864VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,865VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,866};867VKRRenderPass *compatibleRenderPass = queueRunner_.GetRenderPass(key);868std::unique_lock<std::mutex> lock(compileQueueMutex_);869_dbg_assert_(runCompileThread_);870bool needsCompile = false;871for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {872if (!(variantBitmask & (1 << i)))873continue;874RenderPassType rpType = (RenderPassType)i;875876// Sanity check - don't compile incompatible types (could be caused by corrupt caches, changes in data structures, etc).877if ((pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) && !RenderPassTypeHasDepth(rpType)) {878WARN_LOG(Log::G3D, "Not compiling pipeline that requires depth, for non depth renderpass type");879continue;880}881// Shouldn't hit this, these should have been filtered elsewhere. However, still a good check to do.882if (sampleCount == VK_SAMPLE_COUNT_1_BIT && RenderPassTypeHasMultisample(rpType)) {883WARN_LOG(Log::G3D, "Not compiling single sample pipeline for a multisampled render pass type");884continue;885}886887if (rpType == RenderPassType::BACKBUFFER) {888sampleCount = VK_SAMPLE_COUNT_1_BIT;889}890891// Sanity check892if (runCompileThread_) {893pipeline->pipeline[i] = Promise<VkPipeline>::CreateEmpty();894compileQueue_.emplace_back(pipeline, compatibleRenderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount);895}896needsCompile = true;897}898if (needsCompile)899compileCond_.notify_one();900}901return pipeline;902}903904void VulkanRenderManager::EndCurRenderStep() {905if (!curRenderStep_)906return;907908_dbg_assert_(runCompileThread_);909910RPKey key{911curRenderStep_->render.colorLoad, curRenderStep_->render.depthLoad, curRenderStep_->render.stencilLoad,912curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,913};914// Save the accumulated pipeline flags so we can use that to configure the render pass.915// We'll often be able to avoid loading/saving the depth/stencil buffer.916curRenderStep_->render.pipelineFlags = curPipelineFlags_;917bool depthStencil = (curPipelineFlags_ & PipelineFlags::USES_DEPTH_STENCIL) != 0;918RenderPassType rpType = depthStencil ? RenderPassType::HAS_DEPTH : RenderPassType::DEFAULT;919920if (curRenderStep_->render.framebuffer && (rpType & RenderPassType::HAS_DEPTH) && !curRenderStep_->render.framebuffer->HasDepth()) {921WARN_LOG(Log::G3D, "Trying to render with a depth-writing pipeline to a framebuffer without depth: %s", curRenderStep_->render.framebuffer->Tag());922rpType = RenderPassType::DEFAULT;923}924925if (!curRenderStep_->render.framebuffer) {926rpType = RenderPassType::BACKBUFFER;927} else {928// Framebuffers can be stereo, and if so, will control the render pass type to match.929// Pipelines can be mono and render fine to stereo etc, so not checking them here.930// Note that we don't support rendering to just one layer of a multilayer framebuffer!931if (curRenderStep_->render.framebuffer->numLayers > 1) {932rpType = (RenderPassType)(rpType | RenderPassType::MULTIVIEW);933}934935if (curRenderStep_->render.framebuffer->sampleCount != VK_SAMPLE_COUNT_1_BIT) {936rpType = (RenderPassType)(rpType | RenderPassType::MULTISAMPLE);937}938}939940VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);941curRenderStep_->render.renderPassType = rpType;942943VkSampleCountFlagBits sampleCount = curRenderStep_->render.framebuffer ? curRenderStep_->render.framebuffer->sampleCount : VK_SAMPLE_COUNT_1_BIT;944945bool needsCompile = false;946for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {947if (!pipeline) {948// Not good, but let's try not to crash.949continue;950}951std::unique_lock<std::mutex> lock(pipeline->mutex_);952if (!pipeline->pipeline[(size_t)rpType]) {953pipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();954lock.unlock();955956_assert_(renderPass);957compileQueueMutex_.lock();958compileQueue_.emplace_back(pipeline, renderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount);959compileQueueMutex_.unlock();960needsCompile = true;961}962}963964compileQueueMutex_.lock();965if (needsCompile)966compileCond_.notify_one();967compileQueueMutex_.unlock();968pipelinesToCheck_.clear();969970// We don't do this optimization for very small targets, probably not worth it.971if (!curRenderArea_.Empty() && (curWidth_ > 32 && curHeight_ > 32)) {972curRenderStep_->render.renderArea = curRenderArea_.ToVkRect2D();973} else {974curRenderStep_->render.renderArea.offset = {};975curRenderStep_->render.renderArea.extent = { (uint32_t)curWidth_, (uint32_t)curHeight_ };976}977curRenderArea_.Reset();978979// We no longer have a current render step.980curRenderStep_ = nullptr;981curPipelineFlags_ = (PipelineFlags)0;982}983984void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {985_dbg_assert_(insideFrame_);986987// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.988if (!steps_.empty() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {989u32 clearMask = 0;990if (color == VKRRenderPassLoadAction::CLEAR) {991clearMask |= VK_IMAGE_ASPECT_COLOR_BIT;992}993if (depth == VKRRenderPassLoadAction::CLEAR) {994clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;995curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;996}997if (stencil == VKRRenderPassLoadAction::CLEAR) {998clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;999curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;1000}10011002// If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.1003// If there's no clear needed, let's also do that.1004//1005// However, if we do need a clear and there are no commands in the previous pass,1006// we want the queuerunner to have the opportunity to merge, so we'll go ahead and make a new renderpass.1007if (clearMask == 0 || !steps_.back()->commands.empty()) {1008curRenderStep_ = steps_.back();1009curStepHasViewport_ = false;1010curStepHasScissor_ = false;1011for (const auto &c : steps_.back()->commands) {1012if (c.cmd == VKRRenderCommand::VIEWPORT) {1013curStepHasViewport_ = true;1014} else if (c.cmd == VKRRenderCommand::SCISSOR) {1015curStepHasScissor_ = true;1016}1017}1018if (clearMask != 0) {1019VkRenderData data{ VKRRenderCommand::CLEAR };1020data.clear.clearColor = clearColor;1021data.clear.clearZ = clearDepth;1022data.clear.clearStencil = clearStencil;1023data.clear.clearMask = clearMask;1024curRenderStep_->commands.push_back(data);1025curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1026}1027return;1028}1029}10301031#ifdef _DEBUG1032SanityCheckPassesOnAdd();1033#endif10341035// More redundant bind elimination.1036if (curRenderStep_) {1037if (curRenderStep_->commands.empty()) {1038if (curRenderStep_->render.colorLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.depthLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.stencilLoad != VKRRenderPassLoadAction::CLEAR) {1039// Can trivially kill the last empty render step.1040_dbg_assert_(steps_.back() == curRenderStep_);1041delete steps_.back();1042steps_.pop_back();1043curRenderStep_ = nullptr;1044}1045VLOG("Empty render step. Usually happens after uploading pixels..");1046}10471048EndCurRenderStep();1049}10501051// Sanity check that we don't have binds to the backbuffer before binds to other buffers. It must always be bound last.1052if (steps_.size() >= 1 && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == nullptr && fb != nullptr) {1053_dbg_assert_(false);1054}10551056// Older Mali drivers have issues with depth and stencil don't match load/clear/etc.1057// TODO: Determine which versions and do this only where necessary.1058u32 lateClearMask = 0;1059if (depth != stencil && vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_ARM) {1060if (stencil == VKRRenderPassLoadAction::DONT_CARE) {1061stencil = depth;1062} else if (depth == VKRRenderPassLoadAction::DONT_CARE) {1063depth = stencil;1064} else if (stencil == VKRRenderPassLoadAction::CLEAR) {1065depth = stencil;1066lateClearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;1067} else if (depth == VKRRenderPassLoadAction::CLEAR) {1068stencil = depth;1069lateClearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;1070}1071}10721073VKRStep *step = new VKRStep{ VKRStepType::RENDER };1074step->render.framebuffer = fb;1075step->render.colorLoad = color;1076step->render.depthLoad = depth;1077step->render.stencilLoad = stencil;1078step->render.colorStore = VKRRenderPassStoreAction::STORE;1079step->render.depthStore = VKRRenderPassStoreAction::STORE;1080step->render.stencilStore = VKRRenderPassStoreAction::STORE;1081step->render.clearColor = clearColor;1082step->render.clearDepth = clearDepth;1083step->render.clearStencil = clearStencil;1084step->render.numDraws = 0;1085step->render.numReads = 0;1086step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1087step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1088// pipelineFlags, renderArea and renderPassType get filled in when we finalize the step. Do not read from them before that.1089step->tag = tag;1090steps_.push_back(step);10911092if (fb) {1093// If there's a KEEP, we naturally read from the framebuffer.1094if (color == VKRRenderPassLoadAction::KEEP || depth == VKRRenderPassLoadAction::KEEP || stencil == VKRRenderPassLoadAction::KEEP) {1095step->dependencies.insert(fb);1096}1097}10981099curRenderStep_ = step;1100curStepHasViewport_ = false;1101curStepHasScissor_ = false;1102if (fb) {1103curWidthRaw_ = fb->width;1104curHeightRaw_ = fb->height;1105curWidth_ = fb->width;1106curHeight_ = fb->height;1107} else {1108curWidthRaw_ = vulkan_->GetBackbufferWidth();1109curHeightRaw_ = vulkan_->GetBackbufferHeight();1110if (g_display.rotation == DisplayRotation::ROTATE_90 ||1111g_display.rotation == DisplayRotation::ROTATE_270) {1112curWidth_ = curHeightRaw_;1113curHeight_ = curWidthRaw_;1114} else {1115curWidth_ = curWidthRaw_;1116curHeight_ = curHeightRaw_;1117}1118}11191120if (color == VKRRenderPassLoadAction::CLEAR || depth == VKRRenderPassLoadAction::CLEAR || stencil == VKRRenderPassLoadAction::CLEAR) {1121curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1122}11231124// See above - we add a clear afterward if only one side for depth/stencil CLEAR/KEEP.1125if (lateClearMask != 0) {1126VkRenderData data{ VKRRenderCommand::CLEAR };1127data.clear.clearColor = clearColor;1128data.clear.clearZ = clearDepth;1129data.clear.clearStencil = clearStencil;1130data.clear.clearMask = lateClearMask;1131curRenderStep_->commands.push_back(data);1132}11331134if (invalidationCallback_) {1135invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);1136}1137}11381139bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {1140_dbg_assert_(insideFrame_);11411142for (int i = (int)steps_.size() - 1; i >= 0; i--) {1143if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1144steps_[i]->render.numReads++;1145break;1146}1147}11481149EndCurRenderStep();11501151VKRStep *step = new VKRStep{ VKRStepType::READBACK };1152step->readback.aspectMask = aspectBits;1153step->readback.src = src;1154step->readback.srcRect.offset = { x, y };1155step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };1156step->readback.delayed = mode == Draw::ReadbackMode::OLD_DATA_OK;1157step->dependencies.insert(src);1158step->tag = tag;1159steps_.push_back(step);11601161if (mode == Draw::ReadbackMode::BLOCK) {1162FlushSync();1163}11641165Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED;1166if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) {1167if (src) {1168switch (src->color.format) {1169case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1170default: _assert_(false);1171}1172} else {1173// Backbuffer.1174if (!(vulkan_->GetSurfaceCapabilities().supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) {1175ERROR_LOG(Log::G3D, "Copying from backbuffer not supported, can't take screenshots");1176return false;1177}1178switch (vulkan_->GetSwapchainFormat()) {1179case VK_FORMAT_B8G8R8A8_UNORM: srcFormat = Draw::DataFormat::B8G8R8A8_UNORM; break;1180case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1181// NOTE: If you add supported formats here, make sure to also support them in VulkanQueueRunner::CopyReadbackBuffer.1182default:1183ERROR_LOG(Log::G3D, "Unsupported backbuffer format for screenshots");1184return false;1185}1186}1187} else if (aspectBits & VK_IMAGE_ASPECT_STENCIL_BIT) {1188// Copies from stencil are always S8.1189srcFormat = Draw::DataFormat::S8;1190} else if (aspectBits & VK_IMAGE_ASPECT_DEPTH_BIT) {1191switch (src->depth.format) {1192case VK_FORMAT_D24_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D24_S8; break;1193case VK_FORMAT_D32_SFLOAT_S8_UINT: srcFormat = Draw::DataFormat::D32F; break;1194case VK_FORMAT_D16_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D16; break;1195default: _assert_(false);1196}1197} else {1198_assert_(false);1199}12001201// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1202return queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()],1203mode == Draw::ReadbackMode::OLD_DATA_OK ? src : nullptr, w, h, srcFormat, destFormat, pixelStride, pixels);1204}12051206void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {1207_dbg_assert_(insideFrame_);12081209EndCurRenderStep();12101211VKRStep *step = new VKRStep{ VKRStepType::READBACK_IMAGE };1212step->readback_image.image = image;1213step->readback_image.srcRect.offset = { x, y };1214step->readback_image.srcRect.extent = { (uint32_t)w, (uint32_t)h };1215step->readback_image.mipLevel = mipLevel;1216step->tag = tag;1217steps_.push_back(step);12181219FlushSync();12201221// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1222queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels);12231224_dbg_assert_(steps_.empty());1225}12261227static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {1228// Here we remove any DRAW type commands when we hit a CLEAR.1229for (auto &c : *cmds) {1230if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {1231c.cmd = VKRRenderCommand::REMOVED;1232}1233}1234}12351236static void CleanupRenderCommands(FastVec<VkRenderData> *cmds) {1237size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];1238memset(lastCommand, -1, sizeof(lastCommand));12391240// Find any duplicate state commands (likely from RemoveDrawCommands.)1241for (size_t i = 0; i < cmds->size(); ++i) {1242auto &c = cmds->at(i);1243auto &lastOfCmd = lastCommand[(uint8_t)c.cmd];12441245switch (c.cmd) {1246case VKRRenderCommand::REMOVED:1247continue;12481249case VKRRenderCommand::VIEWPORT:1250case VKRRenderCommand::SCISSOR:1251case VKRRenderCommand::BLEND:1252case VKRRenderCommand::STENCIL:1253if (lastOfCmd != -1) {1254cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1255}1256break;12571258case VKRRenderCommand::PUSH_CONSTANTS:1259// TODO: For now, we have to keep this one (it has an offset.) Still update lastCommand.1260break;12611262case VKRRenderCommand::CLEAR:1263// Ignore, doesn't participate in state.1264continue;12651266case VKRRenderCommand::DRAW_INDEXED:1267case VKRRenderCommand::DRAW:1268default:1269// Boundary - must keep state before this.1270memset(lastCommand, -1, sizeof(lastCommand));1271continue;1272}12731274lastOfCmd = i;1275}12761277// At this point, anything in lastCommand can be cleaned up too.1278// Note that it's safe to remove the last unused PUSH_CONSTANTS here.1279for (size_t i = 0; i < ARRAY_SIZE(lastCommand); ++i) {1280auto &lastOfCmd = lastCommand[i];1281if (lastOfCmd != -1) {1282cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1283}1284}1285}12861287void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {1288_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);1289if (!clearMask)1290return;12911292// If this is the first drawing command or clears everything, merge it into the pass.1293int allAspects = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;1294if (curRenderStep_->render.numDraws == 0 || clearMask == allAspects) {1295curRenderStep_->render.clearColor = clearColor;1296curRenderStep_->render.clearDepth = clearZ;1297curRenderStep_->render.clearStencil = clearStencil;1298curRenderStep_->render.colorLoad = (clearMask & VK_IMAGE_ASPECT_COLOR_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1299curRenderStep_->render.depthLoad = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1300curRenderStep_->render.stencilLoad = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;13011302if (clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1303if (curRenderStep_->render.framebuffer && !curRenderStep_->render.framebuffer->HasDepth()) {1304WARN_LOG(Log::G3D, "Trying to clear depth/stencil on a non-depth framebuffer: %s", curRenderStep_->render.framebuffer->Tag());1305} else {1306curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;1307}1308}13091310// In case there were commands already.1311curRenderStep_->render.numDraws = 0;1312RemoveDrawCommands(&curRenderStep_->commands);1313} else {1314VkRenderData data{ VKRRenderCommand::CLEAR };1315data.clear.clearColor = clearColor;1316data.clear.clearZ = clearZ;1317data.clear.clearStencil = clearStencil;1318data.clear.clearMask = clearMask;1319curRenderStep_->commands.push_back(data);1320}13211322curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1323}13241325void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, VkImageAspectFlags aspectMask, const char *tag) {1326#ifdef _DEBUG1327SanityCheckPassesOnAdd();1328#endif1329// _dbg_assert_msg_(src != dst, "Can't copy within the same buffer");1330if (src == dst) {1331// TODO: Check for rectangle self-overlap.1332}13331334_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1335_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1336_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1337_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);13381339_dbg_assert_msg_(srcRect.extent.width > 0, "copy srcwidth == 0");1340_dbg_assert_msg_(srcRect.extent.height > 0, "copy srcheight == 0");13411342_dbg_assert_msg_(dstPos.x >= 0, "dstPos offset x (%d) < 0", dstPos.x);1343_dbg_assert_msg_(dstPos.y >= 0, "dstPos offset y (%d) < 0", dstPos.y);1344_dbg_assert_msg_(dstPos.x + srcRect.extent.width <= (uint32_t)dst->width, "dstPos + extent x > width");1345_dbg_assert_msg_(dstPos.y + srcRect.extent.height <= (uint32_t)dst->height, "dstPos + extent y > height");13461347VkImageLayout finalSrcLayoutBeforeCopy = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;1348if (src == dst) {1349// We only use the first loop before, and transition to VK_IMAGE_LAYOUT_GENERAL.1350finalSrcLayoutBeforeCopy = VK_IMAGE_LAYOUT_GENERAL;1351}13521353for (int i = (int)steps_.size() - 1; i >= 0; i--) {1354if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1355if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1356if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1357steps_[i]->render.finalColorLayout = finalSrcLayoutBeforeCopy;1358}1359}1360if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1361if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1362steps_[i]->render.finalDepthStencilLayout = finalSrcLayoutBeforeCopy;1363}1364}1365steps_[i]->render.numReads++;1366break;1367}1368}13691370if (src != dst) {1371for (int i = (int)steps_.size() - 1; i >= 0; i--) {1372if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == dst) {1373if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1374if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1375steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1376}1377}1378if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1379if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1380steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1381}1382}1383break;1384}1385}1386}13871388EndCurRenderStep();13891390VKRStep *step = new VKRStep{ VKRStepType::COPY };13911392step->copy.aspectMask = aspectMask;1393step->copy.src = src;1394step->copy.srcRect = srcRect;1395step->copy.dst = dst;1396step->copy.dstPos = dstPos;1397step->dependencies.insert(src);1398step->tag = tag;1399bool fillsDst = dst && srcRect.offset.x == 0 && srcRect.offset.y == 0 && srcRect.extent.width == dst->width && srcRect.extent.height == dst->height;1400if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)1401step->dependencies.insert(dst);14021403steps_.push_back(step);1404}14051406void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, VkImageAspectFlags aspectMask, VkFilter filter, const char *tag) {1407#ifdef _DEBUG1408SanityCheckPassesOnAdd();1409#endif14101411_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1412_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1413_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1414_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);14151416_dbg_assert_msg_(srcRect.extent.width > 0, "blit srcwidth == 0");1417_dbg_assert_msg_(srcRect.extent.height > 0, "blit srcheight == 0");14181419_dbg_assert_msg_(dstRect.offset.x >= 0, "dstrect offset x < 0");1420_dbg_assert_msg_(dstRect.offset.y >= 0, "dstrect offset y < 0");1421_dbg_assert_msg_(dstRect.offset.x + dstRect.extent.width <= (uint32_t)dst->width, "dstrect offset x + extent > width");1422_dbg_assert_msg_(dstRect.offset.y + dstRect.extent.height <= (uint32_t)dst->height, "dstrect offset y + extent > height");14231424_dbg_assert_msg_(dstRect.extent.width > 0, "blit dstwidth == 0");1425_dbg_assert_msg_(dstRect.extent.height > 0, "blit dstheight == 0");14261427// TODO: Seem to be missing final layouts here like in Copy...14281429for (int i = (int)steps_.size() - 1; i >= 0; i--) {1430if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1431steps_[i]->render.numReads++;1432break;1433}1434}14351436// Sanity check. Added an assert to try to gather more info.1437// Got this assert in NPJH50443 FINAL FANTASY TYPE-0, but pretty rare. Moving back to debug assert.1438if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1439_dbg_assert_msg_(src->depth.image != VK_NULL_HANDLE, "%s", src->Tag());1440_dbg_assert_msg_(dst->depth.image != VK_NULL_HANDLE, "%s", dst->Tag());14411442if (!src->depth.image || !dst->depth.image) {1443// Something has gone wrong, but let's try to stumble along.1444return;1445}1446}14471448EndCurRenderStep();14491450VKRStep *step = new VKRStep{ VKRStepType::BLIT };1451step->blit.aspectMask = aspectMask;1452step->blit.src = src;1453step->blit.srcRect = srcRect;1454step->blit.dst = dst;1455step->blit.dstRect = dstRect;1456step->blit.filter = filter;1457step->dependencies.insert(src);1458step->tag = tag;1459bool fillsDst = dst && dstRect.offset.x == 0 && dstRect.offset.y == 0 && dstRect.extent.width == dst->width && dstRect.extent.height == dst->height;1460if (!fillsDst)1461step->dependencies.insert(dst);14621463steps_.push_back(step);1464}14651466VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int layer) {1467_dbg_assert_(curRenderStep_ != nullptr);1468_dbg_assert_(fb != nullptr);14691470// We don't support texturing from stencil, neither do we support texturing from depth|stencil together (nonsensical).1471_dbg_assert_(aspectBit == VK_IMAGE_ASPECT_COLOR_BIT || aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT);14721473// Mark the dependency, check for required transitions, and return the image.14741475// Optimization: If possible, use final*Layout to put the texture into the correct layout "early".1476for (int i = (int)steps_.size() - 1; i >= 0; i--) {1477if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {1478if (aspectBit == VK_IMAGE_ASPECT_COLOR_BIT) {1479// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1480if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1481steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1482}1483// If we find some other layout, a copy after this is likely involved. It's fine though,1484// we'll just transition it right as we need it and lose a tiny optimization.1485} else if (aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT) {1486// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1487if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1488steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1489}1490} // We don't (yet?) support texturing from stencil images.1491steps_[i]->render.numReads++;1492break;1493}1494}14951496// Track dependencies fully.1497curRenderStep_->dependencies.insert(fb);14981499// Add this pretransition unless we already have it.1500TransitionRequest rq{ fb, aspectBit, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };1501curRenderStep_->preTransitions.insert(rq); // Note that insert avoids inserting duplicates.15021503if (layer == -1) {1504return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texAllLayersView : fb->depth.texAllLayersView;1505} else {1506return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texLayerViews[layer] : fb->depth.texLayerViews[layer];1507}1508}15091510// Called on main thread.1511// Sends the collected commands to the render thread. Submit-latency should be1512// measured from here, probably.1513void VulkanRenderManager::Finish() {1514EndCurRenderStep();15151516// Let's do just a bit of cleanup on render commands now.1517// TODO: Should look into removing this.1518for (auto &step : steps_) {1519if (step->stepType == VKRStepType::RENDER) {1520CleanupRenderCommands(&step->commands);1521}1522}15231524int curFrame = vulkan_->GetCurFrame();1525FrameData &frameData = frameData_[curFrame];15261527if (!postInitBarrier_.empty()) {1528VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1529postInitBarrier_.Flush(buffer);1530}15311532VLOG("PUSH: Frame[%d]", curFrame);1533VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SUBMIT);1534task->frame = curFrame;1535if (useRenderThread_) {1536std::unique_lock<std::mutex> lock(pushMutex_);1537renderThreadQueue_.push(task);1538renderThreadQueue_.back()->steps = std::move(steps_);1539pushCondVar_.notify_one();1540} else {1541// Just do it!1542task->steps = std::move(steps_);1543Run(*task);1544delete task;1545}15461547steps_.clear();1548}15491550void VulkanRenderManager::Present() {1551int curFrame = vulkan_->GetCurFrame();15521553VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);1554task->frame = curFrame;1555if (useRenderThread_) {1556std::unique_lock<std::mutex> lock(pushMutex_);1557renderThreadQueue_.push(task);1558pushCondVar_.notify_one();1559} else {1560// Just do it!1561Run(*task);1562delete task;1563}15641565vulkan_->EndFrame();1566insideFrame_ = false;1567}15681569// Called on the render thread.1570//1571// Can be called again after a VKRRunType::SYNC on the same frame.1572void VulkanRenderManager::Run(VKRRenderThreadTask &task) {1573FrameData &frameData = frameData_[task.frame];15741575if (task.runType == VKRRunType::PRESENT) {1576if (!frameData.skipSwap) {1577VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);1578frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();1579if (res == VK_ERROR_OUT_OF_DATE_KHR) {1580// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.1581// Do the increment.1582outOfDateFrames_++;1583} else if (res == VK_SUBOPTIMAL_KHR) {1584outOfDateFrames_++;1585} else if (res != VK_SUCCESS) {1586_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));1587} else {1588// Success1589outOfDateFrames_ = 0;1590}1591} else {1592// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.1593if (vulkan_->HasRealSwapchain()) {1594outOfDateFrames_++;1595}1596frameData.skipSwap = false;1597}1598return;1599}16001601_dbg_assert_(!frameData.hasPresentCommands);16021603if (!frameTimeHistory_[frameData.frameId].firstSubmit) {1604frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();1605}1606frameData.Submit(vulkan_, FrameSubmitType::Pending, frameDataShared_);16071608// Flush descriptors.1609double descStart = time_now_d();1610FlushDescriptors(task.frame);1611frameData.profile.descWriteTime = time_now_d() - descStart;16121613if (!frameData.hasMainCommands) {1614// Effectively resets both main and present command buffers, since they both live in this pool.1615// We always record main commands first, so we don't need to reset the present command buffer separately.1616vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);16171618VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };1619begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;1620VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);1621frameData.hasMainCommands = true;1622_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! result=%s", VulkanResultToString(res));1623}16241625queueRunner_.PreprocessSteps(task.steps);1626// Likely during shutdown, happens in headless.1627if (task.steps.empty() && !frameData.hasAcquired)1628frameData.skipSwap = true;1629//queueRunner_.LogSteps(stepsOnThread, false);1630queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_);16311632switch (task.runType) {1633case VKRRunType::SUBMIT:1634frameData.Submit(vulkan_, FrameSubmitType::FinishFrame, frameDataShared_);1635break;16361637case VKRRunType::SYNC:1638// The submit will trigger the readbackFence, and also do the wait for it.1639frameData.Submit(vulkan_, FrameSubmitType::Sync, frameDataShared_);16401641if (useRenderThread_) {1642std::unique_lock<std::mutex> lock(syncMutex_);1643syncCondVar_.notify_one();1644}16451646// At this point the GPU is idle, and we can resume filling the command buffers for the1647// current frame since and thus all previously enqueued command buffers have been1648// processed. No need to switch to the next frame number, would just be confusing.1649break;16501651default:1652_dbg_assert_(false);1653break;1654}16551656VLOG("PULL: Finished running frame %d", task.frame);1657}16581659// Called from main thread.1660void VulkanRenderManager::FlushSync() {1661_dbg_assert_(!curRenderStep_);16621663if (invalidationCallback_) {1664invalidationCallback_(InvalidationCallbackFlags::COMMAND_BUFFER_STATE);1665}16661667int curFrame = vulkan_->GetCurFrame();1668FrameData &frameData = frameData_[curFrame];16691670if (!postInitBarrier_.empty()) {1671VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1672postInitBarrier_.Flush(buffer);1673}16741675if (useRenderThread_) {1676{1677VLOG("PUSH: Frame[%d]", curFrame);1678VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);1679task->frame = curFrame;1680{1681std::unique_lock<std::mutex> lock(pushMutex_);1682renderThreadQueue_.push(task);1683renderThreadQueue_.back()->steps = std::move(steps_);1684pushCondVar_.notify_one();1685}1686steps_.clear();1687}16881689{1690std::unique_lock<std::mutex> lock(syncMutex_);1691// Wait for the flush to be hit, since we're syncing.1692while (!frameData.syncDone) {1693VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);1694syncCondVar_.wait(lock);1695}1696frameData.syncDone = false;1697}1698} else {1699VKRRenderThreadTask task(VKRRunType::SYNC);1700task.frame = curFrame;1701task.steps = std::move(steps_);1702Run(task);1703steps_.clear();1704}1705}17061707void VulkanRenderManager::ResetStats() {1708initTimeMs_.Reset();1709totalGPUTimeMs_.Reset();1710renderCPUTimeMs_.Reset();1711}17121713VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingTypesCount, bool geoShadersEnabled, const char *tag) {1714VKRPipelineLayout *layout = new VKRPipelineLayout();1715layout->SetTag(tag);1716layout->bindingTypesCount = (uint32_t)bindingTypesCount;17171718_dbg_assert_(bindingTypesCount <= ARRAY_SIZE(layout->bindingTypes));1719memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingTypesCount);17201721VkDescriptorSetLayoutBinding bindings[VKRPipelineLayout::MAX_DESC_SET_BINDINGS];1722for (int i = 0; i < (int)bindingTypesCount; i++) {1723bindings[i].binding = i;1724bindings[i].descriptorCount = 1;1725bindings[i].pImmutableSamplers = nullptr;17261727switch (bindingTypes[i]) {1728case BindingType::COMBINED_IMAGE_SAMPLER:1729bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1730bindings[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;1731break;1732case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1733bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1734bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1735break;1736case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1737bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1738bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;1739if (geoShadersEnabled) {1740bindings[i].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT;1741}1742break;1743case BindingType::STORAGE_BUFFER_VERTEX:1744bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1745bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1746break;1747case BindingType::STORAGE_BUFFER_COMPUTE:1748bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1749bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1750break;1751case BindingType::STORAGE_IMAGE_COMPUTE:1752bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1753bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1754break;1755default:1756UNREACHABLE();1757break;1758}1759}17601761VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };1762dsl.bindingCount = (uint32_t)bindingTypesCount;1763dsl.pBindings = bindings;1764VkResult res = vkCreateDescriptorSetLayout(vulkan_->GetDevice(), &dsl, nullptr, &layout->descriptorSetLayout);1765_assert_(VK_SUCCESS == res && layout->descriptorSetLayout);17661767VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };1768VkDescriptorSetLayout setLayouts[1] = { layout->descriptorSetLayout };1769pl.setLayoutCount = ARRAY_SIZE(setLayouts);1770pl.pSetLayouts = setLayouts;1771res = vkCreatePipelineLayout(vulkan_->GetDevice(), &pl, nullptr, &layout->pipelineLayout);1772_assert_(VK_SUCCESS == res && layout->pipelineLayout);17731774vulkan_->SetDebugName(layout->descriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, tag);1775vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag);17761777for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1778// Some games go beyond 1024 and end up having to resize like GTA, but most stay below so we start there.1779layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 1024);1780}17811782pipelineLayouts_.push_back(layout);1783return layout;1784}17851786void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) {1787for (auto iter = pipelineLayouts_.begin(); iter != pipelineLayouts_.end(); iter++) {1788if (*iter == layout) {1789pipelineLayouts_.erase(iter);1790break;1791}1792}1793vulkan_->Delete().QueueCallback([](VulkanContext *vulkan, void *userdata) {1794VKRPipelineLayout *layout = (VKRPipelineLayout *)userdata;1795for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1796layout->frameData[i].pool.DestroyImmediately();1797}1798vkDestroyPipelineLayout(vulkan->GetDevice(), layout->pipelineLayout, nullptr);1799vkDestroyDescriptorSetLayout(vulkan->GetDevice(), layout->descriptorSetLayout, nullptr);18001801delete layout;1802}, layout);1803}18041805void VulkanRenderManager::FlushDescriptors(int frame) {1806for (auto iter : pipelineLayouts_) {1807iter->FlushDescSets(vulkan_, frame, &frameData_[frame].profile);1808}1809}18101811void VulkanRenderManager::ResetDescriptorLists(int frame) {1812for (auto iter : pipelineLayouts_) {1813VKRPipelineLayout::FrameData &data = iter->frameData[frame];18141815data.flushedDescriptors_ = 0;1816data.descSets_.clear();1817data.descData_.clear();1818}1819}18201821VKRPipelineLayout::~VKRPipelineLayout() {1822_assert_(frameData[0].pool.IsDestroyed());1823}18241825void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) {1826_dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES);18271828FrameData &data = frameData[frame];18291830VulkanDescSetPool &pool = data.pool;1831FastVec<PackedDescriptor> &descData = data.descData_;1832FastVec<PendingDescSet> &descSets = data.descSets_;18331834pool.Reset();18351836VkDescriptorSet setCache[8];1837VkDescriptorSetLayout layoutsForAlloc[ARRAY_SIZE(setCache)];1838for (int i = 0; i < ARRAY_SIZE(setCache); i++) {1839layoutsForAlloc[i] = descriptorSetLayout;1840}1841int setsUsed = ARRAY_SIZE(setCache); // To allocate immediately.18421843// This will write all descriptors.1844// Initially, we just do a simple look-back comparing to the previous descriptor to avoid sequential dupes.1845// In theory, we could multithread this. Gotta be a lot of descriptors for that to be worth it though.18461847// Initially, let's do naive single desc set writes.1848VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS];1849VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number1850VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS];18511852// Preinitialize fields that won't change.1853for (size_t i = 0; i < ARRAY_SIZE(writes); i++) {1854writes[i].descriptorCount = 1;1855writes[i].dstArrayElement = 0;1856writes[i].pTexelBufferView = nullptr;1857writes[i].pNext = nullptr;1858writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;1859}18601861size_t start = data.flushedDescriptors_;1862int writeCount = 0, dedupCount = 0;18631864for (size_t index = start; index < descSets.size(); index++) {1865auto &d = descSets[index];18661867// This is where we look up to see if we already have an identical descriptor previously in the array.1868// We could do a simple custom hash map here that doesn't handle collisions, since those won't matter.1869// Instead, for now we just check history one item backwards. Good enough, it seems.1870if (index > start + 1) {1871if (descSets[index - 1].count == d.count) {1872if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) {1873d.set = descSets[index - 1].set;1874dedupCount++;1875continue;1876}1877}1878}18791880if (setsUsed < ARRAY_SIZE(setCache)) {1881d.set = setCache[setsUsed++];1882} else {1883// Allocate in small batches.1884bool success = pool.Allocate(setCache, ARRAY_SIZE(setCache), layoutsForAlloc);1885_dbg_assert_(success);1886d.set = setCache[0];1887setsUsed = 1;1888}18891890// TODO: Build up bigger batches of writes.1891const PackedDescriptor *data = descData.begin() + d.offset;1892int numWrites = 0;1893int numBuffers = 0;1894int numImages = 0;1895for (int i = 0; i < d.count; i++) {1896if (!data[i].image.view) { // This automatically also checks for an null buffer due to the union.1897continue;1898}1899switch (this->bindingTypes[i]) {1900case BindingType::COMBINED_IMAGE_SAMPLER:1901_dbg_assert_(data[i].image.sampler != VK_NULL_HANDLE);1902_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1903imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1904imageInfo[numImages].imageView = data[i].image.view;1905imageInfo[numImages].sampler = data[i].image.sampler;1906writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1907writes[numWrites].pImageInfo = &imageInfo[numImages];1908writes[numWrites].pBufferInfo = nullptr;1909numImages++;1910break;1911case BindingType::STORAGE_IMAGE_COMPUTE:1912_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1913imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_GENERAL;1914imageInfo[numImages].imageView = data[i].image.view;1915imageInfo[numImages].sampler = VK_NULL_HANDLE;1916writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1917writes[numWrites].pImageInfo = &imageInfo[numImages];1918writes[numWrites].pBufferInfo = nullptr;1919numImages++;1920break;1921case BindingType::STORAGE_BUFFER_VERTEX:1922case BindingType::STORAGE_BUFFER_COMPUTE:1923_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1924bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1925bufferInfo[numBuffers].range = data[i].buffer.range;1926bufferInfo[numBuffers].offset = data[i].buffer.offset;1927writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1928writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1929writes[numWrites].pImageInfo = nullptr;1930numBuffers++;1931break;1932case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1933case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1934_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1935bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1936bufferInfo[numBuffers].range = data[i].buffer.range;1937bufferInfo[numBuffers].offset = 0;1938writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1939writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1940writes[numWrites].pImageInfo = nullptr;1941numBuffers++;1942break;1943}1944writes[numWrites].dstBinding = i;1945writes[numWrites].dstSet = d.set;1946numWrites++;1947}19481949vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr);19501951writeCount++;1952}19531954data.flushedDescriptors_ = (int)descSets.size();1955profile->descriptorsWritten += writeCount;1956profile->descriptorsDeduped += dedupCount;1957}19581959void VulkanRenderManager::SanityCheckPassesOnAdd() {1960#if _DEBUG1961// Check that we don't have any previous passes that write to the backbuffer, that must ALWAYS be the last one.1962for (int i = 0; i < (int)steps_.size(); i++) {1963if (steps_[i]->stepType == VKRStepType::RENDER) {1964_dbg_assert_msg_(steps_[i]->render.framebuffer != nullptr, "Adding second backbuffer pass? Not good!");1965}1966}1967#endif1968}196919701971