// Scraped from a CoCalc code-viewer page ("CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!").
// Path: blob/master/Common/GPU/Vulkan/VulkanRenderManager.cpp
// Views: 1401
#include <algorithm>1#include <cstdint>23#include <map>4#include <sstream>56#include "Common/Log.h"7#include "Common/StringUtils.h"8#include "Common/TimeUtil.h"910#include "Common/GPU/Vulkan/VulkanAlloc.h"11#include "Common/GPU/Vulkan/VulkanContext.h"12#include "Common/GPU/Vulkan/VulkanRenderManager.h"1314#include "Common/LogReporting.h"15#include "Common/Thread/ThreadUtil.h"16#include "Common/VR/PPSSPPVR.h"1718#if 0 // def _DEBUG19#define VLOG(...) NOTICE_LOG(Log::G3D, __VA_ARGS__)20#else21#define VLOG(...)22#endif2324#ifndef UINT64_MAX25#define UINT64_MAX 0xFFFFFFFFFFFFFFFFULL26#endif2728using namespace PPSSPP_VK;2930// renderPass is an example of the "compatibility class" or RenderPassType type.31bool VKRGraphicsPipeline::Create(VulkanContext *vulkan, VkRenderPass compatibleRenderPass, RenderPassType rpType, VkSampleCountFlagBits sampleCount, double scheduleTime, int countToCompile) {32// Good torture test to test the shutdown-while-precompiling-shaders issue on PC where it's normally33// hard to catch because shaders compile so fast.34// sleep_ms(200);3536bool multisample = RenderPassTypeHasMultisample(rpType);37if (multisample) {38if (sampleCount_ != VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {39_assert_(sampleCount == sampleCount_);40} else {41sampleCount_ = sampleCount;42}43}4445// Sanity check.46// Seen in crash reports from PowerVR GE8320, presumably we failed creating some shader modules.47if (!desc->vertexShader || !desc->fragmentShader) {48ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing vs/fs shader module pointers!");49pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);50return false;51}5253// Fill in the last part of the desc since now it's time to block.54VkShaderModule vs = desc->vertexShader->BlockUntilReady();55VkShaderModule fs = desc->fragmentShader->BlockUntilReady();56VkShaderModule gs = desc->geometryShader ? 
desc->geometryShader->BlockUntilReady() : VK_NULL_HANDLE;5758if (!vs || !fs || (!gs && desc->geometryShader)) {59ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - missing shader modules");60pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);61return false;62}6364if (!compatibleRenderPass) {65ERROR_LOG(Log::G3D, "Failed creating graphics pipeline - compatible render pass was nullptr");66pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);67return false;68}6970uint32_t stageCount = 2;71VkPipelineShaderStageCreateInfo ss[3]{};72ss[0].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;73ss[0].stage = VK_SHADER_STAGE_VERTEX_BIT;74ss[0].pSpecializationInfo = nullptr;75ss[0].module = vs;76ss[0].pName = "main";77ss[1].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;78ss[1].stage = VK_SHADER_STAGE_FRAGMENT_BIT;79ss[1].pSpecializationInfo = nullptr;80ss[1].module = fs;81ss[1].pName = "main";82if (gs) {83stageCount++;84ss[2].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;85ss[2].stage = VK_SHADER_STAGE_GEOMETRY_BIT;86ss[2].pSpecializationInfo = nullptr;87ss[2].module = gs;88ss[2].pName = "main";89}9091VkGraphicsPipelineCreateInfo pipe{ VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO };92pipe.pStages = ss;93pipe.stageCount = stageCount;94pipe.renderPass = compatibleRenderPass;95pipe.basePipelineIndex = 0;96pipe.pColorBlendState = &desc->cbs;97pipe.pDepthStencilState = &desc->dss;98pipe.pRasterizationState = &desc->rs;99100VkPipelineMultisampleStateCreateInfo ms{ VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO };101ms.rasterizationSamples = multisample ? 
sampleCount : VK_SAMPLE_COUNT_1_BIT;102if (multisample && (flags_ & PipelineFlags::USES_DISCARD)) {103// Extreme quality104ms.sampleShadingEnable = true;105ms.minSampleShading = 1.0f;106}107108VkPipelineInputAssemblyStateCreateInfo inputAssembly{ VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO };109inputAssembly.topology = desc->topology;110111// We will use dynamic viewport state.112pipe.pVertexInputState = &desc->vis;113pipe.pViewportState = &desc->views;114pipe.pTessellationState = nullptr;115pipe.pDynamicState = &desc->ds;116pipe.pInputAssemblyState = &inputAssembly;117pipe.pMultisampleState = &ms;118pipe.layout = desc->pipelineLayout->pipelineLayout;119pipe.basePipelineHandle = VK_NULL_HANDLE;120pipe.basePipelineIndex = 0;121pipe.subpass = 0;122123double start = time_now_d();124VkPipeline vkpipeline;125VkResult result = vkCreateGraphicsPipelines(vulkan->GetDevice(), desc->pipelineCache, 1, &pipe, nullptr, &vkpipeline);126127double now = time_now_d();128double taken_ms_since_scheduling = (now - scheduleTime) * 1000.0;129double taken_ms = (now - start) * 1000.0;130131if (taken_ms < 0.1) {132DEBUG_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling (fast) rpType: %04x sampleBits: %d (%s)",133countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());134} else {135INFO_LOG(Log::G3D, "Pipeline (x/%d) time on %s: %0.2f ms, %0.2f ms since scheduling rpType: %04x sampleBits: %d (%s)",136countToCompile, GetCurrentThreadName(), taken_ms, taken_ms_since_scheduling, (u32)rpType, (u32)sampleCount, tag_.c_str());137}138139bool success = true;140if (result == VK_INCOMPLETE) {141// Bad (disallowed by spec) return value seen on Adreno in Burnout :( Try to ignore?142// Would really like to log more here, we could probably attach more info to desc.143//144// At least create a null placeholder to avoid creating over and over if something is 
broken.145pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);146ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! VK_INCOMPLETE");147LogCreationFailure();148success = false;149} else if (result != VK_SUCCESS) {150pipeline[(size_t)rpType]->Post(VK_NULL_HANDLE);151ERROR_LOG(Log::G3D, "Failed creating graphics pipeline! result='%s'", VulkanResultToString(result));152LogCreationFailure();153success = false;154} else {155// Success!156if (!tag_.empty()) {157vulkan->SetDebugName(vkpipeline, VK_OBJECT_TYPE_PIPELINE, tag_.c_str());158}159pipeline[(size_t)rpType]->Post(vkpipeline);160}161162return success;163}164165void VKRGraphicsPipeline::DestroyVariants(VulkanContext *vulkan, bool msaaOnly) {166for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {167if (!this->pipeline[i])168continue;169if (msaaOnly && (i & (int)RenderPassType::MULTISAMPLE) == 0)170continue;171172VkPipeline pipeline = this->pipeline[i]->BlockUntilReady();173// pipeline can be nullptr here, if it failed to compile before.174if (pipeline) {175vulkan->Delete().QueueDeletePipeline(pipeline);176}177this->pipeline[i] = nullptr;178}179sampleCount_ = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;180}181182void VKRGraphicsPipeline::DestroyVariantsInstant(VkDevice device) {183for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {184if (pipeline[i]) {185vkDestroyPipeline(device, pipeline[i]->BlockUntilReady(), nullptr);186delete pipeline[i];187pipeline[i] = nullptr;188}189}190}191192VKRGraphicsPipeline::~VKRGraphicsPipeline() {193// This is called from the callbacked queued in QueueForDeletion.194// When we reach here, we should already be empty, so let's assert on that.195for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {196_assert_(!pipeline[i]);197}198if (desc)199desc->Release();200}201202void VKRGraphicsPipeline::BlockUntilCompiled() {203for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {204if (pipeline[i]) {205pipeline[i]->BlockUntilReady();206}207}208}209210void 
VKRGraphicsPipeline::QueueForDeletion(VulkanContext *vulkan) {211// Can't destroy variants here, the pipeline still lives for a while.212vulkan->Delete().QueueCallback([](VulkanContext *vulkan, void *p) {213VKRGraphicsPipeline *pipeline = (VKRGraphicsPipeline *)p;214pipeline->DestroyVariantsInstant(vulkan->GetDevice());215delete pipeline;216}, this);217}218219u32 VKRGraphicsPipeline::GetVariantsBitmask() const {220u32 bitmask = 0;221for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {222if (pipeline[i]) {223bitmask |= 1 << i;224}225}226return bitmask;227}228229void VKRGraphicsPipeline::LogCreationFailure() const {230ERROR_LOG(Log::G3D, "vs: %s\n[END VS]", desc->vertexShaderSource.c_str());231ERROR_LOG(Log::G3D, "fs: %s\n[END FS]", desc->fragmentShaderSource.c_str());232if (desc->geometryShader) {233ERROR_LOG(Log::G3D, "gs: %s\n[END GS]", desc->geometryShaderSource.c_str());234}235// TODO: Maybe log various other state?236ERROR_LOG(Log::G3D, "======== END OF PIPELINE ==========");237}238239struct SinglePipelineTask {240VKRGraphicsPipeline *pipeline;241VkRenderPass compatibleRenderPass;242RenderPassType rpType;243VkSampleCountFlagBits sampleCount;244double scheduleTime;245int countToCompile;246};247248class CreateMultiPipelinesTask : public Task {249public:250CreateMultiPipelinesTask(VulkanContext *vulkan, std::vector<SinglePipelineTask> tasks) : vulkan_(vulkan), tasks_(tasks) {251tasksInFlight_.fetch_add(1);252}253~CreateMultiPipelinesTask() {}254255TaskType Type() const override {256return TaskType::CPU_COMPUTE;257}258259TaskPriority Priority() const override {260return TaskPriority::HIGH;261}262263void Run() override {264for (auto &task : tasks_) {265task.pipeline->Create(vulkan_, task.compatibleRenderPass, task.rpType, task.sampleCount, task.scheduleTime, task.countToCompile);266}267tasksInFlight_.fetch_sub(1);268}269270VulkanContext *vulkan_;271std::vector<SinglePipelineTask> tasks_;272273// Use during shutdown to make sure there aren't any leftover 
tasks sitting queued.274// Could probably be done more elegantly. Like waiting for all tasks of a type, or saving pointers to them, or something...275static void WaitForAll();276static std::atomic<int> tasksInFlight_;277};278279void CreateMultiPipelinesTask::WaitForAll() {280while (tasksInFlight_.load() > 0) {281sleep_ms(2);282}283}284285std::atomic<int> CreateMultiPipelinesTask::tasksInFlight_;286287VulkanRenderManager::VulkanRenderManager(VulkanContext *vulkan, bool useThread, HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory)288: vulkan_(vulkan), queueRunner_(vulkan),289initTimeMs_("initTimeMs"),290totalGPUTimeMs_("totalGPUTimeMs"),291renderCPUTimeMs_("renderCPUTimeMs"),292descUpdateTimeMs_("descUpdateCPUTimeMs"),293useRenderThread_(useThread),294frameTimeHistory_(frameTimeHistory)295{296inflightFramesAtStart_ = vulkan_->GetInflightFrames();297298// For present timing experiments. Disabled for now.299measurePresentTime_ = false;300301frameDataShared_.Init(vulkan, useThread, measurePresentTime_);302303for (int i = 0; i < inflightFramesAtStart_; i++) {304frameData_[i].Init(vulkan, i);305}306307queueRunner_.CreateDeviceObjects();308}309310bool VulkanRenderManager::CreateBackbuffers() {311if (!vulkan_->GetSwapchain()) {312ERROR_LOG(Log::G3D, "No swapchain - can't create backbuffers");313return false;314}315316VkCommandBuffer cmdInit = GetInitCmd();317318if (!queueRunner_.CreateSwapchain(cmdInit, &postInitBarrier_)) {319return false;320}321322curWidthRaw_ = -1;323curHeightRaw_ = -1;324325if (HasBackbuffers()) {326VLOG("Backbuffers Created");327}328329if (newInflightFrames_ != -1) {330INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);331vulkan_->UpdateInflightFrames(newInflightFrames_);332newInflightFrames_ = -1;333}334335outOfDateFrames_ = 0;336337for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {338auto &frameData = frameData_[i];339frameData.readyForFence = true; // Just in case.340}341342// Start the 
thread(s).343if (HasBackbuffers()) {344StartThreads();345}346return true;347}348349void VulkanRenderManager::StartThreads() {350{351std::unique_lock<std::mutex> lock(compileMutex_);352_assert_(compileQueue_.empty());353}354355runCompileThread_ = true; // For controlling the compiler thread's exit356357if (useRenderThread_) {358INFO_LOG(Log::G3D, "Starting Vulkan submission thread");359renderThread_ = std::thread(&VulkanRenderManager::RenderThreadFunc, this);360}361INFO_LOG(Log::G3D, "Starting Vulkan compiler thread");362compileThread_ = std::thread(&VulkanRenderManager::CompileThreadFunc, this);363364if (measurePresentTime_ && vulkan_->Extensions().KHR_present_wait && vulkan_->GetPresentMode() == VK_PRESENT_MODE_FIFO_KHR) {365INFO_LOG(Log::G3D, "Starting Vulkan present wait thread");366presentWaitThread_ = std::thread(&VulkanRenderManager::PresentWaitThreadFunc, this);367}368}369370// Called from main thread.371void VulkanRenderManager::StopThreads() {372// Not sure this is a sensible check - should be ok even if not.373// _dbg_assert_(steps_.empty());374375if (useRenderThread_) {376_dbg_assert_(renderThread_.joinable());377// Tell the render thread to quit when it's done.378VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::EXIT);379task->frame = vulkan_->GetCurFrame();380{381std::unique_lock<std::mutex> lock(pushMutex_);382renderThreadQueue_.push(task);383}384pushCondVar_.notify_one();385// Once the render thread encounters the above exit task, it'll exit.386renderThread_.join();387INFO_LOG(Log::G3D, "Vulkan submission thread joined. 
Frame=%d", vulkan_->GetCurFrame());388}389390for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {391auto &frameData = frameData_[i];392// Zero the queries so we don't try to pull them later.393frameData.profile.timestampDescriptions.clear();394}395396{397std::unique_lock<std::mutex> lock(compileMutex_);398runCompileThread_ = false; // Compiler and present thread both look at this bool.399_assert_(compileThread_.joinable());400compileCond_.notify_one();401}402compileThread_.join();403404if (presentWaitThread_.joinable()) {405presentWaitThread_.join();406}407408INFO_LOG(Log::G3D, "Vulkan compiler thread joined. Now wait for any straggling compile tasks.");409CreateMultiPipelinesTask::WaitForAll();410411{412std::unique_lock<std::mutex> lock(compileMutex_);413_assert_(compileQueue_.empty());414}415}416417void VulkanRenderManager::DestroyBackbuffers() {418StopThreads();419vulkan_->WaitUntilQueueIdle();420421queueRunner_.DestroyBackBuffers();422}423424void VulkanRenderManager::CheckNothingPending() {425_assert_(pipelinesToCheck_.empty());426{427std::unique_lock<std::mutex> lock(compileMutex_);428_assert_(compileQueue_.empty());429}430}431432VulkanRenderManager::~VulkanRenderManager() {433INFO_LOG(Log::G3D, "VulkanRenderManager destructor");434435{436std::unique_lock<std::mutex> lock(compileMutex_);437_assert_(compileQueue_.empty());438}439440if (useRenderThread_) {441_dbg_assert_(!renderThread_.joinable());442}443444_dbg_assert_(!runCompileThread_); // StopThread should already have been called from DestroyBackbuffers.445446vulkan_->WaitUntilQueueIdle();447448_dbg_assert_(pipelineLayouts_.empty());449450VkDevice device = vulkan_->GetDevice();451frameDataShared_.Destroy(vulkan_);452for (int i = 0; i < inflightFramesAtStart_; i++) {453frameData_[i].Destroy(vulkan_);454}455queueRunner_.DestroyDeviceObjects();456}457458void VulkanRenderManager::CompileThreadFunc() {459SetCurrentThreadName("ShaderCompile");460while (true) {461bool exitAfterCompile = 
false;462std::vector<CompileQueueEntry> toCompile;463{464std::unique_lock<std::mutex> lock(compileMutex_);465while (compileQueue_.empty() && runCompileThread_) {466compileCond_.wait(lock);467}468toCompile = std::move(compileQueue_);469compileQueue_.clear();470if (!runCompileThread_) {471exitAfterCompile = true;472}473}474475int countToCompile = (int)toCompile.size();476477// Here we sort the pending pipelines by vertex and fragment shaders,478std::map<std::pair<Promise<VkShaderModule> *, Promise<VkShaderModule> *>, std::vector<SinglePipelineTask>> map;479480double scheduleTime = time_now_d();481482// Here we sort pending graphics pipelines by vertex and fragment shaders, and split up further.483// Those with the same pairs of shaders should be on the same thread, at least on NVIDIA.484// I don't think PowerVR cares though, it doesn't seem to reuse information between the compiles,485// so we might want a different splitting algorithm there.486for (auto &entry : toCompile) {487switch (entry.type) {488case CompileQueueEntry::Type::GRAPHICS:489{490map[std::make_pair(entry.graphics->desc->vertexShader, entry.graphics->desc->fragmentShader)].push_back(491SinglePipelineTask{492entry.graphics,493entry.compatibleRenderPass,494entry.renderPassType,495entry.sampleCount,496scheduleTime, // these two are for logging purposes.497countToCompile,498}499);500break;501}502}503}504505for (auto iter : map) {506auto &shaders = iter.first;507auto &entries = iter.second;508509// NOTICE_LOG(Log::G3D, "For this shader pair, we have %d pipelines to create", (int)entries.size());510511Task *task = new CreateMultiPipelinesTask(vulkan_, entries);512g_threadManager.EnqueueTask(task);513}514515if (exitAfterCompile) {516break;517}518519// Hold off just a bit before we check again, to allow bunches of pipelines to collect.520sleep_ms(1);521}522523std::unique_lock<std::mutex> lock(compileMutex_);524_assert_(compileQueue_.empty());525}526527void VulkanRenderManager::RenderThreadFunc() 
{528SetCurrentThreadName("VulkanRenderMan");529while (true) {530_dbg_assert_(useRenderThread_);531532// Pop a task of the queue and execute it.533VKRRenderThreadTask *task = nullptr;534{535std::unique_lock<std::mutex> lock(pushMutex_);536while (renderThreadQueue_.empty()) {537pushCondVar_.wait(lock);538}539task = renderThreadQueue_.front();540renderThreadQueue_.pop();541}542543// Oh, we got a task! We can now have pushMutex_ unlocked, allowing the host to544// push more work when it feels like it, and just start working.545if (task->runType == VKRRunType::EXIT) {546// Oh, host wanted out. Let's leave.547delete task;548// In this case, there should be no more tasks.549break;550}551552Run(*task);553delete task;554}555556// Wait for the device to be done with everything, before tearing stuff down.557// TODO: Do we really need this? It's probably a good idea, though.558vkDeviceWaitIdle(vulkan_->GetDevice());559VLOG("PULL: Quitting");560}561562void VulkanRenderManager::PresentWaitThreadFunc() {563SetCurrentThreadName("PresentWait");564565#if !PPSSPP_PLATFORM(IOS_APP_STORE)566_dbg_assert_(vkWaitForPresentKHR != nullptr);567568uint64_t waitedId = frameIdGen_;569while (runCompileThread_) {570const uint64_t timeout = 1000000000ULL; // 1 sec571if (VK_SUCCESS == vkWaitForPresentKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), waitedId, timeout)) {572frameTimeHistory_[waitedId].actualPresent = time_now_d();573frameTimeHistory_[waitedId].waitCount++;574waitedId++;575} else {576// We caught up somehow, which is a bad sign (we should have blocked, right?). 
Maybe we should break out of the loop?577sleep_ms(1);578frameTimeHistory_[waitedId].waitCount++;579}580_dbg_assert_(waitedId <= frameIdGen_);581}582#endif583584INFO_LOG(Log::G3D, "Leaving PresentWaitThreadFunc()");585}586587void VulkanRenderManager::PollPresentTiming() {588// For VK_GOOGLE_display_timing, we need to poll.589590// Poll for information about completed frames.591// NOTE: We seem to get the information pretty late! Like after 6 frames, which is quite weird.592// Tested on POCO F4.593// TODO: Getting validation errors that this should be called from the thread doing the presenting.594// Probably a fair point. For now, we turn it off.595if (measurePresentTime_ && vulkan_->Extensions().GOOGLE_display_timing) {596uint32_t count = 0;597vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, nullptr);598if (count > 0) {599VkPastPresentationTimingGOOGLE *timings = new VkPastPresentationTimingGOOGLE[count];600vkGetPastPresentationTimingGOOGLE(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &count, timings);601for (uint32_t i = 0; i < count; i++) {602uint64_t presentId = timings[i].presentID;603frameTimeHistory_[presentId].actualPresent = from_time_raw(timings[i].actualPresentTime);604frameTimeHistory_[presentId].desiredPresentTime = from_time_raw(timings[i].desiredPresentTime);605frameTimeHistory_[presentId].earliestPresentTime = from_time_raw(timings[i].earliestPresentTime);606double presentMargin = from_time_raw_relative(timings[i].presentMargin);607frameTimeHistory_[presentId].presentMargin = presentMargin;608}609delete[] timings;610}611}612}613614void VulkanRenderManager::BeginFrame(bool enableProfiling, bool enableLogProfiler) {615double frameBeginTime = time_now_d()616VLOG("BeginFrame");617VkDevice device = vulkan_->GetDevice();618619int curFrame = vulkan_->GetCurFrame();620FrameData &frameData = frameData_[curFrame];621VLOG("PUSH: Fencing %d", curFrame);622623// Makes sure the submission from the previous time around has 
happened. Otherwise624// we are not allowed to wait from another thread here..625if (useRenderThread_) {626std::unique_lock<std::mutex> lock(frameData.fenceMutex);627while (!frameData.readyForFence) {628frameData.fenceCondVar.wait(lock);629}630frameData.readyForFence = false;631}632633// This must be the very first Vulkan call we do in a new frame.634// Makes sure the very last command buffer from the frame before the previous has been fully executed.635if (vkWaitForFences(device, 1, &frameData.fence, true, UINT64_MAX) == VK_ERROR_DEVICE_LOST) {636_assert_msg_(false, "Device lost in vkWaitForFences");637}638vkResetFences(device, 1, &frameData.fence);639640uint64_t frameId = frameIdGen_++;641642PollPresentTiming();643644ResetDescriptorLists(curFrame);645646int validBits = vulkan_->GetQueueFamilyProperties(vulkan_->GetGraphicsQueueFamilyIndex()).timestampValidBits;647648FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameId);649frameTimeData.frameId = frameId;650frameTimeData.frameBegin = frameBeginTime;651frameTimeData.afterFenceWait = time_now_d();652653// Can't set this until after the fence.654frameData.profile.enabled = enableProfiling;655frameData.profile.timestampsEnabled = enableProfiling && validBits > 0;656frameData.frameId = frameId;657658uint64_t queryResults[MAX_TIMESTAMP_QUERIES];659660if (enableProfiling) {661// Pull the profiling results from last time and produce a summary!662if (!frameData.profile.timestampDescriptions.empty() && frameData.profile.timestampsEnabled) {663int numQueries = (int)frameData.profile.timestampDescriptions.size();664VkResult res = vkGetQueryPoolResults(665vulkan_->GetDevice(),666frameData.profile.queryPool, 0, numQueries, sizeof(uint64_t) * numQueries, &queryResults[0], sizeof(uint64_t),667VK_QUERY_RESULT_64_BIT);668if (res == VK_SUCCESS) {669double timestampConversionFactor = (double)vulkan_->GetPhysicalDeviceProperties().properties.limits.timestampPeriod * (1.0 / 1000000.0);670uint64_t timestampDiffMask = validBits 
== 64 ? 0xFFFFFFFFFFFFFFFFULL : ((1ULL << validBits) - 1);671std::stringstream str;672673char line[256];674totalGPUTimeMs_.Update(((double)((queryResults[numQueries - 1] - queryResults[0]) & timestampDiffMask) * timestampConversionFactor));675totalGPUTimeMs_.Format(line, sizeof(line));676str << line;677renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);678renderCPUTimeMs_.Format(line, sizeof(line));679str << line;680descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);681descUpdateTimeMs_.Format(line, sizeof(line));682str << line;683snprintf(line, sizeof(line), "Descriptors written: %d (dedup: %d)\n", frameData.profile.descriptorsWritten, frameData.profile.descriptorsDeduped);684str << line;685snprintf(line, sizeof(line), "Resource deletions: %d\n", vulkan_->GetLastDeleteCount());686str << line;687for (int i = 0; i < numQueries - 1; i++) {688uint64_t diff = (queryResults[i + 1] - queryResults[i]) & timestampDiffMask;689double milliseconds = (double)diff * timestampConversionFactor;690691// Can't use SimpleStat for these very easily since these are dynamic per frame.692// Only the first one is static, the initCmd.693// Could try some hashtable tracking for the rest, later.694if (i == 0) {695initTimeMs_.Update(milliseconds);696initTimeMs_.Format(line, sizeof(line));697} else {698snprintf(line, sizeof(line), "%s: %0.3f ms\n", frameData.profile.timestampDescriptions[i + 1].c_str(), milliseconds);699}700str << line;701}702frameData.profile.profileSummary = str.str();703} else {704frameData.profile.profileSummary = "(error getting GPU profile - not ready?)";705}706} else {707std::stringstream str;708char line[256];709renderCPUTimeMs_.Update((frameData.profile.cpuEndTime - frameData.profile.cpuStartTime) * 1000.0);710renderCPUTimeMs_.Format(line, sizeof(line));711str << line;712descUpdateTimeMs_.Update(frameData.profile.descWriteTime * 1000.0);713descUpdateTimeMs_.Format(line, sizeof(line));714str << 
line;715snprintf(line, sizeof(line), "Descriptors written: %d\n", frameData.profile.descriptorsWritten);716str << line;717frameData.profile.profileSummary = str.str();718}719720#ifdef _DEBUG721std::string cmdString;722for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {723if (frameData.profile.commandCounts[i] > 0) {724cmdString += StringFromFormat("%s: %d\n", VKRRenderCommandToString((VKRRenderCommand)i), frameData.profile.commandCounts[i]);725}726}727memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));728frameData.profile.profileSummary += cmdString;729#endif730}731732frameData.profile.descriptorsWritten = 0;733frameData.profile.descriptorsDeduped = 0;734735// Must be after the fence - this performs deletes.736VLOG("PUSH: BeginFrame %d", curFrame);737738insideFrame_ = true;739vulkan_->BeginFrame(enableLogProfiler ? GetInitCmd() : VK_NULL_HANDLE);740741frameData.profile.timestampDescriptions.clear();742if (frameData.profile.timestampsEnabled) {743// For various reasons, we need to always use an init cmd buffer in this case to perform the vkCmdResetQueryPool,744// unless we want to limit ourselves to only measure the main cmd buffer.745// Later versions of Vulkan have support for clearing queries on the CPU timeline, but we don't want to rely on that.746// Reserve the first two queries for initCmd.747frameData.profile.timestampDescriptions.push_back("initCmd Begin");748frameData.profile.timestampDescriptions.push_back("initCmd");749VkCommandBuffer initCmd = GetInitCmd();750}751}752753VkCommandBuffer VulkanRenderManager::GetInitCmd() {754int curFrame = vulkan_->GetCurFrame();755return frameData_[curFrame].GetInitCmd(vulkan_);756}757758void VulkanRenderManager::ReportBadStateForDraw() {759const char *cause1 = "";760char cause2[256];761cause2[0] = '\0';762if (!curRenderStep_) {763cause1 = "No current render step";764}765if (curRenderStep_ && curRenderStep_->stepType != VKRStepType::RENDER) {766cause1 = "Not a render 
step: ";767std::string str = VulkanQueueRunner::StepToString(vulkan_, *curRenderStep_);768truncate_cpy(cause2, str.c_str());769}770ERROR_LOG_REPORT_ONCE(baddraw, Log::G3D, "Can't draw: %s%s. Step count: %d", cause1, cause2, (int)steps_.size());771}772773VKRGraphicsPipeline *VulkanRenderManager::CreateGraphicsPipeline(VKRGraphicsPipelineDesc *desc, PipelineFlags pipelineFlags, uint32_t variantBitmask, VkSampleCountFlagBits sampleCount, bool cacheLoad, const char *tag) {774if (!desc->vertexShader || !desc->fragmentShader) {775ERROR_LOG(Log::G3D, "Can't create graphics pipeline with missing vs/ps: %p %p", desc->vertexShader, desc->fragmentShader);776return nullptr;777}778779VKRGraphicsPipeline *pipeline = new VKRGraphicsPipeline(pipelineFlags, tag);780pipeline->desc = desc;781pipeline->desc->AddRef();782if (curRenderStep_ && !cacheLoad) {783// The common case during gameplay.784pipelinesToCheck_.push_back(pipeline);785} else {786if (!variantBitmask) {787WARN_LOG(Log::G3D, "WARNING: Will not compile any variants of pipeline, not in renderpass and empty variantBitmask");788}789// Presumably we're in initialization, loading the shader cache.790// Look at variantBitmask to see what variants we should queue up.791RPKey key{792VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,793VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,794};795VKRRenderPass *compatibleRenderPass = queueRunner_.GetRenderPass(key);796std::unique_lock<std::mutex> lock(compileMutex_);797bool needsCompile = false;798for (size_t i = 0; i < (size_t)RenderPassType::TYPE_COUNT; i++) {799if (!(variantBitmask & (1 << i)))800continue;801RenderPassType rpType = (RenderPassType)i;802803// Sanity check - don't compile incompatible types (could be caused by corrupt caches, changes in data structures, etc).804if ((pipelineFlags & PipelineFlags::USES_DEPTH_STENCIL) && !RenderPassTypeHasDepth(rpType)) 
{805WARN_LOG(Log::G3D, "Not compiling pipeline that requires depth, for non depth renderpass type");806continue;807}808// Shouldn't hit this, these should have been filtered elsewhere. However, still a good check to do.809if (sampleCount == VK_SAMPLE_COUNT_1_BIT && RenderPassTypeHasMultisample(rpType)) {810WARN_LOG(Log::G3D, "Not compiling single sample pipeline for a multisampled render pass type");811continue;812}813814if (rpType == RenderPassType::BACKBUFFER) {815sampleCount = VK_SAMPLE_COUNT_1_BIT;816}817818pipeline->pipeline[i] = Promise<VkPipeline>::CreateEmpty();819compileQueue_.emplace_back(pipeline, compatibleRenderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount);820needsCompile = true;821}822if (needsCompile)823compileCond_.notify_one();824}825return pipeline;826}827828void VulkanRenderManager::EndCurRenderStep() {829if (!curRenderStep_)830return;831832RPKey key{833curRenderStep_->render.colorLoad, curRenderStep_->render.depthLoad, curRenderStep_->render.stencilLoad,834curRenderStep_->render.colorStore, curRenderStep_->render.depthStore, curRenderStep_->render.stencilStore,835};836// Save the accumulated pipeline flags so we can use that to configure the render pass.837// We'll often be able to avoid loading/saving the depth/stencil buffer.838curRenderStep_->render.pipelineFlags = curPipelineFlags_;839bool depthStencil = (curPipelineFlags_ & PipelineFlags::USES_DEPTH_STENCIL) != 0;840RenderPassType rpType = depthStencil ? 
RenderPassType::HAS_DEPTH : RenderPassType::DEFAULT;841842if (curRenderStep_->render.framebuffer && (rpType & RenderPassType::HAS_DEPTH) && !curRenderStep_->render.framebuffer->HasDepth()) {843WARN_LOG(Log::G3D, "Trying to render with a depth-writing pipeline to a framebuffer without depth: %s", curRenderStep_->render.framebuffer->Tag());844rpType = RenderPassType::DEFAULT;845}846847if (!curRenderStep_->render.framebuffer) {848rpType = RenderPassType::BACKBUFFER;849} else {850// Framebuffers can be stereo, and if so, will control the render pass type to match.851// Pipelines can be mono and render fine to stereo etc, so not checking them here.852// Note that we don't support rendering to just one layer of a multilayer framebuffer!853if (curRenderStep_->render.framebuffer->numLayers > 1) {854rpType = (RenderPassType)(rpType | RenderPassType::MULTIVIEW);855}856857if (curRenderStep_->render.framebuffer->sampleCount != VK_SAMPLE_COUNT_1_BIT) {858rpType = (RenderPassType)(rpType | RenderPassType::MULTISAMPLE);859}860}861862VKRRenderPass *renderPass = queueRunner_.GetRenderPass(key);863curRenderStep_->render.renderPassType = rpType;864865VkSampleCountFlagBits sampleCount = curRenderStep_->render.framebuffer ? 
curRenderStep_->render.framebuffer->sampleCount : VK_SAMPLE_COUNT_1_BIT;866867compileMutex_.lock();868bool needsCompile = false;869for (VKRGraphicsPipeline *pipeline : pipelinesToCheck_) {870if (!pipeline) {871// Not good, but let's try not to crash.872continue;873}874if (!pipeline->pipeline[(size_t)rpType]) {875pipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();876_assert_(renderPass);877compileQueue_.push_back(CompileQueueEntry(pipeline, renderPass->Get(vulkan_, rpType, sampleCount), rpType, sampleCount));878needsCompile = true;879}880}881if (needsCompile)882compileCond_.notify_one();883compileMutex_.unlock();884pipelinesToCheck_.clear();885886// We don't do this optimization for very small targets, probably not worth it.887if (!curRenderArea_.Empty() && (curWidth_ > 32 && curHeight_ > 32)) {888curRenderStep_->render.renderArea = curRenderArea_.ToVkRect2D();889} else {890curRenderStep_->render.renderArea.offset = {};891curRenderStep_->render.renderArea.extent = { (uint32_t)curWidth_, (uint32_t)curHeight_ };892}893curRenderArea_.Reset();894895// We no longer have a current render step.896curRenderStep_ = nullptr;897curPipelineFlags_ = (PipelineFlags)0;898}899900void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRRenderPassLoadAction color, VKRRenderPassLoadAction depth, VKRRenderPassLoadAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {901_dbg_assert_(insideFrame_);902903// Eliminate dupes (bind of the framebuffer we already are rendering to), instantly convert to a clear if possible.904if (!steps_.empty() && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == fb) {905u32 clearMask = 0;906if (color == VKRRenderPassLoadAction::CLEAR) {907clearMask |= VK_IMAGE_ASPECT_COLOR_BIT;908}909if (depth == VKRRenderPassLoadAction::CLEAR) {910clearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;911curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;912}913if 
(stencil == VKRRenderPassLoadAction::CLEAR) {914clearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;915curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;916}917918// If we need a clear and the previous step has commands already, it's best to just add a clear and keep going.919// If there's no clear needed, let's also do that.920//921// However, if we do need a clear and there are no commands in the previous pass,922// we want the queuerunner to have the opportunity to merge, so we'll go ahead and make a new renderpass.923if (clearMask == 0 || !steps_.back()->commands.empty()) {924curRenderStep_ = steps_.back();925curStepHasViewport_ = false;926curStepHasScissor_ = false;927for (const auto &c : steps_.back()->commands) {928if (c.cmd == VKRRenderCommand::VIEWPORT) {929curStepHasViewport_ = true;930} else if (c.cmd == VKRRenderCommand::SCISSOR) {931curStepHasScissor_ = true;932}933}934if (clearMask != 0) {935VkRenderData data{ VKRRenderCommand::CLEAR };936data.clear.clearColor = clearColor;937data.clear.clearZ = clearDepth;938data.clear.clearStencil = clearStencil;939data.clear.clearMask = clearMask;940curRenderStep_->commands.push_back(data);941curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);942}943return;944}945}946947#ifdef _DEBUG948SanityCheckPassesOnAdd();949#endif950951// More redundant bind elimination.952if (curRenderStep_) {953if (curRenderStep_->commands.empty()) {954if (curRenderStep_->render.colorLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.depthLoad != VKRRenderPassLoadAction::CLEAR && curRenderStep_->render.stencilLoad != VKRRenderPassLoadAction::CLEAR) {955// Can trivially kill the last empty render step.956_dbg_assert_(steps_.back() == curRenderStep_);957delete steps_.back();958steps_.pop_back();959curRenderStep_ = nullptr;960}961VLOG("Empty render step. Usually happens after uploading pixels..");962}963964EndCurRenderStep();965}966967// Sanity check that we don't have binds to the backbuffer before binds to other buffers. 
It must always be bound last.968if (steps_.size() >= 1 && steps_.back()->stepType == VKRStepType::RENDER && steps_.back()->render.framebuffer == nullptr && fb != nullptr) {969_dbg_assert_(false);970}971972// Older Mali drivers have issues with depth and stencil don't match load/clear/etc.973// TODO: Determine which versions and do this only where necessary.974u32 lateClearMask = 0;975if (depth != stencil && vulkan_->GetPhysicalDeviceProperties().properties.vendorID == VULKAN_VENDOR_ARM) {976if (stencil == VKRRenderPassLoadAction::DONT_CARE) {977stencil = depth;978} else if (depth == VKRRenderPassLoadAction::DONT_CARE) {979depth = stencil;980} else if (stencil == VKRRenderPassLoadAction::CLEAR) {981depth = stencil;982lateClearMask |= VK_IMAGE_ASPECT_STENCIL_BIT;983} else if (depth == VKRRenderPassLoadAction::CLEAR) {984stencil = depth;985lateClearMask |= VK_IMAGE_ASPECT_DEPTH_BIT;986}987}988989VKRStep *step = new VKRStep{ VKRStepType::RENDER };990step->render.framebuffer = fb;991step->render.colorLoad = color;992step->render.depthLoad = depth;993step->render.stencilLoad = stencil;994step->render.colorStore = VKRRenderPassStoreAction::STORE;995step->render.depthStore = VKRRenderPassStoreAction::STORE;996step->render.stencilStore = VKRRenderPassStoreAction::STORE;997step->render.clearColor = clearColor;998step->render.clearDepth = clearDepth;999step->render.clearStencil = clearStencil;1000step->render.numDraws = 0;1001step->render.numReads = 0;1002step->render.finalColorLayout = !fb ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1003step->render.finalDepthStencilLayout = !fb ? VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL : VK_IMAGE_LAYOUT_UNDEFINED;1004// pipelineFlags, renderArea and renderPassType get filled in when we finalize the step. 
Do not read from them before that.1005step->tag = tag;1006steps_.push_back(step);10071008if (fb) {1009// If there's a KEEP, we naturally read from the framebuffer.1010if (color == VKRRenderPassLoadAction::KEEP || depth == VKRRenderPassLoadAction::KEEP || stencil == VKRRenderPassLoadAction::KEEP) {1011step->dependencies.insert(fb);1012}1013}10141015curRenderStep_ = step;1016curStepHasViewport_ = false;1017curStepHasScissor_ = false;1018if (fb) {1019curWidthRaw_ = fb->width;1020curHeightRaw_ = fb->height;1021curWidth_ = fb->width;1022curHeight_ = fb->height;1023} else {1024curWidthRaw_ = vulkan_->GetBackbufferWidth();1025curHeightRaw_ = vulkan_->GetBackbufferHeight();1026if (g_display.rotation == DisplayRotation::ROTATE_90 ||1027g_display.rotation == DisplayRotation::ROTATE_270) {1028curWidth_ = curHeightRaw_;1029curHeight_ = curWidthRaw_;1030} else {1031curWidth_ = curWidthRaw_;1032curHeight_ = curHeightRaw_;1033}1034}10351036if (color == VKRRenderPassLoadAction::CLEAR || depth == VKRRenderPassLoadAction::CLEAR || stencil == VKRRenderPassLoadAction::CLEAR) {1037curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1038}10391040// See above - we add a clear afterward if only one side for depth/stencil CLEAR/KEEP.1041if (lateClearMask != 0) {1042VkRenderData data{ VKRRenderCommand::CLEAR };1043data.clear.clearColor = clearColor;1044data.clear.clearZ = clearDepth;1045data.clear.clearStencil = clearStencil;1046data.clear.clearMask = lateClearMask;1047curRenderStep_->commands.push_back(data);1048}10491050if (invalidationCallback_) {1051invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);1052}1053}10541055bool VulkanRenderManager::CopyFramebufferToMemory(VKRFramebuffer *src, VkImageAspectFlags aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {1056_dbg_assert_(insideFrame_);10571058for (int i = (int)steps_.size() - 1; i >= 0; i--) {1059if (steps_[i]->stepType == 
VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1060steps_[i]->render.numReads++;1061break;1062}1063}10641065EndCurRenderStep();10661067VKRStep *step = new VKRStep{ VKRStepType::READBACK };1068step->readback.aspectMask = aspectBits;1069step->readback.src = src;1070step->readback.srcRect.offset = { x, y };1071step->readback.srcRect.extent = { (uint32_t)w, (uint32_t)h };1072step->readback.delayed = mode == Draw::ReadbackMode::OLD_DATA_OK;1073step->dependencies.insert(src);1074step->tag = tag;1075steps_.push_back(step);10761077if (mode == Draw::ReadbackMode::BLOCK) {1078FlushSync();1079}10801081Draw::DataFormat srcFormat = Draw::DataFormat::UNDEFINED;1082if (aspectBits & VK_IMAGE_ASPECT_COLOR_BIT) {1083if (src) {1084switch (src->color.format) {1085case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1086default: _assert_(false);1087}1088} else {1089// Backbuffer.1090if (!(vulkan_->GetSurfaceCapabilities().supportedUsageFlags & VK_IMAGE_USAGE_TRANSFER_SRC_BIT)) {1091ERROR_LOG(Log::G3D, "Copying from backbuffer not supported, can't take screenshots");1092return false;1093}1094switch (vulkan_->GetSwapchainFormat()) {1095case VK_FORMAT_B8G8R8A8_UNORM: srcFormat = Draw::DataFormat::B8G8R8A8_UNORM; break;1096case VK_FORMAT_R8G8B8A8_UNORM: srcFormat = Draw::DataFormat::R8G8B8A8_UNORM; break;1097// NOTE: If you add supported formats here, make sure to also support them in VulkanQueueRunner::CopyReadbackBuffer.1098default:1099ERROR_LOG(Log::G3D, "Unsupported backbuffer format for screenshots");1100return false;1101}1102}1103} else if (aspectBits & VK_IMAGE_ASPECT_STENCIL_BIT) {1104// Copies from stencil are always S8.1105srcFormat = Draw::DataFormat::S8;1106} else if (aspectBits & VK_IMAGE_ASPECT_DEPTH_BIT) {1107switch (src->depth.format) {1108case VK_FORMAT_D24_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D24_S8; break;1109case VK_FORMAT_D32_SFLOAT_S8_UINT: srcFormat = Draw::DataFormat::D32F; break;1110case 
VK_FORMAT_D16_UNORM_S8_UINT: srcFormat = Draw::DataFormat::D16; break;1111default: _assert_(false);1112}1113} else {1114_assert_(false);1115}11161117// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1118return queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()],1119mode == Draw::ReadbackMode::OLD_DATA_OK ? src : nullptr, w, h, srcFormat, destFormat, pixelStride, pixels);1120}11211122void VulkanRenderManager::CopyImageToMemorySync(VkImage image, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {1123_dbg_assert_(insideFrame_);11241125EndCurRenderStep();11261127VKRStep *step = new VKRStep{ VKRStepType::READBACK_IMAGE };1128step->readback_image.image = image;1129step->readback_image.srcRect.offset = { x, y };1130step->readback_image.srcRect.extent = { (uint32_t)w, (uint32_t)h };1131step->readback_image.mipLevel = mipLevel;1132step->tag = tag;1133steps_.push_back(step);11341135FlushSync();11361137// Need to call this after FlushSync so the pixels are guaranteed to be ready in CPU-accessible VRAM.1138queueRunner_.CopyReadbackBuffer(frameData_[vulkan_->GetCurFrame()], nullptr, w, h, destFormat, destFormat, pixelStride, pixels);1139}11401141static void RemoveDrawCommands(FastVec<VkRenderData> *cmds) {1142// Here we remove any DRAW type commands when we hit a CLEAR.1143for (auto &c : *cmds) {1144if (c.cmd == VKRRenderCommand::DRAW || c.cmd == VKRRenderCommand::DRAW_INDEXED) {1145c.cmd = VKRRenderCommand::REMOVED;1146}1147}1148}11491150static void CleanupRenderCommands(FastVec<VkRenderData> *cmds) {1151size_t lastCommand[(int)VKRRenderCommand::NUM_RENDER_COMMANDS];1152memset(lastCommand, -1, sizeof(lastCommand));11531154// Find any duplicate state commands (likely from RemoveDrawCommands.)1155for (size_t i = 0; i < cmds->size(); ++i) {1156auto &c = cmds->at(i);1157auto &lastOfCmd = lastCommand[(uint8_t)c.cmd];11581159switch (c.cmd) 
{1160case VKRRenderCommand::REMOVED:1161continue;11621163case VKRRenderCommand::VIEWPORT:1164case VKRRenderCommand::SCISSOR:1165case VKRRenderCommand::BLEND:1166case VKRRenderCommand::STENCIL:1167if (lastOfCmd != -1) {1168cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1169}1170break;11711172case VKRRenderCommand::PUSH_CONSTANTS:1173// TODO: For now, we have to keep this one (it has an offset.) Still update lastCommand.1174break;11751176case VKRRenderCommand::CLEAR:1177// Ignore, doesn't participate in state.1178continue;11791180case VKRRenderCommand::DRAW_INDEXED:1181case VKRRenderCommand::DRAW:1182default:1183// Boundary - must keep state before this.1184memset(lastCommand, -1, sizeof(lastCommand));1185continue;1186}11871188lastOfCmd = i;1189}11901191// At this point, anything in lastCommand can be cleaned up too.1192// Note that it's safe to remove the last unused PUSH_CONSTANTS here.1193for (size_t i = 0; i < ARRAY_SIZE(lastCommand); ++i) {1194auto &lastOfCmd = lastCommand[i];1195if (lastOfCmd != -1) {1196cmds->at(lastOfCmd).cmd = VKRRenderCommand::REMOVED;1197}1198}1199}12001201void VulkanRenderManager::Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask) {1202_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER);1203if (!clearMask)1204return;12051206// If this is the first drawing command or clears everything, merge it into the pass.1207int allAspects = VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;1208if (curRenderStep_->render.numDraws == 0 || clearMask == allAspects) {1209curRenderStep_->render.clearColor = clearColor;1210curRenderStep_->render.clearDepth = clearZ;1211curRenderStep_->render.clearStencil = clearStencil;1212curRenderStep_->render.colorLoad = (clearMask & VK_IMAGE_ASPECT_COLOR_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1213curRenderStep_->render.depthLoad = (clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) ? 
VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;1214curRenderStep_->render.stencilLoad = (clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) ? VKRRenderPassLoadAction::CLEAR : VKRRenderPassLoadAction::KEEP;12151216if (clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1217if (curRenderStep_->render.framebuffer && !curRenderStep_->render.framebuffer->HasDepth()) {1218WARN_LOG(Log::G3D, "Trying to clear depth/stencil on a non-depth framebuffer: %s", curRenderStep_->render.framebuffer->Tag());1219} else {1220curPipelineFlags_ |= PipelineFlags::USES_DEPTH_STENCIL;1221}1222}12231224// In case there were commands already.1225curRenderStep_->render.numDraws = 0;1226RemoveDrawCommands(&curRenderStep_->commands);1227} else {1228VkRenderData data{ VKRRenderCommand::CLEAR };1229data.clear.clearColor = clearColor;1230data.clear.clearZ = clearZ;1231data.clear.clearStencil = clearStencil;1232data.clear.clearMask = clearMask;1233curRenderStep_->commands.push_back(data);1234}12351236curRenderArea_.SetRect(0, 0, curWidth_, curHeight_);1237}12381239void VulkanRenderManager::CopyFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkOffset2D dstPos, VkImageAspectFlags aspectMask, const char *tag) {1240#ifdef _DEBUG1241SanityCheckPassesOnAdd();1242#endif12431244_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1245_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1246_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1247_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);12481249_dbg_assert_msg_(srcRect.extent.width > 0, "copy srcwidth == 
0");1250_dbg_assert_msg_(srcRect.extent.height > 0, "copy srcheight == 0");12511252_dbg_assert_msg_(dstPos.x >= 0, "dstPos offset x (%d) < 0", dstPos.x);1253_dbg_assert_msg_(dstPos.y >= 0, "dstPos offset y (%d) < 0", dstPos.y);1254_dbg_assert_msg_(dstPos.x + srcRect.extent.width <= (uint32_t)dst->width, "dstPos + extent x > width");1255_dbg_assert_msg_(dstPos.y + srcRect.extent.height <= (uint32_t)dst->height, "dstPos + extent y > height");12561257for (int i = (int)steps_.size() - 1; i >= 0; i--) {1258if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1259if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1260if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1261steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;1262}1263}1264if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1265if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1266steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;1267}1268}1269steps_[i]->render.numReads++;1270break;1271}1272}1273for (int i = (int)steps_.size() - 1; i >= 0; i--) {1274if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == dst) {1275if (aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1276if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1277steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1278}1279}1280if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1281if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1282steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;1283}1284}1285break;1286}1287}12881289EndCurRenderStep();12901291VKRStep *step = new VKRStep{ VKRStepType::COPY };12921293step->copy.aspectMask = aspectMask;1294step->copy.src = src;1295step->copy.srcRect = srcRect;1296step->copy.dst = dst;1297step->copy.dstPos = 
dstPos;1298step->dependencies.insert(src);1299step->tag = tag;1300bool fillsDst = dst && srcRect.offset.x == 0 && srcRect.offset.y == 0 && srcRect.extent.width == dst->width && srcRect.extent.height == dst->height;1301if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)1302step->dependencies.insert(dst);13031304steps_.push_back(step);1305}13061307void VulkanRenderManager::BlitFramebuffer(VKRFramebuffer *src, VkRect2D srcRect, VKRFramebuffer *dst, VkRect2D dstRect, VkImageAspectFlags aspectMask, VkFilter filter, const char *tag) {1308#ifdef _DEBUG1309SanityCheckPassesOnAdd();1310#endif13111312_dbg_assert_msg_(srcRect.offset.x >= 0, "srcrect offset x (%d) < 0", srcRect.offset.x);1313_dbg_assert_msg_(srcRect.offset.y >= 0, "srcrect offset y (%d) < 0", srcRect.offset.y);1314_dbg_assert_msg_(srcRect.offset.x + srcRect.extent.width <= (uint32_t)src->width, "srcrect offset x (%d) + extent (%d) > width (%d)", srcRect.offset.x, srcRect.extent.width, (uint32_t)src->width);1315_dbg_assert_msg_(srcRect.offset.y + srcRect.extent.height <= (uint32_t)src->height, "srcrect offset y (%d) + extent (%d) > height (%d)", srcRect.offset.y, srcRect.extent.height, (uint32_t)src->height);13161317_dbg_assert_msg_(srcRect.extent.width > 0, "blit srcwidth == 0");1318_dbg_assert_msg_(srcRect.extent.height > 0, "blit srcheight == 0");13191320_dbg_assert_msg_(dstRect.offset.x >= 0, "dstrect offset x < 0");1321_dbg_assert_msg_(dstRect.offset.y >= 0, "dstrect offset y < 0");1322_dbg_assert_msg_(dstRect.offset.x + dstRect.extent.width <= (uint32_t)dst->width, "dstrect offset x + extent > width");1323_dbg_assert_msg_(dstRect.offset.y + dstRect.extent.height <= (uint32_t)dst->height, "dstrect offset y + extent > height");13241325_dbg_assert_msg_(dstRect.extent.width > 0, "blit dstwidth == 0");1326_dbg_assert_msg_(dstRect.extent.height > 0, "blit dstheight == 0");13271328// TODO: Seem to be missing final layouts here like in Copy...13291330for (int i = (int)steps_.size() - 1; i >= 0; i--) {1331if 
(steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == src) {1332steps_[i]->render.numReads++;1333break;1334}1335}13361337EndCurRenderStep();13381339// Sanity check. Added an assert to try to gather more info.1340// Got this assert in NPJH50443 FINAL FANTASY TYPE-0, but pretty rare. Moving back to debug assert.1341if (aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {1342_dbg_assert_msg_(src->depth.image != VK_NULL_HANDLE, "%s", src->Tag());1343_dbg_assert_msg_(dst->depth.image != VK_NULL_HANDLE, "%s", dst->Tag());13441345if (!src->depth.image || !dst->depth.image) {1346// Something has gone wrong, but let's try to stumble along.1347return;1348}1349}13501351VKRStep *step = new VKRStep{ VKRStepType::BLIT };1352step->blit.aspectMask = aspectMask;1353step->blit.src = src;1354step->blit.srcRect = srcRect;1355step->blit.dst = dst;1356step->blit.dstRect = dstRect;1357step->blit.filter = filter;1358step->dependencies.insert(src);1359step->tag = tag;1360bool fillsDst = dst && dstRect.offset.x == 0 && dstRect.offset.y == 0 && dstRect.extent.width == dst->width && dstRect.extent.height == dst->height;1361if (!fillsDst)1362step->dependencies.insert(dst);13631364steps_.push_back(step);1365}13661367VkImageView VulkanRenderManager::BindFramebufferAsTexture(VKRFramebuffer *fb, int binding, VkImageAspectFlags aspectBit, int layer) {1368_dbg_assert_(curRenderStep_ != nullptr);13691370// We don't support texturing from stencil, neither do we support texturing from depth|stencil together (nonsensical).1371_dbg_assert_(aspectBit == VK_IMAGE_ASPECT_COLOR_BIT || aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT);13721373// Mark the dependency, check for required transitions, and return the image.13741375// Optimization: If possible, use final*Layout to put the texture into the correct layout "early".1376for (int i = (int)steps_.size() - 1; i >= 0; i--) {1377if (steps_[i]->stepType == VKRStepType::RENDER && steps_[i]->render.framebuffer == fb) {1378if 
(aspectBit == VK_IMAGE_ASPECT_COLOR_BIT) {1379// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1380if (steps_[i]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1381steps_[i]->render.finalColorLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1382}1383// If we find some other layout, a copy after this is likely involved. It's fine though,1384// we'll just transition it right as we need it and lose a tiny optimization.1385} else if (aspectBit == VK_IMAGE_ASPECT_DEPTH_BIT) {1386// If this framebuffer was rendered to earlier in this frame, make sure to pre-transition it to the correct layout.1387if (steps_[i]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {1388steps_[i]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1389}1390} // We don't (yet?) support texturing from stencil images.1391steps_[i]->render.numReads++;1392break;1393}1394}13951396// Track dependencies fully.1397curRenderStep_->dependencies.insert(fb);13981399// Add this pretransition unless we already have it.1400TransitionRequest rq{ fb, aspectBit, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL };1401curRenderStep_->preTransitions.insert(rq); // Note that insert avoids inserting duplicates.14021403if (layer == -1) {1404return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texAllLayersView : fb->depth.texAllLayersView;1405} else {1406return aspectBit == VK_IMAGE_ASPECT_COLOR_BIT ? fb->color.texLayerViews[layer] : fb->depth.texLayerViews[layer];1407}1408}14091410// Called on main thread.1411// Sends the collected commands to the render thread. 
Submit-latency should be1412// measured from here, probably.1413void VulkanRenderManager::Finish() {1414EndCurRenderStep();14151416// Let's do just a bit of cleanup on render commands now.1417// TODO: Should look into removing this.1418for (auto &step : steps_) {1419if (step->stepType == VKRStepType::RENDER) {1420CleanupRenderCommands(&step->commands);1421}1422}14231424int curFrame = vulkan_->GetCurFrame();1425FrameData &frameData = frameData_[curFrame];14261427if (!postInitBarrier_.empty()) {1428VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1429postInitBarrier_.Flush(buffer);1430}14311432VLOG("PUSH: Frame[%d]", curFrame);1433VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SUBMIT);1434task->frame = curFrame;1435if (useRenderThread_) {1436std::unique_lock<std::mutex> lock(pushMutex_);1437renderThreadQueue_.push(task);1438renderThreadQueue_.back()->steps = std::move(steps_);1439pushCondVar_.notify_one();1440} else {1441// Just do it!1442task->steps = std::move(steps_);1443Run(*task);1444delete task;1445}14461447steps_.clear();1448}14491450void VulkanRenderManager::Present() {1451int curFrame = vulkan_->GetCurFrame();14521453VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::PRESENT);1454task->frame = curFrame;1455if (useRenderThread_) {1456std::unique_lock<std::mutex> lock(pushMutex_);1457renderThreadQueue_.push(task);1458pushCondVar_.notify_one();1459} else {1460// Just do it!1461Run(*task);1462delete task;1463}14641465vulkan_->EndFrame();1466insideFrame_ = false;1467}14681469// Called on the render thread.1470//1471// Can be called again after a VKRRunType::SYNC on the same frame.1472void VulkanRenderManager::Run(VKRRenderThreadTask &task) {1473FrameData &frameData = frameData_[task.frame];14741475if (task.runType == VKRRunType::PRESENT) {1476if (!frameData.skipSwap) {1477VkResult res = frameData.QueuePresent(vulkan_, frameDataShared_);1478frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();1479if (res == 
VK_ERROR_OUT_OF_DATE_KHR) {1480// We clearly didn't get this in vkAcquireNextImageKHR because of the skipSwap check above.1481// Do the increment.1482outOfDateFrames_++;1483} else if (res == VK_SUBOPTIMAL_KHR) {1484outOfDateFrames_++;1485} else if (res != VK_SUCCESS) {1486_assert_msg_(false, "vkQueuePresentKHR failed! result=%s", VulkanResultToString(res));1487} else {1488// Success1489outOfDateFrames_ = 0;1490}1491} else {1492// We only get here if vkAcquireNextImage returned VK_ERROR_OUT_OF_DATE.1493outOfDateFrames_++;1494frameData.skipSwap = false;1495}1496return;1497}14981499_dbg_assert_(!frameData.hasPresentCommands);15001501if (!frameTimeHistory_[frameData.frameId].firstSubmit) {1502frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();1503}1504frameData.Submit(vulkan_, FrameSubmitType::Pending, frameDataShared_);15051506// Flush descriptors.1507double descStart = time_now_d();1508FlushDescriptors(task.frame);1509frameData.profile.descWriteTime = time_now_d() - descStart;15101511if (!frameData.hasMainCommands) {1512// Effectively resets both main and present command buffers, since they both live in this pool.1513// We always record main commands first, so we don't need to reset the present command buffer separately.1514vkResetCommandPool(vulkan_->GetDevice(), frameData.cmdPoolMain, 0);15151516VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };1517begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;1518VkResult res = vkBeginCommandBuffer(frameData.mainCmd, &begin);1519frameData.hasMainCommands = true;1520_assert_msg_(res == VK_SUCCESS, "vkBeginCommandBuffer failed! 
result=%s", VulkanResultToString(res));1521}15221523queueRunner_.PreprocessSteps(task.steps);1524// Likely during shutdown, happens in headless.1525if (task.steps.empty() && !frameData.hasAcquired)1526frameData.skipSwap = true;1527//queueRunner_.LogSteps(stepsOnThread, false);1528if (IsVREnabled()) {1529int passes = GetVRPassesCount();1530for (int i = 0; i < passes; i++) {1531PreVRFrameRender(i);1532queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_, i < passes - 1);1533PostVRFrameRender();1534}1535} else {1536queueRunner_.RunSteps(task.steps, task.frame, frameData, frameDataShared_);1537}15381539switch (task.runType) {1540case VKRRunType::SUBMIT:1541frameData.Submit(vulkan_, FrameSubmitType::FinishFrame, frameDataShared_);1542break;15431544case VKRRunType::SYNC:1545// The submit will trigger the readbackFence, and also do the wait for it.1546frameData.Submit(vulkan_, FrameSubmitType::Sync, frameDataShared_);15471548if (useRenderThread_) {1549std::unique_lock<std::mutex> lock(syncMutex_);1550syncCondVar_.notify_one();1551}15521553// At this point the GPU is idle, and we can resume filling the command buffers for the1554// current frame since and thus all previously enqueued command buffers have been1555// processed. 
No need to switch to the next frame number, would just be confusing.1556break;15571558default:1559_dbg_assert_(false);1560}15611562VLOG("PULL: Finished running frame %d", task.frame);1563}15641565// Called from main thread.1566void VulkanRenderManager::FlushSync() {1567_dbg_assert_(!curRenderStep_);15681569if (invalidationCallback_) {1570invalidationCallback_(InvalidationCallbackFlags::COMMAND_BUFFER_STATE);1571}15721573int curFrame = vulkan_->GetCurFrame();1574FrameData &frameData = frameData_[curFrame];15751576if (!postInitBarrier_.empty()) {1577VkCommandBuffer buffer = frameData.GetInitCmd(vulkan_);1578postInitBarrier_.Flush(buffer);1579}15801581if (useRenderThread_) {1582{1583VLOG("PUSH: Frame[%d]", curFrame);1584VKRRenderThreadTask *task = new VKRRenderThreadTask(VKRRunType::SYNC);1585task->frame = curFrame;1586{1587std::unique_lock<std::mutex> lock(pushMutex_);1588renderThreadQueue_.push(task);1589renderThreadQueue_.back()->steps = std::move(steps_);1590pushCondVar_.notify_one();1591}1592steps_.clear();1593}15941595{1596std::unique_lock<std::mutex> lock(syncMutex_);1597// Wait for the flush to be hit, since we're syncing.1598while (!frameData.syncDone) {1599VLOG("PUSH: Waiting for frame[%d].syncDone = 1 (sync)", curFrame);1600syncCondVar_.wait(lock);1601}1602frameData.syncDone = false;1603}1604} else {1605VKRRenderThreadTask task(VKRRunType::SYNC);1606task.frame = curFrame;1607task.steps = std::move(steps_);1608Run(task);1609steps_.clear();1610}1611}16121613void VulkanRenderManager::ResetStats() {1614initTimeMs_.Reset();1615totalGPUTimeMs_.Reset();1616renderCPUTimeMs_.Reset();1617}16181619VKRPipelineLayout *VulkanRenderManager::CreatePipelineLayout(BindingType *bindingTypes, size_t bindingTypesCount, bool geoShadersEnabled, const char *tag) {1620VKRPipelineLayout *layout = new VKRPipelineLayout();1621layout->SetTag(tag);1622layout->bindingTypesCount = (uint32_t)bindingTypesCount;16231624_dbg_assert_(bindingTypesCount <= 
ARRAY_SIZE(layout->bindingTypes));1625memcpy(layout->bindingTypes, bindingTypes, sizeof(BindingType) * bindingTypesCount);16261627VkDescriptorSetLayoutBinding bindings[VKRPipelineLayout::MAX_DESC_SET_BINDINGS];1628for (int i = 0; i < bindingTypesCount; i++) {1629bindings[i].binding = i;1630bindings[i].descriptorCount = 1;1631bindings[i].pImmutableSamplers = nullptr;16321633switch (bindingTypes[i]) {1634case BindingType::COMBINED_IMAGE_SAMPLER:1635bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1636bindings[i].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;1637break;1638case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1639bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1640bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1641break;1642case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1643bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1644bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT;1645if (geoShadersEnabled) {1646bindings[i].stageFlags |= VK_SHADER_STAGE_GEOMETRY_BIT;1647}1648break;1649case BindingType::STORAGE_BUFFER_VERTEX:1650bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1651bindings[i].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;1652break;1653case BindingType::STORAGE_BUFFER_COMPUTE:1654bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1655bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1656break;1657case BindingType::STORAGE_IMAGE_COMPUTE:1658bindings[i].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1659bindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;1660break;1661default:1662_dbg_assert_(false);1663break;1664}1665}16661667VkDescriptorSetLayoutCreateInfo dsl = { VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO };1668dsl.bindingCount = (uint32_t)bindingTypesCount;1669dsl.pBindings = bindings;1670VkResult res = vkCreateDescriptorSetLayout(vulkan_->GetDevice(), &dsl, nullptr, 
&layout->descriptorSetLayout);1671_assert_(VK_SUCCESS == res && layout->descriptorSetLayout);16721673VkPipelineLayoutCreateInfo pl = { VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO };1674VkDescriptorSetLayout setLayouts[1] = { layout->descriptorSetLayout };1675pl.setLayoutCount = ARRAY_SIZE(setLayouts);1676pl.pSetLayouts = setLayouts;1677res = vkCreatePipelineLayout(vulkan_->GetDevice(), &pl, nullptr, &layout->pipelineLayout);1678_assert_(VK_SUCCESS == res && layout->pipelineLayout);16791680vulkan_->SetDebugName(layout->descriptorSetLayout, VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, tag);1681vulkan_->SetDebugName(layout->pipelineLayout, VK_OBJECT_TYPE_PIPELINE_LAYOUT, tag);16821683for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1684// Some games go beyond 1024 and end up having to resize like GTA, but most stay below so we start there.1685layout->frameData[i].pool.Create(vulkan_, bindingTypes, (uint32_t)bindingTypesCount, 1024);1686}16871688pipelineLayouts_.push_back(layout);1689return layout;1690}16911692void VulkanRenderManager::DestroyPipelineLayout(VKRPipelineLayout *layout) {1693for (auto iter = pipelineLayouts_.begin(); iter != pipelineLayouts_.end(); iter++) {1694if (*iter == layout) {1695pipelineLayouts_.erase(iter);1696break;1697}1698}1699vulkan_->Delete().QueueCallback([](VulkanContext *vulkan, void *userdata) {1700VKRPipelineLayout *layout = (VKRPipelineLayout *)userdata;1701for (int i = 0; i < VulkanContext::MAX_INFLIGHT_FRAMES; i++) {1702layout->frameData[i].pool.DestroyImmediately();1703}1704vkDestroyPipelineLayout(vulkan->GetDevice(), layout->pipelineLayout, nullptr);1705vkDestroyDescriptorSetLayout(vulkan->GetDevice(), layout->descriptorSetLayout, nullptr);17061707delete layout;1708}, layout);1709}17101711void VulkanRenderManager::FlushDescriptors(int frame) {1712for (auto iter : pipelineLayouts_) {1713iter->FlushDescSets(vulkan_, frame, &frameData_[frame].profile);1714}1715}17161717void VulkanRenderManager::ResetDescriptorLists(int frame) 
{1718for (auto iter : pipelineLayouts_) {1719VKRPipelineLayout::FrameData &data = iter->frameData[frame];17201721data.flushedDescriptors_ = 0;1722data.descSets_.clear();1723data.descData_.clear();1724}1725}17261727VKRPipelineLayout::~VKRPipelineLayout() {1728_assert_(frameData[0].pool.IsDestroyed());1729}17301731void VKRPipelineLayout::FlushDescSets(VulkanContext *vulkan, int frame, QueueProfileContext *profile) {1732_dbg_assert_(frame < VulkanContext::MAX_INFLIGHT_FRAMES);17331734FrameData &data = frameData[frame];17351736VulkanDescSetPool &pool = data.pool;1737FastVec<PackedDescriptor> &descData = data.descData_;1738FastVec<PendingDescSet> &descSets = data.descSets_;17391740pool.Reset();17411742VkDescriptorSet setCache[8];1743VkDescriptorSetLayout layoutsForAlloc[ARRAY_SIZE(setCache)];1744for (int i = 0; i < ARRAY_SIZE(setCache); i++) {1745layoutsForAlloc[i] = descriptorSetLayout;1746}1747int setsUsed = ARRAY_SIZE(setCache); // To allocate immediately.17481749// This will write all descriptors.1750// Initially, we just do a simple look-back comparing to the previous descriptor to avoid sequential dupes.1751// In theory, we could multithread this. 
Gotta be a lot of descriptors for that to be worth it though.17521753// Initially, let's do naive single desc set writes.1754VkWriteDescriptorSet writes[MAX_DESC_SET_BINDINGS];1755VkDescriptorImageInfo imageInfo[MAX_DESC_SET_BINDINGS]; // just picked a practical number1756VkDescriptorBufferInfo bufferInfo[MAX_DESC_SET_BINDINGS];17571758// Preinitialize fields that won't change.1759for (size_t i = 0; i < ARRAY_SIZE(writes); i++) {1760writes[i].descriptorCount = 1;1761writes[i].dstArrayElement = 0;1762writes[i].pTexelBufferView = nullptr;1763writes[i].pNext = nullptr;1764writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;1765}17661767size_t start = data.flushedDescriptors_;1768int writeCount = 0, dedupCount = 0;17691770for (size_t index = start; index < descSets.size(); index++) {1771auto &d = descSets[index];17721773// This is where we look up to see if we already have an identical descriptor previously in the array.1774// We could do a simple custom hash map here that doesn't handle collisions, since those won't matter.1775// Instead, for now we just check history one item backwards. 
Good enough, it seems.1776if (index > start + 1) {1777if (descSets[index - 1].count == d.count) {1778if (!memcmp(descData.data() + d.offset, descData.data() + descSets[index - 1].offset, d.count * sizeof(PackedDescriptor))) {1779d.set = descSets[index - 1].set;1780dedupCount++;1781continue;1782}1783}1784}17851786if (setsUsed < ARRAY_SIZE(setCache)) {1787d.set = setCache[setsUsed++];1788} else {1789// Allocate in small batches.1790bool success = pool.Allocate(setCache, ARRAY_SIZE(setCache), layoutsForAlloc);1791_dbg_assert_(success);1792d.set = setCache[0];1793setsUsed = 1;1794}17951796// TODO: Build up bigger batches of writes.1797const PackedDescriptor *data = descData.begin() + d.offset;1798int numWrites = 0;1799int numBuffers = 0;1800int numImages = 0;1801for (int i = 0; i < d.count; i++) {1802if (!data[i].image.view) { // This automatically also checks for an null buffer due to the union.1803continue;1804}1805switch (this->bindingTypes[i]) {1806case BindingType::COMBINED_IMAGE_SAMPLER:1807_dbg_assert_(data[i].image.sampler != VK_NULL_HANDLE);1808_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1809imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;1810imageInfo[numImages].imageView = data[i].image.view;1811imageInfo[numImages].sampler = data[i].image.sampler;1812writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;1813writes[numWrites].pImageInfo = &imageInfo[numImages];1814writes[numWrites].pBufferInfo = nullptr;1815numImages++;1816break;1817case BindingType::STORAGE_IMAGE_COMPUTE:1818_dbg_assert_(data[i].image.view != VK_NULL_HANDLE);1819imageInfo[numImages].imageLayout = VK_IMAGE_LAYOUT_GENERAL;1820imageInfo[numImages].imageView = data[i].image.view;1821imageInfo[numImages].sampler = VK_NULL_HANDLE;1822writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;1823writes[numWrites].pImageInfo = &imageInfo[numImages];1824writes[numWrites].pBufferInfo = nullptr;1825numImages++;1826break;1827case 
BindingType::STORAGE_BUFFER_VERTEX:1828case BindingType::STORAGE_BUFFER_COMPUTE:1829_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1830bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1831bufferInfo[numBuffers].range = data[i].buffer.range;1832bufferInfo[numBuffers].offset = data[i].buffer.offset;1833writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;1834writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1835writes[numWrites].pImageInfo = nullptr;1836numBuffers++;1837break;1838case BindingType::UNIFORM_BUFFER_DYNAMIC_ALL:1839case BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX:1840_dbg_assert_(data[i].buffer.buffer != VK_NULL_HANDLE);1841bufferInfo[numBuffers].buffer = data[i].buffer.buffer;1842bufferInfo[numBuffers].range = data[i].buffer.range;1843bufferInfo[numBuffers].offset = 0;1844writes[numWrites].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;1845writes[numWrites].pBufferInfo = &bufferInfo[numBuffers];1846writes[numWrites].pImageInfo = nullptr;1847numBuffers++;1848break;1849}1850writes[numWrites].dstBinding = i;1851writes[numWrites].dstSet = d.set;1852numWrites++;1853}18541855vkUpdateDescriptorSets(vulkan->GetDevice(), numWrites, writes, 0, nullptr);18561857writeCount++;1858}18591860data.flushedDescriptors_ = (int)descSets.size();1861profile->descriptorsWritten += writeCount;1862profile->descriptorsDeduped += dedupCount;1863}18641865void VulkanRenderManager::SanityCheckPassesOnAdd() {1866#if _DEBUG1867// Check that we don't have any previous passes that write to the backbuffer, that must ALWAYS be the last one.1868for (int i = 0; i < steps_.size(); i++) {1869if (steps_[i]->stepType == VKRStepType::RENDER) {1870_dbg_assert_(steps_[i]->render.framebuffer != nullptr);1871}1872}1873#endif1874}187518761877