CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
Views: 1401
#include "ppsspp_config.h"
#include "GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/thin3d.h"
#include "Common/Thread/ThreadUtil.h"
#include "Common/VR/PPSSPPVR.h"

#include "Common/Log.h"
#include "Common/TimeUtil.h"
#include "Common/MemoryUtil.h"
#include "Common/StringUtils.h"
#include "Common/Math/math_util.h"

// Verbose logging for the push/pull handshake between the submitting thread and
// the render thread. Compiled out by default; flip the condition to enable it.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Set in ThreadStart() to the id of the thread that runs the GL queue.
std::thread::id renderThreadId;

// Records the texture dimensions and decides whether wrapping can be used:
// always when the device fully supports NPOT textures, otherwise only for
// power-of-two width and height.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	if (caps.textureNPOTFullySupported) {
		canWrap = true;
	} else {
		canWrap = isPowerOf2(width) && isPowerOf2(height);
	}
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}

// Releases the GL texture object, if one was ever created.
GLRTexture::~GLRTexture() {
	if (texture) {
		glDeleteTextures(1, &texture);
	}
}

GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}

// Expects the thread to have been stopped and all per-frame deleters to have
// been drained already (see ThreadEnd()).
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		_assert_(frameData_[i].deleter.IsEmpty());
		_assert_(frameData_[i].deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown?
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}

// Called on the render thread before any frames are run. Creates the queue
// runner's device objects, applies a pending inflight-frame count change and
// picks the buffer upload strategy.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	if (mapBuffers) {
		switch (gl_extensions.gpuVendor) {
		case GPU_VENDOR_NVIDIA:
			bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
			break;

		// Temporarily disabled because it doesn't work with task switching on Android.
		// The mapped buffer seems to just be pulled out like a rug from under us, crashing
		// as soon as any write happens, which can happen during shutdown since we write from the
		// Emu thread which may not yet have shut down. There may be solutions to this, but for now,
		// disable this strategy to avoid crashing.
		//case GPU_VENDOR_QUALCOMM:
		//	bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP;
		//	break;

		default:
			bufferStrategy_ = GLBufferStrategy::SUBDATA;
			break;
		}
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}

// Called on the render thread at shutdown. Destroys the queue runner's device
// objects, runs every pending deleter and frees any steps that were queued but
// never submitted.
void GLRenderManager::ThreadEnd() {
	INFO_LOG(Log::G3D, "ThreadEnd");

	queueRunner_.DestroyDeviceObjects();
	VLOG("  PULL: Quitting");

	// Good time to run all the deleters to get rid of leftover objects.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		// Since we're in shutdown, we should skip the GL calls on Android.
		frameData_[i].deleter.Perform(this, skipGLCalls_);
		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
	}
	deleter_.Perform(this, skipGLCalls_);
	for (int i = 0; i < (int)steps_.size(); i++) {
		delete steps_[i];
	}
	steps_.clear();
	initSteps_.clear();
}

// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
//
// This means that we have to block and run the render queue until we've presented one frame,
// at which point we can leave.
//
// NOTE: If run_ is true, we WILL run a task!
//
// Returns false without doing anything if the thread is not in a running
// state, true after an EXIT task or a task that requested a swap was handled.
bool GLRenderManager::ThreadFrame() {
	if (!runCompileThread_) {
		return false;
	}

	// In case of syncs or other partial completion, we keep going until we complete a frame.
	while (true) {
		GLRRenderThreadTask *task = nullptr;

		// Pop a task of the queue and execute it.
		// NOTE: We need to actually wait for a task, we can't just bail!
		{
			std::unique_lock<std::mutex> lock(pushMutex_);
			while (renderThreadQueue_.empty()) {
				pushCondVar_.wait(lock);
			}
			task = renderThreadQueue_.front();
			renderThreadQueue_.pop();
		}

		// We got a task! We can now have pushMutex_ unlocked, allowing the host to
		// push more work when it feels like it, and just start working.
		if (task->runType == GLRRunType::EXIT) {
			delete task;
			// Oh, host wanted out. Let's leave, and also let's notify the host.
			// This is unlike Vulkan too which can just block on the thread existing.
			std::unique_lock<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
			break;
		}

		// Render the scene.
		VLOG("  PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
		const bool swapRequested = Run(*task);
		delete task;
		if (swapRequested) {
			// Swap requested, so we just bail the loop.
			break;
		}
	}

	return true;
}

// Marks the manager as stopped and queues an EXIT task so a ThreadFrame() call
// blocked on the queue wakes up.
void GLRenderManager::StopThread() {
	// There's not really a lot to do here anymore.
	INFO_LOG(Log::G3D, "GLRenderManager::StopThread()");
	if (runCompileThread_) {
		runCompileThread_ = false;

		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
		pushCondVar_.notify_one();
	} else {
		WARN_LOG(Log::G3D, "GL submission thread was already paused.");
	}
}

// Marks the manager as running; ThreadFrame() refuses to do work otherwise.
void GLRenderManager::StartThread() {
	// There's not really a lot to do here anymore.
	INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
	if (!runCompileThread_) {
		runCompileThread_ = true;
	} else {
		INFO_LOG(Log::G3D, "GL submission thread was already running.");
	}
}

// Formats the CPU time recorded for the current frame slot, followed by the
// pass summary captured by the last profiled Finish().
std::string GLRenderManager::GetGpuProfileString() const {
	int curFrame = curFrame_;
	const GLQueueProfileContext &profile = frameData_[curFrame].profile;

	float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
	return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
}

// Starts a new render step targeting fb (or continues the previous one when
// the bind is redundant) and queues a CLEAR command for any aspect whose
// action is CLEAR.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Eliminate dupes.
	if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->tag = tag;
	steps_.push_back(step);

	// Collect every requested clear into a single CLEAR command.
	GLuint clearMask = 0;
	GLRRenderData data(GLRRenderCommand::CLEAR);
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	// Keeping any aspect means this step reads the framebuffer's previous contents.
	if (fb) {
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}

// Queues a command in the current render step that binds fb as a texture in
// the given slot, and records fb as a dependency of that step.
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
	GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
	data.bind_fb_texture.slot = binding;
	data.bind_fb_texture.framebuffer = fb;
	data.bind_fb_texture.aspect = aspectBit;
	curRenderStep_->commands.push_back(data);
	curRenderStep_->dependencies.insert(fb);
}

// Queues a COPY step from src to dst. dst is only recorded as a dependency
// when the copy does not overwrite it completely.
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::COPY };
	step->copy.srcRect = srcRect;
	step->copy.dstPos = dstPos;
	step->copy.src = src;
	step->copy.dst = dst;
	step->copy.aspectMask = aspectMask;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

// Queues a BLIT step from src to dst. dst is only recorded as a dependency
// when dstRect does not cover it completely.
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::BLIT };
	step->blit.srcRect = srcRect;
	step->blit.dstRect = dstRect;
	step->blit.src = src;
	step->blit.dst = dst;
	step->blit.aspectMask = aspectMask;
	step->blit.filter = filter;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

// Queues a READBACK step, blocks in FlushSync() until the render thread has
// run it, then converts the readback buffer into pixels.
// Returns false (after the sync has already happened) when aspectBits contains
// none of the color, stencil or depth bits.
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *step = new GLRStep{ GLRStepType::READBACK };
	step->readback.src = src;
	step->readback.srcRect = { x, y, w, h };
	step->readback.aspectMask = aspectBits;
	step->readback.dstFormat = destFormat;
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	Draw::DataFormat srcFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}

// Queues a READBACK_IMAGE step for one mip level of texture, blocks in
// FlushSync() until it has run, then converts the readback buffer (treated as
// R8G8B8A8_UNORM) into pixels.
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);
	GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE };
	step->readback_image.texture = texture;
	step->readback_image.mipLevel = mipLevel;
	step->readback_image.srcRect = { x, y, w, h };
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}

// Starts recording a new frame. Blocks until the render thread has marked the
// current frame slot as ready (readyForFence), then claims it.
void GLRenderManager::BeginFrame(bool enableProfiling) {
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Shouldn't call BeginFrame unless we're in a run state.
	_dbg_assert_(runCompileThread_);

	int curFrame = GetCurFrame();

	FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
	frameTimeData.frameBegin = time_now_d();
	frameTimeData.afterFenceWait = frameTimeData.frameBegin;

	GLFrameData &frameData = frameData_[curFrame];
	frameData.frameId = frameIdGen_;
	frameData.profile.enabled = enableProfiling;

	frameIdGen_++;
	{
		std::unique_lock<std::mutex> lock(frameData.fenceMutex);
		VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
		while (!frameData.readyForFence) {
			frameData.fenceCondVar.wait(lock);
		}
		frameData.readyForFence = false;
	}

	insideFrame_ = true;
}

// Hands all steps recorded so far to the render thread as a SUBMIT task, and
// moves the pending deletes into the current frame's deleter.
void GLRenderManager::Finish() {
	curRenderStep_ = nullptr;  // EndCurRenderStep is this simple here.

	int curFrame = curFrame_;
	GLFrameData &frameData = frameData_[curFrame];

	frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();

	frameData_[curFrame].deleter.Take(deleter_);

	if (frameData.profile.enabled) {
		profilePassesString_ = std::move(frameData.profile.passesString);

#ifdef _DEBUG
		std::string cmdString;
		for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
			if (frameData.profile.commandCounts[i] > 0) {
				cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
			}
		}
		memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
		profilePassesString_ = cmdString + profilePassesString_;
#endif

		frameData.profile.passesString.clear();
	}

	VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
	GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
	task->frame = curFrame;
	{
		std::unique_lock<std::mutex> lock(pushMutex_);
		task->initSteps = std::move(initSteps_);
		task->steps = std::move(steps_);
		// A moved-from vector is only guaranteed to be valid, not empty.
		initSteps_.clear();
		steps_.clear();
		renderThreadQueue_.push(task);
		pushCondVar_.notify_one();
	}
}

// Queues a PRESENT task for the current frame slot and advances to the next
// slot, wrapping at inflightFrames_.
void GLRenderManager::Present() {
	GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
	presentTask->frame = curFrame_;
	{
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(presentTask);
		pushCondVar_.notify_one();
	}

	int newCurFrame = curFrame_ + 1;
	if (newCurFrame >= inflightFrames_) {
		newCurFrame = 0;
	}
	curFrame_ = newCurFrame;

	insideFrame_ = false;
}

// Render thread. Returns true if the caller should handle a swap.
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
	_dbg_assert_(task.frame >= 0);

	GLFrameData &frameData = frameData_[task.frame];

	if (task.runType == GLRRunType::PRESENT) {
		bool swapRequest = false;
		if (!frameData.skipSwap) {
			frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
			if (swapIntervalChanged_) {
				swapIntervalChanged_ = false;
				if (swapIntervalFunction_) {
					swapIntervalFunction_(swapInterval_);
				}
			}
			// This is the swapchain framebuffer flip.
			if (swapFunction_) {
				VLOG("  PULL: SwapFunction()");
				swapFunction_();
			}
			swapRequest = true;
		} else {
			frameData.skipSwap = false;
		}
		frameData.hasBegun = false;

		VLOG("  PULL: Frame %d.readyForFence = true", task.frame);

		{
			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
			frameData.readyForFence = true;
			frameData.fenceCondVar.notify_one();
			// At this point, we're done with this framedata (for now).
		}
		return swapRequest;
	}

	// First task of this frame slot: run the deletes queued the last time the
	// slot was used, then stage this frame's deletes for the next time around.
	if (!frameData.hasBegun) {
		frameData.hasBegun = true;

		frameData.deleter_prev.Perform(this, skipGLCalls_);
		frameData.deleter_prev.Take(frameData.deleter);
	}

	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuStartTime = time_now_d();
	}

	if (IsVREnabled()) {
		// In VR the same step list is run once per pass.
		int passes = GetVRPassesCount();
		for (int i = 0; i < passes; i++) {
			PreVRFrameRender(i);
			queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
			PostVRFrameRender();
		}
	} else {
		queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuEndTime = time_now_d();
	}

	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (task.runType) {
	case GLRRunType::SUBMIT:
		break;

	case GLRRunType::SYNC:
		frameData.hasBegun = false;

		// glFinish is not actually necessary here, and won't be unless we start using
		// glBufferStorage. Then we need to use fences.
		{
			std::lock_guard<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
		}
		break;

	default:
		_assert_(false);
	}
	VLOG("  PULL: ::Run(): Done running tasks");
	return false;
}

// Hands all steps recorded so far to the render thread as a SYNC task and
// blocks until the render thread signals that it has run them.
void GLRenderManager::FlushSync() {
	{
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);

		GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
		task->frame = curFrame_;

		std::unique_lock<std::mutex> lock(pushMutex_);
		task->initSteps = std::move(initSteps_);
		task->steps = std::move(steps_);
		// Reset both lists, as Finish() does; a moved-from vector is only
		// guaranteed to be valid, not empty.
		initSteps_.clear();
		steps_.clear();
		renderThreadQueue_.push(task);
		pushCondVar_.notify_one();
	}

	{
		std::unique_lock<std::mutex> lock(syncMutex_);
		// Wait for the flush to be hit, since we're syncing.
		while (!syncDone_) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
			syncCondVar_.wait(lock);
		}
		syncDone_ = false;
	}
}