Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
5658 views
#include "ppsspp_config.h"
#include "GLRenderManager.h"
#include "Common/GPU/OpenGL/GLFeatures.h"
#include "Common/GPU/thin3d.h"
#include "Common/Thread/ThreadUtil.h"
#include "Common/VR/PPSSPPVR.h"

#include "Common/Log.h"
#include "Common/TimeUtil.h"
#include "Common/MemoryUtil.h"
#include "Common/StringUtils.h"
#include "Common/Math/math_util.h"

// Verbose logging for the push/pull handshake. Compiled out unless the "#if 0" is flipped.
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif

// Set by GLRenderManager::ThreadStart() to the id of the thread that called it.
std::thread::id renderThreadId;

// Records the texture dimensions and mip count, and decides whether the texture may use
// wrapping: always if the device fully supports NPOT textures, otherwise only when both
// width and height are powers of two.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	if (caps.textureNPOTFullySupported) {
		canWrap = true;
	} else {
		canWrap = isPowerOf2(width) && isPowerOf2(height);
	}
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}

// Deletes the GL texture object, if one was ever created.
GLRTexture::~GLRTexture() {
	if (texture) {
		glDeleteTextures(1, &texture);
	}
}

// Only stores the reference to the frame time history; nothing else is set up here.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}

// Expects runCompileThread_ to already be false (see ThreadEnd) and all per-frame deleters
// to be empty. Runs the shared deleter one last time.
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		_assert_(frameData_[i].deleter.IsEmpty());
		_assert_(frameData_[i].deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown?
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}

// Creates the queue runner's device objects, records the calling thread as the render
// thread, applies any pending inflight-frame count and picks the buffer upload strategy.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	if (!gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	if (mapBuffers) {
		switch (gl_extensions.gpuVendor) {
		case GPU_VENDOR_NVIDIA:
			bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
			break;

		// Temporarily disabled because it doesn't work with task switching on Android.
		// The mapped buffer seems to just be pulled out like a rug from under us, crashing
		// as soon as any write happens, which can happen during shutdown since we write from the
		// Emu thread which may not yet have shut down. There may be solutions to this, but for now,
		// disable this strategy to avoid crashing.
		//case GPU_VENDOR_QUALCOMM:
		//	bufferStrategy_ = GLBufferStrategy::FLUSH_INVALIDATE_UNMAP;
		//	break;

		default:
			bufferStrategy_ = GLBufferStrategy::SUBDATA;
			break;
		}
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}

// Clears runCompileThread_, destroys the queue runner's device objects, runs every deleter
// and frees any steps that were queued but never handed to a task.
void GLRenderManager::ThreadEnd() {
	INFO_LOG(Log::G3D, "GLRenderManager::ThreadEnd begin");

	runCompileThread_ = false;

	queueRunner_.DestroyDeviceObjects();

	VLOG(" PULL: Quitting");

	// Good time to run all the deleters to get rid of leftover objects.
	for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
		// Since we're in shutdown, we should skip the GL calls on Android.
		frameData_[i].deleter.Perform(this, skipGLCalls_);
		frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
	}
	deleter_.Perform(this, skipGLCalls_);
	for (int i = 0; i < (int)steps_.size(); i++) {
		delete steps_[i];
	}
	steps_.clear();
	initSteps_.clear();
	INFO_LOG(Log::G3D, "GLRenderManager::ThreadEnd end");
}

// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
//
// This means that we have to block and run the render queue until we've presented one frame,
// at which point we can leave.
//
// NOTE: If run_ is true, we WILL run a task!
//
// Returns true once Run() reports a swap request. Returns false if waitIfEmpty is false and
// the queue is empty, in which case syncDone_ is set and syncCondVar_ is notified.
bool GLRenderManager::ThreadFrame(bool waitIfEmpty) {
	_assert_(runCompileThread_);

	GLRRenderThreadTask *task = nullptr;

	// In case of syncs or other partial completion, we keep going until we complete a frame.
	while (true) {
		// Pop a task off the queue and execute it. Exiting this loop is done with a special EXIT task,
		// to keep things uniform.
		{
			std::unique_lock<std::mutex> lock(pushMutex_);

			if (!waitIfEmpty && renderThreadQueue_.empty()) {
				lock.unlock();
				// Oh, host wanted out. Let's leave, and also let's notify the host.
				// This is unlike Vulkan too which can just block on the thread existing.
				// Distinct name so this guard doesn't shadow the pushMutex_ lock above.
				std::unique_lock<std::mutex> syncLock(syncMutex_);
				syncDone_ = true;
				syncCondVar_.notify_one();
				return false;
			}

			pushCondVar_.wait(lock, [this] { return !renderThreadQueue_.empty(); });
			task = renderThreadQueue_.front();
			renderThreadQueue_.pop();
		}

		// Render the scene.
		VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
		const bool swapRequested = Run(*task);
		// The task is owned by this loop once popped, whatever Run() returned.
		delete task;
		if (swapRequested) {
			// Swap requested, so we just bail the loop.
			break;
		}
	}

	return true;
}

// Marks the manager as running by setting runCompileThread_. Does not create a thread itself.
void GLRenderManager::StartThread() {
	// There's not really a lot to do here anymore.
	INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
	if (!runCompileThread_) {
		runCompileThread_ = true;
	} else {
		INFO_LOG(Log::G3D, "GL submission thread was already running.");
	}
}

// Formats the CPU time recorded in the current frame's profile (in milliseconds), followed
// by the pass summary string saved by the last profiled Finish().
std::string GLRenderManager::GetGpuProfileString() const {
	int curFrame = curFrame_;
	const GLQueueProfileContext &profile = frameData_[curFrame].profile;

	float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
	return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
}

// Starts a new RENDER step targeting fb, unless the previous step already renders to fb and
// no aspect is being cleared, in which case that step is simply continued.
// Any aspect whose action is CLEAR is cleared by a CLEAR command pushed as the first command
// of the new step. If fb is non-null and any aspect is KEEP, fb is added to the step's
// dependencies.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Eliminate dupes.
	if (!steps_.empty() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->tag = tag;
	steps_.push_back(step);

	GLuint clearMask = 0;
	GLRRenderData data(GLRRenderCommand::CLEAR);
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}

// Queues a BIND_FB_TEXTURE command on the current render step and records fb as a
// dependency of that step.
// aspectBit: GL_COLOR_BUFFER_BIT etc
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
	GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
	data.bind_fb_texture.slot = binding;
	data.bind_fb_texture.framebuffer = fb;
	data.bind_fb_texture.aspect = aspectBit;
	curRenderStep_->commands.push_back(data);
	curRenderStep_->dependencies.insert(fb);
}

// Queues a COPY step from src to dst. src is always a dependency; dst is a dependency
// unless the copy starts at the origin and srcRect covers dst's full width and height.
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::COPY };
	step->copy.srcRect = srcRect;
	step->copy.dstPos = dstPos;
	step->copy.src = src;
	step->copy.dst = dst;
	step->copy.aspectMask = aspectMask;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (dstPos.x != 0 || dstPos.y != 0 || !fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

// Queues a BLIT step from src to dst. src is always a dependency; dst is a dependency
// unless dstRect covers dst entirely.
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *step = new GLRStep{ GLRStepType::BLIT };
	step->blit.srcRect = srcRect;
	step->blit.dstRect = dstRect;
	step->blit.src = src;
	step->blit.dst = dst;
	step->blit.aspectMask = aspectMask;
	step->blit.filter = filter;
	step->dependencies.insert(src);
	step->tag = tag;
	bool fillsDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!fillsDst)
		step->dependencies.insert(dst);
	steps_.push_back(step);
}

// Queues a READBACK step, blocks in FlushSync() until it has been processed, then copies
// the readback buffer into pixels, converting to destFormat.
// Returns false (after the flush has already happened) if aspectBits contains none of the
// color, stencil or depth bits; returns true otherwise.
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *step = new GLRStep{ GLRStepType::READBACK };
	step->readback.src = src;
	step->readback.srcRect = { x, y, w, h };
	step->readback.aspectMask = aspectBits;
	step->readback.dstFormat = destFormat;
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	Draw::DataFormat srcFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}

// Queues a READBACK_IMAGE step for the given texture mip level, blocks in FlushSync() until
// it has been processed, then copies the readback buffer (treated as R8G8B8A8_UNORM) into
// pixels, converting to destFormat.
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);
	GLRStep *step = new GLRStep{ GLRStepType::READBACK_IMAGE };
	step->readback_image.texture = texture;
	step->readback_image.mipLevel = mipLevel;
	step->readback_image.srcRect = { x, y, w, h };
	step->tag = tag;
	steps_.push_back(step);

	curRenderStep_ = nullptr;
	FlushSync();

	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}

// Starts recording a new frame: allocates a frame id and its timing entry, then blocks
// until the current frame slot's readyForFence flag is set (Run() sets it when it handles
// that slot's PRESENT task), and consumes the flag.
void GLRenderManager::BeginFrame(bool enableProfiling) {
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Shouldn't call BeginFrame unless we're in a run state.
	_dbg_assert_(runCompileThread_);

	int curFrame = GetCurFrame();

	FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
	frameTimeData.frameBegin = time_now_d();
	frameTimeData.afterFenceWait = frameTimeData.frameBegin;

	GLFrameData &frameData = frameData_[curFrame];
	frameData.frameId = frameIdGen_;
	frameData.profile.enabled = enableProfiling;

	frameIdGen_++;
	{
		std::unique_lock<std::mutex> lock(frameData.fenceMutex);
		VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
		while (!frameData.readyForFence) {
			frameData.fenceCondVar.wait(lock);
		}
		frameData.readyForFence = false;
	}

	insideFrame_ = true;
}

// Ends recording for the current frame: hands the pending deletes to the frame's deleter,
// captures profiling text if enabled, and pushes a SUBMIT task carrying the recorded init
// steps and steps onto the render thread queue.
void GLRenderManager::Finish() {
	curRenderStep_ = nullptr;  // EndCurRenderStep is this simple here.

	int curFrame = curFrame_;
	GLFrameData &frameData = frameData_[curFrame];

	frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();

	frameData.deleter.Take(deleter_);

	if (frameData.profile.enabled) {
		profilePassesString_ = std::move(frameData.profile.passesString);

#ifdef _DEBUG
		std::string cmdString;
		for (int i = 0; i < (int)ARRAY_SIZE(frameData.profile.commandCounts); i++) {
			if (frameData.profile.commandCounts[i] > 0) {
				cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
			}
		}
		memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
		profilePassesString_ = cmdString + profilePassesString_;
#endif

		frameData.profile.passesString.clear();
	}

	VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
	GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
	task->frame = curFrame;
	{
		std::unique_lock<std::mutex> lock(pushMutex_);
		task->initSteps = std::move(initSteps_);
		task->steps = std::move(steps_);
		renderThreadQueue_.push(task);
		// Moved-from vectors are only "valid but unspecified"; make them definitely empty.
		initSteps_.clear();
		steps_.clear();
		pushCondVar_.notify_one();
	}
}

// Pushes a PRESENT task for the current frame, then advances curFrame_ to the next slot
// (wrapping at inflightFrames_) and leaves the "inside frame" state.
void GLRenderManager::Present() {
	GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
	presentTask->frame = curFrame_;
	{
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(presentTask);
		pushCondVar_.notify_one();
	}

	int newCurFrame = curFrame_ + 1;
	if (newCurFrame >= inflightFrames_) {
		newCurFrame = 0;
	}
	curFrame_ = newCurFrame;

	insideFrame_ = false;
}

// Render thread. Returns true if the caller should handle a swap.
//
// PRESENT tasks perform the swap (unless skipSwap is set for the frame), mark the frame slot
// as ready for BeginFrame() and return. SUBMIT and SYNC tasks run the init steps and steps;
// SYNC additionally signals syncCondVar_ so FlushSync() can return. Only PRESENT can return true.
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
	_dbg_assert_(task.frame >= 0);

	GLFrameData &frameData = frameData_[task.frame];

	if (task.runType == GLRRunType::PRESENT) {
		bool swapRequest = false;
		if (!frameData.skipSwap) {
			frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
			if (swapIntervalChanged_) {
				swapIntervalChanged_ = false;
				if (swapIntervalFunction_) {
					swapIntervalFunction_(swapInterval_);
				}
			}
			// This is the swapchain framebuffer flip.
			if (swapFunction_) {
				VLOG(" PULL: SwapFunction()");
				swapFunction_();
			}
			swapRequest = true;
		} else {
			frameData.skipSwap = false;
		}
		frameData.hasBegun = false;

		VLOG(" PULL: Frame %d.readyForFence = true", task.frame);

		{
			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
			frameData.readyForFence = true;
			frameData.fenceCondVar.notify_one();
			// At this point, we're done with this framedata (for now).
		}
		return swapRequest;
	}

	if (!frameData.hasBegun) {
		frameData.hasBegun = true;

		// First task for this frame slot since it was last presented or synced:
		// run the older batch of deletes, then age the newer batch into deleter_prev.
		frameData.deleter_prev.Perform(this, skipGLCalls_);
		frameData.deleter_prev.Take(frameData.deleter);
	}

	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuStartTime = time_now_d();
	}

	if (IsVREnabled()) {
		// In VR the same step list is run once per pass.
		int passes = GetVRPassesCount();
		for (int i = 0; i < passes; i++) {
			PreVRFrameRender(i);
			queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
			PostVRFrameRender();
		}
	} else {
		queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuEndTime = time_now_d();
	}

	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (task.runType) {
	case GLRRunType::SUBMIT:
		break;

	case GLRRunType::SYNC:
		frameData.hasBegun = false;

		// glFinish is not actually necessary here, and won't be unless we start using
		// glBufferStorage. Then we need to use fences.
		{
			std::lock_guard<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
		}
		break;

	default:
		_assert_(false);
	}
	VLOG(" PULL: ::Run(): Done running tasks");
	return false;
}

// Pushes a SYNC task carrying everything recorded so far (init steps and steps), then
// blocks until syncDone_ is set, and resets the flag for the next sync.
void GLRenderManager::FlushSync() {
	{
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);

		GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
		task->frame = curFrame_;

		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(task);
		renderThreadQueue_.back()->initSteps = std::move(initSteps_);
		renderThreadQueue_.back()->steps = std::move(steps_);
		pushCondVar_.notify_one();
		// Same as Finish(): leave both moved-from vectors in a known-empty state.
		initSteps_.clear();
		steps_.clear();
	}

	{
		std::unique_lock<std::mutex> lock(syncMutex_);
		// Wait for the flush to be hit, since we're syncing.
		while (!syncDone_) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
			syncCondVar_.wait(lock);
		}
		syncDone_ = false;
	}
}