Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
5658 views
1
#include "ppsspp_config.h"
2
#include "GLRenderManager.h"
3
#include "Common/GPU/OpenGL/GLFeatures.h"
4
#include "Common/GPU/thin3d.h"
5
#include "Common/Thread/ThreadUtil.h"
6
#include "Common/VR/PPSSPPVR.h"
7
8
#include "Common/Log.h"
9
#include "Common/TimeUtil.h"
10
#include "Common/MemoryUtil.h"
11
#include "Common/StringUtils.h"
12
#include "Common/Math/math_util.h"
13
14
#if 0 // def _DEBUG
15
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
16
#else
17
#define VLOG(...)
18
#endif
19
20
// Id of the thread that ran GLRenderManager::ThreadStart(); it is assigned there
// from std::this_thread::get_id().
std::thread::id renderThreadId;
21
22
// Records the texture dimensions and mip count, and decides the canWrap flag.
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;

	// canWrap is set when the device reports full NPOT texture support, or when both
	// the width and the height are powers of two.
	canWrap = caps.textureNPOTFullySupported || (isPowerOf2(width) && isPowerOf2(height));
}
33
34
// Deletes the GL texture object, if one was ever assigned to this wrapper.
GLRTexture::~GLRTexture() {
	if (!texture) {
		// No GL name, nothing to release.
		return;
	}
	glDeleteTextures(1, &texture);
}
39
40
// Stores a reference to the shared frame-time history; no other setup happens here.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory)
	: frameTimeHistory_(frameTimeHistory) {
	// Disabled size check kept for reference:
	// _dbg_assert_(sizeof(GLRRenderData) == 88);
}
44
45
// Verifies that everything has been torn down: the thread flag must be off and all
// per-frame deleters must already be empty. Then runs the global deleter one last time.
GLRenderManager::~GLRenderManager() {
	_dbg_assert_(!runCompileThread_);

	for (int frame = 0; frame < MAX_INFLIGHT_FRAMES; ++frame) {
		GLFrameData &slot = frameData_[frame];
		_assert_(slot.deleter.IsEmpty());
		_assert_(slot.deleter_prev.IsEmpty());
	}

	// Anything queued for deletion during shutdown gets handled here.
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}
56
57
// Called on the render thread at startup. Creates the queue runner's device objects,
// records the calling thread's id, applies a pending inflight-frame count change, and
// picks the buffer upload strategy.
//
// draw is only consulted here; it is deliberately not stored, to avoid any thread
// safety confusion.
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	// -1 means "no change requested".
	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// NOTE(review): mapping is enabled when the ANY_MAP_BUFFER_RANGE_SLOW bug flag is SET,
	// which reads as inverted - confirm the intent before changing it.
	bool mapBuffers = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	const bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	const bool mappingCannotWork = !gl_extensions.VersionGEThan(3, 0, 0) && gl_extensions.IsGLES && !hasBufferStorage;
	if (mappingCannotWork) {
		// Force disable if it wouldn't work anyway.
		mapBuffers = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	//
	// GPU_VENDOR_QUALCOMM used to select GLBufferStrategy::FLUSH_INVALIDATE_UNMAP, but that is
	// temporarily disabled because it doesn't work with task switching on Android. The mapped
	// buffer seems to just be pulled out from under us, crashing as soon as any write happens,
	// which can happen during shutdown since we write from the Emu thread which may not yet have
	// shut down. There may be solutions to this, but for now the strategy stays disabled.
	GLBufferStrategy chosen = GLBufferStrategy::SUBDATA;
	if (mapBuffers && gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) {
		chosen = GLBufferStrategy::FRAME_UNMAP;
	}
	bufferStrategy_ = chosen;
}
100
101
void GLRenderManager::ThreadEnd() {
102
INFO_LOG(Log::G3D, "GLRenderManager::ThreadEnd begin");
103
104
runCompileThread_ = false;
105
106
queueRunner_.DestroyDeviceObjects();
107
108
VLOG(" PULL: Quitting");
109
110
// Good time to run all the deleters to get rid of leftover objects.
111
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
112
// Since we're in shutdown, we should skip the GL calls on Android.
113
frameData_[i].deleter.Perform(this, skipGLCalls_);
114
frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
115
}
116
deleter_.Perform(this, skipGLCalls_);
117
for (int i = 0; i < (int)steps_.size(); i++) {
118
delete steps_[i];
119
}
120
steps_.clear();
121
initSteps_.clear();
122
INFO_LOG(Log::G3D, "GLRenderManager::ThreadEnd end");
123
}
124
125
// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
126
//
127
// This means that we have to block and run the render queue until we've presented one frame,
128
// at which point we can leave.
129
//
130
// NOTE: If run_ is true, we WILL run a task!
131
bool GLRenderManager::ThreadFrame(bool waitIfEmpty) {
132
_assert_(runCompileThread_);
133
134
GLRRenderThreadTask *task = nullptr;
135
136
// In case of syncs or other partial completion, we keep going until we complete a frame.
137
while (true) {
138
// Pop a task off the queue and execute it. Exiting this loop is done with a special EXIT task,
139
// to keep things uniform.
140
{
141
std::unique_lock<std::mutex> lock(pushMutex_);
142
143
if (!waitIfEmpty && renderThreadQueue_.empty()) {
144
lock.unlock();
145
// Oh, host wanted out. Let's leave, and also let's notify the host.
146
// This is unlike Vulkan too which can just block on the thread existing.
147
std::unique_lock<std::mutex> lock(syncMutex_);
148
syncCondVar_.notify_one();
149
syncDone_ = true;
150
return false;
151
}
152
153
pushCondVar_.wait(lock, [this] { return !renderThreadQueue_.empty(); });
154
task = renderThreadQueue_.front();
155
renderThreadQueue_.pop();
156
}
157
158
// Render the scene.
159
VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
160
if (Run(*task)) {
161
// Swap requested, so we just bail the loop.
162
delete task;
163
break;
164
}
165
delete task;
166
};
167
168
return true;
169
}
170
171
void GLRenderManager::StartThread() {
172
// There's not really a lot to do here anymore.
173
INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
174
if (!runCompileThread_) {
175
runCompileThread_ = true;
176
} else {
177
INFO_LOG(Log::G3D, "GL submission thread was already running.");
178
}
179
}
180
181
std::string GLRenderManager::GetGpuProfileString() const {
182
int curFrame = curFrame_;
183
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
184
185
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
186
return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
187
}
188
189
// Begins rendering to fb by queueing a new RENDER step, or by reusing the most recent step
// when it already targets fb and no clear is requested.
//
// color/depth/stencil: load actions for the respective aspects. Each aspect set to CLEAR adds
//   its bit to a single CLEAR command queued at the start of the step, using
//   clearColor/clearDepth/clearStencil as the clear values.
// tag: stored on the step as-is.
// Must be called inside a frame (asserted via insideFrame_).
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	const bool clearsColor = color == GLRRenderPassAction::CLEAR;
	const bool clearsDepth = depth == GLRRenderPassAction::CLEAR;
	const bool clearsStencil = stencil == GLRRenderPassAction::CLEAR;

	// Eliminate dupes: if the last step already renders to fb and nothing needs clearing,
	// this bind was unnecessary and we can keep appending to that step.
	if (steps_.size() && !clearsColor && !clearsDepth && !clearsStencil) {
		GLRStep *last = steps_.back();
		if (last->stepType == GLRStepType::RENDER && last->render.framebuffer == fb) {
			curRenderStep_ = last;
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	// This is what queues up new passes, and can end previous ones.
	GLRStep *pass = new GLRStep{ GLRStepType::RENDER };
	pass->render.framebuffer = fb;
	pass->render.color = color;
	pass->render.depth = depth;
	pass->render.stencil = stencil;
	pass->tag = tag;
	steps_.push_back(pass);

	// Build one clear command covering every aspect that asked for CLEAR.
	GLRRenderData clearCmd(GLRRenderCommand::CLEAR);
	GLuint aspects = 0;
	if (clearsColor) {
		aspects |= GL_COLOR_BUFFER_BIT;
		clearCmd.clear.clearColor = clearColor;
	}
	if (clearsDepth) {
		aspects |= GL_DEPTH_BUFFER_BIT;
		clearCmd.clear.clearZ = clearDepth;
	}
	if (clearsStencil) {
		aspects |= GL_STENCIL_BUFFER_BIT;
		clearCmd.clear.clearStencil = clearStencil;
	}
	if (aspects != 0) {
		clearCmd.clear.scissorX = 0;
		clearCmd.clear.scissorY = 0;
		clearCmd.clear.scissorW = 0;
		clearCmd.clear.scissorH = 0;
		clearCmd.clear.clearMask = aspects;
		clearCmd.clear.colorMask = 0xF;
		pass->commands.push_back(clearCmd);
	}
	curRenderStep_ = pass;

	// Keeping any aspect of a real framebuffer records fb as a dependency of this step.
	const bool keepsSomething = color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP;
	if (fb && keepsSomething) {
		pass->dependencies.insert(fb);
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}
251
252
// aspectBit: GL_COLOR_BUFFER_BIT etc
253
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
254
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
255
_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
256
GLRRenderData data{ GLRRenderCommand::BIND_FB_TEXTURE };
257
data.bind_fb_texture.slot = binding;
258
data.bind_fb_texture.framebuffer = fb;
259
data.bind_fb_texture.aspect = aspectBit;
260
curRenderStep_->commands.push_back(data);
261
curRenderStep_->dependencies.insert(fb);
262
}
263
264
// Queues a COPY step that copies srcRect of src to dstPos in dst for the given aspects.
// src is always recorded as a dependency. dst is recorded as one too, unless dst is
// non-null, srcRect starts at (0,0) with dst's full width and height, and dstPos is (0,0).
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *copyStep = new GLRStep{ GLRStepType::COPY };
	copyStep->copy.src = src;
	copyStep->copy.dst = dst;
	copyStep->copy.srcRect = srcRect;
	copyStep->copy.dstPos = dstPos;
	copyStep->copy.aspectMask = aspectMask;
	copyStep->tag = tag;
	copyStep->dependencies.insert(src);

	const bool rectMatchesDst = dst != nullptr && srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	const bool overwritesAllOfDst = rectMatchesDst && dstPos.x == 0 && dstPos.y == 0;
	if (!overwritesAllOfDst) {
		copyStep->dependencies.insert(dst);
	}

	steps_.push_back(copyStep);
}
278
279
// Queues a BLIT step from srcRect of src to dstRect of dst for the given aspects,
// passing `filter` through to the step.
// src is always recorded as a dependency. dst is recorded as one too, unless dst is
// non-null and dstRect starts at (0,0) with dst's full width and height.
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *blitStep = new GLRStep{ GLRStepType::BLIT };
	blitStep->blit.src = src;
	blitStep->blit.dst = dst;
	blitStep->blit.srcRect = srcRect;
	blitStep->blit.dstRect = dstRect;
	blitStep->blit.aspectMask = aspectMask;
	blitStep->blit.filter = filter;
	blitStep->tag = tag;
	blitStep->dependencies.insert(src);

	const bool coversAllOfDst = dst != nullptr && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!coversAllOfDst) {
		blitStep->dependencies.insert(dst);
	}

	steps_.push_back(blitStep);
}
294
295
// Synchronously reads back a rectangle of src into `pixels`.
//
// Queues a READBACK step, ends the current render step, flushes and waits via FlushSync(),
// then converts from the readback buffer into `pixels` using destFormat and pixelStride.
// The source format is chosen from aspectBits, checked in this order: color
// (R8G8B8A8_UNORM), stencil (S8), depth (D24_S8).
// Returns false if aspectBits contains none of those bits, true otherwise.
// `mode` is not consulted in this function. `pixels` must be non-null (asserted).
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *readStep = new GLRStep{ GLRStepType::READBACK };
	readStep->readback.src = src;
	readStep->readback.srcRect = { x, y, w, h };
	readStep->readback.aspectMask = aspectBits;
	readStep->readback.dstFormat = destFormat;
	readStep->tag = tag;
	readStep->dependencies.insert(src);
	steps_.push_back(readStep);

	curRenderStep_ = nullptr;
	FlushSync();

	// NOTE(review): the aspect check only happens after the step was queued and flushed,
	// so an unsupported aspectBits still costs a full sync - confirm this is intended.
	const bool hasColor = (aspectBits & GL_COLOR_BUFFER_BIT) != 0;
	const bool hasStencil = (aspectBits & GL_STENCIL_BUFFER_BIT) != 0;
	const bool hasDepth = (aspectBits & GL_DEPTH_BUFFER_BIT) != 0;
	if (!hasColor && !hasStencil && !hasDepth) {
		return false;
	}

	Draw::DataFormat srcFormat;
	if (hasColor) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (hasStencil) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	}

	queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}
325
326
// Synchronously reads back a rectangle of one mip level of `texture` into `pixels`.
//
// Queues a READBACK_IMAGE step, ends the current render step, flushes and waits via
// FlushSync(), then converts from the readback buffer (treated as R8G8B8A8_UNORM) into
// `pixels` using destFormat and pixelStride. Both pointers must be non-null (asserted).
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);

	GLRStep *readStep = new GLRStep{ GLRStepType::READBACK_IMAGE };
	readStep->readback_image.texture = texture;
	readStep->readback_image.mipLevel = mipLevel;
	readStep->readback_image.srcRect = { x, y, w, h };
	readStep->tag = tag;
	steps_.push_back(readStep);

	curRenderStep_ = nullptr;
	FlushSync();

	// No framebuffer is involved here, so the source argument is null.
	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}
341
342
void GLRenderManager::BeginFrame(bool enableProfiling) {
343
#ifdef _DEBUG
344
curProgram_ = nullptr;
345
#endif
346
347
// Shouldn't call BeginFrame unless we're in a run state.
348
_dbg_assert_(runCompileThread_);
349
350
int curFrame = GetCurFrame();
351
352
FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
353
frameTimeData.frameBegin = time_now_d();
354
frameTimeData.afterFenceWait = frameTimeData.frameBegin;
355
356
GLFrameData &frameData = frameData_[curFrame];
357
frameData.frameId = frameIdGen_;
358
frameData.profile.enabled = enableProfiling;
359
360
frameIdGen_++;
361
{
362
std::unique_lock<std::mutex> lock(frameData.fenceMutex);
363
VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
364
while (!frameData.readyForFence) {
365
frameData.fenceCondVar.wait(lock);
366
}
367
frameData.readyForFence = false;
368
}
369
370
insideFrame_ = true;
371
}
372
373
void GLRenderManager::Finish() {
374
curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.
375
376
int curFrame = curFrame_;
377
GLFrameData &frameData = frameData_[curFrame];
378
379
frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();
380
381
frameData_[curFrame].deleter.Take(deleter_);
382
383
if (frameData.profile.enabled) {
384
profilePassesString_ = std::move(frameData.profile.passesString);
385
386
#ifdef _DEBUG
387
std::string cmdString;
388
for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
389
if (frameData.profile.commandCounts[i] > 0) {
390
cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
391
}
392
}
393
memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
394
profilePassesString_ = cmdString + profilePassesString_;
395
#endif
396
397
frameData.profile.passesString.clear();
398
}
399
400
VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
401
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
402
task->frame = curFrame;
403
{
404
std::unique_lock<std::mutex> lock(pushMutex_);
405
task->initSteps = std::move(initSteps_);
406
task->steps = std::move(steps_);
407
renderThreadQueue_.push(task);
408
initSteps_.clear();
409
steps_.clear();
410
pushCondVar_.notify_one();
411
}
412
}
413
414
void GLRenderManager::Present() {
415
GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
416
presentTask->frame = curFrame_;
417
{
418
std::unique_lock<std::mutex> lock(pushMutex_);
419
renderThreadQueue_.push(presentTask);
420
pushCondVar_.notify_one();
421
}
422
423
int newCurFrame = curFrame_ + 1;
424
if (newCurFrame >= inflightFrames_) {
425
newCurFrame = 0;
426
}
427
curFrame_ = newCurFrame;
428
429
insideFrame_ = false;
430
}
431
432
// Render thread. Returns true if the caller should handle a swap.
433
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
434
_dbg_assert_(task.frame >= 0);
435
436
GLFrameData &frameData = frameData_[task.frame];
437
438
if (task.runType == GLRRunType::PRESENT) {
439
bool swapRequest = false;
440
if (!frameData.skipSwap) {
441
frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
442
if (swapIntervalChanged_) {
443
swapIntervalChanged_ = false;
444
if (swapIntervalFunction_) {
445
swapIntervalFunction_(swapInterval_);
446
}
447
}
448
// This is the swapchain framebuffer flip.
449
if (swapFunction_) {
450
VLOG(" PULL: SwapFunction()");
451
swapFunction_();
452
}
453
swapRequest = true;
454
} else {
455
frameData.skipSwap = false;
456
}
457
frameData.hasBegun = false;
458
459
VLOG(" PULL: Frame %d.readyForFence = true", task.frame);
460
461
{
462
std::lock_guard<std::mutex> lock(frameData.fenceMutex);
463
frameData.readyForFence = true;
464
frameData.fenceCondVar.notify_one();
465
// At this point, we're done with this framedata (for now).
466
}
467
return swapRequest;
468
}
469
470
if (!frameData.hasBegun) {
471
frameData.hasBegun = true;
472
473
frameData.deleter_prev.Perform(this, skipGLCalls_);
474
frameData.deleter_prev.Take(frameData.deleter);
475
}
476
477
// queueRunner_.LogSteps(stepsOnThread);
478
queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);
479
480
// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
481
if (!skipGLCalls_) {
482
for (auto iter : frameData.activePushBuffers) {
483
iter->Flush();
484
iter->UnmapDevice();
485
}
486
}
487
488
if (frameData.profile.enabled) {
489
frameData.profile.cpuStartTime = time_now_d();
490
}
491
492
if (IsVREnabled()) {
493
int passes = GetVRPassesCount();
494
for (int i = 0; i < passes; i++) {
495
PreVRFrameRender(i);
496
queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
497
PostVRFrameRender();
498
}
499
} else {
500
queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
501
}
502
503
if (frameData.profile.enabled) {
504
frameData.profile.cpuEndTime = time_now_d();
505
}
506
507
if (!skipGLCalls_) {
508
for (auto iter : frameData.activePushBuffers) {
509
iter->MapDevice(bufferStrategy_);
510
}
511
}
512
513
switch (task.runType) {
514
case GLRRunType::SUBMIT:
515
break;
516
517
case GLRRunType::SYNC:
518
frameData.hasBegun = false;
519
520
// glFinish is not actually necessary here, and won't be unless we start using
521
// glBufferStorage. Then we need to use fences.
522
{
523
std::lock_guard<std::mutex> lock(syncMutex_);
524
syncDone_ = true;
525
syncCondVar_.notify_one();
526
}
527
break;
528
529
default:
530
_assert_(false);
531
}
532
VLOG(" PULL: ::Run(): Done running tasks");
533
return false;
534
}
535
536
void GLRenderManager::FlushSync() {
537
{
538
VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);
539
540
GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
541
task->frame = curFrame_;
542
543
std::unique_lock<std::mutex> lock(pushMutex_);
544
renderThreadQueue_.push(task);
545
renderThreadQueue_.back()->initSteps = std::move(initSteps_);
546
renderThreadQueue_.back()->steps = std::move(steps_);
547
pushCondVar_.notify_one();
548
steps_.clear();
549
}
550
551
{
552
std::unique_lock<std::mutex> lock(syncMutex_);
553
// Wait for the flush to be hit, since we're syncing.
554
while (!syncDone_) {
555
VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
556
syncCondVar_.wait(lock);
557
}
558
syncDone_ = false;
559
}
560
}
561
562