CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/OpenGL/GLRenderManager.cpp
Views: 1401
1
#include "ppsspp_config.h"
2
#include "GLRenderManager.h"
3
#include "Common/GPU/OpenGL/GLFeatures.h"
4
#include "Common/GPU/thin3d.h"
5
#include "Common/Thread/ThreadUtil.h"
6
#include "Common/VR/PPSSPPVR.h"
7
8
#include "Common/Log.h"
9
#include "Common/TimeUtil.h"
10
#include "Common/MemoryUtil.h"
11
#include "Common/StringUtils.h"
12
#include "Common/Math/math_util.h"
13
14
// Verbose queue logging, compiled out by default. Change the "#if 0" to enable
// (originally gated on _DEBUG, see the trailing comment).
#if 0 // def _DEBUG
#define VLOG(...) INFO_LOG(Log::G3D, __VA_ARGS__)
#else
#define VLOG(...)
#endif
19
20
// Identity of the render/submission thread; assigned in GLRenderManager::ThreadStart().
std::thread::id renderThreadId;
21
22
GLRTexture::GLRTexture(const Draw::DeviceCaps &caps, int width, int height, int depth, int numMips) {
	// Wrap modes (REPEAT/MIRROR) are only safe on non-power-of-two textures when the
	// driver fully supports NPOT; otherwise require POT dimensions.
	canWrap = caps.textureNPOTFullySupported ? true : (isPowerOf2(width) && isPowerOf2(height));
	w = width;
	h = height;
	d = depth;
	this->numMips = numMips;
}
33
34
GLRTexture::~GLRTexture() {
	// A texture that never had its GL object created has texture == 0; nothing to free then.
	if (texture != 0) {
		glDeleteTextures(1, &texture);
	}
}
39
40
// Constructor only wires up the externally-owned frame time history buffer.
GLRenderManager::GLRenderManager(HistoryBuffer<FrameTimeData, FRAME_TIME_HISTORY_LENGTH> &frameTimeHistory) : frameTimeHistory_(frameTimeHistory) {
	// Kept around for quickly checking the size of the render data struct when optimizing.
	// size_t sz = sizeof(GLRRenderData);
	// _dbg_assert_(sz == 88);
}
44
45
GLRenderManager::~GLRenderManager() {
	// The submission thread must have been stopped before destruction.
	_dbg_assert_(!runCompileThread_);

	// All per-frame deleters should have been drained by ThreadEnd().
	for (auto &frame : frameData_) {
		_assert_(frame.deleter.IsEmpty());
		_assert_(frame.deleter_prev.IsEmpty());
	}
	// Was anything deleted during shutdown? Flush it now.
	deleter_.Perform(this, skipGLCalls_);
	_assert_(deleter_.IsEmpty());
}
56
57
void GLRenderManager::ThreadStart(Draw::DrawContext *draw) {
	queueRunner_.CreateDeviceObjects();
	renderThreadId = std::this_thread::get_id();

	// Apply any pending inflight-frame-count change now that we own the thread.
	if (newInflightFrames_ != -1) {
		INFO_LOG(Log::G3D, "Updating inflight frames to %d", newInflightFrames_);
		inflightFrames_ = newInflightFrames_;
		newInflightFrames_ = -1;
	}

	// Don't save draw, we don't want any thread safety confusion.
	bool useMapping = draw->GetBugs().Has(Draw::Bugs::ANY_MAP_BUFFER_RANGE_SLOW);
	const bool hasBufferStorage = gl_extensions.ARB_buffer_storage || gl_extensions.EXT_buffer_storage;
	// On GLES before 3.0 without buffer storage, mapping wouldn't work anyway - force it off.
	if (gl_extensions.IsGLES && !gl_extensions.VersionGEThan(3, 0, 0) && !hasBufferStorage) {
		useMapping = false;
	}

	// Notes on buffer mapping:
	// NVIDIA GTX 9xx / 2017-10 drivers - mapping improves speed, basic unmap seems best.
	// PowerVR GX6xxx / iOS 10.3 - mapping has little improvement, explicit flush is slower.
	//
	// Qualcomm's FLUSH_INVALIDATE_UNMAP strategy is disabled: with task switching on
	// Android, the mapped buffer can be pulled out from under us, crashing as soon as any
	// write happens - which can occur during shutdown since the Emu thread may still be
	// writing. Until that's solved, everyone except NVIDIA uses plain glBufferSubData.
	if (useMapping && gl_extensions.gpuVendor == GPU_VENDOR_NVIDIA) {
		bufferStrategy_ = GLBufferStrategy::FRAME_UNMAP;
	} else {
		bufferStrategy_ = GLBufferStrategy::SUBDATA;
	}
}
100
101
void GLRenderManager::ThreadEnd() {
102
INFO_LOG(Log::G3D, "ThreadEnd");
103
104
queueRunner_.DestroyDeviceObjects();
105
VLOG(" PULL: Quitting");
106
107
// Good time to run all the deleters to get rid of leftover objects.
108
for (int i = 0; i < MAX_INFLIGHT_FRAMES; i++) {
109
// Since we're in shutdown, we should skip the GL calls on Android.
110
frameData_[i].deleter.Perform(this, skipGLCalls_);
111
frameData_[i].deleter_prev.Perform(this, skipGLCalls_);
112
}
113
deleter_.Perform(this, skipGLCalls_);
114
for (int i = 0; i < (int)steps_.size(); i++) {
115
delete steps_[i];
116
}
117
steps_.clear();
118
initSteps_.clear();
119
}
120
121
// Unlike in Vulkan, this isn't a full independent function, instead it gets called every frame.
122
//
123
// This means that we have to block and run the render queue until we've presented one frame,
124
// at which point we can leave.
125
//
126
// NOTE: If run_ is true, we WILL run a task!
127
bool GLRenderManager::ThreadFrame() {
	// If the thread has been asked to stop, don't wait for or consume more tasks.
	if (!runCompileThread_) {
		return false;
	}

	GLRRenderThreadTask *task = nullptr;

	// In case of syncs or other partial completion, we keep going until we complete a frame.
	while (true) {
		// Pop a task of the queue and execute it.
		// NOTE: We need to actually wait for a task, we can't just bail!
		{
			std::unique_lock<std::mutex> lock(pushMutex_);
			while (renderThreadQueue_.empty()) {
				pushCondVar_.wait(lock);
			}
			task = std::move(renderThreadQueue_.front());
			renderThreadQueue_.pop();
		}

		// We got a task! We can now have pushMutex_ unlocked, allowing the host to
		// push more work when it feels like it, and just start working.
		if (task->runType == GLRRunType::EXIT) {
			delete task;
			// Oh, host wanted out. Let's leave, and also let's notify the host.
			// This is unlike Vulkan too which can just block on the thread existing.
			// syncDone_ is set under syncMutex_ so FlushSync() can't miss the wakeup.
			std::unique_lock<std::mutex> lock(syncMutex_);
			syncCondVar_.notify_one();
			syncDone_ = true;
			break;
		}

		// Render the scene.
		VLOG(" PULL: Frame %d RUN (%0.3f)", task->frame, time_now_d());
		if (Run(*task)) {
			// Swap requested, so we just bail the loop.
			delete task;
			break;
		}
		delete task;
	};

	return true;
}
171
172
void GLRenderManager::StopThread() {
173
// There's not really a lot to do here anymore.
174
INFO_LOG(Log::G3D, "GLRenderManager::StopThread()");
175
if (runCompileThread_) {
176
runCompileThread_ = false;
177
178
std::unique_lock<std::mutex> lock(pushMutex_);
179
renderThreadQueue_.push(new GLRRenderThreadTask(GLRRunType::EXIT));
180
pushCondVar_.notify_one();
181
} else {
182
WARN_LOG(Log::G3D, "GL submission thread was already paused.");
183
}
184
}
185
186
void GLRenderManager::StartThread() {
187
// There's not really a lot to do here anymore.
188
INFO_LOG(Log::G3D, "GLRenderManager::StartThread()");
189
if (!runCompileThread_) {
190
runCompileThread_ = true;
191
} else {
192
INFO_LOG(Log::G3D, "GL submission thread was already running.");
193
}
194
}
195
196
std::string GLRenderManager::GetGpuProfileString() const {
197
int curFrame = curFrame_;
198
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
199
200
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
201
return StringFromFormat("CPU time to run the list: %0.2f ms\n\n%s", cputime_ms, profilePassesString_.c_str());
202
}
203
204
// Starts (or merges into) a render pass targeting fb (nullptr = backbuffer).
// CLEAR actions are translated into a CLEAR command at the head of the new step.
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
	_assert_(insideFrame_);
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Eliminate dupes.
	if (steps_.size() && steps_.back()->stepType == GLRStepType::RENDER && steps_.back()->render.framebuffer == fb) {
		if (color != GLRRenderPassAction::CLEAR && depth != GLRRenderPassAction::CLEAR && stencil != GLRRenderPassAction::CLEAR) {
			// We don't move to a new step, this bind was unnecessary and we can safely skip it.
			curRenderStep_ = steps_.back();
			return;
		}
	}
	if (curRenderStep_ && curRenderStep_->commands.size() == 0) {
		VLOG("Empty render step. Usually happens after uploading pixels.");
	}

	GLRStep *step = new GLRStep{ GLRStepType::RENDER };
	// This is what queues up new passes, and can end previous ones.
	step->render.framebuffer = fb;
	step->render.color = color;
	step->render.depth = depth;
	step->render.stencil = stencil;
	step->tag = tag;
	steps_.push_back(step);

	// Collect the requested clears into a single CLEAR command.
	GLuint clearMask = 0;
	GLRRenderData data(GLRRenderCommand::CLEAR);
	if (color == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_COLOR_BUFFER_BIT;
		data.clear.clearColor = clearColor;
	}
	if (depth == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_DEPTH_BUFFER_BIT;
		data.clear.clearZ = clearDepth;
	}
	if (stencil == GLRRenderPassAction::CLEAR) {
		clearMask |= GL_STENCIL_BUFFER_BIT;
		data.clear.clearStencil = clearStencil;
	}
	if (clearMask) {
		// Zero scissor rect here means the clear applies to the whole target.
		data.clear.scissorX = 0;
		data.clear.scissorY = 0;
		data.clear.scissorW = 0;
		data.clear.scissorH = 0;
		data.clear.clearMask = clearMask;
		data.clear.colorMask = 0xF;
		step->commands.push_back(data);
	}
	curRenderStep_ = step;

	if (fb) {
		// KEEP means we read the previous contents, so this pass depends on earlier
		// passes that wrote to fb.
		if (color == GLRRenderPassAction::KEEP || depth == GLRRenderPassAction::KEEP || stencil == GLRRenderPassAction::KEEP) {
			step->dependencies.insert(fb);
		}
	}

	if (invalidationCallback_) {
		invalidationCallback_(InvalidationCallbackFlags::RENDER_PASS_STATE);
	}
}
266
267
void GLRenderManager::BindFramebufferAsTexture(GLRFramebuffer *fb, int binding, int aspectBit) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	_dbg_assert_(binding < MAX_GL_TEXTURE_SLOTS);
	// Queue a command to sample from the given framebuffer in the current render step.
	GLRRenderData cmd{ GLRRenderCommand::BIND_FB_TEXTURE };
	cmd.bind_fb_texture.slot = binding;
	cmd.bind_fb_texture.framebuffer = fb;
	cmd.bind_fb_texture.aspect = aspectBit;
	curRenderStep_->commands.push_back(cmd);
	// Sampling from fb orders this step after the passes that rendered into it.
	curRenderStep_->dependencies.insert(fb);
}
277
278
void GLRenderManager::CopyFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLOffset2D dstPos, int aspectMask, const char *tag) {
	GLRStep *copyStep = new GLRStep{ GLRStepType::COPY };
	copyStep->copy.src = src;
	copyStep->copy.dst = dst;
	copyStep->copy.srcRect = srcRect;
	copyStep->copy.dstPos = dstPos;
	copyStep->copy.aspectMask = aspectMask;
	copyStep->tag = tag;
	copyStep->dependencies.insert(src);
	// If the copy lands at the origin and covers the whole destination, previous
	// contents of dst are irrelevant - no dependency on earlier writes needed.
	const bool replacesDst = dst && dstPos.x == 0 && dstPos.y == 0 &&
		srcRect.x == 0 && srcRect.y == 0 && srcRect.w == dst->width && srcRect.h == dst->height;
	if (!replacesDst)
		copyStep->dependencies.insert(dst);
	steps_.push_back(copyStep);
}
292
293
void GLRenderManager::BlitFramebuffer(GLRFramebuffer *src, GLRect2D srcRect, GLRFramebuffer *dst, GLRect2D dstRect, int aspectMask, bool filter, const char *tag) {
	GLRStep *blitStep = new GLRStep{ GLRStepType::BLIT };
	blitStep->blit.src = src;
	blitStep->blit.dst = dst;
	blitStep->blit.srcRect = srcRect;
	blitStep->blit.dstRect = dstRect;
	blitStep->blit.aspectMask = aspectMask;
	blitStep->blit.filter = filter;
	blitStep->tag = tag;
	blitStep->dependencies.insert(src);
	// A blit that covers the entire destination overwrites it completely, so we
	// only need a dependency on dst when it's partially covered.
	const bool coversDst = dst && dstRect.x == 0 && dstRect.y == 0 && dstRect.w == dst->width && dstRect.h == dst->height;
	if (!coversDst)
		blitStep->dependencies.insert(dst);
	steps_.push_back(blitStep);
}
308
309
// Synchronously reads back a region of src into pixels, converting to destFormat.
// Returns false if aspectBits names no supported aspect. Note that the readback
// step is queued and flushed before the aspect is validated.
bool GLRenderManager::CopyFramebufferToMemory(GLRFramebuffer *src, int aspectBits, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, Draw::ReadbackMode mode, const char *tag) {
	_assert_(pixels);

	GLRStep *step = new GLRStep{ GLRStepType::READBACK };
	step->readback.src = src;
	step->readback.srcRect = { x, y, w, h };
	step->readback.aspectMask = aspectBits;
	step->readback.dstFormat = destFormat;
	step->dependencies.insert(src);
	step->tag = tag;
	steps_.push_back(step);

	// End any current render pass, then block until the render thread has executed
	// everything queued so far (including the readback above).
	curRenderStep_ = nullptr;
	FlushSync();

	// Determine the format the data arrived in. Priority: color, then stencil, then depth.
	Draw::DataFormat srcFormat;
	if (aspectBits & GL_COLOR_BUFFER_BIT) {
		srcFormat = Draw::DataFormat::R8G8B8A8_UNORM;
	} else if (aspectBits & GL_STENCIL_BUFFER_BIT) {
		// Copies from stencil are always S8.
		srcFormat = Draw::DataFormat::S8;
	} else if (aspectBits & GL_DEPTH_BUFFER_BIT) {
		// TODO: Do this properly.
		srcFormat = Draw::DataFormat::D24_S8;
	} else {
		return false;
	}
	// Convert from srcFormat to destFormat while copying out of the readback buffer.
	queueRunner_.CopyFromReadbackBuffer(src, w, h, srcFormat, destFormat, pixelStride, pixels);
	return true;
}
339
340
void GLRenderManager::CopyImageToMemorySync(GLRTexture *texture, int mipLevel, int x, int y, int w, int h, Draw::DataFormat destFormat, uint8_t *pixels, int pixelStride, const char *tag) {
	_assert_(texture);
	_assert_(pixels);
	// Queue a readback of the requested mip level region.
	GLRStep *readbackStep = new GLRStep{ GLRStepType::READBACK_IMAGE };
	readbackStep->readback_image.texture = texture;
	readbackStep->readback_image.mipLevel = mipLevel;
	readbackStep->readback_image.srcRect = { x, y, w, h };
	readbackStep->tag = tag;
	steps_.push_back(readbackStep);

	// End any current render pass and block until the render thread has run everything.
	curRenderStep_ = nullptr;
	FlushSync();

	// Image readbacks arrive as RGBA8888; convert to destFormat while copying out.
	queueRunner_.CopyFromReadbackBuffer(nullptr, w, h, Draw::DataFormat::R8G8B8A8_UNORM, destFormat, pixelStride, pixels);
}
355
356
// Begins a new frame on the emu thread: starts frame timing, then blocks until the
// render thread has released this frame slot (the "fence").
void GLRenderManager::BeginFrame(bool enableProfiling) {
#ifdef _DEBUG
	curProgram_ = nullptr;
#endif

	// Shouldn't call BeginFrame unless we're in a run state.
	_dbg_assert_(runCompileThread_);

	int curFrame = GetCurFrame();

	FrameTimeData &frameTimeData = frameTimeHistory_.Add(frameIdGen_);
	frameTimeData.frameBegin = time_now_d();
	frameTimeData.afterFenceWait = frameTimeData.frameBegin;

	GLFrameData &frameData = frameData_[curFrame];
	frameData.frameId = frameIdGen_;
	frameData.profile.enabled = enableProfiling;

	frameIdGen_++;
	{
		// Wait until Run() (PRESENT) marks this slot ready; readyForFence is the
		// condition, guarded by the per-frame fenceMutex.
		std::unique_lock<std::mutex> lock(frameData.fenceMutex);
		VLOG("PUSH: BeginFrame (curFrame = %d, readyForFence = %d, time=%0.3f)", curFrame, (int)frameData.readyForFence, time_now_d());
		while (!frameData.readyForFence) {
			frameData.fenceCondVar.wait(lock);
		}
		frameData.readyForFence = false;
	}

	insideFrame_ = true;
}
386
387
// Ends the current frame's recording on the emu thread and submits the accumulated
// steps to the render thread as a SUBMIT task.
void GLRenderManager::Finish() {
	curRenderStep_ = nullptr; // EndCurRenderStep is this simple here.

	int curFrame = curFrame_;
	GLFrameData &frameData = frameData_[curFrame];

	frameTimeHistory_[frameData.frameId].firstSubmit = time_now_d();

	// Hand the globally accumulated deletions to this frame's deleter so they run
	// once the frame is retired on the render thread.
	frameData_[curFrame].deleter.Take(deleter_);

	if (frameData.profile.enabled) {
		profilePassesString_ = std::move(frameData.profile.passesString);

#ifdef _DEBUG
		// Debug builds also prepend per-command counts gathered by the queue runner.
		std::string cmdString;
		for (int i = 0; i < ARRAY_SIZE(frameData.profile.commandCounts); i++) {
			if (frameData.profile.commandCounts[i] > 0) {
				cmdString += StringFromFormat("%s: %d\n", RenderCommandToString((GLRRenderCommand)i), frameData.profile.commandCounts[i]);
			}
		}
		memset(frameData.profile.commandCounts, 0, sizeof(frameData.profile.commandCounts));
		profilePassesString_ = cmdString + profilePassesString_;
#endif

		frameData.profile.passesString.clear();
	}

	VLOG("PUSH: Finish, pushing task. curFrame = %d", curFrame);
	GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SUBMIT);
	task->frame = curFrame;
	{
		// Move the recorded init steps and render steps into the task, then wake
		// the render thread.
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(task);
		renderThreadQueue_.back()->initSteps = std::move(initSteps_);
		renderThreadQueue_.back()->steps = std::move(steps_);
		initSteps_.clear();
		steps_.clear();
		pushCondVar_.notify_one();
	}
}
427
428
void GLRenderManager::Present() {
429
GLRRenderThreadTask *presentTask = new GLRRenderThreadTask(GLRRunType::PRESENT);
430
presentTask->frame = curFrame_;
431
{
432
std::unique_lock<std::mutex> lock(pushMutex_);
433
renderThreadQueue_.push(presentTask);
434
pushCondVar_.notify_one();
435
}
436
437
int newCurFrame = curFrame_ + 1;
438
if (newCurFrame >= inflightFrames_) {
439
newCurFrame = 0;
440
}
441
curFrame_ = newCurFrame;
442
443
insideFrame_ = false;
444
}
445
446
// Render thread. Returns true if the caller should handle a swap.
447
// Render thread. Executes one task's steps. Returns true if the caller should
// handle a swap (only possible for PRESENT tasks).
bool GLRenderManager::Run(GLRRenderThreadTask &task) {
	_dbg_assert_(task.frame >= 0);

	GLFrameData &frameData = frameData_[task.frame];

	// PRESENT tasks carry no steps: perform the swap (unless skipped), release the
	// frame slot fence, and return.
	if (task.runType == GLRRunType::PRESENT) {
		bool swapRequest = false;
		if (!frameData.skipSwap) {
			frameTimeHistory_[frameData.frameId].queuePresent = time_now_d();
			// Apply a pending swap-interval (vsync) change on this thread.
			if (swapIntervalChanged_) {
				swapIntervalChanged_ = false;
				if (swapIntervalFunction_) {
					swapIntervalFunction_(swapInterval_);
				}
			}
			// This is the swapchain framebuffer flip.
			if (swapFunction_) {
				VLOG(" PULL: SwapFunction()");
				swapFunction_();
			}
			swapRequest = true;
		} else {
			frameData.skipSwap = false;
		}
		frameData.hasBegun = false;

		VLOG(" PULL: Frame %d.readyForFence = true", task.frame);

		{
			// Unblock BeginFrame(), which may be waiting on this frame slot.
			std::lock_guard<std::mutex> lock(frameData.fenceMutex);
			frameData.readyForFence = true;
			frameData.fenceCondVar.notify_one();
			// At this point, we're done with this framedata (for now).
		}
		return swapRequest;
	}

	// First task for this frame slot: run the deleter from the previous use of the
	// slot, then rotate the current deleter into its place.
	if (!frameData.hasBegun) {
		frameData.hasBegun = true;

		frameData.deleter_prev.Perform(this, skipGLCalls_);
		frameData.deleter_prev.Take(frameData.deleter);
	}

	// queueRunner_.LogSteps(stepsOnThread);
	queueRunner_.RunInitSteps(task.initSteps, skipGLCalls_);

	// Run this after RunInitSteps so any fresh GLRBuffers for the pushbuffers can get created.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->Flush();
			iter->UnmapDevice();
		}
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuStartTime = time_now_d();
	}

	if (IsVREnabled()) {
		// In VR, the step list is executed once per pass.
		int passes = GetVRPassesCount();
		for (int i = 0; i < passes; i++) {
			PreVRFrameRender(i);
			queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, i < passes - 1, true);
			PostVRFrameRender();
		}
	} else {
		queueRunner_.RunSteps(task.steps, frameData, skipGLCalls_, false, false);
	}

	if (frameData.profile.enabled) {
		frameData.profile.cpuEndTime = time_now_d();
	}

	// Re-map the pushbuffers so the emu thread can write into them again.
	if (!skipGLCalls_) {
		for (auto iter : frameData.activePushBuffers) {
			iter->MapDevice(bufferStrategy_);
		}
	}

	switch (task.runType) {
	case GLRRunType::SUBMIT:
		break;

	case GLRRunType::SYNC:
		frameData.hasBegun = false;

		// glFinish is not actually necessary here, and won't be unless we start using
		// glBufferStorage. Then we need to use fences.
		{
			// Wake up FlushSync(), which is blocked waiting on syncDone_.
			std::lock_guard<std::mutex> lock(syncMutex_);
			syncDone_ = true;
			syncCondVar_.notify_one();
		}
		break;

	default:
		_assert_(false);
	}
	VLOG(" PULL: ::Run(): Done running tasks");
	return false;
}
549
550
// Pushes all recorded steps as a SYNC task and blocks until the render thread has
// executed them (used for readbacks that need results immediately).
void GLRenderManager::FlushSync() {
	{
		VLOG("PUSH: Frame[%d].readyForRun = true (sync)", curFrame_);

		GLRRenderThreadTask *task = new GLRRenderThreadTask(GLRRunType::SYNC);
		task->frame = curFrame_;

		// Move the pending work into the task and wake the render thread.
		std::unique_lock<std::mutex> lock(pushMutex_);
		renderThreadQueue_.push(task);
		renderThreadQueue_.back()->initSteps = std::move(initSteps_);
		renderThreadQueue_.back()->steps = std::move(steps_);
		pushCondVar_.notify_one();
		steps_.clear();
	}

	{
		std::unique_lock<std::mutex> lock(syncMutex_);
		// Wait for the flush to be hit, since we're syncing.
		while (!syncDone_) {
			VLOG("PUSH: Waiting for frame[%d].readyForFence = 1 (sync)", curFrame_);
			syncCondVar_.wait(lock);
		}
		// Reset for the next FlushSync.
		syncDone_ = false;
	}
}
575
576