Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/FramebufferManagerCommon.cpp
5654 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <algorithm>
19
#include <sstream>
20
#include <cmath>
21
22
#include "Common/GPU/thin3d.h"
23
#include "Common/Data/Collections/TinySet.h"
24
#include "Common/Data/Convert/ColorConv.h"
25
#include "Common/LogReporting.h"
26
#include "Common/System/Display.h"
27
#include "Common/VR/PPSSPPVR.h"
28
#include "Common/CommonTypes.h"
29
#include "Common/StringUtils.h"
30
#include "Core/Config.h"
31
#include "Core/ConfigValues.h"
32
#include "Core/Core.h"
33
#include "Core/CoreParameter.h"
34
#include "Core/Debugger/MemBlockInfo.h"
35
#include "GPU/Common/DrawEngineCommon.h"
36
#include "GPU/Common/FramebufferManagerCommon.h"
37
#include "GPU/Common/PresentationCommon.h"
38
#include "GPU/Common/TextureCacheCommon.h"
39
#include "GPU/Common/ReinterpretFramebuffer.h"
40
#include "GPU/GPUCommon.h"
41
#include "GPU/GPUState.h"
42
43
static size_t FormatFramebufferName(const VirtualFramebuffer *vfb, char *tag, size_t len) {
44
return snprintf(tag, len, "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, vfb->bufferWidth, vfb->bufferHeight, GeBufferFormatToString(vfb->fb_format));
45
}
46
47
// Keeps the draw context, passes it on to the 2D drawing helper, and allocates the
// presentation helper (released again in the destructor).
FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
	: draw_(draw),
	  draw2D_(draw_) {
	presentation_ = new PresentationCommon(draw);
}
51
52
// Tears down everything the manager owns: device objects, all virtual framebuffers
// (both the regular list and the ones used for readbacks), temporary FBOs, the
// presentation helper and the conversion buffer.
FramebufferManagerCommon::~FramebufferManagerCommon() {
	DeviceLost();

	DecimateFBOs();
	for (auto fb : vfbs_) {
		DestroyFramebuf(fb);
	}
	vfbs_.clear();

	for (auto &entry : tempFBOs_) {
		entry.second.fbo->Release();
	}
	tempFBOs_.clear();

	// Same treatment for the VFBs created by ReadFramebuffersToMemory.
	for (auto fb : bvfbs_) {
		DestroyFramebuf(fb);
	}
	bvfbs_.clear();

	delete presentation_;
	delete[] convBuf_;
}
75
76
void FramebufferManagerCommon::Init(int msaaLevel) {
77
// We may need to override the render size if the shader is upscaling or SSAA.
78
NotifyDisplayResized();
79
NotifyRenderResized(displayLayoutConfigCopy_, msaaLevel);
80
}
81
82
// Returns true if we need to stop the render thread
83
bool FramebufferManagerCommon::UpdateRenderSize(int msaaLevel) {
84
const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight || msaaLevel_ != msaaLevel;
85
86
int effectiveBloomHack = g_Config.iBloomHack;
87
if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
88
effectiveBloomHack = 3;
89
} else if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOff) {
90
effectiveBloomHack = 0;
91
}
92
93
bool newBuffered = !g_Config.bSkipBufferEffects;
94
const bool newSettings = bloomHack_ != effectiveBloomHack || useBufferedRendering_ != newBuffered;
95
96
renderWidth_ = (float)PSP_CoreParameter().renderWidth;
97
renderHeight_ = (float)PSP_CoreParameter().renderHeight;
98
renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
99
msaaLevel_ = msaaLevel;
100
101
bloomHack_ = effectiveBloomHack;
102
useBufferedRendering_ = newBuffered;
103
104
presentation_->UpdateRenderSize(renderWidth_, renderHeight_);
105
106
// If just switching TO buffered rendering, no need to pause the threads. In fact this causes problems due to the open backbuffer renderpass.
107
if (!useBufferedRendering_ && newBuffered) {
108
return false;
109
}
110
return newRender || newSettings;
111
}
112
113
void FramebufferManagerCommon::CheckPostShaders(const DisplayLayoutConfig &config) {
114
if (updatePostShaders_) {
115
presentation_->UpdatePostShader(config);
116
updatePostShaders_ = false;
117
}
118
}
119
120
void FramebufferManagerCommon::BeginFrame(const DisplayLayoutConfig &config) {
121
DecimateFBOs();
122
presentation_->BeginFrame(config);
123
currentRenderVfb_ = nullptr;
124
125
// Hack.
126
displayLayoutConfigCopy_ = config;
127
}
128
129
bool FramebufferManagerCommon::PresentedThisFrame() const {
130
return presentation_->PresentedThisFrame();
131
}
132
133
// Records the framebuffer to be displayed. The address is masked with 0x3FFFFFFF; if the
// masked address is in VRAM, the original address is masked with 0x041FFFFF instead.
void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	displayFramebufPtr_ = framebuf & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(displayFramebufPtr_)) {
		displayFramebufPtr_ = framebuf & 0x041FFFFF;
	}

	displayFormat_ = format;
	displayStride_ = stride;
}
140
141
// Returns the virtual framebuffer whose color address equals `addr` (after address masking).
// When several match, the one with the highest last_frame_render wins. Returns nullptr if
// nothing matches.
VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	VirtualFramebuffer *best = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate->fb_address != addr) {
			continue;
		}
		// Could check w too but whatever (actually, might very well make sense to do so, depending on context).
		const bool moreRecent = !best || candidate->last_frame_render > best->last_frame_render;
		if (moreRecent) {
			best = candidate;
		}
	}
	return best;
}
156
157
// Returns the virtual framebuffer matching address (after masking), stride and format exactly.
// Among multiple matches, the one with the highest colorBindSeq is returned; nullptr if none.
VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride, GEBufferFormat format) const {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	VirtualFramebuffer *latest = nullptr;
	for (auto candidate : vfbs_) {
		const bool exact = candidate->fb_address == addr && candidate->fb_stride == stride && candidate->fb_format == format;
		if (!exact) {
			continue;
		}
		if (!latest || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}
	return latest;
}
175
176
// Finds the most recently color-bound framebuffer at `addr` (after masking) whose stride in
// bytes equals that of the requested stride/format. If its format differs from the requested
// one, a warning is logged once and the result of ResolveFramebufferColorToFormat is returned.
// Returns nullptr when there is no match.
VirtualFramebuffer *FramebufferManagerCommon::ResolveVFB(u32 addr, int stride, GEBufferFormat format) {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	// Find the newest one matching addr and stride.
	VirtualFramebuffer *latest = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate->fb_address != addr) {
			continue;
		}
		if (candidate->FbStrideInBytes() != stride * BufferFormatBytesPerPixel(format)) {
			continue;
		}
		if (!latest || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}

	if (!latest || latest->fb_format == format) {
		return latest;
	}

	WARN_LOG_ONCE(resolvevfb, Log::G3D, "ResolveVFB: Resolving from %s to %s at %08x/%d", GeBufferFormatToString(latest->fb_format), GeBufferFormatToString(format), addr, stride);
	return ResolveFramebufferColorToFormat(latest, format);
}
201
202
// Looks up the framebuffer that exactly matches the address/stride/format last passed to
// SetDisplayFramebuffer. May return nullptr.
VirtualFramebuffer *FramebufferManagerCommon::GetDisplayVFB() {
	VirtualFramebuffer *displayVfb = GetExactVFB(displayFramebufPtr_, displayStride_, displayFormat_);
	return displayVfb;
}
205
206
// Heuristics to figure out the size of FBO to create.
207
// TODO: Possibly differentiate on whether through mode is used (since in through mode, viewport is meaningless?)
208
void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height) {
209
static const int MAX_FRAMEBUF_HEIGHT = 512;
210
211
// Games don't always set any of these. Take the greatest parameter that looks valid based on stride.
212
if (viewport_width > 4 && viewport_width <= fb_stride && viewport_height > 0) {
213
drawing_width = viewport_width;
214
drawing_height = viewport_height;
215
// Some games specify a viewport with 0.5, but don't have VRAM for 273. 480x272 is the buffer size.
216
if (viewport_width == 481 && region_width == 480 && viewport_height == 273 && region_height == 272) {
217
drawing_width = 480;
218
drawing_height = 272;
219
}
220
// Sometimes region is set larger than the VRAM for the framebuffer.
221
// However, in one game it's correctly set as a larger height (see #7277) with the same width.
222
// A bit of a hack, but we try to handle that unusual case here.
223
if (region_width <= fb_stride && (region_width > drawing_width || (region_width == drawing_width && region_height > drawing_height)) && region_height <= MAX_FRAMEBUF_HEIGHT) {
224
drawing_width = region_width;
225
drawing_height = std::max(drawing_height, region_height);
226
}
227
// Scissor is often set to a subsection of the framebuffer, so we pay the least attention to it.
228
if (scissor_width <= fb_stride && scissor_width > drawing_width && scissor_height <= MAX_FRAMEBUF_HEIGHT) {
229
drawing_width = scissor_width;
230
drawing_height = std::max(drawing_height, scissor_height);
231
}
232
} else {
233
// If viewport wasn't valid, let's just take the greatest anything regardless of stride.
234
drawing_width = std::min(std::max(region_width, scissor_width), fb_stride);
235
drawing_height = std::max(region_height, scissor_height);
236
}
237
238
if (scissor_width == 481 && region_width == 480 && scissor_height == 273 && region_height == 272) {
239
drawing_width = 480;
240
drawing_height = 272;
241
}
242
243
// Assume no buffer is > 512 tall, it couldn't be textured or displayed fully if so.
244
if (drawing_height >= MAX_FRAMEBUF_HEIGHT) {
245
if (region_height < MAX_FRAMEBUF_HEIGHT) {
246
drawing_height = region_height;
247
} else if (scissor_height < MAX_FRAMEBUF_HEIGHT) {
248
drawing_height = scissor_height;
249
}
250
}
251
252
if (viewport_width != region_width) {
253
// The majority of the time, these are equal. If not, let's check what we know.
254
u32 nearest_address = 0xFFFFFFFF;
255
for (auto vfb : vfbs_) {
256
const u32 other_address = vfb->fb_address;
257
if (other_address > fb_address && other_address < nearest_address) {
258
nearest_address = other_address;
259
}
260
}
261
262
// Unless the game is using overlapping buffers, the next buffer should be far enough away.
263
// This catches some cases where we can know this.
264
// Hmm. The problem is that we could only catch it for the first of two buffers...
265
const u32 bpp = BufferFormatBytesPerPixel(fb_format);
266
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
267
if (avail_height < drawing_height && avail_height == region_height) {
268
drawing_width = std::min(region_width, fb_stride);
269
drawing_height = avail_height;
270
}
271
272
// Some games draw buffers interleaved, with a high stride/region/scissor but default viewport.
273
if (fb_stride == 1024 && region_width == 1024 && scissor_width == 1024) {
274
drawing_width = 1024;
275
}
276
}
277
278
bool margin = false;
279
// Let's check if we're in a stride gap of a full-size framebuffer.
280
for (auto vfb : vfbs_) {
281
if (fb_address == vfb->fb_address) {
282
continue;
283
}
284
if (vfb->fb_stride != 512) {
285
continue;
286
}
287
288
int vfb_stride_in_bytes = BufferFormatBytesPerPixel(vfb->fb_format) * vfb->fb_stride;
289
int stride_in_bytes = BufferFormatBytesPerPixel(fb_format) * fb_stride;
290
if (stride_in_bytes != vfb_stride_in_bytes) {
291
// Mismatching stride in bytes, not interesting
292
continue;
293
}
294
295
if (fb_address > vfb->fb_address && fb_address < vfb->fb_address + vfb_stride_in_bytes) {
296
// Candidate!
297
if (vfb->height == drawing_height) {
298
// Might have a margin texture! Fix the drawing width if it's too large.
299
int width_in_bytes = vfb->fb_address + vfb_stride_in_bytes - fb_address;
300
int width_in_pixels = width_in_bytes / BufferFormatBytesPerPixel(fb_format);
301
302
// Final check
303
if (width_in_pixels <= 32) {
304
drawing_width = std::min(drawing_width, width_in_pixels);
305
margin = true;
306
// Don't really need to keep looking.
307
break;
308
}
309
}
310
}
311
}
312
313
DEBUG_LOG(Log::G3D, "Est: %08x V: %ix%i, R: %ix%i, S: %ix%i, STR: %i, THR:%i, Z:%08x = %ix%i %s", fb_address, viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough(), gstate.isDepthWriteEnabled() ? gstate.getDepthBufAddress() : 0, drawing_width, drawing_height, margin ? " (margin!)" : "");
314
}
315
316
// Fills `params` with the inputs used by the framebuffer size/match heuristics, read from
// `gstate` (and the framebuffer format from gstate_c): color and depth buffer address and
// stride, draw-mode flags (clearing/writing depth, drawing, through mode, blending), and the
// viewport, region and scissor extents.
void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
	// GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
	params->fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
	params->fb_stride = gstate.FrameBufStride();

	params->z_address = gstate.getDepthBufRawAddress() | 0x04000000;
	params->z_stride = gstate.DepthBufStride();

	if (params->z_address == params->fb_address) {
		// Probably indicates that the game doesn't care about Z for this VFB.
		// Let's avoid matching it for Z copies and other shenanigans.
		params->z_address = 0;
		params->z_stride = 0;
	}

	params->fb_format = gstate_c.framebufFormat;

	params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
	// Technically, it may write depth later, but we're trying to detect it only when it's really true.
	if (gstate.isModeClear()) {
		// Not quite seeing how this makes sense..
		params->isWritingDepth = !gstate.isClearModeDepthMask() && gstate.isDepthWriteEnabled();
	} else {
		params->isWritingDepth = gstate.isDepthWriteEnabled();
	}
	params->isDrawing = !gstate.isModeClear() || !gstate.isClearModeColorMask() || !gstate.isClearModeAlphaMask();
	params->isModeThrough = gstate.isModeThrough();
	const bool alphaBlending = gstate.isAlphaBlendEnabled();
	const bool logicOpBlending = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_CLEAR && gstate.getLogicOp() != GE_LOGIC_COPY;
	params->isBlending = alphaBlending || logicOpBlending;

	// Viewport-X1 and Y1 are not the upper left corner, but half the width/height. A bit confusing.
	float vpx = gstate.getViewportXScale();
	float vpy = gstate.getViewportYScale();

	// Work around problem in F1 Grand Prix, where it draws in through mode with a bogus viewport.
	// We set bad values to 0 which causes the framebuffer size heuristic to rely on the other parameters instead.
	// The magnitude is tested (not just the positive side) because the value is passed through
	// fabsf() and converted to int below; a huge negative or -inf scale would otherwise be an
	// out-of-range float-to-int conversion.
	if (std::isnan(vpx) || fabsf(vpx) > 10000000.0f) {
		vpx = 0.f;
	}
	if (std::isnan(vpy) || fabsf(vpy) > 10000000.0f) {
		vpy = 0.f;
	}
	params->viewportWidth = (int)(fabsf(vpx) * 2.0f);
	params->viewportHeight = (int)(fabsf(vpy) * 2.0f);
	// Region and scissor X2/Y2 are inclusive, hence the +1 to get sizes / exclusive edges.
	params->regionWidth = gstate.getRegionX2() + 1;
	params->regionHeight = gstate.getRegionY2() + 1;

	params->scissorLeft = gstate.getScissorX1();
	params->scissorTop = gstate.getScissorY1();
	params->scissorRight = gstate.getScissorX2() + 1;
	params->scissorBottom = gstate.getScissorY2() + 1;

	if (gstate.getRegionRateX() != 0x100 || gstate.getRegionRateY() != 0x100) {
		WARN_LOG_REPORT_ONCE(regionRate, Log::G3D, "Drawing region rate add non-zero: %04x, %04x of %04x, %04x", gstate.getRegionRateX(), gstate.getRegionRateY(), gstate.getRegionX2(), gstate.getRegionY2());
	}
}
373
374
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width);
375
376
// Selects (or creates) the virtual framebuffer to render to for the given heuristic inputs.
//
// Steps, in order:
//  1. Estimate the drawing size from viewport/region/scissor.
//  2. Search vfbs_ for an exact match (address, format, stride), or - depending on compat
//     flags - a buffer that contains the address at an X offset within its first row or at a
//     whole-row Y offset.
//  3. Update the size bookkeeping of the match and resize its FBO when needed.
//  4. If nothing matched, create a new VirtualFramebuffer and optionally upload its contents
//     from memory.
//  5. Update current-render-target tracking and the gstate_c render target size fields.
//
// `skipDrawReason` is passed to SetColorUpdated for new buffers and is checked for
// SKIPDRAW_SKIPFRAME when marking an existing buffer as really dirty.
// Returns the selected framebuffer (never nullptr).
VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(FramebufferHeuristicParams &params, u32 skipDrawReason) {
	gstate_c.Clean(DIRTY_FRAMEBUF);

	// Collect all parameters. This whole function has really become a cesspool of heuristics...
	// but it appears that's what it takes, unless we emulate VRAM layout more accurately somehow.

	// As there are no clear "framebuffer width" and "framebuffer height" registers,
	// we need to infer the size of the current framebuffer somehow.
	int drawing_width, drawing_height;
	EstimateDrawingSize(params.fb_address, std::max(params.fb_stride, (u16)4), params.fb_format, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorRight, params.scissorBottom, drawing_width, drawing_height);

	if (params.fb_address == params.z_address) {
		// Most likely Z will not be used in this pass, as that would wreak havoc (undefined behavior for sure)
		// We probably don't need to do anything about that, but let's log it.
		WARN_LOG_ONCE(color_equal_z, Log::G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
	}

	// Compatibility hack for Killzone, see issue #6207.
	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin && params.fb_format == GE_FORMAT_8888) {
		ApplyKillzoneFramebufferSplit(&params, &drawing_width);
	} else {
		gstate_c.SetCurRTOffset(0, 0);
	}

	// Find a matching framebuffer.
	VirtualFramebuffer *normal_vfb = nullptr;
	// Only meaningful when large_offset_vfb is set; initialized so it is never read indeterminate.
	int y_offset = 0;
	VirtualFramebuffer *large_offset_vfb = nullptr;

	for (auto v : vfbs_) {
		const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);

		if (params.fb_address == v->fb_address && params.fb_format == v->fb_format && params.fb_stride == v->fb_stride) {
			// Exact match. Keep the first one, but continue scanning: a large-offset match
			// found later takes priority (see below).
			if (!normal_vfb) {
				normal_vfb = v;
			}
		} else if (!PSP_CoreParameter().compat.flags().DisallowFramebufferAtOffset && !PSP_CoreParameter().compat.flags().SplitFramebufferMargin &&
			v->fb_stride == params.fb_stride && v->fb_format == params.fb_format) {
			u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * bpp;
			u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;

			if (!normal_vfb && params.fb_address > v->fb_address && params.fb_address < v_fb_first_line_end_ptr) {
				const int x_offset = (params.fb_address - v->fb_address) / bpp;
				if (x_offset < params.fb_stride && v->height >= drawing_height) {
					// Pretty certainly a pure render-to-X-offset.
					WARN_LOG_REPORT_ONCE(renderoffset, Log::FrameBuf, "Rendering to framebuffer offset at %08x +%dx%d (stride %d)", v->fb_address, x_offset, 0, v->fb_stride);
					normal_vfb = v;
					gstate_c.SetCurRTOffset(x_offset, 0);
					normal_vfb->width = std::max((int)normal_vfb->width, x_offset + drawing_width);
					// To prevent the newSize code from being confused.
					drawing_width += x_offset;
					break;
				}
			} else if (PSP_CoreParameter().compat.flags().FramebufferAllowLargeVerticalOffset &&
				params.fb_address > v->fb_address && v->fb_stride > 0 && (params.fb_address - v->fb_address) % v->FbStrideInBytes() == 0 &&
				params.fb_address != 0x04088000 && v->fb_address != 0x04000000) { // Heuristic to avoid merging the main framebuffers.
				y_offset = (params.fb_address - v->fb_address) / v->FbStrideInBytes();
				if (y_offset <= v->bufferHeight) { // note: v->height is misdetected as 256 instead of 272 here in tokimeki. Note that 272 is just the height of the upper part, it's supersampling vertically.
					large_offset_vfb = v;
					break;
				}
			}
		}
	}

	VirtualFramebuffer *vfb = nullptr;
	if (large_offset_vfb) {
		// These are prioritized over normal VFBs matches, to ensure things work even if the higher-address one
		// is created first. Only enabled under compat flag.
		vfb = large_offset_vfb;
		WARN_LOG_REPORT_ONCE(tokimeki, Log::FrameBuf, "Detected FBO at Y offset %d of %08x: %08x", y_offset, large_offset_vfb->fb_address, params.fb_address);
		gstate_c.SetCurRTOffset(0, y_offset);
		vfb->height = std::max((int)vfb->height, y_offset + drawing_height);
		drawing_height += y_offset;
		// TODO: We can allow X/Y overlaps too, but haven't seen any so safer to not.
	} else if (normal_vfb) {
		vfb = normal_vfb;
		if (vfb->z_address == 0 && vfb->z_stride == 0 && params.z_stride != 0) {
			// Got one that was created by CreateRAMFramebuffer. Since it has no depth buffer,
			// we just recreate it immediately.
			ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
		}

		// Keep track, but this isn't really used.
		vfb->z_stride = params.z_stride;
		// Heuristic: In throughmode, a higher height could be used. Let's avoid shrinking the buffer.
		if (params.isModeThrough && (int)vfb->width <= params.fb_stride) {
			vfb->width = std::max((int)vfb->width, drawing_width);
			vfb->height = std::max((int)vfb->height, drawing_height);
		} else {
			vfb->width = drawing_width;
			vfb->height = drawing_height;
		}
	}

	if (vfb) {
		bool resized = false;
		if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
			// Even if it's not newly wrong, if this is larger we need to resize up.
			if (vfb->width > vfb->bufferWidth || vfb->height > vfb->bufferHeight) {
				ResizeFramebufFBO(vfb, vfb->width, vfb->height);
				resized = true;
			} else if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
				// If it's newly wrong, or changing every frame, just keep track.
				vfb->newWidth = drawing_width;
				vfb->newHeight = drawing_height;
				vfb->lastFrameNewSize = gpuStats.numFlips;
			} else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
				// Okay, it's changed for a while (and stayed that way.) Let's start over.
				// But only if we really need to, to avoid blinking.
				bool needsRecreate = vfb->bufferWidth > params.fb_stride;
				needsRecreate = needsRecreate || vfb->newWidth > vfb->bufferWidth || vfb->newWidth * 2 < vfb->bufferWidth;
				needsRecreate = needsRecreate || vfb->newHeight > vfb->bufferHeight || vfb->newHeight * 2 < vfb->bufferHeight;

				// Whether we resize or not, change the size parameters so we stop detecting a resize.
				// It might be larger if all drawing has been in throughmode.
				vfb->width = drawing_width;
				vfb->height = drawing_height;

				if (needsRecreate) {
					ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
					resized = true;
					// Let's discard this information, might be wrong now.
					vfb->safeWidth = 0;
					vfb->safeHeight = 0;
				}
			}
		} else {
			// It's not different, let's keep track of that too.
			vfb->lastFrameNewSize = gpuStats.numFlips;
		}

		if (!resized && renderScaleFactor_ != 1 && vfb->renderScaleFactor == 1) {
			// Might be time to change this framebuffer - have we used depth?
			if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
				ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
				_assert_(vfb->renderScaleFactor != 1);
			}
		}
	}

	// None found? Create one.
	if (!vfb) {
		gstate_c.usingDepth = false;  // reset depth buffer tracking

		vfb = new VirtualFramebuffer{};
		vfb->fbo = nullptr;
		vfb->fb_address = params.fb_address;
		vfb->fb_stride = params.fb_stride;
		vfb->z_address = params.z_address;
		vfb->z_stride = params.z_stride;

		// The other width/height parameters are set in ResizeFramebufFBO below.
		vfb->width = drawing_width;
		vfb->height = drawing_height;
		vfb->newWidth = drawing_width;
		vfb->newHeight = drawing_height;
		vfb->lastFrameNewSize = gpuStats.numFlips;
		vfb->fb_format = params.fb_format;
		vfb->usageFlags = FB_USAGE_RENDER_COLOR;

		// Track the highest VRAM address covered by any color framebuffer.
		u32 colorByteSize = vfb->BufferByteSize(RASTER_COLOR);
		if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufColorRangeEnd_) {
			framebufColorRangeEnd_ = params.fb_address + colorByteSize;
		}

		// This is where we actually create the framebuffer. The true is "force".
		ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
		NotifyRenderFramebufferCreated(vfb);

		// Note that we do not even think about depth right now. That'll be handled
		// on the first depth access, which will call SetDepthFramebuffer.

		CopyToColorFromOverlappingFramebuffers(vfb);
		SetColorUpdated(vfb, skipDrawReason);

		INFO_LOG(Log::FrameBuf, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));

		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfbs_.push_back(vfb);
		currentRenderVfb_ = vfb;

		// Assume that if we're clearing right when switching to a new framebuffer, we don't need to upload.
		if (useBufferedRendering_ && params.isDrawing && vfb->fb_stride > 0) {
			gpu->PerformWriteColorFromMemory(params.fb_address, colorByteSize);
			// Alpha was already done by PerformWriteColorFromMemory.
			PerformWriteStencilFromMemory(params.fb_address, colorByteSize, WriteStencil::STENCIL_IS_ZERO | WriteStencil::IGNORE_ALPHA);
			// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
		}

		DiscardFramebufferCopy();

	// We already have it!
	} else if (vfb != currentRenderVfb_) {
		// Use it as a render target.
		DEBUG_LOG(Log::FrameBuf, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
		vfb->usageFlags |= FB_USAGE_RENDER_COLOR;
		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfb->dirtyAfterDisplay = true;
		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
			vfb->reallyDirtyAfterDisplay = true;

		VirtualFramebuffer *prev = currentRenderVfb_;
		currentRenderVfb_ = vfb;
		NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
		CopyToColorFromOverlappingFramebuffers(vfb);
		gstate_c.usingDepth = false;  // reset depth buffer tracking

		DiscardFramebufferCopy();
	} else {
		// Something changed, but we still got the same framebuffer we were already rendering to.
		// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
		vfb->last_frame_render = gpuStats.numFlips;
		frameLastFramebufUsed_ = gpuStats.numFlips;
		vfb->dirtyAfterDisplay = true;
		if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
			vfb->reallyDirtyAfterDisplay = true;
		NotifyRenderFramebufferUpdated(vfb);
	}

	// Stamp the color bind sequence so "newest" lookups (GetExactVFB / ResolveVFB) prefer this buffer.
	vfb->colorBindSeq = GetBindSeqCount();

	gstate_c.curRTWidth = vfb->width;
	gstate_c.curRTHeight = vfb->height;
	gstate_c.curRTRenderWidth = vfb->renderWidth;
	gstate_c.curRTRenderHeight = vfb->renderHeight;
	return vfb;
}
606
607
// Called on the first use of depth in a render pass.
608
void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
609
if (!currentRenderVfb_) {
610
return;
611
}
612
613
// First time use of this framebuffer's depth buffer.
614
bool newlyUsingDepth = (currentRenderVfb_->usageFlags & FB_USAGE_RENDER_DEPTH) == 0;
615
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
616
617
uint32_t boundDepthBuffer = gstate.getDepthBufRawAddress() | 0x04000000;
618
uint32_t boundDepthStride = gstate.DepthBufStride();
619
if (currentRenderVfb_->z_address != boundDepthBuffer || currentRenderVfb_->z_stride != boundDepthStride) {
620
if (currentRenderVfb_->fb_address == boundDepthBuffer) {
621
// Disallow setting depth buffer to the same address as the color buffer, usually means it's not used.
622
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Ignoring color matching depth buffer at %08x", boundDepthBuffer);
623
boundDepthBuffer = 0;
624
boundDepthStride = 0;
625
}
626
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Framebuffer at %08x/%d has switched associated depth buffer from %08x to %08x, updating.",
627
currentRenderVfb_->fb_address, currentRenderVfb_->fb_stride, currentRenderVfb_->z_address, boundDepthBuffer);
628
629
// Technically, here we should copy away the depth buffer to another framebuffer that uses that z_address, or maybe
630
// even write it back to RAM. However, this is rare. Silent Hill is one example, see #16126.
631
currentRenderVfb_->z_address = boundDepthBuffer;
632
// Update the stride in case it changed.
633
currentRenderVfb_->z_stride = boundDepthStride;
634
635
if (currentRenderVfb_->fbo) {
636
char tag[128];
637
FormatFramebufferName(currentRenderVfb_, tag, sizeof(tag));
638
currentRenderVfb_->fbo->UpdateTag(tag);
639
}
640
}
641
642
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
643
// by copying from any overlapping buffers with fresher content.
644
if (!isClearingDepth && useBufferedRendering_) {
645
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
646
647
// Need to upload the first line of depth buffers, for Burnout Dominator lens flares. See issue #11100 and comments to #16081.
648
// Might make this more generic and upload the whole depth buffer if we find it's needed for something.
649
if (newlyUsingDepth && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
650
// Sanity check the depth buffer pointer.
651
if (Memory::IsValidRange(currentRenderVfb_->z_address, currentRenderVfb_->width * 2)) {
652
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
653
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
654
}
655
}
656
}
657
658
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
659
}
660
661
struct CopySource {
662
VirtualFramebuffer *vfb;
663
RasterChannel channel;
664
int xOffset;
665
int yOffset;
666
667
int seq() const {
668
return channel == RASTER_DEPTH ? vfb->depthBindSeq : vfb->colorBindSeq;
669
}
670
671
bool operator < (const CopySource &other) const {
672
return seq() < other.seq();
673
}
674
};
675
676
// Not sure if it's more profitable to always do these copies with raster (which may screw up early-Z due to explicit depth buffer write)
677
// or to use image copies when possible (which may make it easier for the driver to preserve early-Z, but on the other hand, will cost additional memory
678
// bandwidth on tilers due to the load operation, which we might otherwise be able to skip).
679
// Looks through the tracked framebuffers for ones whose data lives where dest keeps its depth,
// and copies from the candidate that ends up last after sorting the CopySource list.
// Two kinds of candidates are collected:
//  - a 565 color buffer located at dest's depth address with the same stride (color -> depth), and
//  - a buffer sharing dest's depth address and stride (depth -> depth),
// in both cases only if its bind sequence number is newer than dest->depthBindSeq.
// Render state is always marked dirty on exit, whether or not a copy happened.
void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) {
	std::vector<CopySource> candidates;
	for (auto other : vfbs_) {
		if (other == dest)
			continue;

		const bool colorAliasesDepth = other->fb_address == dest->z_address && other->fb_stride == dest->z_stride && other->fb_format == GE_FORMAT_565;
		if (colorAliasesDepth) {
			// Only use it if the source has newer data than the current buffer.
			if (other->colorBindSeq > dest->depthBindSeq) {
				candidates.push_back(CopySource{ other, RASTER_COLOR, 0, 0 });
			}
		} else if (other->z_address == dest->z_address && other->z_stride == dest->z_stride && other->depthBindSeq > dest->depthBindSeq) {
			candidates.push_back(CopySource{ other, RASTER_DEPTH, 0, 0 });
		}
		// TODO: Do more detailed overlap checks for the remaining cases.
	}

	std::sort(candidates.begin(), candidates.end());

	// TODO: A full copy will overwrite anything else. So we can eliminate
	// anything that comes before such a copy.

	// For now, only the last entry is applied, if there are multiple.
	if (!candidates.empty()) {
		draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

		auto &chosen = candidates.back();
		if (chosen.channel == RASTER_DEPTH) {
			// Good old depth->depth copy.
			BlitFramebufferDepth(chosen.vfb, dest);
			gpuStats.numDepthCopies++;
			dest->last_frame_depth_updated = gpuStats.numFlips;
		} else if (chosen.channel == RASTER_COLOR && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
			VirtualFramebuffer *colorSrc = chosen.vfb;
			if (colorSrc->fb_format != GE_FORMAT_565) {
				WARN_LOG_ONCE(not565, Log::FrameBuf, "fb_format of buffer at %08x not 565 as expected", colorSrc->fb_address);
			}

			// Really hate to do this, but tracking the depth swizzle state across multiple
			// copies is not easy, so the shader variant is picked from the compat flag.
			const Draw2DShader shader = PSP_CoreParameter().compat.flags().DeswizzleDepth
				? DRAW2D_565_TO_DEPTH_DESWIZZLE
				: DRAW2D_565_TO_DEPTH;

			gpuStats.numReinterpretCopies++;
			colorSrc->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
			dest->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;

			// Copying color to depth. Both rectangles use the source's render size.
			BlitUsingRaster(
				colorSrc->fbo, 0.0f, 0.0f, colorSrc->renderWidth, colorSrc->renderHeight,
				dest->fbo, 0.0f, 0.0f, colorSrc->renderWidth, colorSrc->renderHeight,
				false, dest->renderScaleFactor, Get2DPipeline(shader), "565_to_depth");
		}
	}

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
741
742
// Pass names for reinterpret draws, indexed as [source format][destination format].
// Can't easily dynamically create these strings, we just pass along the pointer.
// Row/column order is 565, 5551, 4444, 8888; the diagonal holds the "self" entries.
static const char *reinterpretStrings[4][4] = {
	// From 565
	{ "self_reinterpret_565", "reinterpret_565_to_5551", "reinterpret_565_to_4444", "reinterpret_565_to_8888" },
	// From 5551
	{ "reinterpret_5551_to_565", "self_reinterpret_5551", "reinterpret_5551_to_4444", "reinterpret_5551_to_8888" },
	// From 4444
	{ "reinterpret_4444_to_565", "reinterpret_4444_to_5551", "self_reinterpret_4444", "reinterpret_4444_to_8888" },
	// From 8888
	{ "reinterpret_8888_to_565", "reinterpret_8888_to_5551", "reinterpret_8888_to_4444", "self_reinterpret_8888" },
};
769
770
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
771
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
772
if (!useBufferedRendering_) {
773
return;
774
}
775
776
std::vector<CopySource> sources;
777
for (auto src : vfbs_) {
778
// Discard old and equal potential inputs.
779
if (src == dst || src->colorBindSeq < dst->colorBindSeq) {
780
continue;
781
}
782
783
if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
784
// Another render target at the exact same location but gotta be a different format or a different stride, otherwise
785
// it would be the same, and should have been detected in DoSetRenderFrameBuffer.
786
if (src->fb_format != dst->fb_format) {
787
// This will result in reinterpret later, if both formats are 16-bit.
788
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
789
} else {
790
// This shouldn't happen anymore. I think when it happened last, we still had
791
// lax stride checking when video was incoming, and a resize happened causing a duplicate.
792
}
793
} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
794
u32 bytesPerPixel = BufferFormatBytesPerPixel(src->fb_format);
795
796
u32 strideInBytes = src->fb_stride * bytesPerPixel; // Same for both src and dest
797
798
u32 srcColorStart = src->fb_address;
799
u32 srcFirstLineEnd = src->fb_address + strideInBytes;
800
u32 srcColorEnd = strideInBytes * src->height;
801
802
u32 dstColorStart = dst->fb_address;
803
u32 dstFirstLineEnd = dst->fb_address + strideInBytes;
804
u32 dstColorEnd = strideInBytes * dst->height;
805
806
// Initially we'll only allow pure horizontal and vertical overlap,
807
// to reduce the risk for false positives. We can allow diagonal overlap too if needed
808
// in the future.
809
810
// Check for potential vertical overlap, like in Juiced 2.
811
int xOffset = 0;
812
int yOffset = 0;
813
814
// TODO: Get rid of the compatibility flag check.
815
if ((dstColorStart - srcColorStart) % strideInBytes == 0
816
&& PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
817
// Buffers are aligned.
818
yOffset = ((int)dstColorStart - (int)srcColorStart) / strideInBytes;
819
if (yOffset <= -(int)src->height) {
820
// Not overlapping
821
continue;
822
} else if (yOffset >= dst->height) {
823
// Not overlapping
824
continue;
825
}
826
} else {
827
// Buffers not stride-aligned - ignoring for now.
828
// This is where we'll add the horizontal offset for GoW.
829
continue;
830
}
831
sources.push_back(CopySource{ src, RASTER_COLOR, xOffset, yOffset });
832
} else if (src->fb_address == dst->fb_address && src->FbStrideInBytes() == dst->FbStrideInBytes()) {
833
if (src->fb_stride == dst->fb_stride * 2) {
834
// Reinterpret from 16-bit to 32-bit.
835
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
836
} else if (src->fb_stride * 2 == dst->fb_stride) {
837
// Reinterpret from 32-bit to 16-bit.
838
sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
839
} else {
840
// 16-to-16 reinterpret, should have been caught above already.
841
_assert_msg_(false, "Reinterpret: Shouldn't get here");
842
}
843
}
844
}
845
846
std::sort(sources.begin(), sources.end());
847
848
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
849
850
bool tookActions = false;
851
852
// TODO: Only do the latest one.
853
for (const CopySource &source : sources) {
854
VirtualFramebuffer *src = source.vfb;
855
856
// Copy a rectangle from the original to the new buffer.
857
// Yes, we mean to look at src->width/height for the dest rectangle.
858
859
// TODO: Try to bound the blit using gstate_c.vertBounds like depal does.
860
861
int srcWidth = src->width * src->renderScaleFactor;
862
int srcHeight = src->height * src->renderScaleFactor;
863
int dstWidth = src->width * dst->renderScaleFactor;
864
int dstHeight = src->height * dst->renderScaleFactor;
865
866
int dstX1 = -source.xOffset * dst->renderScaleFactor;
867
int dstY1 = -source.yOffset * dst->renderScaleFactor;
868
int dstX2 = dstX1 + dstWidth;
869
int dstY2 = dstY1 + dstHeight;
870
871
if (source.channel == RASTER_COLOR) {
872
Draw2DPipeline *pipeline = nullptr;
873
const char *pass_name = "N/A";
874
float scaleFactorX = 1.0f;
875
if (src->fb_format == dst->fb_format) {
876
gpuStats.numColorCopies++;
877
pipeline = Get2DPipeline(DRAW2D_COPY_COLOR);
878
pass_name = "copy_color";
879
} else {
880
if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
881
WARN_LOG_ONCE(bta, Log::FrameBuf, "WARNING: Reinterpret encountered with BlueToAlpha on");
882
}
883
884
// Reinterpret!
885
WARN_LOG_N_TIMES(reint, 5, Log::FrameBuf, "Reinterpret detected from %08x_%s to %08x_%s",
886
src->fb_address, GeBufferFormatToString(src->fb_format),
887
dst->fb_address, GeBufferFormatToString(dst->fb_format));
888
889
pipeline = GetReinterpretPipeline(src->fb_format, dst->fb_format, &scaleFactorX);
890
dstX1 *= scaleFactorX;
891
dstX2 *= scaleFactorX;
892
893
pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];
894
895
gpuStats.numReinterpretCopies++;
896
}
897
898
if (pipeline) {
899
tookActions = true;
900
// OK we have the pipeline, now just do the blit.
901
BlitUsingRaster(src->fbo, 0.0f, 0.0f, srcWidth, srcHeight,
902
dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, pass_name);
903
}
904
905
if (scaleFactorX == 1.0f && dst->z_address == src->z_address && dst->z_stride == src->z_stride) {
906
// We should also copy the depth buffer in this case!
907
BlitFramebufferDepth(src, dst, true);
908
}
909
}
910
}
911
912
if (currentRenderVfb_ && dst != currentRenderVfb_ && tookActions) {
913
// Will probably just change the name of the current renderpass, since one was started by the reinterpret itself.
914
draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After Reinterpret");
915
}
916
917
shaderManager_->DirtyLastShader();
918
textureCache_->ForgetLastTexture();
919
}
920
921
// Returns the 2D pipeline that converts framebuffer data from format `from` to format `to`,
// creating and caching it in reinterpretFromTo_ on first use.
// *scaleFactorX receives the factor the caller should apply to destination X coordinates:
// 0.5 for 16-bit -> non-16-bit, 2.0 for non-16-bit -> 16-bit, and 1.0 otherwise.
// When both formats are equal, the plain color copy pipeline is returned instead.
Draw2DPipeline *FramebufferManagerCommon::GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX) {
	if (from == to) {
		*scaleFactorX = 1.0f;
		return Get2DPipeline(DRAW2D_COPY_COLOR);
	}

	const bool srcIs16 = IsBufferFormat16Bit(from);
	const bool dstIs16 = IsBufferFormat16Bit(to);
	if (srcIs16 == dstIs16) {
		// Same pixel size on both sides, no horizontal rescale.
		*scaleFactorX = 1.0f;
	} else if (srcIs16) {
		// We halve the X coordinates in the destination framebuffer.
		// The shader will collect two pixels worth of input data and merge into one.
		*scaleFactorX = 0.5f;
	} else {
		// We double the X coordinates in the destination framebuffer.
		// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
		*scaleFactorX = 2.0f;
	}

	const int fromIndex = (int)from;
	const int toIndex = (int)to;
	if (reinterpretFromTo_[fromIndex][toIndex]) {
		return reinterpretFromTo_[fromIndex][toIndex];
	}

	// Not built yet: generate the fragment shader for this pair and remember the result.
	Draw2DPipeline *created = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
		return GenerateReinterpretFragmentShader(shaderWriter, from, to);
	});
	reinterpretFromTo_[fromIndex][toIndex] = created;
	return created;
}
948
949
// Tears down a virtual framebuffer: tells the texture cache, releases the backing FBO if any,
// clears every manager pointer that still refers to it, and finally deletes the object.
// The caller is responsible for removing v from any container that holds it.
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
	// Notify the texture cache of both the color and depth buffers.
	textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);

	if (v->fbo != nullptr) {
		v->fbo->Release();
		v->fbo = nullptr;
	}

	// Wipe some pointers
	DiscardFramebufferCopy();

	const auto forgetIfDying = [v](auto &slot) {
		if (slot == v)
			slot = nullptr;
	};
	forgetIfDying(currentRenderVfb_);
	forgetIfDying(displayFramebuf_);
	forgetIfDying(prevDisplayFramebuf_);
	forgetIfDying(prevPrevDisplayFramebuf_);

	delete v;
}
970
971
// Copies the depth contents of src into dst, if they share a depth address and src has depth
// that was rendered at least as recently as dst's depth was last updated.
// Unless allowSizeMismatch is set, the two buffers must also have matching sizes
// (480 and 512 wide are treated as equivalent widths).
// The copy method (raster draw, image copy or blit) is chosen from the device caps.
void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool allowSizeMismatch) {
	_dbg_assert_(src && dst);
	_dbg_assert_(src != dst);

	// Check that the depth address is even the same before actually blitting.
	const bool sameDepthMemory = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
	const bool widthsCompatible = src->width == dst->width
		|| (src->width == 512 && dst->width == 480)
		|| (src->width == 480 && dst->width == 512);
	const bool sizesCompatible = widthsCompatible && src->height == dst->height;
	if (!sameDepthMemory || (!sizesCompatible && !allowSizeMismatch)) {
		return;
	}

	// Copy depth value from the previously bound framebuffer to the current one.
	const bool srcDepthIsNewer = src->last_frame_depth_render != 0 && src->last_frame_depth_render >= dst->last_frame_depth_updated;
	if (!src->fbo || !dst->fbo || !useBufferedRendering_ || !srcDepthIsNewer) {
		// If depth wasn't updated, then we're at least "two degrees" away from the data.
		// This is an optimization: it probably doesn't need to be copied in this case.
		return;
	}

	const auto &caps = draw_->GetDeviceCaps();
	const bool canCopy = caps.framebufferSeparateDepthCopySupported || (!caps.framebufferDepthBlitSupported && caps.framebufferCopySupported);
	const bool canBlit = caps.framebufferDepthBlitSupported;
	bool canRaster = caps.fragmentShaderDepthWriteSupported && caps.textureDepthSupported;

	// If multisampling, we want to copy depth properly so we get all the samples, to avoid aliased edges.
	// Can be seen in the fire in Jeanne D'arc, for example.
	const bool bothMultisampled = src->fbo->MultiSampleLevel() > 0 && dst->fbo->MultiSampleLevel() > 0;
	if (bothMultisampled && canRaster && canCopy) {
		canRaster = false;
	}

	// Only the area common to both render targets is transferred.
	const int copyW = std::min(src->renderWidth, dst->renderWidth);
	const int copyH = std::min(src->renderHeight, dst->renderHeight);

	// Some GPUs can copy depth but only if stencil gets to come along for the ride. We only want to use this if there is no blit functionality.
	if (canRaster) {
		BlitUsingRaster(src->fbo, 0, 0, copyW, copyH, dst->fbo, 0, 0, copyW, copyH, false, dst->renderScaleFactor, Get2DPipeline(Draw2DShader::DRAW2D_COPY_DEPTH), "BlitDepthRaster");
	} else if (canCopy) {
		draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, copyW, copyH, 1, Draw::Aspect::DEPTH_BIT, "CopyFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	} else if (canBlit) {
		// We'll accept whether we get a separate depth blit or not...
		draw_->BlitFramebuffer(src->fbo, 0, 0, copyW, copyH, dst->fbo, 0, 0, copyW, copyH, Draw::Aspect::DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
1020
1021
// Hook run when a new render framebuffer has been created.
// In buffered mode the previously current framebuffer (if any) gets its switch-time download;
// in non-buffered mode drawing is flagged to be skipped. The texture cache is then told about
// the new framebuffer and the size-dependent dirty flags are refreshed.
void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
	if (useBufferedRendering_) {
		if (currentRenderVfb_) {
			DownloadFramebufferOnSwitch(currentRenderVfb_);
		}
	} else {
		// Let's ignore rendering to targets that have not (yet) been displayed.
		gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1033
1034
// Marks the state that depends on the render target dimensions as dirty when vfb's size
// differs from what gstate_c currently has recorded for the render target.
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb) {
	const bool logicalSizeChanged = gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height;
	if (logicalSizeChanged) {
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}

	const bool renderSizeChanged = gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight;
	if (renderSizeChanged) {
		gstate_c.Dirty(DIRTY_PROJMATRIX);
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
	}
}
1043
1044
// Saves the first rendered frame of vfb back to emulated memory, once.
// Some games will draw to some memory once, and use it as a render-to-texture later.
// To support this, we save the first frame to memory when we have a safe w/h.
// Saving each frame would be slow.
void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
	// Nothing to do without a framebuffer or a known-safe region.
	if (!vfb || !(vfb->safeWidth > 0) || !(vfb->safeHeight > 0)) {
		return;
	}
	// Already saved once, or memory already holds the data.
	if ((vfb->usageFlags & FB_USAGE_FIRST_FRAME_SAVED) || vfb->memoryUpdated) {
		return;
	}

	// TODO: This type of download could be made async, for less stutter on framebuffer creation.
	if (GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP || PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) {
		return;
	}

	ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
	vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
	// Reset the safe region so this does not trigger again until it is re-established.
	vfb->safeWidth = 0;
	vfb->safeHeight = 0;
}
1059
1060
bool FramebufferManagerCommon::ShouldDownloadFramebufferColor(const VirtualFramebuffer *vfb) {
1061
// Dangan Ronpa hack
1062
return PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000;
1063
}
1064
1065
bool FramebufferManagerCommon::ShouldDownloadFramebufferDepth(const VirtualFramebuffer *vfb) {
1066
// Download depth buffer if compat flag set (previously used for Syphon Filter lens flares, now used for nothing)
1067
if (!PSP_CoreParameter().compat.flags().ReadbackDepth || GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP) {
1068
return false;
1069
}
1070
return (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) != 0 && vfb->width >= 480 && vfb->height >= 272;
1071
}
1072
1073
// Hook run when rendering moves from prevVfb (may be null) to vfb.
// First performs any pending readbacks of the previous framebuffer, then resets cached
// texture/shader bindings and makes vfb the active target (buffered mode) or updates the
// skip-draw flag (non-buffered mode). isClearingDepth is not consulted by this implementation.
void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
	if (prevVfb) {
		const bool forcedColorReadback = ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated;
		if (!forcedColorReadback) {
			DownloadFramebufferOnSwitch(prevVfb);
		} else {
			// NOTE: This path is ONLY for the Dangan Ronpa hack, see ShouldDownloadFramebufferColor
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::OLD_DATA_OK);
			prevVfb->usageFlags = (prevVfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
		}

		if (ShouldDownloadFramebufferDepth(prevVfb)) {
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::BLOCK);
		}
	}

	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();

	if (!useBufferedRendering_) {
		if (vfb->fbo) {
			// This should only happen very briefly when toggling useBufferedRendering_.
			textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
			vfb->fbo->Release();
			vfb->fbo = nullptr;
		}

		// Let's ignore rendering to targets that have not (yet) been displayed.
		const bool wasDisplayed = (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) != 0;
		if (wasDisplayed) {
			gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
		} else {
			gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
		}
	} else if (vfb->fbo) {
		shaderManager_->DirtyLastShader();

		// Depth is kept unless the framebuffer was flagged for depth invalidation,
		// in which case it is cleared once and the flag is consumed.
		Draw::RPAction depthLoadAction = Draw::RPAction::KEEP;
		float depthClearValue = 0.0f;
		if (vfb->usageFlags & FB_USAGE_INVALIDATE_DEPTH) {
			depthLoadAction = Draw::RPAction::CLEAR;
			depthClearValue = GetDepthScaleFactors(gstate_c.UseFlags()).Offset();
			vfb->usageFlags &= ~FB_USAGE_INVALIDATE_DEPTH;
		}
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, {Draw::RPAction::KEEP, depthLoadAction, Draw::RPAction::KEEP, 0, depthClearValue}, "FBSwitch");
	} else {
		// This should only happen very briefly when toggling useBufferedRendering_.
		ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1125
1126
// Called when formatted image data is about to land in memory at addr.
// Note: UpdateFromMemory() is still called later.
// This is a special case where we have extra information prior to the invalidation,
// because it's called from sceJpeg, sceMpeg, scePsmf etc.
// If a framebuffer resolves for (addr, stride, fmt), it is counted as freshly rendered and,
// when the incoming stride is larger than the framebuffer's, the framebuffer is widened.
void FramebufferManagerCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int stride, GEBufferFormat fmt) {
	// TODO: Could possibly be at an offset...
	// Also, stride needs better handling.
	VirtualFramebuffer *target = ResolveVFB(addr, stride, fmt);
	if (!target) {
		return;
	}

	// Let's count this as a "render". This will also force us to use the correct format.
	target->last_frame_render = gpuStats.numFlips;
	target->colorBindSeq = GetBindSeqCount();

	if (!(target->fb_stride < stride)) {
		// Existing stride already covers the incoming data.
		return;
	}

	INFO_LOG(Log::FrameBuf, "Changing stride for %08x from %d to %d", addr, target->fb_stride, stride);
	const int bytesPerPixel = BufferFormatBytesPerPixel(fmt);
	// Height is however many full rows of the new stride fit in the written size.
	ResizeFramebufFBO(target, stride, size / (bytesPerPixel * stride));
	// Resizing may change the viewport/etc.
	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	target->fb_stride = stride;
	// This might be a bit wider than necessary, but we'll redetect on next render.
	target->width = stride;
}
1151
1152
// Refreshes every framebuffer whose color address equals addr from the contents of emulated memory.
// In buffered mode with a live FBO the pixels are drawn into it; otherwise the framebuffer is
// destroyed and removed from vfbs_. Returns early without doing anything if displayFramebufPtr_
// is not a valid address.
void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
	// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr))
		addr &= 0x041FFFFF;

	// TODO: Could go through all FBOs, but probably not important?
	// TODO: Could also check for inner changes, but video is most important.
	// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
	const bool isDisplayBuf = addr == CurrentDisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();

	// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
	if (!Memory::IsValidAddress(displayFramebufPtr_))
		return;

	size_t index = 0;
	while (index < vfbs_.size()) {
		VirtualFramebuffer *candidate = vfbs_[index];
		if (candidate->fb_address != addr) {
			++index;
			continue;
		}

		FlushBeforeCopy();

		if (useBufferedRendering_ && candidate->fbo) {
			// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
			// TODO: This doesn't seem quite right anymore.
			const bool staleDisplayBuf = candidate->last_frame_render + 1 < gpuStats.numFlips && isDisplayBuf;
			const GEBufferFormat fmt = staleDisplayBuf ? displayFormat_ : candidate->fb_format;

			DrawPixels(candidate, 0, 0, Memory::GetPointerUnchecked(addr), fmt, candidate->fb_stride, candidate->width, candidate->height, RASTER_COLOR, "UpdateFromMemory_DrawPixels");
			SetColorUpdated(candidate, gstate_c.skipDrawReason);
			++index;
		} else {
			INFO_LOG(Log::FrameBuf, "Invalidating FBO for %08x (%dx%d %s)", candidate->fb_address, candidate->width, candidate->height, GeBufferFormatToString(candidate->fb_format));
			DestroyFramebuf(candidate);
			// The next element slides into this slot, so the index stays put.
			vfbs_.erase(vfbs_.begin() + index);
		}
	}

	RebindFramebuffer("RebindFramebuffer - UpdateFromMemory");

	// TODO: Necessary?
	gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
1192
1193
// Uploads a block of pixels from srcPixels and draws it at (dstX, dstY) with size width x height.
//   vfb            - destination framebuffer in buffered mode; when null or when buffered
//                    rendering is off, the pixels go straight to the backbuffer.
//   srcPixelFormat - format of the source data; must be GE_FORMAT_DEPTH16 when channel is RASTER_DEPTH.
//   srcStride      - source row pitch, in pixels.
//   channel        - RASTER_COLOR or RASTER_DEPTH. Depth is only expected in buffered mode.
//   tag            - name passed along when binding the render target.
// If the temporary pixel texture cannot be created, nothing is drawn.
void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, RasterChannel channel, const char *tag) {
	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();
	float u0 = 0.0f, u1 = 1.0f;
	float v0 = 0.0f, v1 = 1.0f;

	// Give the flags a defined starting value: in buffered mode with a null vfb->fbo (only
	// guarded by a debug assert) no branch below assigns them, and they are read afterwards.
	DrawTextureFlags flags = DRAWTEX_NEAREST;
	if (useBufferedRendering_ && vfb) {
		_dbg_assert_(vfb->fbo);
		if (vfb->fbo) {
			if (channel == RASTER_DEPTH || PSP_CoreParameter().compat.flags().NearestFilteringOnFramebufferCreate) {
				flags = DRAWTEX_NEAREST;
			} else {
				flags = DRAWTEX_LINEAR;
			}
			draw_->BindFramebufferAsRenderTarget(vfb->fbo, {Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP}, tag);
			SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
			draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
		}
	} else {
		// The hacky way to get the display layout config (normally we pass it down, but it would require a lot of plumbing here).
		// This is only for non-buffered rendering.
		auto config = g_Config.GetDisplayLayoutConfig(g_display.GetDeviceOrientation());
		// Here config is valid.
		_dbg_assert_(channel == RASTER_COLOR);
		// We are drawing directly to the back buffer so need to flip.
		// Should more of this be handled by the presentation engine?
		if (needBackBufferYSwap_)
			std::swap(v0, v1);
		flags = config.iDisplayFilter == SCALE_LINEAR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
		flags = flags | DRAWTEX_TO_BACKBUFFER;
		FRect frame = GetScreenFrame(config.bIgnoreScreenInsets, pixelWidth_, pixelHeight_);
		FRect rc;
		CalculateDisplayOutputRect(config, &rc, 480.0f, 272.0f, frame, ROTATION_LOCKED_HORIZONTAL);
		SetViewport2D(rc.x, rc.y, rc.w, rc.h);
		draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
	}

	if (channel == RASTER_DEPTH) {
		_dbg_assert_(srcPixelFormat == GE_FORMAT_DEPTH16);
		flags = flags | DRAWTEX_DEPTH;
		if (vfb)
			vfb->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
	}

	Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
	if (pixelsTex) {
		draw_->BindTextures(0, 1, &pixelsTex, Draw::TextureBindFlags::VULKAN_BIND_ARRAY);

		// TODO: Replace with draw2D_.Blit() directly.
		DrawActiveTexture(dstX, dstY, width, height,
			vfb ? vfb->bufferWidth : g_display.pixel_xres,
			vfb ? vfb->bufferHeight : g_display.pixel_yres,
			u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);

		draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

		gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
	}
}
1253
1254
bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags, int layer) {
1255
if (!framebuffer->fbo || !useBufferedRendering_) {
1256
draw_->BindTexture(stage, nullptr);
1257
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1258
return false;
1259
}
1260
1261
// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
1262
// Let's just not bother with the copy in that case.
1263
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY);
1264
1265
// Currently rendering to this framebuffer. Need to make a copy.
1266
if (!skipCopy && framebuffer == currentRenderVfb_) {
1267
// Self-texturing, need a copy currently (some backends can potentially support it though).
1268
WARN_LOG_ONCE(selfTextureCopy, Log::G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1269
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
1270
if (currentFramebufferCopy_ && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1271
// We have a copy already that hasn't been invalidated, let's keep using it.
1272
draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::Aspect::COLOR_BIT, layer);
1273
return true;
1274
}
1275
1276
// There's a special case we can handle here, where the game is texturing from the same pixels being read, in order to
1277
// implement a DST*DST blending function, which the PSP can't do. However on the PC we can absolutely do this!
1278
// TODO: Add more checks here.
1279
if (PSP_CoreParameter().compat.flags().DetectDestBlendSquared &&
1280
gstate.isAlphaBlendEnabled() && gstate.getBlendEq() == GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendFuncA() == GE_SRCBLEND_DSTCOLOR && gstate.getBlendFuncB() == GE_DSTBLEND_FIXB && gstate.getFixB() == 0x0 &&
1281
gstate.getMaterialAmbientRGBA() == 0xFFFFFFFF) {
1282
// This is the pure DST*DST case, the SRC color is ignored.
1283
// Used by Brave Story - New Traveller. This assumes that texture coordinates are set to match the framebuffer pixels - and to
1284
// be able to make that assumption reasonably safely we use a compat flag to restrict it to that game.
1285
// We can just override the blend mode. Let's set a state variable.
1286
// We also just leave the last texture bound, ideally we should bind a placeholder here.
1287
gstate_c.dstSquared = true;
1288
return true;
1289
}
1290
1291
Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
1292
if (renderCopy) {
1293
VirtualFramebuffer copyInfo = *framebuffer;
1294
copyInfo.fbo = renderCopy;
1295
1296
bool partial = false;
1297
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags, layer, &partial);
1298
RebindFramebuffer("After BindFramebufferAsColorTexture");
1299
draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::Aspect::COLOR_BIT, layer);
1300
1301
// Only cache the copy if it wasn't a partial copy.
1302
// TODO: Improve on this.
1303
if (!partial && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1304
currentFramebufferCopy_ = renderCopy;
1305
}
1306
gpuStats.numCopiesForSelfTex++;
1307
} else {
1308
// Failed to get temp FBO? Weird.
1309
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::Aspect::COLOR_BIT, layer);
1310
}
1311
return true;
1312
} else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) {
1313
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::Aspect::COLOR_BIT, layer);
1314
return true;
1315
} else {
1316
// Here it's an error because for some reason skipCopy is true. That shouldn't really happen.
1317
ERROR_LOG_REPORT_ONCE(selfTextureFail, Log::G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1318
// To do this safely in Vulkan, we need to use input attachments.
1319
// Actually if the texture region and render regions don't overlap, this is safe, but we need
1320
// to transition to GENERAL image layout which will take some trickery.
1321
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
1322
draw_->BindTexture(stage, nullptr);
1323
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1324
return false;
1325
}
1326
}
1327
1328
// Copies the drawn area of src's color into dst so that it can be textured from.
// With BINDFBCOLOR_MAY_COPY_WITH_UV set and usable U bounds in gstate_c.vertBounds, only the
// rectangle covered by those bounds (optionally shifted by the current texture offset) is copied.
// *partial is set to true when less than the full drawn area was copied, false otherwise.
void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) {
	int left = 0;
	int top = 0;
	int copyW = src->drawnWidth;
	int copyH = src->drawnHeight;

	*partial = false;

	// If max is not > min, we probably could not detect it. Skip.
	// See the vertex decoder, where this is updated.
	// TODO: We're currently not hitting this path in Dante. See #17032
	const bool uvCropRequested = (flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV;
	if (uvCropRequested && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
		left = std::max(gstate_c.vertBounds.minU, (u16)0);
		top = std::max(gstate_c.vertBounds.minV, (u16)0);
		copyW = std::min(gstate_c.vertBounds.maxU, src->drawnWidth) - left;
		copyH = std::min(gstate_c.vertBounds.maxV, src->drawnHeight) - top;

		// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
		if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
			left += gstate_c.curTextureXOffset;
			top += gstate_c.curTextureYOffset;
		}

		// We'll have to reapply these next time since we cropped to UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	// Nothing to copy if the rectangle is empty or starts outside the drawn area.
	const bool startsInside = left < src->drawnWidth && top < src->drawnHeight;
	if (!startsInside || !(copyW > 0) || !(copyH > 0)) {
		return;
	}

	if (left != 0 || top != 0 || copyW < src->drawnWidth || copyH < src->drawnHeight) {
		*partial = true;
	}
	BlitFramebuffer(dst, left, top, src, left, top, copyW, copyH, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
1362
1363
Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
1364
Draw::DataFormat depthFormat = Draw::DataFormat::UNDEFINED;
1365
1366
int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
1367
int srcStrideInBytes = srcStride * bpp;
1368
int widthInBytes = width * bpp;
1369
1370
// Compute hash of contents.
1371
uint64_t imageHash;
1372
if (widthInBytes == srcStrideInBytes) {
1373
imageHash = XXH3_64bits(srcPixels, widthInBytes * height);
1374
} else {
1375
XXH3_state_t *hashState = XXH3_createState();
1376
XXH3_64bits_reset(hashState);
1377
for (int y = 0; y < height; y++) {
1378
XXH3_64bits_update(hashState, srcPixels + srcStrideInBytes * y, widthInBytes);
1379
}
1380
imageHash = XXH3_64bits_digest(hashState);
1381
XXH3_freeState(hashState);
1382
}
1383
1384
Draw::DataFormat texFormat = preferredPixelsFormat_;
1385
1386
if (srcPixelFormat == GE_FORMAT_DEPTH16) {
1387
if ((draw_->GetDataFormatSupport(Draw::DataFormat::R16_UNORM) & Draw::FMT_TEXTURE) != 0) {
1388
texFormat = Draw::DataFormat::R16_UNORM;
1389
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R8_UNORM) & Draw::FMT_TEXTURE) != 0) {
1390
// This could be improved by using specific draw shaders to pack full precision in two channels.
1391
// However, not really worth the trouble until we find a game that requires it.
1392
texFormat = Draw::DataFormat::R8_UNORM;
1393
} else {
1394
// No usable single channel format. Can't be bothered.
1395
return nullptr;
1396
}
1397
} else if (srcPixelFormat == GE_FORMAT_565) {
1398
// Check for supported matching formats.
1399
// This mainly benefits the redundant copies in God of War on low-end platforms.
1400
if ((draw_->GetDataFormatSupport(Draw::DataFormat::B5G6R5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1401
texFormat = Draw::DataFormat::B5G6R5_UNORM_PACK16;
1402
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R5G6B5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1403
texFormat = Draw::DataFormat::R5G6B5_UNORM_PACK16;
1404
}
1405
}
1406
1407
// TODO: We can just change the texture format and flip some bits around instead of this.
1408
// Could share code with the texture cache perhaps.
1409
auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
1410
for (int y = 0; y < height; y++) {
1411
const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
1412
const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
1413
u32 *dst = (u32 *)(data + byteStride * y);
1414
u16 *dst16 = (u16 *)(data + byteStride * y);
1415
u8 *dst8 = (u8 *)(data + byteStride * y);
1416
switch (srcPixelFormat) {
1417
case GE_FORMAT_565:
1418
if (texFormat == Draw::DataFormat::B5G6R5_UNORM_PACK16) {
1419
memcpy(dst16, src16, w * sizeof(uint16_t));
1420
} else if (texFormat == Draw::DataFormat::R5G6B5_UNORM_PACK16) {
1421
ConvertRGB565ToBGR565(dst16, src16, width); // Fast!
1422
} else if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
1423
ConvertRGB565ToBGRA8888(dst, src16, width);
1424
} else {
1425
ConvertRGB565ToRGBA8888(dst, src16, width);
1426
}
1427
break;
1428
1429
case GE_FORMAT_5551:
1430
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1431
ConvertRGBA5551ToBGRA8888(dst, src16, width);
1432
else
1433
ConvertRGBA5551ToRGBA8888(dst, src16, width);
1434
break;
1435
1436
case GE_FORMAT_4444:
1437
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1438
ConvertRGBA4444ToBGRA8888(dst, src16, width);
1439
else
1440
ConvertRGBA4444ToRGBA8888(dst, src16, width);
1441
break;
1442
1443
case GE_FORMAT_8888:
1444
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1445
ConvertRGBA8888ToBGRA8888(dst, src32, width);
1446
// This means use original pointer as-is. May avoid or optimize a copy.
1447
else if (srcStride == width)
1448
return false;
1449
else
1450
memcpy(dst, src32, width * 4);
1451
break;
1452
1453
case GE_FORMAT_DEPTH16:
1454
// TODO: Must take the depth range into account, unless it's already 0-1.
1455
// TODO: Depending on the color buffer format used with this depth buffer, we need
1456
// to do one of two different swizzle operations. However, for the only use of this so far,
1457
// the Burnout lens flare trickery, swizzle doesn't matter since it's just a 0, 7fff, 0, 7fff pattern
1458
// which comes out the same.
1459
if (texFormat == Draw::DataFormat::R16_UNORM) {
1460
// We just use this format straight.
1461
memcpy(dst16, src16, w * 2);
1462
} else if (texFormat == Draw::DataFormat::R8_UNORM) {
1463
// We fall back to R8_UNORM. Precision is enough for most cases of depth clearing and initialization we've seen,
1464
// but hardly ideal.
1465
for (int i = 0; i < width; i++) {
1466
dst8[i] = src16[i] >> 8;
1467
}
1468
}
1469
break;
1470
1471
case GE_FORMAT_INVALID:
1472
case GE_FORMAT_CLUT8:
1473
// Bad
1474
break;
1475
}
1476
}
1477
return true;
1478
};
1479
1480
int frameNumber = draw_->GetFrameCount();
1481
1482
// First look for an exact match (including contents hash) that we can re-use.
1483
for (auto &iter : drawPixelsCache_) {
1484
if (iter.contentsHash == imageHash && iter.tex->Width() == width && iter.tex->Height() == height && iter.tex->Format() == texFormat) {
1485
iter.frameNumber = frameNumber;
1486
gpuStats.numCachedUploads++;
1487
return iter.tex;
1488
}
1489
}
1490
1491
// Then, look for an alternative one that's not been used recently that we can overwrite.
1492
for (auto &iter : drawPixelsCache_) {
1493
if (iter.frameNumber >= frameNumber - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
1494
continue;
1495
}
1496
1497
// OK, current one seems good, let's use it (and mark it used).
1498
gpuStats.numUploads++;
1499
draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
1500
// NOTE: numFlips is no good - this is called every frame when paused sometimes!
1501
iter.frameNumber = frameNumber;
1502
// We need to update the hash for future matching.
1503
iter.contentsHash = imageHash;
1504
return iter.tex;
1505
}
1506
1507
// Note: For depth, we create an R16_UNORM texture, that'll be just fine for uploading depth through a shader,
1508
// and likely more efficient.
1509
Draw::TextureDesc desc{
1510
Draw::TextureType::LINEAR2D,
1511
texFormat,
1512
width,
1513
height,
1514
1,
1515
1,
1516
false,
1517
Draw::TextureSwizzle::DEFAULT,
1518
"DrawPixels",
1519
{ (uint8_t *)srcPixels },
1520
generateTexture,
1521
};
1522
1523
// Hot Shots Golf (#12355) does tons of these in a frame in some situations! So creating textures
1524
// better be fast. So does God of War, a lot of the time, a bit unclear what it's doing.
1525
Draw::Texture *tex = draw_->CreateTexture(desc);
1526
if (!tex) {
1527
ERROR_LOG(Log::G3D, "Failed to create DrawPixels texture");
1528
}
1529
// We don't need to count here, already counted by numUploads by the caller.
1530
1531
// INFO_LOG(Log::G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
1532
1533
DrawPixelsEntry entry{ tex, imageHash, frameNumber };
1534
drawPixelsCache_.push_back(entry);
1535
gpuStats.numUploads++;
1536
return tex;
1537
}
1538
1539
bool FramebufferManagerCommon::DrawFramebufferToOutput(const DisplayLayoutConfig &config, const u8 *srcPixels, int srcStride, GEBufferFormat srcPixelFormat) {
1540
textureCache_->ForgetLastTexture();
1541
shaderManager_->DirtyLastShader();
1542
1543
float u0 = 0.0f, u1 = 480.0f / 512.0f;
1544
float v0 = 0.0f, v1 = 1.0f;
1545
Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272);
1546
if (!pixelsTex)
1547
return false;
1548
1549
int uvRotation = useBufferedRendering_ ? config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1550
OutputFlags flags = config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1551
if (needBackBufferYSwap_) {
1552
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1553
}
1554
// CopyToOutput reverses these, probably to match "up".
1555
if (GetGPUBackend() == GPUBackend::DIRECT3D11) {
1556
flags |= OutputFlags::POSITION_FLIPPED;
1557
}
1558
1559
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1560
presentation_->SourceTexture(pixelsTex, 512, 272);
1561
presentation_->CopyToOutput(config, flags, uvRotation, u0, v0, u1, v1);
1562
1563
// PresentationCommon sets all kinds of state, we can't rely on anything.
1564
gstate_c.Dirty(DIRTY_ALL);
1565
1566
DiscardFramebufferCopy();
1567
currentRenderVfb_ = nullptr;
1568
1569
return true;
1570
}
1571
1572
void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) {
1573
Draw::Viewport viewport{ (float)x, (float)y, (float)w, (float)h, 0.0f, 1.0f };
1574
draw_->SetViewport(viewport);
1575
}
1576
1577
void FramebufferManagerCommon::CopyDisplayToOutput(const DisplayLayoutConfig &config, bool reallyDirty) {
1578
DownloadFramebufferOnSwitch(currentRenderVfb_);
1579
shaderManager_->DirtyLastShader();
1580
1581
if (displayFramebufPtr_ == 0) {
1582
if (GetUIState() != UISTATE_PAUSEMENU) {
1583
if (Core_IsStepping())
1584
VERBOSE_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1585
else
1586
DEBUG_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1587
}
1588
// No framebuffer to display! Clear to black.
1589
if (useBufferedRendering_) {
1590
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput");
1591
}
1592
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1593
presentation_->NotifyPresent();
1594
return;
1595
}
1596
1597
u32 offsetX = 0;
1598
u32 offsetY = 0;
1599
1600
// If it's not really dirty, we're probably frameskipping. Use the last working one.
1601
u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
1602
prevDisplayFramebufPtr_ = fbaddr;
1603
1604
VirtualFramebuffer *vfb = ResolveVFB(fbaddr, displayStride_, displayFormat_);
1605
if (!vfb) {
1606
// Let's search for a framebuf within this range. Note that we also look for
1607
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
1608
// and uncached bits of the address when comparing.
1609
const u32 addr = fbaddr;
1610
for (auto v : vfbs_) {
1611
const u32 v_addr = v->fb_address;
1612
const u32 v_size = v->BufferByteSize(RASTER_COLOR);
1613
1614
if (v->fb_format != displayFormat_ || v->fb_stride != displayStride_) {
1615
// Displaying a buffer of the wrong format or stride is nonsense, ignore it.
1616
continue;
1617
}
1618
1619
if (addr >= v_addr && addr < v_addr + v_size) {
1620
const u32 dstBpp = BufferFormatBytesPerPixel(v->fb_format);
1621
const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
1622
const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
1623
// We have enough space there for the display, right?
1624
if (v_offsetX + 480 > (u32)v->fb_stride || v->bufferHeight < v_offsetY + 272) {
1625
continue;
1626
}
1627
// Check for the closest one.
1628
if (offsetY == 0 || offsetY > v_offsetY) {
1629
offsetX = v_offsetX;
1630
offsetY = v_offsetY;
1631
vfb = v;
1632
}
1633
}
1634
}
1635
1636
if (vfb) {
1637
// Okay, we found one above.
1638
// Log should be "Displaying from framebuf" but not worth changing the report.
1639
DEBUG_LOG(Log::FrameBuf, "Rendering from framebuf with offset %08x -> %08x+%dx%d", addr, vfb->fb_address, offsetX, offsetY);
1640
}
1641
}
1642
1643
// Reject too-tiny framebuffers to display (Godfather, see issue #16915).
1644
if (vfb && vfb->height < 64) {
1645
vfb = nullptr;
1646
}
1647
1648
if (!vfb) {
1649
if (Memory::IsValidAddress(fbaddr)) {
1650
// The game is displaying something directly from RAM. In GTA, it's decoded video.
1651
// If successful, this effectively calls presentation_->NotifyPresent();
1652
if (!DrawFramebufferToOutput(config, Memory::GetPointerUnchecked(fbaddr), displayStride_, displayFormat_)) {
1653
if (useBufferedRendering_) {
1654
// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1655
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_DrawError");
1656
}
1657
presentation_->NotifyPresent();
1658
}
1659
return;
1660
} else {
1661
DEBUG_LOG(Log::FrameBuf, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
1662
// No framebuffer to display! Clear to black.
1663
if (useBufferedRendering_) {
1664
// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1665
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_NoFBO");
1666
} // For non-buffered rendering, every frame is cleared anyway.
1667
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1668
presentation_->NotifyPresent();
1669
return;
1670
}
1671
}
1672
1673
vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
1674
vfb->last_frame_displayed = gpuStats.numFlips;
1675
vfb->dirtyAfterDisplay = false;
1676
vfb->reallyDirtyAfterDisplay = false;
1677
1678
if (prevDisplayFramebuf_ != displayFramebuf_) {
1679
prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
1680
}
1681
if (displayFramebuf_ != vfb) {
1682
prevDisplayFramebuf_ = displayFramebuf_;
1683
}
1684
displayFramebuf_ = vfb;
1685
1686
if (vfb->fbo) {
1687
if (GetUIState() != UISTATE_PAUSEMENU) {
1688
if (Core_IsStepping())
1689
VERBOSE_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1690
else
1691
DEBUG_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1692
}
1693
1694
float u0 = offsetX / (float)vfb->bufferWidth;
1695
float v0 = offsetY / (float)vfb->bufferHeight;
1696
float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
1697
float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;
1698
1699
//clip the VR framebuffer to keep the aspect ratio
1700
if (IsVREnabled() && !IsFlatVRGame() && !IsGameVRScene()) {
1701
float aspect = 272.0f / 480.0f * (IsImmersiveVRMode() ? 2.0f : 1.0f);
1702
float clipY = 272.0f * (1.0f - aspect) / 2.0f;
1703
v0 = (clipY + offsetY) / (float)vfb->bufferHeight;
1704
v1 = (272.0f - clipY + offsetY) / (float)vfb->bufferHeight;
1705
1706
//zoom inside
1707
float zoom = IsImmersiveVRMode() ? 0.4f : 0.1f;
1708
u0 += zoom / aspect;
1709
u1 -= zoom / aspect;
1710
v0 += zoom;
1711
v1 -= zoom;
1712
}
1713
1714
textureCache_->ForgetLastTexture();
1715
1716
int uvRotation = useBufferedRendering_ ? config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1717
OutputFlags flags = config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1718
if (needBackBufferYSwap_) {
1719
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1720
}
1721
// DrawActiveTexture reverses these, probably to match "up".
1722
if (GetGPUBackend() == GPUBackend::DIRECT3D11) {
1723
flags |= OutputFlags::POSITION_FLIPPED;
1724
}
1725
1726
int actualWidth = (vfb->bufferWidth * vfb->renderWidth) / vfb->width;
1727
int actualHeight = (vfb->bufferHeight * vfb->renderHeight) / vfb->height;
1728
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1729
presentation_->SourceFramebuffer(vfb->fbo, actualWidth, actualHeight);
1730
presentation_->CopyToOutput(config, flags, uvRotation, u0, v0, u1, v1);
1731
} else if (useBufferedRendering_) {
1732
WARN_LOG(Log::FrameBuf, "Using buffered rendering, and current VFB lacks an FBO: %08x", vfb->fb_address);
1733
} else {
1734
// This is OK because here we're in "skip buffered" mode, so even if we haven't presented
1735
// we will have a render target.
1736
presentation_->NotifyPresent();
1737
}
1738
1739
// This may get called mid-draw if the game uses an immediate flip.
1740
// PresentationCommon sets all kinds of state, we can't rely on anything.
1741
gstate_c.Dirty(DIRTY_ALL);
1742
DiscardFramebufferCopy();
1743
currentRenderVfb_ = nullptr;
1744
}
1745
1746
void FramebufferManagerCommon::DecimateFBOs() {
1747
DiscardFramebufferCopy();
1748
currentRenderVfb_ = nullptr;
1749
1750
for (auto iter : fbosToDelete_) {
1751
iter->Release();
1752
}
1753
fbosToDelete_.clear();
1754
1755
for (size_t i = 0; i < vfbs_.size(); ++i) {
1756
VirtualFramebuffer *vfb = vfbs_[i];
1757
int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
1758
1759
if (ShouldDownloadFramebufferColor(vfb) && age == 0 && !vfb->memoryUpdated) {
1760
ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
1761
vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
1762
}
1763
1764
// Let's also "decimate" the usageFlags.
1765
UpdateFramebufUsage(vfb);
1766
1767
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
1768
if (age > FBO_OLD_AGE) {
1769
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%ix%i %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1770
DestroyFramebuf(vfb);
1771
vfbs_.erase(vfbs_.begin() + i--);
1772
}
1773
}
1774
}
1775
1776
for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
1777
int age = frameLastFramebufUsed_ - it->second.last_frame_used;
1778
if (age > FBO_OLD_AGE) {
1779
it->second.fbo->Release();
1780
it = tempFBOs_.erase(it);
1781
} else {
1782
++it;
1783
}
1784
}
1785
1786
// Do the same for ReadFramebuffersToMemory's VFBs
1787
for (size_t i = 0; i < bvfbs_.size(); ++i) {
1788
VirtualFramebuffer *vfb = bvfbs_[i];
1789
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
1790
if (age > FBO_OLD_AGE) {
1791
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%dx%d %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1792
DestroyFramebuf(vfb);
1793
bvfbs_.erase(bvfbs_.begin() + i--);
1794
}
1795
}
1796
1797
// And DrawPixels cached textures.
1798
1799
for (auto it = drawPixelsCache_.begin(); it != drawPixelsCache_.end(); ) {
1800
int age = draw_->GetFrameCount() - it->frameNumber;
1801
if (age > 10) {
1802
// INFO_LOG(Log::G3D, "Releasing drawPixelsCache texture: %dx%d", it->tex->Width(), it->tex->Height());
1803
it->tex->Release();
1804
it->tex = nullptr;
1805
it = drawPixelsCache_.erase(it);
1806
} else {
1807
++it;
1808
}
1809
}
1810
}
1811
1812
// Requires width/height to be set already.
1813
// (Re)creates the FBO backing a virtual framebuffer at a new buffer size.
//
// vfb:      the framebuffer to resize; its bufferWidth/bufferHeight, render size and fbo are updated.
// w, h:     requested buffer size (asserted to be > 0 in debug builds).
// force:    when true the buffer becomes exactly w x h; otherwise it only ever grows, per axis,
//           and the call is a no-op if the buffer is already at least w x h.
// skipCopy: when true, the old color contents are not blitted into the new FBO.
//           Depth is still blitted when the buffer has FB_USAGE_RENDER_DEPTH set.
//
// On return (in buffered mode, past the early-outs) the new FBO is bound and currentRenderVfb_ == vfb.
// If FBO creation fails, vfb->fbo is null and the failure frame is recorded in last_frame_failed.
void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force, bool skipCopy) {
	_dbg_assert_(w > 0);
	_dbg_assert_(h > 0);
	// Snapshot of the previous state; used as the blit source and for logging below.
	VirtualFramebuffer old = *vfb;

	int oldWidth = vfb->bufferWidth;
	int oldHeight = vfb->bufferHeight;

	if (force) {
		vfb->bufferWidth = w;
		vfb->bufferHeight = h;
	} else {
		if (vfb->bufferWidth >= w && vfb->bufferHeight >= h) {
			return;
		}

		// In case it gets thin and wide, don't resize down either side.
		vfb->bufferWidth = std::max((int)vfb->bufferWidth, w);
		vfb->bufferHeight = std::max((int)vfb->bufferHeight, h);
	}

	// Decide whether this buffer should be rendered at 1x regardless of the configured scale.
	bool force1x = false;
	switch (bloomHack_) {
	case 1:
		force1x = vfb->bufferWidth <= 128 || vfb->bufferHeight <= 64;
		break;
	case 2:
		force1x = vfb->bufferWidth <= 256 || vfb->bufferHeight <= 128;
		break;
	case 3:
		force1x = vfb->bufferWidth < 480 || vfb->bufferWidth > 800 || vfb->bufferHeight < 272; // GOW uses 864x272
		break;
	}

	if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
		force1x = false;
	}
	if (PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000) {
		force1x = true;
	}

	if (force1x && g_Config.iInternalResolution != 1) {
		vfb->renderScaleFactor = 1;
		vfb->renderWidth = vfb->bufferWidth;
		vfb->renderHeight = vfb->bufferHeight;
	} else {
		vfb->renderScaleFactor = renderScaleFactor_;
		vfb->renderWidth = (u16)(vfb->bufferWidth * renderScaleFactor_);
		vfb->renderHeight = (u16)(vfb->bufferHeight * renderScaleFactor_);
	}

	bool creating = old.bufferWidth == 0;
	if (creating) {
		INFO_LOG(Log::FrameBuf, "Creating %s FBO at %08x/%08x stride=%d %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, vfb->bufferWidth, vfb->bufferHeight, (int)force);
	} else {
		INFO_LOG(Log::FrameBuf, "Resizing %s FBO at %08x/%08x stride=%d from %dx%d to %dx%d (force=%d, skipCopy=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force, (int)skipCopy);
	}

	// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
	// It's not worth the trouble trying to support lower bit-depth rendering, just
	// more cases to test that nobody will ever use.

	textureCache_->ForgetLastTexture();

	if (!useBufferedRendering_) {
		if (vfb->fbo) {
			vfb->fbo->Release();
			vfb->fbo = nullptr;
		}
		return;
	}
	// Don't constantly retry FBOs which failed to create: wait 63 flips after the recorded failure.
	// The elapsed time is "now minus failure frame"; the reverse order is never positive for a
	// past failure and would suppress retries forever.
	if (!old.fbo && vfb->last_frame_failed != 0 && gpuStats.numFlips - vfb->last_frame_failed < 63) {
		return;
	}

	shaderManager_->DirtyLastShader();
	char tag[128];
	size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));

	gpuStats.numFBOsCreated++;

	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
	if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
		NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len);
	}
	if (Memory::IsVRAMAddress(vfb->z_address) && vfb->z_stride != 0) {
		char buf[128];
		size_t zLen = snprintf(buf, sizeof(buf), "Z_%s", tag);
		NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->z_stride * vfb->height * sizeof(uint16_t), buf, zLen);
	}
	if (old.fbo) {
		INFO_LOG(Log::FrameBuf, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->fb_format));
		if (vfb->fbo) {
			draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
			// Carry the old contents over, limited to the area both old and new buffers cover.
			if (!skipCopy) {
				BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_COLOR, "BlitColor_ResizeFramebufFBO");
			}
			if (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) {
				BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_DEPTH, "BlitDepth_ResizeFramebufFBO");
			}
		}
		// The old FBO is not released here; it is queued and released when fbosToDelete_ is processed.
		fbosToDelete_.push_back(old.fbo);
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "ResizeFramebufFBO");
	} else {
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
	}
	DiscardFramebufferCopy();
	currentRenderVfb_ = vfb;

	if (!vfb->fbo) {
		ERROR_LOG(Log::FrameBuf, "Error creating FBO during resize! %dx%d", vfb->renderWidth, vfb->renderHeight);
		vfb->last_frame_failed = gpuStats.numFlips;
	}
}
1928
1929
struct CopyCandidate {
1930
VirtualFramebuffer *vfb = nullptr;
1931
int y = 0;
1932
int h = 0;
1933
1934
std::string ToString(RasterChannel channel) const {
1935
return StringFromFormat("%08x %s %dx%d y=%d h=%d", vfb->Address(channel), GeBufferFormatToString(vfb->Format(channel)), vfb->width, vfb->height, y, h);
1936
}
1937
};
1938
1939
// Returns the preferred candidate from the set, or nullptr if the set is empty.
// Candidates are visited in order; a later candidate replaces the current pick when it
// starts at a lower row (y), or otherwise when its BindSeq for the channel is higher.
// basePtr is currently unused.
static const CopyCandidate *GetBestCopyCandidate(const TinySet<CopyCandidate, 4> &candidates, uint32_t basePtr, RasterChannel channel) {
	const CopyCandidate *winner = nullptr;

	for (size_t idx = 0; idx < candidates.size(); ++idx) {
		const CopyCandidate &contender = candidates[idx];

		// The first candidate wins by default.
		if (winner == nullptr) {
			winner = &contender;
			continue;
		}

		// Heuristics determined from the old algorithm, that we might want to keep:
		// * Lower yOffsets are prioritized.
		// * Bindseq
		const bool startsHigherUp = contender.y < winner->y;
		if (startsHigherUp || contender.vfb->BindSeq(channel) > winner->vfb->BindSeq(channel)) {
			winner = &contender;
		}
	}

	return winner;
}
1963
1964
// This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers.
1965
// Also with specialized flags from some replacement functions. Only those will currently request depth copies!
1966
// NOTE: This is very tricky because there's no information about color depth here, so we'll have to make guesses
1967
// about what underlying framebuffer is the most likely to be the relevant ones. For src, we can probably prioritize recent
1968
// ones. For dst, less clear.
1969
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, GPUCopyFlag flags, u32 skipDrawReason) {
1970
if (size == 0) {
1971
return false;
1972
}
1973
1974
dst &= 0x3FFFFFFF;
1975
src &= 0x3FFFFFFF;
1976
1977
if (Memory::IsVRAMAddress(dst))
1978
dst &= 0x041FFFFF;
1979
if (Memory::IsVRAMAddress(src))
1980
src &= 0x041FFFFF;
1981
1982
// TODO: Merge the below into FindTransferFramebuffer.
1983
// Or at least this should be like the other ones, gathering possible candidates
1984
// with the ability to list them out for debugging.
1985
1986
bool ignoreDstBuffer = flags & GPUCopyFlag::FORCE_DST_MATCH_MEM;
1987
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::MEMSET);
1988
1989
// TODO: In the future we should probably check both channels. Currently depth is only on request.
1990
RasterChannel channel = (flags & GPUCopyFlag::DEPTH_REQUESTED) ? RASTER_DEPTH : RASTER_COLOR;
1991
1992
TinySet<CopyCandidate, 4> srcCandidates;
1993
TinySet<CopyCandidate, 4> dstCandidates;
1994
1995
// TODO: These two loops should be merged into one utility function, similar to what's done with rectangle copies.
1996
1997
// First find candidates for the source.
1998
// We only look at the color channel for now.
1999
for (auto vfb : vfbs_) {
2000
if (vfb->fb_stride == 0 || ignoreSrcBuffer) {
2001
continue;
2002
}
2003
2004
// We only remove the kernel and uncached bits when comparing.
2005
const u32 vfb_address = vfb->Address(channel);
2006
const u32 vfb_size = vfb->BufferByteSize(channel);
2007
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
2008
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
2009
2010
CopyCandidate srcCandidate;
2011
srcCandidate.vfb = vfb;
2012
2013
// Special path for depth for now.
2014
if (channel == RASTER_DEPTH) {
2015
if (src == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
2016
srcCandidate.y = 0;
2017
srcCandidate.h = vfb->height;
2018
srcCandidates.push_back(srcCandidate);
2019
}
2020
continue;
2021
}
2022
2023
if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
2024
// Heuristic originally from dest below, but just as valid looking for the source.
2025
// Fixes a misdetection in Brothers in Arms: D-Day, issue #18512.
2026
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
2027
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
2028
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
2029
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
2030
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for source in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
2031
continue;
2032
}
2033
2034
if ((u32)size > vfb_size + 0x1000 && vfb->fb_format != GE_FORMAT_8888 && vfb->last_frame_render < gpuStats.numFlips) {
2035
// Seems likely we are looking at a potential copy of 32-bit pixels (like video) to an old 16-bit buffer,
2036
// which is very likely simply the wrong target, so skip it. See issue #17740 where this happens in Naruto Ultimate Ninja Heroes 2.
2037
// Probably no point to give it a bad score and let it pass to sorting, as we're pretty sure here.
2038
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x too small for %08x bytes of data and also 16-bit (%s), and not rendered to this frame. Ignoring.", vfb_size, size, GeBufferFormatToString(vfb->fb_format));
2039
continue;
2040
}
2041
2042
const u32 offset = src - vfb_address;
2043
const u32 yOffset = offset / vfb_byteStride;
2044
if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2045
srcCandidate.y = yOffset;
2046
srcCandidate.h = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2047
} else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride) {
2048
// Valkyrie Profile reads 512 bytes at a time, rather than 2048. So, let's whitelist fb_stride also.
2049
srcCandidate.y = yOffset;
2050
srcCandidate.h = 1;
2051
} else if (yOffset == 0 && (vfb->usageFlags & FB_USAGE_CLUT)) {
2052
// Okay, last try - it might be a clut.
2053
srcCandidate.y = yOffset;
2054
srcCandidate.h = 1;
2055
} else {
2056
continue;
2057
}
2058
srcCandidates.push_back(srcCandidate);
2059
}
2060
}
2061
2062
for (auto vfb : vfbs_) {
2063
if (vfb->fb_stride == 0 || ignoreDstBuffer) {
2064
continue;
2065
}
2066
2067
// We only remove the kernel and uncached bits when comparing.
2068
const u32 vfb_address = vfb->Address(channel);
2069
const u32 vfb_size = vfb->BufferByteSize(channel);
2070
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
2071
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
2072
2073
// Heuristic to try to prevent potential glitches with video playback.
2074
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
2075
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
2076
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
2077
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
2078
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for dest in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
2079
continue;
2080
}
2081
2082
CopyCandidate dstCandidate;
2083
dstCandidate.vfb = vfb;
2084
2085
// Special path for depth for now.
2086
if (channel == RASTER_DEPTH) {
2087
// Let's assume exact matches only for simplicity.
2088
if (dst == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
2089
dstCandidate.y = 0;
2090
dstCandidate.h = vfb->height;
2091
dstCandidates.push_back(dstCandidate);
2092
}
2093
continue;
2094
}
2095
2096
if (!ignoreDstBuffer && dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
2097
const u32 offset = dst - vfb_address;
2098
const u32 yOffset = offset / vfb_byteStride;
2099
if ((offset % vfb_byteStride) == 0 && (size <= vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2100
dstCandidate.y = yOffset;
2101
dstCandidate.h = (size <= vfb_byteWidth) ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2102
dstCandidates.push_back(dstCandidate);
2103
}
2104
}
2105
}
2106
2107
// For now fill in these old variables from the candidates to reduce the initial diff.
2108
VirtualFramebuffer *dstBuffer = nullptr;
2109
VirtualFramebuffer *srcBuffer = nullptr;
2110
int srcY;
2111
int srcH;
2112
int dstY;
2113
int dstH;
2114
2115
const CopyCandidate *bestSrc = GetBestCopyCandidate(srcCandidates, src, channel);
2116
if (bestSrc) {
2117
srcBuffer = bestSrc->vfb;
2118
srcY = bestSrc->y;
2119
srcH = bestSrc->h;
2120
}
2121
const CopyCandidate *bestDst = GetBestCopyCandidate(dstCandidates, dst, channel);
2122
if (bestDst) {
2123
dstBuffer = bestDst->vfb;
2124
dstY = bestDst->y;
2125
dstH = bestDst->h;
2126
}
2127
2128
if (srcCandidates.size() > 1) {
2129
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2130
std::string log;
2131
for (size_t i = 0; i < srcCandidates.size(); i++) {
2132
log += " - " + srcCandidates[i].ToString(channel);
2133
if (bestSrc && srcCandidates[i].vfb == bestSrc->vfb) {
2134
log += " * \n";
2135
} else {
2136
log += "\n";
2137
}
2138
}
2139
WARN_LOG(Log::FrameBuf, "Copy: Multiple src vfb candidates for (src: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2140
}
2141
}
2142
2143
if (dstCandidates.size() > 1) {
2144
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2145
std::string log;
2146
for (size_t i = 0; i < dstCandidates.size(); i++) {
2147
log += " - " + dstCandidates[i].ToString(channel);
2148
if (bestDst && dstCandidates[i].vfb == bestDst->vfb) {
2149
log += " * \n";
2150
} else {
2151
log += "\n";
2152
}
2153
}
2154
WARN_LOG(Log::FrameBuf, "Copy: Multiple dst vfb candidates for (dst: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2155
}
2156
}
2157
2158
if (!useBufferedRendering_) {
2159
// If we're copying into a recently used display buf, it's probably destined for the screen.
2160
if (channel == RASTER_DEPTH || srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
2161
return false;
2162
}
2163
}
2164
2165
if (!dstBuffer && srcBuffer && channel != RASTER_DEPTH) {
2166
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
2167
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
2168
bool allowCreateFB = (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB || GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE);
2169
if (allowCreateFB && !(flags & GPUCopyFlag::DISALLOW_CREATE_VFB)) {
2170
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->fb_format);
2171
dstY = 0;
2172
}
2173
}
2174
if (dstBuffer) {
2175
dstBuffer->last_frame_used = gpuStats.numFlips;
2176
if (channel == RASTER_DEPTH && !srcBuffer)
2177
dstBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2178
}
2179
if (srcBuffer && channel == RASTER_DEPTH && !dstBuffer)
2180
srcBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2181
2182
if (dstBuffer && srcBuffer) {
2183
if (srcBuffer == dstBuffer) {
2184
WARN_LOG_ONCE(dstsrccpy, Log::FrameBuf, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
2185
} else {
2186
WARN_LOG_ONCE(dstnotsrccpy, Log::FrameBuf, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
2187
// Just do the blit!
2188
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, channel, "Blit_InterBufferMemcpy");
2189
SetColorUpdated(dstBuffer, skipDrawReason);
2190
RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
2191
}
2192
return false;
2193
} else if (dstBuffer) {
2194
if (flags & GPUCopyFlag::MEMSET) {
2195
gpuStats.numClears++;
2196
WARN_LOG_N_TIMES(btucpy, 5, Log::FrameBuf, "Memcpy fbo memset-clear %08x (size: %x)", dst, size);
2197
} else {
2198
WARN_LOG_N_TIMES(btucpy, 5, Log::FrameBuf, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
2199
}
2200
FlushBeforeCopy();
2201
2202
// TODO: Hot Shots Golf makes a lot of these during the "meter", to copy back the image to the screen, it copies line by line.
2203
// We could collect these in a buffer and flush on the next draw, or something like that, to avoid that. The line copies cause
2204
// awkward visual artefacts.
2205
const u8 *srcBase = Memory::GetPointerUnchecked(src);
2206
GEBufferFormat srcFormat = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : dstBuffer->fb_format;
2207
int srcStride = channel == RASTER_DEPTH ? dstBuffer->z_stride : dstBuffer->fb_stride;
2208
DrawPixels(dstBuffer, 0, dstY, srcBase, srcFormat, srcStride, dstBuffer->width, dstH, channel, "MemcpyFboUpload_DrawPixels");
2209
SetColorUpdated(dstBuffer, skipDrawReason);
2210
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
2211
// This is a memcpy, let's still copy just in case.
2212
return false;
2213
} else if (srcBuffer) {
2214
WARN_LOG_N_TIMES(btdcpy, 5, Log::FrameBuf, "Memcpy fbo download %08x -> %08x", src, dst);
2215
FlushBeforeCopy();
2216
// TODO: In Hot Shots Golf, check if we can do a readback to a framebuffer here.
2217
// Again we have the problem though that it's doing a lot of small copies here, one for each line.
2218
if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
2219
WARN_LOG_ONCE(btdcpyheight, Log::FrameBuf, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
2220
} else if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && (!srcBuffer->memoryUpdated || channel == RASTER_DEPTH)) {
2221
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH, channel, Draw::ReadbackMode::BLOCK);
2222
srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
2223
}
2224
return false;
2225
} else {
2226
return false;
2227
}
2228
}
2229
2230
std::string BlockTransferRect::ToString() const {
2231
int bpp = BufferFormatBytesPerPixel(channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : vfb->fb_format);
2232
return StringFromFormat("%s %08x/%d/%s seq:%d %d,%d %dx%d", RasterChannelToString(channel), vfb->fb_address, vfb->FbStrideInBytes(), GeBufferFormatToString(vfb->fb_format), vfb->colorBindSeq, x_bytes / bpp, y, w_bytes / bpp, h);
2233
}
2234
2235
// This is used when looking for framebuffers for a block transfer.
2236
// The only known game to block transfer depth buffers is Iron Man, see #16530, so
2237
// we have a compat flag and pretty limited functionality for that.
2238
bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_pixels, int x_pixels, int y, int w_pixels, int h, int bpp, bool destination, BlockTransferRect *rect) {
2239
basePtr &= 0x3FFFFFFF;
2240
if (Memory::IsVRAMAddress(basePtr))
2241
basePtr &= 0x041FFFFF;
2242
rect->vfb = nullptr;
2243
2244
if (!stride_pixels) {
2245
WARN_LOG(Log::FrameBuf, "Zero stride in FindTransferFrameBuffer, ignoring");
2246
return false;
2247
}
2248
2249
const u32 byteStride = stride_pixels * bpp;
2250
int x_bytes = x_pixels * bpp;
2251
int w_bytes = w_pixels * bpp;
2252
2253
TinySet<BlockTransferRect, 4> candidates;
2254
2255
// We work entirely in bytes when we do the matching, because games don't consistently use bpps that match
2256
// that of their buffers. Then after matching we try to map the copy to the simplest operation that does
2257
// what we need.
2258
2259
// We are only looking at color for now, have not found any block transfers of depth data (although it's plausible).
2260
2261
for (auto vfb : vfbs_) {
2262
BlockTransferRect candidate{ vfb, RASTER_COLOR };
2263
2264
// Two cases so far of games depending on depth copies: Iron Man in issue #16530 (buffer->buffer)
2265
// and also #17878 where a game does ram->buffer to an auto-swizzling (|0x600000) address,
2266
// to initialize Z with a pre-rendered depth buffer.
2267
if (vfb->z_address == basePtr && vfb->BufferByteStride(RASTER_DEPTH) == byteStride && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
2268
WARN_LOG_N_TIMES(z_xfer, 5, Log::FrameBuf, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
2269
candidate.channel = RASTER_DEPTH;
2270
candidate.x_bytes = x_pixels * bpp;
2271
candidate.w_bytes = w_pixels * bpp;
2272
candidate.y = y;
2273
candidate.h = h;
2274
candidates.push_back(candidate);
2275
continue;
2276
}
2277
2278
const u32 vfb_address = vfb->fb_address;
2279
const u32 vfb_size = vfb->BufferByteSize(RASTER_COLOR);
2280
2281
if (basePtr < vfb_address || basePtr >= vfb_address + vfb_size) {
2282
continue;
2283
}
2284
2285
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
2286
const u32 vfb_byteStride = vfb->FbStrideInBytes();
2287
const u32 vfb_byteWidth = vfb->WidthInBytes();
2288
2289
candidate.w_bytes = w_pixels * bpp;
2290
candidate.h = h;
2291
2292
const u32 byteOffset = basePtr - vfb_address;
2293
const int memXOffset = byteOffset % byteStride;
2294
const int memYOffset = byteOffset / byteStride;
2295
2296
// Some games use mismatching bitdepths. But make sure the stride matches.
2297
// If it doesn't, generally this means we detected the framebuffer with too large a height.
2298
// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
2299
2300
// If it's outside the vfb by a single pixel, we currently disregard it.
2301
if (memYOffset > vfb->bufferHeight - h) {
2302
continue;
2303
}
2304
2305
if (byteOffset == vfb->WidthInBytes() && w_bytes < vfb->FbStrideInBytes()) {
2306
// Looks like we're in a margin texture of the vfb, which is not the vfb itself.
2307
// Ignore the match.
2308
continue;
2309
}
2310
2311
if (vfb_byteStride != byteStride) {
2312
// Grand Knights History occasionally copies with a mismatching stride but a full line at a time.
2313
// That's why we multiply by height, not width - this copy is a rectangle with the wrong stride but a line with the correct one.
2314
// Makes it hard to detect the wrong transfers in e.g. God of War.
2315
if (w_pixels != stride_pixels || (byteStride * h != vfb_byteStride && byteStride * h != vfb_byteWidth)) {
2316
if (destination) {
2317
// However, some other games write cluts to framebuffers.
2318
// Let's catch this and upload. Otherwise reject the match.
2319
bool match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
2320
if (match) {
2321
candidate.w_bytes = byteStride * h;
2322
h = 1;
2323
} else {
2324
continue;
2325
}
2326
} else {
2327
continue;
2328
}
2329
} else {
2330
// This is the Grand Knights History case.
2331
candidate.w_bytes = byteStride * h;
2332
candidate.h = 1;
2333
}
2334
} else {
2335
candidate.w_bytes = w_bytes;
2336
candidate.h = h;
2337
}
2338
2339
candidate.x_bytes = x_bytes + memXOffset;
2340
candidate.y = y + memYOffset;
2341
candidate.vfb = vfb;
2342
candidates.push_back(candidate);
2343
}
2344
2345
const BlockTransferRect *best = nullptr;
2346
// Sort candidates by just recency for now, we might add other.
2347
for (size_t i = 0; i < candidates.size(); i++) {
2348
const BlockTransferRect *candidate = &candidates[i];
2349
2350
bool better = !best;
2351
if (!better) {
2352
if (candidate->channel == best->channel) {
2353
better = candidate->vfb->BindSeq(candidate->channel) > best->vfb->BindSeq(candidate->channel);
2354
} else {
2355
// Prefer depth over color if the address match is perfect.
2356
if (candidate->channel == RASTER_DEPTH && best->channel == RASTER_COLOR && candidate->vfb->z_address == basePtr) {
2357
better = true;
2358
}
2359
}
2360
}
2361
2362
if ((candidate->vfb->usageFlags & FB_USAGE_CLUT) && candidate->x_bytes == 0 && candidate->y == 0 && destination) {
2363
// Hack to prioritize copies to clut buffers.
2364
best = candidate;
2365
break;
2366
}
2367
if (better) {
2368
best = candidate;
2369
}
2370
}
2371
2372
if (candidates.size() > 1) {
2373
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2374
std::string log;
2375
for (size_t i = 0; i < candidates.size(); i++) {
2376
log += " - " + candidates[i].ToString() + "\n";
2377
}
2378
WARN_LOG(Log::FrameBuf, "Multiple framebuffer candidates for %08x/%d/%d %d,%d %dx%d (dest = %d):\n%s", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h, (int)destination, log.c_str());
2379
}
2380
}
2381
2382
if (best) {
2383
*rect = *best;
2384
return true;
2385
} else {
2386
if (Memory::IsVRAMAddress(basePtr) && destination && h >= 128) {
2387
WARN_LOG_N_TIMES(nocands, 5, Log::FrameBuf, "Didn't find a destination candidate for %08x/%d/%d %d,%d %dx%d", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h);
2388
}
2389
return false;
2390
}
2391
}
2392
2393
// Creates a new VirtualFramebuffer (with a backing FBO) at the given address, appends it to vfbs_
// and returns it. Passing GE_FORMAT_DEPTH16 creates a depth-only target: fb_address is set to an
// invalid value, z_address/z_stride describe the buffer and the FBO is created with a depth buffer.
// Any other format creates a color target.
VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
	INFO_LOG(Log::FrameBuf, "Creating RAM framebuffer at %08x (%dx%d, stride %d, fb_format %d)", fbAddress, width, height, stride, format);

	const bool isDepth = format == GE_FORMAT_DEPTH16;
	RasterChannel channel = isDepth ? RASTER_DEPTH : RASTER_COLOR;

	// A target for the destination is missing - so just create one!
	// Make sure this one would be found by the algorithm above so we wouldn't
	// create a new one each frame.
	VirtualFramebuffer *vfb = new VirtualFramebuffer{};
	vfb->fbo = nullptr;
	uint32_t mask = Memory::IsVRAMAddress(fbAddress) ? 0x041FFFFF : 0x3FFFFFFF;
	if (isDepth) {
		vfb->fb_address = 0xFFFFFFFF;  // Invalid address
		vfb->fb_stride = 0;
		// Store the masked address, like the color path does: FindTransferFramebuffer compares
		// z_address against a masked base pointer, so an unmasked address would never match again.
		vfb->z_address = fbAddress & mask;  // marks that if anyone tries to render with depth to this framebuffer, it should be dropped and recreated.
		vfb->z_stride = stride;
		vfb->width = width;
	} else {
		vfb->fb_address = fbAddress & mask;  // NOTE - not necessarily in VRAM!
		vfb->fb_stride = stride;
		vfb->z_address = 0;
		vfb->z_stride = 0;
		vfb->width = std::max(width, stride);
	}
	vfb->height = height;
	vfb->newWidth = vfb->width;
	vfb->newHeight = vfb->height;
	vfb->lastFrameNewSize = gpuStats.numFlips;
	vfb->renderScaleFactor = renderScaleFactor_;
	vfb->renderWidth = (u16)(vfb->width * renderScaleFactor_);
	vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
	vfb->bufferWidth = vfb->width;
	vfb->bufferHeight = vfb->height;
	// Depth-only targets still get a color format assigned.
	vfb->fb_format = isDepth ? GE_FORMAT_8888 : format;
	vfb->usageFlags = isDepth ? FB_USAGE_RENDER_DEPTH : FB_USAGE_RENDER_COLOR;
	if (!isDepth) {
		SetColorUpdated(vfb, 0);
	}
	char name[64];
	snprintf(name, sizeof(name), "%08x_%s_RAM", vfb->Address(channel), RasterChannelToString(channel));
	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
	bool createDepthBuffer = isDepth;
	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, createDepthBuffer, name });
	vfbs_.push_back(vfb);

	// Extend the tracked framebuffer address range to cover the new buffer.
	u32 byteSize = vfb->BufferByteSize(channel);
	if (fbAddress + byteSize > framebufColorRangeEnd_) {
		framebufColorRangeEnd_ = fbAddress + byteSize;
	}

	return vfb;
}
2445
2446
// 1:1 pixel size buffers, we resize buffers to these before we read them back.
2447
// TODO: We shouldn't keep whole VirtualFramebuffer structs for these - the fbo and last_frame_render is enough.
2448
VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb, RasterChannel channel) {
2449
// For now we'll keep these on the same struct as the ones that can get displayed
2450
// (and blatantly copy work already done above while at it).
2451
VirtualFramebuffer *nvfb = nullptr;
2452
2453
// We maintain a separate vector of framebuffer objects for blitting.
2454
for (VirtualFramebuffer *v : bvfbs_) {
2455
if (v->Address(channel) == vfb->Address(channel) && v->Format(channel) == vfb->Format(channel)) {
2456
if (v->bufferWidth == vfb->bufferWidth && v->bufferHeight == vfb->bufferHeight) {
2457
nvfb = v;
2458
if (channel == RASTER_COLOR) {
2459
v->fb_stride = vfb->fb_stride;
2460
} else {
2461
v->z_stride = vfb->z_stride;
2462
}
2463
v->width = vfb->width;
2464
v->height = vfb->height;
2465
break;
2466
}
2467
}
2468
}
2469
2470
// Create a new fbo if none was found for the size
2471
if (!nvfb) {
2472
nvfb = new VirtualFramebuffer{};
2473
nvfb->fbo = nullptr;
2474
nvfb->fb_address = channel == RASTER_COLOR ? vfb->fb_address : 0;
2475
nvfb->fb_stride = channel == RASTER_COLOR ? vfb->fb_stride : 0;
2476
nvfb->z_address = channel == RASTER_DEPTH ? vfb->z_address : 0;
2477
nvfb->z_stride = channel == RASTER_DEPTH ? vfb->z_stride : 0;
2478
nvfb->width = vfb->width;
2479
nvfb->height = vfb->height;
2480
nvfb->renderWidth = vfb->bufferWidth;
2481
nvfb->renderHeight = vfb->bufferHeight;
2482
nvfb->renderScaleFactor = 1; // For readbacks we resize to the original size, of course.
2483
nvfb->bufferWidth = vfb->bufferWidth;
2484
nvfb->bufferHeight = vfb->bufferHeight;
2485
nvfb->fb_format = vfb->fb_format;
2486
nvfb->drawnWidth = vfb->drawnWidth;
2487
nvfb->drawnHeight = vfb->drawnHeight;
2488
2489
char name[64];
2490
snprintf(name, sizeof(name), "download_temp_%08x_%s", vfb->Address(channel), RasterChannelToString(channel));
2491
2492
// We always create a color-only framebuffer here - readbacks of depth convert to color while translating the values.
2493
nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, false, name });
2494
if (!nvfb->fbo) {
2495
ERROR_LOG(Log::FrameBuf, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
2496
delete nvfb;
2497
return nullptr;
2498
}
2499
bvfbs_.push_back(nvfb);
2500
} else {
2501
UpdateDownloadTempBuffer(nvfb);
2502
}
2503
2504
nvfb->usageFlags |= FB_USAGE_RENDER_COLOR;
2505
nvfb->last_frame_render = gpuStats.numFlips;
2506
nvfb->dirtyAfterDisplay = true;
2507
2508
return nvfb;
2509
}
2510
2511
void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) {
2512
if (currentRenderVfb_) {
2513
if ((currentRenderVfb_->usageFlags & FB_USAGE_DOWNLOAD_CLEAR) != 0) {
2514
// Already zeroed in memory.
2515
return;
2516
}
2517
}
2518
2519
if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) {
2520
return;
2521
}
2522
2523
u8 *addr = Memory::GetPointerWriteUnchecked(gstate.getFrameBufAddress());
2524
const int bpp = BufferFormatBytesPerPixel(gstate_c.framebufFormat);
2525
2526
u32 clearBits = clearColor;
2527
if (bpp == 2) {
2528
u16 clear16 = 0;
2529
switch (gstate_c.framebufFormat) {
2530
case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break;
2531
case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break;
2532
case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break;
2533
default: _dbg_assert_(0); break;
2534
}
2535
clearBits = clear16 | (clear16 << 16);
2536
}
2537
2538
const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF);
2539
const int stride = gstate.FrameBufStride();
2540
const int width = x2 - x1;
2541
2542
const int byteStride = stride * bpp;
2543
const int byteWidth = width * bpp;
2544
for (int y = y1; y < y2; ++y) {
2545
NotifyMemInfo(MemBlockFlags::WRITE, gstate.getFrameBufAddress() + x1 * bpp + y * byteStride, byteWidth, "FramebufferClear");
2546
}
2547
2548
// Can use memset for simple cases. Often alpha is different and gums up the works.
2549
if (singleByteClear) {
2550
addr += x1 * bpp;
2551
for (int y = y1; y < y2; ++y) {
2552
memset(addr + y * byteStride, clearBits, byteWidth);
2553
}
2554
} else {
2555
// This will most often be true - rarely is the width not aligned.
2556
// TODO: We should really use non-temporal stores here to avoid the cache,
2557
// as it's unlikely that these bytes will be read.
2558
if ((width & 3) == 0 && (x1 & 3) == 0) {
2559
u64 val64 = clearBits | ((u64)clearBits << 32);
2560
int xstride = 8 / bpp;
2561
2562
u64 *addr64 = (u64 *)addr;
2563
const int stride64 = stride / xstride;
2564
const int x1_64 = x1 / xstride;
2565
const int x2_64 = x2 / xstride;
2566
for (int y = y1; y < y2; ++y) {
2567
for (int x = x1_64; x < x2_64; ++x) {
2568
addr64[y * stride64 + x] = val64;
2569
}
2570
}
2571
} else if (bpp == 4) {
2572
u32 *addr32 = (u32 *)addr;
2573
for (int y = y1; y < y2; ++y) {
2574
for (int x = x1; x < x2; ++x) {
2575
addr32[y * stride + x] = clearBits;
2576
}
2577
}
2578
} else if (bpp == 2) {
2579
u16 *addr16 = (u16 *)addr;
2580
for (int y = y1; y < y2; ++y) {
2581
for (int x = x1; x < x2; ++x) {
2582
addr16[y * stride + x] = (u16)clearBits;
2583
}
2584
}
2585
}
2586
}
2587
2588
if (currentRenderVfb_) {
2589
// The current content is in memory now, so update the flag.
2590
if (x1 == 0 && y1 == 0 && x2 >= currentRenderVfb_->width && y2 >= currentRenderVfb_->height) {
2591
currentRenderVfb_->usageFlags |= FB_USAGE_DOWNLOAD_CLEAR;
2592
currentRenderVfb_->memoryUpdated = true;
2593
}
2594
}
2595
}
2596
2597
// Called before a block transfer's memory copy is performed. Looks up framebuffers matching the
// source and destination rectangles and, where possible, performs the transfer on the GPU side
// (blit between framebuffers, depth upload) or reads the source back to memory first.
// Returns true if the memory copy should be skipped, false if it should still happen.
bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	if (!useBufferedRendering_) {
		return false;
	}

	// Skip checking if there's no framebuffers in that area. Make a special exception for obvious transfers to depth buffer, see issue #17878
	bool dstDepthSwizzle = Memory::IsVRAMAddress(dstBasePtr) && ((dstBasePtr & 0x600000) == 0x600000);

	if (!dstDepthSwizzle && !MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return false;
	}

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	bool srcBuffer = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	bool dstBuffer = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	if (srcRect.channel == RASTER_DEPTH) {
		// Ignore the found buffer if it's not 16-bit - we create a new more suitable one instead.
		// dstRect.vfb is only valid when a destination was actually found, so check dstBuffer
		// before dereferencing it.
		if (dstBuffer && dstRect.channel == RASTER_COLOR && dstRect.vfb->fb_format == GE_FORMAT_8888) {
			dstBuffer = false;
		}
	}

	if (srcBuffer && !dstBuffer) {
		// In here, we can't read from dstRect.
		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
			GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE ||
			(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
				(srcRect.vfb && Memory::IsVRAMAddress(srcRect.vfb->fb_address)) && Memory::IsVRAMAddress(dstBasePtr))) {
			GEBufferFormat ramFormat;
			// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
			if (srcRect.channel == RASTER_COLOR) {
				if (bpp == 4) {
					// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
					ramFormat = GE_FORMAT_8888;
				} else if (srcRect.vfb && srcRect.vfb->fb_format != GE_FORMAT_8888) {
					// We guess that the game will interpret the data the same as it was in the source of the copy.
					// Seems like a likely good guess, and works in Test Drive Unlimited.
					ramFormat = srcRect.vfb->fb_format;
				} else {
					// No info left - just fall back to something. But this is definitely split pixel tricks.
					ramFormat = GE_FORMAT_5551;
				}
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, ramFormat);
				dstRect.x_bytes = bpp * dstX;
				dstRect.y = dstY;
				dstRect.w_bytes = bpp * width;
				dstRect.h = height;
				dstRect.channel = RASTER_COLOR;
			} else {
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, GE_FORMAT_DEPTH16);
				dstRect.x_bytes = 0;
				dstRect.w_bytes = 2 * width;  // 2 = depth bpp
				dstRect.y = 0;
				dstRect.h = height;
				dstRect.channel = RASTER_DEPTH;
			}
			dstBuffer = true;
		}
	}

	if (dstBuffer) {
		dstRect.vfb->last_frame_used = gpuStats.numFlips;
		// Mark the destination as fresh.
		if (dstRect.channel == RASTER_COLOR) {
			dstRect.vfb->colorBindSeq = GetBindSeqCount();
		} else {
			dstRect.vfb->depthBindSeq = GetBindSeqCount();
		}
	}

	if (dstBuffer && srcBuffer) {
		if (srcRect.vfb && srcRect.vfb == dstRect.vfb && srcRect.channel == dstRect.channel) {
			// Transfer within the same buffer.
			// This is a simple case because there will be no format conversion or similar shenanigans needed.
			// However, the BPP might still mismatch, but in such a case we can convert the coordinates.
			if (srcX == dstX && srcY == dstY) {
				// Ignore, nothing to do.  Tales of Phantasia X does this by accident.
				// Returning true to also skip the memory copy.
				return true;
			}

			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));

			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in intra-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}

			WARN_LOG_N_TIMES(dstsrc, 5, Log::G3D, "Intra-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
			FlushBeforeCopy();
			// Some backends can handle blitting within a framebuffer. Others will just have to deal with it or ignore it, apparently.
			BlitFramebuffer(dstRect.vfb, dstX, dstY, srcRect.vfb, srcX, srcY, dstRect.w_bytes / bpp, dstRect.h, bpp, dstRect.channel, "Blit_IntraBufferBlockTransfer");
			RebindFramebuffer("rebind after intra block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;  // Skip the memory copy.
		}

		// Straightforward blit between two same-format framebuffers.
		if (srcRect.vfb && srcRect.channel == dstRect.channel && srcRect.vfb->Format(srcRect.channel) == dstRect.vfb->Format(dstRect.channel)) {
			WARN_LOG_N_TIMES(dstnotsrc, 5, Log::G3D, "Inter-buffer %s block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d %s) -> %08x (x:%d y:%d stride:%d %s)",
				RasterChannelToString(srcRect.channel),
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride, GeBufferFormatToString(srcRect.vfb->fb_format),
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride, GeBufferFormatToString(dstRect.vfb->fb_format));

			// Straight blit will do, but check the bpp, we might need to convert coordinates differently.
			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));
			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in inter-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}
			FlushBeforeCopy();
			BlitFramebuffer(dstRect.vfb, dstRect.x_bytes / bpp, dstRect.y, srcRect.vfb, srcRect.x_bytes / bpp, srcRect.y, srcRect.w_bytes / bpp, height, bpp, srcRect.channel, "Blit_InterBufferBlockTransfer");
			RebindFramebuffer("RebindFramebuffer - Inter-buffer block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;
		}

		// Getting to the more complex cases. Have not actually seen much of these yet.
		if (srcRect.vfb && dstRect.vfb) {
			WARN_LOG_N_TIMES(blockformat, 5, Log::G3D, "Mismatched buffer formats in block transfer: %s->%s (%dx%d)",
				GeBufferFormatToString(srcRect.vfb->Format(srcRect.channel)), GeBufferFormatToString(dstRect.vfb->Format(dstRect.channel)),
				width, height);
		}

		// TODO

		// No need to actually do the memory copy behind, probably.
		return true;

	} else if (dstBuffer) {
		// Handle depth uploads directly here, and let's not bother copying the data. This is compat-flag-gated for now,
		// may generalize it when I remove the compat flag.
		if (dstRect.channel == RASTER_DEPTH) {
			WARN_LOG_ONCE(btud, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d %s)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp, RasterChannelToString(dstRect.channel));
			FlushBeforeCopy();
			const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
			DrawPixels(dstRect.vfb, dstX, dstY, srcBase, dstRect.vfb->Format(dstRect.channel), srcStride * bpp / 2, (int)(dstRect.w_bytes / 2), dstRect.h, dstRect.channel, "BlockTransferCopy_DrawPixelsDepth");
			RebindFramebuffer("RebindFramebuffer - UploadDepth");
			return true;
		}

		// Here we should just draw the pixels into the buffer.  Return false to copy the memory first.
		// NotifyBlockTransferAfter will take care of the rest.
		return false;
	} else if (srcBuffer) {
		if (width == 48 && height == 48 && srcY == 224 && srcX == 432 && PSP_CoreParameter().compat.flags().TacticsOgreEliminateDebugReadback) {
			return false;
		}

		WARN_LOG_N_TIMES(btd, 10, Log::G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
			width, height, bpp,
			srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
			dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
		FlushBeforeCopy();
		if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && srcRect.vfb && !srcRect.vfb->memoryUpdated) {
			// Scale factor between the transfer's bpp and the source framebuffer's bpp.
			const int srcBpp = BufferFormatBytesPerPixel(srcRect.vfb->fb_format);
			const float srcXFactor = (float)bpp / srcBpp;
			const bool tooTall = srcY + srcRect.h > srcRect.vfb->bufferHeight;
			if (srcRect.h <= 0 || (tooTall && srcY != 0)) {
				WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
			} else {
				if (tooTall) {
					WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
				}
				// NOTE(review): the width argument scales srcRect.w_bytes (a byte count) by srcXFactor rather
				// than a pixel count - confirm this is intended.
				ReadFramebufferToMemory(srcRect.vfb, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcRect.w_bytes * srcXFactor), srcRect.h, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
				srcRect.vfb->usageFlags = (srcRect.vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
			}
		}
		return false;  // Let the bit copy happen
	} else {
		return false;
	}
}
2784
2785
// Returns the effective readback-skipping mode: the ForceEnableGPUReadback compat flag
// overrides the user's configured value with NO_SKIP.
SkipGPUReadbackMode FramebufferManagerCommon::GetSkipGPUReadbackMode() {
	const bool forceReadback = PSP_CoreParameter().compat.flags().ForceEnableGPUReadback;
	if (forceReadback) {
		return SkipGPUReadbackMode::NO_SKIP;
	}
	return (SkipGPUReadbackMode)g_Config.iSkipGPUReadbackMode;
}
2792
2793
// Called after a block transfer's memory copy has been performed. If the destination matches a
// framebuffer and the source does not, the freshly copied memory is uploaded into that
// framebuffer. In non-buffered rendering, a full-screen transfer to a display buffer is drawn
// straight to the output instead.
void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	// If it's a block transfer direct to the screen, and we're not using buffers, draw immediately.
	// We may still do a partial block draw below if this doesn't pass.
	const bool looksFullScreen = !useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272;
	if (looksFullScreen) {
		const bool hitsPrevDisplay = PrevDisplayFramebufAddr() == dstBasePtr;
		const bool hitsCurDisplay = CurrentDisplayFramebufAddr() == dstBasePtr;
		if (hitsPrevDisplay || hitsCurDisplay) {
			FlushBeforeCopy();
			// HACK
			DrawFramebufferToOutput(displayLayoutConfigCopy_, Memory::GetPointerUnchecked(dstBasePtr), dstStride, displayFormat_);
			return;
		}
	}

	// Nothing to do unless one of the addresses may touch a known framebuffer.
	if (!MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return;
	}

	// TODO: Figure out how we can avoid repeating the search here.

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	const bool foundSrc = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	const bool foundDst = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
	// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
	if ((!useBufferedRendering_ && currentRenderVfb_ != dstRect.vfb) || dstRect.vfb == nullptr) {
		return;
	}

	// Only a pure upload (memory -> framebuffer) is handled here.
	if (!foundDst || foundSrc) {
		return;
	}

	WARN_LOG_ONCE(btu, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp);
	FlushBeforeCopy();
	const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;

	// Scale factor between the transfer's bpp and the destination framebuffer's bpp.
	const int dstBpp = BufferFormatBytesPerPixel(dstRect.vfb->fb_format);
	const float dstXFactor = (float)bpp / dstBpp;
	const int rectWidthPixels = dstRect.w_bytes / bpp;
	if (rectWidthPixels > dstRect.vfb->width || dstRect.h > dstRect.vfb->height) {
		// The buffer isn't big enough, and we have a clear hint of size. Resize.
		// This happens in Valkyrie Profile when uploading video at the ending.
		// Also happens to the CLUT framebuffer in the Burnout Dominator lens flare effect. See #16075
		ResizeFramebufFBO(dstRect.vfb, rectWidthPixels, dstRect.h, false, true);
		// Make sure we don't flop back and forth.
		dstRect.vfb->newWidth = std::max(rectWidthPixels, (int)dstRect.vfb->width);
		dstRect.vfb->newHeight = std::max(dstRect.h, (int)dstRect.vfb->height);
		dstRect.vfb->lastFrameNewSize = gpuStats.numFlips;
		// Resizing may change the viewport/etc.
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}
	DrawPixels(dstRect.vfb, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstRect.vfb->fb_format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(rectWidthPixels * dstXFactor), dstRect.h, RASTER_COLOR, "BlockTransferCopy_DrawPixels");
	SetColorUpdated(dstRect.vfb, skipDrawReason);
	RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
}
2848
2849
// Grows safeWidth/safeHeight of the framebuffer currently being rendered to so they are
// at least w x h, while never exceeding bufferWidth/bufferHeight.
// Does nothing when there is no current render framebuffer.
void FramebufferManagerCommon::SetSafeSize(u16 w, u16 h) {
	VirtualFramebuffer *target = currentRenderVfb_;
	if (!target) {
		return;
	}

	// The safe size only ever grows (max), then gets capped to the buffer size (min).
	const auto grownWidth = std::max(target->safeWidth, w);
	const auto grownHeight = std::max(target->safeHeight, h);
	target->safeWidth = std::min(target->bufferWidth, grownWidth);
	target->safeHeight = std::min(target->bufferHeight, grownHeight);
}
2856
2857
void FramebufferManagerCommon::NotifyDisplayResized() {
2858
pixelWidth_ = PSP_CoreParameter().pixelWidth;
2859
pixelHeight_ = PSP_CoreParameter().pixelHeight;
2860
presentation_->UpdateDisplaySize(pixelWidth_, pixelHeight_);
2861
2862
INFO_LOG(Log::G3D, "FramebufferManagerCommon::NotifyDisplayResized: %dx%d", pixelWidth_, pixelHeight_);
2863
2864
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2865
// So we need to defer the post processing initialization.
2866
updatePostShaders_ = true;
2867
}
2868
2869
void FramebufferManagerCommon::NotifyRenderResized(const DisplayLayoutConfig &config, int msaaLevel) {
2870
gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
2871
2872
int w, h, scaleFactor;
2873
presentation_->CalculateRenderResolution(config, &w, &h, &scaleFactor, &postShaderIsUpscalingFilter_, &postShaderIsSupersampling_);
2874
PSP_CoreParameter().renderWidth = w;
2875
PSP_CoreParameter().renderHeight = h;
2876
PSP_CoreParameter().renderScaleFactor = scaleFactor;
2877
2878
if (UpdateRenderSize(msaaLevel)) {
2879
draw_->StopThreads();
2880
DestroyAllFBOs();
2881
draw_->StartThreads();
2882
}
2883
2884
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2885
// So we need to defer the post processing initialization.
2886
updatePostShaders_ = true;
2887
}
2888
2889
void FramebufferManagerCommon::NotifyConfigChanged() {
2890
updatePostShaders_ = true;
2891
}
2892
2893
void FramebufferManagerCommon::DestroyAllFBOs() {
2894
DiscardFramebufferCopy();
2895
currentRenderVfb_ = nullptr;
2896
displayFramebuf_ = nullptr;
2897
prevDisplayFramebuf_ = nullptr;
2898
prevPrevDisplayFramebuf_ = nullptr;
2899
2900
for (VirtualFramebuffer *vfb : vfbs_) {
2901
INFO_LOG(Log::FrameBuf, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
2902
DestroyFramebuf(vfb);
2903
}
2904
vfbs_.clear();
2905
2906
for (VirtualFramebuffer *vfb : bvfbs_) {
2907
DestroyFramebuf(vfb);
2908
}
2909
bvfbs_.clear();
2910
2911
for (auto &tempFB : tempFBOs_) {
2912
tempFB.second.fbo->Release();
2913
}
2914
tempFBOs_.clear();
2915
2916
for (auto &iter : fbosToDelete_) {
2917
iter->Release();
2918
}
2919
fbosToDelete_.clear();
2920
2921
for (auto &iter : drawPixelsCache_) {
2922
iter.tex->Release();
2923
}
2924
drawPixelsCache_.clear();
2925
}
2926
2927
// Maps a TempFBO reason to the short name used in temp framebuffer debug names.
// Reasons not listed here yield an empty string.
static const char *TempFBOReasonToString(TempFBO reason) {
	switch (reason) {
	case TempFBO::DEPAL:
		return "depal";
	case TempFBO::BLIT:
		return "blit";
	case TempFBO::COPY:
		return "copy";
	case TempFBO::STENCIL:
		return "stencil";
	default:
		return "";
	}
	return "";
}
2937
2938
// Returns a temporary framebuffer of size w x h for the given purpose, creating and caching
// it on first use. Cached entries are keyed on (reason, w, h) and get their last-used frame
// refreshed on every hit. Returns nullptr if the framebuffer could not be created.
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
	// Key layout: reason in bits 48+, width in bits 16..31, height in bits 0..15.
	const u64 cacheKey = (static_cast<u64>(reason) << 48) | (static_cast<u32>(w) << 16) | h;

	auto cached = tempFBOs_.find(cacheKey);
	if (cached != tempFBOs_.end()) {
		cached->second.last_frame_used = gpuStats.numFlips;
		return cached->second.fbo;
	}

	// Only the stencil temp buffers request a depth/stencil attachment.
	const bool wantsDepthStencil = reason == TempFBO::STENCIL;

	char debugName[128];
	snprintf(debugName, sizeof(debugName), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);

	Draw::Framebuffer *created = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, wantsDepthStencil, debugName });
	if (created == nullptr) {
		return nullptr;
	}

	const TempFBOInfo entry = { created, gpuStats.numFlips };
	tempFBOs_[cacheKey] = entry;
	return created;
}
2959
2960
// Clears usage flags on vfb whose corresponding "last frame" stamp is more than
// FBO_OLD_USAGE_FLAG frames behind frameLastFramebufUsed_.
void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) const {
	auto expireIfStale = [&](u16 flag, int lastFrame) {
		if ((vfb->usageFlags & flag) == 0) {
			// Flag isn't set, nothing to expire.
			return;
		}
		const int age = frameLastFramebufUsed_ - lastFrame;
		if (age > FBO_OLD_USAGE_FLAG) {
			vfb->usageFlags &= ~flag;
		}
	};

	expireIfStale(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
	expireIfStale(FB_USAGE_TEXTURE, vfb->last_frame_used);
	expireIfStale(FB_USAGE_RENDER_COLOR, vfb->last_frame_render);
	expireIfStale(FB_USAGE_CLUT, vfb->last_frame_clut);
}
2975
2976
void FramebufferManagerCommon::ClearAllDepthBuffers() {
2977
for (auto vfb : vfbs_) {
2978
vfb->usageFlags |= FB_USAGE_INVALIDATE_DEPTH;
2979
}
2980
}
2981
2982
// We might also want to implement an asynchronous callback-style version of this. Would probably
2983
// only be possible to implement optimally on Vulkan, but on GL and D3D11 we could do pixel buffers
2984
// and read on the next frame, then call the callback.
2985
//
2986
// The main use cases for this are:
2987
// * GE debugging(in practice async will not matter because it will stall anyway.)
2988
// * Video file recording(would probably be great if it was async.)
2989
// * Screenshots(benefit slightly from async.)
2990
// * Save state screenshots(could probably be async but need to manage the stall.)
2991
// Reads the color contents of the framebuffer at fb_address into `buffer` as 8888.
//
// fb_address/fb_stride/format identify the framebuffer; if the current render target does
// not match the address, ResolveVFB() is used to look one up. When no virtual framebuffer
// exists, `buffer` is pointed directly at emulated memory instead (false is returned only
// if the address is invalid).
// maxScaleFactor > 0 limits the readback resolution: if the framebuffer is rendered at a
// larger scale it is first blitted down into a temp FBO.
// Returns the result of the readback.
bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxScaleFactor) {
	VirtualFramebuffer *vfb = currentRenderVfb_;
	if (!vfb || vfb->fb_address != fb_address) {
		vfb = ResolveVFB(fb_address, fb_stride, format);
	}

	if (!vfb) {
		if (!Memory::IsValidAddress(fb_address))
			return false;
		// If there's no vfb and we're drawing there, must be memory?
		buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(fb_address), fb_stride, 512, format);
		return true;
	}

	int w = vfb->renderWidth, h = vfb->renderHeight;

	// Stays null when the vfb has no FBO, in which case the readback below is issued
	// with a null framebuffer.
	Draw::Framebuffer *bound = nullptr;

	if (vfb->fbo) {
		bound = vfb->fbo;

		if (maxScaleFactor > 0 && vfb->renderWidth > vfb->width * maxScaleFactor) {
			const int scaledW = vfb->width * maxScaleFactor;
			const int scaledH = vfb->height * maxScaleFactor;

			Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, scaledW, scaledH);
			if (tempFBO) {
				w = scaledW;
				h = scaledH;

				// Describe the temp FBO with a throwaway vfb so BlitFramebuffer can scale into it.
				VirtualFramebuffer tempVfb = *vfb;
				tempVfb.fbo = tempFBO;
				tempVfb.bufferWidth = vfb->width;
				tempVfb.bufferHeight = vfb->height;
				tempVfb.renderWidth = w;
				tempVfb.renderHeight = h;
				tempVfb.renderScaleFactor = maxScaleFactor;
				BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, RASTER_COLOR, "Blit_GetFramebuffer");

				bound = tempFBO;
			}
			// else: temp FBO creation failed. Previously the blit bailed out and the readback
			// was issued with a null framebuffer at the downscaled size; instead, fall back to
			// reading the real FBO at its full render size (bound/w/h are already set for that).
		}
	}

	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	// TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow?
	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;
	buffer.Allocate(w, h, GE_FORMAT_8888, flipY);
	bool retval = draw_->CopyFramebufferToMemory(bound, Draw::Aspect::COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetFramebuffer");

	// Don't need to increment gpu stats for readback count here, this is a debugger-only function.

	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	// We may have blitted to a temp FBO.
	RebindFramebuffer("RebindFramebuffer - GetFramebuffer");
	return retval;
}
3049
3050
// Reads the depth contents of a framebuffer into `buffer`.
//
// Uses the current render framebuffer if there is one, otherwise the one found at
// fb_address. Without any virtual framebuffer, `buffer` is pointed at emulated memory at
// z_address (false is returned only if that address is invalid).
// A float (D32F) readback is tried first; if that fails, ReadbackDepthbuffer() is used to
// get 16-bit values instead. Returns whether a readback succeeded.
bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_ ? currentRenderVfb_ : GetVFBAt(fb_address);

	if (!vfb) {
		if (!Memory::IsValidAddress(z_address)) {
			return false;
		}
		// If there's no vfb and we're drawing there, must be memory?
		buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
		return true;
	}

	int readW = vfb->renderWidth;
	int readH = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		readW = std::min(readW, PSP_CoreParameter().pixelWidth);
		readH = std::min(readH, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;

	// Old code
	const auto floatFormat = gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) ? GPU_DBG_FORMAT_FLOAT_DIV_256 : GPU_DBG_FORMAT_FLOAT;
	buffer.Allocate(readW, readH, floatFormat, flipY);

	// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
	bool success = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::Aspect::DEPTH_BIT, 0, 0, readW, readH, Draw::DataFormat::D32F, buffer.GetData(), readW, Draw::ReadbackMode::BLOCK, "GetDepthBuffer");
	if (!success) {
		// Try ReadbackDepthbufferSync, in case GLES.
		buffer.Allocate(readW, readH, GPU_DBG_FORMAT_16BIT, flipY);
		success = ReadbackDepthbuffer(vfb->fbo, 0, 0, readW, readH, (uint16_t *)buffer.GetData(), readW, readW, readH, Draw::ReadbackMode::BLOCK);
	}

	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetDepthbuffer");
	return success;
}
3094
3095
// Reads the stencil contents of a framebuffer into `buffer` as 8-bit values.
//
// Uses the current render framebuffer if there is one, otherwise the one found at
// fb_address. Without any virtual framebuffer, `buffer` is pointed at emulated memory at
// fb_address (false is returned only if that address is invalid).
// A direct S8 readback is tried first, then ReadbackStencilbuffer() as a fallback.
// Returns whether a readback succeeded.
bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_;
	if (!vfb) {
		vfb = GetVFBAt(fb_address);
	}

	if (!vfb) {
		if (!Memory::IsValidAddress(fb_address))
			return false;
		// If there's no vfb and we're drawing there, must be memory?
		// TODO: Actually get the stencil.
		buffer = GPUDebugBuffer(Memory::GetPointerWrite(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
		return true;
	}

	int w = vfb->renderWidth;
	int h = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;
	// No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
	buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);
	bool retval = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::Aspect::STENCIL_BIT, 0, 0, w, h, Draw::DataFormat::S8, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetStencilbuffer");
	if (!retval) {
		retval = ReadbackStencilbuffer(vfb->fbo, 0, 0, w, h, buffer.GetData(), w, Draw::ReadbackMode::BLOCK);
	}

	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
	// (Same invalidation the color and depth debug readbacks perform.)
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
	return retval;
}
3129
3130
// Reads the draw context's default framebuffer (null framebuffer handle) into `buffer`.
// The backend's preferred readback format is honored only when it is BGRA; anything else
// is read as RGBA. Returns the result of the readback.
bool GetOutputFramebuffer(Draw::DrawContext *draw, GPUDebugBuffer &buffer) {
	int outW, outH;
	draw->GetFramebufferDimensions(nullptr, &outW, &outH);

	const Draw::DataFormat preferred = draw->PreferredFramebufferReadbackFormat(nullptr);
	// Ignore preferred formats other than BGRA.
	_dbg_assert_(preferred == Draw::DataFormat::B8G8R8A8_UNORM || preferred == Draw::DataFormat::R8G8B8A8_UNORM);
	const bool readAsBGRA = preferred == Draw::DataFormat::B8G8R8A8_UNORM;
	const Draw::DataFormat readFormat = readAsBGRA ? Draw::DataFormat::B8G8R8A8_UNORM : Draw::DataFormat::R8G8B8A8_UNORM;

	const bool flipped = g_Config.iGPUBackend == (int)GPUBackend::OPENGL;

	buffer.Allocate(outW, outH, readAsBGRA ? GPU_DBG_FORMAT_8888_BGRA : GPU_DBG_FORMAT_8888, flipped);
	return draw->CopyFramebufferToMemory(nullptr, Draw::Aspect::COLOR_BIT, 0, 0, outW, outH, readFormat, buffer.GetData(), outW, Draw::ReadbackMode::BLOCK, "GetOutputFramebuffer");
}
3144
3145
// Member wrapper around the free ::GetOutputFramebuffer() that also rebinds the current
// render framebuffer afterwards.
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	const bool success = ::GetOutputFramebuffer(draw_, buffer);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer");
	return success;
}
3151
3152
// This reads a channel of a framebuffer into emulated PSP VRAM, taking care of scaling down as needed.
3153
//
3154
// Color conversion is currently done on CPU but should theoretically be done on GPU.
3155
// (Except using the GPU might cause problems because of various implementations'
3156
// dithering behavior and games that expect exact colors like Danganronpa, so we
3157
// can't entirely be rid of the CPU path.) -- unknown
3158
// Copies the rectangle (x, y, w, h) of one channel of vfb into emulated memory at the
// framebuffer's own address (fb_address for color, z_address for depth).
// For scaled color buffers the data is first blitted into a download temp buffer, if one
// is available. Bumps the readback counters in gpuStats according to `mode`.
void FramebufferManagerCommon::ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
	if (w <= 0 || h <= 0) {
		ERROR_LOG(Log::FrameBuf, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
		return;
	}

	const bool isColor = channel == RASTER_COLOR;

	// Note that ReadbackDepthBufferSync can stretch on its own while converting data format, so we don't need to downscale in that case.
	const bool needsStretchBlit = vfb->renderScaleFactor != 1 && channel != RASTER_DEPTH;
	if (needsStretchBlit) {
		if (VirtualFramebuffer *downloadVfb = FindDownloadTempBuffer(vfb, channel)) {
			BlitFramebuffer(downloadVfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
			// From here on, read from the temp buffer instead.
			vfb = downloadVfb;
		}
	}

	const u32 baseAddress = isColor ? vfb->fb_address : vfb->z_address;
	const int stride = isColor ? vfb->fb_stride : vfb->z_stride;

	const Draw::DataFormat destFormat = isColor ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
	const int dstBpp = (int)DataFormatSizeInBytes(destFormat);

	const int dstByteOffset = (y * stride + x) * dstBpp;
	// Leave the gap between the end of the last line and the full stride.
	// This is only used for the NotifyMemInfo range.
	const int dstSize = ((h - 1) * stride + w) * dstBpp;
	const u32 writeAddress = baseAddress + dstByteOffset;

	if (!Memory::IsValidRange(writeAddress, dstSize)) {
		ERROR_LOG_REPORT(Log::G3D, "ReadbackFramebufferSync would write outside of memory, ignoring");
		return;
	}

	u8 *destPtr = Memory::GetPointerWriteUnchecked(writeAddress);

	// We always need to convert from the framebuffer native format.
	// Right now that's always 8888.
	DEBUG_LOG(Log::FrameBuf, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", baseAddress, destPtr);

	if (channel == RASTER_DEPTH) {
		_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
		// Source rectangle is given in render-scaled pixels, destination size in unscaled pixels.
		const auto scale = vfb->renderScaleFactor;
		ReadbackDepthbuffer(vfb->fbo,
			x * scale, y * scale,
			w * scale, h * scale, (uint16_t *)destPtr, stride, w, h, mode);
	} else {
		const auto aspect = channel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT;
		draw_->CopyFramebufferToMemory(vfb->fbo, aspect, x, y, w, h, destFormat, destPtr, stride, mode, "ReadbackFramebufferSync");
	}

	char tag[128];
	const size_t tagLen = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
	NotifyMemInfo(MemBlockFlags::WRITE, writeAddress, dstSize, tag, tagLen);

	if (mode == Draw::ReadbackMode::BLOCK) {
		gpuStats.numBlockingReadbacks++;
	} else {
		gpuStats.numReadbacks++;
	}
}
3217
3218
// Reads the rectangle (x, y, w, h) of fbo into `pixels` in S8 format through the draw
// context, and returns the result of that copy.
// NOTE(review): the copy is requested with the DEPTH_BIT aspect but an S8 data format,
// while GetStencilbuffer() uses STENCIL_BIT for its direct readback - confirm this is intended.
bool FramebufferManagerCommon::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride, Draw::ReadbackMode mode) {
	const bool copied = draw_->CopyFramebufferToMemory(
		fbo, Draw::Aspect::DEPTH_BIT,
		x, y, w, h,
		Draw::DataFormat::S8, pixels, pixelsStride,
		mode, "ReadbackStencilbufferSync");
	return copied;
}
3221
3222
// Downloads a rectangle of vfb into emulated memory and updates the bookkeeping around it.
//
// Full-buffer reads mark the framebuffer as downloaded. Partial reads are counted per frame
// and per framebuffer address; after more than a few in the same frame, the manager switches
// to always reading the whole framebuffer (gameUsesSequentialCopies_).
// Does nothing if vfb is null or has no FBO.
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
	if (!vfb || !vfb->fbo) {
		return;
	}

	// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
	if (x + w >= vfb->bufferWidth) {
		w = vfb->bufferWidth - x;
	}

	const bool coversWholeBuffer = x == 0 && y == 0 && w == vfb->width && h == vfb->height;

	if (gameUsesSequentialCopies_) {
		// Ignore the x/y/etc., read the entire thing. See below.
		x = 0;
		y = 0;
		w = vfb->width;
		h = vfb->height;
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
		vfb->memoryUpdated = true;
	} else if (coversWholeBuffer) {
		// Mark it as fully downloaded until next render to it.
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
		if (channel == RASTER_COLOR) {
			vfb->memoryUpdated = true;
		}
	} else {
		// Let's try to set the flag eventually, if the game copies a lot.
		// Some games (like Grand Knights History) copy subranges very frequently.
		static constexpr int kFrequentSequentialCopies = 3;
		static int lastCopyFrame = 0;
		static u32 lastCopyAddress = 0;
		static int copyCount = 0;

		const bool sameFrameAndBuffer = lastCopyFrame == gpuStats.numFlips && lastCopyAddress == vfb->fb_address;
		if (!sameFrameAndBuffer) {
			// New frame or different framebuffer: restart the count.
			lastCopyFrame = gpuStats.numFlips;
			lastCopyAddress = vfb->fb_address;
			copyCount = 0;
		}
		++copyCount;
		if (copyCount > kFrequentSequentialCopies) {
			gameUsesSequentialCopies_ = true;
		}
	}

	// This handles any required stretching internally.
	ReadbackFramebuffer(vfb, x, y, w, h, channel, mode);

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	textureCache_->ForgetLastTexture();
	RebindFramebuffer("RebindFramebuffer - ReadFramebufferToMemory");
}
3268
3269
void FramebufferManagerCommon::FlushBeforeCopy() {
3270
drawEngine_->FlushQueuedDepth();
3271
// Flush anything not yet drawn before blitting, downloading, or uploading.
3272
// This might be a stalled list, or unflushed before a block transfer, etc.
3273
// Only bother if any draws are pending.
3274
if (drawEngine_->GetNumDrawCalls() > 0) {
3275
// TODO: It's really bad that we are calling SetRenderFramebuffer here with
3276
// all the irrelevant state checking it'll use to decide what to do. Should
3277
// do something more focused here.
3278
bool changed;
3279
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
3280
drawEngine_->Flush();
3281
}
3282
}
3283
3284
// TODO: Replace with depal, reading the palette from the texture on the GPU directly.
3285
// Downloads the first loadBytes bytes of the framebuffer at fb_address into emulated memory,
// unless that data is already there (memoryUpdated set, or an earlier CLUT download covered
// at least as many bytes). Does nothing if no framebuffer with a non-zero stride is found.
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
	VirtualFramebuffer *vfb = GetVFBAt(fb_address);
	if (!vfb || vfb->fb_stride == 0) {
		return;
	}

	const u32 bpp = BufferFormatBytesPerPixel(vfb->fb_format);
	const int pixels = loadBytes / bpp;

	// The height will be 1 for each stride or part thereof.
	const int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
	int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
	if (w == 0 || h > 1) {
		// Exactly aligned, or more than one row.
		w = std::min(vfb->fb_stride, vfb->width);
	}

	// We might still have a pending draw to the fb in question, flush if so.
	FlushBeforeCopy();

	// No need to download if we already have it.
	const bool alreadyDownloaded = vfb->memoryUpdated || vfb->clutUpdatedBytes >= loadBytes;
	if (w <= 0 || h <= 0 || alreadyDownloaded) {
		return;
	}

	// We intentionally don't try to optimize into a full download here - we don't want to over download.

	// CLUT framebuffers are often incorrectly estimated in size.
	// The download always starts at (0, 0), so matching dimensions means a full download.
	if (w == vfb->width && h == vfb->height) {
		vfb->memoryUpdated = true;
	}
	vfb->clutUpdatedBytes = loadBytes;

	// This function now handles scaling down internally.
	ReadbackFramebuffer(vfb, 0, 0, w, h, RASTER_COLOR, Draw::ReadbackMode::BLOCK);

	textureCache_->ForgetLastTexture();
	RebindFramebuffer("RebindFramebuffer - DownloadFramebufferForClut");
}
3322
3323
void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
3324
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
3325
shaderManager_->DirtyLastShader();
3326
// Needed for D3D11 to run validation clean. I don't think it's actually an issue.
3327
// textureCache_->ForgetLastTexture();
3328
if (currentRenderVfb_ && currentRenderVfb_->fbo) {
3329
draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
3330
} else {
3331
// This can happen (like it does in Parappa) when a frame starts with copies instead of rendering.
3332
// Let's do nothing and assume it'll take care of itself.
3333
}
3334
}
3335
3336
std::vector<const VirtualFramebuffer *> FramebufferManagerCommon::GetFramebufferList() const {
3337
std::vector<const VirtualFramebuffer *> list;
3338
for (auto vfb : vfbs_) {
3339
list.push_back(vfb);
3340
}
3341
return list;
3342
}
3343
3344
// Calls Release() on obj if it is non-null, and leaves the pointer null either way.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
3350
3351
void FramebufferManagerCommon::ReleasePipelines() {
3352
for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
3353
for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
3354
DoRelease(reinterpretFromTo_[i][j]);
3355
}
3356
}
3357
DoRelease(stencilWriteSampler_);
3358
DoRelease(stencilWritePipeline_);
3359
DoRelease(stencilReadbackSampler_);
3360
DoRelease(stencilReadbackPipeline_);
3361
DoRelease(depthReadbackSampler_);
3362
DoRelease(depthReadbackPipeline_);
3363
DoRelease(draw2DPipelineCopyColor_);
3364
DoRelease(draw2DPipelineColorRect2Lin_);
3365
DoRelease(draw2DPipelineCopyDepth_);
3366
DoRelease(draw2DPipelineEncodeDepth_);
3367
DoRelease(draw2DPipeline565ToDepth_);
3368
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
3369
}
3370
3371
void FramebufferManagerCommon::DeviceLost() {
3372
DestroyAllFBOs();
3373
3374
presentation_->DeviceLost();
3375
draw2D_.DeviceLost();
3376
3377
ReleasePipelines();
3378
3379
draw_ = nullptr;
3380
}
3381
3382
// Adopts the new draw context and hands it on to the 2D draw helper and the presentation object.
void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
	draw2D_.DeviceRestore(draw);
	presentation_->DeviceRestore(draw);
}
3387
3388
void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) {
3389
// Will be drawn as a strip.
3390
Draw2DVertex coord[4] = {
3391
{x, y, u0, v0},
3392
{x + w, y, u1, v0},
3393
{x + w, y + h, u1, v1},
3394
{x, y + h, u0, v1},
3395
};
3396
3397
if (uvRotation != ROTATION_LOCKED_HORIZONTAL) {
3398
float temp[8];
3399
int rotation = 0;
3400
switch (uvRotation) {
3401
case ROTATION_LOCKED_HORIZONTAL180: rotation = 2; break;
3402
case ROTATION_LOCKED_VERTICAL: rotation = 1; break;
3403
case ROTATION_LOCKED_VERTICAL180: rotation = 3; break;
3404
}
3405
for (int i = 0; i < 4; i++) {
3406
temp[i * 2] = coord[((i + rotation) & 3)].u;
3407
temp[i * 2 + 1] = coord[((i + rotation) & 3)].v;
3408
}
3409
3410
for (int i = 0; i < 4; i++) {
3411
coord[i].u = temp[i * 2];
3412
coord[i].v = temp[i * 2 + 1];
3413
}
3414
}
3415
3416
const float invDestW = 2.0f / destW;
3417
const float invDestH = 2.0f / destH;
3418
for (int i = 0; i < 4; i++) {
3419
coord[i].x = coord[i].x * invDestW - 1.0f;
3420
coord[i].y = coord[i].y * invDestH - 1.0f;
3421
}
3422
3423
if ((flags & DRAWTEX_TO_BACKBUFFER) && g_display.rotation != DisplayRotation::ROTATE_0) {
3424
for (int i = 0; i < 4; i++) {
3425
// backwards notation, should fix that...
3426
Lin::Vec3 pos = Lin::Vec3(coord[i].x, coord[i].y, 0.0);
3427
pos = pos * g_display.rot_matrix;
3428
coord[i].x = pos.x;
3429
coord[i].y = pos.y;
3430
}
3431
}
3432
3433
// Rearrange to strip form.
3434
std::swap(coord[2], coord[3]);
3435
3436
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline((flags & DRAWTEX_DEPTH) ? DRAW2D_ENCODE_R16_TO_DEPTH : DRAW2D_COPY_COLOR));
3437
3438
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3439
}
3440
3441
// Copies a w x h rectangle (in unscaled framebuffer pixels) of one channel from src at
// (srcX, srcY) to dst at (dstX, dstY), scaling by each framebuffer's render scale factor.
//
// A non-zero bpp that differs from a framebuffer's own bytes-per-pixel rescales that
// side's X coordinates accordingly. The rectangle is clipped against both buffers on the
// right/bottom. Depending on device caps and state, the work is done with a framebuffer
// copy, a blit, or a raster draw (through a temp FBO when src == dst).
void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag) {
	if (!useBufferedRendering_) {
		return;
	}
	if (!dst->fbo || !src->fbo) {
		// This can happen if they recently switched from non-buffered.
		// Just bind the back buffer for rendering, forget about doing anything else as we're in a weird state.
		draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitFramebuffer");
		return;
	}

	if (channel == RASTER_DEPTH && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
		// Can't do anything :(
		return;
	}

	// Perform a little bit of clipping first.
	// Block transfer coords are unsigned so I don't think we need to clip on the left side.. Although there are
	// other uses for BlitFramebuffer.
	if (dstX + w > dst->bufferWidth) {
		w = dst->bufferWidth - dstX;
	}
	if (dstY + h > dst->bufferHeight) {
		h = dst->bufferHeight - dstY;
	}
	if (srcX + w > src->bufferWidth) {
		w = src->bufferWidth - srcX;
	}
	if (srcY + h > src->bufferHeight) {
		h = src->bufferHeight - srcY;
	}

	if (w <= 0 || h <= 0) {
		// The whole rectangle got clipped.
		return;
	}

	const bool isColor = channel == RASTER_COLOR;
	const auto aspect = isColor ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT;

	// The copy/blit fast paths are only considered for color.
	bool useBlit = isColor ? draw_->GetDeviceCaps().framebufferBlitSupported : false;
	bool useCopy = isColor ? draw_->GetDeviceCaps().framebufferCopySupported : false;
	if (src != dst && (dst == currentRenderVfb_ || dst->fbo->MultiSampleLevel() != 0 || src->fbo->MultiSampleLevel() != 0)) {
		// If already bound, using either a blit or a copy is unlikely to be an optimization.
		// So we're gonna use a raster draw instead. Also multisampling has problems with copies currently.
		useBlit = false;
		useCopy = false;
	}

	// Source rectangle in render-scaled pixels.
	float srcScaleX = src->renderScaleFactor;
	const float srcScaleY = src->renderScaleFactor;
	const int srcBpp = BufferFormatBytesPerPixel(src->Format(channel));
	if (srcBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		srcScaleX = (srcScaleX * bpp) / srcBpp;
	}
	const int srcX1 = srcX * srcScaleX;
	const int srcX2 = (srcX + w) * srcScaleX;
	const int srcY1 = srcY * srcScaleY;
	const int srcY2 = (srcY + h) * srcScaleY;

	// Destination rectangle in render-scaled pixels.
	float dstScaleX = dst->renderScaleFactor;
	const float dstScaleY = dst->renderScaleFactor;
	const int dstBpp = BufferFormatBytesPerPixel(dst->Format(channel));
	if (dstBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		dstScaleX = (dstScaleX * bpp) / dstBpp;
	}
	const int dstX1 = dstX * dstScaleX;
	const int dstX2 = (dstX + w) * dstScaleX;
	const int dstY1 = dstY * dstScaleY;
	const int dstY2 = (dstY + h) * dstScaleY;

	if (src == dst && srcX == dstX && srcY == dstY) {
		// Let's just skip a copy where the destination is equal to the source.
		WARN_LOG_REPORT_ONCE(blitSame, Log::G3D, "Skipped blit with equal dst and src");
		return;
	}

	if (useCopy) {
		// glBlitFramebuffer can clip, but glCopyImageSubData is more restricted.
		// In case the src goes outside, we just skip the optimization in that case.
		const int copyW = dstX2 - dstX1;
		const int copyH = dstY2 - dstY1;
		const bool sameSize = copyW == srcX2 - srcX1 && copyH == srcY2 - srcY1;
		const bool srcInsideBounds = srcX2 <= src->renderWidth && srcY2 <= src->renderHeight;
		const bool dstInsideBounds = dstX2 <= dst->renderWidth && dstY2 <= dst->renderHeight;
		const bool xOverlap = src == dst && srcX2 > dstX1 && srcX1 < dstX2;
		const bool yOverlap = src == dst && srcY2 > dstY1 && srcY1 < dstY2;
		const bool rectsOverlap = xOverlap && yOverlap;
		if (sameSize && srcInsideBounds && dstInsideBounds && !rectsOverlap) {
			draw_->CopyFramebufferImage(src->fbo, 0, srcX1, srcY1, 0, dst->fbo, 0, dstX1, dstY1, 0, copyW, copyH, 1,
				aspect, tag);
			return;
		}
	}

	if (useBlit) {
		draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2,
			aspect, Draw::FB_BLIT_NEAREST, tag);
	} else {
		Draw2DPipeline *pipeline = Get2DPipeline(isColor ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH);
		Draw::Framebuffer *readFBO = src->fbo;
		if (src == dst) {
			// Can't read and write the same FBO in one draw, so stage the source in a temp FBO.
			Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight);
			// We need to copy to the temp using only the source coordinates, since those are the ones we read in the next blit.
			BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, srcX1, srcY1, srcX2, srcY2, false, dst->renderScaleFactor, pipeline, tag);
			readFBO = tempFBO;
		}
		BlitUsingRaster(readFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, tag);
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
3550
3551
// The input is raw pixel coordinates, scale not taken into account.
3552
void FramebufferManagerCommon::BlitUsingRaster(
3553
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
3554
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
3555
bool linearFilter,
3556
int scaleFactor,
3557
Draw2DPipeline *pipeline, const char *tag) {
3558
3559
_dbg_assert_(src);
3560
_dbg_assert_(dest);
3561
_dbg_assert_(pipeline);
3562
3563
if (!src || !dest || !pipeline) {
3564
// Nothing we can do, other than trying to catch it in debug with the asserts above.
3565
return;
3566
}
3567
3568
if (pipeline->info.writeChannel == RASTER_DEPTH) {
3569
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
3570
}
3571
3572
int destW, destH, srcW, srcH;
3573
draw_->GetFramebufferDimensions(src, &srcW, &srcH);
3574
draw_->GetFramebufferDimensions(dest, &destW, &destH);
3575
3576
// Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily).
3577
draw_->BindTexture(0, nullptr);
3578
// This will get optimized away in case it's already bound (in VK and GL at least..)
3579
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
3580
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT, Draw::ALL_LAYERS);
3581
3582
if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) {
3583
// We overwrite the whole channel of the framebuffer, so we can invalidate the current contents.
3584
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? Draw::Aspect::COLOR_BIT : Draw::Aspect::DEPTH_BIT);
3585
}
3586
3587
Draw::Viewport viewport{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
3588
draw_->SetViewport(viewport);
3589
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
3590
3591
draw2D_.Blit(pipeline, srcX1, srcY1, srcX2, srcY2, destX1, destY1, destX2, destY2, (float)srcW, (float)srcH, (float)destW, (float)destH, linearFilter, scaleFactor);
3592
3593
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3594
}
3595
3596
int FramebufferManagerCommon::GetFramebufferLayers() const {
3597
int layers = 1;
3598
if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) {
3599
layers = 2;
3600
}
3601
return layers;
3602
}
3603
3604
// Returns a framebuffer at the same address as `src` but in `newFormat`, with its color contents
// brought up to date so it can be textured from.
//
// An existing framebuffer in vfbs_ is reused if one matches (same address, same stride in bytes,
// format == newFormat). Otherwise a clone of `src` is created, given its own FBO, and added to vfbs_.
// `src` must not already be in `newFormat` (asserted in debug builds).
VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(VirtualFramebuffer *src, GEBufferFormat newFormat) {
	_dbg_assert_(src->fb_format != newFormat);

	// First, search for a framebuffer that's identical except for already having the new format.
	VirtualFramebuffer *resolved = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate == src) {
			continue;
		}

		const bool sameAddress = candidate->fb_address == src->fb_address;

		// A second framebuffer with src's exact address/format/stride should never exist.
		if (sameAddress && candidate->fb_format == src->fb_format && candidate->fb_stride == src->fb_stride) {
			_dbg_assert_msg_(false, "illegal clone of src found");
		}

		if (sameAddress && candidate->FbStrideInBytes() == src->FbStrideInBytes() && candidate->fb_format == newFormat) {
			resolved = candidate;
			break;
		}
	}

	if (resolved == nullptr) {
		// Nothing suitable exists yet, so make a clone.
		resolved = new VirtualFramebuffer();
		// Member-wise copy of everything. Careful: the fbo pointer must not be shared, it's replaced below.
		*resolved = *src;

		// A row covers the same bytes in both formats, so the width in pixels scales by the bpp ratio.
		// (fb_format still holds the old format at this point.)
		const float widthFactor = (float)BufferFormatBytesPerPixel(resolved->fb_format) / (float)BufferFormatBytesPerPixel(newFormat);

		resolved->width *= widthFactor;
		resolved->bufferWidth *= widthFactor;
		resolved->renderWidth *= widthFactor;
		resolved->drawnWidth *= widthFactor;
		resolved->newWidth *= widthFactor;
		resolved->safeWidth *= widthFactor;

		resolved->fb_format = newFormat;
		// stride stays the same since it's in pixels.
		// NOTE(review): the search above compares FbStrideInBytes(), presumably stride * bytes per pixel.
		// With an unchanged pixel stride and a different bpp, confirm a clone made here can be found again.

		WARN_LOG(Log::FrameBuf, "Creating %s clone of %08x/%08x/%s (%dx%d -> %dx%d)", GeBufferFormatToString(newFormat), src->fb_address, src->z_address, GeBufferFormatToString(src->fb_format), src->width, src->height, resolved->width, resolved->height);

		char fbName[128];
		FormatFramebufferName(resolved, fbName, sizeof(fbName));
		resolved->fbo = draw_->CreateFramebuffer({ resolved->renderWidth, resolved->renderHeight, 1, GetFramebufferLayers(), 0, true, fbName });
		vfbs_.push_back(resolved);
	}

	// Resolve the color contents so the result can be textured from.
	// This performs any reinterprets that are needed.
	CopyToColorFromOverlappingFramebuffers(resolved);
	// From here on the resolved copy counts as the latest at this address
	// (though really, could the two be made equivalent?).
	resolved->colorBindSeq = GetBindSeqCount();
	return resolved;
}
3658
3659
// Killzone-specific heuristic: decides whether the current draw targets the 32-pixel "margin"
// to the right of the 480-pixel-wide surface, and if so treats that margin as its own framebuffer.
//
// On a margin draw: sets the render target offset to (-480, 0), advances params->fb_address and
// params->z_address past the 480-pixel surface, and writes 32 to *drawing_width.
// Otherwise: resets the render target offset to (0, 0) and writes 480 to *drawing_width.
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width) {
	// Assume we're drawing to the main surface until one of the checks below says otherwise.
	bool margin = false;

	const auto scissorWidth = params->scissorRight - params->scissorLeft;
	if (scissorWidth == 32) {
		// The easy case, as seen on the title screen. Lucky for us, since it uses non-through verts,
		// which the vertex inspection below wouldn't handle.
		margin = true;
	} else if (params->scissorRight != 480) {
		// The scissor isn't conclusive, so go deep and peek at the first vertex. Killzone-specific, of course.
		if ((gstate.vertType & 0xFFFFFF) == 0x00800102) {  // through, u16, s16
			const u16 *vdata = (const u16 *)Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			const int v0PosU = vdata[0];
			const int v0PosX = vdata[2];
			// Texturing from the surface while writing into the margin.
			margin = v0PosX >= 480 && v0PosU < 480;
		}

		// TODO: Implement this for Burnout Dominator. It has to handle self-reads inside
		// the margin framebuffer though, so framebuffer copies are still needed, just smaller.
		// It uses 0x0080019f (through, float texcoords, ABGR 8888 colors, float positions).
	}
	// A scissorRight of exactly 480 (without the 32-wide scissor) means a regular, non-margin draw.

	if (!margin) {
		gstate_c.SetCurRTOffset(0, 0);
		*drawing_width = 480;
		return;
	}

	gstate_c.SetCurRTOffset(-480, 0);
	// Shift fb_address and z_address as well, so later matching doesn't pick up the main surface.
	// (Presumably 480 pixels at 4 bytes of color and 2 bytes of depth each - confirm.)
	params->fb_address += 480 * 4;
	params->z_address += 480 * 2;
	*drawing_width = 32;
}
3696
3697