CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/FramebufferManagerCommon.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <algorithm>
19
#include <sstream>
20
#include <cmath>
21
22
#include "Common/GPU/thin3d.h"
23
#include "Common/GPU/OpenGL/GLFeatures.h"
24
#include "Common/Data/Collections/TinySet.h"
25
#include "Common/Data/Convert/ColorConv.h"
26
#include "Common/Data/Text/I18n.h"
27
#include "Common/LogReporting.h"
28
#include "Common/Math/lin/matrix4x4.h"
29
#include "Common/Math/math_util.h"
30
#include "Common/System/Display.h"
31
#include "Common/System/OSD.h"
32
#include "Common/VR/PPSSPPVR.h"
33
#include "Common/CommonTypes.h"
34
#include "Common/StringUtils.h"
35
#include "Core/Config.h"
36
#include "Core/ConfigValues.h"
37
#include "Core/Core.h"
38
#include "Core/CoreParameter.h"
39
#include "Core/Debugger/MemBlockInfo.h"
40
#include "Core/MIPS/MIPS.h"
41
#include "GPU/Common/DrawEngineCommon.h"
42
#include "GPU/Common/FramebufferManagerCommon.h"
43
#include "GPU/Common/PostShader.h"
44
#include "GPU/Common/PresentationCommon.h"
45
#include "GPU/Common/TextureCacheCommon.h"
46
#include "GPU/Common/ReinterpretFramebuffer.h"
47
#include "GPU/Debugger/Debugger.h"
48
#include "GPU/Debugger/Record.h"
49
#include "GPU/Debugger/Stepping.h"
50
#include "GPU/GPUInterface.h"
51
#include "GPU/GPUState.h"
52
53
static size_t FormatFramebufferName(const VirtualFramebuffer *vfb, char *tag, size_t len) {
54
return snprintf(tag, len, "FB_%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, vfb->bufferWidth, vfb->bufferHeight, GeBufferFormatToString(vfb->fb_format));
55
}
56
57
// Stores the draw context, builds the 2D draw helper on top of it, and allocates the
// presentation object (released again in the destructor).
FramebufferManagerCommon::FramebufferManagerCommon(Draw::DrawContext *draw)
	: draw_(draw),
	  draw2D_(draw_) {
	presentation_ = new PresentationCommon(draw);
}
61
62
// Releases everything the manager holds: the regular virtual framebuffers, the temporary
// FBOs, the framebuffers kept in bvfbs_, the presentation object and the conversion buffer.
FramebufferManagerCommon::~FramebufferManagerCommon() {
	DeviceLost();
	DecimateFBOs();

	// Regular virtual framebuffers.
	for (VirtualFramebuffer *fb : vfbs_) {
		DestroyFramebuf(fb);
	}
	vfbs_.clear();

	// Temporary FBOs only need their underlying framebuffer object released.
	for (auto &entry : tempFBOs_) {
		entry.second.fbo->Release();
	}
	tempFBOs_.clear();

	// Do the same for ReadFramebuffersToMemory's VFBs
	for (VirtualFramebuffer *fb : bvfbs_) {
		DestroyFramebuf(fb);
	}
	bvfbs_.clear();

	delete presentation_;
	delete[] convBuf_;
}
85
86
void FramebufferManagerCommon::Init(int msaaLevel) {
87
// We may need to override the render size if the shader is upscaling or SSAA.
88
NotifyDisplayResized();
89
NotifyRenderResized(msaaLevel);
90
}
91
92
bool FramebufferManagerCommon::UpdateRenderSize(int msaaLevel) {
93
const bool newRender = renderWidth_ != (float)PSP_CoreParameter().renderWidth || renderHeight_ != (float)PSP_CoreParameter().renderHeight || msaaLevel_ != msaaLevel;
94
95
int effectiveBloomHack = g_Config.iBloomHack;
96
if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
97
effectiveBloomHack = 3;
98
} else if (PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOff) {
99
effectiveBloomHack = 0;
100
}
101
102
bool newBuffered = !g_Config.bSkipBufferEffects;
103
const bool newSettings = bloomHack_ != effectiveBloomHack || useBufferedRendering_ != newBuffered;
104
105
renderWidth_ = (float)PSP_CoreParameter().renderWidth;
106
renderHeight_ = (float)PSP_CoreParameter().renderHeight;
107
renderScaleFactor_ = (float)PSP_CoreParameter().renderScaleFactor;
108
msaaLevel_ = msaaLevel;
109
110
bloomHack_ = effectiveBloomHack;
111
useBufferedRendering_ = newBuffered;
112
113
presentation_->UpdateRenderSize(renderWidth_, renderHeight_);
114
return newRender || newSettings;
115
}
116
117
void FramebufferManagerCommon::CheckPostShaders() {
118
if (updatePostShaders_) {
119
presentation_->UpdatePostShader();
120
updatePostShaders_ = false;
121
}
122
}
123
124
void FramebufferManagerCommon::BeginFrame() {
125
DecimateFBOs();
126
presentation_->BeginFrame();
127
currentRenderVfb_ = nullptr;
128
}
129
130
bool FramebufferManagerCommon::PresentedThisFrame() const {
131
return presentation_->PresentedThisFrame();
132
}
133
134
// Records the address, stride and format of the framebuffer to display, and notifies the
// GPU debugger and recorder (both receive the raw, unmasked address).
void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	// Same address normalization as the lookup functions: mask with 0x3FFFFFFF first, then
	// use the tighter 0x041FFFFF mask if the result is a VRAM address.
	u32 maskedAddr = framebuf & 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(maskedAddr)) {
		maskedAddr = framebuf & 0x041FFFFF;
	}

	displayFramebufPtr_ = maskedAddr;
	displayStride_ = stride;
	displayFormat_ = format;

	GPUDebug::NotifyDisplay(framebuf, stride, format);
	GPURecord::NotifyDisplay(framebuf, stride, format);
}
143
144
// Returns the virtual framebuffer whose color address equals `addr` (after masking), picking
// the one with the highest last_frame_render if several match. Returns nullptr if none match.
VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	VirtualFramebuffer *best = nullptr;
	for (VirtualFramebuffer *candidate : vfbs_) {
		if (candidate->fb_address != addr) {
			continue;
		}
		// Could check w too but whatever (actually, might very well make sense to do so, depending on context).
		if (best == nullptr || candidate->last_frame_render > best->last_frame_render) {
			best = candidate;
		}
	}
	return best;
}
159
160
// Returns the virtual framebuffer that matches the (masked) address, the stride and the
// format exactly. If several match, the one with the highest colorBindSeq wins (the first
// one found wins ties). Returns nullptr if there is no match.
VirtualFramebuffer *FramebufferManagerCommon::GetExactVFB(u32 addr, int stride, GEBufferFormat format) const {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	VirtualFramebuffer *latest = nullptr;
	for (VirtualFramebuffer *candidate : vfbs_) {
		const bool exactMatch = candidate->fb_address == addr && candidate->fb_stride == stride && candidate->fb_format == format;
		if (!exactMatch) {
			continue;
		}
		if (latest == nullptr || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}
	return latest;
}
178
179
// Finds the most recently color-bound virtual framebuffer at the (masked) address whose
// stride in bytes equals stride * bytes-per-pixel of `format`. If its format differs from
// the requested one, the result of ResolveFramebufferColorToFormat() is returned instead.
// Returns nullptr if nothing matches.
VirtualFramebuffer *FramebufferManagerCommon::ResolveVFB(u32 addr, int stride, GEBufferFormat format) {
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	// Find the newest one matching addr and stride (compared in bytes, so formats may differ).
	VirtualFramebuffer *latest = nullptr;
	for (VirtualFramebuffer *candidate : vfbs_) {
		if (!(candidate->fb_address == addr && candidate->FbStrideInBytes() == stride * BufferFormatBytesPerPixel(format))) {
			continue;
		}
		if (latest == nullptr || candidate->colorBindSeq > latest->colorBindSeq) {
			latest = candidate;
		}
	}

	if (latest == nullptr || latest->fb_format == format) {
		// Either nothing found, or the format already matches - nothing to resolve.
		return latest;
	}

	WARN_LOG_ONCE(resolvevfb, Log::G3D, "ResolveVFB: Resolving from %s to %s at %08x/%d", GeBufferFormatToString(latest->fb_format), GeBufferFormatToString(format), addr, stride);
	return ResolveFramebufferColorToFormat(latest, format);
}
204
205
// Looks up the virtual framebuffer exactly matching the display address, stride and format
// last recorded by SetDisplayFramebuffer(). May return nullptr.
VirtualFramebuffer *FramebufferManagerCommon::GetDisplayVFB() {
	VirtualFramebuffer *displayVfb = GetExactVFB(displayFramebufPtr_, displayStride_, displayFormat_);
	return displayVfb;
}
208
209
// Heuristics to figure out the size of FBO to create.
210
// TODO: Possibly differentiate on whether through mode is used (since in through mode, viewport is meaningless?)
211
void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride, GEBufferFormat fb_format, int viewport_width, int viewport_height, int region_width, int region_height, int scissor_width, int scissor_height, int &drawing_width, int &drawing_height) {
212
static const int MAX_FRAMEBUF_HEIGHT = 512;
213
214
// Games don't always set any of these. Take the greatest parameter that looks valid based on stride.
215
if (viewport_width > 4 && viewport_width <= fb_stride && viewport_height > 0) {
216
drawing_width = viewport_width;
217
drawing_height = viewport_height;
218
// Some games specify a viewport with 0.5, but don't have VRAM for 273. 480x272 is the buffer size.
219
if (viewport_width == 481 && region_width == 480 && viewport_height == 273 && region_height == 272) {
220
drawing_width = 480;
221
drawing_height = 272;
222
}
223
// Sometimes region is set larger than the VRAM for the framebuffer.
224
// However, in one game it's correctly set as a larger height (see #7277) with the same width.
225
// A bit of a hack, but we try to handle that unusual case here.
226
if (region_width <= fb_stride && (region_width > drawing_width || (region_width == drawing_width && region_height > drawing_height)) && region_height <= MAX_FRAMEBUF_HEIGHT) {
227
drawing_width = region_width;
228
drawing_height = std::max(drawing_height, region_height);
229
}
230
// Scissor is often set to a subsection of the framebuffer, so we pay the least attention to it.
231
if (scissor_width <= fb_stride && scissor_width > drawing_width && scissor_height <= MAX_FRAMEBUF_HEIGHT) {
232
drawing_width = scissor_width;
233
drawing_height = std::max(drawing_height, scissor_height);
234
}
235
} else {
236
// If viewport wasn't valid, let's just take the greatest anything regardless of stride.
237
drawing_width = std::min(std::max(region_width, scissor_width), fb_stride);
238
drawing_height = std::max(region_height, scissor_height);
239
}
240
241
if (scissor_width == 481 && region_width == 480 && scissor_height == 273 && region_height == 272) {
242
drawing_width = 480;
243
drawing_height = 272;
244
}
245
246
// Assume no buffer is > 512 tall, it couldn't be textured or displayed fully if so.
247
if (drawing_height >= MAX_FRAMEBUF_HEIGHT) {
248
if (region_height < MAX_FRAMEBUF_HEIGHT) {
249
drawing_height = region_height;
250
} else if (scissor_height < MAX_FRAMEBUF_HEIGHT) {
251
drawing_height = scissor_height;
252
}
253
}
254
255
if (viewport_width != region_width) {
256
// The majority of the time, these are equal. If not, let's check what we know.
257
u32 nearest_address = 0xFFFFFFFF;
258
for (auto vfb : vfbs_) {
259
const u32 other_address = vfb->fb_address;
260
if (other_address > fb_address && other_address < nearest_address) {
261
nearest_address = other_address;
262
}
263
}
264
265
// Unless the game is using overlapping buffers, the next buffer should be far enough away.
266
// This catches some cases where we can know this.
267
// Hmm. The problem is that we could only catch it for the first of two buffers...
268
const u32 bpp = BufferFormatBytesPerPixel(fb_format);
269
int avail_height = (nearest_address - fb_address) / (fb_stride * bpp);
270
if (avail_height < drawing_height && avail_height == region_height) {
271
drawing_width = std::min(region_width, fb_stride);
272
drawing_height = avail_height;
273
}
274
275
// Some games draw buffers interleaved, with a high stride/region/scissor but default viewport.
276
if (fb_stride == 1024 && region_width == 1024 && scissor_width == 1024) {
277
drawing_width = 1024;
278
}
279
}
280
281
bool margin = false;
282
// Let's check if we're in a stride gap of a full-size framebuffer.
283
for (auto vfb : vfbs_) {
284
if (fb_address == vfb->fb_address) {
285
continue;
286
}
287
if (vfb->fb_stride != 512) {
288
continue;
289
}
290
291
int vfb_stride_in_bytes = BufferFormatBytesPerPixel(vfb->fb_format) * vfb->fb_stride;
292
int stride_in_bytes = BufferFormatBytesPerPixel(fb_format) * fb_stride;
293
if (stride_in_bytes != vfb_stride_in_bytes) {
294
// Mismatching stride in bytes, not interesting
295
continue;
296
}
297
298
if (fb_address > vfb->fb_address && fb_address < vfb->fb_address + vfb_stride_in_bytes) {
299
// Candidate!
300
if (vfb->height == drawing_height) {
301
// Might have a margin texture! Fix the drawing width if it's too large.
302
int width_in_bytes = vfb->fb_address + vfb_stride_in_bytes - fb_address;
303
int width_in_pixels = width_in_bytes / BufferFormatBytesPerPixel(fb_format);
304
305
// Final check
306
if (width_in_pixels <= 32) {
307
drawing_width = std::min(drawing_width, width_in_pixels);
308
margin = true;
309
// Don't really need to keep looking.
310
break;
311
}
312
}
313
}
314
}
315
316
DEBUG_LOG(Log::G3D, "Est: %08x V: %ix%i, R: %ix%i, S: %ix%i, STR: %i, THR:%i, Z:%08x = %ix%i %s", fb_address, viewport_width,viewport_height, region_width, region_height, scissor_width, scissor_height, fb_stride, gstate.isModeThrough(), gstate.isDepthWriteEnabled() ? gstate.getDepthBufAddress() : 0, drawing_width, drawing_height, margin ? " (margin!)" : "");
317
}
318
319
// Fills `params` with everything the framebuffer-matching heuristics need, read from the
// GE state: color/depth addresses and strides, color format, clear/depth/blend flags, and
// the viewport, region and scissor sizes.
//
// Viewport scales that are NaN or whose magnitude exceeds 10,000,000 are treated as 0, so
// the size heuristic falls back on the other parameters. The magnitude is checked (rather
// than only the positive direction) because the value is passed through fabsf() and then
// converted to int, and converting an out-of-range float to int is undefined behavior.
void GetFramebufferHeuristicInputs(FramebufferHeuristicParams *params, const GPUgstate &gstate) {
	// GetFramebufferHeuristicInputs is only called from rendering, and thus, it's VRAM.
	params->fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
	params->fb_stride = gstate.FrameBufStride();

	params->z_address = gstate.getDepthBufRawAddress() | 0x04000000;
	params->z_stride = gstate.DepthBufStride();

	if (params->z_address == params->fb_address) {
		// Probably indicates that the game doesn't care about Z for this VFB.
		// Let's avoid matching it for Z copies and other shenanigans.
		params->z_address = 0;
		params->z_stride = 0;
	}

	params->fb_format = gstate_c.framebufFormat;

	params->isClearingDepth = gstate.isModeClear() && gstate.isClearModeDepthMask();
	// Technically, it may write depth later, but we're trying to detect it only when it's really true.
	if (gstate.isModeClear()) {
		// Not quite seeing how this makes sense..
		params->isWritingDepth = !gstate.isClearModeDepthMask() && gstate.isDepthWriteEnabled();
	} else {
		params->isWritingDepth = gstate.isDepthWriteEnabled();
	}
	params->isDrawing = !gstate.isModeClear() || !gstate.isClearModeColorMask() || !gstate.isClearModeAlphaMask();
	params->isModeThrough = gstate.isModeThrough();

	// Logic ops CLEAR and COPY are not counted as blending here.
	const bool alphaBlending = gstate.isAlphaBlendEnabled();
	const bool logicOpBlending = gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_CLEAR && gstate.getLogicOp() != GE_LOGIC_COPY;
	params->isBlending = alphaBlending || logicOpBlending;

	// Work around problem in F1 Grand Prix, where it draws in through mode with a bogus viewport.
	// We set bad values to 0 which causes the framebuffer size heuristic to rely on the other parameters instead.
	// Negative and infinite values are rejected too, since only the magnitude is used below.
	const auto sanitizeViewportScale = [](float scale) -> float {
		if (std::isnan(scale) || fabsf(scale) > 10000000.0f) {
			return 0.f;
		}
		return scale;
	};

	// Viewport-X1 and Y1 are not the upper left corner, but half the width/height. A bit confusing.
	const float vpx = sanitizeViewportScale(gstate.getViewportXScale());
	const float vpy = sanitizeViewportScale(gstate.getViewportYScale());

	params->viewportWidth = (int)(fabsf(vpx) * 2.0f);
	params->viewportHeight = (int)(fabsf(vpy) * 2.0f);
	// Region X2/Y2 are inclusive, hence the + 1.
	params->regionWidth = gstate.getRegionX2() + 1;
	params->regionHeight = gstate.getRegionY2() + 1;

	params->scissorLeft = gstate.getScissorX1();
	params->scissorTop = gstate.getScissorY1();
	params->scissorRight = gstate.getScissorX2() + 1;
	params->scissorBottom = gstate.getScissorY2() + 1;

	if (gstate.getRegionRateX() != 0x100 || gstate.getRegionRateY() != 0x100) {
		WARN_LOG_REPORT_ONCE(regionRate, Log::G3D, "Drawing region rate add non-zero: %04x, %04x of %04x, %04x", gstate.getRegionRateX(), gstate.getRegionRateY(), gstate.getRegionX2(), gstate.getRegionY2());
	}
}
376
377
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width);
378
379
VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(FramebufferHeuristicParams &params, u32 skipDrawReason) {
380
gstate_c.Clean(DIRTY_FRAMEBUF);
381
382
// Collect all parameters. This whole function has really become a cesspool of heuristics...
383
// but it appears that's what it takes, unless we emulate VRAM layout more accurately somehow.
384
385
// As there are no clear "framebuffer width" and "framebuffer height" registers,
386
// we need to infer the size of the current framebuffer somehow.
387
int drawing_width, drawing_height;
388
EstimateDrawingSize(params.fb_address, std::max(params.fb_stride, (u16)4), params.fb_format, params.viewportWidth, params.viewportHeight, params.regionWidth, params.regionHeight, params.scissorRight, params.scissorBottom, drawing_width, drawing_height);
389
390
if (params.fb_address == params.z_address) {
391
// Most likely Z will not be used in this pass, as that would wreak havoc (undefined behavior for sure)
392
// We probably don't need to do anything about that, but let's log it.
393
WARN_LOG_ONCE(color_equal_z, Log::G3D, "Framebuffer bound with color addr == z addr, likely will not use Z in this pass: %08x", params.fb_address);
394
}
395
396
// Compatibility hack for Killzone, see issue #6207.
397
if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin && params.fb_format == GE_FORMAT_8888) {
398
ApplyKillzoneFramebufferSplit(&params, &drawing_width);
399
} else {
400
gstate_c.SetCurRTOffset(0, 0);
401
}
402
403
// Find a matching framebuffer.
404
VirtualFramebuffer *normal_vfb = nullptr;
405
int y_offset;
406
VirtualFramebuffer *large_offset_vfb = nullptr;
407
408
for (auto v : vfbs_) {
409
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
410
411
if (params.fb_address == v->fb_address && params.fb_format == v->fb_format && params.fb_stride == v->fb_stride) {
412
if (!normal_vfb) {
413
normal_vfb = v;
414
}
415
} else if (!PSP_CoreParameter().compat.flags().DisallowFramebufferAtOffset && !PSP_CoreParameter().compat.flags().SplitFramebufferMargin &&
416
v->fb_stride == params.fb_stride && v->fb_format == params.fb_format) {
417
u32 v_fb_first_line_end_ptr = v->fb_address + v->fb_stride * bpp;
418
u32 v_fb_end_ptr = v->fb_address + v->fb_stride * v->height * bpp;
419
420
if (!normal_vfb && params.fb_address > v->fb_address && params.fb_address < v_fb_first_line_end_ptr) {
421
const int x_offset = (params.fb_address - v->fb_address) / bpp;
422
if (x_offset < params.fb_stride && v->height >= drawing_height) {
423
// Pretty certainly a pure render-to-X-offset.
424
WARN_LOG_REPORT_ONCE(renderoffset, Log::FrameBuf, "Rendering to framebuffer offset at %08x +%dx%d (stride %d)", v->fb_address, x_offset, 0, v->fb_stride);
425
normal_vfb = v;
426
gstate_c.SetCurRTOffset(x_offset, 0);
427
normal_vfb->width = std::max((int)normal_vfb->width, x_offset + drawing_width);
428
// To prevent the newSize code from being confused.
429
drawing_width += x_offset;
430
break;
431
}
432
} else if (PSP_CoreParameter().compat.flags().FramebufferAllowLargeVerticalOffset &&
433
params.fb_address > v->fb_address && v->fb_stride > 0 && (params.fb_address - v->fb_address) % v->FbStrideInBytes() == 0 &&
434
params.fb_address != 0x04088000 && v->fb_address != 0x04000000) { // Heuristic to avoid merging the main framebuffers.
435
y_offset = (params.fb_address - v->fb_address) / v->FbStrideInBytes();
436
if (y_offset <= v->bufferHeight) { // note: v->height is misdetected as 256 instead of 272 here in tokimeki. Note that 272 is just the height of the upper part, it's supersampling vertically.
437
large_offset_vfb = v;
438
break;
439
}
440
}
441
}
442
}
443
444
VirtualFramebuffer *vfb = nullptr;
445
if (large_offset_vfb) {
446
// These are prioritized over normal VFBs matches, to ensure things work even if the higher-address one
447
// is created first. Only enabled under compat flag.
448
vfb = large_offset_vfb;
449
WARN_LOG_REPORT_ONCE(tokimeki, Log::FrameBuf, "Detected FBO at Y offset %d of %08x: %08x", y_offset, large_offset_vfb->fb_address, params.fb_address);
450
gstate_c.SetCurRTOffset(0, y_offset);
451
vfb->height = std::max((int)vfb->height, y_offset + drawing_height);
452
drawing_height += y_offset;
453
// TODO: We can allow X/Y overlaps too, but haven't seen any so safer to not.
454
} else if (normal_vfb) {
455
vfb = normal_vfb;
456
if (vfb->z_address == 0 && vfb->z_stride == 0 && params.z_stride != 0) {
457
// Got one that was created by CreateRAMFramebuffer. Since it has no depth buffer,
458
// we just recreate it immediately.
459
ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
460
}
461
462
// Keep track, but this isn't really used.
463
vfb->z_stride = params.z_stride;
464
// Heuristic: In throughmode, a higher height could be used. Let's avoid shrinking the buffer.
465
if (params.isModeThrough && (int)vfb->width <= params.fb_stride) {
466
vfb->width = std::max((int)vfb->width, drawing_width);
467
vfb->height = std::max((int)vfb->height, drawing_height);
468
} else {
469
vfb->width = drawing_width;
470
vfb->height = drawing_height;
471
}
472
}
473
474
if (vfb) {
475
bool resized = false;
476
if ((drawing_width != vfb->bufferWidth || drawing_height != vfb->bufferHeight)) {
477
// Even if it's not newly wrong, if this is larger we need to resize up.
478
if (vfb->width > vfb->bufferWidth || vfb->height > vfb->bufferHeight) {
479
ResizeFramebufFBO(vfb, vfb->width, vfb->height);
480
resized = true;
481
} else if (vfb->newWidth != drawing_width || vfb->newHeight != drawing_height) {
482
// If it's newly wrong, or changing every frame, just keep track.
483
vfb->newWidth = drawing_width;
484
vfb->newHeight = drawing_height;
485
vfb->lastFrameNewSize = gpuStats.numFlips;
486
} else if (vfb->lastFrameNewSize + FBO_OLD_AGE < gpuStats.numFlips) {
487
// Okay, it's changed for a while (and stayed that way.) Let's start over.
488
// But only if we really need to, to avoid blinking.
489
bool needsRecreate = vfb->bufferWidth > params.fb_stride;
490
needsRecreate = needsRecreate || vfb->newWidth > vfb->bufferWidth || vfb->newWidth * 2 < vfb->bufferWidth;
491
needsRecreate = needsRecreate || vfb->newHeight > vfb->bufferHeight || vfb->newHeight * 2 < vfb->bufferHeight;
492
493
// Whether we resize or not, change the size parameters so we stop detecting a resize.
494
// It might be larger if all drawing has been in throughmode.
495
vfb->width = drawing_width;
496
vfb->height = drawing_height;
497
498
if (needsRecreate) {
499
ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
500
resized = true;
501
// Let's discard this information, might be wrong now.
502
vfb->safeWidth = 0;
503
vfb->safeHeight = 0;
504
}
505
}
506
} else {
507
// It's not different, let's keep track of that too.
508
vfb->lastFrameNewSize = gpuStats.numFlips;
509
}
510
511
if (!resized && renderScaleFactor_ != 1 && vfb->renderScaleFactor == 1) {
512
// Might be time to change this framebuffer - have we used depth?
513
if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
514
ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
515
_assert_(vfb->renderScaleFactor != 1);
516
}
517
}
518
}
519
520
// None found? Create one.
521
if (!vfb) {
522
gstate_c.usingDepth = false; // reset depth buffer tracking
523
524
vfb = new VirtualFramebuffer{};
525
vfb->fbo = nullptr;
526
vfb->fb_address = params.fb_address;
527
vfb->fb_stride = params.fb_stride;
528
vfb->z_address = params.z_address;
529
vfb->z_stride = params.z_stride;
530
531
// The other width/height parameters are set in ResizeFramebufFBO below.
532
vfb->width = drawing_width;
533
vfb->height = drawing_height;
534
vfb->newWidth = drawing_width;
535
vfb->newHeight = drawing_height;
536
vfb->lastFrameNewSize = gpuStats.numFlips;
537
vfb->fb_format = params.fb_format;
538
vfb->usageFlags = FB_USAGE_RENDER_COLOR;
539
540
u32 colorByteSize = vfb->BufferByteSize(RASTER_COLOR);
541
if (Memory::IsVRAMAddress(params.fb_address) && params.fb_address + colorByteSize > framebufColorRangeEnd_) {
542
framebufColorRangeEnd_ = params.fb_address + colorByteSize;
543
}
544
545
// This is where we actually create the framebuffer. The true is "force".
546
ResizeFramebufFBO(vfb, drawing_width, drawing_height, true);
547
NotifyRenderFramebufferCreated(vfb);
548
549
// Note that we do not even think about depth right now. That'll be handled
550
// on the first depth access, which will call SetDepthFramebuffer.
551
552
CopyToColorFromOverlappingFramebuffers(vfb);
553
SetColorUpdated(vfb, skipDrawReason);
554
555
INFO_LOG(Log::FrameBuf, "Creating FBO for %08x (z: %08x) : %d x %d x %s", vfb->fb_address, vfb->z_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));
556
557
vfb->last_frame_render = gpuStats.numFlips;
558
frameLastFramebufUsed_ = gpuStats.numFlips;
559
vfbs_.push_back(vfb);
560
currentRenderVfb_ = vfb;
561
562
// Assume that if we're clearing right when switching to a new framebuffer, we don't need to upload.
563
if (useBufferedRendering_ && params.isDrawing) {
564
gpu->PerformWriteColorFromMemory(params.fb_address, colorByteSize);
565
// Alpha was already done by PerformWriteColorFromMemory.
566
PerformWriteStencilFromMemory(params.fb_address, colorByteSize, WriteStencil::STENCIL_IS_ZERO | WriteStencil::IGNORE_ALPHA);
567
// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
568
}
569
570
DiscardFramebufferCopy();
571
572
// We already have it!
573
} else if (vfb != currentRenderVfb_) {
574
// Use it as a render target.
575
DEBUG_LOG(Log::FrameBuf, "Switching render target to FBO for %08x: %d x %d x %d ", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
576
vfb->usageFlags |= FB_USAGE_RENDER_COLOR;
577
vfb->last_frame_render = gpuStats.numFlips;
578
frameLastFramebufUsed_ = gpuStats.numFlips;
579
vfb->dirtyAfterDisplay = true;
580
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
581
vfb->reallyDirtyAfterDisplay = true;
582
583
VirtualFramebuffer *prev = currentRenderVfb_;
584
currentRenderVfb_ = vfb;
585
NotifyRenderFramebufferSwitched(prev, vfb, params.isClearingDepth);
586
CopyToColorFromOverlappingFramebuffers(vfb);
587
gstate_c.usingDepth = false; // reset depth buffer tracking
588
589
DiscardFramebufferCopy();
590
} else {
591
// Something changed, but we still got the same framebuffer we were already rendering to.
592
// Might not be a lot to do here, we check in NotifyRenderFramebufferUpdated
593
vfb->last_frame_render = gpuStats.numFlips;
594
frameLastFramebufUsed_ = gpuStats.numFlips;
595
vfb->dirtyAfterDisplay = true;
596
if ((skipDrawReason & SKIPDRAW_SKIPFRAME) == 0)
597
vfb->reallyDirtyAfterDisplay = true;
598
NotifyRenderFramebufferUpdated(vfb);
599
}
600
601
vfb->colorBindSeq = GetBindSeqCount();
602
603
gstate_c.curRTWidth = vfb->width;
604
gstate_c.curRTHeight = vfb->height;
605
gstate_c.curRTRenderWidth = vfb->renderWidth;
606
gstate_c.curRTRenderHeight = vfb->renderHeight;
607
return vfb;
608
}
609
610
// Called on the first use of depth in a render pass.
611
void FramebufferManagerCommon::SetDepthFrameBuffer(bool isClearingDepth) {
612
if (!currentRenderVfb_) {
613
return;
614
}
615
616
// First time use of this framebuffer's depth buffer.
617
bool newlyUsingDepth = (currentRenderVfb_->usageFlags & FB_USAGE_RENDER_DEPTH) == 0;
618
currentRenderVfb_->usageFlags |= FB_USAGE_RENDER_DEPTH;
619
620
uint32_t boundDepthBuffer = gstate.getDepthBufRawAddress() | 0x04000000;
621
uint32_t boundDepthStride = gstate.DepthBufStride();
622
if (currentRenderVfb_->z_address != boundDepthBuffer || currentRenderVfb_->z_stride != boundDepthStride) {
623
if (currentRenderVfb_->fb_address == boundDepthBuffer) {
624
// Disallow setting depth buffer to the same address as the color buffer, usually means it's not used.
625
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Ignoring color matching depth buffer at %08x", boundDepthBuffer);
626
boundDepthBuffer = 0;
627
boundDepthStride = 0;
628
}
629
WARN_LOG_N_TIMES(z_reassign, 5, Log::FrameBuf, "Framebuffer at %08x/%d has switched associated depth buffer from %08x to %08x, updating.",
630
currentRenderVfb_->fb_address, currentRenderVfb_->fb_stride, currentRenderVfb_->z_address, boundDepthBuffer);
631
632
// Technically, here we should copy away the depth buffer to another framebuffer that uses that z_address, or maybe
633
// even write it back to RAM. However, this is rare. Silent Hill is one example, see #16126.
634
currentRenderVfb_->z_address = boundDepthBuffer;
635
// Update the stride in case it changed.
636
currentRenderVfb_->z_stride = boundDepthStride;
637
638
if (currentRenderVfb_->fbo) {
639
char tag[128];
640
FormatFramebufferName(currentRenderVfb_, tag, sizeof(tag));
641
currentRenderVfb_->fbo->UpdateTag(tag);
642
}
643
}
644
645
// If this first draw call is anything other than a clear, "resolve" the depth buffer,
646
// by copying from any overlapping buffers with fresher content.
647
if (!isClearingDepth && useBufferedRendering_) {
648
CopyToDepthFromOverlappingFramebuffers(currentRenderVfb_);
649
650
// Need to upload the first line of depth buffers, for Burnout Dominator lens flares. See issue #11100 and comments to #16081.
651
// Might make this more generic and upload the whole depth buffer if we find it's needed for something.
652
if (newlyUsingDepth && draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
653
// Sanity check the depth buffer pointer.
654
if (Memory::IsValidRange(currentRenderVfb_->z_address, currentRenderVfb_->width * 2)) {
655
const u16 *src = (const u16 *)Memory::GetPointerUnchecked(currentRenderVfb_->z_address);
656
DrawPixels(currentRenderVfb_, 0, 0, (const u8 *)src, GE_FORMAT_DEPTH16, currentRenderVfb_->z_stride, currentRenderVfb_->width, currentRenderVfb_->height, RASTER_DEPTH, "Depth Upload");
657
}
658
}
659
}
660
661
currentRenderVfb_->depthBindSeq = GetBindSeqCount();
662
}
663
664
struct CopySource {
665
VirtualFramebuffer *vfb;
666
RasterChannel channel;
667
int xOffset;
668
int yOffset;
669
670
int seq() const {
671
return channel == RASTER_DEPTH ? vfb->depthBindSeq : vfb->colorBindSeq;
672
}
673
674
bool operator < (const CopySource &other) const {
675
return seq() < other.seq();
676
}
677
};
678
679
// Not sure if it's more profitable to always do these copies with raster (which may screw up early-Z due to explicit depth buffer write)
680
// or to use image copies when possible (which may make it easier for the driver to preserve early-Z, but on the other hand, will cost additional memory
681
// bandwidth on tilers due to the load operation, which we might otherwise be able to skip).
682
// Refreshes the depth contents of `dest` from another tracked framebuffer that has
// newer data for the same depth memory. Candidates are either a depth buffer at the
// same address/stride, or a 565 color buffer that is located on top of dest's depth
// buffer (which is then converted to depth with a shader).
// Always dirties all render state on exit.
void FramebufferManagerCommon::CopyToDepthFromOverlappingFramebuffers(VirtualFramebuffer *dest) {
	std::vector<CopySource> candidates;
	for (auto other : vfbs_) {
		if (other == dest) {
			continue;
		}

		const bool colorAliasesDepth =
			other->fb_address == dest->z_address &&
			other->fb_stride == dest->z_stride &&
			other->fb_format == GE_FORMAT_565;
		if (colorAliasesDepth) {
			// Only interesting if the color data is newer than what dest's depth currently holds.
			// Note that a color alias is never also considered as a depth source.
			if (other->colorBindSeq > dest->depthBindSeq) {
				candidates.push_back(CopySource{ other, RASTER_COLOR, 0, 0 });
			}
			continue;
		}

		const bool sharesDepthBuffer = other->z_address == dest->z_address && other->z_stride == dest->z_stride;
		if (sharesDepthBuffer && other->depthBindSeq > dest->depthBindSeq) {
			candidates.push_back(CopySource{ other, RASTER_DEPTH, 0, 0 });
		}
		// TODO: Do more detailed overlap checks here.
	}

	std::sort(candidates.begin(), candidates.end());

	// TODO: A full copy will overwrite anything else. So we can eliminate
	// anything that comes before such a copy.

	if (candidates.empty()) {
		gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
		return;
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	// For now, only the last candidate (after sorting) is applied, if there are multiple.
	auto &chosen = candidates.back();
	switch (chosen.channel) {
	case RASTER_DEPTH:
		// Good old depth->depth copy.
		BlitFramebufferDepth(chosen.vfb, dest);
		gpuStats.numDepthCopies++;
		dest->last_frame_depth_updated = gpuStats.numFlips;
		break;

	case RASTER_COLOR:
		// Converting color to depth requires writing depth from the fragment shader.
		if (draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
			VirtualFramebuffer *src = chosen.vfb;
			if (src->fb_format != GE_FORMAT_565) {
				WARN_LOG_ONCE(not565, Log::FrameBuf, "fb_format of buffer at %08x not 565 as expected", src->fb_address);
			}

			// Really hate to do this, but tracking the depth swizzle state across multiple
			// copies is not easy.
			const Draw2DShader shader = PSP_CoreParameter().compat.flags().DeswizzleDepth
				? DRAW2D_565_TO_DEPTH_DESWIZZLE
				: DRAW2D_565_TO_DEPTH;

			gpuStats.numReinterpretCopies++;
			src->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
			dest->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;

			// Copying color to depth. Both rectangles use the source's render size.
			BlitUsingRaster(
				src->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
				dest->fbo, 0.0f, 0.0f, src->renderWidth, src->renderHeight,
				false, dest->renderScaleFactor, Get2DPipeline(shader), "565_to_depth");
		}
		break;

	default:
		break;
	}

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
744
745
// Can't easily dynamically create these strings, we just pass along the pointer.
746
// Names for reinterpret passes. One row per source format and one column per destination
// format, both in the order 565, 5551, 4444, 8888.
static const char *reinterpretStrings[4][4] = {
	// From 565
	{ "self_reinterpret_565", "reinterpret_565_to_5551", "reinterpret_565_to_4444", "reinterpret_565_to_8888" },
	// From 5551
	{ "reinterpret_5551_to_565", "self_reinterpret_5551", "reinterpret_5551_to_4444", "reinterpret_5551_to_8888" },
	// From 4444
	{ "reinterpret_4444_to_565", "reinterpret_4444_to_5551", "self_reinterpret_4444", "reinterpret_4444_to_8888" },
	// From 8888
	{ "reinterpret_8888_to_565", "reinterpret_8888_to_5551", "reinterpret_8888_to_4444", "self_reinterpret_8888" },
};
772
773
// Call this after the target has been bound for rendering. For color, raster is probably always going to win over blits/copies.
774
// Brings the color contents of `dst` up to date from other tracked framebuffers that
// overlap it in memory and are not older than it (by colorBindSeq).
// Call this after the target has been bound for rendering.
//
// Three kinds of sources are detected:
//  * same address and stride but a different format (reinterpret),
//  * same stride and format at a vertically offset address (only with the
//    AllowLargeFBTextureOffsets compat flag),
//  * same address and same stride in bytes, where one stride in pixels is double the other
//    (16-bit <-> 32-bit reinterpret).
//
// Does nothing when buffered rendering is disabled.
void FramebufferManagerCommon::CopyToColorFromOverlappingFramebuffers(VirtualFramebuffer *dst) {
	if (!useBufferedRendering_) {
		return;
	}

	std::vector<CopySource> sources;
	for (auto src : vfbs_) {
		// Discard old and equal potential inputs.
		if (src == dst || src->colorBindSeq < dst->colorBindSeq) {
			continue;
		}

		if (src->fb_address == dst->fb_address && src->fb_stride == dst->fb_stride) {
			// Another render target at the exact same location but gotta be a different format or a different stride, otherwise
			// it would be the same, and should have been detected in DoSetRenderFrameBuffer.
			if (src->fb_format != dst->fb_format) {
				// This will result in reinterpret later, if both formats are 16-bit.
				sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
			} else {
				// This shouldn't happen anymore. I think when it happened last, we still had
				// lax stride checking when video was incoming, and a resize happened causing a duplicate.
			}
		} else if (src->fb_stride == dst->fb_stride && src->fb_format == dst->fb_format) {
			// Initially we'll only allow pure horizontal and vertical overlap,
			// to reduce the risk for false positives. We can allow diagonal overlap too if needed
			// in the future.

			// TODO: Get rid of the compatibility flag check.
			if (!PSP_CoreParameter().compat.flags().AllowLargeFBTextureOffsets) {
				continue;
			}

			// Same for both src and dst. Kept signed so that the offset math below stays signed:
			// mixing int with u32 would convert a negative offset into a huge positive one.
			const int strideInBytes = (int)(src->fb_stride * BufferFormatBytesPerPixel(src->fb_format));
			if (strideInBytes <= 0) {
				// Can't compute a line offset (and must not divide by zero).
				continue;
			}

			// Check for potential vertical overlap, like in Juiced 2.
			// Positive when dst starts after src in memory, negative when it starts before.
			const int byteOffset = (int)dst->fb_address - (int)src->fb_address;
			if (byteOffset % strideInBytes != 0) {
				// Buffers not stride-aligned - ignoring for now.
				// This is where we'll add the horizontal offset for GoW.
				continue;
			}

			// Buffers are aligned.
			const int yOffset = byteOffset / strideInBytes;
			if (yOffset <= -(int)src->height || yOffset >= (int)dst->height) {
				// Not overlapping
				continue;
			}
			sources.push_back(CopySource{ src, RASTER_COLOR, 0, yOffset });
		} else if (src->fb_address == dst->fb_address && src->FbStrideInBytes() == dst->FbStrideInBytes()) {
			if (src->fb_stride == dst->fb_stride * 2) {
				// Reinterpret from 16-bit to 32-bit.
				sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
			} else if (src->fb_stride * 2 == dst->fb_stride) {
				// Reinterpret from 32-bit to 16-bit.
				sources.push_back(CopySource{ src, RASTER_COLOR, 0, 0 });
			} else {
				// 16-to-16 reinterpret, should have been caught above already.
				_assert_msg_(false, "Reinterpret: Shouldn't get here");
			}
		}
	}

	std::sort(sources.begin(), sources.end());

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	bool tookActions = false;

	// TODO: Only do the latest one.
	for (const CopySource &source : sources) {
		VirtualFramebuffer *src = source.vfb;

		// Copy a rectangle from the original to the new buffer.
		// Yes, we mean to look at src->width/height for the dest rectangle.

		// TODO: Try to bound the blit using gstate_c.vertBounds like depal does.

		int srcWidth = src->width * src->renderScaleFactor;
		int srcHeight = src->height * src->renderScaleFactor;
		int dstWidth = src->width * dst->renderScaleFactor;
		int dstHeight = src->height * dst->renderScaleFactor;

		// The recorded offsets are dst relative to src, so the source image lands at the negated offset in dst.
		int dstX1 = -source.xOffset * dst->renderScaleFactor;
		int dstY1 = -source.yOffset * dst->renderScaleFactor;
		int dstX2 = dstX1 + dstWidth;
		int dstY2 = dstY1 + dstHeight;

		if (source.channel == RASTER_COLOR) {
			Draw2DPipeline *pipeline = nullptr;
			const char *pass_name = "N/A";
			float scaleFactorX = 1.0f;
			if (src->fb_format == dst->fb_format) {
				gpuStats.numColorCopies++;
				pipeline = Get2DPipeline(DRAW2D_COPY_COLOR);
				pass_name = "copy_color";
			} else {
				if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
					WARN_LOG_ONCE(bta, Log::FrameBuf, "WARNING: Reinterpret encountered with BlueToAlpha on");
				}

				// Reinterpret!
				WARN_LOG_N_TIMES(reint, 5, Log::FrameBuf, "Reinterpret detected from %08x_%s to %08x_%s",
					src->fb_address, GeBufferFormatToString(src->fb_format),
					dst->fb_address, GeBufferFormatToString(dst->fb_format));

				pipeline = GetReinterpretPipeline(src->fb_format, dst->fb_format, &scaleFactorX);
				// The destination rectangle is halved or doubled horizontally for 16<->32 bit reinterprets.
				dstX1 *= scaleFactorX;
				dstX2 *= scaleFactorX;

				pass_name = reinterpretStrings[(int)src->fb_format][(int)dst->fb_format];

				gpuStats.numReinterpretCopies++;
			}

			if (pipeline) {
				tookActions = true;
				// OK we have the pipeline, now just do the blit.
				BlitUsingRaster(src->fbo, 0.0f, 0.0f, srcWidth, srcHeight,
					dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, pass_name);
			}

			if (scaleFactorX == 1.0f && dst->z_address == src->z_address && dst->z_stride == src->z_stride) {
				// We should also copy the depth buffer in this case!
				BlitFramebufferDepth(src, dst, true);
			}
		}
	}

	if (currentRenderVfb_ && dst != currentRenderVfb_ && tookActions) {
		// Will probably just change the name of the current renderpass, since one was started by the reinterpret itself.
		draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "After Reinterpret");
	}

	shaderManager_->DirtyLastShader();
	textureCache_->ForgetLastTexture();
}
923
924
// Returns the 2D pipeline used to reinterpret color data from format `from` as format `to`,
// creating and caching it in reinterpretFromTo_ on first use.
// *scaleFactorX receives the horizontal scale to apply to the destination X coordinates:
// 0.5 for 16-bit -> 32-bit, 2.0 for 32-bit -> 16-bit, 1.0 otherwise.
// If the formats are equal, the plain color copy pipeline is returned instead.
Draw2DPipeline *FramebufferManagerCommon::GetReinterpretPipeline(GEBufferFormat from, GEBufferFormat to, float *scaleFactorX) {
	if (from == to) {
		*scaleFactorX = 1.0f;
		return Get2DPipeline(DRAW2D_COPY_COLOR);
	}

	const bool from16 = IsBufferFormat16Bit(from);
	const bool to16 = IsBufferFormat16Bit(to);
	if (from16 == to16) {
		// Same pixel size, no horizontal scaling needed.
		*scaleFactorX = 1.0f;
	} else if (from16) {
		// We halve the X coordinates in the destination framebuffer.
		// The shader will collect two pixels worth of input data and merge into one.
		*scaleFactorX = 0.5f;
	} else {
		// We double the X coordinates in the destination framebuffer.
		// The shader will sample and depending on the X coordinate & 1, use the upper or lower bits.
		*scaleFactorX = 2.0f;
	}

	const int fromIndex = (int)from;
	const int toIndex = (int)to;
	Draw2DPipeline *cached = reinterpretFromTo_[fromIndex][toIndex];
	if (cached) {
		return cached;
	}

	// Not created yet - generate the shader and remember the pipeline for next time.
	Draw2DPipeline *created = draw2D_.Create2DPipeline([=](ShaderWriter &shaderWriter) -> Draw2DPipelineInfo {
		return GenerateReinterpretFragmentShader(shaderWriter, from, to);
	});
	reinterpretFromTo_[fromIndex][toIndex] = created;
	return created;
}
951
952
// Releases the FBO of `v` (if any), clears every tracked pointer that refers to it,
// and deletes the VirtualFramebuffer itself. Does not remove it from vfbs_.
void FramebufferManagerCommon::DestroyFramebuf(VirtualFramebuffer *v) {
	// Notify the texture cache of both the color and depth buffers.
	textureCache_->NotifyFramebuffer(v, NOTIFY_FB_DESTROYED);

	if (v->fbo) {
		v->fbo->Release();
		v->fbo = nullptr;
	}

	// Wipe some pointers
	DiscardFramebufferCopy();

	auto forgetIfSame = [v](auto &tracked) {
		if (tracked == v) {
			tracked = nullptr;
		}
	};
	forgetIfSame(currentRenderVfb_);
	forgetIfSame(displayFramebuf_);
	forgetIfSame(prevDisplayFramebuf_);
	forgetIfSame(prevPrevDisplayFramebuf_);

	delete v;
}
973
974
// Copies depth from `src` to `dst` if they share a depth buffer address and `src` has
// depth that was rendered no earlier than dst's depth was last updated.
// Unless allowSizeMismatch is set, the two framebuffers must also match in size
// (widths 480 and 512 are treated as equal). The copy covers the smaller of the two render sizes.
void FramebufferManagerCommon::BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst, bool allowSizeMismatch) {
	_dbg_assert_(src && dst);

	_dbg_assert_(src != dst);

	// Check that the depth address is even the same before actually blitting.
	const bool sameDepthBuffer = src->z_address == dst->z_address && src->z_stride != 0 && dst->z_stride != 0;
	if (!sameDepthBuffer) {
		return;
	}

	const bool widthsCompatible =
		src->width == dst->width ||
		(src->width == 512 && dst->width == 480) ||
		(src->width == 480 && dst->width == 512);
	const bool sizesMatch = widthsCompatible && src->height == dst->height;
	if (!sizesMatch && !allowSizeMismatch) {
		return;
	}

	// Copy depth value from the previously bound framebuffer to the current one.
	const bool srcDepthIsNewer = src->last_frame_depth_render != 0 && src->last_frame_depth_render >= dst->last_frame_depth_updated;
	if (!src->fbo || !dst->fbo || !useBufferedRendering_ || !srcDepthIsNewer) {
		// If depth wasn't updated, then we're at least "two degrees" away from the data.
		// This is an optimization: it probably doesn't need to be copied in this case.
		return;
	}

	const auto &caps = draw_->GetDeviceCaps();
	const bool canCopy = caps.framebufferSeparateDepthCopySupported || (!caps.framebufferDepthBlitSupported && caps.framebufferCopySupported);
	const bool canBlit = caps.framebufferDepthBlitSupported;
	bool canRaster = caps.fragmentShaderDepthWriteSupported && caps.textureDepthSupported;

	// If multisampling, we want to copy depth properly so we get all the samples, to avoid aliased edges.
	// Can be seen in the fire in Jeanne D'arc, for example.
	if (src->fbo->MultiSampleLevel() > 0 && dst->fbo->MultiSampleLevel() > 0) {
		if (canRaster && canCopy) {
			canRaster = false;
		}
	}

	const int w = std::min(src->renderWidth, dst->renderWidth);
	const int h = std::min(src->renderHeight, dst->renderHeight);

	// Some GPUs can copy depth but only if stencil gets to come along for the ride. We only want to use this if there is no blit functionality.
	// Preference order: raster, then image copy, then blit.
	if (canRaster) {
		BlitUsingRaster(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, false, dst->renderScaleFactor, Get2DPipeline(Draw2DShader::DRAW2D_COPY_DEPTH), "BlitDepthRaster");
	} else if (canCopy) {
		draw_->CopyFramebufferImage(src->fbo, 0, 0, 0, 0, dst->fbo, 0, 0, 0, 0, w, h, 1, Draw::FB_DEPTH_BIT, "CopyFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	} else if (canBlit) {
		// We'll accept whether we get a separate depth blit or not...
		draw_->BlitFramebuffer(src->fbo, 0, 0, w, h, dst->fbo, 0, 0, w, h, Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, "BlitFramebufferDepth");
		RebindFramebuffer("After BlitFramebufferDepth");
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
}
1024
1025
// Called when `vfb` has just been created as a render target: handles the previous
// render target (buffered) or flags draws to be skipped (non-buffered), then informs
// the texture cache and refreshes size-dependent state.
void FramebufferManagerCommon::NotifyRenderFramebufferCreated(VirtualFramebuffer *vfb) {
	if (useBufferedRendering_) {
		if (currentRenderVfb_) {
			DownloadFramebufferOnSwitch(currentRenderVfb_);
		}
	} else {
		// Let's ignore rendering to targets that have not (yet) been displayed.
		gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1037
1038
// Dirties the GPU state that depends on the render target size, if the size of `vfb`
// differs from the current render target size recorded in gstate_c.
void FramebufferManagerCommon::NotifyRenderFramebufferUpdated(VirtualFramebuffer *vfb) {
	const bool logicalSizeChanged = gstate_c.curRTWidth != vfb->width || gstate_c.curRTHeight != vfb->height;
	if (logicalSizeChanged) {
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}

	const bool renderSizeChanged = gstate_c.curRTRenderWidth != vfb->renderWidth || gstate_c.curRTRenderHeight != vfb->renderHeight;
	if (renderSizeChanged) {
		gstate_c.Dirty(DIRTY_PROJMATRIX);
		gstate_c.Dirty(DIRTY_PROJTHROUGHMATRIX);
	}
}
1047
1048
// Reads back the "safe" area of `vfb` to memory once, the first time it is switched away from.
//
// Some games will draw to some memory once, and use it as a render-to-texture later.
// To support this, we save the first frame to memory when we have a safe w/h.
// Saving each frame would be slow.
void FramebufferManagerCommon::DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb) {
	if (!vfb) {
		return;
	}
	if (vfb->safeWidth <= 0 || vfb->safeHeight <= 0) {
		return;
	}
	if ((vfb->usageFlags & FB_USAGE_FIRST_FRAME_SAVED) || vfb->memoryUpdated) {
		// Already saved, or memory is already up to date.
		return;
	}

	// TODO: This type of download could be made async, for less stutter on framebuffer creation.
	if (GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP) {
		return;
	}
	if (PSP_CoreParameter().compat.flags().DisableFirstFrameReadback) {
		return;
	}

	ReadFramebufferToMemory(vfb, 0, 0, vfb->safeWidth, vfb->safeHeight, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
	vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
	// Reset the safe size so this isn't triggered again until it's set anew.
	vfb->safeWidth = 0;
	vfb->safeHeight = 0;
}
1063
1064
bool FramebufferManagerCommon::ShouldDownloadFramebufferColor(const VirtualFramebuffer *vfb) {
1065
// Dangan Ronpa hack
1066
return PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000;
1067
}
1068
1069
bool FramebufferManagerCommon::ShouldDownloadFramebufferDepth(const VirtualFramebuffer *vfb) {
1070
// Download depth buffer for Syphon Filter lens flares
1071
if (!PSP_CoreParameter().compat.flags().ReadbackDepth || GetSkipGPUReadbackMode() != SkipGPUReadbackMode::NO_SKIP) {
1072
return false;
1073
}
1074
return (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) != 0 && vfb->width >= 480 && vfb->height >= 272;
1075
}
1076
1077
// Called when rendering switches from `prevVfb` (may be null) to `vfb`.
// Performs any readbacks wanted for the previous target, then binds the new target
// (buffered rendering) or updates the skip-draw state (non-buffered rendering).
// `isClearingDepth` is currently not used by this function.
void FramebufferManagerCommon::NotifyRenderFramebufferSwitched(VirtualFramebuffer *prevVfb, VirtualFramebuffer *vfb, bool isClearingDepth) {
	if (prevVfb) {
		const bool forceColorReadback = ShouldDownloadFramebufferColor(prevVfb) && !prevVfb->memoryUpdated;
		if (forceColorReadback) {
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RASTER_COLOR, Draw::ReadbackMode::OLD_DATA_OK);
			prevVfb->usageFlags |= FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED;
			prevVfb->usageFlags &= ~FB_USAGE_DOWNLOAD_CLEAR;
		} else {
			DownloadFramebufferOnSwitch(prevVfb);
		}

		if (ShouldDownloadFramebufferDepth(prevVfb)) {
			ReadFramebufferToMemory(prevVfb, 0, 0, prevVfb->width, prevVfb->height, RasterChannel::RASTER_DEPTH, Draw::ReadbackMode::BLOCK);
		}
	}

	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();

	if (!useBufferedRendering_) {
		if (vfb->fbo) {
			// This should only happen very briefly when toggling useBufferedRendering_.
			textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_DESTROYED);
			vfb->fbo->Release();
			vfb->fbo = nullptr;
		}

		// Let's ignore rendering to targets that have not (yet) been displayed.
		const bool displayed = (vfb->usageFlags & FB_USAGE_DISPLAYED_FRAMEBUFFER) != 0;
		if (displayed) {
			gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
		} else {
			gstate_c.skipDrawReason |= SKIPDRAW_NON_DISPLAYED_FB;
		}
	} else if (!vfb->fbo) {
		// This should only happen very briefly when toggling useBufferedRendering_.
		ResizeFramebufFBO(vfb, vfb->width, vfb->height, true);
	} else {
		shaderManager_->DirtyLastShader();

		// Keep depth, unless the framebuffer was flagged to have its depth invalidated -
		// in that case clear it on bind and drop the flag.
		Draw::RPAction depthAction = Draw::RPAction::KEEP;
		float clearDepth = 0.0f;
		if (vfb->usageFlags & FB_USAGE_INVALIDATE_DEPTH) {
			depthAction = Draw::RPAction::CLEAR;
			clearDepth = GetDepthScaleFactors(gstate_c.UseFlags()).Offset();
			vfb->usageFlags &= ~FB_USAGE_INVALIDATE_DEPTH;
		}
		draw_->BindFramebufferAsRenderTarget(vfb->fbo, {Draw::RPAction::KEEP, depthAction, Draw::RPAction::KEEP, 0, clearDepth}, "FBSwitch");
	}

	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_UPDATED);

	NotifyRenderFramebufferUpdated(vfb);
}
1128
1129
// Marks the framebuffer matching (addr, stride, fmt), if any, as freshly rendered, and
// widens it if the incoming stride is larger than the one it currently has.
//
// Note: UpdateFromMemory() is still called later.
// This is a special case where we have extra information prior to the invalidation,
// because it's called from sceJpeg, sceMpeg, scePsmf etc.
void FramebufferManagerCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int stride, GEBufferFormat fmt) {
	// TODO: Could possibly be at an offset...
	// Also, stride needs better handling.
	VirtualFramebuffer *target = ResolveVFB(addr, stride, fmt);
	if (!target) {
		return;
	}

	// Let's count this as a "render". This will also force us to use the correct format.
	target->last_frame_render = gpuStats.numFlips;
	target->colorBindSeq = GetBindSeqCount();

	if (target->fb_stride >= stride) {
		// Existing framebuffer is already wide enough.
		return;
	}

	INFO_LOG(Log::FrameBuf, "Changing stride for %08x from %d to %d", addr, target->fb_stride, stride);
	const int bpp = BufferFormatBytesPerPixel(fmt);
	// New height is the number of full lines of the new stride that fit in `size` bytes.
	const int newHeight = size / (bpp * stride);
	ResizeFramebufFBO(target, stride, newHeight);
	// Resizing may change the viewport/etc.
	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	target->fb_stride = stride;
	// This might be a bit wider than necessary, but we'll redetect on next render.
	target->width = stride;
}
1154
1155
// Called when memory at `addr` has been written. Every framebuffer that starts exactly at
// `addr` is either re-uploaded from memory (buffered rendering with an FBO) or destroyed
// and removed from vfbs_. `size` is not used by this function.
void FramebufferManagerCommon::UpdateFromMemory(u32 addr, int size) {
	// Take off the uncached flag from the address. Not to be confused with the start of VRAM.
	addr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(addr)) {
		addr &= 0x041FFFFF;
	}

	// TODO: Could go through all FBOs, but probably not important?
	// TODO: Could also check for inner changes, but video is most important.
	// TODO: This shouldn't care if it's a display framebuf or not, should work exactly the same.
	const bool isDisplayBuf = addr == CurrentDisplayFramebufAddr() || addr == PrevDisplayFramebufAddr();

	// TODO: Deleting the FBO is a heavy hammer solution, so let's only do it if it'd help.
	if (!Memory::IsValidAddress(displayFramebufPtr_)) {
		return;
	}

	size_t index = 0;
	while (index < vfbs_.size()) {
		VirtualFramebuffer *vfb = vfbs_[index];
		if (vfb->fb_address != addr) {
			++index;
			continue;
		}

		FlushBeforeCopy();

		if (!useBufferedRendering_ || !vfb->fbo) {
			INFO_LOG(Log::FrameBuf, "Invalidating FBO for %08x (%dx%d %s)", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format));
			DestroyFramebuf(vfb);
			// After erasing, the next entry has moved into this slot, so don't advance.
			vfbs_.erase(vfbs_.begin() + index);
			continue;
		}

		GEBufferFormat fmt = vfb->fb_format;
		const bool notRecentlyRendered = vfb->last_frame_render + 1 < gpuStats.numFlips;
		if (notRecentlyRendered && isDisplayBuf) {
			// If we're not rendering to it, format may be wrong. Use displayFormat_ instead.
			// TODO: This doesn't seem quite right anymore.
			fmt = displayFormat_;
		}
		DrawPixels(vfb, 0, 0, Memory::GetPointerUnchecked(addr), fmt, vfb->fb_stride, vfb->width, vfb->height, RASTER_COLOR, "UpdateFromMemory_DrawPixels");
		SetColorUpdated(vfb, gstate_c.skipDrawReason);
		++index;
	}

	RebindFramebuffer("RebindFramebuffer - UpdateFromMemory");

	// TODO: Necessary?
	gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
1195
1196
// Uploads `srcPixels` (width x height, `srcStride` pixels per line, in `srcPixelFormat`)
// as a texture and draws it at (dstX, dstY).
// With buffered rendering and an FBO on `vfb`, it's drawn into that framebuffer (color or
// depth, per `channel`); otherwise it's drawn directly to the backbuffer (color only).
// `tag` is passed along as the name of the render target binding.
void FramebufferManagerCommon::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height, RasterChannel channel, const char *tag) {
	textureCache_->ForgetLastTexture();
	shaderManager_->DirtyLastShader();

	float u0 = 0.0f, u1 = 1.0f;
	float v0 = 0.0f, v1 = 1.0f;

	DrawTextureFlags flags;
	const bool drawToFramebuffer = useBufferedRendering_ && vfb && vfb->fbo;
	if (drawToFramebuffer) {
		const bool wantNearest = channel == RASTER_DEPTH || PSP_CoreParameter().compat.flags().NearestFilteringOnFramebufferCreate;
		flags = wantNearest ? DRAWTEX_NEAREST : DRAWTEX_LINEAR;

		draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
		SetViewport2D(0, 0, vfb->renderWidth, vfb->renderHeight);
		draw_->SetScissorRect(0, 0, vfb->renderWidth, vfb->renderHeight);
	} else {
		_dbg_assert_(channel == RASTER_COLOR);
		// We are drawing directly to the back buffer so need to flip.
		// Should more of this be handled by the presentation engine?
		if (needBackBufferYSwap_) {
			std::swap(v0, v1);
		}
		flags = g_Config.iDisplayFilter == SCALE_LINEAR ? DRAWTEX_LINEAR : DRAWTEX_NEAREST;
		flags = flags | DRAWTEX_TO_BACKBUFFER;

		// Fit a 480x272 output into the screen frame.
		FRect frame = GetScreenFrame(pixelWidth_, pixelHeight_);
		FRect rc;
		CalculateDisplayOutputRect(&rc, 480.0f, 272.0f, frame, ROTATION_LOCKED_HORIZONTAL);
		SetViewport2D(rc.x, rc.y, rc.w, rc.h);
		draw_->SetScissorRect(0, 0, pixelWidth_, pixelHeight_);
	}

	if (channel == RASTER_DEPTH) {
		_dbg_assert_(srcPixelFormat == GE_FORMAT_DEPTH16);
		flags = flags | DRAWTEX_DEPTH;
		if (vfb) {
			vfb->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
		}
	}

	Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, width, height);
	if (!pixelsTex) {
		// No texture could be made for this upload, nothing to draw.
		return;
	}

	draw_->BindTextures(0, 1, &pixelsTex, Draw::TextureBindFlags::VULKAN_BIND_ARRAY);

	// TODO: Replace with draw2D_.Blit() directly.
	DrawActiveTexture(dstX, dstY, width, height,
		vfb ? vfb->bufferWidth : g_display.pixel_xres,
		vfb ? vfb->bufferHeight : g_display.pixel_yres,
		u0, v0, u1, v1, ROTATION_LOCKED_HORIZONTAL, flags);

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
1249
1250
bool FramebufferManagerCommon::BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags, int layer) {
1251
if (!framebuffer->fbo || !useBufferedRendering_) {
1252
draw_->BindTexture(stage, nullptr);
1253
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1254
return false;
1255
}
1256
1257
// currentRenderVfb_ will always be set when this is called, except from the GE debugger.
1258
// Let's just not bother with the copy in that case.
1259
bool skipCopy = !(flags & BINDFBCOLOR_MAY_COPY);
1260
1261
// Currently rendering to this framebuffer. Need to make a copy.
1262
if (!skipCopy && framebuffer == currentRenderVfb_) {
1263
// Self-texturing, need a copy currently (some backends can potentially support it though).
1264
WARN_LOG_ONCE(selfTextureCopy, Log::G3D, "Attempting to texture from current render target (src=%08x / target=%08x / flags=%d), making a copy", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1265
// TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size.
1266
if (currentFramebufferCopy_ && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1267
// We have a copy already that hasn't been invalidated, let's keep using it.
1268
draw_->BindFramebufferAsTexture(currentFramebufferCopy_, stage, Draw::FB_COLOR_BIT, layer);
1269
return true;
1270
}
1271
1272
Draw::Framebuffer *renderCopy = GetTempFBO(TempFBO::COPY, framebuffer->renderWidth, framebuffer->renderHeight);
1273
if (renderCopy) {
1274
VirtualFramebuffer copyInfo = *framebuffer;
1275
copyInfo.fbo = renderCopy;
1276
1277
bool partial = false;
1278
CopyFramebufferForColorTexture(&copyInfo, framebuffer, flags, layer, &partial);
1279
RebindFramebuffer("After BindFramebufferAsColorTexture");
1280
draw_->BindFramebufferAsTexture(renderCopy, stage, Draw::FB_COLOR_BIT, layer);
1281
1282
// Only cache the copy if it wasn't a partial copy.
1283
// TODO: Improve on this.
1284
if (!partial && (flags & BINDFBCOLOR_UNCACHED) == 0) {
1285
currentFramebufferCopy_ = renderCopy;
1286
}
1287
gpuStats.numCopiesForSelfTex++;
1288
} else {
1289
// Failed to get temp FBO? Weird.
1290
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, layer);
1291
}
1292
return true;
1293
} else if (framebuffer != currentRenderVfb_ || (flags & BINDFBCOLOR_FORCE_SELF) != 0) {
1294
draw_->BindFramebufferAsTexture(framebuffer->fbo, stage, Draw::FB_COLOR_BIT, layer);
1295
return true;
1296
} else {
1297
// Here it's an error because for some reason skipCopy is true. That shouldn't really happen.
1298
ERROR_LOG_REPORT_ONCE(selfTextureFail, Log::G3D, "Attempting to texture from target (src=%08x / target=%08x / flags=%d)", framebuffer->fb_address, currentRenderVfb_->fb_address, flags);
1299
// To do this safely in Vulkan, we need to use input attachments.
1300
// Actually if the texture region and render regions don't overlap, this is safe, but we need
1301
// to transition to GENERAL image layout which will take some trickery.
1302
// Badness on D3D11 to bind the currently rendered-to framebuffer as a texture.
1303
draw_->BindTexture(stage, nullptr);
1304
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
1305
return false;
1306
}
1307
}
1308
1309
// Copies the drawn area of `src` into `dst` at the same coordinates, for use as a color texture.
// With BINDFBCOLOR_MAY_COPY_WITH_UV and valid vertex UV bounds, only the referenced region
// is copied. *partial is set to true if less than the full drawn area was copied.
// `layer` is not used by this function.
void FramebufferManagerCommon::CopyFramebufferForColorTexture(VirtualFramebuffer *dst, VirtualFramebuffer *src, int flags, int layer, bool *partial) {
	*partial = false;

	// Default to the whole drawn area.
	int copyX = 0;
	int copyY = 0;
	int copyW = src->drawnWidth;
	int copyH = src->drawnHeight;

	// If max is not > min, we probably could not detect it. Skip.
	// See the vertex decoder, where this is updated.
	// TODO: We're currently not hitting this path in Dante. See #17032
	const bool cropToUV = (flags & BINDFBCOLOR_MAY_COPY_WITH_UV) == BINDFBCOLOR_MAY_COPY_WITH_UV;
	if (cropToUV && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
		copyX = std::max(gstate_c.vertBounds.minU, (u16)0);
		copyY = std::max(gstate_c.vertBounds.minV, (u16)0);
		copyW = std::min(gstate_c.vertBounds.maxU, src->drawnWidth) - copyX;
		copyH = std::min(gstate_c.vertBounds.maxV, src->drawnHeight) - copyY;

		// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
		if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
			copyX += gstate_c.curTextureXOffset;
			copyY += gstate_c.curTextureYOffset;
		}

		// We'll have to reapply these next time since we cropped to UV.
		gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
	}

	const bool startsInside = copyX < src->drawnWidth && copyY < src->drawnHeight;
	if (!startsInside || copyW <= 0 || copyH <= 0) {
		// Nothing to copy.
		return;
	}

	const bool coversEverything = copyX == 0 && copyY == 0 && copyW >= src->drawnWidth && copyH >= src->drawnHeight;
	if (!coversEverything) {
		*partial = true;
	}
	BlitFramebuffer(dst, copyX, copyY, src, copyX, copyY, copyW, copyH, 0, RASTER_COLOR, "CopyFBForColorTexture");
}
1343
1344
Draw::Texture *FramebufferManagerCommon::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
1345
Draw::DataFormat depthFormat = Draw::DataFormat::UNDEFINED;
1346
1347
int bpp = BufferFormatBytesPerPixel(srcPixelFormat);
1348
int srcStrideInBytes = srcStride * bpp;
1349
int widthInBytes = width * bpp;
1350
1351
// Compute hash of contents.
1352
uint64_t imageHash;
1353
if (widthInBytes == srcStrideInBytes) {
1354
imageHash = XXH3_64bits(srcPixels, widthInBytes * height);
1355
} else {
1356
XXH3_state_t *hashState = XXH3_createState();
1357
XXH3_64bits_reset(hashState);
1358
for (int y = 0; y < height; y++) {
1359
XXH3_64bits_update(hashState, srcPixels + srcStrideInBytes * y, widthInBytes);
1360
}
1361
imageHash = XXH3_64bits_digest(hashState);
1362
XXH3_freeState(hashState);
1363
}
1364
1365
Draw::DataFormat texFormat = preferredPixelsFormat_;
1366
1367
if (srcPixelFormat == GE_FORMAT_DEPTH16) {
1368
if ((draw_->GetDataFormatSupport(Draw::DataFormat::R16_UNORM) & Draw::FMT_TEXTURE) != 0) {
1369
texFormat = Draw::DataFormat::R16_UNORM;
1370
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R8_UNORM) & Draw::FMT_TEXTURE) != 0) {
1371
// This could be improved by using specific draw shaders to pack full precision in two channels.
1372
// However, not really worth the trouble until we find a game that requires it.
1373
texFormat = Draw::DataFormat::R8_UNORM;
1374
} else {
1375
// No usable single channel format. Can't be bothered.
1376
return nullptr;
1377
}
1378
} else if (srcPixelFormat == GE_FORMAT_565) {
1379
// Check for supported matching formats.
1380
// This mainly benefits the redundant copies in God of War on low-end platforms.
1381
if ((draw_->GetDataFormatSupport(Draw::DataFormat::B5G6R5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1382
texFormat = Draw::DataFormat::B5G6R5_UNORM_PACK16;
1383
} else if ((draw_->GetDataFormatSupport(Draw::DataFormat::R5G6B5_UNORM_PACK16) & Draw::FMT_TEXTURE) != 0) {
1384
texFormat = Draw::DataFormat::R5G6B5_UNORM_PACK16;
1385
}
1386
}
1387
1388
// TODO: We can just change the texture format and flip some bits around instead of this.
1389
// Could share code with the texture cache perhaps.
1390
auto generateTexture = [&](uint8_t *data, const uint8_t *initData, uint32_t w, uint32_t h, uint32_t d, uint32_t byteStride, uint32_t sliceByteStride) {
1391
for (int y = 0; y < height; y++) {
1392
const u16_le *src16 = (const u16_le *)srcPixels + srcStride * y;
1393
const u32_le *src32 = (const u32_le *)srcPixels + srcStride * y;
1394
u32 *dst = (u32 *)(data + byteStride * y);
1395
u16 *dst16 = (u16 *)(data + byteStride * y);
1396
u8 *dst8 = (u8 *)(data + byteStride * y);
1397
switch (srcPixelFormat) {
1398
case GE_FORMAT_565:
1399
if (texFormat == Draw::DataFormat::B5G6R5_UNORM_PACK16) {
1400
memcpy(dst16, src16, w * sizeof(uint16_t));
1401
} else if (texFormat == Draw::DataFormat::R5G6B5_UNORM_PACK16) {
1402
ConvertRGB565ToBGR565(dst16, src16, width); // Fast!
1403
} else if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
1404
ConvertRGB565ToBGRA8888(dst, src16, width);
1405
} else {
1406
ConvertRGB565ToRGBA8888(dst, src16, width);
1407
}
1408
break;
1409
1410
case GE_FORMAT_5551:
1411
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1412
ConvertRGBA5551ToBGRA8888(dst, src16, width);
1413
else
1414
ConvertRGBA5551ToRGBA8888(dst, src16, width);
1415
break;
1416
1417
case GE_FORMAT_4444:
1418
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1419
ConvertRGBA4444ToBGRA8888(dst, src16, width);
1420
else
1421
ConvertRGBA4444ToRGBA8888(dst, src16, width);
1422
break;
1423
1424
case GE_FORMAT_8888:
1425
if (texFormat == Draw::DataFormat::B8G8R8A8_UNORM)
1426
ConvertRGBA8888ToBGRA8888(dst, src32, width);
1427
// This means use original pointer as-is. May avoid or optimize a copy.
1428
else if (srcStride == width)
1429
return false;
1430
else
1431
memcpy(dst, src32, width * 4);
1432
break;
1433
1434
case GE_FORMAT_DEPTH16:
1435
// TODO: Must take the depth range into account, unless it's already 0-1.
1436
// TODO: Depending on the color buffer format used with this depth buffer, we need
1437
// to do one of two different swizzle operations. However, for the only use of this so far,
1438
// the Burnout lens flare trickery, swizzle doesn't matter since it's just a 0, 7fff, 0, 7fff pattern
1439
// which comes out the same.
1440
if (texFormat == Draw::DataFormat::R16_UNORM) {
1441
// We just use this format straight.
1442
memcpy(dst16, src16, w * 2);
1443
} else if (texFormat == Draw::DataFormat::R8_UNORM) {
1444
// We fall back to R8_UNORM. Precision is enough for most cases of depth clearing and initialization we've seen,
1445
// but hardly ideal.
1446
for (int i = 0; i < width; i++) {
1447
dst8[i] = src16[i] >> 8;
1448
}
1449
}
1450
break;
1451
1452
case GE_FORMAT_INVALID:
1453
case GE_FORMAT_CLUT8:
1454
// Bad
1455
break;
1456
}
1457
}
1458
return true;
1459
};
1460
1461
int frameNumber = draw_->GetFrameCount();
1462
1463
// First look for an exact match (including contents hash) that we can re-use.
1464
for (auto &iter : drawPixelsCache_) {
1465
if (iter.contentsHash == imageHash && iter.tex->Width() == width && iter.tex->Height() == height && iter.tex->Format() == texFormat) {
1466
iter.frameNumber = frameNumber;
1467
gpuStats.numCachedUploads++;
1468
return iter.tex;
1469
}
1470
}
1471
1472
// Then, look for an alternative one that's not been used recently that we can overwrite.
1473
for (auto &iter : drawPixelsCache_) {
1474
if (iter.frameNumber >= frameNumber - 3 || iter.tex->Width() != width || iter.tex->Height() != height || iter.tex->Format() != texFormat) {
1475
continue;
1476
}
1477
1478
// OK, current one seems good, let's use it (and mark it used).
1479
gpuStats.numUploads++;
1480
draw_->UpdateTextureLevels(iter.tex, &srcPixels, generateTexture, 1);
1481
// NOTE: numFlips is no good - this is called every frame when paused sometimes!
1482
iter.frameNumber = frameNumber;
1483
// We need to update the hash for future matching.
1484
iter.contentsHash = imageHash;
1485
return iter.tex;
1486
}
1487
1488
// Note: For depth, we create an R16_UNORM texture, that'll be just fine for uploading depth through a shader,
1489
// and likely more efficient.
1490
Draw::TextureDesc desc{
1491
Draw::TextureType::LINEAR2D,
1492
texFormat,
1493
width,
1494
height,
1495
1,
1496
1,
1497
false,
1498
Draw::TextureSwizzle::DEFAULT,
1499
"DrawPixels",
1500
{ (uint8_t *)srcPixels },
1501
generateTexture,
1502
};
1503
1504
// Hot Shots Golf (#12355) does tons of these in a frame in some situations! So creating textures
1505
// better be fast. So does God of War, a lot of the time, a bit unclear what it's doing.
1506
Draw::Texture *tex = draw_->CreateTexture(desc);
1507
if (!tex) {
1508
ERROR_LOG(Log::G3D, "Failed to create DrawPixels texture");
1509
}
1510
// We don't need to count here, already counted by numUploads by the caller.
1511
1512
// INFO_LOG(Log::G3D, "Creating drawPixelsCache texture: %dx%d", tex->Width(), tex->Height());
1513
1514
DrawPixelsEntry entry{ tex, imageHash, frameNumber };
1515
drawPixelsCache_.push_back(entry);
1516
gpuStats.numUploads++;
1517
return tex;
1518
}
1519
1520
void FramebufferManagerCommon::DrawFramebufferToOutput(const u8 *srcPixels, int srcStride, GEBufferFormat srcPixelFormat) {
1521
textureCache_->ForgetLastTexture();
1522
shaderManager_->DirtyLastShader();
1523
1524
float u0 = 0.0f, u1 = 480.0f / 512.0f;
1525
float v0 = 0.0f, v1 = 1.0f;
1526
Draw::Texture *pixelsTex = MakePixelTexture(srcPixels, srcPixelFormat, srcStride, 512, 272);
1527
if (!pixelsTex)
1528
return;
1529
1530
int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1531
OutputFlags flags = g_Config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1532
if (needBackBufferYSwap_) {
1533
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1534
}
1535
// CopyToOutput reverses these, probably to match "up".
1536
if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
1537
flags |= OutputFlags::POSITION_FLIPPED;
1538
}
1539
1540
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1541
presentation_->SourceTexture(pixelsTex, 512, 272);
1542
presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1543
1544
// PresentationCommon sets all kinds of state, we can't rely on anything.
1545
gstate_c.Dirty(DIRTY_ALL);
1546
1547
DiscardFramebufferCopy();
1548
currentRenderVfb_ = nullptr;
1549
}
1550
1551
void FramebufferManagerCommon::SetViewport2D(int x, int y, int w, int h) {
1552
Draw::Viewport viewport{ (float)x, (float)y, (float)w, (float)h, 0.0f, 1.0f };
1553
draw_->SetViewport(viewport);
1554
}
1555
1556
void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
1557
DownloadFramebufferOnSwitch(currentRenderVfb_);
1558
shaderManager_->DirtyLastShader();
1559
1560
if (displayFramebufPtr_ == 0) {
1561
if (GetUIState() != UISTATE_PAUSEMENU) {
1562
if (Core_IsStepping())
1563
VERBOSE_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1564
else
1565
DEBUG_LOG(Log::FrameBuf, "Display disabled, displaying only black");
1566
}
1567
// No framebuffer to display! Clear to black.
1568
if (useBufferedRendering_) {
1569
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput");
1570
presentation_->NotifyPresent();
1571
}
1572
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1573
return;
1574
}
1575
1576
u32 offsetX = 0;
1577
u32 offsetY = 0;
1578
1579
// If it's not really dirty, we're probably frameskipping. Use the last working one.
1580
u32 fbaddr = reallyDirty ? displayFramebufPtr_ : prevDisplayFramebufPtr_;
1581
prevDisplayFramebufPtr_ = fbaddr;
1582
1583
VirtualFramebuffer *vfb = ResolveVFB(fbaddr, displayStride_, displayFormat_);
1584
if (!vfb) {
1585
// Let's search for a framebuf within this range. Note that we also look for
1586
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
1587
// and uncached bits of the address when comparing.
1588
const u32 addr = fbaddr;
1589
for (auto v : vfbs_) {
1590
const u32 v_addr = v->fb_address;
1591
const u32 v_size = v->BufferByteSize(RASTER_COLOR);
1592
1593
if (v->fb_format != displayFormat_ || v->fb_stride != displayStride_) {
1594
// Displaying a buffer of the wrong format or stride is nonsense, ignore it.
1595
continue;
1596
}
1597
1598
if (addr >= v_addr && addr < v_addr + v_size) {
1599
const u32 dstBpp = BufferFormatBytesPerPixel(v->fb_format);
1600
const u32 v_offsetX = ((addr - v_addr) / dstBpp) % v->fb_stride;
1601
const u32 v_offsetY = ((addr - v_addr) / dstBpp) / v->fb_stride;
1602
// We have enough space there for the display, right?
1603
if (v_offsetX + 480 > (u32)v->fb_stride || v->bufferHeight < v_offsetY + 272) {
1604
continue;
1605
}
1606
// Check for the closest one.
1607
if (offsetY == 0 || offsetY > v_offsetY) {
1608
offsetX = v_offsetX;
1609
offsetY = v_offsetY;
1610
vfb = v;
1611
}
1612
}
1613
}
1614
1615
if (vfb) {
1616
// Okay, we found one above.
1617
// Log should be "Displaying from framebuf" but not worth changing the report.
1618
INFO_LOG_REPORT_ONCE(displayoffset, Log::FrameBuf, "Rendering from framebuf with offset %08x -> %08x+%dx%d", addr, vfb->fb_address, offsetX, offsetY);
1619
}
1620
}
1621
1622
// Reject too-tiny framebuffers to display (Godfather, see issue #16915).
1623
if (vfb && vfb->height < 64) {
1624
vfb = nullptr;
1625
}
1626
1627
if (!vfb) {
1628
if (Memory::IsValidAddress(fbaddr)) {
1629
// The game is displaying something directly from RAM. In GTA, it's decoded video.
1630
DrawFramebufferToOutput(Memory::GetPointerUnchecked(fbaddr), displayStride_, displayFormat_);
1631
return;
1632
} else {
1633
DEBUG_LOG(Log::FrameBuf, "Found no FBO to display! displayFBPtr = %08x", fbaddr);
1634
// No framebuffer to display! Clear to black.
1635
if (useBufferedRendering_) {
1636
// Bind and clear the backbuffer. This should be the first time during the frame that it's bound.
1637
draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "CopyDisplayToOutput_NoFBO");
1638
}
1639
gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE);
1640
return;
1641
}
1642
}
1643
1644
vfb->usageFlags |= FB_USAGE_DISPLAYED_FRAMEBUFFER;
1645
vfb->last_frame_displayed = gpuStats.numFlips;
1646
vfb->dirtyAfterDisplay = false;
1647
vfb->reallyDirtyAfterDisplay = false;
1648
1649
if (prevDisplayFramebuf_ != displayFramebuf_) {
1650
prevPrevDisplayFramebuf_ = prevDisplayFramebuf_;
1651
}
1652
if (displayFramebuf_ != vfb) {
1653
prevDisplayFramebuf_ = displayFramebuf_;
1654
}
1655
displayFramebuf_ = vfb;
1656
1657
if (vfb->fbo) {
1658
if (GetUIState() != UISTATE_PAUSEMENU) {
1659
if (Core_IsStepping())
1660
VERBOSE_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1661
else
1662
DEBUG_LOG(Log::FrameBuf, "Displaying FBO %08x", vfb->fb_address);
1663
}
1664
1665
float u0 = offsetX / (float)vfb->bufferWidth;
1666
float v0 = offsetY / (float)vfb->bufferHeight;
1667
float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth;
1668
float v1 = (272.0f + offsetY) / (float)vfb->bufferHeight;
1669
1670
//clip the VR framebuffer to keep the aspect ratio
1671
if (IsVREnabled() && !IsFlatVRGame() && !IsGameVRScene()) {
1672
float aspect = 272.0f / 480.0f * (IsImmersiveVRMode() ? 2.0f : 1.0f);
1673
float clipY = 272.0f * (1.0f - aspect) / 2.0f;
1674
v0 = (clipY + offsetY) / (float)vfb->bufferHeight;
1675
v1 = (272.0f - clipY + offsetY) / (float)vfb->bufferHeight;
1676
1677
//zoom inside
1678
float zoom = IsImmersiveVRMode() ? 0.4f : 0.1f;
1679
u0 += zoom / aspect;
1680
u1 -= zoom / aspect;
1681
v0 += zoom;
1682
v1 -= zoom;
1683
}
1684
1685
textureCache_->ForgetLastTexture();
1686
1687
int uvRotation = useBufferedRendering_ ? g_Config.iInternalScreenRotation : ROTATION_LOCKED_HORIZONTAL;
1688
OutputFlags flags = g_Config.iDisplayFilter == SCALE_LINEAR ? OutputFlags::LINEAR : OutputFlags::NEAREST;
1689
if (needBackBufferYSwap_) {
1690
flags |= OutputFlags::BACKBUFFER_FLIPPED;
1691
}
1692
// DrawActiveTexture reverses these, probably to match "up".
1693
if (GetGPUBackend() == GPUBackend::DIRECT3D9 || GetGPUBackend() == GPUBackend::DIRECT3D11) {
1694
flags |= OutputFlags::POSITION_FLIPPED;
1695
}
1696
1697
int actualWidth = (vfb->bufferWidth * vfb->renderWidth) / vfb->width;
1698
int actualHeight = (vfb->bufferHeight * vfb->renderHeight) / vfb->height;
1699
presentation_->UpdateUniforms(textureCache_->VideoIsPlaying());
1700
presentation_->SourceFramebuffer(vfb->fbo, actualWidth, actualHeight);
1701
presentation_->CopyToOutput(flags, uvRotation, u0, v0, u1, v1);
1702
} else if (useBufferedRendering_) {
1703
WARN_LOG(Log::FrameBuf, "Current VFB lacks an FBO: %08x", vfb->fb_address);
1704
}
1705
1706
// This may get called mid-draw if the game uses an immediate flip.
1707
// PresentationCommon sets all kinds of state, we can't rely on anything.
1708
gstate_c.Dirty(DIRTY_ALL);
1709
DiscardFramebufferCopy();
1710
currentRenderVfb_ = nullptr;
1711
}
1712
1713
void FramebufferManagerCommon::DecimateFBOs() {
1714
DiscardFramebufferCopy();
1715
currentRenderVfb_ = nullptr;
1716
1717
for (auto iter : fbosToDelete_) {
1718
iter->Release();
1719
}
1720
fbosToDelete_.clear();
1721
1722
for (size_t i = 0; i < vfbs_.size(); ++i) {
1723
VirtualFramebuffer *vfb = vfbs_[i];
1724
int age = frameLastFramebufUsed_ - std::max(vfb->last_frame_render, vfb->last_frame_used);
1725
1726
if (ShouldDownloadFramebufferColor(vfb) && age == 0 && !vfb->memoryUpdated) {
1727
ReadFramebufferToMemory(vfb, 0, 0, vfb->width, vfb->height, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
1728
vfb->usageFlags = (vfb->usageFlags | FB_USAGE_DOWNLOAD | FB_USAGE_FIRST_FRAME_SAVED) & ~FB_USAGE_DOWNLOAD_CLEAR;
1729
}
1730
1731
// Let's also "decimate" the usageFlags.
1732
UpdateFramebufUsage(vfb);
1733
1734
if (vfb != displayFramebuf_ && vfb != prevDisplayFramebuf_ && vfb != prevPrevDisplayFramebuf_) {
1735
if (age > FBO_OLD_AGE) {
1736
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%ix%i %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1737
DestroyFramebuf(vfb);
1738
vfbs_.erase(vfbs_.begin() + i--);
1739
}
1740
}
1741
}
1742
1743
for (auto it = tempFBOs_.begin(); it != tempFBOs_.end(); ) {
1744
int age = frameLastFramebufUsed_ - it->second.last_frame_used;
1745
if (age > FBO_OLD_AGE) {
1746
it->second.fbo->Release();
1747
it = tempFBOs_.erase(it);
1748
} else {
1749
++it;
1750
}
1751
}
1752
1753
// Do the same for ReadFramebuffersToMemory's VFBs
1754
for (size_t i = 0; i < bvfbs_.size(); ++i) {
1755
VirtualFramebuffer *vfb = bvfbs_[i];
1756
int age = frameLastFramebufUsed_ - vfb->last_frame_render;
1757
if (age > FBO_OLD_AGE) {
1758
INFO_LOG(Log::FrameBuf, "Decimating FBO for %08x (%dx%d %s), age %i", vfb->fb_address, vfb->width, vfb->height, GeBufferFormatToString(vfb->fb_format), age);
1759
DestroyFramebuf(vfb);
1760
bvfbs_.erase(bvfbs_.begin() + i--);
1761
}
1762
}
1763
1764
// And DrawPixels cached textures.
1765
1766
for (auto it = drawPixelsCache_.begin(); it != drawPixelsCache_.end(); ) {
1767
int age = draw_->GetFrameCount() - it->frameNumber;
1768
if (age > 10) {
1769
// INFO_LOG(Log::G3D, "Releasing drawPixelsCache texture: %dx%d", it->tex->Width(), it->tex->Height());
1770
it->tex->Release();
1771
it->tex = nullptr;
1772
it = drawPixelsCache_.erase(it);
1773
} else {
1774
++it;
1775
}
1776
}
1777
}
1778
1779
// Requires width/height to be set already.
1780
void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w, int h, bool force, bool skipCopy) {
1781
_dbg_assert_(w > 0);
1782
_dbg_assert_(h > 0);
1783
VirtualFramebuffer old = *vfb;
1784
1785
int oldWidth = vfb->bufferWidth;
1786
int oldHeight = vfb->bufferHeight;
1787
1788
if (force) {
1789
vfb->bufferWidth = w;
1790
vfb->bufferHeight = h;
1791
} else {
1792
if (vfb->bufferWidth >= w && vfb->bufferHeight >= h) {
1793
return;
1794
}
1795
1796
// In case it gets thin and wide, don't resize down either side.
1797
vfb->bufferWidth = std::max((int)vfb->bufferWidth, w);
1798
vfb->bufferHeight = std::max((int)vfb->bufferHeight, h);
1799
}
1800
1801
bool force1x = false;
1802
switch (bloomHack_) {
1803
case 1:
1804
force1x = vfb->bufferWidth <= 128 || vfb->bufferHeight <= 64;
1805
break;
1806
case 2:
1807
force1x = vfb->bufferWidth <= 256 || vfb->bufferHeight <= 128;
1808
break;
1809
case 3:
1810
force1x = vfb->bufferWidth < 480 || vfb->bufferWidth > 800 || vfb->bufferHeight < 272; // GOW uses 864x272
1811
break;
1812
}
1813
1814
if ((vfb->usageFlags & FB_USAGE_COLOR_MIXED_DEPTH) && !PSP_CoreParameter().compat.flags().ForceLowerResolutionForEffectsOn) {
1815
force1x = false;
1816
}
1817
if (PSP_CoreParameter().compat.flags().Force04154000Download && vfb->fb_address == 0x04154000) {
1818
force1x = true;
1819
}
1820
1821
if (force1x && g_Config.iInternalResolution != 1) {
1822
vfb->renderScaleFactor = 1;
1823
vfb->renderWidth = vfb->bufferWidth;
1824
vfb->renderHeight = vfb->bufferHeight;
1825
} else {
1826
vfb->renderScaleFactor = renderScaleFactor_;
1827
vfb->renderWidth = (u16)(vfb->bufferWidth * renderScaleFactor_);
1828
vfb->renderHeight = (u16)(vfb->bufferHeight * renderScaleFactor_);
1829
}
1830
1831
bool creating = old.bufferWidth == 0;
1832
if (creating) {
1833
WARN_LOG(Log::FrameBuf, "Creating %s FBO at %08x/%08x stride=%d %dx%d (force=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, vfb->bufferWidth, vfb->bufferHeight, (int)force);
1834
} else {
1835
WARN_LOG(Log::FrameBuf, "Resizing %s FBO at %08x/%08x stride=%d from %dx%d to %dx%d (force=%d, skipCopy=%d)", GeBufferFormatToString(vfb->fb_format), vfb->fb_address, vfb->z_address, vfb->fb_stride, old.bufferWidth, old.bufferHeight, vfb->bufferWidth, vfb->bufferHeight, (int)force, (int)skipCopy);
1836
}
1837
1838
// During hardware rendering, we always render at full color depth even if the game wouldn't on real hardware.
1839
// It's not worth the trouble trying to support lower bit-depth rendering, just
1840
// more cases to test that nobody will ever use.
1841
1842
textureCache_->ForgetLastTexture();
1843
1844
if (!useBufferedRendering_) {
1845
if (vfb->fbo) {
1846
vfb->fbo->Release();
1847
vfb->fbo = nullptr;
1848
}
1849
return;
1850
}
1851
if (!old.fbo && vfb->last_frame_failed != 0 && vfb->last_frame_failed - gpuStats.numFlips < 63) {
1852
// Don't constantly retry FBOs which failed to create.
1853
return;
1854
}
1855
1856
shaderManager_->DirtyLastShader();
1857
char tag[128];
1858
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
1859
1860
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
1861
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
1862
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, vfb->BufferByteSize(RASTER_COLOR), tag, len);
1863
}
1864
if (Memory::IsVRAMAddress(vfb->z_address) && vfb->z_stride != 0) {
1865
char buf[128];
1866
size_t len = snprintf(buf, sizeof(buf), "Z_%s", tag);
1867
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->z_address, vfb->z_stride * vfb->height * sizeof(uint16_t), buf, len);
1868
}
1869
if (old.fbo) {
1870
INFO_LOG(Log::FrameBuf, "Resizing FBO for %08x : %dx%dx%s", vfb->fb_address, w, h, GeBufferFormatToString(vfb->fb_format));
1871
if (vfb->fbo) {
1872
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1873
if (!skipCopy) {
1874
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_COLOR, "BlitColor_ResizeFramebufFBO");
1875
}
1876
if (vfb->usageFlags & FB_USAGE_RENDER_DEPTH) {
1877
BlitFramebuffer(vfb, 0, 0, &old, 0, 0, std::min((u16)oldWidth, std::min(vfb->bufferWidth, vfb->width)), std::min((u16)oldHeight, std::min(vfb->height, vfb->bufferHeight)), 0, RASTER_DEPTH, "BlitDepth_ResizeFramebufFBO");
1878
}
1879
}
1880
fbosToDelete_.push_back(old.fbo);
1881
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "ResizeFramebufFBO");
1882
} else {
1883
draw_->BindFramebufferAsRenderTarget(vfb->fbo, { Draw::RPAction::CLEAR, Draw::RPAction::CLEAR, Draw::RPAction::CLEAR }, "ResizeFramebufFBO");
1884
}
1885
DiscardFramebufferCopy();
1886
currentRenderVfb_ = vfb;
1887
1888
if (!vfb->fbo) {
1889
ERROR_LOG(Log::FrameBuf, "Error creating FBO during resize! %dx%d", vfb->renderWidth, vfb->renderHeight);
1890
vfb->last_frame_failed = gpuStats.numFlips;
1891
}
1892
}
1893
1894
struct CopyCandidate {
1895
VirtualFramebuffer *vfb = nullptr;
1896
int y;
1897
int h;
1898
1899
std::string ToString(RasterChannel channel) const {
1900
return StringFromFormat("%08x %s %dx%d y=%d h=%d", vfb->Address(channel), GeBufferFormatToString(vfb->Format(channel)), vfb->width, vfb->height, y, h);
1901
}
1902
};
1903
1904
// Selects one candidate from the set, or returns nullptr when the set is empty.
// Scanning in order, a candidate replaces the current pick if it starts on a lower row,
// or otherwise if its framebuffer has a higher bind sequence number for the channel.
// basePtr is not consulted.
static const CopyCandidate *GetBestCopyCandidate(const TinySet<CopyCandidate, 4> &candidates, uint32_t basePtr, RasterChannel channel) {
	const CopyCandidate *winner = nullptr;

	const size_t count = candidates.size();
	for (size_t idx = 0; idx < count; ++idx) {
		const CopyCandidate &contender = candidates[idx];

		// Heuristics carried over from the old algorithm:
		// * The first entry always gets picked.
		// * Lower yOffsets are prioritized.
		// * Failing that, a more recent bind sequence wins.
		if (winner == nullptr
			|| contender.y < winner->y
			|| contender.vfb->BindSeq(channel) > winner->vfb->BindSeq(channel)) {
			winner = &contender;
		}
	}

	return winner;
}
1928
1929
// This is called from detected memcopies and framebuffer initialization from VRAM. Not block transfers.
1930
// Also with specialized flags from some replacement functions. Only those will currently request depth copies!
1931
// NOTE: This is very tricky because there's no information about color depth here, so we'll have to make guesses
1932
// about what underlying framebuffer is the most likely to be the relevant ones. For src, we can probably prioritize recent
1933
// ones. For dst, less clear.
1934
bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size, GPUCopyFlag flags, u32 skipDrawReason) {
1935
if (size == 0) {
1936
return false;
1937
}
1938
1939
dst &= 0x3FFFFFFF;
1940
src &= 0x3FFFFFFF;
1941
1942
if (Memory::IsVRAMAddress(dst))
1943
dst &= 0x041FFFFF;
1944
if (Memory::IsVRAMAddress(src))
1945
src &= 0x041FFFFF;
1946
1947
// TODO: Merge the below into FindTransferFramebuffer.
1948
// Or at least this should be like the other ones, gathering possible candidates
1949
// with the ability to list them out for debugging.
1950
1951
bool ignoreDstBuffer = flags & GPUCopyFlag::FORCE_DST_MATCH_MEM;
1952
bool ignoreSrcBuffer = flags & (GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::MEMSET);
1953
1954
// TODO: In the future we should probably check both channels. Currently depth is only on request.
1955
RasterChannel channel = (flags & GPUCopyFlag::DEPTH_REQUESTED) ? RASTER_DEPTH : RASTER_COLOR;
1956
1957
TinySet<CopyCandidate, 4> srcCandidates;
1958
TinySet<CopyCandidate, 4> dstCandidates;
1959
1960
// TODO: These two loops should be merged into one utility function, similar to what's done with rectangle copies.
1961
1962
// First find candidates for the source.
1963
// We only look at the color channel for now.
1964
for (auto vfb : vfbs_) {
1965
if (vfb->fb_stride == 0 || ignoreSrcBuffer) {
1966
continue;
1967
}
1968
1969
// We only remove the kernel and uncached bits when comparing.
1970
const u32 vfb_address = vfb->Address(channel);
1971
const u32 vfb_size = vfb->BufferByteSize(channel);
1972
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
1973
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
1974
1975
CopyCandidate srcCandidate;
1976
srcCandidate.vfb = vfb;
1977
1978
// Special path for depth for now.
1979
if (channel == RASTER_DEPTH) {
1980
if (src == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
1981
srcCandidate.y = 0;
1982
srcCandidate.h = vfb->height;
1983
srcCandidates.push_back(srcCandidate);
1984
}
1985
continue;
1986
}
1987
1988
if (src >= vfb_address && (src + size <= vfb_address + vfb_size || src == vfb_address)) {
1989
// Heuristic originally from dest below, but just as valid looking for the source.
1990
// Fixes a misdetection in Brothers in Arms: D-Day, issue #18512.
1991
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
1992
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
1993
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
1994
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
1995
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for source in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
1996
continue;
1997
}
1998
1999
if ((u32)size > vfb_size + 0x1000 && vfb->fb_format != GE_FORMAT_8888 && vfb->last_frame_render < gpuStats.numFlips) {
2000
// Seems likely we are looking at a potential copy of 32-bit pixels (like video) to an old 16-bit buffer,
2001
// which is very likely simply the wrong target, so skip it. See issue #17740 where this happens in Naruto Ultimate Ninja Heroes 2.
2002
// Probably no point to give it a bad score and let it pass to sorting, as we're pretty sure here.
2003
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x too small for %08x bytes of data and also 16-bit (%s), and not rendered to this frame. Ignoring.", vfb_size, size, GeBufferFormatToString(vfb->fb_format));
2004
continue;
2005
}
2006
2007
const u32 offset = src - vfb_address;
2008
const u32 yOffset = offset / vfb_byteStride;
2009
if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2010
srcCandidate.y = yOffset;
2011
srcCandidate.h = size == vfb_byteWidth ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2012
} else if ((offset % vfb_byteStride) == 0 && size == vfb->fb_stride) {
2013
// Valkyrie Profile reads 512 bytes at a time, rather than 2048. So, let's whitelist fb_stride also.
2014
srcCandidate.y = yOffset;
2015
srcCandidate.h = 1;
2016
} else if (yOffset == 0 && (vfb->usageFlags & FB_USAGE_CLUT)) {
2017
// Okay, last try - it might be a clut.
2018
srcCandidate.y = yOffset;
2019
srcCandidate.h = 1;
2020
} else {
2021
continue;
2022
}
2023
srcCandidates.push_back(srcCandidate);
2024
}
2025
}
2026
2027
for (auto vfb : vfbs_) {
2028
if (vfb->fb_stride == 0 || ignoreDstBuffer) {
2029
continue;
2030
}
2031
2032
// We only remove the kernel and uncached bits when comparing.
2033
const u32 vfb_address = vfb->Address(channel);
2034
const u32 vfb_size = vfb->BufferByteSize(channel);
2035
const u32 vfb_byteStride = vfb->BufferByteStride(channel);
2036
const int vfb_byteWidth = vfb->BufferByteWidth(channel);
2037
2038
// Heuristic to try to prevent potential glitches with video playback.
2039
if (vfb_address == dst && ((size == 0x44000 && vfb_size == 0x88000) || (size == 0x88000 && vfb_size == 0x44000))) {
2040
// Not likely to be a correct color format copy for this buffer. Ignore it, there will either be RAM
2041
// that can be displayed from, or another matching buffer with the right format if rendering is going on.
2042
// If we had scoring here, we should strongly penalize this target instead of ignoring it.
2043
WARN_LOG_N_TIMES(notify_copy_2x, 5, Log::FrameBuf, "Framebuffer size %08x conspicuously not matching copy size %08x for dest in NotifyFramebufferCopy. Ignoring.", size, vfb_size);
2044
continue;
2045
}
2046
2047
CopyCandidate dstCandidate;
2048
dstCandidate.vfb = vfb;
2049
2050
// Special path for depth for now.
2051
if (channel == RASTER_DEPTH) {
2052
// Let's assume exact matches only for simplicity.
2053
if (dst == vfb->z_address && size == vfb->z_stride * 2 * vfb->height) {
2054
dstCandidate.y = 0;
2055
dstCandidate.h = vfb->height;
2056
dstCandidates.push_back(dstCandidate);
2057
}
2058
continue;
2059
}
2060
2061
if (!ignoreDstBuffer && dst >= vfb_address && (dst + size <= vfb_address + vfb_size || dst == vfb_address)) {
2062
const u32 offset = dst - vfb_address;
2063
const u32 yOffset = offset / vfb_byteStride;
2064
if ((offset % vfb_byteStride) == 0 && (size == vfb_byteWidth || (size % vfb_byteStride) == 0)) {
2065
dstCandidate.y = yOffset;
2066
dstCandidate.h = (size == vfb_byteWidth) ? 1 : std::min((u32)size / vfb_byteStride, (u32)vfb->height);
2067
dstCandidates.push_back(dstCandidate);
2068
}
2069
}
2070
}
2071
2072
// For now fill in these old variables from the candidates to reduce the initial diff.
2073
VirtualFramebuffer *dstBuffer = nullptr;
2074
VirtualFramebuffer *srcBuffer = nullptr;
2075
int srcY;
2076
int srcH;
2077
int dstY;
2078
int dstH;
2079
2080
const CopyCandidate *bestSrc = GetBestCopyCandidate(srcCandidates, src, channel);
2081
if (bestSrc) {
2082
srcBuffer = bestSrc->vfb;
2083
srcY = bestSrc->y;
2084
srcH = bestSrc->h;
2085
}
2086
const CopyCandidate *bestDst = GetBestCopyCandidate(dstCandidates, dst, channel);
2087
if (bestDst) {
2088
dstBuffer = bestDst->vfb;
2089
dstY = bestDst->y;
2090
dstH = bestDst->h;
2091
}
2092
2093
if (srcCandidates.size() > 1) {
2094
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2095
std::string log;
2096
for (size_t i = 0; i < srcCandidates.size(); i++) {
2097
log += " - " + srcCandidates[i].ToString(channel);
2098
if (bestSrc && srcCandidates[i].vfb == bestSrc->vfb) {
2099
log += " * \n";
2100
} else {
2101
log += "\n";
2102
}
2103
}
2104
WARN_LOG(Log::FrameBuf, "Copy: Multiple src vfb candidates for (src: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2105
}
2106
}
2107
2108
if (dstCandidates.size() > 1) {
2109
if (Reporting::ShouldLogNTimes("mulblock", 5)) {
2110
std::string log;
2111
for (size_t i = 0; i < dstCandidates.size(); i++) {
2112
log += " - " + dstCandidates[i].ToString(channel);
2113
if (bestDst && dstCandidates[i].vfb == bestDst->vfb) {
2114
log += " * \n";
2115
} else {
2116
log += "\n";
2117
}
2118
}
2119
WARN_LOG(Log::FrameBuf, "Copy: Multiple dst vfb candidates for (dst: %08x, size: %d):\n%s (%s)", src, size, log.c_str(), RasterChannelToString(channel));
2120
}
2121
}
2122
2123
if (!useBufferedRendering_) {
2124
// If we're copying into a recently used display buf, it's probably destined for the screen.
2125
if (channel == RASTER_DEPTH || srcBuffer || (dstBuffer != displayFramebuf_ && dstBuffer != prevDisplayFramebuf_)) {
2126
return false;
2127
}
2128
}
2129
2130
if (!dstBuffer && srcBuffer && channel != RASTER_DEPTH) {
2131
// Note - if we're here, we're in a memcpy, not a block transfer. Not allowing IntraVRAMBlockTransferAllowCreateFB.
2132
// Technically, that makes BlockTransferAllowCreateFB a bit of a misnomer.
2133
bool allowCreateFB = (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB || GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE);
2134
if (allowCreateFB && !(flags & GPUCopyFlag::DISALLOW_CREATE_VFB)) {
2135
dstBuffer = CreateRAMFramebuffer(dst, srcBuffer->width, srcBuffer->height, srcBuffer->fb_stride, srcBuffer->fb_format);
2136
dstY = 0;
2137
}
2138
}
2139
if (dstBuffer) {
2140
dstBuffer->last_frame_used = gpuStats.numFlips;
2141
if (channel == RASTER_DEPTH && !srcBuffer)
2142
dstBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2143
}
2144
if (srcBuffer && channel == RASTER_DEPTH && !dstBuffer)
2145
srcBuffer->usageFlags |= FB_USAGE_COLOR_MIXED_DEPTH;
2146
2147
if (dstBuffer && srcBuffer) {
2148
if (srcBuffer == dstBuffer) {
2149
WARN_LOG_ONCE(dstsrccpy, Log::FrameBuf, "Intra-buffer memcpy (not supported) %08x -> %08x (size: %x)", src, dst, size);
2150
} else {
2151
WARN_LOG_ONCE(dstnotsrccpy, Log::FrameBuf, "Inter-buffer memcpy %08x -> %08x (size: %x)", src, dst, size);
2152
// Just do the blit!
2153
BlitFramebuffer(dstBuffer, 0, dstY, srcBuffer, 0, srcY, srcBuffer->width, srcH, 0, channel, "Blit_InterBufferMemcpy");
2154
SetColorUpdated(dstBuffer, skipDrawReason);
2155
RebindFramebuffer("RebindFramebuffer - Inter-buffer memcpy");
2156
}
2157
return false;
2158
} else if (dstBuffer) {
2159
if (flags & GPUCopyFlag::MEMSET) {
2160
gpuStats.numClears++;
2161
}
2162
WARN_LOG_N_TIMES(btucpy, 5, Log::FrameBuf, "Memcpy fbo upload %08x -> %08x (size: %x)", src, dst, size);
2163
FlushBeforeCopy();
2164
2165
// TODO: Hot Shots Golf makes a lot of these during the "meter", to copy back the image to the screen, it copies line by line.
2166
// We could collect these in a buffer and flush on the next draw, or something like that, to avoid that. The line copies cause
2167
// awkward visual artefacts.
2168
const u8 *srcBase = Memory::GetPointerUnchecked(src);
2169
GEBufferFormat srcFormat = channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : dstBuffer->fb_format;
2170
int srcStride = channel == RASTER_DEPTH ? dstBuffer->z_stride : dstBuffer->fb_stride;
2171
DrawPixels(dstBuffer, 0, dstY, srcBase, srcFormat, srcStride, dstBuffer->width, dstH, channel, "MemcpyFboUpload_DrawPixels");
2172
SetColorUpdated(dstBuffer, skipDrawReason);
2173
RebindFramebuffer("RebindFramebuffer - Memcpy fbo upload");
2174
// This is a memcpy, let's still copy just in case.
2175
return false;
2176
} else if (srcBuffer) {
2177
WARN_LOG_N_TIMES(btdcpy, 5, Log::FrameBuf, "Memcpy fbo download %08x -> %08x", src, dst);
2178
FlushBeforeCopy();
2179
// TODO: In Hot Shots Golf, check if we can do a readback to a framebuffer here.
2180
// Again we have the problem though that it's doing a lot of small copies here, one for each line.
2181
if (srcH == 0 || srcY + srcH > srcBuffer->bufferHeight) {
2182
WARN_LOG_ONCE(btdcpyheight, Log::FrameBuf, "Memcpy fbo download %08x -> %08x skipped, %d+%d is taller than %d", src, dst, srcY, srcH, srcBuffer->bufferHeight);
2183
} else if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && (!srcBuffer->memoryUpdated || channel == RASTER_DEPTH)) {
2184
Draw::ReadbackMode readbackMode = Draw::ReadbackMode::BLOCK;
2185
if (PSP_CoreParameter().compat.flags().AllowDelayedReadbacks) {
2186
readbackMode = Draw::ReadbackMode::OLD_DATA_OK;
2187
}
2188
ReadFramebufferToMemory(srcBuffer, 0, srcY, srcBuffer->width, srcH, channel, readbackMode);
2189
srcBuffer->usageFlags = (srcBuffer->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
2190
}
2191
return false;
2192
} else {
2193
return false;
2194
}
2195
}
2196
2197
std::string BlockTransferRect::ToString() const {
2198
int bpp = BufferFormatBytesPerPixel(channel == RASTER_DEPTH ? GE_FORMAT_DEPTH16 : vfb->fb_format);
2199
return StringFromFormat("%s %08x/%d/%s seq:%d %d,%d %dx%d", RasterChannelToString(channel), vfb->fb_address, vfb->FbStrideInBytes(), GeBufferFormatToString(vfb->fb_format), vfb->colorBindSeq, x_bytes / bpp, y, w_bytes / bpp, h);
2200
}
2201
2202
// This is used when looking for framebuffers for a block transfer.
2203
// The only known game to block transfer depth buffers is Iron Man, see #16530, so
2204
// we have a compat flag and pretty limited functionality for that.
2205
// Looks through vfbs_ for a framebuffer that a block transfer touching basePtr should map onto.
//
// basePtr, stride_pixels, x_pixels, y, w_pixels, h and bpp describe one side of the transfer;
// destination is true when that side is being written to.
//
// On success, *rect is filled with the chosen framebuffer, the channel (color or depth) and the
// rectangle expressed in bytes relative to that framebuffer, and true is returned.
// On failure, rect->vfb is left as nullptr and false is returned.
bool FramebufferManagerCommon::FindTransferFramebuffer(u32 basePtr, int stride_pixels, int x_pixels, int y, int w_pixels, int h, int bpp, bool destination, BlockTransferRect *rect) {
	basePtr &= 0x3FFFFFFF;
	if (Memory::IsVRAMAddress(basePtr))
		basePtr &= 0x041FFFFF;
	rect->vfb = nullptr;

	if (!stride_pixels) {
		WARN_LOG(Log::FrameBuf, "Zero stride in FindTransferFrameBuffer, ignoring");
		return false;
	}

	const u32 byteStride = stride_pixels * bpp;
	int x_bytes = x_pixels * bpp;
	int w_bytes = w_pixels * bpp;

	TinySet<BlockTransferRect, 4> candidates;

	// We work entirely in bytes when we do the matching, because games don't consistently use bpps that match
	// that of their buffers. Then after matching we try to map the copy to the simplest operation that does
	// what we need.

	for (auto vfb : vfbs_) {
		BlockTransferRect candidate{ vfb, RASTER_COLOR };

		// Two cases so far of games depending on depth copies: Iron Man in issue #16530 (buffer->buffer)
		// and also #17878 where a game does ram->buffer to an auto-swizzling (|0x600000) address,
		// to initialize Z with a pre-rendered depth buffer.
		// Depth matching requires an exact address and stride match, and is gated by a compat flag.
		if (vfb->z_address == basePtr && vfb->BufferByteStride(RASTER_DEPTH) == byteStride && PSP_CoreParameter().compat.flags().BlockTransferDepth) {
			WARN_LOG_N_TIMES(z_xfer, 5, Log::FrameBuf, "FindTransferFramebuffer: found matching depth buffer, %08x (dest=%d, bpp=%d)", basePtr, (int)destination, bpp);
			candidate.channel = RASTER_DEPTH;
			candidate.x_bytes = x_bytes;
			candidate.w_bytes = w_bytes;
			candidate.y = y;
			candidate.h = h;
			candidates.push_back(candidate);
			continue;
		}

		const u32 vfb_address = vfb->fb_address;
		const u32 vfb_size = vfb->BufferByteSize(RASTER_COLOR);

		// The base pointer must land inside this framebuffer's color memory.
		if (basePtr < vfb_address || basePtr >= vfb_address + vfb_size) {
			continue;
		}

		const u32 vfb_byteStride = vfb->FbStrideInBytes();
		const u32 vfb_byteWidth = vfb->WidthInBytes();

		candidate.w_bytes = w_bytes;
		candidate.h = h;

		// Split the offset from the framebuffer start into an X/Y offset, using the transfer's stride.
		const u32 byteOffset = basePtr - vfb_address;
		const int memXOffset = byteOffset % byteStride;
		const int memYOffset = byteOffset / byteStride;

		// Some games use mismatching bitdepths. But make sure the stride matches.
		// If it doesn't, generally this means we detected the framebuffer with too large a height.
		// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)

		// If it's outside the vfb by a single pixel, we currently disregard it.
		if (memYOffset > vfb->bufferHeight - h) {
			continue;
		}

		if (byteOffset == vfb->WidthInBytes() && w_bytes < vfb->FbStrideInBytes()) {
			// Looks like we're in a margin texture of the vfb, which is not the vfb itself.
			// Ignore the match.
			continue;
		}

		if (vfb_byteStride != byteStride) {
			// Grand Knights History occasionally copies with a mismatching stride but a full line at a time.
			// That's why we multiply by height, not width - this copy is a rectangle with the wrong stride but a line with the correct one.
			// Makes it hard to detect the wrong transfers in e.g. God of War.
			if (w_pixels != stride_pixels || (byteStride * h != vfb_byteStride && byteStride * h != vfb_byteWidth)) {
				if (!destination) {
					continue;
				}
				// However, some other games write cluts to framebuffers.
				// Let's catch this and upload. Otherwise reject the match.
				if ((vfb->usageFlags & FB_USAGE_CLUT) == 0) {
					continue;
				}
				// Flatten the copy into a single line. This must be recorded on the candidate only;
				// the h parameter has to stay intact for the remaining framebuffers and for logging.
				candidate.w_bytes = byteStride * h;
				candidate.h = 1;
			} else {
				// This is the Grand Knights History case.
				candidate.w_bytes = byteStride * h;
				candidate.h = 1;
			}
		} else {
			candidate.w_bytes = w_bytes;
			candidate.h = h;
		}

		candidate.x_bytes = x_bytes + memXOffset;
		candidate.y = y + memYOffset;
		candidate.vfb = vfb;
		candidates.push_back(candidate);
	}

	const BlockTransferRect *best = nullptr;
	// Sort candidates by just recency for now, we might add other.
	for (size_t i = 0; i < candidates.size(); i++) {
		const BlockTransferRect *candidate = &candidates[i];

		bool better = !best;
		if (!better) {
			if (candidate->channel == best->channel) {
				// Same channel: the most recently bound buffer wins.
				better = candidate->vfb->BindSeq(candidate->channel) > best->vfb->BindSeq(candidate->channel);
			} else {
				// Prefer depth over color if the address match is perfect.
				if (candidate->channel == RASTER_DEPTH && best->channel == RASTER_COLOR && candidate->vfb->z_address == basePtr) {
					better = true;
				}
			}
		}

		if ((candidate->vfb->usageFlags & FB_USAGE_CLUT) && candidate->x_bytes == 0 && candidate->y == 0 && destination) {
			// Hack to prioritize copies to clut buffers.
			best = candidate;
			break;
		}
		if (better) {
			best = candidate;
		}
	}

	if (candidates.size() > 1) {
		if (Reporting::ShouldLogNTimes("mulblock", 5)) {
			std::string log;
			for (size_t i = 0; i < candidates.size(); i++) {
				log += " - " + candidates[i].ToString() + "\n";
			}
			WARN_LOG(Log::FrameBuf, "Multiple framebuffer candidates for %08x/%d/%d %d,%d %dx%d (dest = %d):\n%s", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h, (int)destination, log.c_str());
		}
	}

	if (best) {
		*rect = *best;
		return true;
	} else {
		if (Memory::IsVRAMAddress(basePtr) && destination && h >= 128) {
			WARN_LOG_N_TIMES(nocands, 5, Log::FrameBuf, "Didn't find a destination candidate for %08x/%d/%d %d,%d %dx%d", basePtr, stride_pixels, bpp, x_pixels, y, w_pixels, h);
		}
		return false;
	}
}
2359
2360
// Creates a new virtual framebuffer for the given address and registers it in vfbs_.
// Passing GE_FORMAT_DEPTH16 creates a depth-only target (color address set to an invalid value);
// any other format creates a color target. Returns the new framebuffer.
VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAddress, int width, int height, int stride, GEBufferFormat format) {
	INFO_LOG(Log::FrameBuf, "Creating RAM framebuffer at %08x (%dx%d, stride %d, fb_format %d)", fbAddress, width, height, stride, format);

	const bool isDepth = (format == GE_FORMAT_DEPTH16);
	const RasterChannel channel = isDepth ? RASTER_DEPTH : RASTER_COLOR;

	// A target for the destination is missing - so just create one!
	// Make sure this one would be found by the algorithm above so we wouldn't
	// create a new one each frame.
	VirtualFramebuffer *vfb = new VirtualFramebuffer{};
	vfb->fbo = nullptr;

	const uint32_t mask = Memory::IsVRAMAddress(fbAddress) ? 0x041FFFFF : 0x3FFFFFFF;
	if (isDepth) {
		vfb->fb_address = 0xFFFFFFFF;  // Invalid address
		vfb->fb_stride = 0;
		vfb->z_address = fbAddress;  // marks that if anyone tries to render with depth to this framebuffer, it should be dropped and recreated.
		vfb->z_stride = stride;
		vfb->width = width;
	} else {
		vfb->fb_address = fbAddress & mask;  // NOTE - not necessarily in VRAM!
		vfb->fb_stride = stride;
		vfb->z_address = 0;
		vfb->z_stride = 0;
		vfb->width = std::max(width, stride);
	}

	// Dimensions: logical, pending, buffer and scaled render sizes.
	vfb->height = height;
	vfb->newWidth = vfb->width;
	vfb->newHeight = vfb->height;
	vfb->lastFrameNewSize = gpuStats.numFlips;
	vfb->renderScaleFactor = renderScaleFactor_;
	vfb->renderWidth = (u16)(vfb->width * renderScaleFactor_);
	vfb->renderHeight = (u16)(vfb->height * renderScaleFactor_);
	vfb->bufferWidth = vfb->width;
	vfb->bufferHeight = vfb->height;

	// Depth-only targets still carry a color format; 8888 is used for those.
	vfb->fb_format = isDepth ? GE_FORMAT_8888 : format;
	vfb->usageFlags = isDepth ? FB_USAGE_RENDER_DEPTH : FB_USAGE_RENDER_COLOR;
	if (!isDepth) {
		SetColorUpdated(vfb, 0);
	}

	char name[64];
	snprintf(name, sizeof(name), "%08x_%s_RAM", vfb->Address(channel), RasterChannelToString(channel));

	// The texture cache is notified before the backing FBO exists.
	textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
	vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, isDepth, name });
	vfbs_.push_back(vfb);

	// Extend the tracked end of the framebuffer address range if this buffer reaches past it.
	const u32 byteSize = vfb->BufferByteSize(channel);
	const u32 rangeEnd = fbAddress + byteSize;
	if (rangeEnd > framebufColorRangeEnd_) {
		framebufColorRangeEnd_ = rangeEnd;
	}

	return vfb;
}
2412
2413
// 1:1 pixel size buffers, we resize buffers to these before we read them back.
2414
// TODO: We shouldn't keep whole VirtualFramebuffer structs for these - the fbo and last_frame_render is enough.
2415
// Returns a 1x-scale temporary framebuffer matching vfb for the given channel, reusing an
// entry from bvfbs_ when address, format and buffer size all match, otherwise creating one.
// Returns nullptr if a new FBO could not be created.
VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFramebuffer *vfb, RasterChannel channel) {
	// For now we'll keep these on the same struct as the ones that can get displayed
	// (and blatantly copy work already done above while at it).
	VirtualFramebuffer *nvfb = nullptr;

	// We maintain a separate vector of framebuffer objects for blitting.
	for (VirtualFramebuffer *existing : bvfbs_) {
		if (existing->Address(channel) != vfb->Address(channel) || existing->Format(channel) != vfb->Format(channel)) {
			continue;
		}
		if (existing->bufferWidth != vfb->bufferWidth || existing->bufferHeight != vfb->bufferHeight) {
			continue;
		}

		// Reuse this one, refreshing the stride and logical size from the source.
		if (channel == RASTER_COLOR) {
			existing->fb_stride = vfb->fb_stride;
		} else {
			existing->z_stride = vfb->z_stride;
		}
		existing->width = vfb->width;
		existing->height = vfb->height;
		nvfb = existing;
		break;
	}

	if (nvfb) {
		UpdateDownloadTempBuffer(nvfb);
	} else {
		// Create a new fbo if none was found for the size
		const bool forColor = (channel == RASTER_COLOR);
		const bool forDepth = (channel == RASTER_DEPTH);

		nvfb = new VirtualFramebuffer{};
		nvfb->fbo = nullptr;
		nvfb->fb_address = forColor ? vfb->fb_address : 0;
		nvfb->fb_stride = forColor ? vfb->fb_stride : 0;
		nvfb->z_address = forDepth ? vfb->z_address : 0;
		nvfb->z_stride = forDepth ? vfb->z_stride : 0;
		nvfb->width = vfb->width;
		nvfb->height = vfb->height;
		nvfb->renderWidth = vfb->bufferWidth;
		nvfb->renderHeight = vfb->bufferHeight;
		nvfb->renderScaleFactor = 1;  // For readbacks we resize to the original size, of course.
		nvfb->bufferWidth = vfb->bufferWidth;
		nvfb->bufferHeight = vfb->bufferHeight;
		nvfb->fb_format = vfb->fb_format;
		nvfb->drawnWidth = vfb->drawnWidth;
		nvfb->drawnHeight = vfb->drawnHeight;

		char name[64];
		snprintf(name, sizeof(name), "download_temp_%08x_%s", vfb->Address(channel), RasterChannelToString(channel));

		// We always create a color-only framebuffer here - readbacks of depth convert to color while translating the values.
		nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, false, name });
		if (!nvfb->fbo) {
			ERROR_LOG(Log::FrameBuf, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
			delete nvfb;
			return nullptr;
		}
		bvfbs_.push_back(nvfb);
	}

	// Whether reused or new, mark it as a freshly used color render target.
	nvfb->usageFlags |= FB_USAGE_RENDER_COLOR;
	nvfb->last_frame_render = gpuStats.numFlips;
	nvfb->dirtyAfterDisplay = true;

	return nvfb;
}
2477
2478
void FramebufferManagerCommon::ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor) {
2479
if (currentRenderVfb_) {
2480
if ((currentRenderVfb_->usageFlags & FB_USAGE_DOWNLOAD_CLEAR) != 0) {
2481
// Already zeroed in memory.
2482
return;
2483
}
2484
}
2485
2486
if (!Memory::IsValidAddress(gstate.getFrameBufAddress())) {
2487
return;
2488
}
2489
2490
u8 *addr = Memory::GetPointerWriteUnchecked(gstate.getFrameBufAddress());
2491
const int bpp = BufferFormatBytesPerPixel(gstate_c.framebufFormat);
2492
2493
u32 clearBits = clearColor;
2494
if (bpp == 2) {
2495
u16 clear16 = 0;
2496
switch (gstate_c.framebufFormat) {
2497
case GE_FORMAT_565: clear16 = RGBA8888toRGB565(clearColor); break;
2498
case GE_FORMAT_5551: clear16 = RGBA8888toRGBA5551(clearColor); break;
2499
case GE_FORMAT_4444: clear16 = RGBA8888toRGBA4444(clearColor); break;
2500
default: _dbg_assert_(0); break;
2501
}
2502
clearBits = clear16 | (clear16 << 16);
2503
}
2504
2505
const bool singleByteClear = (clearBits >> 16) == (clearBits & 0xFFFF) && (clearBits >> 24) == (clearBits & 0xFF);
2506
const int stride = gstate.FrameBufStride();
2507
const int width = x2 - x1;
2508
2509
const int byteStride = stride * bpp;
2510
const int byteWidth = width * bpp;
2511
for (int y = y1; y < y2; ++y) {
2512
NotifyMemInfo(MemBlockFlags::WRITE, gstate.getFrameBufAddress() + x1 * bpp + y * byteStride, byteWidth, "FramebufferClear");
2513
}
2514
2515
// Can use memset for simple cases. Often alpha is different and gums up the works.
2516
if (singleByteClear) {
2517
addr += x1 * bpp;
2518
for (int y = y1; y < y2; ++y) {
2519
memset(addr + y * byteStride, clearBits, byteWidth);
2520
}
2521
} else {
2522
// This will most often be true - rarely is the width not aligned.
2523
// TODO: We should really use non-temporal stores here to avoid the cache,
2524
// as it's unlikely that these bytes will be read.
2525
if ((width & 3) == 0 && (x1 & 3) == 0) {
2526
u64 val64 = clearBits | ((u64)clearBits << 32);
2527
int xstride = 8 / bpp;
2528
2529
u64 *addr64 = (u64 *)addr;
2530
const int stride64 = stride / xstride;
2531
const int x1_64 = x1 / xstride;
2532
const int x2_64 = x2 / xstride;
2533
for (int y = y1; y < y2; ++y) {
2534
for (int x = x1_64; x < x2_64; ++x) {
2535
addr64[y * stride64 + x] = val64;
2536
}
2537
}
2538
} else if (bpp == 4) {
2539
u32 *addr32 = (u32 *)addr;
2540
for (int y = y1; y < y2; ++y) {
2541
for (int x = x1; x < x2; ++x) {
2542
addr32[y * stride + x] = clearBits;
2543
}
2544
}
2545
} else if (bpp == 2) {
2546
u16 *addr16 = (u16 *)addr;
2547
for (int y = y1; y < y2; ++y) {
2548
for (int x = x1; x < x2; ++x) {
2549
addr16[y * stride + x] = (u16)clearBits;
2550
}
2551
}
2552
}
2553
}
2554
2555
if (currentRenderVfb_) {
2556
// The current content is in memory now, so update the flag.
2557
if (x1 == 0 && y1 == 0 && x2 >= currentRenderVfb_->width && y2 >= currentRenderVfb_->height) {
2558
currentRenderVfb_->usageFlags |= FB_USAGE_DOWNLOAD_CLEAR;
2559
currentRenderVfb_->memoryUpdated = true;
2560
}
2561
}
2562
}
2563
2564
// Called before a block transfer is performed. Tries to map the source and destination of the
// transfer onto known framebuffers and, where possible, performs the copy on the GPU side
// (blit, depth upload) or reads the source framebuffer back into memory first.
//
// Returns true when the transfer was fully handled here, as the inline comments put it
// "to also skip the memory copy". Returns false when the memory copy should still happen
// (for uploads, the inline comment says NotifyBlockTransferAfter takes care of the rest).
bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	if (!useBufferedRendering_) {
		return false;
	}

	// Skip checking if there's no framebuffers in that area. Make a special exception for obvious transfers to depth buffer, see issue #17878
	bool dstDepthSwizzle = Memory::IsVRAMAddress(dstBasePtr) && ((dstBasePtr & 0x600000) == 0x600000);

	if (!dstDepthSwizzle && !MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return false;
	}

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	bool srcBuffer = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	bool dstBuffer = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	// Ignore the found destination buffer if the source is depth and the destination is 32-bit color -
	// we create a new more suitable one instead.
	// dstRect.vfb is nullptr when no destination was found, so it may only be read when dstBuffer is set.
	if (dstBuffer && srcRect.channel == RASTER_DEPTH && dstRect.channel == RASTER_COLOR && dstRect.vfb->fb_format == GE_FORMAT_8888) {
		dstBuffer = false;
	}

	if (srcBuffer && !dstBuffer) {
		// In here, we can't read from dstRect.
		if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
			GetSkipGPUReadbackMode() == SkipGPUReadbackMode::COPY_TO_TEXTURE ||
			(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
				Memory::IsVRAMAddress(srcRect.vfb->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
			GEBufferFormat ramFormat;
			// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
			if (srcRect.channel == RASTER_COLOR) {
				if (bpp == 4) {
					// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
					ramFormat = GE_FORMAT_8888;
				} else if (srcRect.vfb->fb_format != GE_FORMAT_8888) {
					// We guess that the game will interpret the data the same as it was in the source of the copy.
					// Seems like a likely good guess, and works in Test Drive Unlimited.
					ramFormat = srcRect.vfb->fb_format;
				} else {
					// No info left - just fall back to something. But this is definitely split pixel tricks.
					ramFormat = GE_FORMAT_5551;
				}
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, ramFormat);
			} else {
				dstRect.vfb = CreateRAMFramebuffer(dstBasePtr, width, height, dstStride, GE_FORMAT_DEPTH16);
				dstRect.x_bytes = 0;
				dstRect.w_bytes = 2 * width;
				dstRect.y = 0;
				dstRect.h = height;
				dstRect.channel = RASTER_DEPTH;
			}
			dstBuffer = true;
		}
	}

	if (dstBuffer) {
		dstRect.vfb->last_frame_used = gpuStats.numFlips;
		// Mark the destination as fresh.
		if (dstRect.channel == RASTER_COLOR) {
			dstRect.vfb->colorBindSeq = GetBindSeqCount();
		} else {
			dstRect.vfb->depthBindSeq = GetBindSeqCount();
		}
	}

	if (dstBuffer && srcBuffer) {
		if (srcRect.vfb && srcRect.vfb == dstRect.vfb && srcRect.channel == dstRect.channel) {
			// Transfer within the same buffer.
			// This is a simple case because there will be no format conversion or similar shenanigans needed.
			// However, the BPP might still mismatch, but in such a case we can convert the coordinates.
			if (srcX == dstX && srcY == dstY) {
				// Ignore, nothing to do. Tales of Phantasia X does this by accident.
				// Returning true to also skip the memory copy.
				return true;
			}

			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));

			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in intra-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}

			WARN_LOG_N_TIMES(dstsrc, 5, Log::G3D, "Intra-buffer block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
			FlushBeforeCopy();
			// Some backends can handle blitting within a framebuffer. Others will just have to deal with it or ignore it, apparently.
			BlitFramebuffer(dstRect.vfb, dstX, dstY, srcRect.vfb, srcX, srcY, dstRect.w_bytes / bpp, dstRect.h, bpp, dstRect.channel, "Blit_IntraBufferBlockTransfer");
			RebindFramebuffer("rebind after intra block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;  // Skip the memory copy.
		}

		// Straightforward blit between two same-format framebuffers.
		if (srcRect.vfb && srcRect.channel == dstRect.channel && srcRect.vfb->Format(srcRect.channel) == dstRect.vfb->Format(dstRect.channel)) {
			WARN_LOG_N_TIMES(dstnotsrc, 5, Log::G3D, "Inter-buffer %s block transfer %dx%d %dbpp from %08x (x:%d y:%d stride:%d %s) -> %08x (x:%d y:%d stride:%d %s)",
				RasterChannelToString(srcRect.channel),
				width, height, bpp,
				srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride, GeBufferFormatToString(srcRect.vfb->fb_format),
				dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride, GeBufferFormatToString(dstRect.vfb->fb_format));

			// Straight blit will do, but check the bpp, we might need to convert coordinates differently.
			int buffer_bpp = BufferFormatBytesPerPixel(srcRect.vfb->Format(srcRect.channel));
			if (bpp != buffer_bpp) {
				WARN_LOG_ONCE(intrabpp, Log::G3D, "Mismatched transfer bpp in inter-buffer block transfer. Was %d, expected %d.", bpp, buffer_bpp);
				// We just switch to using the buffer's bpp, since we've already converted the rectangle to byte offsets.
				bpp = buffer_bpp;
			}
			FlushBeforeCopy();
			BlitFramebuffer(dstRect.vfb, dstRect.x_bytes / bpp, dstRect.y, srcRect.vfb, srcRect.x_bytes / bpp, srcRect.y, srcRect.w_bytes / bpp, height, bpp, srcRect.channel, "Blit_InterBufferBlockTransfer");
			RebindFramebuffer("RebindFramebuffer - Inter-buffer block transfer");
			SetColorUpdated(dstRect.vfb, skipDrawReason);
			return true;
		}

		// Getting to the more complex cases. Have not actually seen much of these yet.
		WARN_LOG_N_TIMES(blockformat, 5, Log::G3D, "Mismatched buffer formats in block transfer: %s->%s (%dx%d)",
			GeBufferFormatToString(srcRect.vfb->Format(srcRect.channel)), GeBufferFormatToString(dstRect.vfb->Format(dstRect.channel)),
			width, height);

		// TODO

		// No need to actually do the memory copy behind, probably.
		return true;

	} else if (dstBuffer) {
		// Handle depth uploads directly here, and let's not bother copying the data. This is compat-flag-gated for now,
		// may generalize it when I remove the compat flag.
		if (dstRect.channel == RASTER_DEPTH) {
			WARN_LOG_ONCE(btud, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d %s)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp, RasterChannelToString(dstRect.channel));
			FlushBeforeCopy();
			const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;
			DrawPixels(dstRect.vfb, dstX, dstY, srcBase, dstRect.vfb->Format(dstRect.channel), srcStride * bpp / 2, (int)(dstRect.w_bytes / 2), dstRect.h, dstRect.channel, "BlockTransferCopy_DrawPixelsDepth");
			RebindFramebuffer("RebindFramebuffer - UploadDepth");
			return true;
		}

		// Here we should just draw the pixels into the buffer. Return false to copy the memory first.
		// NotifyBlockTransferAfter will take care of the rest.
		return false;
	} else if (srcBuffer) {
		// Specific transfer shape that the TacticsOgreEliminateDebugReadback compat flag asks us not to read back.
		if (width == 48 && height == 48 && srcY == 224 && srcX == 432 && PSP_CoreParameter().compat.flags().TacticsOgreEliminateDebugReadback) {
			return false;
		}

		WARN_LOG_N_TIMES(btd, 10, Log::G3D, "Block transfer readback %dx%d %dbpp from %08x (x:%d y:%d stride:%d) -> %08x (x:%d y:%d stride:%d)",
			width, height, bpp,
			srcBasePtr, srcRect.x_bytes / bpp, srcRect.y, srcStride,
			dstBasePtr, dstRect.x_bytes / bpp, dstRect.y, dstStride);
		FlushBeforeCopy();
		// Only read back when readbacks are enabled and memory doesn't already hold the current content.
		if (GetSkipGPUReadbackMode() == SkipGPUReadbackMode::NO_SKIP && !srcRect.vfb->memoryUpdated) {
			const int srcBpp = BufferFormatBytesPerPixel(srcRect.vfb->fb_format);
			const float srcXFactor = (float)bpp / srcBpp;
			const bool tooTall = srcY + srcRect.h > srcRect.vfb->bufferHeight;
			if (srcRect.h <= 0 || (tooTall && srcY != 0)) {
				WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x skipped, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
			} else {
				if (tooTall) {
					WARN_LOG_ONCE(btdheight, Log::G3D, "Block transfer download %08x -> %08x dangerous, %d+%d is taller than %d", srcBasePtr, dstBasePtr, srcRect.y, srcRect.h, srcRect.vfb->bufferHeight);
				}
				Draw::ReadbackMode readbackMode = Draw::ReadbackMode::BLOCK;
				if (PSP_CoreParameter().compat.flags().AllowDelayedReadbacks) {
					readbackMode = Draw::ReadbackMode::OLD_DATA_OK;
				}
				ReadFramebufferToMemory(srcRect.vfb, static_cast<int>(srcX * srcXFactor), srcY, static_cast<int>(srcRect.w_bytes * srcXFactor), srcRect.h, RASTER_COLOR, readbackMode);
				srcRect.vfb->usageFlags = (srcRect.vfb->usageFlags | FB_USAGE_DOWNLOAD) & ~FB_USAGE_DOWNLOAD_CLEAR;
			}
		}
		return false;  // Let the bit copy happen
	} else {
		return false;
	}
}
2748
2749
// Returns the effective GPU readback mode: the ForceEnableGPUReadback compat flag
// overrides the configured value with NO_SKIP.
SkipGPUReadbackMode FramebufferManagerCommon::GetSkipGPUReadbackMode() {
	if (PSP_CoreParameter().compat.flags().ForceEnableGPUReadback)
		return SkipGPUReadbackMode::NO_SKIP;
	return static_cast<SkipGPUReadbackMode>(g_Config.iSkipGPUReadbackMode);
}
2756
2757
// Called after the memory side of a block transfer has been performed.
// In non-buffered mode, a full-screen transfer to a display buffer is drawn straight to the output.
// Otherwise, if the destination maps to a framebuffer and the source does not, the transferred
// pixels are uploaded into that framebuffer (resizing it first if the transfer is larger).
void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int width, int height, int bpp, u32 skipDrawReason) {
	// If it's a block transfer direct to the screen, and we're not using buffers, draw immediately.
	// We may still do a partial block draw below if this doesn't pass.
	if (!useBufferedRendering_ && dstStride >= 480 && width >= 480 && height == 272) {
		const bool hitsPreviousDisplay = PrevDisplayFramebufAddr() == dstBasePtr;
		const bool hitsCurrentDisplay = CurrentDisplayFramebufAddr() == dstBasePtr;
		if (hitsPreviousDisplay || hitsCurrentDisplay) {
			FlushBeforeCopy();
			DrawFramebufferToOutput(Memory::GetPointerUnchecked(dstBasePtr), dstStride, displayFormat_);
			return;
		}
	}

	// Nothing to do unless one side may touch a known framebuffer.
	if (!MayIntersectFramebufferColor(srcBasePtr) && !MayIntersectFramebufferColor(dstBasePtr)) {
		return;
	}

	// TODO: Figure out how we can avoid repeating the search here.

	BlockTransferRect dstRect{};
	BlockTransferRect srcRect{};

	// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
	const bool srcBuffer = FindTransferFramebuffer(srcBasePtr, srcStride, srcX, srcY, width, height, bpp, false, &srcRect);
	const bool dstBuffer = FindTransferFramebuffer(dstBasePtr, dstStride, dstX, dstY, width, height, bpp, true, &dstRect);

	// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
	// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
	if (!useBufferedRendering_ && currentRenderVfb_ != dstRect.vfb) {
		return;
	}

	// Only the pure upload case (memory -> framebuffer) is handled here.
	if (!dstBuffer || srcBuffer) {
		return;
	}

	WARN_LOG_ONCE(btu, Log::G3D, "Block transfer upload %08x -> %08x (%dx%d %d,%d bpp=%d)", srcBasePtr, dstBasePtr, width, height, dstX, dstY, bpp);
	FlushBeforeCopy();
	const u8 *srcBase = Memory::GetPointerUnchecked(srcBasePtr) + (srcX + srcY * srcStride) * bpp;

	// Ratio between the transfer's bpp and the framebuffer's bpp, used to rescale X coordinates.
	const int dstBpp = BufferFormatBytesPerPixel(dstRect.vfb->fb_format);
	const float dstXFactor = (float)bpp / dstBpp;
	const int transferWidth = dstRect.w_bytes / bpp;

	if (transferWidth > dstRect.vfb->width || dstRect.h > dstRect.vfb->height) {
		// The buffer isn't big enough, and we have a clear hint of size. Resize.
		// This happens in Valkyrie Profile when uploading video at the ending.
		// Also happens to the CLUT framebuffer in the Burnout Dominator lens flare effect. See #16075
		ResizeFramebufFBO(dstRect.vfb, transferWidth, dstRect.h, false, true);
		// Make sure we don't flop back and forth.
		dstRect.vfb->newWidth = std::max(transferWidth, (int)dstRect.vfb->width);
		dstRect.vfb->newHeight = std::max(dstRect.h, (int)dstRect.vfb->height);
		dstRect.vfb->lastFrameNewSize = gpuStats.numFlips;
		// Resizing may change the viewport/etc.
		gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE);
	}

	DrawPixels(dstRect.vfb, static_cast<int>(dstX * dstXFactor), dstY, srcBase, dstRect.vfb->fb_format, static_cast<int>(srcStride * dstXFactor), static_cast<int>(transferWidth * dstXFactor), dstRect.h, RASTER_COLOR, "BlockTransferCopy_DrawPixels");
	SetColorUpdated(dstRect.vfb, skipDrawReason);
	RebindFramebuffer("RebindFramebuffer - NotifyBlockTransferAfter");
}
2811
2812
// Grows the current render target's safe width/height to at least w/h,
// clamped to the target's buffer dimensions. Does nothing without a current render target.
void FramebufferManagerCommon::SetSafeSize(u16 w, u16 h) {
	VirtualFramebuffer *target = currentRenderVfb_;
	if (!target) {
		return;
	}

	const auto grownWidth = std::max(target->safeWidth, w);
	const auto grownHeight = std::max(target->safeHeight, h);
	target->safeWidth = std::min(target->bufferWidth, grownWidth);
	target->safeHeight = std::min(target->bufferHeight, grownHeight);
}
2819
2820
void FramebufferManagerCommon::NotifyDisplayResized() {
2821
pixelWidth_ = PSP_CoreParameter().pixelWidth;
2822
pixelHeight_ = PSP_CoreParameter().pixelHeight;
2823
presentation_->UpdateDisplaySize(pixelWidth_, pixelHeight_);
2824
2825
INFO_LOG(Log::G3D, "FramebufferManagerCommon::NotifyDisplayResized: %dx%d", pixelWidth_, pixelHeight_);
2826
2827
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2828
// So we need to defer the post processing initialization.
2829
updatePostShaders_ = true;
2830
}
2831
2832
void FramebufferManagerCommon::NotifyRenderResized(int msaaLevel) {
2833
gstate_c.skipDrawReason &= ~SKIPDRAW_NON_DISPLAYED_FB;
2834
2835
int w, h, scaleFactor;
2836
presentation_->CalculateRenderResolution(&w, &h, &scaleFactor, &postShaderIsUpscalingFilter_, &postShaderIsSupersampling_);
2837
PSP_CoreParameter().renderWidth = w;
2838
PSP_CoreParameter().renderHeight = h;
2839
PSP_CoreParameter().renderScaleFactor = scaleFactor;
2840
2841
if (UpdateRenderSize(msaaLevel)) {
2842
draw_->StopThreads();
2843
DestroyAllFBOs();
2844
draw_->StartThreads();
2845
}
2846
2847
// No drawing is allowed here. This includes anything that might potentially touch a command buffer, like creating images!
2848
// So we need to defer the post processing initialization.
2849
updatePostShaders_ = true;
2850
}
2851
2852
void FramebufferManagerCommon::NotifyConfigChanged() {
2853
updatePostShaders_ = true;
2854
}
2855
2856
void FramebufferManagerCommon::DestroyAllFBOs() {
2857
DiscardFramebufferCopy();
2858
currentRenderVfb_ = nullptr;
2859
displayFramebuf_ = nullptr;
2860
prevDisplayFramebuf_ = nullptr;
2861
prevPrevDisplayFramebuf_ = nullptr;
2862
2863
for (VirtualFramebuffer *vfb : vfbs_) {
2864
INFO_LOG(Log::FrameBuf, "Destroying FBO for %08x : %i x %i x %i", vfb->fb_address, vfb->width, vfb->height, vfb->fb_format);
2865
DestroyFramebuf(vfb);
2866
}
2867
vfbs_.clear();
2868
2869
for (VirtualFramebuffer *vfb : bvfbs_) {
2870
DestroyFramebuf(vfb);
2871
}
2872
bvfbs_.clear();
2873
2874
for (auto &tempFB : tempFBOs_) {
2875
tempFB.second.fbo->Release();
2876
}
2877
tempFBOs_.clear();
2878
2879
for (auto &iter : fbosToDelete_) {
2880
iter->Release();
2881
}
2882
fbosToDelete_.clear();
2883
2884
for (auto &iter : drawPixelsCache_) {
2885
iter.tex->Release();
2886
}
2887
drawPixelsCache_.clear();
2888
}
2889
2890
// Maps a TempFBO reason to a short lowercase label.
// Reasons without a label yield an empty string.
static const char *TempFBOReasonToString(TempFBO reason) {
	const char *label = "";
	switch (reason) {
	case TempFBO::DEPAL:
		label = "depal";
		break;
	case TempFBO::BLIT:
		label = "blit";
		break;
	case TempFBO::COPY:
		label = "copy";
		break;
	case TempFBO::STENCIL:
		label = "stencil";
		break;
	default:
		break;
	}
	return label;
}
2900
2901
// Returns a temporary framebuffer for the given purpose and size, creating and caching it
// on first request. Cached entries are keyed on (reason, w, h) and have their last-used
// frame refreshed on every hit.
// Returns nullptr if the draw context fails to create the framebuffer.
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
	const u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;

	auto cached = tempFBOs_.find(key);
	if (cached != tempFBOs_.end()) {
		cached->second.last_frame_used = gpuStats.numFlips;
		return cached->second.fbo;
	}

	// The name carries the size divided by the render scale factor.
	char name[128];
	snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);

	// Only the stencil variant requests a depth/stencil attachment.
	const bool z_stencil = reason == TempFBO::STENCIL;
	Draw::Framebuffer *created = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
	if (!created) {
		return nullptr;
	}

	tempFBOs_[key] = TempFBOInfo{ created, gpuStats.numFlips };
	return created;
}
2922
2923
// Drops usage flags from vfb whose associated "last frame" counter is more than
// FBO_OLD_USAGE_FLAG frames behind frameLastFramebufUsed_.
void FramebufferManagerCommon::UpdateFramebufUsage(VirtualFramebuffer *vfb) const {
	// Clears `flag` if it is set and its timestamp has grown too old.
	auto expireIfStale = [&](u16 flag, int lastFrame) {
		if (!(vfb->usageFlags & flag)) {
			return;
		}
		const int age = frameLastFramebufUsed_ - lastFrame;
		if (age > FBO_OLD_USAGE_FLAG) {
			vfb->usageFlags &= ~flag;
		}
	};

	expireIfStale(FB_USAGE_DISPLAYED_FRAMEBUFFER, vfb->last_frame_displayed);
	expireIfStale(FB_USAGE_TEXTURE, vfb->last_frame_used);
	expireIfStale(FB_USAGE_RENDER_COLOR, vfb->last_frame_render);
	expireIfStale(FB_USAGE_CLUT, vfb->last_frame_clut);
}
2938
2939
void FramebufferManagerCommon::ClearAllDepthBuffers() {
2940
for (auto vfb : vfbs_) {
2941
vfb->usageFlags |= FB_USAGE_INVALIDATE_DEPTH;
2942
}
2943
}
2944
2945
// We might also want to implement an asynchronous callback-style version of this. Would probably
2946
// only be possible to implement optimally on Vulkan, but on GL and D3D11 we could do pixel buffers
2947
// and read on the next frame, then call the callback.
2948
//
2949
// The main use cases for this are:
2950
// * GE debugging(in practice async will not matter because it will stall anyway.)
2951
// * Video file recording(would probably be great if it was async.)
2952
// * Screenshots(benefit slightly from async.)
2953
// * Save state screenshots(could probably be async but need to manage the stall.)
2954
bool FramebufferManagerCommon::GetFramebuffer(u32 fb_address, int fb_stride, GEBufferFormat format, GPUDebugBuffer &buffer, int maxScaleFactor) {
2955
VirtualFramebuffer *vfb = currentRenderVfb_;
2956
if (!vfb || vfb->fb_address != fb_address) {
2957
vfb = ResolveVFB(fb_address, fb_stride, format);
2958
}
2959
2960
if (!vfb) {
2961
if (!Memory::IsValidAddress(fb_address))
2962
return false;
2963
// If there's no vfb and we're drawing there, must be memory?
2964
buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(fb_address), fb_stride, 512, format);
2965
return true;
2966
}
2967
2968
int w = vfb->renderWidth, h = vfb->renderHeight;
2969
2970
Draw::Framebuffer *bound = nullptr;
2971
2972
if (vfb->fbo) {
2973
if (maxScaleFactor > 0 && vfb->renderWidth > vfb->width * maxScaleFactor) {
2974
w = vfb->width * maxScaleFactor;
2975
h = vfb->height * maxScaleFactor;
2976
2977
Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::COPY, w, h);
2978
VirtualFramebuffer tempVfb = *vfb;
2979
tempVfb.fbo = tempFBO;
2980
tempVfb.bufferWidth = vfb->width;
2981
tempVfb.bufferHeight = vfb->height;
2982
tempVfb.renderWidth = w;
2983
tempVfb.renderHeight = h;
2984
tempVfb.renderScaleFactor = maxScaleFactor;
2985
BlitFramebuffer(&tempVfb, 0, 0, vfb, 0, 0, vfb->width, vfb->height, 0, RASTER_COLOR, "Blit_GetFramebuffer");
2986
2987
bound = tempFBO;
2988
} else {
2989
bound = vfb->fbo;
2990
}
2991
}
2992
2993
if (!useBufferedRendering_) {
2994
// Safety check.
2995
w = std::min(w, PSP_CoreParameter().pixelWidth);
2996
h = std::min(h, PSP_CoreParameter().pixelHeight);
2997
}
2998
2999
// TODO: Maybe should handle flipY inside CopyFramebufferToMemorySync somehow?
3000
bool flipY = (GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_) ? true : false;
3001
buffer.Allocate(w, h, GE_FORMAT_8888, flipY);
3002
bool retval = draw_->CopyFramebufferToMemory(bound, Draw::FB_COLOR_BIT, 0, 0, w, h, Draw::DataFormat::R8G8B8A8_UNORM, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetFramebuffer");
3003
3004
// Don't need to increment gpu stats for readback count here, this is a debugger-only function.
3005
3006
// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
3007
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
3008
// We may have blitted to a temp FBO.
3009
RebindFramebuffer("RebindFramebuffer - GetFramebuffer");
3010
return retval;
3011
}
3012
3013
// Synchronously reads the depth channel into `buffer`.
// Uses the current render framebuffer if there is one, otherwise the framebuffer found at
// fb_address. With no framebuffer at all, `buffer` is pointed at PSP memory at z_address
// (false is returned if that address is invalid).
// Returns whether the readback succeeded.
bool FramebufferManagerCommon::GetDepthbuffer(u32 fb_address, int fb_stride, u32 z_address, int z_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_ ? currentRenderVfb_ : GetVFBAt(fb_address);

	if (!vfb) {
		if (!Memory::IsValidAddress(z_address)) {
			return false;
		}
		// If there's no vfb and we're drawing there, must be memory?
		buffer = GPUDebugBuffer(Memory::GetPointerWriteUnchecked(z_address), z_stride, 512, GPU_DBG_FORMAT_16BIT);
		return true;
	}

	int w = vfb->renderWidth;
	int h = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;

	// Old code
	const bool scaledDepth = gstate_c.Use(GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT);
	buffer.Allocate(w, h, scaledDepth ? GPU_DBG_FORMAT_FLOAT_DIV_256 : GPU_DBG_FORMAT_FLOAT, flipY);

	// No need to free on failure, that's the caller's job (it likely will reuse a buffer.)
	bool success = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_DEPTH_BIT, 0, 0, w, h, Draw::DataFormat::D32F, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetDepthBuffer");
	if (!success) {
		// Try ReadbackDepthbufferSync, in case GLES.
		buffer.Allocate(w, h, GPU_DBG_FORMAT_16BIT, flipY);
		success = ReadbackDepthbuffer(vfb->fbo, 0, 0, w, h, (uint16_t *)buffer.GetData(), w, w, h, Draw::ReadbackMode::BLOCK);
	}

	// After a readback we'll have flushed and started over, need to dirty a bunch of things to be safe.
	gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetDepthbuffer");
	return success;
}
3057
3058
// Synchronously reads the stencil channel into `buffer` as 8-bit values.
// Uses the current render framebuffer if there is one, otherwise the framebuffer found at
// fb_address. With no framebuffer at all, `buffer` is pointed at PSP memory at fb_address
// as 8888 data (false is returned if that address is invalid).
// Returns whether the readback succeeded.
bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, GPUDebugBuffer &buffer) {
	VirtualFramebuffer *vfb = currentRenderVfb_ ? currentRenderVfb_ : GetVFBAt(fb_address);

	if (!vfb) {
		if (!Memory::IsValidAddress(fb_address)) {
			return false;
		}
		// If there's no vfb and we're drawing there, must be memory?
		// TODO: Actually get the stencil.
		buffer = GPUDebugBuffer(Memory::GetPointerWrite(fb_address), fb_stride, 512, GPU_DBG_FORMAT_8888);
		return true;
	}

	int w = vfb->renderWidth;
	int h = vfb->renderHeight;
	if (!useBufferedRendering_) {
		// Safety check.
		w = std::min(w, PSP_CoreParameter().pixelWidth);
		h = std::min(h, PSP_CoreParameter().pixelHeight);
	}

	const bool flipY = GetGPUBackend() == GPUBackend::OPENGL && !useBufferedRendering_;
	// No need to free on failure, the caller/destructor will do that. Usually this is a reused buffer, anyway.
	buffer.Allocate(w, h, GPU_DBG_FORMAT_8BIT, flipY);

	// Direct copy first; fall back to ReadbackStencilbuffer() if that fails.
	bool success = draw_->CopyFramebufferToMemory(vfb->fbo, Draw::FB_STENCIL_BIT, 0, 0, w, h, Draw::DataFormat::S8, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetStencilbuffer");
	if (!success) {
		success = ReadbackStencilbuffer(vfb->fbo, 0, 0, w, h, buffer.GetData(), w, Draw::ReadbackMode::BLOCK);
	}

	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetStencilbuffer");
	return success;
}
3092
3093
// Reads the color contents of the draw context's default framebuffer (nullptr target)
// into `buffer`. BGRA is used when the context prefers it, RGBA otherwise.
// Returns the result of the copy.
bool GetOutputFramebuffer(Draw::DrawContext *draw, GPUDebugBuffer &buffer) {
	int w, h;
	draw->GetFramebufferDimensions(nullptr, &w, &h);

	// Ignore preferred formats other than BGRA.
	const bool wantsBGRA = draw->PreferredFramebufferReadbackFormat(nullptr) == Draw::DataFormat::B8G8R8A8_UNORM;
	const Draw::DataFormat fmt = wantsBGRA ? Draw::DataFormat::B8G8R8A8_UNORM : Draw::DataFormat::R8G8B8A8_UNORM;

	// The buffer is marked as flipped when the configured backend is OpenGL.
	const bool flipped = g_Config.iGPUBackend == (int)GPUBackend::OPENGL;

	buffer.Allocate(w, h, wantsBGRA ? GPU_DBG_FORMAT_8888_BGRA : GPU_DBG_FORMAT_8888, flipped);
	return draw->CopyFramebufferToMemory(nullptr, Draw::FB_COLOR_BIT, 0, 0, w, h, fmt, buffer.GetData(), w, Draw::ReadbackMode::BLOCK, "GetOutputFramebuffer");
}
3106
3107
// Member wrapper around the free ::GetOutputFramebuffer() that uses this manager's draw
// context and rebinds the current framebuffer afterwards.
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	const bool success = ::GetOutputFramebuffer(draw_, buffer);
	// That may have unbound the framebuffer, rebind to avoid crashes when debugging.
	RebindFramebuffer("RebindFramebuffer - GetOutputFramebuffer");
	return success;
}
3113
3114
// This reads a channel of a framebuffer into emulated PSP VRAM, taking care of scaling down as needed.
3115
//
3116
// Color conversion is currently done on CPU but should theoretically be done on GPU.
3117
// (Except using the GPU might cause problems because of various implementations'
3118
// dithering behavior and games that expect exact colors like Danganronpa, so we
3119
// can't entirely be rid of the CPU path.) -- unknown
3120
void FramebufferManagerCommon::ReadbackFramebuffer(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
3121
if (w <= 0 || h <= 0) {
3122
ERROR_LOG(Log::FrameBuf, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
3123
return;
3124
}
3125
3126
// Note that ReadbackDepthBufferSync can stretch on its own while converting data format, so we don't need to downscale in that case.
3127
if (vfb->renderScaleFactor == 1 || channel == RASTER_DEPTH) {
3128
// No need to stretch-blit
3129
} else {
3130
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, channel);
3131
if (nvfb) {
3132
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
3133
vfb = nvfb;
3134
}
3135
}
3136
3137
const u32 fb_address = channel == RASTER_COLOR ? vfb->fb_address : vfb->z_address;
3138
3139
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
3140
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
3141
3142
int stride = channel == RASTER_COLOR ? vfb->fb_stride : vfb->z_stride;
3143
3144
const int dstByteOffset = (y * stride + x) * dstBpp;
3145
// Leave the gap between the end of the last line and the full stride.
3146
// This is only used for the NotifyMemInfo range.
3147
const int dstSize = ((h - 1) * stride + w) * dstBpp;
3148
3149
if (!Memory::IsValidRange(fb_address + dstByteOffset, dstSize)) {
3150
ERROR_LOG_REPORT(Log::G3D, "ReadbackFramebufferSync would write outside of memory, ignoring");
3151
return;
3152
}
3153
3154
u8 *destPtr = Memory::GetPointerWriteUnchecked(fb_address + dstByteOffset);
3155
3156
// We always need to convert from the framebuffer native format.
3157
// Right now that's always 8888.
3158
DEBUG_LOG(Log::FrameBuf, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
3159
3160
if (channel == RASTER_DEPTH) {
3161
_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
3162
ReadbackDepthbuffer(vfb->fbo,
3163
x * vfb->renderScaleFactor, y * vfb->renderScaleFactor,
3164
w * vfb->renderScaleFactor, h * vfb->renderScaleFactor, (uint16_t *)destPtr, stride, w, h, mode);
3165
} else {
3166
draw_->CopyFramebufferToMemory(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, mode, "ReadbackFramebufferSync");
3167
}
3168
3169
char tag[128];
3170
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
3171
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
3172
3173
if (mode == Draw::ReadbackMode::BLOCK) {
3174
gpuStats.numBlockingReadbacks++;
3175
} else {
3176
gpuStats.numReadbacks++;
3177
}
3178
}
3179
3180
// Reads the stencil values of the given rectangle of `fbo` into `pixels` as S8 data.
//   fbo          - framebuffer to read from
//   x, y, w, h   - rectangle to read
//   pixels       - destination buffer
//   pixelsStride - destination stride, passed through to the draw context
//   mode         - readback mode, passed through to the draw context
// Returns the result of the draw context's copy.
bool FramebufferManagerCommon::ReadbackStencilbuffer(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride, Draw::ReadbackMode mode) {
	// The requested data format is S8, so ask for the stencil channel (as GetStencilbuffer
	// does for the same format) rather than the depth channel.
	return draw_->CopyFramebufferToMemory(fbo, Draw::FB_STENCIL_BIT, x, y, w, h, Draw::DataFormat::S8, pixels, pixelsStride, mode, "ReadbackStencilbufferSync");
}
3183
3184
// Downloads a rectangle of a virtual framebuffer's channel into PSP memory.
//   vfb        - framebuffer to read; the call is a no-op if it is null or has no FBO
//   x, y, w, h - rectangle to read (w is clamped to the buffer width)
//   channel    - which channel to read
//   mode       - readback mode, forwarded to ReadbackFramebuffer()
// Also tracks how often partial downloads happen: once more than
// FREQUENT_SEQUENTIAL_COPIES partial copies of the same buffer occur within one frame,
// gameUsesSequentialCopies_ is set and later calls read the whole framebuffer.
void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel, Draw::ReadbackMode mode) {
	// Validate before touching vfb: nothing can be read without a framebuffer object.
	if (!vfb || !vfb->fbo) {
		return;
	}

	// Clamp to bufferWidth. Sometimes block transfers can cause this to hit.
	if (x + w >= vfb->bufferWidth) {
		w = vfb->bufferWidth - x;
	}

	if (gameUsesSequentialCopies_) {
		// Ignore the x/y/etc., read the entire thing. See below.
		x = 0;
		y = 0;
		w = vfb->width;
		h = vfb->height;
		vfb->memoryUpdated = true;
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
	} else if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
		// Mark it as fully downloaded until next render to it.
		if (channel == RASTER_COLOR)
			vfb->memoryUpdated = true;
		vfb->usageFlags |= FB_USAGE_DOWNLOAD;
	} else {
		// Let's try to set the flag eventually, if the game copies a lot.
		// Some games (like Grand Knights History) copy subranges very frequently.
		const static int FREQUENT_SEQUENTIAL_COPIES = 3;
		// Function-local statics: the counter restarts whenever the frame or the buffer changes.
		static int frameLastCopy = 0;
		static u32 bufferLastCopy = 0;
		static int copiesThisFrame = 0;
		if (frameLastCopy != gpuStats.numFlips || bufferLastCopy != vfb->fb_address) {
			frameLastCopy = gpuStats.numFlips;
			bufferLastCopy = vfb->fb_address;
			copiesThisFrame = 0;
		}
		if (++copiesThisFrame > FREQUENT_SEQUENTIAL_COPIES) {
			gameUsesSequentialCopies_ = true;
		}
	}

	// This handles any required stretching internally.
	ReadbackFramebuffer(vfb, x, y, w, h, channel, mode);

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	textureCache_->ForgetLastTexture();
	RebindFramebuffer("RebindFramebuffer - ReadFramebufferToMemory");
}
3228
3229
void FramebufferManagerCommon::FlushBeforeCopy() {
3230
// Flush anything not yet drawn before blitting, downloading, or uploading.
3231
// This might be a stalled list, or unflushed before a block transfer, etc.
3232
// Only bother if any draws are pending.
3233
if (drawEngine_->GetNumDrawCalls() > 0) {
3234
// TODO: It's really bad that we are calling SetRenderFramebuffer here with
3235
// all the irrelevant state checking it'll use to decide what to do. Should
3236
// do something more focused here.
3237
SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
3238
drawEngine_->DispatchFlush();
3239
}
3240
}
3241
3242
// TODO: Replace with with depal, reading the palette from the texture on the GPU directly.
3243
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
3244
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
3245
if (vfb && vfb->fb_stride != 0) {
3246
const u32 bpp = BufferFormatBytesPerPixel(vfb->fb_format);
3247
int x = 0;
3248
int y = 0;
3249
int pixels = loadBytes / bpp;
3250
// The height will be 1 for each stride or part thereof.
3251
int w = std::min(pixels % vfb->fb_stride, (int)vfb->width);
3252
int h = std::min((pixels + vfb->fb_stride - 1) / vfb->fb_stride, (int)vfb->height);
3253
3254
if (w == 0 || h > 1) {
3255
// Exactly aligned, or more than one row.
3256
w = std::min(vfb->fb_stride, vfb->width);
3257
}
3258
3259
// We might still have a pending draw to the fb in question, flush if so.
3260
FlushBeforeCopy();
3261
3262
// No need to download if we already have it.
3263
if (w > 0 && h > 0 && !vfb->memoryUpdated && vfb->clutUpdatedBytes < loadBytes) {
3264
// We intentionally don't try to optimize into a full download here - we don't want to over download.
3265
3266
// CLUT framebuffers are often incorrectly estimated in size.
3267
if (x == 0 && y == 0 && w == vfb->width && h == vfb->height) {
3268
vfb->memoryUpdated = true;
3269
}
3270
vfb->clutUpdatedBytes = loadBytes;
3271
3272
// This function now handles scaling down internally.
3273
ReadbackFramebuffer(vfb, x, y, w, h, RASTER_COLOR, Draw::ReadbackMode::BLOCK);
3274
3275
textureCache_->ForgetLastTexture();
3276
RebindFramebuffer("RebindFramebuffer - DownloadFramebufferForClut");
3277
}
3278
}
3279
}
3280
3281
void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
3282
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
3283
shaderManager_->DirtyLastShader();
3284
// Needed for D3D11 to run validation clean. I don't think it's actually an issue.
3285
// textureCache_->ForgetLastTexture();
3286
if (currentRenderVfb_ && currentRenderVfb_->fbo) {
3287
draw_->BindFramebufferAsRenderTarget(currentRenderVfb_->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag);
3288
} else {
3289
// This can happen (like it does in Parappa) when a frame starts with copies instead of rendering.
3290
// Let's do nothing and assume it'll take care of itself.
3291
}
3292
}
3293
3294
std::vector<const VirtualFramebuffer *> FramebufferManagerCommon::GetFramebufferList() const {
3295
std::vector<const VirtualFramebuffer *> list;
3296
for (auto vfb : vfbs_) {
3297
list.push_back(vfb);
3298
}
3299
return list;
3300
}
3301
3302
// Calls Release() on *obj if it is non-null and leaves the pointer set to nullptr.
template <typename T>
static void DoRelease(T *&obj) {
	if (obj != nullptr) {
		obj->Release();
		obj = nullptr;
	}
}
3308
3309
void FramebufferManagerCommon::ReleasePipelines() {
3310
for (int i = 0; i < ARRAY_SIZE(reinterpretFromTo_); i++) {
3311
for (int j = 0; j < ARRAY_SIZE(reinterpretFromTo_); j++) {
3312
DoRelease(reinterpretFromTo_[i][j]);
3313
}
3314
}
3315
DoRelease(stencilWriteSampler_);
3316
DoRelease(stencilWritePipeline_);
3317
DoRelease(stencilReadbackSampler_);
3318
DoRelease(stencilReadbackPipeline_);
3319
DoRelease(depthReadbackSampler_);
3320
DoRelease(depthReadbackPipeline_);
3321
DoRelease(draw2DPipelineCopyColor_);
3322
DoRelease(draw2DPipelineColorRect2Lin_);
3323
DoRelease(draw2DPipelineCopyDepth_);
3324
DoRelease(draw2DPipelineEncodeDepth_);
3325
DoRelease(draw2DPipeline565ToDepth_);
3326
DoRelease(draw2DPipeline565ToDepthDeswizzle_);
3327
}
3328
3329
void FramebufferManagerCommon::DeviceLost() {
3330
DestroyAllFBOs();
3331
3332
presentation_->DeviceLost();
3333
draw2D_.DeviceLost();
3334
3335
ReleasePipelines();
3336
3337
draw_ = nullptr;
3338
}
3339
3340
// Adopts a new draw context and hands it on to the 2D-draw and presentation helpers.
void FramebufferManagerCommon::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;

	draw2D_.DeviceRestore(draw_);
	presentation_->DeviceRestore(draw_);
}
3345
3346
void FramebufferManagerCommon::DrawActiveTexture(float x, float y, float w, float h, float destW, float destH, float u0, float v0, float u1, float v1, int uvRotation, int flags) {
3347
// Will be drawn as a strip.
3348
Draw2DVertex coord[4] = {
3349
{x, y, u0, v0},
3350
{x + w, y, u1, v0},
3351
{x + w, y + h, u1, v1},
3352
{x, y + h, u0, v1},
3353
};
3354
3355
if (uvRotation != ROTATION_LOCKED_HORIZONTAL) {
3356
float temp[8];
3357
int rotation = 0;
3358
switch (uvRotation) {
3359
case ROTATION_LOCKED_HORIZONTAL180: rotation = 2; break;
3360
case ROTATION_LOCKED_VERTICAL: rotation = 1; break;
3361
case ROTATION_LOCKED_VERTICAL180: rotation = 3; break;
3362
}
3363
for (int i = 0; i < 4; i++) {
3364
temp[i * 2] = coord[((i + rotation) & 3)].u;
3365
temp[i * 2 + 1] = coord[((i + rotation) & 3)].v;
3366
}
3367
3368
for (int i = 0; i < 4; i++) {
3369
coord[i].u = temp[i * 2];
3370
coord[i].v = temp[i * 2 + 1];
3371
}
3372
}
3373
3374
const float invDestW = 2.0f / destW;
3375
const float invDestH = 2.0f / destH;
3376
for (int i = 0; i < 4; i++) {
3377
coord[i].x = coord[i].x * invDestW - 1.0f;
3378
coord[i].y = coord[i].y * invDestH - 1.0f;
3379
}
3380
3381
if ((flags & DRAWTEX_TO_BACKBUFFER) && g_display.rotation != DisplayRotation::ROTATE_0) {
3382
for (int i = 0; i < 4; i++) {
3383
// backwards notation, should fix that...
3384
Lin::Vec3 pos = Lin::Vec3(coord[i].x, coord[i].y, 0.0);
3385
pos = pos * g_display.rot_matrix;
3386
coord[i].x = pos.x;
3387
coord[i].y = pos.y;
3388
}
3389
}
3390
3391
// Rearrange to strip form.
3392
std::swap(coord[2], coord[3]);
3393
3394
draw2D_.DrawStrip2D(nullptr, coord, 4, (flags & DRAWTEX_LINEAR) != 0, Get2DPipeline((flags & DRAWTEX_DEPTH) ? DRAW2D_ENCODE_R16_TO_DEPTH : DRAW2D_COPY_COLOR));
3395
3396
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3397
}
3398
3399
// Copies a rectangle of one virtual framebuffer's channel into another (or the same) one.
//   dst, dstX, dstY - destination framebuffer and top-left corner, in PSP pixels
//   src, srcX, srcY - source framebuffer and top-left corner, in PSP pixels
//   w, h            - rectangle size in PSP pixels; clipped against both buffers
//   bpp             - bytes per pixel the coordinates are expressed in, or 0 to use each
//                     buffer's own format without horizontal rescaling
//   channel         - channel to copy
//   tag             - debug tag passed on to the draw context
// Depending on device caps and state, the copy is done via an image copy, a framebuffer
// blit, or a raster draw. The call silently does nothing when either FBO is missing, when
// buffered rendering is off, when the rectangle is fully clipped, or when depth writes
// from the fragment shader are needed but unsupported.
void FramebufferManagerCommon::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int dstY, VirtualFramebuffer *src, int srcX, int srcY, int w, int h, int bpp, RasterChannel channel, const char *tag) {
	if (!dst->fbo || !src->fbo || !useBufferedRendering_) {
		// This can happen if they recently switched from non-buffered.
		if (useBufferedRendering_) {
			// Just bind the back buffer for rendering, forget about doing anything else as we're in a weird state.
			draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, "BlitFramebuffer");
		}
		return;
	}

	if (channel == RASTER_DEPTH && !draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported) {
		// Can't do anything :(
		return;
	}

	// Perform a little bit of clipping first.
	// Block transfer coords are unsigned so I don't think we need to clip on the left side.. Although there are
	// other uses for BlitFramebuffer.
	if (dstX + w > dst->bufferWidth) {
		w -= dstX + w - dst->bufferWidth;
	}
	if (dstY + h > dst->bufferHeight) {
		h -= dstY + h - dst->bufferHeight;
	}
	if (srcX + w > src->bufferWidth) {
		w -= srcX + w - src->bufferWidth;
	}
	if (srcY + h > src->bufferHeight) {
		h -= srcY + h - src->bufferHeight;
	}

	if (w <= 0 || h <= 0) {
		// The whole rectangle got clipped.
		return;
	}

	// Image copies and framebuffer blits are only considered for the color channel.
	bool useBlit = channel == RASTER_COLOR ? draw_->GetDeviceCaps().framebufferBlitSupported : false;
	bool useCopy = channel == RASTER_COLOR ? draw_->GetDeviceCaps().framebufferCopySupported : false;
	if (dst == currentRenderVfb_ || dst->fbo->MultiSampleLevel() != 0 || src->fbo->MultiSampleLevel() != 0) {
		// If already bound, using either a blit or a copy is unlikely to be an optimization.
		// So we're gonna use a raster draw instead. Also multisampling has problems with copies currently.
		useBlit = false;
		useCopy = false;
	}

	// Convert the source rectangle from PSP pixels to render pixels.
	float srcXFactor = src->renderScaleFactor;
	float srcYFactor = src->renderScaleFactor;
	const int srcBpp = BufferFormatBytesPerPixel(src->Format(channel));
	if (srcBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		srcXFactor = (srcXFactor * bpp) / srcBpp;
	}
	int srcX1 = srcX * srcXFactor;
	int srcX2 = (srcX + w) * srcXFactor;
	int srcY1 = srcY * srcYFactor;
	int srcY2 = (srcY + h) * srcYFactor;

	// Same conversion for the destination rectangle.
	float dstXFactor = dst->renderScaleFactor;
	float dstYFactor = dst->renderScaleFactor;
	const int dstBpp = BufferFormatBytesPerPixel(dst->Format(channel));
	if (dstBpp != bpp && bpp != 0) {
		// If we do this, we're kinda in nonsense territory since the actual formats won't match (unless intentionally blitting black or white).
		dstXFactor = (dstXFactor * bpp) / dstBpp;
	}
	int dstX1 = dstX * dstXFactor;
	int dstX2 = (dstX + w) * dstXFactor;
	int dstY1 = dstY * dstYFactor;
	int dstY2 = (dstY + h) * dstYFactor;

	if (src == dst && srcX == dstX && srcY == dstY) {
		// Let's just skip a copy where the destination is equal to the source.
		WARN_LOG_REPORT_ONCE(blitSame, Log::G3D, "Skipped blit with equal dst and src");
		return;
	}

	if (useCopy) {
		// glBlitFramebuffer can clip, but glCopyImageSubData is more restricted.
		// In case the src goes outside, we just skip the optimization in that case.
		const bool sameSize = dstX2 - dstX1 == srcX2 - srcX1 && dstY2 - dstY1 == srcY2 - srcY1;
		const bool srcInsideBounds = srcX2 <= src->renderWidth && srcY2 <= src->renderHeight;
		const bool dstInsideBounds = dstX2 <= dst->renderWidth && dstY2 <= dst->renderHeight;
		const bool xOverlap = src == dst && srcX2 > dstX1 && srcX1 < dstX2;
		const bool yOverlap = src == dst && srcY2 > dstY1 && srcY1 < dstY2;
		if (sameSize && srcInsideBounds && dstInsideBounds && !(xOverlap && yOverlap)) {
			draw_->CopyFramebufferImage(src->fbo, 0, srcX1, srcY1, 0, dst->fbo, 0, dstX1, dstY1, 0, dstX2 - dstX1, dstY2 - dstY1, 1,
				channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, tag);
			return;
		}
	}

	if (useBlit) {
		draw_->BlitFramebuffer(src->fbo, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2,
			channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::FB_BLIT_NEAREST, tag);
	} else {
		Draw2DPipeline *pipeline = Get2DPipeline(channel == RASTER_COLOR ? DRAW2D_COPY_COLOR : DRAW2D_COPY_DEPTH);
		Draw::Framebuffer *srcFBO = src->fbo;
		if (src == dst) {
			// Blitting within one framebuffer goes through a temp FBO.
			Draw::Framebuffer *tempFBO = GetTempFBO(TempFBO::BLIT, src->renderWidth, src->renderHeight);
			if (!tempFBO) {
				// GetTempFBO returns nullptr when creation fails, and BlitUsingRaster
				// dereferences its destination, so give up on this blit instead of crashing.
				return;
			}
			BlitUsingRaster(src->fbo, srcX1, srcY1, srcX2, srcY2, tempFBO, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, tag);
			srcFBO = tempFBO;
		}
		BlitUsingRaster(srcFBO, srcX1, srcY1, srcX2, srcY2, dst->fbo, dstX1, dstY1, dstX2, dstY2, false, dst->renderScaleFactor, pipeline, tag);
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);

	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
}
3507
3508
// The input is raw pixel coordinates, scale not taken into account.
3509
void FramebufferManagerCommon::BlitUsingRaster(
3510
Draw::Framebuffer *src, float srcX1, float srcY1, float srcX2, float srcY2,
3511
Draw::Framebuffer *dest, float destX1, float destY1, float destX2, float destY2,
3512
bool linearFilter,
3513
int scaleFactor,
3514
Draw2DPipeline *pipeline, const char *tag) {
3515
3516
if (pipeline->info.writeChannel == RASTER_DEPTH) {
3517
_dbg_assert_(draw_->GetDeviceCaps().fragmentShaderDepthWriteSupported);
3518
}
3519
3520
int destW, destH, srcW, srcH;
3521
draw_->GetFramebufferDimensions(src, &srcW, &srcH);
3522
draw_->GetFramebufferDimensions(dest, &destW, &destH);
3523
3524
// Unbind the texture first to avoid the D3D11 hazard check (can't set render target to things bound as textures and vice versa, not even temporarily).
3525
draw_->BindTexture(0, nullptr);
3526
// This will get optimized away in case it's already bound (in VK and GL at least..)
3527
draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster");
3528
draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, Draw::ALL_LAYERS);
3529
3530
if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) {
3531
// We overwrite the whole channel of the framebuffer, so we can invalidate the current contents.
3532
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT);
3533
}
3534
3535
Draw::Viewport viewport{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f };
3536
draw_->SetViewport(viewport);
3537
draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height());
3538
3539
draw2D_.Blit(pipeline, srcX1, srcY1, srcX2, srcY2, destX1, destY1, destX2, destY2, (float)srcW, (float)srcH, (float)destW, (float)destH, linearFilter, scaleFactor);
3540
3541
gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
3542
}
3543
3544
int FramebufferManagerCommon::GetFramebufferLayers() const {
3545
int layers = 1;
3546
if (gstate_c.Use(GPU_USE_SINGLE_PASS_STEREO)) {
3547
layers = 2;
3548
}
3549
return layers;
3550
}
3551
3552
// Returns a virtual framebuffer at the same address and byte stride as `src` but with
// color format `newFormat`, creating a clone of `src` if none exists yet. The result's
// color is then filled in from overlapping framebuffers and it is marked as most recently
// bound. `src` is expected to have a format different from `newFormat`.
VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(VirtualFramebuffer *src, GEBufferFormat newFormat) {
	_dbg_assert_(src->fb_format != newFormat);

	// Look for an identical framebuffer with the new format.
	VirtualFramebuffer *vfb = nullptr;
	for (auto candidate : vfbs_) {
		if (candidate == src) {
			continue;
		}

		const bool sameAddress = candidate->fb_address == src->fb_address;

		// Sanity check for things that shouldn't exist.
		if (sameAddress && candidate->fb_format == src->fb_format && candidate->fb_stride == src->fb_stride) {
			_dbg_assert_msg_(false, "illegal clone of src found");
		}

		if (sameAddress && candidate->FbStrideInBytes() == src->FbStrideInBytes() && candidate->fb_format == newFormat) {
			vfb = candidate;
			break;
		}
	}

	if (!vfb) {
		// Create a clone!
		vfb = new VirtualFramebuffer();
		*vfb = *src;  // Copies everything, but watch out! Can't copy fbo.

		// Adjust width by bpp.
		float widthFactor = (float)BufferFormatBytesPerPixel(vfb->fb_format) / (float)BufferFormatBytesPerPixel(newFormat);

		vfb->width *= widthFactor;
		vfb->bufferWidth *= widthFactor;
		vfb->renderWidth *= widthFactor;
		vfb->drawnWidth *= widthFactor;
		vfb->newWidth *= widthFactor;
		vfb->safeWidth *= widthFactor;

		vfb->fb_format = newFormat;
		// stride stays the same since it's in pixels.

		WARN_LOG(Log::FrameBuf, "Creating %s clone of %08x/%08x/%s (%dx%d -> %dx%d)", GeBufferFormatToString(newFormat), src->fb_address, src->z_address, GeBufferFormatToString(src->fb_format), src->width, src->height, vfb->width, vfb->height);

		// The clone gets its own freshly created FBO, replacing the copied pointer.
		char tag[128];
		FormatFramebufferName(vfb, tag, sizeof(tag));
		vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
		vfbs_.push_back(vfb);
	}

	// OK, now resolve it so we can texture from it.
	// This will do any necessary reinterprets.
	CopyToColorFromOverlappingFramebuffers(vfb);
	// Now we consider the resolved one the latest at the address (though really, we could make them equivalent?).
	vfb->colorBindSeq = GetBindSeqCount();
	return vfb;
}
3606
3607
// Killzone-specific heuristic: decides whether the current draw targets the 32-pixel
// margin to the right of the 480-pixel-wide surface. For the margin, the render target
// offset and the addresses in `params` are shifted and *drawing_width is set to 32;
// otherwise the offset is reset and *drawing_width is set to 480.
static void ApplyKillzoneFramebufferSplit(FramebufferHeuristicParams *params, int *drawing_width) {
	// Detect whether we're rendering to the margin.
	bool margin = false;
	const bool scissorIsMarginWide = (params->scissorRight - params->scissorLeft) == 32;
	if (scissorIsMarginWide) {
		// Title screen has this easy case. It also uses non-through verts, so lucky for us that we have this.
		margin = true;
	} else if (params->scissorRight != 480) {
		// Go deep, look at the vertices. Killzone-specific, of course.
		if ((gstate.vertType & 0xFFFFFF) == 0x00800102) { // through, u16, s16
			u16 *vdata = (u16 *)Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			const int v0PosU = vdata[0];
			const int v0PosX = vdata[2];
			if (v0PosX >= 480 && v0PosU < 480) {
				// Texturing from surface, writing to margin
				margin = true;
			}
		}

		// TODO: Implement this for Burnout Dominator. It has to handle self-reads inside
		// the margin framebuffer though, so framebuffer copies are still needed, just smaller.
		// It uses 0x0080019f (through, float texcoords, ABGR 8888 colors, float positions).
	}
	// A scissor ending exactly at 480 is treated as the main surface.

	if (!margin) {
		gstate_c.SetCurRTOffset(0, 0);
		*drawing_width = 480;
		return;
	}

	gstate_c.SetCurRTOffset(-480, 0);
	// Modify the fb_address and z_address too to avoid matching below.
	params->fb_address += 480 * 4;
	params->z_address += 480 * 2;
	*drawing_width = 32;
}
3644
3645