CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/TextureCacheCommon.h
Views: 1401
1
// Copyright (c) 2013- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include <map>
21
#include <vector>
22
#include <memory>
23
24
#include "Common/CommonTypes.h"
25
#include "Common/MemoryUtil.h"
26
#include "Core/System.h"
27
#include "GPU/GPU.h"
28
#include "GPU/Common/GPUDebugInterface.h"
29
#include "GPU/Common/TextureDecoder.h"
30
#include "GPU/Common/TextureScalerCommon.h"
31
#include "GPU/Common/TextureShaderCommon.h"
32
#include "GPU/Common/TextureReplacer.h"
33
34
class Draw2D;
35
36
// Framebuffer lifecycle events passed to TextureCacheCommon::NotifyFramebuffer.
enum FramebufferNotification {
	NOTIFY_FB_CREATED,
	NOTIFY_FB_UPDATED,
	NOTIFY_FB_DESTROYED,
};
41
42
// Changes more frequent than this (in frames) will be considered "frequent" and prevent texture scaling.
#define TEXCACHE_FRAME_CHANGE_FREQUENT 6
// Note: only used when hash backoff is disabled.
#define TEXCACHE_FRAME_CHANGE_FREQUENT_REGAIN_TRUST 33

// Texel budget for texture scaling.
#define TEXCACHE_MAX_TEXELS_SCALED (256*256) // Per frame
48
49
// Forward declarations: these types are only used through pointers/references in this header.
struct VirtualFramebuffer;
class TextureReplacer;
class ShaderManagerCommon;
52
53
enum class TexDecodeFlags {
54
EXPAND32 = 1,
55
REVERSE_COLORS = 2,
56
TO_CLUT8 = 4,
57
};
58
ENUM_CLASS_BITOPS(TexDecodeFlags);
59
60
// Forward declarations of the Draw types referenced by pointer in this header.
namespace Draw {
class DrawContext;
class Texture;
}
64
65
// Used by D3D11 and Vulkan, could be used by modern GL.
// The individual sampler parameters are overlaid with a 64-bit fullKey, which is
// what ordering and (de)serialization operate on.
struct SamplerCacheKey {
	union {
		uint64_t fullKey;
		struct {
			// These are 8.8 fixed point.
			int16_t maxLevel;
			int16_t minLevel;
			int16_t lodBias;

			bool mipEnable : 1;
			bool minFilt : 1;
			bool mipFilt : 1;
			bool magFilt : 1;
			bool sClamp : 1;
			bool tClamp : 1;
			bool aniso : 1;
			bool texture3d : 1;
		};
	};
	// Strict ordering on the packed key.
	bool operator < (const SamplerCacheKey &other) const {
		return fullKey < other.fullKey;
	}
	// Writes the raw bytes of this key into *str (resized to exactly sizeof(*this)).
	void ToString(std::string *str) const {
		str->resize(sizeof(*this));
		memcpy(&(*str)[0], this, sizeof(*this));
	}
	// Restores a key from bytes produced by ToString().
	// A string shorter than the key would make memcpy read past the end of the
	// buffer, so truncated input resets the key to zero instead.
	void FromString(const std::string &str) {
		if (str.size() < sizeof(*this)) {
			fullKey = 0;
			return;
		}
		memcpy(this, str.data(), sizeof(*this));
	}
};
96
97
// Backend texture types, only referenced by pointer in TexCacheEntry.
class GLRTexture;
class VulkanTexture;
99
100
// Allow the extra bits from the remasters for the purposes of this.
101
inline int dimWidth(u16 dim) {
102
return 1 << (dim & 0xFF);
103
}
104
105
// Returns 2 raised to the exponent stored in the high byte of dim.
// NOTE(review): an exponent of 31 or more does not fit in an int shift; callers
// are presumably expected to pass sane dimensions - confirm.
inline int dimHeight(u16 dim) {
	return 1 << ((dim >> 8) & 0xFF);
}
108
109
// Enough information about a texture to match it to framebuffers.
110
struct TextureDefinition {
111
u32 addr;
112
u16 bufw;
113
u16 dim;
114
GETextureFormat format;
115
};
116
117
// Texture replacement state machine:
118
// Call FindReplacement during PrepareBuild.
119
// If replacedTexture gets set: If not found, -> STATUS_TO_REPLACE, otherwise directly -> STATUS_IS_SCALED.
120
// If replacedTexture is null, leave it at null.
121
// If replacedTexture is set in SetTexture and STATUS_IS_SCALED is not set, query status. If ready rebuild texture, which will set STATUS_IS_SCALED.
122
123
// NOTE: These only handle textures loaded directly from PSP memory contents.
124
// Framebuffer textures do not have entries, we bind the framebuffers directly.
125
// At one point we might merge the concepts of framebuffers and textures, but that
126
// moment is far away.
127
128
// TODO: Shrink this struct. There is some fluff.
129
struct TexCacheEntry {
130
~TexCacheEntry() {
131
if (texturePtr || textureName || vkTex)
132
Crash();
133
}
134
// After marking STATUS_UNRELIABLE, if it stays the same this many frames we'll trust it again.
135
const static int FRAMES_REGAIN_TRUST = 1000;
136
137
enum TexStatus {
138
STATUS_HASHING = 0x00,
139
STATUS_RELIABLE = 0x01, // Don't bother rehashing.
140
STATUS_UNRELIABLE = 0x02, // Always recheck hash.
141
STATUS_MASK = 0x03,
142
143
STATUS_ALPHA_UNKNOWN = 0x04,
144
STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.
145
STATUS_ALPHA_MASK = 0x04,
146
147
STATUS_CLUT_VARIANTS = 0x08, // Has multiple CLUT variants.
148
STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 6 frames in between.)
149
STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.
150
STATUS_TO_SCALE = 0x80, // Pending texture scaling in a later frame.
151
STATUS_IS_SCALED_OR_REPLACED = 0x100, // Has been scaled already (ignored for replacement checks).
152
STATUS_TO_REPLACE = 0x0200, // Pending texture replacement.
153
// When hashing large textures, we optimize 512x512 down to 512x272 by default, since this
154
// is commonly the only part accessed. If access is made above 272, we hash the entire
155
// texture, and set this flag to allow scaling the texture just once for the new hash.
156
STATUS_FREE_CHANGE = 0x0400, // Allow one change before marking "frequent".
157
158
STATUS_NO_MIPS = 0x0800, // Has bad or unusable mipmap levels.
159
160
STATUS_FRAMEBUFFER_OVERLAP = 0x1000,
161
162
STATUS_FORCE_REBUILD = 0x2000,
163
164
STATUS_3D = 0x4000,
165
166
STATUS_CLUT_GPU = 0x8000,
167
168
STATUS_VIDEO = 0x10000,
169
STATUS_BGRA = 0x20000,
170
};
171
172
// TexStatus enum flag combination.
173
u32 status;
174
175
u32 addr;
176
u32 minihash;
177
u8 format; // GeTextureFormat
178
u8 maxLevel;
179
u16 dim;
180
u16 bufw;
181
union {
182
GLRTexture *textureName;
183
void *texturePtr;
184
VulkanTexture *vkTex;
185
};
186
#ifdef _WIN32
187
void *textureView; // Used by D3D11 only for the shader resource view.
188
#endif
189
int invalidHint;
190
int lastFrame;
191
int numFrames;
192
int numInvalidated;
193
u32 framesUntilNextFullHash;
194
u32 fullhash;
195
u32 cluthash;
196
u16 maxSeenV;
197
ReplacedTexture *replacedTexture;
198
199
TexStatus GetHashStatus() {
200
return TexStatus(status & STATUS_MASK);
201
}
202
void SetHashStatus(TexStatus newStatus) {
203
status = (status & ~STATUS_MASK) | newStatus;
204
}
205
TexStatus GetAlphaStatus() {
206
return TexStatus(status & STATUS_ALPHA_MASK);
207
}
208
void SetAlphaStatus(TexStatus newStatus) {
209
status = (status & ~STATUS_ALPHA_MASK) | newStatus;
210
}
211
void SetAlphaStatus(TexStatus newStatus, int level) {
212
// For non-level zero, only set more restrictive.
213
if (newStatus == STATUS_ALPHA_UNKNOWN || level == 0) {
214
SetAlphaStatus(newStatus);
215
}
216
}
217
void SetAlphaStatus(CheckAlphaResult alphaResult, int level) {
218
TexStatus newStatus = (TexStatus)alphaResult;
219
// For non-level zero, only set more restrictive.
220
if (newStatus == STATUS_ALPHA_UNKNOWN || level == 0) {
221
SetAlphaStatus(newStatus);
222
}
223
}
224
225
// This is the full size in RAM, not the half size we use sometimes as a "safe" underestimate.
226
u32 SizeInRAM() const {
227
return (textureBitsPerPixel[format] * bufw * dimHeight(dim)) / 8;
228
}
229
230
bool Matches(u16 dim2, u8 format2, u8 maxLevel2) const;
231
u64 CacheKey() const;
232
static u64 CacheKey(u32 addr, u8 format, u16 dim, u32 cluthash);
233
};
234
235
// Can't be unordered_map, we use lower_bound ... although for some reason that (used to?) compiles on MSVC.
236
// Would really like to replace this with DenseHashMap but can't as long as we need lower_bound.
237
typedef std::map<u64, std::unique_ptr<TexCacheEntry>> TexCache;
238
239
// Urgh. Presumably some platform header defines IGNORE as a macro; drop it so the
// identifier is usable in the code that follows - confirm which header.
#ifdef IGNORE
#undef IGNORE
#endif
243
244
struct FramebufferMatchInfo {
245
int16_t xOffset;
246
int16_t yOffset;
247
bool reinterpret;
248
GEBufferFormat reinterpretTo;
249
};
250
251
struct AttachCandidate {
252
VirtualFramebuffer *fb;
253
FramebufferMatchInfo match;
254
RasterChannel channel;
255
int relevancy;
256
257
std::string ToString() const;
258
};
259
260
class FramebufferManagerCommon;
261
262
struct BuildTexturePlan {
263
// Inputs
264
bool hardwareScaling = false;
265
bool slowScaler = true;
266
267
// Set if the PSP software specified an unusual mip chain,
268
// such as the same size throughout, or anything else that doesn't divide by
269
// two on each level. If this is set, we won't generate mips nor use any.
270
// However, we still respect baseLevelSrc.
271
bool badMipSizes;
272
273
// Number of mip levels to load from PSP memory (or replacement).
274
int levelsToLoad;
275
276
// The number of levels in total to create.
277
// If greater than maxLevelToLoad, the backend is expected to either generate
278
// the missing levels, or limit itself to levelsToLoad levels.
279
int levelsToCreate;
280
281
// The maximum number of mips levels we can create for this texture.
282
int maxPossibleLevels;
283
284
// Load the 0-mip from this PSP texture level instead of 0.
285
// If non-zero, we are only loading one level.
286
int baseLevelSrc;
287
288
// The scale factor of the final texture.
289
int scaleFactor;
290
291
// Whether it's a video texture or not. Some decisions might depend on this.
292
bool isVideo;
293
294
// Unscaled size of the 0-mip of the original texture.
295
// Don't really need to have it here, but convenient.
296
int w;
297
int h;
298
299
// Scaled (or replaced) size of the 0-mip of the final texture.
300
int createW;
301
int createH;
302
303
// Used for 3D textures only. If not a 3D texture, will be 1.
304
int depth;
305
306
// The replacement for the texture.
307
ReplacedTexture *replaced;
308
// Need to only check once since it can change during the load!
309
bool doReplace;
310
bool saveTexture;
311
312
// TODO: Expand32 should probably also be decided in PrepareBuildTexture.
313
bool decodeToClut8;
314
315
void GetMipSize(int level, int *w, int *h) const {
316
if (doReplace) {
317
replaced->GetSize(level, w, h);
318
return;
319
}
320
if (depth == 1) {
321
*w = createW >> level;
322
*h = createH >> level;
323
} else {
324
// 3D texture, we look for layers instead of levels.
325
*w = createW;
326
*h = createH;
327
}
328
}
329
};
330
331
class TextureCacheCommon {
332
public:
333
TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D);
334
virtual ~TextureCacheCommon();
335
336
void LoadClut(u32 clutAddr, u32 loadBytes);
337
bool GetCurrentClutBuffer(GPUDebugBuffer &buffer);
338
339
// This updates nextTexture_ / nextFramebufferTexture_, which is then used by ApplyTexture.
340
// TODO: Return stuff directly instead of keeping state.
341
TexCacheEntry *SetTexture();
342
343
void SetShaderManager(ShaderManagerCommon *sm) {
344
shaderManager_ = sm;
345
}
346
347
void ApplyTexture();
348
bool SetOffsetTexture(u32 yOffset);
349
void Invalidate(u32 addr, int size, GPUInvalidationType type);
350
void InvalidateAll(GPUInvalidationType type);
351
void ClearNextFrame();
352
353
TextureShaderCache *GetTextureShaderCache() { return textureShaderCache_; }
354
355
virtual void ForgetLastTexture() = 0;
356
virtual void Clear(bool delete_them);
357
virtual void NotifyConfigChanged();
358
virtual void ApplySamplingParams(const SamplerCacheKey &key) = 0;
359
360
// FramebufferManager keeps TextureCache updated about what regions of memory are being rendered to,
361
// so that it can invalidate TexCacheEntries pointed at those addresses.
362
void NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg);
363
void NotifyWriteFormattedFromMemory(u32 addr, int size, int width, GEBufferFormat fmt);
364
365
size_t NumLoadedTextures() const {
366
return cache_.size();
367
}
368
369
bool IsFakeMipmapChange() {
370
return PSP_CoreParameter().compat.flags().FakeMipmapChange && gstate.getTexLevelMode() == GE_TEXLEVEL_MODE_CONST;
371
}
372
bool VideoIsPlaying() {
373
return !videos_.empty();
374
}
375
virtual bool GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) { return false; }
376
377
virtual void StartFrame();
378
379
virtual void DeviceLost() = 0;
380
virtual void DeviceRestore(Draw::DrawContext *draw) = 0;
381
382
protected:
383
virtual void *GetNativeTextureView(const TexCacheEntry *entry) = 0;
384
bool PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEntry *entry);
385
386
virtual void BindTexture(TexCacheEntry *entry) = 0;
387
virtual void Unbind() = 0;
388
virtual void ReleaseTexture(TexCacheEntry *entry, bool delete_them) = 0;
389
void DeleteTexture(TexCache::iterator it);
390
void Decimate(TexCacheEntry *exceptThisOne, bool forcePressure); // forcePressure defaults to false.
391
392
void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel);
393
void ApplyTextureDepal(TexCacheEntry *entry);
394
395
void HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete);
396
virtual void BuildTexture(TexCacheEntry *const entry) = 0;
397
virtual void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) = 0;
398
bool CheckFullHash(TexCacheEntry *entry, bool &doDelete);
399
400
virtual void BindAsClutTexture(Draw::Texture *tex, bool smooth) {}
401
402
CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags);
403
static void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);
404
CheckAlphaResult ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool reverseColors, bool expandTo32Bit);
405
ReplacedTexture *FindReplacement(TexCacheEntry *entry, int *w, int *h, int *d);
406
void PollReplacement(TexCacheEntry *entry, int *w, int *h, int *d);
407
408
// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.
409
void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, size_t dataSize, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);
410
411
template <typename T>
412
inline const T *GetCurrentClut() {
413
return (const T *)clutBuf_;
414
}
415
416
template <typename T>
417
inline const T *GetCurrentRawClut() {
418
return (const T *)clutBufRaw_;
419
}
420
421
static u32 EstimateTexMemoryUsage(const TexCacheEntry *entry);
422
423
SamplerCacheKey GetSamplingParams(int maxLevel, const TexCacheEntry *entry);
424
SamplerCacheKey GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);
425
void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode);
426
427
bool MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const;
428
429
bool GetBestFramebufferCandidate(const TextureDefinition &entry, u32 texAddrOffset, AttachCandidate *bestCandidate) const;
430
431
void SetTextureFramebuffer(const AttachCandidate &candidate);
432
bool GetCurrentFramebufferTextureDebug(GPUDebugBuffer &buffer, bool *isFramebuffer);
433
434
virtual void BoundFramebufferTexture() {}
435
436
void DecimateVideos();
437
bool IsVideo(u32 texaddr) const;
438
439
static CheckAlphaResult CheckCLUTAlpha(const uint8_t *pixelData, GEPaletteFormat clutFmt, int w);
440
441
static inline u32 QuickTexHash(TextureReplacer &replacer, u32 addr, int bufw, int w, int h, bool swizzled, GETextureFormat format, const TexCacheEntry *entry) {
442
if (replacer.Enabled()) {
443
return replacer.ComputeHash(addr, bufw, w, h, swizzled, format, entry->maxSeenV);
444
}
445
446
if (h == 512 && entry->maxSeenV < 512 && entry->maxSeenV != 0) {
447
h = (int)entry->maxSeenV;
448
}
449
450
u32 sizeInRAM;
451
if (swizzled) {
452
// In swizzle mode, textures are stored in rectangular blocks with the height 8.
453
// That means that for a 64x4 texture, like in issue #9308, we would only hash half of the texture!
454
// In theory, we should make sure to only hash half of each block, but in reality it's not likely that
455
// games are using that memory for anything else. So we'll just make sure to compute the full size to hash.
456
// To do that, we just use the same calculation but round the height upwards to the nearest multiple of 8.
457
sizeInRAM = (textureBitsPerPixel[format] * bufw * ((h + 7) & ~7)) >> 3;
458
} else {
459
sizeInRAM = (textureBitsPerPixel[format] * bufw * h) >> 3;
460
}
461
const u32 *checkp = (const u32 *)Memory::GetPointer(addr);
462
463
gpuStats.numTextureDataBytesHashed += sizeInRAM;
464
465
if (Memory::IsValidAddress(addr + sizeInRAM)) {
466
return StableQuickTexHash(checkp, sizeInRAM);
467
} else {
468
return 0;
469
}
470
}
471
472
static inline u32 MiniHash(const u32 *ptr) {
473
return ptr[0];
474
}
475
476
Draw::DrawContext *draw_;
477
Draw2D *draw2D_;
478
479
TextureReplacer replacer_;
480
TextureScalerCommon scaler_;
481
FramebufferManagerCommon *framebufferManager_;
482
TextureShaderCache *textureShaderCache_;
483
ShaderManagerCommon *shaderManager_;
484
485
bool clearCacheNextFrame_ = false;
486
bool lowMemoryMode_ = false;
487
488
int decimationCounter_;
489
int texelsScaledThisFrame_ = 0;
490
int timesInvalidatedAllThisFrame_ = 0;
491
double replacementTimeThisFrame_ = 0;
492
// TODO: Maybe vary by FPS...
493
double replacementFrameBudget_ = 0.5 / 60.0;
494
495
TexCache cache_;
496
u32 cacheSizeEstimate_ = 0;
497
498
TexCache secondCache_;
499
u32 secondCacheSizeEstimate_ = 0;
500
501
struct VideoInfo {
502
u32 addr;
503
u32 size;
504
int flips;
505
};
506
std::vector<VideoInfo> videos_;
507
508
AlignedVector<u32, 16> tmpTexBuf32_;
509
AlignedVector<u32, 16> tmpTexBufRearrange_;
510
511
TexCacheEntry *nextTexture_ = nullptr;
512
bool failedTexture_ = false;
513
VirtualFramebuffer *nextFramebufferTexture_ = nullptr;
514
RasterChannel nextFramebufferTextureChannel_ = RASTER_COLOR;
515
516
u32 clutHash_ = 0;
517
518
// Raw is where we keep the original bytes. Converted is where we swap colors if necessary.
519
u32 *clutBufRaw_;
520
u32 *clutBufConverted_;
521
// This is the active one.
522
u32 *clutBuf_;
523
u32 clutLastFormat_ = 0xFFFFFFFF;
524
u32 clutTotalBytes_ = 0;
525
u32 clutMaxBytes_ = 0;
526
u32 clutRenderAddress_ = 0xFFFFFFFF;
527
u32 clutRenderOffset_;
528
GEBufferFormat clutRenderFormat_;
529
530
// True if the clut is just alpha values in the same order (RGBA4444-bit only.)
531
bool clutAlphaLinear_ = false;
532
u16 clutAlphaLinearColor_;
533
534
// Facilities for GPU depal of static textures.
535
Draw::Framebuffer *dynamicClutTemp_ = nullptr;
536
Draw::Framebuffer *dynamicClutFbo_ = nullptr;
537
538
int standardScaleFactor_;
539
int shaderScaleFactor_ = 0;
540
541
const char *nextChangeReason_;
542
bool nextNeedsRehash_;
543
bool nextNeedsChange_;
544
bool nextNeedsRebuild_;
545
546
u32 *expandClut_;
547
};
548
549
// True when the entry's dim, format and maxLevel all equal the given values.
inline bool TexCacheEntry::Matches(u16 dim2, u8 format2, u8 maxLevel2) const {
	return dim == dim2 && format == format2 && maxLevel == maxLevel2;
}
552
553
// Cache key of this entry, computed from its own addr, format, dim and cluthash.
inline u64 TexCacheEntry::CacheKey() const {
	return CacheKey(addr, format, dim, cluthash);
}
556
557
// Builds the 64-bit cache key:
//   high 32 bits: addr with its top two bits masked off (addr & 0x3FFFFFFF)
//   low bits:     dim
// For formats with bit 2 set (treated here as "has a CLUT"), cluthash is XORed
// into the low 32 bits so different palettes produce different keys.
inline u64 TexCacheEntry::CacheKey(u32 addr, u8 format, u16 dim, u32 cluthash) {
	u64 cachekey = ((u64)(addr & 0x3FFFFFFF) << 32) | dim;
	bool hasClut = (format & 4) != 0;
	if (hasClut) {
		cachekey ^= cluthash;
	}
	return cachekey;
}
565
566