// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/GPU/Common/TextureCacheCommon.h
// Views: 1401
// Copyright (c) 2013- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include <map>20#include <vector>21#include <memory>2223#include "Common/CommonTypes.h"24#include "Common/MemoryUtil.h"25#include "Core/System.h"26#include "GPU/GPU.h"27#include "GPU/Common/GPUDebugInterface.h"28#include "GPU/Common/TextureDecoder.h"29#include "GPU/Common/TextureScalerCommon.h"30#include "GPU/Common/TextureShaderCommon.h"31#include "GPU/Common/TextureReplacer.h"3233class Draw2D;3435enum FramebufferNotification {36NOTIFY_FB_CREATED,37NOTIFY_FB_UPDATED,38NOTIFY_FB_DESTROYED,39};4041// Changes more frequent than this will be considered "frequent" and prevent texture scaling.42#define TEXCACHE_FRAME_CHANGE_FREQUENT 643// Note: only used when hash backoff is disabled.44#define TEXCACHE_FRAME_CHANGE_FREQUENT_REGAIN_TRUST 334546#define TEXCACHE_MAX_TEXELS_SCALED (256*256) // Per frame4748struct VirtualFramebuffer;49class TextureReplacer;50class ShaderManagerCommon;5152enum class TexDecodeFlags {53EXPAND32 = 1,54REVERSE_COLORS = 2,55TO_CLUT8 = 4,56};57ENUM_CLASS_BITOPS(TexDecodeFlags);5859namespace Draw {60class DrawContext;61class Texture;62}6364// Used by D3D11 and Vulkan, could be used by modern GL65struct SamplerCacheKey {66union {67uint64_t fullKey;68struct {69// These are 8.8 fixed 
point.70int16_t maxLevel;71int16_t minLevel;72int16_t lodBias;7374bool mipEnable : 1;75bool minFilt : 1;76bool mipFilt : 1;77bool magFilt : 1;78bool sClamp : 1;79bool tClamp : 1;80bool aniso : 1;81bool texture3d : 1;82};83};84bool operator < (const SamplerCacheKey &other) const {85return fullKey < other.fullKey;86}87void ToString(std::string *str) const {88str->resize(sizeof(*this));89memcpy(&(*str)[0], this, sizeof(*this));90}91void FromString(const std::string &str) {92memcpy(this, &str[0], sizeof(*this));93}94};9596class GLRTexture;97class VulkanTexture;9899// Allow the extra bits from the remasters for the purposes of this.100inline int dimWidth(u16 dim) {101return 1 << (dim & 0xFF);102}103104inline int dimHeight(u16 dim) {105return 1 << ((dim >> 8) & 0xFF);106}107108// Enough information about a texture to match it to framebuffers.109struct TextureDefinition {110u32 addr;111u16 bufw;112u16 dim;113GETextureFormat format;114};115116// Texture replacement state machine:117// Call FindReplacement during PrepareBuild.118// If replacedTexture gets set: If not found, -> STATUS_TO_REPLACE, otherwise directly -> STATUS_IS_SCALED.119// If replacedTexture is null, leave it at null.120// If replacedTexture is set in SetTexture and STATUS_IS_SCALED is not set, query status. If ready rebuild texture, which will set STATUS_IS_SCALED.121122// NOTE: These only handle textures loaded directly from PSP memory contents.123// Framebuffer textures do not have entries, we bind the framebuffers directly.124// At one point we might merge the concepts of framebuffers and textures, but that125// moment is far away.126127// TODO: Shrink this struct. 
There is some fluff.128struct TexCacheEntry {129~TexCacheEntry() {130if (texturePtr || textureName || vkTex)131Crash();132}133// After marking STATUS_UNRELIABLE, if it stays the same this many frames we'll trust it again.134const static int FRAMES_REGAIN_TRUST = 1000;135136enum TexStatus {137STATUS_HASHING = 0x00,138STATUS_RELIABLE = 0x01, // Don't bother rehashing.139STATUS_UNRELIABLE = 0x02, // Always recheck hash.140STATUS_MASK = 0x03,141142STATUS_ALPHA_UNKNOWN = 0x04,143STATUS_ALPHA_FULL = 0x00, // Has no alpha channel, or always full alpha.144STATUS_ALPHA_MASK = 0x04,145146STATUS_CLUT_VARIANTS = 0x08, // Has multiple CLUT variants.147STATUS_CHANGE_FREQUENT = 0x10, // Changes often (less than 6 frames in between.)148STATUS_CLUT_RECHECK = 0x20, // Another texture with same addr had a hashfail.149STATUS_TO_SCALE = 0x80, // Pending texture scaling in a later frame.150STATUS_IS_SCALED_OR_REPLACED = 0x100, // Has been scaled already (ignored for replacement checks).151STATUS_TO_REPLACE = 0x0200, // Pending texture replacement.152// When hashing large textures, we optimize 512x512 down to 512x272 by default, since this153// is commonly the only part accessed. 
If access is made above 272, we hash the entire154// texture, and set this flag to allow scaling the texture just once for the new hash.155STATUS_FREE_CHANGE = 0x0400, // Allow one change before marking "frequent".156157STATUS_NO_MIPS = 0x0800, // Has bad or unusable mipmap levels.158159STATUS_FRAMEBUFFER_OVERLAP = 0x1000,160161STATUS_FORCE_REBUILD = 0x2000,162163STATUS_3D = 0x4000,164165STATUS_CLUT_GPU = 0x8000,166167STATUS_VIDEO = 0x10000,168STATUS_BGRA = 0x20000,169};170171// TexStatus enum flag combination.172u32 status;173174u32 addr;175u32 minihash;176u8 format; // GeTextureFormat177u8 maxLevel;178u16 dim;179u16 bufw;180union {181GLRTexture *textureName;182void *texturePtr;183VulkanTexture *vkTex;184};185#ifdef _WIN32186void *textureView; // Used by D3D11 only for the shader resource view.187#endif188int invalidHint;189int lastFrame;190int numFrames;191int numInvalidated;192u32 framesUntilNextFullHash;193u32 fullhash;194u32 cluthash;195u16 maxSeenV;196ReplacedTexture *replacedTexture;197198TexStatus GetHashStatus() {199return TexStatus(status & STATUS_MASK);200}201void SetHashStatus(TexStatus newStatus) {202status = (status & ~STATUS_MASK) | newStatus;203}204TexStatus GetAlphaStatus() {205return TexStatus(status & STATUS_ALPHA_MASK);206}207void SetAlphaStatus(TexStatus newStatus) {208status = (status & ~STATUS_ALPHA_MASK) | newStatus;209}210void SetAlphaStatus(TexStatus newStatus, int level) {211// For non-level zero, only set more restrictive.212if (newStatus == STATUS_ALPHA_UNKNOWN || level == 0) {213SetAlphaStatus(newStatus);214}215}216void SetAlphaStatus(CheckAlphaResult alphaResult, int level) {217TexStatus newStatus = (TexStatus)alphaResult;218// For non-level zero, only set more restrictive.219if (newStatus == STATUS_ALPHA_UNKNOWN || level == 0) {220SetAlphaStatus(newStatus);221}222}223224// This is the full size in RAM, not the half size we use sometimes as a "safe" underestimate.225u32 SizeInRAM() const {226return (textureBitsPerPixel[format] * bufw 
* dimHeight(dim)) / 8;227}228229bool Matches(u16 dim2, u8 format2, u8 maxLevel2) const;230u64 CacheKey() const;231static u64 CacheKey(u32 addr, u8 format, u16 dim, u32 cluthash);232};233234// Can't be unordered_map, we use lower_bound ... although for some reason that (used to?) compiles on MSVC.235// Would really like to replace this with DenseHashMap but can't as long as we need lower_bound.236typedef std::map<u64, std::unique_ptr<TexCacheEntry>> TexCache;237238// Urgh.239#ifdef IGNORE240#undef IGNORE241#endif242243struct FramebufferMatchInfo {244int16_t xOffset;245int16_t yOffset;246bool reinterpret;247GEBufferFormat reinterpretTo;248};249250struct AttachCandidate {251VirtualFramebuffer *fb;252FramebufferMatchInfo match;253RasterChannel channel;254int relevancy;255256std::string ToString() const;257};258259class FramebufferManagerCommon;260261struct BuildTexturePlan {262// Inputs263bool hardwareScaling = false;264bool slowScaler = true;265266// Set if the PSP software specified an unusual mip chain,267// such as the same size throughout, or anything else that doesn't divide by268// two on each level. If this is set, we won't generate mips nor use any.269// However, we still respect baseLevelSrc.270bool badMipSizes;271272// Number of mip levels to load from PSP memory (or replacement).273int levelsToLoad;274275// The number of levels in total to create.276// If greater than maxLevelToLoad, the backend is expected to either generate277// the missing levels, or limit itself to levelsToLoad levels.278int levelsToCreate;279280// The maximum number of mips levels we can create for this texture.281int maxPossibleLevels;282283// Load the 0-mip from this PSP texture level instead of 0.284// If non-zero, we are only loading one level.285int baseLevelSrc;286287// The scale factor of the final texture.288int scaleFactor;289290// Whether it's a video texture or not. 
Some decisions might depend on this.291bool isVideo;292293// Unscaled size of the 0-mip of the original texture.294// Don't really need to have it here, but convenient.295int w;296int h;297298// Scaled (or replaced) size of the 0-mip of the final texture.299int createW;300int createH;301302// Used for 3D textures only. If not a 3D texture, will be 1.303int depth;304305// The replacement for the texture.306ReplacedTexture *replaced;307// Need to only check once since it can change during the load!308bool doReplace;309bool saveTexture;310311// TODO: Expand32 should probably also be decided in PrepareBuildTexture.312bool decodeToClut8;313314void GetMipSize(int level, int *w, int *h) const {315if (doReplace) {316replaced->GetSize(level, w, h);317return;318}319if (depth == 1) {320*w = createW >> level;321*h = createH >> level;322} else {323// 3D texture, we look for layers instead of levels.324*w = createW;325*h = createH;326}327}328};329330class TextureCacheCommon {331public:332TextureCacheCommon(Draw::DrawContext *draw, Draw2D *draw2D);333virtual ~TextureCacheCommon();334335void LoadClut(u32 clutAddr, u32 loadBytes);336bool GetCurrentClutBuffer(GPUDebugBuffer &buffer);337338// This updates nextTexture_ / nextFramebufferTexture_, which is then used by ApplyTexture.339// TODO: Return stuff directly instead of keeping state.340TexCacheEntry *SetTexture();341342void SetShaderManager(ShaderManagerCommon *sm) {343shaderManager_ = sm;344}345346void ApplyTexture();347bool SetOffsetTexture(u32 yOffset);348void Invalidate(u32 addr, int size, GPUInvalidationType type);349void InvalidateAll(GPUInvalidationType type);350void ClearNextFrame();351352TextureShaderCache *GetTextureShaderCache() { return textureShaderCache_; }353354virtual void ForgetLastTexture() = 0;355virtual void Clear(bool delete_them);356virtual void NotifyConfigChanged();357virtual void ApplySamplingParams(const SamplerCacheKey &key) = 0;358359// FramebufferManager keeps TextureCache updated about what regions 
of memory are being rendered to,360// so that it can invalidate TexCacheEntries pointed at those addresses.361void NotifyFramebuffer(VirtualFramebuffer *framebuffer, FramebufferNotification msg);362void NotifyWriteFormattedFromMemory(u32 addr, int size, int width, GEBufferFormat fmt);363364size_t NumLoadedTextures() const {365return cache_.size();366}367368bool IsFakeMipmapChange() {369return PSP_CoreParameter().compat.flags().FakeMipmapChange && gstate.getTexLevelMode() == GE_TEXLEVEL_MODE_CONST;370}371bool VideoIsPlaying() {372return !videos_.empty();373}374virtual bool GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) { return false; }375376virtual void StartFrame();377378virtual void DeviceLost() = 0;379virtual void DeviceRestore(Draw::DrawContext *draw) = 0;380381protected:382virtual void *GetNativeTextureView(const TexCacheEntry *entry) = 0;383bool PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEntry *entry);384385virtual void BindTexture(TexCacheEntry *entry) = 0;386virtual void Unbind() = 0;387virtual void ReleaseTexture(TexCacheEntry *entry, bool delete_them) = 0;388void DeleteTexture(TexCache::iterator it);389void Decimate(TexCacheEntry *exceptThisOne, bool forcePressure); // forcePressure defaults to false.390391void ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer, GETextureFormat texFormat, RasterChannel channel);392void ApplyTextureDepal(TexCacheEntry *entry);393394void HandleTextureChange(TexCacheEntry *const entry, const char *reason, bool initialMatch, bool doDelete);395virtual void BuildTexture(TexCacheEntry *const entry) = 0;396virtual void UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) = 0;397bool CheckFullHash(TexCacheEntry *entry, bool &doDelete);398399virtual void BindAsClutTexture(Draw::Texture *tex, bool smooth) {}400401CheckAlphaResult DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int 
bufw, TexDecodeFlags flags);402static void UnswizzleFromMem(u32 *dest, u32 destPitch, const u8 *texptr, u32 bufw, u32 height, u32 bytesPerPixel);403CheckAlphaResult ReadIndexedTex(u8 *out, int outPitch, int level, const u8 *texptr, int bytesPerIndex, int bufw, bool reverseColors, bool expandTo32Bit);404ReplacedTexture *FindReplacement(TexCacheEntry *entry, int *w, int *h, int *d);405void PollReplacement(TexCacheEntry *entry, int *w, int *h, int *d);406407// Return value is mapData normally, but could be another buffer allocated with AllocateAlignedMemory.408void LoadTextureLevel(TexCacheEntry &entry, uint8_t *mapData, size_t dataSize, int mapRowPitch, BuildTexturePlan &plan, int srcLevel, Draw::DataFormat dstFmt, TexDecodeFlags texDecFlags);409410template <typename T>411inline const T *GetCurrentClut() {412return (const T *)clutBuf_;413}414415template <typename T>416inline const T *GetCurrentRawClut() {417return (const T *)clutBufRaw_;418}419420static u32 EstimateTexMemoryUsage(const TexCacheEntry *entry);421422SamplerCacheKey GetSamplingParams(int maxLevel, const TexCacheEntry *entry);423SamplerCacheKey GetFramebufferSamplingParams(u16 bufferWidth, u16 bufferHeight);424void UpdateMaxSeenV(TexCacheEntry *entry, bool throughMode);425426bool MatchFramebuffer(const TextureDefinition &entry, VirtualFramebuffer *framebuffer, u32 texaddrOffset, RasterChannel channel, FramebufferMatchInfo *matchInfo) const;427428bool GetBestFramebufferCandidate(const TextureDefinition &entry, u32 texAddrOffset, AttachCandidate *bestCandidate) const;429430void SetTextureFramebuffer(const AttachCandidate &candidate);431bool GetCurrentFramebufferTextureDebug(GPUDebugBuffer &buffer, bool *isFramebuffer);432433virtual void BoundFramebufferTexture() {}434435void DecimateVideos();436bool IsVideo(u32 texaddr) const;437438static CheckAlphaResult CheckCLUTAlpha(const uint8_t *pixelData, GEPaletteFormat clutFmt, int w);439440static inline u32 QuickTexHash(TextureReplacer &replacer, u32 addr, int 
bufw, int w, int h, bool swizzled, GETextureFormat format, const TexCacheEntry *entry) {441if (replacer.Enabled()) {442return replacer.ComputeHash(addr, bufw, w, h, swizzled, format, entry->maxSeenV);443}444445if (h == 512 && entry->maxSeenV < 512 && entry->maxSeenV != 0) {446h = (int)entry->maxSeenV;447}448449u32 sizeInRAM;450if (swizzled) {451// In swizzle mode, textures are stored in rectangular blocks with the height 8.452// That means that for a 64x4 texture, like in issue #9308, we would only hash half of the texture!453// In theory, we should make sure to only hash half of each block, but in reality it's not likely that454// games are using that memory for anything else. So we'll just make sure to compute the full size to hash.455// To do that, we just use the same calculation but round the height upwards to the nearest multiple of 8.456sizeInRAM = (textureBitsPerPixel[format] * bufw * ((h + 7) & ~7)) >> 3;457} else {458sizeInRAM = (textureBitsPerPixel[format] * bufw * h) >> 3;459}460const u32 *checkp = (const u32 *)Memory::GetPointer(addr);461462gpuStats.numTextureDataBytesHashed += sizeInRAM;463464if (Memory::IsValidAddress(addr + sizeInRAM)) {465return StableQuickTexHash(checkp, sizeInRAM);466} else {467return 0;468}469}470471static inline u32 MiniHash(const u32 *ptr) {472return ptr[0];473}474475Draw::DrawContext *draw_;476Draw2D *draw2D_;477478TextureReplacer replacer_;479TextureScalerCommon scaler_;480FramebufferManagerCommon *framebufferManager_;481TextureShaderCache *textureShaderCache_;482ShaderManagerCommon *shaderManager_;483484bool clearCacheNextFrame_ = false;485bool lowMemoryMode_ = false;486487int decimationCounter_;488int texelsScaledThisFrame_ = 0;489int timesInvalidatedAllThisFrame_ = 0;490double replacementTimeThisFrame_ = 0;491// TODO: Maybe vary by FPS...492double replacementFrameBudget_ = 0.5 / 60.0;493494TexCache cache_;495u32 cacheSizeEstimate_ = 0;496497TexCache secondCache_;498u32 secondCacheSizeEstimate_ = 0;499500struct VideoInfo 
{501u32 addr;502u32 size;503int flips;504};505std::vector<VideoInfo> videos_;506507AlignedVector<u32, 16> tmpTexBuf32_;508AlignedVector<u32, 16> tmpTexBufRearrange_;509510TexCacheEntry *nextTexture_ = nullptr;511bool failedTexture_ = false;512VirtualFramebuffer *nextFramebufferTexture_ = nullptr;513RasterChannel nextFramebufferTextureChannel_ = RASTER_COLOR;514515u32 clutHash_ = 0;516517// Raw is where we keep the original bytes. Converted is where we swap colors if necessary.518u32 *clutBufRaw_;519u32 *clutBufConverted_;520// This is the active one.521u32 *clutBuf_;522u32 clutLastFormat_ = 0xFFFFFFFF;523u32 clutTotalBytes_ = 0;524u32 clutMaxBytes_ = 0;525u32 clutRenderAddress_ = 0xFFFFFFFF;526u32 clutRenderOffset_;527GEBufferFormat clutRenderFormat_;528529// True if the clut is just alpha values in the same order (RGBA4444-bit only.)530bool clutAlphaLinear_ = false;531u16 clutAlphaLinearColor_;532533// Facilities for GPU depal of static textures.534Draw::Framebuffer *dynamicClutTemp_ = nullptr;535Draw::Framebuffer *dynamicClutFbo_ = nullptr;536537int standardScaleFactor_;538int shaderScaleFactor_ = 0;539540const char *nextChangeReason_;541bool nextNeedsRehash_;542bool nextNeedsChange_;543bool nextNeedsRebuild_;544545u32 *expandClut_;546};547548inline bool TexCacheEntry::Matches(u16 dim2, u8 format2, u8 maxLevel2) const {549return dim == dim2 && format == format2 && maxLevel == maxLevel2;550}551552inline u64 TexCacheEntry::CacheKey() const {553return CacheKey(addr, format, dim, cluthash);554}555556inline u64 TexCacheEntry::CacheKey(u32 addr, u8 format, u16 dim, u32 cluthash) {557u64 cachekey = ((u64)(addr & 0x3FFFFFFF) << 32) | dim;558bool hasClut = (format & 4) != 0;559if (hasClut) {560cachekey ^= cluthash;561}562return cachekey;563}564565566