// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/GPU/Directx9/TextureCacheDX9.cpp
// Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <algorithm>18#include <cstring>19#include <wrl/client.h>2021#include "Common/TimeUtil.h"22#include "Core/MemMap.h"23#include "GPU/ge_constants.h"2425#include "GPU/GPUState.h"26#include "GPU/Directx9/TextureCacheDX9.h"27#include "GPU/Directx9/FramebufferManagerDX9.h"28#include "GPU/Directx9/ShaderManagerDX9.h"29#include "Common/GPU/D3D9/D3D9StateCache.h"30#include "GPU/Common/TextureShaderCommon.h"31#include "GPU/Common/FramebufferManagerCommon.h"32#include "GPU/Common/TextureDecoder.h"33#include "Core/Config.h"3435#include "ext/xxhash.h"36#include "Common/Math/math_util.h"3738// NOTE: In the D3D backends, we flip R and B in the shaders, so while these look wrong, they're OK.3940using Microsoft::WRL::ComPtr;4142Draw::DataFormat FromD3D9Format(u32 fmt) {43switch (fmt) {44case D3DFMT_A4R4G4B4: return Draw::DataFormat::B4G4R4A4_UNORM_PACK16;45case D3DFMT_A1R5G5B5: return Draw::DataFormat::A1R5G5B5_UNORM_PACK16;46case D3DFMT_R5G6B5: return Draw::DataFormat::R5G6B5_UNORM_PACK16;47case D3DFMT_A8: return Draw::DataFormat::R8_UNORM;48case D3DFMT_A8R8G8B8: default: return Draw::DataFormat::R8G8B8A8_UNORM;49}50}5152D3DFORMAT ToD3D9Format(Draw::DataFormat fmt) {53switch (fmt) {54case Draw::DataFormat::BC1_RGBA_UNORM_BLOCK: 
return D3DFMT_DXT1;55case Draw::DataFormat::BC2_UNORM_BLOCK: return D3DFMT_DXT3;56case Draw::DataFormat::BC3_UNORM_BLOCK: return D3DFMT_DXT5;57case Draw::DataFormat::R8G8B8A8_UNORM: return D3DFMT_A8R8G8B8;58default: _dbg_assert_(false); return D3DFMT_A8R8G8B8;59}60}6162#define INVALID_TEX (LPDIRECT3DTEXTURE9)(-1)6364static const D3DVERTEXELEMENT9 g_FramebufferVertexElements[] = {65{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },66{ 0, 12, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },67D3DDECL_END()68};6970TextureCacheDX9::TextureCacheDX9(Draw::DrawContext *draw, Draw2D *draw2D)71: TextureCacheCommon(draw, draw2D) {72lastBoundTexture = INVALID_TEX;73device_ = (LPDIRECT3DDEVICE9)draw->GetNativeObject(Draw::NativeObject::DEVICE);74deviceEx_ = (LPDIRECT3DDEVICE9EX)draw->GetNativeObject(Draw::NativeObject::DEVICE_EX);75D3DCAPS9 pCaps;76ZeroMemory(&pCaps, sizeof(pCaps));77HRESULT result = 0;78if (deviceEx_) {79result = deviceEx_->GetDeviceCaps(&pCaps);80} else {81result = device_->GetDeviceCaps(&pCaps);82}83if (FAILED(result)) {84WARN_LOG(Log::G3D, "Failed to get the device caps!");85maxAnisotropyLevel = 16;86} else {87maxAnisotropyLevel = pCaps.MaxAnisotropy;88}8990nextTexture_ = nullptr;91device_->CreateVertexDeclaration(g_FramebufferVertexElements, &pFramebufferVertexDecl);92}9394TextureCacheDX9::~TextureCacheDX9() {95Clear(true);96}9798void TextureCacheDX9::SetFramebufferManager(FramebufferManagerDX9 *fbManager) {99framebufferManager_ = fbManager;100}101102void TextureCacheDX9::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {103LPDIRECT3DBASETEXTURE9 &texture = DxTex(entry);104if (texture) {105texture->Release();106texture = nullptr;107}108}109110void TextureCacheDX9::ForgetLastTexture() {111lastBoundTexture = INVALID_TEX;112}113114D3DFORMAT getClutDestFormat(GEPaletteFormat format) {115switch (format) {116case GE_CMODE_16BIT_ABGR4444:117return D3DFMT_A4R4G4B4;118case GE_CMODE_16BIT_ABGR5551:119return 
D3DFMT_A1R5G5B5;120case GE_CMODE_16BIT_BGR5650:121return D3DFMT_R5G6B5;122case GE_CMODE_32BIT_ABGR8888:123return D3DFMT_A8R8G8B8;124}125// Should never be here !126return D3DFMT_A8R8G8B8;127}128129void TextureCacheDX9::ApplySamplingParams(const SamplerCacheKey &key) {130D3DTEXTUREFILTERTYPE minFilt = (false ? D3DTEXF_ANISOTROPIC : D3DTEXF_LINEAR);131dxstate.texMinFilter.set(key.minFilt ? minFilt : D3DTEXF_POINT);132dxstate.texMipFilter.set(key.mipFilt ? D3DTEXF_LINEAR : D3DTEXF_POINT);133dxstate.texMagFilter.set(key.magFilt ? D3DTEXF_LINEAR : D3DTEXF_POINT);134135// DX9 mip levels are .. odd. The "max level" sets the LARGEST mip to use.136// We can enforce only the top mip level by setting a massive negative lod bias.137138if (!key.mipEnable) {139dxstate.texMaxMipLevel.set(0);140dxstate.texMipLodBias.set(-100.0f);141} else {142dxstate.texMipLodBias.set((float)key.lodBias / 256.0f);143dxstate.texMaxMipLevel.set(key.minLevel / 256);144}145146dxstate.texAddressU.set(key.sClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);147dxstate.texAddressV.set(key.tClamp ? D3DTADDRESS_CLAMP : D3DTADDRESS_WRAP);148}149150void TextureCacheDX9::StartFrame() {151TextureCacheCommon::StartFrame();152153if (gstate_c.Use(GPU_USE_ANISOTROPY)) {154// Just take the opportunity to set the global aniso level here, once per frame.155DWORD aniso = 1 << g_Config.iAnisotropyLevel;156DWORD anisotropyLevel = aniso > maxAnisotropyLevel ? maxAnisotropyLevel : aniso;157device_->SetSamplerState(0, D3DSAMP_MAXANISOTROPY, anisotropyLevel);158}159}160161void TextureCacheDX9::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {162const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));163// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.164// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.165//166// TODO: Actually, this seems like a hack. 
The game can upload part of a CLUT and reference other data.167// clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash168// unrelated old entries for small palettes.169// Adding clutBaseBytes may just be mitigating this for some usage patterns.170const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);171172if (replacer_.Enabled())173clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);174else175clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;176clutBuf_ = clutBufRaw_;177178// Special optimization: fonts typically draw clut4 with just alpha values in a single color.179clutAlphaLinear_ = false;180clutAlphaLinearColor_ = 0;181if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {182const u16_le *clut = GetCurrentClut<u16_le>();183clutAlphaLinear_ = true;184clutAlphaLinearColor_ = clut[15] & 0x0FFF;185for (int i = 0; i < 16; ++i) {186u16 step = clutAlphaLinearColor_ | (i << 12);187if (clut[i] != step) {188clutAlphaLinear_ = false;189break;190}191}192}193194clutLastFormat_ = gstate.clutformat;195}196197void TextureCacheDX9::BindTexture(TexCacheEntry *entry) {198if (!entry) {199device_->SetTexture(0, nullptr);200return;201}202IDirect3DBaseTexture9 *texture = DxTex(entry);203if (texture != lastBoundTexture) {204device_->SetTexture(0, texture);205lastBoundTexture = texture;206}207int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 
0 : entry->maxLevel;208SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);209ApplySamplingParams(samplerKey);210}211212void TextureCacheDX9::Unbind() {213device_->SetTexture(0, nullptr);214ForgetLastTexture();215}216217void TextureCacheDX9::BindAsClutTexture(Draw::Texture *tex, bool smooth) {218LPDIRECT3DBASETEXTURE9 clutTexture = (LPDIRECT3DBASETEXTURE9)draw_->GetNativeObject(Draw::NativeObject::TEXTURE_VIEW, tex);219device_->SetTexture(1, clutTexture);220device_->SetSamplerState(1, D3DSAMP_MINFILTER, smooth ? D3DTEXF_LINEAR : D3DTEXF_POINT);221device_->SetSamplerState(1, D3DSAMP_MAGFILTER, smooth ? D3DTEXF_LINEAR : D3DTEXF_POINT);222device_->SetSamplerState(1, D3DSAMP_MIPFILTER, D3DTEXF_NONE);223}224225void TextureCacheDX9::BuildTexture(TexCacheEntry *const entry) {226BuildTexturePlan plan;227if (!PrepareBuildTexture(plan, entry)) {228// We're screwed?229return;230}231232D3DFORMAT dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());233if (plan.doReplace) {234dstFmt = ToD3D9Format(plan.replaced->Format());235} else if (plan.scaleFactor > 1 || plan.saveTexture) {236dstFmt = D3DFMT_A8R8G8B8;237} else if (plan.decodeToClut8) {238dstFmt = D3DFMT_A8;239}240241int levels;242243LPDIRECT3DBASETEXTURE9 &texture = DxTex(entry);244D3DPOOL pool = D3DPOOL_DEFAULT;245int usage = D3DUSAGE_DYNAMIC;246247int tw;248int th;249plan.GetMipSize(0, &tw, &th);250251HRESULT hr;252if (plan.depth == 1) {253// We don't yet have mip generation, so clamp the number of levels to the ones we can load directly.254levels = std::min(plan.levelsToCreate, plan.levelsToLoad);255256LPDIRECT3DTEXTURE9 tex;257hr = device_->CreateTexture(tw, th, levels, usage, dstFmt, pool, &tex, nullptr);258texture = tex;259} else {260LPDIRECT3DVOLUMETEXTURE9 tex;261hr = device_->CreateVolumeTexture(tw, th, plan.depth, 1, usage, dstFmt, pool, &tex, nullptr);262texture = tex;263264levels = 1;265}266267if (FAILED(hr)) {268INFO_LOG(Log::G3D, "Failed to create D3D texture: 
%dx%d", tw, th);269ReleaseTexture(entry, true);270return;271}272273if (!texture) {274// What to do here?275return;276}277278if (plan.depth == 1) {279// Regular loop.280for (int i = 0; i < levels; i++) {281int dstLevel = i;282HRESULT result;283uint32_t lockFlag = dstLevel == 0 ? D3DLOCK_DISCARD : 0; // Can only discard the top level284D3DLOCKED_RECT rect{};285286result = ((LPDIRECT3DTEXTURE9)texture)->LockRect(dstLevel, &rect, NULL, lockFlag);287if (FAILED(result)) {288ERROR_LOG(Log::G3D, "Failed to lock D3D 2D texture at level %d: %dx%d", i, plan.w, plan.h);289return;290}291uint8_t *data = (uint8_t *)rect.pBits;292int stride = rect.Pitch;293LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});294((LPDIRECT3DTEXTURE9)texture)->UnlockRect(dstLevel);295}296} else {297// 3D loop.298D3DLOCKED_BOX box;299HRESULT result = ((LPDIRECT3DVOLUMETEXTURE9)texture)->LockBox(0, &box, nullptr, D3DLOCK_DISCARD);300if (FAILED(result)) {301ERROR_LOG(Log::G3D, "Failed to lock D3D 2D texture: %dx%dx%d", plan.w, plan.h, plan.depth);302return;303}304305uint8_t *data = (uint8_t *)box.pBits;306int stride = box.RowPitch;307for (int i = 0; i < plan.depth; i++) {308LoadTextureLevel(*entry, data, 0, stride, plan, (i == 0) ? 
plan.baseLevelSrc : i, FromD3D9Format(dstFmt), TexDecodeFlags{});309data += box.SlicePitch;310}311((LPDIRECT3DVOLUMETEXTURE9)texture)->UnlockBox(0);312}313314// Signal that we support depth textures so use it as one.315if (plan.depth > 1) {316entry->status |= TexCacheEntry::STATUS_3D;317}318319if (plan.doReplace) {320entry->SetAlphaStatus(TexCacheEntry::TexStatus(plan.replaced->AlphaStatus()));321322if (!Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), nullptr)) {323entry->status |= TexCacheEntry::STATUS_BGRA;324}325} else {326entry->status |= TexCacheEntry::STATUS_BGRA;327}328}329330D3DFORMAT TextureCacheDX9::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {331switch (format) {332case GE_TFMT_CLUT4:333case GE_TFMT_CLUT8:334case GE_TFMT_CLUT16:335case GE_TFMT_CLUT32:336return getClutDestFormat(clutFormat);337case GE_TFMT_4444:338return D3DFMT_A4R4G4B4;339case GE_TFMT_5551:340return D3DFMT_A1R5G5B5;341case GE_TFMT_5650:342return D3DFMT_R5G6B5;343case GE_TFMT_8888:344case GE_TFMT_DXT1:345case GE_TFMT_DXT3:346case GE_TFMT_DXT5:347default:348return D3DFMT_A8R8G8B8;349}350}351352bool TextureCacheDX9::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) {353SetTexture();354if (!nextTexture_) {355return GetCurrentFramebufferTextureDebug(buffer, isFramebuffer);356}357358ApplyTexture();359360ComPtr<IDirect3DBaseTexture9> baseTex;361ComPtr<IDirect3DTexture9> tex;362ComPtr<IDirect3DSurface9> offscreen;363HRESULT hr;364365bool success = false;366hr = device_->GetTexture(0, &baseTex);367if (SUCCEEDED(hr) && baseTex != NULL) {368hr = baseTex.As(&tex);369if (SUCCEEDED(hr)) {370D3DSURFACE_DESC desc;371D3DLOCKED_RECT locked;372tex->GetLevelDesc(level, &desc);373RECT rect = { 0, 0, (LONG)desc.Width, (LONG)desc.Height };374hr = tex->LockRect(level, &locked, &rect, D3DLOCK_READONLY);375376// If it fails, this means it's a render-to-texture, so we have to get creative.377if (FAILED(hr)) {378ComPtr<IDirect3DSurface9> 
renderTarget;379hr = tex->GetSurfaceLevel(level, &renderTarget);380if (renderTarget && SUCCEEDED(hr)) {381hr = device_->CreateOffscreenPlainSurface(desc.Width, desc.Height, desc.Format, D3DPOOL_SYSTEMMEM, &offscreen, NULL);382if (SUCCEEDED(hr)) {383hr = device_->GetRenderTargetData(renderTarget.Get(), offscreen.Get());384if (SUCCEEDED(hr)) {385hr = offscreen->LockRect(&locked, &rect, D3DLOCK_READONLY);386}387}388}389*isFramebuffer = true;390} else {391*isFramebuffer = false;392}393394if (SUCCEEDED(hr)) {395GPUDebugBufferFormat fmt;396int pixelSize;397switch (desc.Format) {398case D3DFMT_A1R5G5B5:399fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_5551 : GPU_DBG_FORMAT_5551_BGRA;400pixelSize = 2;401break;402case D3DFMT_A4R4G4B4:403fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_4444 : GPU_DBG_FORMAT_4444_BGRA;404pixelSize = 2;405break;406case D3DFMT_R5G6B5:407fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_565 : GPU_DBG_FORMAT_565_BGRA;408pixelSize = 2;409break;410case D3DFMT_A8R8G8B8:411fmt = gstate_c.bgraTexture ? GPU_DBG_FORMAT_8888 : GPU_DBG_FORMAT_8888_BGRA;412pixelSize = 4;413break;414default:415fmt = GPU_DBG_FORMAT_INVALID;416break;417}418419if (fmt != GPU_DBG_FORMAT_INVALID) {420buffer.Allocate(locked.Pitch / pixelSize, desc.Height, fmt, false);421memcpy(buffer.GetData(), locked.pBits, locked.Pitch * desc.Height);422success = true;423} else {424success = false;425}426if (offscreen) {427offscreen->UnlockRect();428} else {429tex->UnlockRect(level);430}431}432}433}434435return success;436}437438void *TextureCacheDX9::GetNativeTextureView(const TexCacheEntry *entry) {439LPDIRECT3DBASETEXTURE9 tex = DxTex(entry);440return (void *)tex;441}442443444