CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/D3D11/TextureCacheD3D11.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <algorithm>18#include <cstring>19#include <cfloat>2021#include <d3d11.h>2223#include "Common/TimeUtil.h"24#include "Core/MemMap.h"25#include "GPU/ge_constants.h"26#include "GPU/GPUState.h"27#include "GPU/Common/GPUStateUtils.h"28#include "GPU/Common/DrawEngineCommon.h"29#include "GPU/D3D11/TextureCacheD3D11.h"30#include "GPU/D3D11/FramebufferManagerD3D11.h"31#include "GPU/D3D11/ShaderManagerD3D11.h"32#include "GPU/Common/TextureShaderCommon.h"33#include "GPU/D3D11/D3D11Util.h"34#include "GPU/Common/FramebufferManagerCommon.h"35#include "GPU/Common/TextureDecoder.h"36#include "Core/Config.h"3738#include "ext/xxhash.h"39#include "Common/Math/math_util.h"4041// For depth depal42struct DepthPushConstants {43float z_scale;44float z_offset;45float pad[2];46};4748#define INVALID_TEX (ID3D11ShaderResourceView *)(-1LL)4950static const D3D11_INPUT_ELEMENT_DESC g_QuadVertexElements[] = {51{ "POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0, },52{ "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12,},53};5455// NOTE: In the D3D backends, we flip R and B in the shaders, so while these look wrong, they're OK.5657static Draw::DataFormat FromD3D11Format(u32 fmt) {58switch (fmt) {59case DXGI_FORMAT_B4G4R4A4_UNORM: return Draw::DataFormat::A4R4G4B4_UNORM_PACK16;60case DXGI_FORMAT_B5G5R5A1_UNORM: return Draw::DataFormat::A1R5G5B5_UNORM_PACK16;61case DXGI_FORMAT_B5G6R5_UNORM: return Draw::DataFormat::R5G6B5_UNORM_PACK16;62case DXGI_FORMAT_R8_UNORM: return Draw::DataFormat::R8_UNORM;63case DXGI_FORMAT_B8G8R8A8_UNORM: default: return Draw::DataFormat::R8G8B8A8_UNORM;64}65}6667static DXGI_FORMAT ToDXGIFormat(Draw::DataFormat fmt) {68switch (fmt) {69case Draw::DataFormat::BC1_RGBA_UNORM_BLOCK: return DXGI_FORMAT_BC1_UNORM;70case Draw::DataFormat::BC2_UNORM_BLOCK: return DXGI_FORMAT_BC2_UNORM;71case Draw::DataFormat::BC3_UNORM_BLOCK: return DXGI_FORMAT_BC3_UNORM;72case Draw::DataFormat::BC4_UNORM_BLOCK: return DXGI_FORMAT_BC4_UNORM;73case Draw::DataFormat::BC5_UNORM_BLOCK: return DXGI_FORMAT_BC5_UNORM;74case Draw::DataFormat::BC7_UNORM_BLOCK: return DXGI_FORMAT_BC7_UNORM;75case Draw::DataFormat::R8G8B8A8_UNORM: return DXGI_FORMAT_B8G8R8A8_UNORM;76default: _dbg_assert_(false); return DXGI_FORMAT_UNKNOWN;77}78}7980SamplerCacheD3D11::~SamplerCacheD3D11() {81for (auto &iter : cache_) {82iter.second->Release();83}84}8586ID3D11SamplerState *SamplerCacheD3D11::GetOrCreateSampler(ID3D11Device *device, const SamplerCacheKey &key) {87auto iter = cache_.find(key);88if (iter != cache_.end()) {89return iter->second;90}9192D3D11_SAMPLER_DESC samp{};93samp.AddressU = key.sClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;94samp.AddressV = key.tClamp ? D3D11_TEXTURE_ADDRESS_CLAMP : D3D11_TEXTURE_ADDRESS_WRAP;95samp.AddressW = samp.AddressU; // Mali benefits from all clamps being the same, and this one is irrelevant.96if (key.aniso) {97samp.MaxAnisotropy = (float)(1 << g_Config.iAnisotropyLevel);98} else {99samp.MaxAnisotropy = 1.0f;100}101int filterKey = ((int)key.minFilt << 2) | ((int)key.magFilt << 1) | ((int)key.mipFilt);102static const D3D11_FILTER filters[8] = {103D3D11_FILTER_MIN_MAG_MIP_POINT,104D3D11_FILTER_MIN_MAG_POINT_MIP_LINEAR,105D3D11_FILTER_MIN_POINT_MAG_LINEAR_MIP_POINT,106D3D11_FILTER_MIN_POINT_MAG_MIP_LINEAR,107D3D11_FILTER_MIN_LINEAR_MAG_MIP_POINT,108D3D11_FILTER_MIN_LINEAR_MAG_POINT_MIP_LINEAR,109D3D11_FILTER_MIN_MAG_LINEAR_MIP_POINT,110D3D11_FILTER_MIN_MAG_MIP_LINEAR,111};112// Only switch to aniso if linear min and mag are set.113if (key.aniso && key.magFilt != 0 && key.minFilt != 0)114samp.Filter = D3D11_FILTER_ANISOTROPIC;115else116samp.Filter = filters[filterKey];117// Can't set MaxLOD on Feature Level <= 9_3.118if (device->GetFeatureLevel() <= D3D_FEATURE_LEVEL_9_3) {119samp.MaxLOD = FLT_MAX;120samp.MinLOD = -FLT_MAX;121samp.MipLODBias = 0.0f;122} else {123samp.MaxLOD = key.maxLevel / 256.0f;124samp.MinLOD = key.minLevel / 256.0f;125samp.MipLODBias = key.lodBias / 256.0f;126}127samp.ComparisonFunc = D3D11_COMPARISON_NEVER;128for (int i = 0; i < 4; i++) {129samp.BorderColor[i] = 1.0f;130}131132ID3D11SamplerState *sampler;133ASSERT_SUCCESS(device->CreateSamplerState(&samp, &sampler));134cache_[key] = sampler;135return sampler;136}137138TextureCacheD3D11::TextureCacheD3D11(Draw::DrawContext *draw, Draw2D *draw2D)139: TextureCacheCommon(draw, draw2D) {140device_ = (ID3D11Device *)draw->GetNativeObject(Draw::NativeObject::DEVICE);141context_ = (ID3D11DeviceContext *)draw->GetNativeObject(Draw::NativeObject::CONTEXT);142143lastBoundTexture = INVALID_TEX;144145D3D11_BUFFER_DESC desc{ sizeof(DepthPushConstants), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE };146HRESULT hr = device_->CreateBuffer(&desc, nullptr, &depalConstants_);147_dbg_assert_(SUCCEEDED(hr));148149HRESULT result = 0;150151nextTexture_ = nullptr;152}153154TextureCacheD3D11::~TextureCacheD3D11() {155depalConstants_->Release();156157// pFramebufferVertexDecl->Release();158Clear(true);159}160161void TextureCacheD3D11::SetFramebufferManager(FramebufferManagerD3D11 *fbManager) {162framebufferManager_ = fbManager;163}164165void TextureCacheD3D11::ReleaseTexture(TexCacheEntry *entry, bool delete_them) {166ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;167ID3D11ShaderResourceView *view = (ID3D11ShaderResourceView *)entry->textureView;168if (texture) {169texture->Release();170entry->texturePtr = nullptr;171}172if (view) {173view->Release();174entry->textureView = nullptr;175}176}177178void TextureCacheD3D11::ForgetLastTexture() {179lastBoundTexture = INVALID_TEX;180181ID3D11ShaderResourceView *nullTex[4]{};182context_->PSSetShaderResources(0, 4, nullTex);183}184185void TextureCacheD3D11::UpdateCurrentClut(GEPaletteFormat clutFormat, u32 clutBase, bool clutIndexIsSimple) {186const u32 clutBaseBytes = clutBase * (clutFormat == GE_CMODE_32BIT_ABGR8888 ? sizeof(u32) : sizeof(u16));187// Technically, these extra bytes weren't loaded, but hopefully it was loaded earlier.188// If not, we're going to hash random data, which hopefully doesn't cause a performance issue.189//190// TODO: Actually, this seems like a hack. The game can upload part of a CLUT and reference other data.191// clutTotalBytes_ is the last amount uploaded. We should hash clutMaxBytes_, but this will often hash192// unrelated old entries for small palettes.193// Adding clutBaseBytes may just be mitigating this for some usage patterns.194const u32 clutExtendedBytes = std::min(clutTotalBytes_ + clutBaseBytes, clutMaxBytes_);195196if (replacer_.Enabled())197clutHash_ = XXH32((const char *)clutBufRaw_, clutExtendedBytes, 0xC0108888);198else199clutHash_ = XXH3_64bits((const char *)clutBufRaw_, clutExtendedBytes) & 0xFFFFFFFF;200clutBuf_ = clutBufRaw_;201202// Special optimization: fonts typically draw clut4 with just alpha values in a single color.203clutAlphaLinear_ = false;204clutAlphaLinearColor_ = 0;205if (clutFormat == GE_CMODE_16BIT_ABGR4444 && clutIndexIsSimple) {206const u16_le *clut = GetCurrentClut<u16_le>();207clutAlphaLinear_ = true;208clutAlphaLinearColor_ = clut[15] & 0x0FFF;209for (int i = 0; i < 16; ++i) {210u16 step = clutAlphaLinearColor_ | (i << 12);211if (clut[i] != step) {212clutAlphaLinear_ = false;213break;214}215}216}217218clutLastFormat_ = gstate.clutformat;219}220221void TextureCacheD3D11::BindTexture(TexCacheEntry *entry) {222if (!entry) {223ID3D11ShaderResourceView *textureView = nullptr;224context_->PSSetShaderResources(0, 1, &textureView);225return;226}227ID3D11ShaderResourceView *textureView = DxView(entry);228if (textureView != lastBoundTexture) {229context_->PSSetShaderResources(0, 1, &textureView);230lastBoundTexture = textureView;231}232int maxLevel = (entry->status & TexCacheEntry::STATUS_NO_MIPS) ? 0 : entry->maxLevel;233SamplerCacheKey samplerKey = GetSamplingParams(maxLevel, entry);234ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, samplerKey);235context_->PSSetSamplers(0, 1, &state);236gstate_c.SetUseShaderDepal(ShaderDepalMode::OFF);237}238239void TextureCacheD3D11::ApplySamplingParams(const SamplerCacheKey &key) {240ID3D11SamplerState *state = samplerCache_.GetOrCreateSampler(device_, key);241context_->PSSetSamplers(0, 1, &state);242}243244void TextureCacheD3D11::Unbind() {245ForgetLastTexture();246}247248void TextureCacheD3D11::BindAsClutTexture(Draw::Texture *tex, bool smooth) {249ID3D11ShaderResourceView *clutTexture = (ID3D11ShaderResourceView *)draw_->GetNativeObject(Draw::NativeObject::TEXTURE_VIEW, tex);250context_->PSSetShaderResources(TEX_SLOT_CLUT, 1, &clutTexture);251context_->PSSetSamplers(3, 1, smooth ? &stockD3D11.samplerLinear2DClamp : &stockD3D11.samplerPoint2DClamp);252}253254void TextureCacheD3D11::BuildTexture(TexCacheEntry *const entry) {255BuildTexturePlan plan;256if (!PrepareBuildTexture(plan, entry)) {257// We're screwed?258return;259}260261DXGI_FORMAT dstFmt = GetDestFormat(GETextureFormat(entry->format), gstate.getClutPaletteFormat());262if (plan.doReplace) {263dstFmt = ToDXGIFormat(plan.replaced->Format());264} else if (plan.scaleFactor > 1 || plan.saveTexture) {265dstFmt = DXGI_FORMAT_B8G8R8A8_UNORM;266} else if (plan.decodeToClut8) {267dstFmt = DXGI_FORMAT_R8_UNORM;268}269270int levels;271272ID3D11ShaderResourceView *view;273ID3D11Resource *texture = DxTex(entry);274_assert_(texture == nullptr);275276// The PSP only supports 8 mip levels, but we support more in the texture replacer. 20 will never run out.277D3D11_SUBRESOURCE_DATA subresData[20]{};278279if (plan.depth == 1) {280// We don't yet have mip generation, so clamp the number of levels to the ones we can load directly.281levels = std::min(plan.levelsToCreate, plan.levelsToLoad);282} else {283levels = plan.depth;284}285286Draw::DataFormat texFmt = FromD3D11Format(dstFmt);287288for (int i = 0; i < levels; i++) {289int srcLevel = (i == 0) ? plan.baseLevelSrc : i;290291int mipWidth;292int mipHeight;293plan.GetMipSize(i, &mipWidth, &mipHeight);294295u8 *data = nullptr;296int stride = 0;297298int dataSize;299if (plan.doReplace) {300int blockSize = 0;301if (Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), &blockSize)) {302stride = ((mipWidth + 3) & ~3) * blockSize / 4; // Number of blocks * 4 * Size of a block / 4303dataSize = plan.replaced->GetLevelDataSizeAfterCopy(i);304} else {305int bpp = (int)Draw::DataFormatSizeInBytes(plan.replaced->Format());306stride = std::max(mipWidth * bpp, 16);307dataSize = stride * mipHeight;308}309} else {310int bpp = 0;311if (plan.scaleFactor > 1) {312bpp = 4;313} else {314bpp = dstFmt == DXGI_FORMAT_B8G8R8A8_UNORM ? 4 : 2;315}316stride = std::max(mipWidth * bpp, 16);317dataSize = stride * mipHeight;318}319320if (plan.depth == 1) {321data = (u8 *)AllocateAlignedMemory(dataSize, 16);322subresData[i].pSysMem = data;323subresData[i].SysMemPitch = stride;324subresData[i].SysMemSlicePitch = 0;325} else {326if (i == 0) {327subresData[0].pSysMem = AllocateAlignedMemory(stride * mipHeight * plan.depth, 16);328subresData[0].SysMemPitch = stride;329subresData[0].SysMemSlicePitch = stride * mipHeight;330}331data = (uint8_t *)subresData[0].pSysMem + stride * mipHeight * i;332}333334if (!data) {335ERROR_LOG(Log::G3D, "Ran out of RAM trying to allocate a temporary texture upload buffer (%dx%d)", mipWidth, mipHeight);336return;337}338339LoadTextureLevel(*entry, data, 0, stride, plan, srcLevel, texFmt, TexDecodeFlags{});340}341342int tw;343int th;344plan.GetMipSize(0, &tw, &th);345if (tw > 16384)346tw = 16384;347if (th > 16384)348th = 16384;349350// NOTE: For block-compressed textures, we'll force the size up to the closest 4x4. This is due to an351// unfortunate restriction in D3D11 (and early D3D12). We'll warn about it in the log to give texture pack352// authors notice to fix it.353if (plan.doReplace && Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), nullptr)) {354tw = (tw + 3) & ~3;355th = (th + 3) & ~3;356}357358if (plan.depth == 1) {359// We don't yet have mip generation, so clamp the number of levels to the ones we can load directly.360levels = std::min(plan.levelsToCreate, plan.levelsToLoad);361362ID3D11Texture2D *tex = nullptr;363D3D11_TEXTURE2D_DESC desc{};364desc.CPUAccessFlags = 0;365desc.Usage = D3D11_USAGE_DEFAULT;366desc.ArraySize = 1;367desc.SampleDesc.Count = 1;368desc.Width = tw;369desc.Height = th;370desc.Format = dstFmt;371desc.MipLevels = levels;372desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;373ASSERT_SUCCESS(device_->CreateTexture2D(&desc, subresData, &tex));374texture = tex;375} else {376ID3D11Texture3D *tex = nullptr;377D3D11_TEXTURE3D_DESC desc{};378desc.CPUAccessFlags = 0;379desc.Usage = D3D11_USAGE_DEFAULT;380desc.Width = tw;381desc.Height = th;382desc.Depth = plan.depth;383desc.Format = dstFmt;384desc.MipLevels = 1;385desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;386ASSERT_SUCCESS(device_->CreateTexture3D(&desc, subresData, &tex));387texture = tex;388389levels = plan.depth;390}391392ASSERT_SUCCESS(device_->CreateShaderResourceView(texture, nullptr, &view));393entry->texturePtr = texture;394entry->textureView = view;395396for (int i = 0; i < 12; i++) {397if (subresData[i].pSysMem) {398FreeAlignedMemory((void *)subresData[i].pSysMem);399}400}401402// Signal that we support depth textures so use it as one.403if (plan.depth > 1) {404entry->status |= TexCacheEntry::STATUS_3D;405}406407if (levels == 1) {408entry->status |= TexCacheEntry::STATUS_NO_MIPS;409} else {410entry->status &= ~TexCacheEntry::STATUS_NO_MIPS;411}412413if (plan.doReplace) {414entry->SetAlphaStatus(TexCacheEntry::TexStatus(plan.replaced->AlphaStatus()));415416if (!Draw::DataFormatIsBlockCompressed(plan.replaced->Format(), nullptr)) {417entry->status |= TexCacheEntry::STATUS_BGRA;418}419} else {420entry->status |= TexCacheEntry::STATUS_BGRA;421}422}423424DXGI_FORMAT GetClutDestFormatD3D11(GEPaletteFormat format) {425switch (format) {426case GE_CMODE_16BIT_ABGR4444:427return DXGI_FORMAT_B4G4R4A4_UNORM;428case GE_CMODE_16BIT_ABGR5551:429return DXGI_FORMAT_B5G5R5A1_UNORM;430case GE_CMODE_16BIT_BGR5650:431return DXGI_FORMAT_B5G6R5_UNORM;432case GE_CMODE_32BIT_ABGR8888:433return DXGI_FORMAT_B8G8R8A8_UNORM;434}435// Should never be here !436return DXGI_FORMAT_B8G8R8A8_UNORM;437}438439DXGI_FORMAT TextureCacheD3D11::GetDestFormat(GETextureFormat format, GEPaletteFormat clutFormat) const {440if (!gstate_c.Use(GPU_USE_16BIT_FORMATS)) {441return DXGI_FORMAT_B8G8R8A8_UNORM;442}443444switch (format) {445case GE_TFMT_CLUT4:446case GE_TFMT_CLUT8:447case GE_TFMT_CLUT16:448case GE_TFMT_CLUT32:449return GetClutDestFormatD3D11(clutFormat);450case GE_TFMT_4444:451return DXGI_FORMAT_B4G4R4A4_UNORM;452case GE_TFMT_5551:453return DXGI_FORMAT_B5G5R5A1_UNORM;454case GE_TFMT_5650:455return DXGI_FORMAT_B5G6R5_UNORM;456case GE_TFMT_8888:457case GE_TFMT_DXT1:458case GE_TFMT_DXT3:459case GE_TFMT_DXT5:460default:461return DXGI_FORMAT_B8G8R8A8_UNORM;462}463}464465bool TextureCacheD3D11::GetCurrentTextureDebug(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) {466SetTexture();467if (!nextTexture_) {468return GetCurrentFramebufferTextureDebug(buffer, isFramebuffer);469}470471// Apply texture may need to rebuild the texture if we're about to render, or bind a framebuffer.472TexCacheEntry *entry = nextTexture_;473ApplyTexture();474475ID3D11Texture2D *texture = (ID3D11Texture2D *)entry->texturePtr;476if (!texture)477return false;478479D3D11_TEXTURE2D_DESC desc;480texture->GetDesc(&desc);481482int width = desc.Width >> level;483int height = desc.Height >> level;484485switch (desc.Format) {486case DXGI_FORMAT_B8G8R8A8_UNORM:487buffer.Allocate(width, height, GPU_DBG_FORMAT_8888);488break;489490case DXGI_FORMAT_B5G6R5_UNORM:491buffer.Allocate(width, height, GPU_DBG_FORMAT_565);492break;493494case DXGI_FORMAT_B4G4R4A4_UNORM:495buffer.Allocate(width, height, GPU_DBG_FORMAT_4444);496break;497498case DXGI_FORMAT_B5G5R5A1_UNORM:499buffer.Allocate(width, height, GPU_DBG_FORMAT_5551);500break;501502case DXGI_FORMAT_R8_UNORM:503buffer.Allocate(width, height, GPU_DBG_FORMAT_8BIT);504break;505506default:507return false;508}509510desc.BindFlags = 0;511desc.Usage = D3D11_USAGE_STAGING;512desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;513514ID3D11Texture2D *stagingCopy = nullptr;515device_->CreateTexture2D(&desc, nullptr, &stagingCopy);516if (!stagingCopy)517return false;518context_->CopyResource(stagingCopy, texture);519520D3D11_MAPPED_SUBRESOURCE map;521if (FAILED(context_->Map(stagingCopy, level, D3D11_MAP_READ, 0, &map))) {522stagingCopy->Release();523return false;524}525526int bufferRowSize = buffer.PixelSize() * width;527for (int y = 0; y < height; y++) {528memcpy(buffer.GetData() + bufferRowSize * y, (const uint8_t *)map.pData + map.RowPitch * y, bufferRowSize);529}530531context_->Unmap(stagingCopy, level);532stagingCopy->Release();533*isFramebuffer = false;534return true;535}536537void *TextureCacheD3D11::GetNativeTextureView(const TexCacheEntry *entry) {538ID3D11ShaderResourceView *textureView = DxView(entry);539return (void *)textureView;540}541542543