CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/GPUState.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#include "Common/Common.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Core/CoreParameter.h"
#include "Core/Config.h"
#include "Core/System.h"
#include "Core/MemMap.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"

#ifdef _M_SSE
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif

// This must be aligned so that the matrices within are aligned.
// (FastLoadBoneMatrix below uses aligned SSE stores into boneMatrix.)
alignas(16) GPUgstate gstate;
// Let's align this one too for good measure.
alignas(16) GPUStateCache gstate_c;

// For save state compatibility.
// Selects which context layout GPUgstate::Save/Restore use: 0 is the older
// raw-memcpy layout, anything else is the command-stream layout.
// Set to 1 by GPUgstate::Reset and updated by GPUStateCache::DoState.
static int savedContextVersion = 1;

// An inclusive range [start, end] of indices into GPUgstate::cmdmem.
struct CmdRange {
	u8 start;
	u8 end;
};

// The cmdmem ranges that GPUgstate::Save writes and GPUgstate::Restore reads back,
// in exactly this order. Both functions walk this table the same way, so any change
// here changes the saved context layout. The "Skip" comments mark the gaps that are
// deliberately not part of the saved context.
static const CmdRange contextCmdRanges[] = {
	{0x00, 0x02},
	// Skip: {0x03, 0x0F},
	{0x10, 0x10},
	// Skip: {0x11, 0x11},
	{0x12, 0x28},
	// Skip: {0x29, 0x2B},
	{0x2c, 0x33},
	// Skip: {0x34, 0x35},
	{0x36, 0x38},
	// Skip: {0x39, 0x41},
	{0x42, 0x4D},
	// Skip: {0x4E, 0x4F},
	{0x50, 0x51},
	// Skip: {0x52, 0x52},
	{0x53, 0x58},
	// Skip: {0x59, 0x5A},
	{0x5B, 0xB5},
	// Skip: {0xB6, 0xB7},
	{0xB8, 0xC3},
	// Skip: {0xC4, 0xC4},
	{0xC5, 0xD0},
	// Skip: {0xD1, 0xD1}
	{0xD2, 0xE9},
	// Skip: {0xEA, 0xEA},
	{0xEB, 0xEC},
	// Skip: {0xED, 0xED},
	{0xEE, 0xEE},
	// Skip: {0xEF, 0xEF},
	{0xF0, 0xF6},
	// Skip: {0xF7, 0xF7},
	{0xF8, 0xF9},
	// Skip: {0xFA, 0xFF},
};

// Writes one matrix (or, for GE_MTX_BONE0, all eight bone matrices) into the
// saved context as a command stream.
//
// cmds:    destination cursor inside the context buffer.
// type:    which matrix to fetch from the GPU backend.
// sz:      number of data words to reserve after the leading command word.
// numcmd:  command id written (shifted into the top byte) as the leading word.
// datacmd: command id passed (shifted into the top byte) to GetMatrix24 for the data words.
//
// Returns the cursor advanced by 1 + sz words, or cmds unchanged when gpu is null.
// NOTE(review): in the null-gpu case nothing is written, yet LoadMatrix below always
// consumes 1 + sz words - confirm Save is never reached without a GPU backend.
static u32_le *SaveMatrix(u32_le *cmds, GEMatrixType type, int sz, int numcmd, int datacmd) {
	if (!gpu)
		return cmds;

	*cmds++ = numcmd << 24;
	// This saves the CPU-visible values, not the actual used ones, which may differ.
	// Note that Restore overwrites both values.
	if (type == GE_MTX_BONE0) {
		// Bone matrices are stored back to back, 12 words each.
		for (int i = 0; i < 8; ++i)
			gpu->GetMatrix24(GEMatrixType(GE_MTX_BONE0 + i), cmds + i * 12, datacmd << 24);
	} else {
		gpu->GetMatrix24(type, cmds, datacmd << 24);
	}
	cmds += sz;

	return cmds;
}

// Reads back one matrix written by SaveMatrix.
//
// cmds: source cursor, positioned at the leading command word.
// mtx:  destination array receiving sz floats.
// sz:   number of data words to convert with getFloat24.
//
// Returns the cursor advanced past the leading word and the sz data words.
static const u32_le *LoadMatrix(const u32_le *cmds, float *mtx, int sz) {
	// Skip the reset.
	cmds++;
	for (int i = 0; i < sz; ++i) {
		mtx[i] = getFloat24(*cmds++);
	}

	return cmds;
}

// Puts the global gstate into its initial state: each of the first 256 cmdmem
// entries holds only its own index in the top byte, and all matrices are zero.
// Also resets savedContextVersion to 1 and marks the cull planes dirty.
// Note that this operates on the global gstate rather than on *this.
void GPUgstate::Reset() {
	memset(gstate.cmdmem, 0, sizeof(gstate.cmdmem));
	for (int i = 0; i < 256; i++) {
		gstate.cmdmem[i] = i << 24;
	}

	// Lighting is not enabled by default, matrices are zero initialized.
	memset(gstate.worldMatrix, 0, sizeof(gstate.worldMatrix));
	memset(gstate.viewMatrix, 0, sizeof(gstate.viewMatrix));
	memset(gstate.projMatrix, 0, sizeof(gstate.projMatrix));
	memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
	memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));

	savedContextVersion = 1;

	gstate_c.Dirty(DIRTY_CULL_PLANES);
}

// Serializes the GE context into the buffer at ptr.
//
// Layout written here (in 32-bit words):
//   ptr[5..7]  vertex, index and offset addresses from gstate_c.
//   ptr[17..]  the cmdmem values selected by contextCmdRanges, followed by the
//              matrix data in one of two formats chosen by savedContextVersion.
//
// The caller must provide a buffer large enough for all of this; the required
// size is not checked here.
void GPUgstate::Save(u32_le *ptr) {
	// Not sure what the first 10 values are, exactly, but these seem right.
	ptr[5] = gstate_c.vertexAddr;
	ptr[6] = gstate_c.indexAddr;
	ptr[7] = gstate_c.offsetAddr;

	// Command values start 17 ints in.
	u32_le *cmds = ptr + 17;
	for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
		for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
			// We'll run ReapplyGfxState after this to process dirtying.
			*cmds++ = cmdmem[n];
		}
	}

	if (savedContextVersion == 0) {
		// Old layout: optional loadclut word, the five matrix number registers,
		// then the raw float matrices copied byte for byte.
		// The loadclut word is only present when the CLUT address is valid;
		// Restore makes the same check so both sides agree on the layout.
		if (Memory::IsValidAddress(getClutAddress()))
			*cmds++ = loadclut;

		// Seems like it actually writes commands to load the matrices and then reset the counts.
		*cmds++ = boneMatrixNumber;
		*cmds++ = worldmtxnum;
		*cmds++ = viewmtxnum;
		*cmds++ = projmtxnum;
		*cmds++ = texmtxnum;

		u8 *matrices = (u8 *)cmds;
		memcpy(matrices, boneMatrix, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
		memcpy(matrices, worldMatrix, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
		memcpy(matrices, viewMatrix, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
		memcpy(matrices, projMatrix, sizeof(projMatrix)); matrices += sizeof(projMatrix);
		memcpy(matrices, tgenMatrix, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
	} else {
		// Current layout: each matrix as a command stream (see SaveMatrix), then the
		// five matrix number registers, then an END command.
		cmds = SaveMatrix(cmds, GE_MTX_BONE0, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
		cmds = SaveMatrix(cmds, GE_MTX_WORLD, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
		cmds = SaveMatrix(cmds, GE_MTX_VIEW, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
		cmds = SaveMatrix(cmds, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
		cmds = SaveMatrix(cmds, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);

		// Keep only the command byte and the low index bits of each register.
		*cmds++ = boneMatrixNumber & 0xFF00007F;
		*cmds++ = worldmtxnum & 0xFF00000F;
		*cmds++ = viewmtxnum & 0xFF00000F;
		*cmds++ = projmtxnum & 0xFF00000F;
		*cmds++ = texmtxnum & 0xFF00000F;
		*cmds++ = GE_CMD_END << 24;
	}
}

// Loads 12 consecutive words from emulated memory at addr into boneMatrix,
// starting at the element index held in the low 7 bits of boneMatrixNumber,
// then advances boneMatrixNumber by 12.
//
// Each source word is shifted left by 8 bits before being stored.
// NOTE(review): presumably the source words carry 24-bit float payloads (compare
// getFloat24 in LoadMatrix) - confirm.
//
// addr is read through Memory::GetPointerUnchecked, so the caller is responsible
// for passing a valid address.
// NOTE(review): no bounds check is visible here that (num & 0x7F) + 12 stays within
// boneMatrix - confirm the caller guarantees it.
void GPUgstate::FastLoadBoneMatrix(u32 addr) {
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(addr);
	u32 num = boneMatrixNumber;
	u32 *dst = (u32 *)(boneMatrix + (num & 0x7F));

#ifdef _M_SSE
	__m128i row1 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)src), 8);
	__m128i row2 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 4)), 8);
	__m128i row3 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 8)), 8);
	// Aligned stores are only used when the destination index is a multiple of four
	// elements; this relies on boneMatrix itself being 16-byte aligned.
	if ((num & 0x3) == 0) {
		_mm_store_si128((__m128i *)dst, row1);
		_mm_store_si128((__m128i *)(dst + 4), row2);
		_mm_store_si128((__m128i *)(dst + 8), row3);
	} else {
		_mm_storeu_si128((__m128i *)dst, row1);
		_mm_storeu_si128((__m128i *)(dst + 4), row2);
		_mm_storeu_si128((__m128i *)(dst + 8), row3);
	}
#elif PPSSPP_ARCH(ARM_NEON)
	const uint32x4_t row1 = vshlq_n_u32(vld1q_u32(src), 8);
	const uint32x4_t row2 = vshlq_n_u32(vld1q_u32(src + 4), 8);
	const uint32x4_t row3 = vshlq_n_u32(vld1q_u32(src + 8), 8);
	vst1q_u32(dst, row1);
	vst1q_u32(dst + 4, row2);
	vst1q_u32(dst + 8, row3);
#else
	// Portable fallback: same shift-and-store, one word at a time.
	for (int i = 0; i < 12; i++) {
		dst[i] = src[i] << 8;
	}
#endif

	num += 12;
	// Rebuild the register with the command id in the top byte and the advanced
	// index in the low 24 bits. This writes the global gstate rather than *this.
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
}

// Deserializes a GE context previously written by Save from the buffer at ptr.
//
// Reads the same layout Save writes: addresses at ptr[5..7], the cmdmem values
// selected by contextCmdRanges starting at ptr[17], then the matrix data in the
// format chosen by savedContextVersion. Afterwards asks the GPU backend (if any)
// to reset its matrices and marks the cull planes dirty.
void GPUgstate::Restore(const u32_le *ptr) {
	// Not sure what the first 10 values are, exactly, but these seem right.
	gstate_c.vertexAddr = ptr[5];
	gstate_c.indexAddr = ptr[6];
	gstate_c.offsetAddr = ptr[7];

	// Command values start 17 ints in.
	const u32_le *cmds = ptr + 17;
	for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
		for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
			cmdmem[n] = *cmds++;
		}
	}

	if (savedContextVersion == 0) {
		// Old layout. getClutAddress() is evaluated after cmdmem has been restored
		// above, so this check must stay below that loop.
		if (Memory::IsValidAddress(getClutAddress()))
			loadclut = *cmds++;
		boneMatrixNumber = *cmds++;
		worldmtxnum = *cmds++;
		viewmtxnum = *cmds++;
		projmtxnum = *cmds++;
		texmtxnum = *cmds++;

		u8 *matrices = (u8 *)cmds;
		memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
		memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
		memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
		memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
		memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
	} else {
		// Current layout: matrices in the same order SaveMatrix wrote them.
		cmds = LoadMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix));
		cmds = LoadMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix));
		cmds = LoadMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix));
		cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
		cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));

		// Same masks as in Save: keep the command byte and the low index bits.
		boneMatrixNumber = (*cmds++) & 0xFF00007F;
		worldmtxnum = (*cmds++) & 0xFF00000F;
		viewmtxnum = (*cmds++) & 0xFF00000F;
		projmtxnum = (*cmds++) & 0xFF00000F;
		texmtxnum = (*cmds++) & 0xFF00000F;
	}

	if (gpu)
		gpu->ResetMatrices();

	gstate_c.Dirty(DIRTY_CULL_PLANES);
}

// Returns true when the weight field of vertType (the bits selected by
// GE_VTYPE_WEIGHT_MASK) is anything other than GE_VTYPE_WEIGHT_NONE.
bool vertTypeIsSkinningEnabled(u32 vertType) {
	return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE);
}

// Layout of the unversioned state that GPUStateCache::DoState reads when the
// "GPUStateCache" section is not available. It is serialized as one unit, so the
// field order and types must not change.
struct GPUStateCache_v0 {
	u32 vertexAddr;
	u32 indexAddr;

	u32 offsetAddr;

	bool textureChanged;
	bool textureFullAlpha;
	bool vertexFullAlpha;
	bool framebufChanged;

	int skipDrawReason;

	UVScale uv;
	bool flipTexture;
};

// Zero-fills the global gstate_c (not *this).
void GPUStateCache::Reset() {
	memset(&gstate_c, 0, sizeof(gstate_c));
}

// Serializes or deserializes the state cache through p.
//
// The section is requested with versions 0 through 5. Fields that no longer exist
// are still passed through Do() via local placeholders (marked "legacy") so that
// the stream position stays in sync; their values are discarded.
// The order of the Do() calls defines the stream format and must not be changed.
// Also updates the file-level savedContextVersion used by GPUgstate::Save/Restore.
void GPUStateCache::DoState(PointerWrap &p) {
	auto s = p.Section("GPUStateCache", 0, 5);
	if (!s) {
		// Old state, this was not versioned.
		GPUStateCache_v0 old;
		Do(p, old);

		vertexAddr = old.vertexAddr;
		indexAddr = old.indexAddr;
		offsetAddr = old.offsetAddr;
		// The old textureChanged flag is not carried over; the texture is simply marked dirty.
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		textureFullAlpha = old.textureFullAlpha;
		vertexFullAlpha = old.vertexFullAlpha;
		skipDrawReason = old.skipDrawReason;
		uv = old.uv;

		savedContextVersion = 0;
	} else {
		Do(p, vertexAddr);
		Do(p, indexAddr);
		Do(p, offsetAddr);

		uint8_t textureChanged = 0;
		Do(p, textureChanged); // legacy
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		Do(p, textureFullAlpha);
		Do(p, vertexFullAlpha);
		bool framebufChanged = false; // legacy
		Do(p, framebufChanged);

		Do(p, skipDrawReason);

		Do(p, uv);

		bool oldFlipTexture = false;
		Do(p, oldFlipTexture); // legacy
	}

	// needShaderTexClamp and bgraTexture don't need to be saved.

	// Present from section version 3 on; value is no longer used.
	if (s >= 3) {
		bool oldTextureSimpleAlpha = false;
		Do(p, oldTextureSimpleAlpha);
	}

	// Section versions below 2 carried lighting data here; it is read into
	// scratch buffers and discarded.
	if (s < 2) {
		float l12[12];
		float l4[4];
		Do(p, l12); // lightpos
		Do(p, l12); // lightdir
		Do(p, l12); // lightattr
		Do(p, l12); // lightcol0
		Do(p, l12); // lightcol1
		Do(p, l12); // lightcol2
		Do(p, l4); // lightangle
		Do(p, l4); // lightspot
	}

	Do(p, morphWeights);

	Do(p, curTextureWidth);
	Do(p, curTextureHeight);
	Do(p, actualTextureHeight);
	// curTextureXOffset and curTextureYOffset don't need to be saved. Well, the above don't either...

	Do(p, vpWidth);
	Do(p, vpHeight);
	// Only section version 4 carried an extra depth value at this position.
	if (s == 4) {
		float oldDepth = 1.0f;
		Do(p, oldDepth);
	}

	Do(p, curRTWidth);
	Do(p, curRTHeight);

	// curRTBufferWidth, curRTBufferHeight, and cutRTOffsetX don't need to be saved.
	// States older than section version 5 did not store the context version, so
	// they are treated as using the old context layout (version 0).
	if (s < 5) {
		savedContextVersion = 0;
	} else {
		Do(p, savedContextVersion);
	}

	// After loading a state, derived cull plane data has to be recomputed.
	if (p.GetMode() == PointerWrap::MODE_READ)
		gstate_c.Dirty(DIRTY_CULL_PLANES);
}

// Display names for GPU use flags, indexed by the value passed to
// GpuUseFlagToString (0..31). Entries without a name hold "N/A".
static const char *const gpuUseFlagNames[32] = {
	"GPU_USE_DUALSOURCE_BLEND",
	"GPU_USE_LIGHT_UBERSHADER",
	"GPU_USE_FRAGMENT_TEST_CACHE",
	"GPU_USE_VS_RANGE_CULLING",
	"GPU_USE_BLEND_MINMAX",
	"GPU_USE_LOGIC_OP",
	"GPU_USE_FRAGMENT_UBERSHADER",
	"GPU_USE_TEXTURE_NPOT",
	"GPU_USE_ANISOTROPY",
	"GPU_USE_CLEAR_RAM_HACK",
	"GPU_USE_INSTANCE_RENDERING",
	"GPU_USE_VERTEX_TEXTURE_FETCH",
	"GPU_USE_TEXTURE_FLOAT",
	"GPU_USE_16BIT_FORMATS",
	"GPU_USE_DEPTH_CLAMP",
	"GPU_USE_TEXTURE_LOD_CONTROL",
	"GPU_USE_DEPTH_TEXTURE",
	"GPU_USE_ACCURATE_DEPTH",
	"GPU_USE_GS_CULLING",
	"N/A",
	"GPU_USE_FRAMEBUFFER_FETCH",
	"GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT",
	"GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT",
	"GPU_ROUND_DEPTH_TO_16BIT",
	"GPU_USE_CLIP_DISTANCE",
	"GPU_USE_CULL_DISTANCE",
	"N/A", // bit 26
	"N/A", // bit 27
	"N/A", // bit 28
	"GPU_USE_VIRTUAL_REALITY",
	"GPU_USE_SINGLE_PASS_STEREO",
	"GPU_USE_SIMPLE_STEREO_PERSPECTIVE",
};

// Returns the display name for the use flag with index useFlag, or "N/A" when the
// index is outside 0..31. The cast to u32 makes negative values fail the range
// check as well, so they also yield "N/A".
const char *GpuUseFlagToString(int useFlag) {
	if ((u32)useFlag < 32) {
		return gpuUseFlagNames[useFlag];
	} else {
		return "N/A";
	}
}