CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Software/SoftGpu.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <set>18#include "Common/System/Display.h"19#include "Common/GPU/OpenGL/GLFeatures.h"2021#include "GPU/GPUState.h"22#include "GPU/ge_constants.h"23#include "GPU/Common/TextureDecoder.h"24#include "Common/Data/Convert/ColorConv.h"25#include "Common/GraphicsContext.h"26#include "Common/LogReporting.h"27#include "Core/Config.h"28#include "Core/ConfigValues.h"29#include "Core/Core.h"30#include "Core/Debugger/MemBlockInfo.h"31#include "Core/MemMap.h"32#include "Core/MemMapHelpers.h"33#include "Core/HLE/sceKernelInterrupt.h"34#include "Core/HLE/sceGe.h"35#include "Core/MIPS/MIPS.h"36#include "Core/Util/PPGeDraw.h"37#include "Common/Profiler/Profiler.h"38#include "Common/GPU/thin3d.h"3940#include "GPU/Software/DrawPixel.h"41#include "GPU/Software/Rasterizer.h"42#include "GPU/Software/Sampler.h"43#include "GPU/Software/SoftGpu.h"44#include "GPU/Software/TransformUnit.h"45#include "GPU/Common/DrawEngineCommon.h"46#include "GPU/Common/PresentationCommon.h"47#include "Common/GPU/ShaderTranslation.h"48#include "GPU/Common/SplineCommon.h"49#include "GPU/Debugger/Debugger.h"50#include "GPU/Debugger/Record.h"5152const int FB_WIDTH = 480;53const int FB_HEIGHT = 272;5455uint8_t clut[1024];56FormatBuffer fb;57FormatBuffer depthbuf;5859struct CommandInfo {60uint64_t flags;61SoftGPU::CmdFunc func;62};63static CommandInfo softgpuCmdInfo[256];6465struct SoftwareCommandTableEntry {66uint8_t cmd;67uint8_t flags;68SoftDirty dirty;69SoftGPU::CmdFunc func;70};7172// Software uses a different one, because dirty flags and execute funcs are a bit different.73const SoftwareCommandTableEntry softgpuCommandTable[] = {74{ GE_CMD_OFFSETADDR, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_OffsetAddr },75{ GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC, SoftDirty::NONE, &GPUCommon::Execute_Origin },76{ GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, SoftDirty::NONE, &GPUCommon::Execute_Jump },77{ GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, SoftDirty::NONE, &SoftGPU::Execute_Call },78{ GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, SoftDirty::NONE, &GPUCommon::Execute_Ret },79{ GE_CMD_END, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, SoftDirty::NONE, &GPUCommon::Execute_End },80{ GE_CMD_VADDR, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Vaddr },81{ GE_CMD_IADDR, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Iaddr },82{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, SoftDirty::NONE, &GPUCommon::Execute_BJump },83{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoundingBox },8485{ GE_CMD_PRIM, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_Prim },86{ GE_CMD_BEZIER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_Bezier },87{ GE_CMD_SPLINE, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_Spline },8889// Vertex type affects a number of things, mainly because of through.90{ GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, SoftDirty::TRANSFORM_BASIC, &SoftGPU::Execute_VertexType },9192{ GE_CMD_LOADCLUT, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_LoadClut },9394// These two are actually processed in CMD_END, no flush needed.95{ GE_CMD_SIGNAL },96{ GE_CMD_FINISH },9798// Changes that dirty the framebuffer or depthbuffer pointer/size.99{ GE_CMD_FRAMEBUFPTR, FLAG_EXECUTEONCHANGE, SoftDirty::BINNER_RANGE, &SoftGPU::Execute_FramebufPtr },100{ GE_CMD_FRAMEBUFWIDTH, FLAG_EXECUTEONCHANGE, SoftDirty::BINNER_RANGE | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED, &SoftGPU::Execute_FramebufPtr },101{ GE_CMD_FRAMEBUFPIXFORMAT, FLAG_EXECUTEONCHANGE, SoftDirty::BINNER_RANGE | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_WRITEMASK, &SoftGPU::Execute_FramebufFormat },102{ GE_CMD_ZBUFPTR, FLAG_EXECUTEONCHANGE, SoftDirty::BINNER_RANGE, &SoftGPU::Execute_ZbufPtr },103{ GE_CMD_ZBUFWIDTH, FLAG_EXECUTEONCHANGE, SoftDirty::BINNER_RANGE | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED, &SoftGPU::Execute_ZbufPtr },104105{ GE_CMD_FOGCOLOR, 0, SoftDirty::PIXEL_CACHED },106{ GE_CMD_FOG1, 0, SoftDirty::TRANSFORM_FOG },107{ GE_CMD_FOG2, 0, SoftDirty::TRANSFORM_FOG },108109{ GE_CMD_CLEARMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_BASIC | SoftDirty::RAST_TEX | SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_STENCIL | SoftDirty::PIXEL_CACHED | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },110{ GE_CMD_TEXTUREMAPENABLE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX | SoftDirty::TRANSFORM_BASIC | SoftDirty::BINNER_OVERLAP },111{ GE_CMD_FOGENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED | SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_FOG | SoftDirty::TRANSFORM_MATRIX },112{ GE_CMD_TEXMODE, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX },113// Currently this doesn't affect any state, but maybe it should.114{ GE_CMD_TEXSHADELS },115{ GE_CMD_SHADEMODE, 0, SoftDirty::RAST_BASIC },116{ GE_CMD_TEXFUNC, 0, SoftDirty::SAMPLER_BASIC },117{ GE_CMD_COLORTEST, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },118{ GE_CMD_ALPHATESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA },119{ GE_CMD_COLORTESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },120{ GE_CMD_COLORTESTMASK, 0, SoftDirty::PIXEL_CACHED },121122{ GE_CMD_REVERSENORMAL, 0, SoftDirty::TRANSFORM_BASIC },123{ GE_CMD_LIGHTINGENABLE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3 },124{ GE_CMD_LIGHTENABLE0, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 },125{ GE_CMD_LIGHTENABLE1, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_1 },126{ GE_CMD_LIGHTENABLE2, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_2 },127{ GE_CMD_LIGHTENABLE3, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_3 },128{ GE_CMD_LIGHTTYPE0, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_0 },129{ GE_CMD_LIGHTTYPE1, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_1 },130{ GE_CMD_LIGHTTYPE2, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_2 },131{ GE_CMD_LIGHTTYPE3, 0, SoftDirty::TRANSFORM_MATRIX | SoftDirty::LIGHT_3 },132{ GE_CMD_MATERIALUPDATE, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL },133134{ GE_CMD_LIGHTMODE, 0, SoftDirty::LIGHT_BASIC },135136{ GE_CMD_TEXFILTER, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::RAST_TEX },137{ GE_CMD_TEXWRAP, 0, SoftDirty::SAMPLER_BASIC },138139{ GE_CMD_ALPHATEST, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA },140{ GE_CMD_COLORREF, 0, SoftDirty::PIXEL_CACHED },141{ GE_CMD_TEXENVCOLOR, 0, SoftDirty::PIXEL_CACHED },142143// Currently, this is not part of state, just read on vertex processing.144{ GE_CMD_CULL },145{ GE_CMD_CULLFACEENABLE },146147{ GE_CMD_DITHERENABLE, 0, SoftDirty::PIXEL_BASIC },148{ GE_CMD_STENCILOP, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_STENCIL },149{ GE_CMD_STENCILTEST, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_STENCIL },150{ GE_CMD_STENCILTESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_STENCIL },151{ GE_CMD_ALPHABLENDENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA },152{ GE_CMD_BLENDMODE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA },153{ GE_CMD_BLENDFIXEDA, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_CACHED },154{ GE_CMD_BLENDFIXEDB, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_ALPHA | SoftDirty::PIXEL_CACHED },155{ GE_CMD_MASKRGB, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK },156{ GE_CMD_MASKALPHA, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_WRITEMASK },157{ GE_CMD_ZTEST, 0, SoftDirty::PIXEL_BASIC },158{ GE_CMD_ZTESTENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },159{ GE_CMD_ZWRITEDISABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP },160{ GE_CMD_LOGICOP, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },161{ GE_CMD_LOGICOPENABLE, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },162163{ GE_CMD_TEXMAPMODE, 0, SoftDirty::TRANSFORM_BASIC | SoftDirty::RAST_TEX },164165// These are read on every SubmitPrim, no need for dirtying or flushing.166{ GE_CMD_TEXSCALEU },167{ GE_CMD_TEXSCALEV },168{ GE_CMD_TEXOFFSETU },169{ GE_CMD_TEXOFFSETV },170171{ GE_CMD_TEXSIZE0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },172{ GE_CMD_TEXSIZE1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },173{ GE_CMD_TEXSIZE2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },174{ GE_CMD_TEXSIZE3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },175{ GE_CMD_TEXSIZE4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },176{ GE_CMD_TEXSIZE5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },177{ GE_CMD_TEXSIZE6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },178{ GE_CMD_TEXSIZE7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },179{ GE_CMD_TEXFORMAT, 0, SoftDirty::SAMPLER_BASIC | SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },180{ GE_CMD_TEXLEVEL, 0, SoftDirty::RAST_TEX },181{ GE_CMD_TEXLODSLOPE, 0, SoftDirty::RAST_TEX },182{ GE_CMD_TEXADDR0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },183{ GE_CMD_TEXADDR1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },184{ GE_CMD_TEXADDR2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },185{ GE_CMD_TEXADDR3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },186{ GE_CMD_TEXADDR4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },187{ GE_CMD_TEXADDR5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },188{ GE_CMD_TEXADDR6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },189{ GE_CMD_TEXADDR7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },190{ GE_CMD_TEXBUFWIDTH0, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },191{ GE_CMD_TEXBUFWIDTH1, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },192{ GE_CMD_TEXBUFWIDTH2, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },193{ GE_CMD_TEXBUFWIDTH3, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },194{ GE_CMD_TEXBUFWIDTH4, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },195{ GE_CMD_TEXBUFWIDTH5, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },196{ GE_CMD_TEXBUFWIDTH6, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },197{ GE_CMD_TEXBUFWIDTH7, 0, SoftDirty::SAMPLER_TEXLIST | SoftDirty::BINNER_OVERLAP },198199{ GE_CMD_CLUTADDR },200{ GE_CMD_CLUTADDRUPPER },201{ GE_CMD_CLUTFORMAT, 0, SoftDirty::SAMPLER_BASIC },202203// Morph weights. TODO: Remove precomputation?204{ GE_CMD_MORPHWEIGHT0, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },205{ GE_CMD_MORPHWEIGHT1, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },206{ GE_CMD_MORPHWEIGHT2, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },207{ GE_CMD_MORPHWEIGHT3, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },208{ GE_CMD_MORPHWEIGHT4, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },209{ GE_CMD_MORPHWEIGHT5, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },210{ GE_CMD_MORPHWEIGHT6, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },211{ GE_CMD_MORPHWEIGHT7, FLAG_EXECUTEONCHANGE, SoftDirty::NONE, &GPUCommon::Execute_MorphWeight },212213// No state of flushing required for patch parameters, currently.214{ GE_CMD_PATCHDIVISION },215{ GE_CMD_PATCHPRIMITIVE },216{ GE_CMD_PATCHFACING },217{ GE_CMD_PATCHCULLENABLE },218219// Can probably ignore this one as we don't support AA lines.220{ GE_CMD_ANTIALIASENABLE, 0, SoftDirty::RAST_BASIC },221222// Viewport and offset for positions.223{ GE_CMD_OFFSETX, 0, SoftDirty::RAST_OFFSET },224{ GE_CMD_OFFSETY, 0, SoftDirty::RAST_OFFSET },225{ GE_CMD_VIEWPORTXSCALE, 0, SoftDirty::TRANSFORM_VIEWPORT },226{ GE_CMD_VIEWPORTYSCALE, 0, SoftDirty::TRANSFORM_VIEWPORT },227{ GE_CMD_VIEWPORTXCENTER, 0, SoftDirty::TRANSFORM_VIEWPORT },228{ GE_CMD_VIEWPORTYCENTER, 0, SoftDirty::TRANSFORM_VIEWPORT },229{ GE_CMD_VIEWPORTZSCALE, 0, SoftDirty::TRANSFORM_VIEWPORT },230{ GE_CMD_VIEWPORTZCENTER, 0, SoftDirty::TRANSFORM_VIEWPORT },231{ GE_CMD_DEPTHCLAMPENABLE, 0, SoftDirty::TRANSFORM_BASIC },232233// Z clipping.234{ GE_CMD_MINZ, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },235{ GE_CMD_MAXZ, 0, SoftDirty::PIXEL_BASIC | SoftDirty::PIXEL_CACHED },236237// Region doesn't seem to affect scissor or anything.238// As long as REGION1 is zero, REGION2 is effectively another scissor.239{ GE_CMD_REGION1, 0, SoftDirty::BINNER_RANGE },240{ GE_CMD_REGION2, 0, SoftDirty::BINNER_RANGE },241242// Scissor, only used by the binner.243{ GE_CMD_SCISSOR1, 0, SoftDirty::BINNER_RANGE },244{ GE_CMD_SCISSOR2, 0, SoftDirty::BINNER_RANGE },245246// Lighting base colors.247{ GE_CMD_AMBIENTCOLOR, 0, SoftDirty::LIGHT_MATERIAL },248{ GE_CMD_AMBIENTALPHA, 0, SoftDirty::LIGHT_MATERIAL },249{ GE_CMD_MATERIALDIFFUSE, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3 },250// Not currently state, but maybe should be.251{ GE_CMD_MATERIALEMISSIVE, 0, SoftDirty::NONE },252{ GE_CMD_MATERIALAMBIENT, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3 },253{ GE_CMD_MATERIALALPHA, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3 },254{ GE_CMD_MATERIALSPECULAR, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3 },255{ GE_CMD_MATERIALSPECULARCOEF, 0, SoftDirty::LIGHT_BASIC },256257{ GE_CMD_LX0, 0, SoftDirty::LIGHT_0 },258{ GE_CMD_LY0, 0, SoftDirty::LIGHT_0 },259{ GE_CMD_LZ0, 0, SoftDirty::LIGHT_0 },260{ GE_CMD_LX1, 0, SoftDirty::LIGHT_1 },261{ GE_CMD_LY1, 0, SoftDirty::LIGHT_1 },262{ GE_CMD_LZ1, 0, SoftDirty::LIGHT_1 },263{ GE_CMD_LX2, 0, SoftDirty::LIGHT_2 },264{ GE_CMD_LY2, 0, SoftDirty::LIGHT_2 },265{ GE_CMD_LZ2, 0, SoftDirty::LIGHT_2 },266{ GE_CMD_LX3, 0, SoftDirty::LIGHT_3 },267{ GE_CMD_LY3, 0, SoftDirty::LIGHT_3 },268{ GE_CMD_LZ3, 0, SoftDirty::LIGHT_3 },269270{ GE_CMD_LDX0, 0, SoftDirty::LIGHT_0 },271{ GE_CMD_LDY0, 0, SoftDirty::LIGHT_0 },272{ GE_CMD_LDZ0, 0, SoftDirty::LIGHT_0 },273{ GE_CMD_LDX1, 0, SoftDirty::LIGHT_1 },274{ GE_CMD_LDY1, 0, SoftDirty::LIGHT_1 },275{ GE_CMD_LDZ1, 0, SoftDirty::LIGHT_1 },276{ GE_CMD_LDX2, 0, SoftDirty::LIGHT_2 },277{ GE_CMD_LDY2, 0, SoftDirty::LIGHT_2 },278{ GE_CMD_LDZ2, 0, SoftDirty::LIGHT_2 },279{ GE_CMD_LDX3, 0, SoftDirty::LIGHT_3 },280{ GE_CMD_LDY3, 0, SoftDirty::LIGHT_3 },281{ GE_CMD_LDZ3, 0, SoftDirty::LIGHT_3 },282283{ GE_CMD_LKA0, 0, SoftDirty::LIGHT_0 },284{ GE_CMD_LKB0, 0, SoftDirty::LIGHT_0 },285{ GE_CMD_LKC0, 0, SoftDirty::LIGHT_0 },286{ GE_CMD_LKA1, 0, SoftDirty::LIGHT_1 },287{ GE_CMD_LKB1, 0, SoftDirty::LIGHT_1 },288{ GE_CMD_LKC1, 0, SoftDirty::LIGHT_1 },289{ GE_CMD_LKA2, 0, SoftDirty::LIGHT_2 },290{ GE_CMD_LKB2, 0, SoftDirty::LIGHT_2 },291{ GE_CMD_LKC2, 0, SoftDirty::LIGHT_2 },292{ GE_CMD_LKA3, 0, SoftDirty::LIGHT_3 },293{ GE_CMD_LKB3, 0, SoftDirty::LIGHT_3 },294{ GE_CMD_LKC3, 0, SoftDirty::LIGHT_3 },295296{ GE_CMD_LKS0, 0, SoftDirty::LIGHT_0 },297{ GE_CMD_LKS1, 0, SoftDirty::LIGHT_1 },298{ GE_CMD_LKS2, 0, SoftDirty::LIGHT_2 },299{ GE_CMD_LKS3, 0, SoftDirty::LIGHT_3 },300301{ GE_CMD_LKO0, 0, SoftDirty::LIGHT_0 },302{ GE_CMD_LKO1, 0, SoftDirty::LIGHT_1 },303{ GE_CMD_LKO2, 0, SoftDirty::LIGHT_2 },304{ GE_CMD_LKO3, 0, SoftDirty::LIGHT_3 },305306// Specific light colors.307{ GE_CMD_LAC0, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 },308{ GE_CMD_LDC0, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 },309{ GE_CMD_LSC0, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 },310{ GE_CMD_LAC1, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_1 },311{ GE_CMD_LDC1, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_1 },312{ GE_CMD_LSC1, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_1 },313{ GE_CMD_LAC2, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_2 },314{ GE_CMD_LDC2, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_2 },315{ GE_CMD_LSC2, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_2 },316{ GE_CMD_LAC3, 0, SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_3 },317{ GE_CMD_LDC3, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_3 },318{ GE_CMD_LSC3, 0, SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_3 },319320// These are currently ignored, but might do flushing later.321{ GE_CMD_TEXFLUSH },322{ GE_CMD_TEXSYNC },323324// These are just nop or part of other later commands.325{ GE_CMD_NOP },326{ GE_CMD_BASE },327{ GE_CMD_TRANSFERSRC },328{ GE_CMD_TRANSFERSRCW },329{ GE_CMD_TRANSFERDST },330{ GE_CMD_TRANSFERDSTW },331{ GE_CMD_TRANSFERSRCPOS },332{ GE_CMD_TRANSFERDSTPOS },333{ GE_CMD_TRANSFERSIZE },334335// This will flush if necessary.336{ GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, SoftDirty::NONE, &SoftGPU::Execute_BlockTransferStart },337338// We cache the dither matrix, but the values affect little.339{ GE_CMD_DITH0, 0, SoftDirty::PIXEL_DITHER },340{ GE_CMD_DITH1, 0, SoftDirty::PIXEL_DITHER },341{ GE_CMD_DITH2, 0, SoftDirty::PIXEL_DITHER },342{ GE_CMD_DITH3, 0, SoftDirty::PIXEL_DITHER },343344{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxNum },345{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_WorldMtxData },346{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxNum },347{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ViewMtxData },348{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxNum },349{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ProjMtxData },350// Currently not state.351{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxNum },352{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_TgenMtxData },353{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxNum },354{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_BoneMtxData },355356// Vertex Screen/Texture/Color357{ GE_CMD_VSCX },358{ GE_CMD_VSCY },359{ GE_CMD_VSCZ },360{ GE_CMD_VTCS },361{ GE_CMD_VTCT },362{ GE_CMD_VTCQ },363{ GE_CMD_VCV },364{ GE_CMD_VAP, FLAG_EXECUTE, SoftDirty::NONE, &SoftGPU::Execute_ImmVertexAlphaPrim },365{ GE_CMD_VFC },366{ GE_CMD_VSCV },367368// "Missing" commands (gaps in the sequence)369{ GE_CMD_UNKNOWN_03, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },370{ GE_CMD_UNKNOWN_0D, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },371{ GE_CMD_UNKNOWN_11, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },372{ GE_CMD_UNKNOWN_29, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },373{ GE_CMD_UNKNOWN_34, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },374{ GE_CMD_UNKNOWN_35, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },375{ GE_CMD_UNKNOWN_39, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },376{ GE_CMD_UNKNOWN_4E, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },377{ GE_CMD_UNKNOWN_4F, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },378{ GE_CMD_UNKNOWN_52, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },379{ GE_CMD_UNKNOWN_59, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },380{ GE_CMD_UNKNOWN_5A, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },381{ GE_CMD_UNKNOWN_B6, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },382{ GE_CMD_UNKNOWN_B7, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },383{ GE_CMD_UNKNOWN_D1, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },384{ GE_CMD_UNKNOWN_ED, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },385{ GE_CMD_UNKNOWN_EF, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },386{ GE_CMD_UNKNOWN_FA, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },387{ GE_CMD_UNKNOWN_FB, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },388{ GE_CMD_UNKNOWN_FC, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },389{ GE_CMD_UNKNOWN_FD, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },390{ GE_CMD_UNKNOWN_FE, FLAG_EXECUTE, SoftDirty::NONE, &GPUCommon::Execute_Unknown },391// Appears to be debugging related or something? Hit a lot in GoW.392{ GE_CMD_NOP_FF },393};394395SoftGPU::SoftGPU(GraphicsContext *gfxCtx, Draw::DrawContext *draw)396: GPUCommon(gfxCtx, draw)397{398fb.data = Memory::GetPointerWrite(0x44000000); // TODO: correct default address?399depthbuf.data = Memory::GetPointerWrite(0x44000000); // TODO: correct default address?400401memset(softgpuCmdInfo, 0, sizeof(softgpuCmdInfo));402403// Convert the command table to a faster format, and check for dupes.404std::set<u8> dupeCheck;405for (size_t i = 0; i < ARRAY_SIZE(softgpuCommandTable); i++) {406const u8 cmd = softgpuCommandTable[i].cmd;407if (dupeCheck.find(cmd) != dupeCheck.end()) {408ERROR_LOG(Log::G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);409} else {410dupeCheck.insert(cmd);411}412softgpuCmdInfo[cmd].flags |= (uint64_t)softgpuCommandTable[i].flags | ((uint64_t)softgpuCommandTable[i].dirty << 8);413softgpuCmdInfo[cmd].func = softgpuCommandTable[i].func;414if ((softgpuCmdInfo[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !softgpuCmdInfo[cmd].func) {415// Can't have FLAG_EXECUTE commands without a function pointer to execute.416Crash();417}418}419// Find commands missing from the table.420for (int i = 0; i < 0xEF; i++) {421if (dupeCheck.find((u8)i) == dupeCheck.end()) {422ERROR_LOG(Log::G3D, "Command missing from table: %02x (%i)", i, i);423}424}425426memset(vramDirty_, (uint8_t)(SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY), sizeof(vramDirty_));427// TODO: Is there a default?428displayFramebuf_ = 0;429displayStride_ = 512;430displayFormat_ = GE_FORMAT_8888;431432Rasterizer::Init();433Sampler::Init();434drawEngine_ = new SoftwareDrawEngine();435if (!drawEngine_)436return;437438drawEngine_->Init();439drawEngineCommon_ = drawEngine_;440441// Push the initial CLUT buffer in case it's all zero (we push only on change.)442if (drawEngine_->transformUnit.IsStarted())443drawEngine_->transformUnit.NotifyClutUpdate(clut);444445// No need to flush for simple parameter changes.446flushOnParams_ = false;447448if (gfxCtx && draw) {449presentation_ = new PresentationCommon(draw_);450presentation_->SetLanguage(draw_->GetShaderLanguageDesc().shaderLanguage);451presentation_->UpdateDisplaySize(PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);452presentation_->UpdateRenderSize(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight);453}454455NotifyConfigChanged();456NotifyDisplayResized();457NotifyRenderResized();458}459460void SoftGPU::DeviceLost() {461if (presentation_)462presentation_->DeviceLost();463draw_ = nullptr;464if (fbTex) {465fbTex->Release();466fbTex = nullptr;467}468}469470void SoftGPU::DeviceRestore(Draw::DrawContext *draw) {471draw_ = draw;472if (presentation_)473presentation_->DeviceRestore(draw_);474PPGeSetDrawContext(draw_);475}476477SoftGPU::~SoftGPU() {478if (fbTex) {479fbTex->Release();480fbTex = nullptr;481}482483delete presentation_;484delete drawEngine_;485486Sampler::Shutdown();487Rasterizer::Shutdown();488}489490void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {491// Seems like this can point into RAM, but should be VRAM if not in RAM.492displayFramebuf_ = (framebuf & 0xFF000000) == 0 ? 0x44000000 | framebuf : framebuf;493displayStride_ = stride;494displayFormat_ = format;495GPUDebug::NotifyDisplay(framebuf, stride, format);496GPURecord::NotifyDisplay(framebuf, stride, format);497}498499DSStretch g_DarkStalkerStretch;500501void SoftGPU::ConvertTextureDescFrom16(Draw::TextureDesc &desc, int srcwidth, int srcheight, const uint16_t *overrideData) {502// TODO: This should probably be converted in a shader instead..503fbTexBuffer_.resize(srcwidth * srcheight);504const uint16_t *displayBuffer = overrideData;505if (!displayBuffer)506displayBuffer = (const uint16_t *)Memory::GetPointer(displayFramebuf_);507508for (int y = 0; y < srcheight; ++y) {509u32 *buf_line = &fbTexBuffer_[y * srcwidth];510const u16 *fb_line = &displayBuffer[y * displayStride_];511512switch (displayFormat_) {513case GE_FORMAT_565:514ConvertRGB565ToRGBA8888(buf_line, fb_line, srcwidth);515break;516517case GE_FORMAT_5551:518ConvertRGBA5551ToRGBA8888(buf_line, fb_line, srcwidth);519break;520521case GE_FORMAT_4444:522ConvertRGBA4444ToRGBA8888(buf_line, fb_line, srcwidth);523break;524525default:526ERROR_LOG_REPORT(Log::G3D, "Software: Unexpected framebuffer format: %d", displayFormat_);527break;528}529}530531desc.width = srcwidth;532desc.height = srcheight;533desc.initData.push_back((uint8_t *)fbTexBuffer_.data());534}535536// Copies RGBA8 data from RAM to the currently bound render target.537void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {538if (!draw_ || !presentation_)539return;540float u0 = 0.0f;541float u1;542float v0 = 0.0f;543float v1 = 1.0f;544545if (fbTex) {546fbTex->Release();547fbTex = nullptr;548}549550// For accuracy, try to handle 0 stride - sometimes used.551if (displayStride_ == 0) {552srcheight = 1;553u1 = 1.0f;554} else {555u1 = (float)srcwidth / displayStride_;556}557558Draw::TextureDesc desc{};559desc.type = Draw::TextureType::LINEAR2D;560desc.format = Draw::DataFormat::R8G8B8A8_UNORM;561desc.depth = 1;562desc.mipLevels = 1;563desc.tag = "SoftGPU";564bool hasImage = true;565566OutputFlags outputFlags = g_Config.iDisplayFilter == SCALE_NEAREST ? OutputFlags::NEAREST : OutputFlags::LINEAR;567bool hasPostShader = presentation_ && presentation_->HasPostShader();568569if (PSP_CoreParameter().compat.flags().DarkStalkersPresentHack && displayFormat_ == GE_FORMAT_5551 && g_DarkStalkerStretch != DSStretch::Off) {570const u8 *data = Memory::GetPointerWrite(0x04088000);571bool fillDesc = true;572if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {573// The perfect one.574desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;575} else if (!hasPostShader && (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE)) {576// RB swapped, compensate with a shader.577desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;578outputFlags |= OutputFlags::RB_SWIZZLE;579} else {580ConvertTextureDescFrom16(desc, srcwidth, srcheight, (const uint16_t *)data);581fillDesc = false;582}583if (fillDesc) {584desc.width = displayStride_ == 0 ? srcwidth : displayStride_;585desc.height = srcheight;586desc.initData.push_back(data);587}588u0 = 64.5f / (float)desc.width;589u1 = 447.5f / (float)desc.width;590v0 = 16.0f / (float)desc.height;591v1 = 240.0f / (float)desc.height;592if (g_DarkStalkerStretch == DSStretch::Normal) {593outputFlags |= OutputFlags::PILLARBOX;594}595} else if (!Memory::IsValidAddress(displayFramebuf_) || srcwidth == 0 || srcheight == 0) {596hasImage = false;597u1 = 1.0f;598} else if (displayFormat_ == GE_FORMAT_8888) {599const u8 *data = Memory::GetPointer(displayFramebuf_);600desc.width = displayStride_ == 0 ? srcwidth : displayStride_;601desc.height = srcheight;602desc.initData.push_back(data);603desc.format = Draw::DataFormat::R8G8B8A8_UNORM;604} else if (displayFormat_ == GE_FORMAT_5551) {605const u8 *data = Memory::GetPointer(displayFramebuf_);606bool fillDesc = true;607if (draw_->GetDataFormatSupport(Draw::DataFormat::A1B5G5R5_UNORM_PACK16) & Draw::FMT_TEXTURE) {608// The perfect one.609desc.format = Draw::DataFormat::A1B5G5R5_UNORM_PACK16;610} else if (!hasPostShader && (draw_->GetDataFormatSupport(Draw::DataFormat::A1R5G5B5_UNORM_PACK16) & Draw::FMT_TEXTURE)) {611// RB swapped, compensate with a shader.612desc.format = Draw::DataFormat::A1R5G5B5_UNORM_PACK16;613outputFlags |= OutputFlags::RB_SWIZZLE;614} else {615ConvertTextureDescFrom16(desc, srcwidth, srcheight);616u1 = 1.0f;617fillDesc = false;618}619if (fillDesc) {620desc.width = displayStride_ == 0 ? srcwidth : displayStride_;621desc.height = srcheight;622desc.initData.push_back(data);623}624} else {625ConvertTextureDescFrom16(desc, srcwidth, srcheight);626u1 = 1.0f;627}628if (!hasImage) {629draw_->BindFramebufferAsRenderTarget(nullptr, { Draw::RPAction::CLEAR, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "CopyToCurrentFboFromDisplayRam");630return;631}632633fbTex = draw_->CreateTexture(desc);634635switch (GetGPUBackend()) {636case GPUBackend::OPENGL:637outputFlags |= OutputFlags::BACKBUFFER_FLIPPED;638break;639case GPUBackend::DIRECT3D9:640case GPUBackend::DIRECT3D11:641outputFlags |= OutputFlags::POSITION_FLIPPED;642break;643case GPUBackend::VULKAN:644break;645}646647presentation_->SourceTexture(fbTex, desc.width, desc.height);648presentation_->CopyToOutput(outputFlags, g_Config.iInternalScreenRotation, u0, v0, u1, v1);649}650651void SoftGPU::CopyDisplayToOutput(bool reallyDirty) {652drawEngine_->transformUnit.Flush("output");653// The display always shows 480x272.654CopyToCurrentFboFromDisplayRam(FB_WIDTH, FB_HEIGHT);655MarkDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::CLEAR);656}657658void SoftGPU::MarkDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBufferFormat fmt, SoftGPUVRAMDirty value) {659uint32_t bytes = height * stride * (fmt == GE_FORMAT_8888 ? 4 : 2);660MarkDirty(addr, bytes, value);661}662663void SoftGPU::MarkDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) {664// Only bother tracking if frameskipping.665if (g_Config.iFrameSkip == 0)666return;667if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1))668return;669if (lastDirtyAddr_ == addr && lastDirtySize_ == bytes && lastDirtyValue_ == value)670return;671672uint32_t start = ((addr - PSP_GetVidMemBase()) & 0x001FFFFF) >> 10;673uint32_t end = start + ((bytes + 1023) >> 10);674if (end > sizeof(vramDirty_)) {675end = sizeof(vramDirty_);676}677if (value == SoftGPUVRAMDirty::CLEAR || value == (SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY)) {678memset(vramDirty_ + start, (uint8_t)value, end - start);679} else {680for (uint32_t i = start; i < end; ++i) {681vramDirty_[i] |= (uint8_t)value;682}683}684685lastDirtyAddr_ = addr;686lastDirtySize_ = bytes;687lastDirtyValue_ = value;688}689690bool SoftGPU::ClearDirty(uint32_t addr, uint32_t stride, uint32_t height, GEBufferFormat fmt, SoftGPUVRAMDirty value) {691uint32_t bytes = height * stride * (fmt == GE_FORMAT_8888 ? 4 : 2);692return ClearDirty(addr, bytes, value);693}694695bool SoftGPU::ClearDirty(uint32_t addr, uint32_t bytes, SoftGPUVRAMDirty value) {696if (!Memory::IsVRAMAddress(addr) || !Memory::IsVRAMAddress(addr + bytes - 1))697return false;698699uint32_t start = ((addr - PSP_GetVidMemBase()) & 0x001FFFFF) >> 10;700uint32_t end = start + ((bytes + 1023) >> 10);701bool result = false;702for (uint32_t i = start; i < end; ++i) {703if (vramDirty_[i] & (uint8_t)value) {704result = true;705vramDirty_[i] &= ~(uint8_t)value;706}707}708709lastDirtyAddr_ = 0;710lastDirtySize_ = 0;711712return result;713}714715void SoftGPU::NotifyRenderResized() {716// Force the render params to 480x272 so other things work.717if (g_Config.IsPortrait()) {718PSP_CoreParameter().renderWidth = 272;719PSP_CoreParameter().renderHeight = 480;720} else {721PSP_CoreParameter().renderWidth = 480;722PSP_CoreParameter().renderHeight = 272;723}724}725726void SoftGPU::NotifyDisplayResized() {727displayResized_ = true;728}729730void SoftGPU::CheckDisplayResized() {731if (displayResized_ && presentation_) {732presentation_->UpdateDisplaySize(PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);733presentation_->UpdateRenderSize(PSP_CoreParameter().renderWidth, PSP_CoreParameter().renderHeight);734presentation_->UpdatePostShader();735displayResized_ = false;736}737}738739void SoftGPU::CheckConfigChanged() {740if (configChanged_) {741drawEngineCommon_->NotifyConfigChanged();742BuildReportingInfo();743if (presentation_) {744presentation_->UpdatePostShader();745}746configChanged_ = false;747}748}749750void SoftGPU::FastRunLoop(DisplayList &list) {751PROFILE_THIS_SCOPE("soft_runloop");752const auto *cmdInfo = softgpuCmdInfo;753int dc = downcount;754SoftDirty dirty = dirtyFlags_;755for (; dc > 0; --dc) {756u32 op = Memory::ReadUnchecked_U32(list.pc);757const u32 cmd = op >> 24;758const auto &info = cmdInfo[cmd];759const u32 diff = op ^ gstate.cmdmem[cmd];760if (diff == 0) {761if (info.flags & FLAG_EXECUTE) {762downcount = dc;763dirtyFlags_ = dirty;764(this->*info.func)(op, diff);765dirty = dirtyFlags_;766dc = downcount;767}768} else {769uint64_t flags = info.flags;770gstate.cmdmem[cmd] = op;771dirty |= SoftDirty(flags >> 8);772if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {773downcount = dc;774dirtyFlags_ = dirty;775(this->*info.func)(op, diff);776dirty = dirtyFlags_;777dc = downcount;778}779}780list.pc += 4;781}782downcount = 0;783dirtyFlags_ = dirty;784}785786bool SoftGPU::IsStarted() {787return drawEngine_ && drawEngine_->transformUnit.IsStarted();788}789790void SoftGPU::ExecuteOp(u32 op, u32 diff) {791const u8 cmd = op >> 24;792const auto info = softgpuCmdInfo[cmd];793if (diff == 0) {794if (info.flags & FLAG_EXECUTE)795(this->*info.func)(op, diff);796} else {797dirtyFlags_ |= SoftDirty(info.flags >> 8);798if (info.flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE))799(this->*info.func)(op, diff);800}801}802803void SoftGPU::Execute_BlockTransferStart(u32 op, u32 diff) {804u32 srcBasePtr = gstate.getTransferSrcAddress();805u32 srcStride = gstate.getTransferSrcStride();806807u32 dstBasePtr = gstate.getTransferDstAddress();808u32 dstStride = gstate.getTransferDstStride();809810int srcX = gstate.getTransferSrcX();811int srcY = gstate.getTransferSrcY();812813int dstX = gstate.getTransferDstX();814int dstY = gstate.getTransferDstY();815816int width = gstate.getTransferWidth();817int height = gstate.getTransferHeight();818819int bpp = gstate.getTransferBpp();820821// Use height less one to account for width, which can be greater or less than stride.822const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;823const uint32_t srcSize = (height - 1) * (srcStride + width) * bpp;824const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;825const uint32_t dstSize = (height - 1) * (dstStride + width) * bpp;826827// Need to flush both source and target, so we overwrite properly.828if (Memory::IsValidRange(src, srcSize) && Memory::IsValidRange(dst, dstSize)) {829drawEngine_->transformUnit.FlushIfOverlap("blockxfer", false, src, srcStride, width * bpp, height);830drawEngine_->transformUnit.FlushIfOverlap("blockxfer", true, dst, dstStride, width * bpp, height);831} else {832drawEngine_->transformUnit.Flush("blockxfer_wrap");833}834835DoBlockTransfer(gstate_c.skipDrawReason);836837// Could theoretically dirty the framebuffer.838MarkDirty(dst, dstSize, SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY);839}840841void SoftGPU::Execute_Prim(u32 op, u32 diff) {842u32 count = op & 0xFFFF;843// Upper bits are ignored.844GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);845if (count == 0)846return;847FlushImm();848849if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {850ERROR_LOG_REPORT(Log::G3D, "Software: Bad vertex address %08x!", gstate_c.vertexAddr);851return;852}853854const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);855const void *indices = NULL;856if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {857if (!Memory::IsValidAddress(gstate_c.indexAddr)) {858ERROR_LOG_REPORT(Log::G3D, "Software: Bad index address %08x!", gstate_c.indexAddr);859return;860}861indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);862}863864cyclesExecuted += EstimatePerVertexCost() * count;865int bytesRead;866gstate_c.UpdateUVScaleOffset();867drawEngine_->transformUnit.SetDirty(dirtyFlags_);868drawEngine_->transformUnit.SubmitPrimitive(verts, indices, prim, count, gstate.vertType, &bytesRead, drawEngine_);869dirtyFlags_ = drawEngine_->transformUnit.GetDirty();870871SoftGPUVRAMDirty mark = (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) != 0 ? SoftGPUVRAMDirty::DIRTY : SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY;872MarkDirty(gstate.getFrameBufAddress(), gstate.FrameBufStride(), gstate.getRegionY2() + 1, gstate.FrameBufFormat(), mark);873874// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).875// Some games rely on this, they don't bother reloading VADDR and IADDR.876// The VADDR/IADDR registers are NOT updated.877AdvanceVerts(gstate.vertType, count, bytesRead);878}879880void SoftGPU::Execute_Bezier(u32 op, u32 diff) {881// This also make skipping drawing very effective.882if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {883// TODO: Should this eat some cycles? Probably yes. Not sure if important.884return;885}886887if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {888ERROR_LOG_REPORT(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);889return;890}891892const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);893const void *indices = NULL;894if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {895if (!Memory::IsValidAddress(gstate_c.indexAddr)) {896ERROR_LOG_REPORT(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);897return;898}899indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);900}901902if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {903DEBUG_LOG_REPORT(Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));904}905906Spline::BezierSurface surface;907surface.tess_u = gstate.getPatchDivisionU();908surface.tess_v = gstate.getPatchDivisionV();909surface.num_points_u = op & 0xFF;910surface.num_points_v = (op >> 8) & 0xFF;911surface.num_patches_u = (surface.num_points_u - 1) / 3;912surface.num_patches_v = (surface.num_points_v - 1) / 3;913surface.primType = gstate.getPatchPrimitiveType();914surface.patchFacing = gstate.patchfacing & 1;915916SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType));917918int bytesRead = 0;919gstate_c.UpdateUVScaleOffset();920drawEngine_->transformUnit.SetDirty(dirtyFlags_);921drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier");922dirtyFlags_ = drawEngine_->transformUnit.GetDirty();923924SoftGPUVRAMDirty mark = (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) != 0 ? SoftGPUVRAMDirty::DIRTY : SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY;925MarkDirty(gstate.getFrameBufAddress(), gstate.FrameBufStride(), gstate.getRegionY2() + 1, gstate.FrameBufFormat(), mark);926927// After drawing, we advance pointers - see SubmitPrim which does the same.928int count = surface.num_points_u * surface.num_points_v;929AdvanceVerts(gstate.vertType, count, bytesRead);930}931932void SoftGPU::Execute_Spline(u32 op, u32 diff) {933// This also make skipping drawing very effective.934if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {935// TODO: Should this eat some cycles? Probably yes. Not sure if important.936return;937}938939if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {940ERROR_LOG_REPORT(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);941return;942}943944const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);945const void *indices = NULL;946if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {947if (!Memory::IsValidAddress(gstate_c.indexAddr)) {948ERROR_LOG_REPORT(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);949return;950}951indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);952}953954if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {955DEBUG_LOG_REPORT(Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));956}957958Spline::SplineSurface surface;959surface.tess_u = gstate.getPatchDivisionU();960surface.tess_v = gstate.getPatchDivisionV();961surface.type_u = (op >> 16) & 0x3;962surface.type_v = (op >> 18) & 0x3;963surface.num_points_u = op & 0xFF;964surface.num_points_v = (op >> 8) & 0xFF;965surface.num_patches_u = surface.num_points_u - 3;966surface.num_patches_v = surface.num_points_v - 3;967surface.primType = gstate.getPatchPrimitiveType();968surface.patchFacing = gstate.patchfacing & 1;969970SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType));971972int bytesRead = 0;973gstate_c.UpdateUVScaleOffset();974drawEngine_->transformUnit.SetDirty(dirtyFlags_);975drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline");976dirtyFlags_ = drawEngine_->transformUnit.GetDirty();977978SoftGPUVRAMDirty mark = (gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME) != 0 ? SoftGPUVRAMDirty::DIRTY : SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY;979MarkDirty(gstate.getFrameBufAddress(), gstate.FrameBufStride(), gstate.getRegionY2() + 1, gstate.FrameBufFormat(), mark);980981// After drawing, we advance pointers - see SubmitPrim which does the same.982int count = surface.num_points_u * surface.num_points_v;983AdvanceVerts(gstate.vertType, count, bytesRead);984}985986void SoftGPU::Execute_LoadClut(u32 op, u32 diff) {987u32 clutAddr = gstate.getClutAddress();988// Avoid the hack in getClutLoadBytes() to inaccurately allow more palette data.989u32 clutTotalBytes = (gstate.getClutLoadBlocks() & 0x3F) * 32;990if (clutTotalBytes > 1024)991clutTotalBytes = 1024;992993// Might be copying drawing into the CLUT, so flush.994drawEngine_->transformUnit.FlushIfOverlap("loadclut", false, clutAddr, clutTotalBytes, clutTotalBytes, 1);995996bool changed = false;997if (Memory::IsValidAddress(clutAddr)) {998u32 validSize = Memory::ValidSize(clutAddr, clutTotalBytes);999changed = memcmp(clut, Memory::GetPointerUnchecked(clutAddr), validSize) != 0;1000if (changed)1001Memory::MemcpyUnchecked(clut, clutAddr, validSize);1002if (validSize < clutTotalBytes) {1003// Zero out the parts that were outside valid memory.1004memset((u8 *)clut + validSize, 0x00, clutTotalBytes - validSize);1005changed = true;1006}1007} else if (clutAddr != 0) {1008// Some invalid addresses trigger a crash, others fill with zero. We always fill zero.1009DEBUG_LOG(Log::G3D, "Software: Invalid CLUT address, filling with garbage instead of crashing");1010memset(clut, 0x00, clutTotalBytes);1011changed = true;1012}10131014if (changed)1015drawEngine_->transformUnit.NotifyClutUpdate(clut);1016dirtyFlags_ |= SoftDirty::SAMPLER_CLUT;1017}10181019void SoftGPU::Execute_FramebufPtr(u32 op, u32 diff) {1020// We assume fb.data won't change while we're drawing.1021if (diff) {1022drawEngine_->transformUnit.Flush("framebuf");1023fb.data = Memory::GetPointerWrite(gstate.getFrameBufAddress());1024}1025}10261027void SoftGPU::Execute_FramebufFormat(u32 op, u32 diff) {1028// We should flush, because ranges within bins may change.1029if (diff)1030drawEngine_->transformUnit.Flush("framebuf");1031}10321033void SoftGPU::Execute_BoundingBox(u32 op, u32 diff) {1034gstate_c.Dirty(DIRTY_CULL_PLANES);1035GPUCommon::Execute_BoundingBox(op, diff);1036}10371038void SoftGPU::Execute_ZbufPtr(u32 op, u32 diff) {1039// We assume depthbuf.data won't change while we're drawing.1040if (diff) {1041drawEngine_->transformUnit.Flush("depthbuf");1042// For the pointer, ignore memory mirrors. This also gives some buffer for draws that go outside.1043// TODO: Confirm how wrapping is handled in drawing. Adjust if we ever handle VRAM mirrors more accurately.1044depthbuf.data = Memory::GetPointerWrite(gstate.getDepthBufAddress() & 0x041FFFF0);1045}1046}10471048void SoftGPU::Execute_VertexType(u32 op, u32 diff) {1049if ((diff & GE_VTYPE_THROUGH_MASK) != 0) {1050// This affects a lot of things, but some don't matter if it's off - so defer to when it's back on.1051dirtyFlags_ |= SoftDirty::RAST_BASIC | SoftDirty::PIXEL_BASIC;1052if ((op & GE_VTYPE_THROUGH_MASK) == 0) {1053dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX | SoftDirty::TRANSFORM_VIEWPORT | SoftDirty::TRANSFORM_FOG;1054dirtyFlags_ |= SoftDirty::LIGHT_BASIC | SoftDirty::LIGHT_MATERIAL | SoftDirty::LIGHT_0 | SoftDirty::LIGHT_1 | SoftDirty::LIGHT_2 | SoftDirty::LIGHT_3;1055dirtyFlags_ |= SoftDirty::PIXEL_CACHED;1056}1057}1058}10591060void SoftGPU::Execute_WorldMtxNum(u32 op, u32 diff) {1061// Setting 0xFFFFF0 will reset to 0.1062gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF);1063}10641065void SoftGPU::Execute_ViewMtxNum(u32 op, u32 diff) {1066gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (op & 0xF);1067}10681069void SoftGPU::Execute_ProjMtxNum(u32 op, u32 diff) {1070gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (op & 0xF);1071}10721073void SoftGPU::Execute_TgenMtxNum(u32 op, u32 diff) {1074gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (op & 0xF);1075}10761077void SoftGPU::Execute_BoneMtxNum(u32 op, u32 diff) {1078// Setting any bits outside 0x7F are ignored and resets the internal counter.1079gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (op & 0x7F);1080}10811082void SoftGPU::Execute_WorldMtxData(u32 op, u32 diff) {1083int num = gstate.worldmtxnum & 0x00FFFFFF;1084if (num < 12) {1085u32 *target = (u32 *)&gstate.worldMatrix[num];1086u32 newVal = op << 8;1087if (newVal != *target) {1088*target = newVal;1089dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;1090gstate_c.Dirty(DIRTY_CULL_PLANES);1091}1092}10931094// Also update the CPU visible values, which update differently.1095u32 *target = &matrixVisible.all[12 * 8 + (num & 0xF)];1096*target = op & 0x00FFFFFF;10971098num++;1099gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);1100gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;1101}11021103void SoftGPU::Execute_ViewMtxData(u32 op, u32 diff) {1104int num = gstate.viewmtxnum & 0x00FFFFFF;1105if (num < 12) {1106u32 *target = (u32 *)&gstate.viewMatrix[num];1107u32 newVal = op << 8;1108if (newVal != *target) {1109*target = newVal;1110dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;1111gstate_c.Dirty(DIRTY_CULL_PLANES);1112}1113}11141115// Also update the CPU visible values, which update differently.1116u32 *target = &matrixVisible.all[12 * 8 + 12 + (num & 0xF)];1117*target = op & 0x00FFFFFF;11181119num++;1120gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);1121gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;1122}11231124void SoftGPU::Execute_ProjMtxData(u32 op, u32 diff) {1125int num = gstate.projmtxnum & 0x00FFFFFF;1126if (num < 16) {1127u32 *target = (u32 *)&gstate.projMatrix[num];1128u32 newVal = op << 8;1129if (newVal != *target) {1130*target = newVal;1131dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX;1132gstate_c.Dirty(DIRTY_CULL_PLANES);1133}1134}11351136// Also update the CPU visible values, which update differently.1137u32 *target = &matrixVisible.all[12 * 8 + 12 + 12 + (num & 0xF)];1138*target = op & 0x00FFFFFF;11391140num++;1141gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);1142gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;1143}11441145void SoftGPU::Execute_TgenMtxData(u32 op, u32 diff) {1146int num = gstate.texmtxnum & 0x00FFFFFF;1147if (num < 12) {1148u32 *target = (u32 *)&gstate.tgenMatrix[num];1149u32 newVal = op << 8;1150if (newVal != *target) {1151*target = newVal;1152// This is mainly used in vertex read, but also affects if we enable texture projection.1153dirtyFlags_ |= SoftDirty::RAST_TEX;1154}1155}11561157// Doesn't wrap to any other matrix.1158if ((num & 0xF) < 12) {1159matrixVisible.tgen[num & 0xF] = op & 0x00FFFFFF;1160}11611162num++;1163gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);1164gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;1165}11661167void SoftGPU::Execute_BoneMtxData(u32 op, u32 diff) {1168int num = gstate.boneMatrixNumber & 0x00FFFFFF;11691170if (num < 96) {1171u32 *target = (u32 *)&gstate.boneMatrix[num];1172u32 newVal = op << 8;1173// No dirtying, we read bone data during vertex read.1174*target = newVal;1175}11761177// Also update the CPU visible values, which update differently.1178u32 *target = &matrixVisible.all[(num & 0x7F)];1179*target = op & 0x00FFFFFF;11801181num++;1182gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);1183gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;1184}11851186static void CopyMatrix24(u32_le *result, const u32 *mtx, u32 count, u32 cmdbits) {1187for (u32 i = 0; i < count; ++i) {1188result[i] = mtx[i] | cmdbits;1189}1190}11911192bool SoftGPU::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {1193switch (type) {1194case GE_MTX_BONE0:1195case GE_MTX_BONE1:1196case GE_MTX_BONE2:1197case GE_MTX_BONE3:1198case GE_MTX_BONE4:1199case GE_MTX_BONE5:1200case GE_MTX_BONE6:1201case GE_MTX_BONE7:1202CopyMatrix24(result, matrixVisible.bone + (type - GE_MTX_BONE0) * 12, 12, cmdbits);1203break;1204case GE_MTX_TEXGEN:1205CopyMatrix24(result, matrixVisible.tgen, 12, cmdbits);1206break;1207case GE_MTX_WORLD:1208CopyMatrix24(result, matrixVisible.world, 12, cmdbits);1209break;1210case GE_MTX_VIEW:1211CopyMatrix24(result, matrixVisible.view, 12, cmdbits);1212break;1213case GE_MTX_PROJECTION:1214CopyMatrix24(result, matrixVisible.proj, 16, cmdbits);1215break;1216default:1217return false;1218}1219return true;1220}12211222void SoftGPU::ResetMatrices() {1223GPUCommon::ResetMatrices();1224dirtyFlags_ |= SoftDirty::TRANSFORM_MATRIX | SoftDirty::RAST_TEX;1225}12261227void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {1228GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);1229// We won't flush as often as hardware renderers, so we want to flush right away.1230FlushImm();1231}12321233void SoftGPU::Execute_Call(u32 op, u32 diff) {1234PROFILE_THIS_SCOPE("gpu_call");12351236const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);1237if (!Memory::IsValidAddress(target)) {1238ERROR_LOG(Log::G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);1239if (g_Config.bIgnoreBadMemAccess) {1240return;1241}1242gpuState = GPUSTATE_ERROR;1243downcount = 0;1244return;1245}12461247const u32 retval = currentList->pc + 4;1248if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {1249ERROR_LOG(Log::G3D, "CALL: Stack full!");1250} else {1251auto &stackEntry = currentList->stack[currentList->stackptr++];1252stackEntry.pc = retval;1253stackEntry.offsetAddr = gstate_c.offsetAddr;1254// The base address is NOT saved/restored for a regular call.1255UpdatePC(currentList->pc, target - 4);1256currentList->pc = target - 4; // pc will be increased after we return, counteract that1257}1258}12591260void SoftGPU::FinishDeferred() {1261// Need to flush before going back to CPU, so drawing is appropriately visible.1262drawEngine_->transformUnit.Flush("finish");1263}12641265int SoftGPU::ListSync(int listid, int mode) {1266// Take this as a cue that we need to finish drawing.1267drawEngine_->transformUnit.Flush("listsync");1268return GPUCommon::ListSync(listid, mode);1269}12701271u32 SoftGPU::DrawSync(int mode) {1272// Take this as a cue that we need to finish drawing.1273drawEngine_->transformUnit.Flush("drawsync");1274return GPUCommon::DrawSync(mode);1275}12761277void SoftGPU::GetStats(char *buffer, size_t bufsize) {1278drawEngine_->transformUnit.GetStats(buffer, bufsize);1279}12801281void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type)1282{1283// Nothing to invalidate.1284}12851286void SoftGPU::PerformWriteFormattedFromMemory(u32 addr, int size, int width, GEBufferFormat format)1287{1288// Ignore.1289}12901291bool SoftGPU::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {1292// Nothing to update.1293InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1294if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED))1295GPURecord::NotifyMemcpy(dest, src, size);1296// Let's just be safe.1297MarkDirty(dest, size, SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY);1298return false;1299}13001301bool SoftGPU::PerformMemorySet(u32 dest, u8 v, int size)1302{1303// Nothing to update.1304InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1305GPURecord::NotifyMemset(dest, v, size);1306// Let's just be safe.1307MarkDirty(dest, size, SoftGPUVRAMDirty::DIRTY | SoftGPUVRAMDirty::REALLY_DIRTY);1308return false;1309}13101311bool SoftGPU::PerformReadbackToMemory(u32 dest, int size)1312{1313// Nothing to update.1314InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1315return false;1316}13171318bool SoftGPU::PerformWriteColorFromMemory(u32 dest, int size)1319{1320// Nothing to update.1321InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1322GPURecord::NotifyUpload(dest, size);1323return false;1324}13251326bool SoftGPU::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags)1327{1328return false;1329}13301331bool SoftGPU::FramebufferDirty() {1332if (g_Config.iFrameSkip != 0) {1333return ClearDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::DIRTY);1334}1335return true;1336}13371338bool SoftGPU::FramebufferReallyDirty() {1339if (g_Config.iFrameSkip != 0) {1340return ClearDirty(displayFramebuf_, displayStride_, 272, displayFormat_, SoftGPUVRAMDirty::REALLY_DIRTY);1341}1342return true;1343}13441345static DrawingCoords GetTargetSize(int stride) {1346int w = std::min(stride, std::max(gstate.getRegionX2(), gstate.getScissorX2()) + 1);1347int h = std::max(gstate.getRegionY2(), gstate.getScissorY2()) + 1;1348if (gstate.getRegionX2() == 1023 && gstate.getRegionY2() == 1023) {1349// Some games max out region, but always scissor to an appropriate size.1350// Both values always scissor, we just prefer region as it's usually a more stable size.1351w = std::max(stride, gstate.getScissorX2() + 1);1352h = std::max(272, gstate.getScissorY2() + 1);1353}13541355return DrawingCoords((s16)w, (s16)h);1356}13571358bool SoftGPU::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {1359int stride = gstate.FrameBufStride();1360DrawingCoords size = GetTargetSize(stride);1361GEBufferFormat fmt = gstate.FrameBufFormat();1362const u8 *src = fb.data;13631364if (!Memory::IsValidAddress(displayFramebuf_))1365return false;13661367if (type == GPU_DBG_FRAMEBUF_DISPLAY) {1368size.x = 480;1369size.y = 272;1370stride = displayStride_;1371fmt = displayFormat_;1372src = Memory::GetPointer(displayFramebuf_);1373}13741375buffer.Allocate(size.x, size.y, fmt);13761377const int depth = fmt == GE_FORMAT_8888 ? 4 : 2;1378u8 *dst = buffer.GetData();1379const int byteWidth = size.x * depth;1380for (int16_t y = 0; y < size.y; ++y) {1381memcpy(dst, src, byteWidth);1382dst += byteWidth;1383src += stride * depth;1384}1385return true;1386}13871388bool SoftGPU::GetOutputFramebuffer(GPUDebugBuffer &buffer) {1389return GetCurrentFramebuffer(buffer, GPU_DBG_FRAMEBUF_DISPLAY, 1);1390}13911392bool SoftGPU::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {1393DrawingCoords size = GetTargetSize(gstate.DepthBufStride());1394buffer.Allocate(size.x, size.y, GPU_DBG_FORMAT_16BIT);13951396const int depth = 2;1397const u8 *src = depthbuf.data;1398u8 *dst = buffer.GetData();1399for (int16_t y = 0; y < size.y; ++y) {1400memcpy(dst, src, size.x * depth);1401dst += size.x * depth;1402src += gstate.DepthBufStride() * depth;1403}1404return true;1405}14061407static inline u8 GetPixelStencil(GEBufferFormat fmt, int fbStride, int x, int y) {1408if (fmt == GE_FORMAT_565) {1409// Always treated as 0 for comparison purposes.1410return 0;1411} else if (fmt == GE_FORMAT_5551) {1412return ((fb.Get16(x, y, fbStride) & 0x8000) != 0) ? 0xFF : 0;1413} else if (fmt == GE_FORMAT_4444) {1414return Convert4To8(fb.Get16(x, y, fbStride) >> 12);1415} else {1416return fb.Get32(x, y, fbStride) >> 24;1417}1418}14191420bool SoftGPU::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {1421DrawingCoords size = GetTargetSize(gstate.FrameBufStride());1422buffer.Allocate(size.x, size.y, GPU_DBG_FORMAT_8BIT);14231424u8 *row = buffer.GetData();1425for (int16_t y = 0; y < size.y; ++y) {1426for (int16_t x = 0; x < size.x; ++x) {1427row[x] = GetPixelStencil(gstate.FrameBufFormat(), gstate.FrameBufStride(), x, y);1428}1429row += size.x;1430}1431return true;1432}14331434bool SoftGPU::GetCurrentTexture(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) {1435*isFramebuffer = false;1436return Rasterizer::GetCurrentTexture(buffer, level);1437}14381439bool SoftGPU::GetCurrentClut(GPUDebugBuffer &buffer)1440{1441const u32 bpp = gstate.getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 4 : 2;1442const u32 pixels = 1024 / bpp;14431444buffer.Allocate(pixels, 1, (GEBufferFormat)gstate.getClutPaletteFormat());1445memcpy(buffer.GetData(), clut, 1024);1446return true;1447}14481449bool SoftGPU::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {1450gstate_c.UpdateUVScaleOffset();1451return drawEngine_->transformUnit.GetCurrentSimpleVertices(count, vertices, indices);1452}14531454bool SoftGPU::DescribeCodePtr(const u8 *ptr, std::string &name) {1455std::string subname;1456if (Sampler::DescribeCodePtr(ptr, subname)) {1457name = "SamplerJit:" + subname;1458return true;1459}1460if (Rasterizer::DescribeCodePtr(ptr, subname)) {1461name = "RasterizerJit:" + subname;1462return true;1463}1464return GPUCommon::DescribeCodePtr(ptr, name);1465}146614671468