CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/GPUCommon.cpp
Views: 1401
#include "ppsspp_config.h"

#if defined(_M_SSE)
#include <emmintrin.h>
#endif
#if PPSSPP_ARCH(ARM_NEON)
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
#include <arm64_neon.h>
#else
#include <arm_neon.h>
#endif
#endif

#include <algorithm>

#include "Common/Profiler/Profiler.h"

#include "Common/GraphicsContext.h"
#include "Common/LogReporting.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Common/Serialize/SerializeList.h"
#include "Common/TimeUtil.h"
#include "GPU/GeDisasm.h"
#include "GPU/GPU.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUState.h"
#include "Core/Config.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/MemBlockInfo.h"
#include "Core/MemMap.h"
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/HLE/sceKernelInterrupt.h"
#include "Core/HLE/sceKernelThread.h"
#include "Core/HLE/sceGe.h"
#include "Core/HW/Display.h"
#include "Core/Util/PPGeDraw.h"
#include "Core/MemMapHelpers.h"
#include "GPU/Common/DrawEngineCommon.h"
#include "GPU/Common/FramebufferManagerCommon.h"
#include "GPU/Common/TextureCacheCommon.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"

// Forwards to the draw engine's DispatchFlush().
void GPUCommon::Flush() {
	drawEngineCommon_->DispatchFlush();
}

// Same as Flush(): forwards to the draw engine's DispatchFlush().
void GPUCommon::DispatchFlush() {
	drawEngineCommon_->DispatchFlush();
}

// Stores the graphics/draw contexts, then resets the display lists, the GE state,
// the stats and the visible matrices.
GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
	gfxCtx_(gfxCtx),
	draw_(draw)
{
	// This assert failed on GCC x86 32-bit (but not MSVC 32-bit!) before adding the
	// "padding" field at the end. This is important for save state compatibility.
	// The compiler was not rounding the struct size up to an 8 byte boundary, which
	// you'd expect due to the int64 field, but the Linux ABI apparently does not require that.
	static_assert(sizeof(DisplayList) == 456, "Bad DisplayList size");

	Reinitialize();
	gstate.Reset();
	gstate_c.Reset();
	gpuStats.Reset();

	PPGeSetDrawContext(draw);
	ResetMatrices();
}

// Called at the start of a host frame: re-executes the stored state commands, marks
// everything dirty and runs the config/resize checks.
void GPUCommon::BeginHostFrame() {
	ReapplyGfxState();

	// TODO: Assume config may have changed - maybe move to resize.
	gstate_c.Dirty(DIRTY_ALL);

	UpdateCmdInfo();

	UpdateMSAALevel(draw_);
	CheckConfigChanged();
	CheckDisplayResized();
	CheckRenderResized();
}

// Called at the end of a host frame: invalidates the draw context's cached render state.
void GPUCommon::EndHostFrame() {
	// Probably not necessary.
	if (draw_) {
		draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	}
}

// Clears every display list slot and the queue bookkeeping (ticks, break flag, etc.),
// then clears the texture cache and destroys all FBOs if those managers exist.
void GPUCommon::Reinitialize() {
	memset(dls, 0, sizeof(dls));
	for (int i = 0; i < DisplayListMaxCount; ++i) {
		dls[i].state = PSP_GE_DL_STATE_NONE;
		dls[i].waitTicks = 0;
	}

	nextListID = 0;
	currentList = nullptr;
	isbreak = false;
	drawCompleteTicks = 0;
	busyTicks = 0;
	timeSpentStepping_ = 0.0;
	interruptsEnabled_ = true;

	if (textureCache_)
		textureCache_->Clear(true);
	if (framebufferManager_)
		framebufferManager_->DestroyAllFBOs();
}

// Returns a heuristic per-vertex cost derived from the lighting state, the UV
// generation mode and the number of morph weights.
int GPUCommon::EstimatePerVertexCost() {
	// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
	// runs in parallel with transform.

	// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.

	// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
	// went too fast and starts doing all the work over again).

	int cost = 20;
	if (gstate.isLightingEnabled()) {
		cost += 10;

		for (int i = 0; i < 4; i++) {
			if (gstate.isLightChanEnabled(i))
				cost += 7;
		}
	}

	if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
		cost += 20;
	}
	int morphCount = gstate.getNumMorphWeights();
	if (morphCount > 1) {
		cost += 5 * morphCount;
	}
	return cost;
}

// Removes the front of the queue and makes the new front (if any) the current list,
// carrying over the RUNNING state of the list that was current before.
void GPUCommon::PopDLQueue() {
	if(!dlQueue.empty()) {
		dlQueue.pop_front();
		if(!dlQueue.empty()) {
			bool running = currentList->state == PSP_GE_DL_STATE_RUNNING;
			currentList = &dls[dlQueue.front()];
			if (running)
				currentList->state = PSP_GE_DL_STATE_RUNNING;
		} else {
			currentList = nullptr;
		}
	}
}

// Returns true when DrawSync(1) reports drawing/stalling and the current list is not paused.
bool GPUCommon::BusyDrawing() {
	u32 state = DrawSync(1);
	if (state == PSP_GE_LIST_DRAWING || state == PSP_GE_LIST_STALLING) {
		if (currentList && currentList->state != PSP_GE_DL_STATE_PAUSED) {
			return true;
		}
	}
	return false;
}

// Sets the flag that marks the config as changed.
void GPUCommon::NotifyConfigChanged() {
	configChanged_ = true;
}

// Sets the flag that marks the render resolution as changed.
void GPUCommon::NotifyRenderResized() {
	renderResized_ = true;
}

// Sets the flag that marks the display size as changed.
void GPUCommon::NotifyDisplayResized() {
	displayResized_ = true;
}

// Requests a dump of the next frame (picked up by PSPFrame()).
void GPUCommon::DumpNextFrame() {
	dumpNextFrame_ = true;
}

// mode 0: waits for drawing to complete (or recycles COMPLETED slots) and returns 0.
// mode 1: returns the PSP_GE_LIST_* status of the queue without waiting.
// Any other mode returns SCE_KERNEL_ERROR_INVALID_MODE.
u32 GPUCommon::DrawSync(int mode) {
	gpuStats.numDrawSyncs++;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (mode == 0) {
		if (!__KernelIsDispatchEnabled()) {
			return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
		}
		if (__IsInInterrupt()) {
			return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
		}

		if (drawCompleteTicks > CoreTiming::GetTicks()) {
			__GeWaitCurrentThread(GPU_SYNC_DRAW, 1, "GeDrawSync");
		} else {
			for (int i = 0; i < DisplayListMaxCount; ++i) {
				if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
					dls[i].state = PSP_GE_DL_STATE_NONE;
				}
			}
		}
		return 0;
	}

	// If there's no current list, it must be complete.
	DisplayList *top = nullptr;
	for (int i : dlQueue) {
		if (dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
			top = &dls[i];
			break;
		}
	}
	if (!top || top->state == PSP_GE_DL_STATE_COMPLETED)
		return PSP_GE_LIST_COMPLETED;

	if (currentList->pc == currentList->stall)
		return PSP_GE_LIST_STALLING;

	return PSP_GE_LIST_DRAWING;
}

// When the queue is empty, resets every display list slot to the NONE state.
void GPUCommon::CheckDrawSync() {
	if (dlQueue.empty()) {
		for (int i = 0; i < DisplayListMaxCount; ++i)
			dls[i].state = PSP_GE_DL_STATE_NONE;
	}
}

// mode 1: returns the PSP_GE_LIST_* status of the given list without waiting.
// mode 0: waits on the list if its waitTicks are still in the future, then reports completed.
// Returns an SCE_KERNEL_ERROR_* code for a bad id, bad mode or illegal calling context.
int GPUCommon::ListSync(int listid, int mode) {
	gpuStats.numListSyncs++;

	if (listid < 0 || listid >= DisplayListMaxCount)
		return SCE_KERNEL_ERROR_INVALID_ID;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	DisplayList& dl = dls[listid];
	if (mode == 1) {
		switch (dl.state) {
		case PSP_GE_DL_STATE_QUEUED:
			if (dl.interrupted)
				return PSP_GE_LIST_PAUSED;
			return PSP_GE_LIST_QUEUED;

		case PSP_GE_DL_STATE_RUNNING:
			if (dl.pc == dl.stall)
				return PSP_GE_LIST_STALLING;
			return PSP_GE_LIST_DRAWING;

		case PSP_GE_DL_STATE_COMPLETED:
			return PSP_GE_LIST_COMPLETED;

		case PSP_GE_DL_STATE_PAUSED:
			return PSP_GE_LIST_PAUSED;

		default:
			return SCE_KERNEL_ERROR_INVALID_ID;
		}
	}

	if (!__KernelIsDispatchEnabled()) {
		return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
	}
	if (__IsInInterrupt()) {
		return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
	}

	if (dl.waitTicks > CoreTiming::GetTicks()) {
		__GeWaitCurrentThread(GPU_SYNC_LIST, listid, "GeListSync");
	}
	return PSP_GE_LIST_COMPLETED;
}

// Writes the call stack entry at `index` of the current list to PSP memory at `stackPtr`
// (when both are valid) and returns the current stack depth.
// Returns 0 without a current list, or SCE_KERNEL_ERROR_INVALID_INDEX if index >= depth.
int GPUCommon::GetStack(int index, u32 stackPtr) {
	if (!currentList) {
		// Seems like it doesn't return an error code?
		return 0;
	}

	if (currentList->stackptr <= index) {
		return SCE_KERNEL_ERROR_INVALID_INDEX;
	}

	if (index >= 0) {
		auto stack = PSPPointer<u32_le>::Create(stackPtr);
		if (stack.IsValid()) {
			auto entry = currentList->stack[index];
			// Not really sure what most of these values are.
			stack[0] = 0;
			stack[1] = entry.pc + 4;
			stack[2] = entry.offsetAddr;
			stack[7] = entry.baseAddr;
		}
	}

	return currentList->stackptr;
}

// Converts `count` floats to float24 and ORs `cmdbits` into each resulting word.
static void CopyMatrix24(u32_le *result, const float *mtx, u32 count, u32 cmdbits) {
	// Screams out for simple SIMD, but probably not called often enough to be worth it.
	for (u32 i = 0; i < count; ++i) {
		result[i] = toFloat24(mtx[i]) | cmdbits;
	}
}

// Copies the requested matrix from gstate into `result` as float24 words tagged with `cmdbits`.
// Writes 16 words for the projection matrix and 12 for all others; returns false for an unknown type.
bool GPUCommon::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		CopyMatrix24(result, gstate.boneMatrix + (type - GE_MTX_BONE0) * 12, 12, cmdbits);
		break;
	case GE_MTX_TEXGEN:
		CopyMatrix24(result, gstate.tgenMatrix, 12, cmdbits);
		break;
	case GE_MTX_WORLD:
		CopyMatrix24(result, gstate.worldMatrix, 12, cmdbits);
		break;
	case GE_MTX_VIEW:
		CopyMatrix24(result, gstate.viewMatrix, 12, cmdbits);
		break;
	case GE_MTX_PROJECTION:
		CopyMatrix24(result, gstate.projMatrix, 16, cmdbits);
		break;
	default:
		return false;
	}
	return true;
}

// Refreshes matrixVisible from the gstate matrices and dirties the matrix-related state.
void GPUCommon::ResetMatrices() {
	// This means we restored a context, so update the visible matrix data.
	for (size_t i = 0; i < ARRAY_SIZE(gstate.boneMatrix); ++i)
		matrixVisible.bone[i] = toFloat24(gstate.boneMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.worldMatrix); ++i)
		matrixVisible.world[i] = toFloat24(gstate.worldMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.viewMatrix); ++i)
		matrixVisible.view[i] = toFloat24(gstate.viewMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.projMatrix); ++i)
		matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
		matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);

	// Assume all the matrices changed, so dirty things related to them.
	gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
}

// Validates the arguments, picks a free display list slot, fills it in and queues it
// (at the head when `head` is set). If nothing is currently running, the list starts
// executing right away. Returns the list id, or an error code.
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
	// TODO Check the stack values in missing arg and adjust the stack depth

	// Check alignment
	// TODO Check the context and stack alignment too
	if (((listpc | stall) & 3) != 0 || !Memory::IsValidAddress(listpc)) {
		ERROR_LOG_REPORT(Log::G3D, "sceGeListEnqueue: invalid address %08x", listpc);
		return SCE_KERNEL_ERROR_INVALID_POINTER;
	}

	// If args->size is below 16, it's the old struct without stack info.
	if (args.IsValid() && args->size >= 16 && args->numStacks >= 256) {
		return hleLogError(Log::G3D, SCE_KERNEL_ERROR_INVALID_SIZE, "invalid stack depth %d", args->numStacks);
	}

	int id = -1;
	u64 currentTicks = CoreTiming::GetTicks();
	u32 stackAddr = args.IsValid() && args->size >= 16 ? (u32)args->stackAddr : 0;
	// Check compatibility
	if (sceKernelGetCompiledSdkVersion() > 0x01FFFFFF) {
		//numStacks = 0;
		//stack = NULL;
		for (int i = 0; i < DisplayListMaxCount; ++i) {
			if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
				// Logically, if the CPU has not interrupted yet, it hasn't seen the latest pc either.
				// Exit enqueues right after an END, which fails without ignoring pendingInterrupt lists.
				if (dls[i].pc == listpc && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, list address %08X already used", listpc);
					return 0x80000021;
				} else if (stackAddr != 0 && dls[i].stackAddr == stackAddr && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, stack address %08X already used", stackAddr);
					return 0x80000021;
				}
			}
		}
	}
	// TODO Check if list stack dls[i].stack already used then return 0x80000021 as above

	for (int i = 0; i < DisplayListMaxCount; ++i) {
		int possibleID = (i + nextListID) % DisplayListMaxCount;
		// Only inspected here, so bind by reference instead of copying the whole DisplayList.
		const auto &possibleList = dls[possibleID];
		if (possibleList.pendingInterrupt) {
			continue;
		}

		if (possibleList.state == PSP_GE_DL_STATE_NONE) {
			id = possibleID;
			break;
		}
		if (possibleList.state == PSP_GE_DL_STATE_COMPLETED && possibleList.waitTicks < currentTicks) {
			id = possibleID;
		}
	}
	if (id < 0) {
		ERROR_LOG_REPORT(Log::G3D, "No DL ID available to enqueue");
		for (int i : dlQueue) {
			DisplayList &dl = dls[i];
			DEBUG_LOG(Log::G3D, "DisplayList %d status %d pc %08x stall %08x", i, dl.state, dl.pc, dl.stall);
		}
		return SCE_KERNEL_ERROR_OUT_OF_MEMORY;
	}
	nextListID = id + 1;

	DisplayList &dl = dls[id];
	dl.id = id;
	dl.startpc = listpc & 0x0FFFFFFF;
	dl.pc = listpc & 0x0FFFFFFF;
	dl.stall = stall & 0x0FFFFFFF;
	dl.subIntrBase = std::max(subIntrBase, -1);
	dl.stackptr = 0;
	dl.signal = PSP_GE_SIGNAL_NONE;
	dl.interrupted = false;
	dl.waitTicks = (u64)-1;
	dl.interruptsEnabled = interruptsEnabled_;
	dl.started = false;
	dl.offsetAddr = 0;
	dl.bboxResult = false;
	dl.stackAddr = stackAddr;

	if (args.IsValid() && args->context.IsValid())
		dl.context = args->context;
	else
		dl.context = 0;

	if (head) {
		if (currentList) {
			// NOTE(review): on this error path the chosen slot has already been overwritten above
			// (including waitTicks) and nextListID advanced - confirm that is intended.
			if (currentList->state != PSP_GE_DL_STATE_PAUSED)
				return SCE_KERNEL_ERROR_INVALID_VALUE;
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			// Make sure we clear the signal so we don't try to pause it again.
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}

		dl.state = PSP_GE_DL_STATE_PAUSED;

		currentList = &dl;
		dlQueue.push_front(id);
	} else if (currentList) {
		dl.state = PSP_GE_DL_STATE_QUEUED;
		dlQueue.push_back(id);
	} else {
		dl.state = PSP_GE_DL_STATE_RUNNING;
		currentList = &dl;
		dlQueue.push_front(id);

		drawCompleteTicks = (u64)-1;

		// TODO save context when starting the list if param is set
		ProcessDLQueue();
	}

	return id;
}

// Removes a list that has not started yet from the queue and wakes any waiters on it.
// Returns SCE_KERNEL_ERROR_INVALID_ID for a bad/unused id, SCE_KERNEL_ERROR_BUSY if already started.
u32 GPUCommon::DequeueList(int listid) {
	if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;

	auto &dl = dls[listid];
	if (dl.started)
		return SCE_KERNEL_ERROR_BUSY;

	dl.state = PSP_GE_DL_STATE_NONE;

	// The list may already be gone from the queue (ProcessDLQueue erases finished lists),
	// so the queue can be empty here - front() must not be called in that case.
	if (!dlQueue.empty() && listid == dlQueue.front())
		PopDLQueue();
	else
		dlQueue.remove(listid);

	dl.waitTicks = 0;
	__GeTriggerWait(GPU_SYNC_LIST, listid);

	CheckDrawSync();

	return 0;
}

// Moves the stall address of a list and resumes queue processing.
// Returns SCE_KERNEL_ERROR_INVALID_ID for a bad/unused id, SCE_KERNEL_ERROR_ALREADY if completed.
u32 GPUCommon::UpdateStall(int listid, u32 newstall) {
	if (listid < 0 || listid >= DisplayListMaxCount || dls[listid].state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;
	auto &dl = dls[listid];
	if (dl.state == PSP_GE_DL_STATE_COMPLETED)
		return SCE_KERNEL_ERROR_ALREADY;

	dl.stall = newstall & 0x0FFFFFFF;

	ProcessDLQueue();

	return 0;
}

// Resumes a paused current list (or re-queues it if it was paused by Break()) and
// processes the queue. Returns an error code if the current list is not paused.
u32 GPUCommon::Continue() {
	if (!currentList)
		return 0;

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (!isbreak) {
			// TODO: Supposedly this returns SCE_KERNEL_ERROR_BUSY in some case, previously it had
			// currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE, but it doesn't reproduce.

			currentList->state = PSP_GE_DL_STATE_RUNNING;
			currentList->signal = PSP_GE_SIGNAL_NONE;

			// TODO Restore context of DL is necessary
			// TODO Restore BASE

			// We have a list now, so it's not complete.
			drawCompleteTicks = (u64)-1;
		} else {
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}
	}
	else if (currentList->state == PSP_GE_DL_STATE_RUNNING)
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000020;
		return -1;
	}
	else
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004;
		return -1;
	}

	ProcessDLQueue();
	return 0;
}

// mode 1: clears the whole queue and all list slots, returns 0.
// mode 0: pauses the current list and returns its id, or an error code depending on its state.
u32 GPUCommon::Break(int mode) {
	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (!currentList)
		return SCE_KERNEL_ERROR_ALREADY;

	if (mode == 1)
	{
		// Clear the queue
		dlQueue.clear();
		for (int i = 0; i < DisplayListMaxCount; ++i)
		{
			dls[i].state = PSP_GE_DL_STATE_NONE;
			dls[i].signal = PSP_GE_SIGNAL_NONE;
		}

		nextListID = 0;
		currentList = nullptr;
		return 0;
	}

	if (currentList->state == PSP_GE_DL_STATE_NONE || currentList->state == PSP_GE_DL_STATE_COMPLETED)
	{
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004;
		return -1;
	}

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (sceKernelGetCompiledSdkVersion() > 0x02000010)
		{
			if (currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE)
			{
				ERROR_LOG_REPORT(Log::G3D, "sceGeBreak: can't break signal-pausing list");
			}
			else
				return SCE_KERNEL_ERROR_ALREADY;
		}
		return SCE_KERNEL_ERROR_BUSY;
	}

	if (currentList->state == PSP_GE_DL_STATE_QUEUED)
	{
		currentList->state = PSP_GE_DL_STATE_PAUSED;
		return currentList->id;
	}

	// TODO Save BASE
	// TODO Adjust pc to be just before SIGNAL/END

	// TODO: Is this right?
	if (currentList->signal == PSP_GE_SIGNAL_SYNC)
		currentList->pc += 8;

	currentList->interrupted = true;
	currentList->state = PSP_GE_DL_STATE_PAUSED;
	currentList->signal = PSP_GE_SIGNAL_HANDLER_SUSPEND;
	isbreak = true;

	return currentList->id;
}

// Records when stepping started (only while debug stats are collected).
void GPUCommon::NotifySteppingEnter() {
	if (coreCollectDebugStats) {
		timeSteppingStarted_ = time_now_d();
	}
}

// Adds the time since NotifySteppingEnter() to timeSpentStepping_ (only while debug
// stats are collected).
void GPUCommon::NotifySteppingExit() {
	if (coreCollectDebugStats) {
		if (timeSteppingStarted_ <= 0.0) {
			ERROR_LOG(Log::G3D, "Mismatched stepping enter/exit.");
			// No start time was recorded, so "now - 0" would not be a stepping duration.
			// Don't let it corrupt timeSpentStepping_.
			return;
		}
		double total = time_now_d() - timeSteppingStarted_;
		_dbg_assert_msg_(total >= 0.0, "Time spent stepping became negative");
		timeSpentStepping_ += total;
		timeSteppingStarted_ = 0.0;
	}
}

// Runs the given display list until it stalls, finishes, errors or gets interrupted.
// Returns true if the list is done (or hit an error), false if it should stay current.
bool GPUCommon::InterpretList(DisplayList &list) {
	// Initialized to avoid a race condition with bShowDebugStats changing.
	double start = 0.0;
	if (coreCollectDebugStats) {
		start = time_now_d();
	}

	if (list.state == PSP_GE_DL_STATE_PAUSED)
		return false;
	currentList = &list;

	if (!list.started && list.context.IsValid()) {
		gstate.Save(list.context);
	}
	list.started = true;

	gstate_c.offsetAddr = list.offsetAddr;

	if (!Memory::IsValidAddress(list.pc)) {
		ERROR_LOG_REPORT(Log::G3D, "DL PC = %08x WTF!!!!", list.pc);
		return true;
	}

	cycleLastPC = list.pc;
	cyclesExecuted += 60;
	downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;
	list.state = PSP_GE_DL_STATE_RUNNING;
	list.interrupted = false;

	gpuState = list.pc == list.stall ? GPUSTATE_STALL : GPUSTATE_RUNNING;

	// To enable breakpoints, we don't do fast matrix loads while debugger active.
	debugRecording_ = GPUDebug::IsActive() || GPURecord::IsActive();
	const bool useFastRunLoop = !dumpThisFrame_ && !debugRecording_;
	while (gpuState == GPUSTATE_RUNNING) {
		{
			if (list.pc == list.stall) {
				gpuState = GPUSTATE_STALL;
				downcount = 0;
			}
		}

		if (useFastRunLoop) {
			FastRunLoop(list);
		} else {
			SlowRunLoop(list);
		}

		{
			downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;

			if (gpuState == GPUSTATE_STALL && list.stall != list.pc) {
				// Unstalled.
				gpuState = GPUSTATE_RUNNING;
			}
		}
	}

	FinishDeferred();
	if (debugRecording_)
		GPURecord::NotifyCPU();

	// We haven't run the op at list.pc, so it shouldn't count.
	if (cycleLastPC != list.pc) {
		UpdatePC(list.pc - 4, list.pc);
	}

	list.offsetAddr = gstate_c.offsetAddr;

	if (coreCollectDebugStats) {
		double total = time_now_d() - start - timeSpentStepping_;
		_dbg_assert_msg_(total >= 0.0, "Time spent DL processing became negative");
		hleSetSteppingTime(timeSpentStepping_);
		DisplayNotifySleep(timeSpentStepping_);
		timeSpentStepping_ = 0.0;
		gpuStats.msProcessingDisplayLists += total;
	}
	return gpuState == GPUSTATE_DONE || gpuState == GPUSTATE_ERROR;
}

// Per-PSP-frame bookkeeping: resets the immediate draw count, advances the
// "dump next frame" -> "dump this frame" flags and notifies the debugger/recorder.
void GPUCommon::PSPFrame() {
	immCount_ = 0;
	if (dumpNextFrame_) {
		NOTICE_LOG(Log::G3D, "DUMPING THIS FRAME");
		dumpThisFrame_ = true;
		dumpNextFrame_ = false;
	} else if (dumpThisFrame_) {
		dumpThisFrame_ = false;
	}
	GPUDebug::NotifyBeginFrame();
	GPURecord::NotifyBeginFrame();
}

// Asks the framebuffer manager whether a present happened this frame; true if there is no manager.
bool GPUCommon::PresentedThisFrame() const {
	return framebufferManager_ ? framebufferManager_->PresentedThisFrame() : true;
}

// Executes commands one at a time while downcount is positive, notifying the debugger
// and recorder for each one and optionally logging a disassembly of each op.
void GPUCommon::SlowRunLoop(DisplayList &list) {
	const bool dumpThisFrame = dumpThisFrame_;
	while (downcount > 0) {
		bool process = GPUDebug::NotifyCommand(list.pc);
		if (process) {
			GPURecord::NotifyCommand(list.pc);
			u32 op = Memory::ReadUnchecked_U32(list.pc);
			u32 cmd = op >> 24;

			u32 diff = op ^ gstate.cmdmem[cmd];
			PreExecuteOp(op, diff);
			if (dumpThisFrame) {
				char temp[256];
				u32 prev;
				if (Memory::IsValidAddress(list.pc - 4)) {
					prev = Memory::ReadUnchecked_U32(list.pc - 4);
				} else {
					prev = 0;
				}
				GeDisassembleOp(list.pc, op, prev, temp, 256);
				NOTICE_LOG(Log::G3D, "%08x: %s", op, temp);
			}
			gstate.cmdmem[cmd] = op;

			ExecuteOp(op, diff);
		}

		list.pc += 4;
		--downcount;
	}
}

// The newPC parameter is used for jumps, we don't count cycles between.
void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) {
	// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
	u32 executed = (currentPC - cycleLastPC) / 4;
	cyclesExecuted += 2 * executed;
	cycleLastPC = newPC;

	// Exit the runloop and recalculate things.  This happens a lot in some games.
	if (currentList)
		downcount = currentList->stall == 0 ? 0x0FFFFFFF : (currentList->stall - newPC) / 4;
	else
		downcount = 0;
}

// Re-executes the state commands stored in gstate.cmdmem, skipping the ones that
// must not be replayed.
void GPUCommon::ReapplyGfxState() {
	// The commands are embedded in the command memory so we can just reexecute the words. Convenient.
	// To be safe we pass 0xFFFFFFFF as the diff.

	// TODO: Consider whether any of this should really be done. We might be able to get all the way
	// by simply dirtying the appropriate gstate_c dirty flags.

	for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
		if (i != GE_CMD_ORIGIN && i != GE_CMD_OFFSETADDR) {
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
		}
	}

	// Can't write to bonematrixnumber here

	for (int i = GE_CMD_MORPHWEIGHT0; i <= GE_CMD_PATCHFACING; i++) {
		ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
	}

	// There are a few here in the middle that we shouldn't execute...

	// 0x42 to 0xEA
	for (int i = GE_CMD_VIEWPORTXSCALE; i < GE_CMD_TRANSFERSTART; i++) {
		switch (i) {
		case GE_CMD_LOADCLUT:
		case GE_CMD_TEXSYNC:
		case GE_CMD_TEXFLUSH:
			break;
		default:
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
			break;
		}
	}

	// Let's just skip the transfer size stuff, it's just values.
}

// Stores a new EDRAM translation value and returns the previous one.
uint32_t GPUCommon::SetAddrTranslation(uint32_t value) {
	std::swap(edramTranslation_, value);
	return value;
}

// Returns the current EDRAM translation value.
uint32_t GPUCommon::GetAddrTranslation() {
	return edramTranslation_;
}

// Sets the run state; any state other than RUNNING also zeroes downcount so the run loop exits.
inline void GPUCommon::UpdateState(GPURunState state) {
	gpuState = state;
	if (state != GPUSTATE_RUNNING)
		downcount = 0;
}

// Returns the list index at the front of the queue, or -1 if the queue is empty.
int GPUCommon::GetNextListIndex() {
	auto iter = dlQueue.begin();
	if (iter != dlQueue.end()) {
		return *iter;
	} else {
		return -1;
	}
}

// Interprets queued lists in order until one cannot finish (then returns early) or the
// queue is drained, after which the draw-complete time is updated and the sync triggered.
void GPUCommon::ProcessDLQueue() {
	startingTicks = CoreTiming::GetTicks();
	cyclesExecuted = 0;

	// Seems to be correct behaviour to process the list anyway?
	if (startingTicks < busyTicks) {
		// Cast so the argument matches the %lld conversion.
		DEBUG_LOG(Log::G3D, "Can't execute a list yet, still busy for %lld ticks", (long long)(busyTicks - startingTicks));
		//return;
	}

	for (int listIndex = GetNextListIndex(); listIndex != -1; listIndex = GetNextListIndex()) {
		DisplayList &l = dls[listIndex];
		DEBUG_LOG(Log::G3D, "Starting DL execution at %08x - stall = %08x", l.pc, l.stall);
		if (!InterpretList(l)) {
			return;
		} else {
			// Some other list could've taken the spot while we dilly-dallied around.
			if (l.state != PSP_GE_DL_STATE_QUEUED) {
				// At the end, we can remove it from the queue and continue.
				dlQueue.erase(std::remove(dlQueue.begin(), dlQueue.end(), listIndex), dlQueue.end());
			}
		}
	}

	currentList = nullptr;

	if (coreCollectDebugStats) {
		gpuStats.otherGPUCycles += cyclesExecuted;
	}

	drawCompleteTicks = startingTicks + cyclesExecuted;
	busyTicks = std::max(busyTicks, drawCompleteTicks);
	__GeTriggerSync(GPU_SYNC_DRAW, 1, drawCompleteTicks);
	// Since the event is in CoreTiming, we're in sync. Just set 0 now.
}

// OFFSETADDR: sets the offset address to the op's argument shifted left by 8.
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
	gstate_c.offsetAddr = op << 8;
}

// VADDR: sets the vertex address from the op's 24-bit argument.
void GPUCommon::Execute_Vaddr(u32 op, u32 diff) {
	gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}

// IADDR: sets the index address from the op's 24-bit argument.
void GPUCommon::Execute_Iaddr(u32 op, u32 diff) {
	gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}

// ORIGIN: sets the offset address to the current list's pc (if there is a current list).
void GPUCommon::Execute_Origin(u32 op, u32 diff) {
	if (currentList)
		gstate_c.offsetAddr = currentList->pc;
}

// JUMP: moves pc to the target address, or enters the error state if it is invalid.
void GPUCommon::Execute_Jump(u32 op, u32 diff) {
	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	UpdatePC(currentList->pc, target - 4);
	currentList->pc = target - 4; // pc will be increased after we return, counteract that
}

// BJUMP: jumps like JUMP, but only when the current list's bboxResult is false.
void GPUCommon::Execute_BJump(u32 op, u32 diff) {
	if (!currentList->bboxResult) {
		// bounding box jump.
		const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
		gpuStats.numBBOXJumps++;
		if (Memory::IsValidAddress(target)) {
			UpdatePC(currentList->pc, target - 4);
			currentList->pc = target - 4; // pc will be increased after we return, counteract that
		} else {
			ERROR_LOG(Log::G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
			UpdateState(GPUSTATE_ERROR);
		}
	}
}

// CALL: validates the target address and performs the call via DoExecuteCall().
// An invalid target is ignored when bIgnoreBadMemAccess is set, otherwise it is an error.
void GPUCommon::Execute_Call(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("gpu_call");

	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		if (g_Config.bIgnoreBadMemAccess) {
			return;
		}
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	DoExecuteCall(target);
}

// Pushes a return entry on the current list's stack and jumps to `target`, unless the
// target looks like a bone matrix upload, in which case it is loaded directly instead.
void GPUCommon::DoExecuteCall(u32 target) {
	// Bone matrix optimization - many games will CALL a bone matrix (!).
	// We don't optimize during recording - so the matrix data gets recorded.
	if (!debugRecording_ && Memory::IsValidRange(target, 13 * 4) && (Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
		// Check for the end
		if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
				(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
			// Yep, pretty sure this is a bone matrix call. Double check stall first.
			if (target > currentList->stall || target + 12 * 4 < currentList->stall) {
				FastLoadBoneMatrix(target);
				return;
			}
		}
	}

	if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
		ERROR_LOG(Log::G3D, "CALL: Stack full!");
		// TODO: UpdateState(GPUSTATE_ERROR) ?
	} else {
		auto &stackEntry = currentList->stack[currentList->stackptr++];
		stackEntry.pc = currentList->pc + 4;
		stackEntry.offsetAddr = gstate_c.offsetAddr;
		// The base address is NOT saved/restored for a regular call.
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4; // pc will be increased after we return, counteract that
	}
}

// RET: pops the top stack entry of the current list and returns to it (no-op on an empty stack).
void GPUCommon::Execute_Ret(u32 op, u32 diff) {
	if (currentList->stackptr == 0) {
		DEBUG_LOG(Log::G3D, "RET: Stack empty!");
	} else {
		auto &stackEntry = currentList->stack[--currentList->stackptr];
		gstate_c.offsetAddr = stackEntry.offsetAddr;
		// We always clear the top (uncached/etc.) bits
		const u32 target = stackEntry.pc & 0x0FFFFFFF;
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4;
#ifdef _DEBUG
		if (!Memory::IsValidAddress(currentList->pc)) {
			ERROR_LOG_REPORT(Log::G3D, "Invalid DL PC %08x on return", currentList->pc);
			UpdateState(GPUSTATE_ERROR);
		}
#endif
	}
}

// END: acts on the command word preceding it. After a SIGNAL it applies the signal
// behaviour (suspend/continue/pause/sync/jump/call/ret); after a FINISH it pauses,
// clears a sync signal, or completes the list.
void GPUCommon::Execute_End(u32 op, u32 diff) {
	if (flushOnParams_)
		Flush();

	const u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
	UpdatePC(currentList->pc, currentList->pc);
	// Count in a few extra cycles on END.
	cyclesExecuted += 60;

	switch (prev >> 24) {
	case GE_CMD_SIGNAL:
		{
			// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
			SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
			const int signal = prev & 0xFFFF;
			const int enddata = op & 0xFFFF;
			bool trigger = true;
			currentList->subIntrToken = signal;

			switch (behaviour) {
			case PSP_GE_SIGNAL_HANDLER_SUSPEND:
				// Suspend the list, and call the signal handler. When it's done, resume.
				// Before sdkver 0x02000010, listsync should return paused.
				if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
					currentList->state = PSP_GE_DL_STATE_PAUSED;
				currentList->signal = behaviour;
				DEBUG_LOG(Log::G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
				break;
			case PSP_GE_SIGNAL_HANDLER_CONTINUE:
				// Resume the list right away, then call the handler.
				currentList->signal = behaviour;
				DEBUG_LOG(Log::G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
				break;
			case PSP_GE_SIGNAL_HANDLER_PAUSE:
				// Pause the list instead of ending at the next FINISH.
				// Call the handler with the PAUSE signal value at that FINISH.
				// Technically, this ought to trigger an interrupt, but it won't do anything.
				// But right now, signal is always reset by interrupts, so that causes pause to not work.
				trigger = false;
				currentList->signal = behaviour;
				DEBUG_LOG(Log::G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
				break;
			case PSP_GE_SIGNAL_SYNC:
				// Acts as a memory barrier, never calls any user code.
				// Technically, this ought to trigger an interrupt, but it won't do anything.
				// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
				// However, this is likely a bug in how GE signal interrupts are handled.
				trigger = false;
				currentList->signal = behaviour;
				DEBUG_LOG(Log::G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
				break;
			case PSP_GE_SIGNAL_JUMP:
			case PSP_GE_SIGNAL_RJUMP:
			case PSP_GE_SIGNAL_OJUMP:
				{
					trigger = false;
					currentList->signal = behaviour;
					// pc will be increased after we return, counteract that.
					u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
					const char *targetType = "absolute";
					if (behaviour == PSP_GE_SIGNAL_RJUMP) {
						target += currentList->pc - 4;
						targetType = "relative";
					} else if (behaviour == PSP_GE_SIGNAL_OJUMP) {
						target = gstate_c.getRelativeAddress(target);
						targetType = "origin";
					}

					if (!Memory::IsValidAddress(target)) {
						ERROR_LOG_REPORT(Log::G3D, "Signal with Jump (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
						UpdateState(GPUSTATE_ERROR);
					} else {
						UpdatePC(currentList->pc, target);
						currentList->pc = target;
						DEBUG_LOG(Log::G3D, "Signal with Jump (%s). signal/end: %04x %04x", targetType, signal, enddata);
					}
				}
				break;
			case PSP_GE_SIGNAL_CALL:
			case PSP_GE_SIGNAL_RCALL:
			case PSP_GE_SIGNAL_OCALL:
				{
					trigger = false;
					currentList->signal = behaviour;
					// pc will be increased after we return, counteract that.
					u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
					const char *targetType = "absolute";
					if (behaviour == PSP_GE_SIGNAL_RCALL) {
						target += currentList->pc - 4;
						targetType = "relative";
					} else if (behaviour == PSP_GE_SIGNAL_OCALL) {
						target = gstate_c.getRelativeAddress(target);
						targetType = "origin";
					}

					if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
						ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): stack full. signal/end: %04x %04x", targetType, signal, enddata);
					} else if (!Memory::IsValidAddress(target)) {
						ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
						UpdateState(GPUSTATE_ERROR);
					} else {
						// TODO: This might save/restore other state...
						auto &stackEntry = currentList->stack[currentList->stackptr++];
						stackEntry.pc = currentList->pc;
						stackEntry.offsetAddr = gstate_c.offsetAddr;
						stackEntry.baseAddr = gstate.base;
						UpdatePC(currentList->pc, target);
						currentList->pc = target;
						DEBUG_LOG(Log::G3D, "Signal with Call (%s). signal/end: %04x %04x", targetType, signal, enddata);
					}
				}
				break;
			case PSP_GE_SIGNAL_RET:
				{
					trigger = false;
					currentList->signal = behaviour;
					if (currentList->stackptr == 0) {
						ERROR_LOG_REPORT(Log::G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
					} else {
						// TODO: This might save/restore other state...
						auto &stackEntry = currentList->stack[--currentList->stackptr];
						gstate_c.offsetAddr = stackEntry.offsetAddr;
						gstate.base = stackEntry.baseAddr;
						UpdatePC(currentList->pc, stackEntry.pc);
						currentList->pc = stackEntry.pc;
						DEBUG_LOG(Log::G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
					}
				}
				break;
			default:
				ERROR_LOG_REPORT(Log::G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
				break;
			}
			// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
			if (currentList->interruptsEnabled && trigger) {
				if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
					currentList->pendingInterrupt = true;
					UpdateState(GPUSTATE_INTERRUPT);
				}
			}
		}
		break;
	case GE_CMD_FINISH:
		switch (currentList->signal) {
		case PSP_GE_SIGNAL_HANDLER_PAUSE:
			currentList->state = PSP_GE_DL_STATE_PAUSED;
			if (currentList->interruptsEnabled) {
				if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
					currentList->pendingInterrupt = true;
					UpdateState(GPUSTATE_INTERRUPT);
				}
			}
			break;

		case PSP_GE_SIGNAL_SYNC:
			currentList->signal = PSP_GE_SIGNAL_NONE;
			// TODO: Technically this should still cause an interrupt. Probably for memory sync.
			break;

		default:
			FlushImm();
			currentList->subIntrToken = prev & 0xFFFF;
			UpdateState(GPUSTATE_DONE);
			// Since we marked done, we have to restore the context now before the next list runs.
			if (currentList->started && currentList->context.IsValid()) {
				gstate.Restore(currentList->context);
				ReapplyGfxState();
				// Don't restore the context again.
				currentList->started = false;
			}

			if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
				currentList->pendingInterrupt = true;
			} else {
				currentList->state = PSP_GE_DL_STATE_COMPLETED;
				currentList->waitTicks = startingTicks + cyclesExecuted;
				busyTicks = std::max(busyTicks, currentList->waitTicks);
				__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
			}
			break;
		}
		break;
	default:
		DEBUG_LOG(Log::G3D,"Ah, not finished: %06x", prev & 0xFFFFFF);
		break;
	}
}

void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) 
{1166// Just resetting, nothing to check bounds for.1167const u32 count = op & 0xFFFF;1168if (count == 0) {1169currentList->bboxResult = false;1170return;1171}11721173// Approximate based on timings of several counts on a PSP.1174cyclesExecuted += count * 22;11751176const bool useInds = (gstate.vertType & GE_VTYPE_IDX_MASK) != 0;1177VertexDecoder *dec = drawEngineCommon_->GetVertexDecoder(gstate.vertType);1178int bytesRead = (useInds ? 1 : dec->VertexSize()) * count;11791180if (Memory::IsValidRange(gstate_c.vertexAddr, bytesRead)) {1181const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);1182if (!control_points) {1183ERROR_LOG_REPORT_ONCE(boundingbox, Log::G3D, "Invalid verts in bounding box check");1184currentList->bboxResult = true;1185return;1186}11871188const void *inds = nullptr;1189if (useInds) {1190int indexShift = ((gstate.vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;1191inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);1192if (!inds || !Memory::IsValidRange(gstate_c.indexAddr, count << indexShift)) {1193ERROR_LOG_REPORT_ONCE(boundingboxInds, Log::G3D, "Invalid inds in bounding box check");1194currentList->bboxResult = true;1195return;1196}1197}11981199// Test if the bounding box is within the drawing region.1200// The PSP only seems to vary the result based on a single range of 0x100.1201if (count > 0x200) {1202// The second to last set of 0x100 is checked (even for odd counts.)1203size_t skipSize = (count - 0x200) * dec->VertexSize();1204currentList->bboxResult = drawEngineCommon_->TestBoundingBox((const uint8_t *)control_points + skipSize, inds, 0x100, gstate.vertType);1205} else if (count > 0x100) {1206int checkSize = count - 0x100;1207currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, checkSize, gstate.vertType);1208} else {1209currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, count, gstate.vertType);1210}1211AdvanceVerts(gstate.vertType, count, 
bytesRead);1212} else {1213ERROR_LOG_REPORT_ONCE(boundingbox, Log::G3D, "Bad bounding box data: %06x", count);1214// Data seems invalid. Let's assume the box test passed.1215currentList->bboxResult = true;1216}1217}12181219void GPUCommon::Execute_MorphWeight(u32 op, u32 diff) {1220gstate_c.morphWeights[(op >> 24) - GE_CMD_MORPHWEIGHT0] = getFloat24(op);1221}12221223void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {1224// Safety check.1225if (immCount_ >= MAX_IMMBUFFER_SIZE) {1226// Only print once for each overrun.1227if (immCount_ == MAX_IMMBUFFER_SIZE) {1228ERROR_LOG_REPORT_ONCE(exceed_imm_buffer, Log::G3D, "Exceeded immediate draw buffer size. gstate.imm_ap=%06x , prim=%d", gstate.imm_ap & 0xFFFFFF, (int)immPrim_);1229}1230if (immCount_ < 0x7fffffff) // Paranoia :)1231immCount_++;1232return;1233}12341235int prim = (op >> 8) & 0x7;1236if (prim != GE_PRIM_KEEP_PREVIOUS) {1237// Flush before changing the prim type. Only continue can be used to continue a prim.1238FlushImm();1239}12401241TransformedVertex &v = immBuffer_[immCount_++];12421243// ThrillVille does a clear with this, additional parameters found via tests.1244// The current vtype affects how the coordinate is processed.1245if (gstate.isModeThrough()) {1246v.x = ((int)(gstate.imm_vscx & 0xFFFF) - 0x8000) / 16.0f;1247v.y = ((int)(gstate.imm_vscy & 0xFFFF) - 0x8000) / 16.0f;1248} else {1249int offsetX = gstate.getOffsetX16();1250int offsetY = gstate.getOffsetY16();1251v.x = ((int)(gstate.imm_vscx & 0xFFFF) - offsetX) / 16.0f;1252v.y = ((int)(gstate.imm_vscy & 0xFFFF) - offsetY) / 16.0f;1253}1254v.z = gstate.imm_vscz & 0xFFFF;1255v.pos_w = 1.0f;1256v.u = getFloat24(gstate.imm_vtcs);1257v.v = getFloat24(gstate.imm_vtct);1258v.uv_w = getFloat24(gstate.imm_vtcq);1259v.color0_32 = (gstate.imm_cv & 0xFFFFFF) | (gstate.imm_ap << 24);1260// TODO: When !gstate.isModeThrough(), direct fog coefficient (0 = entirely fog), ignore fog flag (also GE_IMM_FOG.)1261v.fog = (gstate.imm_fc & 0xFF) / 255.0f;1262// 
TODO: Apply if gstate.isUsingSecondaryColor() && !gstate.isModeThrough(), ignore lighting flag.1263v.color1_32 = gstate.imm_scv & 0xFFFFFF;1264if (prim != GE_PRIM_KEEP_PREVIOUS) {1265immPrim_ = (GEPrimitiveType)prim;1266// Flags seem to only be respected from the first prim.1267immFlags_ = op & 0x00FFF800;1268immFirstSent_ = false;1269} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {1270static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };1271// Instead of finding a proper point to flush, we just emit prims when we can.1272if (immCount_ == flushPrimCount[immPrim_ & 7])1273FlushImm();1274} else {1275ERROR_LOG_REPORT_ONCE(imm_draw_prim, Log::G3D, "Immediate draw: Unexpected primitive %d at count %d", prim, immCount_);1276}1277}12781279void GPUCommon::FlushImm() {1280if (immCount_ == 0 || immPrim_ == GE_PRIM_INVALID)1281return;12821283SetDrawType(DRAW_PRIM, immPrim_);1284VirtualFramebuffer *vfb = nullptr;1285if (framebufferManager_)1286vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);1287if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {1288// No idea how many cycles to skip, heh.1289immCount_ = 0;1290return;1291}1292gstate_c.UpdateUVScaleOffset();1293if (vfb) {1294CheckDepthUsage(vfb);1295}12961297bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;1298bool prevAntialias = gstate.isAntiAliasEnabled();1299bool shading = (immFlags_ & GE_IMM_SHADING) != 0;1300bool prevShading = gstate.getShadeMode() == GE_SHADE_GOURAUD;1301bool cullEnable = (immFlags_ & GE_IMM_CULLENABLE) != 0;1302bool prevCullEnable = gstate.isCullEnabled();1303int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 
1 : 0;1304bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;1305bool prevTexturing = gstate.isTextureMapEnabled();1306bool fog = (immFlags_ & GE_IMM_FOG) != 0;1307bool prevFog = gstate.isFogEnabled();1308bool dither = (immFlags_ & GE_IMM_DITHER) != 0;1309bool prevDither = gstate.isDitherEnabled();13101311if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {1312WARN_LOG_REPORT_ONCE(geimmclipvalue, Log::G3D, "Imm vertex used clip value, flags=%06x", immFlags_);1313}13141315bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;1316changed = changed || prevShading != shading || prevFog != fog;1317if (changed) {1318DispatchFlush();1319gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;1320gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;1321gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;1322gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;1323gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;1324gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;1325gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);1326}13271328drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);1329immCount_ = 0;1330immFirstSent_ = true;13311332if (changed) {1333DispatchFlush();1334gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;1335gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;1336gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;1337gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;1338gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;1339gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;1340gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | 
DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);1341}1342}13431344void GPUCommon::Execute_Unknown(u32 op, u32 diff) {1345if ((op & 0xFFFFFF) != 0)1346WARN_LOG_REPORT_ONCE(unknowncmd, Log::G3D, "Unknown GE command : %08x ", op);1347}13481349void GPUCommon::FastLoadBoneMatrix(u32 target) {1350const u32 num = gstate.boneMatrixNumber & 0x7F;1351_dbg_assert_msg_(num + 12 <= 96, "FastLoadBoneMatrix would corrupt memory");1352const u32 mtxNum = num / 12;1353u32 uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum;1354if (num != 12 * mtxNum) {1355uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);1356}13571358if (!g_Config.bSoftwareSkinning) {1359if (flushOnParams_)1360Flush();1361gstate_c.Dirty(uniformsToDirty);1362} else {1363gstate_c.deferredVertTypeDirty |= uniformsToDirty;1364}1365gstate.FastLoadBoneMatrix(target);13661367cyclesExecuted += 2 * 14; // one to reset the counter, 12 to load the matrix, and a return.13681369if (coreCollectDebugStats) {1370gpuStats.otherGPUCycles += 2 * 14;1371}1372}13731374struct DisplayList_v1 {1375int id;1376u32 startpc;1377u32 pc;1378u32 stall;1379DisplayListState state;1380SignalBehavior signal;1381int subIntrBase;1382u16 subIntrToken;1383DisplayListStackEntry stack[32];1384int stackptr;1385bool interrupted;1386u64 waitTicks;1387bool interruptsEnabled;1388bool pendingInterrupt;1389bool started;1390size_t contextPtr;1391u32 offsetAddr;1392bool bboxResult;1393};13941395struct DisplayList_v2 {1396int id;1397u32 startpc;1398u32 pc;1399u32 stall;1400DisplayListState state;1401SignalBehavior signal;1402int subIntrBase;1403u16 subIntrToken;1404DisplayListStackEntry stack[32];1405int stackptr;1406bool interrupted;1407u64 waitTicks;1408bool interruptsEnabled;1409bool pendingInterrupt;1410bool started;1411PSPPointer<u32_le> context;1412u32 offsetAddr;1413bool bboxResult;1414};14151416void GPUCommon::DoState(PointerWrap &p) {1417auto s = p.Section("GPUCommon", 1, 6);1418if (!s)1419return;14201421Do<int>(p, dlQueue);1422if (s 
>= 4) {1423DoArray(p, dls, ARRAY_SIZE(dls));1424} else if (s >= 3) {1425// This may have been saved with or without padding, depending on platform.1426// We need to upconvert it to our consistently-padded struct.1427static const size_t DisplayList_v3_size = 452;1428static const size_t DisplayList_v4_size = 456;1429static_assert(DisplayList_v4_size == sizeof(DisplayList), "Make sure to change here when updating DisplayList");14301431p.DoVoid(&dls[0], DisplayList_v3_size);1432dls[0].padding = 0;14331434const u8 *savedPtr = *p.GetPPtr();1435const u32 *savedPtr32 = (const u32 *)savedPtr;1436// Here's the trick: the first member (id) is always the same as the index.1437// The second member (startpc) is always an address, or 0, never 1. So we can see the padding.1438const bool hasPadding = savedPtr32[1] == 1;1439if (hasPadding) {1440u32 padding;1441Do(p, padding);1442}14431444for (size_t i = 1; i < ARRAY_SIZE(dls); ++i) {1445p.DoVoid(&dls[i], DisplayList_v3_size);1446dls[i].padding = 0;1447if (hasPadding) {1448u32 padding;1449Do(p, padding);1450}1451}1452} else if (s >= 2) {1453for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {1454DisplayList_v2 oldDL;1455Do(p, oldDL);1456// Copy over everything except the last, new member (stackAddr.)1457memcpy(&dls[i], &oldDL, sizeof(DisplayList_v2));1458dls[i].stackAddr = 0;1459}1460} else {1461// Can only be in read mode here.1462for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {1463DisplayList_v1 oldDL;1464Do(p, oldDL);1465// On 32-bit, they're the same, on 64-bit oldDL is bigger.1466memcpy(&dls[i], &oldDL, sizeof(DisplayList_v1));1467// Fix the other fields. 
Let's hope context wasn't important, it was a pointer.1468dls[i].context = 0;1469dls[i].offsetAddr = oldDL.offsetAddr;1470dls[i].bboxResult = oldDL.bboxResult;1471dls[i].stackAddr = 0;1472}1473}1474int currentID = 0;1475if (currentList != nullptr) {1476currentID = (int)(currentList - &dls[0]);1477}1478Do(p, currentID);1479if (currentID == 0) {1480currentList = nullptr;1481} else {1482currentList = &dls[currentID];1483}1484Do(p, interruptRunning);1485Do(p, gpuState);1486Do(p, isbreak);1487Do(p, drawCompleteTicks);1488Do(p, busyTicks);14891490if (s >= 5) {1491Do(p, matrixVisible.all);1492}1493if (s >= 6) {1494Do(p, edramTranslation_);1495}1496}14971498void GPUCommon::InterruptStart(int listid) {1499interruptRunning = true;1500}1501void GPUCommon::InterruptEnd(int listid) {1502interruptRunning = false;1503isbreak = false;15041505DisplayList &dl = dls[listid];1506dl.pendingInterrupt = false;1507// TODO: Unless the signal handler could change it?1508if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) {1509if (dl.started && dl.context.IsValid()) {1510gstate.Restore(dl.context);1511ReapplyGfxState();1512}1513dl.waitTicks = 0;1514__GeTriggerWait(GPU_SYNC_LIST, listid);15151516// Make sure the list isn't still queued since it's now completed.1517if (!dlQueue.empty()) {1518if (listid == dlQueue.front())1519PopDLQueue();1520else1521dlQueue.remove(listid);1522}1523}15241525ProcessDLQueue();1526}15271528// TODO: Maybe cleaner to keep this in GE and trigger the clear directly?1529void GPUCommon::SyncEnd(GPUSyncType waitType, int listid, bool wokeThreads) {1530if (waitType == GPU_SYNC_DRAW && wokeThreads)1531{1532for (int i = 0; i < DisplayListMaxCount; ++i) {1533if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {1534dls[i].state = PSP_GE_DL_STATE_NONE;1535}1536}1537}1538}15391540bool GPUCommon::GetCurrentDisplayList(DisplayList &list) {1541if (!currentList) {1542return false;1543}1544list = *currentList;1545return 
true;1546}15471548std::vector<DisplayList> GPUCommon::ActiveDisplayLists() {1549std::vector<DisplayList> result;15501551for (int it : dlQueue) {1552result.push_back(dls[it]);1553}15541555return result;1556}15571558void GPUCommon::ResetListPC(int listID, u32 pc) {1559if (listID < 0 || listID >= DisplayListMaxCount) {1560_dbg_assert_msg_(false, "listID out of range: %d", listID);1561return;1562}15631564Reporting::NotifyDebugger();1565dls[listID].pc = pc;1566downcount = 0;1567}15681569void GPUCommon::ResetListStall(int listID, u32 stall) {1570if (listID < 0 || listID >= DisplayListMaxCount) {1571_dbg_assert_msg_(false, "listID out of range: %d", listID);1572return;1573}15741575Reporting::NotifyDebugger();1576dls[listID].stall = stall;1577downcount = 0;1578}15791580void GPUCommon::ResetListState(int listID, DisplayListState state) {1581if (listID < 0 || listID >= DisplayListMaxCount) {1582_dbg_assert_msg_(false, "listID out of range: %d", listID);1583return;1584}15851586Reporting::NotifyDebugger();1587dls[listID].state = state;1588downcount = 0;1589}15901591GPUDebugOp GPUCommon::DissassembleOp(u32 pc, u32 op) {1592char buffer[1024];1593u32 prev = Memory::IsValidAddress(pc - 4) ? Memory::ReadUnchecked_U32(pc - 4) : 0;1594GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));15951596GPUDebugOp info;1597info.pc = pc;1598info.cmd = op >> 24;1599info.op = op;1600info.desc = buffer;1601return info;1602}16031604std::vector<GPUDebugOp> GPUCommon::DissassembleOpRange(u32 startpc, u32 endpc) {1605char buffer[1024];1606std::vector<GPUDebugOp> result;1607GPUDebugOp info;16081609// Don't trigger a pause.1610u32 prev = Memory::IsValidAddress(startpc - 4) ? Memory::Read_U32(startpc - 4) : 0;1611result.reserve((endpc - startpc) / 4);1612for (u32 pc = startpc; pc < endpc; pc += 4) {1613u32 op = Memory::IsValidAddress(pc) ? 
Memory::Read_U32(pc) : 0;1614GeDisassembleOp(pc, op, prev, buffer, sizeof(buffer));1615prev = op;16161617info.pc = pc;1618info.cmd = op >> 24;1619info.op = op;1620info.desc = buffer;1621result.push_back(info);1622}1623return result;1624}16251626u32 GPUCommon::GetRelativeAddress(u32 data) {1627return gstate_c.getRelativeAddress(data);1628}16291630u32 GPUCommon::GetVertexAddress() {1631return gstate_c.vertexAddr;1632}16331634u32 GPUCommon::GetIndexAddress() {1635return gstate_c.indexAddr;1636}16371638GPUgstate GPUCommon::GetGState() {1639return gstate;1640}16411642void GPUCommon::SetCmdValue(u32 op) {1643u32 cmd = op >> 24;1644u32 diff = op ^ gstate.cmdmem[cmd];16451646Reporting::NotifyDebugger();1647PreExecuteOp(op, diff);1648gstate.cmdmem[cmd] = op;1649ExecuteOp(op, diff);1650downcount = 0;1651}16521653void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {1654u32 srcBasePtr = gstate.getTransferSrcAddress();1655u32 srcStride = gstate.getTransferSrcStride();16561657u32 dstBasePtr = gstate.getTransferDstAddress();1658u32 dstStride = gstate.getTransferDstStride();16591660int srcX = gstate.getTransferSrcX();1661int srcY = gstate.getTransferSrcY();16621663int dstX = gstate.getTransferDstX();1664int dstY = gstate.getTransferDstY();16651666int width = gstate.getTransferWidth();1667int height = gstate.getTransferHeight();16681669int bpp = gstate.getTransferBpp();16701671DEBUG_LOG(Log::G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);1672gpuStats.numBlockTransfers++;16731674// For VRAM, we wrap around when outside valid memory (mirrors still work.)1675if ((srcBasePtr & 0x04800000) == 0x04800000)1676srcBasePtr &= ~0x00800000;1677if ((dstBasePtr & 0x04800000) == 0x04800000)1678dstBasePtr &= ~0x00800000;16791680// Use height less one to account for width, which can be greater or less than stride, and then add it on for the last line.1681// NOTE: The sizes are only used 
for validity checks and memory info tracking.1682const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;1683const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;1684const uint32_t srcSize = ((height - 1) * srcStride) + width * bpp;1685const uint32_t dstSize = ((height - 1) * dstStride) + width * bpp;16861687bool srcDstOverlap = src + srcSize > dst && dst + dstSize > src;1688bool srcValid = Memory::IsValidRange(src, srcSize);1689bool dstValid = Memory::IsValidRange(dst, dstSize);1690bool srcWraps = Memory::IsVRAMAddress(srcBasePtr) && !srcValid;1691bool dstWraps = Memory::IsVRAMAddress(dstBasePtr) && !dstValid;16921693char tag[128];1694size_t tagSize;16951696// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.1697if (!framebufferManager_ || !framebufferManager_->NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {1698// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)1699// Can use GetPointerUnchecked because we checked the addresses above. 
We could also avoid them1700// entirely by walking a couple of pointers...17011702// Simple case: just a straight copy, no overlap or wrapping.1703if (srcStride == dstStride && (u32)width == srcStride && !srcDstOverlap && srcValid && dstValid) {1704u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;1705u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;1706u32 bytesToCopy = width * height * bpp;17071708const u8 *srcp = Memory::GetPointer(srcLineStartAddr);1709u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);1710memcpy(dstp, srcp, bytesToCopy);17111712if (MemBlockInfoDetailed(bytesToCopy)) {1713NotifyMemInfoCopy(dst, src, bytesToCopy, "GPUBlockTransfer/");1714}1715} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {1716// This path means we have either src/dst overlap, OR one or both of src and dst wrap.1717// This should be uncommon so it's the slowest path.1718u32 bytesToCopy = width * bpp;1719bool notifyDetail = MemBlockInfoDetailed(srcWraps || dstWraps ? 
64 : bytesToCopy);1720bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);1721if (notifyDetail || notifyAll) {1722tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);1723}17241725auto notifyingMemmove = [&](u32 d, u32 s, u32 sz) {1726const u8 *srcp = Memory::GetPointer(s);1727u8 *dstp = Memory::GetPointerWrite(d);1728memmove(dstp, srcp, sz);17291730if (notifyDetail) {1731NotifyMemInfo(MemBlockFlags::READ, s, sz, tag, tagSize);1732NotifyMemInfo(MemBlockFlags::WRITE, d, sz, tag, tagSize);1733}1734};17351736for (int y = 0; y < height; y++) {1737u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;1738u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;1739// If we already passed a wrap, we can use the quicker path.1740if ((srcLineStartAddr & 0x04800000) == 0x04800000)1741srcLineStartAddr &= ~0x00800000;1742if ((dstLineStartAddr & 0x04800000) == 0x04800000)1743dstLineStartAddr &= ~0x00800000;1744// These flags mean there's a wrap inside this line.1745bool srcLineWrap = !Memory::IsValidRange(srcLineStartAddr, bytesToCopy);1746bool dstLineWrap = !Memory::IsValidRange(dstLineStartAddr, bytesToCopy);17471748if (!srcLineWrap && !dstLineWrap) {1749const u8 *srcp = Memory::GetPointer(srcLineStartAddr);1750u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);1751for (u32 i = 0; i < bytesToCopy; i += 64) {1752u32 chunk = i + 64 > bytesToCopy ? 
bytesToCopy - i : 64;1753memmove(dstp + i, srcp + i, chunk);1754}17551756// If we're tracking detail, it's useful to have the gaps illustrated properly.1757if (notifyDetail) {1758NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);1759NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);1760}1761} else {1762// We can wrap at any point, so along with overlap this gets a bit complicated.1763// We're just going to do this the slow and easy way.1764u32 srcLinePos = srcLineStartAddr;1765u32 dstLinePos = dstLineStartAddr;1766for (u32 i = 0; i < bytesToCopy; i += 64) {1767u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;1768u32 srcValid = Memory::ValidSize(srcLinePos, chunk);1769u32 dstValid = Memory::ValidSize(dstLinePos, chunk);17701771// First chunk, for which both are valid.1772u32 bothSize = std::min(srcValid, dstValid);1773if (bothSize != 0)1774notifyingMemmove(dstLinePos, srcLinePos, bothSize);17751776// Now, whichever side has more valid (or the rest, if only one side must wrap.)1777u32 exclusiveSize = std::max(srcValid, dstValid) - bothSize;1778if (exclusiveSize != 0 && srcValid >= dstValid) {1779notifyingMemmove(PSP_GetVidMemBase(), srcLineStartAddr + bothSize, exclusiveSize);1780} else if (exclusiveSize != 0 && srcValid < dstValid) {1781notifyingMemmove(dstLineStartAddr + bothSize, PSP_GetVidMemBase(), exclusiveSize);1782}17831784// Finally, if both src and dst wrapped, that portion.1785u32 wrappedSize = chunk - bothSize - exclusiveSize;1786if (wrappedSize != 0 && srcValid >= dstValid) {1787notifyingMemmove(PSP_GetVidMemBase() + exclusiveSize, PSP_GetVidMemBase(), wrappedSize);1788} else if (wrappedSize != 0 && srcValid < dstValid) {1789notifyingMemmove(PSP_GetVidMemBase(), PSP_GetVidMemBase() + exclusiveSize, wrappedSize);1790}17911792srcLinePos += chunk;1793dstLinePos += chunk;1794if ((srcLinePos & 0x04800000) == 0x04800000)1795srcLinePos &= ~0x00800000;1796if ((dstLinePos & 0x04800000) == 
0x04800000)1797dstLinePos &= ~0x00800000;1798}1799}1800}18011802if (notifyAll) {1803if (srcWraps) {1804u32 validSize = Memory::ValidSize(src, srcSize);1805NotifyMemInfo(MemBlockFlags::READ, src, validSize, tag, tagSize);1806NotifyMemInfo(MemBlockFlags::READ, PSP_GetVidMemBase(), srcSize - validSize, tag, tagSize);1807} else {1808NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);1809}1810if (dstWraps) {1811u32 validSize = Memory::ValidSize(dst, dstSize);1812NotifyMemInfo(MemBlockFlags::WRITE, dst, validSize, tag, tagSize);1813NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetVidMemBase(), dstSize - validSize, tag, tagSize);1814} else {1815NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);1816}1817}1818} else if (srcValid && dstValid) {1819u32 bytesToCopy = width * bpp;1820bool notifyDetail = MemBlockInfoDetailed(bytesToCopy);1821bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);1822if (notifyDetail || notifyAll) {1823tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);1824}18251826for (int y = 0; y < height; y++) {1827u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;1828u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;18291830const u8 *srcp = Memory::GetPointer(srcLineStartAddr);1831u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);1832memcpy(dstp, srcp, bytesToCopy);18331834// If we're tracking detail, it's useful to have the gaps illustrated properly.1835if (notifyDetail) {1836NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);1837NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);1838}1839}18401841if (notifyAll) {1842NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);1843NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);1844}1845} else {1846// This seems to cause the GE to require a break/reset on a PSP.1847// TODO: Handle that and figure out which bytes 
are still copied?1848ERROR_LOG_REPORT_ONCE(invalidtransfer, Log::G3D, "Block transfer invalid: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);1849}18501851if (framebufferManager_) {1852// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.1853textureCache_->Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);1854framebufferManager_->NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);1855}1856}18571858// TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.1859cyclesExecuted += ((height * width * bpp) * 16) / 10;1860}18611862bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {1863// Track stray copies of a framebuffer in RAM. MotoGP does this.1864if (framebufferManager_->MayIntersectFramebufferColor(src) || framebufferManager_->MayIntersectFramebufferColor(dest)) {1865if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) {1866// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.1867// Since they're identical we don't need to copy.1868if (dest != src) {1869if (Memory::IsValidRange(dest, size) && Memory::IsValidRange(src, size)) {1870memcpy(Memory::GetPointerWriteUnchecked(dest), Memory::GetPointerUnchecked(src), size);1871}1872if (MemBlockInfoDetailed(size)) {1873NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");1874}1875}1876}1877InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1878return true;1879}18801881if (MemBlockInfoDetailed(size)) {1882NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");1883}1884InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1885if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED))1886GPURecord::NotifyMemcpy(dest, src, size);1887return 
false;1888}18891890bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) {1891// This may indicate a memset, usually to 0, of a framebuffer.1892if (framebufferManager_->MayIntersectFramebufferColor(dest)) {1893Memory::Memset(dest, v, size, "GPUMemset");1894if (!framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason)) {1895InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1896}1897return true;1898}18991900NotifyMemInfo(MemBlockFlags::WRITE, dest, size, "GPUMemset");1901// Or perhaps a texture, let's invalidate.1902InvalidateCache(dest, size, GPU_INVALIDATE_HINT);1903GPURecord::NotifyMemset(dest, v, size);1904return false;1905}19061907bool GPUCommon::PerformReadbackToMemory(u32 dest, int size) {1908if (Memory::IsVRAMAddress(dest)) {1909return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_DST_MATCH_MEM);1910}1911return false;1912}19131914bool GPUCommon::PerformWriteColorFromMemory(u32 dest, int size) {1915if (Memory::IsVRAMAddress(dest)) {1916GPURecord::NotifyUpload(dest, size);1917return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::DEBUG_NOTIFIED);1918}1919return false;1920}19211922void GPUCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int frameWidth, GEBufferFormat format) {1923if (Memory::IsVRAMAddress(addr)) {1924framebufferManager_->PerformWriteFormattedFromMemory(addr, size, frameWidth, format);1925}1926textureCache_->NotifyWriteFormattedFromMemory(addr, size, frameWidth, format);1927InvalidateCache(addr, size, GPU_INVALIDATE_SAFE);1928}19291930bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags) {1931if (framebufferManager_->MayIntersectFramebufferColor(dest)) {1932framebufferManager_->PerformWriteStencilFromMemory(dest, size, flags);1933return true;1934}1935return false;1936}19371938bool GPUCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) 
{1939gstate_c.UpdateUVScaleOffset();1940return drawEngineCommon_->GetCurrentSimpleVertices(count, vertices, indices);1941}19421943bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {1944// The only part of GPU emulation (other than software) that jits is the vertex decoder, currently,1945// which is owned by the drawengine.1946return drawEngineCommon_->DescribeCodePtr(ptr, name);1947}194819491950