GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/x86/X64IRAsm.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

#include "Common/Log.h"
#include "Core/CoreTiming.h"
#include "Core/MemMap.h"
#include "Core/MIPS/x86/X64IRJit.h"
#include "Core/MIPS/x86/X64IRRegCache.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/System.h"

namespace MIPSComp {

using namespace Gen;
using namespace X64IRJitConstants;

static const bool enableDebug = false;
static const bool enableDisasm = false;

static void ShowPC(void *membase, void *jitbase) {
	static int count = 0;
	if (currentMIPS) {
		u32 downcount = currentMIPS->downcount;
		ERROR_LOG(Log::JIT, "[%08x] ShowPC Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase);
	} else {
		ERROR_LOG(Log::JIT, "Universe corrupt?");
	}
	//if (count > 2000)
	//	exit(0);
	count++;
}

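// Emits the fixed (non per-block) machine code: the profiler scratch words,
// the FPU/vector constant pools, the rounding-mode helpers, and the dispatcher
// loop that enterDispatcher drops into.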
void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	// This will be used as a writable scratch area, always 32-bit accessible.
	const u8 *start = AlignCodePage();
	if (DebugProfilerEnabled()) {
		ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
		hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
		Write32(0);
		hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
		Write32(0);
	}

	EmitFPUConstants();
	EmitVecConstants();

	const u8 *disasmStart = AlignCodePage();
	BeginWrite(GetMemoryProtectPageSize());

	jo.downcountInRegister = false;
#if PPSSPP_ARCH(AMD64)
	bool jitbaseInR15 = false;
	int jitbaseCtxDisp = 0;
	// We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value.
	intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE;
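	// Three strategies for reaching the jit base from the dispatcher: if it fits
	// in a signed 32-bit displacement we use it directly; if it's at least
	// reachable from the context pointer we use a CTXREG-relative displacement;
	// otherwise we pin it in R15 (JITBASEREG).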
	if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) {
		jo.reserveR15ForAsm = true;
		jitbaseInR15 = true;
	} else {
		jo.downcountInRegister = true;
		jo.reserveR15ForAsm = true;
		if (jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) {
			jitbaseCtxDisp = (int)(jitbase - (intptr_t)&mipsState->f[0]);
		}
	}
#endif

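	// Note the `&& false`: static register allocation is disabled on this
	// backend for now, so the save/load helpers below are never emitted.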
	if (jo.useStaticAlloc && false) {
		saveStaticRegisters_ = AlignCode16();
		if (jo.downcountInRegister)
			MOV(32, MDisp(CTXREG, downcountOffset), R(DOWNCOUNTREG));
		//regs_.EmitSaveStaticRegisters();
		RET();

		// Note: needs to not modify EAX, or to save it if it does.
		loadStaticRegisters_ = AlignCode16();
		//regs_.EmitLoadStaticRegisters();
		if (jo.downcountInRegister)
			MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
		RET();
	} else {
		saveStaticRegisters_ = nullptr;
		loadStaticRegisters_ = nullptr;
	}

	restoreRoundingMode_ = AlignCode16();
	{
		STMXCSR(MDisp(CTXREG, tempOffset));
		// Clear the rounding mode and flush-to-zero bits back to 0.
		AND(32, MDisp(CTXREG, tempOffset), Imm32(~(7 << 13)));
		LDMXCSR(MDisp(CTXREG, tempOffset));
		RET();
	}

	applyRoundingMode_ = AlignCode16();
	{
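		// In FCR31, the low two bits select the rounding mode and bit 24 is the
		// flush-to-zero (FS) flag; 0x01000003 masks out exactly those.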
		MOV(32, R(SCRATCH1), MDisp(CTXREG, fcr31Offset));
		AND(32, R(SCRATCH1), Imm32(0x01000003));

		// If it's 0 (nearest + no flush0), we don't actually bother setting - we cleared the rounding
		// mode out in restoreRoundingMode anyway. This is the most common.
		FixupBranch skip = J_CC(CC_Z);
		STMXCSR(MDisp(CTXREG, tempOffset));

		// The MIPS bits don't correspond exactly, so we have to adjust.
		// 0 -> 0 (skip2), 1 -> 3, 2 -> 2 (skip2), 3 -> 1
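		// MIPS: 0 = nearest, 1 = toward zero, 2 = +inf, 3 = -inf. x86 MXCSR RC:
		// 0 = nearest, 1 = -inf, 2 = +inf, 3 = toward zero. XOR-ing the odd values
		// with 2 swaps 1 <-> 3, which is exactly the table above. SCRATCH1 is EAX
		// here, so testing AL reads its low bit.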
		TEST(8, R(AL), Imm8(1));
		FixupBranch skip2 = J_CC(CC_Z);
		XOR(32, R(SCRATCH1), Imm8(2));
		SetJumpTarget(skip2);

		// Adjustment complete, now reconstruct MXCSR
		SHL(32, R(SCRATCH1), Imm8(13));
		// Before setting new bits, we must clear the old ones.
		// Clearing bits 13-14 (rounding mode) and 15 (flush to zero.)
		AND(32, MDisp(CTXREG, tempOffset), Imm32(~(7 << 13)));
		OR(32, MDisp(CTXREG, tempOffset), R(SCRATCH1));

		TEST(32, MDisp(CTXREG, fcr31Offset), Imm32(1 << 24));
		FixupBranch skip3 = J_CC(CC_Z);
		OR(32, MDisp(CTXREG, tempOffset), Imm32(1 << 15));
		SetJumpTarget(skip3);

		LDMXCSR(MDisp(CTXREG, tempOffset));
		SetJumpTarget(skip);
		RET();
	}

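	// Entered as a plain C function; pushing all callee-saved registers up front
	// lets CTXREG, MEMBASEREG and friends stay live in registers for the whole
	// emulation session.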
	hooks_.enterDispatcher = (IRNativeFuncNoArg)AlignCode16();

	ABI_PushAllCalleeSavedRegsAndAdjustStack();
#if PPSSPP_ARCH(AMD64)
	// Two x64-specific statically allocated registers.
	MOV(64, R(MEMBASEREG), ImmPtr(Memory::base));
	if (jitbaseInR15)
		MOV(64, R(JITBASEREG), ImmPtr((const void *)jitbase));
#endif
	// From the start of the FP reg, a single byte offset can reach all GPR + all FPR (but not VFPR.)
	MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));

	LoadStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
	outerLoopPCInSCRATCH1_ = GetCodePtr();
	MovToPC(SCRATCH1);
	outerLoop_ = GetCodePtr();
	// Advance can change the downcount (or thread), so must save/restore around it.
	SaveStaticRegisters();
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
	ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	LoadStaticRegisters();

	dispatcherCheckCoreState_ = GetCodePtr();
	// TODO: See if we can get the slice decrement to line up with IR.

	if (RipAccessible((const void *)&coreState)) {
		CMP(32, M(&coreState), Imm8(0)); // rip accessible
	} else {
		MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
		CMP(32, MatR(RAX), Imm8(0));
	}
	FixupBranch badCoreState = J_CC(CC_NZ, true);

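	// The downcount goes negative once the timeslice is spent; the sign check
	// (CC_S) sends us back through CoreTiming::Advance in the outer loop.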
	if (jo.downcountInRegister) {
		TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
	} else {
		CMP(32, MDisp(CTXREG, downcountOffset), Imm8(0));
	}
	J_CC(CC_S, outerLoop_);
	FixupBranch skipToRealDispatch = J();

	dispatcherPCInSCRATCH1_ = GetCodePtr();
	MovToPC(SCRATCH1);

	hooks_.dispatcher = GetCodePtr();

	// TODO: See if we can get the slice decrement to line up with IR.
	if (jo.downcountInRegister) {
		TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
	} else {
		CMP(32, MDisp(CTXREG, downcountOffset), Imm8(0));
	}
	FixupBranch bail = J_CC(CC_S, true);
	SetJumpTarget(skipToRealDispatch);

	dispatcherNoCheck_ = GetCodePtr();

	// Debug
	if (enableDebug) {
#if PPSSPP_ARCH(AMD64)
		if (jitbaseInR15) {
			ABI_CallFunctionAA(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), R(JITBASEREG));
		} else if (jitbaseCtxDisp != 0) {
			LEA(64, SCRATCH1, MDisp(CTXREG, jitbaseCtxDisp));
			ABI_CallFunctionAA(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), R(SCRATCH1));
		} else {
			ABI_CallFunctionAC(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), (u32)jitbase);
		}
#else
		ABI_CallFunctionCC(reinterpret_cast<void *>(&ShowPC), (u32)Memory::base, (u32)GetBasePtr());
#endif
	}

	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
	AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
#endif
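	// Fetch the instruction word at PC. If a block has been compiled, that word
	// has been replaced with an emuhack: top byte MIPS_EMUHACK_OPCODE, low 24
	// bits the block's native-code offset from GetBasePtr(). Adding jitbase
	// (base minus the opcode constant) then yields the entry address directly.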
	hooks_.dispatchFetch = GetCodePtr();
#if PPSSPP_ARCH(X86)
	_assert_msg_(Memory::base != 0, "Memory base bogus");
	MOV(32, R(SCRATCH1), MDisp(SCRATCH1, (u32)Memory::base));
#elif PPSSPP_ARCH(AMD64)
	MOV(32, R(SCRATCH1), MComplex(MEMBASEREG, SCRATCH1, SCALE_1, 0));
#endif
	_assert_msg_(MIPS_JITBLOCK_MASK == 0xFF000000, "Hardcoded assumption of emuhack mask");
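	// RORX (BMI2) is a non-destructive three-operand rotate that leaves SCRATCH1
	// intact, saving the MOV + SHR pair of the fallback path.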
	if (cpu_info.bBMI2) {
		RORX(32, EDX, R(SCRATCH1), 24);
		CMP(8, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));
	} else {
		MOV(32, R(EDX), R(SCRATCH1));
		SHR(32, R(EDX), Imm8(24));
		CMP(32, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));
	}
	FixupBranch needsCompile = J_CC(CC_NE);
	// We don't mask here - that's baked into jitbase.
#if PPSSPP_ARCH(X86)
	LEA(32, SCRATCH1, MDisp(SCRATCH1, (u32)GetBasePtr() - MIPS_EMUHACK_OPCODE));
#elif PPSSPP_ARCH(AMD64)
	if (jitbaseInR15) {
		ADD(64, R(SCRATCH1), R(JITBASEREG));
	} else if (jitbaseCtxDisp) {
		LEA(64, SCRATCH1, MComplex(CTXREG, SCRATCH1, SCALE_1, jitbaseCtxDisp));
	} else {
		// See above, reserveR15ForAsm is used when above 0x7FFFFFFF.
		LEA(64, SCRATCH1, MDisp(SCRATCH1, (s32)jitbase));
	}
#endif
	JMPptr(R(SCRATCH1));
	SetJumpTarget(needsCompile);

	// No block found, let's jit. We don't need to save static regs, they're all callee saved.
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
	ABI_CallFunction(&MIPSComp::JitAt);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	// Let's just dispatch again, we'll enter the block since we know it's there.
	JMP(dispatcherNoCheck_, true);

	SetJumpTarget(bail);

	if (RipAccessible((const void *)&coreState)) {
		CMP(32, M(&coreState), Imm8(0)); // rip accessible
	} else {
		MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
		CMP(32, MatR(RAX), Imm8(0));
	}
	J_CC(CC_Z, outerLoop_, true);

	const uint8_t *quitLoop = GetCodePtr();
	SetJumpTarget(badCoreState);

	WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
	SaveStaticRegisters();
	RestoreRoundingMode(true);
	ABI_PopAllCalleeSavedRegsAndAdjustStack();
	RET();

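	// Emergency exit: force coreState to CORE_RUNTIME_ERROR and leave through
	// the normal quitLoop path above.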
	hooks_.crashHandler = GetCodePtr();
	if (RipAccessible((const void *)&coreState)) {
		MOV(32, M(&coreState), Imm32(CORE_RUNTIME_ERROR));
	} else {
		MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));
		MOV(32, MatR(RAX), Imm32(CORE_RUNTIME_ERROR));
	}
	JMP(quitLoop, true);


	// Leave this at the end, add more stuff above.
	if (enableDisasm) {
#if PPSSPP_ARCH(AMD64)
		std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));
		for (auto s : lines) {
			INFO_LOG(Log::JIT, "%s", s.c_str());
		}
#endif
	}

	// Let's spare the pre-generated code from unprotect-reprotect.
	AlignCodePage();
	jitStartOffset_ = (int)(GetCodePtr() - start);
	EndWrite();
}

} // namespace MIPSComp

#endif