GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_recompiler_riscv64.cpp
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "cpu_recompiler_riscv64.h"
5
#include "cpu_code_cache_private.h"
6
#include "cpu_core_private.h"
7
#include "cpu_pgxp.h"
8
#include "gte.h"
9
#include "settings.h"
10
#include "timing_event.h"
11
12
#include "common/align.h"
13
#include "common/assert.h"
14
#include "common/log.h"
15
#include "common/memmap.h"
16
#include "common/string_util.h"
17
18
#include <limits>
19
20
#ifdef CPU_ARCH_RISCV64
21
22
LOG_CHANNEL(Recompiler);
23
24
#ifdef ENABLE_HOST_DISASSEMBLY
25
extern "C" {
26
#include "riscv-disas.h"
27
}
28
#endif
29
30
// For LW/SW/etc. Expands to an (offset, RSTATE) operand pair addressing a field of g_state.
31
#define PTR(x) ((u32)(((u8*)(x)) - ((u8*)&g_state))), RSTATE
32
33
static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr
34
35
#define RRET biscuit::a0
36
#define RARG1 biscuit::a0
37
#define RARG2 biscuit::a1
38
#define RARG3 biscuit::a2
39
#define RSCRATCH biscuit::t6
40
#define RSTATE biscuit::s10
41
#define RMEMBASE biscuit::s11
42
43
static bool rvIsCallerSavedRegister(u32 id);
44
static bool rvIsValidSExtITypeImm(u32 imm);
45
static std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);
46
static void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr);
47
static void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);
48
static void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);
49
static u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);
50
static u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);
51
static void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
52
bool sign_extend_word = false);
53
static void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
54
const biscuit::GPR& tempreg = RSCRATCH);
55
static void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
56
static void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
57
static void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
58
static void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word
59
static void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
60
static void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword
61
62
namespace CPU {
63
64
using namespace biscuit;
65
66
RISCV64Recompiler s_instance;
67
Recompiler* g_compiler = &s_instance;
68
69
} // namespace CPU
70
71
bool rvIsCallerSavedRegister(u32 id)
72
{
73
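// x1 (ra), x3-x7 (gp, tp, t0-t2), x10-x17 (a0-a7) and x28-x31 (t3-t6) are treated as caller-saved here.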
return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31));
74
}
75
76
bool rvIsValidSExtITypeImm(u32 imm)
77
{
78
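// True when the value survives sign-extension from 12 bits, i.e. it fits in an I-type immediate field.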
return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm);
79
}
80
81
std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target)
82
{
83
const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur));
84
Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) &&
85
disp <= static_cast<s64>(std::numeric_limits<s32>::max()));
86
87
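// Split the displacement into an AUIPC upper part and a signed 12-bit lower part for ADDI/JALR/loads.
// Adding 0x800 before taking the upper 20 bits rounds it so the remaining low part fits in [-2048, 2047].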
const s64 hi = disp + 0x800;
88
const s64 lo = disp - (hi & 0xFFFFF000);
89
return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52));
90
}
91
92
void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr)
93
{
94
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
95
rvAsm->AUIPC(reg, hi);
96
rvAsm->ADDI(reg, reg, lo);
97
}
98
99
void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm)
100
{
101
// Borrowed from biscuit, but emits a single ADDI when the immediate fits in a sign-extended 12-bit value.
102
const u32 lower = imm & 0xFFF;
103
const u32 upper = (imm & 0xFFFFF000) >> 12;
104
const s32 simm = static_cast<s32>(imm);
105
if (rvIsValidSExtITypeImm(simm))
106
{
107
rvAsm->ADDI(rd, biscuit::zero, static_cast<s32>(lower));
108
}
109
else
110
{
111
const bool needs_increment = (lower & 0x800) != 0;
112
const u32 upper_imm = needs_increment ? upper + 1 : upper;
113
rvAsm->LUI(rd, upper_imm);
114
rvAsm->ADDI(rd, rd, static_cast<int32_t>(lower));
115
}
116
}
117
118
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm)
119
{
120
// TODO: Make better..
121
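// Build the constant from two 32-bit halves: rd <- upper half shifted into place, plus the
// zero-extended lower half (the scratch shift pair clears its sign-extended upper bits).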
rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32));
122
rvEmitMov(rvAsm, scratch, static_cast<u32>(imm));
123
rvAsm->SLLI64(rd, rd, 32);
124
rvAsm->SLLI64(scratch, scratch, 32);
125
rvAsm->SRLI64(scratch, scratch, 32);
126
rvAsm->ADD(rd, rd, scratch);
127
}
128
129
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg)
130
{
131
// TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes
132
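// AUIPC+JALR reaches any target within +/-2GiB of the cursor and is always 8 bytes,
// which keeps block links backpatchable in place.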
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr);
133
rvAsm->AUIPC(RSCRATCH, hi);
134
rvAsm->JALR(link_reg, lo, RSCRATCH);
135
return 8;
136
}
137
138
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
139
{
140
return rvEmitJmp(rvAsm, ptr, biscuit::ra);
141
}
142
143
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word)
144
{
145
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
146
rvAsm->AUIPC(reg, hi);
147
if (sign_extend_word)
148
rvAsm->LW(reg, lo, reg);
149
else
150
rvAsm->LWU(reg, lo, reg);
151
}
152
153
[[maybe_unused]] void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
154
const biscuit::GPR& tempreg)
155
{
156
const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
157
rvAsm->AUIPC(tempreg, hi);
158
rvAsm->SW(reg, lo, tempreg);
159
}
160
161
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
162
{
163
rvAsm->SLLI(rd, rs, 24);
164
rvAsm->SRAIW(rd, rd, 24);
165
}
166
167
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
168
{
169
rvAsm->ANDI(rd, rs, 0xFF);
170
}
171
172
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
173
{
174
rvAsm->SLLI(rd, rs, 16);
175
rvAsm->SRAIW(rd, rd, 16);
176
}
177
178
void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
179
{
180
rvAsm->SLLI(rd, rs, 16);
181
rvAsm->SRLI(rd, rd, 16);
182
}
183
184
void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
185
{
186
rvAsm->ADDIW(rd, rs, 0);
187
}
188
189
void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
190
{
191
rvAsm->SLLI64(rd, rs, 32);
192
rvAsm->SRLI64(rd, rd, 32);
193
}
194
195
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
196
{
197
#ifdef ENABLE_HOST_DISASSEMBLY
198
const u8* cur = static_cast<const u8*>(start);
199
const u8* end = cur + size;
200
char buf[256];
201
while (cur < end)
202
{
203
rv_inst inst;
204
size_t instlen;
205
inst_fetch(cur, &inst, &instlen);
206
disasm_inst(buf, std::size(buf), rv64, static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), inst);
207
DEBUG_LOG("\t0x{:016X}\t{}", static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), buf);
208
cur += instlen;
209
}
210
#else
211
ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
212
#endif
213
}
214
215
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
216
{
217
#ifdef ENABLE_HOST_DISASSEMBLY
218
const u8* cur = static_cast<const u8*>(start);
219
const u8* end = cur + size;
220
u32 icount = 0;
221
while (cur < end)
222
{
223
rv_inst inst;
224
size_t instlen;
225
inst_fetch(cur, &inst, &instlen);
226
cur += instlen;
227
icount++;
228
}
229
return icount;
230
#else
231
ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
232
return 0;
233
#endif
234
}
235
236
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
237
{
238
using namespace biscuit;
239
240
Assembler actual_asm(static_cast<u8*>(code), code_size);
241
Assembler* rvAsm = &actual_asm;
242
243
Label dispatch;
244
Label run_events_and_dispatch;
245
246
g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(rvAsm->GetCursorPointer());
247
{
248
// TODO: reserve some space for saving caller-saved registers
249
250
// Need the CPU state for basically everything :-)
251
rvMoveAddressToReg(rvAsm, RSTATE, &g_state);
252
253
// Fastmem setup
254
if (IsUsingFastmem())
255
rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
256
257
// Fall through to event dispatcher
258
}
259
260
// check for pending events, then fall through to the dispatcher
261
{
262
Label skip_event_check;
263
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
264
rvAsm->LW(RARG2, PTR(&g_state.downcount));
265
rvAsm->BLTU(RARG1, RARG2, &skip_event_check);
266
267
rvAsm->Bind(&run_events_and_dispatch);
268
g_run_events_and_dispatch = rvAsm->GetCursorPointer();
269
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents));
270
271
rvAsm->Bind(&skip_event_check);
272
}
273
274
// TODO: align?
275
g_dispatcher = rvAsm->GetCursorPointer();
276
{
277
rvAsm->Bind(&dispatch);
278
279
// RARG2 <- g_code_lut[pc >> 16]
280
rvAsm->LW(RARG1, PTR(&g_state.pc));
281
rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
282
rvAsm->SRLIW(RARG2, RARG1, 16);
283
rvAsm->SLLI(RARG2, RARG2, 3);
284
rvAsm->ADD(RARG2, RARG2, RARG3);
285
rvAsm->LD(RARG2, 0, RARG2);
286
rvAsm->SLLI64(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2
287
rvAsm->SRLI64(RARG1, RARG1, 50);
288
rvAsm->SLLI(RARG1, RARG1, 3);
289
290
// jump through the looked-up block pointer (fast_map[idx])
291
rvAsm->ADD(RARG1, RARG1, RARG2);
292
rvAsm->LD(RARG1, 0, RARG1);
293
rvAsm->JR(RARG1);
294
}
295
296
g_compile_or_revalidate_block = rvAsm->GetCursorPointer();
297
{
298
rvAsm->LW(RARG1, PTR(&g_state.pc));
299
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock));
300
rvAsm->J(&dispatch);
301
}
302
303
g_discard_and_recompile_block = rvAsm->GetCursorPointer();
304
{
305
rvAsm->LW(RARG1, PTR(&g_state.pc));
306
rvEmitCall(rvAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock));
307
rvAsm->J(&dispatch);
308
}
309
310
g_interpret_block = rvAsm->GetCursorPointer();
311
{
312
rvEmitCall(rvAsm, CodeCache::GetInterpretUncachedBlockFunction());
313
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
314
rvAsm->LW(RARG2, PTR(&g_state.downcount));
315
rvAsm->BGE(RARG1, RARG2, &run_events_and_dispatch);
316
rvAsm->J(&dispatch);
317
}
318
319
// TODO: align?
320
321
return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
322
}
323
324
void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
325
{
326
constexpr u8 padding_value = 0x00;
327
std::memset(dst, padding_value, size);
328
}
329
330
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
331
{
332
// TODO: get rid of assembler construction here
333
{
334
biscuit::Assembler assembler(static_cast<u8*>(code), BLOCK_LINK_SIZE);
335
rvEmitCall(&assembler, dst);
336
337
DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE);
338
if (assembler.GetCodeBuffer().GetRemainingBytes() > 0)
339
assembler.NOP();
340
}
341
342
if (flush_icache)
343
MemMap::FlushInstructionCache(code, BLOCK_LINK_SIZE);
344
345
return BLOCK_LINK_SIZE;
346
}
347
348
CPU::RISCV64Recompiler::RISCV64Recompiler() = default;
349
350
CPU::RISCV64Recompiler::~RISCV64Recompiler() = default;
351
352
const void* CPU::RISCV64Recompiler::GetCurrentCodePointer()
353
{
354
return rvAsm->GetCursorPointer();
355
}
356
357
void CPU::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
358
u32 far_code_space)
359
{
360
Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);
361
362
// TODO: don't recreate this every time..
363
DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
364
m_emitter = std::make_unique<Assembler>(code_buffer, code_buffer_space);
365
m_far_emitter = std::make_unique<Assembler>(far_code_buffer, far_code_space);
366
rvAsm = m_emitter.get();
367
368
// Need to wipe it out so it's correct when toggling fastmem.
369
m_host_regs = {};
370
371
const u32 membase_idx = CodeCache::IsUsingFastmem() ? RMEMBASE.Index() : NUM_HOST_REGS;
372
for (u32 i = 0; i < NUM_HOST_REGS; i++)
373
{
374
HostRegAlloc& hra = m_host_regs[i];
375
376
if (i == RARG1.Index() || i == RARG2.Index() || i == RARG3.Index() || i == RSCRATCH.Index() ||
377
i == RSTATE.Index() || i == membase_idx || i < 5 /* zero, ra, sp, gp, tp */)
378
{
379
continue;
380
}
381
382
hra.flags = HR_USABLE | (rvIsCallerSavedRegister(i) ? 0 : HR_CALLEE_SAVED);
383
}
384
}
385
386
void CPU::RISCV64Recompiler::SwitchToFarCode(bool emit_jump,
387
void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR,
388
biscuit::Label*) /* = nullptr */,
389
const biscuit::GPR& rs1 /* = biscuit::zero */,
390
const biscuit::GPR& rs2 /* = biscuit::zero */)
391
{
392
DebugAssert(rvAsm == m_emitter.get());
393
if (emit_jump)
394
{
395
const void* target = m_far_emitter->GetCursorPointer();
396
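// When an inverted condition is supplied, branch over the far jump so far code only runs
// when the original condition holds; otherwise transfer to far code unconditionally.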
if (inverted_cond)
397
{
398
Label skip;
399
(rvAsm->*inverted_cond)(rs1, rs2, &skip);
400
rvEmitJmp(rvAsm, target);
401
rvAsm->Bind(&skip);
402
}
403
else
404
{
405
rvEmitCall(rvAsm, target);
406
}
407
}
408
rvAsm = m_far_emitter.get();
409
}
410
411
void CPU::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)
412
{
413
DebugAssert(rvAsm == m_far_emitter.get());
414
if (emit_jump)
415
rvEmitJmp(rvAsm, m_emitter->GetCursorPointer());
416
rvAsm = m_emitter.get();
417
}
418
419
void CPU::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)
420
{
421
rvEmitMov(rvAsm, dst, val);
422
}
423
424
void CPU::RISCV64Recompiler::EmitCall(const void* ptr)
425
{
426
rvEmitCall(rvAsm, ptr);
427
}
428
429
void CPU::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
430
void (biscuit::Assembler::*iop)(GPR, GPR, u32),
431
void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
432
{
433
DebugAssert(rd != RSCRATCH && rs != RSCRATCH);
434
435
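// Use the immediate form when imm fits in a sign-extended 12-bit field; otherwise materialize
// it in RSCRATCH and use the register-register form instead.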
if (rvIsValidSExtITypeImm(imm))
436
{
437
(rvAsm->*iop)(rd, rs, imm);
438
return;
439
}
440
441
rvEmitMov(rvAsm, RSCRATCH, imm);
442
(rvAsm->*rop)(rd, rs, RSCRATCH);
443
}
444
445
void CPU::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
446
{
447
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
448
&Assembler::ADD);
449
}
450
451
void CPU::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
452
{
453
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
454
&Assembler::ADDW);
455
}
456
457
void CPU::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
458
{
459
const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
460
SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
461
&Assembler::ADDW);
462
}
463
464
void CPU::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
465
{
466
SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
467
}
468
469
void CPU::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
470
{
471
SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
472
}
473
474
void CPU::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
475
{
476
SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
477
}
478
479
void CPU::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
480
{
481
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
482
&Assembler::SLT);
483
}
484
485
void CPU::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
486
{
487
SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
488
&Assembler::SLTU);
489
}
490
491
void CPU::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
492
{
493
rvEmitSExtB(rvAsm, rd, rs);
494
}
495
496
void CPU::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
497
{
498
rvEmitUExtB(rvAsm, rd, rs);
499
}
500
501
void CPU::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
502
{
503
rvEmitSExtH(rvAsm, rd, rs);
504
}
505
506
void CPU::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
507
{
508
rvEmitUExtH(rvAsm, rd, rs);
509
}
510
511
void CPU::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
512
{
513
rvEmitDSExtW(rvAsm, rd, rs);
514
}
515
516
void CPU::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
517
{
518
rvEmitDUExtW(rvAsm, rd, rs);
519
}
520
521
void CPU::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
522
{
523
// store it first to reduce code size, because we can offset
524
// TODO: 64-bit displacement is needed :/
525
// rvMoveAddressToReg(rvAsm, RARG1, ram_ptr);
526
// rvMoveAddressToReg(rvAsm, RARG2, shadow_ptr);
527
rvEmitMov64(rvAsm, RARG1, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(ram_ptr)));
528
rvEmitMov64(rvAsm, RARG2, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(shadow_ptr)));
529
530
u32 offset = 0;
531
Label block_changed;
532
533
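// Compare the shadow copy against current RAM contents in 8-byte then 4-byte chunks; any
// mismatch means the source of this block changed, so jump out to discard-and-recompile.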
while (size >= 8)
534
{
535
rvAsm->LD(RARG3, offset, RARG1);
536
rvAsm->LD(RSCRATCH, offset, RARG2);
537
rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
538
offset += 8;
539
size -= 8;
540
}
541
542
while (size >= 4)
543
{
544
rvAsm->LW(RARG3, offset, RARG1);
545
rvAsm->LW(RSCRATCH, offset, RARG2);
546
rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
547
offset += 4;
548
size -= 4;
549
}
550
551
DebugAssert(size == 0);
552
553
Label block_unchanged;
554
rvAsm->J(&block_unchanged);
555
rvAsm->Bind(&block_changed);
556
rvEmitJmp(rvAsm, CodeCache::g_discard_and_recompile_block);
557
rvAsm->Bind(&block_unchanged);
558
}
559
560
void CPU::RISCV64Recompiler::GenerateICacheCheckAndUpdate()
561
{
562
if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
563
{
564
if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
565
{
566
rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
567
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
568
rvEmitMov(rvAsm, RARG3, m_block->size);
569
rvAsm->MULW(RARG2, RARG2, RARG3);
570
rvAsm->ADD(RARG1, RARG1, RARG2);
571
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
572
}
573
else
574
{
575
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
576
SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
577
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
578
}
579
}
580
else if (m_block->icache_line_count > 0)
581
{
582
const auto& ticks_reg = RARG1;
583
const auto& current_tag_reg = RARG2;
584
const auto& existing_tag_reg = RARG3;
585
586
// start of block, nothing should be using this
587
const auto& maddr_reg = biscuit::t0;
588
DebugAssert(!IsHostRegAllocated(maddr_reg.Index()));
589
590
VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
591
rvAsm->LW(ticks_reg, PTR(&g_state.pending_ticks));
592
rvEmitMov(rvAsm, current_tag_reg, current_pc);
593
594
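// For each icache line the block touches: if the stored tag differs from the current PC's tag,
// write the new tag and charge the line fill time to pending_ticks; matching tags are a hit.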
for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
595
{
596
const TickCount fill_ticks = GetICacheFillTicks(current_pc);
597
if (fill_ticks <= 0)
598
continue;
599
600
const u32 line = GetICacheLine(current_pc);
601
const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));
602
603
// Offsets must fit in signed 12 bits.
604
Label cache_hit;
605
if (offset >= 2048)
606
{
607
SafeADDI(maddr_reg, RSTATE, offset);
608
rvAsm->LW(existing_tag_reg, 0, maddr_reg);
609
rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
610
rvAsm->SW(current_tag_reg, 0, maddr_reg);
611
}
612
else
613
{
614
rvAsm->LW(existing_tag_reg, offset, RSTATE);
615
rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
616
rvAsm->SW(current_tag_reg, offset, RSTATE);
617
}
618
619
SafeADDIW(ticks_reg, ticks_reg, static_cast<u32>(fill_ticks));
620
rvAsm->Bind(&cache_hit);
621
622
if (i != (m_block->icache_line_count - 1))
623
SafeADDIW(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
624
}
625
626
rvAsm->SW(ticks_reg, PTR(&g_state.pending_ticks));
627
}
628
}
629
630
void CPU::RISCV64Recompiler::GenerateCall(const void* func, s32 arg1reg /*= -1*/, s32 arg2reg /*= -1*/,
631
s32 arg3reg /*= -1*/)
632
{
633
if (arg1reg >= 0 && arg1reg != static_cast<s32>(RARG1.Index()))
634
rvAsm->MV(RARG1, GPR(arg1reg));
635
if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.Index()))
636
rvAsm->MV(RARG2, GPR(arg2reg));
637
if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.Index()))
638
rvAsm->MV(RARG3, GPR(arg3reg));
639
EmitCall(func);
640
}
641
642
void CPU::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)
643
{
644
if (newpc.has_value())
645
{
646
if (m_dirty_pc || m_compiler_pc != newpc)
647
{
648
EmitMov(RSCRATCH, newpc.value());
649
rvAsm->SW(RSCRATCH, PTR(&g_state.pc));
650
}
651
}
652
m_dirty_pc = false;
653
654
// flush regs
655
Flush(FLUSH_END_BLOCK);
656
EndAndLinkBlock(newpc, do_event_test, false);
657
}
658
659
void CPU::RISCV64Recompiler::EndBlockWithException(Exception excode)
660
{
661
// flush regs, but not pc, it's going to get overwritten
662
// flush cycles because of the GTE instruction stuff...
663
Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);
664
665
// TODO: flush load delay
666
667
EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false,
668
inst->cop.cop_n));
669
EmitMov(RARG2, m_current_instruction_pc);
670
if (excode != Exception::BP)
671
{
672
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
673
}
674
else
675
{
676
EmitMov(RARG3, inst->bits);
677
EmitCall(reinterpret_cast<const void*>(&CPU::RaiseBreakException));
678
}
679
m_dirty_pc = false;
680
681
EndAndLinkBlock(std::nullopt, true, false);
682
}
683
684
void CPU::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events)
685
{
686
// event test
687
// pc should've been flushed
688
DebugAssert(!m_dirty_pc && !m_block_ended);
689
m_block_ended = true;
690
691
// TODO: try extracting this to a function
692
// TODO: move the cycle flush in here..
693
694
// save cycles for event test
695
const TickCount cycles = std::exchange(m_cycles, 0);
696
697
// pending_ticks += cycles
698
// if (pending_ticks >= downcount) { dispatch_event(); }
699
if (do_event_test || m_gte_done_cycle > cycles || cycles > 0)
700
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
701
if (do_event_test)
702
rvAsm->LW(RARG2, PTR(&g_state.downcount));
703
if (cycles > 0)
704
{
705
SafeADDIW(RARG1, RARG1, cycles);
706
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
707
}
708
if (m_gte_done_cycle > cycles)
709
{
710
SafeADDIW(RARG2, RARG1, m_gte_done_cycle - cycles);
711
rvAsm->SW(RARG2, PTR(&g_state.gte_completion_tick));
712
}
713
714
if (do_event_test)
715
{
716
// TODO: see if we can do a far jump somehow with this..
717
Label cont;
718
rvAsm->BLT(RARG1, RARG2, &cont);
719
rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
720
rvAsm->Bind(&cont);
721
}
722
723
// jump to dispatcher or next block
724
if (force_run_events)
725
{
726
rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);
727
}
728
else if (!newpc.has_value())
729
{
730
rvEmitJmp(rvAsm, CodeCache::g_dispatcher);
731
}
732
else
733
{
734
const void* target =
735
(newpc.value() == m_block->pc) ?
736
CodeCache::CreateSelfBlockLink(m_block, rvAsm->GetCursorPointer(), rvAsm->GetBufferPointer(0)) :
737
CodeCache::CreateBlockLink(m_block, rvAsm->GetCursorPointer(), newpc.value());
738
rvEmitJmp(rvAsm, target);
739
}
740
}
741
742
const void* CPU::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)
743
{
744
u8* const code = m_emitter->GetBufferPointer(0);
745
*code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());
746
*far_code_size = static_cast<u32>(m_far_emitter->GetCodeBuffer().GetSizeInBytes());
747
rvAsm = nullptr;
748
m_far_emitter.reset();
749
m_emitter.reset();
750
return code;
751
}
752
753
const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const
754
{
755
static constexpr std::array<const char*, 32> reg64_names = {
756
{"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
757
"a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};
758
return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
759
}
760
761
void CPU::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
762
{
763
EmitMov(GPR(reg), val);
764
}
765
766
void CPU::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
767
{
768
rvAsm->LW(GPR(reg), PTR(ptr));
769
}
770
771
void CPU::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
772
{
773
rvAsm->SW(GPR(reg), PTR(ptr));
774
}
775
776
void CPU::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
777
{
778
if (val == 0)
779
{
780
rvAsm->SW(zero, PTR(ptr));
781
return;
782
}
783
784
EmitMov(RSCRATCH, val);
785
rvAsm->SW(RSCRATCH, PTR(ptr));
786
}
787
788
void CPU::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)
789
{
790
if (src != dst)
791
rvAsm->MV(GPR(dst), GPR(src));
792
}
793
794
void CPU::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const
795
{
796
DebugAssert(cf.valid_host_s || cf.const_s);
797
}
798
799
void CPU::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const
800
{
801
DebugAssert(cf.valid_host_t || cf.const_t);
802
}
803
804
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
805
{
806
if (cf.valid_host_s)
807
{
808
return GPR(cf.host_s);
809
}
810
else if (cf.const_s)
811
{
812
if (HasConstantRegValue(cf.MipsS(), 0))
813
return zero;
814
815
EmitMov(temp_reg, GetConstantRegU32(cf.MipsS()));
816
return temp_reg;
817
}
818
else
819
{
820
WARNING_LOG("Hit memory path in CFGetSafeRegS() for {}", GetRegName(cf.MipsS()));
821
rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_s]));
822
return temp_reg;
823
}
824
}
825
826
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
827
{
828
if (cf.valid_host_t)
829
{
830
return GPR(cf.host_t);
831
}
832
else if (cf.const_t)
833
{
834
if (HasConstantRegValue(cf.MipsT(), 0))
835
return zero;
836
837
EmitMov(temp_reg, GetConstantRegU32(cf.MipsT()));
838
return temp_reg;
839
}
840
else
841
{
842
WARNING_LOG("Hit memory path in CFGetSafeRegT() for {}", GetRegName(cf.MipsT()));
843
rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_t]));
844
return temp_reg;
845
}
846
}
847
848
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const
849
{
850
DebugAssert(cf.valid_host_d);
851
return GPR(cf.host_d);
852
}
853
854
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const
855
{
856
DebugAssert(cf.valid_host_s);
857
return GPR(cf.host_s);
858
}
859
860
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const
861
{
862
DebugAssert(cf.valid_host_t);
863
return GPR(cf.host_t);
864
}
865
866
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const
867
{
868
DebugAssert(cf.valid_host_lo);
869
return GPR(cf.host_lo);
870
}
871
872
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const
873
{
874
DebugAssert(cf.valid_host_hi);
875
return GPR(cf.host_hi);
876
}
877
878
void CPU::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
879
{
880
if (cf.valid_host_s)
881
{
882
if (cf.host_s != dst.Index())
883
rvAsm->MV(dst, GPR(cf.host_s));
884
}
885
else if (cf.const_s)
886
{
887
EmitMov(dst, GetConstantRegU32(cf.MipsS()));
888
}
889
else
890
{
891
WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS()));
892
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
893
}
894
}
895
896
void CPU::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
897
{
898
if (cf.valid_host_t)
899
{
900
if (cf.host_t != dst.Index())
901
rvAsm->MV(dst, GPR(cf.host_t));
902
}
903
else if (cf.const_t)
904
{
905
EmitMov(dst, GetConstantRegU32(cf.MipsT()));
906
}
907
else
908
{
909
WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT()));
910
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_t]));
911
}
912
}
913
914
void CPU::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
915
{
916
DebugAssert(reg < Reg::count);
917
if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
918
rvAsm->MV(dst, GPR(hreg.value()));
919
else if (HasConstantReg(reg))
920
EmitMov(dst, GetConstantRegU32(reg));
921
else
922
rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
923
}
924
925
void CPU::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */,
926
Reg arg3reg /* = Reg::count */)
927
{
928
DebugAssert(g_settings.gpu_pgxp_enable);
929
930
Flush(FLUSH_FOR_C_CALL);
931
932
if (arg2reg != Reg::count)
933
MoveMIPSRegToReg(RARG2, arg2reg);
934
if (arg3reg != Reg::count)
935
MoveMIPSRegToReg(RARG3, arg3reg);
936
937
EmitMov(RARG1, arg1val);
938
EmitCall(func);
939
}
940
941
void CPU::RISCV64Recompiler::Flush(u32 flags)
942
{
943
Recompiler::Flush(flags);
944
945
if (flags & FLUSH_PC && m_dirty_pc)
946
{
947
StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc);
948
m_dirty_pc = false;
949
}
950
951
if (flags & FLUSH_INSTRUCTION_BITS)
952
{
953
// This sucks, but it's only used for fallbacks.
954
Panic("Not implemented");
955
}
956
957
if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
958
{
959
// This sucks :(
960
// TODO: make it a function?
961
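// Apply the load delay recorded in state: regs.r[load_delay_reg] = load_delay_value, then
// mark the slot empty by setting load_delay_reg back to Reg::count.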
rvAsm->LBU(RARG1, PTR(&g_state.load_delay_reg));
962
rvAsm->LW(RARG2, PTR(&g_state.load_delay_value));
963
rvAsm->SLLI(RARG1, RARG1, 2); // *4
964
rvAsm->ADD(RARG1, RARG1, RSTATE);
965
rvAsm->SW(RARG2, OFFSETOF(CPU::State, regs.r[0]), RARG1);
966
rvAsm->LI(RSCRATCH, static_cast<u8>(Reg::count));
967
rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
968
m_load_delay_dirty = false;
969
}
970
971
if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count)
972
{
973
if (m_load_delay_value_register != NUM_HOST_REGS)
974
FreeHostReg(m_load_delay_value_register);
975
976
EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register));
977
rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
978
m_load_delay_register = Reg::count;
979
m_load_delay_dirty = true;
980
}
981
982
if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle)
983
{
984
// May as well flush cycles while we're here.
985
// GTE spanning blocks is very rare, we _could_ disable this for speed.
986
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
987
rvAsm->LW(RARG2, PTR(&g_state.gte_completion_tick));
988
if (m_cycles > 0)
989
{
990
SafeADDIW(RARG1, RARG1, m_cycles);
991
m_cycles = 0;
992
}
993
Label no_stall;
994
rvAsm->BGE(RARG1, RARG2, &no_stall);
995
rvAsm->MV(RARG1, RARG2);
996
rvAsm->Bind(&no_stall);
997
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
998
m_dirty_gte_done_cycle = false;
999
}
1000
1001
if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles)
1002
{
1003
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
1004
1005
// update cycles at the same time
1006
if (flags & FLUSH_CYCLES && m_cycles > 0)
1007
{
1008
SafeADDIW(RARG1, RARG1, m_cycles);
1009
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
1010
m_gte_done_cycle -= m_cycles;
1011
m_cycles = 0;
1012
}
1013
1014
SafeADDIW(RARG1, RARG1, m_gte_done_cycle);
1015
rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));
1016
m_gte_done_cycle = 0;
1017
m_dirty_gte_done_cycle = true;
1018
}
1019
1020
if (flags & FLUSH_CYCLES && m_cycles > 0)
1021
{
1022
rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
1023
SafeADDIW(RARG1, RARG1, m_cycles);
1024
rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
1025
m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0);
1026
m_cycles = 0;
1027
}
1028
}
1029
1030
void CPU::RISCV64Recompiler::Compile_Fallback()
1031
{
1032
WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
1033
inst->bits);
1034
1035
Flush(FLUSH_FOR_INTERPRETER);
1036
1037
#if 0
1038
cg->call(&CPU::RecompilerThunks::InterpretInstruction);
1039
1040
// TODO: make me less garbage
1041
// TODO: this is wrong, it flushes the load delay on the same cycle when we return.
1042
// but nothing should be going through here..
1043
Label no_load_delay;
1044
cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
1045
cg->cmp(RWARG1, static_cast<u8>(Reg::count));
1046
cg->je(no_load_delay, CodeGenerator::T_SHORT);
1047
cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
1048
cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
1049
cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
1050
cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
1051
cg->L(no_load_delay);
1052
1053
m_load_delay_dirty = EMULATE_LOAD_DELAYS;
1054
#else
1055
Panic("Fixme");
1056
#endif
1057
}
1058
1059
void CPU::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
1060
{
1061
if (!g_settings.cpu_recompiler_memory_exceptions)
1062
return;
1063
1064
DebugAssert(pcreg != RSCRATCH);
1065
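// Branch targets must be word-aligned; if either of the low two bits is set, take the far
// path and end the block with an AdEL (address error on instruction fetch) exception.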
rvAsm->ANDI(RSCRATCH, pcreg, 0x3);
1066
SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
1067
1068
BackupHostState();
1069
EndBlockWithException(Exception::AdEL);
1070
1071
RestoreHostState();
1072
SwitchToNearCode(false);
1073
}
1074
1075
void CPU::RISCV64Recompiler::Compile_jr(CompileFlags cf)
1076
{
1077
const GPR pcreg = CFGetRegS(cf);
1078
CheckBranchTarget(pcreg);
1079
1080
rvAsm->SW(pcreg, PTR(&g_state.pc));
1081
1082
CompileBranchDelaySlot(false);
1083
EndBlock(std::nullopt, true);
1084
}
1085
1086
void CPU::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
1087
{
1088
const GPR pcreg = CFGetRegS(cf);
1089
if (MipsD() != Reg::zero)
1090
SetConstantReg(MipsD(), GetBranchReturnAddress(cf));
1091
1092
CheckBranchTarget(pcreg);
1093
rvAsm->SW(pcreg, PTR(&g_state.pc));
1094
1095
CompileBranchDelaySlot(false);
1096
EndBlock(std::nullopt, true);
1097
}
1098
1099
void CPU::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
1100
{
1101
AssertRegOrConstS(cf);
1102
1103
const u32 taken_pc = GetConditionalBranchTarget(cf);
1104
1105
Flush(FLUSH_FOR_BRANCH);
1106
1107
DebugAssert(cf.valid_host_s);
1108
1109
// MipsT() here should equal zero for zero branches.
1110
DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);
1111
1112
Label taken;
1113
const GPR rs = CFGetRegS(cf);
1114
switch (cond)
1115
{
1116
case BranchCondition::Equal:
1117
case BranchCondition::NotEqual:
1118
{
1119
AssertRegOrConstT(cf);
1120
if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0))
1121
{
1122
(cond == BranchCondition::Equal) ? rvAsm->BEQZ(rs, &taken) : rvAsm->BNEZ(rs, &taken);
1123
}
1124
else
1125
{
1126
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
1127
if (!cf.valid_host_t)
1128
MoveTToReg(RARG1, cf);
1129
if (cond == Recompiler::BranchCondition::Equal)
1130
rvAsm->BEQ(rs, rt, &taken);
1131
else
1132
rvAsm->BNE(rs, rt, &taken);
1133
}
1134
}
1135
break;
1136
1137
case BranchCondition::GreaterThanZero:
1138
{
1139
rvAsm->BGTZ(rs, &taken);
1140
}
1141
break;
1142
1143
case BranchCondition::GreaterEqualZero:
1144
{
1145
rvAsm->BGEZ(rs, &taken);
1146
}
1147
break;
1148
1149
case BranchCondition::LessThanZero:
1150
{
1151
rvAsm->BLTZ(rs, &taken);
1152
}
1153
break;
1154
1155
case BranchCondition::LessEqualZero:
1156
{
1157
rvAsm->BLEZ(rs, &taken);
1158
}
1159
break;
1160
}
1161
1162
BackupHostState();
1163
if (!cf.delay_slot_swapped)
1164
CompileBranchDelaySlot();
1165
1166
EndBlock(m_compiler_pc, true);
1167
1168
rvAsm->Bind(&taken);
1169
1170
RestoreHostState();
1171
if (!cf.delay_slot_swapped)
1172
CompileBranchDelaySlot();
1173
1174
EndBlock(taken_pc, true);
1175
}
1176
1177
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
1178
{
1179
const GPR rs = CFGetRegS(cf);
1180
const GPR rt = CFGetRegT(cf);
1181
if (const u32 imm = inst->i.imm_sext32(); imm != 0)
1182
{
1183
if (!overflow)
1184
{
1185
SafeADDIW(rt, rs, imm);
1186
}
1187
else
1188
{
1189
SafeADDI(RARG1, rs, imm);
1190
SafeADDIW(rt, rs, imm);
1191
TestOverflow(RARG1, rt, rt);
1192
}
1193
}
1194
else if (rt.Index() != rs.Index())
1195
{
1196
rvAsm->MV(rt, rs);
1197
}
1198
}
1199
1200
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf)
1201
{
1202
Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
1203
}
1204
1205
void CPU::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
1206
{
1207
Compile_addi(cf, false);
1208
}
1209
1210
void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf)
1211
{
1212
Compile_slti(cf, true);
1213
}
1214
1215
void CPU::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
1216
{
1217
Compile_slti(cf, false);
1218
}
1219
1220
void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf, bool sign)
1221
{
1222
if (sign)
1223
SafeSLTI(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
1224
else
1225
SafeSLTIU(CFGetRegT(cf), CFGetRegS(cf), inst->i.imm_sext32());
1226
}
1227
1228
void CPU::RISCV64Recompiler::Compile_andi(CompileFlags cf)
1229
{
1230
const GPR rt = CFGetRegT(cf);
1231
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1232
SafeANDI(rt, CFGetRegS(cf), imm);
1233
else
1234
EmitMov(rt, 0);
1235
}
1236
1237
void CPU::RISCV64Recompiler::Compile_ori(CompileFlags cf)
1238
{
1239
const GPR rt = CFGetRegT(cf);
1240
const GPR rs = CFGetRegS(cf);
1241
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1242
SafeORI(rt, rs, imm);
1243
else if (rt.Index() != rs.Index())
1244
rvAsm->MV(rt, rs);
1245
}
1246
1247
void CPU::RISCV64Recompiler::Compile_xori(CompileFlags cf)
1248
{
1249
const GPR rt = CFGetRegT(cf);
1250
const GPR rs = CFGetRegS(cf);
1251
if (const u32 imm = inst->i.imm_zext32(); imm != 0)
1252
SafeXORI(rt, rs, imm);
1253
else if (rt.Index() != rs.Index())
1254
rvAsm->MV(rt, rs);
1255
}
1256
1257
void CPU::RISCV64Recompiler::Compile_shift(CompileFlags cf,
1258
void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1259
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
1260
{
1261
const GPR rd = CFGetRegD(cf);
1262
const GPR rt = CFGetRegT(cf);
1263
if (inst->r.shamt > 0)
1264
(rvAsm->*op_const)(rd, rt, inst->r.shamt);
1265
else if (rd.Index() != rt.Index())
1266
rvAsm->MV(rd, rt);
1267
}
1268
1269
void CPU::RISCV64Recompiler::Compile_sll(CompileFlags cf)
1270
{
1271
Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
1272
}
1273
1274
void CPU::RISCV64Recompiler::Compile_srl(CompileFlags cf)
1275
{
1276
Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
1277
}
1278
1279
void CPU::RISCV64Recompiler::Compile_sra(CompileFlags cf)
1280
{
1281
Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
1282
}
1283
1284
void CPU::RISCV64Recompiler::Compile_variable_shift(
1285
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1286
void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))
1287
{
1288
const GPR rd = CFGetRegD(cf);
1289
1290
AssertRegOrConstS(cf);
1291
AssertRegOrConstT(cf);
1292
1293
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1294
if (!cf.valid_host_t)
1295
MoveTToReg(rt, cf);
1296
1297
if (cf.const_s)
1298
{
1299
if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0)
1300
(rvAsm->*op_const)(rd, rt, shift & 31u);
1301
else if (rd.Index() != rt.Index())
1302
rvAsm->MV(rd, rt);
1303
}
1304
else
1305
{
1306
(rvAsm->*op)(rd, rt, CFGetRegS(cf));
1307
}
1308
}
1309
1310
void CPU::RISCV64Recompiler::Compile_sllv(CompileFlags cf)
1311
{
1312
Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);
1313
}
1314
1315
void CPU::RISCV64Recompiler::Compile_srlv(CompileFlags cf)
1316
{
1317
Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);
1318
}
1319
1320
void CPU::RISCV64Recompiler::Compile_srav(CompileFlags cf)
1321
{
1322
Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);
1323
}
1324
1325
void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign)
1326
{
1327
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1328
if (!cf.valid_host_s)
1329
MoveSToReg(rs, cf);
1330
1331
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1332
if (!cf.valid_host_t)
1333
MoveTToReg(rt, cf);
1334
1335
// TODO: if lo/hi gets killed, we can use a 32-bit multiply
1336
const GPR lo = CFGetRegLO(cf);
1337
const GPR hi = CFGetRegHI(cf);
1338
1339
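// One 64-bit multiply yields both halves: hi is the upper 32 bits (arithmetic shift right by 32),
// lo is the lower 32 bits, re-sign-extended so the host register holds a canonical 32-bit value.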
if (sign)
1340
{
1341
rvAsm->MUL(lo, rs, rt);
1342
rvAsm->SRAI64(hi, lo, 32);
1343
EmitDSExtW(lo, lo);
1344
}
1345
else
1346
{
1347
// Need to make it unsigned.
1348
EmitDUExtW(RARG1, rs);
1349
EmitDUExtW(RARG2, rt);
1350
rvAsm->MUL(lo, RARG1, RARG2);
1351
rvAsm->SRAI64(hi, lo, 32);
1352
EmitDSExtW(lo, lo);
1353
}
1354
}
1355
1356
void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf)
1357
{
1358
Compile_mult(cf, true);
1359
}
1360
1361
void CPU::RISCV64Recompiler::Compile_multu(CompileFlags cf)
1362
{
1363
Compile_mult(cf, false);
1364
}
1365
1366
void CPU::RISCV64Recompiler::Compile_div(CompileFlags cf)
1367
{
1368
// Divide-by-zero and overflow semantics: see p. 36, Volume I: RISC-V User-Level ISA V2.2.
1369
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1370
if (!cf.valid_host_s)
1371
MoveSToReg(rs, cf);
1372
1373
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1374
if (!cf.valid_host_t)
1375
MoveTToReg(rt, cf);
1376
1377
const GPR rlo = CFGetRegLO(cf);
1378
const GPR rhi = CFGetRegHI(cf);
1379
1380
Label done;
1381
Label not_divide_by_zero;
1382
rvAsm->BNEZ(rt, &not_divide_by_zero);
1383
rvAsm->MV(rhi, rs); // hi = num
1384
rvAsm->SRAI64(rlo, rs, 63);
1385
rvAsm->ANDI(rlo, rlo, 2);
1386
rvAsm->ADDI(rlo, rlo, -1); // lo = s >= 0 ? -1 : 1
1387
rvAsm->J(&done);
1388
1389
rvAsm->Bind(&not_divide_by_zero);
1390
Label not_unrepresentable;
1391
EmitMov(RSCRATCH, static_cast<u32>(-1));
1392
rvAsm->BNE(rt, RSCRATCH, &not_unrepresentable);
1393
EmitMov(rlo, 0x80000000u);
1394
rvAsm->BNE(rs, rlo, &not_unrepresentable);
1395
EmitMov(rhi, 0);
1396
rvAsm->J(&done);
1397
1398
rvAsm->Bind(&not_unrepresentable);
1399
1400
rvAsm->DIVW(rlo, rs, rt);
1401
rvAsm->REMW(rhi, rs, rt);
1402
1403
rvAsm->Bind(&done);
1404
}
1405
1406
void CPU::RISCV64Recompiler::Compile_divu(CompileFlags cf)
1407
{
1408
const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;
1409
if (!cf.valid_host_s)
1410
MoveSToReg(rs, cf);
1411
1412
const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
1413
if (!cf.valid_host_t)
1414
MoveTToReg(rt, cf);
1415
1416
const GPR rlo = CFGetRegLO(cf);
1417
const GPR rhi = CFGetRegHI(cf);
1418
1419
// Semantics match? :-)
1420
rvAsm->DIVUW(rlo, rs, rt);
1421
rvAsm->REMUW(rhi, rs, rt);
1422
}
1423
1424
void CPU::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,
1425
const biscuit::GPR& reg_to_discard)
1426
{
1427
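// Signed overflow occurred iff the full-width result differs from the truncated 32-bit result;
// in that case discard the destination register and raise an Ov exception on the far path.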
SwitchToFarCode(true, &Assembler::BEQ, long_res, res);
1428
1429
BackupHostState();
1430
1431
// toss the result
1432
ClearHostReg(reg_to_discard.Index());
1433
1434
EndBlockWithException(Exception::Ov);
1435
1436
RestoreHostState();
1437
1438
SwitchToNearCode(false);
1439
}
1440
1441
void CPU::RISCV64Recompiler::Compile_dst_op(
1442
CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),
1443
void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),
1444
void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)
1445
{
1446
AssertRegOrConstS(cf);
1447
AssertRegOrConstT(cf);
1448
1449
const GPR rd = CFGetRegD(cf);
1450
1451
if (overflow)
1452
{
1453
const GPR rs = CFGetSafeRegS(cf, RARG1);
1454
const GPR rt = CFGetSafeRegT(cf, RARG2);
1455
(rvAsm->*op)(RARG3, rs, rt);
1456
(rvAsm->*op_long)(rd, rs, rt);
1457
TestOverflow(RARG3, rd, rd);
1458
return;
1459
}
1460
1461
if (cf.valid_host_s && cf.valid_host_t)
1462
{
1463
(rvAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));
1464
}
1465
else if (commutative && (cf.const_s || cf.const_t))
1466
{
1467
const GPR src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);
1468
if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
1469
{
1470
(this->*op_const)(rd, src, cv);
1471
}
1472
else
1473
{
1474
if (rd.Index() != src.Index())
1475
rvAsm->MV(rd, src);
1476
overflow = false;
1477
}
1478
}
1479
else if (cf.const_s)
1480
{
1481
if (HasConstantRegValue(cf.MipsS(), 0))
1482
{
1483
(rvAsm->*op)(rd, zero, CFGetRegT(cf));
1484
}
1485
else
1486
{
1487
EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS()));
1488
(rvAsm->*op)(rd, RSCRATCH, CFGetRegT(cf));
1489
}
1490
}
1491
else if (cf.const_t)
1492
{
1493
const GPR rs = CFGetRegS(cf);
1494
if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)
1495
{
1496
(this->*op_const)(rd, rs, cv);
1497
}
1498
else
1499
{
1500
if (rd.Index() != rs.Index())
1501
rvAsm->MV(rd, rs);
1502
overflow = false;
1503
}
1504
}
1505
}
1506
1507
void CPU::RISCV64Recompiler::Compile_add(CompileFlags cf)
1508
{
1509
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,
1510
g_settings.cpu_recompiler_memory_exceptions);
1511
}
1512
1513
void CPU::RISCV64Recompiler::Compile_addu(CompileFlags cf)
1514
{
1515
Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);
1516
}
1517
1518
void CPU::RISCV64Recompiler::Compile_sub(CompileFlags cf)
1519
{
1520
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,
1521
g_settings.cpu_recompiler_memory_exceptions);
1522
}
1523
1524
void CPU::RISCV64Recompiler::Compile_subu(CompileFlags cf)
1525
{
1526
Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);
1527
}
1528
1529
void CPU::RISCV64Recompiler::Compile_and(CompileFlags cf)
1530
{
1531
AssertRegOrConstS(cf);
1532
AssertRegOrConstT(cf);
1533
1534
// special cases - and with self -> self, and with 0 -> 0
1535
const GPR regd = CFGetRegD(cf);
1536
if (cf.MipsS() == cf.MipsT())
1537
{
1538
rvAsm->MV(regd, CFGetRegS(cf));
1539
return;
1540
}
1541
else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
1542
{
1543
EmitMov(regd, 0);
1544
return;
1545
}
1546
1547
Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false);
1548
}
1549
1550
void CPU::RISCV64Recompiler::Compile_or(CompileFlags cf)
1551
{
1552
AssertRegOrConstS(cf);
1553
AssertRegOrConstT(cf);
1554
1555
// or/nor with 0 -> no effect
1556
const GPR regd = CFGetRegD(cf);
1557
if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())
1558
{
1559
cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
1560
return;
1561
}
1562
1563
Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false);
1564
}
1565
1566
void CPU::RISCV64Recompiler::Compile_xor(CompileFlags cf)
1567
{
1568
AssertRegOrConstS(cf);
1569
AssertRegOrConstT(cf);
1570
1571
const GPR regd = CFGetRegD(cf);
1572
if (cf.MipsS() == cf.MipsT())
1573
{
1574
// xor with self -> zero
1575
EmitMov(regd, 0);
1576
return;
1577
}
1578
else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))
1579
{
1580
// xor with zero -> no effect
1581
cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);
1582
return;
1583
}
1584
1585
Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false);
1586
}
1587
1588
void CPU::RISCV64Recompiler::Compile_nor(CompileFlags cf)
1589
{
1590
Compile_or(cf);
1591
rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));
1592
}
1593
1594
void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf)
1595
{
1596
Compile_slt(cf, true);
1597
}
1598
1599
void CPU::RISCV64Recompiler::Compile_sltu(CompileFlags cf)
1600
{
1601
Compile_slt(cf, false);
1602
}
1603
1604
void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign)
1605
{
1606
AssertRegOrConstS(cf);
1607
AssertRegOrConstT(cf);
1608
1609
const GPR rd = CFGetRegD(cf);
1610
const GPR rs = CFGetSafeRegS(cf, RARG1);
1611
1612
if (cf.const_t && rvIsValidSExtITypeImm(GetConstantRegU32(cf.MipsT())))
1613
{
1614
if (sign)
1615
rvAsm->SLTI(rd, rs, GetConstantRegS32(cf.MipsT()));
1616
else
1617
rvAsm->SLTIU(rd, rs, GetConstantRegS32(cf.MipsT()));
1618
}
1619
else
1620
{
1621
const GPR rt = CFGetSafeRegT(cf, RARG2);
1622
if (sign)
1623
rvAsm->SLT(rd, rs, rt);
1624
else
1625
rvAsm->SLTU(rd, rs, rt);
1626
}
1627
}
1628
1629
biscuit::GPR CPU::RISCV64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,
1630
const std::optional<VirtualMemoryAddress>& address,
1631
const std::optional<const biscuit::GPR>& reg)
1632
{
1633
const u32 imm = inst->i.imm_sext32();
1634
if (cf.valid_host_s && imm == 0 && !reg.has_value())
1635
return CFGetRegS(cf);
1636
1637
const GPR dst = reg.has_value() ? reg.value() : RARG1;
1638
if (address.has_value())
1639
{
1640
EmitMov(dst, address.value());
1641
}
1642
else if (imm == 0)
1643
{
1644
if (cf.valid_host_s)
1645
{
1646
if (const GPR src = CFGetRegS(cf); src.Index() != dst.Index())
1647
rvAsm->MV(dst, CFGetRegS(cf));
1648
}
1649
else
1650
{
1651
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
1652
}
1653
}
1654
else
1655
{
1656
if (cf.valid_host_s)
1657
{
1658
SafeADDIW(dst, CFGetRegS(cf), inst->i.imm_sext32());
1659
}
1660
else
1661
{
1662
rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
1663
SafeADDIW(dst, dst, inst->i.imm_sext32());
1664
}
1665
}
1666
1667
return dst;
1668
}
1669
1670
template<typename RegAllocFn>
1671
biscuit::GPR CPU::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,
1672
bool use_fastmem, const RegAllocFn& dst_reg_alloc)
1673
{
1674
if (use_fastmem)
1675
{
1676
m_cycles += Bus::RAM_READ_TICKS;
1677
1678
// TODO: Make this better. If we're loading the address from state, we can use LWU instead, and skip this.
1679
// TODO: LUT fastmem
1680
const GPR dst = dst_reg_alloc();
1681
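// Zero-extend the 32-bit guest address into RSCRATCH by shifting the upper bits out.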
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
1682
rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);
1683
1684
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
1685
{
1686
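// LUT fastmem: fetch the per-page host pointer from the table at RMEMBASE and add it to the
// guest address to form the host address.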
DebugAssert(addr_reg.Index() != RARG3.Index());
1687
rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
1688
rvAsm->SLLI64(RARG3, RARG3, 8);
1689
rvAsm->ADD(RARG3, RARG3, RMEMBASE);
1690
rvAsm->LD(RARG3, 0, RARG3);
1691
rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
1692
}
1693
else
1694
{
1695
rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
1696
}
1697
1698
u8* start = m_emitter->GetCursorPointer();
1699
switch (size)
1700
{
1701
case MemoryAccessSize::Byte:
1702
sign ? rvAsm->LB(dst, 0, RSCRATCH) : rvAsm->LBU(dst, 0, RSCRATCH);
1703
break;
1704
1705
case MemoryAccessSize::HalfWord:
1706
sign ? rvAsm->LH(dst, 0, RSCRATCH) : rvAsm->LHU(dst, 0, RSCRATCH);
1707
break;
1708
1709
case MemoryAccessSize::Word:
1710
rvAsm->LW(dst, 0, RSCRATCH);
1711
break;
1712
}
1713
1714
// We need a nop, because the slowmem jump might be more than 1MB away.
1715
rvAsm->NOP();
1716
1717
AddLoadStoreInfo(start, 8, addr_reg.Index(), dst.Index(), size, sign, true);
1718
return dst;
1719
}
1720
1721
if (addr_reg.Index() != RARG1.Index())
1722
rvAsm->MV(RARG1, addr_reg);
1723
1724
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
1725
switch (size)
1726
{
1727
case MemoryAccessSize::Byte:
1728
{
1729
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) :
1730
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte));
1731
}
1732
break;
1733
case MemoryAccessSize::HalfWord:
1734
{
1735
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) :
1736
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord));
1737
}
1738
break;
1739
case MemoryAccessSize::Word:
1740
{
1741
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) :
1742
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord));
1743
}
1744
break;
1745
}
1746
1747
// TODO: turn this into an asm function instead
1748
if (checked)
1749
{
1750
rvAsm->SRLI64(RSCRATCH, RRET, 63);
1751
SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);
1752
BackupHostState();
1753
1754
// Need to stash this in a temp because of the flush.
1755
const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1756
rvAsm->NEG(temp, RRET);
1757
rvAsm->SLLIW(temp, temp, 2);
1758
1759
Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);
1760
1761
// cause_bits = (-result << 2) | BD | cop_n
1762
SafeORI(RARG1, temp,
1763
Cop0Registers::CAUSE::MakeValueForException(
1764
static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
1765
EmitMov(RARG2, m_current_instruction_pc);
1766
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
1767
FreeHostReg(temp.Index());
1768
EndBlock(std::nullopt, true);
1769
1770
RestoreHostState();
1771
SwitchToNearCode(false);
1772
}
1773
1774
const GPR dst_reg = dst_reg_alloc();
1775
switch (size)
1776
{
1777
case MemoryAccessSize::Byte:
1778
{
1779
sign ? EmitSExtB(dst_reg, RRET) : EmitUExtB(dst_reg, RRET);
1780
}
1781
break;
1782
case MemoryAccessSize::HalfWord:
1783
{
1784
sign ? EmitSExtH(dst_reg, RRET) : EmitUExtH(dst_reg, RRET);
1785
}
1786
break;
1787
case MemoryAccessSize::Word:
1788
{
1789
// Need to undo the zero-extend.
1790
if (checked)
1791
rvEmitDSExtW(rvAsm, dst_reg, RRET);
1792
else if (dst_reg.Index() != RRET.Index())
1793
rvAsm->MV(dst_reg, RRET);
1794
}
1795
break;
1796
}
1797
1798
return dst_reg;
1799
}
1800
1801
void CPU::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,
1802
MemoryAccessSize size, bool use_fastmem)
1803
{
1804
if (use_fastmem)
1805
{
1806
DebugAssert(value_reg != RSCRATCH);
1807
rvAsm->SLLI64(RSCRATCH, addr_reg, 32);
1808
rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);
1809
1810
if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)
1811
{
1812
DebugAssert(addr_reg.Index() != RARG3.Index());
1813
rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);
1814
rvAsm->SLLI64(RARG3, RARG3, 8);
1815
rvAsm->ADD(RARG3, RARG3, RMEMBASE);
1816
rvAsm->LD(RARG3, 0, RARG3);
1817
rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);
1818
}
1819
else
1820
{
1821
rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);
1822
}
1823
1824
u8* start = m_emitter->GetCursorPointer();
1825
switch (size)
1826
{
1827
case MemoryAccessSize::Byte:
1828
rvAsm->SB(value_reg, 0, RSCRATCH);
1829
break;
1830
1831
case MemoryAccessSize::HalfWord:
1832
rvAsm->SH(value_reg, 0, RSCRATCH);
1833
break;
1834
1835
case MemoryAccessSize::Word:
1836
rvAsm->SW(value_reg, 0, RSCRATCH);
1837
break;
1838
}
1839
1840
// We need a nop, because the slowmem jump might be more than 1MB away.
1841
rvAsm->NOP();
1842
1843
AddLoadStoreInfo(start, 8, addr_reg.Index(), value_reg.Index(), size, false, false);
1844
return;
1845
}
1846
1847
if (addr_reg.Index() != RARG1.Index())
1848
rvAsm->MV(RARG1, addr_reg);
1849
if (value_reg.Index() != RARG2.Index())
1850
rvAsm->MV(RARG2, value_reg);
1851
1852
const bool checked = g_settings.cpu_recompiler_memory_exceptions;
1853
switch (size)
1854
{
1855
case MemoryAccessSize::Byte:
1856
{
1857
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) :
1858
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
1859
}
1860
break;
1861
case MemoryAccessSize::HalfWord:
1862
{
1863
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) :
1864
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
1865
}
1866
break;
1867
case MemoryAccessSize::Word:
1868
{
1869
EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) :
1870
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
1871
}
1872
break;
1873
}
1874
1875
// TODO: turn this into an asm function instead
1876
if (checked)
1877
{
1878
SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
1879
BackupHostState();
1880
1881
// Need to stash this in a temp because of the flush.
1882
const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1883
rvAsm->SLLIW(temp, RRET, 2);
1884
1885
Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);
1886
1887
// cause_bits = (result << 2) | BD | cop_n
1888
SafeORI(RARG1, temp,
1889
Cop0Registers::CAUSE::MakeValueForException(
1890
static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));
1891
EmitMov(RARG2, m_current_instruction_pc);
1892
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
1893
FreeHostReg(temp.Index());
1894
EndBlock(std::nullopt, true);
1895
1896
RestoreHostState();
1897
SwitchToNearCode(false);
1898
}
1899
}
1900
1901
void CPU::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
1902
const std::optional<VirtualMemoryAddress>& address)
1903
{
1904
const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?
1905
std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :
1906
std::optional<GPR>();
1907
FlushForLoadStore(address, false, use_fastmem);
1908
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
1909
const GPR data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {
1910
if (cf.MipsT() == Reg::zero)
1911
return RRET;
1912
1913
return GPR(AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
1914
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT()));
1915
});
1916
1917
if (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero)
1918
{
1919
Flush(FLUSH_FOR_C_CALL);
1920
1921
EmitMov(RARG1, inst->bits);
1922
rvAsm->MV(RARG2, addr);
1923
rvAsm->MV(RARG3, data);
1924
EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]);
1925
FreeHostReg(addr_reg.value().Index());
1926
}
1927
}
1928
1929
void CPU::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
1930
const std::optional<VirtualMemoryAddress>& address)
1931
{
1932
DebugAssert(size == MemoryAccessSize::Word && !sign);
1933
1934
const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
1935
FlushForLoadStore(address, false, use_fastmem);
1936
1937
// TODO: if address is constant, this can be simplified..
1938
1939
// If we're coming from another block, just flush the load delay and hope for the best..
1940
if (m_load_delay_dirty)
1941
UpdateLoadDelay();
1942
1943
// We'd need to be careful here if we weren't overwriting it..
1944
ComputeLoadStoreAddressArg(cf, address, addr);
1945
rvAsm->ANDI(RARG1, addr, ~0x3u);
1946
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
1947
1948
if (inst->r.rt == Reg::zero)
1949
{
1950
FreeHostReg(addr.Index());
1951
return;
1952
}
1953
1954
// lwl/lwr from a load-delayed value takes the new value, but is itself load-delayed, so the original value is
1955
// never written back. NOTE: can't trust T in cf because of the flush.
1956
const Reg rt = inst->r.rt;
1957
GPR value;
1958
if (m_load_delay_register == rt)
1959
{
1960
const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?
1961
AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) :
1962
m_load_delay_value_register;
1963
RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt);
1964
value = GPR(existing_ld_rt);
1965
}
1966
else
1967
{
1968
if constexpr (EMULATE_LOAD_DELAYS)
1969
{
1970
value = GPR(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt));
1971
if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())
1972
rvAsm->MV(value, GPR(rtreg.value()));
1973
else if (HasConstantReg(rt))
1974
EmitMov(value, GetConstantRegU32(rt));
1975
else
1976
rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));
1977
}
1978
else
1979
{
1980
value = GPR(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt));
1981
}
1982
}
1983
1984
DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
1985
rvAsm->ANDI(RARG2, addr, 3);
1986
rvAsm->SLLIW(RARG2, RARG2, 3); // *8
1987
EmitMov(RARG3, 24);
1988
rvAsm->SUBW(RARG3, RARG3, RARG2);
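// At this point RARG2 = (addr & 3) * 8 and RARG3 = 24 - RARG2, shared by the lwl and lwr paths below.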
1989
1990
if (inst->op == InstructionOp::lwl)
1991
{
1992
// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;
1993
// new_value = (value & mask) | (RWRET << (24 - shift));
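// e.g. with (addr & 3) == 0: shift = 0, so new_value = (value & 0x00FFFFFF) | (loaded_word << 24) (illustrative).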
1994
EmitMov(RSCRATCH, 0xFFFFFFu);
1995
rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);
1996
rvAsm->AND(value, value, RSCRATCH);
1997
rvAsm->SLLW(RRET, RRET, RARG3);
1998
rvAsm->OR(value, value, RRET);
1999
}
2000
else
2001
{
2002
// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);
2003
// new_value = (value & mask) | (RWRET >> shift);
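// e.g. with (addr & 3) == 0: shift = 0, the mask collapses to 0, and new_value is simply the loaded word (illustrative).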
2004
rvAsm->SRLW(RRET, RRET, RARG2);
2005
EmitMov(RSCRATCH, 0xFFFFFF00u);
2006
rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);
2007
rvAsm->AND(value, value, RSCRATCH);
2008
rvAsm->OR(value, value, RRET);
2009
}
2010
2011
FreeHostReg(addr.Index());
2012
2013
if (g_settings.gpu_pgxp_enable)
2014
{
2015
Flush(FLUSH_FOR_C_CALL);
2016
rvAsm->MV(RARG3, value);
2017
rvAsm->ANDI(RARG2, addr, ~0x3u);
2018
EmitMov(RARG1, inst->bits);
2019
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));
2020
}
2021
}
2022
2023
void CPU::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2024
const std::optional<VirtualMemoryAddress>& address)
2025
{
2026
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
2027
const auto [ptr, action] = GetGTERegisterPointer(index, true);
2028
const std::optional<GPR> addr_reg =
2029
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
2030
FlushForLoadStore(address, false, use_fastmem);
2031
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
2032
const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {
2033
return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?
2034
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
2035
RRET;
2036
});
2037
2038
switch (action)
2039
{
2040
case GTERegisterAccessAction::Ignore:
2041
{
2042
break;
2043
}
2044
2045
case GTERegisterAccessAction::Direct:
2046
{
2047
rvAsm->SW(value, PTR(ptr));
2048
break;
2049
}
2050
2051
case GTERegisterAccessAction::SignExtend16:
2052
{
2053
EmitSExtH(RARG3, value);
2054
rvAsm->SW(RARG3, PTR(ptr));
2055
break;
2056
}
2057
2058
case GTERegisterAccessAction::ZeroExtend16:
2059
{
2060
EmitUExtH(RARG3, value);
2061
rvAsm->SW(RARG3, PTR(ptr));
2062
break;
2063
}
2064
2065
case GTERegisterAccessAction::CallHandler:
2066
{
2067
Flush(FLUSH_FOR_C_CALL);
2068
rvAsm->MV(RARG2, value);
2069
EmitMov(RARG1, index);
2070
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
2071
break;
2072
}
2073
2074
case GTERegisterAccessAction::PushFIFO:
2075
{
2076
// SXY0 <- SXY1
2077
// SXY1 <- SXY2
2078
// SXY2 <- SXYP
2079
DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());
2080
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
2081
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
2082
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
2083
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
2084
rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));
2085
break;
2086
}
2087
2088
default:
2089
{
2090
Panic("Unknown action");
2091
return;
2092
}
2093
}
2094
2095
if (g_settings.gpu_pgxp_enable)
2096
{
2097
Flush(FLUSH_FOR_C_CALL);
2098
rvAsm->MV(RARG3, value);
2099
if (value.Index() != RRET.Index())
2100
FreeHostReg(value.Index());
2101
rvAsm->MV(RARG2, addr);
2102
FreeHostReg(addr_reg.value().Index());
2103
EmitMov(RARG1, inst->bits);
2104
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));
2105
}
2106
}
2107
2108
void CPU::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2109
const std::optional<VirtualMemoryAddress>& address)
2110
{
2111
AssertRegOrConstS(cf);
2112
AssertRegOrConstT(cf);
2113
2114
const std::optional<GPR> addr_reg =
2115
g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();
2116
FlushForLoadStore(address, true, use_fastmem);
2117
const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);
2118
const GPR data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;
2119
if (!cf.valid_host_t)
2120
MoveTToReg(RARG2, cf);
2121
2122
GenerateStore(addr, data, size, use_fastmem);
2123
2124
if (g_settings.gpu_pgxp_enable)
2125
{
2126
Flush(FLUSH_FOR_C_CALL);
2127
MoveMIPSRegToReg(RARG3, cf.MipsT());
2128
rvAsm->MV(RARG2, addr);
2129
EmitMov(RARG1, inst->bits);
2130
EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]);
2131
FreeHostReg(addr_reg.value().Index());
2132
}
2133
}
2134
2135
void CPU::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2136
const std::optional<VirtualMemoryAddress>& address)
2137
{
2138
DebugAssert(size == MemoryAccessSize::Word && !sign);
2139
2140
// TODO: this can take over rt's value if it's no longer needed
2141
// NOTE: can't trust T in cf because of the alloc
2142
const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));
2143
const GPR value = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
2144
if (g_settings.gpu_pgxp_enable)
2145
MoveMIPSRegToReg(value, inst->r.rt);
2146
2147
FlushForLoadStore(address, true, use_fastmem);
2148
2149
// TODO: if address is constant, this can be simplified..
2150
// We'd need to be careful here if we weren't overwriting it..
2151
ComputeLoadStoreAddressArg(cf, address, addr);
2152
rvAsm->ANDI(RARG1, addr, ~0x3u);
2153
GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });
2154
2155
rvAsm->ANDI(RSCRATCH, addr, 3);
2156
rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *8
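// RSCRATCH now holds shift = (addr & 3) * 8, reused by both the swl and swr paths below.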
2157
rvAsm->ANDI(addr, addr, ~0x3u);
2158
2159
// Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.
2160
if (!g_settings.gpu_pgxp_enable)
2161
MoveMIPSRegToReg(value, inst->r.rt);
2162
2163
if (inst->op == InstructionOp::swl)
2164
{
2165
// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;
2166
// new_value = (RWRET & mem_mask) | (value >> (24 - shift));
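// e.g. with (addr & 3) == 0: shift = 0, so new_value = (mem_word & 0xFFFFFF00) | (value >> 24) (illustrative).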
2167
EmitMov(RARG3, 0xFFFFFF00u);
2168
rvAsm->SLLW(RARG3, RARG3, RSCRATCH);
2169
rvAsm->AND(RRET, RRET, RARG3);
2170
2171
EmitMov(RARG3, 24);
2172
rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
2173
rvAsm->SRLW(value, value, RARG3);
2174
rvAsm->OR(value, value, RRET);
2175
}
2176
else
2177
{
2178
// const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);
2179
// new_value = (RWRET & mem_mask) | (value << shift);
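// e.g. with (addr & 3) == 0: shift = 0, mem_mask collapses to 0, and the whole register value is stored (illustrative).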
2180
rvAsm->SLLW(value, value, RSCRATCH);
2181
2182
EmitMov(RARG3, 24);
2183
rvAsm->SUBW(RARG3, RARG3, RSCRATCH);
2184
EmitMov(RSCRATCH, 0x00FFFFFFu);
2185
rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG3);
2186
rvAsm->AND(RRET, RRET, RSCRATCH);
2187
rvAsm->OR(value, value, RRET);
2188
}
2189
2190
if (!g_settings.gpu_pgxp_enable)
2191
{
2192
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
2193
FreeHostReg(addr.Index());
2194
}
2195
else
2196
{
2197
GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);
2198
2199
Flush(FLUSH_FOR_C_CALL);
2200
rvAsm->MV(RARG3, value);
2201
FreeHostReg(value.Index());
2202
rvAsm->MV(RARG2, addr);
2203
FreeHostReg(addr.Index());
2204
EmitMov(RARG1, inst->bits);
2205
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));
2206
}
2207
}
2208
2209
void CPU::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,
2210
const std::optional<VirtualMemoryAddress>& address)
2211
{
2212
const u32 index = static_cast<u32>(inst->r.rt.GetValue());
2213
const auto [ptr, action] = GetGTERegisterPointer(index, false);
2214
const GPR addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?
2215
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :
2216
RARG1;
2217
const GPR data = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;
2218
FlushForLoadStore(address, true, use_fastmem);
2219
ComputeLoadStoreAddressArg(cf, address, addr);
2220
2221
switch (action)
2222
{
2223
case GTERegisterAccessAction::Direct:
2224
{
2225
rvAsm->LW(data, PTR(ptr));
2226
}
2227
break;
2228
2229
case GTERegisterAccessAction::CallHandler:
2230
{
2231
// should already be flushed.. except in fastmem case
2232
Flush(FLUSH_FOR_C_CALL);
2233
EmitMov(RARG1, index);
2234
EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
2235
rvAsm->MV(data, RRET);
2236
}
2237
break;
2238
2239
default:
2240
{
2241
Panic("Unknown action");
2242
}
2243
break;
2244
}
2245
2246
GenerateStore(addr, data, size, use_fastmem);
2247
2248
if (!g_settings.gpu_pgxp_enable)
2249
{
2250
if (addr.Index() != RARG1.Index())
2251
FreeHostReg(addr.Index());
2252
}
2253
else
2254
{
2255
// TODO: This can be simplified because we don't need to validate in PGXP..
2256
Flush(FLUSH_FOR_C_CALL);
2257
rvAsm->MV(RARG3, data);
2258
FreeHostReg(data.Index());
2259
rvAsm->MV(RARG2, addr);
2260
FreeHostReg(addr.Index());
2261
EmitMov(RARG1, inst->bits);
2262
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));
2263
}
2264
}
2265
2266
void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
2267
{
2268
// TODO: we need better constant setting here.. which will need backprop
2269
AssertRegOrConstT(cf);
2270
2271
const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
2272
const u32* ptr = GetCop0RegPtr(reg);
2273
const u32 mask = GetCop0RegWriteMask(reg);
2274
if (!ptr)
2275
{
2276
Compile_Fallback();
2277
return;
2278
}
2279
2280
if (mask == 0)
2281
{
2282
// if it's a read-only register, ignore
2283
DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
2284
return;
2285
}
2286
2287
// for some registers, we need to test certain bits
2288
const bool needs_bit_test = (reg == Cop0Reg::SR);
2289
const GPR new_value = RARG1;
2290
const GPR old_value = RARG2;
2291
const GPR changed_bits = RARG3;
2292
const GPR mask_reg = RSCRATCH;
2293
2294
// Load old value
2295
rvAsm->LW(old_value, PTR(ptr));
2296
2297
// No way we fit this in an immediate..
2298
EmitMov(mask_reg, mask);
2299
2300
// update value
2301
// TODO: This is creating pointless MV instructions.. why?
2302
if (cf.valid_host_t)
2303
rvAsm->AND(new_value, CFGetRegT(cf), mask_reg);
2304
else
2305
EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);
2306
2307
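// changed_bits is only consumed for SR below, where bit 16 (cache isolation) decides whether the memory map needs
// to be refreshed.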
if (needs_bit_test)
2308
rvAsm->XOR(changed_bits, old_value, new_value);
2309
rvAsm->NOT(mask_reg, mask_reg);
2310
rvAsm->AND(old_value, old_value, mask_reg);
2311
rvAsm->OR(new_value, old_value, new_value);
2312
rvAsm->SW(new_value, PTR(ptr));
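// Net effect: *ptr = (old_value & ~mask) | (T & mask), i.e. only the writable bits of the register change.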
2313
2314
if (reg == Cop0Reg::SR)
2315
{
2316
// TODO: replace with register backup
2317
// We could just inline the whole thing..
2318
Flush(FLUSH_FOR_C_CALL);
2319
2320
Label caches_unchanged;
2321
rvAsm->SRLIW(RSCRATCH, changed_bits, 16);
2322
rvAsm->ANDI(RSCRATCH, RSCRATCH, 1);
2323
rvAsm->BEQ(RSCRATCH, zero, &caches_unchanged);
2324
EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
2325
rvAsm->LW(new_value, PTR(ptr));
2326
if (CodeCache::IsUsingFastmem())
2327
rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
2328
rvAsm->Bind(&caches_unchanged);
2329
2330
TestInterrupts(RARG1);
2331
}
2332
else if (reg == Cop0Reg::CAUSE)
2333
{
2334
rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
2335
TestInterrupts(RARG1);
2336
}
2337
else if (reg == Cop0Reg::DCIC || reg == Cop0Reg::BPCM)
2338
{
2339
// need to check whether we're switching to debug mode
2340
Flush(FLUSH_FOR_C_CALL);
2341
EmitCall(reinterpret_cast<const void*>(&CPU::UpdateDebugDispatcherFlag));
2342
SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
2343
BackupHostState();
2344
Flush(FLUSH_FOR_EARLY_BLOCK_EXIT);
2345
EmitCall(reinterpret_cast<const void*>(&CPU::ExitExecution)); // does not return
2346
RestoreHostState();
2347
SwitchToNearCode(false);
2348
}
2349
}
2350
2351
void CPU::RISCV64Recompiler::Compile_rfe(CompileFlags cf)
2352
{
2353
// shift mode bits right two, preserving upper bits
2354
rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
2355
rvAsm->SRLIW(RSCRATCH, RARG1, 2);
2356
rvAsm->ANDI(RSCRATCH, RSCRATCH, 0xf);
2357
rvAsm->ANDI(RARG1, RARG1, ~0xfu);
2358
rvAsm->OR(RARG1, RARG1, RSCRATCH);
2359
rvAsm->SW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
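// i.e. sr = (sr & ~0xFu) | ((sr >> 2) & 0xFu), popping the KUc/IEc mode stack as RFE does.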
2360
2361
TestInterrupts(RARG1);
2362
}
2363
2364
void CPU::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)
2365
{
2366
DebugAssert(sr != RSCRATCH);
2367
2368
// if Iec == 0 then goto no_interrupt
2369
Label no_interrupt;
2370
rvAsm->ANDI(RSCRATCH, sr, 1);
2371
rvAsm->BEQZ(RSCRATCH, &no_interrupt);
2372
2373
// sr & cause
2374
rvAsm->LW(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
2375
rvAsm->AND(sr, sr, RSCRATCH);
2376
2377
// ((sr & cause) & 0xff00) == 0 goto no_interrupt
2378
rvAsm->SRLIW(sr, sr, 8);
2379
rvAsm->ANDI(sr, sr, 0xFF);
2380
SwitchToFarCode(true, &Assembler::BEQ, sr, zero);
2381
2382
BackupHostState();
2383
2384
// Update the load delay; this normally happens at the end of an instruction, but we're finishing it early.
2385
UpdateLoadDelay();
2386
2387
Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);
2388
2389
// Can't use EndBlockWithException() here, because it'll use the wrong PC.
2390
// Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
2391
if (!iinfo->is_last_instruction)
2392
{
2393
EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
2394
(inst + 1)->cop.cop_n));
2395
EmitMov(RARG2, m_compiler_pc);
2396
EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
2397
m_dirty_pc = false;
2398
EndAndLinkBlock(std::nullopt, true, false);
2399
}
2400
else
2401
{
2402
if (m_dirty_pc)
2403
EmitMov(RARG1, m_compiler_pc);
2404
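// Zeroing the downcount forces the dispatcher to run events as soon as this block exits, so the pending interrupt
// should be serviced there.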
rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));
2405
if (m_dirty_pc)
2406
rvAsm->SW(RARG1, PTR(&g_state.pc));
2407
m_dirty_pc = false;
2408
EndAndLinkBlock(std::nullopt, false, true);
2409
}
2410
2411
RestoreHostState();
2412
SwitchToNearCode(false);
2413
2414
rvAsm->Bind(&no_interrupt);
2415
}
2416
2417
void CPU::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)
2418
{
2419
const u32 index = inst->cop.Cop2Index();
2420
const Reg rt = inst->r.rt;
2421
2422
const auto [ptr, action] = GetGTERegisterPointer(index, false);
2423
if (action == GTERegisterAccessAction::Ignore)
2424
return;
2425
2426
u32 hreg;
2427
if (action == GTERegisterAccessAction::Direct)
2428
{
2429
hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
2430
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
2431
rvAsm->LW(GPR(hreg), PTR(ptr));
2432
}
2433
else if (action == GTERegisterAccessAction::CallHandler)
2434
{
2435
Flush(FLUSH_FOR_C_CALL);
2436
EmitMov(RARG1, index);
2437
EmitCall(reinterpret_cast<const void*>(&GTE::ReadRegister));
2438
2439
hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),
2440
EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);
2441
rvAsm->MV(GPR(hreg), RRET);
2442
}
2443
else
2444
{
2445
Panic("Unknown action");
2446
}
2447
2448
if (g_settings.gpu_pgxp_enable)
2449
{
2450
Flush(FLUSH_FOR_C_CALL);
2451
EmitMov(RARG1, inst->bits);
2452
rvAsm->MV(RARG2, GPR(hreg));
2453
EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));
2454
}
2455
}
2456
2457
void CPU::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)
2458
{
2459
const u32 index = inst->cop.Cop2Index();
2460
const auto [ptr, action] = GetGTERegisterPointer(index, true);
2461
if (action == GTERegisterAccessAction::Ignore)
2462
return;
2463
2464
if (action == GTERegisterAccessAction::Direct)
2465
{
2466
if (cf.const_t)
2467
StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);
2468
else
2469
rvAsm->SW(CFGetRegT(cf), PTR(ptr));
2470
}
2471
else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)
2472
{
2473
const bool sign = (action == GTERegisterAccessAction::SignExtend16);
2474
if (cf.valid_host_t)
2475
{
2476
sign ? EmitSExtH(RARG1, CFGetRegT(cf)) : EmitUExtH(RARG1, CFGetRegT(cf));
2477
rvAsm->SW(RARG1, PTR(ptr));
2478
}
2479
else if (cf.const_t)
2480
{
2481
const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));
2482
StoreConstantToCPUPointer(sign ? ::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);
2483
}
2484
else
2485
{
2486
Panic("Unsupported setup");
2487
}
2488
}
2489
else if (action == GTERegisterAccessAction::CallHandler)
2490
{
2491
Flush(FLUSH_FOR_C_CALL);
2492
EmitMov(RARG1, index);
2493
MoveTToReg(RARG2, cf);
2494
EmitCall(reinterpret_cast<const void*>(&GTE::WriteRegister));
2495
}
2496
else if (action == GTERegisterAccessAction::PushFIFO)
2497
{
2498
// SXY0 <- SXY1
2499
// SXY1 <- SXY2
2500
// SXY2 <- SXYP
2501
DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());
2502
rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));
2503
rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));
2504
rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));
2505
rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));
2506
if (cf.valid_host_t)
2507
rvAsm->SW(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));
2508
else if (cf.const_t)
2509
StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);
2510
else
2511
Panic("Unsupported setup");
2512
}
2513
else
2514
{
2515
Panic("Unknown action");
2516
}
2517
}
2518
2519
void CPU::RISCV64Recompiler::Compile_cop2(CompileFlags cf)
2520
{
2521
TickCount func_ticks;
2522
GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);
2523
2524
Flush(FLUSH_FOR_C_CALL);
2525
EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);
2526
EmitCall(reinterpret_cast<const void*>(func));
2527
2528
AddGTETicks(func_ticks);
2529
}
2530
2531
u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,
2532
TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,
2533
u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,
2534
bool is_load)
2535
{
2536
Assembler rv_asm(static_cast<u8*>(thunk_code), thunk_space);
2537
Assembler* rvAsm = &rv_asm;
2538
2539
static constexpr u32 GPR_SIZE = 8;
2540
2541
// save regs
2542
u32 num_gprs = 0;
2543
2544
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2545
{
2546
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2547
num_gprs++;
2548
}
2549
2550
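// Round up to an even number of 8-byte slots so the stack stays 16-byte aligned per the RISC-V calling convention.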
const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);
2551
2552
if (stack_size > 0)
2553
{
2554
rvAsm->ADDI(sp, sp, -static_cast<s32>(stack_size));
2555
2556
u32 stack_offset = 0;
2557
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2558
{
2559
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2560
{
2561
rvAsm->SD(GPR(i), stack_offset, sp);
2562
stack_offset += GPR_SIZE;
2563
}
2564
}
2565
}
2566
2567
if (cycles_to_add != 0)
2568
{
2569
// NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles
2570
Assert(rvIsValidSExtITypeImm(cycles_to_add));
2571
rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
2572
rvAsm->ADDIW(RSCRATCH, RSCRATCH, cycles_to_add);
2573
rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
2574
}
2575
2576
if (address_register != RARG1.Index())
2577
rvAsm->MV(RARG1, GPR(address_register));
2578
2579
if (!is_load)
2580
{
2581
if (data_register != RARG2.Index())
2582
rvAsm->MV(RARG2, GPR(data_register));
2583
}
2584
2585
switch (size)
2586
{
2587
case MemoryAccessSize::Byte:
2588
{
2589
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) :
2590
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));
2591
}
2592
break;
2593
case MemoryAccessSize::HalfWord:
2594
{
2595
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) :
2596
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));
2597
}
2598
break;
2599
case MemoryAccessSize::Word:
2600
{
2601
rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) :
2602
reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));
2603
}
2604
break;
2605
}
2606
2607
if (is_load)
2608
{
2609
const GPR dst = GPR(data_register);
2610
switch (size)
2611
{
2612
case MemoryAccessSize::Byte:
2613
{
2614
is_signed ? rvEmitSExtB(rvAsm, dst, RRET) : rvEmitUExtB(rvAsm, dst, RRET);
2615
}
2616
break;
2617
case MemoryAccessSize::HalfWord:
2618
{
2619
is_signed ? rvEmitSExtH(rvAsm, dst, RRET) : rvEmitUExtH(rvAsm, dst, RRET);
2620
}
2621
break;
2622
case MemoryAccessSize::Word:
2623
{
2624
if (dst.Index() != RRET.Index())
2625
rvAsm->MV(dst, RRET);
2626
}
2627
break;
2628
}
2629
}
2630
2631
if (cycles_to_remove != 0)
2632
{
2633
Assert(rvIsValidSExtITypeImm(-cycles_to_remove));
2634
rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));
2635
rvAsm->ADDIW(RSCRATCH, RSCRATCH, -cycles_to_remove);
2636
rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));
2637
}
2638
2639
// restore regs
2640
if (stack_size > 0)
2641
{
2642
u32 stack_offset = 0;
2643
for (u32 i = 0; i < NUM_HOST_REGS; i++)
2644
{
2645
if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))
2646
{
2647
rvAsm->LD(GPR(i), stack_offset, sp);
2648
stack_offset += GPR_SIZE;
2649
}
2650
}
2651
2652
rvAsm->ADDI(sp, sp, stack_size);
2653
}
2654
2655
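// Resume execution at the first host instruction after the patched fastmem access.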
rvEmitJmp(rvAsm, static_cast<const u8*>(code_address) + code_size);
2656
2657
return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
2658
}
2659
2660
#endif // CPU_ARCH_RISCV64
2661
2662