Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_recompiler.cpp
4214 views
1
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
4
#include "cpu_recompiler.h"
5
#include "cpu_code_cache.h"
6
#include "cpu_core_private.h"
7
#include "cpu_disasm.h"
8
#include "cpu_pgxp.h"
9
#include "settings.h"
10
11
#include "common/assert.h"
12
#include "common/log.h"
13
#include "common/small_string.h"
14
15
#include <cstdint>
16
#include <limits>
17
18
LOG_CHANNEL(Recompiler);
19
20
// TODO: direct link skip delay slot check
21
// TODO: speculative constants
22
// TODO: std::bitset in msvc has bounds checks even in release...
23
24
// PGXP load handler table, indexed by access size (0=byte, 1=halfword, 2=word)
// and a second index selecting the variant. Byte loads use CPU_LBx for both
// entries, halfwords split into LHU/LH (presumably unsigned/signed — confirm
// against the dispatch site), and word has a single LW entry.
const std::array<std::array<const void*, 2>, 3> CPU::Recompiler::Recompiler::s_pgxp_mem_load_functions = {
  {{{reinterpret_cast<const void*>(&PGXP::CPU_LBx), reinterpret_cast<const void*>(&PGXP::CPU_LBx)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LHU), reinterpret_cast<const void*>(&PGXP::CPU_LH)}},
   {{reinterpret_cast<const void*>(&PGXP::CPU_LW)}}}};
28
// PGXP store handler table, indexed by access size (0=byte, 1=halfword, 2=word).
const std::array<const void*, 3> CPU::Recompiler::Recompiler::s_pgxp_mem_store_functions = {
  {reinterpret_cast<const void*>(&PGXP::CPU_SB), reinterpret_cast<const void*>(&PGXP::CPU_SH),
   reinterpret_cast<const void*>(&PGXP::CPU_SW)}};
31
32
// Nothing to construct/destruct eagerly; all per-block state is
// (re)initialized in Reset() before each compilation.
CPU::Recompiler::Recompiler::Recompiler() = default;

CPU::Recompiler::Recompiler::~Recompiler() = default;
35
36
// Prepares the recompiler for compiling a fresh block: resets PC tracking,
// cycle counters, constant/host register tracking, load-delay bookkeeping and
// speculative state. The code_buffer/far_code_buffer parameters are not
// consumed in this base implementation; presumably the architecture-specific
// backend's Reset uses them — TODO confirm.
void CPU::Recompiler::Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space,
                                        u8* far_code_buffer, u32 far_code_space)
{
  m_block = block;
  m_compiler_pc = block->pc;
  m_cycles = 0;
  m_gte_done_cycle = 0;
  inst = nullptr;
  iinfo = nullptr;
  m_current_instruction_pc = 0;
  m_current_instruction_branch_delay_slot = false;
  m_dirty_pc = false;
  m_dirty_instruction_bits = false;
  m_dirty_gte_done_cycle = true;
  m_block_ended = false;
  m_constant_reg_values.fill(0);
  m_constant_regs_valid.reset();
  m_constant_regs_dirty.reset();

  // All host registers start unallocated.
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    ClearHostReg(i);
  m_register_alloc_counter = 0;

  // $zero is always the constant 0.
  m_constant_reg_values[static_cast<u32>(Reg::zero)] = 0;
  m_constant_regs_valid.set(static_cast<u32>(Reg::zero));

  // When emulating load delays, an unresolved delay may be live on block
  // entry, so start "dirty" and resolve it from CPU state on first use.
  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;

  InitSpeculativeRegs();
}
68
69
// Emits the common block prologue: optional manual page-protection check,
// I-cache check/update, BIOS TTY syscall hooks, then primes the
// per-instruction compile state for the first instruction.
void CPU::Recompiler::Recompiler::BeginBlock()
{
#if 0
  GenerateCall(reinterpret_cast<const void*>(&CPU::CodeCache::LogCurrentState));
#endif

  if (m_block->protection == CodeCache::PageProtectionMode::ManualCheck)
  {
    DEBUG_LOG("Generate manual protection for PC {:08X}", m_block->pc);
    // Compare RAM against the shadow copy of the block's instructions, to
    // detect the block being overwritten (self-modifying code).
    const u8* ram_ptr = Bus::g_ram + VirtualAddressToPhysical(m_block->pc);
    const u8* shadow_ptr = reinterpret_cast<const u8*>(m_block->Instructions());
    GenerateBlockProtectCheck(ram_ptr, shadow_ptr, m_block->size * sizeof(Instruction));
  }

  GenerateICacheCheckAndUpdate();

  if (g_settings.bios_tty_logging)
  {
    // 0xA0/0xB0 are the BIOS function dispatch entry points; hook them so
    // putchar-style calls can be logged.
    const u32 masked_pc = VirtualAddressToPhysical(m_block->pc);
    if (masked_pc == 0xa0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleA0Syscall));
    else if (masked_pc == 0xb0)
      GenerateCall(reinterpret_cast<const void*>(&CPU::HandleB0Syscall));
  }

  // Point at the first instruction; m_compiler_pc tracks the *next* PC.
  inst = m_block->Instructions();
  iinfo = m_block->InstructionsInfo();
  m_current_instruction_pc = m_block->pc;
  m_current_instruction_branch_delay_slot = false;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = true;
  m_dirty_instruction_bits = true;
}
102
103
// Top-level entry point: compiles an entire block to host code and returns a
// pointer to the generated code. The emitted near/far code sizes are returned
// via host_code_size/host_far_code_size, and committed to the code cache.
const void* CPU::Recompiler::Recompiler::CompileBlock(CodeCache::Block* block, u32* host_code_size,
                                                      u32* host_far_code_size)
{
  CodeCache::AlignCode(FUNCTION_ALIGNMENT);

  Reset(block, CodeCache::GetFreeCodePointer(), CodeCache::GetFreeCodeSpace(), CodeCache::GetFreeFarCodePointer(),
        CodeCache::GetFreeFarCodeSpace());

  DEBUG_LOG("Block range: {:08X} -> {:08X}", block->pc, block->pc + block->size * 4);

  BeginBlock();

  // Compile instruction-by-instruction until something ends the block, or we
  // hit the last analyzed instruction.
  for (;;)
  {
    CompileInstruction();

    if (m_block_ended || iinfo->is_last_instruction)
    {
      if (!m_block_ended)
      {
        // Block was truncated. Link it.
        EndBlock(m_compiler_pc, false);
      }

      break;
    }

    inst++;
    iinfo++;
    m_current_instruction_pc += sizeof(Instruction);
    m_compiler_pc += sizeof(Instruction);
    m_dirty_pc = true;
    m_dirty_instruction_bits = true;
  }

  // Nothing should be valid anymore
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
    DebugAssert(!IsHostRegAllocated(i));
  for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    DebugAssert(!m_constant_regs_dirty.test(i) && !m_constant_regs_valid.test(i));
  m_speculative_constants.memory.clear();

  // Finalize and commit the emitted near/far code.
  u32 code_size, far_code_size;
  const void* code = EndCompile(&code_size, &far_code_size);
  *host_code_size = code_size;
  *host_far_code_size = far_code_size;
  CodeCache::CommitCode(code_size);
  CodeCache::CommitFarCode(far_code_size);

  return code;
}
154
155
// Marks guest register r as holding the known constant v. Cancels any
// in-flight load delay targeting r, frees any host register caching the old
// value, and marks the constant dirty so it is written back to CPU state on
// the next flush. Fix: the original indexed m_constant_reg_values with
// static_cast<u8>(r) in one place and static_cast<u32>(r) everywhere else;
// use one consistently-typed index throughout.
void CPU::Recompiler::Recompiler::SetConstantReg(Reg r, u32 v)
{
  DebugAssert(r < Reg::count && r != Reg::zero);

  // There might still be an incoming load delay which we need to cancel.
  CancelLoadDelaysToReg(r);

  const u32 idx = static_cast<u32>(r);
  if (m_constant_regs_valid.test(idx) && m_constant_reg_values[idx] == v)
  {
    // Same constant already tracked. Shouldn't be any host regs though.
    DebugAssert(!CheckHostReg(0, HR_TYPE_CPU_REG, r).has_value());
    return;
  }

  m_constant_reg_values[idx] = v;
  m_constant_regs_valid.set(idx);
  m_constant_regs_dirty.set(idx);

  // The constant supersedes any cached host copy of this register.
  if (const std::optional<u32> hostreg = CheckHostReg(0, HR_TYPE_CPU_REG, r); hostreg.has_value())
  {
    DEBUG_LOG("Discarding guest register {} in host register {} due to constant set", GetRegName(r),
              GetHostRegName(hostreg.value()));
    FreeHostReg(hostreg.value());
  }
}
180
181
// Drops a pending delayed load if it targets the given guest register, e.g.
// because the register is about to be overwritten before the delay completes.
void CPU::Recompiler::Recompiler::CancelLoadDelaysToReg(Reg reg)
{
  // Nothing to do unless the pending load targets this register.
  if (m_load_delay_register == reg)
  {
    DEBUG_LOG("Cancelling load delay to {}", GetRegName(reg));

    m_load_delay_register = Reg::count;

    const u32 value_reg = m_load_delay_value_register;
    if (value_reg != NUM_HOST_REGS)
      ClearHostReg(value_reg);
  }
}
191
192
// Advances load-delay state at an instruction boundary: resolves a "dirty"
// (inherited-from-CPU-state) delay, commits the current delayed load, and
// promotes any next-instruction delay into the current slot.
void CPU::Recompiler::Recompiler::UpdateLoadDelay()
{
  if (m_load_delay_dirty)
  {
    // we shouldn't have a static load delay.
    DebugAssert(!HasLoadDelay());

    // have to invalidate registers, we might have one of them cached
    // TODO: double check the order here, will we trash a new value? we shouldn't...
    // thankfully since this only happens on the first instruction, we can get away with just killing anything which
    // isn't in write mode, because nothing could've been written before it, and the new value overwrites any
    // load-delayed value
    DEBUG_LOG("Invalidating non-dirty registers, and flushing load delay from state");

    constexpr u32 req_flags = (HR_ALLOCATED | HR_MODE_WRITE);

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      // keep dirty (write-mode) registers — they hold values newer than any
      // pending delayed load
      if (ra.type != HR_TYPE_CPU_REG || !IsHostRegAllocated(i) || ((ra.flags & req_flags) == req_flags))
        continue;

      DEBUG_LOG("Freeing non-dirty cached register {} in {}", GetRegName(ra.reg), GetHostRegName(i));
      DebugAssert(!(ra.flags & HR_MODE_WRITE));
      ClearHostReg(i);
    }

    // remove any non-dirty constants too
    for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
    {
      if (!HasConstantReg(static_cast<Reg>(i)) || HasDirtyConstantReg(static_cast<Reg>(i)))
        continue;

      DEBUG_LOG("Clearing non-dirty constant {}", GetRegName(static_cast<Reg>(i)));
      ClearConstantReg(static_cast<Reg>(i));
    }

    Flush(FLUSH_LOAD_DELAY_FROM_STATE);
  }

  // commit the delayed register load
  FinishLoadDelay();

  // move next load delay forward
  if (m_next_load_delay_register != Reg::count)
  {
    // if it somehow got flushed, read it back in.
    if (m_next_load_delay_value_register == NUM_HOST_REGS)
    {
      AllocateHostReg(HR_MODE_READ, HR_TYPE_NEXT_LOAD_DELAY_VALUE, m_next_load_delay_register);
      DebugAssert(m_next_load_delay_value_register != NUM_HOST_REGS);
    }

    // retype the host register from "next delay" to "current delay", marking
    // it writable so it is stored to load_delay_value if it gets flushed
    HostRegAlloc& ra = m_host_regs[m_next_load_delay_value_register];
    ra.flags |= HR_MODE_WRITE;
    ra.type = HR_TYPE_LOAD_DELAY_VALUE;

    m_load_delay_register = m_next_load_delay_register;
    m_load_delay_value_register = m_next_load_delay_value_register;
    m_next_load_delay_register = Reg::count;
    m_next_load_delay_value_register = NUM_HOST_REGS;
  }
}
255
256
// Commits the pending delayed load: the delayed value becomes the guest
// register's live value, cached dirty (read+write) in its host register.
// No-op when no delay is pending.
void CPU::Recompiler::Recompiler::FinishLoadDelay()
{
  DebugAssert(!m_load_delay_dirty);
  if (!HasLoadDelay())
    return;

  // we may need to reload the value..
  if (m_load_delay_value_register == NUM_HOST_REGS)
  {
    AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, m_load_delay_register);
    DebugAssert(m_load_delay_value_register != NUM_HOST_REGS);
  }

  // kill any (old) cached value for this register
  DeleteMIPSReg(m_load_delay_register, false);

  DEBUG_LOG("Finished delayed load to {} in host register {}", GetRegName(m_load_delay_register),
            GetHostRegName(m_load_delay_value_register));

  // and swap the mode over so it gets written back later
  HostRegAlloc& ra = m_host_regs[m_load_delay_value_register];
  DebugAssert(ra.reg == m_load_delay_register);
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_ALLOCATED | HR_MODE_READ | HR_MODE_WRITE;
  ra.counter = m_register_alloc_counter++;
  ra.type = HR_TYPE_CPU_REG;

  // constants are gone
  DEBUG_LOG("Clearing constant in {} due to load delay", GetRegName(m_load_delay_register));
  ClearConstantReg(m_load_delay_register);

  m_load_delay_register = Reg::count;
  m_load_delay_value_register = NUM_HOST_REGS;
}
289
290
// Commits a pending delayed load only if it targets the given register.
// A dirty (inter-block) delay must go through the full UpdateLoadDelay path.
void CPU::Recompiler::Recompiler::FinishLoadDelayToReg(Reg reg)
{
  if (m_load_delay_dirty)
  {
    // inter-block :(
    UpdateLoadDelay();
  }
  else if (m_load_delay_register == reg)
  {
    FinishLoadDelay();
  }
}
304
305
// Allocation flags for a register holding a new delayed-load value. With
// PGXP enabled, a callee-saved register is requested in addition to write
// mode (per g_settings.gpu_pgxp_enable).
u32 CPU::Recompiler::Recompiler::GetFlagsForNewLoadDelayedReg() const
{
  u32 flags = HR_MODE_WRITE;
  if (g_settings.gpu_pgxp_enable)
    flags |= HR_CALLEE_SAVED;
  return flags;
}
309
310
// Forgets any tracked constant for guest register r (value, valid and dirty
// bits). $zero is excluded — it stays permanently constant.
void CPU::Recompiler::Recompiler::ClearConstantReg(Reg r)
{
  DebugAssert(r < Reg::count && r != Reg::zero);

  const u32 idx = static_cast<u32>(r);
  m_constant_reg_values[idx] = 0;
  m_constant_regs_valid.reset(idx);
  m_constant_regs_dirty.reset(idx);
}
317
318
void CPU::Recompiler::Recompiler::FlushConstantRegs(bool invalidate)
319
{
320
for (u32 i = 1; i < static_cast<u32>(Reg::count); i++)
321
{
322
if (m_constant_regs_dirty.test(static_cast<u32>(i)))
323
FlushConstantReg(static_cast<Reg>(i));
324
if (invalidate)
325
ClearConstantReg(static_cast<Reg>(i));
326
}
327
}
328
329
// Destination register field (rd) of the instruction currently being compiled.
CPU::Reg CPU::Recompiler::Recompiler::MipsD() const
{
  const Reg dest = inst->r.rd;
  return dest;
}
333
334
// Computes the branch target for a conditional branch: PC-relative,
// sign-extended immediate shifted left by two.
u32 CPU::Recompiler::Recompiler::GetConditionalBranchTarget(CompileFlags cf) const
{
  // When the delay slot was swapped, m_compiler_pc was already advanced past
  // it, so step back one instruction to get the branch's own base PC.
  u32 base_pc = m_compiler_pc;
  if (cf.delay_slot_swapped)
    base_pc -= sizeof(Instruction);

  return base_pc + (inst->i.imm_sext32() << 2);
}
340
341
// Return address (link value) for a branch: the instruction after the delay
// slot. When the delay slot was swapped, m_compiler_pc already points there.
u32 CPU::Recompiler::Recompiler::GetBranchReturnAddress(CompileFlags cf) const
{
  if (cf.delay_slot_swapped)
    return m_compiler_pc;

  return m_compiler_pc + sizeof(Instruction);
}
346
347
// Attempts to compile the branch delay-slot instruction *before* the branch
// itself, which lets the branch be emitted last. Only done when the delay-slot
// instruction is on the whitelist below and does not conflict with the
// branch's own registers (rs/rt read by the branch, rd written by it).
// Returns true when the slot was compiled here; compile state is restored to
// the branch instruction before returning.
bool CPU::Recompiler::Recompiler::TrySwapDelaySlot(Reg rs, Reg rt, Reg rd)
{
  if constexpr (!SWAP_BRANCH_DELAY_SLOTS)
    return false;

  const Instruction* next_instruction = inst + 1;
  DebugAssert(next_instruction < (m_block->Instructions() + m_block->size));

  const Reg opcode_rs = next_instruction->r.rs;
  const Reg opcode_rt = next_instruction->r.rt;
  const Reg opcode_rd = next_instruction->r.rd;

#if defined(_DEBUG) || defined(_DEVEL)
  TinyString disasm;
  DisassembleInstruction(&disasm, m_current_instruction_pc + 4, next_instruction->bits);
#endif

  // Just in case we read it in the instruction.. but the block should end after this.
  const Instruction* const backup_instruction = inst;
  const u32 backup_instruction_pc = m_current_instruction_pc;
  const bool backup_instruction_delay_slot = m_current_instruction_branch_delay_slot;

  if (next_instruction->bits == 0)
  {
    // nop
    goto is_safe;
  }

  // can't swap when the branch is the first instruction because of bloody load delays
  if ((EMULATE_LOAD_DELAYS && m_block->pc == m_current_instruction_pc) || m_load_delay_dirty ||
      (HasLoadDelay() && (m_load_delay_register == rs || m_load_delay_register == rt || m_load_delay_register == rd)))
  {
    goto is_unsafe;
  }

  switch (next_instruction->op)
  {
      // I-type ALU ops and loads: unsafe only if the slot writes a register
      // the branch reads (rt is the destination here), or the branch's link
      // register feeds the slot's operands.
    case InstructionOp::addi:
    case InstructionOp::addiu:
    case InstructionOp::slti:
    case InstructionOp::sltiu:
    case InstructionOp::andi:
    case InstructionOp::ori:
    case InstructionOp::xori:
    case InstructionOp::lui:
    case InstructionOp::lb:
    case InstructionOp::lh:
    case InstructionOp::lwl:
    case InstructionOp::lw:
    case InstructionOp::lbu:
    case InstructionOp::lhu:
    case InstructionOp::lwr:
    {
      if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
          (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
      {
        goto is_unsafe;
      }
    }
    break;

      // Stores and GTE loads/stores write no GPRs, so they never conflict.
    case InstructionOp::sb:
    case InstructionOp::sh:
    case InstructionOp::swl:
    case InstructionOp::sw:
    case InstructionOp::swr:
    case InstructionOp::lwc2:
    case InstructionOp::swc2:
      break;

    case InstructionOp::funct: // SPECIAL
    {
      switch (next_instruction->r.funct)
      {
          // R-type ALU ops: destination is rd.
        case InstructionFunct::sll:
        case InstructionFunct::srl:
        case InstructionFunct::sra:
        case InstructionFunct::sllv:
        case InstructionFunct::srlv:
        case InstructionFunct::srav:
        case InstructionFunct::add:
        case InstructionFunct::addu:
        case InstructionFunct::sub:
        case InstructionFunct::subu:
        case InstructionFunct::and_:
        case InstructionFunct::or_:
        case InstructionFunct::xor_:
        case InstructionFunct::nor:
        case InstructionFunct::slt:
        case InstructionFunct::sltu:
        {
          if ((rs != Reg::zero && rs == opcode_rd) || (rt != Reg::zero && rt == opcode_rd) ||
              (rd != Reg::zero && (rd == opcode_rs || rd == opcode_rt)))
          {
            goto is_unsafe;
          }
        }
        break;

          // mult/div only write HI/LO, never a GPR.
        case InstructionFunct::mult:
        case InstructionFunct::multu:
        case InstructionFunct::div:
        case InstructionFunct::divu:
          break;

        default:
          goto is_unsafe;
      }
    }
    break;

    case InstructionOp::cop0: // COP0
    case InstructionOp::cop1: // COP1
    case InstructionOp::cop2: // COP2
    case InstructionOp::cop3: // COP3
    {
      if (next_instruction->cop.IsCommonInstruction())
      {
        switch (next_instruction->cop.CommonOp())
        {
            // moves from coprocessor write rt
          case CopCommonInstruction::mfcn: // MFC0
          case CopCommonInstruction::cfcn: // CFC0
          {
            if ((rs != Reg::zero && rs == opcode_rt) || (rt != Reg::zero && rt == opcode_rt) ||
                (rd != Reg::zero && rd == opcode_rt))
            {
              goto is_unsafe;
            }
          }
          break;

            // moves to coprocessor write no GPRs
          case CopCommonInstruction::mtcn: // MTC0
          case CopCommonInstruction::ctcn: // CTC0
            break;
        }
      }
      else
      {
        // swap when it's GTE
        if (next_instruction->op != InstructionOp::cop2)
          goto is_unsafe;
      }
    }
    break;

    default:
      goto is_unsafe;
  }

is_safe:
#if defined(_DEBUG) || defined(_DEVEL)
  DEBUG_LOG("Swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  CompileBranchDelaySlot();

  // restore compile state back to the branch instruction
  inst = backup_instruction;
  m_current_instruction_pc = backup_instruction_pc;
  m_current_instruction_branch_delay_slot = backup_instruction_delay_slot;
  return true;

is_unsafe:
#if defined(_DEBUG) || defined(_DEVEL)
  DEBUG_LOG("NOT swapping delay slot {:08X} {}", m_current_instruction_pc + 4, disasm);
#endif

  return false;
}
515
516
// Records a new compile-time PC and flags it for writeback to CPU state.
void CPU::Recompiler::Recompiler::SetCompilerPC(u32 newpc)
{
  m_dirty_pc = true;
  m_compiler_pc = newpc;
}
521
522
// Picks a host register for a new allocation. Preference order: a free
// usable register matching the requested flags (callee-saved preferred),
// otherwise evict the allocated register with the lowest counter. When a
// callee-saved register was requested and the victim is still needed, the
// victim may instead be relocated to a caller-saved register.
u32 CPU::Recompiler::Recompiler::GetFreeHostReg(u32 flags)
{
  const u32 req_flags = HR_USABLE | (flags & HR_CALLEE_SAVED);

  u32 fallback = NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    if ((m_host_regs[i].flags & (req_flags | HR_NEEDED | HR_ALLOCATED)) == req_flags)
    {
      // Prefer callee-saved registers.
      if (m_host_regs[i].flags & HR_CALLEE_SAVED)
        return i;
      else if (fallback == NUM_HOST_REGS)
        fallback = i;
    }
  }
  if (fallback != NUM_HOST_REGS)
    return fallback;

  // find register with lowest counter
  u32 lowest = NUM_HOST_REGS;
  u32 lowest_count = std::numeric_limits<u32>::max();
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    const HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (req_flags | HR_NEEDED)) != req_flags)
      continue;

    DebugAssert(ra.flags & HR_ALLOCATED);
    if (ra.type == HR_TYPE_TEMP)
    {
      // can't punt temps
      continue;
    }

    if (ra.counter < lowest_count)
    {
      lowest = i;
      lowest_count = ra.counter;
    }
  }

  // evict the chosen victim, relocating or spilling as needed
  AssertMsg(lowest != NUM_HOST_REGS, "Register allocation failed.");

  const HostRegAlloc& ra = m_host_regs[lowest];
  switch (ra.type)
  {
    case HR_TYPE_CPU_REG:
    {
      // If the register is needed later, and we're allocating a callee-saved register, try moving it to a caller-saved
      // register.
      if (iinfo->UsedTest(ra.reg) && flags & HR_CALLEE_SAVED)
      {
        u32 caller_saved_lowest = NUM_HOST_REGS;
        u32 caller_saved_lowest_count = std::numeric_limits<u32>::max();
        for (u32 i = 0; i < NUM_HOST_REGS; i++)
        {
          constexpr u32 caller_req_flags = HR_USABLE;
          constexpr u32 caller_req_mask = HR_USABLE | HR_NEEDED | HR_CALLEE_SAVED;
          const HostRegAlloc& caller_ra = m_host_regs[i];
          if ((caller_ra.flags & caller_req_mask) != caller_req_flags)
            continue;

          if (!(caller_ra.flags & HR_ALLOCATED))
          {
            // unallocated caller-saved register; ideal destination
            caller_saved_lowest = i;
            caller_saved_lowest_count = 0;
            break;
          }

          if (caller_ra.type == HR_TYPE_TEMP)
            continue;

          if (caller_ra.counter < caller_saved_lowest_count)
          {
            caller_saved_lowest = i;
            caller_saved_lowest_count = caller_ra.counter;
          }
        }

        // only worth moving if the caller-saved victim is cheaper to evict
        if (caller_saved_lowest_count < lowest_count)
        {
          DEBUG_LOG("Moving caller-saved host register {} with MIPS register {} to {} for allocation",
                    GetHostRegName(lowest), GetRegName(ra.reg), GetHostRegName(caller_saved_lowest));
          if (IsHostRegAllocated(caller_saved_lowest))
            FreeHostReg(caller_saved_lowest);
          CopyHostReg(caller_saved_lowest, lowest);
          SwapHostRegAlloc(caller_saved_lowest, lowest);
          DebugAssert(!IsHostRegAllocated(lowest));
          return lowest;
        }
      }

      DEBUG_LOG("Freeing register {} in host register {} for allocation", GetRegName(ra.reg), GetHostRegName(lowest));
    }
    break;
    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing load delay register {} in host register {} for allocation", GetHostRegName(lowest),
                GetRegName(ra.reg));
    }
    break;
    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Freeing next load delay register {} in host register {} due for allocation", GetRegName(ra.reg),
                GetHostRegName(lowest));
    }
    break;
    default:
    {
      Panic("Unknown type freed");
    }
    break;
  }

  FreeHostReg(lowest);
  return lowest;
}
642
643
// Human-readable name for a host register's read/write mode flags, used in
// debug logging.
const char* CPU::Recompiler::Recompiler::GetReadWriteModeString(u32 flags)
{
  const bool can_read = (flags & HR_MODE_READ) != 0;
  const bool can_write = (flags & HR_MODE_WRITE) != 0;

  if (can_read && can_write)
    return "read-write";
  if (can_read)
    return "read-only";
  if (can_write)
    return "write-only";
  return "UNKNOWN";
}
654
655
// Allocates (or reuses via CheckHostReg) a host register for the given
// type/guest register, loading the initial value and handling constant and
// load-delay bookkeeping according to the requested read/write mode flags.
u32 CPU::Recompiler::Recompiler::AllocateHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                 Reg reg /* = Reg::count */)
{
  // Cancel any load delays before booting anything out
  if (flags & HR_MODE_WRITE && (type == HR_TYPE_CPU_REG || type == HR_TYPE_NEXT_LOAD_DELAY_VALUE))
    CancelLoadDelaysToReg(reg);

  // Already have a matching type?
  if (type != HR_TYPE_TEMP)
  {
    const std::optional<u32> check_reg = CheckHostReg(flags, type, reg);

    // shouldn't be allocating >1 load delay in a single instruction..
    // TODO: prefer callee saved registers for load delay
    DebugAssert((type != HR_TYPE_LOAD_DELAY_VALUE && type != HR_TYPE_NEXT_LOAD_DELAY_VALUE) || !check_reg.has_value());
    if (check_reg.has_value())
      return check_reg.value();
  }

  const u32 hreg = GetFreeHostReg(flags);
  HostRegAlloc& ra = m_host_regs[hreg];
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | (flags & ALLOWED_HR_FLAGS) | HR_ALLOCATED | HR_NEEDED;
  ra.type = type;
  ra.reg = reg;
  ra.counter = m_register_alloc_counter++;

  switch (type)
  {
    case HR_TYPE_CPU_REG:
    {
      DebugAssert(reg != Reg::zero);

      DEBUG_LOG("Allocate host reg {} to guest reg {} in {} mode", GetHostRegName(hreg), GetRegName(reg),
                GetReadWriteModeString(flags));

      if (flags & HR_MODE_READ)
      {
        DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);

        if (HasConstantReg(reg))
        {
          // may as well flush it now: load the constant into the host reg and
          // mark the reg dirty, rather than writing the constant to memory
          DEBUG_LOG("Flush constant register in guest reg {} to host reg {}", GetRegName(reg), GetHostRegName(hreg));
          LoadHostRegWithConstant(hreg, GetConstantRegU32(reg));
          m_constant_regs_dirty.reset(static_cast<u8>(reg));
          ra.flags |= HR_MODE_WRITE;
        }
        else
        {
          LoadHostRegFromCPUPointer(hreg, &g_state.regs.r[static_cast<u8>(reg)]);
        }
      }

      if (flags & HR_MODE_WRITE && HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg),
                  GetHostRegName(hreg));

        ClearConstantReg(reg);
      }
    }
    break;

    case HR_TYPE_LOAD_DELAY_VALUE:
    {
      DebugAssert(!m_load_delay_dirty && (!HasLoadDelay() || !(flags & HR_MODE_WRITE)));
      DEBUG_LOG("Allocating load delayed guest register {} in host reg {} in {} mode", GetRegName(reg),
                GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_load_delay_register = reg;
      m_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.load_delay_value);
    }
    break;

    case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
    {
      DEBUG_LOG("Allocating next load delayed guest register {} in host reg {} in {} mode", GetRegName(reg),
                GetHostRegName(hreg), GetReadWriteModeString(flags));
      m_next_load_delay_register = reg;
      m_next_load_delay_value_register = hreg;
      if (flags & HR_MODE_READ)
        LoadHostRegFromCPUPointer(hreg, &g_state.next_load_delay_value);
    }
    break;

    case HR_TYPE_TEMP:
    {
      DebugAssert(!(flags & (HR_MODE_READ | HR_MODE_WRITE)));
      DEBUG_LOG("Allocate host reg {} as temporary", GetHostRegName(hreg));
    }
    break;

    default:
      Panic("Unknown type");
      break;
  }

  return hreg;
}
756
757
// Looks for an existing host register holding (type, reg). If found, its mode
// is upgraded per the requested flags (clearing any tracked constant when
// write mode is added), its counter is refreshed, and it is relocated to a
// callee-saved register if one was requested but not held. Returns nullopt
// when the value is not cached.
std::optional<u32> CPU::Recompiler::Recompiler::CheckHostReg(u32 flags, HostRegAllocType type /* = HR_TYPE_TEMP */,
                                                             Reg reg /* = Reg::count */)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if (!(ra.flags & HR_ALLOCATED) || ra.type != type || ra.reg != reg)
      continue;

    DebugAssert(ra.flags & HR_MODE_READ);
    if (flags & HR_MODE_WRITE)
    {
      DebugAssert(type == HR_TYPE_CPU_REG);
      if (!(ra.flags & HR_MODE_WRITE))
        DEBUG_LOG("Switch guest reg {} from read to read-write in host reg {}", GetRegName(reg), GetHostRegName(i));

      if (HasConstantReg(reg))
      {
        DebugAssert(reg != Reg::zero);
        DEBUG_LOG("Clearing constant register in guest reg {} due to write mode in {}", GetRegName(reg),
                  GetHostRegName(i));

        ClearConstantReg(reg);
      }
    }

    ra.flags |= (flags & ALLOWED_HR_FLAGS) | HR_NEEDED;
    ra.counter = m_register_alloc_counter++;

    // Need a callee saved reg?
    if (flags & HR_CALLEE_SAVED && !(ra.flags & HR_CALLEE_SAVED))
    {
      // Need to move it to one which is
      const u32 new_reg = GetFreeHostReg(HR_CALLEE_SAVED);
      DEBUG_LOG("Rename host reg {} to {} for callee saved", GetHostRegName(i), GetHostRegName(new_reg));

      CopyHostReg(new_reg, i);
      SwapHostRegAlloc(i, new_reg);
      DebugAssert(!IsHostRegAllocated(i));
      return new_reg;
    }

    return i;
  }

  return std::nullopt;
}
804
805
// Convenience wrapper: allocates a temporary host register, which carries no
// guest register association.
u32 CPU::Recompiler::Recompiler::AllocateTempHostReg(u32 flags)
{
  return AllocateHostReg(flags, HR_TYPE_TEMP, Reg::count);
}
809
810
// Exchanges the allocation state of two host registers. The immutable flag
// bits describe the physical register and stay put; everything else travels
// with the allocation.
void CPU::Recompiler::Recompiler::SwapHostRegAlloc(u32 lhs, u32 rhs)
{
  HostRegAlloc& a = m_host_regs[lhs];
  HostRegAlloc& b = m_host_regs[rhs];

  const u8 a_mutable = a.flags & ~IMMUTABLE_HR_FLAGS;
  const u8 b_mutable = b.flags & ~IMMUTABLE_HR_FLAGS;
  a.flags = (a.flags & IMMUTABLE_HR_FLAGS) | b_mutable;
  b.flags = (b.flags & IMMUTABLE_HR_FLAGS) | a_mutable;

  std::swap(a.type, b.type);
  std::swap(a.reg, b.reg);
  std::swap(a.counter, b.counter);
}
822
823
// Writes a host register's dirty value back to the CPU state struct (guest
// register, or the current/next load-delay slot) and downgrades it to read
// mode. No-op for clean registers.
void CPU::Recompiler::Recompiler::FlushHostReg(u32 reg)
{
  HostRegAlloc& ra = m_host_regs[reg];
  if (ra.flags & HR_MODE_WRITE)
  {
    switch (ra.type)
    {
      case HR_TYPE_CPU_REG:
      {
        DebugAssert(ra.reg > Reg::zero && ra.reg < Reg::count);
        DEBUG_LOG("Flushing register {} in host register {} to state", GetRegName(ra.reg), GetHostRegName(reg));
        StoreHostRegToCPUPointer(reg, &g_state.regs.r[static_cast<u8>(ra.reg)]);
      }
      break;

      case HR_TYPE_LOAD_DELAY_VALUE:
      {
        DebugAssert(m_load_delay_value_register == reg);
        DEBUG_LOG("Flushing load delayed register {} in host register {} to state", GetRegName(ra.reg),
                  GetHostRegName(reg));

        StoreHostRegToCPUPointer(reg, &g_state.load_delay_value);
        m_load_delay_value_register = NUM_HOST_REGS;
      }
      break;

      case HR_TYPE_NEXT_LOAD_DELAY_VALUE:
      {
        DebugAssert(m_next_load_delay_value_register == reg);
        // logged at warning level, unlike the cases above
        WARNING_LOG("Flushing NEXT load delayed register {} in host register {} to state", GetRegName(ra.reg),
                    GetHostRegName(reg));

        StoreHostRegToCPUPointer(reg, &g_state.next_load_delay_value);
        m_next_load_delay_value_register = NUM_HOST_REGS;
      }
      break;

      default:
        break;
    }

    // value is now in sync with memory; keep it readable but no longer dirty
    ra.flags = (ra.flags & ~HR_MODE_WRITE) | HR_MODE_READ;
  }
}
867
868
// Releases a host register: writes back any dirty value first, then marks it
// unallocated.
void CPU::Recompiler::Recompiler::FreeHostReg(u32 reg)
{
  DebugAssert(IsHostRegAllocated(reg));
  DEBUG_LOG("Freeing host register {}", GetHostRegName(reg));
  FlushHostReg(reg);
  ClearHostReg(reg);
}
875
876
// Resets a host register to the unallocated state without any writeback.
// Only the immutable physical-register attribute bits survive.
void CPU::Recompiler::Recompiler::ClearHostReg(u32 reg)
{
  HostRegAlloc& alloc = m_host_regs[reg];
  alloc.flags &= IMMUTABLE_HR_FLAGS;
  alloc.reg = Reg::count;
  alloc.counter = 0;
  alloc.type = HR_TYPE_TEMP;
}
884
885
// Flags every allocated host register holding (type, reg) as needed for the
// current instruction, protecting it from eviction.
void CPU::Recompiler::Recompiler::MarkRegsNeeded(HostRegAllocType type, Reg reg)
{
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& alloc = m_host_regs[i];
    const bool matches = (alloc.flags & HR_ALLOCATED) && alloc.type == type && alloc.reg == reg;
    if (matches)
      alloc.flags |= HR_NEEDED;
  }
}
894
895
// Repurposes an already-allocated host register to represent a different
// guest register (or become the next load-delay value, or a temp) without
// copying its value. Any existing holder of the new identity is discarded
// without writeback — the renamed register's value supersedes it.
void CPU::Recompiler::Recompiler::RenameHostReg(u32 reg, u32 new_flags, HostRegAllocType new_type, Reg new_reg)
{
  // only supported for cpu regs for now
  DebugAssert(new_type == HR_TYPE_TEMP || new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE);

  const std::optional<u32> old_reg = CheckHostReg(0, new_type, new_reg);
  if (old_reg.has_value())
  {
    // don't writeback
    ClearHostReg(old_reg.value());
  }

  // kill any load delay to this reg
  if (new_type == HR_TYPE_CPU_REG || new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
    CancelLoadDelaysToReg(new_reg);

  if (new_type == HR_TYPE_CPU_REG)
  {
    DEBUG_LOG("Renaming host reg {} to guest reg {}", GetHostRegName(reg), GetRegName(new_reg));
  }
  else if (new_type == HR_TYPE_NEXT_LOAD_DELAY_VALUE)
  {
    DEBUG_LOG("Renaming host reg {} to load delayed guest reg {}", GetHostRegName(reg), GetRegName(new_reg));
    DebugAssert(m_next_load_delay_register == Reg::count && m_next_load_delay_value_register == NUM_HOST_REGS);
    m_next_load_delay_register = new_reg;
    m_next_load_delay_value_register = reg;
  }
  else
  {
    DEBUG_LOG("Renaming host reg {} to temp", GetHostRegName(reg));
  }

  // rewrite the allocation in place; flags come from the caller
  HostRegAlloc& ra = m_host_regs[reg];
  ra.flags = (ra.flags & IMMUTABLE_HR_FLAGS) | HR_NEEDED | HR_ALLOCATED | (new_flags & ALLOWED_HR_FLAGS);
  ra.counter = m_register_alloc_counter++;
  ra.type = new_type;
  ra.reg = new_reg;
}
933
934
// Drops the HR_NEEDED flag on a single host register at the end of an
// instruction. A dirty register also becomes readable, since its value is
// now the current one.
void CPU::Recompiler::Recompiler::ClearHostRegNeeded(u32 reg)
{
  DebugAssert(reg < NUM_HOST_REGS && IsHostRegAllocated(reg));

  HostRegAlloc& alloc = m_host_regs[reg];
  if (alloc.flags & HR_MODE_WRITE)
    alloc.flags |= HR_MODE_READ;
  alloc.flags &= ~HR_NEEDED;
}
943
944
void CPU::Recompiler::Recompiler::ClearHostRegsNeeded()
945
{
946
for (u32 i = 0; i < NUM_HOST_REGS; i++)
947
{
948
HostRegAlloc& ra = m_host_regs[i];
949
if (!(ra.flags & HR_ALLOCATED))
950
continue;
951
952
// shouldn't have any temps left
953
DebugAssert(ra.type != HR_TYPE_TEMP);
954
955
if (ra.flags & HR_MODE_WRITE)
956
ra.flags |= HR_MODE_READ;
957
958
ra.flags &= ~HR_NEEDED;
959
}
960
}
961
962
// Removes every trace of a guest register from the allocator: any host
// register caching it (optionally written back first) and any tracked
// constant.
void CPU::Recompiler::Recompiler::DeleteMIPSReg(Reg reg, bool flush)
{
  DebugAssert(reg != Reg::zero);

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& alloc = m_host_regs[i];
    const bool holds_reg = (alloc.flags & HR_ALLOCATED) && alloc.type == HR_TYPE_CPU_REG && alloc.reg == reg;
    if (!holds_reg)
      continue;

    if (flush)
      FlushHostReg(i);
    ClearHostReg(i);
    ClearConstantReg(reg);
    return;
  }

  // no host copy exists; there may still be a (possibly dirty) constant
  if (flush)
    FlushConstantReg(reg);
  ClearConstantReg(reg);
}
983
984
// Attempts to implement "to = op(from, ...)" by renaming from's host register
// to hold `to` instead of copying the value. Fails when `to` aliases an
// operand or the instruction-info analysis forbids the rename.
bool CPU::Recompiler::Recompiler::TryRenameMIPSReg(Reg to, Reg from, u32 fromhost, Reg other)
{
  // can't rename when in form Rd = Rs op Rt and Rd == Rs or Rd == Rt
  if (to == from || to == other || !iinfo->RenameTest(from))
    return false;

  DEBUG_LOG("Renaming MIPS register {} to {}", GetRegName(from), GetRegName(to));

  // if `from` is still live afterwards, its old value must be written back
  // before the host register takes on the new identity
  if (iinfo->LiveTest(from))
    FlushHostReg(fromhost);

  // remove all references to renamed-to register
  DeleteMIPSReg(to, false);
  CancelLoadDelaysToReg(to);

  // and do the actual rename, new register has been modified.
  m_host_regs[fromhost].reg = to;
  m_host_regs[fromhost].flags |= HR_MODE_READ | HR_MODE_WRITE;
  return true;
}
1004
1005
// Recomputes the eviction-priority counters of allocated, not-currently-needed
// host registers based on how soon each cached guest register is next read:
// a sooner use yields a higher counter, making eviction less likely.
void CPU::Recompiler::Recompiler::UpdateHostRegCounters()
{
  const CodeCache::InstructionInfo* const info_end = m_block->InstructionsInfo() + m_block->size;

  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& ra = m_host_regs[i];
    if ((ra.flags & (HR_ALLOCATED | HR_NEEDED)) != HR_ALLOCATED)
      continue;

    // Try not to punt out load delays.
    if (ra.type != HR_TYPE_CPU_REG)
    {
      ra.counter = std::numeric_limits<u16>::max();
      continue;
    }

    DebugAssert(IsHostRegAllocated(i));
    const CodeCache::InstructionInfo* cur = iinfo;
    const Reg reg = ra.reg;
    if (!(cur->reg_flags[static_cast<u8>(reg)] & CodeCache::RI_USED))
    {
      // never read again in this block; prime eviction candidate
      ra.counter = 0;
      continue;
    }

    // order based on the number of instructions until this register is used
    u16 counter_val = std::numeric_limits<u16>::max();
    for (; cur != info_end; cur++, counter_val--)
    {
      if (cur->ReadsReg(reg))
        break;
    }

    ra.counter = counter_val;
  }
}
1042
1043
// Writes back and/or releases cached compiler state according to the FLUSH_* bits in
// 'flags'. Used before C calls, exceptions, and block exits, where host registers and
// tracked constants must agree with the in-memory CPU state.
void CPU::Recompiler::Recompiler::Flush(u32 flags)
{
  // TODO: Flush unneeded caller-saved regs (backup/replace callee-saved needed with caller-saved)
  if (flags &
      (FLUSH_FREE_UNNEEDED_CALLER_SAVED_REGISTERS | FLUSH_FREE_CALLER_SAVED_REGISTERS | FLUSH_FREE_ALL_REGISTERS))
  {
    // Build a mask of flag bits that must all be CLEAR except HR_ALLOCATED for the register
    // to be freed: freeing "all" ignores callee-saved/needed, freeing "caller-saved" spares
    // callee-saved registers, and the unneeded variant additionally spares needed ones.
    const u32 req_mask = (flags & FLUSH_FREE_ALL_REGISTERS) ?
                           HR_ALLOCATED :
                           ((flags & FLUSH_FREE_CALLER_SAVED_REGISTERS) ? (HR_ALLOCATED | HR_CALLEE_SAVED) :
                                                                          (HR_ALLOCATED | HR_CALLEE_SAVED | HR_NEEDED));
    constexpr u32 req_flags = HR_ALLOCATED;

    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if ((ra.flags & req_mask) == req_flags)
        FreeHostReg(i);
    }
  }

  if (flags & FLUSH_INVALIDATE_MIPS_REGISTERS)
  {
    // Drop every cached guest register entirely (writeback happens inside FreeHostReg),
    // then write back and forget all tracked constants.
    for (u32 i = 0; i < NUM_HOST_REGS; i++)
    {
      HostRegAlloc& ra = m_host_regs[i];
      if (ra.flags & HR_ALLOCATED && ra.type == HR_TYPE_CPU_REG)
        FreeHostReg(i);
    }

    FlushConstantRegs(true);
  }
  else
  {
    if (flags & FLUSH_FLUSH_MIPS_REGISTERS)
    {
      // Write back dirty guest registers but keep them cached in host registers.
      for (u32 i = 0; i < NUM_HOST_REGS; i++)
      {
        HostRegAlloc& ra = m_host_regs[i];
        if ((ra.flags & (HR_ALLOCATED | HR_MODE_WRITE)) == (HR_ALLOCATED | HR_MODE_WRITE) && ra.type == HR_TYPE_CPU_REG)
          FlushHostReg(i);
      }

      // flush any constant registers which are dirty too
      FlushConstantRegs(false);
    }
  }

  if (flags & FLUSH_INVALIDATE_SPECULATIVE_CONSTANTS)
    InvalidateSpeculativeValues();
}
1093
1094
// Writes a constant-tracked guest register's value back into the CPU state struct and
// clears its dirty bit. The constant remains valid/tracked afterwards.
void CPU::Recompiler::Recompiler::FlushConstantReg(Reg r)
{
  const u32 idx = static_cast<u32>(r);
  DebugAssert(m_constant_regs_valid.test(idx));

  DEBUG_LOG("Writing back register {} with constant value 0x{:08X}", GetRegName(r), m_constant_reg_values[idx]);

  // Emit a store of the literal value directly into g_state; no host register involved.
  StoreConstantToCPUPointer(m_constant_reg_values[idx], &g_state.regs.r[idx]);
  m_constant_regs_dirty.reset(idx);
}
1102
1103
// Pushes a snapshot of all per-instruction compiler state onto the backup stack, so a
// speculative compilation path (e.g. a branch's far path) can be unwound with
// RestoreHostState(). Must be balanced with exactly one RestoreHostState() call.
void CPU::Recompiler::Recompiler::BackupHostState()
{
  DebugAssert(m_host_state_backup_count < m_host_state_backup.size());

  // need to back up everything...
  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  bu.cycles = m_cycles;
  bu.gte_done_cycle = m_gte_done_cycle;
  bu.compiler_pc = m_compiler_pc;
  bu.dirty_pc = m_dirty_pc;
  bu.dirty_instruction_bits = m_dirty_instruction_bits;
  bu.dirty_gte_done_cycle = m_dirty_gte_done_cycle;
  bu.block_ended = m_block_ended;
  bu.inst = inst;
  bu.iinfo = iinfo;
  bu.current_instruction_pc = m_current_instruction_pc;
  bu.current_instruction_delay_slot = m_current_instruction_branch_delay_slot;
  bu.const_regs_valid = m_constant_regs_valid;
  bu.const_regs_dirty = m_constant_regs_dirty;
  bu.const_regs_values = m_constant_reg_values;
  bu.host_regs = m_host_regs;
  bu.register_alloc_counter = m_register_alloc_counter;
  bu.load_delay_dirty = m_load_delay_dirty;
  bu.load_delay_register = m_load_delay_register;
  bu.load_delay_value_register = m_load_delay_value_register;
  bu.next_load_delay_register = m_next_load_delay_register;
  bu.next_load_delay_value_register = m_next_load_delay_value_register;
  m_host_state_backup_count++;
}
1132
1133
// Pops the most recent BackupHostState() snapshot, restoring every field it saved.
// Fields are restored in roughly the reverse order they were saved; the container-like
// members are moved out of the backup since it will not be read again before reuse.
void CPU::Recompiler::Recompiler::RestoreHostState()
{
  DebugAssert(m_host_state_backup_count > 0);
  m_host_state_backup_count--;

  HostStateBackup& bu = m_host_state_backup[m_host_state_backup_count];
  m_host_regs = std::move(bu.host_regs);
  m_constant_reg_values = std::move(bu.const_regs_values);
  m_constant_regs_dirty = std::move(bu.const_regs_dirty);
  m_constant_regs_valid = std::move(bu.const_regs_valid);
  m_current_instruction_branch_delay_slot = bu.current_instruction_delay_slot;
  m_current_instruction_pc = bu.current_instruction_pc;
  inst = bu.inst;
  iinfo = bu.iinfo;
  m_block_ended = bu.block_ended;
  m_dirty_gte_done_cycle = bu.dirty_gte_done_cycle;
  m_dirty_instruction_bits = bu.dirty_instruction_bits;
  m_dirty_pc = bu.dirty_pc;
  m_compiler_pc = bu.compiler_pc;
  m_register_alloc_counter = bu.register_alloc_counter;
  m_load_delay_dirty = bu.load_delay_dirty;
  m_load_delay_register = bu.load_delay_register;
  m_load_delay_value_register = bu.load_delay_value_register;
  m_next_load_delay_register = bu.next_load_delay_register;
  m_next_load_delay_value_register = bu.next_load_delay_value_register;
  m_gte_done_cycle = bu.gte_done_cycle;
  m_cycles = bu.cycles;
}
1161
1162
void CPU::Recompiler::Recompiler::AddLoadStoreInfo(void* code_address, u32 code_size, u32 address_register,
1163
u32 data_register, MemoryAccessSize size, bool is_signed,
1164
bool is_load)
1165
{
1166
DebugAssert(CodeCache::IsUsingFastmem());
1167
DebugAssert(address_register < NUM_HOST_REGS);
1168
DebugAssert(data_register < NUM_HOST_REGS);
1169
1170
u32 gpr_bitmask = 0;
1171
for (u32 i = 0; i < NUM_HOST_REGS; i++)
1172
{
1173
if (IsHostRegAllocated(i))
1174
gpr_bitmask |= (1u << i);
1175
}
1176
1177
CPU::CodeCache::AddLoadStoreInfo(code_address, code_size, m_current_instruction_pc, m_block->pc, m_cycles,
1178
gpr_bitmask, static_cast<u8>(address_register), static_cast<u8>(data_register), size,
1179
is_signed, is_load);
1180
}
1181
1182
// Compiles the single guest instruction currently pointed to by 'inst'/'iinfo'.
// Dispatches on the opcode to the appropriate CompileTemplate/CompileLoadStoreTemplate
// specialization, pairing each with its PGXP handler and speculative-execution update.
// Unknown encodings fall back to the interpreter and truncate the block.
void CPU::Recompiler::Recompiler::CompileInstruction()
{
#if defined(_DEBUG) || defined(_DEVEL)
  TinyString str;
  DisassembleInstruction(&str, m_current_instruction_pc, inst->bits);
  DEBUG_LOG("Compiling{} {:08X}: {}", m_current_instruction_branch_delay_slot ? " branch delay slot" : "",
            m_current_instruction_pc, str);
#endif

  // Every instruction costs at least one cycle.
  m_cycles++;

  // NOPs still tick the load delay pipeline, but emit no code.
  if (IsNopInstruction(*inst))
  {
    UpdateLoadDelay();
    return;
  }

  switch (inst->op)
  {
#define PGXPFN(x) reinterpret_cast<const void*>(&PGXP::x)

    // clang-format off
    // TODO: PGXP for jalr

    case InstructionOp::funct:
    {
      // SPECIAL encodings: dispatch on the funct field.
      switch (inst->r.funct)
      {
        case InstructionFunct::sll: CompileTemplate(&Recompiler::Compile_sll_const, &Recompiler::Compile_sll, PGXPFN(CPU_SLL), TF_WRITES_D | TF_READS_T); SpecExec_sll(); break;
        case InstructionFunct::srl: CompileTemplate(&Recompiler::Compile_srl_const, &Recompiler::Compile_srl, PGXPFN(CPU_SRL), TF_WRITES_D | TF_READS_T); SpecExec_srl(); break;
        case InstructionFunct::sra: CompileTemplate(&Recompiler::Compile_sra_const, &Recompiler::Compile_sra, PGXPFN(CPU_SRA), TF_WRITES_D | TF_READS_T); SpecExec_sra(); break;
        case InstructionFunct::sllv: CompileTemplate(&Recompiler::Compile_sllv_const, &Recompiler::Compile_sllv, PGXPFN(CPU_SLLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_sllv(); break;
        case InstructionFunct::srlv: CompileTemplate(&Recompiler::Compile_srlv_const, &Recompiler::Compile_srlv, PGXPFN(CPU_SRLV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srlv(); break;
        case InstructionFunct::srav: CompileTemplate(&Recompiler::Compile_srav_const, &Recompiler::Compile_srav, PGXPFN(CPU_SRAV), TF_WRITES_D | TF_READS_S | TF_READS_T); SpecExec_srav(); break;
        case InstructionFunct::jr: CompileTemplate(&Recompiler::Compile_jr_const, &Recompiler::Compile_jr, nullptr, TF_READS_S); break;
        case InstructionFunct::jalr: CompileTemplate(&Recompiler::Compile_jalr_const, &Recompiler::Compile_jalr, nullptr, /*TF_WRITES_D |*/ TF_READS_S | TF_NO_NOP); SpecExec_jalr(); break;
        case InstructionFunct::syscall: Compile_syscall(); break;
        case InstructionFunct::break_: Compile_break(); break;
        case InstructionFunct::mfhi: SpecCopyReg(inst->r.rd, Reg::hi); CompileMoveRegTemplate(inst->r.rd, Reg::hi, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mthi: SpecCopyReg(Reg::hi, inst->r.rs); CompileMoveRegTemplate(Reg::hi, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mflo: SpecCopyReg(inst->r.rd, Reg::lo); CompileMoveRegTemplate(inst->r.rd, Reg::lo, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mtlo: SpecCopyReg(Reg::lo, inst->r.rs); CompileMoveRegTemplate(Reg::lo, inst->r.rs, g_settings.gpu_pgxp_cpu); break;
        case InstructionFunct::mult: CompileTemplate(&Recompiler::Compile_mult_const, &Recompiler::Compile_mult, PGXPFN(CPU_MULT), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_mult(); break;
        case InstructionFunct::multu: CompileTemplate(&Recompiler::Compile_multu_const, &Recompiler::Compile_multu, PGXPFN(CPU_MULTU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI | TF_COMMUTATIVE); SpecExec_multu(); break;
        case InstructionFunct::div: CompileTemplate(&Recompiler::Compile_div_const, &Recompiler::Compile_div, PGXPFN(CPU_DIV), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_div(); break;
        case InstructionFunct::divu: CompileTemplate(&Recompiler::Compile_divu_const, &Recompiler::Compile_divu, PGXPFN(CPU_DIVU), TF_READS_S | TF_READS_T | TF_WRITES_LO | TF_WRITES_HI); SpecExec_divu(); break;
        case InstructionFunct::add: CompileTemplate(&Recompiler::Compile_add_const, &Recompiler::Compile_add, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_add(); break;
        case InstructionFunct::addu: CompileTemplate(&Recompiler::Compile_addu_const, &Recompiler::Compile_addu, PGXPFN(CPU_ADD), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_addu(); break;
        case InstructionFunct::sub: CompileTemplate(&Recompiler::Compile_sub_const, &Recompiler::Compile_sub, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_T); SpecExec_sub(); break;
        case InstructionFunct::subu: CompileTemplate(&Recompiler::Compile_subu_const, &Recompiler::Compile_subu, PGXPFN(CPU_SUB), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_RENAME_WITH_ZERO_T); SpecExec_subu(); break;
        case InstructionFunct::and_: CompileTemplate(&Recompiler::Compile_and_const, &Recompiler::Compile_and, PGXPFN(CPU_AND_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_and(); break;
        case InstructionFunct::or_: CompileTemplate(&Recompiler::Compile_or_const, &Recompiler::Compile_or, PGXPFN(CPU_OR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_or(); break;
        case InstructionFunct::xor_: CompileTemplate(&Recompiler::Compile_xor_const, &Recompiler::Compile_xor, PGXPFN(CPU_XOR_), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_T); SpecExec_xor(); break;
        case InstructionFunct::nor: CompileTemplate(&Recompiler::Compile_nor_const, &Recompiler::Compile_nor, PGXPFN(CPU_NOR), TF_WRITES_D | TF_READS_S | TF_READS_T | TF_COMMUTATIVE); SpecExec_nor(); break;
        case InstructionFunct::slt: CompileTemplate(&Recompiler::Compile_slt_const, &Recompiler::Compile_slt, PGXPFN(CPU_SLT), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_slt(); break;
        case InstructionFunct::sltu: CompileTemplate(&Recompiler::Compile_sltu_const, &Recompiler::Compile_sltu, PGXPFN(CPU_SLTU), TF_WRITES_D | TF_READS_T | TF_READS_S); SpecExec_sltu(); break;
        default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
      }
    }
    break;

    case InstructionOp::j: Compile_j(); break;
    case InstructionOp::jal: Compile_jal(); SpecExec_jal(); break;

    case InstructionOp::b: CompileTemplate(&Recompiler::Compile_b_const, &Recompiler::Compile_b, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); SpecExec_b(); break;
    case InstructionOp::blez: CompileTemplate(&Recompiler::Compile_blez_const, &Recompiler::Compile_blez, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bgtz: CompileTemplate(&Recompiler::Compile_bgtz_const, &Recompiler::Compile_bgtz, nullptr, TF_READS_S | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::beq: CompileTemplate(&Recompiler::Compile_beq_const, &Recompiler::Compile_beq, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;
    case InstructionOp::bne: CompileTemplate(&Recompiler::Compile_bne_const, &Recompiler::Compile_bne, nullptr, TF_READS_S | TF_READS_T | TF_COMMUTATIVE | TF_CAN_SWAP_DELAY_SLOT); break;

    case InstructionOp::addi: CompileTemplate(&Recompiler::Compile_addi_const, &Recompiler::Compile_addi, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_CAN_OVERFLOW | TF_RENAME_WITH_ZERO_IMM); SpecExec_addi(); break;
    case InstructionOp::addiu: CompileTemplate(&Recompiler::Compile_addiu_const, &Recompiler::Compile_addiu, PGXPFN(CPU_ADDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_addiu(); break;
    case InstructionOp::slti: CompileTemplate(&Recompiler::Compile_slti_const, &Recompiler::Compile_slti, PGXPFN(CPU_SLTI), TF_WRITES_T | TF_READS_S); SpecExec_slti(); break;
    case InstructionOp::sltiu: CompileTemplate(&Recompiler::Compile_sltiu_const, &Recompiler::Compile_sltiu, PGXPFN(CPU_SLTIU), TF_WRITES_T | TF_READS_S); SpecExec_sltiu(); break;
    case InstructionOp::andi: CompileTemplate(&Recompiler::Compile_andi_const, &Recompiler::Compile_andi, PGXPFN(CPU_ANDI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE); SpecExec_andi(); break;
    case InstructionOp::ori: CompileTemplate(&Recompiler::Compile_ori_const, &Recompiler::Compile_ori, PGXPFN(CPU_ORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_ori(); break;
    case InstructionOp::xori: CompileTemplate(&Recompiler::Compile_xori_const, &Recompiler::Compile_xori, PGXPFN(CPU_XORI), TF_WRITES_T | TF_READS_S | TF_COMMUTATIVE | TF_RENAME_WITH_ZERO_IMM); SpecExec_xori(); break;
    case InstructionOp::lui: Compile_lui(); SpecExec_lui(); break;

    case InstructionOp::lb: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Byte, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, true); break;
    case InstructionOp::lbu: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Byte, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Byte, false); break;
    case InstructionOp::lh: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::HalfWord, false, true, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, true); break;
    case InstructionOp::lhu: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::HalfWord, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::HalfWord, false); break;
    case InstructionOp::lw: CompileLoadStoreTemplate(&Recompiler::Compile_lxx, MemoryAccessSize::Word, false, false, TF_READS_S | TF_WRITES_T | TF_LOAD_DELAY); SpecExec_lxx(MemoryAccessSize::Word, false); break;
    case InstructionOp::lwl: CompileLoadStoreTemplate(&Recompiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(false); break;
    case InstructionOp::lwr: CompileLoadStoreTemplate(&Recompiler::Compile_lwx, MemoryAccessSize::Word, false, false, TF_READS_S | /*TF_READS_T | TF_WRITES_T | */TF_LOAD_DELAY); SpecExec_lwx(true); break;
    case InstructionOp::sb: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::Byte, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Byte); break;
    case InstructionOp::sh: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::HalfWord, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::HalfWord); break;
    case InstructionOp::sw: CompileLoadStoreTemplate(&Recompiler::Compile_sxx, MemoryAccessSize::Word, true, false, TF_READS_S | TF_READS_T); SpecExec_sxx(MemoryAccessSize::Word); break;
    case InstructionOp::swl: CompileLoadStoreTemplate(&Recompiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(false); break;
    case InstructionOp::swr: CompileLoadStoreTemplate(&Recompiler::Compile_swx, MemoryAccessSize::Word, false, false, TF_READS_S /*| TF_READS_T*/); SpecExec_swx(true); break;

    case InstructionOp::cop0:
    {
      // COP0: register moves to/from the system-control coprocessor, plus rfe.
      if (inst->cop.IsCommonInstruction())
      {
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc0, PGXPFN(CPU_MFC0), TF_WRITES_T | TF_LOAD_DELAY); } SpecExec_mfc0(); break;
          case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc0, PGXPFN(CPU_MTC0), TF_READS_T); SpecExec_mtc0(); break;
          default: Compile_Fallback(); break;
        }
      }
      else
      {
        switch (inst->cop.Cop0Op())
        {
          case Cop0Instruction::rfe: CompileTemplate(nullptr, &Recompiler::Compile_rfe, nullptr, 0); SpecExec_rfe(); break;
          default: Compile_Fallback(); break;
        }
      }
    }
    break;

    case InstructionOp::cop2:
    {
      // COP2 (GTE): data/control register moves stall until the GTE is idle; everything
      // else is a GTE command.
      if (inst->cop.IsCommonInstruction())
      {
        switch (inst->cop.CommonOp())
        {
          case CopCommonInstruction::mfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
          case CopCommonInstruction::cfcn: if (inst->r.rt != Reg::zero) { CompileTemplate(nullptr, &Recompiler::Compile_mfc2, nullptr, TF_GTE_STALL); } break;
          case CopCommonInstruction::mtcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
          case CopCommonInstruction::ctcn: CompileTemplate(nullptr, &Recompiler::Compile_mtc2, PGXPFN(CPU_MTC2), TF_READS_T | TF_PGXP_WITHOUT_CPU); break;
          default: Compile_Fallback(); break;
        }
      }
      else
      {
        // GTE ops
        CompileTemplate(nullptr, &Recompiler::Compile_cop2, nullptr, TF_GTE_STALL);
      }
    }
    break;

    case InstructionOp::lwc2: CompileLoadStoreTemplate(&Recompiler::Compile_lwc2, MemoryAccessSize::Word, false, false, TF_READS_S); break;
    case InstructionOp::swc2: CompileLoadStoreTemplate(&Recompiler::Compile_swc2, MemoryAccessSize::Word, true, false, TF_GTE_STALL | TF_READS_S); SpecExec_swc2(); break;

    // swc0/lwc0/cop1/cop3 are essentially no-ops
    case InstructionOp::cop1:
    case InstructionOp::cop3:
    case InstructionOp::lwc0:
    case InstructionOp::lwc1:
    case InstructionOp::lwc3:
    case InstructionOp::swc0:
    case InstructionOp::swc1:
    case InstructionOp::swc3:
    break;

    default: Compile_Fallback(); InvalidateSpeculativeValues(); TruncateBlock(); break;
    // clang-format on

#undef PGXPFN
  }

  // Inputs of this instruction are no longer pinned, and any pending load delay from the
  // previous instruction becomes visible now.
  ClearHostRegsNeeded();
  UpdateLoadDelay();

#if 0
  // Disabled debug dump of the emitted host code. NOTE(review): relies on a 'start' code
  // pointer that is not captured in this version — would need reinstating before enabling.
  const void* end = GetCurrentCodePointer();
  if (start != end && !m_current_instruction_branch_delay_slot)
  {
    CodeCache::DisassembleAndLogHostCode(start,
                                         static_cast<u32>(static_cast<const u8*>(end) - static_cast<const u8*>(start)));
  }
#endif
}
1349
1350
// Advances to and compiles the instruction in the branch delay slot, then restores the
// not-in-delay-slot flag. 'dirty_pc' indicates whether the PC state variable must be
// treated as stale after the slot executes.
void CPU::Recompiler::Recompiler::CompileBranchDelaySlot(bool dirty_pc /* = true */)
{
  // Update load delay at the end of the previous instruction.
  UpdateLoadDelay();

  // Don't need the branch instruction's inputs.
  ClearHostRegsNeeded();

  // TODO: Move cycle add before this.
  // Step the instruction cursor and all PC bookkeeping forward by one instruction.
  inst++;
  iinfo++;
  m_current_instruction_pc += sizeof(Instruction);
  m_current_instruction_branch_delay_slot = true;
  m_compiler_pc += sizeof(Instruction);
  m_dirty_pc = dirty_pc;
  m_dirty_instruction_bits = true;

  CompileInstruction();

  m_current_instruction_branch_delay_slot = false;
}
1371
1372
void CPU::Recompiler::Recompiler::CompileTemplate(void (Recompiler::*const_func)(CompileFlags),
1373
void (Recompiler::*func)(CompileFlags), const void* pgxp_cpu_func,
1374
u32 tflags)
1375
{
1376
// TODO: This is where we will do memory operand optimization. Remember to kill constants!
1377
// TODO: Swap S and T if commutative
1378
// TODO: For and, treat as zeroing if imm is zero
1379
// TODO: Optimize slt + bne to cmp + jump
1380
// TODO: Prefer memory operands when load delay is dirty, since we're going to invalidate immediately after the first
1381
// instruction..
1382
// TODO: andi with zero -> zero const
1383
// TODO: load constant so it can be flushed if it's not overwritten later
1384
// TODO: inline PGXP ops.
1385
// TODO: don't rename on sltu.
1386
1387
bool allow_constant = static_cast<bool>(const_func);
1388
Reg rs = inst->r.rs.GetValue();
1389
Reg rt = inst->r.rt.GetValue();
1390
Reg rd = inst->r.rd.GetValue();
1391
1392
if (tflags & TF_GTE_STALL)
1393
StallUntilGTEComplete();
1394
1395
// throw away instructions writing to $zero
1396
if (!(tflags & TF_NO_NOP) && (!g_settings.cpu_recompiler_memory_exceptions || !(tflags & TF_CAN_OVERFLOW)) &&
1397
((tflags & TF_WRITES_T && rt == Reg::zero) || (tflags & TF_WRITES_D && rd == Reg::zero)))
1398
{
1399
DEBUG_LOG("Skipping instruction because it writes to zero");
1400
return;
1401
}
1402
1403
// handle rename operations
1404
if ((tflags & TF_RENAME_WITH_ZERO_T && HasConstantRegValue(rt, 0)))
1405
{
1406
DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
1407
CompileMoveRegTemplate(rd, rs, true);
1408
return;
1409
}
1410
else if ((tflags & (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE)) == (TF_RENAME_WITH_ZERO_T | TF_COMMUTATIVE) &&
1411
HasConstantRegValue(rs, 0))
1412
{
1413
DebugAssert((tflags & (TF_WRITES_D | TF_READS_S | TF_READS_T)) == (TF_WRITES_D | TF_READS_S | TF_READS_T));
1414
CompileMoveRegTemplate(rd, rt, true);
1415
return;
1416
}
1417
else if (tflags & TF_RENAME_WITH_ZERO_IMM && inst->i.imm == 0)
1418
{
1419
CompileMoveRegTemplate(rt, rs, true);
1420
return;
1421
}
1422
1423
if (pgxp_cpu_func && g_settings.gpu_pgxp_enable && ((tflags & TF_PGXP_WITHOUT_CPU) || g_settings.UsingPGXPCPUMode()))
1424
{
1425
std::array<Reg, 2> reg_args = {{Reg::count, Reg::count}};
1426
u32 num_reg_args = 0;
1427
if (tflags & TF_READS_S)
1428
reg_args[num_reg_args++] = rs;
1429
if (tflags & TF_READS_T)
1430
reg_args[num_reg_args++] = rt;
1431
if (tflags & TF_READS_LO)
1432
reg_args[num_reg_args++] = Reg::lo;
1433
if (tflags & TF_READS_HI)
1434
reg_args[num_reg_args++] = Reg::hi;
1435
1436
DebugAssert(num_reg_args <= 2);
1437
GeneratePGXPCallWithMIPSRegs(pgxp_cpu_func, inst->bits, reg_args[0], reg_args[1]);
1438
}
1439
1440
// if it's a commutative op, and we have one constant reg but not the other, swap them
1441
// TODO: make it swap when writing to T as well
1442
// TODO: drop the hack for rd == rt
1443
if (tflags & TF_COMMUTATIVE && !(tflags & TF_WRITES_T) &&
1444
((HasConstantReg(rs) && !HasConstantReg(rt)) || (tflags & TF_WRITES_D && rd == rt)))
1445
{
1446
DEBUG_LOG("Swapping S:{} and T:{} due to commutative op and constants", GetRegName(rs), GetRegName(rt));
1447
std::swap(rs, rt);
1448
}
1449
1450
CompileFlags cf = {};
1451
1452
if (tflags & TF_READS_S)
1453
{
1454
MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
1455
if (HasConstantReg(rs))
1456
cf.const_s = true;
1457
else
1458
allow_constant = false;
1459
}
1460
if (tflags & TF_READS_T)
1461
{
1462
MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
1463
if (HasConstantReg(rt))
1464
cf.const_t = true;
1465
else
1466
allow_constant = false;
1467
}
1468
if (tflags & TF_READS_LO)
1469
{
1470
MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
1471
if (HasConstantReg(Reg::lo))
1472
cf.const_lo = true;
1473
else
1474
allow_constant = false;
1475
}
1476
if (tflags & TF_READS_HI)
1477
{
1478
MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
1479
if (HasConstantReg(Reg::hi))
1480
cf.const_hi = true;
1481
else
1482
allow_constant = false;
1483
}
1484
1485
// Needed because of potential swapping
1486
if (tflags & TF_READS_S)
1487
cf.mips_s = static_cast<u8>(rs);
1488
if (tflags & (TF_READS_T | TF_WRITES_T))
1489
cf.mips_t = static_cast<u8>(rt);
1490
1491
if (allow_constant)
1492
{
1493
// woot, constant path
1494
(this->*const_func)(cf);
1495
return;
1496
}
1497
1498
UpdateHostRegCounters();
1499
1500
if (tflags & TF_CAN_SWAP_DELAY_SLOT && TrySwapDelaySlot(cf.MipsS(), cf.MipsT()))
1501
{
1502
// CompileBranchDelaySlot() clears needed, so need to reset.
1503
cf.delay_slot_swapped = true;
1504
if (tflags & TF_READS_S)
1505
MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
1506
if (tflags & TF_READS_T)
1507
MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
1508
if (tflags & TF_READS_LO)
1509
MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::lo);
1510
if (tflags & TF_READS_HI)
1511
MarkRegsNeeded(HR_TYPE_CPU_REG, Reg::hi);
1512
}
1513
1514
if (tflags & TF_READS_S &&
1515
(tflags & TF_NEEDS_REG_S || !cf.const_s || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rs)))
1516
{
1517
cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
1518
cf.const_s = false;
1519
cf.valid_host_s = true;
1520
}
1521
1522
if (tflags & TF_READS_T &&
1523
(tflags & (TF_NEEDS_REG_T | TF_WRITES_T) || !cf.const_t || (tflags & TF_WRITES_D && rd != Reg::zero && rd == rt)))
1524
{
1525
cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
1526
cf.const_t = false;
1527
cf.valid_host_t = true;
1528
}
1529
1530
if (tflags & (TF_READS_LO | TF_WRITES_LO))
1531
{
1532
cf.host_lo =
1533
AllocateHostReg(((tflags & TF_READS_LO) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_LO) ? HR_MODE_WRITE : 0u),
1534
HR_TYPE_CPU_REG, Reg::lo);
1535
cf.const_lo = false;
1536
cf.valid_host_lo = true;
1537
}
1538
1539
if (tflags & (TF_READS_HI | TF_WRITES_HI))
1540
{
1541
cf.host_hi =
1542
AllocateHostReg(((tflags & TF_READS_HI) ? HR_MODE_READ : 0u) | ((tflags & TF_WRITES_HI) ? HR_MODE_WRITE : 0u),
1543
HR_TYPE_CPU_REG, Reg::hi);
1544
cf.const_hi = false;
1545
cf.valid_host_hi = true;
1546
}
1547
1548
const HostRegAllocType write_type =
1549
(tflags & TF_LOAD_DELAY && EMULATE_LOAD_DELAYS) ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG;
1550
1551
if (tflags & TF_CAN_OVERFLOW && g_settings.cpu_recompiler_memory_exceptions)
1552
{
1553
// allocate a temp register for the result, then swap it back
1554
const u32 tempreg = AllocateHostReg(0, HR_TYPE_TEMP);
1555
;
1556
if (tflags & TF_WRITES_D)
1557
{
1558
cf.host_d = tempreg;
1559
cf.valid_host_d = true;
1560
}
1561
else if (tflags & TF_WRITES_T)
1562
{
1563
cf.host_t = tempreg;
1564
cf.valid_host_t = true;
1565
}
1566
1567
(this->*func)(cf);
1568
1569
if (tflags & TF_WRITES_D && rd != Reg::zero)
1570
{
1571
DeleteMIPSReg(rd, false);
1572
RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rd);
1573
}
1574
else if (tflags & TF_WRITES_T && rt != Reg::zero)
1575
{
1576
DeleteMIPSReg(rt, false);
1577
RenameHostReg(tempreg, HR_MODE_WRITE, write_type, rt);
1578
}
1579
else
1580
{
1581
FreeHostReg(tempreg);
1582
}
1583
}
1584
else
1585
{
1586
if (tflags & TF_WRITES_D && rd != Reg::zero)
1587
{
1588
if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rd, rs, cf.host_s, Reg::count))
1589
cf.host_d = cf.host_s;
1590
else
1591
cf.host_d = AllocateHostReg(HR_MODE_WRITE, write_type, rd);
1592
cf.valid_host_d = true;
1593
}
1594
1595
if (tflags & TF_WRITES_T && rt != Reg::zero)
1596
{
1597
if (tflags & TF_READS_S && cf.valid_host_s && TryRenameMIPSReg(rt, rs, cf.host_s, Reg::count))
1598
cf.host_t = cf.host_s;
1599
else
1600
cf.host_t = AllocateHostReg(HR_MODE_WRITE, write_type, rt);
1601
cf.valid_host_t = true;
1602
}
1603
1604
(this->*func)(cf);
1605
}
1606
}
1607
1608
// Generic compilation driver for memory access instructions. Resolves the effective
// address (constant, speculative, or register-based), decides whether the fastmem path can
// be used, allocates/identifies host registers for the address (S) and data (T) operands,
// then invokes the per-op emitter 'func'. Stores that speculatively hit the current block's
// own code truncate the block to avoid executing stale code.
void CPU::Recompiler::Recompiler::CompileLoadStoreTemplate(
  void (Recompiler::*func)(CompileFlags, MemoryAccessSize, bool, bool, const std::optional<VirtualMemoryAddress>&),
  MemoryAccessSize size, bool store, bool sign, u32 tflags)
{
  const Reg rs = inst->i.rs;
  const Reg rt = inst->i.rt;

  if (tflags & TF_GTE_STALL)
    StallUntilGTEComplete();

  CompileFlags cf = {};

  // Pin the address register (and data register for stores) so they survive allocation.
  if (tflags & TF_READS_S)
  {
    MarkRegsNeeded(HR_TYPE_CPU_REG, rs);
    cf.mips_s = static_cast<u8>(rs);
  }
  if (tflags & (TF_READS_T | TF_WRITES_T))
  {
    if (tflags & TF_READS_T)
      MarkRegsNeeded(HR_TYPE_CPU_REG, rt);
    cf.mips_t = static_cast<u8>(rt);
  }

  UpdateHostRegCounters();

  // constant address?
  std::optional<VirtualMemoryAddress> addr;
  std::optional<VirtualMemoryAddress> spec_addr;
  // Fastmem is ruled out when precise memory exceptions are enabled, when the cache is
  // isolated, or when this PC has previously faulted (already backpatched to slowmem).
  bool use_fastmem = CodeCache::IsUsingFastmem() && !g_settings.cpu_recompiler_memory_exceptions &&
                     !SpecIsCacheIsolated() && !CodeCache::HasPreviouslyFaultedOnPC(m_current_instruction_pc);
  if (HasConstantReg(rs))
  {
    // Base is a known constant: the full effective address is known at compile time.
    addr = GetConstantRegU32(rs) + inst->i.imm_sext32();
    spec_addr = addr;
    cf.const_s = true;

    if (!Bus::CanUseFastmemForAddress(addr.value()))
    {
      DEBUG_LOG("Not using fastmem for {:08X}", addr.value());
      use_fastmem = false;
    }
  }
  else
  {
    // Only a speculative guess at the address is available; use it to veto fastmem for
    // regions it cannot serve.
    spec_addr = SpecExec_LoadStoreAddr();
    if (use_fastmem && spec_addr.has_value() && !Bus::CanUseFastmemForAddress(spec_addr.value()))
    {
      DEBUG_LOG("Not using fastmem for speculative {:08X}", spec_addr.value());
      use_fastmem = false;
    }

    if constexpr (HAS_MEMORY_OPERANDS)
    {
      // don't bother caching it since we're going to flush anyway
      // TODO: make less rubbish, if it's caller saved we don't need to flush...
      const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      if (hreg.has_value())
      {
        cf.valid_host_s = true;
        cf.host_s = hreg.value();
      }
    }
    else
    {
      // need rs in a register
      cf.host_s = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rs);
      cf.valid_host_s = true;
    }
  }

  // reads T -> store, writes T -> load
  // for now, we defer the allocation to afterwards, because C call
  if (tflags & TF_READS_T)
  {
    if (HasConstantReg(rt))
    {
      cf.const_t = true;
    }
    else
    {
      if constexpr (HAS_MEMORY_OPERANDS)
      {
        const std::optional<u32> hreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        if (hreg.has_value())
        {
          cf.valid_host_t = true;
          cf.host_t = hreg.value();
        }
      }
      else
      {
        cf.host_t = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt);
        cf.valid_host_t = true;
      }
    }
  }

  // when not using fastmem, flush GTE completion cycle
  // otherwise we end up consuming more cycles, because we're only counting a single cycle for loads
  // and ram loads would have normally used up all the cycles the GTE was busy for
  if (!use_fastmem && !store)
    Flush(FLUSH_GTE_DONE_CYCLE);

  (this->*func)(cf, size, sign, use_fastmem, addr);

  // Detect self-modifying stores: a (speculative) physical target inside the remainder of
  // this block means the code we are about to emit could be stale.
  if (store && !m_block_ended && !m_current_instruction_branch_delay_slot && spec_addr.has_value() &&
      GetSegmentForAddress(spec_addr.value()) != Segment::KSEG2)
  {
    // Get rid of physical aliases.
    const u32 phys_spec_addr = VirtualAddressToPhysical(spec_addr.value());
    if (phys_spec_addr >= VirtualAddressToPhysical(m_compiler_pc) &&
        phys_spec_addr < VirtualAddressToPhysical(m_block->pc + (m_block->size * sizeof(Instruction))))
    {
      WARNING_LOG("Instruction {:08X} speculatively writes to {:08X} inside block {:08X}-{:08X}. Truncating block.",
                  m_current_instruction_pc, phys_spec_addr, m_block->pc,
                  m_block->pc + (m_block->size * sizeof(Instruction)));
      TruncateBlock();
    }
  }
}
1729
1730
void CPU::Recompiler::Recompiler::TruncateBlock()
{
  // Shrink the block so that the instruction currently being compiled becomes its
  // last one (used when a store is detected to land inside the block's own code).
  m_block->size = ((m_current_instruction_pc - m_block->pc) / sizeof(Instruction)) + 1;
  iinfo->is_last_instruction = true;
}

const TickCount* CPU::Recompiler::Recompiler::GetFetchMemoryAccessTimePtr() const
{
  // Returns a pointer to the fixed per-word fetch time for the memory region
  // containing this block's code; asserts if the region's timing is dynamic.
  const TickCount* ptr = Bus::GetMemoryAccessTimePtr(VirtualAddressToPhysical(m_block->pc), MemoryAccessSize::Word);
  AssertMsg(ptr, "Address has dynamic fetch ticks");
  return ptr;
}
1742
1743
void CPU::Recompiler::Recompiler::FlushForLoadStore(const std::optional<VirtualMemoryAddress>& address, bool store,
1744
bool use_fastmem)
1745
{
1746
if (use_fastmem)
1747
return;
1748
1749
// TODO: Stores don't need to flush GTE cycles...
1750
Flush(FLUSH_FOR_C_CALL | FLUSH_FOR_LOADSTORE);
1751
}
1752
1753
void CPU::Recompiler::Recompiler::CompileMoveRegTemplate(Reg dst, Reg src, bool pgxp_move)
{
  // Shared helper for compiling a register-to-register move. Self-moves and writes
  // to $zero are no-ops.
  if (dst == src || dst == Reg::zero)
    return;

  if (HasConstantReg(src))
  {
    // Source is a known constant, so the move is pure constant propagation: drop any
    // host register backing dst and record the constant.
    DeleteMIPSReg(dst, false);
    SetConstantReg(dst, GetConstantRegU32(src));
  }
  else
  {
    // Try to rename the host register currently holding src to dst; if that fails,
    // allocate a destination register and copy.
    const u32 srcreg = AllocateHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, src);
    if (!TryRenameMIPSReg(dst, src, srcreg, Reg::count))
    {
      const u32 dstreg = AllocateHostReg(HR_MODE_WRITE, HR_TYPE_CPU_REG, dst);
      CopyHostReg(dstreg, srcreg);
      ClearHostRegNeeded(dstreg);
    }
  }

  // TODO: This could be made better if we only did it for registers where there was a previous MFC2.
  if (g_settings.gpu_pgxp_enable && pgxp_move)
  {
    // might've been renamed, so use dst here
    GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_MOVE_Packed), PGXP::PackMoveArgs(dst, src),
                                 dst);
  }
}
1782
1783
void CPU::Recompiler::Recompiler::Compile_j()
1784
{
1785
const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2);
1786
1787
// TODO: Delay slot swap.
1788
// We could also move the cycle commit back.
1789
CompileBranchDelaySlot();
1790
EndBlock(newpc, true);
1791
}
1792
1793
void CPU::Recompiler::Recompiler::Compile_jr_const(CompileFlags cf)
{
  DebugAssert(HasConstantReg(cf.MipsS()));

  // Target is known at compile time. A misaligned target raises AdEL when memory
  // exceptions are enabled; otherwise jump straight there.
  const u32 target = GetConstantRegU32(cf.MipsS());
  if ((target & 3u) != 0 && g_settings.cpu_recompiler_memory_exceptions)
  {
    EndBlockWithException(Exception::AdEL);
  }
  else
  {
    CompileBranchDelaySlot();
    EndBlock(target, true);
  }
}
1806
1807
void CPU::Recompiler::Recompiler::Compile_jal()
1808
{
1809
const u32 newpc = (m_compiler_pc & UINT32_C(0xF0000000)) | (inst->j.target << 2);
1810
SetConstantReg(Reg::ra, GetBranchReturnAddress({}));
1811
CompileBranchDelaySlot();
1812
EndBlock(newpc, true);
1813
}
1814
1815
void CPU::Recompiler::Recompiler::Compile_jalr_const(CompileFlags cf)
1816
{
1817
DebugAssert(HasConstantReg(cf.MipsS()));
1818
const u32 newpc = GetConstantRegU32(cf.MipsS());
1819
if (MipsD() != Reg::zero)
1820
SetConstantReg(MipsD(), GetBranchReturnAddress({}));
1821
1822
CompileBranchDelaySlot();
1823
EndBlock(newpc, true);
1824
}
1825
1826
void CPU::Recompiler::Recompiler::Compile_syscall()
{
  // SYSCALL unconditionally raises the Syscall exception and ends the block.
  EndBlockWithException(Exception::Syscall);
}

void CPU::Recompiler::Recompiler::Compile_break()
{
  // BREAK unconditionally raises the Breakpoint exception and ends the block.
  EndBlockWithException(Exception::BP);
}
1835
1836
void CPU::Recompiler::Recompiler::Compile_b_const(CompileFlags cf)
{
  // BLTZ/BGEZ/BLTZAL/BGEZAL with a constant rs: the branch outcome is decided here.
  DebugAssert(HasConstantReg(cf.MipsS()));

  // rt is not a register operand for these opcodes; it encodes the condition.
  // Bit 0 selects >= 0 vs < 0, and bits 4..1 == 0b1000 selects the linking variants.
  const u8 irt = static_cast<u8>(inst->i.rt.GetValue());
  const bool bgez = ConvertToBoolUnchecked(irt & u8(1));
  const bool link = (irt & u8(0x1E)) == u8(0x10);

  const s32 rs = GetConstantRegS32(cf.MipsS());
  const bool taken = bgez ? (rs >= 0) : (rs < 0);
  const u32 taken_pc = GetConditionalBranchTarget(cf);

  // Note: $ra is written even when the branch is not taken.
  if (link)
    SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));

  CompileBranchDelaySlot();
  EndBlock(taken ? taken_pc : m_compiler_pc, true);
}
1854
1855
void CPU::Recompiler::Recompiler::Compile_b(CompileFlags cf)
{
  // rt encodes the condition: bit 0 picks >= 0 vs < 0, bits 4..1 == 0b1000 means link.
  const u8 cond_bits = static_cast<u8>(inst->i.rt.GetValue());
  const bool is_bgez = ConvertToBoolUnchecked(cond_bits & u8(1));
  const bool links = (cond_bits & u8(0x1E)) == u8(0x10);

  // $ra is written regardless of whether the branch ends up taken.
  if (links)
    SetConstantReg(Reg::ra, GetBranchReturnAddress(cf));

  Compile_bxx(cf, is_bgez ? BranchCondition::GreaterEqualZero : BranchCondition::LessThanZero);
}
1866
1867
// BLEZ: branch if rs <= 0 (runtime operand).
void CPU::Recompiler::Recompiler::Compile_blez(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::LessEqualZero);
}

// BLEZ with a constant rs: resolved at compile time.
void CPU::Recompiler::Recompiler::Compile_blez_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::LessEqualZero);
}

// BGTZ: branch if rs > 0 (runtime operand).
void CPU::Recompiler::Recompiler::Compile_bgtz(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::GreaterThanZero);
}

// BGTZ with a constant rs: resolved at compile time.
void CPU::Recompiler::Recompiler::Compile_bgtz_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::GreaterThanZero);
}

// BEQ: branch if rs == rt (runtime operands).
void CPU::Recompiler::Recompiler::Compile_beq(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::Equal);
}

// BEQ with constant rs and rt: resolved at compile time.
void CPU::Recompiler::Recompiler::Compile_beq_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::Equal);
}

// BNE: branch if rs != rt (runtime operands).
void CPU::Recompiler::Recompiler::Compile_bne(CompileFlags cf)
{
  Compile_bxx(cf, BranchCondition::NotEqual);
}

// BNE with constant rs and rt: resolved at compile time.
void CPU::Recompiler::Recompiler::Compile_bne_const(CompileFlags cf)
{
  Compile_bxx_const(cf, BranchCondition::NotEqual);
}
1906
1907
void CPU::Recompiler::Recompiler::Compile_bxx_const(CompileFlags cf, BranchCondition cond)
{
  DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));

  // Both operands are known constants, so the branch outcome is decided at
  // compile time and the block simply continues at one of two fixed PCs.
  const u32 us = GetConstantRegU32(cf.MipsS());
  const s32 ss = GetConstantRegS32(cf.MipsS());
  bool taken;
  switch (cond)
  {
    case BranchCondition::Equal:
      taken = (us == GetConstantRegU32(cf.MipsT()));
      break;

    case BranchCondition::NotEqual:
      taken = (us != GetConstantRegU32(cf.MipsT()));
      break;

    case BranchCondition::GreaterThanZero:
      taken = (ss > 0);
      break;

    case BranchCondition::GreaterEqualZero:
      taken = (ss >= 0);
      break;

    case BranchCondition::LessThanZero:
      taken = (ss < 0);
      break;

    case BranchCondition::LessEqualZero:
      taken = (ss <= 0);
      break;

    default:
      Panic("Unhandled condition");
      return;
  }

  // Compile the delay slot, then end at either the target or the fall-through PC.
  const u32 taken_pc = GetConditionalBranchTarget(cf);
  CompileBranchDelaySlot();
  EndBlock(taken ? taken_pc : m_compiler_pc, true);
}
1947
1948
void CPU::Recompiler::Recompiler::Compile_sll_const(CompileFlags cf)
1949
{
1950
DebugAssert(HasConstantReg(cf.MipsT()));
1951
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << inst->r.shamt);
1952
}
1953
1954
void CPU::Recompiler::Recompiler::Compile_srl_const(CompileFlags cf)
1955
{
1956
DebugAssert(HasConstantReg(cf.MipsT()));
1957
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> inst->r.shamt);
1958
}
1959
1960
void CPU::Recompiler::Recompiler::Compile_sra_const(CompileFlags cf)
1961
{
1962
DebugAssert(HasConstantReg(cf.MipsT()));
1963
SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> inst->r.shamt));
1964
}
1965
1966
void CPU::Recompiler::Recompiler::Compile_sllv_const(CompileFlags cf)
1967
{
1968
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1969
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) << (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
1970
}
1971
1972
void CPU::Recompiler::Recompiler::Compile_srlv_const(CompileFlags cf)
1973
{
1974
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1975
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu));
1976
}
1977
1978
void CPU::Recompiler::Recompiler::Compile_srav_const(CompileFlags cf)
1979
{
1980
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1981
SetConstantReg(MipsD(), static_cast<u32>(GetConstantRegS32(cf.MipsT()) >> (GetConstantRegU32(cf.MipsS()) & 0x1Fu)));
1982
}
1983
1984
void CPU::Recompiler::Recompiler::Compile_and_const(CompileFlags cf)
1985
{
1986
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1987
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) & GetConstantRegU32(cf.MipsT()));
1988
}
1989
1990
void CPU::Recompiler::Recompiler::Compile_or_const(CompileFlags cf)
1991
{
1992
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1993
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT()));
1994
}
1995
1996
void CPU::Recompiler::Recompiler::Compile_xor_const(CompileFlags cf)
1997
{
1998
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
1999
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) ^ GetConstantRegU32(cf.MipsT()));
2000
}
2001
2002
void CPU::Recompiler::Recompiler::Compile_nor_const(CompileFlags cf)
2003
{
2004
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2005
SetConstantReg(MipsD(), ~(GetConstantRegU32(cf.MipsS()) | GetConstantRegU32(cf.MipsT())));
2006
}
2007
2008
void CPU::Recompiler::Recompiler::Compile_slt_const(CompileFlags cf)
2009
{
2010
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2011
SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < GetConstantRegS32(cf.MipsT())));
2012
}
2013
2014
void CPU::Recompiler::Recompiler::Compile_sltu_const(CompileFlags cf)
2015
{
2016
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2017
SetConstantReg(MipsD(), BoolToUInt32(GetConstantRegU32(cf.MipsS()) < GetConstantRegU32(cf.MipsT())));
2018
}
2019
2020
void CPU::Recompiler::Recompiler::Compile_mult_const(CompileFlags cf)
2021
{
2022
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2023
2024
const u64 res =
2025
static_cast<u64>(static_cast<s64>(GetConstantRegS32(cf.MipsS())) * static_cast<s64>(GetConstantRegS32(cf.MipsT())));
2026
SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
2027
SetConstantReg(Reg::lo, static_cast<u32>(res));
2028
}
2029
2030
void CPU::Recompiler::Recompiler::Compile_multu_const(CompileFlags cf)
2031
{
2032
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2033
2034
const u64 res = static_cast<u64>(GetConstantRegU32(cf.MipsS())) * static_cast<u64>(GetConstantRegU32(cf.MipsT()));
2035
SetConstantReg(Reg::hi, static_cast<u32>(res >> 32));
2036
SetConstantReg(Reg::lo, static_cast<u32>(res));
2037
}
2038
2039
void CPU::Recompiler::Recompiler::MIPSSignedDivide(s32 num, s32 denom, u32* lo, u32* hi)
{
  // Computes DIV results with the R3000A's edge-case behavior (MIPS division never
  // traps): quotient in *lo, remainder in *hi.
  if (denom == 0)
  {
    // divide by zero
    // Quotient is -1 for non-negative numerators, +1 for negative ones; the
    // remainder is the numerator itself.
    *lo = (num >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    *hi = static_cast<u32>(num);
  }
  else if (static_cast<u32>(num) == UINT32_C(0x80000000) && denom == -1)
  {
    // unrepresentable
    // INT32_MIN / -1 overflows a 32-bit signed result (also UB in C++, hence the
    // explicit case); hardware yields INT32_MIN with a zero remainder.
    *lo = UINT32_C(0x80000000);
    *hi = 0;
  }
  else
  {
    *lo = static_cast<u32>(num / denom);
    *hi = static_cast<u32>(num % denom);
  }
}

void CPU::Recompiler::Recompiler::MIPSUnsignedDivide(u32 num, u32 denom, u32* lo, u32* hi)
{
  // Computes DIVU results; the only special case is division by zero.
  if (denom == 0)
  {
    // divide by zero
    // Quotient is all-ones, remainder is the numerator.
    *lo = UINT32_C(0xFFFFFFFF);
    *hi = static_cast<u32>(num);
  }
  else
  {
    *lo = num / denom;
    *hi = num % denom;
  }
}
2074
2075
void CPU::Recompiler::Recompiler::Compile_div_const(CompileFlags cf)
2076
{
2077
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2078
2079
const s32 num = GetConstantRegS32(cf.MipsS());
2080
const s32 denom = GetConstantRegS32(cf.MipsT());
2081
2082
u32 lo, hi;
2083
MIPSSignedDivide(num, denom, &lo, &hi);
2084
2085
SetConstantReg(Reg::hi, hi);
2086
SetConstantReg(Reg::lo, lo);
2087
}
2088
2089
void CPU::Recompiler::Recompiler::Compile_divu_const(CompileFlags cf)
2090
{
2091
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2092
2093
const u32 num = GetConstantRegU32(cf.MipsS());
2094
const u32 denom = GetConstantRegU32(cf.MipsT());
2095
2096
u32 lo, hi;
2097
MIPSUnsignedDivide(num, denom, &lo, &hi);
2098
2099
SetConstantReg(Reg::hi, hi);
2100
SetConstantReg(Reg::lo, lo);
2101
}
2102
2103
void CPU::Recompiler::Recompiler::Compile_add_const(CompileFlags cf)
2104
{
2105
// TODO: Overflow
2106
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2107
if (MipsD() != Reg::zero)
2108
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
2109
}
2110
2111
void CPU::Recompiler::Recompiler::Compile_addu_const(CompileFlags cf)
2112
{
2113
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2114
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) + GetConstantRegU32(cf.MipsT()));
2115
}
2116
2117
void CPU::Recompiler::Recompiler::Compile_sub_const(CompileFlags cf)
2118
{
2119
// TODO: Overflow
2120
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2121
if (MipsD() != Reg::zero)
2122
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
2123
}
2124
2125
void CPU::Recompiler::Recompiler::Compile_subu_const(CompileFlags cf)
2126
{
2127
DebugAssert(HasConstantReg(cf.MipsS()) && HasConstantReg(cf.MipsT()));
2128
SetConstantReg(MipsD(), GetConstantRegU32(cf.MipsS()) - GetConstantRegU32(cf.MipsT()));
2129
}
2130
2131
void CPU::Recompiler::Recompiler::Compile_addi_const(CompileFlags cf)
2132
{
2133
// TODO: Overflow
2134
DebugAssert(HasConstantReg(cf.MipsS()));
2135
if (cf.MipsT() != Reg::zero)
2136
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
2137
}
2138
2139
void CPU::Recompiler::Recompiler::Compile_addiu_const(CompileFlags cf)
2140
{
2141
DebugAssert(HasConstantReg(cf.MipsS()));
2142
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) + inst->i.imm_sext32());
2143
}
2144
2145
void CPU::Recompiler::Recompiler::Compile_slti_const(CompileFlags cf)
2146
{
2147
DebugAssert(HasConstantReg(cf.MipsS()));
2148
SetConstantReg(cf.MipsT(), BoolToUInt32(GetConstantRegS32(cf.MipsS()) < static_cast<s32>(inst->i.imm_sext32())));
2149
}
2150
2151
void CPU::Recompiler::Recompiler::Compile_sltiu_const(CompileFlags cf)
2152
{
2153
DebugAssert(HasConstantReg(cf.MipsS()));
2154
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) < inst->i.imm_sext32());
2155
}
2156
2157
void CPU::Recompiler::Recompiler::Compile_andi_const(CompileFlags cf)
2158
{
2159
DebugAssert(HasConstantReg(cf.MipsS()));
2160
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) & inst->i.imm_zext32());
2161
}
2162
2163
void CPU::Recompiler::Recompiler::Compile_ori_const(CompileFlags cf)
2164
{
2165
DebugAssert(HasConstantReg(cf.MipsS()));
2166
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) | inst->i.imm_zext32());
2167
}
2168
2169
void CPU::Recompiler::Recompiler::Compile_xori_const(CompileFlags cf)
2170
{
2171
DebugAssert(HasConstantReg(cf.MipsS()));
2172
SetConstantReg(cf.MipsT(), GetConstantRegU32(cf.MipsS()) ^ inst->i.imm_zext32());
2173
}
2174
2175
void CPU::Recompiler::Recompiler::Compile_lui()
2176
{
2177
if (inst->i.rt == Reg::zero)
2178
return;
2179
2180
SetConstantReg(inst->i.rt, inst->i.imm_zext32() << 16);
2181
2182
if (g_settings.UsingPGXPCPUMode())
2183
GeneratePGXPCallWithMIPSRegs(reinterpret_cast<const void*>(&PGXP::CPU_LUI), inst->bits);
2184
}
2185
2186
// Pointer + write mask for each cop0 register, indexed by register number. A null
// pointer means the register is not backed by state here; a zero write mask means
// writes are ignored.
static constexpr const std::array<std::pair<u32*, u32>, 16> s_cop0_table = {
  {{nullptr, 0x00000000u},                                                      // 0
   {nullptr, 0x00000000u},                                                      // 1
   {nullptr, 0x00000000u},                                                      // 2
   {&CPU::g_state.cop0_regs.BPC, 0xffffffffu},                                  // 3: BPC
   {nullptr, 0},                                                                // 4
   {&CPU::g_state.cop0_regs.BDA, 0xffffffffu},                                  // 5: BDA
   {&CPU::g_state.cop0_regs.TAR, 0x00000000u},                                  // 6: TAR (writes ignored)
   {&CPU::g_state.cop0_regs.dcic.bits, CPU::Cop0Registers::DCIC::WRITE_MASK},   // 7: DCIC
   {&CPU::g_state.cop0_regs.BadVaddr, 0x00000000u},                             // 8: BadVaddr (writes ignored)
   {&CPU::g_state.cop0_regs.BDAM, 0xffffffffu},                                 // 9: BDAM
   {nullptr, 0x00000000u},                                                      // 10
   {&CPU::g_state.cop0_regs.BPCM, 0xffffffffu},                                 // 11: BPCM
   {&CPU::g_state.cop0_regs.sr.bits, CPU::Cop0Registers::SR::WRITE_MASK},       // 12: SR
   {&CPU::g_state.cop0_regs.cause.bits, CPU::Cop0Registers::CAUSE::WRITE_MASK}, // 13: CAUSE
   {&CPU::g_state.cop0_regs.EPC, 0x00000000u},                                  // 14: EPC (writes ignored)
   {&CPU::g_state.cop0_regs.PRID, 0x00000000u}}};                               // 15: PRID (writes ignored)
2203
2204
u32* CPU::Recompiler::Recompiler::GetCop0RegPtr(Cop0Reg reg)
{
  // Returns a pointer to the backing state for a cop0 register, or nullptr when
  // the register index is out of range or has no direct backing.
  const u8 idx = static_cast<u8>(reg);
  if (idx >= s_cop0_table.size())
    return nullptr;
  return s_cop0_table[idx].first;
}

u32 CPU::Recompiler::Recompiler::GetCop0RegWriteMask(Cop0Reg reg)
{
  // Returns the writable-bit mask for a cop0 register; zero for out-of-range or
  // read-only registers.
  const u8 idx = static_cast<u8>(reg);
  if (idx >= s_cop0_table.size())
    return 0;
  return s_cop0_table[idx].second;
}
2213
2214
void CPU::Recompiler::Recompiler::Compile_mfc0(CompileFlags cf)
{
  // MFC0: load a cop0 register into rt. Registers without a direct state pointer
  // fall back to the interpreter path.
  const Cop0Reg r = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(r);
  if (!ptr)
  {
    ERROR_LOG("Read from unknown cop0 reg {}", static_cast<u32>(r));
    Compile_Fallback();
    return;
  }

  DebugAssert(cf.valid_host_t);
  LoadHostRegFromCPUPointer(cf.host_t, ptr);
}
2228
2229
// Maps a GTE register index to a pointer into the GTE register file plus the action
// the caller must take to access it correctly (direct access, 16-bit extension,
// FIFO push, call the C++ handler, or ignore the write entirely).
std::pair<u32*, CPU::Recompiler::Recompiler::GTERegisterAccessAction>
CPU::Recompiler::Recompiler::GetGTERegisterPointer(u32 index, bool writing)
{
  if (!writing)
  {
    // Most GTE registers can be read directly. Handle the special cases here.
    if (index == 15) // SXY3
    {
      // mirror of SXY2
      index = 14;
    }

    switch (index)
    {
      case 28: // IRGB
      case 29: // ORGB
      {
        // Reads must go through the handler (see CallHandler action).
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }
      break;

      default:
      {
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
      break;
    }
  }
  else
  {
    switch (index)
    {
      case 1: // V0[z]
      case 3: // V1[z]
      case 5: // V2[z]
      case 8: // IR0
      case 9: // IR1
      case 10: // IR2
      case 11: // IR3
      case 36: // RT33
      case 44: // L33
      case 52: // LR33
      case 58: // H - sign-extended on read but zext on use
      case 59: // DQA
      case 61: // ZSF3
      case 62: // ZSF4
      {
        // sign-extend z component of vector registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::SignExtend16);
      }
      break;

      case 7: // OTZ
      case 16: // SZ0
      case 17: // SZ1
      case 18: // SZ2
      case 19: // SZ3
      {
        // zero-extend unsigned values
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::ZeroExtend16);
      }
      break;

      case 15: // SXY3
      {
        // writing to SXYP pushes to the FIFO
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::PushFIFO);
      }
      break;

      case 28: // IRGB
      case 30: // LZCS
      case 63: // FLAG
      {
        // Writes must go through the handler (see CallHandler action).
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::CallHandler);
      }

      case 29: // ORGB
      case 31: // LZCR
      {
        // read-only registers
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Ignore);
      }

      default:
      {
        // written as-is, 2x16 or 1x32 bits
        return std::make_pair(&g_state.gte_regs.r32[index], GTERegisterAccessAction::Direct);
      }
    }
  }
}
2321
2322
void CPU::Recompiler::Recompiler::AddGTETicks(TickCount ticks)
{
  // Record when the in-flight GTE operation completes, relative to the block's
  // cycle counter.
  // TODO: check, int has +1 here
  m_gte_done_cycle = m_cycles + ticks;
  DEBUG_LOG("Adding {} GTE ticks", ticks);
}
2328
2329
void CPU::Recompiler::Recompiler::StallUntilGTEComplete()
{
  // TODO: hack to match old rec.. this may or may not be correct behavior
  // it's the difference between stalling before and after the current instruction's cycle
  // The current instruction's cycle is temporarily removed so the stall comparison
  // happens against the pre-instruction cycle count; it is re-added at the end.
  DebugAssert(m_cycles > 0);
  m_cycles--;

  if (!m_dirty_gte_done_cycle)
  {
    // simple case - in block scheduling
    if (m_gte_done_cycle > m_cycles)
    {
      // Advance the block's cycle count up to the GTE completion point.
      DEBUG_LOG("Stalling for {} ticks from GTE", m_gte_done_cycle - m_cycles);
      m_cycles += (m_gte_done_cycle - m_cycles);
    }
  }
  else
  {
    // switch to in block scheduling
    // The completion cycle lives in CPU state, so emit code to apply the stall at runtime.
    DEBUG_LOG("Flushing GTE stall from state");
    Flush(FLUSH_GTE_STALL_FROM_STATE);
  }

  m_cycles++;
}
2354
2355
void CPU::Recompiler::BackpatchLoadStore(void* exception_pc, const CodeCache::LoadstoreBackpatchInfo& info)
{
  // Rewrites the fastmem access at exception_pc into a jump to a newly-compiled
  // slowmem thunk in the far code buffer.
  // remove the cycles we added for the memory read, then take them off again after the backpatch
  // the normal rec path will add the ram read ticks later, so we need to take them off at the end
  DebugAssert(!info.is_load || info.cycles >= Bus::RAM_READ_TICKS);
  const TickCount cycles_to_add =
    static_cast<TickCount>(static_cast<u32>(info.cycles)) - (info.is_load ? Bus::RAM_READ_TICKS : 0);
  const TickCount cycles_to_remove = static_cast<TickCount>(static_cast<u32>(info.cycles));

  // Compile the slowmem handler into the far code buffer.
  void* thunk_address = CPU::CodeCache::GetFreeFarCodePointer();
  const u32 thunk_size = CompileLoadStoreThunk(
    thunk_address, CPU::CodeCache::GetFreeFarCodeSpace(), exception_pc, info.code_size, cycles_to_add, cycles_to_remove,
    info.gpr_bitmask, info.address_register, info.data_register, info.AccessSize(), info.is_signed, info.is_load);

#if 0
  Log_DebugPrint("**Backpatch Thunk**");
  CPU::CodeCache::DisassembleAndLogHostCode(thunk_address, thunk_size);
#endif

  // backpatch to a jump to the slowmem handler
  CPU::CodeCache::EmitJump(exception_pc, thunk_address, true);

  CPU::CodeCache::CommitFarCode(thunk_size);
}
2379
2380
void CPU::Recompiler::Recompiler::InitSpeculativeRegs()
2381
{
2382
for (u8 i = 0; i < static_cast<u8>(Reg::count); i++)
2383
m_speculative_constants.regs[i] = g_state.regs.r[i];
2384
2385
m_speculative_constants.cop0_sr = g_state.cop0_regs.sr.bits;
2386
m_speculative_constants.memory.clear();
2387
}
2388
2389
void CPU::Recompiler::Recompiler::InvalidateSpeculativeValues()
2390
{
2391
m_speculative_constants.regs.fill(std::nullopt);
2392
m_speculative_constants.memory.clear();
2393
m_speculative_constants.cop0_sr.reset();
2394
}
2395
2396
CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecReadReg(Reg reg)
2397
{
2398
return m_speculative_constants.regs[static_cast<u8>(reg)];
2399
}
2400
2401
void CPU::Recompiler::Recompiler::SpecWriteReg(Reg reg, SpecValue value)
2402
{
2403
if (reg == Reg::zero)
2404
return;
2405
2406
m_speculative_constants.regs[static_cast<u8>(reg)] = value;
2407
}
2408
2409
void CPU::Recompiler::Recompiler::SpecInvalidateReg(Reg reg)
2410
{
2411
if (reg == Reg::zero)
2412
return;
2413
2414
m_speculative_constants.regs[static_cast<u8>(reg)].reset();
2415
}
2416
2417
void CPU::Recompiler::Recompiler::SpecCopyReg(Reg dst, Reg src)
2418
{
2419
if (dst == Reg::zero)
2420
return;
2421
2422
m_speculative_constants.regs[static_cast<u8>(dst)] = m_speculative_constants.regs[static_cast<u8>(src)];
2423
}
2424
2425
CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecReadMem(VirtualMemoryAddress address)
{
  // Speculative memory read: prefer a value recorded from an earlier store in this
  // block, then fall back to the actual compile-time contents of scratchpad or RAM.
  // Anything else is unknown.
  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
    return it->second;

  u32 value;
  if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
  {
    u32 scratchpad_offset = address & SCRATCHPAD_OFFSET_MASK;
    std::memcpy(&value, &CPU::g_state.scratchpad[scratchpad_offset], sizeof(value));
    return value;
  }

  if (CPU::CodeCache::AddressInRAM(address))
  {
    u32 ram_offset = address & Bus::g_ram_mask;
    std::memcpy(&value, &Bus::g_ram[ram_offset], sizeof(value));
    return value;
  }

  return std::nullopt;
}
2448
2449
void CPU::Recompiler::Recompiler::SpecWriteMem(u32 address, SpecValue value)
{
  // Record a speculative store so later loads in this block can be folded. Only
  // scratchpad and RAM addresses are tracked.
  auto it = m_speculative_constants.memory.find(address);
  if (it != m_speculative_constants.memory.end())
  {
    it->second = value;
    return;
  }

  // NOTE(review): scratchpad entries are keyed by the full virtual address while RAM
  // entries are keyed by the masked RAM offset, yet SpecReadMem looks up the
  // unmasked address first — presumably intended to fold RAM mirrors; confirm the
  // keying is consistent with the read path.
  if ((address & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
    m_speculative_constants.memory.emplace(address, value);
  else if (CPU::CodeCache::AddressInRAM(address))
    m_speculative_constants.memory.emplace(address & Bus::g_ram_mask, value);
}

void CPU::Recompiler::Recompiler::SpecInvalidateMem(VirtualMemoryAddress address)
{
  // Forget any tracked value at this address by storing an "unknown" marker.
  SpecWriteMem(address, std::nullopt);
}
2468
2469
bool CPU::Recompiler::Recompiler::SpecIsCacheIsolated()
2470
{
2471
if (!m_speculative_constants.cop0_sr.has_value())
2472
return false;
2473
2474
const Cop0Registers::SR sr{m_speculative_constants.cop0_sr.value()};
2475
return sr.Isc;
2476
}
2477
2478
void CPU::Recompiler::Recompiler::SpecExec_b()
2479
{
2480
const bool link = (static_cast<u8>(inst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10);
2481
if (link)
2482
SpecWriteReg(Reg::ra, m_compiler_pc);
2483
}
2484
2485
void CPU::Recompiler::Recompiler::SpecExec_jal()
2486
{
2487
SpecWriteReg(Reg::ra, m_compiler_pc);
2488
}
2489
2490
void CPU::Recompiler::Recompiler::SpecExec_jalr()
2491
{
2492
SpecWriteReg(inst->r.rd, m_compiler_pc);
2493
}
2494
2495
void CPU::Recompiler::Recompiler::SpecExec_sll()
2496
{
2497
const SpecValue rt = SpecReadReg(inst->r.rt);
2498
if (rt.has_value())
2499
SpecWriteReg(inst->r.rd, rt.value() << inst->r.shamt);
2500
else
2501
SpecInvalidateReg(inst->r.rd);
2502
}
2503
2504
void CPU::Recompiler::Recompiler::SpecExec_srl()
2505
{
2506
const SpecValue rt = SpecReadReg(inst->r.rt);
2507
if (rt.has_value())
2508
SpecWriteReg(inst->r.rd, rt.value() >> inst->r.shamt);
2509
else
2510
SpecInvalidateReg(inst->r.rd);
2511
}
2512
2513
void CPU::Recompiler::Recompiler::SpecExec_sra()
2514
{
2515
const SpecValue rt = SpecReadReg(inst->r.rt);
2516
if (rt.has_value())
2517
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> inst->r.shamt));
2518
else
2519
SpecInvalidateReg(inst->r.rd);
2520
}
2521
2522
void CPU::Recompiler::Recompiler::SpecExec_sllv()
2523
{
2524
const SpecValue rs = SpecReadReg(inst->r.rs);
2525
const SpecValue rt = SpecReadReg(inst->r.rt);
2526
if (rs.has_value() && rt.has_value())
2527
SpecWriteReg(inst->r.rd, rt.value() << (rs.value() & 0x1F));
2528
else
2529
SpecInvalidateReg(inst->r.rd);
2530
}
2531
2532
void CPU::Recompiler::Recompiler::SpecExec_srlv()
2533
{
2534
const SpecValue rs = SpecReadReg(inst->r.rs);
2535
const SpecValue rt = SpecReadReg(inst->r.rt);
2536
if (rs.has_value() && rt.has_value())
2537
SpecWriteReg(inst->r.rd, rt.value() >> (rs.value() & 0x1F));
2538
else
2539
SpecInvalidateReg(inst->r.rd);
2540
}
2541
2542
void CPU::Recompiler::Recompiler::SpecExec_srav()
2543
{
2544
const SpecValue rs = SpecReadReg(inst->r.rs);
2545
const SpecValue rt = SpecReadReg(inst->r.rt);
2546
if (rs.has_value() && rt.has_value())
2547
SpecWriteReg(inst->r.rd, static_cast<u32>(static_cast<s32>(rt.value()) >> (rs.value() & 0x1F)));
2548
else
2549
SpecInvalidateReg(inst->r.rd);
2550
}
2551
2552
void CPU::Recompiler::Recompiler::SpecExec_mult()
2553
{
2554
const SpecValue rs = SpecReadReg(inst->r.rs);
2555
const SpecValue rt = SpecReadReg(inst->r.rt);
2556
if (rs.has_value() && rt.has_value())
2557
{
2558
const u64 result =
2559
static_cast<u64>(static_cast<s64>(SignExtend64(rs.value())) * static_cast<s64>(SignExtend64(rt.value())));
2560
SpecWriteReg(Reg::hi, Truncate32(result >> 32));
2561
SpecWriteReg(Reg::lo, Truncate32(result));
2562
}
2563
else
2564
{
2565
SpecInvalidateReg(Reg::hi);
2566
SpecInvalidateReg(Reg::lo);
2567
}
2568
}
2569
2570
void CPU::Recompiler::Recompiler::SpecExec_multu()
2571
{
2572
const SpecValue rs = SpecReadReg(inst->r.rs);
2573
const SpecValue rt = SpecReadReg(inst->r.rt);
2574
if (rs.has_value() && rt.has_value())
2575
{
2576
const u64 result = ZeroExtend64(rs.value()) * SignExtend64(rt.value());
2577
SpecWriteReg(Reg::hi, Truncate32(result >> 32));
2578
SpecWriteReg(Reg::lo, Truncate32(result));
2579
}
2580
else
2581
{
2582
SpecInvalidateReg(Reg::hi);
2583
SpecInvalidateReg(Reg::lo);
2584
}
2585
}
2586
2587
void CPU::Recompiler::Recompiler::SpecExec_div()
2588
{
2589
const SpecValue rs = SpecReadReg(inst->r.rs);
2590
const SpecValue rt = SpecReadReg(inst->r.rt);
2591
if (rs.has_value() && rt.has_value())
2592
{
2593
u32 lo, hi;
2594
MIPSSignedDivide(static_cast<s32>(rs.value()), static_cast<s32>(rt.value()), &lo, &hi);
2595
SpecWriteReg(Reg::hi, hi);
2596
SpecWriteReg(Reg::lo, lo);
2597
}
2598
else
2599
{
2600
SpecInvalidateReg(Reg::hi);
2601
SpecInvalidateReg(Reg::lo);
2602
}
2603
}
2604
2605
void CPU::Recompiler::Recompiler::SpecExec_divu()
2606
{
2607
const SpecValue rs = SpecReadReg(inst->r.rs);
2608
const SpecValue rt = SpecReadReg(inst->r.rt);
2609
if (rs.has_value() && rt.has_value())
2610
{
2611
u32 lo, hi;
2612
MIPSUnsignedDivide(rs.value(), rt.value(), &lo, &hi);
2613
SpecWriteReg(Reg::hi, hi);
2614
SpecWriteReg(Reg::lo, lo);
2615
}
2616
else
2617
{
2618
SpecInvalidateReg(Reg::hi);
2619
SpecInvalidateReg(Reg::lo);
2620
}
2621
}
2622
2623
void CPU::Recompiler::Recompiler::SpecExec_add()
2624
{
2625
SpecExec_addu();
2626
}
2627
2628
void CPU::Recompiler::Recompiler::SpecExec_addu()
2629
{
2630
const SpecValue rs = SpecReadReg(inst->r.rs);
2631
const SpecValue rt = SpecReadReg(inst->r.rt);
2632
if (rs.has_value() && rt.has_value())
2633
SpecWriteReg(inst->r.rd, rs.value() + rt.value());
2634
else
2635
SpecInvalidateReg(inst->r.rd);
2636
}
2637
2638
void CPU::Recompiler::Recompiler::SpecExec_sub()
2639
{
2640
SpecExec_subu();
2641
}
2642
2643
void CPU::Recompiler::Recompiler::SpecExec_subu()
2644
{
2645
const SpecValue rs = SpecReadReg(inst->r.rs);
2646
const SpecValue rt = SpecReadReg(inst->r.rt);
2647
if (rs.has_value() && rt.has_value())
2648
SpecWriteReg(inst->r.rd, rs.value() - rt.value());
2649
else
2650
SpecInvalidateReg(inst->r.rd);
2651
}
2652
2653
void CPU::Recompiler::Recompiler::SpecExec_and()
2654
{
2655
const SpecValue rs = SpecReadReg(inst->r.rs);
2656
const SpecValue rt = SpecReadReg(inst->r.rt);
2657
if (rs.has_value() && rt.has_value())
2658
SpecWriteReg(inst->r.rd, rs.value() & rt.value());
2659
else
2660
SpecInvalidateReg(inst->r.rd);
2661
}
2662
2663
void CPU::Recompiler::Recompiler::SpecExec_or()
2664
{
2665
const SpecValue rs = SpecReadReg(inst->r.rs);
2666
const SpecValue rt = SpecReadReg(inst->r.rt);
2667
if (rs.has_value() && rt.has_value())
2668
SpecWriteReg(inst->r.rd, rs.value() | rt.value());
2669
else
2670
SpecInvalidateReg(inst->r.rd);
2671
}
2672
2673
void CPU::Recompiler::Recompiler::SpecExec_xor()
2674
{
2675
const SpecValue rs = SpecReadReg(inst->r.rs);
2676
const SpecValue rt = SpecReadReg(inst->r.rt);
2677
if (rs.has_value() && rt.has_value())
2678
SpecWriteReg(inst->r.rd, rs.value() ^ rt.value());
2679
else
2680
SpecInvalidateReg(inst->r.rd);
2681
}
2682
2683
void CPU::Recompiler::Recompiler::SpecExec_nor()
2684
{
2685
const SpecValue rs = SpecReadReg(inst->r.rs);
2686
const SpecValue rt = SpecReadReg(inst->r.rt);
2687
if (rs.has_value() && rt.has_value())
2688
SpecWriteReg(inst->r.rd, ~(rs.value() | rt.value()));
2689
else
2690
SpecInvalidateReg(inst->r.rd);
2691
}
2692
2693
void CPU::Recompiler::Recompiler::SpecExec_slt()
2694
{
2695
const SpecValue rs = SpecReadReg(inst->r.rs);
2696
const SpecValue rt = SpecReadReg(inst->r.rt);
2697
if (rs.has_value() && rt.has_value())
2698
SpecWriteReg(inst->r.rd, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(rt.value())));
2699
else
2700
SpecInvalidateReg(inst->r.rd);
2701
}
2702
2703
void CPU::Recompiler::Recompiler::SpecExec_sltu()
2704
{
2705
const SpecValue rs = SpecReadReg(inst->r.rs);
2706
const SpecValue rt = SpecReadReg(inst->r.rt);
2707
if (rs.has_value() && rt.has_value())
2708
SpecWriteReg(inst->r.rd, BoolToUInt32(rs.value() < rt.value()));
2709
else
2710
SpecInvalidateReg(inst->r.rd);
2711
}
2712
2713
void CPU::Recompiler::Recompiler::SpecExec_addi()
{
  // ADDI differs from ADDIU only by trapping on signed overflow at runtime;
  // the speculative result value is identical, so reuse the ADDIU handler.
  SpecExec_addiu();
}
void CPU::Recompiler::Recompiler::SpecExec_addiu()
2719
{
2720
const SpecValue rs = SpecReadReg(inst->i.rs);
2721
if (rs.has_value())
2722
SpecWriteReg(inst->i.rt, rs.value() + inst->i.imm_sext32());
2723
else
2724
SpecInvalidateReg(inst->i.rt);
2725
}
2726
2727
void CPU::Recompiler::Recompiler::SpecExec_slti()
2728
{
2729
const SpecValue rs = SpecReadReg(inst->i.rs);
2730
if (rs.has_value())
2731
SpecWriteReg(inst->i.rt, BoolToUInt32(static_cast<s32>(rs.value()) < static_cast<s32>(inst->i.imm_sext32())));
2732
else
2733
SpecInvalidateReg(inst->i.rt);
2734
}
2735
2736
void CPU::Recompiler::Recompiler::SpecExec_sltiu()
2737
{
2738
const SpecValue rs = SpecReadReg(inst->i.rs);
2739
if (rs.has_value())
2740
SpecWriteReg(inst->i.rt, BoolToUInt32(rs.value() < inst->i.imm_sext32()));
2741
else
2742
SpecInvalidateReg(inst->i.rt);
2743
}
2744
2745
void CPU::Recompiler::Recompiler::SpecExec_andi()
2746
{
2747
const SpecValue rs = SpecReadReg(inst->i.rs);
2748
if (rs.has_value())
2749
SpecWriteReg(inst->i.rt, rs.value() & inst->i.imm_zext32());
2750
else
2751
SpecInvalidateReg(inst->i.rt);
2752
}
2753
2754
void CPU::Recompiler::Recompiler::SpecExec_ori()
2755
{
2756
const SpecValue rs = SpecReadReg(inst->i.rs);
2757
if (rs.has_value())
2758
SpecWriteReg(inst->i.rt, rs.value() | inst->i.imm_zext32());
2759
else
2760
SpecInvalidateReg(inst->i.rt);
2761
}
2762
2763
void CPU::Recompiler::Recompiler::SpecExec_xori()
2764
{
2765
const SpecValue rs = SpecReadReg(inst->i.rs);
2766
if (rs.has_value())
2767
SpecWriteReg(inst->i.rt, rs.value() ^ inst->i.imm_zext32());
2768
else
2769
SpecInvalidateReg(inst->i.rt);
2770
}
2771
2772
void CPU::Recompiler::Recompiler::SpecExec_lui()
2773
{
2774
SpecWriteReg(inst->i.rt, inst->i.imm_zext32() << 16);
2775
}
2776
2777
CPU::Recompiler::Recompiler::SpecValue CPU::Recompiler::Recompiler::SpecExec_LoadStoreAddr()
{
  // Effective address of the current load/store: base + sign-extended offset.
  // Unknown base register => unknown address.
  const SpecValue base = SpecReadReg(inst->i.rs);
  if (!base.has_value())
    return base;

  return base.value() + inst->i.imm_sext32();
}
void CPU::Recompiler::Recompiler::SpecExec_lxx(MemoryAccessSize size, bool sign)
{
  // Speculative load (LB/LBU/LH/LHU/LW): rt becomes a known constant only when
  // both the effective address and the tracked memory contents are known.
  const SpecValue addr = SpecExec_LoadStoreAddr();
  SpecValue val;
  if (!addr.has_value() || !(val = SpecReadMem(addr.value())).has_value())
  {
    SpecInvalidateReg(inst->i.rt);
    return;
  }

  // Narrow the loaded word to the access size, sign- or zero-extending as the
  // instruction requires.
  switch (size)
  {
    case MemoryAccessSize::Byte:
      val = sign ? SignExtend32(static_cast<u8>(val.value())) : ZeroExtend32(static_cast<u8>(val.value()));
      break;

    case MemoryAccessSize::HalfWord:
      val = sign ? SignExtend32(static_cast<u16>(val.value())) : ZeroExtend32(static_cast<u16>(val.value()));
      break;

    case MemoryAccessSize::Word:
      break;

    default:
      UnreachableCode();
  }

  // Consistency fix: use the i-format view of rt like the reads above did
  // (previously r.rt; rt occupies the same encoding bits in both formats).
  SpecWriteReg(inst->i.rt, val);
}
void CPU::Recompiler::Recompiler::SpecExec_lwx(bool lwr)
{
  // Speculative LWL/LWR. Merging a partial word into the old rt value is not
  // modelled yet, so conservatively treat the destination as unknown.
  // TODO
  SpecInvalidateReg(inst->i.rt);
}
void CPU::Recompiler::Recompiler::SpecExec_sxx(MemoryAccessSize size)
{
  // Speculative store (SB/SH/SW). With an unknown address there is nothing to track.
  const SpecValue addr = SpecExec_LoadStoreAddr();
  if (!addr.has_value())
    return;

  // Truncate a known store value to the access size.
  // NOTE(review): sub-word stores record the zero-extended value for the whole
  // tracked slot rather than merging into the prior word — confirm SpecWriteMem
  // semantics make this approximation safe.
  SpecValue value = SpecReadReg(inst->i.rt);
  if (value.has_value())
  {
    switch (size)
    {
      case MemoryAccessSize::Byte:
        value = ZeroExtend32(static_cast<u8>(value.value()));
        break;

      case MemoryAccessSize::HalfWord:
        value = ZeroExtend32(static_cast<u16>(value.value()));
        break;

      case MemoryAccessSize::Word:
        break;

      default:
        UnreachableCode();
    }
  }

  // An unknown value still marks the slot as written (now unknown).
  SpecWriteMem(addr.value(), value);
}
void CPU::Recompiler::Recompiler::SpecExec_swx(bool swr)
2850
{
2851
const SpecValue addr = SpecExec_LoadStoreAddr();
2852
if (addr.has_value())
2853
SpecInvalidateMem(addr.value() & ~3u);
2854
}
2855
2856
void CPU::Recompiler::Recompiler::SpecExec_swc2()
2857
{
2858
const SpecValue addr = SpecExec_LoadStoreAddr();
2859
if (addr.has_value())
2860
SpecInvalidateMem(addr.value());
2861
}
2862
2863
void CPU::Recompiler::Recompiler::SpecExec_mfc0()
2864
{
2865
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
2866
if (rd != Cop0Reg::SR)
2867
{
2868
SpecInvalidateReg(inst->r.rt);
2869
return;
2870
}
2871
2872
SpecWriteReg(inst->r.rt, m_speculative_constants.cop0_sr);
2873
}
2874
2875
void CPU::Recompiler::Recompiler::SpecExec_mtc0()
2876
{
2877
const Cop0Reg rd = static_cast<Cop0Reg>(inst->r.rd.GetValue());
2878
if (rd != Cop0Reg::SR || !m_speculative_constants.cop0_sr.has_value())
2879
return;
2880
2881
SpecValue val = SpecReadReg(inst->r.rt);
2882
if (val.has_value())
2883
{
2884
constexpr u32 mask = Cop0Registers::SR::WRITE_MASK;
2885
val = (m_speculative_constants.cop0_sr.value() & mask) | (val.value() & mask);
2886
}
2887
2888
m_speculative_constants.cop0_sr = val;
2889
}
2890
2891
void CPU::Recompiler::Recompiler::SpecExec_rfe()
2892
{
2893
if (!m_speculative_constants.cop0_sr.has_value())
2894
return;
2895
2896
const u32 val = m_speculative_constants.cop0_sr.value();
2897
m_speculative_constants.cop0_sr = (val & UINT32_C(0b110000)) | ((val & UINT32_C(0b111111)) >> 2);
2898
}
2899
2900