Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_pgxp.cpp
4212 views
1
// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
//
4
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
5
// No original code remains. The original copyright notice is included above for historical purposes.
6
//
7
8
#include "cpu_pgxp.h"
9
#include "bus.h"
10
#include "cpu_core.h"
11
#include "cpu_core_private.h"
12
#include "cpu_disasm.h"
13
#include "gpu_types.h"
14
#include "settings.h"
15
16
#include "util/gpu_device.h"
17
18
#include "common/assert.h"
19
#include "common/log.h"
20
21
#include <climits>
22
#include <cmath>
23
24
LOG_CHANNEL(CPU);
25
26
// #define LOG_VALUES 1
27
// #define LOG_LOOKUPS 1
28
29
// TODO: Don't update flags on Validate(), instead return it.
30
31
namespace CPU::PGXP {
32
33
enum : u32
34
{
35
VERTEX_CACHE_WIDTH = 2048,
36
VERTEX_CACHE_HEIGHT = 2048,
37
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
38
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
39
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
40
};
41
42
enum : u32
43
{
44
VALID_X = (1u << 0),
45
VALID_Y = (1u << 1),
46
VALID_Z = (1u << 2),
47
VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value.
48
VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value.
49
VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
50
51
VALID_XY = (VALID_X | VALID_Y),
52
VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
53
VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
54
};
55
56
#define LOWORD_U16(val) (static_cast<u16>(val))
57
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
58
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
59
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
60
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
61
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
62
63
static double f16Sign(double val);
64
static double f16Unsign(double val);
65
static double f16Overflow(double val);
66
67
static void CacheVertex(u32 value, const PGXPValue& vertex);
68
static PGXPValue* GetCachedVertex(u32 value);
69
70
static float TruncateVertexPosition(float p);
71
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
72
73
static PGXPValue& GetRdValue(Instruction instr);
74
static PGXPValue& GetRtValue(Instruction instr);
75
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
76
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
77
static void SetRtValue(Instruction instr, const PGXPValue& val);
78
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
79
static PGXPValue& GetSXY0();
80
static PGXPValue& GetSXY1();
81
static PGXPValue& GetSXY2();
82
static PGXPValue& PushSXY();
83
84
static PGXPValue* GetPtr(u32 addr);
85
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
86
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
87
88
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
89
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
90
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
91
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
92
93
static void WriteMem(u32 addr, const PGXPValue& value);
94
static void WriteMem16(u32 addr, const PGXPValue& value);
95
96
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
97
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
98
99
#ifdef LOG_VALUES
100
static void LogInstruction(u32 pc, Instruction instr);
101
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
102
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
103
104
// clang-format off
105
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
106
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
107
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
108
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
109
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
110
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
111
#else
112
#define LOG_VALUES_NV() (void)0
113
#define LOG_VALUES_1(name, rval, val) (void)0
114
#define LOG_VALUES_C1(rnum, rval) (void)0
115
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
116
#define LOG_VALUES_LOAD(addr, val) (void)0
117
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
118
#endif
119
// clang-format on
120
121
static constexpr const PGXPValue INVALID_VALUE = {};
122
123
static PGXPValue* s_mem = nullptr;
124
static PGXPValue* s_vertex_cache = nullptr;
125
126
#ifdef LOG_VALUES
127
static std::FILE* s_log;
128
#endif
129
} // namespace CPU::PGXP
130
131
void CPU::PGXP::Initialize()
132
{
133
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
134
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
135
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
136
137
if (!s_mem)
138
{
139
s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
140
if (!s_mem)
141
Panic("Failed to allocate PGXP memory");
142
}
143
144
if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache)
145
{
146
s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
147
if (!s_vertex_cache)
148
{
149
ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
150
g_settings.gpu_pgxp_vertex_cache = false;
151
}
152
}
153
154
if (s_vertex_cache)
155
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
156
}
157
158
void CPU::PGXP::Reset()
159
{
160
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
161
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
162
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
163
164
if (s_mem)
165
std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);
166
167
if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
168
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
169
}
170
171
void CPU::PGXP::Shutdown()
172
{
173
if (s_vertex_cache)
174
{
175
std::free(s_vertex_cache);
176
s_vertex_cache = nullptr;
177
}
178
if (s_mem)
179
{
180
std::free(s_mem);
181
s_mem = nullptr;
182
}
183
184
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
185
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
186
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
187
}
188
189
// Scales val by 65536, truncates it to an integer that is then narrowed to 32 bits, and scales back down.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const s64 scaled = static_cast<s64>(val * (USHRT_MAX + 1));
  const s32 narrowed = static_cast<s32>(scaled);
  return static_cast<double>(narrowed) / static_cast<double>(USHRT_MAX + 1);
}
194
195
// Returns val unchanged when it is non-negative, otherwise val shifted up by 65536.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
199
200
// Truncates val to a 64-bit integer and returns it arithmetically shifted right by 16 bits.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 truncated = static_cast<s64>(val);
  return static_cast<double>(truncated >> 16);
}
204
205
// Returns the shadow entry of the register named by the instruction's rd field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd_index = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd_index];
}
209
210
// Returns the shadow entry of the register named by the instruction's rt field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt_index];
}
214
215
// Returns the rt shadow entry after calling Validate() on it with the register's actual value.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& entry = g_state.pgxp_gpr[rt_index];
  entry.Validate(rtVal);
  return entry;
}
221
222
// Returns the rs shadow entry after calling Validate() on it with the register's actual value.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs_index = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& entry = g_state.pgxp_gpr[rs_index];
  entry.Validate(rsVal);
  return entry;
}
228
229
// Overwrites the rt shadow entry with a copy of val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  g_state.pgxp_gpr[rt_index] = val;
}
233
234
// Overwrites the rt shadow entry with a copy of val, then replaces its raw value with rtVal.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& entry = g_state.pgxp_gpr[rt_index];
  entry = val;
  entry.value = rtVal;
}
240
241
// Shadow of GTE register 12.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  PGXPValue& sxy0 = g_state.pgxp_gte[12];
  return sxy0;
}
245
246
// Shadow of GTE register 13.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  PGXPValue& sxy1 = g_state.pgxp_gte[13];
  return sxy1;
}
250
251
// Shadow of GTE register 14.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  PGXPValue& sxy2 = g_state.pgxp_gte[14];
  return sxy2;
}
255
256
// Advances the three-entry screen coordinate FIFO (13 -> 12, 14 -> 13) and returns the
// newest slot (14) so the caller can fill it in. The returned slot still holds its old contents.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
262
263
// Maps a CPU address to its shadow memory entry (one entry per 32-bit word).
// Returns nullptr for addresses that are neither scratchpad nor within the RAM mirrors.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  // Debugging aid: break when a specific physical address is touched.
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  // Scratchpad entries live after the RAM entries.
  if ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR)
  {
    const u32 scratch_word = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + scratch_word];
  }

  // Don't worry about >512MB here for performance reasons.
  const u32 paddr = (addr & KSEG_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  const u32 ram_word = (paddr & Bus::g_ram_mask) >> 2;
  return &s_mem[ram_word];
}
281
282
// Looks up the shadow entry for addr, calls Validate() on it with the loaded word, and returns it.
// Untracked addresses yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  if (PGXPValue* const entry = GetPtr(addr)) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
291
292
// 16-bit load: fills dest from the half of the shadow word selected by bit 1 of addr.
// The loaded half ends up in dest.x; dest.y becomes the extension of that half
// (sign selects between sign- and zero-extension). Untracked addresses yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // Bit 1 of the address picks the upper or lower halfword.
  const bool upper_half = ((addr & 2) != 0);

  // Only the half being loaded is checked against the real value; a mismatch drops just that component.
  const u32 stored_half = upper_half ? (entry->value >> 16) : entry->value;
  if (Truncate16(stored_half) != Truncate16(value))
    entry->flags &= upper_half ? ~VALID_Y : ~VALID_X;

  // Start from the complete entry.
  dest = *entry;

  // For the upper half, move y (and its validity bit) down into x.
  if (upper_half)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // y is only marked valid when x is, to avoid manufacturing fake values.
  if ((dest.flags & VALID_X) != 0)
  {
    dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f;
    dest.flags |= VALID_Y;
  }
  else
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }

  dest.value = value;
}
333
334
// 32-bit store: copies value into the shadow entry for addr and marks both Z halves as present.
// Stores to untracked addresses are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  if (PGXPValue* const entry = GetPtr(addr)) [[likely]]
  {
    *entry = value;
    entry->flags |= VALID_LOWZ | VALID_HIGHZ;
  }
}
343
344
// 16-bit store: writes value.x into the x or y component of the shadow word, depending on which
// halfword addr selects, and merges the raw 16 bits into the stored value.
// Stores to untracked addresses are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
    return;

  // Bit 1 of the address picks the upper or lower halfword.
  const bool upper_half = ((addr & 2) != 0);
  if (!upper_half)
  {
    entry->x = value.x;
    entry->flags = (entry->flags & ~VALID_X) | (value.flags & VALID_X);
    entry->value = (entry->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    entry->y = value.x;
    entry->flags = (entry->flags & ~VALID_Y) | ((value.flags & VALID_X) << 1);
    entry->value = (entry->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  const u32 half_z_flag = upper_half ? VALID_HIGHZ : VALID_LOWZ;
  if ((value.flags & VALID_Z) != 0)
  {
    entry->z = value.z;
    entry->flags |= VALID_Z | half_z_flag;
    return;
  }

  // This half no longer carries Z; Z as a whole is dropped once neither half has it.
  entry->flags &= ~half_z_flag;
  if ((entry->flags & (VALID_HIGHZ | VALID_LOWZ)) == 0)
    entry->flags &= ~VALID_Z;
}
379
380
// Takes src's z when dst has no valid Z of its own, and inherits src's VALID_Z bit.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  if ((dst.flags & VALID_Z) == 0)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
385
386
// Chooses the Z for the result of a two-operand operation and ORs VALID_Z into dst_flags when
// either operand has it. src1 wins unless it has no Z at all, or its Z is tainted while src2
// offers an untainted one.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  const bool src1_missing_z = ((src1.flags & VALID_Z) == 0);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);

  const bool prefer_src2 = (src1_missing_z || (src1_tainted && src2_precise));
  dst_z = prefer_src2 ? src2.z : src1.z;
  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
396
397
#ifdef LOG_VALUES
398
// Starts a new trace line: "<pc> <opcode bits> <disassembly>".
// The log file (pgxp.log) is opened on first use; every later call flushes and terminates the
// previous line first. If the file cannot be opened, nothing is logged and the next call retries.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // Without this check a failed open would hand a null stream to fprintf() below.
    if (!s_log)
      return;
  }
  else
  {
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
414
415
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
416
{
417
if (!s_log) [[unlikely]]
418
return;
419
420
SmallString str;
421
LogValueStr(str, name, rval, val);
422
std::fprintf(s_log, " %s", str.c_str());
423
}
424
425
// Appends "name=[RAW, ...]" to str, where RAW is the actual register value.
// For a non-null val this is followed by the shadow's raw value (only when it differs),
// its x/y/z components, and the list of valid components.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (!val)
  {
    str.append(", NULL]");
    return;
  }

  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits != 0)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  //   str.append(", tainted");

  str.append(']');
}
456
457
#endif
458
459
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
460
{
461
PGXPValue& pvalue = PushSXY();
462
pvalue.x = x;
463
pvalue.y = y;
464
pvalue.z = z;
465
pvalue.value = value;
466
pvalue.flags = VALID_ALL;
467
468
if (g_settings.gpu_pgxp_vertex_cache)
469
CacheVertex(value, pvalue);
470
}
471
472
// Calls Validate() on the three FIFO entries with the actual register values and reports
// whether all of them have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = GetSXY0();
  v0.Validate(sxy0);

  PGXPValue& v1 = GetSXY1();
  v1.Validate(sxy1);

  PGXPValue& v2 = GetSXY2();
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 common_flags = (v0.flags & v1.flags & v2.flags & VALID_XYZ);
  return (common_flags == VALID_XYZ);
}
484
485
float CPU::PGXP::GTE_NCLIP()
486
{
487
const PGXPValue& SXY0 = GetSXY0();
488
const PGXPValue& SXY1 = GetSXY1();
489
const PGXPValue& SXY2 = GetSXY2();
490
float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
491
(SXY2.x * SXY1.y));
492
493
// ensure fractional values are not incorrectly rounded to 0
494
const float nclip_abs = std::abs(nclip);
495
if (0.1f < nclip_abs && nclip_abs < 1.0f)
496
nclip += (nclip < 0.0f ? -1.0f : 1.0f);
497
498
return nclip;
499
}
500
501
// Writes value into shadow GTE data register reg.
// - Register 15 pushes the screen coordinate FIFO instead of being stored directly.
// - Registers 29 and 31 are read-only and ignored.
// - Any other register receives a copy of value with its raw value replaced by val.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  if (reg == 15)
  {
    // push FIFO
    PGXPValue& newest = PushSXY();
    newest = value;
    return;
  }

  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
529
530
// CPU[Rt] = GTE_D[Rd]: calls Validate() on the shadow GTE register and copies it to the CPU register shadow.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_1(CPU::GetGTERegisterName(gte_index), rdVal, &g_state.pgxp_gte[gte_index]);

  PGXPValue& source = g_state.pgxp_gte[gte_index];
  source.Validate(rdVal);
  SetRtValue(instr, source, rdVal);
}
540
541
// GTE_D[Rd] = CPU[Rt]: calls Validate() on the CPU register shadow and stores it to the GTE register shadow.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_index, source, rtVal);
}
550
551
// GTE_D[Rt] = Mem[addr]: loads the validated shadow word and stores it to the GTE register shadow.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  CPU_MTC2(gte_index, loaded, rtVal);
}
559
560
// Mem[addr] = GTE_D[Rt]: calls Validate() on the GTE register shadow and stores it to shadow memory.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  const u32 idx = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& prtVal = g_state.pgxp_gte[idx];
#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal);

  // The log file may have failed to open; LogValue() tolerates that, so do the same here.
  if (s_log)
    std::fprintf(s_log, " addr=%08X", addr);
#endif
  prtVal.Validate(rtVal);
  WriteMem(addr, prtVal);
}
572
573
// Records vertex in the cache slot addressed by the packed screen coordinates in value
// (x in the low 16 bits, y in the high 16 bits, both signed).
// Coordinates are asserted (debug builds only) to lie in [-1024, 1023].
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);
  DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);

  // Bias both coordinates by 1024 so the table is indexed from zero.
  PGXPValue& slot = s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)];
  slot = vertex;
}
580
581
// Returns the cache slot addressed by the packed screen coordinates in value
// (x in the low 16 bits, y in the high 16 bits, both signed), or nullptr when either
// coordinate falls outside [-1024, 1023], the same range CacheVertex() stores into.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // The upper Y bound must be 1023 (not 1013), otherwise the last ten rows written by
  // CacheVertex() could never be looked up.
  return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ?
           &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] :
           nullptr;
}
589
590
// Applies the GPU's vertex position truncation to the integer part of p while keeping its fraction.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  return static_cast<float>(TruncateGPUVertexPosition(whole)) + fraction;
}
599
600
// Returns true when the precise position is within the configured tolerance of the integer
// position on both axes. A negative tolerance setting disables the check (always true).
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float x_error = std::abs(precise_x - static_cast<float>(int_x));
  const float y_error = std::abs(precise_y - static_cast<float>(int_y));
  return (x_error <= tolerance && y_error <= tolerance);
}
609
610
// Resolves the precise position for a vertex read from addr with raw contents value.
// Lookup order: shadow memory at addr, then the vertex cache (if enabled), then the native
// integer coordinates (x, y) with w = 1.
// Returns true only when the position came from shadow memory, passed the tolerance check,
// and carries a valid Z; every other outcome returns false.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  // Fills the outputs from a tracked vertex.
  const auto write_precise = [&](const PGXPValue& source) {
    *out_x = TruncateVertexPosition(source.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(source.y) + static_cast<float>(yOffs);
    *out_w = source.z / static_cast<float>(GTE::MAX_Z);
  };

  const PGXPValue* vert = GetPtr(addr);
  if (vert && (vert->flags & VALID_XY) == VALID_XY && vert->value == value)
  {
    write_precise(*vert);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
               std::abs(*out_y - y));
#endif

    // check validity of z component
    if (IsWithinTolerance(*out_x, *out_y, x, y))
      return ((vert->flags & VALID_Z) == VALID_Z);
  }

  if (g_settings.gpu_pgxp_vertex_cache)
  {
    vert = GetCachedVertex(value);
    if (vert && (vert->flags & VALID_XY) == VALID_XY)
    {
      write_precise(*vert);

      // Cached positions are used, but never reported as having a precise Z.
      if (IsWithinTolerance(*out_x, *out_y, x, y))
        return false;
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;
  return false;
}
653
654
// Rt = Mem[Rs + Im]: copies the validated shadow word into the rt shadow.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
660
661
// Byte loads are not tracked: the rt shadow is simply reset to INVALID_VALUE.
// addr and rtVal are only used for tracing.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  SetRtValue(instr, INVALID_VALUE);
}
666
667
// Rt = Mem[Rs + Im] (sign extended): 16-bit load into the rt shadow.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, true);
}
673
674
// Rt = Mem[Rs + Im] (zero extended): 16-bit load into the rt shadow.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& target = GetRtValue(instr);
  ValidateAndLoadMem16(target, addr, rtVal, false);
}
680
681
// Byte stores are not tracked: the shadow word at addr is overwritten with INVALID_VALUE.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  WriteMem(addr, INVALID_VALUE);
}
686
687
// 16-bit store of the validated rt shadow into the halfword of shadow memory selected by addr.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, source);
}
693
694
// Mem[Rs + Im] = Rt: 32-bit store of the validated rt shadow into shadow memory.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, source);
}
701
702
// Variant of CPU_MOVE() taking both register indices in one argument:
// source index in bits 0..7, destination index in the bits above.
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 dest_index = (rd_and_rs >> 8);
  const u32 source_index = (rd_and_rs & 0xFFu);
  CPU_MOVE(dest_index, source_index, rsVal);
}
708
709
// Rd = Rs: calls Validate() on the source shadow with rsVal, then copies it to the destination shadow.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macros expect an instruction named `instr`; there is none here, so log a zeroed one.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif

  PGXPValue& source = g_state.pgxp_gpr[Rs];
  source.Validate(rsVal);

  g_state.pgxp_gpr[Rd] = source;
}
719
720
// Rt = Rs + Imm (signed).
// The rt shadow starts as a copy of the validated rs shadow. A zero immediate leaves it at that;
// a zero rs turns it into the low-precision immediate; otherwise the immediate is added to the
// x/y halves with carry from x into y.
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  const u32 immVal = instr.i.imm_sext32();

  PGXPValue& result = GetRtValue(instr);
  result = source;

  // Adding zero: plain move.
  if (immVal == 0)
    return;

  // Adding to zero: the result is exactly the immediate.
  if (rsVal == 0)
  {
    // x is low precision value
    result.x = static_cast<float>(LOWORD_S16(immVal));
    result.y = static_cast<float>(HIWORD_S16(immVal));
    result.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
    result.value = immVal;
    return;
  }

  // Add the low halves as unsigned quantities.
  result.x = static_cast<float>(f16Unsign(result.x));
  result.x += static_cast<float>(LOWORD_U16(immVal));

  // carry on over/underflow
  const float carry = (result.x > USHRT_MAX) ? 1.0f : (result.x < 0.0f) ? -1.0f : 0.0f;
  result.x = static_cast<float>(f16Sign(result.x));
  result.y += HIWORD_S16(immVal) + carry;

  // truncate on overflow/underflow
  result.y += (result.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (result.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;

  result.value = rsVal + immVal;

  result.flags |= VALID_TAINTED_Z;
}
760
761
// Rt = Rs & Imm.
// The immediate is zero-extended, so the upper half (y) of the result is always zero.
// x is zero for a zero mask, the source's x for a full 0xFFFF mask, and otherwise the
// low-precision integer result.
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal & imm;
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& result = GetRtValue(instr);

  // remove upper 16-bits
  result.y = 0.0f;
  result.z = source.z;
  result.value = rtVal;
  result.flags = source.flags | VALID_Y | VALID_TAINTED_Z;

  if (imm == 0)
  {
    // if 0 then x == 0
    result.x = 0.0f;
    result.flags |= VALID_X;
  }
  else if (imm == 0xFFFFu)
  {
    // if saturated then x == x
    result.x = source.x;
  }
  else
  {
    // otherwise x is low precision value
    result.x = static_cast<float>(LOWORD_S16(rtVal));
    result.flags |= VALID_X;
  }
}
803
804
// Rt = Rs | Imm.
// The rt shadow becomes a copy of the validated rs shadow carrying the new raw value.
// A non-zero immediate additionally replaces x with the low-precision integer result.
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal | imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& result = GetRtValue(instr);
  result = source;
  result.value = rtVal;

  // if 0 then x == x
  if (imm != 0) [[likely]]
  {
    // otherwise x is low precision value
    result.x = static_cast<float>(LOWORD_S16(rtVal));
    result.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
828
829
// Rt = Rs ^ Imm.
// The rt shadow becomes a copy of the validated rs shadow carrying the new raw value.
// A non-zero immediate additionally replaces x with the low-precision integer result.
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal ^ imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& result = GetRtValue(instr);
  result = source;
  result.value = rtVal;

  // if 0 then x == x
  if (imm != 0) [[likely]]
  {
    // otherwise x is low precision value
    result.x = static_cast<float>(LOWORD_S16(rtVal));
    result.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
853
854
// Rt = Rs < Imm (signed).
// x receives the comparison result computed on the precise halves, y is zero, and the raw
// value is the integer comparison result.
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const s32 imm = instr.i.imm_s16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  // Split the sign-extended immediate into low/high halves like a register shadow.
  const float imm_low = static_cast<float>(imm);
  const float imm_high = imm_low < 0.0f ? -1.0f : 0.0f;

  PGXPValue& result = GetRtValue(instr);
  const bool precise_less = (source.GetValidY(rsVal) < imm_high || source.GetValidX(rsVal) < imm_low);
  result.x = precise_less ? 1.0f : 0.0f;
  result.y = 0.0f;
  result.z = source.z;
  result.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  result.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
872
873
// Rt = Rs < Imm (Unsigned).
// x receives the comparison result computed on the precise halves, y is zero, and the raw
// value is the integer comparison result.
// NOTE(review): the raw value compares against the immediate from imm_u16(); MIPS SLTIU
// sign-extends the immediate before the unsigned compare, so confirm this matches the
// interpreter's result for immediates >= 0x8000.
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_u16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  // Split the immediate into low/high halves like a register shadow.
  const float imm_low = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
  const float imm_high = imm_low < 0.0f ? -1.0f : 0.0f;

  PGXPValue& result = GetRtValue(instr);
  const bool precise_less = (f16Unsign(source.GetValidY(rsVal)) < f16Unsign(imm_high) ||
                             f16Unsign(source.GetValidX(rsVal)) < imm_low);
  result.x = precise_less ? 1.0f : 0.0f;
  result.y = 0.0f;
  result.z = source.z;
  result.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  result.value = BoolToUInt32(rsVal < imm);
}
892
893
// Rt = Imm << 16: the immediate lands in the upper half (y), the lower half (x) is zero.
// The result has valid X/Y and no Z.
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  PGXPValue& result = GetRtValue(instr);
  result.value = instr.i.imm_zext32() << 16;
  result.flags = VALID_XY;
  result.x = 0.0f;
  result.y = static_cast<float>(instr.i.imm_s16());
  result.z = 0.0f;
}
905
906
// Rd = Rs + Rt (signed).
// When either operand is zero the result is a copy of the other operand's shadow (borrowing Z
// from the zero operand if the copy lacks one). Otherwise the x halves are added as unsigned
// quantities with the carry propagated into y.
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& result = GetRdValue(instr);

  if (rtVal == 0)
  {
    result = lhs;
    CopyZIfMissing(result, rhs);
    return;
  }

  if (rsVal == 0)
  {
    result = rhs;
    CopyZIfMissing(result, lhs);
    return;
  }

  const double x = f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  const float carry = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
  result.x = static_cast<float>(f16Sign(x));
  result.y = lhs.GetValidY(rsVal) + rhs.GetValidY(rtVal) + carry;

  // truncate on overflow/underflow
  result.y += (result.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (result.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;

  result.value = rsVal + rtVal;

  // valid x/y only if one side had a valid x/y
  result.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(result.z, result.flags, lhs, rhs);
}
945
946
// Rd = Rs - Rt (signed).
// Subtracting zero copies the rs shadow (borrowing Z from rt if the copy lacks one). Otherwise
// the x halves are subtracted as unsigned quantities with the borrow propagated into y.
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& result = GetRdValue(instr);

  if (rtVal == 0)
  {
    result = lhs;
    CopyZIfMissing(result, rhs);
    return;
  }

  const double x = f16Unsign(lhs.GetValidX(rsVal)) - f16Unsign(rhs.GetValidX(rtVal));

  // carry on over/underflow
  const float carry = (x > USHRT_MAX) ? 1.0f : (x < 0.0f) ? -1.0f : 0.0f;
  result.x = static_cast<float>(f16Sign(x));
  result.y = lhs.GetValidY(rsVal) - (rhs.GetValidY(rtVal) - carry);

  // truncate on overflow/underflow
  result.y += (result.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (result.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;

  result.value = rsVal - rtVal;

  // valid x/y only if one side had a valid x/y
  result.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(result.z, result.flags, lhs, rhs);
}
980
981
// Shared implementation of the register-register bitwise operations; rdVal is the already
// computed integer result. Each 16-bit half of the result is resolved independently:
// zero -> 0, identical to the matching half of rs or rt -> that operand's precise component,
// anything else -> the low-precision integer half.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  // Lower half -> x.
  const u16 result_low = LOWORD_U16(rdVal);
  float new_x;
  if (result_low == 0)
    new_x = 0.0f;
  else if (result_low == LOWORD_U16(rsVal))
    new_x = lhs.GetValidX(rsVal);
  else if (result_low == LOWORD_U16(rtVal))
    new_x = rhs.GetValidX(rtVal);
  else
    new_x = static_cast<float>(LOWORD_S16(rdVal));

  // Upper half -> y.
  const u16 result_high = HIWORD_U16(rdVal);
  float new_y;
  if (result_high == 0)
    new_y = 0.0f;
  else if (result_high == HIWORD_U16(rsVal))
    new_y = lhs.GetValidY(rsVal);
  else if (result_high == HIWORD_U16(rtVal))
    new_y = rhs.GetValidY(rtVal);
  else
    new_y = static_cast<float>(HIWORD_S16(rdVal));

  // Why not write directly to prdVal? Because it might be the same as the source.
  u32 new_flags = ((lhs.flags | rhs.flags) & VALID_XY) ? (VALID_XY | VALID_TAINTED_Z) : 0;
  PGXPValue& result = GetRdValue(instr);
  SelectZ(result.z, new_flags, lhs, rhs);
  result.x = new_x;
  result.y = new_y;
  result.flags = new_flags;
  result.value = rdVal;
}
1015
1016
// AND: Rd = Rs & Rt. The integer result is computed here and handed to the shared bitwise handler.
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal & rtVal, rsVal, rtVal);
}
1024
1025
// OR: Rd = Rs | Rt. The integer result is computed here and handed to the shared bitwise handler.
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal | rtVal, rsVal, rtVal);
}
1033
1034
// XOR: Rd = Rs ^ Rt. The integer result is computed here and handed to the shared bitwise handler.
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal ^ rtVal, rsVal, rtVal);
}
1042
1043
// NOR: Rd = ~(Rs | Rt). The integer result is computed here and handed to the shared bitwise handler.
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
1051
1052
// SLT: Rd = (Rs < Rt) as a signed comparison.
// The precise result is 1.0/0.0 in x with y forced to zero; Z and the remaining flags are
// inherited from Rs, and Z is marked tainted.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);

  // Compare as a two-part number: the high halves decide, and the low halves only break a
  // tie. Without the tie check, a larger high half paired with a smaller low half (e.g.
  // Rs = 3 against Rt = -5) would report "less than" and disagree with the integer result.
  // Everything is read before Rd is written, since Rd may alias Rs or Rt.
  const auto rs_hi = prsVal.GetValidY(rsVal);
  const auto rt_hi = prtVal.GetValidY(rtVal);
  const bool lo_less = f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal));
  const bool is_less = (rs_hi < rt_hi) || ((rs_hi == rt_hi) && lo_less);

  prdVal.x = is_less ? 1.0f : 0.0f;
  prdVal.y = 0.0f;
  prdVal.z = prsVal.z;
  prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  prdVal.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
1069
1070
// SLTU: Rd = (Rs < Rt) as an unsigned comparison.
// The precise result is 1.0/0.0 in x with y forced to zero; Z and the remaining flags are
// inherited from Rs, and Z is marked tainted.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);

  // Compare as a two-part number: the high halves decide, and the low halves only break a
  // tie. Without the tie check, Rs = 0x00010000 against Rt = 0x00000001 would report
  // "less than" (0 < 1 in the low half) and disagree with the integer result.
  // Everything is read before Rd is written, since Rd may alias Rs or Rt.
  const auto rs_hi = f16Unsign(prsVal.GetValidY(rsVal));
  const auto rt_hi = f16Unsign(prtVal.GetValidY(rtVal));
  const bool lo_less = f16Unsign(prsVal.GetValidX(rsVal)) < f16Unsign(prtVal.GetValidX(rtVal));
  const bool is_less = (rs_hi < rt_hi) || ((rs_hi == rt_hi) && lo_less);

  prdVal.x = is_less ? 1.0f : 0.0f;
  prdVal.y = 0.0f;
  prdVal.z = prsVal.z;
  prdVal.flags = prsVal.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  prdVal.value = BoolToUInt32(rsVal < rtVal);
}
1087
1088
// MULT: Hi/Lo = Rs * Rt (signed). Updates the precise entries for the LO and HI registers.
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as copies of Rs, so they share its Z and flags.
  // NOTE(review): ploVal has just been copied from prsVal, so passing prsVal as the Z source
  // looks redundant -- confirm whether prtVal was intended.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prsVal);
  phiVal = ploVal;

  const float rs_x = prsVal.GetValidX(rsVal);
  const float rs_y = prsVal.GetValidY(rsVal);
  const float rt_x = prtVal.GetValidX(rtVal);
  const float rt_y = prtVal.GetValidY(rtVal);

  // Partial products of the two halves. The low halves go through f16Unsign(), the high
  // halves are used as-is for the signed multiply.
  const auto rs_x_u = f16Unsign(rs_x);
  const auto rt_x_u = f16Unsign(rt_x);
  const double low_low = rs_x_u * rt_x_u;
  const double low_high = rs_x_u * (rt_y);
  const double high_low = rs_y * rt_x_u;
  const double high_high = rs_y * rt_y;

  // Distribute the partial products over the four output halves, carrying the overflow of
  // each half into the next one up.
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  const u32 extra_flags = VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  ploVal.x = static_cast<float>(f16Sign(low_low));
  ploVal.y = static_cast<float>(f16Sign(lo_y));
  ploVal.flags |= extra_flags;

  phiVal.x = static_cast<float>(f16Sign(hi_x));
  phiVal.y = static_cast<float>(f16Sign(hi_y));
  phiVal.flags |= extra_flags;

  // Integer result, stored as the tracked value for each output register.
  const s64 lhs = static_cast<s64>(SignExtend64(rsVal));
  const s64 rhs = static_cast<s64>(SignExtend64(rtVal));
  const u64 product = static_cast<u64>(lhs * rhs);
  ploVal.value = Truncate32(product);
  phiVal.value = Truncate32(product >> 32);
}
1133
1134
// MULTU: Hi/Lo = Rs * Rt (unsigned). Updates the precise entries for the LO and HI registers.
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as copies of Rs, so they share its Z and flags.
  // NOTE(review): ploVal has just been copied from prsVal, so passing prsVal as the Z source
  // looks redundant -- confirm whether prtVal was intended.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prsVal);
  phiVal = ploVal;

  const float rs_x = prsVal.GetValidX(rsVal);
  const float rs_y = prsVal.GetValidY(rsVal);
  const float rt_x = prtVal.GetValidX(rtVal);
  const float rt_y = prtVal.GetValidY(rtVal);

  // Partial products of the two halves. For the unsigned multiply every half goes through
  // f16Unsign() first.
  const auto rs_x_u = f16Unsign(rs_x);
  const auto rs_y_u = f16Unsign(rs_y);
  const auto rt_x_u = f16Unsign(rt_x);
  const auto rt_y_u = f16Unsign(rt_y);
  const double low_low = rs_x_u * rt_x_u;
  const double low_high = rs_x_u * rt_y_u;
  const double high_low = rs_y_u * rt_x_u;
  const double high_high = rs_y_u * rt_y_u;

  // Distribute the partial products over the four output halves, carrying the overflow of
  // each half into the next one up.
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  const u32 extra_flags = VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  ploVal.x = static_cast<float>(f16Sign(low_low));
  ploVal.y = static_cast<float>(f16Sign(lo_y));
  ploVal.flags |= extra_flags;

  phiVal.x = static_cast<float>(f16Sign(hi_x));
  phiVal.y = static_cast<float>(f16Sign(hi_y));
  phiVal.flags |= extra_flags;

  // Integer result, stored as the tracked value for each output register.
  const u64 product = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  ploVal.value = Truncate32(product);
  phiVal.value = Truncate32(product >> 32);
}
1179
1180
// DIV (signed): Lo = Rs / Rt, Hi = Rs % Rt. Updates the precise entries for LO and HI.
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as copies of Rs, so they share its Z and flags.
  // NOTE(review): ploVal has just been copied from prsVal, so passing prsVal as the Z source
  // looks redundant -- confirm whether prtVal was intended.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prsVal);
  phiVal = ploVal;

  // Recombine each operand's halves into one double: low half + high half * 65536.
  const double high_scale = static_cast<double>(1 << 16);
  const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + prsVal.GetValidY(rsVal) * high_scale;
  const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + prtVal.GetValidY(rtVal) * high_scale;

  const u32 extra_flags = VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // NOTE(review): vt is not checked against zero before the division/fmod below -- confirm
  // that f16Sign()/f16Overflow() cope with the resulting inf/NaN.
  const double quotient = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  ploVal.x = static_cast<float>(f16Sign(quotient));
  ploVal.flags |= extra_flags;

  const double remainder = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  phiVal.x = static_cast<float>(f16Sign(remainder));
  phiVal.flags |= extra_flags;

  // Integer results, including the two special cases handled without a real division.
  const s32 dividend = static_cast<s32>(rsVal);
  const s32 divisor = static_cast<s32>(rtVal);
  if (divisor == 0)
  {
    // Division by zero: the quotient depends on the dividend's sign, the remainder is the dividend.
    ploVal.value = (dividend >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    phiVal.value = static_cast<u32>(dividend);
  }
  else if (rsVal == UINT32_C(0x80000000) && divisor == -1)
  {
    // 0x80000000 / -1 cannot be represented in 32 bits.
    ploVal.value = UINT32_C(0x80000000);
    phiVal.value = 0;
  }
  else
  {
    ploVal.value = static_cast<u32>(dividend / divisor);
    phiVal.value = static_cast<u32>(dividend % divisor);
  }
}
1229
1230
// DIVU (unsigned): Lo = Rs / Rt, Hi = Rs % Rt. Updates the precise entries for LO and HI.
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& ploVal = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& phiVal = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as copies of Rs, so they share its Z and flags.
  // NOTE(review): ploVal has just been copied from prsVal, so passing prsVal as the Z source
  // looks redundant -- confirm whether prtVal was intended.
  ploVal = prsVal;
  CopyZIfMissing(ploVal, prsVal);
  phiVal = ploVal;

  // Recombine each operand's halves into one double: low half + high half * 65536, with both
  // halves passed through f16Unsign() for the unsigned divide.
  const double high_scale = static_cast<double>(1 << 16);
  const double vs = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prsVal.GetValidY(rsVal)) * high_scale;
  const double vt = f16Unsign(prtVal.GetValidX(rtVal)) + f16Unsign(prtVal.GetValidY(rtVal)) * high_scale;

  const u32 extra_flags = VALID_TAINTED_Z | (prtVal.flags & VALID_XY);

  // NOTE(review): vt is not checked against zero before the division/fmod below -- confirm
  // that f16Sign()/f16Overflow() cope with the resulting inf/NaN.
  const double quotient = vs / vt;
  ploVal.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  ploVal.x = static_cast<float>(f16Sign(quotient));
  ploVal.flags |= extra_flags;

  const double remainder = std::fmod(vs, vt);
  phiVal.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  phiVal.x = static_cast<float>(f16Sign(remainder));
  phiVal.flags |= extra_flags;

  // Integer results.
  if (rtVal != 0)
  {
    ploVal.value = rsVal / rtVal;
    phiVal.value = rsVal % rtVal;
  }
  else
  {
    // Division by zero: the quotient is all ones, the remainder is the dividend.
    ploVal.value = UINT32_C(0xFFFFFFFF);
    phiVal.value = rsVal;
  }
}
1274
1275
// Shared implementation of the left shifts (SLL/SLLV): Rd = Rt << sh.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  const u32 rdVal = rtVal << sh;
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);

  // Z and the integer result are the same whichever branch is taken below.
  // NOTE: Rd may be the same register as Rt, so each branch reads what it needs from the
  // source before overwriting the matching destination field.
  prdVal.z = prtVal.z;
  prdVal.value = rdVal;

  if (sh >= 32) [[unlikely]]
  {
    // Everything is shifted out.
    prdVal.x = 0.0f;
    prdVal.y = 0.0f;
    prdVal.flags = prtVal.flags | VALID_XY | VALID_TAINTED_Z;
  }
  else if (sh >= 16)
  {
    // The low half moves into the high half. A shift of exactly 16 moves it across untouched;
    // larger shifts scale it by the remaining amount first.
    const float new_y =
      (sh == 16) ? prtVal.x :
                   static_cast<float>(f16Sign(f16Unsign(prtVal.x * static_cast<double>(1 << (sh - 16)))));
    prdVal.y = new_y;
    prdVal.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // prdVal.flags = (prtVal.flags & ~VALID_Y) | ((prtVal.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    prdVal.flags = (prtVal.flags | VALID_TAINTED_Z) | ((prtVal.flags & VALID_Y) >> 1);
  }
  else
  {
    // Scale both halves and carry whatever overflows the low half into the high half.
    const double scaled_x = f16Unsign(prtVal.x) * static_cast<double>(1 << sh);
    const double scaled_y = (f16Unsign(prtVal.y) * static_cast<double>(1 << sh)) + f16Overflow(scaled_x);
    prdVal.x = static_cast<float>(f16Sign(scaled_x));
    prdVal.y = static_cast<float>(f16Sign(scaled_y));
    prdVal.flags = (prtVal.flags | VALID_TAINTED_Z);
  }
}
1317
1318
// SLL: Rd = Rt << Sa, with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SLL(instr, rtVal, shift_amount);
}
1326
1327
// SLLV: Rd = Rt << Rs, where only the low five bits of Rs are used as the shift amount.
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SLL(instr, rtVal, shift_amount);
}
1335
1336
// Shared implementation of the right shifts (SRL/SRLV/SRA/SRAV).
//   sh          - shift amount
//   sign        - true for the arithmetic variants, false for the logical ones
//   is_variable - true when the shift amount came from a register rather than the opcode
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  // Applies this instruction's shift (arithmetic or logical) to a 32-bit value.
  const auto shift_right = [sh, sign](u32 v) -> u32 {
    return sign ? static_cast<u32>(static_cast<s32>(v) >> sh) : (v >> sh);
  };

  const u32 rdVal = shift_right(rtVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  double x = prtVal.x;
  double y = prtVal.y;
  if (!sign)
    y = f16Unsign(prtVal.y);

  // Integer test values used to work out where each half ends up after the shift.
  const u32 iX = SignExtend32(LOWORD_S16(rtVal));   // remove Y
  const u32 iY = SET_LOWORD(rtVal, HIWORD_U16(iX)); // overwrite x with sign(x)
  const s16 x_sign = HIWORD_S16(iX);

  // The X-only test value is always shifted arithmetically; the Y one follows the instruction.
  const u32 dX = static_cast<u32>(static_cast<s32>(iX) >> sh);
  const u32 dY = shift_right(iY);

  // Contribution of the original low half to the new low half.
  if (LOWORD_S16(dX) == x_sign)
    x = LOWORD_S16(dX); // only sign bits left
  else
    x = x / static_cast<double>(1 << sh);

  // Contribution of the original high half to the new low half.
  if (LOWORD_S16(dY) != x_sign)
  {
    if (sh < 16)
    {
      const double scale = static_cast<double>(1 << (16 - sh));
      x += y * scale;
      if (prtVal.x < 0)
        x += scale;
    }
    else if (sh == 16)
    {
      x = y;
    }
    else
    {
      x += y / static_cast<double>(1 << (sh - 16));
    }
  }

  // New high half: either nothing but fill bits (0 or -1), or the scaled original.
  const s16 shifted_hi = HIWORD_S16(dY);
  if ((shifted_hi == 0) || (shifted_hi == -1))
    y = shifted_hi;
  else
    y = y / static_cast<double>(1 << sh);

  // Rd is only looked up after everything has been derived from Rt.
  PGXPValue& prdVal = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  const bool use_low_precision = sign && !is_variable && !(prtVal.flags & VALID_Z) && sh < 16;
  if (use_low_precision)
  {
    prdVal.x = static_cast<float>(LOWORD_S16(rdVal));
    prdVal.y = static_cast<float>(HIWORD_S16(rdVal));
    prdVal.z = 0.0f;
    prdVal.flags = VALID_XY | VALID_TAINTED_Z;
  }
  else
  {
    prdVal.x = static_cast<float>(f16Sign(x));
    prdVal.y = static_cast<float>(f16Sign(y));
    prdVal.z = prtVal.z;
    prdVal.flags = prtVal.flags | VALID_TAINTED_Z;
  }

  prdVal.value = rdVal;
}
1402
1403
// SRL: Rd = Rt >> Sa (logical), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/false);
}
1411
1412
// SRLV: Rd = Rt >> Rs (logical), where only the low five bits of Rs are used as the shift amount.
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/true);
}
1420
1421
// SRA: Rd = Rt >> Sa (arithmetic), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/false);
}
1429
1430
// SRAV: Rd = Rt >> Rs (arithmetic), where only the low five bits of Rs are used as the shift amount.
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  const u32 shift_amount = rsVal & 0x1F;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/true);
}
1438
1439
// MFC0: CPU[Rt] = CP0[Rd]. rdVal is the integer value read from the COP0 register.
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 cop0_index = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", cop0_index).c_str(), rdVal, &g_state.pgxp_cop0[cop0_index]);

  // Check the COP0 entry against the value actually read before handing it on.
  PGXPValue& cop0_entry = g_state.pgxp_cop0[cop0_index];
  cop0_entry.Validate(rdVal);

  // Copy the whole entry into the destination GPR and tag it with the integer value.
  PGXPValue& dest_entry = GetRtValue(instr);
  dest_entry = cop0_entry;
  dest_entry.value = rdVal;
}
1452
1453
// MTC0: CP0[Rd] = CPU[Rt].
//   rdVal - integer value to record for the COP0 destination register
//   rtVal - integer value of the source GPR, used to validate its precise entry
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];

  // Copy the source entry into the COP0 slot, then tag the *destination* with rdVal. This
  // matches CPU_MFC0, which also sets .value on the entry it writes. Previously rdVal was
  // written to prtVal.value, which altered the source GPR's entry and left the COP0 entry
  // tagged with rtVal.
  prdVal = prtVal;
  prdVal.value = rdVal;
}
1463
1464