Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
stenzek
GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_pgxp.cpp
7393 views
1
// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
//
4
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
5
// No original code remains. The original copyright notice is included above for historical purposes.
6
//
7
8
#include "cpu_pgxp.h"
9
#include "bus.h"
10
#include "cpu_core.h"
11
#include "cpu_core_private.h"
12
#include "cpu_disasm.h"
13
#include "gpu_helpers.h"
14
#include "settings.h"
15
16
#include "util/gpu_device.h"
17
#include "util/state_wrapper.h"
18
19
#include "common/assert.h"
20
#include "common/log.h"
21
22
#include <climits>
23
#include <cmath>
24
25
LOG_CHANNEL(CPU);
26
27
// #define LOG_VALUES 1
28
// #define LOG_LOOKUPS 1
29
30
// TODO: Don't update flags on Validate(), instead return it.
31
32
namespace CPU::PGXP {
33
34
enum : u32
35
{
36
VERTEX_CACHE_WIDTH = 2048,
37
VERTEX_CACHE_HEIGHT = 2048,
38
VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,
39
PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,
40
PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
41
};
42
43
enum : u32
44
{
45
VALID_X = (1u << 0),
46
VALID_Y = (1u << 1),
47
VALID_Z = (1u << 2),
48
VALID_LOWZ = (1u << 16), // Valid Z from the low part of a 32-bit value.
49
VALID_HIGHZ = (1u << 17), // Valid Z from the high part of a 32-bit value.
50
VALID_TAINTED_Z = (1u << 31), // X/Y has been changed, Z may not be accurate.
51
52
VALID_XY = (VALID_X | VALID_Y),
53
VALID_XYZ = (VALID_X | VALID_Y | VALID_Z),
54
VALID_ALL = (VALID_X | VALID_Y | VALID_Z),
55
};
56
57
#define LOWORD_U16(val) (static_cast<u16>(val))
58
#define HIWORD_U16(val) (static_cast<u16>(static_cast<u32>(val) >> 16))
59
#define LOWORD_S16(val) (static_cast<s16>(static_cast<u16>(val)))
60
#define HIWORD_S16(val) (static_cast<s16>(static_cast<u16>(static_cast<u32>(val) >> 16)))
61
#define SET_LOWORD(val, loword) ((static_cast<u32>(val) & 0xFFFF0000u) | static_cast<u32>(static_cast<u16>(loword)))
62
#define SET_HIWORD(val, hiword) ((static_cast<u32>(val) & 0x0000FFFFu) | (static_cast<u32>(hiword) << 16))
63
64
#define PGXP_GTE_REGISTER(field) g_state.pgxp_gte[offsetof(GTE::Regs, field) / sizeof(u32)]
65
66
static double f16Sign(double val);
67
static double f16Unsign(double val);
68
static double f16Overflow(double val);
69
70
static void CacheVertex(u32 value, const PGXPValue& vertex);
71
static PGXPValue* GetCachedVertex(u32 value);
72
73
static float TruncateVertexPosition(float p);
74
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
75
76
static PGXPValue& GetRdValue(Instruction instr);
77
static PGXPValue& GetRtValue(Instruction instr);
78
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
79
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
80
static void SetRtValue(Instruction instr, const PGXPValue& val);
81
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
82
static void PushScreenXYFIFO();
83
84
static PGXPValue* GetPtr(u32 addr);
85
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
86
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
87
88
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
89
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
90
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
91
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
92
93
static void WriteMem(u32 addr, const PGXPValue& value);
94
static void WriteMem16(u32 addr, const PGXPValue& value);
95
96
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
97
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
98
99
#ifdef LOG_VALUES
100
static void LogInstruction(u32 pc, Instruction instr);
101
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
102
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);
103
104
// clang-format off
105
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
106
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
107
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
108
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
109
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
110
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); std::fprintf(s_log, " addr=%08X", addr); } while(0)
111
#else
112
#define LOG_VALUES_NV() (void)0
113
#define LOG_VALUES_1(name, rval, val) (void)0
114
#define LOG_VALUES_C1(rnum, rval) (void)0
115
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
116
#define LOG_VALUES_LOAD(addr, val) (void)0
117
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
118
#endif
119
// clang-format on
120
121
static constexpr const PGXPValue INVALID_VALUE = {};
122
123
static PGXPValue* s_mem = nullptr;
124
static PGXPValue* s_vertex_cache = nullptr;
125
126
#ifdef LOG_VALUES
127
static std::FILE* s_log;
128
#endif
129
} // namespace CPU::PGXP
130
131
void CPU::PGXP::Initialize()
132
{
133
// Just in case due to memory layout...
134
static_assert(&PGXP_GTE_REGISTER(SXY0) == &g_state.pgxp_gte[12]);
135
static_assert(&PGXP_GTE_REGISTER(SXY1) == &g_state.pgxp_gte[13]);
136
static_assert(&PGXP_GTE_REGISTER(SXY2) == &g_state.pgxp_gte[14]);
137
static_assert(&PGXP_GTE_REGISTER(SXYP) == &g_state.pgxp_gte[15]);
138
139
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
140
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
141
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
142
143
if (!s_mem)
144
{
145
s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
146
if (!s_mem)
147
Panic("Failed to allocate PGXP memory");
148
}
149
150
if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache)
151
{
152
s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
153
if (!s_vertex_cache)
154
{
155
ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
156
g_settings.gpu_pgxp_vertex_cache = false;
157
}
158
}
159
160
if (s_vertex_cache)
161
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
162
}
163
164
void CPU::PGXP::Reset()
165
{
166
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
167
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
168
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
169
170
if (s_mem)
171
std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);
172
173
if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
174
std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
175
}
176
177
void CPU::PGXP::Shutdown()
178
{
179
if (s_vertex_cache)
180
{
181
std::free(s_vertex_cache);
182
s_vertex_cache = nullptr;
183
}
184
if (s_mem)
185
{
186
std::free(s_mem);
187
s_mem = nullptr;
188
}
189
190
std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
191
std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
192
std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
193
}
194
195
bool CPU::PGXP::ShouldSavePGXPState()
196
{
197
// Only save PGXP state for runahead, not rewind.
198
// The performance impact is too great, and the glitches are much less noticeable with rewind.
199
return (g_settings.gpu_pgxp_enable && g_settings.IsRunaheadEnabled());
200
}
201
202
size_t CPU::PGXP::GetStateSize()
203
{
204
const size_t base_size = sizeof(g_state.pgxp_gpr) + sizeof(g_state.pgxp_cop0) + sizeof(g_state.pgxp_gte) +
205
(sizeof(PGXPValue) * PGXP_MEM_SIZE);
206
const size_t vertex_cache_size = sizeof(PGXPValue) * VERTEX_CACHE_SIZE;
207
return base_size + (g_settings.gpu_pgxp_vertex_cache ? vertex_cache_size : 0);
208
}
209
210
// Passes the PGXP data through the state wrapper: register shadows, shadow memory,
// and the vertex cache if one is allocated. Does nothing when ShouldSavePGXPState() is false.
void CPU::PGXP::DoState(StateWrapper& sw)
{
  // Value checks will fail and fall back to imprecise geometry when using rewind.
  if (ShouldSavePGXPState())
  {
    sw.DoBytes(g_state.pgxp_gpr, sizeof(g_state.pgxp_gpr));
    sw.DoBytes(g_state.pgxp_cop0, sizeof(g_state.pgxp_cop0));
    sw.DoBytes(g_state.pgxp_gte, sizeof(g_state.pgxp_gte));

    sw.DoBytes(s_mem, sizeof(PGXPValue) * PGXP_MEM_SIZE);

    if (s_vertex_cache != nullptr)
      sw.DoBytes(s_vertex_cache, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
  }
}
227
228
// Scales val by 65536, truncates to an integer narrowed to 32 bits, and scales back down.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const s64 scaled = static_cast<s64>(val * (USHRT_MAX + 1));
  const s32 narrowed = static_cast<s32>(scaled);
  return static_cast<double>(narrowed) / static_cast<double>(USHRT_MAX + 1);
}
233
234
// Returns val unchanged when it is >= 0, otherwise val + 65536.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
238
239
// Truncates val to a 64-bit integer and returns that integer shifted right by 16 bits.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 whole = static_cast<s64>(val);
  const s64 upper = whole >> 16;
  return static_cast<double>(upper);
}
243
244
// Returns the PGXP shadow of the CPU register named by the instruction's rd field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd_index = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd_index];
}
248
249
// Returns the PGXP shadow of the CPU register named by the instruction's rt field.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt_index];
}
253
254
// Validates the rt register's shadow against the register's actual value, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow.Validate(rtVal);
  return shadow;
}
260
261
// Validates the rs register's shadow against the register's actual value, then returns it.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs_index = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs_index];
  shadow.Validate(rsVal);
  return shadow;
}
267
268
// Overwrites the rt register's shadow with val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow = val;
}
272
273
// Overwrites the rt register's shadow with val, then forces its raw value field to rtVal.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  const u8 rt_index = static_cast<u8>(instr.r.rt.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rt_index];
  shadow = val;
  shadow.value = rtVal;
}
279
280
// Advances the screen XY FIFO shadows by one slot: SXY0 <- SXY1 <- SXY2 <- SXYP.
ALWAYS_INLINE void CPU::PGXP::PushScreenXYFIFO()
{
  PGXPValue& sxy0 = PGXP_GTE_REGISTER(SXY0);
  PGXPValue& sxy1 = PGXP_GTE_REGISTER(SXY1);
  PGXPValue& sxy2 = PGXP_GTE_REGISTER(SXY2);
  const PGXPValue& sxyp = PGXP_GTE_REGISTER(SXYP);

  // Oldest slot first, so nothing is overwritten before it has been copied.
  sxy0 = sxy1;
  sxy1 = sxy2;
  sxy2 = sxyp;
}
286
287
// Maps a CPU address to its shadow-memory entry (one entry per 32-bit word).
// Scratchpad addresses map into the region starting at PGXP_MEM_SCRATCH_OFFSET; addresses
// below Bus::RAM_MIRROR_END (after masking with KSEG_MASK) map into the RAM region.
// Returns nullptr for anything else.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const auto word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  // Don't worry about >512MB here for performance reasons.
  const u32 paddr = (addr & KSEG_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
}
305
306
// Looks up the shadow entry for addr, validates it against the word actually loaded,
// and returns it. Returns INVALID_VALUE when addr has no shadow entry.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
315
316
// Loads the 16-bit half selected by bit 1 of addr from shadow memory into dest.
// Only the selected half is checked against value; on mismatch its valid bit is cleared in
// the memory entry. The loaded component ends up in dest.x, and dest.y is synthesised as the
// extension of the upper half (`sign` selects sign extension). dest becomes INVALID_VALUE when
// addr has no shadow entry.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  // determine if high or low word
  const bool hiword = ((addr & 2) != 0);

  // only validate the component we're interested in
  if (hiword)
  {
    if (Truncate16(entry->value >> 16) != Truncate16(value))
      entry->flags = (entry->flags & ~VALID_Y);
  }
  else
  {
    if (Truncate16(entry->value) != Truncate16(value))
      entry->flags = (entry->flags & ~VALID_X);
  }

  // copy whole value
  dest = *entry;

  // a high-word load moves Y (and its valid bit) down into X
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // only set y as valid if x is also valid.. don't want to make fake values
  const bool x_valid = ((dest.flags & VALID_X) != 0);
  if (x_valid)
  {
    dest.y = (dest.x < 0) ? -1.0f * sign : 0.0f;
    dest.flags |= VALID_Y;
  }
  else
  {
    dest.y = 0.0f;
    dest.flags &= ~VALID_Y;
  }

  dest.value = value;
}
357
358
// Stores value into the shadow entry for addr (no-op if addr has no entry).
// VALID_LOWZ/VALID_HIGHZ are set together when value has VALID_Z and cleared otherwise.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  PGXPValue* const slot = GetPtr(addr);
  if (!slot) [[unlikely]]
    return;

  const u32 half_z_bits = (VALID_LOWZ | VALID_HIGHZ);
  u32 stored_flags = (value.flags & ~half_z_bits);
  if (value.flags & VALID_Z)
    stored_flags |= half_z_bits;

  *slot = value;
  slot->flags = stored_flags;
}
368
369
// Stores value.x into the half of the shadow entry selected by bit 1 of addr
// (Y for the high half, X for the low half), merging the raw halfword and updating the
// per-half Z bookkeeping. No-op if addr has no shadow entry.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const slot = GetPtr(addr);
  if (!slot) [[unlikely]]
    return;

  // determine if high or low word
  const bool hiword = ((addr & 2) != 0);
  const u32 src_x_valid = (value.flags & VALID_X);

  if (!hiword)
  {
    slot->x = value.x;
    slot->flags = (slot->flags & ~VALID_X) | src_x_valid;
    slot->value = (slot->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }
  else
  {
    slot->y = value.x;
    slot->flags = (slot->flags & ~VALID_Y) | (src_x_valid << 1);
    slot->value = (slot->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }

  // overwrite z/w if valid
  // TODO: Check modified
  const u32 half_z_bit = hiword ? VALID_HIGHZ : VALID_LOWZ;
  if (value.flags & VALID_Z)
  {
    slot->z = value.z;
    slot->flags |= (VALID_Z | half_z_bit);
    return;
  }

  // This half no longer carries Z; drop VALID_Z as well once neither half does.
  slot->flags &= ~half_z_bit;
  if (!(slot->flags & (VALID_HIGHZ | VALID_LOWZ)))
    slot->flags &= ~VALID_Z;
}
404
405
// Takes Z from src when dst has no valid Z, and merges src's VALID_Z bit into dst's flags.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
410
411
// Picks a Z value from two sources and ORs VALID_Z into dst_flags if either source has it.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  // Prefer src2 if src1 is missing Z, or is potentially an imprecise value, when src2 is precise.
  bool use_src2 = !(src1.flags & VALID_Z);
  if (!use_src2)
  {
    const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
    const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);
    use_src2 = (src1_tainted && src2_precise);
  }

  const float chosen_z = use_src2 ? src2.z : src1.z;
  dst_z = chosen_z;
  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
421
422
#ifdef LOG_VALUES
423
// Starts a new line in pgxp.log for the instruction at pc: address, raw bits and disassembly.
// The log file is opened on first use. If it cannot be opened nothing is logged, and the
// open is retried on the next call.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // fopen() returns null on failure; writing to a null stream is undefined behaviour.
    if (!s_log)
      return;
  }
  else
  {
    // Finish the previous instruction's line before starting this one.
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
439
440
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
441
{
442
if (!s_log) [[unlikely]]
443
return;
444
445
SmallString str;
446
LogValueStr(str, name, rval, val);
447
std::fprintf(s_log, " %s", str.c_str());
448
}
449
450
// Appends "name=[raw, ...]" to str: the raw value, the tracked raw value if it differs,
// the x/y/z components, and which of X/Y/Z are flagged valid. A null val is printed as NULL.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);

  if (val == nullptr)
  {
    str.append(", NULL]");
    return;
  }

  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits != 0)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  // str.append(", tainted");

  str.append(']');
}
481
482
#endif
483
484
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
485
{
486
PGXPValue& SXYP = PGXP_GTE_REGISTER(SXYP);
487
SXYP.x = x;
488
SXYP.y = y;
489
SXYP.z = z;
490
SXYP.value = value;
491
SXYP.flags = VALID_ALL;
492
PushScreenXYFIFO();
493
494
if (g_settings.gpu_pgxp_vertex_cache)
495
CacheVertex(value, SXYP);
496
}
497
498
// Validates the SXY0..SXY2 shadows against the given register values and reports whether
// all three have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = PGXP_GTE_REGISTER(SXY0);
  v0.Validate(sxy0);

  PGXPValue& v1 = PGXP_GTE_REGISTER(SXY1);
  v1.Validate(sxy1);

  PGXPValue& v2 = PGXP_GTE_REGISTER(SXY2);
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 shared_flags = (v0.flags & v1.flags & v2.flags);
  return ((shared_flags & VALID_XYZ) == VALID_XYZ);
}
510
511
float CPU::PGXP::GTE_NCLIP()
512
{
513
const PGXPValue& SXY0 = PGXP_GTE_REGISTER(SXY0);
514
const PGXPValue& SXY1 = PGXP_GTE_REGISTER(SXY1);
515
const PGXPValue& SXY2 = PGXP_GTE_REGISTER(SXY2);
516
float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
517
(SXY2.x * SXY1.y));
518
519
// ensure fractional values are not incorrectly rounded to 0
520
const float nclip_abs = std::abs(nclip);
521
if (0.1f < nclip_abs && nclip_abs < 1.0f)
522
nclip += (nclip < 0.0f ? -1.0f : 1.0f);
523
524
return nclip;
525
}
526
527
// Writes a tracked value into GTE data register `reg`.
// Register 15 (SXYP) pushes the screen XY FIFO; registers 29 and 31 are read-only and ignored;
// every other register takes a copy of value with its raw value field set to val.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  if (reg == 15)
  {
    // push FIFO
    PGXP_GTE_REGISTER(SXYP) = value;
    PushScreenXYFIFO();
    return;
  }

  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  PGXPValue& target = g_state.pgxp_gte[reg];
  target = value;
  target.value = val;
}
555
556
// MFC2: copies the validated shadow of a GTE data register into the rt register's shadow.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  // CPU[Rt] = GTE_D[Rd]
  const u32 idx = instr.cop.Cop2Index();
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rdVal, &g_state.pgxp_gte[idx]);

  PGXPValue& gte_shadow = g_state.pgxp_gte[idx];
  gte_shadow.Validate(rdVal);

  PGXPValue& cpu_shadow = GetRtValue(instr);
  cpu_shadow = gte_shadow;
  cpu_shadow.value = rdVal;
}
566
567
// MTC2: copies the validated shadow of the rt register into a GTE data register's shadow.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  // GTE_D[Rd] = CPU[Rt]
  const u32 gte_index = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const PGXPValue& cpu_shadow = ValidateAndGetRtValue(instr, rtVal);
  CPU_MTC2(gte_index, cpu_shadow, rtVal);
}
576
577
// LWC2: loads the validated shadow-memory entry at addr into the GTE data register named by rt.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // GTE_D[Rt] = Mem[addr]
  LOG_VALUES_LOAD(addr, rtVal);

  const u32 gte_index = static_cast<u32>(instr.r.rt.GetValue());
  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  CPU_MTC2(gte_index, loaded, rtVal);
}
585
586
// SWC2: validates the shadow of the GTE data register named by rt and stores it to the
// shadow-memory entry at addr.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[addr] = GTE_D[Rt]
  const u32 idx = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& prtVal = g_state.pgxp_gte[idx];
#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal);
  // s_log is null when the log file could not be opened; never hand a null stream to fprintf().
  if (s_log)
    std::fprintf(s_log, " addr=%08X", addr);
#endif
  prtVal.Validate(rtVal);
  WriteMem(addr, prtVal);
}
598
599
// Stores vertex in the vertex cache, keyed by the packed 16-bit screen coordinates in value
// (X in the low half, Y in the high half). Coordinates are expected to lie in [-1024, 1023];
// this is only checked by a debug assertion.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 sy = static_cast<s16>(value >> 16);
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);

  // Bias both coordinates by 1024 so they index the 2048x2048 grid from zero.
  const u32 slot = (sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024);
  s_vertex_cache[slot] = vertex;
}
606
607
// Returns the vertex cache entry for the packed 16-bit screen coordinates in value
// (X in the low half, Y in the high half), or nullptr when either coordinate is outside
// [-1024, 1023], the range covered by the 2048x2048 cache.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);

  // Same bounds that CacheVertex() asserts on, so every slot that can be written can also be read.
  const bool in_range = (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);
  if (!in_range)
    return nullptr;

  return &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)];
}
615
616
// Applies TruncateGPUVertexPosition() to the integer part of p and re-attaches the fraction.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  const float truncated_whole = static_cast<float>(TruncateGPUVertexPosition(whole));
  return truncated_whole + fraction;
}
625
626
// Returns true when the precise position is within gpu_pgxp_tolerance of the integer
// position on both axes. A negative tolerance disables the check.
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float delta_x = std::abs(precise_x - static_cast<float>(int_x));
  const float delta_y = std::abs(precise_y - static_cast<float>(int_y));
  return (delta_x <= tolerance && delta_y <= tolerance);
}
635
636
// Looks up a precise position for a vertex read from addr with raw value `value`.
// Order of preference:
//   1. the shadow-memory entry at addr, if X/Y are valid, the raw value matches and the result
//      is within tolerance of (x, y) - returns whether its Z is valid;
//   2. the vertex cache entry for `value` (if enabled), if X/Y are valid and within tolerance -
//      outputs are filled but false is returned;
//   3. the native integer position with w = 1 - returns false.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  const float offset_x = static_cast<float>(xOffs);
  const float offset_y = static_cast<float>(yOffs);

  const PGXPValue* const mem_vert = GetPtr(addr);
  const bool mem_usable = (mem_vert && (mem_vert->flags & VALID_XY) == VALID_XY && mem_vert->value == value);
  if (mem_usable)
  {
    *out_x = TruncateVertexPosition(mem_vert->x) + offset_x;
    *out_y = TruncateVertexPosition(mem_vert->y) + offset_y;
    *out_w = mem_vert->z / static_cast<float>(GTE::MAX_Z);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(mem_vert->x), TruncateVertexPosition(mem_vert->y), mem_vert->z,
               std::abs(*out_x - x), std::abs(*out_y - y));
#endif

    if (IsWithinTolerance(*out_x, *out_y, x, y))
    {
      // check validity of z component
      return ((mem_vert->flags & VALID_Z) == VALID_Z);
    }
  }

  if (g_settings.gpu_pgxp_vertex_cache)
  {
    const PGXPValue* const cached_vert = GetCachedVertex(value);
    const bool cache_usable = (cached_vert && (cached_vert->flags & VALID_XY) == VALID_XY);
    if (cache_usable)
    {
      *out_x = TruncateVertexPosition(cached_vert->x) + offset_x;
      *out_y = TruncateVertexPosition(cached_vert->y) + offset_y;
      *out_w = cached_vert->z / static_cast<float>(GTE::MAX_Z);

#ifdef LOG_LOOKUPS
      GL_INS_FMT("0x{:08X} {},{} => VERTEX_CACHE{{{},{} ({},{},{}) ({},{})}}", addr, x, y, *out_x, *out_y,
                 TruncateVertexPosition(cached_vert->x), TruncateVertexPosition(cached_vert->y), cached_vert->z,
                 std::abs(*out_x - x), std::abs(*out_y - y));
#endif

      if (IsWithinTolerance(*out_x, *out_y, x, y))
      {
        // This is only really used for Syphon Filter 3, and including Z tends to make things worse.
        // At least it can get rid of the jitter, but not the warping.
        return false;
      }
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;

#ifdef LOG_LOOKUPS
  GL_INS_FMT("0x{:08X} {},{} => MISS", addr, x, y);
#endif

  return false;
}
693
694
// LW: copies the validated shadow-memory entry at addr into the rt register's shadow.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im]
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
700
701
// Byte loads are not tracked: the rt register's shadow is reset to INVALID_VALUE.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& rt_shadow = GetRtValue(instr);
  rt_shadow = INVALID_VALUE;
}
706
707
// LH: loads the addressed halfword's shadow into the rt register's shadow, sign-extended.
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (sign extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& rt_shadow = GetRtValue(instr);
  ValidateAndLoadMem16(rt_shadow, addr, rtVal, true);
}
713
714
// LHU: loads the addressed halfword's shadow into the rt register's shadow, zero-extended.
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  // Rt = Mem[Rs + Im] (zero extended)
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& rt_shadow = GetRtValue(instr);
  ValidateAndLoadMem16(rt_shadow, addr, rtVal, false);
}
720
721
// Byte stores are not tracked: the shadow-memory entry at addr is reset to INVALID_VALUE.
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& untracked = INVALID_VALUE;
  WriteMem(addr, untracked);
}
726
727
// SH: stores the validated rt shadow into the addressed half of the shadow-memory entry.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& rt_shadow = ValidateAndGetRtValue(instr, rtVal);
  WriteMem16(addr, rt_shadow);
}
733
734
// SW: stores the validated rt shadow into the shadow-memory entry at addr.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  // Mem[Rs + Im] = Rt
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& rt_shadow = ValidateAndGetRtValue(instr, rtVal);
  WriteMem(addr, rt_shadow);
}
741
742
// LWL/LWR: tracks an unaligned word load into rt.
// The aligned word's shadow entry is validated against the word read back from memory, then
// merged into the rt shadow according to how many bytes the instruction transfers. Where a
// whole 16-bit half moves, the matching precise component moves with it; where a half is only
// partially replaced, the component is rebuilt from the merged raw value and marked invalid.
// Does nothing if the address has no shadow entry or the word cannot be read.
void CPU::PGXP::CPU_LWx(Instruction instr, u32 addr, u32 rtVal)
{
  const u32 word_addr = addr & ~3u;
  PGXPValue* const mem = GetPtr(word_addr);
  u32 memVal;
  if (!mem || !CPU::SafeReadMemoryWord(word_addr, &memVal))
    return;

  mem->Validate(memVal);
  LOG_VALUES_LOAD(addr, memVal);

  PGXPValue& reg = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;
  const u32 bit_shift = (byte_shift * 8);

  if (instr.op == InstructionOp::lwl)
  {
    const u32 merged = (rtVal & (UINT32_C(0x00FFFFFF) >> bit_shift)) | (memVal << (24 - bit_shift));

    switch (byte_shift)
    {
      case 0:
        // only writing the upper half of Y, can't do much about that..
        reg.y = static_cast<float>(static_cast<s16>(merged >> 16));
        reg.flags &= ~VALID_Y;
        break;

      case 1:
      {
        // the low half of memory lands in the high half of the register: mem X becomes reg Y
        const bool mem_has_low_z = ((mem->flags & VALID_LOWZ) != 0);
        reg.y = mem->x;
        if (mem_has_low_z)
          reg.z = mem->z;
        reg.flags = (reg.flags & ~VALID_Y) | ((mem->flags & VALID_X) << 1) | (mem_has_low_z ? VALID_Z : 0);
      }
      break;

      case 2:
        // making a dog's breakfast of both X and Y
        reg.x = static_cast<float>(static_cast<s16>(merged));
        reg.y = static_cast<float>(static_cast<s16>(merged >> 16));
        reg.flags &= ~(VALID_X | VALID_Y | VALID_Z);
        break;

      case 3:
        // effectively the same as a normal load.
        reg = *mem;
        break;

        DefaultCaseIsUnreachable();
    }

    // Every case ends up with the merged raw value.
    reg.value = merged;
  }
  else
  {
    const u32 merged = (rtVal & (UINT32_C(0xFFFFFF00) << (24 - bit_shift))) | (memVal >> bit_shift);

    switch (byte_shift)
    {
      case 0:
        // effectively the same as a normal load.
        reg = *mem;
        break;

      case 1:
        // making a dog's breakfast of both X and Y
        reg.x = static_cast<float>(static_cast<s16>(merged));
        reg.y = static_cast<float>(static_cast<s16>(merged >> 16));
        reg.flags &= ~(VALID_X | VALID_Y | VALID_Z);
        break;

      case 2:
      {
        // the high half of memory lands in the low half of the register: mem Y becomes reg X
        const bool mem_has_high_z = ((mem->flags & VALID_HIGHZ) != 0);
        reg.x = mem->y;
        if (mem_has_high_z)
          reg.z = mem->z;
        reg.flags = (reg.flags & ~VALID_X) | ((mem->flags & VALID_Y) >> 1) | (mem_has_high_z ? VALID_Z : 0);
      }
      break;

      case 3:
        // only writing the lower half of X, can't do much about that..
        reg.x = static_cast<float>(static_cast<s16>(merged));
        reg.flags &= ~VALID_X;
        break;

        DefaultCaseIsUnreachable();
    }

    // Every case ends up with the merged raw value.
    reg.value = merged;
  }
}
853
854
// SWL/SWR: tracks an unaligned word store from rt.
// The aligned word's shadow entry is validated against the word read back from memory, then
// updated according to how many bytes the instruction transfers. Where a whole 16-bit half is
// replaced, the matching precise component is taken from the rt shadow; where a half is only
// partially replaced, the component is rebuilt from the merged raw value and marked invalid.
// VALID_LOWZ/VALID_HIGHZ record which half still carries Z; VALID_Z is kept only while at
// least one of them does.
// Does nothing if the address has no shadow entry or the word cannot be read.
void CPU::PGXP::CPU_SWx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const u32 aligned_addr = addr & ~3u;
  PGXPValue* pmemVal = GetPtr(aligned_addr);
  u32 memVal;
  if (!pmemVal)
    return;
  if (!CPU::SafeReadMemoryWord(aligned_addr, &memVal)) [[unlikely]]
    return;
  pmemVal->Validate(memVal);

  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);

  const u32 byte_shift = addr & 3u;

  if (instr.op == InstructionOp::swl)
  {
    const u32 bit_shift = (byte_shift * 8);
    const u32 mixed_value = (memVal & (UINT32_C(0xFFFFFF00) << bit_shift)) | (rtVal >> (24 - bit_shift));

    switch (byte_shift)
    {
      case 0:
      {
        // only writing the lower half of X, can't do much about that..
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 1:
      {
        // the high half of the register replaces the low half of memory: reg Y becomes mem X
        pmemVal->x = prtVal.y;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;
        pmemVal->flags = (pmemVal->flags & ~(VALID_X | VALID_Z | VALID_LOWZ)) | ((prtVal.flags & VALID_Y) >> 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_LOWZ) : 0) |
                         ((pmemVal->flags & VALID_HIGHZ) ? VALID_Z : 0);
      }
      break;

      case 2:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 3:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
  else
  {
    const u32 bit_shift = (byte_shift * 8);
    const u32 mixed_value = (memVal & (UINT32_C(0x00FFFFFF) >> (24 - bit_shift))) | (rtVal << bit_shift);

    switch (byte_shift)
    {
      case 0:
      {
        // effectively the same as a normal store.
        *pmemVal = prtVal;
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (prtVal.flags & ~(VALID_LOWZ | VALID_HIGHZ)) | ((prtVal.flags & VALID_Z) ? (VALID_LOWZ | VALID_HIGHZ) : 0);
      }
      break;

      case 1:
      {
        // making a dog's breakfast of both X and Y
        pmemVal->x = static_cast<float>(static_cast<s16>(mixed_value));
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        // Neither half carries Z any more, so VALID_Z has to go too (mirrors the swl shift-2 case).
        pmemVal->flags &= ~(VALID_X | VALID_Y | VALID_Z | VALID_LOWZ | VALID_HIGHZ);
      }
      break;

      case 2:
      {
        // the low half of the register replaces the high half of memory: reg X becomes mem Y
        pmemVal->y = prtVal.x;
        pmemVal->z = (prtVal.flags & VALID_Z) ? prtVal.z : pmemVal->z;
        pmemVal->value = mixed_value;
        // Y is the component being replaced, so VALID_Y is the bit to clear; X is untouched.
        pmemVal->flags = (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((prtVal.flags & VALID_X) << 1) |
                         ((prtVal.flags & VALID_Z) ? (VALID_Z | VALID_HIGHZ) : 0) |
                         ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

      case 3:
      {
        // only writing the upper half of Y, can't do much about that..
        // Y lives in the high halfword of the merged value; X is untouched and stays as it was.
        pmemVal->y = static_cast<float>(static_cast<s16>(mixed_value >> 16));
        pmemVal->value = mixed_value;
        pmemVal->flags =
          (pmemVal->flags & ~(VALID_Y | VALID_Z | VALID_HIGHZ)) | ((pmemVal->flags & VALID_LOWZ) ? VALID_Z : 0);
      }
      break;

        DefaultCaseIsUnreachable();
    }
  }
}
974
975
// Unpacks the register pair (Rs in the low 8 bits, Rd in the bits above) and forwards to CPU_MOVE().
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  const u32 dest_reg = (rd_and_rs >> 8);
  const u32 src_reg = (rd_and_rs & 0xFFu);
  CPU_MOVE(dest_reg, src_reg, rsVal);
}
981
982
// Register move: validates the shadow of register Rs against rsVal and copies it to register Rd.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macros expect an `instr` in scope; there is none here, so log a zeroed one.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif

  PGXPValue& source = g_state.pgxp_gpr[Rs];
  source.Validate(rsVal);

  PGXPValue& target = g_state.pgxp_gpr[Rd];
  target = source;
}
992
993
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs + Imm (signed)
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  const u32 imm = instr.i.imm_sext32();

  // Rt starts out as a copy of Rs; adding zero leaves it exactly like that.
  PGXPValue& dest = GetRtValue(instr);
  dest = source;
  if (imm == 0)
    return;

  if (rsVal == 0)
  {
    // The result is the immediate itself, so both halves come from it at integer precision.
    dest.x = static_cast<float>(LOWORD_S16(imm));
    dest.y = static_cast<float>(HIWORD_S16(imm));
    dest.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
    dest.value = imm;
    return;
  }

  // Low half: add in the unsigned 16-bit domain.
  dest.x = static_cast<float>(f16Unsign(dest.x));
  dest.x += static_cast<float>(LOWORD_U16(imm));

  // Work out the carry/borrow into the high half before folding x back to signed.
  float carry = 0.0f;
  if (dest.x > USHRT_MAX)
    carry = 1.0f;
  else if (dest.x < 0.0f)
    carry = -1.0f;

  dest.x = static_cast<float>(f16Sign(dest.x));
  dest.y += HIWORD_S16(imm) + carry;

  // Wrap the high half back into the signed 16-bit range.
  // The wrap term is always added (even when zero) to keep the exact float result.
  float wrap = 0.0f;
  if (dest.y > SHRT_MAX)
    wrap = -(USHRT_MAX + 1);
  else if (dest.y < SHRT_MIN)
    wrap = (USHRT_MAX + 1);
  dest.y += wrap;

  dest.value = rsVal + imm;
  dest.flags |= VALID_TAINTED_Z;
}
1033
1034
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs & Imm
  const u32 mask = instr.i.imm_zext32();
  const u32 result = rsVal & mask;
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dest = GetRtValue(instr);

  // The immediate is zero-extended, so the upper 16 bits of the result are always clear.
  dest.y = 0.0f;
  dest.z = source.z;
  dest.value = result;
  dest.flags = source.flags | VALID_Y | VALID_TAINTED_Z;

  if (mask == 0)
  {
    // Everything masked away: x is exactly zero.
    dest.x = 0.0f;
    dest.flags |= VALID_X;
  }
  else if (mask == 0xFFFFu)
  {
    // The whole low half is kept: x carries over unchanged (and so does its validity).
    dest.x = source.x;
  }
  else
  {
    // Partial mask: fall back to the integer (low precision) result.
    dest.x = static_cast<float>(LOWORD_S16(result));
    dest.flags |= VALID_X;
  }
}
1076
1077
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs | Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal | imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dest = GetRtValue(instr);
  dest = source;
  dest.value = result;

  // OR with zero changes nothing, so the copied x stays as it is.
  if (imm == 0) [[unlikely]]
    return;

  // Any other immediate disturbs the low half: use the integer (low precision) result for x.
  dest.x = static_cast<float>(LOWORD_S16(result));
  dest.flags |= VALID_X | VALID_TAINTED_Z;
}
1101
1102
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs ^ Imm
  const u32 imm = instr.i.imm_zext32();
  const u32 result = rsVal ^ imm;

  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dest = GetRtValue(instr);
  dest = source;
  dest.value = result;

  // XOR with zero changes nothing, so the copied x stays as it is.
  if (imm == 0) [[unlikely]]
    return;

  // Any other immediate disturbs the low half: use the integer (low precision) result for x.
  dest.x = static_cast<float>(LOWORD_S16(result));
  dest.flags |= VALID_X | VALID_TAINTED_Z;
}
1126
1127
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (signed)
  const s32 imm = instr.i.imm_s16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  // Split the sign-extended immediate into the same low/high layout as a tracked value:
  // low half is the immediate itself, high half is its sign extension (-1 or 0).
  const float imm_low = static_cast<float>(imm);
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  PGXPValue& dest = GetRtValue(instr);
  const bool tracked_less = (source.GetValidY(rsVal) < imm_high) || (source.GetValidX(rsVal) < imm_low);
  dest.x = tracked_less ? 1.0f : 0.0f;
  dest.y = 0.0f;
  dest.z = source.z;
  dest.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;

  // The PSX-visible value always uses the exact integer comparison.
  dest.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
1145
1146
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  // Rt = Rs < Imm (Unsigned)
  const u32 imm = instr.i.imm_u16();
  PGXPValue& source = ValidateAndGetRsValue(instr, rsVal);

  // The low half of the immediate is deliberately interpreted as signed here;
  // the high half is then its sign extension (-1 or 0).
  const float imm_low = static_cast<float>(static_cast<s16>(imm));
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  PGXPValue& dest = GetRtValue(instr);
  const bool tracked_less = (f16Unsign(source.GetValidY(rsVal)) < f16Unsign(imm_high)) ||
                            (f16Unsign(source.GetValidX(rsVal)) < imm_low);
  dest.x = tracked_less ? 1.0f : 0.0f;
  dest.y = 0.0f;
  dest.z = source.z;
  dest.flags = source.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;

  // The PSX-visible value always uses the exact integer comparison.
  dest.value = BoolToUInt32(rsVal < imm);
}
1165
1166
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  // Rt = Imm << 16
  // The immediate lands in the high half (y); the low half (x) and z are cleared.
  PGXPValue& dest = GetRtValue(instr);
  dest.value = instr.i.imm_zext32() << 16;
  dest.flags = VALID_XY;
  dest.x = 0.0f;
  dest.y = static_cast<float>(instr.i.imm_s16());
  dest.z = 0.0f;
}
1178
1179
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs + Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);

  // When one operand is zero the result is a copy of the other one;
  // the zero operand is still offered to CopyZIfMissing().
  if (rtVal == 0)
  {
    dest = lhs;
    CopyZIfMissing(dest, rhs);
    return;
  }
  if (rsVal == 0)
  {
    dest = rhs;
    CopyZIfMissing(dest, lhs);
    return;
  }

  // Low halves are summed in the unsigned 16-bit domain.
  const double low_sum = f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(rhs.GetValidX(rtVal));

  // Carry/borrow from the low half into the high half.
  float carry = 0.0f;
  if (low_sum > USHRT_MAX)
    carry = 1.0f;
  else if (low_sum < 0.0f)
    carry = -1.0f;

  // NOTE: Rd may be the same register as Rs or Rt, so the order of the writes below is kept deliberately.
  dest.x = static_cast<float>(f16Sign(low_sum));
  dest.y = lhs.GetValidY(rsVal) + rhs.GetValidY(rtVal) + carry;

  // Wrap the high half back into the signed 16-bit range (the wrap term is added even when zero).
  float wrap = 0.0f;
  if (dest.y > SHRT_MAX)
    wrap = -(USHRT_MAX + 1);
  else if (dest.y < SHRT_MIN)
    wrap = (USHRT_MAX + 1);
  dest.y += wrap;

  dest.value = rsVal + rtVal;

  // All of Rs's flags are kept; Rt contributes only its X/Y validity bits.
  dest.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dest.z, dest.flags, lhs, rhs);
}
1218
1219
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs - Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);

  // Subtracting zero yields a copy of Rs; Rt is still offered to CopyZIfMissing().
  if (rtVal == 0)
  {
    dest = lhs;
    CopyZIfMissing(dest, rhs);
    return;
  }

  // Low halves are subtracted in the unsigned 16-bit domain.
  const double low_diff = f16Unsign(lhs.GetValidX(rsVal)) - f16Unsign(rhs.GetValidX(rtVal));

  // Carry/borrow from the low half into the high half.
  float carry = 0.0f;
  if (low_diff > USHRT_MAX)
    carry = 1.0f;
  else if (low_diff < 0.0f)
    carry = -1.0f;

  // NOTE: Rd may be the same register as Rs or Rt, so the order of the writes below is kept deliberately.
  dest.x = static_cast<float>(f16Sign(low_diff));
  dest.y = lhs.GetValidY(rsVal) - (rhs.GetValidY(rtVal) - carry);

  // Wrap the high half back into the signed 16-bit range (the wrap term is added even when zero).
  float wrap = 0.0f;
  if (dest.y > SHRT_MAX)
    wrap = -(USHRT_MAX + 1);
  else if (dest.y < SHRT_MIN)
    wrap = (USHRT_MAX + 1);
  dest.y += wrap;

  dest.value = rsVal - rtVal;

  // All of Rs's flags are kept; Rt contributes only its X/Y validity bits.
  dest.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dest.z, dest.flags, lhs, rhs);
}
1253
1254
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  // Shared tail for the register-register bitwise ops; rdVal is the already-computed integer result.
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  // Low half: zero, or whichever operand's low half survived intact (Rs is preferred over Rt),
  // otherwise the integer result.
  const u16 rd_low = LOWORD_U16(rdVal);
  float new_x;
  if (rd_low == 0)
    new_x = 0.0f;
  else if (rd_low == LOWORD_U16(rsVal))
    new_x = lhs.GetValidX(rsVal);
  else if (rd_low == LOWORD_U16(rtVal))
    new_x = rhs.GetValidX(rtVal);
  else
    new_x = static_cast<float>(LOWORD_S16(rdVal));

  // High half: same selection, applied to the upper 16 bits.
  const u16 rd_high = HIWORD_U16(rdVal);
  float new_y;
  if (rd_high == 0)
    new_y = 0.0f;
  else if (rd_high == HIWORD_U16(rsVal))
    new_y = lhs.GetValidY(rsVal);
  else if (rd_high == HIWORD_U16(rtVal))
    new_y = rhs.GetValidY(rtVal);
  else
    new_y = static_cast<float>(HIWORD_S16(rdVal));

  // Results are staged in locals because Rd might be the same register as one of the sources.
  u32 new_flags = 0;
  if ((lhs.flags | rhs.flags) & VALID_XY)
    new_flags = (VALID_XY | VALID_TAINTED_Z);

  PGXPValue& dest = GetRdValue(instr);
  SelectZ(dest.z, new_flags, lhs, rhs);
  dest.x = new_x;
  dest.y = new_y;
  dest.flags = new_flags;
  dest.value = rdVal;
}
1288
1289
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs & Rt -- compute the integer result and hand off to the shared bitwise handler.
  CPU_BITWISE(instr, (rsVal & rtVal), rsVal, rtVal);
}
1297
1298
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs | Rt -- compute the integer result and hand off to the shared bitwise handler.
  CPU_BITWISE(instr, (rsVal | rtVal), rsVal, rtVal);
}
1306
1307
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs ^ Rt -- compute the integer result and hand off to the shared bitwise handler.
  CPU_BITWISE(instr, (rsVal ^ rtVal), rsVal, rtVal);
}
1315
1316
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs NOR Rt -- compute the integer result and hand off to the shared bitwise handler.
  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
1324
1325
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);

  // Tracked comparison: high halves compared as signed, low halves compared as unsigned.
  const bool tracked_less = (lhs.GetValidY(rsVal) < rhs.GetValidY(rtVal)) ||
                            (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  // NOTE: Rd may be the same register as Rs, so the order of the writes below is kept deliberately.
  dest.x = tracked_less ? 1.0f : 0.0f;
  dest.y = 0.0f;
  dest.z = lhs.z;
  dest.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;

  // The PSX-visible value always uses the exact integer comparison.
  dest.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
1342
1343
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Rd = Rs < Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);

  // Tracked comparison: both halves are compared in the unsigned 16-bit domain.
  const bool tracked_less = (f16Unsign(lhs.GetValidY(rsVal)) < f16Unsign(rhs.GetValidY(rtVal))) ||
                            (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  // NOTE: Rd may be the same register as Rs, so the order of the writes below is kept deliberately.
  dest.x = tracked_less ? 1.0f : 0.0f;
  dest.y = 0.0f;
  dest.z = lhs.z;
  dest.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;

  // The PSX-visible value always uses the exact integer comparison.
  dest.value = BoolToUInt32(rsVal < rtVal);
}
1360
1361
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // LO starts as a copy of Rs.
  // NOTE(review): lo was just assigned from Rs, so passing Rs again here looks redundant;
  // CPU_ADD passes the *other* operand to CopyZIfMissing(). Confirm whether Rt was intended.
  lo = lhs;
  CopyZIfMissing(lo, lhs);

  // HI shares the same Z/flags starting point.
  hi = lo;

  const float lhs_low = lhs.GetValidX(rsVal);
  const float lhs_high = lhs.GetValidY(rsVal);
  const float rhs_low = rhs.GetValidX(rtVal);
  const float rhs_high = rhs.GetValidY(rtVal);

  // Schoolbook multiply on 16-bit halves: low halves unsigned, high halves signed.
  const double low_low = f16Unsign(lhs_low) * f16Unsign(rhs_low);
  const double low_high = f16Unsign(lhs_low) * (rhs_high);
  const double high_low = lhs_high * f16Unsign(rhs_low);
  const double high_high = lhs_high * rhs_high;

  // Distribute the partial products over the four 16-bit output slots, pushing overflow upwards.
  const double out_lo_x = low_low;
  const double out_lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double out_hi_x = f16Overflow(out_lo_y) + high_high;
  const double out_hi_y = f16Overflow(out_hi_x);

  lo.x = static_cast<float>(f16Sign(out_lo_x));
  lo.y = static_cast<float>(f16Sign(out_lo_y));
  lo.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);
  hi.x = static_cast<float>(f16Sign(out_hi_x));
  hi.y = static_cast<float>(f16Sign(out_hi_y));
  hi.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // The PSX-visible values come from the exact 64-bit signed product.
  const u64 product = static_cast<u64>(static_cast<s64>(SignExtend64(rsVal)) * static_cast<s64>(SignExtend64(rtVal)));
  hi.value = Truncate32(product >> 32);
  lo.value = Truncate32(product);
}
1406
1407
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Hi/Lo = Rs * Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // LO starts as a copy of Rs.
  // NOTE(review): lo was just assigned from Rs, so passing Rs again here looks redundant;
  // CPU_ADD passes the *other* operand to CopyZIfMissing(). Confirm whether Rt was intended.
  lo = lhs;
  CopyZIfMissing(lo, lhs);

  // HI shares the same Z/flags starting point.
  hi = lo;

  const float lhs_low = lhs.GetValidX(rsVal);
  const float lhs_high = lhs.GetValidY(rsVal);
  const float rhs_low = rhs.GetValidX(rtVal);
  const float rhs_high = rhs.GetValidY(rtVal);

  // Schoolbook multiply on 16-bit halves, every half taken as unsigned.
  const double low_low = f16Unsign(lhs_low) * f16Unsign(rhs_low);
  const double low_high = f16Unsign(lhs_low) * f16Unsign(rhs_high);
  const double high_low = f16Unsign(lhs_high) * f16Unsign(rhs_low);
  const double high_high = f16Unsign(lhs_high) * f16Unsign(rhs_high);

  // Distribute the partial products over the four 16-bit output slots, pushing overflow upwards.
  const double out_lo_x = low_low;
  const double out_lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double out_hi_x = f16Overflow(out_lo_y) + high_high;
  const double out_hi_y = f16Overflow(out_hi_x);

  lo.x = static_cast<float>(f16Sign(out_lo_x));
  lo.y = static_cast<float>(f16Sign(out_lo_y));
  lo.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);
  hi.x = static_cast<float>(f16Sign(out_hi_x));
  hi.y = static_cast<float>(f16Sign(out_hi_y));
  hi.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // The PSX-visible values come from the exact 64-bit unsigned product.
  const u64 product = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  hi.value = Truncate32(product >> 32);
  lo.value = Truncate32(product);
}
1452
1453
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (signed)
  // Hi = Rs % Rt (signed)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // LO starts as a copy of Rs.
  // NOTE(review): lo was just assigned from Rs, so passing Rs again here looks redundant;
  // CPU_ADD passes the *other* operand to CopyZIfMissing(). Confirm whether Rt was intended.
  lo = lhs;
  CopyZIfMissing(lo, lhs);

  // HI shares the same Z/flags starting point.
  hi = lo;

  // Recombine the halves into one value each: unsigned low half plus signed high half * 2^16.
  const double dividend = f16Unsign(lhs.GetValidX(rsVal)) + lhs.GetValidY(rsVal) * static_cast<double>(1 << 16);
  const double divisor = f16Unsign(rhs.GetValidX(rtVal)) + rhs.GetValidY(rtVal) * static_cast<double>(1 << 16);

  // Quotient goes to LO, split back into halves.
  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // Remainder goes to HI, split back into halves.
  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // compute PSX value
  const s32 int_dividend = static_cast<s32>(rsVal);
  const s32 int_divisor = static_cast<s32>(rtVal);
  if (int_divisor == 0)
  {
    // divide by zero: LO depends on the dividend's sign, HI receives the dividend.
    lo.value = (int_dividend >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    hi.value = static_cast<u32>(int_dividend);
  }
  else if (rsVal == UINT32_C(0x80000000) && int_divisor == -1)
  {
    // The quotient is unrepresentable in 32 bits, so it is handled without performing the division.
    lo.value = UINT32_C(0x80000000);
    hi.value = 0;
  }
  else
  {
    lo.value = static_cast<u32>(int_dividend / int_divisor);
    hi.value = static_cast<u32>(int_dividend % int_divisor);
  }
}
1502
1503
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  // Lo = Rs / Rt (unsigned)
  // Hi = Rs % Rt (unsigned)
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // LO starts as a copy of Rs.
  // NOTE(review): lo was just assigned from Rs, so passing Rs again here looks redundant;
  // CPU_ADD passes the *other* operand to CopyZIfMissing(). Confirm whether Rt was intended.
  lo = lhs;
  CopyZIfMissing(lo, lhs);

  // HI shares the same Z/flags starting point.
  hi = lo;

  // Recombine the halves into one value each, both halves taken as unsigned.
  const double dividend =
    f16Unsign(lhs.GetValidX(rsVal)) + f16Unsign(lhs.GetValidY(rsVal)) * static_cast<double>(1 << 16);
  const double divisor =
    f16Unsign(rhs.GetValidX(rtVal)) + f16Unsign(rhs.GetValidY(rtVal)) * static_cast<double>(1 << 16);

  // Quotient goes to LO, split back into halves.
  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // Remainder goes to HI, split back into halves.
  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= VALID_TAINTED_Z | (rhs.flags & VALID_XY);

  // compute PSX value
  if (rtVal != 0)
  {
    lo.value = rsVal / rtVal;
    hi.value = rsVal % rtVal;
  }
  else
  {
    // divide by zero: LO is all ones, HI receives the dividend.
    lo.value = UINT32_C(0xFFFFFFFF);
    hi.value = rsVal;
  }
}
1547
1548
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  // Shared implementation for the immediate and variable left shifts; `sh` is the shift amount.
  const u32 shifted = rtVal << sh;
  PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dest = GetRdValue(instr);

  // NOTE: Rd may be the same register as Rt, so every branch reads the source fields it needs
  // before overwriting them.
  dest.z = source.z;
  dest.value = shifted;

  if (sh >= 32) [[unlikely]]
  {
    // Everything is shifted out.
    dest.x = 0.0f;
    dest.y = 0.0f;
    dest.flags = source.flags | VALID_XY | VALID_TAINTED_Z;
  }
  else if (sh >= 16)
  {
    // The low half moves into the high half: verbatim for exactly 16, scaled (and wrapped to 16 bits) beyond that.
    if (sh == 16)
      dest.y = source.x;
    else
      dest.y = static_cast<float>(f16Sign(f16Unsign(source.x * static_cast<double>(1 << (sh - 16)))));
    dest.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // dest.flags = (source.flags & ~VALID_Y) | ((source.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dest.flags = (source.flags | VALID_TAINTED_Z) | ((source.flags & VALID_Y) >> 1);
  }
  else
  {
    // Less than 16: scale both halves and carry the low half's overflow into the high half.
    const double scale = static_cast<double>(1 << sh);
    const double new_x = f16Unsign(source.x) * scale;
    const double new_y = (f16Unsign(source.y) * scale) + f16Overflow(new_x);
    dest.x = static_cast<float>(f16Sign(new_x));
    dest.y = static_cast<float>(f16Sign(new_y));
    dest.flags = (source.flags | VALID_TAINTED_Z);
  }
}
1590
1591
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt << Sa -- the shift amount is taken from the instruction's shamt field.
  CPU_SLL(instr, rtVal, static_cast<u32>(instr.r.shamt));
}
1599
1600
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt << Rs -- only the low five bits of Rs are used as the shift amount.
  const u32 shift_amount = (rsVal & 0x1F);
  CPU_SLL(instr, rtVal, shift_amount);
}
1608
1609
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  // Shared implementation of the right shifts.
  //   sign        - true for arithmetic shifts, false for logical shifts.
  //   is_variable - true when the shift amount came from a register rather than the instruction.
  const u32 rdVal = sign ? static_cast<u32>(static_cast<s32>(rtVal) >> sh) : (rtVal >> sh);
  PGXPValue& source = ValidateAndGetRtValue(instr, rtVal);

  double new_x = source.x;
  double new_y = sign ? source.y : f16Unsign(source.y);

  // Integer test patterns used to see which bits survive the shift.
  const u32 x_only = SignExtend32(LOWORD_S16(rtVal));              // remove Y
  const u32 y_over_sign = SET_LOWORD(rtVal, HIWORD_U16(x_only));   // overwrite x with sign(x)

  // Shift test values
  const u32 x_shifted = static_cast<u32>(static_cast<s32>(x_only) >> sh);
  const u32 y_shifted = sign ? static_cast<u32>(static_cast<s32>(y_over_sign) >> sh) : (y_over_sign >> sh);

  const s16 x_sign_word = HIWORD_S16(x_only);

  // Contribution of the original low half to the new low half.
  if (LOWORD_S16(x_shifted) == x_sign_word)
    new_x = LOWORD_S16(x_shifted); // only sign bits left
  else
    new_x = new_x / static_cast<double>(1 << sh);

  // Contribution of the original high half to the new low half.
  if (LOWORD_S16(y_shifted) != x_sign_word)
  {
    if (sh < 16)
    {
      new_x += new_y * static_cast<double>(1 << (16 - sh));
      if (source.x < 0)
        new_x += static_cast<double>(1 << (16 - sh));
    }
    else if (sh == 16)
    {
      new_x = new_y;
    }
    else
    {
      new_x += new_y / static_cast<double>(1 << (sh - 16));
    }
  }

  // New high half: either pure sign/zero fill, or the scaled original.
  const s16 y_high_word = HIWORD_S16(y_shifted);
  if ((y_high_word == 0) || (y_high_word == -1))
    new_y = y_high_word;
  else
    new_y = new_y / static_cast<double>(1 << sh);

  PGXPValue& dest = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  const bool use_low_precision = sign && !is_variable && !(source.flags & VALID_Z) && sh < 16;
  if (use_low_precision)
  {
    dest.x = static_cast<float>(LOWORD_S16(rdVal));
    dest.y = static_cast<float>(HIWORD_S16(rdVal));
    dest.z = 0.0f;
    dest.value = rdVal;
    dest.flags = VALID_XY | VALID_TAINTED_Z;
  }
  else
  {
    dest.x = static_cast<float>(f16Sign(new_x));
    dest.y = static_cast<float>(f16Sign(new_y));
    dest.z = source.z;
    dest.value = rdVal;
    dest.flags = source.flags | VALID_TAINTED_Z;
  }
}
1675
1676
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (logical, shift amount from the instruction's shamt field)
  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/false);
}
1684
1685
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (logical, only the low five bits of Rs are used)
  const u32 shift_amount = (rsVal & 0x1F);
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/true);
}
1693
1694
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // Rd = Rt >> Sa (arithmetic, shift amount from the instruction's shamt field)
  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/false);
}
1702
1703
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  // Rd = Rt >> Rs (arithmetic, only the low five bits of Rs are used)
  const u32 shift_amount = (rsVal & 0x1F);
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/true);
}
1711
1712
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 idx = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", idx).c_str(), rdVal, &g_state.pgxp_cop0[idx]);

  // CPU[Rt] = CP0[Rd]
  // Validate the tracked COP0 entry against the value actually read, then copy it to Rt.
  PGXPValue& cop0_entry = g_state.pgxp_cop0[idx];
  cop0_entry.Validate(rdVal);

  PGXPValue& dest = GetRtValue(instr);
  dest = cop0_entry;
  dest.value = rdVal;
}
1725
1726
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  // CP0[Rd] = CPU[Rt]
  //   rdVal - value associated with the destination COP0 register (recorded as its tracked value).
  //   rtVal - current CPU value of the source register Rt, used to validate its tracked entry.
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  prdVal = prtVal;

  // The tracked value belongs on the destination (COP0) entry, mirroring CPU_MFC0.
  // Previously this wrote to the source GPR entry instead, which left the COP0 entry holding rtVal
  // and overwrote Rt's tracked value with rdVal.
  prdVal.value = rdVal;
}
1736
1737