CoCalc -- cpu_pgxp.cpp

GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_pgxp.cpp
⁴²¹² views
1
// SPDX-FileCopyrightText: 2016 iCatButler, 2019-2024 Connor McLaughlin <[email protected]>
2
// SPDX-License-Identifier: CC-BY-NC-ND-4.0
3
//
4
// This file has been completely rewritten over the years compared to the original PCSXR-PGXP release.
5
// No original code remains. The original copyright notice is included above for historical purposes.
6
//
7

8
#include "cpu_pgxp.h"
9
#include "bus.h"
10
#include "cpu_core.h"
11
#include "cpu_core_private.h"
12
#include "cpu_disasm.h"
13
#include "gpu_types.h"
14
#include "settings.h"
15

16
#include "util/gpu_device.h"
17

18
#include "common/assert.h"
19
#include "common/log.h"
20

21
#include <climits>
22
#include <cmath>
23

24
LOG_CHANNEL(CPU);
25

26
// #define LOG_VALUES 1
27
// #define LOG_LOOKUPS 1
28

29
// TODO: Don't update flags on Validate(), instead return it.
30

31
namespace CPU::PGXP {
32

33
// Sizes of the PGXP side tables, all expressed in PGXPValue entries.
enum : u32
{
  // The vertex cache is a 2D table indexed by screen coordinate (see CacheVertex()).
  VERTEX_CACHE_WIDTH = 2048u,
  VERTEX_CACHE_HEIGHT = 2048u,
  VERTEX_CACHE_SIZE = VERTEX_CACHE_WIDTH * VERTEX_CACHE_HEIGHT,

  // One shadow entry per 32-bit word of RAM, followed by one per word of scratchpad.
  PGXP_MEM_SIZE = (static_cast<u32>(Bus::RAM_8MB_SIZE) + static_cast<u32>(CPU::SCRATCHPAD_SIZE)) / 4,

  // Index of the first scratchpad entry inside the shadow table.
  PGXP_MEM_SCRATCH_OFFSET = Bus::RAM_8MB_SIZE / 4,
};
41

42
// Bits stored in PGXPValue::flags describing which components can be trusted.
enum : u32
{
  VALID_X = 0x00000001u,
  VALID_Y = 0x00000002u,
  VALID_Z = 0x00000004u,
  VALID_LOWZ = 0x00010000u,      // Valid Z from the low part of a 32-bit value.
  VALID_HIGHZ = 0x00020000u,     // Valid Z from the high part of a 32-bit value.
  VALID_TAINTED_Z = 0x80000000u, // X/Y has been changed, Z may not be accurate.

  // Convenience combinations.
  VALID_XY = (VALID_X | VALID_Y),
  VALID_XYZ = (VALID_XY | VALID_Z),
  VALID_ALL = VALID_XYZ,
};
55

56
// Helpers for reading or replacing the 16-bit halves of a 32-bit value.
// LOWORD_* read bits 0-15, HIWORD_* read bits 16-31; the _S16 variants reinterpret the half as signed.
#define LOWORD_U16(val) (static_cast<u16>(val))
#define HIWORD_U16(val) (LOWORD_U16(static_cast<u32>(val) >> 16))
#define LOWORD_S16(val) (static_cast<s16>(LOWORD_U16(val)))
#define HIWORD_S16(val) (static_cast<s16>(HIWORD_U16(val)))
// SET_* return val with one half replaced; the other half is preserved.
#define SET_LOWORD(val, loword) (static_cast<u32>(static_cast<u16>(loword)) | (static_cast<u32>(val) & 0xFFFF0000u))
#define SET_HIWORD(val, hiword) ((static_cast<u32>(hiword) << 16) | (static_cast<u32>(val) & 0x0000FFFFu))
62

63
static double f16Sign(double val);
64
static double f16Unsign(double val);
65
static double f16Overflow(double val);
66

67
static void CacheVertex(u32 value, const PGXPValue& vertex);
68
static PGXPValue* GetCachedVertex(u32 value);
69

70
static float TruncateVertexPosition(float p);
71
static bool IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y);
72

73
static PGXPValue& GetRdValue(Instruction instr);
74
static PGXPValue& GetRtValue(Instruction instr);
75
static PGXPValue& ValidateAndGetRtValue(Instruction instr, u32 rtVal);
76
static PGXPValue& ValidateAndGetRsValue(Instruction instr, u32 rsVal);
77
static void SetRtValue(Instruction instr, const PGXPValue& val);
78
static void SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal);
79
static PGXPValue& GetSXY0();
80
static PGXPValue& GetSXY1();
81
static PGXPValue& GetSXY2();
82
static PGXPValue& PushSXY();
83

84
static PGXPValue* GetPtr(u32 addr);
85
static const PGXPValue& ValidateAndLoadMem(u32 addr, u32 value);
86
static void ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign);
87

88
static void CPU_MTC2(u32 reg, const PGXPValue& value, u32 val);
89
static void CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal);
90
static void CPU_SLL(Instruction instr, u32 rtVal, u32 sh);
91
static void CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable);
92

93
static void WriteMem(u32 addr, const PGXPValue& value);
94
static void WriteMem16(u32 addr, const PGXPValue& value);
95

96
static void CopyZIfMissing(PGXPValue& dst, const PGXPValue& src);
97
static void SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1, const PGXPValue& src2);
98

99
// Optional per-instruction tracing. With LOG_VALUES defined, each macro logs the current
// instruction plus the named operand(s); otherwise every macro expands to a no-op.
// All macros expect a variable named `instr` to be in scope at the point of use.
#ifdef LOG_VALUES
static void LogInstruction(u32 pc, Instruction instr);
static void LogValue(const char* name, u32 rval, const PGXPValue* val);
static void LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val);

// clang-format off
#define LOG_VALUES_NV() do { LogInstruction(CPU::g_state.current_instruction_pc, instr); } while (0)
#define LOG_VALUES_1(name, rval, val) do { LogInstruction(CPU::g_state.current_instruction_pc, instr); LogValue(name, rval, val); } while (0)
#define LOG_VALUES_C1(rnum, rval) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(rnum)), rval, &g_state.pgxp_gpr[static_cast<u32>(rnum)]); } while(0)
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r1num)), r1val, &g_state.pgxp_gpr[static_cast<u32>(r1num)]); LogValue(CPU::GetRegName(static_cast<CPU::Reg>(r2num)), r2val, &g_state.pgxp_gpr[static_cast<u32>(r2num)]); } while(0)
#define LOG_VALUES_LOAD(addr, val) do { LogInstruction(CPU::g_state.current_instruction_pc,instr); LogValue(TinyString::from_format("MEM[{:08X}]", addr).c_str(), val, GetPtr(addr)); } while(0)
// s_log stays null if the log file could not be opened, so it must be checked before writing to it.
#define LOG_VALUES_STORE(rnum, rval, addr) do { LOG_VALUES_C1(rnum, rval); if (s_log) std::fprintf(s_log, " addr=%08X", addr); } while(0)
#else
#define LOG_VALUES_NV() (void)0
#define LOG_VALUES_1(name, rval, val) (void)0
#define LOG_VALUES_C1(rnum, rval) (void)0
#define LOG_VALUES_C2(r1num, r1val, r2num, r2val) (void)0
#define LOG_VALUES_LOAD(addr, val) (void)0
#define LOG_VALUES_STORE(rnum, rval, addr) (void)0
#endif
// clang-format on
120

121
// Value-initialized entry handed out whenever no precise data is available.
static constexpr PGXPValue INVALID_VALUE{};

// Shadow of RAM + scratchpad, one entry per 32-bit word (PGXP_MEM_SIZE entries once allocated).
static PGXPValue* s_mem = nullptr;

// Optional screen-coordinate keyed cache (VERTEX_CACHE_SIZE entries once allocated).
static PGXPValue* s_vertex_cache = nullptr;

#ifdef LOG_VALUES
// Trace file, opened on first use by LogInstruction().
static std::FILE* s_log = nullptr;
#endif
129
} // namespace CPU::PGXP
130

131
void CPU::PGXP::Initialize()
132
{
133
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
134
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
135
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
136

137
  if (!s_mem)
138
  {
139
    s_mem = static_cast<PGXPValue*>(std::calloc(PGXP_MEM_SIZE, sizeof(PGXPValue)));
140
    if (!s_mem)
141
      Panic("Failed to allocate PGXP memory");
142
  }
143

144
  if (g_settings.gpu_pgxp_vertex_cache && !s_vertex_cache)
145
  {
146
    s_vertex_cache = static_cast<PGXPValue*>(std::calloc(VERTEX_CACHE_SIZE, sizeof(PGXPValue)));
147
    if (!s_vertex_cache)
148
    {
149
      ERROR_LOG("Failed to allocate memory for vertex cache, disabling.");
150
      g_settings.gpu_pgxp_vertex_cache = false;
151
    }
152
  }
153

154
  if (s_vertex_cache)
155
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
156
}
157

158
void CPU::PGXP::Reset()
159
{
160
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
161
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
162
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
163

164
  if (s_mem)
165
    std::memset(s_mem, 0, sizeof(PGXPValue) * PGXP_MEM_SIZE);
166

167
  if (g_settings.gpu_pgxp_vertex_cache && s_vertex_cache)
168
    std::memset(s_vertex_cache, 0, sizeof(PGXPValue) * VERTEX_CACHE_SIZE);
169
}
170

171
void CPU::PGXP::Shutdown()
172
{
173
  if (s_vertex_cache)
174
  {
175
    std::free(s_vertex_cache);
176
    s_vertex_cache = nullptr;
177
  }
178
  if (s_mem)
179
  {
180
    std::free(s_mem);
181
    s_mem = nullptr;
182
  }
183

184
  std::memset(g_state.pgxp_gte, 0, sizeof(g_state.pgxp_gte));
185
  std::memset(g_state.pgxp_gpr, 0, sizeof(g_state.pgxp_gpr));
186
  std::memset(g_state.pgxp_cop0, 0, sizeof(g_state.pgxp_cop0));
187
}
188

189
// Wraps val into the range of a signed 16-bit integer while keeping 16 fractional bits:
// the value is scaled to 16.16 fixed point, truncated to 32 bits, and scaled back.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Sign(double val)
{
  const double scale = static_cast<double>(USHRT_MAX + 1);
  const s64 fixed = static_cast<s64>(val * scale);
  const s32 wrapped = static_cast<s32>(fixed);
  return static_cast<double>(wrapped) / scale;
}
194

195
// Maps a signed 16-bit style value onto its unsigned counterpart: negative inputs are
// shifted up by 65536, non-negative inputs are returned unchanged.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Unsign(double val)
{
  if (val >= 0)
    return val;

  return val + (USHRT_MAX + 1);
}
199

200
// Returns the part of val that lies above the low 16 bits: val is truncated to a 64-bit
// integer and arithmetically shifted right by 16.
ALWAYS_INLINE_RELEASE double CPU::PGXP::f16Overflow(double val)
{
  const s64 whole = static_cast<s64>(val);
  const s64 upper = whole >> 16;
  return static_cast<double>(upper);
}
204

205
// Returns the shadow entry for the instruction's rd register.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRdValue(Instruction instr)
{
  const u8 rd = static_cast<u8>(instr.r.rd.GetValue());
  return g_state.pgxp_gpr[rd];
}
209

210
// Returns the shadow entry for the instruction's rt register.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetRtValue(Instruction instr)
{
  const u8 rt = static_cast<u8>(instr.r.rt.GetValue());
  return g_state.pgxp_gpr[rt];
}
214

215
// Returns the shadow entry for rt after running Validate() on it against the register's
// actual contents (rtVal). Validate() may modify the entry's flags.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRtValue(Instruction instr, u32 rtVal)
{
  PGXPValue& shadow = GetRtValue(instr);
  shadow.Validate(rtVal);
  return shadow;
}
221

222
// Returns the shadow entry for rs after running Validate() on it against the register's
// actual contents (rsVal). Validate() may modify the entry's flags.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::ValidateAndGetRsValue(Instruction instr, u32 rsVal)
{
  const u8 rs = static_cast<u8>(instr.r.rs.GetValue());
  PGXPValue& shadow = g_state.pgxp_gpr[rs];
  shadow.Validate(rsVal);
  return shadow;
}
228

229
// Overwrites the shadow entry for rt with a copy of val.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val)
{
  GetRtValue(instr) = val;
}
233

234
// Overwrites the shadow entry for rt with a copy of val, then records rtVal as the raw
// register contents the entry corresponds to.
ALWAYS_INLINE void CPU::PGXP::SetRtValue(Instruction instr, const PGXPValue& val, u32 rtVal)
{
  PGXPValue& dst = GetRtValue(instr);
  dst = val;
  dst.value = rtVal;
}
240

241
// Shadow of GTE data register 12, the oldest entry of the three-deep screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY0()
{
  constexpr u32 SXY0_REG = 12;
  return g_state.pgxp_gte[SXY0_REG];
}
245

246
// Shadow of GTE data register 13, the middle entry of the three-deep screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY1()
{
  constexpr u32 SXY1_REG = 13;
  return g_state.pgxp_gte[SXY1_REG];
}
250

251
// Shadow of GTE data register 14, the newest entry of the three-deep screen XY FIFO.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::GetSXY2()
{
  constexpr u32 SXY2_REG = 14;
  return g_state.pgxp_gte[SXY2_REG];
}
255

256
// Advances the screen XY FIFO by one step (SXY0 <- SXY1, SXY1 <- SXY2) and returns the
// SXY2 slot so the caller can fill in the new entry. SXY2 itself is left unchanged here.
ALWAYS_INLINE CPU::PGXPValue& CPU::PGXP::PushSXY()
{
  PGXPValue* const fifo = &g_state.pgxp_gte[12];
  fifo[0] = fifo[1];
  fifo[1] = fifo[2];
  return fifo[2];
}
262

263
// Translates a CPU address into the shadow entry for the 32-bit word containing it.
// Scratchpad addresses map to the region starting at PGXP_MEM_SCRATCH_OFFSET, RAM (including
// its mirrors) maps to the start of the table. Returns nullptr for any other address.
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetPtr(u32 addr)
{
#if 0
  if ((addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) >= 0x0017A2B4 &&
      (addr & CPU::PHYSICAL_MEMORY_ADDRESS_MASK) <= 0x0017A2B4)
    __debugbreak();
#endif

  const bool is_scratchpad = ((addr & SCRATCHPAD_ADDR_MASK) == SCRATCHPAD_ADDR);
  if (is_scratchpad)
  {
    const u32 word_index = (addr & SCRATCHPAD_OFFSET_MASK) >> 2;
    return &s_mem[PGXP_MEM_SCRATCH_OFFSET + word_index];
  }

  // Don't worry about >512MB here for performance reasons.
  const u32 paddr = (addr & KSEG_MASK);
  if (paddr >= Bus::RAM_MIRROR_END)
    return nullptr;

  return &s_mem[(paddr & Bus::g_ram_mask) >> 2];
}
281

282
// Looks up the shadow entry for a 32-bit load from addr and runs Validate() on it against
// the word actually read (value). Addresses without shadow storage yield INVALID_VALUE.
ALWAYS_INLINE_RELEASE const CPU::PGXPValue& CPU::PGXP::ValidateAndLoadMem(u32 addr, u32 value)
{
  PGXPValue* const entry = GetPtr(addr);
  if (entry) [[likely]]
  {
    entry->Validate(value);
    return *entry;
  }

  return INVALID_VALUE;
}
291

292
// Produces the shadow value for a 16-bit load.
//   dest  - receives the result (typically the shadow of the destination register).
//   addr  - load address; bit 1 selects the high (Y) or low (X) half of the shadow word.
//   value - the register contents produced by the real load.
//   sign  - true for a sign-extending load, false for zero-extending.
// The selected half ends up in dest.x; dest.y is synthesised to represent the extension bits.
// Addresses without shadow storage produce INVALID_VALUE.
ALWAYS_INLINE_RELEASE void CPU::PGXP::ValidateAndLoadMem16(PGXPValue& dest, u32 addr, u32 value, bool sign)
{
  PGXPValue* const entry = GetPtr(addr);
  if (!entry) [[unlikely]]
  {
    dest = INVALID_VALUE;
    return;
  }

  const bool hiword = ((addr & 2) != 0);

  // Only the half being loaded is checked; on mismatch only that half's flag is dropped
  // from the stored entry.
  const u32 stored_half = hiword ? (entry->value >> 16) : entry->value;
  if (Truncate16(stored_half) != Truncate16(value))
    entry->flags &= hiword ? ~VALID_Y : ~VALID_X;

  // Start from the full entry, then move the requested half (and its flag) into X.
  dest = *entry;
  if (hiword)
  {
    dest.x = dest.y;
    dest.flags = (dest.flags & ~VALID_X) | ((dest.flags & VALID_Y) >> 1);
  }

  // Y is only marked valid when X is; it then represents the upper half after extension.
  const bool x_valid = ((dest.flags & VALID_X) != 0);
  dest.y = (x_valid && dest.x < 0) ? -1.0f * sign : 0.0f;
  dest.flags = x_valid ? (dest.flags | VALID_Y) : (dest.flags & ~VALID_Y);

  dest.value = value;
}
333

334
// Records a 32-bit store: the shadow entry for addr becomes a copy of value, with both
// half-word Z markers set. Stores to addresses without shadow storage are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem(u32 addr, const PGXPValue& value)
{
  PGXPValue* const slot = GetPtr(addr);
  if (slot) [[likely]]
  {
    *slot = value;
    slot->flags |= (VALID_LOWZ | VALID_HIGHZ);
  }
}
343

344
// Records a 16-bit store. value.x (and its validity) is written into the Y half of the shadow
// word when addr bit 1 is set, otherwise into the X half; the raw value is merged likewise.
// Stores to addresses without shadow storage are ignored.
ALWAYS_INLINE_RELEASE void CPU::PGXP::WriteMem16(u32 addr, const PGXPValue& value)
{
  PGXPValue* const dest = GetPtr(addr);
  if (!dest) [[unlikely]]
    return;

  const bool hiword = ((addr & 2) != 0);
  const u32 src_x_flag = (value.flags & VALID_X);
  if (hiword)
  {
    dest->y = value.x;
    dest->flags = (dest->flags & ~VALID_Y) | (src_x_flag << 1);
    dest->value = (dest->value & UINT32_C(0x0000FFFF)) | (value.value << 16);
  }
  else
  {
    dest->x = value.x;
    dest->flags = (dest->flags & ~VALID_X) | src_x_flag;
    dest->value = (dest->value & UINT32_C(0xFFFF0000)) | (value.value & UINT32_C(0x0000FFFF));
  }

  // Z is shared by both halves, so track which half last supplied a valid one.
  // TODO: Check modified
  const u32 half_z_flag = hiword ? VALID_HIGHZ : VALID_LOWZ;
  if (value.flags & VALID_Z)
  {
    dest->z = value.z;
    dest->flags |= (VALID_Z | half_z_flag);
    return;
  }

  // This half no longer vouches for Z; once neither half does, Z is no longer valid.
  dest->flags &= ~half_z_flag;
  if ((dest->flags & (VALID_HIGHZ | VALID_LOWZ)) == 0)
    dest->flags &= ~VALID_Z;
}
379

380
// If dst has no valid Z, takes src's Z value; dst then inherits src's VALID_Z flag (if set).
ALWAYS_INLINE_RELEASE void CPU::PGXP::CopyZIfMissing(PGXPValue& dst, const PGXPValue& src)
{
  const bool dst_has_z = ((dst.flags & VALID_Z) != 0);
  if (!dst_has_z)
    dst.z = src.z;

  dst.flags |= (src.flags & VALID_Z);
}
385

386
// Chooses which operand's Z a two-operand result should carry and ORs VALID_Z into
// dst_flags when either operand has a valid Z.
// src2 wins when src1 has no Z at all, or when src1's Z is tainted while src2's is valid
// and untainted; otherwise src1's Z is used.
ALWAYS_INLINE_RELEASE void CPU::PGXP::SelectZ(float& dst_z, u32& dst_flags, const PGXPValue& src1,
                                              const PGXPValue& src2)
{
  const bool src1_missing = !(src1.flags & VALID_Z);
  const bool src1_tainted = ((src1.flags & VALID_TAINTED_Z) != 0);
  const bool src2_precise = ((src2.flags & (VALID_Z | VALID_TAINTED_Z)) == VALID_Z);
  const bool use_src2 = src1_missing || (src1_tainted && src2_precise);

  dst_z = use_src2 ? src2.z : src1.z;
  dst_flags |= ((src1.flags | src2.flags) & VALID_Z);
}
396

397
#ifdef LOG_VALUES
398
// Starts a new trace line for the instruction at pc: "<pc> <opcode bits> <disassembly>".
// The log file ("pgxp.log") is opened on the first call; on later calls the previous line is
// flushed and terminated first. If the file cannot be opened nothing is written, and the
// open is retried on the next call.
void CPU::PGXP::LogInstruction(u32 pc, Instruction instr)
{
  if (!s_log) [[unlikely]]
  {
    s_log = std::fopen("pgxp.log", "wb");

    // fopen() returns null on failure; writing to a null stream would be undefined behaviour.
    if (!s_log)
      return;
  }
  else
  {
    std::fflush(s_log);
    std::fputc('\n', s_log);
  }

  SmallString str;
  DisassembleInstruction(&str, pc, instr.bits);
  std::fprintf(s_log, "%08X %08X %-20s", pc, instr.bits, str.c_str());
}
414

415
void CPU::PGXP::LogValue(const char* name, u32 rval, const PGXPValue* val)
416
{
417
  if (!s_log) [[unlikely]]
418
    return;
419

420
  SmallString str;
421
  LogValueStr(str, name, rval, val);
422
  std::fprintf(s_log, " %s", str.c_str());
423
}
424

425
// Appends a description of one operand to str:
//   name=[RAW, NULL]                                   when there is no shadow entry
//   name=[RAW(, PGXPxxxxxxxx), {x,y,z}(, valid=XYZ)]   otherwise
// The "PGXP" part appears only when the shadow's raw value differs from rval, and the
// "valid=" part only when at least one of X/Y/Z is flagged valid.
void CPU::PGXP::LogValueStr(SmallStringBase& str, const char* name, u32 rval, const PGXPValue* val)
{
  str.append_format("{}=[{:08X}", name, rval);
  if (!val)
  {
    str.append(", NULL]");
    return;
  }

  if (val->value != rval)
    str.append_format(", PGXP{:08X}", val->value);

  str.append_format(", {{{},{},{}}}", val->x, val->y, val->z);

  const u32 valid_bits = (val->flags & VALID_ALL);
  if (valid_bits != 0)
  {
    str.append(", valid=");
    if (valid_bits & VALID_X)
      str.append('X');
    if (valid_bits & VALID_Y)
      str.append('Y');
    if (valid_bits & VALID_Z)
      str.append('Z');
  }

  // if (val->flags & VALID_TAINTED_Z)
  // str.append(", tainted");

  str.append(']');
}
456

457
#endif
458

459
void CPU::PGXP::GTE_RTPS(float x, float y, float z, u32 value)
460
{
461
  PGXPValue& pvalue = PushSXY();
462
  pvalue.x = x;
463
  pvalue.y = y;
464
  pvalue.z = z;
465
  pvalue.value = value;
466
  pvalue.flags = VALID_ALL;
467

468
  if (g_settings.gpu_pgxp_vertex_cache)
469
    CacheVertex(value, pvalue);
470
}
471

472
// Validates the three FIFO shadows against the GTE's actual SXY0..2 contents and reports
// whether all three have valid X, Y and Z.
bool CPU::PGXP::GTE_HasPreciseVertices(u32 sxy0, u32 sxy1, u32 sxy2)
{
  PGXPValue& v0 = GetSXY0();
  PGXPValue& v1 = GetSXY1();
  PGXPValue& v2 = GetSXY2();
  v0.Validate(sxy0);
  v1.Validate(sxy1);
  v2.Validate(sxy2);

  // Don't use accurate clipping for game-constructed values, which don't have a valid Z.
  const u32 common_flags = (v0.flags & v1.flags & v2.flags);
  return ((common_flags & VALID_XYZ) == VALID_XYZ);
}
484

485
float CPU::PGXP::GTE_NCLIP()
486
{
487
  const PGXPValue& SXY0 = GetSXY0();
488
  const PGXPValue& SXY1 = GetSXY1();
489
  const PGXPValue& SXY2 = GetSXY2();
490
  float nclip = ((SXY0.x * SXY1.y) + (SXY1.x * SXY2.y) + (SXY2.x * SXY0.y) - (SXY0.x * SXY2.y) - (SXY1.x * SXY0.y) -
491
                 (SXY2.x * SXY1.y));
492

493
  // ensure fractional values are not incorrectly rounded to 0
494
  const float nclip_abs = std::abs(nclip);
495
  if (0.1f < nclip_abs && nclip_abs < 1.0f)
496
    nclip += (nclip < 0.0f ? -1.0f : 1.0f);
497

498
  return nclip;
499
}
500

501
// Writes a shadow value into GTE data register `reg`.
//   reg 15      - pushes the screen XY FIFO and stores value as the newest entry.
//   reg 29 / 31 - read-only registers, the write is dropped.
//   otherwise   - the register's shadow becomes value, tagged with the raw contents val.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_MTC2(u32 reg, const PGXPValue& value, u32 val)
{
  if (reg == 15)
  {
    // push FIFO
    PGXPValue& newest = PushSXY();
    newest = value;
    return;
  }

  // read-only registers
  if (reg == 29 || reg == 31)
    return;

  PGXPValue& shadow = g_state.pgxp_gte[reg];
  shadow = value;
  shadow.value = val;
}
529

530
// CPU[Rt] = GTE_D[Rd]
// Validates the GTE register's shadow against rdVal and copies it into rt's shadow.
void CPU::PGXP::CPU_MFC2(Instruction instr, u32 rdVal)
{
  const u32 idx = instr.cop.Cop2Index();
  PGXPValue& gte_shadow = g_state.pgxp_gte[idx];
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rdVal, &gte_shadow);

  gte_shadow.Validate(rdVal);
  SetRtValue(instr, gte_shadow, rdVal);
}
540

541
// GTE_D[Rd] = CPU[Rt]
// Validates rt's shadow against rtVal and forwards it to the GTE register write helper.
void CPU::PGXP::CPU_MTC2(Instruction instr, u32 rtVal)
{
  const u32 idx = instr.cop.Cop2Index();
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  CPU_MTC2(idx, ValidateAndGetRtValue(instr, rtVal), rtVal);
}
550

551
// GTE_D[Rt] = Mem[addr]
// Loads the validated memory shadow for addr and writes it to the GTE register named by rt.
void CPU::PGXP::CPU_LWC2(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  const u32 gte_reg = static_cast<u32>(instr.r.rt.GetValue());
  CPU_MTC2(gte_reg, loaded, rtVal);
}
559

560
// Mem[addr] = GTE_D[Rt]
// Validates the GTE register's shadow against rtVal and stores it to the memory shadow.
void CPU::PGXP::CPU_SWC2(Instruction instr, u32 addr, u32 rtVal)
{
  const u32 idx = static_cast<u32>(instr.r.rt.GetValue());
  PGXPValue& prtVal = g_state.pgxp_gte[idx];
#ifdef LOG_VALUES
  LOG_VALUES_1(CPU::GetGTERegisterName(idx), rtVal, &prtVal);

  // s_log stays null if the log file could not be opened; never hand a null stream to fprintf.
  if (s_log)
    std::fprintf(s_log, " addr=%08X", addr);
#endif
  prtVal.Validate(rtVal);
  WriteMem(addr, prtVal);
}
572

573
// Stores vertex in the cache slot selected by the packed screen coordinate in value
// (low half = X, high half = Y, both signed). Coordinates are biased by 1024 so that the
// asserted range [-1024, 1023] maps onto [0, 2047] in each dimension.
// The range is only checked by a debug assertion.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CacheVertex(u32 value, const PGXPValue& vertex)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);
  DebugAssert(sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023);

  const u32 column = static_cast<u32>(sx + 1024);
  const u32 row = static_cast<u32>(sy + 1024);
  s_vertex_cache[row * VERTEX_CACHE_WIDTH + column] = vertex;
}
580

581
// Returns the cache slot for the packed screen coordinate in value (low half = X, high half = Y,
// both signed), or nullptr when either coordinate lies outside [-1024, 1023].
// The accepted range matches what CacheVertex() stores, so every cached row can be read back
// (the upper Y bound was previously 1013, which made rows 1014..1023 unreachable).
ALWAYS_INLINE_RELEASE CPU::PGXPValue* CPU::PGXP::GetCachedVertex(u32 value)
{
  const s16 sx = static_cast<s16>(value & 0xFFFFu);
  const s16 sy = static_cast<s16>(value >> 16);
  return (sx >= -1024 && sx <= 1023 && sy >= -1024 && sy <= 1023) ?
           &s_vertex_cache[(sy + 1024) * VERTEX_CACHE_WIDTH + (sx + 1024)] :
           nullptr;
}
589

590
// Applies the GPU's vertex position truncation to the integer part of p while keeping
// the fractional part untouched.
ALWAYS_INLINE_RELEASE float CPU::PGXP::TruncateVertexPosition(float p)
{
  // Truncates positions to 11 bits before drawing.
  // Matches GPU command parsing, where the upper 5 bits are dropped.
  // Necessary for Jet Moto and Racingroovy VS.
  const s32 whole = static_cast<s32>(p);
  const float fraction = p - static_cast<float>(whole);
  return static_cast<float>(TruncateGPUVertexPosition(whole)) + fraction;
}
599

600
// Reports whether a precise position is close enough to the native integer position.
// Both axes must differ by at most the configured tolerance; a negative tolerance
// disables the check (always true).
ALWAYS_INLINE_RELEASE bool CPU::PGXP::IsWithinTolerance(float precise_x, float precise_y, int int_x, int int_y)
{
  const float tolerance = g_settings.gpu_pgxp_tolerance;
  if (tolerance < 0.0f)
    return true;

  const float delta_x = std::abs(precise_x - static_cast<float>(int_x));
  if (!(delta_x <= tolerance))
    return false;

  const float delta_y = std::abs(precise_y - static_cast<float>(int_y));
  return (delta_y <= tolerance);
}
609

610
// Resolves the precise position for a vertex the GPU is about to draw.
//   addr          - address the vertex word was read from.
//   value         - the packed integer vertex word.
//   x, y          - native integer position, used for the tolerance check and as fallback.
//   xOffs, yOffs  - offsets added to the precise position.
//   out_x/y/w     - receive the resulting position and depth factor.
// Lookup order: memory shadow (must have valid X/Y and match value), then the vertex cache
// (if enabled), then the native integer position with w = 1.
// Returns true only when the memory shadow was used, passed the tolerance check, and has a
// valid Z. A vertex-cache hit fills the outputs but still returns false.
bool CPU::PGXP::GetPreciseVertex(u32 addr, u32 value, int x, int y, int xOffs, int yOffs, float* out_x, float* out_y,
                                 float* out_w)
{
  // Writes the outputs from a shadow entry.
  const auto write_outputs = [&](const PGXPValue& v) {
    *out_x = TruncateVertexPosition(v.x) + static_cast<float>(xOffs);
    *out_y = TruncateVertexPosition(v.y) + static_cast<float>(yOffs);
    *out_w = v.z / static_cast<float>(GTE::MAX_Z);
  };

  const PGXPValue* vert = GetPtr(addr);
  const bool mem_usable = (vert && (vert->flags & VALID_XY) == VALID_XY && vert->value == value);
  if (mem_usable)
  {
    write_outputs(*vert);

#ifdef LOG_LOOKUPS
    GL_INS_FMT("0x{:08X} {},{} => {},{} ({},{},{}) ({},{})", addr, x, y, *out_x, *out_y,
               TruncateVertexPosition(vert->x), TruncateVertexPosition(vert->y), vert->z, std::abs(*out_x - x),
               std::abs(*out_y - y));
#endif

    // check validity of z component
    if (IsWithinTolerance(*out_x, *out_y, x, y))
      return ((vert->flags & VALID_Z) != 0);
  }

  if (g_settings.gpu_pgxp_vertex_cache)
  {
    vert = GetCachedVertex(value);
    const bool cache_usable = (vert && (vert->flags & VALID_XY) == VALID_XY);
    if (cache_usable)
    {
      write_outputs(*vert);
      if (IsWithinTolerance(*out_x, *out_y, x, y))
        return false;
    }
  }

  // no valid value can be found anywhere, use the native PSX data
  *out_x = static_cast<float>(x);
  *out_y = static_cast<float>(y);
  *out_w = 1.0f;
  return false;
}
653

654
// Rt = Mem[Rs + Im]
// rt's shadow becomes the validated memory shadow for addr.
void CPU::PGXP::CPU_LW(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  const PGXPValue& loaded = ValidateAndLoadMem(addr, rtVal);
  SetRtValue(instr, loaded);
}
660

661
// Byte loads carry no precise data: rt's shadow is reset to INVALID_VALUE.
void CPU::PGXP::CPU_LBx(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  GetRtValue(instr) = INVALID_VALUE;
}
666

667
// Rt = Mem[Rs + Im] (sign extended)
void CPU::PGXP::CPU_LH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& rt_shadow = GetRtValue(instr);
  ValidateAndLoadMem16(rt_shadow, addr, rtVal, /*sign=*/true);
}
673

674
// Rt = Mem[Rs + Im] (zero extended)
void CPU::PGXP::CPU_LHU(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_LOAD(addr, rtVal);

  PGXPValue& rt_shadow = GetRtValue(instr);
  ValidateAndLoadMem16(rt_shadow, addr, rtVal, /*sign=*/false);
}
680

681
// Byte stores carry no precise data: the memory shadow for addr is overwritten with
// INVALID_VALUE via WriteMem().
void CPU::PGXP::CPU_SB(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  const PGXPValue& nothing = INVALID_VALUE;
  WriteMem(addr, nothing);
}
686

687
// 16-bit store: validates rt's shadow against rtVal and merges it into the selected half
// of the memory shadow for addr.
void CPU::PGXP::CPU_SH(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  WriteMem16(addr, ValidateAndGetRtValue(instr, rtVal));
}
693

694
// Mem[Rs + Im] = Rt
// Validates rt's shadow against rtVal and stores it as the memory shadow for addr.
void CPU::PGXP::CPU_SW(Instruction instr, u32 addr, u32 rtVal)
{
  LOG_VALUES_STORE(instr.r.rt.GetValue(), rtVal, addr);

  WriteMem(addr, ValidateAndGetRtValue(instr, rtVal));
}
701

702
// Variant of CPU_MOVE taking both register indices in one argument:
// bits 0-7 hold Rs, the remaining upper bits hold Rd.
void CPU::PGXP::CPU_MOVE_Packed(u32 rd_and_rs, u32 rsVal)
{
  CPU_MOVE(/*Rd=*/rd_and_rs >> 8, /*Rs=*/rd_and_rs & 0xFFu, rsVal);
}
708

709
// Register-to-register move: validates Rs's shadow against rsVal, then copies it to Rd's shadow.
void CPU::PGXP::CPU_MOVE(u32 Rd, u32 Rs, u32 rsVal)
{
#ifdef LOG_VALUES
  // The logging macros expect an `instr` in scope; there is none for a synthesised move.
  const Instruction instr = {0};
  LOG_VALUES_C1(Rs, rsVal);
#endif
  PGXPValue* const regs = g_state.pgxp_gpr;
  regs[Rs].Validate(rsVal);
  regs[Rd] = regs[Rs];
}
719

720
// Rt = Rs + Imm (signed)
// rt's shadow starts as a copy of rs's validated shadow. A zero immediate leaves it at that.
// When rs is zero the result is simply the immediate, stored as a low-precision value.
// Otherwise the low half of the immediate is added to X with carry into Y, and both halves
// are wrapped back into 16-bit range. Any modification marks Z as tainted.
void CPU::PGXP::CPU_ADDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  const u32 immVal = instr.i.imm_sext32();

  PGXPValue& dst = GetRtValue(instr);
  dst = src;

  if (immVal == 0)
    return;

  if (rsVal == 0)
  {
    // x is low precision value
    dst.x = static_cast<float>(LOWORD_S16(immVal));
    dst.y = static_cast<float>(HIWORD_S16(immVal));
    dst.flags |= VALID_X | VALID_Y | VALID_TAINTED_Z;
    dst.value = immVal;
    return;
  }

  // Add the low halves in unsigned space.
  float low = static_cast<float>(f16Unsign(dst.x));
  low += static_cast<float>(LOWORD_U16(immVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low > USHRT_MAX)
    carry = 1.0f;
  else if (low < 0.0f)
    carry = -1.0f;

  dst.x = static_cast<float>(f16Sign(low));

  float high = dst.y;
  high += HIWORD_S16(immVal) + carry;

  // truncate on overflow/underflow
  high += (high > SHRT_MAX) ? -(USHRT_MAX + 1) : (high < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;
  dst.y = high;

  dst.value = rsVal + immVal;
  dst.flags |= VALID_TAINTED_Z;
}
760

761
// Rt = Rs & Imm
// The immediate is zero-extended, so the upper half of the result is always zero (Y = 0, valid).
// X is 0 for a zero mask, carried over from rs for a 0xFFFF mask, and otherwise replaced by
// the low-precision integer result. Z is carried over from rs and marked tainted.
void CPU::PGXP::CPU_ANDI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal & imm;
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);

  // Snapshot the source first: rt and rs may be the same register.
  const float src_x = src.x;
  const float src_z = src.z;
  u32 flags = src.flags | VALID_Y | VALID_TAINTED_Z;

  float x;
  if (imm == 0)
  {
    // if 0 then x == 0
    x = 0.0f;
    flags |= VALID_X;
  }
  else if (imm == 0xFFFFu)
  {
    // if saturated then x == x
    x = src_x;
  }
  else
  {
    // otherwise x is low precision value
    x = static_cast<float>(LOWORD_S16(rtVal));
    flags |= VALID_X;
  }

  dst.x = x;
  dst.y = 0.0f; // remove upper 16-bits
  dst.z = src_z;
  dst.value = rtVal;
  dst.flags = flags;
}
803

804
// Rt = Rs | Imm
// rt's shadow is a copy of rs's validated shadow tagged with the new raw value. A non-zero
// immediate changes the low half, so X is replaced by the low-precision integer result and
// Z is marked tainted.
void CPU::PGXP::CPU_ORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal | imm;

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = rtVal;

  // if 0 then x == x, nothing more to do
  if (imm != 0) [[likely]]
  {
    // otherwise x is low precision value
    dst.x = static_cast<float>(LOWORD_S16(rtVal));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
828

829
// Rt = Rs ^ Imm
// rt's shadow is a copy of rs's validated shadow tagged with the new raw value. A non-zero
// immediate changes the low half, so X is replaced by the low-precision integer result and
// Z is marked tainted.
void CPU::PGXP::CPU_XORI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_zext32();
  const u32 rtVal = rsVal ^ imm;

  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& dst = GetRtValue(instr);
  dst = src;
  dst.value = rtVal;

  // if 0 then x == x, nothing more to do
  if (imm != 0) [[likely]]
  {
    // otherwise x is low precision value
    dst.x = static_cast<float>(LOWORD_S16(rtVal));
    dst.flags |= VALID_X | VALID_TAINTED_Z;
  }
}
853

854
// Rt = Rs < Imm (signed)
// The shadow X holds the comparison evaluated on the precise halves (1.0 or 0.0), Y is 0,
// Z is carried over from rs and marked tainted. The raw value is the exact integer result.
void CPU::PGXP::CPU_SLTI(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const s32 imm = instr.i.imm_s16();
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);

  // Split the sign-extended immediate into low (x) and high (y) halves.
  const float imm_low = static_cast<float>(imm);
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  // Evaluate before writing anything: rt and rs may be the same register.
  const bool precise_less = (src.GetValidY(rsVal) < imm_high || src.GetValidX(rsVal) < imm_low);

  PGXPValue& dst = GetRtValue(instr);
  dst.x = precise_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = src.z;
  dst.flags = src.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < imm);
}
872

873
// Rt = Rs < Imm (Unsigned)
// The shadow X holds the comparison evaluated on the precise halves (1.0 or 0.0), Y is 0,
// Z is carried over from rs and marked tainted. The raw value is the exact integer result.
void CPU::PGXP::CPU_SLTIU(Instruction instr, u32 rsVal)
{
  LOG_VALUES_C1(instr.i.rs.GetValue(), rsVal);

  const u32 imm = instr.i.imm_u16();
  PGXPValue& src = ValidateAndGetRsValue(instr, rsVal);

  // Split the immediate into low (x) and high (y) halves.
  const float imm_low = static_cast<float>(static_cast<s16>(imm)); // deliberately signed
  const float imm_high = (imm_low < 0.0f) ? -1.0f : 0.0f;

  // Evaluate before writing anything: rt and rs may be the same register.
  const bool precise_less = (f16Unsign(src.GetValidY(rsVal)) < f16Unsign(imm_high) ||
                             f16Unsign(src.GetValidX(rsVal)) < imm_low);

  PGXPValue& dst = GetRtValue(instr);
  dst.x = precise_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = src.z;
  dst.flags = src.flags | VALID_X | VALID_Y | VALID_TAINTED_Z;
  dst.value = BoolToUInt32(rsVal < imm);
}
892

893
// Rt = Imm << 16
// The result is fully known: X (low half) is 0, Y (high half) is the signed immediate.
// Z is cleared and not marked valid.
void CPU::PGXP::CPU_LUI(Instruction instr)
{
  LOG_VALUES_NV();

  PGXPValue& dst = GetRtValue(instr);
  dst.value = instr.i.imm_zext32() << 16;
  dst.flags = VALID_XY;
  dst.x = 0.0f;
  dst.y = static_cast<float>(instr.i.imm_s16());
  dst.z = 0.0f;
}
905

906
// Rd = Rs + Rt (signed)
// Adding zero simply forwards the other operand's shadow (borrowing Z from the zero operand
// if the forwarded one has none). Otherwise the low halves are added in unsigned space with
// carry into the high halves, both are wrapped to 16-bit range, and Z is picked by SelectZ().
// Statement order in the general case matters: rd may be the same register as rs and/or rt.
void CPU::PGXP::CPU_ADD(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& prsVal = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& prtVal = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& prdVal = GetRdValue(instr);

  if (rtVal == 0)
  {
    prdVal = prsVal;
    CopyZIfMissing(prdVal, prtVal);
    return;
  }

  if (rsVal == 0)
  {
    prdVal = prtVal;
    CopyZIfMissing(prdVal, prsVal);
    return;
  }

  const double low_sum = f16Unsign(prsVal.GetValidX(rsVal)) + f16Unsign(prtVal.GetValidX(rtVal));

  // carry on over/underflow
  float carry = 0.0f;
  if (low_sum > USHRT_MAX)
    carry = 1.0f;
  else if (low_sum < 0.0f)
    carry = -1.0f;

  prdVal.x = static_cast<float>(f16Sign(low_sum));
  prdVal.y = prsVal.GetValidY(rsVal) + prtVal.GetValidY(rtVal) + carry;

  // truncate on overflow/underflow
  prdVal.y += (prdVal.y > SHRT_MAX) ? -(USHRT_MAX + 1) : (prdVal.y < SHRT_MIN) ? (USHRT_MAX + 1) : 0.0f;

  prdVal.value = rsVal + rtVal;

  // valid x/y only if one side had a valid x/y
  prdVal.flags = prsVal.flags | (prtVal.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(prdVal.z, prdVal.flags, prsVal, prtVal);
}
945

946
// SUB: Rd = Rs - Rt (signed).
// rsVal/rtVal are the raw PSX register values; they are used to validate the shadow
// values of Rs/Rt and to compute the raw result stored alongside the float components.
void CPU::PGXP::CPU_SUB(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Subtracting zero degenerates into a move of Rs. Rt is still handed to
  // CopyZIfMissing() as a possible Z source.
  if (rtVal == 0)
  {
    dst = lhs;
    CopyZIfMissing(dst, rhs);
    return;
  }

  // The x components are subtracted after passing each through f16Unsign().
  const double low_diff = f16Unsign(lhs.GetValidX(rsVal)) - f16Unsign(rhs.GetValidX(rtVal));

  // Leaving the [0, USHRT_MAX] range carries (or borrows) one unit into the y component.
  float borrow = 0.0f;
  if (low_diff > USHRT_MAX)
    borrow = 1.0f;
  else if (low_diff < 0.0f)
    borrow = -1.0f;

  dst.x = static_cast<float>(f16Sign(low_diff));
  dst.y = lhs.GetValidY(rsVal) - (rhs.GetValidY(rtVal) - borrow);

  // Wrap y back into the signed 16-bit range on overflow/underflow.
  // The adjustment is always added (possibly as 0.0f) so float behaviour stays unchanged.
  float wrap = 0.0f;
  if (dst.y > SHRT_MAX)
    wrap = static_cast<float>(-(USHRT_MAX + 1));
  else if (dst.y < SHRT_MIN)
    wrap = static_cast<float>(USHRT_MAX + 1);
  dst.y += wrap;

  dst.value = rsVal - rtVal;

  // All of Rs's flags are kept, Rt contributes only its X/Y validity, and Z is
  // marked tainted because X/Y were recomputed.
  dst.flags = lhs.flags | (rhs.flags & VALID_XY) | VALID_TAINTED_Z;

  SelectZ(dst.z, dst.flags, lhs, rhs);
}
980

981
// Shared shadow-value update for the bitwise register ops (called by AND/OR/XOR/NOR).
// rdVal is the already-computed PSX result, rsVal/rtVal are the raw operands.
// For each 16-bit half of the result: zero maps to 0.0f, a half identical to the
// matching half of Rs or Rt reuses that operand's component, and anything else
// falls back to the integer half converted to float.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_BITWISE(Instruction instr, u32 rdVal, u32 rsVal, u32 rtVal)
{
  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);

  // Low half -> x component.
  const auto result_low = LOWORD_U16(rdVal);
  const float new_x = (result_low == 0)                 ? 0.0f :
                      (result_low == LOWORD_U16(rsVal)) ? lhs.GetValidX(rsVal) :
                      (result_low == LOWORD_U16(rtVal)) ? rhs.GetValidX(rtVal) :
                                                          static_cast<float>(LOWORD_S16(rdVal));

  // High half -> y component.
  const auto result_high = HIWORD_U16(rdVal);
  const float new_y = (result_high == 0)                 ? 0.0f :
                      (result_high == HIWORD_U16(rsVal)) ? lhs.GetValidY(rsVal) :
                      (result_high == HIWORD_U16(rtVal)) ? rhs.GetValidY(rtVal) :
                                                           static_cast<float>(HIWORD_S16(rdVal));

  // The result is X/Y-valid (with tainted Z) if either operand had any X/Y validity.
  u32 flags = 0;
  if ((lhs.flags | rhs.flags) & VALID_XY)
    flags = VALID_XY | VALID_TAINTED_Z;

  // Rd may be the same register as a source, so it is only written once every
  // value derived from the sources has been computed.
  PGXPValue& dst = GetRdValue(instr);
  SelectZ(dst.z, flags, lhs, rhs);
  dst.x = new_x;
  dst.y = new_y;
  dst.flags = flags;
  dst.value = rdVal;
}
1015

1016
// AND: Rd = Rs & Rt. The PSX result is computed here; shadow tracking is done by CPU_BITWISE().
void CPU::PGXP::CPU_AND_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal & rtVal, rsVal, rtVal);
}
1024

1025
// OR: Rd = Rs | Rt. The PSX result is computed here; shadow tracking is done by CPU_BITWISE().
void CPU::PGXP::CPU_OR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal | rtVal, rsVal, rtVal);
}
1033

1034
// XOR: Rd = Rs ^ Rt. The PSX result is computed here; shadow tracking is done by CPU_BITWISE().
void CPU::PGXP::CPU_XOR_(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, rsVal ^ rtVal, rsVal, rtVal);
}
1042

1043
// NOR: Rd = ~(Rs | Rt). The PSX result is computed here; shadow tracking is done by CPU_BITWISE().
void CPU::PGXP::CPU_NOR(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  CPU_BITWISE(instr, ~(rsVal | rtVal), rsVal, rtVal);
}
1051

1052
// SLT: Rd = (Rs < Rt), signed comparison.
// The raw PSX result is computed exactly from rsVal/rtVal; the float x component
// is derived from the operands' shadow components.
void CPU::PGXP::CPU_SLT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // The y components are compared as-is; the x components are only consulted
  // when the y comparison did not already say "less".
  // NOTE(review): the x comparison is not gated on the y components being equal,
  // so this can report "less" when y(Rs) > y(Rt) - confirm this is intentional.
  bool shadow_less = (lhs.GetValidY(rsVal) < rhs.GetValidY(rtVal));
  if (!shadow_less)
    shadow_less = (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  dst.x = shadow_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(static_cast<s32>(rsVal) < static_cast<s32>(rtVal));
}
1069

1070
// SLTU: Rd = (Rs < Rt), unsigned comparison.
// The raw PSX result is computed exactly from rsVal/rtVal; the float x component
// is derived from the operands' shadow components.
void CPU::PGXP::CPU_SLTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& lhs = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& rhs = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);

  // Both components go through f16Unsign() before comparing; the x components are
  // only consulted when the y comparison did not already say "less".
  // NOTE(review): the x comparison is not gated on the y components being equal,
  // so this can report "less" when y(Rs) > y(Rt) - confirm this is intentional.
  bool shadow_less = (f16Unsign(lhs.GetValidY(rsVal)) < f16Unsign(rhs.GetValidY(rtVal)));
  if (!shadow_less)
    shadow_less = (f16Unsign(lhs.GetValidX(rsVal)) < f16Unsign(rhs.GetValidX(rtVal)));

  dst.x = shadow_less ? 1.0f : 0.0f;
  dst.y = 0.0f;
  dst.z = lhs.z;
  dst.flags = lhs.flags | VALID_TAINTED_Z | VALID_X | VALID_Y;
  dst.value = BoolToUInt32(rsVal < rtVal);
}
1087

1088
// MULT: Hi/Lo = Rs * Rt (signed).
// Updates the shadow values of the lo and hi registers with a component-wise
// product of the operands' shadow components, plus the exact 64-bit PSX result.
void CPU::PGXP::CPU_MULT(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so they share its Z and flags.
  // NOTE(review): CopyZIfMissing() is given Rs, which lo was just copied from -
  // confirm whether Rt was the intended Z source.
  lo = src_s;
  CopyZIfMissing(lo, src_s);
  hi = lo;

  const float s_low = src_s.GetValidX(rsVal);
  const float s_high = src_s.GetValidY(rsVal);
  const float t_low = src_t.GetValidX(rtVal);
  const float t_high = src_t.GetValidY(rtVal);

  // Partial products. Low components go through f16Unsign(), high components are used as-is.
  const double low_low = f16Unsign(s_low) * f16Unsign(t_low);
  const double low_high = f16Unsign(s_low) * (t_high);
  const double high_low = s_high * f16Unsign(t_low);
  const double high_high = s_high * t_high;

  // Chain the partial products, feeding f16Overflow() of each stage into the next.
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  lo.x = static_cast<float>(f16Sign(low_low));
  lo.y = static_cast<float>(f16Sign(lo_y));
  lo.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);
  hi.x = static_cast<float>(f16Sign(hi_x));
  hi.y = static_cast<float>(f16Sign(hi_y));
  hi.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  // Exact PSX result: 32x32 -> 64-bit signed multiply, split across hi/lo.
  const s64 wide_s = static_cast<s64>(SignExtend64(rsVal));
  const s64 wide_t = static_cast<s64>(SignExtend64(rtVal));
  const u64 product = static_cast<u64>(wide_s * wide_t);
  hi.value = Truncate32(product >> 32);
  lo.value = Truncate32(product);
}
1133

1134
// MULTU: Hi/Lo = Rs * Rt (unsigned).
// Updates the shadow values of the lo and hi registers with a component-wise
// product of the operands' shadow components, plus the exact 64-bit PSX result.
void CPU::PGXP::CPU_MULTU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so they share its Z and flags.
  // NOTE(review): CopyZIfMissing() is given Rs, which lo was just copied from -
  // confirm whether Rt was the intended Z source.
  lo = src_s;
  CopyZIfMissing(lo, src_s);
  hi = lo;

  const float s_low = src_s.GetValidX(rsVal);
  const float s_high = src_s.GetValidY(rsVal);
  const float t_low = src_t.GetValidX(rtVal);
  const float t_high = src_t.GetValidY(rtVal);

  // Partial products. Every component goes through f16Unsign() for the unsigned multiply.
  const double low_low = f16Unsign(s_low) * f16Unsign(t_low);
  const double low_high = f16Unsign(s_low) * f16Unsign(t_high);
  const double high_low = f16Unsign(s_high) * f16Unsign(t_low);
  const double high_high = f16Unsign(s_high) * f16Unsign(t_high);

  // Chain the partial products, feeding f16Overflow() of each stage into the next.
  const double lo_y = f16Overflow(low_low) + (low_high + high_low);
  const double hi_x = f16Overflow(lo_y) + high_high;
  const double hi_y = f16Overflow(hi_x);

  lo.x = static_cast<float>(f16Sign(low_low));
  lo.y = static_cast<float>(f16Sign(lo_y));
  lo.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);
  hi.x = static_cast<float>(f16Sign(hi_x));
  hi.y = static_cast<float>(f16Sign(hi_y));
  hi.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  // Exact PSX result: 32x32 -> 64-bit unsigned multiply, split across hi/lo.
  const u64 product = ZeroExtend64(rsVal) * ZeroExtend64(rtVal);
  hi.value = Truncate32(product >> 32);
  lo.value = Truncate32(product);
}
1179

1180
// DIV: Lo = Rs / Rt, Hi = Rs % Rt (signed).
// Updates the shadow values of the lo and hi registers with a floating-point
// quotient/remainder, plus the exact PSX results (including the divide-by-zero
// and INT_MIN / -1 special cases).
void CPU::PGXP::CPU_DIV(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so they share its Z and flags.
  // NOTE(review): CopyZIfMissing() is given Rs, which lo was just copied from -
  // confirm whether Rt was the intended Z source.
  lo = src_s;
  CopyZIfMissing(lo, src_s);
  hi = lo;

  // Recombine each operand into one double: x (through f16Unsign) + y * 65536.
  const double half_scale = static_cast<double>(1 << 16);
  const double dividend = f16Unsign(src_s.GetValidX(rsVal)) + src_s.GetValidY(rsVal) * half_scale;
  const double divisor = f16Unsign(src_t.GetValidX(rtVal)) + src_t.GetValidY(rtVal) * half_scale;

  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  // Exact PSX result.
  const s32 int_dividend = static_cast<s32>(rsVal);
  const s32 int_divisor = static_cast<s32>(rtVal);
  if (int_divisor == 0)
  {
    // Divide by zero: lo depends on the sign of the dividend, hi is the dividend.
    lo.value = (int_dividend >= 0) ? UINT32_C(0xFFFFFFFF) : UINT32_C(1);
    hi.value = static_cast<u32>(int_dividend);
    return;
  }

  if (rsVal == UINT32_C(0x80000000) && int_divisor == -1)
  {
    // Quotient is not representable in 32 bits.
    lo.value = UINT32_C(0x80000000);
    hi.value = 0;
    return;
  }

  lo.value = static_cast<u32>(int_dividend / int_divisor);
  hi.value = static_cast<u32>(int_dividend % int_divisor);
}
1229

1230
// DIVU: Lo = Rs / Rt, Hi = Rs % Rt (unsigned).
// Updates the shadow values of the lo and hi registers with a floating-point
// quotient/remainder, plus the exact PSX results (including divide-by-zero).
void CPU::PGXP::CPU_DIVU(Instruction instr, u32 rsVal, u32 rtVal)
{
  LOG_VALUES_C2(instr.r.rs.GetValue(), rsVal, instr.r.rt.GetValue(), rtVal);

  PGXPValue& src_s = ValidateAndGetRsValue(instr, rsVal);
  PGXPValue& src_t = ValidateAndGetRtValue(instr, rtVal);

  PGXPValue& lo = g_state.pgxp_gpr[static_cast<u8>(Reg::lo)];
  PGXPValue& hi = g_state.pgxp_gpr[static_cast<u8>(Reg::hi)];

  // Both outputs start out as a copy of Rs, so they share its Z and flags.
  // NOTE(review): CopyZIfMissing() is given Rs, which lo was just copied from -
  // confirm whether Rt was the intended Z source.
  lo = src_s;
  CopyZIfMissing(lo, src_s);
  hi = lo;

  // Recombine each operand into one double: x + y * 65536, both through f16Unsign().
  const double half_scale = static_cast<double>(1 << 16);
  const double dividend = f16Unsign(src_s.GetValidX(rsVal)) + f16Unsign(src_s.GetValidY(rsVal)) * half_scale;
  const double divisor = f16Unsign(src_t.GetValidX(rtVal)) + f16Unsign(src_t.GetValidY(rtVal)) * half_scale;

  const double quotient = dividend / divisor;
  lo.y = static_cast<float>(f16Sign(f16Overflow(quotient)));
  lo.x = static_cast<float>(f16Sign(quotient));
  lo.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  const double remainder = std::fmod(dividend, divisor);
  hi.y = static_cast<float>(f16Sign(f16Overflow(remainder)));
  hi.x = static_cast<float>(f16Sign(remainder));
  hi.flags |= VALID_TAINTED_Z | (src_t.flags & VALID_XY);

  // Exact PSX result.
  if (rtVal == 0)
  {
    // Divide by zero: lo is all ones, hi is the dividend.
    lo.value = UINT32_C(0xFFFFFFFF);
    hi.value = rsVal;
    return;
  }

  lo.value = rsVal / rtVal;
  hi.value = rsVal % rtVal;
}
1274

1275
// Shared implementation of the left shifts: Rd = Rt << sh.
// rtVal is the raw PSX value of Rt, sh is the shift amount supplied by the caller.
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal, u32 sh)
{
  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& dst = GetRdValue(instr);
  dst.z = src.z;
  dst.value = rtVal << sh;

  if (sh >= 32) [[unlikely]]
  {
    // Nothing of the source survives.
    dst.x = 0.0f;
    dst.y = 0.0f;
    dst.flags = src.flags | VALID_XY | VALID_TAINTED_Z;
    return;
  }

  if (sh >= 16)
  {
    // The source x component lands in y (scaled when shifting by more than 16),
    // and x becomes zero. y is written before x in case Rd and Rt are the same register.
    if (sh == 16)
      dst.y = src.x;
    else
      dst.y = static_cast<float>(f16Sign(f16Unsign(src.x * static_cast<double>(1 << (sh - 16)))));
    dst.x = 0.0f;

    // Only set valid X if there's also a valid Y. We could use GetValidX() to pull it from the low precision value
    // instead, need to investigate further. Spyro breaks if only X is set even if Y is not valid.
    // dst.flags = (src.flags & ~VALID_Y) | ((src.flags & VALID_X) << 1) | VALID_X | VALID_TAINTED_Z;
    dst.flags = (src.flags | VALID_TAINTED_Z) | ((src.flags & VALID_Y) >> 1);
    return;
  }

  // Shift of less than 16: scale both components, and add f16Overflow() of the
  // scaled x into y.
  const double scaled_x = f16Unsign(src.x) * static_cast<double>(1 << sh);
  const double scaled_y = (f16Unsign(src.y) * static_cast<double>(1 << sh)) + f16Overflow(scaled_x);
  dst.x = static_cast<float>(f16Sign(scaled_x));
  dst.y = static_cast<float>(f16Sign(scaled_y));
  dst.flags = (src.flags | VALID_TAINTED_Z);
}
1317

1318
// SLL: Rd = Rt << Sa, with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SLL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SLL(instr, rtVal, shift_amount);
}
1326

1327
// SLLV: Rd = Rt << (Rs & 0x1F), with the shift amount taken from the low five bits of Rs.
void CPU::PGXP::CPU_SLLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  CPU_SLL(instr, rtVal, rsVal & 0x1F);
}
1335

1336
// Shared implementation of the right shifts: Rd = Rt >> sh.
//   rtVal       - raw PSX value of Rt.
//   sh          - shift amount supplied by the caller.
//   sign        - true for an arithmetic shift, false for a logical one.
//   is_variable - true when the shift amount came from a register (SRLV/SRAV).
ALWAYS_INLINE_RELEASE void CPU::PGXP::CPU_SRx(Instruction instr, u32 rtVal, u32 sh, bool sign, bool is_variable)
{
  // Exact PSX result.
  u32 rdVal;
  if (sign)
    rdVal = static_cast<u32>(static_cast<s32>(rtVal) >> sh);
  else
    rdVal = (rtVal >> sh);

  PGXPValue& src = ValidateAndGetRtValue(instr, rtVal);

  double low_part = src.x;
  double high_part;
  if (sign)
    high_part = src.y;
  else
    high_part = f16Unsign(src.y);

  // Integer probes used to work out which halves still carry data after the shift.
  const u32 low_only = SignExtend32(LOWORD_S16(rtVal));          // low half sign-extended, high half removed
  const u32 high_only = SET_LOWORD(rtVal, HIWORD_U16(low_only)); // low half replaced by the sign of the low half

  const u32 low_shifted = static_cast<u32>(static_cast<s32>(low_only) >> sh);
  u32 high_shifted;
  if (sign)
    high_shifted = static_cast<u32>(static_cast<s32>(high_only) >> sh);
  else
    high_shifted = (high_only >> sh);

  // Low half on its own: scale it down, unless only sign bits are left.
  if (LOWORD_S16(low_shifted) != HIWORD_S16(low_only))
    low_part = low_part / static_cast<double>(1 << sh);
  else
    low_part = LOWORD_S16(low_shifted);

  // Bits shifted out of the high half end up in the low half.
  if (LOWORD_S16(high_shifted) != HIWORD_S16(low_only))
  {
    if (sh < 16)
    {
      const double spill_scale = static_cast<double>(1 << (16 - sh));
      low_part += high_part * spill_scale;
      if (src.x < 0)
        low_part += spill_scale;
    }
    else if (sh == 16)
    {
      low_part = high_part;
    }
    else
    {
      low_part += high_part / static_cast<double>(1 << (sh - 16));
    }
  }

  // High half: use the exact integer when it collapsed to 0 or -1, otherwise scale it down.
  const auto high_word = HIWORD_S16(high_shifted);
  if ((high_word == 0) || (high_word == -1))
    high_part = high_word;
  else
    high_part = high_part / static_cast<double>(1 << sh);

  PGXPValue& dst = GetRdValue(instr);

  // Use low precision/rounded values when we're not shifting an entire component,
  // and it's not originally from a 3D value. Too many false positives in P2/etc.
  // What we probably should do is not set the valid flag on non-3D values to begin
  // with, only letting them become valid when used in another expression.
  const bool use_low_precision = sign && !is_variable && !(src.flags & VALID_Z) && sh < 16;
  if (use_low_precision)
  {
    dst.x = static_cast<float>(LOWORD_S16(rdVal));
    dst.y = static_cast<float>(HIWORD_S16(rdVal));
    dst.z = 0.0f;
    dst.value = rdVal;
    dst.flags = VALID_XY | VALID_TAINTED_Z;
    return;
  }

  dst.x = static_cast<float>(f16Sign(low_part));
  dst.y = static_cast<float>(f16Sign(high_part));
  dst.z = src.z;
  dst.value = rdVal;
  dst.flags = src.flags | VALID_TAINTED_Z;
}
1402

1403
// SRL: Rd = Rt >> Sa (logical), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRL(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/false, /*is_variable=*/false);
}
1411

1412
// SRLV: Rd = Rt >> (Rs & 0x1F) (logical), with the shift amount taken from the low five bits of Rs.
void CPU::PGXP::CPU_SRLV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  CPU_SRx(instr, rtVal, rsVal & 0x1F, /*sign=*/false, /*is_variable=*/true);
}
1420

1421
// SRA: Rd = Rt >> Sa (arithmetic), with the shift amount taken from the instruction's shamt field.
void CPU::PGXP::CPU_SRA(Instruction instr, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  const u32 shift_amount = instr.r.shamt;
  CPU_SRx(instr, rtVal, shift_amount, /*sign=*/true, /*is_variable=*/false);
}
1429

1430
// SRAV: Rd = Rt >> (Rs & 0x1F) (arithmetic), with the shift amount taken from the low five bits of Rs.
void CPU::PGXP::CPU_SRAV(Instruction instr, u32 rtVal, u32 rsVal)
{
  LOG_VALUES_C2(instr.r.rt.GetValue(), rtVal, instr.r.rs.GetValue(), rsVal);

  CPU_SRx(instr, rtVal, rsVal & 0x1F, /*sign=*/true, /*is_variable=*/true);
}
1438

1439
// MFC0: CPU[Rt] = CP0[Rd].
// rdVal is the raw value read from the COP0 register. The COP0 shadow value is
// validated against it, then copied into the shadow value of Rt.
void CPU::PGXP::CPU_MFC0(Instruction instr, u32 rdVal)
{
  const u32 cop0_index = static_cast<u8>(instr.r.rd.GetValue());
  LOG_VALUES_1(TinyString::from_format("cop0_{}", cop0_index).c_str(), rdVal, &g_state.pgxp_cop0[cop0_index]);

  PGXPValue& cop0_shadow = g_state.pgxp_cop0[cop0_index];
  cop0_shadow.Validate(rdVal);

  // The destination takes over the COP0 shadow, with its raw value set to what was read.
  PGXPValue& gpr_shadow = GetRtValue(instr);
  gpr_shadow = cop0_shadow;
  gpr_shadow.value = rdVal;
}
1452

1453
// MTC0: CP0[Rd] = CPU[Rt].
//   rdVal - raw value that ends up in the COP0 register.
//   rtVal - raw value of the source GPR, used to validate its shadow value.
void CPU::PGXP::CPU_MTC0(Instruction instr, u32 rdVal, u32 rtVal)
{
  LOG_VALUES_C1(instr.r.rt.GetValue(), rtVal);

  PGXPValue& gpr_shadow = ValidateAndGetRtValue(instr, rtVal);
  PGXPValue& cop0_shadow = g_state.pgxp_cop0[static_cast<u8>(instr.r.rd.GetValue())];
  cop0_shadow = gpr_shadow;

  // The destination of this instruction is the COP0 register, so it is the COP0
  // shadow that must record rdVal (mirroring CPU_MFC0, where the destination's
  // value is set). Previously rdVal was written into the source GPR's shadow
  // instead, which left the COP0 shadow holding the GPR's value and overwrote the
  // GPR's tracked value with the COP0 one.
  cop0_shadow.value = rdVal;
}
1463

1464
Product

Resources

Company