CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/MIPSIntVFPU.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
// TODO: Test and maybe fix: https://code.google.com/p/jpcsp/source/detail?r=3082#
19
20
#include <cmath>
21
#include <limits>
22
#include <algorithm>
23
24
#include "Common/Data/Convert/SmallDataConvert.h"
25
#include "Common/Math/math_util.h"
26
27
#include "Core/Compatibility.h"
28
#include "Core/Core.h"
29
#include "Core/MemMap.h"
30
#include "Core/Reporting.h"
31
#include "Core/System.h"
32
33
#include "Core/MIPS/MIPS.h"
34
#include "Core/MIPS/MIPSInt.h"
35
#include "Core/MIPS/MIPSTables.h"
36
#include "Core/MIPS/MIPSVFPUUtils.h"
37
38
// Shorthand accessors for the state of the CPU currently being interpreted.
#define R(i)   (currentMIPS->r[i])
#define V(i)   (currentMIPS->v[voffset[i]])
#define VI(i)  (currentMIPS->vi[voffset[i]])
#define FI(i)  (currentMIPS->fi[i])
#define FsI(i) (currentMIPS->fs[i])
#define PC     (currentMIPS->pc)

// 5-bit fields extracted from a variable named `op` in the expanding scope.
#define _RS   ((op>>21) & 0x1F)
#define _RT   ((op>>16) & 0x1F)
#define _RD   ((op>>11) & 0x1F)
#define _FS   ((op>>11) & 0x1F)
#define _FT   ((op>>16) & 0x1F)
#define _FD   ((op>>6 ) & 0x1F)
#define _POS  ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)

// The hi/lo members of the current CPU state.
#define HI currentMIPS->hi
#define LO currentMIPS->lo
56
57
// Fallback single-precision math constants, used only when M_LOG2E is not
// already defined. Within this fallback M_PI is always replaced, while
// M_PI_2 is kept if it already exists.
#ifndef M_LOG2E
#define M_E        2.71828182845904523536f
#define M_LOG2E    1.44269504088896340736f
#define M_LOG10E   0.434294481903251827651f
#define M_LN2      0.693147180559945309417f
#define M_LN10     2.30258509299404568402f
#undef M_PI
#define M_PI       3.14159265358979323846f
#ifndef M_PI_2
#define M_PI_2     1.57079632679489661923f
#endif
#define M_PI_4     0.785398163397448309616f
#define M_1_PI     0.318309886183790671538f
#define M_2_PI     0.636619772367581343076f
#define M_2_SQRTPI 1.12837916709551257390f
#define M_SQRT2    1.41421356237309504880f
#define M_SQRT1_2  0.707106781186547524401f
#endif
75
76
// Compile-time switches selecting the vfpu_* helper implementations over the
// plain float code paths in this file. Both are currently off.
static constexpr bool USE_VFPU_DOT = false;
static constexpr bool USE_VFPU_SQRT = false;
78
79
union FloatBits {
80
float f[4];
81
u32 u[4];
82
int i[4];
83
};
84
85
// Preserves NaN in first param, takes sign of equal second param.
86
// Technically, std::max may do this but it's undefined.
87
inline float nanmax(float f, float cst)
88
{
89
return f <= cst ? cst : f;
90
}
91
92
// Preserves NaN in first param, takes sign of equal second param.
93
inline float nanmin(float f, float cst)
94
{
95
return f >= cst ? cst : f;
96
}
97
98
// Preserves NaN in first param, takes sign of equal value in others.
99
inline float nanclamp(float f, float lower, float upper)
100
{
101
return nanmin(nanmax(f, lower), upper);
102
}
103
104
static void ApplyPrefixST(float *r, u32 data, VectorSize size, float invalid = 0.0f) {
105
// Check for no prefix.
106
if (data == 0xe4)
107
return;
108
109
int n = GetNumVectorElements(size);
110
float origV[4]{ invalid, invalid, invalid, invalid };
111
static const float constantArray[8] = {0.f, 1.f, 2.f, 0.5f, 3.f, 1.f/3.f, 0.25f, 1.f/6.f};
112
113
for (int i = 0; i < n; i++) {
114
origV[i] = r[i];
115
}
116
117
for (int i = 0; i < n; i++) {
118
int regnum = (data >> (i*2)) & 3;
119
int abs = (data >> (8+i)) & 1;
120
int negate = (data >> (16+i)) & 1;
121
int constants = (data >> (12+i)) & 1;
122
123
if (!constants) {
124
if (regnum >= n) {
125
// We mostly handle this now, but still worth reporting.
126
ERROR_LOG_REPORT(Log::CPU, "Invalid VFPU swizzle: %08x: %i / %d at PC = %08x (%s)", data, regnum, n, currentMIPS->pc, MIPSDisasmAt(currentMIPS->pc).c_str());
127
}
128
r[i] = origV[regnum];
129
if (abs)
130
((u32 *)r)[i] = ((u32 *)r)[i] & 0x7FFFFFFF;
131
} else {
132
r[i] = constantArray[regnum + (abs<<2)];
133
}
134
135
if (negate)
136
((u32 *)r)[i] = ((u32 *)r)[i] ^ 0x80000000;
137
}
138
}
139
140
inline void ApplySwizzleS(float *v, VectorSize size, float invalid = 0.0f)
141
{
142
ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX], size, invalid);
143
}
144
145
inline void ApplySwizzleT(float *v, VectorSize size, float invalid = 0.0f)
146
{
147
ApplyPrefixST(v, currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX], size, invalid);
148
}
149
150
void ApplyPrefixD(float *v, VectorSize size, bool onlyWriteMask = false)
151
{
152
u32 data = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
153
if (!data || onlyWriteMask)
154
return;
155
int n = GetNumVectorElements(size);
156
for (int i = 0; i < n; i++)
157
{
158
int sat = (data >> (i * 2)) & 3;
159
if (sat == 1)
160
v[i] = vfpu_clamp(v[i], 0.0f, 1.0f);
161
else if (sat == 3)
162
v[i] = vfpu_clamp(v[i], -1.0f, 1.0f);
163
}
164
}
165
166
static void RetainInvalidSwizzleST(float *d, VectorSize sz) {
167
// Somehow it's like a supernan, maybe wires through to zero?
168
// Doesn't apply to all ops.
169
int sPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX];
170
int tPrefix = currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX];
171
int n = GetNumVectorElements(sz);
172
173
// TODO: We can probably do some faster check of sPrefix and tPrefix to skip over this loop.
174
for (int i = 0; i < n; i++) {
175
int swizzleS = (sPrefix >> (i + i)) & 3;
176
int swizzleT = (tPrefix >> (i + i)) & 3;
177
int constS = (sPrefix >> (12 + i)) & 1;
178
int constT = (tPrefix >> (12 + i)) & 1;
179
if ((swizzleS >= n && !constS) || (swizzleT >= n && !constT))
180
d[i] = 0.0f;
181
}
182
}
183
184
void EatPrefixes()
185
{
186
currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] = 0xe4; // passthru
187
currentMIPS->vfpuCtrl[VFPU_CTRL_TPREFIX] = 0xe4; // passthru
188
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = 0;
189
}
190
191
namespace MIPSInt
192
{
193
// Stores the immediate of a prefix instruction into the selected prefix
// control register. The low 20 bits of the opcode are the value (only the
// low 12 bits are kept for the D prefix); bits 24-25 select the register
// relative to VFPU_CTRL_SPREFIX.
void Int_VPFX(MIPSOpcode op)
{
	const int which = (op >> 24) & 3;
	int value = op & 0x000FFFFF;
	if (which == VFPU_CTRL_DPREFIX)
		value &= 0x00000FFF;

	currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX + which] = value;
	PC += 4;
}
202
203
// Interprets the quad load/store family, selected by the top six opcode bits:
//   53: lvl.q / lvr.q   54: lv.q   61: svl.q / svr.q   62: sv.q
// The address is R(rs) plus the sign-extended immediate (low two bits
// masked off). Bit 1 of the opcode distinguishes the "left" (clear) and
// "right" (set) partial variants.
void Int_SVQ(MIPSOpcode op)
{
	const int offset16 = SignExtend16ToS32(op & 0xFFFC);
	const int vreg = ((op >> 16) & 0x1f) | ((op & 1) << 5);
	const u32 addr = R(_RS) + offset16;

	switch (op >> 26)
	{
	case 53: // lvl.q / lvr.q
		{
			if (addr & 0x3)
			{
				_dbg_assert_msg_( 0, "Misaligned lvX.q at %08x (pc = %08x)", addr, PC);
			}
			// Start from the register's current contents; only some lanes are replaced.
			float quad[4];
			ReadVector(quad, V_Quad, vreg);
			const int word = (addr >> 2) & 3;
			if ((op & 2) == 0)
			{
				// LVL: fill from the top lane downwards, reading backwards from addr.
				for (int i = 0; i <= word; i++)
					quad[3 - i] = Memory::Read_Float(addr - 4 * i);
			}
			else
			{
				// LVR: fill from the bottom lane upwards, reading forwards from addr.
				for (int i = 0; i <= 3 - word; i++)
					quad[i] = Memory::Read_Float(addr + 4 * i);
			}
			WriteVector(quad, V_Quad, vreg);
		}
		break;

	case 54: // lv.q
		{
			if (addr & 0xF)
			{
				_dbg_assert_msg_( 0, "Misaligned lv.q at %08x (pc = %08x)", addr, PC);
			}
#ifndef COMMON_BIG_ENDIAN
			// Read straight from memory when the range resolves to a pointer.
			const float *src = reinterpret_cast<const float *>(Memory::GetPointerRange(addr, 16));
			if (src)
				WriteVector(src, V_Quad, vreg);
#else
			float quad[4];
			for (int i = 0; i < 4; i++)
				quad[i] = Memory::Read_Float(addr + 4 * i);
			WriteVector(quad, V_Quad, vreg);
#endif
		}
		break;

	case 61: // svl.q / svr.q
		{
			if (addr & 0x3)
			{
				_dbg_assert_msg_( 0, "Misaligned svX.q at %08x (pc = %08x)", addr, PC);
			}
			float quad[4];
			ReadVector(quad, V_Quad, vreg);
			const int word = (addr >> 2) & 3;
			if ((op & 2) == 0)
			{
				// SVL: store from the top lane downwards, writing backwards from addr.
				for (int i = 0; i <= word; i++)
					Memory::Write_Float(quad[3 - i], addr - i * 4);
			}
			else
			{
				// SVR: store from the bottom lane upwards, writing forwards from addr.
				for (int i = 0; i <= 3 - word; i++)
					Memory::Write_Float(quad[i], addr + 4 * i);
			}
		}
		break;

	case 62: // sv.q
		{
			if (addr & 0xF)
			{
				_dbg_assert_msg_( 0, "Misaligned sv.q at %08x (pc = %08x)", addr, PC);
			}
#ifndef COMMON_BIG_ENDIAN
			// Write straight into memory when the range resolves to a pointer.
			float *dst = reinterpret_cast<float *>(Memory::GetPointerWriteRange(addr, 16));
			if (dst)
				ReadVector(dst, V_Quad, vreg);
#else
			float quad[4];
			ReadVector(quad, V_Quad, vreg);
			for (int i = 0; i < 4; i++)
				Memory::Write_Float(quad[i], addr + 4 * i);
#endif
		}
		break;

	default:
		_dbg_assert_msg_(false,"Trying to interpret VQ instruction that can't be interpreted");
		break;
	}
	PC += 4;
}
319
320
// Interprets vmidt / vmzero / vmone: writes an identity, all-zero or all-one
// matrix to vd. Bits 16-19 of the opcode select the variant (3, 6, 7).
// If the S prefix has any abs/negate bit set, it is applied (with the swizzle
// replaced by the matching constants) to the last row before writing.
void Int_VMatrixInit(MIPSOpcode op) {
	static const float identityMtx[16] = {
		1,0,0,0,
		0,1,0,0,
		0,0,1,0,
		0,0,0,1,
	};
	static const float zeroMtx[16] = {
		0,0,0,0,
		0,0,0,0,
		0,0,0,0,
		0,0,0,0,
	};
	static const float oneMtx[16] = {
		1,1,1,1,
		1,1,1,1,
		1,1,1,1,
		1,1,1,1,
	};

	const int vd = _VD;
	const MatrixSize sz = GetMtxSize(op);
	const int variant = (op >> 16) & 0xF;

	const float *src;
	switch (variant) {
	case 3: // vmidt
		src = identityMtx;
		break;
	case 6: // vmzero
		src = zeroMtx;
		break;
	case 7: // vmone
		src = oneMtx;
		break;
	default:
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		PC += 4;
		EatPrefixes();
		return;
	}

	const bool prefixTouchesSigns = (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] & 0xF0F00) != 0;
	if (!prefixTouchesSigns) {
		// Write mask applies to the final (maybe transposed) row. Sat causes hang.
		WriteMatrix(src, sz, vd);
	} else {
		// The S prefix generates constants, but only for the final (possibly transposed) row.
		float prefixed[16];
		memcpy(prefixed, src, sizeof(prefixed));

		const int lastRow = GetMatrixSide(sz) - 1;
		const u32 sprefixRemove = VFPU_ANY_SWIZZLE();
		u32 sprefixAdd = 0;
		switch (variant) {
		case 3:
			{
				// Identity: the one sits in the column matching the last row's index.
				VFPUConst constX = lastRow == 0 ? VFPUConst::ONE : VFPUConst::ZERO;
				VFPUConst constY = lastRow == 1 ? VFPUConst::ONE : VFPUConst::ZERO;
				VFPUConst constZ = lastRow == 2 ? VFPUConst::ONE : VFPUConst::ZERO;
				VFPUConst constW = lastRow == 3 ? VFPUConst::ONE : VFPUConst::ZERO;
				sprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);
			}
			break;
		case 6:
			sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
			break;
		case 7:
			sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
			break;
		default:
			_dbg_assert_msg_( 0, "Unknown matrix init op");
			break;
		}
		ApplyPrefixST(&prefixed[lastRow * 4], VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);
		WriteMatrix(prefixed, sz, vd);
	}

	PC += 4;
	EatPrefixes();
}
391
392
// Interprets vzero / vone: fills vd with the constant 0 or 1.
// Bits 16-19 of the opcode select the variant (6 = vzero, 7 = vone).
// The value is produced through the S prefix with its swizzle replaced by
// constants, so the prefix's other bits (e.g. negate) still take effect.
void Int_VVectorInit(MIPSOpcode op)
{
	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	// Zero-initialized because ApplyPrefixST reads the incoming lanes before
	// it overwrites them; previously that was a read of uninitialized storage.
	float d[4]{};

	VFPUConst constant = VFPUConst::ZERO;
	switch ((op >> 16) & 0xF) {
	case 6: constant = VFPUConst::ZERO; break;  //vzero
	case 7: constant = VFPUConst::ONE; break;   //vone
	default:
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
		PC += 4;
		EatPrefixes();
		return;
	}

	// The S prefix generates constants, but negate is still respected.
	u32 sprefixRemove = VFPU_ANY_SWIZZLE();
	u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constant, constant, constant, constant);
	ApplyPrefixST(d, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);

	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);

	EatPrefixes();
	PC += 4;
}
420
421
// Interprets viim / vfim: loads a 16-bit immediate into the single register vt.
// Bits 23-25 select the form: 6 converts the sign-extended integer to float
// (viim), 7 expands the immediate as a half float (vfim).
void Int_Viim(MIPSOpcode op) {
	const int vt = _VT;
	const int type = (op >> 23) & 7;

	float value[1];
	switch (type) {
	case 6:  // viim
		value[0] = (float)SignExtend16ToS32(op & 0xFFFF);
		break;
	case 7:  // vfim
		value[0] = Float16ToFloat32((u16)(op & 0xFFFF));
		break;
	default:
		_dbg_assert_msg_( 0, "Invalid Viim opcode type %d", type);
		value[0] = 0;
		break;
	}

	ApplyPrefixD(value, V_Single);
	WriteVector(value, V_Single, vt);
	PC += 4;
	EatPrefixes();
}
441
442
// Interprets vidt: writes one row of an identity matrix to vd. The position
// of the 1 is taken from the low bits of vd (two bits for triple/quad, one
// bit otherwise). The row is produced through the S prefix with its swizzle
// replaced by constants, then the D prefix is applied.
void Int_Vidt(MIPSOpcode op) {
	int vd = _VD;
	VectorSize sz = GetVecSize(op);
	// Zero-initialized because ApplyPrefixST reads the incoming lanes before
	// it overwrites them; previously that was a read of uninitialized storage.
	float f[4]{};

	// The S prefix generates constants, but negate is still respected.
	int offmask = sz == V_Quad || sz == V_Triple ? 3 : 1;
	int off = vd & offmask;
	// If it's a pair, the identity starts in a different position.
	VFPUConst constX = off == (0 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constY = off == (1 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constZ = off == (2 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;
	VFPUConst constW = off == (3 & offmask) ? VFPUConst::ONE : VFPUConst::ZERO;

	u32 sprefixRemove = VFPU_ANY_SWIZZLE();
	u32 sprefixAdd = VFPU_MAKE_CONSTANTS(constX, constY, constZ, constW);
	ApplyPrefixST(f, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);

	ApplyPrefixD(f, sz);
	WriteVector(f, sz, vd);
	PC += 4;
	EatPrefixes();
}
465
466
// The test really needs some work.
467
// Interprets vmmul: d[a][b] = dot(row b of s, row a of t) for an n x n matrix.
// The S and T prefixes are applied only for the final dot product, and the
// D prefix only affects the final element.
void Int_Vmmul(MIPSOpcode op) {
	float s[16]{}, t[16]{}, d[16];

	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const MatrixSize sz = GetMtxSize(op);
	const int side = GetMatrixSide(sz);

	ReadMatrix(s, sz, vs);
	ReadMatrix(t, sz, vt);

	// TODO: Always use the more accurate path in interpreter?
	const bool useAccurateDot = USE_VFPU_DOT || PSP_CoreParameter().compat.flags().MoreAccurateVMMUL;
	for (int a = 0; a < side; a++) {
		float *tRow = &t[a * 4];
		for (int b = 0; b < side; b++) {
			float *sRow = &s[b * 4];
			const bool finalDot = a == side - 1 && b == side - 1;

			union { float f; uint32_t u; } sum = { 0.0f };
			if (finalDot) {
				// S and T prefixes work on the final (or maybe first, in reverse?) dot.
				ApplySwizzleS(sRow, V_Quad);
				ApplySwizzleT(tRow, V_Quad);
			}

			if (useAccurateDot) {
				sum.f = vfpu_dot(sRow, tRow);
				if (my_isnan(sum.f)) {
					// Replace any NaN with one fixed NaN bit pattern.
					sum.u = 0x7f800001;
				} else if ((sum.u & 0x7F800000) == 0) {
					// Zero exponent: keep only the sign (and exponent) bits.
					sum.u &= 0xFF800000;
				}
			} else {
				// The final dot always uses all four lanes, since the prefixes
				// may have populated lanes beyond the matrix side.
				const int terms = finalDot ? 4 : side;
				for (int c = 0; c < terms; c++) {
					sum.f += sRow[c] * tRow[c];
				}
			}

			d[a * 4 + b] = sum.f;
		}
	}

	// The D prefix applies ONLY to the final element, but sat does work.
	// Move lane 0's mask and sat bits to the position of the last lane.
	const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dprefix & (1 << 8)) << (side - 1);
	const u32 lastsat = (dprefix & 3) << (side + side - 2);
	currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
	ApplyPrefixD(&d[4 * (side - 1)], V_Quad, false);
	WriteMatrix(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
522
523
// Interprets vmscl: multiplies every element of matrix vs by the scalar vt.
// All rows but the last are scaled directly; the last row goes through the
// S, T and D prefixes.
void Int_Vmscl(MIPSOpcode op) {
	float s[16]{}, t[4]{}, d[16];

	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const MatrixSize sz = GetMtxSize(op);
	const int side = GetMatrixSide(sz);
	const int last = side - 1;

	ReadMatrix(s, sz, vs);
	ReadVector(t, V_Single, vt);

	for (int row = 0; row < last; row++) {
		for (int col = 0; col < side; col++) {
			const int idx = row * 4 + col;
			d[idx] = s[idx] * t[0];
		}
	}

	float *sLast = &s[last * 4];
	float *dLast = &d[last * 4];

	// S prefix applies to the last row.
	ApplySwizzleS(sLast, V_Quad);
	// T prefix applies only for the last row, and is used per element.
	// This is like vscl, but instead of zzzz it uses xxxx.
	int tlane = (vt >> 5) & 3;
	t[tlane] = t[0];
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	for (int col = 0; col < side; col++) {
		dLast[col] = sLast[col] * t[col];
	}

	// The D prefix is applied to the last row.
	ApplyPrefixD(dLast, V_Quad);
	WriteMatrix(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
561
562
// Interprets vmmov: copies matrix vs to vd. The S and D prefixes are applied
// to the last row only.
void Int_Vmmov(MIPSOpcode op) {
	float m[16]{};
	const int vd = _VD;
	const int vs = _VS;
	const MatrixSize sz = GetMtxSize(op);

	ReadMatrix(m, sz, vs);

	float *lastRow = &m[(GetMatrixSide(sz) - 1) * 4];
	ApplySwizzleS(lastRow, V_Quad);
	ApplyPrefixD(lastRow, V_Quad);

	WriteMatrix(m, sz, vd);
	PC += 4;
	EatPrefixes();
}
576
577
// Interprets vflush and related no-ops: logs, advances the PC, and clears
// the prefixes unless the opcode's upper 16 bits are all set.
void Int_Vflush(MIPSOpcode op)
{
	VERBOSE_LOG(Log::CPU, "vflush");
	PC += 4;

	// Anything with 0xFC000000 is a nop, but only 0xFFFF0000 retains prefixes.
	const bool retainsPrefixes = (op & 0xFFFF0000) == 0xFFFF0000;
	if (!retainsPrefixes)
		EatPrefixes();
}
585
586
// Interprets the one-source vector ops selected by bits 16-20 of the opcode
// (vmov, vabs, vneg, vsat0, vsat1, vrcp, vrsq, vsin, vcos, vexp2, vlog2,
// vsqrt, vasin, vnrcp, vnsin, vrexp2). Runs in three phases: source prefix,
// per-lane operation, destination prefix.
void Int_VV2Op(MIPSOpcode op) {
	float s[4], d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int optype = (op >> 16) & 0x1f;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);
	ReadVector(s, sz, vs);

	// Phase 1: source prefix. Some of these are prefix hacks (affects constants, etc.)
	if (optype == 1) {
		// vabs: force the abs bits on.
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_ABS(1, 1, 1, 1)), sz);
	} else if (optype == 2) {
		// vneg: force the negate bits on.
		ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, VFPU_NEGATE(1, 1, 1, 1)), sz);
	} else if (optype >= 16 && optype <= 23) {
		// Similar to vdiv. Some of the behavior using the invalid constant is iffy.
		ApplySwizzleS(&s[n - 1], V_Single, INFINITY);
	} else if (optype == 24 || optype == 26) {
		// Similar to above, but also ignores negate.
		ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, -INFINITY);
	} else if (optype == 28) {
		// Similar to above, but also ignores negate.
		ApplyPrefixST(&s[n - 1], VFPURewritePrefix(VFPU_CTRL_SPREFIX, VFPU_NEGATE(1, 0, 0, 0), 0), V_Single, INFINITY);
	} else {
		ApplySwizzleS(s, sz);
	}

	// Phase 2: the operation itself, lane by lane.
	for (int i = 0; i < (int)n; i++) {
		const float in = s[i];
		switch (optype) {
		case 0:  // vmov
		case 1:  // vabs (prefix)
		case 2:  // vneg (prefix)
			d[i] = in;
			break;
		// vsat0 changes -0.0 to +0.0, both retain NAN.
		case 4:  // vsat0
			if (in <= 0)
				d[i] = 0;
			else if (in > 1.0f)
				d[i] = 1.0f;
			else
				d[i] = in;
			break;
		case 5:  // vsat1
			if (in < -1.0f)
				d[i] = -1.0f;
			else if (in > 1.0f)
				d[i] = 1.0f;
			else
				d[i] = in;
			break;
		case 16:  // vrcp
			d[i] = vfpu_rcp(in);
			break;
		case 17:  // vrsq
			d[i] = USE_VFPU_SQRT ? vfpu_rsqrt(in) : 1.0f / sqrtf(in);
			break;
		case 18:  // vsin
			d[i] = vfpu_sin(in);
			break;
		case 19:  // vcos
			d[i] = vfpu_cos(in);
			break;
		case 20:  // vexp2
			d[i] = vfpu_exp2(in);
			break;
		case 21:  // vlog2
			d[i] = vfpu_log2(in);
			break;
		case 22:  // vsqrt
			d[i] = USE_VFPU_SQRT ? vfpu_sqrt(in) : fabsf(sqrtf(in));
			break;
		case 23:  // vasin
			d[i] = vfpu_asin(in);
			break;
		case 24:  // vnrcp
			d[i] = -vfpu_rcp(in);
			break;
		case 26:  // vnsin
			d[i] = -vfpu_sin(in);
			break;
		case 28:  // vrexp2
			d[i] = vfpu_rexp2(in);
			break;
		default:
			_dbg_assert_msg_( false, "Invalid VV2Op op type %d", optype);
			break;
		}
	}

	// Phase 3: destination prefix.
	// vsat1 is a prefix hack, so 0:1 doesn't apply. Others don't process sat at all.
	const bool lastLaneOnly = (optype >= 16 && optype <= 24) || optype == 26 || optype == 28;
	if (optype == 5) {
		ApplyPrefixD(d, sz, true);
	} else if (lastLaneOnly) {
		// Only the last element gets the mask applied: move lane 0's mask and
		// sat bits to the last lane's position and drop everything else.
		const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
		const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
		const u32 lastsat = (dprefix & 3) << (n + n - 2);
		currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
		ApplyPrefixD(d, sz);
	} else {
		ApplyPrefixD(d, sz);
	}

	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
682
683
// Interprets vocp: d = 1 - s per lane. Implemented as t + s where the S
// prefix is forced to negate and the T prefix is forced to the constant 1.
// NaN inputs produce a NaN with the sign bit cleared. Lanes with an invalid
// S/T swizzle are zeroed, then the D prefix is applied.
void Int_Vocp(MIPSOpcode op) {
	// t is zero-initialized because ApplyPrefixST reads the incoming lanes
	// before replacing them with constants; previously that was a read of
	// uninitialized storage.
	float s[4], t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);

	// S prefix forces the negate flags.
	u32 sprefixAdd = VFPU_NEGATE(1, 1, 1, 1);
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);

	// T prefix forces constants on and regnum to 1.
	// That means negate still works, and abs activates a different constant.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	for (int i = 0; i < GetNumVectorElements(sz); i++) {
		// Always positive NaN. Note that s is always negated from the registers.
		d[i] = my_isnan(s[i]) ? fabsf(s[i]) : t[i] + s[i];
	}
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
710
711
// Interprets vsocp: for each input lane x, produces the pair
// (clamp(1 - x, 0, 1), clamp(x, 0, 1)), so the output is twice the input
// size (an invalid doubled size is treated as quad). Built from forced
// S and T prefixes; the 0..1 saturation is applied unconditionally.
void Int_Vsocp(MIPSOpcode op) {
	// s and t are zero-initialized: ReadVector only fills the input-size
	// lanes of s, but ApplyPrefixST below reads output-size lanes of both
	// arrays. Previously those were reads of uninitialized storage.
	float s[4]{}, t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	VectorSize outSize = GetDoubleVectorSizeSafe(sz);
	if (outSize == V_Invalid)
		outSize = V_Quad;
	ReadVector(s, sz, vs);

	// S prefix forces negate in even/odd and xxyy swizzle.
	// abs works, and applies to final position (not source.)
	u32 sprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);
	u32 sprefixAdd = VFPU_SWIZZLE(0, 0, 1, 1) | VFPU_NEGATE(1, 0, 1, 0);
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), outSize);

	// T prefix forces constants on and regnum to 1, 0, 1, 0.
	// That means negate still works, and abs activates a different constant.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ZERO, VFPUConst::ONE, VFPUConst::ZERO);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), outSize);

	// Essentially D prefix saturation is forced.
	d[0] = nanclamp(t[0] + s[0], 0.0f, 1.0f);
	d[1] = nanclamp(t[1] + s[1], 0.0f, 1.0f);
	if (outSize == V_Quad) {
		d[2] = nanclamp(t[2] + s[2], 0.0f, 1.0f);
		d[3] = nanclamp(t[3] + s[3], 0.0f, 1.0f);
	}
	// onlyWriteMask: the D prefix's own saturation bits are not applied here.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, outSize, vd);
	PC += 4;
	EatPrefixes();
}
745
746
// Interprets vsgn: per lane, writes 0 when s - t is +/-0, 1 when its sign
// bit is clear, and -1 when its sign bit is set. t is produced from the T
// prefix with its swizzle replaced by the constant 0, so abs/negate bits on
// the T prefix change the value being compared against.
void Int_Vsgn(MIPSOpcode op) {
	float s[4], t[4]{}, d[4];
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);

	// Not sure who would do this, but using abs/neg allows a compare against 3 or -3.
	u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
	// Apply to all four lanes: t[n..3] is copied into s below, so it must be
	// populated. Applying with sz left those lanes unwritten.
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	int n = GetNumVectorElements(sz);
	if (n < 4) {
		// Compare with a swizzled value out of bounds always produces 0.
		memcpy(&s[n], &t[n], sizeof(float) * (4 - n));
	}
	ApplySwizzleS(s, V_Quad);

	for (int i = 0; i < n; i++) {
		float diff = s[i] - t[i];
		// To handle NaNs correctly, we do this with integer hackery
		u32 val;
		memcpy(&val, &diff, sizeof(u32));
		if (val == 0 || val == 0x80000000)
			d[i] = 0.0f;
		else if ((val >> 31) == 0)
			d[i] = 1.0f;
		else
			d[i] = -1.0f;
	}
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
782
783
inline int round_vfpu_n(double param) {
784
// return floorf(param);
785
return (int)round_ieee_754(param);
786
}
787
788
// Interprets vf2in / vf2iz / vf2iu / vf2id: scales each lane by 2^imm
// (imm = bits 16-20) and converts it to a 32-bit integer using the rounding
// mode in bits 21-25 (16 = n, 17 = z, 18 = u, 19 = d). NaN yields 0x7FFFFFFF
// and out-of-range values clamp to 0x7fffffff / 0x80000000.
void Int_Vf2i(MIPSOpcode op) {
	float s[4];
	int d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int imm = (op >> 16) & 0x1f;
	const float mult = (float)(1UL << imm);
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	// Negate, abs, and constants apply as you'd expect to the bits.
	ApplySwizzleS(s, sz);

	const int roundMode = (op >> 21) & 0x1f;
	const int count = GetNumVectorElements(sz);
	for (int i = 0; i < count; i++) {
		if (my_isnan(s[i])) {
			d[i] = 0x7FFFFFFF;
			continue;
		}

		const double sv = s[i] * mult;  // (float)0x7fffffff == (float)0x80000000
		// Cap/floor it to 0x7fffffff / 0x80000000
		if (sv > (double)0x7fffffff) {
			d[i] = 0x7fffffff;
			continue;
		}
		if (sv <= (double)(int)0x80000000) {
			d[i] = 0x80000000;
			continue;
		}

		switch (roundMode) {
		case 16:  // n
			d[i] = (int)round_vfpu_n(sv);
			break;
		case 17:  // z
			d[i] = s[i] >= 0 ? (int)floor(sv) : (int)ceil(sv);
			break;
		case 18:  // u
			d[i] = (int)ceil(sv);
			break;
		case 19:  // d
			d[i] = (int)floor(sv);
			break;
		default:
			d[i] = 0x7FFFFFFF;
			break;
		}
	}

	// Does not apply sat, but does apply mask.
	ApplyPrefixD(reinterpret_cast<float *>(d), sz, true);
	WriteVector(reinterpret_cast<float *>(d), sz, vd);
	PC += 4;
	EatPrefixes();
}
827
828
// Interprets vi2f: converts each lane from a signed 32-bit integer to float
// and divides by 2^imm (imm = bits 16-20 of the opcode).
void Int_Vi2f(MIPSOpcode op) {
	int s[4];
	float d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int imm = (op >> 16) & 0x1f;
	const float scale = 1.0f/(float)(1UL << imm);
	const VectorSize sz = GetVecSize(op);

	float *rawLanes = reinterpret_cast<float *>(s);
	ReadVector(rawLanes, sz, vs);
	// Negate, abs, and constants apply as you'd expect to the bits.
	ApplySwizzleS(rawLanes, sz);

	const int count = GetNumVectorElements(sz);
	for (int lane = 0; lane < count; lane++) {
		d[lane] = (float)s[lane] * scale;
	}

	// Sat and mask apply normally.
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
848
849
// Interprets vh2f: expands packed half floats to full floats. A single input
// produces a pair; every other input size uses the first two input lanes and
// produces a quad. The low half of each word comes first.
void Int_Vh2f(MIPSOpcode op) {
	u32 s[4];
	float d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(reinterpret_cast<float *>(s), sz, vs);
	ApplySwizzleS(reinterpret_cast<float *>(s), sz);

	VectorSize outsize;
	d[0] = ExpandHalf(s[0] & 0xFFFF);
	d[1] = ExpandHalf(s[0] >> 16);
	if (sz == V_Single) {
		outsize = V_Pair;
	} else {
		// All other sizes are treated the same.
		outsize = V_Quad;
		d[2] = ExpandHalf(s[1] & 0xFFFF);
		d[3] = ExpandHalf(s[1] >> 16);
	}

	ApplyPrefixD(d, outsize);
	WriteVector(d, outsize, vd);
	PC += 4;
	EatPrefixes();
}
880
881
// Interprets vf2h: packs floats into half floats, two per 32-bit word (first
// lane in the low half). Single/pair inputs produce a single word; triple/
// quad inputs produce a pair of words.
void Int_Vf2h(MIPSOpcode op) {
	float s[4]{};
	// Zero-initialized so the unexpected-size path below writes a defined
	// value instead of uninitialized storage.
	u32 d[4]{};
	int vd = _VD;
	int vs = _VS;
	VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	// Swizzle can cause V_Single to properly write both components.
	ApplySwizzleS(s, V_Quad);
	// Negate should not actually apply to invalid swizzle.
	RetainInvalidSwizzleST(s, V_Quad);

	VectorSize outsize = V_Single;
	switch (sz) {
	case V_Single:
	case V_Pair:
		outsize = V_Single;
		d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);
		break;
	case V_Triple:
	case V_Quad:
		outsize = V_Pair;
		d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);
		d[1] = ShrinkToHalf(s[2]) | ((u32)ShrinkToHalf(s[3]) << 16);
		break;

	default:
		ERROR_LOG_REPORT(Log::CPU, "vf2h with invalid elements");
		break;
	}
	ApplyPrefixD(reinterpret_cast<float *>(d), outsize);
	WriteVector(reinterpret_cast<float *>(d), outsize, vd);
	PC += 4;
	EatPrefixes();
}
916
917
// Interprets vuc2i / vc2i / vus2i / vs2i (bits 16-17 = 0..3): expands packed
// 8-bit or 16-bit values into 32-bit integer lanes.
void Int_Vx2i(MIPSOpcode op) {
	u32 s[4], d[4]{};
	const int vd = _VD;
	const int vs = _VS;
	VectorSize sz = GetVecSize(op);
	VectorSize oz = sz;
	ReadVector(reinterpret_cast<float *>(s), sz, vs);
	ApplySwizzleS(reinterpret_cast<float *>(s), sz);

	// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.
	switch ((op >> 16) & 3) {
	case 0:  // vuc2i
		{
			// Quad is the only option.
			// This converts 8-bit unsigned to 31-bit signed, swizzling to saturate.
			// Similar to 5-bit to 8-bit color swizzling, but clamping to INT_MAX.
			const u32 packed = s[0];
			for (int i = 0; i < 4; i++) {
				const u32 byte = (packed >> (8 * i)) & 0xFF;
				const u32 replicated = (u32)(byte * 0x01010101UL);
				d[i] = replicated >> 1;
			}
			oz = V_Quad;
		}
		break;

	case 1:  // vc2i
		{
			// Quad is the only option
			// Unlike vuc2i, the source and destination are signed so there is no shift.
			// It lacks the swizzle because of negative values.
			const u32 packed = s[0];
			for (int i = 0; i < 4; i++) {
				// Each byte moves to the top byte of its own lane.
				d[i] = ((packed >> (8 * i)) & 0xFF) << 24;
			}
			oz = V_Quad;
		}
		break;

	case 2:  // vus2i
		// Note: for some reason, this skips swizzle such that 0xFFFF -> 0x7FFF8000 unlike vuc2i.
		oz = V_Pair;
		if (sz == V_Quad || sz == V_Triple || sz == V_Pair) {
			// Anything wider than a single reads two words and writes a quad.
			sz = V_Pair;
			oz = V_Quad;
		}
		if (sz == V_Pair || sz == V_Single) {
			const int count = GetNumVectorElements(sz);
			for (int i = 0; i < count; i++) {
				const u32 word = s[i];
				d[i * 2] = (word & 0xFFFF) << 15;
				d[i * 2 + 1] = (word & 0xFFFF0000) >> 1;
			}
		} else {
			ERROR_LOG_REPORT(Log::CPU, "vus2i with more than 2 elements");
		}
		break;

	case 3:  // vs2i
		oz = V_Pair;
		if (sz == V_Quad || sz == V_Triple || sz == V_Pair) {
			// Anything wider than a single reads two words and writes a quad.
			sz = V_Pair;
			oz = V_Quad;
		}
		if (sz == V_Pair || sz == V_Single) {
			const int count = GetNumVectorElements(sz);
			for (int i = 0; i < count; i++) {
				const u32 word = s[i];
				d[i * 2] = (word & 0xFFFF) << 16;
				d[i * 2 + 1] = word & 0xFFFF0000;
			}
		} else {
			ERROR_LOG_REPORT(Log::CPU, "vs2i with more than 2 elements");
		}
		break;

	default:
		_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");
		break;
	}

	// Saturation does in fact apply.
	ApplyPrefixD(reinterpret_cast<float *>(d), oz);
	WriteVector(reinterpret_cast<float *>(d), oz, vd);
	PC += 4;
	EatPrefixes();
}
1016
1017
// Interprets vi2uc / vi2c / vi2us / vi2s (bits 16-17 = 0..3): packs 32-bit
// integer lanes into 8-bit or 16-bit fields. The unsigned variants clamp
// negative inputs to zero first.
void Int_Vi2x(MIPSOpcode op) {
	int s[4]{};
	u32 d[2]{};
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	VectorSize oz;
	ReadVector(reinterpret_cast<float *>(s), sz, vs);
	// Negate, const, etc. apply as expected.
	ApplySwizzleS(reinterpret_cast<float *>(s), V_Quad);

	// TODO: Similar to colorconv, invalid swizzle seems to reuse last output.
	switch ((op >> 16) & 3) {
	case 0:  // vi2uc
		for (int i = 0; i < 4; i++) {
			const int clamped = s[i] < 0 ? 0 : s[i];
			const int top = clamped >> 23;
			d[0] |= ((u32)top & 0xFF) << (i * 8);
		}
		oz = V_Single;
		break;

	case 1:  // vi2c
		for (int i = 0; i < 4; i++) {
			const u32 bits = s[i];
			d[0] |= (bits >> 24) << (i * 8);
		}
		oz = V_Single;
		break;

	case 2:  // vi2us
		{
			// Two input lanes are packed into each output word.
			const int words = (GetNumVectorElements(sz) + 1) / 2;
			for (int i = 0; i < words; i++) {
				int low = s[i * 2];
				int high = s[i * 2 + 1];
				if (low < 0)
					low = 0;
				if (high < 0)
					high = 0;
				low >>= 15;
				high >>= 15;
				d[i] = low | (high << 16);
			}
			switch (sz) {
			case V_Quad:
			case V_Triple:
				oz = V_Pair;
				break;
			case V_Pair:
			case V_Single:
				oz = V_Single;
				break;
			default:
				_dbg_assert_msg_( false, "Trying to interpret instruction that can't be interpreted");
				oz = V_Single;
				break;
			}
		}
		break;

	case 3:  // vi2s
		{
			// Two input lanes are packed into each output word.
			const int words = (GetNumVectorElements(sz) + 1) / 2;
			for (int i = 0; i < words; i++) {
				u32 low = s[i * 2];
				u32 high = s[i * 2 + 1];
				low >>= 16;
				high >>= 16;
				d[i] = low | (high << 16);
			}
			switch (sz) {
			case V_Quad:
			case V_Triple:
				oz = V_Pair;
				break;
			case V_Pair:
			case V_Single:
				oz = V_Single;
				break;
			default:
				_dbg_assert_msg_(0, "Trying to interpret instruction that can't be interpreted");
				oz = V_Single;
				break;
			}
		}
		break;

	default:
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
		oz = V_Single;
		break;
	}

	// D prefix applies as expected.
	ApplyPrefixD(reinterpret_cast<float *>(d), oz);
	WriteVector(reinterpret_cast<float *>(d), oz, vd);
	PC += 4;
	EatPrefixes();
}
1105
1106
// Interprets the color conversion ops; bits 16-17 of op pick the 16-bit format
// (1: 4444, 2: 5551, 3: 565, 0: every color comes out as zero).
// Four 32-bit source lanes are always read from vs, each is reduced to one 16-bit
// color, and the colors are packed two per word before being written to vd.
void Int_ColorConv(MIPSOpcode op)
{
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize isz = GetVecSize(op);
	const int format = (op >> 16) & 3;

	u32 s[4];
	ReadVector(reinterpret_cast<float *>(s), V_Quad, vs);
	ApplySwizzleS(reinterpret_cast<float *>(s), V_Quad);

	// TODO: Invalid swizzle values almost seem to use the last value converted in a
	// previous execution of these ops.  It's a bit odd.
	u16 colors[4];
	for (int i = 0; i < 4; i++)
	{
		// Split the input word into its four 8-bit channels (lowest byte is r).
		const u32 r = s[i] & 0xFF;
		const u32 g = (s[i] >> 8) & 0xFF;
		const u32 b = (s[i] >> 16) & 0xFF;
		const u32 a = (s[i] >> 24) & 0xFF;

		u32 packed = 0;
		if (format == 1) {
			// 4444
			packed = ((a >> 4) << 12) | ((b >> 4) << 8) | ((g >> 4) << 4) | (r >> 4);
		} else if (format == 2) {
			// 5551
			packed = ((a >> 7) << 15) | ((b >> 3) << 10) | ((g >> 3) << 5) | (r >> 3);
		} else if (format == 3) {
			// 565
			packed = ((b >> 3) << 11) | ((g >> 2) << 5) | (r >> 3);
		}
		colors[i] = (u16)packed;
	}

	u32 ov[2] = {
		(u32)colors[0] | ((u32)colors[1] << 16),
		(u32)colors[2] | ((u32)colors[3] << 16),
	};
	// The D prefix is applied as a pair, but a single-sized op only writes one word.
	ApplyPrefixD(reinterpret_cast<float *>(ov), V_Pair);
	WriteVector((const float *)ov, isz == V_Single ? V_Single : V_Pair, vd);
	PC += 4;
	EatPrefixes();
}
1159
1160
// Interprets vdot: the dot product of vs and vt, written as a single to vd.
// Lanes beyond the op's size start as zero; S/T prefixes are applied across all four lanes.
void Int_VDot(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, V_Quad);

	union { float f; uint32_t u; } result;
	if (!USE_VFPU_DOT) {
		// Plain float accumulation over all four lanes.
		result.f = 0.0f;
		for (int lane = 0; lane < 4; lane++) {
			result.f += s[lane] * t[lane];
		}
	} else {
		result.f = vfpu_dot(s, t);
		if (my_isnan(result.f)) {
			// Every NAN result is replaced by this one bit pattern.
			result.u = 0x7f800001;
		} else if ((result.u & 0x7F800000) == 0) {
			// Zero exponent: drop the mantissa, keeping only the sign.
			result.u &= 0xFF800000;
		}
	}

	ApplyPrefixD(&result.f, V_Single);
	WriteVector(&result.f, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1191
1192
// Interprets vhdp: like vdot, except the S prefix is rewritten so the last
// element of S (by the op's size) is the constant 1.  The single result goes to vd.
void Int_VHdp(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, V_Quad);

	// S prefix forces constant 1 for the last element (w for quad.)
	// Otherwise it is the same as vdot.
	u32 sprefixRemove;
	u32 sprefixAdd;
	switch (sz) {
	case V_Quad:
		sprefixRemove = VFPU_SWIZZLE(0, 0, 0, 3);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE);
		break;
	case V_Triple:
		sprefixRemove = VFPU_SWIZZLE(0, 0, 3, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE);
		break;
	case V_Pair:
		sprefixRemove = VFPU_SWIZZLE(0, 3, 0, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::NONE, VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE);
		break;
	default:
		sprefixRemove = VFPU_SWIZZLE(3, 0, 0, 0);
		sprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::NONE, VFPUConst::NONE, VFPUConst::NONE);
		break;
	}
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Quad);

	float sum = 0.0f;
	if (!USE_VFPU_DOT) {
		for (int lane = 0; lane < 4; lane++) {
			sum += s[lane] * t[lane];
		}
	} else {
		sum = vfpu_dot(s, t);
	}

	// A NAN result always has its sign cleared.
	float d = sum;
	if (my_isnan(sum)) {
		d = fabsf(sum);
	}
	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1236
1237
// Interprets vbfy1 / vbfy2 (bit 16 of op set means vbfy2).
// Both operands are read from vs.  The S prefix gets forced negate flags and the
// T prefix gets a forced swizzle, then the two are added lane by lane into vd.
void Int_Vbfy(MIPSOpcode op) {
	float s[4]{}, t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	const bool isVbfy2 = (op & 0x10000) != 0;

	ReadVector(s, sz, vs);
	ReadVector(t, sz, vs);

	u32 sprefixAdd;
	u32 tprefixAdd;
	if (isVbfy2) {
		// S prefix forces the negate flags (so z and w are negative.)
		sprefixAdd = VFPU_NEGATE(0, 0, 1, 1);
		// T prefix forces swizzle (zwxy.)
		tprefixAdd = VFPU_SWIZZLE(2, 3, 0, 1);
	} else {
		// S prefix forces the negate flags (so y and w are negative.)
		sprefixAdd = VFPU_NEGATE(0, 1, 0, 1);
		// T prefix forces swizzle (yxwz.)
		tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
	}

	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, 0, sprefixAdd), sz);
	// Only the swizzle is replaced: negate still works, but constants are a bit weird.
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	if (isVbfy2) {
		// Other sizes don't seem completely predictable.
		if (sz != V_Quad) {
			ERROR_LOG_REPORT_ONCE(vbfy2, Log::CPU, "vfby2 with incorrect size");
		}
	} else {
		if (sz != V_Quad && sz != V_Pair) {
			// Uses its own once-key: this branch previously reused "vbfy2", so the two
			// reports could suppress each other if the macro keys on that identifier.
			ERROR_LOG_REPORT_ONCE(vbfy1, Log::CPU, "vfby1 with incorrect size");
		}
	}

	for (int lane = 0; lane < 4; lane++) {
		d[lane] = s[lane] + t[lane];
	}

	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1288
1289
// Interprets vsrt1: compares vs against a yxwz-swizzled copy of itself, taking
// the minimum in lanes 0 and 2 and the maximum in lanes 1 and 3.
void Int_Vsrt1(MIPSOpcode op) {
	// Zero-initialized because all four lanes are read below even when the op is
	// smaller than a quad; the extra result lanes are never written back.
	float s[4]{}, t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to yxwz from S.
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	const u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	for (int lane = 0; lane < 4; lane++) {
		// Odd lanes take the max, even lanes the min.
		d[lane] = (lane & 1) ? std::max(s[lane], t[lane]) : std::min(s[lane], t[lane]);
	}
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1314
1315
// Interprets vsrt2: compares vs against a wzyx-swizzled copy of itself, taking
// the minimum in lanes 0 and 1 and the maximum in lanes 2 and 3.
void Int_Vsrt2(MIPSOpcode op) {
	// Zero-initialized because all four lanes are read below even when the op is
	// smaller than a quad; the extra result lanes are never written back.
	float s[4]{}, t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to wzyx from S.
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	const u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	for (int lane = 0; lane < 4; lane++) {
		// The upper two lanes take the max, the lower two the min.
		d[lane] = (lane & 2) ? std::max(s[lane], t[lane]) : std::min(s[lane], t[lane]);
	}
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1340
1341
// Interprets vsrt3: compares vs against a yxwz-swizzled copy of itself, taking
// the maximum in lanes 0 and 2 and the minimum in lanes 1 and 3.
void Int_Vsrt3(MIPSOpcode op) {
	// Zero-initialized because all four lanes are read below even when the op is
	// smaller than a quad; the extra result lanes are never written back.
	float s[4]{}, t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to yxwz from S.
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	const u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 3, 2);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	for (int lane = 0; lane < 4; lane++) {
		// Odd lanes take the min, even lanes the max.
		d[lane] = (lane & 1) ? std::min(s[lane], t[lane]) : std::max(s[lane], t[lane]);
	}
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1366
1367
// Interprets vsrt4: compares vs against a wzyx-swizzled copy of itself, taking
// the maximum in lanes 0 and 1 and the minimum in lanes 2 and 3.
void Int_Vsrt4(MIPSOpcode op) {
	// Zero-initialized because all four lanes are read below even when the op is
	// smaller than a quad; the extra result lanes are never written back.
	float s[4]{}, t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vs);

	// T is force swizzled to wzyx from S.
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 3);
	const u32 tprefixAdd = VFPU_SWIZZLE(3, 2, 1, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	// TODO: May mishandle NAN / negative zero / etc.
	for (int lane = 0; lane < 4; lane++) {
		// The upper two lanes take the min, the lower two the max.
		d[lane] = (lane & 2) ? std::min(s[lane], t[lane]) : std::max(s[lane], t[lane]);
	}
	RetainInvalidSwizzleST(d, sz);
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1392
1393
// Interprets vcrs ("half a cross product"): multiplies a forced-swizzle of vs
// by a forced-swizzle of vt, lane by lane, and writes the products to vd.
void Int_Vcrs(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{}, d[4];
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);

	// S prefix forces swizzle (yzx?.)
	// That means negate still works, but constants are a bit weird.
	const u32 sprefixRemove = VFPU_SWIZZLE(3, 3, 3, 0);
	const u32 sprefixAdd = VFPU_SWIZZLE(1, 2, 0, 0);
	ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), sz);

	// T prefix forces swizzle (zxy?.)
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 3, 0);
	const u32 tprefixAdd = VFPU_SWIZZLE(2, 0, 1, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), sz);

	for (int lane = 0; lane < 4; lane++) {
		d[lane] = s[lane] * t[lane];
	}
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1423
1424
// Interprets vdet: s.x * t.y - s.y * t.x (after the forced T swizzle), plus the
// products of any remaining lanes, written as a single to vd.
void Int_Vdet(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	// This is normally V_Pair.  Unfilled s/t values are treated as zero.
	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);
	ReadVector(t, sz, vt);

	// T prefix forces swizzle for x and y (yx??.)
	// That means negate still works, but constants are a bit weird.
	// Note: there is no forced negation here.
	const u32 tprefixRemove = VFPU_SWIZZLE(3, 3, 0, 0);
	const u32 tprefixAdd = VFPU_SWIZZLE(1, 0, 0, 0);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	float det;
	if (!USE_VFPU_DOT) {
		det = s[0] * t[0] - s[1] * t[1];
		det += s[2] * t[2] + s[3] * t[3];
	} else {
		// Negating s.y turns the dot product into the same subtraction.
		s[1] = -s[1];
		det = vfpu_dot(s, t);
	}

	ApplyPrefixD(&det, V_Single);
	WriteVector(&det, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1455
1456
// Interprets vfad: sums the lanes of vs by taking a dot product with a T
// operand whose prefix is rewritten to the constant 1 in every lane.
void Int_Vfad(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);

	// T prefix generates constants, but abs can change the constant.
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	const u32 tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE, VFPUConst::ONE);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	float d;
	if (!USE_VFPU_DOT) {
		d = 0.0f;
		for (int lane = 0; lane < 4; lane++) {
			d += s[lane] * t[lane];
		}
	} else {
		d = vfpu_dot(s, t);
	}
	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1483
1484
// Interprets vavg: a dot product of vs with a T operand whose prefix is
// rewritten to a size-dependent constant in every lane (ZERO for single, HALF
// for pair, THIRD for triple, FOURTH for quad).  The single result goes to vd.
void Int_Vavg(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	float s[4]{}, t[4]{};
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, V_Quad);

	// T prefix generates constants, but supports negate.
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_ABS(1, 1, 1, 1);
	u32 tprefixAdd;
	switch (sz) {
	case V_Single:
		tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO, VFPUConst::ZERO);
		break;
	case V_Pair:
		tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF, VFPUConst::HALF);
		break;
	case V_Triple:
		tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD, VFPUConst::THIRD);
		break;
	case V_Quad:
		tprefixAdd = VFPU_MAKE_CONSTANTS(VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH, VFPUConst::FOURTH);
		break;
	default:
		tprefixAdd = 0;
		break;
	}
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	float d;
	if (!USE_VFPU_DOT) {
		d = 0.0f;
		for (int lane = 0; lane < 4; lane++) {
			d += s[lane] * t[lane];
		}
	} else {
		d = vfpu_dot(s, t);
	}
	ApplyPrefixD(&d, V_Single);
	WriteVector(&d, V_Single, vd);
	PC += 4;
	EatPrefixes();
}
1521
1522
// Interprets vscl: multiplies every lane of vs by the single register vt and
// writes the result to vd.
void Int_VScl(MIPSOpcode op) {
	// t is zero-initialized: only one lane is loaded below, but ApplyPrefixST is
	// handed the array as a V_Quad, so the other lanes must not be indeterminate.
	float s[4], t[4]{}, d[4];
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);

	// T prefix forces swizzle (zzzz for some reason, so we force V_Quad.)
	// That means negate still works, but constants are a bit weird.
	const int tlane = (vt >> 5) & 3;
	t[tlane] = V(vt);
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE();
	const u32 tprefixAdd = VFPU_SWIZZLE(tlane, tlane, tlane, tlane);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n; i++) {
		d[i] = s[i] * t[i];
	}
	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
1548
1549
// Interprets vrnds: seeds the VFPU random number state (starting at the RCX0
// control register) from the integer bits of register vd.
void Int_Vrnds(MIPSOpcode op) {
	const int vd = _VD;
	int seedBits = VI(vd);
	// Swizzles apply a constant value, constants/abs/neg work to vary the seed.
	ApplySwizzleS(reinterpret_cast<float *>(&seedBits), V_Single);
	vrnd_init(uint32_t(seedBits), &currentMIPS->vfpuCtrl[VFPU_CTRL_RCX0]);
	PC += 4;
	EatPrefixes();
}
1558
1559
// Interprets vrndi / vrndf1 / vrndf2 (bits 16-20 of op = 1, 2, 3): fills vd
// with generated random values, either as raw bits or as floats in a fixed range.
void Int_VrndX(MIPSOpcode op) {
	FloatBits d;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);
	const int variant = (op >> 16) & 0x1f;
	auto rcx = currentMIPS->vfpuCtrl + VFPU_CTRL_RCX0;

	// Values are written in backwards order.
	for (int i = (int)n - 1; i >= 0; i--) {
		if (variant == 1) {
			// vrndi
			d.u[i] = vrnd_generate(rcx);
		} else if (variant == 2) {
			// vrndf1 (>= 1, < 2)
			d.u[i] = 0x3F800000 | (vrnd_generate(rcx) & 0x007FFFFF);
		} else if (variant == 3) {
			// vrndf2 (>= 2, < 4)
			d.u[i] = 0x40000000 | (vrnd_generate(rcx) & 0x007FFFFF);
		} else {
			_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
		}
	}

	// D prefix is broken and applies to the last element only (mask and sat.)
	// The x lane's mask bit and saturation field are moved up to the last lane.
	const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
	const u32 lastsat = (dprefix & 3) << (2 * n - 2);
	currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1582
1583
// Generates one line of a rotation matrix around one of the three axes
1584
void Int_Vrot(MIPSOpcode op) {
1585
float d[4]{};
1586
int vd = _VD;
1587
int vs = _VS;
1588
int imm = (op >> 16) & 0x1f;
1589
VectorSize sz = GetVecSize(op);
1590
bool negSin = (imm & 0x10) != 0;
1591
int sineLane = (imm >> 2) & 3;
1592
int cosineLane = imm & 3;
1593
1594
float sine, cosine;
1595
if (currentMIPS->vfpuCtrl[VFPU_CTRL_SPREFIX] == 0x000E4) {
1596
vfpu_sincos(V(vs), sine, cosine);
1597
if (negSin)
1598
sine = -sine;
1599
} else {
1600
// Swizzle on S is a bit odd here, but generally only applies to sine.
1601
float s[4]{};
1602
ReadVector(s, V_Single, vs);
1603
u32 sprefixRemove = VFPU_NEGATE(1, 0, 0, 0);
1604
// We apply negSin later, not here. This handles zero a bit better.
1605
u32 sprefixAdd = VFPU_NEGATE(0, 0, 0, 0);
1606
ApplyPrefixST(s, VFPURewritePrefix(VFPU_CTRL_SPREFIX, sprefixRemove, sprefixAdd), V_Single);
1607
1608
// Cosine ignores all prefixes, so take the original.
1609
cosine = vfpu_cos(V(vs));
1610
sine = vfpu_sin(s[0]);
1611
1612
if (negSin)
1613
sine = -sine;
1614
RetainInvalidSwizzleST(&sine, V_Single);
1615
}
1616
1617
if (sineLane == cosineLane) {
1618
for (int i = 0; i < 4; i++)
1619
d[i] = sine;
1620
} else {
1621
d[sineLane] = sine;
1622
}
1623
1624
if (((vd >> 2) & 7) == ((vs >> 2) & 7)) {
1625
u8 dregs[4]{};
1626
GetVectorRegs(dregs, sz, vd);
1627
// Calculate cosine based on sine/zero result.
1628
bool written = false;
1629
for (int i = 0; i < 4; i++) {
1630
if (vs == dregs[i]) {
1631
d[cosineLane] = vfpu_cos(d[i]);
1632
written = true;
1633
break;
1634
}
1635
}
1636
if (!written)
1637
d[cosineLane] = cosine;
1638
} else {
1639
d[cosineLane] = cosine;
1640
}
1641
1642
// D prefix works, just not for the cosine lane.
1643
uint32_t dprefixRemove = (3 << cosineLane) | (1 << (8 + cosineLane));
1644
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] &= 0xFFFFF ^ dprefixRemove;
1645
ApplyPrefixD(d, sz);
1646
WriteVector(d, sz, vd);
1647
PC += 4;
1648
EatPrefixes();
1649
}
1650
1651
// Interprets the vtfm / vhtfm family: multiplies the matrix at vs by the vector
// at vt and writes the resulting vector to vd.  Bits 23-24 of op ("ins") give the
// matrix side minus one; the op's own vector size (n) may be smaller, in which
// case lane `ins` of the vector is treated as the constant 1.
void Int_Vtfm(MIPSOpcode op) {
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const int ins = (op >> 23) & 3;

	const VectorSize sz = (VectorSize)(ins + 1);
	const MatrixSize msz = (MatrixSize)(ins + 1);
	const int n = GetNumVectorElements(GetVecSize(op));
	const int tn = std::min(n, ins + 1);

	float s[16]{}, t[4]{};
	FloatBits d;
	ReadMatrix(s, msz, vs);
	ReadVector(t, sz, vt);

	// Post-processing applied to every vfpu_dot() result in this op.
	auto fixupDotLane = [&d](int lane) {
		if (my_isnan(d.f[lane])) {
			d.u[lane] = 0x7f800001;
		} else if ((d.u[lane] & 0x7F800000) == 0) {
			d.u[lane] &= 0xFF800000;
		}
	};

	// All rows except the last are computed without any prefix handling.
	if (USE_VFPU_DOT) {
		float t2[4];
		for (int i = 0; i < 4; i++) {
			// Lanes past the input take 1 at index `ins` and 0 elsewhere.
			t2[i] = i < tn ? t[i] : (i == ins ? 1.0f : 0.0f);
		}

		for (int row = 0; row < ins; row++) {
			d.f[row] = vfpu_dot(&s[row * 4], t2);
			fixupDotLane(row);
		}
	} else {
		for (int row = 0; row < ins; row++) {
			const float *m = &s[row * 4];
			d.f[row] = m[0] * t[0];
			for (int k = 1; k < tn; k++) {
				d.f[row] += m[k] * t[k];
			}
			if (ins >= n) {
				d.f[row] += m[ins];
			}
		}
	}

	// S and T prefixes apply for the final row only.
	// The T prefix is used to apply zero/one constants, but abs still changes it.
	float *lastRow = &s[ins * 4];
	ApplySwizzleS(lastRow, V_Quad);

	VFPUConst lanes[4] = {
		VFPUConst::NONE,
		n < 2 ? VFPUConst::ZERO : VFPUConst::NONE,
		n < 3 ? VFPUConst::ZERO : VFPUConst::NONE,
		n < 4 ? VFPUConst::ZERO : VFPUConst::NONE,
	};
	if (ins >= n && ins >= 1) {
		lanes[ins] = VFPUConst::ONE;
	}
	const u32 tprefixRemove = VFPU_SWIZZLE(0, n < 2 ? 3 : 0, n < 3 ? 3 : 0, n < 4 ? 3 : 0);
	const u32 tprefixAdd = VFPU_MAKE_CONSTANTS(lanes[0], lanes[1], lanes[2], lanes[3]);
	ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);

	// Really this is the operation all rows probably use (with constant wiring.)
	if (USE_VFPU_DOT) {
		d.f[ins] = vfpu_dot(lastRow, t);
		fixupDotLane(ins);
	} else {
		d.f[ins] = lastRow[0] * t[0];
		for (int k = 1; k < 4; k++) {
			d.f[ins] += lastRow[k] * t[k];
		}
	}

	// D prefix applies to the last element only.
	const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
	const u32 lastmask = (dprefix & (1 << 8)) << ins;
	const u32 lastsat = (dprefix & 3) << (ins + ins);
	currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1745
1746
// Interprets lv.s / sv.s: loads or stores one 32-bit VFPU register at
// R(rs) + imm, where imm is the sign-extended offset with its low two bits cleared.
void Int_SV(MIPSOpcode op)
{
	const s32 imm = SignExtend16ToS32(op & 0xFFFC);
	// The register number is split: five bits at 16-20, two more in the low bits.
	const int vt = ((op >> 16) & 0x1f) | ((op & 3) << 5);
	const int rs = _RS;
	const u32 addr = R(rs) + imm;

	const u32 major = op >> 26;
	if (major == 50) {
		// lv.s
		VI(vt) = Memory::Read_U32(addr);
	} else if (major == 58) {
		// sv.s
		Memory::Write_U32(VI(vt), addr);
	} else {
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
	}
	PC += 4;
}
1767
1768
1769
// Interprets mfv / mfvc / mtv / mtvc: moves a 32-bit value between CPU register
// rt and either a VFPU register (imm < 128) or a VFPU control register
// (imm - 128 below VFPU_CTRL_MAX).
void Int_Mftv(MIPSOpcode op)
{
	const int imm = op & 0xFF;
	const int rt = _RT;
	const int kind = (op >> 21) & 0x1f;
	const bool isVfpuReg = imm < 128;
	const bool isCtrlReg = !isVfpuReg && imm < 128 + VFPU_CTRL_MAX;

	if (kind == 3) {
		// mfv / mfvc
		// rt = 0, imm = 255 appears to be used as a CPU interlock by some games.
		if (rt != 0) {
			if (isVfpuReg) {
				R(rt) = VI(imm);
			} else if (isCtrlReg) {
				// mfvc
				R(rt) = currentMIPS->vfpuCtrl[imm - 128];
			} else {
				//ERROR - maybe need to make this value too an "interlock" value?
				_dbg_assert_msg_(false,"mfv - invalid register");
			}
		}
	} else if (kind == 7) {
		// mtv / mtvc
		if (isVfpuReg) {
			VI(imm) = R(rt);
		} else if (isCtrlReg) {
			// Control registers are only written when GetVFPUCtrlMask succeeds,
			// and then only the bits in the returned mask are stored.
			u32 mask;
			if (GetVFPUCtrlMask(imm - 128, &mask)) {
				currentMIPS->vfpuCtrl[imm - 128] = R(rt) & mask;
			}
		} else {
			//ERROR
			_dbg_assert_msg_(false,"mtv - invalid register");
		}
	} else {
		_dbg_assert_msg_(false,"Trying to interpret instruction that can't be interpreted");
	}
	PC += 4;
}
1809
1810
// Interprets vmfvc: copies VFPU control register imm (bits 8-14 of op) into
// VFPU register vd.  An out-of-range control index reads as zero.
void Int_Vmfvc(MIPSOpcode op) {
	const int vd = _VD;
	const int imm = (op >> 8) & 0x7F;
	const bool inRange = imm < VFPU_CTRL_MAX;
	VI(vd) = inRange ? currentMIPS->vfpuCtrl[imm] : 0;
	PC += 4;
}
1820
1821
// Interprets vmtvc: copies VFPU register vs into VFPU control register imm
// (low 7 bits of op).  Out-of-range indices, and registers for which
// GetVFPUCtrlMask fails, are left untouched.
void Int_Vmtvc(MIPSOpcode op) {
	const int vs = _VS;
	const int imm = op & 0x7F;
	u32 mask;
	if (imm < VFPU_CTRL_MAX && GetVFPUCtrlMask(imm, &mask)) {
		// Only the bits in the returned mask are stored.
		currentMIPS->vfpuCtrl[imm] = VI(vs) & mask;
	}
	PC += 4;
}
1832
1833
// Interprets vcst: broadcasts entry (op >> 16) & 0x1f of cst_constants to every
// lane of vd.
void Int_Vcst(MIPSOpcode op)
{
	const int vd = _VD;
	const int conNum = (op >> 16) & 0x1f;
	const VectorSize sz = GetVecSize(op);

	float temp[4];
	for (int lane = 0; lane < 4; lane++) {
		temp[lane] = cst_constants[conNum];
	}
	ApplyPrefixD(temp, sz);
	WriteVector(temp, sz, vd);
	PC += 4;
	EatPrefixes();
}
1846
1847
// Interprets vcmp: evaluates condition (op & 0xf) on each lane of vs (and vt for
// the two-operand conditions) and stores the results in the VFPU CC register:
// one bit per lane, bit 4 = any lane true, bit 5 = all lanes true.
void Int_Vcmp(MIPSOpcode op)
{
	const int vs = _VS;
	const int vt = _VT;
	const int cond = op & 0xf;
	const VectorSize sz = GetVecSize(op);
	const int n = GetNumVectorElements(sz);

	float s[4];
	float t[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	int laneBits = 0;
	bool anyTrue = false;
	bool allTrue = true;
	for (int i = 0; i < n; i++)
	{
		const float a = s[i];
		const float b = t[i];
		bool hit;
		switch (cond)
		{
		case VC_FL: hit = false; break;
		case VC_EQ: hit = a == b; break;
		case VC_LT: hit = a < b; break;
		case VC_LE: hit = a <= b; break;

		case VC_TR: hit = true; break;
		case VC_NE: hit = a != b; break;
		case VC_GE: hit = a >= b; break;
		case VC_GT: hit = a > b; break;

		case VC_EZ: hit = a == 0.0f || a == -0.0f; break;
		case VC_EN: hit = my_isnan(a); break;
		case VC_EI: hit = my_isinf(a); break;
		case VC_ES: hit = my_isnanorinf(a); break;   // Tekken Dark Resurrection

		case VC_NZ: hit = a != 0; break;
		case VC_NN: hit = !my_isnan(a); break;
		case VC_NI: hit = !my_isinf(a); break;
		case VC_NS: hit = !(my_isnanorinf(a)); break;   // How about t[i] ?

		default:
			// Unknown condition: leave CC untouched and just step past the op.
			_dbg_assert_msg_(false,"Unsupported vcmp condition code %d", cond);
			PC += 4;
			EatPrefixes();
			return;
		}
		if (hit)
			laneBits |= 1 << i;
		anyTrue = anyTrue || hit;
		allTrue = allTrue && hit;
	}

	// Use masking to only change the affected bits: the n lane bits plus 4 and 5.
	const int affected_bits = ((1 << n) - 1) | (1 << 4) | (1 << 5);
	const int newBits = laneBits | ((anyTrue ? 1 : 0) << 4) | ((allTrue ? 1 : 0) << 5);
	currentMIPS->vfpuCtrl[VFPU_CTRL_CC] =
		(currentMIPS->vfpuCtrl[VFPU_CTRL_CC] & ~affected_bits) |
		(newBits & affected_bits);
	PC += 4;
	EatPrefixes();
}
1908
1909
// Interprets vmin / vmax (bits 23-24 of op = 2 / 3): per-lane minimum or maximum
// of vs and vt, written to vd.  Lanes involving NAN or INF are ordered by their
// integer bit patterns rather than as floats.
void Int_Vminmax(MIPSOpcode op) {
	FloatBits s, t, d;
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const int cond = op&15;
	const VectorSize sz = GetVecSize(op);
	const int numElements = GetNumVectorElements(sz);

	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);

	const int which = (op >> 23) & 3;
	if (which != 2 && which != 3) {
		_dbg_assert_msg_(false,"unknown min/max op %d", cond);
		PC += 4;
		EatPrefixes();
		return;
	}
	const bool wantMax = which == 3;

	// If both are zero, take t's sign.
	// Otherwise: -NAN < -INF < real < INF < NAN (higher mantissa is farther from 0.)
	for (int i = 0; i < numElements; i++) {
		const bool special = my_isnanorinf(s.f[i]) || my_isnanorinf(t.f[i]);
		if (special) {
			// If both are negative, we flip the comparison (not two's compliment.)
			const bool bothNegative = s.i[i] < 0 && t.i[i] < 0;
			// vmin picks the larger integer only when both are negative; vmax is
			// the exact mirror of that.
			const bool pickLargerInt = wantMax != bothNegative;
			if (pickLargerInt) {
				d.i[i] = std::max(t.i[i], s.i[i]);
			} else {
				d.i[i] = std::min(t.i[i], s.i[i]);
			}
		} else if (wantMax) {
			d.f[i] = std::max(t.f[i], s.f[i]);
		} else {
			d.f[i] = std::min(t.f[i], s.f[i]);
		}
	}

	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1969
1970
// Interprets vscmp: writes -1.0f, 0.0f or 1.0f to each lane of vd according to
// whether the matching lane of vs is less than, equal to, or greater than vt.
void Int_Vscmp(MIPSOpcode op) {
	FloatBits s, t, d;
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);
	const int n = GetNumVectorElements(sz);
	for (int i = 0; i < n ; i++) {
		const float a = s.f[i] - t.f[i];
		if (my_isnan(a)) {
			// NAN/INF are treated as just larger numbers, as in vmin/vmax.
			// Turn each sign-magnitude bit pattern into an ordinary signed integer.
			const int sMagnitude = s.u[i] & 0x7FFFFFFF;
			const int tMagnitude = t.u[i] & 0x7FFFFFFF;
			const int sOrdered = s.i[i] < 0 ? -sMagnitude : sMagnitude;
			const int tOrdered = t.i[i] < 0 ? -tMagnitude : tMagnitude;
			// Compare instead of subtracting: the difference of two values with
			// opposite signs (e.g. +NAN and -NAN) does not fit in an int, and the
			// overflow used to produce the wrong sign.
			d.f[i] = (float)((tOrdered < sOrdered) - (sOrdered < tOrdered));
		} else {
			d.f[i] = (float)((0.0f < a) - (a < 0.0f));
		}
	}
	RetainInvalidSwizzleST(d.f, sz);
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
1999
2000
// Interprets vsge: each lane of vd becomes 1.0f when vs >= vt in that lane and
// 0.0f otherwise (including when either input is NAN).
void Int_Vsge(MIPSOpcode op) {
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const int numElements = GetNumVectorElements(sz);

	float s[4], t[4], d[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	for (int i = 0; i < numElements; i++) {
		const bool unordered = my_isnan(s[i]) || my_isnan(t[i]);
		d[i] = (!unordered && s[i] >= t[i]) ? 1.0f : 0.0f;
	}
	RetainInvalidSwizzleST(d, sz);
	// The clamp cannot matter, so skip it.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2024
2025
// Interprets vslt: each lane of vd becomes 1.0f when vs < vt in that lane and
// 0.0f otherwise (including when either input is NAN).
void Int_Vslt(MIPSOpcode op) {
	const int vt = _VT;
	const int vs = _VS;
	const int vd = _VD;
	const VectorSize sz = GetVecSize(op);
	const int numElements = GetNumVectorElements(sz);

	float s[4], t[4], d[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	ReadVector(t, sz, vt);
	ApplySwizzleT(t, sz);

	for (int i = 0; i < numElements; i++) {
		const bool unordered = my_isnan(s[i]) || my_isnan(t[i]);
		d[i] = (!unordered && s[i] < t[i]) ? 1.0f : 0.0f;
	}
	RetainInvalidSwizzleST(d, sz);
	// The clamp cannot matter, so skip it.
	ApplyPrefixD(d, sz, true);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2049
2050
2051
// Interprets vcmovt / vcmovf: conditionally copies lanes of vs into vd based on
// bits of the VFPU CC register.  imm3 (bits 16-18 of op) below 6 tests that one
// CC bit for every lane; imm3 == 6 tests CC bit i for lane i.  Bit 19 of op (tf)
// inverts the test: a lane is copied when its CC bit equals !tf.
void Int_Vcmov(MIPSOpcode op) {
	const int vs = _VS;
	const int vd = _VD;
	const int tf = (op >> 19) & 1;
	const int imm3 = (op >> 16) & 7;
	const VectorSize sz = GetVecSize(op);
	const int n = GetNumVectorElements(sz);

	float s[4];
	float d[4];
	ReadVector(s, sz, vs);
	ApplySwizzleS(s, sz);
	// Not only is D read (as T), but the T prefix applies to it.
	ReadVector(d, sz, vd);
	ApplySwizzleT(d, sz);

	const int CC = currentMIPS->vfpuCtrl[VFPU_CTRL_CC];
	const int wanted = tf ? 0 : 1;

	if (imm3 > 6) {
		ERROR_LOG_REPORT(Log::CPU, "Bad Imm3 in cmov: %d", imm3);
	} else {
		for (int i = 0; i < n; i++) {
			// A shared bit for imm3 < 6, otherwise the lane's own bit.
			const int bit = imm3 < 6 ? imm3 : i;
			if (((CC >> bit) & 1) == wanted)
				d[i] = s[i];
		}
	}

	ApplyPrefixD(d, sz);
	WriteVector(d, sz, vd);
	PC += 4;
	EatPrefixes();
}
2086
2087
// Interprets the simple three-operand vector ops: vadd, vsub and vdiv (major
// opcode 24, sub-op 0 / 1 / 7) and vmul (major opcode 25, sub-op 0).
// vd receives the per-lane result of vs <op> vt.
void Int_VecDo3(MIPSOpcode op) {
	float s[4], t[4];
	FloatBits d;
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);

	// Decode.  An unrecognized encoding asserts and then falls back to optype 0.
	const int subop = (op >> 23) & 7;
	int optype = 0;
	bool recognized = false;
	switch (op >> 26) {
	case 24: //VFPU0
		if (subop == 0 || subop == 1 || subop == 7) {
			optype = subop;
			recognized = true;
		}
		break;
	case 25: //VFPU1
		if (subop == 0) {
			optype = 8;
			recognized = true;
		}
		break;
	default:
		break;
	}
	if (!recognized) {
		_dbg_assert_msg_( 0, "Trying to interpret instruction that can't be interpreted");
	}
	const bool isDiv = optype == 7;

	const u32 n = GetNumVectorElements(sz);
	ReadVector(s, sz, vs);
	ReadVector(t, sz, vt);
	if (isDiv) {
		// The prefix handling of S/T is a bit odd, probably the HW doesn't do it in parallel.
		// The X prefix is applied to the last element in sz.
		// TODO: This doesn't match exactly for a swizzle past x in some cases...
		ApplySwizzleS(&s[n - 1], V_Single, -INFINITY);
		ApplySwizzleT(&t[n - 1], V_Single, -INFINITY);
	} else {
		ApplySwizzleS(s, sz);
		ApplySwizzleT(t, sz);
	}

	for (int i = 0; i < (int)n; i++) {
		if (optype == 0) {
			d.f[i] = s[i] + t[i];   //vadd
		} else if (optype == 1) {
			d.f[i] = s[i] - t[i];   //vsub
		} else if (optype == 7) {
			d.f[i] = s[i] / t[i];   //vdiv
		} else if (optype == 8) {
			d.f[i] = s[i] * t[i];   //vmul
		}

		if (USE_VFPU_DOT) {
			if (my_isnan(d.f[i])) {
				// Keep sign and exponent, force the lowest mantissa bit on.
				d.u[i] = (d.u[i] & 0xff800001) | 1;
			} else if ((d.u[i] & 0x7F800000) == 0) {
				// Zero exponent: drop the mantissa, keeping only the sign.
				d.u[i] &= 0xFF800000;
			}
		}
	}

	if (isDiv) {
		// For vdiv only, the D prefix only applies mask (and like S/T, x applied to last.)
		const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
		const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
		const u32 lastsat = (dprefix & 3) << (n + n - 2);
		currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
	} else {
		RetainInvalidSwizzleST(d.f, sz);
	}
	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2162
2163
// Interprets vcrsp.t (triple) and vqmul.q (quad), and also handles the same encoding
// used with pair and single sizes.
//
// All lanes except the last are computed from the raw register values.  The last lane is
// a 4-wide dot product taken after a forced swizzle/negate has been written into the
// T prefix and the S swizzle has been applied.  The D prefix (mask and saturation) is
// applied to the last lane only.
void Int_CrossQuat(MIPSOpcode op) {
	float lhs[4]{}, rhs[4]{}, out[4];
	const int vd = _VD;
	const int vs = _VS;
	const int vt = _VT;
	const VectorSize sz = GetVecSize(op);
	const u32 n = GetNumVectorElements(sz);
	ReadVector(lhs, sz, vs);
	ReadVector(rhs, sz, vt);

	// Bits of the T prefix that get replaced by the instruction's forced values.
	const u32 tprefixRemove = VFPU_ANY_SWIZZLE() | VFPU_NEGATE(1, 1, 1, 1);

	// T prefix forces swizzle and negate, can be used to have weird constants.
	// Both prefixes are always applied across all four lanes.
	auto applyForcedPrefixes = [&](u32 tprefixAdd) {
		ApplyPrefixST(rhs, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);
		ApplySwizzleS(lhs, V_Quad);
	};

	// Plain 4-wide dot product, used when the VFPU-accurate dot is not enabled.
	auto plainDot = [&]() {
		return lhs[0] * rhs[0] + lhs[1] * rhs[1] + lhs[2] * rhs[2] + lhs[3] * rhs[3];
	};

	switch (sz) {
	case V_Triple: // vcrsp.t
	{
		if (USE_VFPU_DOT) {
			float row0[4] = { 0.0f, rhs[2], -rhs[1], 0.0f };
			float row1[4] = { -rhs[2], 0.0f, rhs[0], 0.0f };
			out[0] = vfpu_dot(lhs, row0);
			out[1] = vfpu_dot(lhs, row1);
		} else {
			out[0] = lhs[1] * rhs[2] - lhs[2] * rhs[1];
			out[1] = lhs[2] * rhs[0] - lhs[0] * rhs[2];
		}

		applyForcedPrefixes(VFPU_SWIZZLE(1, 0, 3, 2) | VFPU_NEGATE(0, 1, 0, 0));
		if (USE_VFPU_DOT) {
			// TODO: But flush any infs to 0? This seems sketchy.
			for (int lane = 0; lane < 4; ++lane) {
				if (my_isinf(lhs[lane]))
					lhs[lane] = 0.0f;
				if (my_isinf(rhs[lane]))
					rhs[lane] = 0.0f;
			}
			out[2] = vfpu_dot(lhs, rhs);
		} else {
			out[2] = plainDot();
		}
		break;
	}

	case V_Quad: // vqmul.q
	{
		if (USE_VFPU_DOT) {
			float row0[4] = { rhs[3], rhs[2], -rhs[1], rhs[0] };
			float row1[4] = { -rhs[2], rhs[3], rhs[0], rhs[1] };
			float row2[4] = { rhs[1], -rhs[0], rhs[3], rhs[2] };
			out[0] = vfpu_dot(lhs, row0);
			out[1] = vfpu_dot(lhs, row1);
			out[2] = vfpu_dot(lhs, row2);
		} else {
			out[0] = lhs[0] * rhs[3] + lhs[1] * rhs[2] - lhs[2] * rhs[1] + lhs[3] * rhs[0];
			out[1] = -lhs[0] * rhs[2] + lhs[1] * rhs[3] + lhs[2] * rhs[0] + lhs[3] * rhs[1];
			out[2] = lhs[0] * rhs[1] - lhs[1] * rhs[0] + lhs[2] * rhs[3] + lhs[3] * rhs[2];
		}

		applyForcedPrefixes(VFPU_SWIZZLE(0, 1, 2, 3) | VFPU_NEGATE(1, 1, 1, 0));
		if (USE_VFPU_DOT) {
			out[3] = vfpu_dot(lhs, rhs);
		} else {
			out[3] = plainDot();
		}
		break;
	}

	case V_Pair:
		// t swizzles invalid so the multiply is always zero.
		out[0] = 0;

		applyForcedPrefixes(VFPU_SWIZZLE(0, 0, 0, 0) | VFPU_NEGATE(0, 0, 0, 0));
		// It's possible to populate a value by swizzling s[2].
		out[1] = lhs[2] * rhs[2];
		break;

	case V_Single:
		// t swizzles invalid so the multiply is always zero.
		out[0] = 0;
		break;

	default:
		ERROR_LOG_REPORT(Log::CPU, "vcrsp/vqmul with invalid elements");
		break;
	}

	if (sz == V_Single) {
		// Single always seems to write out zero.
		currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = 0;
	} else {
		// D prefix applies to the last element only (mask and sat) for pair and larger.
		// Move the x lane's mask and saturation bits to the last lane, dropping the rest.
		const u32 dprefix = currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX];
		const u32 lastmask = (dprefix & (1 << 8)) << (n - 1);
		const u32 lastsat = (dprefix & 3) << (n + n - 2);
		currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
		ApplyPrefixD(out, sz);
	}
	WriteVector(out, sz, vd);
	PC += 4;
	EatPrefixes();
}
2269
2270
// Vector log binary (extract exponent).
//
// Only the first element is transformed, based on its 8-bit biased exponent field:
//   0xFF  -> the input value is passed through,
//   0     -> negative infinity,
//   other -> the unbiased exponent (field - 127) as a float.
void Int_Vlgb(MIPSOpcode op) {
	FloatBits in, out;
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	ReadVector(in.f, sz, vs);
	ApplySwizzleS(in.f, sz);

	const int biasedExp = (int)((in.u[0] >> 23) & 0xFF);
	switch (biasedExp) {
	case 0xFF:
		out.f[0] = in.f[0];
		break;
	case 0:
		out.f[0] = -INFINITY;
		break;
	default:
		out.f[0] = (float)(biasedExp - 127);
		break;
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		out.u[lane] = in.u[lane];
	}

	RetainInvalidSwizzleST(out.f, sz);
	ApplyPrefixD(out.f, sz);
	WriteVector(out.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2300
2301
// Interprets vwbn: rewrites the first element so that its exponent field becomes the
// 8-bit immediate taken from bits 16-23 of the opcode.
//
// When the existing exponent field is neither 0 nor 0xFF, the mantissa (with its implicit
// leading one) is shifted by the exponent difference, limited to the low 4 bits of that
// difference.  Otherwise the immediate is simply OR'd into the exponent field.
void Int_Vwbn(MIPSOpcode op) {
	FloatBits in, out;
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);
	const u32 newExp = (op >> 16) & 0xFF;

	ReadVector(in.f, sz, vs);
	ApplySwizzleS(in.f, sz);

	const u32 raw = in.u[0];
	const u32 oldExp = (raw >> 23) & 0xFF;
	const bool plainExponent = oldExp != 0 && oldExp != 0xFF;
	if (!plainExponent) {
		out.u[0] = raw | (newExp << 23);
	} else {
		const u32 sign = raw & 0x80000000;
		// Restore the implicit leading one so it takes part in the shift.
		u32 significand = (raw & 0x007FFFFF) | 0x00800000;
		if (newExp > oldExp) {
			significand >>= (newExp - oldExp) & 0xF;
		} else {
			significand <<= (oldExp - newExp) & 0xF;
		}
		out.u[0] = sign | (significand & 0x007FFFFF) | (newExp << 23);
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		out.u[lane] = in.u[lane];
	}

	RetainInvalidSwizzleST(out.f, sz);
	ApplyPrefixD(out.f, sz);
	WriteVector(out.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2338
2339
// Interprets vsbn: replaces the exponent field of the first element of vs with
// (127 + t), where t is the first element of vt read as an integer.
//
// Inputs whose exponent field is 0 or all ones are passed through unchanged.
// Elements beyond the first are copied from the source as-is.
void Int_Vsbn(MIPSOpcode op) {
	FloatBits d, s, t;
	int vd = _VD;
	int vs = _VS;
	int vt = _VT;
	VectorSize sz = GetVecSize(op);

	ReadVector(s.f, sz, vs);
	ApplySwizzleS(s.f, sz);
	ReadVector(t.f, sz, vt);
	ApplySwizzleT(t.f, sz);
	// Swizzle does apply to the value read as an integer.
	// The addition is done in unsigned arithmetic: only the low 8 bits are kept, and the
	// register may hold any bit pattern, so a signed add could overflow (undefined behavior).
	u8 exp = (u8)(127u + t.u[0]);

	// Simply replace the exponent bits.
	u32 prev = s.u[0] & 0x7F800000;
	if (prev != 0 && prev != 0x7F800000) {
		d.u[0] = (s.u[0] & ~0x7F800000) | ((u32)exp << 23);
	} else {
		d.u[0] = s.u[0];
	}

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int i = 1; i < count; ++i) {
		d.u[i] = s.u[i];
	}

	ApplyPrefixD(d.f, sz);
	WriteVector(d.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2371
2372
// Vector scale by zero (set exp to 0 to extract mantissa).
//
// For the first element, the exponent field is replaced with 127 and the sign bit is
// cleared, keeping only the mantissa bits.  NaNs and values with a zero exponent field
// are passed through untouched.  Remaining elements are copied from the source.
void Int_Vsbz(MIPSOpcode op) {
	FloatBits in, out;
	const int vd = _VD;
	const int vs = _VS;
	const VectorSize sz = GetVecSize(op);

	ReadVector(in.f, sz, vs);
	ApplySwizzleS(in.f, sz);

	// NAN and denormals pass through.
	const u32 raw = in.u[0];
	const bool passThrough = my_isnan(in.f[0]) || (raw & 0x7F800000) == 0;
	out.u[0] = passThrough ? raw : ((127 << 23) | (raw & 0x007FFFFF));

	// If sz is greater than V_Single, the rest are copied unchanged.
	const int count = GetNumVectorElements(sz);
	for (int lane = 1; lane < count; ++lane) {
		out.u[lane] = in.u[lane];
	}

	ApplyPrefixD(out.f, sz);
	WriteVector(out.f, sz, vd);
	PC += 4;
	EatPrefixes();
}
2399
}
2400
2401