CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/x86/CompFPU.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
20
21
#include "Core/Config.h"
22
#include "Core/MemMap.h"
23
#include "Common/CommonTypes.h"
24
#include "Core/MIPS/MIPS.h"
25
#include "Core/MIPS/MIPSCodeUtils.h"
26
#include "Core/MIPS/x86/Jit.h"
27
#include "Core/MIPS/x86/RegCache.h"
28
29
#define _RS MIPS_GET_RS(op)
30
#define _RT MIPS_GET_RT(op)
31
#define _RD MIPS_GET_RD(op)
32
#define _FS MIPS_GET_FS(op)
33
#define _FT MIPS_GET_FT(op)
34
#define _FD MIPS_GET_FD(op)
35
#define _SA MIPS_GET_SA(op)
36
#define _POS ((op>> 6) & 0x1F)
37
#define _SIZE ((op>>11) & 0x1F)
38
#define _IMM16 (signed short)(op & 0xFFFF)
39
#define _IMM26 (op & 0x03FFFFFF)
40
41
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
42
// Currently known non working ones should have DISABLE.
43
44
// #define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
45
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
46
#define DISABLE { Comp_Generic(op); return; }
47
48
namespace MIPSComp {
49
50
using namespace Gen;
51
using namespace X64JitConstants;
52
53
alignas(16) const u32 reverseQNAN[4] = { 0x803FFFFF, 0x803FFFFF, 0x803FFFFF, 0x803FFFFF };
54
55
// Copies a single float into dst. For a memory source we must use MOVSS (a
// scalar load); for a register source we prefer MOVAPS, which copies the full
// register and avoids MOVSS's partial-register merge with dst's upper lanes.
void Jit::CopyFPReg(X64Reg dst, OpArg src) {
	if (!src.IsSimpleReg()) {
		MOVSS(dst, src);
	} else {
		MOVAPS(dst, src);
	}
}
62
63
// Emits a three-operand FPU arithmetic op (fd = fs <arith> ft) using the
// two-operand destructive x86 SSE instruction `arith`. The branches below
// handle register aliasing between fd/fs/ft. orderMatters is true for
// non-commutative ops (SUB, DIV), where we may not compute with the operands
// swapped.
void Jit::CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters) {
	int ft = _FT;
	int fs = _FS;
	int fd = _FD;
	// Pin all three regs so the allocator can't spill them mid-sequence.
	fpr.SpillLock(fd, fs, ft);

	if (fs == fd) {
		// fd already holds fs: apply ft directly on top of it.
		fpr.MapReg(fd, true, true);
		(this->*arith)(fpr.RX(fd), fpr.R(ft));
	} else if (ft == fd && !orderMatters) {
		// fd holds ft and the op is commutative: apply fs on top instead.
		fpr.MapReg(fd, true, true);
		(this->*arith)(fpr.RX(fd), fpr.R(fs));
	} else if (ft != fd) {
		// fs can't be fd (handled above.)
		// fd is distinct from both sources: copy fs in, then apply ft.
		fpr.MapReg(fd, false, true);
		CopyFPReg(fpr.RX(fd), fpr.R(fs));
		(this->*arith)(fpr.RX(fd), fpr.R(ft));
	} else {
		// fd must be ft, and order must matter.
		// Compute into scratch XMM0 so ft isn't clobbered before it's read.
		fpr.MapReg(fd, true, true);
		CopyFPReg(XMM0, fpr.R(fs));
		(this->*arith)(XMM0, fpr.R(ft));
		MOVAPS(fpr.RX(fd), R(XMM0));
	}
	fpr.ReleaseSpillLocks();
}
89
90
// Compiles the three-operand FPU arithmetic ops (add.s/sub.s/mul.s/div.s),
// dispatched on the function field (low 6 bits of the opcode).
void Jit::Comp_FPU3op(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU);
	switch (op & 0x3f) {
	case 0: CompFPTriArith(op, &XEmitter::ADDSS, false); break; //F(fd) = F(fs) + F(ft); //add
	case 1: CompFPTriArith(op, &XEmitter::SUBSS, true); break; //F(fd) = F(fs) - F(ft); //sub
	case 2: //F(fd) = F(s) * F(ft); //mul
		// mul gets special treatment: when non-NAN inputs produce a NAN
		// result, the bits below rewrite it (via the reverseQNAN mask above),
		// presumably to match the NAN bit pattern the PSP produces -- TODO confirm.
		// XMM1 = !my_isnan(fs) && !my_isnan(ft)
		MOVSS(XMM1, fpr.R(_FS));
		CMPORDSS(XMM1, fpr.R(_FT));
		CompFPTriArith(op, &XEmitter::MULSS, false);

		// fd must still be in a reg, save it in XMM0 for now.
		MOVAPS(XMM0, fpr.R(_FD));
		// fd = my_isnan(fd) && !my_isnan(fs) && !my_isnan(ft)
		CMPUNORDSS(fpr.RX(_FD), fpr.R(_FD));
		ANDPS(fpr.RX(_FD), R(XMM1));
		// At this point fd = FFFFFFFF if non-NAN inputs produced a NAN output.
		// We'll AND it with the inverse QNAN bits to clear (00000000 means no change.)
		if (RipAccessible(&reverseQNAN)) {
			ANDPS(fpr.RX(_FD), M(&reverseQNAN));  // rip accessible
		} else {
			MOV(PTRBITS, R(TEMPREG), ImmPtr(&reverseQNAN));
			ANDPS(fpr.RX(_FD), MatR(TEMPREG));
		}
		// ANDN is backwards, which is why we saved XMM0 to start. Now put it back.
		ANDNPS(fpr.RX(_FD), R(XMM0));
		break;
	case 3: CompFPTriArith(op, &XEmitter::DIVSS, true); break; //F(fd) = F(fs) / F(ft); //div
	default:
		_dbg_assert_msg_(false,"Trying to compile FPU3Op instruction that can't be interpreted");
		break;
	}
}
123
124
// Compiles the FPU memory ops: lwc1 (load a 32-bit word into an FPR) and
// swc1 (store an FPR as a 32-bit word), dispatched on the primary opcode
// (top 6 bits). Address is rs + signed 16-bit immediate.
void Jit::Comp_FPULS(MIPSOpcode op) {
	CONDITIONAL_DISABLE(LSU_FPU);
	s32 offset = _IMM16;
	int ft = _FT;
	MIPSGPReg rs = _RS;

	CheckMemoryBreakpoint(0, rs, offset);

	switch (op >> 26) {
	case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1
		{
			gpr.Lock(rs);
			fpr.SpillLock(ft);
			// ft is written, not read: map for write only.
			fpr.MapReg(ft, false, true);

			// JitSafeMem emits both the fast path (direct memory operand)
			// and the slow path (readU32 helper, result returned in EAX).
			JitSafeMem safe(this, rs, offset);
			OpArg src;
			if (safe.PrepareRead(src, 4))
				MOVSS(fpr.RX(ft), src);
			if (safe.PrepareSlowRead(safeMemFuncs.readU32))
				MOVD_xmm(fpr.RX(ft), R(EAX));
			safe.Finish();

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;
	case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1
		{
			gpr.Lock(rs);
			fpr.SpillLock(ft);
			// ft is only read for a store.
			fpr.MapReg(ft, true, false);

			JitSafeMem safe(this, rs, offset);
			OpArg dest;
			if (safe.PrepareWrite(dest, 4))
				MOVSS(dest, fpr.RX(ft));
			if (safe.PrepareSlowWrite())
			{
				// The slow-path helper takes the value from memory, so
				// stage it in the mips state's temp slot first.
				MOVSS(MIPSSTATE_VAR(temp), fpr.RX(ft));
				safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp));
			}
			safe.Finish();

			gpr.UnlockAll();
			fpr.ReleaseSpillLocks();
		}
		break;

	default:
		_dbg_assert_msg_(false,"Trying to interpret FPULS instruction that can't be interpreted");
		break;
	}
}
178
179
alignas(16) static const u64 ssSignBits2[2] = {0x8000000080000000ULL, 0x8000000080000000ULL};
180
alignas(16) static const u64 ssNoSignMask[2] = {0x7FFFFFFF7FFFFFFFULL, 0x7FFFFFFF7FFFFFFFULL};
181
182
void Jit::CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN) {
183
gpr.MapReg(MIPS_REG_FPCOND, false, true);
184
185
// This means that NaN also means true, e.g. !<> or !>, etc.
186
if (allowNaN) {
187
CopyFPReg(XMM0, fpr.R(lhs));
188
CopyFPReg(XMM1, fpr.R(lhs));
189
CMPSS(XMM0, fpr.R(rhs), compare);
190
CMPUNORDSS(XMM1, fpr.R(rhs));
191
192
POR(XMM0, R(XMM1));
193
} else {
194
CopyFPReg(XMM0, fpr.R(lhs));
195
CMPSS(XMM0, fpr.R(rhs), compare);
196
}
197
198
MOVD_xmm(gpr.R(MIPS_REG_FPCOND), XMM0);
199
}
200
201
// Compiles the c.cond.s family of FPU compares into the FPCOND bit.
void Jit::Comp_FPUComp(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU_COMP);

	int fs = _FS;
	int ft = _FT;

	// Conditions 8-15 are the signaling variants of 0-7 and are handled
	// identically here, so dispatch on the low three bits only.
	switch (op & 0x7) {
	case 0: //f, sf
		gpr.SetImm(MIPS_REG_FPCOND, 0);
		break;
	case 1: //un, ngle
		CompFPComp(fs, ft, CMP_UNORD);
		break;
	case 2: //eq, seq
		CompFPComp(fs, ft, CMP_EQ);
		break;
	case 3: //ueq, ngl
		CompFPComp(fs, ft, CMP_EQ, true);
		break;
	case 4: //olt, lt
		CompFPComp(fs, ft, CMP_LT);
		break;
	case 5: //ult, nge
		// Operands swapped with a "not" predicate so NaN yields true.
		CompFPComp(ft, fs, CMP_NLE);
		break;
	case 6: //ole, le
		CompFPComp(fs, ft, CMP_LE);
		break;
	case 7: //ule, ngt
		// Operands swapped with a "not" predicate so NaN yields true.
		CompFPComp(ft, fs, CMP_NLT);
		break;
	default:
		DISABLE;
	}
}
252
253
// Compiles the two-operand FPU ops (abs/mov/neg/sqrt and the float<->int
// conversions), dispatched on the function field (low 6 bits).
void Jit::Comp_FPU2op(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU);

	int fs = _FS;
	int fd = _FD;

	// Shared emitter for the float->int conversions (cvt.w.s and
	// trunc/round/ceil/floor.w.s). `conv` is the SSE conversion to emit;
	// `setMXCSR` selects the x86 MXCSR rounding-control bits to force
	// (0 = nearest, 1 = floor, 2 = ceil; bits 13-14), or -1 to keep the
	// currently active mode.
	auto execRounding = [&](void (XEmitter::*conv)(X64Reg, OpArg), int setMXCSR) {
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);

		// Small optimization: 0 is our default mode anyway.
		if (setMXCSR == 0 && !js.hasSetRounding) {
			setMXCSR = -1;
		}
		// Temporarily swap in the requested rounding mode, saving the old
		// MXCSR in mxcsrTemp so it can be restored at the end.
		if (setMXCSR != -1) {
			STMXCSR(MIPSSTATE_VAR(mxcsrTemp));
			MOV(32, R(TEMPREG), MIPSSTATE_VAR(mxcsrTemp));
			AND(32, R(TEMPREG), Imm32(~(3 << 13)));
			OR(32, R(TEMPREG), Imm32(setMXCSR << 13));
			MOV(32, MIPSSTATE_VAR(temp), R(TEMPREG));
			LDMXCSR(MIPSSTATE_VAR(temp));
		}

		(this->*conv)(TEMPREG, fpr.R(fs));

		// Did we get an indefinite integer value?
		// x86 produces 0x80000000 for out-of-range/NAN inputs; the code below
		// rewrites that to a saturated result based on the input's sign.
		CMP(32, R(TEMPREG), Imm32(0x80000000));
		FixupBranch skip = J_CC(CC_NE);
		if (fd != fs) {
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		// fd = (fs < 0.0f) ? 0xFFFFFFFF : 0x00000000.
		XORPS(XMM1, R(XMM1));
		CMPSS(fpr.RX(fd), R(XMM1), CMP_LT);

		// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
		// We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.
		MOVD_xmm(R(TEMPREG), fpr.RX(fd));
		XOR(32, R(TEMPREG), Imm32(0x7fffffff));

		SetJumpTarget(skip);
		MOVD_xmm(fpr.RX(fd), R(TEMPREG));

		// Restore the caller's rounding mode.
		if (setMXCSR != -1) {
			LDMXCSR(MIPSSTATE_VAR(mxcsrTemp));
		}
	};

	switch (op & 0x3f) {
	case 5: //F(fd) = fabsf(F(fs)); break; //abs
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		// Clear the sign bit by ANDing with ssNoSignMask.
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssNoSignMask[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			// Load the mask into fd, then AND fs in without touching fs.
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			ANDPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			ANDPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 6: //F(fd) = F(fs); break; //mov
		// Only emit anything if fd and fs are distinct registers.
		if (fd != fs) {
			fpr.SpillLock(fd, fs);
			fpr.MapReg(fd, fd == fs, true);
			CopyFPReg(fpr.RX(fd), fpr.R(fs));
		}
		break;

	case 7: //F(fd) = -F(fs); break; //neg
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		// Flip the sign bit by XORing with ssSignBits2.
		MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssSignBits2[0]));
		if (fd != fs && fpr.IsMapped(fs)) {
			MOVAPS(fpr.RX(fd), MatR(TEMPREG));
			XORPS(fpr.RX(fd), fpr.R(fs));
		} else {
			if (fd != fs) {
				MOVSS(fpr.RX(fd), fpr.R(fs));
			}
			XORPS(fpr.RX(fd), MatR(TEMPREG));
		}
		break;

	case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fd == fs, true);
		SQRTSS(fpr.RX(fd), fpr.R(fs));
		break;

	case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break; //trunc.w.s
		// CVTTSS2SI always truncates regardless of MXCSR, so no mode change needed.
		execRounding(&XEmitter::CVTTSS2SI, -1);
		break;

	case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w
		fpr.SpillLock(fd, fs);
		fpr.MapReg(fd, fs == fd, true);
		if (fpr.IsMapped(fs)) {
			CVTDQ2PS(fpr.RX(fd), fpr.R(fs));
		} else {
			// If fs was fd, we'd be in the case above since we mapped fd.
			MOVSS(fpr.RX(fd), fpr.R(fs));
			CVTDQ2PS(fpr.RX(fd), fpr.R(fd));
		}
		break;

	case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
		// Uses the current rounding mode.
		execRounding(&XEmitter::CVTSS2SI, -1);
		break;

	case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
		execRounding(&XEmitter::CVTSS2SI, 0);
		break;
	case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
		execRounding(&XEmitter::CVTSS2SI, 2);
		break;
	case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
		execRounding(&XEmitter::CVTSS2SI, 1);
		break;
	default:
		DISABLE;
		return;
	}
	fpr.ReleaseSpillLocks();
}
381
382
// Compiles the FPU<->GPR transfer ops (mfc1, cfc1, mtc1, ctc1),
// dispatched on the rs field (bits 21-25 of the opcode).
void Jit::Comp_mxc1(MIPSOpcode op) {
	CONDITIONAL_DISABLE(FPU_XFER);

	int fs = _FS;
	MIPSGPReg rt = _RT;

	switch ((op >> 21) & 0x1f) {
	case 0: // R(rt) = FI(fs); break; //mfc1
		// Writes to $zero are no-ops.
		if (rt == MIPS_REG_ZERO)
			return;
		gpr.MapReg(rt, false, true);
		// If fs is not mapped, most likely it's being abandoned.
		// Just load from memory in that case.
		if (fpr.R(fs).IsSimpleReg()) {
			MOVD_xmm(gpr.R(rt), fpr.RX(fs));
		} else {
			MOV(32, gpr.R(rt), fpr.R(fs));
		}
		break;

	case 2: // R(rt) = currentMIPS->ReadFCR(fs); break; //cfc1
		if (rt == MIPS_REG_ZERO)
			return;
		// FCR 31 is control/status. The FPU condition bit (bit 23) is cached
		// separately in MIPS_REG_FPCOND, so merge it into the stored fcr31.
		if (fs == 31) {
			bool wasImm = gpr.IsImm(MIPS_REG_FPCOND);
			if (!wasImm) {
				gpr.Lock(rt, MIPS_REG_FPCOND);
				gpr.MapReg(MIPS_REG_FPCOND, true, false);
			}
			gpr.MapReg(rt, false, true);
			MOV(32, gpr.R(rt), MIPSSTATE_VAR(fcr31));
			if (wasImm) {
				// Condition bit known at compile time: set/clear bit 23 directly.
				if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {
					OR(32, gpr.R(rt), Imm32(1 << 23));
				} else {
					AND(32, gpr.R(rt), Imm32(~(1 << 23)));
				}
			} else {
				// Insert the live FPCOND bit into bit 23 of the result.
				AND(32, gpr.R(rt), Imm32(~(1 << 23)));
				MOV(32, R(TEMPREG), gpr.R(MIPS_REG_FPCOND));
				AND(32, R(TEMPREG), Imm32(1));
				SHL(32, R(TEMPREG), Imm8(23));
				OR(32, gpr.R(rt), R(TEMPREG));
			}
			gpr.UnlockAll();
		} else if (fs == 0) {
			// FCR 0 reads back as a constant ID value.
			gpr.SetImm(rt, MIPSState::FCR0_VALUE);
		} else {
			// Other FCRs: fall back to the interpreter.
			Comp_Generic(op);
		}
		return;

	case 4: //FI(fs) = R(rt); break; //mtc1
		fpr.MapReg(fs, false, true);
		if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
			// Common case: moving a zero, just clear the target register.
			XORPS(fpr.RX(fs), fpr.R(fs));
		} else {
			gpr.KillImmediate(rt, true, false);
			MOVD_xmm(fpr.RX(fs), gpr.R(rt));
		}
		return;

	case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1
		// Only FCR 31 is handled here; writes update the cached condition
		// bit and may change the emulated rounding mode.
		if (fs == 31) {
			// Must clear before setting, since ApplyRoundingMode() assumes it was cleared.
			RestoreRoundingMode();
			if (gpr.IsImm(rt)) {
				// Value known at compile time: cache bit 23 in FPCOND, store
				// the masked value, and only re-apply a non-default mode.
				gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1);
				MOV(32, MIPSSTATE_VAR(fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF));
				if ((gpr.GetImm(rt) & 0x1000003) == 0) {
					// Default nearest / no-flush mode, just leave it cleared.
				} else {
					UpdateRoundingMode(gpr.GetImm(rt));
					ApplyRoundingMode();
				}
			} else {
				// Dynamic value: extract bit 23 into FPCOND, store the
				// masked fcr31, then recompute the host rounding mode.
				gpr.Lock(rt, MIPS_REG_FPCOND);
				gpr.MapReg(rt, true, false);
				gpr.MapReg(MIPS_REG_FPCOND, false, true);
				MOV(32, gpr.R(MIPS_REG_FPCOND), gpr.R(rt));
				SHR(32, gpr.R(MIPS_REG_FPCOND), Imm8(23));
				AND(32, gpr.R(MIPS_REG_FPCOND), Imm32(1));
				MOV(32, MIPSSTATE_VAR(fcr31), gpr.R(rt));
				AND(32, MIPSSTATE_VAR(fcr31), Imm32(0x0181FFFF));
				gpr.UnlockAll();
				UpdateRoundingMode();
				ApplyRoundingMode();
			}
		} else {
			Comp_Generic(op);
		}
		return;
	}
}
476
477
} // namespace MIPSComp
478
479
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
480
481