CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/x86/CompALU.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
20
21
#include <algorithm>
22
23
#include "Common/BitSet.h"
24
#include "Common/CommonTypes.h"
25
#include "Common/CPUDetect.h"
26
#include "Core/MIPS/MIPSAnalyst.h"
27
#include "Core/MIPS/MIPSCodeUtils.h"
28
#include "Core/MIPS/x86/Jit.h"
29
#include "Core/MIPS/x86/RegCache.h"
30
31
using namespace MIPSAnalyst;
32
33
#define _RS MIPS_GET_RS(op)
34
#define _RT MIPS_GET_RT(op)
35
#define _RD MIPS_GET_RD(op)
36
#define _FS MIPS_GET_FS(op)
37
#define _FT MIPS_GET_FT(op)
38
#define _FD MIPS_GET_FD(op)
39
#define _SA MIPS_GET_SA(op)
40
#define _POS ((op>> 6) & 0x1F)
41
#define _SIZE ((op>>11) & 0x1F)
42
#define _IMM16 (signed short)(op & 0xFFFF)
43
#define _IMM26 (op & 0x03FFFFFF)
44
45
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
46
// Currently known non working ones should have DISABLE.
47
48
// #define CONDITIONAL_DISABLE(ignore) { Comp_Generic(op); return; }
49
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
50
#define DISABLE { Comp_Generic(op); return; }
51
52
namespace MIPSComp
53
{
54
using namespace Gen;
55
using namespace X64JitConstants;
56
57
// Returns true if 'arg' is a simple register whose low 8 bits can be
// addressed directly (needed for SETcc without a temporary).
static bool HasLowSubregister(OpArg arg) {
	if (!arg.IsSimpleReg())
		return false;
#if !PPSSPP_ARCH(AMD64)
	// On 32-bit x86, only EAX/EBX/ECX/EDX have 8-bit subregisters.
	// Can't use ESI or EDI (which we use), no 8-bit versions.
	return arg.IsSimpleReg(EAX) || arg.IsSimpleReg(EBX) || arg.IsSimpleReg(ECX) || arg.IsSimpleReg(EDX);
#else
	return true;
#endif
}
66
67
// Shared tail for andi/ori/xori once constant folding has been ruled out:
// emits rt = rs OP zero-extended 16-bit immediate via the emitter member 'arith'.
void Jit::CompImmLogic(MIPSOpcode op, void (XEmitter::*arith)(int, const OpArg &, const OpArg &))
{
	// MIPS logical immediates are zero-extended, not sign-extended.
	u32 uimm = (u16)(op & 0xFFFF);
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	gpr.Lock(rt, rs);
	// Only load rt's old value when it is also the source operand.
	gpr.MapReg(rt, rt == rs, true);
	if (rt != rs)
		MOV(32, gpr.R(rt), gpr.R(rs));
	(this->*arith)(32, gpr.R(rt), Imm32(uimm));
	gpr.UnlockAll();
}
79
80
// Compiles I-type (immediate-operand) ALU instructions: addi/addiu,
// slti/sltiu, andi/ori/xori and lui.  Constant-folds whenever rs is a
// known immediate in the register cache.
void Jit::Comp_IType(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU_IMM);
	u32 uimm = op & 0xFFFF;
	s32 simm = SignExtend16ToS32(op);
	u32 suimm = SignExtend16ToU32(op);

	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;

	// noop, won't write to ZERO.
	if (rt == MIPS_REG_ZERO)
		return;

	switch (op >> 26)
	{
	case 8: // same as addiu?
	case 9: // R(rt) = R(rs) + simm; break; //addiu
		{
			// Fold entirely when the source value is known at compile time.
			if (gpr.IsImm(rs)) {
				gpr.SetImm(rt, gpr.GetImm(rs) + simm);
				break;
			}

			gpr.Lock(rt, rs);
			gpr.MapReg(rt, rt == rs, true);
			if (rt == rs) {
				// In-place add/sub; simm == 0 emits nothing at all.
				if (simm > 0) {
					ADD(32, gpr.R(rt), UImmAuto(simm));
				} else if (simm < 0) {
					SUB(32, gpr.R(rt), UImmAuto(-simm));
				}
			} else if (gpr.R(rs).IsSimpleReg()) {
				// LEA does reg + disp in a single instruction and avoids the MOV.
				LEA(32, gpr.RX(rt), MDisp(gpr.RX(rs), simm));
			} else {
				MOV(32, gpr.R(rt), gpr.R(rs));
				if (simm > 0)
					ADD(32, gpr.R(rt), UImmAuto(simm));
				else if (simm < 0) {
					SUB(32, gpr.R(rt), UImmAuto(-simm));
				}
			}
			gpr.UnlockAll();
		}
		break;

	case 10: // R(rt) = (s32)R(rs) < simm; break; //slti
		if (gpr.IsImm(rs)) {
			gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm);
		} else {
			gpr.Lock(rt, rs);
			// This is often used before a branch. If rs is not already mapped, let's leave it.
			gpr.MapReg(rt, rt == rs, true);

			// SETcc writes only 8 bits, so either zero-extend from TEMPREG or
			// pre-clear rt (possible only if rt has a low subregister and is
			// not also the comparison source).
			bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;
			if (needsTemp) {
				CMP(32, gpr.R(rs), Imm32(suimm));
				SETcc(CC_L, R(TEMPREG));
				MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));
			} else {
				// XOR before CMP so the flags from the compare are preserved.
				XOR(32, gpr.R(rt), gpr.R(rt));
				CMP(32, gpr.R(rs), Imm32(suimm));
				SETcc(CC_L, gpr.R(rt));
			}
			gpr.UnlockAll();
		}
		break;

	case 11: // R(rt) = R(rs) < uimm; break; //sltiu
		if (gpr.IsImm(rs)) {
			// Note: sltiu still sign-extends the immediate before the
			// unsigned comparison (suimm).
			gpr.SetImm(rt, gpr.GetImm(rs) < suimm);
		} else {
			gpr.Lock(rt, rs);
			// This is often used before a branch. If rs is not already mapped, let's leave it.
			gpr.MapReg(rt, rt == rs, true);

			bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;
			if (needsTemp) {
				CMP(32, gpr.R(rs), Imm32(suimm));
				SETcc(CC_B, R(TEMPREG));
				MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));
			} else {
				XOR(32, gpr.R(rt), gpr.R(rt));
				CMP(32, gpr.R(rs), Imm32(suimm));
				SETcc(CC_B, gpr.R(rt));
			}
			gpr.UnlockAll();
		}
		break;

	case 12: // R(rt) = R(rs) & uimm; break; //andi
		// AND with 0 is always 0 regardless of rs.
		if (uimm == 0)
			gpr.SetImm(rt, 0);
		else if (gpr.IsImm(rs))
			gpr.SetImm(rt, gpr.GetImm(rs) & uimm);
		else
			CompImmLogic(op, &XEmitter::AND);
		break;

	case 13: // R(rt) = R(rs) | uimm; break; //ori
		if (gpr.IsImm(rs))
			gpr.SetImm(rt, gpr.GetImm(rs) | uimm);
		else
			CompImmLogic(op, &XEmitter::OR);
		break;

	case 14: // R(rt) = R(rs) ^ uimm; break; //xori
		if (gpr.IsImm(rs))
			gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm);
		else
			CompImmLogic(op, &XEmitter::XOR);
		break;

	case 15: //R(rt) = uimm << 16; break; //lui
		// lui loads a constant; always representable as an immediate.
		gpr.SetImm(rt, uimm << 16);
		break;

	default:
		Comp_Generic(op);
		break;
	}
}
202
203
// Compiles the bit-counting instructions clz (count leading zeros) and
// clo (count leading ones), constant-folding when rs is known.
void Jit::Comp_RType2(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU_BIT);
	MIPSGPReg rs = _RS;
	MIPSGPReg rd = _RD;

	// Don't change $zr.
	if (rd == MIPS_REG_ZERO)
		return;

	switch (op & 63)
	{
	case 22: //clz
		if (gpr.IsImm(rs))
		{
			// Count zero bits from bit 31 downward until the first set bit.
			u32 value = gpr.GetImm(rs);
			int x = 31;
			int count = 0;
			while (x >= 0 && !(value & (1 << x)))
			{
				count++;
				x--;
			}
			gpr.SetImm(rd, count);
		}
		else
		{
			gpr.Lock(rd, rs);
			gpr.MapReg(rd, rd == rs, true);
			// BSR gives the index of the highest set bit; it sets ZF (and
			// leaves the destination undefined) when the source is zero,
			// hence the notFound branch.
			BSR(32, TEMPREG, gpr.R(rs));
			FixupBranch notFound = J_CC(CC_Z);

			// clz = 31 - index of highest set bit.
			MOV(32, gpr.R(rd), Imm32(31));
			SUB(32, gpr.R(rd), R(TEMPREG));
			FixupBranch skip = J();

			SetJumpTarget(notFound);
			// Source was zero: all 32 bits are leading zeros.
			MOV(32, gpr.R(rd), Imm32(32));

			SetJumpTarget(skip);
			gpr.UnlockAll();
		}
		break;
	case 23: //clo
		if (gpr.IsImm(rs))
		{
			// Count one bits from bit 31 downward until the first clear bit.
			u32 value = gpr.GetImm(rs);
			int x = 31;
			int count = 0;
			while (x >= 0 && (value & (1 << x)))
			{
				count++;
				x--;
			}
			gpr.SetImm(rd, count);
		}
		else
		{
			gpr.Lock(rd, rs);
			gpr.MapReg(rd, rd == rs, true);
			// clo(x) == clz(~x): invert into TEMPREG and use the same BSR trick.
			MOV(32, R(TEMPREG), gpr.R(rs));
			NOT(32, R(TEMPREG));
			BSR(32, TEMPREG, R(TEMPREG));
			FixupBranch notFound = J_CC(CC_Z);

			MOV(32, gpr.R(rd), Imm32(31));
			SUB(32, gpr.R(rd), R(TEMPREG));
			FixupBranch skip = J();

			SetJumpTarget(notFound);
			// All bits were ones.
			MOV(32, gpr.R(rd), Imm32(32));

			SetJumpTarget(skip);
			gpr.UnlockAll();
		}
		break;
	default:
		DISABLE;
	}
}
283
284
// Constant-folding counterpart of ADD for CompTriArith: rs + rt.
static u32 RType3_ImmAdd(const u32 a, const u32 b)
{
	u32 result = a;
	result += b;
	return result;
}
288
289
// Constant-folding counterpart of SUB for CompTriArith: rs - rt.
static u32 RType3_ImmSub(const u32 a, const u32 b)
{
	u32 result = a;
	result -= b;
	return result;
}
293
294
// Constant-folding counterpart of AND for CompTriArith: rs & rt.
static u32 RType3_ImmAnd(const u32 a, const u32 b)
{
	u32 result = a;
	result &= b;
	return result;
}
298
299
// Constant-folding counterpart of OR for CompTriArith: rs | rt.
static u32 RType3_ImmOr(const u32 a, const u32 b)
{
	u32 result = a;
	result |= b;
	return result;
}
303
304
// Constant-folding counterpart of XOR for CompTriArith: rs ^ rt.
static u32 RType3_ImmXor(const u32 a, const u32 b)
{
	u32 result = a;
	result ^= b;
	return result;
}
308
309
// rd = rs X rt, where X is the x86 op 'arith' (ADD/SUB/AND/OR/XOR).
// 'doImm' is the matching compile-time fold (used both for folding and to
// identify which operation we're emitting); invertResult applies a final
// NOT (used to build nor from or).
void Jit::CompTriArith(MIPSOpcode op, void (XEmitter::*arith)(int, const OpArg &, const OpArg &), u32 (*doImm)(const u32, const u32), bool invertResult)
{
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	MIPSGPReg rd = _RD;

	// Both sides known, we can just evaporate the instruction.
	if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt)) {
		u32 value = doImm(gpr.GetImm(rs), gpr.GetImm(rt));
		gpr.SetImm(rd, invertResult ? (~value) : value);
		return;
	}

	// Act like zero was used if the operand is equivalent. This happens.
	if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0)
		rs = MIPS_REG_ZERO;
	if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
		rt = MIPS_REG_ZERO;

	// Special cases that translate nicely
	// 0 - rd (negu) done in place with a single NEG.
	if (doImm == &RType3_ImmSub && rs == MIPS_REG_ZERO && rt == rd) {
		gpr.MapReg(rd, true, true);
		NEG(32, gpr.R(rd));
		if (invertResult) {
			NOT(32, gpr.R(rd));
		}
		return;
	}

	gpr.Lock(rt, rs, rd);
	// Optimize out operations against 0... and is the only one that isn't a MOV.
	// (subtraction is excluded on the rs side since 0 - rt isn't a plain move.)
	if (rt == MIPS_REG_ZERO || (rs == MIPS_REG_ZERO && doImm != &RType3_ImmSub)) {
		if (doImm == &RType3_ImmAnd) {
			// x & 0 == 0 (or ~0 for nor-style inversion.)
			gpr.SetImm(rd, invertResult ? 0xFFFFFFFF : 0);
		} else {
			// For add/sub/or/xor, op with zero is just a copy of the other side.
			MIPSGPReg rsource = (rt == MIPS_REG_ZERO) ? rs : rt;
			if (rsource != rd) {
				gpr.MapReg(rd, false, true);
				MOV(32, gpr.R(rd), gpr.R(rsource));
				if (invertResult) {
					NOT(32, gpr.R(rd));
				}
			} else if (invertResult) {
				// rsource == rd, but still need to invert.
				gpr.MapReg(rd, true, true);
				NOT(32, gpr.R(rd));
			}
		}
	} else if (gpr.IsImm(rt)) {
		// No temporary needed.
		// Read the immediate before MapReg(rd, ...) can discard it (rt may alias rd).
		u32 rtval = gpr.GetImm(rt);
		gpr.MapReg(rd, rs == rd, true);
		if (rs != rd) {
			MOV(32, gpr.R(rd), gpr.R(rs));
		}
		(this->*arith)(32, gpr.R(rd), Imm32(rtval));
		if (invertResult) {
			NOT(32, gpr.R(rd));
		}
	} else {
		// Use TEMPREG as a temporary if we'd overwrite it.
		// (the MOV into rd below would clobber rt when rd == rt.)
		if (rd == rt)
			MOV(32, R(TEMPREG), gpr.R(rt));
		gpr.MapReg(rd, rs == rd, true);
		if (rs != rd)
			MOV(32, gpr.R(rd), gpr.R(rs));
		(this->*arith)(32, gpr.R(rd), rd == rt ? R(TEMPREG) : gpr.R(rt));
		if (invertResult) {
			NOT(32, gpr.R(rd));
		}
	}
	gpr.UnlockAll();
}
383
384
// Compiles three-register R-type ALU instructions: movz/movn, add(u),
// sub(u), and/or/xor/nor, slt/sltu, and the Allegrex max/min.
void Jit::Comp_RType3(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU);

	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	MIPSGPReg rd = _RD;

	// noop, won't write to ZERO.
	if (rd == MIPS_REG_ZERO)
		return;

	switch (op & 63)
	{
	case 10: //if (R(rt) == 0) R(rd) = R(rs); break; //movz
		// When rd == rs the conditional move can never change anything.
		if (rd == rs)
			break;
		gpr.Lock(rt, rs, rd);
		if (!gpr.IsImm(rt))
		{
			gpr.KillImmediate(rs, true, false);
			// Need to load rd in case the condition fails.
			gpr.MapReg(rd, true, true);
			CMP(32, gpr.R(rt), Imm32(0));
			CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_E);
		}
		else if (gpr.GetImm(rt) == 0)
		{
			// Condition statically true: plain move (or immediate propagation.)
			if (gpr.IsImm(rs))
				gpr.SetImm(rd, gpr.GetImm(rs));
			else if (rd != rs)
			{
				gpr.MapReg(rd, false, true);
				MOV(32, gpr.R(rd), gpr.R(rs));
			}
		}
		// else: condition statically false, nothing to emit.
		gpr.UnlockAll();
		break;

	case 11: //if (R(rt) != 0) R(rd) = R(rs); break; //movn
		if (rd == rs)
			break;
		gpr.Lock(rt, rs, rd);
		if (!gpr.IsImm(rt))
		{
			gpr.KillImmediate(rs, true, false);
			// Need to load rd in case the condition fails.
			gpr.MapReg(rd, true, true);
			CMP(32, gpr.R(rt), Imm32(0));
			CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_NE);
		}
		else if (gpr.GetImm(rt) != 0)
		{
			if (gpr.IsImm(rs))
				gpr.SetImm(rd, gpr.GetImm(rs));
			else if (rd != rs)
			{
				gpr.MapReg(rd, false, true);
				MOV(32, gpr.R(rd), gpr.R(rs));
			}
		}
		gpr.UnlockAll();
		break;

	case 32: //R(rd) = R(rs) + R(rt); break; //add
	case 33: //R(rd) = R(rs) + R(rt); break; //addu
		// If both sources are live registers and rd is distinct, one LEA does it.
		if (rd != rs && rd != rt && gpr.R(rs).IsSimpleReg() && gpr.R(rt).IsSimpleReg()) {
			gpr.Lock(rt, rs, rd);
			gpr.MapReg(rd, false, true);
			LEA(32, gpr.RX(rd), MRegSum(gpr.RX(rs), gpr.RX(rt)));
			gpr.UnlockAll();
		} else {
			CompTriArith(op, &XEmitter::ADD, &RType3_ImmAdd);
		}
		break;
	case 34: //R(rd) = R(rs) - R(rt); break; //sub
	case 35: //R(rd) = R(rs) - R(rt); break; //subu
		CompTriArith(op, &XEmitter::SUB, &RType3_ImmSub);
		break;
	case 36: //R(rd) = R(rs) & R(rt); break; //and
		CompTriArith(op, &XEmitter::AND, &RType3_ImmAnd);
		break;
	case 37: //R(rd) = R(rs) | R(rt); break; //or
		CompTriArith(op, &XEmitter::OR, &RType3_ImmOr);
		break;
	case 38: //R(rd) = R(rs) ^ R(rt); break; //xor
		CompTriArith(op, &XEmitter::XOR, &RType3_ImmXor);
		break;

	case 39: // R(rd) = ~(R(rs) | R(rt)); //nor
		// nor is or with the invertResult flag.
		CompTriArith(op, &XEmitter::OR, &RType3_ImmOr, true);
		break;

	case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt
		if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
			gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));
		} else if (rs == rt) {
			// x < x is always false.
			gpr.SetImm(rd, 0);
		} else {
			gpr.Lock(rd, rs, rt);
			gpr.MapReg(rd, rd == rt || rd == rs, true);

			// Let's try to avoid loading rs or if it's an imm, flushing it.
			MIPSGPReg lhs = rs;
			MIPSGPReg rhs = rt;
			CCFlags cc = CC_L;
			if (gpr.IsImm(lhs)) {
				// rhs is guaranteed not to be an imm (handled above.)
				// Swap operands and flip the condition so the imm is on the right.
				std::swap(lhs, rhs);
				cc = SwapCCFlag(cc);
			} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {
				// Let's try to pick which makes more sense to load.
				if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {
					std::swap(lhs, rhs);
					cc = SwapCCFlag(cc);
				}
				gpr.MapReg(lhs, true, false);
			}

			// SETcc writes 8 bits; see the slti case in Comp_IType for the
			// same temp-vs-preclear decision.
			bool needsTemp = !HasLowSubregister(gpr.R(rd)) || rd == rt || rd == rs;
			if (needsTemp) {
				CMP(32, gpr.R(lhs), gpr.R(rhs));
				SETcc(cc, R(TEMPREG));
				MOVZX(32, 8, gpr.RX(rd), R(TEMPREG));
			} else {
				XOR(32, gpr.R(rd), gpr.R(rd));
				CMP(32, gpr.R(lhs), gpr.R(rhs));
				SETcc(cc, gpr.R(rd));
			}
			gpr.UnlockAll();
		}
		break;

	case 43: //R(rd) = R(rs) < R(rt); break; //sltu
		if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
			gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));
		} else if (rs == rt) {
			gpr.SetImm(rd, 0);
		} else {
			gpr.Lock(rd, rs, rt);
			gpr.MapReg(rd, rd == rt || rd == rs, true);

			// Let's try to avoid loading rs or if it's an imm, flushing it.
			MIPSGPReg lhs = rs;
			MIPSGPReg rhs = rt;
			CCFlags cc = CC_B;
			if (gpr.IsImm(lhs)) {
				// rhs is guaranteed not to be an imm (handled above.)
				std::swap(lhs, rhs);
				cc = SwapCCFlag(cc);
			} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {
				// Let's try to pick which makes more sense to load.
				if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {
					std::swap(lhs, rhs);
					cc = SwapCCFlag(cc);
				}
				gpr.MapReg(lhs, true, false);
			}

			bool needsTemp = !HasLowSubregister(gpr.R(rd)) || rd == rt || rd == rs;
			if (needsTemp) {
				CMP(32, gpr.R(lhs), gpr.R(rhs));
				SETcc(cc, R(TEMPREG));
				MOVZX(32, 8, gpr.RX(rd), R(TEMPREG));
			} else {
				XOR(32, gpr.R(rd), gpr.R(rd));
				CMP(32, gpr.R(lhs), gpr.R(rhs));
				SETcc(cc, gpr.R(rd));
			}
			gpr.UnlockAll();
		}
		break;

	case 44: //R(rd) = (R(rs) > R(rt)) ? R(rs) : R(rt); break; //max
		if (gpr.IsImm(rs) && gpr.IsImm(rt))
			gpr.SetImm(rd, std::max((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
		else
		{
			// rsrc is whichever source rd does NOT alias; the other side ends
			// up in rd and is replaced by rsrc if it's larger.
			MIPSGPReg rsrc = rd == rt ? rs : rt;
			gpr.Lock(rd, rs, rt);
			gpr.KillImmediate(rsrc, true, false);
			gpr.MapReg(rd, rd == rs || rd == rt, true);
			if (rd != rt && rd != rs)
				MOV(32, gpr.R(rd), gpr.R(rs));
			CMP(32, gpr.R(rd), gpr.R(rsrc));
			// Signed: take rsrc when rd < rsrc.
			CMOVcc(32, gpr.RX(rd), gpr.R(rsrc), CC_L);
			gpr.UnlockAll();
		}
		break;

	case 45: //R(rd) = (R(rs) < R(rt)) ? R(rs) : R(rt); break; //min
		if (gpr.IsImm(rs) && gpr.IsImm(rt))
			gpr.SetImm(rd, std::min((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
		else
		{
			MIPSGPReg rsrc = rd == rt ? rs : rt;
			gpr.Lock(rd, rs, rt);
			gpr.KillImmediate(rsrc, true, false);
			gpr.MapReg(rd, rd == rs || rd == rt, true);
			if (rd != rt && rd != rs)
				MOV(32, gpr.R(rd), gpr.R(rs));
			CMP(32, gpr.R(rd), gpr.R(rsrc));
			// Signed: take rsrc when rd > rsrc.
			CMOVcc(32, gpr.RX(rd), gpr.R(rsrc), CC_G);
			gpr.UnlockAll();
		}
		break;

	default:
		Comp_Generic(op);
		break;
	}
}
596
597
// Constant-folds SLL: MIPS uses only the low 5 bits of the shift amount.
static u32 ShiftType_ImmLogicalLeft(const u32 a, const u32 b)
{
	const u32 amount = b & 0x1f;
	return a << amount;
}
601
602
// Constant-folds SRL: logical (zero-filling) right shift by the low 5 bits of b.
static u32 ShiftType_ImmLogicalRight(const u32 a, const u32 b)
{
	const u32 amount = b & 0x1f;
	return a >> amount;
}
606
607
// Constant-folds SRA: arithmetic (sign-extending) right shift by the low 5 bits of b.
static u32 ShiftType_ImmArithRight(const u32 a, const u32 b)
{
	const u32 amount = b & 0x1f;
	const s32 signedValue = (s32)a;
	return (u32)(signedValue >> amount);
}
611
612
// Constant-folds ROTR: rotate a right by the low 5 bits of b.
// The left-shift count is masked so that sa == 0 yields (a >> 0) | (a << 0)
// instead of the undefined behavior of a 32-bit shift (a << 32) that the
// previous version had.
static u32 ShiftType_ImmRotateRight(const u32 a, const u32 b)
{
	const u32 sa = b & 0x1f;
	return (a >> sa) | (a << ((32 - sa) & 0x1f));
}
617
618
// Compiles fixed-amount shifts (sll/srl/sra/rotr): rd = rt SHIFT sa.
// 'shift' is the x86 emitter op; 'doImm' constant-folds when rt is known.
void Jit::CompShiftImm(MIPSOpcode op, void (XEmitter::*shift)(int, OpArg, OpArg), u32 (*doImm)(const u32, const u32))
{
	MIPSGPReg rd = _RD;
	MIPSGPReg rt = _RT;
	int sa = _SA;

	if (doImm && gpr.IsImm(rt))
	{
		gpr.SetImm(rd, doImm(gpr.GetImm(rt), sa));
		return;
	}

	gpr.Lock(rd, rt);
	// Only load rd's old value when it aliases the source.
	gpr.MapReg(rd, rd == rt, true);
	if (rd != rt)
		MOV(32, gpr.R(rd), gpr.R(rt));
	(this->*shift)(32, gpr.R(rd), Imm8(sa));
	gpr.UnlockAll();
}
637
638
// "over-shifts" work the same as on x86 - only bottom 5 bits are used to get the shift value
// Compiles variable-amount shifts (sllv/srlv/srav/rotrv): rd = rt SHIFT rs.
// Uses BMI2 (RORX/SHLX/SHRX/SARX) when available to avoid tying up ECX.
void Jit::CompShiftVar(MIPSOpcode op, void (XEmitter::*shift)(int, OpArg, OpArg), u32 (*doImm)(const u32, const u32))
{
	MIPSGPReg rd = _RD;
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;

	if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))
	{
		gpr.SetImm(rd, doImm(gpr.GetImm(rt), gpr.GetImm(rs)));
		return;
	}

	gpr.Lock(rd, rt, rs);
	if (gpr.IsImm(rs)) {
		// Shift amount is known: degenerate to an immediate shift.
		int sa = gpr.GetImm(rs);
		gpr.MapReg(rd, rd == rt, true);
		if (cpu_info.bBMI2 && shift == &XEmitter::ROR) {
			// RORX rotates into a different destination, no MOV needed.
			// rt can't be an imm here or the fold above would have run.
			_assert_(!gpr.IsImm(rt));
			RORX(32, gpr.RX(rd), gpr.R(rt), sa & 0x1F);
		} else {
			if (rd != rt)
				MOV(32, gpr.R(rd), gpr.R(rt));
			(this->*shift)(32, gpr.R(rd), Imm8(sa & 0x1F));
		}
	} else if (cpu_info.bBMI2 && shift != &XEmitter::ROR) {
		// BMI2 three-operand shifts: amount comes from any register, and
		// the count is masked to 5 bits by the instruction itself.
		gpr.MapReg(rd, rd == rt || rd == rs, true);
		gpr.MapReg(rs, true, false);
		MIPSGPReg src = rt;
		if (gpr.IsImm(rt) && rd == rs) {
			// Can't stage through rd (it holds the shift amount); load rt instead.
			gpr.MapReg(rt, true, false);
		} else if (gpr.IsImm(rt)) {
			// Materialize the immediate into rd and shift in place.
			MOV(32, gpr.R(rd), gpr.R(rt));
			src = rd;
		}
		if (shift == &XEmitter::SHL)
			SHLX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));
		else if (shift == &XEmitter::SHR)
			SHRX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));
		else if (shift == &XEmitter::SAR)
			SARX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));
		else
			_assert_msg_(false, "Unexpected shift type");
	} else {
		// Classic path: variable shift counts must live in CL.
		gpr.FlushLockX(ECX);
		gpr.MapReg(rd, rd == rt || rd == rs, true);
		MOV(32, R(ECX), gpr.R(rs)); // Only ECX can be used for variable shifts.
		AND(32, R(ECX), Imm32(0x1f));
		if (rd != rt)
			MOV(32, gpr.R(rd), gpr.R(rt));
		(this->*shift)(32, gpr.R(rd), R(ECX));
		gpr.UnlockAllX();
	}
	gpr.UnlockAll();
}
693
694
// Dispatches shift instructions.  Note the PSP encodes rotr/rotrv by
// setting a flag bit in the rs field (for immediates) or the fd/sa field
// (for variable shifts) of the srl/srlv encodings.
void Jit::Comp_ShiftType(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU);
	// rs field doubles as the rotate flag for srl (rs == 1 means rotr).
	int rs = (op>>21) & 0x1F;
	MIPSGPReg rd = _RD;
	// sa/fd field doubles as the rotate flag for srlv (fd == 1 means rotrv).
	int fd = (op>>6) & 0x1F;

	// noop, won't write to ZERO.
	if (rd == MIPS_REG_ZERO)
		return;

	// WARNING : ROTR
	switch (op & 0x3f)
	{
	case 0: CompShiftImm(op, &XEmitter::SHL, &ShiftType_ImmLogicalLeft); break; // sll
	case 2: CompShiftImm(op, rs == 1 ? &XEmitter::ROR : &XEmitter::SHR, rs == 1 ? &ShiftType_ImmRotateRight : &ShiftType_ImmLogicalRight); break; // srl, rotr
	case 3: CompShiftImm(op, &XEmitter::SAR, &ShiftType_ImmArithRight); break; // sra

	case 4: CompShiftVar(op, &XEmitter::SHL, &ShiftType_ImmLogicalLeft); break; //sllv
	case 6: CompShiftVar(op, fd == 1 ? &XEmitter::ROR : &XEmitter::SHR, fd == 1 ? &ShiftType_ImmRotateRight : &ShiftType_ImmLogicalRight); break; //srlv, rotrv
	case 7: CompShiftVar(op, &XEmitter::SAR, &ShiftType_ImmArithRight); break; //srav

	default:
		Comp_Generic(op);
		break;
	}
}
721
722
// Compiles SPECIAL3 bit-field instructions: ext (extract bit field) and
// ins (insert bit field).
// NOTE(review): the switch has no default case, so any other funct value
// silently emits nothing — presumably only ext/ins are ever dispatched
// here; confirm against the decoder table.
void Jit::Comp_Special3(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU_BIT);
	MIPSGPReg rs = _RS;
	MIPSGPReg rt = _RT;

	// pos = lsb of the field, size = field width in bits (encoded as size-1).
	int pos = _POS;
	int size = _SIZE + 1;
	// Mask of 'size' low bits.
	u32 mask = 0xFFFFFFFFUL >> (32 - size);

	// Don't change $zr.
	if (rt == MIPS_REG_ZERO)
		return;

	switch (op & 0x3f)
	{
	case 0x0: //ext
		// rt = (rs >> pos) & mask
		if (gpr.IsImm(rs))
		{
			gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
			return;
		}

		gpr.Lock(rs, rt);
		gpr.MapReg(rt, rs == rt, true);
		if (rs != rt)
			MOV(32, gpr.R(rt), gpr.R(rs));
		if (pos != 0) {
			SHR(32, gpr.R(rt), Imm8(pos));
		}
		// Might not need to AND if we used a wall anyway.
		// (i.e. if the shift already discarded every bit above the field.)
		if ((0xFFFFFFFF >> pos) != mask) {
			AND(32, gpr.R(rt), Imm32(mask));
		}
		gpr.UnlockAll();
		break;

	case 0x4: //ins
		// rt = (rt & ~fieldmask) | ((rs & sourcemask) << pos)
		{
			u32 sourcemask = mask >> pos;
			u32 destmask = ~(sourcemask << pos);
			if (gpr.IsImm(rs))
			{
				u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
				if (gpr.IsImm(rt))
				{
					// Both known: fold completely.
					gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
					return;
				}

				gpr.Lock(rs, rt);
				gpr.MapReg(rt, true, true);
				AND(32, gpr.R(rt), Imm32(destmask));
				if (inserted != 0)
					OR(32, gpr.R(rt), Imm32(inserted));
				gpr.UnlockAll();
			}
			else if (gpr.IsImm(rt))
			{
				// This happens. We can skip the AND and a load.
				gpr.Lock(rs, rt);
				// Read rt's immediate before MapReg(rt, false, ...) discards it.
				u32 rtImm = gpr.GetImm(rt) & destmask;
				gpr.MapReg(rt, false, true);
				MOV(32, gpr.R(rt), gpr.R(rs));
				AND(32, gpr.R(rt), Imm32(sourcemask));
				if (pos != 0) {
					SHL(32, gpr.R(rt), Imm8(pos));
				}
				OR(32, gpr.R(rt), Imm32(rtImm));
				gpr.UnlockAll();
			}
			else
			{
				// General case: build the shifted field in TEMPREG, clear the
				// destination slice in rt, then merge.
				gpr.Lock(rs, rt);
				gpr.MapReg(rt, true, true);
				MOV(32, R(TEMPREG), gpr.R(rs));
				AND(32, R(TEMPREG), Imm32(sourcemask));
				if (pos != 0) {
					SHL(32, R(TEMPREG), Imm8(pos));
				}
				AND(32, gpr.R(rt), Imm32(destmask));
				OR(32, gpr.R(rt), R(TEMPREG));
				gpr.UnlockAll();
			}
		}
		break;
	}
}
810
811
812
// Compiles Allegrex-specific bit instructions: seb (sign-extend byte),
// bitrev (reverse all 32 bits), and seh (sign-extend halfword).
void Jit::Comp_Allegrex(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU_BIT);
	MIPSGPReg rt = _RT;
	MIPSGPReg rd = _RD;
	// Don't change $zr.
	if (rd == MIPS_REG_ZERO)
		return;

	switch ((op >> 6) & 31)
	{
	case 16: // seb // R(rd) = SignExtend8ToU32(R(rt));
		if (gpr.IsImm(rt))
		{
			gpr.SetImm(rd, SignExtend8ToU32(gpr.GetImm(rt)));
			break;
		}

		gpr.Lock(rd, rt);
		gpr.MapReg(rd, rd == rt, true);
		// Work around the byte-register addressing problem.
		// (MOVSX from a register needs an 8-bit subregister; stage through
		// TEMPREG when rt doesn't have one.)
		if (gpr.R(rt).IsSimpleReg() && !HasLowSubregister(gpr.R(rt)))
		{
			MOV(32, R(TEMPREG), gpr.R(rt));
			MOVSX(32, 8, gpr.RX(rd), R(TEMPREG));
		}
		else
		{
			gpr.KillImmediate(rt, true, false);
			MOVSX(32, 8, gpr.RX(rd), gpr.R(rt));
		}
		gpr.UnlockAll();
		break;

	case 20: //bitrev
		if (gpr.IsImm(rt))
		{
			// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
			u32 v = gpr.GetImm(rt);
			// swap odd and even bits
			v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
			// swap consecutive pairs
			v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
			// swap nibbles ...
			v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
			// swap bytes
			v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
			// swap 2-byte long pairs
			v = ( v >> 16 ) | ( v << 16);
			gpr.SetImm(rd, v);
			break;
		}

		gpr.Lock(rd, rt);
		gpr.MapReg(rd, rd == rt, true);
		if (rd != rt)
			MOV(32, gpr.R(rd), gpr.R(rt));

		// Each stage below swaps adjacent groups using the identity
		// result = (v>>k & M) | (v & M) << k, computed as:
		//   t = v << k;  v >>= k;  v = ((v ^ t) & M) ^ t
		// which needs only one mask constant.  LEA with scale 2/4 forms
		// v << 1 and v << 2 without disturbing v.

		// Stage 1: swap odd/even bits (k = 1, M = 0x55555555).
		LEA(32, TEMPREG, MScaled(gpr.RX(rd), 2, 0));
		SHR(32, gpr.R(rd), Imm8(1));
		XOR(32, gpr.R(rd), R(TEMPREG));
		AND(32, gpr.R(rd), Imm32(0x55555555));
		XOR(32, gpr.R(rd), R(TEMPREG));

		// Stage 2: swap bit pairs (k = 2, M = 0x33333333).
		LEA(32, TEMPREG, MScaled(gpr.RX(rd), 4, 0));
		SHR(32, gpr.R(rd), Imm8(2));
		XOR(32, gpr.R(rd), R(TEMPREG));
		AND(32, gpr.R(rd), Imm32(0x33333333));
		XOR(32, gpr.R(rd), R(TEMPREG));

		// Stage 3: swap nibbles (k = 4, M = 0x0F0F0F0F).
		MOV(32, R(TEMPREG), gpr.R(rd));
		SHL(32, R(TEMPREG), Imm8(4));
		SHR(32, gpr.R(rd), Imm8(4));
		XOR(32, gpr.R(rd), R(TEMPREG));
		AND(32, gpr.R(rd), Imm32(0x0F0F0F0F));
		XOR(32, gpr.R(rd), R(TEMPREG));

		// Stage 4: swap bytes (k = 8, M = 0x00FF00FF).
		MOV(32, R(TEMPREG), gpr.R(rd));
		SHL(32, R(TEMPREG), Imm8(8));
		SHR(32, gpr.R(rd), Imm8(8));
		XOR(32, gpr.R(rd), R(TEMPREG));
		AND(32, gpr.R(rd), Imm32(0x00FF00FF));
		XOR(32, gpr.R(rd), R(TEMPREG));

		// Stage 5: swap 16-bit halves — a single rotate.
		ROL(32, gpr.R(rd), Imm8(16));

		gpr.UnlockAll();
		break;

	case 24: // seh // R(rd) = SignExtend16ToU32(R(rt));
		if (gpr.IsImm(rt))
		{
			gpr.SetImm(rd, SignExtend16ToU32(gpr.GetImm(rt)));
			break;
		}

		gpr.Lock(rd, rt);
		gpr.MapReg(rd, rd == rt, true);
		// 16-bit MOVSX has no subregister restriction, unlike the 8-bit case.
		MOVSX(32, 16, gpr.RX(rd), gpr.R(rt));
		gpr.UnlockAll();
		break;

	default:
		Comp_Generic(op);
		return;
	}
}
919
920
// Compiles Allegrex byte-swap instructions: wsbh (swap bytes within each
// halfword) and wsbw (swap all four bytes of the word).
void Jit::Comp_Allegrex2(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(ALU_BIT);
	MIPSGPReg rt = _RT;
	MIPSGPReg rd = _RD;
	// Don't change $zr.
	if (rd == MIPS_REG_ZERO)
		return;

	switch (op & 0x3ff)
	{
	case 0xA0: //wsbh
		if (gpr.IsImm(rt)) {
			u32 rtImm = gpr.GetImm(rt);
			gpr.SetImm(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
			break;
		}
		gpr.Lock(rd, rt);
		gpr.MapReg(rd, rd == rt, true);
		if (rd != rt)
			MOV(32, gpr.R(rd), gpr.R(rt));
		// Swap both 16-bit halfwords by rotating afterward.
		// (BSWAP reverses all four bytes; the ROR 16 puts the halfwords
		// back in place, leaving only the bytes within each half swapped.)
		BSWAP(32, gpr.RX(rd));
		ROR(32, gpr.R(rd), Imm8(16));
		gpr.UnlockAll();
		break;
	case 0xE0: //wsbw
		if (gpr.IsImm(rt)) {
			gpr.SetImm(rd, swap32(gpr.GetImm(rt)));
			break;
		}
		gpr.Lock(rd, rt);
		gpr.MapReg(rd, rd == rt, true);
		if (rd != rt)
			MOV(32, gpr.R(rd), gpr.R(rt));
		// Full 32-bit byte swap maps directly to BSWAP.
		BSWAP(32, gpr.RX(rd));
		gpr.UnlockAll();
		break;
	default:
		Comp_Generic(op);
		break;
	}
}
963
964
// Compiles multiply/divide and HI/LO-transfer instructions: mfhi/mthi,
// mflo/mtlo, mult(u), div(u), madd(u), msub(u).
// x86's one-operand MUL/IMUL/DIV/IDIV require EAX (and clobber EDX), so
// EDX is flush-locked around those sequences.
void Jit::Comp_MulDivType(MIPSOpcode op)
{
	CONDITIONAL_DISABLE(MULDIV);
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	MIPSGPReg rd = _RD;

	switch (op & 63)
	{
	case 16: // R(rd) = HI; //mfhi
		if (rd != MIPS_REG_ZERO) {
			gpr.MapReg(rd, false, true);
			MOV(32, gpr.R(rd), gpr.R(MIPS_REG_HI));
		}
		break;

	case 17: // HI = R(rs); //mthi
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.MapReg(rs, true, false);
		MOV(32, gpr.R(MIPS_REG_HI), gpr.R(rs));
		break;

	case 18: // R(rd) = LO; break; //mflo
		if (rd != MIPS_REG_ZERO) {
			gpr.MapReg(rd, false, true);
			MOV(32, gpr.R(rd), gpr.R(MIPS_REG_LO));
		}
		break;

	case 19: // LO = R(rs); break; //mtlo
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.MapReg(rs, true, false);
		MOV(32, gpr.R(MIPS_REG_LO), gpr.R(rs));
		break;

	case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt)
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		// Mul, this must be EAX!
		MOV(32, R(EAX), gpr.R(rs));
		IMUL(32, gpr.R(rt));
		// One-operand IMUL leaves the 64-bit product in EDX:EAX.
		MOV(32, gpr.R(MIPS_REG_HI), R(EDX));
		MOV(32, gpr.R(MIPS_REG_LO), R(EAX));
		gpr.UnlockAllX();
		break;

	case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		MOV(32, R(EAX), gpr.R(rs));
		MUL(32, gpr.R(rt));
		MOV(32, gpr.R(MIPS_REG_HI), R(EDX));
		MOV(32, gpr.R(MIPS_REG_LO), R(EAX));
		gpr.UnlockAllX();
		break;

	case 26: //div
		{
			// x86 IDIV faults (#DE) on divide-by-zero and on
			// INT_MIN / -1, so both are checked and given explicit results
			// instead (MIPS defines no trap here).
			gpr.FlushLockX(EDX);
			gpr.KillImmediate(MIPS_REG_HI, false, true);
			gpr.KillImmediate(MIPS_REG_LO, false, true);
			// For CMP.
			gpr.KillImmediate(rs, true, false);
			gpr.KillImmediate(rt, true, false);

			MOV(32, R(EAX), gpr.R(rs));

			CMP(32, gpr.R(rt), Imm32(0));
			FixupBranch divZero = J_CC(CC_E);

			// INT_MAX / -1 would overflow.
			CMP(32, gpr.R(rs), Imm32(0x80000000));
			FixupBranch notOverflow = J_CC(CC_NE);
			CMP(32, gpr.R(rt), Imm32((u32) -1));
			FixupBranch notOverflow2 = J_CC(CC_NE);
			// Overflow case: LO = INT_MIN, HI = -1.
			MOV(32, gpr.R(MIPS_REG_LO), Imm32(0x80000000));
			MOV(32, gpr.R(MIPS_REG_HI), Imm32(-1));
			FixupBranch skip2 = J();

			SetJumpTarget(notOverflow);
			SetJumpTarget(notOverflow2);

			// CDQ sign-extends EAX into EDX:EAX for the signed divide.
			CDQ();
			IDIV(32, gpr.R(rt));
			MOV(32, gpr.R(MIPS_REG_HI), R(EDX));
			MOV(32, gpr.R(MIPS_REG_LO), R(EAX));
			FixupBranch skip = J();

			SetJumpTarget(divZero);
			// Divide by zero: HI = rs, LO = 1 if rs is negative else -1.
			MOV(32, gpr.R(MIPS_REG_HI), R(EAX));
			MOV(32, gpr.R(MIPS_REG_LO), Imm32(-1));
			CMP(32, R(EAX), Imm32(0));
			FixupBranch positiveDivZero = J_CC(CC_GE);
			MOV(32, gpr.R(MIPS_REG_LO), Imm32(1));

			SetJumpTarget(positiveDivZero);
			SetJumpTarget(skip);
			SetJumpTarget(skip2);
			gpr.UnlockAllX();
		}
		break;

	case 27: //divu
		{
			// Unsigned divide: only divide-by-zero needs a guard.
			gpr.FlushLockX(EDX);
			gpr.KillImmediate(MIPS_REG_HI, false, true);
			gpr.KillImmediate(MIPS_REG_LO, false, true);
			gpr.KillImmediate(rt, true, false);

			MOV(32, R(EAX), gpr.R(rs));
			// Zero-extend the dividend into EDX:EAX.
			MOV(32, R(EDX), Imm32(0));

			CMP(32, gpr.R(rt), Imm32(0));
			FixupBranch divZero = J_CC(CC_E);

			DIV(32, gpr.R(rt));
			MOV(32, gpr.R(MIPS_REG_HI), R(EDX));
			MOV(32, gpr.R(MIPS_REG_LO), R(EAX));
			FixupBranch skip = J();

			SetJumpTarget(divZero);
			// Divide by zero: HI = rs, LO = -1 unless rs fits in 16 bits,
			// in which case LO = 0xFFFF.
			MOV(32, gpr.R(MIPS_REG_HI), R(EAX));
			MOV(32, gpr.R(MIPS_REG_LO), Imm32(-1));
			CMP(32, R(EAX), Imm32(0xFFFF));
			FixupBranch moreThan16Bit = J_CC(CC_A);
			MOV(32, gpr.R(MIPS_REG_LO), Imm32(0xFFFF));

			SetJumpTarget(moreThan16Bit);
			SetJumpTarget(skip);
			gpr.UnlockAllX();
		}
		break;

	case 28: // madd
		// HI:LO += signed rs * rt (64-bit add via ADD/ADC.)
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		MOV(32, R(EAX), gpr.R(rs));
		IMUL(32, gpr.R(rt));
		ADD(32, gpr.R(MIPS_REG_LO), R(EAX));
		ADC(32, gpr.R(MIPS_REG_HI), R(EDX));
		gpr.UnlockAllX();
		break;

	case 29: // maddu
		// HI:LO += unsigned rs * rt.
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		MOV(32, R(EAX), gpr.R(rs));
		MUL(32, gpr.R(rt));
		ADD(32, gpr.R(MIPS_REG_LO), R(EAX));
		ADC(32, gpr.R(MIPS_REG_HI), R(EDX));
		gpr.UnlockAllX();
		break;

	case 46: // msub
		// HI:LO -= signed rs * rt (64-bit subtract via SUB/SBB.)
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		MOV(32, R(EAX), gpr.R(rs));
		IMUL(32, gpr.R(rt));
		SUB(32, gpr.R(MIPS_REG_LO), R(EAX));
		SBB(32, gpr.R(MIPS_REG_HI), R(EDX));
		gpr.UnlockAllX();
		break;

	case 47: // msubu
		// HI:LO -= unsigned rs * rt.
		gpr.FlushLockX(EDX);
		gpr.KillImmediate(MIPS_REG_HI, false, true);
		gpr.KillImmediate(MIPS_REG_LO, false, true);
		gpr.KillImmediate(rt, true, false);
		MOV(32, R(EAX), gpr.R(rs));
		MUL(32, gpr.R(rt));
		SUB(32, gpr.R(MIPS_REG_LO), R(EAX));
		SBB(32, gpr.R(MIPS_REG_HI), R(EDX));
		gpr.UnlockAllX();
		break;

	default:
		DISABLE;
	}
}
1154
}
1155
1156
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
1157
1158