GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/RiscV/RiscVCompALU.cpp

// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "Common/CPUDetect.h"
#include "Core/MemMap.h"
#include "Core/MIPS/RiscV/RiscVJit.h"
#include "Core/MIPS/RiscV/RiscVRegCache.h"

// This file contains compilation for integer / arithmetic / logic related instructions.
//
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE. No flags because that's in IR already.

// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
#define CONDITIONAL_DISABLE {}
#define DISABLE { CompIR_Generic(inst); return; }
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }

namespace MIPSComp {

using namespace RiscVGen;
using namespace RiscVJitConstants;

void RiscVJitBackend::CompIR_Arith(IRInst inst) {
	CONDITIONAL_DISABLE;

	bool allowPtrMath = true;
#ifdef MASKED_PSP_MEMORY
	// Since we modify it, we can't safely.
	allowPtrMath = false;
#endif

	// RISC-V only adds signed immediates, so rewrite a small enough subtract to an add.
	// We use -2047 and 2048 here because the range swaps.
	if (inst.op == IROp::SubConst && (int32_t)inst.constant >= -2047 && (int32_t)inst.constant <= 2048) {
		inst.op = IROp::AddConst;
		inst.constant = (uint32_t)-(int32_t)inst.constant;
	}

	switch (inst.op) {
	case IROp::Add:
		regs_.Map(inst);
		ADDW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Sub:
		regs_.Map(inst);
		SUBW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::AddConst:
		if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
			// Typical of stack pointer updates.
			if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) {
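				// Add the immediate directly onto the host pointer, rather than dropping back to
				// the mapped 32-bit value as the else branch below does.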
				regs_.MarkGPRAsPointerDirty(inst.dest);
				ADDI(regs_.RPtr(inst.dest), regs_.RPtr(inst.dest), inst.constant);
			} else {
				regs_.Map(inst);
				ADDIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);
				regs_.MarkGPRDirty(inst.dest, true);
			}
		} else {
			regs_.Map(inst);
			LI(SCRATCH1, (int32_t)inst.constant);
			ADDW(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	case IROp::SubConst:
		regs_.Map(inst);
		LI(SCRATCH1, (int32_t)inst.constant);
		SUBW(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Neg:
		regs_.Map(inst);
		SUBW(regs_.R(inst.dest), R_ZERO, regs_.R(inst.src1));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Logic(IRInst inst) {
	CONDITIONAL_DISABLE;

	bool resultNormalized = false;
	switch (inst.op) {
	case IROp::And:
		if (inst.src1 != inst.src2) {
			regs_.Map(inst);
			AND(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		} else if (inst.src1 != inst.dest) {
			regs_.Map(inst);
			MV(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
		}
		break;

	case IROp::Or:
		if (inst.src1 != inst.src2) {
			// If both were normalized before, the result is normalized.
			resultNormalized = regs_.IsNormalized32(inst.src1) && regs_.IsNormalized32(inst.src2);
			regs_.Map(inst);
			OR(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
			regs_.MarkGPRDirty(inst.dest, resultNormalized);
		} else if (inst.src1 != inst.dest) {
			regs_.Map(inst);
			MV(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
		}
		break;

	case IROp::Xor:
		if (inst.src1 == inst.src2) {
			regs_.SetGPRImm(inst.dest, 0);
		} else {
			regs_.Map(inst);
			XOR(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		}
		break;

	case IROp::AndConst:
		resultNormalized = regs_.IsNormalized32(inst.src1);
		regs_.Map(inst);
		if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
			ANDI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);
		} else {
			LI(SCRATCH1, (int32_t)inst.constant);
			AND(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);
		}
		// If the sign bits aren't cleared, and it was normalized before - it still is.
		if ((inst.constant & 0x80000000) != 0 && resultNormalized)
			regs_.MarkGPRDirty(inst.dest, true);
		// Otherwise, if we cleared the sign bits, it's naturally normalized.
		else if ((inst.constant & 0x80000000) == 0)
			regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::OrConst:
		resultNormalized = regs_.IsNormalized32(inst.src1);
		regs_.Map(inst);
		if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
			ORI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);
		} else {
			LI(SCRATCH1, (int32_t)inst.constant);
			OR(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);
		}
		// Since our constant is normalized, oring its bits in won't hurt normalization.
		regs_.MarkGPRDirty(inst.dest, resultNormalized);
		break;

	case IROp::XorConst:
		regs_.Map(inst);
		if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
			XORI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);
		} else {
			LI(SCRATCH1, (int32_t)inst.constant);
			XOR(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);
		}
		break;

	case IROp::Not:
		regs_.Map(inst);
		NOT(regs_.R(inst.dest), regs_.R(inst.src1));
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Assign(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Mov:
		if (inst.dest != inst.src1) {
			regs_.Map(inst);
			MV(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
		}
		break;

	case IROp::Ext8to32:
		regs_.Map(inst);
		if (cpu_info.RiscV_Zbb) {
			SEXT_B(regs_.R(inst.dest), regs_.R(inst.src1));
		} else {
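			// The plain 64-bit SLLI is fine here: SRAIW below only reads the low 32 bits of its
			// input and sign-extends the 32-bit result.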
			SLLI(regs_.R(inst.dest), regs_.R(inst.src1), 24);
			SRAIW(regs_.R(inst.dest), regs_.R(inst.dest), 24);
		}
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Ext16to32:
		regs_.Map(inst);
		if (cpu_info.RiscV_Zbb) {
			SEXT_H(regs_.R(inst.dest), regs_.R(inst.src1));
		} else {
			SLLI(regs_.R(inst.dest), regs_.R(inst.src1), 16);
			SRAIW(regs_.R(inst.dest), regs_.R(inst.dest), 16);
		}
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Bits(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::ReverseBits:
		if (cpu_info.RiscV_Zbb) {
			regs_.Map(inst);
			// Start by reversing bytes (note: this puts in upper 32 of XLEN.)
			REV8(regs_.R(inst.dest), regs_.R(inst.src1));

			// Swap nibbles.
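			// On RV64 the reversed value is still sitting in the top 32 bits at this point, so the
			// shift amounts below also fold in the move back down to the low bits.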
			LI(SCRATCH1, (s32)0xF0F0F0F0);
			SRLI(SCRATCH2, regs_.R(inst.dest), XLEN - 32 - 4);
			AND(SCRATCH2, SCRATCH2, SCRATCH1);
			if (XLEN >= 64)
				SRLI(regs_.R(inst.dest), regs_.R(inst.dest), XLEN - 28);
			else
				SLLI(regs_.R(inst.dest), regs_.R(inst.dest), 4);
			SRLIW(SCRATCH1, SCRATCH1, 4);
			AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);
			OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);

			// Now the consecutive pairs.
			LI(SCRATCH1, (s32)0x33333333);
			SRLI(SCRATCH2, regs_.R(inst.dest), 2);
			AND(SCRATCH2, SCRATCH2, SCRATCH1);
			AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);
			SLLIW(regs_.R(inst.dest), regs_.R(inst.dest), 2);
			OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);

			// And finally the even and odd bits.
			LI(SCRATCH1, (s32)0x55555555);
			SRLI(SCRATCH2, regs_.R(inst.dest), 1);
			AND(SCRATCH2, SCRATCH2, SCRATCH1);
			AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);
			SLLIW(regs_.R(inst.dest), regs_.R(inst.dest), 1);
			OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);
		} else {
			CompIR_Generic(inst);
		}
		break;

	case IROp::BSwap16:
		CompIR_Generic(inst);
		break;

	case IROp::BSwap32:
		if (cpu_info.RiscV_Zbb) {
			regs_.Map(inst);
			REV8(regs_.R(inst.dest), regs_.R(inst.src1));
			if (XLEN >= 64) {
				// REV8 swaps the entire register, so get the 32 highest bits.
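				// SRAI rather than SRLI, so the result comes back sign-extended, i.e. already normalized.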
				SRAI(regs_.R(inst.dest), regs_.R(inst.dest), XLEN - 32);
				regs_.MarkGPRDirty(inst.dest, true);
			}
		} else {
			CompIR_Generic(inst);
		}
		break;

	case IROp::Clz:
		if (cpu_info.RiscV_Zbb) {
			regs_.Map(inst);
			// This even sets to 32 when zero, perfect.
			CLZW(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, true);
		} else {
			CompIR_Generic(inst);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Shift(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::Shl:
		regs_.Map(inst);
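		// SLLW/SRLW/SRAW only use the low 5 bits of the shift amount, just like MIPS variable
		// shifts, so no masking of src2 is needed.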
		SLLW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Shr:
		regs_.Map(inst);
		SRLW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Sar:
		regs_.Map(inst);
		SRAW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::Ror:
		if (cpu_info.RiscV_Zbb) {
			regs_.Map(inst);
			RORW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));
			regs_.MarkGPRDirty(inst.dest, true);
		} else {
			CompIR_Generic(inst);
		}
		break;

	case IROp::ShlImm:
		// Shouldn't happen, but let's be safe of any passes that modify the ops.
		if (inst.src2 >= 32) {
			regs_.SetGPRImm(inst.dest, 0);
		} else if (inst.src2 == 0) {
			if (inst.dest != inst.src1) {
				regs_.Map(inst);
				MV(regs_.R(inst.dest), regs_.R(inst.src1));
				regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
			}
		} else {
			regs_.Map(inst);
			SLLIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	case IROp::ShrImm:
		// Shouldn't happen, but let's be safe of any passes that modify the ops.
		if (inst.src2 >= 32) {
			regs_.SetGPRImm(inst.dest, 0);
		} else if (inst.src2 == 0) {
			if (inst.dest != inst.src1) {
				regs_.Map(inst);
				MV(regs_.R(inst.dest), regs_.R(inst.src1));
				regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
			}
		} else {
			regs_.Map(inst);
			SRLIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	case IROp::SarImm:
		// Shouldn't happen, but let's be safe of any passes that modify the ops.
		if (inst.src2 >= 32) {
			regs_.Map(inst);
			SRAIW(regs_.R(inst.dest), regs_.R(inst.src1), 31);
			regs_.MarkGPRDirty(inst.dest, true);
		} else if (inst.src2 == 0) {
			if (inst.dest != inst.src1) {
				regs_.Map(inst);
				MV(regs_.R(inst.dest), regs_.R(inst.src1));
				regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
			}
		} else {
			regs_.Map(inst);
			SRAIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	case IROp::RorImm:
		if (inst.src2 == 0) {
			if (inst.dest != inst.src1) {
				regs_.Map(inst);
				MV(regs_.R(inst.dest), regs_.R(inst.src1));
				regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
			}
		} else if (cpu_info.RiscV_Zbb) {
			regs_.Map(inst);
			RORIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2 & 31);
			regs_.MarkGPRDirty(inst.dest, true);
		} else {
			CompIR_Generic(inst);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Compare(IRInst inst) {
	CONDITIONAL_DISABLE;

	RiscVReg lhs = INVALID_REG;
	RiscVReg rhs = INVALID_REG;
	switch (inst.op) {
	case IROp::Slt:
		regs_.Map(inst);
		NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);

		SLT(regs_.R(inst.dest), lhs, rhs);
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::SltConst:
		if (inst.constant == 0) {
			// Basically, getting the sign bit. Let's shift instead.
			regs_.Map(inst);
			SRLIW(regs_.R(inst.dest), regs_.R(inst.src1), 31);
			regs_.MarkGPRDirty(inst.dest, true);
		} else {
			regs_.Map(inst);
			NormalizeSrc1(inst, &lhs, SCRATCH1, false);

			if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
				SLTI(regs_.R(inst.dest), lhs, (int32_t)inst.constant);
			} else {
				LI(SCRATCH2, (int32_t)inst.constant);
				SLT(regs_.R(inst.dest), lhs, SCRATCH2);
			}
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	case IROp::SltU:
		regs_.Map(inst);
		// It's still fine to sign extend, the biggest just get even bigger.
		NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);

		SLTU(regs_.R(inst.dest), lhs, rhs);
		regs_.MarkGPRDirty(inst.dest, true);
		break;

	case IROp::SltUConst:
		if (inst.constant == 0) {
			regs_.SetGPRImm(inst.dest, 0);
		} else {
			regs_.Map(inst);
			NormalizeSrc1(inst, &lhs, SCRATCH1, false);

			// We sign extend because we're comparing against something normalized.
			// It's also the most efficient to set.
			if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {
				SLTIU(regs_.R(inst.dest), lhs, (int32_t)inst.constant);
			} else {
				LI(SCRATCH2, (int32_t)inst.constant);
				SLTU(regs_.R(inst.dest), lhs, SCRATCH2);
			}
			regs_.MarkGPRDirty(inst.dest, true);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_CondAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	RiscVReg lhs = INVALID_REG;
	RiscVReg rhs = INVALID_REG;
	FixupBranch fixup;
	switch (inst.op) {
	case IROp::MovZ:
	case IROp::MovNZ:
		if (inst.dest == inst.src2)
			return;

		// We could have a "zero" with wrong upper due to XOR, so we have to normalize.
		regs_.Map(inst);
		NormalizeSrc1(inst, &lhs, SCRATCH1, true);

		switch (inst.op) {
		case IROp::MovZ:
			fixup = BNE(lhs, R_ZERO);
			break;
		case IROp::MovNZ:
			fixup = BEQ(lhs, R_ZERO);
			break;
		default:
			INVALIDOP;
			break;
		}

		MV(regs_.R(inst.dest), regs_.R(inst.src2));
		SetJumpTarget(fixup);
		break;

	case IROp::Max:
		if (inst.src1 != inst.src2) {
			if (cpu_info.RiscV_Zbb) {
				regs_.Map(inst);
				NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
				MAX(regs_.R(inst.dest), lhs, rhs);
				// Because we had to normalize the inputs, the output is normalized.
				regs_.MarkGPRDirty(inst.dest, true);
			} else {
				CompIR_Generic(inst);
			}
		} else if (inst.dest != inst.src1) {
			regs_.Map(inst);
			MV(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
		}
		break;

	case IROp::Min:
		if (inst.src1 != inst.src2) {
			if (cpu_info.RiscV_Zbb) {
				regs_.Map(inst);
				NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
				MIN(regs_.R(inst.dest), lhs, rhs);
				// Because we had to normalize the inputs, the output is normalized.
				regs_.MarkGPRDirty(inst.dest, true);
			} else {
				CompIR_Generic(inst);
			}
		} else if (inst.dest != inst.src1) {
			regs_.Map(inst);
			MV(regs_.R(inst.dest), regs_.R(inst.src1));
			regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_HiLo(IRInst inst) {
	CONDITIONAL_DISABLE;
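	// LO and HI are kept packed together in one 64-bit host register (hence the 2 lanes in the
	// IRREG_LO mappings below): LO in the low 32 bits, HI in the upper 32.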

	switch (inst.op) {
	case IROp::MtLo:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		// First, clear the bits we're replacing.
		SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
		SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
		// And now, insert the low 32 bits of src1.
		if (cpu_info.RiscV_Zba) {
			ADD_UW(regs_.R(IRREG_LO), regs_.R(inst.src1), regs_.R(IRREG_LO));
		} else {
			SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);
			SRLI(SCRATCH1, SCRATCH1, XLEN - 32);
			ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		}
		break;

	case IROp::MtHi:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);
		if (cpu_info.RiscV_Zba) {
			ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		} else {
			SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		}
		break;

	case IROp::MfLo:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
		// It won't be normalized, but that's fine...
		MV(regs_.R(inst.dest), regs_.R(IRREG_LO));
		break;

	case IROp::MfHi:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });
		SRAI(regs_.R(inst.dest), regs_.R(IRREG_LO), 32);
		if (XLEN == 64)
			regs_.MarkGPRDirty(inst.dest, true);
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Mult(IRInst inst) {
	CONDITIONAL_DISABLE;

	auto makeArgsUnsigned = [&](RiscVReg *lhs, RiscVReg *rhs) {
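		// Zero-extend both 32-bit inputs so a single full-width MUL below yields the complete
		// unsigned 32x32 -> 64 product.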
		if (cpu_info.RiscV_Zba) {
			ZEXT_W(SCRATCH1, regs_.R(inst.src1));
			ZEXT_W(SCRATCH2, regs_.R(inst.src2));
		} else {
			SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);
			SRLI(SCRATCH1, SCRATCH1, XLEN - 32);
			SLLI(SCRATCH2, regs_.R(inst.src2), XLEN - 32);
			SRLI(SCRATCH2, SCRATCH2, XLEN - 32);
		}
		*lhs = SCRATCH1;
		*rhs = SCRATCH2;
	};

	RiscVReg lhs = INVALID_REG;
	RiscVReg rhs = INVALID_REG;
	switch (inst.op) {
	case IROp::Mult:
		// TODO: Maybe IR could simplify when HI is not needed or clobbered?
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
		NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
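		// With sign-extended (normalized) inputs, the full-width MUL produces the whole signed
		// 32x32 -> 64 product, i.e. LO and HI already packed together.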
		MUL(regs_.R(IRREG_LO), lhs, rhs);
		break;

	case IROp::MultU:
		// This is an "anti-norm32" case. Let's just zero always.
		// TODO: If we could know that LO was only needed, we could use MULW.
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
		makeArgsUnsigned(&lhs, &rhs);
		MUL(regs_.R(IRREG_LO), lhs, rhs);
		break;

	case IROp::Madd:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
		MUL(SCRATCH1, lhs, rhs);
		ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		break;

	case IROp::MaddU:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		makeArgsUnsigned(&lhs, &rhs);
		MUL(SCRATCH1, lhs, rhs);
		ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		break;

	case IROp::Msub:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);
		MUL(SCRATCH1, lhs, rhs);
		SUB(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		break;

	case IROp::MsubU:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });
		makeArgsUnsigned(&lhs, &rhs);
		MUL(SCRATCH1, lhs, rhs);
		SUB(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);
		break;

	default:
		INVALIDOP;
		break;
	}
}

void RiscVJitBackend::CompIR_Div(IRInst inst) {
	CONDITIONAL_DISABLE;

	RiscVReg numReg, denomReg;
	switch (inst.op) {
	case IROp::Div:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
		// We have to do this because of the divide by zero and overflow checks below.
		NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
		DIVW(regs_.R(IRREG_LO), numReg, denomReg);
		REMW(R_RA, numReg, denomReg);
		// Now to combine them. We'll do more with them below...
		SLLI(R_RA, R_RA, 32);
		if (cpu_info.RiscV_Zba) {
			ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);
		} else {
			SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);
		}

		// Now some tweaks for divide by zero and overflow.
		{
			// Start with divide by zero, remainder is fine.
			FixupBranch skipNonZero = BNE(denomReg, R_ZERO);
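			// DIVW already produced -1 for the zero denominator; that's kept for a non-negative
			// numerator, while a negative numerator gets +1 instead (patched below.)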
			FixupBranch keepNegOne = BGE(numReg, R_ZERO);
			// Clear the -1 and replace it with 1.
			SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 32);
			SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 32);
			ADDI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 1);
			SetJumpTarget(keepNegOne);
			SetJumpTarget(skipNonZero);

			// For overflow, RISC-V sets LO right, but remainder to zero.
			// Cheating a bit by using R_RA as a temp...
			LI(R_RA, (int32_t)0x80000000);
			FixupBranch notMostNegative = BNE(numReg, R_RA);
			LI(R_RA, -1);
			FixupBranch notNegativeOne = BNE(denomReg, R_RA);
			// Take our R_RA and put it in the high bits.
			SLLI(R_RA, R_RA, 32);
			OR(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);
			SetJumpTarget(notNegativeOne);
			SetJumpTarget(notMostNegative);
		}
		break;

	case IROp::DivU:
		regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });
		// We have to do this because of the divide by zero check below.
		NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);
		DIVUW(regs_.R(IRREG_LO), numReg, denomReg);
		REMUW(R_RA, numReg, denomReg);

		// On divide by zero, everything is correct already except the 0xFFFF case.
		{
			FixupBranch skipNonZero = BNE(denomReg, R_ZERO);
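			// The result wanted here is 0xFFFF when the numerator fits in 16 bits; larger
			// numerators keep the all-ones quotient DIVUW already produced.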
			// Luckily, we don't need SCRATCH2/denomReg anymore.
			LI(SCRATCH2, 0xFFFF);
			FixupBranch keepNegOne = BLTU(SCRATCH2, numReg);
			MV(regs_.R(IRREG_LO), SCRATCH2);
			SetJumpTarget(keepNegOne);
			SetJumpTarget(skipNonZero);
		}

		// Now combine the remainder in.
		SLLI(R_RA, R_RA, 32);
		if (cpu_info.RiscV_Zba) {
			ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);
		} else {
			SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);
			ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

} // namespace MIPSComp