CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/RiscV/RiscVCompFPU.cpp
Views: 1401
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/MIPS/RiscV/RiscVJit.h"
#include "Core/MIPS/RiscV/RiscVRegCache.h"
// This file contains compilation for floating point related instructions.
//
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE. No flags because that's in IR already.

// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }
#define CONDITIONAL_DISABLE {}
#define DISABLE { CompIR_Generic(inst); return; }
#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }

namespace MIPSComp {

using namespace RiscVGen;
using namespace RiscVJitConstants;
// Compiles basic IR float arithmetic (add/sub/mul/div/sqrt/neg).
// Each op maps 1:1 to a single-precision RISC-V FP instruction on the mapped regs.
void RiscVJitBackend::CompIR_FArith(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::FAdd:
		regs_.Map(inst);
		FADD(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FSub:
		regs_.Map(inst);
		FSUB(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FMul:
		regs_.Map(inst);
		// We'll assume everyone will make it such that 0 * infinity = NAN properly.
		// See blame on this comment if that proves untrue.
		FMUL(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FDiv:
		regs_.Map(inst);
		FDIV(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
		break;

	case IROp::FSqrt:
		regs_.Map(inst);
		FSQRT(32, regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FNeg:
		regs_.Map(inst);
		FNEG(32, regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR FMin/FMax. The PSP's VFPU min/max treat NAN/INF as ordinary bit
// patterns (a NAN just has a larger exponent), so when either input is a NAN we
// fall back to an integer comparison of the raw float bits instead of FMIN/FMAX.
void RiscVJitBackend::CompIR_FCondAssign(IRInst inst) {
	CONDITIONAL_DISABLE;
	if (inst.op != IROp::FMin && inst.op != IROp::FMax)
		INVALIDOP;
	bool maxCondition = inst.op == IROp::FMax;

	// FMin and FMax are used by VFPU and handle NAN/INF as just a larger exponent.
	regs_.Map(inst);
	FCLASS(32, SCRATCH1, regs_.F(inst.src1));
	FCLASS(32, SCRATCH2, regs_.F(inst.src2));

	// If either side is a NAN, it needs to participate in the comparison.
	OR(SCRATCH1, SCRATCH1, SCRATCH2);
	// NAN is either 0x100 or 0x200.
	ANDI(SCRATCH1, SCRATCH1, 0x300);
	FixupBranch useNormalCond = BEQ(SCRATCH1, R_ZERO);

	// Time to use bits... classify won't help because it ignores -NAN.
	FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
	FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src2));

	// If both are negative, we flip the comparison (not two's complement.)
	// We cheat and use RA...
	AND(R_RA, SCRATCH1, SCRATCH2);
	SRLIW(R_RA, R_RA, 31);

	if (cpu_info.RiscV_Zbb) {
		// With Zbb we have integer MIN/MAX; just pick the flipped one when both were negative.
		FixupBranch swapCompare = BNE(R_RA, R_ZERO);
		if (maxCondition)
			MAX(SCRATCH1, SCRATCH1, SCRATCH2);
		else
			MIN(SCRATCH1, SCRATCH1, SCRATCH2);
		FixupBranch skipSwapCompare = J();
		SetJumpTarget(swapCompare);
		if (maxCondition)
			MIN(SCRATCH1, SCRATCH1, SCRATCH2);
		else
			MAX(SCRATCH1, SCRATCH1, SCRATCH2);
		SetJumpTarget(skipSwapCompare);
	} else {
		RiscVReg isSrc1LowerReg = regs_.GetAndLockTempGPR();
		SLT(isSrc1LowerReg, SCRATCH1, SCRATCH2);
		// Flip the flag (to reverse the min/max) based on if both were negative.
		XOR(isSrc1LowerReg, isSrc1LowerReg, R_RA);
		FixupBranch useSrc1;
		if (maxCondition)
			useSrc1 = BEQ(isSrc1LowerReg, R_ZERO);
		else
			useSrc1 = BNE(isSrc1LowerReg, R_ZERO);
		MV(SCRATCH1, SCRATCH2);
		SetJumpTarget(useSrc1);
	}

	// Move the winning bit pattern back into the FP dest.
	FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
	FixupBranch finish = J();

	// No NANs involved: the hardware FMIN/FMAX semantics are fine.
	SetJumpTarget(useNormalCond);
	if (maxCondition)
		FMAX(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	else
		FMIN(32, regs_.F(inst.dest), regs_.F(inst.src1), regs_.F(inst.src2));
	SetJumpTarget(finish);
}
// Compiles IR float move/abs/sign ops.
// FSign produces -1.0, 0.0 (keeping zero's sign handling via classify), or +1.0.
void RiscVJitBackend::CompIR_FAssign(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::FMov:
		// Skip the move entirely when src and dest are the same IR register.
		if (inst.dest != inst.src1) {
			regs_.Map(inst);
			FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
		}
		break;

	case IROp::FAbs:
		regs_.Map(inst);
		FABS(32, regs_.F(inst.dest), regs_.F(inst.src1));
		break;

	case IROp::FSign:
	{
		regs_.Map(inst);
		// Check if it's negative zero, either 0x10/0x08 is zero.
		FCLASS(32, SCRATCH1, regs_.F(inst.src1));
		ANDI(SCRATCH1, SCRATCH1, 0x18);
		SEQZ(SCRATCH1, SCRATCH1);
		// Okay, it's zero if zero, 1 otherwise. Convert 1 to a constant 1.0.
		// Probably non-zero is the common case, so we make that the straight line.
		FixupBranch skipOne = BEQ(SCRATCH1, R_ZERO);
		LI(SCRATCH1, 1.0f);

		// Now we just need the sign from it.
		FMV(FMv::X, FMv::W, SCRATCH2, regs_.F(inst.src1));
		// Use a wall to isolate the sign, and combine.
		SRAIW(SCRATCH2, SCRATCH2, 31);
		SLLIW(SCRATCH2, SCRATCH2, 31);
		OR(SCRATCH1, SCRATCH1, SCRATCH2);

		SetJumpTarget(skipOne);
		FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
		break;
	}

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR float-to-int rounding ops (round/trunc/ceil/floor).
// Uses FCVT with an explicit static rounding mode, then moves the integer
// result bits back into the FP dest register (PSP keeps these in FPRs).
void RiscVJitBackend::CompIR_FRound(IRInst inst) {
	CONDITIONAL_DISABLE;

	// TODO: If this is followed by a GPR transfer, might want to combine.
	regs_.Map(inst);

	switch (inst.op) {
	case IROp::FRound:
		FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.src1), Round::NEAREST_EVEN);
		break;

	case IROp::FTrunc:
		FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.src1), Round::TOZERO);
		break;

	case IROp::FCeil:
		FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.src1), Round::UP);
		break;

	case IROp::FFloor:
		FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.src1), Round::DOWN);
		break;

	default:
		INVALIDOP;
		break;
	}

	FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
}
// Compiles IR int<->float conversion ops, including the VFPU scaled variants.
// For scaled ops, inst.src2 packs the scale in the low 5 bits and (for
// FCvtScaledWS) the rounding mode in bits 6+.
void RiscVJitBackend::CompIR_FCvt(IRInst inst) {
	CONDITIONAL_DISABLE;

	RiscVReg tempReg = INVALID_REG;
	switch (inst.op) {
	case IROp::FCvtWS:
		// Not implemented natively yet; fall back to the IR interpreter.
		CompIR_Generic(inst);
		break;

	case IROp::FCvtSW:
		// TODO: This is probably proceeded by a GPR transfer, might be ideal to combine.
		regs_.Map(inst);
		FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
		FCVT(FConv::S, FConv::W, regs_.F(inst.dest), SCRATCH1);
		break;

	case IROp::FCvtScaledWS:
	{
		Round rm = Round::NEAREST_EVEN;
		switch (inst.src2 >> 6) {
		case 0: rm = Round::NEAREST_EVEN; break;
		case 1: rm = Round::TOZERO; break;
		case 2: rm = Round::UP; break;
		case 3: rm = Round::DOWN; break;
		default:
			_assert_msg_(false, "Invalid rounding mode for FCvtScaledWS");
		}

		tempReg = regs_.MapWithFPRTemp(inst);
		// Prepare the multiplier.
		QuickFLI(32, tempReg, (float)(1UL << (inst.src2 & 0x1F)), SCRATCH1);

		FMUL(32, regs_.F(inst.dest), regs_.F(inst.src1), tempReg, rm);
		// NAN and clamping should all be correct.
		FCVT(FConv::W, FConv::S, SCRATCH1, regs_.F(inst.dest), rm);
		// TODO: Could combine with a transfer, often is one...
		FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
		break;
	}

	case IROp::FCvtScaledSW:
		// TODO: This is probably proceeded by a GPR transfer, might be ideal to combine.
		tempReg = regs_.MapWithFPRTemp(inst);
		FMV(FMv::X, FMv::W, SCRATCH1, regs_.F(inst.src1));
		FCVT(FConv::S, FConv::W, regs_.F(inst.dest), SCRATCH1);

		// Pre-divide so we can avoid any actual divide.
		QuickFLI(32, tempReg, 1.0f / (1UL << (inst.src2 & 0x1F)), SCRATCH1);
		FMUL(32, regs_.F(inst.dest), regs_.F(inst.dest), tempReg);
		break;

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR float saturation ops: clamp to [0, 1] or [-1, 1].
// Uses FLT/FLE to detect out-of-range values (these also flush -0.0 to +0.0
// for the lower bound of FSat0_1), then branches to overwrite with the bound.
void RiscVJitBackend::CompIR_FSat(IRInst inst) {
	CONDITIONAL_DISABLE;

	RiscVReg tempReg = INVALID_REG;
	FixupBranch skipLower;
	FixupBranch finishLower;
	FixupBranch skipHigher;
	switch (inst.op) {
	case IROp::FSat0_1:
		tempReg = regs_.MapWithFPRTemp(inst);
		if (inst.dest != inst.src1)
			FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));

		// First, set SCRATCH1 = clamp to zero, SCRATCH2 = clamp to one.
		FCVT(FConv::S, FConv::W, tempReg, R_ZERO);
		// FLE here is intentional to convert -0.0 to +0.0.
		FLE(32, SCRATCH1, regs_.F(inst.src1), tempReg);
		QuickFLI(32, tempReg, 1.0f, SCRATCH2);
		FLT(32, SCRATCH2, tempReg, regs_.F(inst.src1));

		skipLower = BEQ(SCRATCH1, R_ZERO);
		FCVT(FConv::S, FConv::W, regs_.F(inst.dest), R_ZERO);
		finishLower = J();

		SetJumpTarget(skipLower);
		skipHigher = BEQ(SCRATCH2, R_ZERO);
		// Still has 1.0 in it.
		FMV(32, regs_.F(inst.dest), tempReg);

		SetJumpTarget(finishLower);
		SetJumpTarget(skipHigher);
		break;

	case IROp::FSatMinus1_1:
		tempReg = regs_.MapWithFPRTemp(inst);
		if (inst.dest != inst.src1)
			FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));

		// First, set SCRATCH1 = clamp to negative, SCRATCH2 = clamp to positive.
		QuickFLI(32, tempReg, -1.0f, SCRATCH2);
		FLT(32, SCRATCH1, regs_.F(inst.src1), tempReg);
		FNEG(32, tempReg, tempReg);
		FLT(32, SCRATCH2, tempReg, regs_.F(inst.src1));

		// But we can actually do one branch, using sign-injection to keep the original sign.
		OR(SCRATCH1, SCRATCH1, SCRATCH2);

		skipLower = BEQ(SCRATCH1, R_ZERO);
		FSGNJ(32, regs_.F(inst.dest), tempReg, regs_.F(inst.dest));
		SetJumpTarget(skipLower);
		break;

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR float comparison ops: FPU condition flag compares (FCmp), VFPU
// conditional moves (FCmovVfpuCC), VFPU per-bit compares (FCmpVfpuBit), and the
// VFPU any/all aggregation (FCmpVfpuAggregate).
// Unordered modes OR in a NAN check built from FCLASS (NAN bits are 0x100/0x200).
void RiscVJitBackend::CompIR_FCompare(IRInst inst) {
	CONDITIONAL_DISABLE;

	constexpr IRReg IRREG_VFPU_CC = IRREG_VFPU_CTRL_BASE + VFPU_CTRL_CC;

	switch (inst.op) {
	case IROp::FCmp:
		switch (inst.dest) {
		case IRFpCompareMode::False:
			regs_.SetGPRImm(IRREG_FPCOND, 0);
			break;

		case IRFpCompareMode::EitherUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			FCLASS(32, SCRATCH2, regs_.F(inst.src2));
			OR(SCRATCH1, SCRATCH1, SCRATCH2);
			// NAN is 0x100 or 0x200.
			ANDI(SCRATCH1, SCRATCH1, 0x300);
			SNEZ(regs_.R(IRREG_FPCOND), SCRATCH1);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::EqualOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FEQ(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::EqualUnordered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FEQ(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));

			// Now let's just OR in the unordered check.
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			FCLASS(32, SCRATCH2, regs_.F(inst.src2));
			OR(SCRATCH1, SCRATCH1, SCRATCH2);
			// NAN is 0x100 or 0x200.
			ANDI(SCRATCH1, SCRATCH1, 0x300);
			SNEZ(SCRATCH1, SCRATCH1);
			OR(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND), SCRATCH1);
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessEqualOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FLE(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessEqualUnordered:
			// !(src2 < src1) == (src1 <= src2 || unordered), since FLT is false on NAN.
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FLT(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src2), regs_.F(inst.src1));
			SEQZ(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessOrdered:
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FLT(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src1), regs_.F(inst.src2));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		case IRFpCompareMode::LessUnordered:
			// !(src2 <= src1) == (src1 < src2 || unordered), since FLE is false on NAN.
			regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
			FLE(32, regs_.R(IRREG_FPCOND), regs_.F(inst.src2), regs_.F(inst.src1));
			SEQZ(regs_.R(IRREG_FPCOND), regs_.R(IRREG_FPCOND));
			regs_.MarkGPRDirty(IRREG_FPCOND, true);
			break;

		default:
			_assert_msg_(false, "Unexpected IRFpCompareMode %d", inst.dest);
		}
		break;

	case IROp::FCmovVfpuCC:
		// Conditionally move src1 into dest based on a VFPU CC bit.
		// inst.src2 low nibble selects the bit, bit 7 selects the polarity.
		regs_.MapWithExtra(inst, { { 'G', IRREG_VFPU_CC, 1, MIPSMap::INIT } });
		if ((inst.src2 & 0xF) == 0) {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), 1);
		} else if (cpu_info.RiscV_Zbs) {
			BEXTI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.src2 & 0xF);
		} else {
			SRLI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.src2 & 0xF);
			ANDI(SCRATCH1, SCRATCH1, 1);
		}
		if ((inst.src2 >> 7) & 1) {
			FixupBranch skip = BEQ(SCRATCH1, R_ZERO);
			FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
			SetJumpTarget(skip);
		} else {
			FixupBranch skip = BNE(SCRATCH1, R_ZERO);
			FMV(32, regs_.F(inst.dest), regs_.F(inst.src1));
			SetJumpTarget(skip);
		}
		break;

	case IROp::FCmpVfpuBit:
		// Computes one VFPU compare condition into SCRATCH1 (0 or 1), then
		// replaces bit (inst.dest >> 4) of VFPU_CC with it.
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);

		switch (VCondition(inst.dest & 0xF)) {
		case VC_EQ:
			regs_.Map(inst);
			FEQ(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_NE:
			regs_.Map(inst);
			FEQ(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_LT:
			regs_.Map(inst);
			FLT(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_LE:
			regs_.Map(inst);
			FLE(32, SCRATCH1, regs_.F(inst.src1), regs_.F(inst.src2));
			break;
		case VC_GT:
			regs_.Map(inst);
			FLT(32, SCRATCH1, regs_.F(inst.src2), regs_.F(inst.src1));
			break;
		case VC_GE:
			regs_.Map(inst);
			FLE(32, SCRATCH1, regs_.F(inst.src2), regs_.F(inst.src1));
			break;
		case VC_EZ:
		case VC_NZ:
			regs_.MapFPR(inst.src1);
			// Zero is either 0x10 or 0x08.
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			ANDI(SCRATCH1, SCRATCH1, 0x18);
			if ((inst.dest & 4) == 0)
				SNEZ(SCRATCH1, SCRATCH1);
			else
				SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_EN:
		case VC_NN:
			regs_.MapFPR(inst.src1);
			// NAN is either 0x100 or 0x200.
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			ANDI(SCRATCH1, SCRATCH1, 0x300);
			if ((inst.dest & 4) == 0)
				SNEZ(SCRATCH1, SCRATCH1);
			else
				SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_EI:
		case VC_NI:
			regs_.MapFPR(inst.src1);
			// Infinity is either 0x80 or 0x01.
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			ANDI(SCRATCH1, SCRATCH1, 0x81);
			if ((inst.dest & 4) == 0)
				SNEZ(SCRATCH1, SCRATCH1);
			else
				SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_ES:
		case VC_NS:
			regs_.MapFPR(inst.src1);
			// Infinity is either 0x80 or 0x01, NAN is either 0x100 or 0x200.
			FCLASS(32, SCRATCH1, regs_.F(inst.src1));
			ANDI(SCRATCH1, SCRATCH1, 0x381);
			if ((inst.dest & 4) == 0)
				SNEZ(SCRATCH1, SCRATCH1);
			else
				SEQZ(SCRATCH1, SCRATCH1);
			break;
		case VC_TR:
			LI(SCRATCH1, 1);
			break;
		case VC_FL:
			LI(SCRATCH1, 0);
			break;
		}

		ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~(1 << (inst.dest >> 4)));
		if ((inst.dest >> 4) != 0)
			SLLI(SCRATCH1, SCRATCH1, inst.dest >> 4);
		OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		break;

	case IROp::FCmpVfpuAggregate:
		// Computes the "any" (bit 4) and "all" (bit 5) aggregate bits of
		// VFPU_CC from the comparison bits selected by the inst.dest mask.
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		if (inst.dest == 1) {
			// Single-bit mask: any == all == that bit.
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			// Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30.
			NEG(SCRATCH1, SCRATCH1);
			ANDI(SCRATCH1, SCRATCH1, 0x30);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		} else {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			// No bits set: both any and all are zero, nothing more to compute.
			FixupBranch skipZero = BEQ(SCRATCH1, R_ZERO);

			// To compare to inst.dest for "all", let's simply subtract it and compare to zero.
			ADDI(SCRATCH1, SCRATCH1, -inst.dest);
			SEQZ(SCRATCH1, SCRATCH1);
			// Now we combine with the "any" bit.
			SLLI(SCRATCH1, SCRATCH1, 5);
			ORI(SCRATCH1, SCRATCH1, 0x10);

			SetJumpTarget(skipZero);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR rounding mode control ops. RISC-V FP instructions can take a
// per-instruction ("dynamic") rounding mode, so UpdateRoundingMode is a no-op.
void RiscVJitBackend::CompIR_RoundingMode(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::RestoreRoundingMode:
		RestoreRoundingMode();
		break;

	case IROp::ApplyRoundingMode:
		ApplyRoundingMode();
		break;

	case IROp::UpdateRoundingMode:
		// We don't need to do anything, instructions allow a "dynamic" rounding mode.
		break;

	default:
		INVALIDOP;
		break;
	}
}
// Compiles IR "special" float ops. Sin/cos/asin call out to the VFPU helper
// functions via the hard-float C ABI (arg/result in F10/fa0); rsqrt and recip
// are emitted inline using a 1.0 constant and FDIV.
void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
	CONDITIONAL_DISABLE;

#ifdef __riscv_float_abi_soft
#error Currently hard float is required.
#endif

	// Calls float func(float) with src1 in F10 and moves the F10 result to dest.
	auto callFuncF_F = [&](float (*func)(float)) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

		// It might be in a non-volatile register.
		// TODO: May have to handle a transfer if SIMD here.
		if (regs_.IsFPRMapped(inst.src1)) {
			FMV(32, F10, regs_.F(inst.src1));
		} else {
			// Not mapped: load the argument straight from the MIPS context.
			int offset = offsetof(MIPSState, f) + inst.src1 * 4;
			FL(32, F10, CTXREG, offset);
		}
		QuickCallFunction(func, SCRATCH1);

		regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
		// If it's already F10, we're done - MapReg doesn't actually overwrite the reg in that case.
		if (regs_.F(inst.dest) != F10) {
			FMV(32, regs_.F(inst.dest), F10);
		}

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	RiscVReg tempReg = INVALID_REG;
	switch (inst.op) {
	case IROp::FSin:
		callFuncF_F(&vfpu_sin);
		break;

	case IROp::FCos:
		callFuncF_F(&vfpu_cos);
		break;

	case IROp::FRSqrt:
		tempReg = regs_.MapWithFPRTemp(inst);
		FSQRT(32, regs_.F(inst.dest), regs_.F(inst.src1));

		// Ugh, we can't really avoid a temp here. Probably not worth a permanent one.
		QuickFLI(32, tempReg, 1.0f, SCRATCH1);
		FDIV(32, regs_.F(inst.dest), tempReg, regs_.F(inst.dest));
		break;

	case IROp::FRecip:
		if (inst.dest != inst.src1) {
			// This is the easy case: materialize 1.0 directly in dest.
			regs_.Map(inst);
			LI(SCRATCH1, 1.0f);
			FMV(FMv::W, FMv::X, regs_.F(inst.dest), SCRATCH1);
			FDIV(32, regs_.F(inst.dest), regs_.F(inst.dest), regs_.F(inst.src1));
		} else {
			tempReg = regs_.MapWithFPRTemp(inst);
			QuickFLI(32, tempReg, 1.0f, SCRATCH1);
			FDIV(32, regs_.F(inst.dest), tempReg, regs_.F(inst.src1));
		}
		break;

	case IROp::FAsin:
		callFuncF_F(&vfpu_asin);
		break;

	default:
		INVALIDOP;
		break;
	}
}
} // namespace MIPSComp