CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/RiscV/RiscVCompALU.cpp
Views: 1401
// Copyright (c) 2023- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "Common/CPUDetect.h"18#include "Core/MemMap.h"19#include "Core/MIPS/RiscV/RiscVJit.h"20#include "Core/MIPS/RiscV/RiscVRegCache.h"2122// This file contains compilation for integer / arithmetic / logic related instructions.23//24// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.25// Currently known non working ones should have DISABLE. No flags because that's in IR already.2627// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }28#define CONDITIONAL_DISABLE {}29#define DISABLE { CompIR_Generic(inst); return; }30#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }3132namespace MIPSComp {3334using namespace RiscVGen;35using namespace RiscVJitConstants;3637void RiscVJitBackend::CompIR_Arith(IRInst inst) {38CONDITIONAL_DISABLE;3940bool allowPtrMath = true;41#ifdef MASKED_PSP_MEMORY42// Since we modify it, we can't safely.43allowPtrMath = false;44#endif4546// RISC-V only adds signed immediates, so rewrite a small enough subtract to an add.47// We use -2047 and 2048 here because the range swaps.48if (inst.op == IROp::SubConst && (int32_t)inst.constant >= -2047 && (int32_t)inst.constant <= 2048) {49inst.op = IROp::AddConst;50inst.constant = (uint32_t)-(int32_t)inst.constant;51}5253switch (inst.op) {54case IROp::Add:55regs_.Map(inst);56ADDW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));57regs_.MarkGPRDirty(inst.dest, true);58break;5960case IROp::Sub:61regs_.Map(inst);62SUBW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));63regs_.MarkGPRDirty(inst.dest, true);64break;6566case IROp::AddConst:67if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {68// Typical of stack pointer updates.69if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) {70regs_.MarkGPRAsPointerDirty(inst.dest);71ADDI(regs_.RPtr(inst.dest), regs_.RPtr(inst.dest), inst.constant);72} else {73regs_.Map(inst);74ADDIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);75regs_.MarkGPRDirty(inst.dest, true);76}77} else {78regs_.Map(inst);79LI(SCRATCH1, (int32_t)inst.constant);80ADDW(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);81regs_.MarkGPRDirty(inst.dest, true);82}83break;8485case IROp::SubConst:86regs_.Map(inst);87LI(SCRATCH1, (int32_t)inst.constant);88SUBW(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);89regs_.MarkGPRDirty(inst.dest, true);90break;9192case IROp::Neg:93regs_.Map(inst);94SUBW(regs_.R(inst.dest), R_ZERO, regs_.R(inst.src1));95regs_.MarkGPRDirty(inst.dest, true);96break;9798default:99INVALIDOP;100break;101}102}103104void RiscVJitBackend::CompIR_Logic(IRInst inst) {105CONDITIONAL_DISABLE;106107bool resultNormalized = false;108switch (inst.op) {109case IROp::And:110if (inst.src1 != inst.src2) {111regs_.Map(inst);112AND(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));113} else if (inst.src1 != inst.dest) {114regs_.Map(inst);115MV(regs_.R(inst.dest), regs_.R(inst.src1));116regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));117}118break;119120case IROp::Or:121if (inst.src1 != inst.src2) {122// If both were normalized before, the result is normalized.123resultNormalized = regs_.IsNormalized32(inst.src1) && regs_.IsNormalized32(inst.src2);124regs_.Map(inst);125OR(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));126regs_.MarkGPRDirty(inst.dest, resultNormalized);127} else if (inst.src1 != inst.dest) {128regs_.Map(inst);129MV(regs_.R(inst.dest), regs_.R(inst.src1));130regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));131}132break;133134case IROp::Xor:135if (inst.src1 == inst.src2) {136regs_.SetGPRImm(inst.dest, 0);137} else {138regs_.Map(inst);139XOR(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));140}141break;142143case IROp::AndConst:144resultNormalized = regs_.IsNormalized32(inst.src1);145regs_.Map(inst);146if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {147ANDI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);148} else {149LI(SCRATCH1, (int32_t)inst.constant);150AND(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);151}152// If the sign bits aren't cleared, and it was normalized before - it still is.153if ((inst.constant & 0x80000000) != 0 && resultNormalized)154regs_.MarkGPRDirty(inst.dest, true);155// Otherwise, if we cleared the sign bits, it's naturally normalized.156else if ((inst.constant & 0x80000000) == 0)157regs_.MarkGPRDirty(inst.dest, true);158break;159160case IROp::OrConst:161resultNormalized = regs_.IsNormalized32(inst.src1);162regs_.Map(inst);163if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {164ORI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);165} else {166LI(SCRATCH1, (int32_t)inst.constant);167OR(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);168}169// Since our constant is normalized, oring its bits in won't hurt normalization.170regs_.MarkGPRDirty(inst.dest, resultNormalized);171break;172173case IROp::XorConst:174regs_.Map(inst);175if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {176XORI(regs_.R(inst.dest), regs_.R(inst.src1), inst.constant);177} else {178LI(SCRATCH1, (int32_t)inst.constant);179XOR(regs_.R(inst.dest), regs_.R(inst.src1), SCRATCH1);180}181break;182183case IROp::Not:184regs_.Map(inst);185NOT(regs_.R(inst.dest), regs_.R(inst.src1));186break;187188default:189INVALIDOP;190break;191}192}193194void RiscVJitBackend::CompIR_Assign(IRInst inst) {195CONDITIONAL_DISABLE;196197switch (inst.op) {198case IROp::Mov:199if (inst.dest != inst.src1) {200regs_.Map(inst);201MV(regs_.R(inst.dest), regs_.R(inst.src1));202regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));203}204break;205206case IROp::Ext8to32:207regs_.Map(inst);208if (cpu_info.RiscV_Zbb) {209SEXT_B(regs_.R(inst.dest), regs_.R(inst.src1));210} else {211SLLI(regs_.R(inst.dest), regs_.R(inst.src1), 24);212SRAIW(regs_.R(inst.dest), regs_.R(inst.dest), 24);213}214regs_.MarkGPRDirty(inst.dest, true);215break;216217case IROp::Ext16to32:218regs_.Map(inst);219if (cpu_info.RiscV_Zbb) {220SEXT_H(regs_.R(inst.dest), regs_.R(inst.src1));221} else {222SLLI(regs_.R(inst.dest), regs_.R(inst.src1), 16);223SRAIW(regs_.R(inst.dest), regs_.R(inst.dest), 16);224}225regs_.MarkGPRDirty(inst.dest, true);226break;227228default:229INVALIDOP;230break;231}232}233234void RiscVJitBackend::CompIR_Bits(IRInst inst) {235CONDITIONAL_DISABLE;236237switch (inst.op) {238case IROp::ReverseBits:239if (cpu_info.RiscV_Zbb) {240regs_.Map(inst);241// Start by reversing bytes (note: this puts in upper 32 of XLEN.)242REV8(regs_.R(inst.dest), regs_.R(inst.src1));243244// Swap nibbles.245LI(SCRATCH1, (s32)0xF0F0F0F0);246SRLI(SCRATCH2, regs_.R(inst.dest), XLEN - 32 - 4);247AND(SCRATCH2, SCRATCH2, SCRATCH1);248if (XLEN >= 64)249SRLI(regs_.R(inst.dest), regs_.R(inst.dest), XLEN - 28);250else251SLLI(regs_.R(inst.dest), regs_.R(inst.dest), 4);252SRLIW(SCRATCH1, SCRATCH1, 4);253AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);254OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);255256// Now the consecutive pairs.257LI(SCRATCH1, (s32)0x33333333);258SRLI(SCRATCH2, regs_.R(inst.dest), 2);259AND(SCRATCH2, SCRATCH2, SCRATCH1);260AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);261SLLIW(regs_.R(inst.dest), regs_.R(inst.dest), 2);262OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);263264// And finally the even and odd bits.265LI(SCRATCH1, (s32)0x55555555);266SRLI(SCRATCH2, regs_.R(inst.dest), 1);267AND(SCRATCH2, SCRATCH2, SCRATCH1);268AND(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH1);269SLLIW(regs_.R(inst.dest), regs_.R(inst.dest), 1);270OR(regs_.R(inst.dest), regs_.R(inst.dest), SCRATCH2);271} else {272CompIR_Generic(inst);273}274break;275276case IROp::BSwap16:277CompIR_Generic(inst);278break;279280case IROp::BSwap32:281if (cpu_info.RiscV_Zbb) {282regs_.Map(inst);283REV8(regs_.R(inst.dest), regs_.R(inst.src1));284if (XLEN >= 64) {285// REV8 swaps the entire register, so get the 32 highest bits.286SRAI(regs_.R(inst.dest), regs_.R(inst.dest), XLEN - 32);287regs_.MarkGPRDirty(inst.dest, true);288}289} else {290CompIR_Generic(inst);291}292break;293294case IROp::Clz:295if (cpu_info.RiscV_Zbb) {296regs_.Map(inst);297// This even sets to 32 when zero, perfect.298CLZW(regs_.R(inst.dest), regs_.R(inst.src1));299regs_.MarkGPRDirty(inst.dest, true);300} else {301CompIR_Generic(inst);302}303break;304305default:306INVALIDOP;307break;308}309}310311void RiscVJitBackend::CompIR_Shift(IRInst inst) {312CONDITIONAL_DISABLE;313314switch (inst.op) {315case IROp::Shl:316regs_.Map(inst);317SLLW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));318regs_.MarkGPRDirty(inst.dest, true);319break;320321case IROp::Shr:322regs_.Map(inst);323SRLW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));324regs_.MarkGPRDirty(inst.dest, true);325break;326327case IROp::Sar:328regs_.Map(inst);329SRAW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));330regs_.MarkGPRDirty(inst.dest, true);331break;332333case IROp::Ror:334if (cpu_info.RiscV_Zbb) {335regs_.Map(inst);336RORW(regs_.R(inst.dest), regs_.R(inst.src1), regs_.R(inst.src2));337regs_.MarkGPRDirty(inst.dest, true);338} else {339CompIR_Generic(inst);340}341break;342343case IROp::ShlImm:344// Shouldn't happen, but let's be safe of any passes that modify the ops.345if (inst.src2 >= 32) {346regs_.SetGPRImm(inst.dest, 0);347} else if (inst.src2 == 0) {348if (inst.dest != inst.src1) {349regs_.Map(inst);350MV(regs_.R(inst.dest), regs_.R(inst.src1));351regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));352}353} else {354regs_.Map(inst);355SLLIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);356regs_.MarkGPRDirty(inst.dest, true);357}358break;359360case IROp::ShrImm:361// Shouldn't happen, but let's be safe of any passes that modify the ops.362if (inst.src2 >= 32) {363regs_.SetGPRImm(inst.dest, 0);364} else if (inst.src2 == 0) {365if (inst.dest != inst.src1) {366regs_.Map(inst);367MV(regs_.R(inst.dest), regs_.R(inst.src1));368regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));369}370} else {371regs_.Map(inst);372SRLIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);373regs_.MarkGPRDirty(inst.dest, true);374}375break;376377case IROp::SarImm:378// Shouldn't happen, but let's be safe of any passes that modify the ops.379if (inst.src2 >= 32) {380regs_.Map(inst);381SRAIW(regs_.R(inst.dest), regs_.R(inst.src1), 31);382regs_.MarkGPRDirty(inst.dest, true);383} else if (inst.src2 == 0) {384if (inst.dest != inst.src1) {385regs_.Map(inst);386MV(regs_.R(inst.dest), regs_.R(inst.src1));387regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));388}389} else {390regs_.Map(inst);391SRAIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2);392regs_.MarkGPRDirty(inst.dest, true);393}394break;395396case IROp::RorImm:397if (inst.src2 == 0) {398if (inst.dest != inst.src1) {399regs_.Map(inst);400MV(regs_.R(inst.dest), regs_.R(inst.src1));401regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));402}403} else if (cpu_info.RiscV_Zbb) {404regs_.Map(inst);405RORIW(regs_.R(inst.dest), regs_.R(inst.src1), inst.src2 & 31);406regs_.MarkGPRDirty(inst.dest, true);407} else {408CompIR_Generic(inst);409}410break;411412default:413INVALIDOP;414break;415}416}417418void RiscVJitBackend::CompIR_Compare(IRInst inst) {419CONDITIONAL_DISABLE;420421RiscVReg lhs = INVALID_REG;422RiscVReg rhs = INVALID_REG;423switch (inst.op) {424case IROp::Slt:425regs_.Map(inst);426NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);427428SLT(regs_.R(inst.dest), lhs, rhs);429regs_.MarkGPRDirty(inst.dest, true);430break;431432case IROp::SltConst:433if (inst.constant == 0) {434// Basically, getting the sign bit. Let's shift instead.435regs_.Map(inst);436SRLIW(regs_.R(inst.dest), regs_.R(inst.src1), 31);437regs_.MarkGPRDirty(inst.dest, true);438} else {439regs_.Map(inst);440NormalizeSrc1(inst, &lhs, SCRATCH1, false);441442if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {443SLTI(regs_.R(inst.dest), lhs, (int32_t)inst.constant);444} else {445LI(SCRATCH2, (int32_t)inst.constant);446SLT(regs_.R(inst.dest), lhs, SCRATCH2);447}448regs_.MarkGPRDirty(inst.dest, true);449}450break;451452case IROp::SltU:453regs_.Map(inst);454// It's still fine to sign extend, the biggest just get even bigger.455NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);456457SLTU(regs_.R(inst.dest), lhs, rhs);458regs_.MarkGPRDirty(inst.dest, true);459break;460461case IROp::SltUConst:462if (inst.constant == 0) {463regs_.SetGPRImm(inst.dest, 0);464} else {465regs_.Map(inst);466NormalizeSrc1(inst, &lhs, SCRATCH1, false);467468// We sign extend because we're comparing against something normalized.469// It's also the most efficient to set.470if ((int32_t)inst.constant >= -2048 && (int32_t)inst.constant <= 2047) {471SLTIU(regs_.R(inst.dest), lhs, (int32_t)inst.constant);472} else {473LI(SCRATCH2, (int32_t)inst.constant);474SLTU(regs_.R(inst.dest), lhs, SCRATCH2);475}476regs_.MarkGPRDirty(inst.dest, true);477}478break;479480default:481INVALIDOP;482break;483}484}485486void RiscVJitBackend::CompIR_CondAssign(IRInst inst) {487CONDITIONAL_DISABLE;488489RiscVReg lhs = INVALID_REG;490RiscVReg rhs = INVALID_REG;491FixupBranch fixup;492switch (inst.op) {493case IROp::MovZ:494case IROp::MovNZ:495if (inst.dest == inst.src2)496return;497498// We could have a "zero" with wrong upper due to XOR, so we have to normalize.499regs_.Map(inst);500NormalizeSrc1(inst, &lhs, SCRATCH1, true);501502switch (inst.op) {503case IROp::MovZ:504fixup = BNE(lhs, R_ZERO);505break;506case IROp::MovNZ:507fixup = BEQ(lhs, R_ZERO);508break;509default:510INVALIDOP;511break;512}513514MV(regs_.R(inst.dest), regs_.R(inst.src2));515SetJumpTarget(fixup);516break;517518case IROp::Max:519if (inst.src1 != inst.src2) {520if (cpu_info.RiscV_Zbb) {521regs_.Map(inst);522NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);523MAX(regs_.R(inst.dest), lhs, rhs);524// Because we had to normalize the inputs, the output is normalized.525regs_.MarkGPRDirty(inst.dest, true);526} else {527CompIR_Generic(inst);528}529} else if (inst.dest != inst.src1) {530regs_.Map(inst);531MV(regs_.R(inst.dest), regs_.R(inst.src1));532regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));533}534break;535536case IROp::Min:537if (inst.src1 != inst.src2) {538if (cpu_info.RiscV_Zbb) {539regs_.Map(inst);540NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);541MIN(regs_.R(inst.dest), lhs, rhs);542// Because we had to normalize the inputs, the output is normalized.543regs_.MarkGPRDirty(inst.dest, true);544} else {545CompIR_Generic(inst);546}547} else if (inst.dest != inst.src1) {548regs_.Map(inst);549MV(regs_.R(inst.dest), regs_.R(inst.src1));550regs_.MarkGPRDirty(inst.dest, regs_.IsNormalized32(inst.src1));551}552break;553554default:555INVALIDOP;556break;557}558}559560void RiscVJitBackend::CompIR_HiLo(IRInst inst) {561CONDITIONAL_DISABLE;562563switch (inst.op) {564case IROp::MtLo:565regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });566// First, clear the bits we're replacing.567SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);568SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);569// And now, insert the low 32 bits of src1.570if (cpu_info.RiscV_Zba) {571ADD_UW(regs_.R(IRREG_LO), regs_.R(inst.src1), regs_.R(IRREG_LO));572} else {573SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);574SRLI(SCRATCH1, SCRATCH1, XLEN - 32);575ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);576}577break;578579case IROp::MtHi:580regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });581SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);582if (cpu_info.RiscV_Zba) {583ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);584} else {585SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);586SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);587ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);588}589break;590591case IROp::MfLo:592regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });593// It won't be normalized, but that's fine...594MV(regs_.R(inst.dest), regs_.R(IRREG_LO));595break;596597case IROp::MfHi:598regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });599SRAI(regs_.R(inst.dest), regs_.R(IRREG_LO), 32);600if (XLEN == 64)601regs_.MarkGPRDirty(inst.dest, true);602break;603604default:605INVALIDOP;606break;607}608}609610void RiscVJitBackend::CompIR_Mult(IRInst inst) {611CONDITIONAL_DISABLE;612613auto makeArgsUnsigned = [&](RiscVReg *lhs, RiscVReg *rhs) {614if (cpu_info.RiscV_Zba) {615ZEXT_W(SCRATCH1, regs_.R(inst.src1));616ZEXT_W(SCRATCH2, regs_.R(inst.src2));617} else {618SLLI(SCRATCH1, regs_.R(inst.src1), XLEN - 32);619SRLI(SCRATCH1, SCRATCH1, XLEN - 32);620SLLI(SCRATCH2, regs_.R(inst.src2), XLEN - 32);621SRLI(SCRATCH2, SCRATCH2, XLEN - 32);622}623*lhs = SCRATCH1;624*rhs = SCRATCH2;625};626627RiscVReg lhs = INVALID_REG;628RiscVReg rhs = INVALID_REG;629switch (inst.op) {630case IROp::Mult:631// TODO: Maybe IR could simplify when HI is not needed or clobbered?632regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });633NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);634MUL(regs_.R(IRREG_LO), lhs, rhs);635break;636637case IROp::MultU:638// This is an "anti-norm32" case. Let's just zero always.639// TODO: If we could know that LO was only needed, we could use MULW.640regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });641makeArgsUnsigned(&lhs, &rhs);642MUL(regs_.R(IRREG_LO), lhs, rhs);643break;644645case IROp::Madd:646regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });647NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);648MUL(SCRATCH1, lhs, rhs);649ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);650break;651652case IROp::MaddU:653regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });654makeArgsUnsigned(&lhs, &rhs);655MUL(SCRATCH1, lhs, rhs);656ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);657break;658659case IROp::Msub:660regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });661NormalizeSrc12(inst, &lhs, &rhs, SCRATCH1, SCRATCH2, true);662MUL(SCRATCH1, lhs, rhs);663SUB(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);664break;665666case IROp::MsubU:667regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });668makeArgsUnsigned(&lhs, &rhs);669MUL(SCRATCH1, lhs, rhs);670SUB(regs_.R(IRREG_LO), regs_.R(IRREG_LO), SCRATCH1);671break;672673default:674INVALIDOP;675break;676}677}678679void RiscVJitBackend::CompIR_Div(IRInst inst) {680CONDITIONAL_DISABLE;681682RiscVReg numReg, denomReg;683switch (inst.op) {684case IROp::Div:685regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });686// We have to do this because of the divide by zero and overflow checks below.687NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);688DIVW(regs_.R(IRREG_LO), numReg, denomReg);689REMW(R_RA, numReg, denomReg);690// Now to combine them. We'll do more with them below...691SLLI(R_RA, R_RA, 32);692if (cpu_info.RiscV_Zba) {693ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);694} else {695SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);696SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);697ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);698}699700// Now some tweaks for divide by zero and overflow.701{702// Start with divide by zero, remainder is fine.703FixupBranch skipNonZero = BNE(denomReg, R_ZERO);704FixupBranch keepNegOne = BGE(numReg, R_ZERO);705// Clear the -1 and replace it with 1.706SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 32);707SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 32);708ADDI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), 1);709SetJumpTarget(keepNegOne);710SetJumpTarget(skipNonZero);711712// For overflow, RISC-V sets LO right, but remainder to zero.713// Cheating a bit by using R_RA as a temp...714LI(R_RA, (int32_t)0x80000000);715FixupBranch notMostNegative = BNE(numReg, R_RA);716LI(R_RA, -1);717FixupBranch notNegativeOne = BNE(denomReg, R_RA);718// Take our R_RA and put it in the high bits.719SLLI(R_RA, R_RA, 32);720OR(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);721SetJumpTarget(notNegativeOne);722SetJumpTarget(notMostNegative);723}724break;725726case IROp::DivU:727regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });728// We have to do this because of the divide by zero check below.729NormalizeSrc12(inst, &numReg, &denomReg, SCRATCH1, SCRATCH2, true);730DIVUW(regs_.R(IRREG_LO), numReg, denomReg);731REMUW(R_RA, numReg, denomReg);732733// On divide by zero, everything is correct already except the 0xFFFF case.734{735FixupBranch skipNonZero = BNE(denomReg, R_ZERO);736// Luckily, we don't need SCRATCH2/denomReg anymore.737LI(SCRATCH2, 0xFFFF);738FixupBranch keepNegOne = BLTU(SCRATCH2, numReg);739MV(regs_.R(IRREG_LO), SCRATCH2);740SetJumpTarget(keepNegOne);741SetJumpTarget(skipNonZero);742}743744// Now combine the remainder in.745SLLI(R_RA, R_RA, 32);746if (cpu_info.RiscV_Zba) {747ADD_UW(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);748} else {749SLLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);750SRLI(regs_.R(IRREG_LO), regs_.R(IRREG_LO), XLEN - 32);751ADD(regs_.R(IRREG_LO), regs_.R(IRREG_LO), R_RA);752}753break;754755default:756INVALIDOP;757break;758}759}760761} // namespace MIPSComp762763764