Path: blob/main/contrib/llvm-project/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
35294 views
//===-- RISCVLegalizerInfo.cpp ----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the targeting of the Machinelegalizer class for RISC-V.
/// \todo This should be generated by TableGen.
//===----------------------------------------------------------------------===//

#include "RISCVLegalizerInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVSubtarget.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutor.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Type.h"

using namespace llvm;
using namespace LegalityPredicates;
using namespace LegalizeMutations;

// Is this type supported by scalar FP arithmetic operations given the current
// subtarget.
static LegalityPredicate typeIsScalarFPArith(unsigned TypeIdx,
                                             const RISCVSubtarget &ST) {
  return [=, &ST](const LegalityQuery &Query) {
    return Query.Types[TypeIdx].isScalar() &&
           ((ST.hasStdExtZfh() && Query.Types[TypeIdx].getSizeInBits() == 16) ||
            (ST.hasStdExtF() && Query.Types[TypeIdx].getSizeInBits() == 32) ||
            (ST.hasStdExtD() && Query.Types[TypeIdx].getSizeInBits() == 64));
  };
}

// Is this type one of IntOrFPVecTys and supported by the subtarget's vector
// unit? Requires vector instructions, I64 vector support for 64-bit scalar
// elements, and an ELEN of 64 for types whose known minimum element count is 1.
static LegalityPredicate
typeIsLegalIntOrFPVec(unsigned TypeIdx,
                      std::initializer_list<LLT> IntOrFPVecTys,
                      const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getScalarSizeInBits() != 64 ||
            ST.hasVInstructionsI64()) &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };

  return all(typeInSet(TypeIdx, IntOrFPVecTys), P);
}

// Is this type one of BoolVecTys (i1-element vectors) and supported by the
// subtarget's vector unit? Types with a known minimum element count of 1
// additionally require an ELEN of 64.
static LegalityPredicate
typeIsLegalBoolVec(unsigned TypeIdx, std::initializer_list<LLT> BoolVecTys,
                   const RISCVSubtarget &ST) {
  LegalityPredicate P = [=, &ST](const LegalityQuery &Query) {
    return ST.hasVInstructions() &&
           (Query.Types[TypeIdx].getElementCount().getKnownMinValue() != 1 ||
            ST.getELen() == 64);
  };
  return all(typeInSet(TypeIdx, BoolVecTys), P);
}

RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
    : STI(ST), XLen(STI.getXLen()), sXLen(LLT::scalar(XLen)) {
  const LLT sDoubleXLen = LLT::scalar(2 * XLen);
  const LLT p0 = LLT::pointer(0, XLen);
  const LLT s1 = LLT::scalar(1);
  const LLT s8 = LLT::scalar(8);
  const LLT s16 = LLT::scalar(16);
  const LLT s32 = LLT::scalar(32);
  const LLT s64 = LLT::scalar(64);

  const LLT nxv1s1 = LLT::scalable_vector(1, s1);
  const LLT nxv2s1 = LLT::scalable_vector(2, s1);
  const LLT nxv4s1 = LLT::scalable_vector(4, s1);
  const LLT nxv8s1 = LLT::scalable_vector(8, s1);
  const LLT nxv16s1 = LLT::scalable_vector(16, s1);
  const LLT nxv32s1 = LLT::scalable_vector(32, s1);
  const LLT nxv64s1 = LLT::scalable_vector(64, s1);

  const LLT nxv1s8 = LLT::scalable_vector(1, s8);
  const LLT nxv2s8 = LLT::scalable_vector(2, s8);
  const LLT nxv4s8 = LLT::scalable_vector(4, s8);
  const LLT nxv8s8 = LLT::scalable_vector(8, s8);
  const LLT nxv16s8 = LLT::scalable_vector(16, s8);
  const LLT nxv32s8 = LLT::scalable_vector(32, s8);
  const LLT nxv64s8 = LLT::scalable_vector(64, s8);

  const LLT nxv1s16 = LLT::scalable_vector(1, s16);
  const LLT nxv2s16 = LLT::scalable_vector(2, s16);
  const LLT nxv4s16 = LLT::scalable_vector(4, s16);
  const LLT nxv8s16 = LLT::scalable_vector(8, s16);
  const LLT nxv16s16 = LLT::scalable_vector(16, s16);
  const LLT nxv32s16 = LLT::scalable_vector(32, s16);

  const LLT nxv1s32 = LLT::scalable_vector(1, s32);
  const LLT nxv2s32 = LLT::scalable_vector(2, s32);
  const LLT nxv4s32 = LLT::scalable_vector(4, s32);
  const LLT nxv8s32 = LLT::scalable_vector(8, s32);
  const LLT nxv16s32 = LLT::scalable_vector(16, s32);

  const LLT nxv1s64 = LLT::scalable_vector(1, s64);
  const LLT nxv2s64 = LLT::scalable_vector(2, s64);
  const LLT nxv4s64 = LLT::scalable_vector(4, s64);
  const LLT nxv8s64 = LLT::scalable_vector(8, s64);

  using namespace TargetOpcode;

  auto BoolVecTys = {nxv1s1, nxv2s1, nxv4s1, nxv8s1, nxv16s1, nxv32s1, nxv64s1};

  auto IntOrFPVecTys = {nxv1s8, nxv2s8, nxv4s8, nxv8s8, nxv16s8, nxv32s8,
                        nxv64s8, nxv1s16, nxv2s16, nxv4s16, nxv8s16, nxv16s16,
                        nxv32s16, nxv1s32, nxv2s32, nxv4s32, nxv8s32, nxv16s32,
                        nxv1s64, nxv2s64, nxv4s64, nxv8s64};

  getActionDefinitionsBuilder({G_ADD, G_SUB, G_AND, G_OR, G_XOR})
      .legalFor({s32, sXLen})
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(
      {G_UADDE, G_UADDO, G_USUBE, G_USUBO}).lower();

  getActionDefinitionsBuilder({G_SADDO, G_SSUBO}).minScalar(0, sXLen).lower();

  // TODO: Use Vector Single-Width Saturating Instructions for vector types.
  getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT})
      .lower();

  auto &ShiftActions = getActionDefinitionsBuilder({G_ASHR, G_LSHR, G_SHL});
  if (ST.is64Bit())
    ShiftActions.customFor({{s32, s32}});
  ShiftActions.legalFor({{s32, s32}, {s32, sXLen}, {sXLen, sXLen}})
      .widenScalarToNextPow2(0)
      .clampScalar(1, s32, sXLen)
      .clampScalar(0, s32, sXLen)
      .minScalarSameAs(1, 0)
      .widenScalarToNextPow2(1);

  auto &ExtActions =
      getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)));
  if (ST.is64Bit()) {
    ExtActions.legalFor({{sXLen, s32}});
    getActionDefinitionsBuilder(G_SEXT_INREG)
        .customFor({sXLen})
        .maxScalar(0, sXLen)
        .lower();
  } else {
    getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower();
  }
  ExtActions.customIf(typeIsLegalBoolVec(1, BoolVecTys, ST))
      .maxScalar(0, sXLen);

  // Merge/Unmerge
  for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) {
    auto &MergeUnmergeActions = getActionDefinitionsBuilder(Op);
    unsigned BigTyIdx = Op == G_MERGE_VALUES ? 0 : 1;
    unsigned LitTyIdx = Op == G_MERGE_VALUES ? 1 : 0;
    if (XLen == 32 && ST.hasStdExtD()) {
      // On RV32 with D, an s64 can be moved as a pair of s32 halves.
      MergeUnmergeActions.legalIf(
          all(typeIs(BigTyIdx, s64), typeIs(LitTyIdx, s32)));
    }
    MergeUnmergeActions.widenScalarToNextPow2(LitTyIdx, XLen)
        .widenScalarToNextPow2(BigTyIdx, XLen)
        .clampScalar(LitTyIdx, sXLen, sXLen)
        .clampScalar(BigTyIdx, sXLen, sXLen);
  }

  getActionDefinitionsBuilder({G_FSHL, G_FSHR}).lower();

  auto &RotateActions = getActionDefinitionsBuilder({G_ROTL, G_ROTR});
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb()) {
    RotateActions.legalFor({{s32, sXLen}, {sXLen, sXLen}});
    // Widen s32 rotate amount to s64 so SDAG patterns will match.
    if (ST.is64Bit())
      RotateActions.widenScalarIf(all(typeIs(0, s32), typeIs(1, s32)),
                                  changeTo(1, sXLen));
  }
  RotateActions.lower();

  getActionDefinitionsBuilder(G_BITREVERSE).maxScalar(0, sXLen).lower();

  getActionDefinitionsBuilder(G_BITCAST).legalIf(
      all(LegalityPredicates::any(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(0, BoolVecTys, ST)),
          LegalityPredicates::any(typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST),
                                  typeIsLegalBoolVec(1, BoolVecTys, ST))));

  auto &BSWAPActions = getActionDefinitionsBuilder(G_BSWAP);
  if (ST.hasStdExtZbb() || ST.hasStdExtZbkb())
    BSWAPActions.legalFor({sXLen}).clampScalar(0, sXLen, sXLen);
  else
    BSWAPActions.maxScalar(0, sXLen).lower();

  auto &CountZerosActions = getActionDefinitionsBuilder({G_CTLZ, G_CTTZ});
  auto &CountZerosUndefActions =
      getActionDefinitionsBuilder({G_CTLZ_ZERO_UNDEF, G_CTTZ_ZERO_UNDEF});
  if (ST.hasStdExtZbb()) {
    CountZerosActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CountZerosActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
    CountZerosUndefActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0);
  }
  CountZerosUndefActions.lower();

  auto &CTPOPActions = getActionDefinitionsBuilder(G_CTPOP);
  if (ST.hasStdExtZbb()) {
    CTPOPActions.legalFor({{s32, s32}, {sXLen, sXLen}})
        .clampScalar(0, s32, sXLen)
        .widenScalarToNextPow2(0)
        .scalarSameSizeAs(1, 0);
  } else {
    CTPOPActions.maxScalar(0, sXLen).scalarSameSizeAs(1, 0).lower();
  }

  auto &ConstantActions = getActionDefinitionsBuilder(G_CONSTANT);
  ConstantActions.legalFor({s32, p0});
  if (ST.is64Bit())
    ConstantActions.customFor({s64});
  ConstantActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen);

  // TODO: transform illegal vector types into legal vector type
  getActionDefinitionsBuilder(
      {G_IMPLICIT_DEF, G_CONSTANT_FOLD_BARRIER, G_FREEZE})
      .legalFor({s32, sXLen, p0})
      .legalIf(typeIsLegalBoolVec(0, BoolVecTys, ST))
      .legalIf(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen);

  getActionDefinitionsBuilder(G_ICMP)
      .legalFor({{sXLen, sXLen}, {sXLen, p0}})
      .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST),
                   typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST)))
      .widenScalarOrEltToNextPow2OrMinSize(1, 8)
      .clampScalar(1, sXLen, sXLen)
      .clampScalar(0, sXLen, sXLen);

  auto &SelectActions =
      getActionDefinitionsBuilder(G_SELECT)
          .legalFor({{s32, sXLen}, {p0, sXLen}})
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIsLegalBoolVec(1, BoolVecTys, ST)));
  if (XLen == 64 || ST.hasStdExtD())
    SelectActions.legalFor({{s64, sXLen}});
  SelectActions.widenScalarToNextPow2(0)
      .clampScalar(0, s32, (XLen == 64 || ST.hasStdExtD()) ? s64 : s32)
      .clampScalar(1, sXLen, sXLen);

  auto &LoadStoreActions =
      getActionDefinitionsBuilder({G_LOAD, G_STORE})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8},
                                     {s32, p0, s16, 16},
                                     {s32, p0, s32, 32},
                                     {p0, p0, sXLen, XLen}});
  auto &ExtLoadActions =
      getActionDefinitionsBuilder({G_SEXTLOAD, G_ZEXTLOAD})
          .legalForTypesWithMemDesc({{s32, p0, s8, 8}, {s32, p0, s16, 16}});
  if (XLen == 64) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s8, 8},
                                               {s64, p0, s16, 16},
                                               {s64, p0, s32, 32},
                                               {s64, p0, s64, 64}});
    ExtLoadActions.legalForTypesWithMemDesc(
        {{s64, p0, s8, 8}, {s64, p0, s16, 16}, {s64, p0, s32, 32}});
  } else if (ST.hasStdExtD()) {
    LoadStoreActions.legalForTypesWithMemDesc({{s64, p0, s64, 64}});
  }
  LoadStoreActions.clampScalar(0, s32, sXLen).lower();
  ExtLoadActions.widenScalarToNextPow2(0).clampScalar(0, s32, sXLen).lower();

  getActionDefinitionsBuilder({G_PTR_ADD, G_PTRMASK}).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_PTRTOINT)
      .legalFor({{sXLen, p0}})
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder(G_INTTOPTR)
      .legalFor({{p0, sXLen}})
      .clampScalar(1, sXLen, sXLen);

  getActionDefinitionsBuilder(G_BRCOND).legalFor({sXLen}).minScalar(0, sXLen);

  getActionDefinitionsBuilder(G_BRJT).legalFor({{p0, sXLen}});

  getActionDefinitionsBuilder(G_BRINDIRECT).legalFor({p0});

  getActionDefinitionsBuilder(G_PHI)
      .legalFor({p0, sXLen})
      .widenScalarToNextPow2(0)
      .clampScalar(0, sXLen, sXLen);

  getActionDefinitionsBuilder({G_GLOBAL_VALUE, G_JUMP_TABLE, G_CONSTANT_POOL})
      .legalFor({p0});

  if (ST.hasStdExtZmmul()) {
    getActionDefinitionsBuilder(G_MUL)
        .legalFor({s32, sXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, s32, sXLen);

    // clang-format off
    getActionDefinitionsBuilder({G_SMULH, G_UMULH})
        .legalFor({sXLen})
        .lower();
    // clang-format on

    getActionDefinitionsBuilder({G_SMULO, G_UMULO}).minScalar(0, sXLen).lower();
  } else {
    getActionDefinitionsBuilder(G_MUL)
        .libcallFor({sXLen, sDoubleXLen})
        .widenScalarToNextPow2(0)
        .clampScalar(0, sXLen, sDoubleXLen);

    getActionDefinitionsBuilder({G_SMULH, G_UMULH}).lowerFor({sXLen});

    getActionDefinitionsBuilder({G_SMULO, G_UMULO})
        .minScalar(0, sXLen)
        // Widen sXLen to sDoubleXLen so we can use a single libcall to get
        // the low bits for the mul result and high bits to do the overflow
        // check.
        .widenScalarIf(typeIs(0, sXLen),
                       LegalizeMutations::changeTo(0, sDoubleXLen))
        .lower();
  }

  if (ST.hasStdExtM()) {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .legalFor({s32, sXLen})
        .libcallFor({sDoubleXLen})
        .clampScalar(0, s32, sDoubleXLen)
        .widenScalarToNextPow2(0);
  } else {
    getActionDefinitionsBuilder({G_UDIV, G_SDIV, G_UREM, G_SREM})
        .libcallFor({sXLen, sDoubleXLen})
        .clampScalar(0, sXLen, sDoubleXLen)
        .widenScalarToNextPow2(0);
  }

  // TODO: Use libcall for sDoubleXLen.
  getActionDefinitionsBuilder({G_UDIVREM, G_SDIVREM}).lower();

  auto &AbsActions = getActionDefinitionsBuilder(G_ABS);
  if (ST.hasStdExtZbb())
    AbsActions.customFor({s32, sXLen}).minScalar(0, sXLen);
  AbsActions.lower();

  auto &MinMaxActions =
      getActionDefinitionsBuilder({G_UMAX, G_UMIN, G_SMAX, G_SMIN});
  if (ST.hasStdExtZbb())
    MinMaxActions.legalFor({sXLen}).minScalar(0, sXLen);
  MinMaxActions.lower();

  getActionDefinitionsBuilder(G_FRAME_INDEX).legalFor({p0});

  getActionDefinitionsBuilder({G_MEMCPY, G_MEMMOVE, G_MEMSET}).libcall();

  getActionDefinitionsBuilder(G_DYN_STACKALLOC).lower();

  // FP Operations

  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
      .legalIf(typeIsScalarFPArith(0, ST));

  getActionDefinitionsBuilder(G_FREM)
      .libcallFor({s32, s64})
      .minScalar(0, s32)
      .scalarize(0);

  getActionDefinitionsBuilder(G_FCOPYSIGN)
      .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));

  // FIXME: Use Zfhmin.
  getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s32)(Query) &&
                typeIs(1, s64)(Query)) ||
               (ST.hasStdExtZfh() && typeIs(0, s16)(Query) &&
                typeIs(1, s32)(Query)) ||
               (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s16)(Query) &&
                typeIs(1, s64)(Query));
      });
  getActionDefinitionsBuilder(G_FPEXT).legalIf(
      [=, &ST](const LegalityQuery &Query) -> bool {
        return (ST.hasStdExtD() && typeIs(0, s64)(Query) &&
                typeIs(1, s32)(Query)) ||
               (ST.hasStdExtZfh() && typeIs(0, s32)(Query) &&
                typeIs(1, s16)(Query)) ||
               (ST.hasStdExtZfh() && ST.hasStdExtD() && typeIs(0, s64)(Query) &&
                typeIs(1, s16)(Query));
      });

  getActionDefinitionsBuilder(G_FCMP)
      .legalIf(all(typeIs(0, sXLen), typeIsScalarFPArith(1, ST)))
      .clampScalar(0, sXLen, sXLen);

  // TODO: Support vector version of G_IS_FPCLASS.
  getActionDefinitionsBuilder(G_IS_FPCLASS)
      .customIf(all(typeIs(0, s1), typeIsScalarFPArith(1, ST)));

  getActionDefinitionsBuilder(G_FCONSTANT)
      .legalIf(typeIsScalarFPArith(0, ST))
      .lowerFor({s32, s64});

  getActionDefinitionsBuilder({G_FPTOSI, G_FPTOUI})
      .legalIf(all(typeInSet(0, {s32, sXLen}), typeIsScalarFPArith(1, ST)))
      .widenScalarToNextPow2(0)
      .clampScalar(0, s32, sXLen)
      .libcall();

  getActionDefinitionsBuilder({G_SITOFP, G_UITOFP})
      .legalIf(all(typeIsScalarFPArith(0, ST), typeInSet(1, {s32, sXLen})))
      .widenScalarToNextPow2(1)
      .clampScalar(1, s32, sXLen);

  // FIXME: We can do custom inline expansion like SelectionDAG.
  // FIXME: Legal with Zfa.
  getActionDefinitionsBuilder({G_FCEIL, G_FFLOOR})
      .libcallFor({s32, s64});

  getActionDefinitionsBuilder(G_VASTART).customFor({p0});

  // va_list must be a pointer, but most sized types are pretty easy to handle
  // as the destination.
  getActionDefinitionsBuilder(G_VAARG)
      // TODO: Implement narrowScalar and widenScalar for G_VAARG for types
      // outside the [s32, sXLen] range.
      .clampScalar(0, s32, sXLen)
      .lowerForCartesianProduct({s32, sXLen, p0}, {p0});

  getActionDefinitionsBuilder(G_VSCALE)
      .clampScalar(0, sXLen, sXLen)
      .customFor({sXLen});

  auto &SplatActions =
      getActionDefinitionsBuilder(G_SPLAT_VECTOR)
          .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST),
                       typeIs(1, sXLen)))
          .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1)));
  // Handle case of s64 element vectors on RV32. If the subtarget does not have
  // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget
  // does have f64, then we don't know whether the type is an f64 or an i64,
  // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it,
  // depending on how the instructions it consumes are legalized. They are not
  // legalized yet since legalization is in reverse postorder, so we cannot
  // make the decision at this moment.
  if (XLen == 32) {
    if (ST.hasVInstructionsF64() && ST.hasStdExtD())
      SplatActions.legalIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
    else if (ST.hasVInstructionsI64())
      SplatActions.customIf(all(
          typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64)));
  }

  SplatActions.clampScalar(1, sXLen, sXLen);

  getLegacyLegalizerInfo().computeTables();
}

// Custom legalization of target intrinsics. Currently only handles
// Intrinsic::vacopy, which is expanded into a load of the source va_list
// pointer followed by a store of it into the destination va_list slot.
bool RISCVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper,
                                           MachineInstr &MI) const {
  Intrinsic::ID IntrinsicID = cast<GIntrinsic>(MI).getIntrinsicID();
  switch (IntrinsicID) {
  default:
    return false;
  case Intrinsic::vacopy: {
    // vacopy arguments must be legal because of the intrinsic signature.
    // No need to check here.

    MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
    MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
    MachineFunction &MF = *MI.getMF();
    const DataLayout &DL = MIRBuilder.getDataLayout();
    LLVMContext &Ctx = MF.getFunction().getContext();

    Register DstLst = MI.getOperand(1).getReg();
    LLT PtrTy = MRI.getType(DstLst);

    // Load the source va_list
    Align Alignment = DL.getABITypeAlign(getTypeForLLT(PtrTy, Ctx));
    MachineMemOperand *LoadMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOLoad, PtrTy, Alignment);
    auto Tmp = MIRBuilder.buildLoad(PtrTy, MI.getOperand(2), *LoadMMO);

    // Store the result in the destination va_list
    MachineMemOperand *StoreMMO = MF.getMachineMemOperand(
        MachinePointerInfo(), MachineMemOperand::MOStore, PtrTy, Alignment);
    MIRBuilder.buildStore(Tmp, DstLst, *StoreMMO);

    MI.eraseFromParent();
    return true;
  }
  }
}

// Custom legalization for G_SHL/G_ASHR/G_LSHR. NOTE(review): this hook is
// registered via customFor({{s32, s32}}) only when ST.is64Bit() (see the
// constructor), which is why the constant is rebuilt with a hard-coded s64.
bool RISCVLegalizerInfo::legalizeShlAshrLshr(
    MachineInstr &MI, MachineIRBuilder &MIRBuilder,
    GISelChangeObserver &Observer) const {
  assert(MI.getOpcode() == TargetOpcode::G_ASHR ||
         MI.getOpcode() == TargetOpcode::G_LSHR ||
         MI.getOpcode() == TargetOpcode::G_SHL);
  MachineRegisterInfo &MRI = *MIRBuilder.getMRI();
  // If the shift amount is a G_CONSTANT, promote it to a 64 bit type so the
  // imported patterns can select it later. Either way, it will be legal.
  Register AmtReg = MI.getOperand(2).getReg();
  auto VRegAndVal = getIConstantVRegValWithLookThrough(AmtReg, MRI);
  if (!VRegAndVal)
    return true;
  // Check the shift amount is in range for an immediate form.
  uint64_t Amount = VRegAndVal->Value.getZExtValue();
  if (Amount > 31)
    return true; // This will have to remain a register variant.
  auto ExtCst = MIRBuilder.buildConstant(LLT::scalar(64), Amount);
  Observer.changingInstr(MI);
  MI.getOperand(2).setReg(ExtCst.getReg(0));
  Observer.changedInstr(MI);
  return true;
}

bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI,
                                         MachineIRBuilder &MIRBuilder) const {
  // Stores the address of the VarArgsFrameIndex slot into the memory location
  assert(MI.getOpcode() == TargetOpcode::G_VASTART);
  MachineFunction *MF = MI.getParent()->getParent();
  RISCVMachineFunctionInfo *FuncInfo = MF->getInfo<RISCVMachineFunctionInfo>();
  int FI = FuncInfo->getVarArgsFrameIndex();
  LLT AddrTy = MIRBuilder.getMRI()->getType(MI.getOperand(0).getReg());
  auto FINAddr = MIRBuilder.buildFrameIndex(AddrTy, FI);
  assert(MI.hasOneMemOperand());
  MIRBuilder.buildStore(FINAddr, MI.getOperand(0).getReg(),
                        *MI.memoperands()[0]);
  MI.eraseFromParent();
  return true;
}

// Decide whether a large integer constant is cheaper to load from the
// constant pool than to materialize with an instruction sequence, based on
// RISCVMatInt sequence costs and the subtarget's getMaxBuildIntsCost().
bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm,
                                                bool ShouldOptForSize) const {
  assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64);
  int64_t Imm = APImm.getSExtValue();
  // All simm32 constants should be handled by isel.
  // NOTE: The getMaxBuildIntsCost call below should return a value >= 2 making
  // this check redundant, but small immediates are common so this check
  // should have better compile time.
  if (isInt<32>(Imm))
    return false;

  // We only need to cost the immediate, if constant pool lowering is enabled.
  if (!STI.useConstantPoolForLargeInts())
    return false;

  RISCVMatInt::InstSeq Seq = RISCVMatInt::generateInstSeq(Imm, STI);
  if (Seq.size() <= STI.getMaxBuildIntsCost())
    return false;

  // Optimizations below are disabled for opt size. If we're optimizing for
  // size, use a constant pool.
  if (ShouldOptForSize)
    return true;
  //
  // Special case. See if we can build the constant as (ADD (SLLI X, C), X) do
  // that if it will avoid a constant pool.
  // It will require an extra temporary register though.
  // If we have Zba we can use (ADD_UW X, (SLLI X, 32)) to handle cases where
  // low and high 32 bits are the same and bit 31 and 63 are set.
  unsigned ShiftAmt, AddOpc;
  RISCVMatInt::InstSeq SeqLo =
      RISCVMatInt::generateTwoRegInstSeq(Imm, STI, ShiftAmt, AddOpc);
  return !(!SeqLo.empty() && (SeqLo.size() + 2) <= STI.getMaxBuildIntsCost());
}

// Lower G_VSCALE into arithmetic on VLENB (vscale = VLENB / 8), picking
// shifts or multiplies depending on the constant multiplier.
bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI,
                                        MachineIRBuilder &MIB) const {
  const LLT XLenTy(STI.getXLenVT());
  Register Dst = MI.getOperand(0).getReg();

  // We define our scalable vector types for lmul=1 to use a 64 bit known
  // minimum size. e.g. <vscale x 2 x i32>. VLENB is in bytes so we calculate
  // vscale as VLENB / 8.
  static_assert(RISCV::RVVBitsPerBlock == 64, "Unexpected bits per block!");
  if (STI.getRealMinVLen() < RISCV::RVVBitsPerBlock)
    // Support for VLEN==32 is incomplete.
    return false;

  // We assume VLENB is a multiple of 8. We manually choose the best shift
  // here because SimplifyDemandedBits isn't always able to simplify it.
  uint64_t Val = MI.getOperand(1).getCImm()->getZExtValue();
  if (isPowerOf2_64(Val)) {
    uint64_t Log2 = Log2_64(Val);
    if (Log2 < 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildLShr(Dst, VLENB, MIB.buildConstant(XLenTy, 3 - Log2));
    } else if (Log2 > 3) {
      auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
      MIB.buildShl(Dst, VLENB, MIB.buildConstant(XLenTy, Log2 - 3));
    } else {
      MIB.buildInstr(RISCV::G_READ_VLENB, {Dst}, {});
    }
  } else if ((Val % 8) == 0) {
    // If the multiplier is a multiple of 8, scale it down to avoid needing
    // to shift the VLENB value.
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    MIB.buildMul(Dst, VLENB, MIB.buildConstant(XLenTy, Val / 8));
  } else {
    auto VLENB = MIB.buildInstr(RISCV::G_READ_VLENB, {XLenTy}, {});
    auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3));
    MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val));
  }
  MI.eraseFromParent();
  return true;
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI,
                                     MachineIRBuilder &MIB) const {

  unsigned Opc = MI.getOpcode();
  assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT ||
         Opc == TargetOpcode::G_ANYEXT);

  MachineRegisterInfo &MRI = *MIB.getMRI();
  Register Dst = MI.getOperand(0).getReg();
  Register Src = MI.getOperand(1).getReg();

  LLT DstTy = MRI.getType(Dst);
  int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1;
  LLT DstEltTy = DstTy.getElementType();
  auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0));
  auto SplatTrue =
      MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal));
  MIB.buildSelect(Dst, Src, SplatTrue, SplatZero);

  MI.eraseFromParent();
  return true;
}

/// Return the type of the mask type suitable for masking the provided
/// vector type. This is simply an i1 element type vector of the same
/// (possibly scalable) length.
static LLT getMaskTypeFor(LLT VecTy) {
  assert(VecTy.isVector());
  ElementCount EC = VecTy.getElementCount();
  return LLT::vector(EC, LLT::scalar(1));
}

/// Creates an all ones mask suitable for masking a vector of type VecTy with
/// vector length VL.
static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL,
                                            MachineIRBuilder &MIB,
                                            MachineRegisterInfo &MRI) {
  LLT MaskTy = getMaskTypeFor(VecTy);
  return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL});
}

/// Gets the two common "VL" operands: an all-ones mask and the vector length.
/// VecTy is a scalable vector type.
static std::pair<MachineInstrBuilder, Register>
buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB,
                  MachineRegisterInfo &MRI) {
  LLT VecTy = Dst.getLLTTy(MRI);
  assert(VecTy.isScalableVector() && "Expecting scalable container type");
  // X0 as the VL operand encodes "maximum vector length" (VLMAX).
  Register VL(RISCV::X0);
  MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI);
  return {Mask, VL};
}

static MachineInstrBuilder
buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo,
                         Register Hi, Register VL, MachineIRBuilder &MIB,
                         MachineRegisterInfo &MRI) {
  // TODO: If the Hi bits of the splat are undefined, then it's fine to just
  // splat Lo even if it might be sign extended. I don't think we have
  // introduced a case where we're build a s64 where the upper bits are undef
  // yet.

  // Fall back to a stack store and stride x0 vector load.
  // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in
  // preprocessDAG in SDAG.
  return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst},
                        {Passthru, Lo, Hi, VL});
}

static MachineInstrBuilder
buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru,
                         const SrcOp &Scalar, Register VL,
                         MachineIRBuilder &MIB, MachineRegisterInfo &MRI) {
  assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!");
  // Split the s64 scalar into its s32 halves and splat them as a pair.
  auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar);
  return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0),
                                  Unmerge.getReg(1), VL, MIB, MRI);
}

// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
// Splats of s1 types that have constant value can be legalized as VMSET_VL or
// VMCLR_VL.
bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI,
                                             MachineIRBuilder &MIB) const {
  assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR);

  MachineRegisterInfo &MRI = *MIB.getMRI();

  Register Dst = MI.getOperand(0).getReg();
  Register SplatVal = MI.getOperand(1).getReg();

  LLT VecTy = MRI.getType(Dst);
  LLT XLenTy(STI.getXLenVT());

  // Handle case of s64 element vectors on rv32
  if (XLenTy.getSizeInBits() == 32 &&
      VecTy.getElementType().getSizeInBits() == 64) {
    auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI);
    buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB,
                             MRI);
    MI.eraseFromParent();
    return true;
  }

  // All-zeros or all-ones splats are handled specially.
  MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal);
  if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }
  if (isNullOrNullSplat(SplatValMI, MRI)) {
    auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second;
    MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL});
    MI.eraseFromParent();
    return true;
  }

  // Handle non-constant mask splat (i.e. not sure if it's all zeros or all
  // ones) by promoting it to an s8 splat.
  LLT InterEltTy = LLT::scalar(8);
  LLT InterTy = VecTy.changeElementType(InterEltTy);
  auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal);
  auto And =
      MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1));
  auto LHS = MIB.buildSplatVector(InterTy, And);
  auto ZeroSplat =
      MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0));
  MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat);
  MI.eraseFromParent();
  return true;
}

// Dispatch for all opcodes marked custom in the rule table built by the
// constructor. Returns false for opcodes with no custom handling here.
bool RISCVLegalizerInfo::legalizeCustom(
    LegalizerHelper &Helper, MachineInstr &MI,
    LostDebugLocObserver &LocObserver) const {
  MachineIRBuilder &MIRBuilder = Helper.MIRBuilder;
  GISelChangeObserver &Observer = Helper.Observer;
  MachineFunction &MF = *MI.getParent()->getParent();
  switch (MI.getOpcode()) {
  default:
    // No idea what to do.
    return false;
  case TargetOpcode::G_ABS:
    return Helper.lowerAbsToMaxNeg(MI);
  // TODO: G_FCONSTANT
  case TargetOpcode::G_CONSTANT: {
    const Function &F = MF.getFunction();
    // TODO: if PSI and BFI are present, add " ||
    // llvm::shouldOptForSize(*CurMBB, PSI, BFI)".
    bool ShouldOptForSize = F.hasOptSize() || F.hasMinSize();
    const ConstantInt *ConstVal = MI.getOperand(1).getCImm();
    if (!shouldBeInConstantPool(ConstVal->getValue(), ShouldOptForSize))
      return true;
    return Helper.lowerConstant(MI);
  }
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_ASHR:
  case TargetOpcode::G_LSHR:
    return legalizeShlAshrLshr(MI, MIRBuilder, Observer);
  case TargetOpcode::G_SEXT_INREG: {
    // Source size of 32 is sext.w.
    int64_t SizeInBits = MI.getOperand(2).getImm();
    if (SizeInBits == 32)
      return true;

    return Helper.lower(MI, 0, /* Unused hint type */ LLT()) ==
           LegalizerHelper::Legalized;
  }
  case TargetOpcode::G_IS_FPCLASS: {
    Register GISFPCLASS = MI.getOperand(0).getReg();
    Register Src = MI.getOperand(1).getReg();
    const MachineOperand &ImmOp = MI.getOperand(2);
    MachineIRBuilder MIB(MI);

    // Turn LLVM IR's floating point classes to that in RISC-V,
    // by simply rotating the 10-bit immediate right by two bits.
    APInt GFpClassImm(10, static_cast<uint64_t>(ImmOp.getImm()));
    auto FClassMask = MIB.buildConstant(sXLen, GFpClassImm.rotr(2).zext(XLen));
    auto ConstZero = MIB.buildConstant(sXLen, 0);

    auto GFClass = MIB.buildInstr(RISCV::G_FCLASS, {sXLen}, {Src});
    auto And = MIB.buildAnd(sXLen, GFClass, FClassMask);
    MIB.buildICmp(CmpInst::ICMP_NE, GISFPCLASS, And, ConstZero);

    MI.eraseFromParent();
    return true;
  }
  case TargetOpcode::G_VASTART:
    return legalizeVAStart(MI, MIRBuilder);
  case TargetOpcode::G_VSCALE:
    return legalizeVScale(MI, MIRBuilder);
  case TargetOpcode::G_ZEXT:
  case TargetOpcode::G_SEXT:
  case TargetOpcode::G_ANYEXT:
    return legalizeExt(MI, MIRBuilder);
  case TargetOpcode::G_SPLAT_VECTOR:
    return legalizeSplatVector(MI, MIRBuilder);
  }

  llvm_unreachable("expected switch to return");
}