Path: blob/main/contrib/llvm-project/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp
//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"
#define PASS_NAME "AArch64 Instruction Selection"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  const AArch64Subtarget *Subtarget;

public:
  AArch64DAGToDAGISel() = delete;

  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithUXTXRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }
  template <unsigned Size, unsigned Max>
  bool SelectAddrModeIndexedUImm(SDValue N, SDValue &Base, SDValue &OffImm) {
    // Test if there is an appropriate addressing mode and check if the
    // immediate fits.
    bool Found = SelectAddrModeIndexed(N, Size, Base, OffImm);
    if (Found) {
      if (auto *CI = dyn_cast<ConstantSDNode>(OffImm)) {
        int64_t C = CI->getSExtValue();
        if (C <= Max)
          return true;
      }
    }

    // Otherwise, base only, materialize address in register.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
    return true;
  }

  template <int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template <int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  bool SelectExtractHigh(SDValue N, SDValue &Res) {
    if (Subtarget->isLittleEndian() && N->getOpcode() == ISD::BITCAST)
      N = N->getOperand(0);
    if (N->getOpcode() != ISD::EXTRACT_SUBVECTOR ||
        !isa<ConstantSDNode>(N->getOperand(1)))
      return false;
    EVT VT = N->getValueType(0);
    EVT LVT = N->getOperand(0).getValueType();
    unsigned Index = N->getConstantOperandVal(1);
    if (!VT.is64BitVector() || !LVT.is128BitVector() ||
        Index != VT.getVectorNumElements())
      return false;
    Res = N->getOperand(0);
    return true;
  }

  bool SelectRoundingVLShr(SDValue N, SDValue &Res1, SDValue &Res2) {
    if (N.getOpcode() != AArch64ISD::VLSHR)
      return false;
    SDValue Op = N->getOperand(0);
    EVT VT = Op.getValueType();
    unsigned ShtAmt = N->getConstantOperandVal(1);
    if (ShtAmt > VT.getScalarSizeInBits() / 2 || Op.getOpcode() != ISD::ADD)
      return false;

    APInt Imm;
    if (Op.getOperand(1).getOpcode() == AArch64ISD::MOVIshift)
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0)
                      << Op.getOperand(1).getConstantOperandVal(1));
    else if (Op.getOperand(1).getOpcode() == AArch64ISD::DUP &&
             isa<ConstantSDNode>(Op.getOperand(1).getOperand(0)))
      Imm = APInt(VT.getScalarSizeInBits(),
                  Op.getOperand(1).getConstantOperandVal(0));
    else
      return false;

    if (Imm != 1ULL << (ShtAmt - 1))
      return false;

    Res1 = Op.getOperand(0);
    Res2 = CurDAG->getTargetConstant(ShtAmt, SDLoc(N), MVT::i32);
    return true;
  }
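
  // Worked example for SelectRoundingVLShr above: on v8i16 with ShtAmt == 4
  // it matches (AArch64ISD::VLSHR (add X, (AArch64ISD::DUP 8)), 4), since
  // 8 == 1 << (4 - 1) is exactly the rounding bias for a shift by 4, making
  // the whole expression a rounding shift right of X by 4.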

  bool SelectDupZeroOrUndef(SDValue N) {
    switch (N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  bool SelectDupZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (isNullConstant(Opnd0))
        return true;
      if (isNullFPConstant(Opnd0))
        return true;
      break;
    }
    }

    return false;
  }

  bool SelectDupNegativeZero(SDValue N) {
    switch (N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      ConstantFPSDNode *Const = dyn_cast<ConstantFPSDNode>(N->getOperand(0));
      return Const && Const->isZero() && Const->isNegative();
    }
    }

    return false;
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Negate>
  bool SelectSVEAddSubSSatImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubSSatImm(N, VT, Imm, Shift, Negate);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVECpyDupImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVECpyDupImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  bool SelectSVEShiftSplatImmR(SDValue N, SDValue &Imm) {
    if (N->getOpcode() != ISD::SPLAT_VECTOR)
      return false;

    EVT EltVT = N->getValueType(0).getVectorElementType();
    return SelectSVEShiftImm(N->getOperand(0), /* Low */ 1,
                             /* High */ EltVT.getFixedSizeInBits(),
                             /* AllowSaturation */ true, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  template <signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }
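
  // Worked example for SelectCntImm above: with Min = 1, Max = 16,
  // Scale = 16 and Shift = false, a constant of 32 yields Imm = 2
  // (32 % 16 == 0 and 32 / 16 == 2 lies in [1, 16]). With Shift = true the
  // incoming constant is taken as a log2 value, so 5 stands for 32.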

  template <signed Max, signed Scale>
  bool SelectEXTImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();

    if (MulImm >= 0 && MulImm <= Max) {
      MulImm *= Scale;
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  template <unsigned BaseReg, unsigned Max>
  bool ImmToReg(SDValue N, SDValue &Imm) {
    if (auto *CI = dyn_cast<ConstantSDNode>(N)) {
      uint64_t C = CI->getZExtValue();

      if (C > Max)
        return false;

      Imm = CurDAG->getRegister(BaseReg + C, MVT::Other);
      return true;
    }
    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element, that is
  /// returned unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  // Similar to above, except the register must start at a multiple of the
  // tuple, e.g. z2 for a 2-tuple, or z8 for a 4-tuple.
  SDValue createZMulTuple(ArrayRef<SDValue> Regs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  void SelectPtrauthAuth(SDNode *N);
  void SelectPtrauthResign(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri,
                            bool IsIntr = false);
  void SelectContiguousMultiVectorLoad(SDNode *N, unsigned NumVecs,
                                       unsigned Scale, unsigned Opc_ri,
                                       unsigned Opc_rr);
  void SelectDestructiveMultiIntrinsic(SDNode *N, unsigned NumVecs,
                                       bool IsZmMulti, unsigned Opcode,
                                       bool HasPred = false);
  void SelectPExtPair(SDNode *N, unsigned Opc);
  void SelectWhilePair(SDNode *N, unsigned Opc);
  void SelectCVTIntrinsic(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectClamp(SDNode *N, unsigned NumVecs, unsigned Opcode);
  void SelectUnaryMultiIntrinsic(SDNode *N, unsigned NumOutVecs,
                                 bool IsTupleInput, unsigned Opc);
  void SelectFrintFromVT(SDNode *N, unsigned NumVecs, unsigned Opcode);

  template <unsigned MaxIdx, unsigned Scale>
  void SelectMultiVectorMove(SDNode *N, unsigned NumVecs, unsigned BaseReg,
                             unsigned Op);
  void SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                              unsigned Op, unsigned MaxIdx, unsigned Scale,
                              unsigned BaseReg = 0);
  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectMultiVectorLuti(SDNode *Node, unsigned NumOutVecs, unsigned Opc,
                             uint32_t MaxImm);

  template <unsigned MaxIdx, unsigned Scale>
  bool SelectSMETileSlice(SDValue N, SDValue &Vector, SDValue &Offset) {
    return SelectSMETileSlice(N, MaxIdx, Vector, Offset, Scale);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool trySelectCastFixedLengthToScalableVector(SDNode *N);
  bool trySelectCastScalableToFixedLengthVector(SDNode *N);

  bool trySelectXAR(SDNode *N);

  // Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  bool SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFoldingALU(SDValue V, bool LSL = false) const;
  bool isWorthFoldingAddr(SDValue V, unsigned Size) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  template <unsigned RegWidth>
  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosRecipOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosRecipOperand(SDValue N, SDValue &FixedPos,
                                     unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVEAddSubSSatImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift,
                              bool Negate);
  bool SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);
  bool SelectSMETileSlice(SDValue N, unsigned MaxSize, SDValue &Vector,
                          SDValue &Offset, unsigned Scale = 1);

  bool SelectAllActivePredicate(SDValue N);
  bool SelectAnyPredicate(SDValue N);
};

class AArch64DAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID;
  explicit AArch64DAGToDAGISelLegacy(AArch64TargetMachine &tm,
                                     CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<AArch64DAGToDAGISel>(tm, OptLevel)) {}
};
} // end anonymous namespace

char AArch64DAGToDAGISelLegacy::ID = 0;

INITIALIZE_PASS(AArch64DAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)

/// isIntImmediate - This method tests to see if the node is a constant
/// operand. If so Imm will receive the 32-bit value.
static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
  if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    Imm = C->getZExtValue();
    return true;
  }
  return false;
}

// isIntImmediate - This method tests to see if N is a constant operand.
// If so Imm will receive the value.
static bool isIntImmediate(SDValue N, uint64_t &Imm) {
  return isIntImmediate(N.getNode(), Imm);
}

// isOpcWithIntImmediate - This method tests to see if the node is a specific
// opcode and that it has an immediate integer right operand.
// If so Imm will receive the 32-bit value.
static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
                                  uint64_t &Imm) {
  return N->getOpcode() == Opc &&
         isIntImmediate(N->getOperand(1).getNode(), Imm);
}

// isIntImmediateEq - This method tests to see if N is a constant operand that
// is equivalent to 'ImmExpected'.
#ifndef NDEBUG
static bool isIntImmediateEq(SDValue N, const uint64_t ImmExpected) {
  uint64_t Imm;
  if (!isIntImmediate(N.getNode(), Imm))
    return false;
  return Imm == ImmExpected;
}
#endif

bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    const SDValue &Op, const InlineAsm::ConstraintCode ConstraintID,
    std::vector<SDValue> &OutOps) {
  switch (ConstraintID) {
  default:
    llvm_unreachable("Unexpected asm memory constraint");
  case InlineAsm::ConstraintCode::m:
  case InlineAsm::ConstraintCode::o:
  case InlineAsm::ConstraintCode::Q:
    // We need to make sure that this one operand does not end up in XZR, thus
    // require the address to be in a PointerRegClass register.
    const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    SDLoc dl(Op);
    SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    SDValue NewOp =
        SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
                                       dl, Op.getValueType(),
                                       Op, RC), 0);
    OutOps.push_back(NewOp);
    return false;
  }
  return true;
}

/// SelectArithImmed - Select an immediate value that can be represented as
/// a 12-bit value shifted left by either 0 or 12. If so, return true with
/// Val set to the 12-bit value and Shift set to the shifter operand.
bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
                                           SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  uint64_t Immed = N.getNode()->getAsZExtVal();
  unsigned ShiftAmt;

  if (Immed >> 12 == 0) {
    ShiftAmt = 0;
  } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    ShiftAmt = 12;
    Immed = Immed >> 12;
  } else
    return false;

  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
  SDLoc dl(N);
  Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
  Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
  return true;
}
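
// For example, 0x123 is selected as (Val = 0x123, LSL #0) and 0x456000 as
// (Val = 0x456, LSL #12), while 0x456001 is rejected because its low 12 bits
// are not zero. SelectNegArithImmed below handles the negated forms, so an
// add of -5 can be selected as a sub of 5.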

/// SelectNegArithImmed - As above, but negates the value before trying to
/// select it.
bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
                                              SDValue &Shift) {
  // This function is called from the addsub_shifted_imm ComplexPattern,
  // which lists [imm] as the list of opcodes it's interested in; however,
  // we still need to check whether the operand is actually an immediate
  // here because the ComplexPattern opcode list is only used in
  // root-level opcode matching.
  if (!isa<ConstantSDNode>(N.getNode()))
    return false;

  // The immediate operand must be a 24-bit zero-extended immediate.
  uint64_t Immed = N.getNode()->getAsZExtVal();

  // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
  // have the opposite effect on the C flag, so this pattern mustn't match under
  // those circumstances.
  if (Immed == 0)
    return false;

  if (N.getValueType() == MVT::i32)
    Immed = ~((uint32_t)Immed) + 1;
  else
    Immed = ~Immed + 1ULL;
  if (Immed & 0xFFFFFFFFFF000000ULL)
    return false;

  Immed &= 0xFFFFFFULL;
  return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
                          Shift);
}

/// getShiftTypeForNode - Translate a shift node to the corresponding
/// ShiftType value.
static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
  switch (N.getOpcode()) {
  default:
    return AArch64_AM::InvalidShiftExtend;
  case ISD::SHL:
    return AArch64_AM::LSL;
  case ISD::SRL:
    return AArch64_AM::LSR;
  case ISD::SRA:
    return AArch64_AM::ASR;
  case ISD::ROTR:
    return AArch64_AM::ROR;
  }
}

/// Determine whether it is worth it to fold SHL into the addressing
/// mode.
static bool isWorthFoldingSHL(SDValue V) {
  assert(V.getOpcode() == ISD::SHL && "invalid opcode");
  // It is worth folding logical shift of up to three places.
  auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
  if (!CSD)
    return false;
  unsigned ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 3)
    return false;

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = V.getNode();
  for (SDNode *UI : Node->uses())
    if (!isa<MemSDNode>(*UI))
      for (SDNode *UII : UI->uses())
        if (!isa<MemSDNode>(*UII))
          return false;
  return true;
}

/// Determine whether it is worth folding V into an extended register
/// addressing mode.
bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a slow shift, folding a shift into multiple loads
  // costs additional micro-ops.
  if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16))
    return false;

  // Check whether we're going to emit the address arithmetic anyway because
  // it's used by a non-address operation.
  if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V))
    return true;
  if (V.getOpcode() == ISD::ADD) {
    const SDValue LHS = V.getOperand(0);
    const SDValue RHS = V.getOperand(1);
    if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
      return true;
    if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
      return true;
  }

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// and (shl/srl/sra, x, c), mask --> shl (srl/sra, x, c1), c2
/// to select more shifted-register forms.
bool AArch64DAGToDAGISel::SelectShiftedRegisterFromAnd(SDValue N, SDValue &Reg,
                                                       SDValue &Shift) {
  EVT VT = N.getValueType();
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  if (N->getOpcode() != ISD::AND || !N->hasOneUse())
    return false;
  SDValue LHS = N.getOperand(0);
  if (!LHS->hasOneUse())
    return false;

  unsigned LHSOpcode = LHS->getOpcode();
  if (LHSOpcode != ISD::SHL && LHSOpcode != ISD::SRL && LHSOpcode != ISD::SRA)
    return false;

  ConstantSDNode *ShiftAmtNode = dyn_cast<ConstantSDNode>(LHS.getOperand(1));
  if (!ShiftAmtNode)
    return false;

  uint64_t ShiftAmtC = ShiftAmtNode->getZExtValue();
  ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHSC)
    return false;

  APInt AndMask = RHSC->getAPIntValue();
  unsigned LowZBits, MaskLen;
  if (!AndMask.isShiftedMask(LowZBits, MaskLen))
    return false;

  unsigned BitWidth = N.getValueSizeInBits();
  SDLoc DL(LHS);
  uint64_t NewShiftC;
  unsigned NewShiftOp;
  if (LHSOpcode == ISD::SHL) {
    // LowZBits <= ShiftAmtC will fall into isBitfieldPositioningOp
    // BitWidth != LowZBits + MaskLen doesn't match the pattern
    if (LowZBits <= ShiftAmtC || (BitWidth != LowZBits + MaskLen))
      return false;

    NewShiftC = LowZBits - ShiftAmtC;
    NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
  } else {
    if (LowZBits == 0)
      return false;

    // NewShiftC >= BitWidth will fall into isBitfieldExtractOp
    NewShiftC = LowZBits + ShiftAmtC;
    if (NewShiftC >= BitWidth)
      return false;

    // SRA needs all high bits
    if (LHSOpcode == ISD::SRA && (BitWidth != (LowZBits + MaskLen)))
      return false;

    // SRL high bits can be 0 or 1
    if (LHSOpcode == ISD::SRL && (BitWidth > (NewShiftC + MaskLen)))
      return false;

    if (LHSOpcode == ISD::SRL)
      NewShiftOp = VT == MVT::i64 ? AArch64::UBFMXri : AArch64::UBFMWri;
    else
      NewShiftOp = VT == MVT::i64 ? AArch64::SBFMXri : AArch64::SBFMWri;
  }

  assert(NewShiftC < BitWidth && "Invalid shift amount");
  SDValue NewShiftAmt = CurDAG->getTargetConstant(NewShiftC, DL, VT);
  SDValue BitWidthMinus1 = CurDAG->getTargetConstant(BitWidth - 1, DL, VT);
  Reg = SDValue(CurDAG->getMachineNode(NewShiftOp, DL, VT, LHS->getOperand(0),
                                       NewShiftAmt, BitWidthMinus1),
                0);
  unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, LowZBits);
  Shift = CurDAG->getTargetConstant(ShVal, DL, MVT::i32);
  return true;
}
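
// For example, on i32 the pattern (and (srl x, 4), 0x0ffffff0) is selected
// as (shl (UBFMWri x, 8, 31), 4): LowZBits == 4 and MaskLen == 24, so the
// UBFM performs a plain LSR by 8 and the shifted-register operand re-applies
// LSL #4.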

/// getExtendTypeForNode - Translate an extend node to the corresponding
/// ExtendType value.
static AArch64_AM::ShiftExtendType
getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
  if (N.getOpcode() == ISD::SIGN_EXTEND ||
      N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    EVT SrcVT;
    if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
      SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    else
      SrcVT = N.getOperand(0).getValueType();

    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::SXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::SXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::SXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
             N.getOpcode() == ISD::ANY_EXTEND) {
    EVT SrcVT = N.getOperand(0).getValueType();
    if (!IsLoadStore && SrcVT == MVT::i8)
      return AArch64_AM::UXTB;
    else if (!IsLoadStore && SrcVT == MVT::i16)
      return AArch64_AM::UXTH;
    else if (SrcVT == MVT::i32)
      return AArch64_AM::UXTW;
    assert(SrcVT != MVT::i64 && "extend from 64-bits?");

    return AArch64_AM::InvalidShiftExtend;
  } else if (N.getOpcode() == ISD::AND) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return AArch64_AM::InvalidShiftExtend;
    uint64_t AndMask = CSD->getZExtValue();

    switch (AndMask) {
    default:
      return AArch64_AM::InvalidShiftExtend;
    case 0xFF:
      return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    case 0xFFFF:
      return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    case 0xFFFFFFFF:
      return AArch64_AM::UXTW;
    }
  }

  return AArch64_AM::InvalidShiftExtend;
}

/// Determine whether it is worth folding V into an extended register of an
/// Add/Sub. LSL means we are folding into an `add w0, w1, w2, lsl #N`
/// instruction, and the shift should be treated as worth folding even if it
/// has multiple uses.
bool AArch64DAGToDAGISel::isWorthFoldingALU(SDValue V, bool LSL) const {
  // Trivial if we are optimizing for code size or if there is only
  // one use of the value.
  if (CurDAG->shouldOptForSize() || V.hasOneUse())
    return true;

  // If a subtarget has a fastpath LSL we can fold a logical shift into
  // the add/sub and save a cycle.
  if (LSL && Subtarget->hasALULSLFast() && V.getOpcode() == ISD::SHL &&
      V.getConstantOperandVal(1) <= 4 &&
      getExtendTypeForNode(V.getOperand(0)) == AArch64_AM::InvalidShiftExtend)
    return true;

  // It hurts otherwise, since the value will be reused.
  return false;
}

/// SelectShiftedRegister - Select a "shifted register" operand. If the value
/// is not shifted, set the Shift operand to default of "LSL 0". The logical
/// instructions allow the shifted register to be rotated, but the arithmetic
/// instructions do not. The AllowROR parameter specifies whether ROR is
/// supported.
bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
                                                SDValue &Reg, SDValue &Shift) {
  if (SelectShiftedRegisterFromAnd(N, Reg, Shift))
    return true;

  AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
  if (ShType == AArch64_AM::InvalidShiftExtend)
    return false;
  if (!AllowROR && ShType == AArch64_AM::ROR)
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    unsigned BitSize = N.getValueSizeInBits();
    unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);

    Reg = N.getOperand(0);
    Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    return isWorthFoldingALU(N, true);
  }

  return false;
}
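
// For example, for (add x0, (shl x1, 3)) the second operand is selected here
// as x1 with shift "LSL #3", giving "add x0, x1, lsl #3"; the logical
// instructions additionally pass AllowROR = true, so (xor x0, (rotr x1, 7))
// can fold to "eor x0, x1, ror #7".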

/// Instructions that accept extend modifiers like UXTW expect the register
/// being extended to be a GPR32, but the incoming DAG might be acting on a
/// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
/// this is the case.
static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
  if (N.getValueType() == MVT::i32)
    return N;

  SDLoc dl(N);
  return CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl, MVT::i32, N);
}

// Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
template <signed Low, signed High, signed Scale>
bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
  if (!isa<ConstantSDNode>(N))
    return false;

  int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
  if ((MulImm % std::abs(Scale)) == 0) {
    int64_t RDVLImm = MulImm / Scale;
    if ((RDVLImm >= Low) && (RDVLImm <= High)) {
      Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}

/// SelectArithExtendedRegister - Select an "extended register" operand. This
/// operand folds in an extend followed by an optional left shift.
bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
                                                      SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() == ISD::SHL) {
    ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!CSD)
      return false;
    ShiftVal = CSD->getZExtValue();
    if (ShiftVal > 4)
      return false;

    Ext = getExtendTypeForNode(N.getOperand(0));
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0).getOperand(0);
  } else {
    Ext = getExtendTypeForNode(N);
    if (Ext == AArch64_AM::InvalidShiftExtend)
      return false;

    Reg = N.getOperand(0);

    // Don't match if free 32-bit -> 64-bit zext can be used instead. Use the
    // isDef32 as a heuristic for when the operand is likely to be a 32bit def.
    auto isDef32 = [](SDValue N) {
      unsigned Opc = N.getOpcode();
      return Opc != ISD::TRUNCATE && Opc != TargetOpcode::EXTRACT_SUBREG &&
             Opc != ISD::CopyFromReg && Opc != ISD::AssertSext &&
             Opc != ISD::AssertZext && Opc != ISD::AssertAlign &&
             Opc != ISD::FREEZE;
    };
    if (Ext == AArch64_AM::UXTW && Reg->getValueType(0).getSizeInBits() == 32 &&
        isDef32(Reg))
      return false;
  }

  // AArch64 mandates that the RHS of the operation must use the smallest
  // register class that could contain the size being extended from. Thus,
  // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
  // there might not be an actual 32-bit value in the program. We can
  // (harmlessly) synthesize one by injecting an EXTRACT_SUBREG here.
  assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
  Reg = narrowIfNeeded(CurDAG, Reg);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// SelectArithUXTXRegister - Select a "UXTX register" operand. This
/// operand is referenced by instructions that have an SP operand.
bool AArch64DAGToDAGISel::SelectArithUXTXRegister(SDValue N, SDValue &Reg,
                                                  SDValue &Shift) {
  unsigned ShiftVal = 0;
  AArch64_AM::ShiftExtendType Ext;

  if (N.getOpcode() != ISD::SHL)
    return false;

  ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!CSD)
    return false;
  ShiftVal = CSD->getZExtValue();
  if (ShiftVal > 4)
    return false;

  Ext = AArch64_AM::UXTX;
  Reg = N.getOperand(0);
  Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
                                    MVT::i32);
  return isWorthFoldingALU(N);
}

/// If there's a use of this ADDlow that's not itself a load/store then we'll
/// need to create a real ADD instruction from it anyway and there's no point in
/// folding it into the mem op. Theoretically, it shouldn't matter, but there's
/// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
/// leads to duplicated ADRP instructions.
static bool isWorthFoldingADDlow(SDValue N) {
  for (auto *Use : N->uses()) {
    if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
        Use->getOpcode() != ISD::ATOMIC_LOAD &&
        Use->getOpcode() != ISD::ATOMIC_STORE)
      return false;

    // ldar and stlr have much more restrictive addressing modes (just a
    // register).
    if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getSuccessOrdering()))
      return false;
  }

  return true;
}

/// Check if the immediate offset is valid as a scaled immediate.
static bool isValidAsScaledImmediate(int64_t Offset, unsigned Range,
                                     unsigned Size) {
  if ((Offset & (Size - 1)) == 0 && Offset >= 0 &&
      Offset < (Range << Log2_32(Size)))
    return true;
  return false;
}
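
// For example, with Size == 8 and Range == 0x1000, offset 32760 (4095 * 8)
// is valid, while 32768 (out of range) and 28 (not a multiple of 8) are not.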
The "Size" argument is the size in bytes of the memory1044/// reference, which determines the scale.1045bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,1046unsigned BW, unsigned Size,1047SDValue &Base,1048SDValue &OffImm) {1049SDLoc dl(N);1050const DataLayout &DL = CurDAG->getDataLayout();1051const TargetLowering *TLI = getTargetLowering();1052if (N.getOpcode() == ISD::FrameIndex) {1053int FI = cast<FrameIndexSDNode>(N)->getIndex();1054Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1055OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1056return true;1057}10581059// As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed1060// selected here doesn't support labels/immediates, only base+offset.1061if (CurDAG->isBaseWithConstantOffset(N)) {1062if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1063if (IsSignedImm) {1064int64_t RHSC = RHS->getSExtValue();1065unsigned Scale = Log2_32(Size);1066int64_t Range = 0x1LL << (BW - 1);10671068if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&1069RHSC < (Range << Scale)) {1070Base = N.getOperand(0);1071if (Base.getOpcode() == ISD::FrameIndex) {1072int FI = cast<FrameIndexSDNode>(Base)->getIndex();1073Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1074}1075OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1076return true;1077}1078} else {1079// unsigned Immediate1080uint64_t RHSC = RHS->getZExtValue();1081unsigned Scale = Log2_32(Size);1082uint64_t Range = 0x1ULL << BW;10831084if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {1085Base = N.getOperand(0);1086if (Base.getOpcode() == ISD::FrameIndex) {1087int FI = cast<FrameIndexSDNode>(Base)->getIndex();1088Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1089}1090OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1091return true;1092}1093}1094}1095}1096// Base only. The address will be materialized into a register before1097// the memory is accessed.1098// add x0, Xbase, #offset1099// stp x1, x2, [x0]1100Base = N;1101OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1102return true;1103}11041105/// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit1106/// immediate" address. 
The "Size" argument is the size in bytes of the memory1107/// reference, which determines the scale.1108bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,1109SDValue &Base, SDValue &OffImm) {1110SDLoc dl(N);1111const DataLayout &DL = CurDAG->getDataLayout();1112const TargetLowering *TLI = getTargetLowering();1113if (N.getOpcode() == ISD::FrameIndex) {1114int FI = cast<FrameIndexSDNode>(N)->getIndex();1115Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1116OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1117return true;1118}11191120if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {1121GlobalAddressSDNode *GAN =1122dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());1123Base = N.getOperand(0);1124OffImm = N.getOperand(1);1125if (!GAN)1126return true;11271128if (GAN->getOffset() % Size == 0 &&1129GAN->getGlobal()->getPointerAlignment(DL) >= Size)1130return true;1131}11321133if (CurDAG->isBaseWithConstantOffset(N)) {1134if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1135int64_t RHSC = (int64_t)RHS->getZExtValue();1136unsigned Scale = Log2_32(Size);1137if (isValidAsScaledImmediate(RHSC, 0x1000, Size)) {1138Base = N.getOperand(0);1139if (Base.getOpcode() == ISD::FrameIndex) {1140int FI = cast<FrameIndexSDNode>(Base)->getIndex();1141Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));1142}1143OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);1144return true;1145}1146}1147}11481149// Before falling back to our general case, check if the unscaled1150// instructions can handle this. If so, that's preferable.1151if (SelectAddrModeUnscaled(N, Size, Base, OffImm))1152return false;11531154// Base only. The address will be materialized into a register before1155// the memory is accessed.1156// add x0, Xbase, #offset1157// ldr x0, [x0]1158Base = N;1159OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);1160return true;1161}11621163/// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit1164/// immediate" address. This should only match when there is an offset that1165/// is not valid for a scaled immediate addressing mode. 
The "Size" argument1166/// is the size in bytes of the memory reference, which is needed here to know1167/// what is valid for a scaled immediate.1168bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,1169SDValue &Base,1170SDValue &OffImm) {1171if (!CurDAG->isBaseWithConstantOffset(N))1172return false;1173if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1174int64_t RHSC = RHS->getSExtValue();1175if (RHSC >= -256 && RHSC < 256) {1176Base = N.getOperand(0);1177if (Base.getOpcode() == ISD::FrameIndex) {1178int FI = cast<FrameIndexSDNode>(Base)->getIndex();1179const TargetLowering *TLI = getTargetLowering();1180Base = CurDAG->getTargetFrameIndex(1181FI, TLI->getPointerTy(CurDAG->getDataLayout()));1182}1183OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);1184return true;1185}1186}1187return false;1188}11891190static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {1191SDLoc dl(N);1192SDValue ImpDef = SDValue(1193CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);1194return CurDAG->getTargetInsertSubreg(AArch64::sub_32, dl, MVT::i64, ImpDef,1195N);1196}11971198/// Check if the given SHL node (\p N), can be used to form an1199/// extended register for an addressing mode.1200bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,1201bool WantExtend, SDValue &Offset,1202SDValue &SignExtend) {1203assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");1204ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));1205if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())1206return false;12071208SDLoc dl(N);1209if (WantExtend) {1210AArch64_AM::ShiftExtendType Ext =1211getExtendTypeForNode(N.getOperand(0), true);1212if (Ext == AArch64_AM::InvalidShiftExtend)1213return false;12141215Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));1216SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,1217MVT::i32);1218} else {1219Offset = N.getOperand(0);1220SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);1221}12221223unsigned LegalShiftVal = Log2_32(Size);1224unsigned ShiftVal = CSD->getZExtValue();12251226if (ShiftVal != 0 && ShiftVal != LegalShiftVal)1227return false;12281229return isWorthFoldingAddr(N, Size);1230}12311232bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,1233SDValue &Base, SDValue &Offset,1234SDValue &SignExtend,1235SDValue &DoShift) {1236if (N.getOpcode() != ISD::ADD)1237return false;1238SDValue LHS = N.getOperand(0);1239SDValue RHS = N.getOperand(1);1240SDLoc dl(N);12411242// We don't want to match immediate adds here, because they are better lowered1243// to the register-immediate addressing modes.1244if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))1245return false;12461247// Check if this particular node is reused in any non-memory related1248// operation. 
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
    return true;
  }

  // There was no shift, whatever else we find.
  DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);

  AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
  // Try to match an unshifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(LHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = RHS;
    Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(LHS, Size))
      return true;
  }

  // Try to match an unshifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding &&
      (Ext = getExtendTypeForNode(RHS, true)) !=
          AArch64_AM::InvalidShiftExtend) {
    Base = LHS;
    Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
    SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
                                           MVT::i32);
    if (isWorthFoldingAddr(RHS, Size))
      return true;
  }

  return false;
}

// Check if the given immediate is preferred by ADD. If an immediate can be
// encoded in an ADD, or it can be encoded in an "ADD LSL #12" and cannot be
// encoded by a single MOVZ, return true.
static bool isPreferredADD(int64_t ImmOff) {
  // Constant in [0x0, 0xfff] can be encoded in ADD.
  if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL)
    return true;
  // Check if it can be encoded in an "ADD LSL #12".
  if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL)
    // As a single MOVZ is faster than an "ADD LSL #12", ignore such constants.
    return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL &&
           (ImmOff & 0xffffffffffff0fffLL) != 0x0LL;
  return false;
}
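
// For example, isPreferredADD(0xfff) and isPreferredADD(0x45f000) return
// true (encodable in an ADD and an "ADD LSL #12" respectively), while
// isPreferredADD(0x230000) returns false because a single
// "MOVZ #0x23, LSL #16" is cheaper.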

bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
                                            SDValue &Base, SDValue &Offset,
                                            SDValue &SignExtend,
                                            SDValue &DoShift) {
  if (N.getOpcode() != ISD::ADD)
    return false;
  SDValue LHS = N.getOperand(0);
  SDValue RHS = N.getOperand(1);
  SDLoc DL(N);

  // Check if this particular node is reused in any non-memory related
  // operation. If yes, do not try to fold this node into the address
  // computation, since the computation will be kept.
  const SDNode *Node = N.getNode();
  for (SDNode *UI : Node->uses()) {
    if (!isa<MemSDNode>(*UI))
      return false;
  }

  // Watch out if RHS is a wide immediate: it cannot be selected into the
  // [BaseReg+Imm] addressing mode, and it may not be encodable in an
  // ADD/SUB either. Instead it will use a [BaseReg + 0] address mode and
  // generate instructions like:
  //     MOV  X0, WideImmediate
  //     ADD  X1, BaseReg, X0
  //     LDR  X2, [X1, 0]
  // For such a situation, using the [BaseReg, XReg] addressing mode saves one
  // ADD/SUB:
  //     MOV  X0, WideImmediate
  //     LDR  X2, [BaseReg, X0]
  if (isa<ConstantSDNode>(RHS)) {
    int64_t ImmOff = (int64_t)RHS->getAsZExtVal();
    // Skip if the immediate can be selected by the load/store addressing
    // mode. Also skip if it can be encoded by a single ADD (SUB is also
    // checked by using -ImmOff).
    if (isValidAsScaledImmediate(ImmOff, 0x1000, Size) ||
        isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
      return false;

    SDValue Ops[] = { RHS };
    SDNode *MOVI =
        CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    SDValue MOVIV = SDValue(MOVI, 0);
    // This ADD of two X registers will be selected into [Reg+Reg] mode.
    N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
  }

  // Remember if it is worth folding N when it produces an extended register.
  bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size);

  // Try to match a shifted extend on the RHS.
  if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
    Base = LHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Try to match a shifted extend on the LHS.
  if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
      SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
    Base = RHS;
    DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
    return true;
  }

  // Match any non-shifted, non-extend, non-immediate add expression.
  Base = LHS;
  Offset = RHS;
  SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
  DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
  // Reg1 + Reg2 is free: no check needed.
  return true;
}

SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
  static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
                                     AArch64::dsub2, AArch64::dsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {
      AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
  static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
  static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
                                         AArch64::ZPR3RegClassID,
                                         AArch64::ZPR4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};

  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createZMulTuple(ArrayRef<SDValue> Regs) {
  assert(Regs.size() == 2 || Regs.size() == 4);

  // The createTuple interface requires 3 RegClassIDs for each possible
  // tuple type even though we only have them for ZPR2 and ZPR4.
  static const unsigned RegClassIDs[] = {AArch64::ZPR2Mul2RegClassID, 0,
                                         AArch64::ZPR4Mul4RegClassID};
  static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
                                     AArch64::zsub2, AArch64::zsub3};
  return createTuple(Regs, RegClassIDs, SubRegs);
}

SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
                                         const unsigned RegClassIDs[],
                                         const unsigned SubRegs[]) {
  // There's no special register-class for a vector-list of 1 element: it's just
  // a vector.
  if (Regs.size() == 1)
    return Regs[0];

  assert(Regs.size() >= 2 && Regs.size() <= 4);

  SDLoc DL(Regs[0]);

  SmallVector<SDValue, 4> Ops;

  // First operand of REG_SEQUENCE is the desired RegClass.
  Ops.push_back(
      CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));

  // Then we get pairs of source & subregister-position for the components.
  for (unsigned i = 0; i < Regs.size(); ++i) {
    Ops.push_back(Regs[i]);
    Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
  }

  SDNode *N =
      CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
  return SDValue(N, 0);
}

void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
                                      bool isExt) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  unsigned ExtOff = isExt;

  // Form a REG_SEQUENCE to force register allocation.
  unsigned Vec0Off = ExtOff + 1;
  SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
                               N->op_begin() + Vec0Off + NumVecs);
  SDValue RegSeq = createQTuple(Regs);

  SmallVector<SDValue, 6> Ops;
  if (isExt)
    Ops.push_back(N->getOperand(1));
  Ops.push_back(RegSeq);
  Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
  ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
}

static std::tuple<SDValue, SDValue>
extractPtrauthBlendDiscriminators(SDValue Disc, SelectionDAG *DAG) {
  SDLoc DL(Disc);
  SDValue AddrDisc;
  SDValue ConstDisc;

  // If this is a blend, remember the constant and address discriminators.
  // Otherwise, it's either a constant discriminator, or a non-blended
  // address discriminator.
  if (Disc->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
      Disc->getConstantOperandVal(0) == Intrinsic::ptrauth_blend) {
    AddrDisc = Disc->getOperand(1);
    ConstDisc = Disc->getOperand(2);
  } else {
    ConstDisc = Disc;
  }

  // If the constant discriminator (either the blend RHS, or the entire
  // discriminator value) isn't a 16-bit constant, bail out, and let the
  // discriminator be computed separately.
  auto *ConstDiscN = dyn_cast<ConstantSDNode>(ConstDisc);
  if (!ConstDiscN || !isUInt<16>(ConstDiscN->getZExtValue()))
    return std::make_tuple(DAG->getTargetConstant(0, DL, MVT::i64), Disc);

  // If there's no address discriminator, use XZR directly.
  if (!AddrDisc)
    AddrDisc = DAG->getRegister(AArch64::XZR, MVT::i64);

  return std::make_tuple(
      DAG->getTargetConstant(ConstDiscN->getZExtValue(), DL, MVT::i64),
      AddrDisc);
}
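
// For example, a discriminator (llvm.ptrauth.blend AddrDisc, 1234) splits
// into (1234, AddrDisc); a bare constant 42 yields (42, XZR); any other
// value Disc yields (0, Disc), leaving the discriminator to be computed
// separately.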

void AArch64DAGToDAGISel::SelectPtrauthAuth(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());
  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, X16Copy.getValue(1)};

  SDNode *AUT = CurDAG->getMachineNode(AArch64::AUT, DL, MVT::i64, Ops);
  ReplaceNode(N, AUT);
  return;
}

void AArch64DAGToDAGISel::SelectPtrauthResign(SDNode *N) {
  SDLoc DL(N);
  // IntrinsicID is operand #0
  SDValue Val = N->getOperand(1);
  SDValue AUTKey = N->getOperand(2);
  SDValue AUTDisc = N->getOperand(3);
  SDValue PACKey = N->getOperand(4);
  SDValue PACDisc = N->getOperand(5);

  unsigned AUTKeyC = cast<ConstantSDNode>(AUTKey)->getZExtValue();
  unsigned PACKeyC = cast<ConstantSDNode>(PACKey)->getZExtValue();

  AUTKey = CurDAG->getTargetConstant(AUTKeyC, DL, MVT::i64);
  PACKey = CurDAG->getTargetConstant(PACKeyC, DL, MVT::i64);

  SDValue AUTAddrDisc, AUTConstDisc;
  std::tie(AUTConstDisc, AUTAddrDisc) =
      extractPtrauthBlendDiscriminators(AUTDisc, CurDAG);

  SDValue PACAddrDisc, PACConstDisc;
  std::tie(PACConstDisc, PACAddrDisc) =
      extractPtrauthBlendDiscriminators(PACDisc, CurDAG);

  SDValue X16Copy = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL,
                                         AArch64::X16, Val, SDValue());

  SDValue Ops[] = {AUTKey, AUTConstDisc, AUTAddrDisc, PACKey,
                   PACConstDisc, PACAddrDisc, X16Copy.getValue(1)};

  SDNode *AUTPAC = CurDAG->getMachineNode(AArch64::AUTPAC, DL, MVT::i64, Ops);
  ReplaceNode(N, AUTPAC);
  return;
}

bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
  LoadSDNode *LD = cast<LoadSDNode>(N);
  if (LD->isUnindexed())
    return false;
  EVT VT = LD->getMemoryVT();
  EVT DstVT = N->getValueType(0);
  ISD::MemIndexedMode AM = LD->getAddressingMode();
  bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;

  // We're not doing validity checking here. That was done when checking
  // if we should mark the load as indexed or not. We're just selecting
  // the right instruction.
  unsigned Opcode = 0;

  ISD::LoadExtType ExtType = LD->getExtensionType();
  bool InsertTo64 = false;
  if (VT == MVT::i64)
    Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
  else if (VT == MVT::i32) {
    if (ExtType == ISD::NON_EXTLOAD)
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
    else if (ExtType == ISD::SEXTLOAD)
      Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
    else {
      Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
      InsertTo64 = true;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i16) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
      else
        Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::i8) {
    if (ExtType == ISD::SEXTLOAD) {
      if (DstVT == MVT::i64)
        Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
      else
        Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
    } else {
      Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
      InsertTo64 = DstVT == MVT::i64;
      // The result of the load is only i32. It's the subreg_to_reg that makes
      // it into an i64.
      DstVT = MVT::i32;
    }
  } else if (VT == MVT::f16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::bf16) {
    Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
  } else if (VT == MVT::f32) {
    Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
  } else if (VT == MVT::f64 || VT.is64BitVector()) {
    Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
  } else if (VT.is128BitVector()) {
    Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
  } else
    return false;
  SDValue Chain = LD->getChain();
  SDValue Base = LD->getBasePtr();
  ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
  int OffsetVal = (int)OffsetOp->getZExtValue();
  SDLoc dl(N);
  SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
  SDValue Ops[] = {Base, Offset, Chain};
  SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
                                       MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});

  // Either way, we're replacing the node, so tell the caller that.
  SDValue LoadedVal = SDValue(Res, 1);
  if (InsertTo64) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    LoadedVal =
        SDValue(CurDAG->getMachineNode(
                    AArch64::SUBREG_TO_REG, dl, MVT::i64,
                    CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
                    SubReg),
                0);
  }

  ReplaceUses(SDValue(N, 0), LoadedVal);
  ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
  ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                                     unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(2), // Mem operand
                   Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));

  // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
  // because it's too simple to have needed special treatment during lowering.
  if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
    MachineMemOperand *MemOp = MemIntr->getMemOperand();
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
  }

  CurDAG->RemoveDeadNode(N);
}
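// The post-incremented variants below return { writeback, vector list,
// chain }: the vector list comes back as a single Untyped super-register,
// and each result vector is peeled off with an extract_subreg, mirroring
// SelectLoad above.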
void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
                                         unsigned Opc, unsigned SubRegIdx) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue Ops[] = {N->getOperand(1), // Mem operand
                   N->getOperand(2), // Incremental
                   Chain};

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Untyped, MVT::Other};

  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1)
    ReplaceUses(SDValue(N, 0), SuperReg);
  else
    for (unsigned i = 0; i < NumVecs; ++i)
      ReplaceUses(SDValue(N, i),
                  CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT,
                                                 SuperReg));

  // Update the chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

/// Optimize \param OldBase and \param OldOffset selecting the best addressing
/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
/// new Base and an SDValue representing the new offset.
std::tuple<unsigned, SDValue, SDValue>
AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
                                              unsigned Opc_ri,
                                              const SDValue &OldBase,
                                              const SDValue &OldOffset,
                                              unsigned Scale) {
  SDValue NewBase = OldBase;
  SDValue NewOffset = OldOffset;
  // Detect a possible Reg+Imm addressing mode.
  const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
      N, OldBase, NewBase, NewOffset);

  // Detect a possible reg+reg addressing mode, but only if we haven't already
  // detected a Reg+Imm one.
  const bool IsRegReg =
      !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);

  // Select the instruction.
  return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
}
enum class SelectTypeKind {
  Int1 = 0,
  Int = 1,
  FP = 2,
  AnyType = 3,
};

/// This function selects an opcode from a list of opcodes, which are
/// expected to be the opcodes for { 8-bit, 16-bit, 32-bit, 64-bit }
/// element types, in this order.
template <SelectTypeKind Kind>
static unsigned SelectOpcodeFromVT(EVT VT, ArrayRef<unsigned> Opcodes) {
  // Only match scalable vector VTs
  if (!VT.isScalableVector())
    return 0;

  EVT EltVT = VT.getVectorElementType();
  unsigned Key = VT.getVectorMinNumElements();
  switch (Kind) {
  case SelectTypeKind::AnyType:
    break;
  case SelectTypeKind::Int:
    if (EltVT != MVT::i8 && EltVT != MVT::i16 && EltVT != MVT::i32 &&
        EltVT != MVT::i64)
      return 0;
    break;
  case SelectTypeKind::Int1:
    if (EltVT != MVT::i1)
      return 0;
    break;
  case SelectTypeKind::FP:
    if (EltVT == MVT::bf16)
      Key = 16;
    else if (EltVT != MVT::bf16 && EltVT != MVT::f16 && EltVT != MVT::f32 &&
             EltVT != MVT::f64)
      return 0;
    break;
  }

  unsigned Offset;
  switch (Key) {
  case 16: // 8-bit or bf16
    Offset = 0;
    break;
  case 8: // 16-bit
    Offset = 1;
    break;
  case 4: // 32-bit
    Offset = 2;
    break;
  case 2: // 64-bit
    Offset = 3;
    break;
  default:
    return 0;
  }

  return (Opcodes.size() <= Offset) ? 0 : Opcodes[Offset];
}

// This function is almost identical to SelectWhilePair, but has an
// extra check on the range of the immediate operand.
// TODO: Merge these two functions together at some point?
void AArch64DAGToDAGISel::SelectPExtPair(SDNode *N, unsigned Opc) {
  // Immediate can be either 0 or 1.
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(N->getOperand(2)))
    if (Imm->getZExtValue() > 1)
      return;

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};
  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(WhilePair, 0);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectWhilePair(SDNode *N, unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  SDValue Ops[] = {N->getOperand(1), N->getOperand(2)};

  SDNode *WhilePair = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(WhilePair, 0);

  for (unsigned I = 0; I < 2; ++I)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::psub0 + I, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectCVTIntrinsic(SDNode *N, unsigned NumVecs,
                                             unsigned Opcode) {
  EVT VT = N->getValueType(0);
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue Ops = createZTuple(Regs);
  SDLoc DL(N);
  SDNode *Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}
void AArch64DAGToDAGISel::SelectDestructiveMultiIntrinsic(SDNode *N,
                                                          unsigned NumVecs,
                                                          bool IsZmMulti,
                                                          unsigned Opcode,
                                                          bool HasPred) {
  assert(Opcode != 0 && "Unexpected opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned FirstVecIdx = HasPred ? 2 : 1;

  auto GetMultiVecOperand = [=](unsigned StartIdx) {
    SmallVector<SDValue, 4> Regs(N->op_begin() + StartIdx,
                                 N->op_begin() + StartIdx + NumVecs);
    return createZMulTuple(Regs);
  };

  SDValue Zdn = GetMultiVecOperand(FirstVecIdx);

  SDValue Zm;
  if (IsZmMulti)
    Zm = GetMultiVecOperand(NumVecs + FirstVecIdx);
  else
    Zm = N->getOperand(NumVecs + FirstVecIdx);

  SDNode *Intrinsic;
  if (HasPred)
    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped,
                                       N->getOperand(1), Zdn, Zm);
  else
    Intrinsic = CurDAG->getMachineNode(Opcode, DL, MVT::Untyped, Zdn, Zm);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
                                               unsigned Scale, unsigned Opc_ri,
                                               unsigned Opc_rr, bool IsIntr) {
  assert(Scale < 5 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  // Optimize addressing mode.
  SDValue Base, Offset;
  unsigned Opc;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(IsIntr ? 3 : 2),
      CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);

  SDValue Ops[] = {N->getOperand(IsIntr ? 2 : 1), // Predicate
                   Base,                          // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectContiguousMultiVectorLoad(SDNode *N,
                                                          unsigned NumVecs,
                                                          unsigned Scale,
                                                          unsigned Opc_ri,
                                                          unsigned Opc_rr) {
  assert(Scale < 4 && "Invalid scaling value.");
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue Chain = N->getOperand(0);

  SDValue PNg = N->getOperand(2);
  SDValue Base = N->getOperand(3);
  SDValue Offset = CurDAG->getTargetConstant(0, DL, MVT::i64);
  unsigned Opc;
  std::tie(Opc, Base, Offset) =
      findAddrModeSVELoadStore(N, Opc_rr, Opc_ri, Base, Offset, Scale);

  SDValue Ops[] = {PNg,  // Predicate-as-counter
                   Base, // Memory operand
                   Offset, Chain};

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
  SDValue SuperReg = SDValue(Load, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectFrintFromVT(SDNode *N, unsigned NumVecs,
                                            unsigned Opcode) {
  if (N->getValueType(0) != MVT::nxv4f32)
    return;
  SelectUnaryMultiIntrinsic(N, NumVecs, true, Opcode);
}
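// Select a multi-vector LUTI table lookup: operand 2 must be matched to the
// ZT0 register (via ImmToReg), and the table-index immediate (operand 4)
// must not exceed MaxImm; otherwise the node is left untouched.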
void AArch64DAGToDAGISel::SelectMultiVectorLuti(SDNode *Node,
                                                unsigned NumOutVecs,
                                                unsigned Opc, uint32_t MaxImm) {
  if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Node->getOperand(4)))
    if (Imm->getZExtValue() > MaxImm)
      return;

  SDValue ZtValue;
  if (!ImmToReg<AArch64::ZT0, 0>(Node->getOperand(2), ZtValue))
    return;
  SDValue Ops[] = {ZtValue, Node->getOperand(3), Node->getOperand(4)};
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);

  SDNode *Instruction =
      CurDAG->getMachineNode(Opc, DL, {MVT::Untyped, MVT::Other}, Ops);
  SDValue SuperReg = SDValue(Instruction, 0);

  for (unsigned I = 0; I < NumOutVecs; ++I)
    ReplaceUses(SDValue(Node, I), CurDAG->getTargetExtractSubreg(
                                      AArch64::zsub0 + I, DL, VT, SuperReg));

  // Copy chain
  unsigned ChainIdx = NumOutVecs;
  ReplaceUses(SDValue(Node, ChainIdx), SDValue(Instruction, 1));
  CurDAG->RemoveDeadNode(Node);
}

void AArch64DAGToDAGISel::SelectClamp(SDNode *N, unsigned NumVecs,
                                      unsigned Op) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);

  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue Zd = createZMulTuple(Regs);
  SDValue Zn = N->getOperand(1 + NumVecs);
  SDValue Zm = N->getOperand(2 + NumVecs);

  SDValue Ops[] = {Zd, Zn, Zm};

  SDNode *Intrinsic = CurDAG->getMachineNode(Op, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Intrinsic, 0);
  for (unsigned i = 0; i < NumVecs; ++i)
    ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + i, DL, VT, SuperReg));

  CurDAG->RemoveDeadNode(N);
}

bool SelectSMETile(unsigned &BaseReg, unsigned TileNum) {
  switch (BaseReg) {
  default:
    return false;
  case AArch64::ZA:
  case AArch64::ZAB0:
    if (TileNum == 0)
      break;
    return false;
  case AArch64::ZAH0:
    if (TileNum <= 1)
      break;
    return false;
  case AArch64::ZAS0:
    if (TileNum <= 3)
      break;
    return false;
  case AArch64::ZAD0:
    if (TileNum <= 7)
      break;
    return false;
  }

  BaseReg += TileNum;
  return true;
}
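// For example, SelectSMETile(BaseReg = AArch64::ZAS0, TileNum = 2) rewrites
// BaseReg to AArch64::ZAS2, while TileNum = 4 is rejected because ZA.S only
// has four tiles (ZAS0..ZAS3). This relies on the per-size tile registers
// being numbered consecutively.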
template <unsigned MaxIdx, unsigned Scale>
void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs,
                                                unsigned BaseReg, unsigned Op) {
  unsigned TileNum = 0;
  if (BaseReg != AArch64::ZA)
    TileNum = N->getConstantOperandVal(2);

  if (!SelectSMETile(BaseReg, TileNum))
    return;

  SDValue SliceBase, Base, Offset;
  if (BaseReg == AArch64::ZA)
    SliceBase = N->getOperand(2);
  else
    SliceBase = N->getOperand(3);

  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;

  SDLoc DL(N);
  SDValue SubReg = CurDAG->getRegister(BaseReg, MVT::Other);
  SDValue Ops[] = {SubReg, Base, Offset, /*Chain*/ N->getOperand(0)};
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));
  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs,
                                                 unsigned Op, unsigned MaxIdx,
                                                 unsigned Scale,
                                                 unsigned BaseReg) {
  // The slice can be in different positions:
  //   array to vector: llvm.aarch64.sme.readz.<h/v>.<sz>(slice)
  //   tile to vector:  llvm.aarch64.sme.readz.<h/v>.<sz>(tile, slice)
  SDValue SliceBase = N->getOperand(2);
  if (BaseReg != AArch64::ZA)
    SliceBase = N->getOperand(3);

  SDValue Base, Offset;
  if (!SelectSMETileSlice(SliceBase, MaxIdx, Base, Offset, Scale))
    return;
  // The correct ZA tile number is computed when the machine instruction is
  // emitted (see EmitZAInstr); the DAG cannot select a ZA tile as an output
  // register with ZReg.
  SDLoc DL(N);
  SmallVector<SDValue, 6> Ops;
  if (BaseReg != AArch64::ZA)
    Ops.push_back(N->getOperand(2));
  Ops.push_back(Base);
  Ops.push_back(Offset);
  Ops.push_back(N->getOperand(0)); // Chain
  SDNode *Mov = CurDAG->getMachineNode(Op, DL, {MVT::Untyped, MVT::Other}, Ops);

  EVT VT = N->getValueType(0);
  for (unsigned I = 0; I < NumVecs; ++I)
    ReplaceUses(SDValue(N, I),
                CurDAG->getTargetExtractSubreg(AArch64::zsub0 + I, DL, VT,
                                               SDValue(Mov, 0)));

  // Copy chain
  unsigned ChainIdx = NumVecs;
  ReplaceUses(SDValue(N, ChainIdx), SDValue(Mov, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectUnaryMultiIntrinsic(SDNode *N,
                                                    unsigned NumOutVecs,
                                                    bool IsTupleInput,
                                                    unsigned Opc) {
  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  unsigned NumInVecs = N->getNumOperands() - 1;

  SmallVector<SDValue, 6> Ops;
  if (IsTupleInput) {
    assert((NumInVecs == 2 || NumInVecs == 4) &&
           "Don't know how to handle multi-register input!");
    SmallVector<SDValue, 4> Regs(N->op_begin() + 1,
                                 N->op_begin() + 1 + NumInVecs);
    Ops.push_back(createZMulTuple(Regs));
  } else {
    // All intrinsic nodes have the ID as the first operand, hence the "1 + I".
    for (unsigned I = 0; I < NumInVecs; I++)
      Ops.push_back(N->getOperand(1 + I));
  }

  SDNode *Res = CurDAG->getMachineNode(Opc, DL, MVT::Untyped, Ops);
  SDValue SuperReg = SDValue(Res, 0);

  for (unsigned I = 0; I < NumOutVecs; I++)
    ReplaceUses(SDValue(N, I), CurDAG->getTargetExtractSubreg(
                                   AArch64::zsub0 + I, DL, VT, SuperReg));
  CurDAG->RemoveDeadNode(N);
}
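// Vector-structure stores mirror the structure loads above: the source
// vectors are glued into a single D- or Q-form tuple with REG_SEQUENCE so
// the register allocator assigns consecutive registers, and that tuple feeds
// a single store machine node.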
void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
                                      unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
                                                unsigned Scale, unsigned Opc_rr,
                                                unsigned Opc_ri) {
  SDLoc dl(N);

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
  SDValue RegSeq = createZTuple(Regs);

  // Optimize addressing mode.
  unsigned Opc;
  SDValue Offset, Base;
  std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
      N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
      CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);

  SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
                   Base,                               // address
                   Offset,                             // offset
                   N->getOperand(0)};                  // chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);

  ReplaceNode(N, St);
}

bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
                                                      SDValue &OffImm) {
  SDLoc dl(N);
  const DataLayout &DL = CurDAG->getDataLayout();
  const TargetLowering *TLI = getTargetLowering();

  // Try to match it for the frame address
  if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
    int FI = FINode->getIndex();
    Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    return true;
  }

  return false;
}

void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  const EVT ResTys[] = {MVT::i64,    // Type of the write back register
                        MVT::Other}; // Type for the Chain

  // Form a REG_SEQUENCE to force register allocation.
  bool Is128Bit = VT.getSizeInBits() == 128;
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
  SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);

  SDValue Ops[] = {RegSeq,
                   N->getOperand(NumVecs + 1), // base register
                   N->getOperand(NumVecs + 2), // Incremental
                   N->getOperand(0)};          // Chain
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  ReplaceNode(N, St);
}
namespace {
/// WidenVector - Given a value in the V64 register class, produce the
/// equivalent value in the V128 register class.
class WidenVector {
  SelectionDAG &DAG;

public:
  WidenVector(SelectionDAG &DAG) : DAG(DAG) {}

  SDValue operator()(SDValue V64Reg) {
    EVT VT = V64Reg.getValueType();
    unsigned NarrowSize = VT.getVectorNumElements();
    MVT EltTy = VT.getVectorElementType().getSimpleVT();
    MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
    SDLoc DL(V64Reg);

    SDValue Undef =
        SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
    return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
  }
};
} // namespace

/// NarrowVector - Given a value in the V128 register class, produce the
/// equivalent value in the V64 register class.
static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
  EVT VT = V128Reg.getValueType();
  unsigned WideSize = VT.getVectorNumElements();
  MVT EltTy = VT.getVectorElementType().getSimpleVT();
  MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);

  return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
                                    V128Reg);
}
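// Lane-indexed structure loads and stores only operate on Q-form register
// tuples, so 64-bit input vectors are first widened with WidenVector, the
// Q-form tuple is built, and 64-bit results are narrowed back with
// NarrowVector before replacing the original values.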
void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
                                         unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::Untyped, MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  SDValue SuperReg = SDValue(Ld, 0);

  EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
  static const unsigned QSubs[] = {AArch64::qsub0, AArch64::qsub1,
                                   AArch64::qsub2, AArch64::qsub3};
  for (unsigned i = 0; i < NumVecs; ++i) {
    SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
    if (Narrow)
      NV = NarrowVector(NV, *CurDAG);
    ReplaceUses(SDValue(N, i), NV);
  }

  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
                                             unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        RegSeq->getValueType(0), MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq,
                   CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), // Lane Number
                   N->getOperand(NumVecs + 2),                      // Base register
                   N->getOperand(NumVecs + 3),                      // Incremental
                   N->getOperand(0)};
  SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Update uses of the write back register
  ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));

  // Update uses of the vector list
  SDValue SuperReg = SDValue(Ld, 1);
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0),
                Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
  } else {
    EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
    static const unsigned QSubs[] = {AArch64::qsub0, AArch64::qsub1,
                                     AArch64::qsub2, AArch64::qsub3};
    for (unsigned i = 0; i < NumVecs; ++i) {
      SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
                                                  SuperReg);
      if (Narrow)
        NV = NarrowVector(NV, *CurDAG);
      ReplaceUses(SDValue(N, i), NV);
    }
  }

  // Update the Chain
  ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
  CurDAG->RemoveDeadNode(N);
}

void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
                                          unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 2);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 3), N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}

void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
                                              unsigned Opc) {
  SDLoc dl(N);
  EVT VT = N->getOperand(2)->getValueType(0);
  bool Narrow = VT.getSizeInBits() == 64;

  // Form a REG_SEQUENCE to force register allocation.
  SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);

  if (Narrow)
    transform(Regs, Regs.begin(), WidenVector(*CurDAG));

  SDValue RegSeq = createQTuple(Regs);

  const EVT ResTys[] = {MVT::i64, // Type of the write back register
                        MVT::Other};

  unsigned LaneNo = N->getConstantOperandVal(NumVecs + 1);

  SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
                   N->getOperand(NumVecs + 2), // Base Register
                   N->getOperand(NumVecs + 3), // Incremental
                   N->getOperand(0)};
  SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

  ReplaceNode(N, St);
}
static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
                                       unsigned &Opc, SDValue &Opd0,
                                       unsigned &LSB, unsigned &MSB,
                                       unsigned NumberOfIgnoredLowBits,
                                       bool BiggerPattern) {
  assert(N->getOpcode() == ISD::AND &&
         "N must be a AND operation to call this function");

  EVT VT = N->getValueType(0);

  // We could test the type of VT and return false when it does not match, but
  // since that check is done prior to this call in the current context, we
  // turn it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // FIXME: simplify-demanded-bits in DAGCombine will probably have
  // changed the AND node to a 32-bit mask operation. We'll have to
  // undo that as part of the transform here if we want to catch all
  // the opportunities.
  // Currently the NumberOfIgnoredLowBits argument helps to recover
  // from these situations when matching bigger pattern (bitfield insert).

  // For unsigned extracts, check for a shift right and mask
  uint64_t AndImm = 0;
  if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
    return false;

  const SDNode *Op0 = N->getOperand(0).getNode();

  // Because of simplify-demanded-bits in DAGCombine, the mask may have been
  // simplified. Try to undo that.
  AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);

  // The immediate is a mask of the low bits iff imm & (imm+1) == 0
  if (AndImm & (AndImm + 1))
    return false;

  bool ClampMSB = false;
  uint64_t SrlImm = 0;
  // Handle the SRL + ANY_EXTEND case.
  if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
      isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
    // Extend the incoming operand of the SRL to 64-bit.
    Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
    // Make sure to clamp the MSB so that we preserve the semantics of the
    // original operations.
    ClampMSB = true;
  } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
             isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
                                   SrlImm)) {
    // If the shift result was truncated, we can still combine them.
    Opd0 = Op0->getOperand(0).getOperand(0);

    // Use the type of SRL node.
    VT = Opd0->getValueType(0);
  } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
    Opd0 = Op0->getOperand(0);
    ClampMSB = (VT == MVT::i32);
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift right has been performed.
    // The resulting code will be at least as good as the original one
    // plus it may expose more opportunities for bitfield insert pattern.
    // FIXME: Currently we limit this to the bigger pattern, because
    // some optimizations expect AND and not UBFM.
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Bail out on large immediates. This happens when no proper
  // combining/constant folding was performed.
  if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  LSB = SrlImm;
  MSB = SrlImm +
        (VT == MVT::i32 ? llvm::countr_one<uint32_t>(AndImm)
                        : llvm::countr_one<uint64_t>(AndImm)) -
        1;
  if (ClampMSB)
    // Since we're moving the extend before the right shift operation, we need
    // to clamp the MSB to make sure we don't shift in undefined bits instead
    // of the zeros which would get shifted in with the original right shift
    // operation.
    MSB = MSB > 31 ? 31 : MSB;

  Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  return true;
}
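// For example, on i32, "(and (srl x, 3), 0x1f)" yields Opc = UBFMWri with
// LSB (immr) = 3 and MSB (imms) = 3 + 5 - 1 = 7, i.e. "ubfx w0, w1, #3, #5".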
static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
                                             SDValue &Opd0, unsigned &Immr,
                                             unsigned &Imms) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT VT = N->getValueType(0);
  unsigned BitWidth = VT.getSizeInBits();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  SDValue Op = N->getOperand(0);
  if (Op->getOpcode() == ISD::TRUNCATE) {
    Op = Op->getOperand(0);
    VT = Op->getValueType(0);
    BitWidth = VT.getSizeInBits();
  }

  uint64_t ShiftImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
      !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
  if (ShiftImm + Width > BitWidth)
    return false;

  Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
  Opd0 = Op.getOperand(0);
  Immr = ShiftImm;
  Imms = ShiftImm + Width - 1;
  return true;
}

static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
                                          SDValue &Opd0, unsigned &LSB,
                                          unsigned &MSB) {
  // We are looking for the following pattern, which basically extracts several
  // contiguous bits from the source value and places them at the LSB of the
  // destination value; all other bits of the destination value are set to
  // zero:
  //
  //   Value2 = AND Value, MaskImm
  //   SRL Value2, ShiftImm
  //
  // with MaskImm >> ShiftImm to search for the bit width.
  //
  // This gets selected into a single UBFM:
  //
  //   UBFM Value, ShiftImm, Log2_64(MaskImm)
  //

  if (N->getOpcode() != ISD::SRL)
    return false;

  uint64_t AndMask = 0;
  if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
    return false;

  Opd0 = N->getOperand(0).getOperand(0);

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  // Check whether we really have several bits extract here.
  if (!isMask_64(AndMask >> SrlImm))
    return false;

  Opc = N->getValueType(0) == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
  LSB = SrlImm;
  MSB = llvm::Log2_64(AndMask);
  return true;
}
static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
                                       unsigned &Immr, unsigned &Imms,
                                       bool BiggerPattern) {
  assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
         "N must be a SHR/SRA operation to call this function");

  EVT VT = N->getValueType(0);

  // We could test the type of VT and return false when it does not match, but
  // since that check is done prior to this call in the current context, we
  // turn it into an assert to avoid redundant code.
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Type checking must have been done before calling this function");

  // Check for AND + SRL doing several bits extract.
  if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
    return true;

  // We're looking for a shift of a shift.
  uint64_t ShlImm = 0;
  uint64_t TruncBits = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
    Opd0 = N->getOperand(0).getOperand(0);
  } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
             N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
    // We are looking for a shift of truncate. Truncate from i64 to i32 can be
    // considered as setting the high 32 bits to zero. Our strategy here is to
    // always generate a 64-bit UBFM. This consistency will help the CSE pass
    // later find more redundancy.
    Opd0 = N->getOperand(0).getOperand(0);
    TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
    VT = Opd0.getValueType();
    assert(VT == MVT::i64 && "the promoted type should be i64");
  } else if (BiggerPattern) {
    // Let's pretend a 0 shift left has been performed.
    // FIXME: Currently we limit this to the bigger pattern case,
    // because some optimizations expect AND and not UBFM
    Opd0 = N->getOperand(0);
  } else
    return false;

  // Missing combines/constant folding may have left us with strange
  // constants.
  if (ShlImm >= VT.getSizeInBits()) {
    LLVM_DEBUG(
        (dbgs() << N
                << ": Found large shift immediate, this should not happen\n"));
    return false;
  }

  uint64_t SrlImm = 0;
  if (!isIntImmediate(N->getOperand(1), SrlImm))
    return false;

  assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
         "bad amount in shift node!");
  int immr = SrlImm - ShlImm;
  Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
  Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
  // SRA requires a signed extraction
  if (VT == MVT::i32)
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
  else
    Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
  return true;
}

bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND);

  EVT VT = N->getValueType(0);
  EVT NarrowVT = N->getOperand(0)->getValueType(0);
  if (VT != MVT::i64 || NarrowVT != MVT::i32)
    return false;

  uint64_t ShiftImm;
  SDValue Op = N->getOperand(0);
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
    return false;

  SDLoc dl(N);
  // Extend the incoming operand of the shift to 64-bits.
  SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
  unsigned Immr = ShiftImm;
  unsigned Imms = NarrowVT.getSizeInBits() - 1;
  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
  return true;
}

static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
                                SDValue &Opd0, unsigned &Immr, unsigned &Imms,
                                unsigned NumberOfIgnoredLowBits = 0,
                                bool BiggerPattern = false) {
  if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
    return false;

  switch (N->getOpcode()) {
  default:
    if (!N->isMachineOpcode())
      return false;
    break;
  case ISD::AND:
    return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
                                      NumberOfIgnoredLowBits, BiggerPattern);
  case ISD::SRL:
  case ISD::SRA:
    return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);

  case ISD::SIGN_EXTEND_INREG:
    return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
  }

  unsigned NOpc = N->getMachineOpcode();
  switch (NOpc) {
  default:
    return false;
  case AArch64::SBFMWri:
  case AArch64::UBFMWri:
  case AArch64::SBFMXri:
  case AArch64::UBFMXri:
    Opc = NOpc;
    Opd0 = N->getOperand(0);
    Immr = N->getConstantOperandVal(1);
    Imms = N->getConstantOperandVal(2);
    return true;
  }
  // Unreachable
  return false;
}
bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
  unsigned Opc, Immr, Imms;
  SDValue Opd0;
  if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
    return false;

  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // If the bit extract operation is 64bit but the original type is 32bit, we
  // need to add one EXTRACT_SUBREG.
  if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
    SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
                       CurDAG->getTargetConstant(Imms, dl, MVT::i64)};

    SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
    SDValue Inner = CurDAG->getTargetExtractSubreg(AArch64::sub_32, dl,
                                                   MVT::i32, SDValue(BFM, 0));
    ReplaceNode(N, Inner.getNode());
    return true;
  }

  SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
                   CurDAG->getTargetConstant(Imms, dl, VT)};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// Does DstMask form a complementary pair with the mask provided by
/// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
/// this asks whether DstMask zeroes precisely those bits that will be set by
/// the other half.
static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
                              unsigned NumberOfIgnoredHighBits, EVT VT) {
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "i32 or i64 mask type expected!");
  unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;

  APInt SignificantDstMask = APInt(BitWidth, DstMask);
  APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);

  return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
         (SignificantDstMask | SignificantBitsToBeInserted).isAllOnes();
}

// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped and never used
// before it has been dropped.
// E.g., looking for the useful bits of x:
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used in an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);

static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
  UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
  getUsefulBits(Op, UsefulBits, Depth + 1);
}

static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
                                             uint64_t Imm, uint64_t MSB,
                                             unsigned Depth) {
  // inherit the bitwidth value
  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  if (MSB >= Imm) {
    OpUsefulBits <<= MSB - Imm + 1;
    --OpUsefulBits;
    // The interesting part will be in the lower part of the result
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was starting at Imm in the argument
    OpUsefulBits <<= Imm;
  } else {
    OpUsefulBits <<= MSB + 1;
    --OpUsefulBits;
    // The interesting part will be shifted in the result
    OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
    getUsefulBits(Op, OpUsefulBits, Depth + 1);
    // The interesting part was at zero in the argument
    OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
  }

  UsefulBits &= OpUsefulBits;
}

static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
                                  unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();

  getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
}

static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
                                              unsigned Depth) {
  uint64_t ShiftTypeAndValue =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  APInt Mask(UsefulBits);
  Mask.clearAllBits();
  Mask.flipAllBits();

  if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
    // Shift Left
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask <<= ShiftAmt;
    getUsefulBits(Op, Mask, Depth + 1);
    Mask.lshrInPlace(ShiftAmt);
  } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
    // Shift Right
    // We do not handle AArch64_AM::ASR, because the sign will change the
    // number of useful bits
    uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
    Mask.lshrInPlace(ShiftAmt);
    getUsefulBits(Op, Mask, Depth + 1);
    Mask <<= ShiftAmt;
  } else
    return;

  UsefulBits &= Mask;
}
static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
                                 unsigned Depth) {
  uint64_t Imm =
      cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
  uint64_t MSB =
      cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();

  APInt OpUsefulBits(UsefulBits);
  OpUsefulBits = 1;

  APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
  ResultUsefulBits.flipAllBits();
  APInt Mask(UsefulBits.getBitWidth(), 0);

  getUsefulBits(Op, ResultUsefulBits, Depth + 1);

  if (MSB >= Imm) {
    // The instruction is a BFXIL.
    uint64_t Width = MSB - Imm + 1;
    uint64_t LSB = Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;

    if (Op.getOperand(1) == Orig) {
      // Copy the low bits from the result to bits starting from LSB.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask <<= LSB;
    }

    if (Op.getOperand(0) == Orig)
      // Bits starting from LSB in the input contribute to the result.
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  } else {
    // The instruction is a BFI.
    uint64_t Width = MSB + 1;
    uint64_t LSB = UsefulBits.getBitWidth() - Imm;

    OpUsefulBits <<= Width;
    --OpUsefulBits;
    OpUsefulBits <<= LSB;

    if (Op.getOperand(1) == Orig) {
      // Copy the bits from the result to the zero bits.
      Mask = ResultUsefulBits & OpUsefulBits;
      Mask.lshrInPlace(LSB);
    }

    if (Op.getOperand(0) == Orig)
      Mask |= (ResultUsefulBits & ~OpUsefulBits);
  }

  UsefulBits &= Mask;
}

static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
                                SDValue Orig, unsigned Depth) {

  // Users of this node should have already been instruction selected
  // FIXME: Can we turn that into an assert?
  if (!UserNode->isMachineOpcode())
    return;

  switch (UserNode->getMachineOpcode()) {
  default:
    return;
  case AArch64::ANDSWri:
  case AArch64::ANDSXri:
  case AArch64::ANDWri:
  case AArch64::ANDXri:
    // We increment Depth only when we call the getUsefulBits
    return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
                                             Depth);
  case AArch64::UBFMWri:
  case AArch64::UBFMXri:
    return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);

  case AArch64::ORRWrs:
  case AArch64::ORRXrs:
    if (UserNode->getOperand(0) != Orig && UserNode->getOperand(1) == Orig)
      getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
                                        Depth);
    return;
  case AArch64::BFMWri:
  case AArch64::BFMXri:
    return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);

  case AArch64::STRBBui:
  case AArch64::STURBBi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
    return;

  case AArch64::STRHHui:
  case AArch64::STURHHi:
    if (UserNode->getOperand(0) != Orig)
      return;
    UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
    return;
  }
}

static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return;
  // Initialize UsefulBits
  if (!Depth) {
    unsigned Bitwidth = Op.getScalarValueSizeInBits();
    // At the beginning, assume every produced bit is useful
    UsefulBits = APInt(Bitwidth, 0);
    UsefulBits.flipAllBits();
  }
  APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);

  for (SDNode *Node : Op.getNode()->uses()) {
    // A use cannot produce useful bits
    APInt UsefulBitsForUse = APInt(UsefulBits);
    getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
    UsersUsefulBits |= UsefulBitsForUse;
  }
  // UsefulBits contains the produced bits that are meaningful for the
  // current definition, thus a user cannot make a bit meaningful at
  // this point
  UsefulBits &= UsersUsefulBits;
}

/// Create a machine node performing a notional SHL of Op by ShlAmount. If
/// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
/// 0, return Op unchanged.
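// For example, on i32, getLeftShift(CurDAG, Op, 4) produces
// "UBFMWri Op, 28, 27" (an "lsl w, w, #4"), while getLeftShift(CurDAG, Op, -4)
// produces "UBFMWri Op, 4, 31" (an "lsr w, w, #4").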
static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
  if (ShlAmount == 0)
    return Op;

  EVT VT = Op.getValueType();
  SDLoc dl(Op);
  unsigned BitWidth = VT.getSizeInBits();
  unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;

  SDNode *ShiftNode;
  if (ShlAmount > 0) {
    // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op,
        CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
  } else {
    // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
    assert(ShlAmount < 0 && "expected right shift");
    int ShrAmount = -ShlAmount;
    ShiftNode = CurDAG->getMachineNode(
        UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
        CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
  }

  return SDValue(ShiftNode, 0);
}

// For bit-field-positioning pattern "(and (shl VAL, N), ShiftedMask)".
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);

// For bit-field-positioning pattern "(shl VAL, N)".
static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width);

/// Does this tree qualify as an attempt to move a bitfield into position,
/// essentially "(and (shl VAL, N), Mask)" or "(shl VAL, N)".
static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
                                    bool BiggerPattern, SDValue &Src,
                                    int &DstLSB, int &Width) {
  EVT VT = Op.getValueType();
  unsigned BitWidth = VT.getSizeInBits();
  (void)BitWidth;
  assert(BitWidth == 32 || BitWidth == 64);

  KnownBits Known = CurDAG->computeKnownBits(Op);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value
  const uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
  if (!isShiftedMask_64(NonZeroBits))
    return false;

  switch (Op.getOpcode()) {
  default:
    break;
  case ISD::AND:
    return isBitfieldPositioningOpFromAnd(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  case ISD::SHL:
    return isBitfieldPositioningOpFromShl(CurDAG, Op, BiggerPattern,
                                          NonZeroBits, Src, DstLSB, Width);
  }

  return false;
}
static bool isBitfieldPositioningOpFromAnd(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees VT is one of i32 or i64");
  (void)VT;

  uint64_t AndImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm))
    return false;

  // If (~AndImm & NonZeroBits) is not zero at POS, we know that
  //   1) (AndImm & (1 << POS)) == 0
  //   2) the result of AND is not zero at POS bit (according to NonZeroBits)
  //
  // 1) and 2) don't agree, so something must be wrong (e.g., in
  // 'SelectionDAG::computeKnownBits')
  assert((~AndImm & NonZeroBits) == 0 &&
         "Something must be wrong (e.g., in SelectionDAG::computeKnownBits)");

  SDValue AndOp0 = Op.getOperand(0);

  uint64_t ShlImm;
  SDValue ShlOp0;
  if (isOpcWithIntImmediate(AndOp0.getNode(), ISD::SHL, ShlImm)) {
    // For pattern "and(shl(val, N), shifted-mask)", 'ShlOp0' is set to 'val'.
    ShlOp0 = AndOp0.getOperand(0);
  } else if (VT == MVT::i64 && AndOp0.getOpcode() == ISD::ANY_EXTEND &&
             isOpcWithIntImmediate(AndOp0.getOperand(0).getNode(), ISD::SHL,
                                   ShlImm)) {
    // For pattern "and(any_extend(shl(val, N)), shifted-mask)"

    // ShlVal == shl(val, N), which is a left shift on a smaller type.
    SDValue ShlVal = AndOp0.getOperand(0);

    // Since this is after type legalization and ShlVal is extended to MVT::i64,
    // expect VT to be MVT::i32.
    assert((ShlVal.getValueType() == MVT::i32) && "Expect VT to be MVT::i32.");

    // Widen 'val' to MVT::i64 as the source of bit field positioning.
    ShlOp0 = Widen(CurDAG, ShlVal.getOperand(0));
  } else
    return false;

  // For !BiggerPattern, bail out if the AndOp0 has more than one use, since
  // then we'll end up generating AndOp0+UBFIZ instead of just keeping
  // AndOp0+AND.
  if (!BiggerPattern && !AndOp0.hasOneUse())
    return false;

  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);

  // Bail out on large Width. This happens when no proper combining / constant
  // folding was performed.
  if (Width >= (int)VT.getSizeInBits()) {
    // If VT is i64, Width > 64 is insensible since NonZeroBits is uint64_t,
    // and Width == 64 indicates a missed dag-combine from "(and val, AllOnes)"
    // to "val".
    // If VT is i32, what Width >= 32 means:
    // - For "(and (any_extend(shl val, N)), shifted-mask)", the `and` Op
    //   demands at least 'Width' bits (after dag-combiner). This together with
    //   the `any_extend` Op (undefined higher bits) indicates a missed
    //   combination when lowering the 'and' IR instruction to a machine IR
    //   instruction.
    LLVM_DEBUG(
        dbgs()
        << "Found large Width in bit-field-positioning -- this indicates no "
           "proper combining / constant folding was performed\n");
    return false;
  }

  // BFI encompasses sufficiently many nodes that it's worth inserting an extra
  // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
  // amount. BiggerPattern is true when this pattern is being matched for BFI,
  // BiggerPattern is false when this pattern is being matched for UBFIZ, in
  // which case it is not profitable to insert an extra shift.
  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, ShlOp0, ShlImm - DstLSB);
  return true;
}
// For node "(shl (and val, mask), N)", returns true if the node is equivalent
// to UBFIZ.
static bool isSeveralBitsPositioningOpFromShl(const uint64_t ShlImm, SDValue Op,
                                              SDValue &Src, int &DstLSB,
                                              int &Width) {
  // Caller should have verified that N is a left shift with constant shift
  // amount; the asserts check that.
  assert(Op.getOpcode() == ISD::SHL &&
         "Op.getNode() should be a SHL node to call this function");
  assert(isIntImmediateEq(Op.getOperand(1), ShlImm) &&
         "Op.getNode() should shift ShlImm to call this function");

  uint64_t AndImm = 0;
  SDValue Op0 = Op.getOperand(0);
  if (!isOpcWithIntImmediate(Op0.getNode(), ISD::AND, AndImm))
    return false;

  const uint64_t ShiftedAndImm = ((AndImm << ShlImm) >> ShlImm);
  if (isMask_64(ShiftedAndImm)) {
    // AndImm is a superset of (AllOnes >> ShlImm); in other words, AndImm
    // should end with Mask, and could be prefixed with random bits if those
    // bits are shifted out.
    //
    // For example, xyz11111 (with {x,y,z} being 0 or 1) is fine if ShlImm >= 3;
    // the AND result corresponding to those bits are shifted out, so it's fine
    // to not extract them.
    Width = llvm::countr_one(ShiftedAndImm);
    DstLSB = ShlImm;
    Src = Op0.getOperand(0);
    return true;
  }
  return false;
}

static bool isBitfieldPositioningOpFromShl(SelectionDAG *CurDAG, SDValue Op,
                                           bool BiggerPattern,
                                           const uint64_t NonZeroBits,
                                           SDValue &Src, int &DstLSB,
                                           int &Width) {
  assert(isShiftedMask_64(NonZeroBits) && "Caller guaranteed");

  EVT VT = Op.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller guarantees that type is i32 or i64");
  (void)VT;

  uint64_t ShlImm;
  if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
    return false;

  if (!BiggerPattern && !Op.hasOneUse())
    return false;

  if (isSeveralBitsPositioningOpFromShl(ShlImm, Op, Src, DstLSB, Width))
    return true;

  DstLSB = llvm::countr_zero(NonZeroBits);
  Width = llvm::countr_one(NonZeroBits >> DstLSB);

  if (ShlImm != uint64_t(DstLSB) && !BiggerPattern)
    return false;

  Src = getLeftShift(CurDAG, Op.getOperand(0), ShlImm - DstLSB);
  return true;
}

static bool isShiftedMask(uint64_t Mask, EVT VT) {
  assert(VT == MVT::i32 || VT == MVT::i64);
  if (VT == MVT::i32)
    return isShiftedMask_32(Mask);
  return isShiftedMask_64(Mask);
}

// Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
// inserted only sets known zero bits.
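// For example, for i32 "(or (and x, 0xffffff00), 0x5a)": 0x5a is not a valid
// logical immediate, the known-zero mask 0xff is a shifted mask, and the
// inserted bits land only in known-zero positions, so this selects
//   mov   w8, #0x5a
//   bfxil w0, w8, #0, #8
// i.e. a BFMWri with ImmR = 0 and ImmS = 7 (LSB = 0, Width = 8).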
static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect a OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  uint64_t OrImm;
  if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
    return false;

  // Skip this transformation if the immediate can be encoded directly in the
  // ORR. Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most
  // likely performance neutral.
  if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
    return false;

  uint64_t MaskImm;
  SDValue And = N->getOperand(0);
  // Must be a single use AND with an immediate operand.
  if (!And.hasOneUse() ||
      !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
    return false;

  // Compute the known zero bits for the AND, as this allows us to catch more
  // general cases than just looking for an AND with an immediate.
  KnownBits Known = CurDAG->computeKnownBits(And);

  // Non-zero in the sense that they're not provably zero, which is the key
  // point if we want to use this value.
  uint64_t NotKnownZero = (~Known.Zero).getZExtValue();

  // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
  if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
    return false;

  // The bits being inserted must only set those bits that are known to be
  // zero.
  if ((OrImm & NotKnownZero) != 0) {
    // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
    // currently handle this case.
    return false;
  }

  // BFI/BFXIL dst, src, #lsb, #width.
  int LSB = llvm::countr_one(NotKnownZero);
  int Width = BitWidth - APInt(BitWidth, NotKnownZero).popcount();

  // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
  unsigned ImmR = (BitWidth - LSB) % BitWidth;
  unsigned ImmS = Width - 1;

  // If we're creating a BFI instruction, avoid cases where we need more
  // instructions to materialize the BFI constant as compared to the original
  // ORR. A BFXIL will use the same constant as the original ORR, so the code
  // should be no worse in this case.
  bool IsBFI = LSB != 0;
  uint64_t BFIImm = OrImm >> LSB;
  if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
    // We have a BFI instruction and we know the constant can't be materialized
    // with a ORR-immediate with the zero register.
    unsigned OrChunks = 0, BFIChunks = 0;
    for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
      if (((OrImm >> Shift) & 0xFFFF) != 0)
        ++OrChunks;
      if (((BFIImm >> Shift) & 0xFFFF) != 0)
        ++BFIChunks;
    }
    if (BFIChunks > OrChunks)
      return false;
  }

  // Materialize the constant to be inserted.
  SDLoc DL(N);
  unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
  SDNode *MOVI = CurDAG->getMachineNode(
      MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));

  // Create the BFI/BFXIL instruction.
  SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
                   CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
static bool isWorthFoldingIntoOrrWithShift(SDValue Dst, SelectionDAG *CurDAG,
                                           SDValue &ShiftedOperand,
                                           uint64_t &EncodedShiftImm) {
  // Avoid folding Dst into ORR-with-shift if Dst has uses other than the ORR.
  if (!Dst.hasOneUse())
    return false;

  EVT VT = Dst.getValueType();
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Caller should guarantee that VT is one of i32 or i64");
  const unsigned SizeInBits = VT.getSizeInBits();

  SDLoc DL(Dst.getNode());
  uint64_t AndImm, ShlImm;
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::AND, AndImm) &&
      isShiftedMask_64(AndImm)) {
    // Avoid transforming 'DstOp0' if it has uses other than the AND node.
    SDValue DstOp0 = Dst.getOperand(0);
    if (!DstOp0.hasOneUse())
      return false;

    // An example to illustrate the transformation
    // From:
    //   lsr x8, x1, #1
    //   and x8, x8, #0x3f80
    //   bfxil x8, x1, #0, #7
    // To:
    //   and x8, x1, #0x7f
    //   ubfx x9, x1, #8, #7
    //   orr x8, x8, x9, lsl #7
    //
    // The number of instructions remains the same, but ORR is faster than
    // BFXIL on many AArch64 processors (or as good as BFXIL if not faster).
    // Besides, the dependency chain is improved after the transformation.
    uint64_t SrlImm;
    if (isOpcWithIntImmediate(DstOp0.getNode(), ISD::SRL, SrlImm)) {
      uint64_t NumTrailingZeroInShiftedMask = llvm::countr_zero(AndImm);
      if ((SrlImm + NumTrailingZeroInShiftedMask) < SizeInBits) {
        unsigned MaskWidth =
            llvm::countr_one(AndImm >> NumTrailingZeroInShiftedMask);
        unsigned UBFMOpc =
            (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
        SDNode *UBFMNode = CurDAG->getMachineNode(
            UBFMOpc, DL, VT, DstOp0.getOperand(0),
            CurDAG->getTargetConstant(SrlImm + NumTrailingZeroInShiftedMask, DL,
                                      VT),
            CurDAG->getTargetConstant(
                SrlImm + NumTrailingZeroInShiftedMask + MaskWidth - 1, DL, VT));
        ShiftedOperand = SDValue(UBFMNode, 0);
        EncodedShiftImm = AArch64_AM::getShifterImm(
            AArch64_AM::LSL, NumTrailingZeroInShiftedMask);
        return true;
      }
    }
    return false;
  }

  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SHL, ShlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm);
    return true;
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(Dst.getNode(), ISD::SRL, SrlImm)) {
    ShiftedOperand = Dst.getOperand(0);
    EncodedShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm);
    return true;
  }
  return false;
}
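// Editor's check of the UBFX bounds in the example above (illustrative): with
// AndImm = 0x3f80 and SrlImm = 1, the mask has 7 trailing zeros and width 7,
// so the UBFM extracts bits [8, 14], i.e. ubfx #8, #7.
static_assert((0x3f80u >> 7) == 0x7fu, "example mask is 7 ones shifted by 7");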
// Given an 'ISD::OR' node that is going to be selected as BFM, analyze
// the operands and select it to AArch64::ORR with shifted registers if
// that's more efficient. Returns true iff selection to AArch64::ORR happens.
static bool tryOrrWithShift(SDNode *N, SDValue OrOpd0, SDValue OrOpd1,
                            SDValue Src, SDValue Dst, SelectionDAG *CurDAG,
                            const bool BiggerPattern) {
  EVT VT = N->getValueType(0);
  assert(N->getOpcode() == ISD::OR && "Expect N to be an OR node");
  assert(((N->getOperand(0) == OrOpd0 && N->getOperand(1) == OrOpd1) ||
          (N->getOperand(1) == OrOpd0 && N->getOperand(0) == OrOpd1)) &&
         "Expect OrOpd0 and OrOpd1 to be operands of ISD::OR");
  assert((VT == MVT::i32 || VT == MVT::i64) &&
         "Expect result type to be i32 or i64 since N is combinable to BFM");
  SDLoc DL(N);

  // Bail out if BFM simplifies away one node in BFM Dst.
  if (OrOpd1 != Dst)
    return false;

  const unsigned OrrOpc = (VT == MVT::i32) ? AArch64::ORRWrs : AArch64::ORRXrs;
  // For "BFM Rd, Rn, #immr, #imms", it's known that BFM simplifies away fewer
  // nodes from Rn (or inserts an additional shift node) if BiggerPattern is
  // true.
  if (BiggerPattern) {
    uint64_t SrcAndImm;
    if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::AND, SrcAndImm) &&
        isMask_64(SrcAndImm) && OrOpd0.getOperand(0) == Src) {
      // OrOpd0 = AND Src, #Mask
      // So BFM simplifies away one AND node from Src and doesn't simplify away
      // nodes from Dst. If ORR with a left-shifted operand also simplifies
      // away one node (from Rd), ORR is better since it has higher throughput
      // and smaller latency than BFM on many AArch64 processors (and for the
      // rest ORR is at least as good as BFM).
      SDValue ShiftedOperand;
      uint64_t EncodedShiftImm;
      if (isWorthFoldingIntoOrrWithShift(Dst, CurDAG, ShiftedOperand,
                                         EncodedShiftImm)) {
        SDValue Ops[] = {OrOpd0, ShiftedOperand,
                         CurDAG->getTargetConstant(EncodedShiftImm, DL, VT)};
        CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
        return true;
      }
    }
    return false;
  }

  assert((!BiggerPattern) && "BiggerPattern should be handled above");

  uint64_t ShlImm;
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SHL, ShlImm)) {
    if (OrOpd0.getOperand(0) == Src && OrOpd0.hasOneUse()) {
      SDValue Ops[] = {
          Dst, Src,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }

    // Select the following pattern to a left-shifted operand rather than BFI.
    //   %val1 = op ..
    //   %val2 = shl %val1, #imm
    //   %res = or %val1, %val2
    //
    // If N is selected to be BFI, we know that
    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
    //    into BFI)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFI, fold OrOpd0 as a left shift directly.
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSL, ShlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }
  }

  uint64_t SrlImm;
  if (isOpcWithIntImmediate(OrOpd0.getNode(), ISD::SRL, SrlImm)) {
    // Select the following pattern to a right-shifted operand rather than
    // BFXIL.
    //   %val1 = op ..
    //   %val2 = lshr %val1, #imm
    //   %res = or %val1, %val2
    //
    // If N is selected to be BFXIL, we know that
    // 1) OrOpd0 would be the operand from which to extract bits (i.e., folded
    //    into BFXIL)
    // 2) OrOpd1 would be the destination operand (i.e., preserved)
    //
    // Instead of selecting N to BFXIL, fold OrOpd0 as a right shift directly.
    if (OrOpd0.getOperand(0) == OrOpd1) {
      SDValue Ops[] = {
          OrOpd1, OrOpd1,
          CurDAG->getTargetConstant(
              AArch64_AM::getShifterImm(AArch64_AM::LSR, SrlImm), DL, VT)};
      CurDAG->SelectNodeTo(N, OrrOpc, VT, Ops);
      return true;
    }
  }

  return false;
}
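// Editor's illustration (hypothetical IR): with %val2 = shl i64 %val1, 7 and
// %res = or i64 %val1, %val2, the OR above is selected directly to
//   orr x0, x1, x1, lsl #7
// rather than to a BFI.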
static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
                                      SelectionDAG *CurDAG) {
  assert(N->getOpcode() == ISD::OR && "Expect an OR operation");

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  unsigned BitWidth = VT.getSizeInBits();

  // Because of simplify-demanded-bits in DAGCombine, involved masks may not
  // have the expected shape. Try to undo that.

  unsigned NumberOfIgnoredLowBits = UsefulBits.countr_zero();
  unsigned NumberOfIgnoredHighBits = UsefulBits.countl_zero();

  // Given an OR operation, check if we have the following pattern:
  //   ubfm c, b, imm, imm2 (or something that does the same job, see
  //                         isBitfieldExtractOp)
  //   d = e & mask2 ; where mask2 is a binary sequence of 1..10..0 and
  //                   countTrailingZeros(mask2) == imm2 - imm + 1
  //   f = d | c
  // If yes, replace the OR instruction with:
  //   f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm and MSB = imm2

  // OR is commutative, so check all combinations of operand order and values
  // of BiggerPattern, i.e.
  //   Opd0, Opd1, BiggerPattern=false
  //   Opd1, Opd0, BiggerPattern=false
  //   Opd0, Opd1, BiggerPattern=true
  //   Opd1, Opd0, BiggerPattern=true
  // Several of these combinations may match, so check with BiggerPattern=false
  // first since that will produce better results by matching more instructions
  // and/or inserting fewer extra instructions.
  for (int I = 0; I < 4; ++I) {

    SDValue Dst, Src;
    unsigned ImmR, ImmS;
    bool BiggerPattern = I / 2;
    SDValue OrOpd0Val = N->getOperand(I % 2);
    SDNode *OrOpd0 = OrOpd0Val.getNode();
    SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
    SDNode *OrOpd1 = OrOpd1Val.getNode();

    unsigned BFXOpc;
    int DstLSB, Width;
    if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
                            NumberOfIgnoredLowBits, BiggerPattern)) {
      // Check that the returned opcode is compatible with the pattern,
      // i.e., same type and zero extended (U and not S).
      if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
          (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
        continue;

      // Compute the width of the bitfield insertion.
      DstLSB = 0;
      Width = ImmS - ImmR + 1;
      // FIXME: This constraint is to catch bitfield insertion; we may want
      // to widen the pattern if we want to grab the general bitfield move
      // case.
      if (Width <= 0)
        continue;

      // If the mask on the insertee is correct, we have a BFXIL operation. We
      // can share the ImmR and ImmS values from the already-computed UBFM.
    } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
                                       BiggerPattern,
                                       Src, DstLSB, Width)) {
      ImmR = (BitWidth - DstLSB) % BitWidth;
      ImmS = Width - 1;
    } else
      continue;

    // Check the second part of the pattern.
    EVT VT = OrOpd1Val.getValueType();
    assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");

    // Compute the known zero bits for the candidate first operand. This
    // allows us to catch more general cases than just looking for an AND
    // with an immediate. Indeed, simplify-demanded-bits may have removed
    // the AND instruction because it proves it was useless.
    KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);

    // Check if there is enough room for the second operand to appear
    // in the first one.
    APInt BitsToBeInserted =
        APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);

    if ((BitsToBeInserted & ~Known.Zero) != 0)
      continue;

    // Set the first operand.
    uint64_t Imm;
    if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
        isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
      // In that case, we can eliminate the AND.
      Dst = OrOpd1->getOperand(0);
    else
      // Maybe the AND has been removed by simplify-demanded-bits
      // or is useful because it discards more bits.
      Dst = OrOpd1Val;

    // Before selecting the ISD::OR node to AArch64::BFM, see if an
    // AArch64::ORR with a shifted operand is more efficient.
    if (tryOrrWithShift(N, OrOpd0Val, OrOpd1Val, Src, Dst, CurDAG,
                        BiggerPattern))
      return true;

    // Both parts match.
    SDLoc DL(N);
    SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
  // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
  // mask (e.g., 0x000ffff0).
  uint64_t Mask0Imm, Mask1Imm;
  SDValue And0 = N->getOperand(0);
  SDValue And1 = N->getOperand(1);
  if (And0.hasOneUse() && And1.hasOneUse() &&
      isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
      isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
      APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
      (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {

    // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
    // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
    // bits to be inserted.
    if (isShiftedMask(Mask0Imm, VT)) {
      std::swap(And0, And1);
      std::swap(Mask0Imm, Mask1Imm);
    }

    SDValue Src = And1->getOperand(0);
    SDValue Dst = And0->getOperand(0);
    unsigned LSB = llvm::countr_zero(Mask1Imm);
    int Width = BitWidth - APInt(BitWidth, Mask0Imm).popcount();

    // The BFXIL inserts the low-order bits from a source register, so right
    // shift the needed bits into place.
    SDLoc DL(N);
    unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
    uint64_t LsrImm = LSB;
    if (Src->hasOneUse() &&
        isOpcWithIntImmediate(Src.getNode(), ISD::SRL, LsrImm) &&
        (LsrImm + LSB) < BitWidth) {
      Src = Src->getOperand(0);
      LsrImm += LSB;
    }

    SDNode *LSR = CurDAG->getMachineNode(
        ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LsrImm, DL, VT),
        CurDAG->getTargetConstant(BitWidth - 1, DL, VT));

    // BFXIL is an alias of BFM, so translate to BFM operands.
    unsigned ImmR = (BitWidth - LSB) % BitWidth;
    unsigned ImmS = Width - 1;

    // Create the BFXIL instruction.
    SDValue Ops[] = {Dst, SDValue(LSR, 0),
                     CurDAG->getTargetConstant(ImmR, DL, VT),
                     CurDAG->getTargetConstant(ImmS, DL, VT)};
    unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
    CurDAG->SelectNodeTo(N, Opc, VT, Ops);
    return true;
  }

  return false;
}
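// Editor's illustration of the two-AND case (hypothetical masks): with
// Mask1Imm = 0x000ffff0 (a shifted mask) and Mask0Imm = ~Mask1Imm = 0xfff0000f
// on i32, LSB = 4 and Width = 32 - popcount(Mask0Imm) = 16.
static_assert(0xfff0000fu == ~0x000ffff0u,
              "the example masks are complementary");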
bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
  if (N->getOpcode() != ISD::OR)
    return false;

  APInt NUsefulBits;
  getUsefulBits(SDValue(N, 0), NUsefulBits);

  // If no bits are useful, just return UNDEF.
  if (!NUsefulBits) {
    CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
    return true;
  }

  if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
    return true;

  return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
}

/// tryBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
/// equivalent of a left shift by a constant amount followed by an and masking
/// out a contiguous set of bits.
bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
  if (N->getOpcode() != ISD::AND)
    return false;

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32 && VT != MVT::i64)
    return false;

  SDValue Op0;
  int DstLSB, Width;
  if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
                               Op0, DstLSB, Width))
    return false;

  // ImmR is the rotate right amount.
  unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
  // ImmS is the most significant bit of the source to be moved.
  unsigned ImmS = Width - 1;

  SDLoc DL(N);
  SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
                   CurDAG->getTargetConstant(ImmS, DL, VT)};
  unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}

/// tryShiftAmountMod - Take advantage of the built-in mod of the shift amount
/// in variable shift/rotate instructions.
bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned Opc;
  switch (N->getOpcode()) {
  case ISD::ROTR:
    Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
    break;
  case ISD::SHL:
    Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
    break;
  case ISD::SRL:
    Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
    break;
  case ISD::SRA:
    Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
    break;
  default:
    return false;
  }

  uint64_t Size;
  uint64_t Bits;
  if (VT == MVT::i32) {
    Bits = 5;
    Size = 32;
  } else if (VT == MVT::i64) {
    Bits = 6;
    Size = 64;
  } else
    return false;

  SDValue ShiftAmt = N->getOperand(1);
  SDLoc DL(N);
  SDValue NewShiftAmt;

  // Skip over an extend of the shift amount.
  if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
      ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
    ShiftAmt = ShiftAmt->getOperand(0);

  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
    SDValue Add0 = ShiftAmt->getOperand(0);
    SDValue Add1 = ShiftAmt->getOperand(1);
    uint64_t Add0Imm;
    uint64_t Add1Imm;
    if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) {
      // If we are shifting by X+/-N where N == 0 mod Size, then just shift by
      // X to avoid the ADD/SUB.
      NewShiftAmt = Add0;
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
               (Add0Imm % Size == 0)) {
      // If we are shifting by N-X where N == 0 mod Size, then just shift by
      // -X to generate a NEG instead of a SUB from a constant.
      unsigned NegOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NegOpc = AArch64::SUBWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NegOpc = AArch64::SUBXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Neg =
          CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Neg, 0);
    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
               isIntImmediate(Add0, Add0Imm) && (Add0Imm % Size == Size - 1)) {
      // If we are shifting by N-X where N == -1 mod Size, then just shift by
      // ~X to generate a NOT instead of a SUB from a constant.
      unsigned NotOpc;
      unsigned ZeroReg;
      EVT SubVT = ShiftAmt->getValueType(0);
      if (SubVT == MVT::i32) {
        NotOpc = AArch64::ORNWrr;
        ZeroReg = AArch64::WZR;
      } else {
        assert(SubVT == MVT::i64);
        NotOpc = AArch64::ORNXrr;
        ZeroReg = AArch64::XZR;
      }
      SDValue Zero =
          CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
      MachineSDNode *Not =
          CurDAG->getMachineNode(NotOpc, DL, SubVT, Zero, Add1);
      NewShiftAmt = SDValue(Not, 0);
    } else
      return false;
  } else {
    // If the shift amount is masked with an AND, check that the mask covers
    // the bits that are implicitly ANDed off by the above opcodes and, if so,
    // skip the AND.
    uint64_t MaskImm;
    if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
        !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
      return false;

    if ((unsigned)llvm::countr_one(MaskImm) < Bits)
      return false;

    NewShiftAmt = ShiftAmt->getOperand(0);
  }

  // Narrow/widen the shift amount to match the size of the shift operation.
  if (VT == MVT::i32)
    NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
  else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
    SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
    MachineSDNode *Ext = CurDAG->getMachineNode(
        AArch64::SUBREG_TO_REG, DL, VT,
        CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
    NewShiftAmt = SDValue(Ext, 0);
  }

  SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
  CurDAG->SelectNodeTo(N, Opc, VT, Ops);
  return true;
}
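// Editor's illustration: the variable shifts only consume the low Bits of the
// amount, so for i64 an explicit (and amt, 63) is redundant; e.g. an amount of
// 67 behaves as 67 & 63 = 3.
static_assert((67u & 63u) == 3u, "i64 shift amounts are taken mod 64");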
true;3850}38513852static bool checkCVTFixedPointOperandWithFBits(SelectionDAG *CurDAG, SDValue N,3853SDValue &FixedPos,3854unsigned RegWidth,3855bool isReciprocal) {3856APFloat FVal(0.0);3857if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))3858FVal = CN->getValueAPF();3859else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {3860// Some otherwise illegal constants are allowed in this case.3861if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||3862!isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))3863return false;38643865ConstantPoolSDNode *CN =3866dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));3867FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();3868} else3869return false;38703871// An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits3872// is between 1 and 32 for a destination w-register, or 1 and 64 for an3873// x-register.3874//3875// By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we3876// want THIS_NODE to be 2^fbits. This is much easier to deal with using3877// integers.3878bool IsExact;38793880if (isReciprocal)3881if (!FVal.getExactInverse(&FVal))3882return false;38833884// fbits is between 1 and 64 in the worst-case, which means the fmul3885// could have 2^64 as an actual operand. Need 65 bits of precision.3886APSInt IntVal(65, true);3887FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);38883889// N.b. isPowerOf2 also checks for > 0.3890if (!IsExact || !IntVal.isPowerOf2())3891return false;3892unsigned FBits = IntVal.logBase2();38933894// Checks above should have guaranteed that we haven't lost information in3895// finding FBits, but it must still be in range.3896if (FBits == 0 || FBits > RegWidth) return false;38973898FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);3899return true;3900}39013902bool AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,3903unsigned RegWidth) {3904return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,3905false);3906}39073908bool AArch64DAGToDAGISel::SelectCVTFixedPosRecipOperand(SDValue N,3909SDValue &FixedPos,3910unsigned RegWidth) {3911return checkCVTFixedPointOperandWithFBits(CurDAG, N, FixedPos, RegWidth,3912true);3913}39143915// Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields3916// of the string and obtains the integer values from them and combines these3917// into a single value to be used in the MRS/MSR instruction.3918static int getIntOperandFromRegisterString(StringRef RegString) {3919SmallVector<StringRef, 5> Fields;3920RegString.split(Fields, ':');39213922if (Fields.size() == 1)3923return -1;39243925assert(Fields.size() == 53926&& "Invalid number of fields in read register string");39273928SmallVector<int, 5> Ops;3929bool AllIntFields = true;39303931for (StringRef Field : Fields) {3932unsigned IntField;3933AllIntFields &= !Field.getAsInteger(10, IntField);3934Ops.push_back(IntField);3935}39363937assert(AllIntFields &&3938"Unexpected non-integer value in special register string.");3939(void)AllIntFields;39403941// Need to combine the integer fields of the string into a single value3942// based on the bit encoding of MRS/MSR instruction.3943return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |3944(Ops[3] << 3) | (Ops[4]);3945}39463947// Lower the read_register intrinsic to an MRS instruction node if the special3948// register string argument is either of the form detailed in the ALCE (the3949// form described in getIntOperandsFromRegsterString) or is a 
// Lower the read_register intrinsic to an MRS instruction node if the special
// register string argument is either of the form detailed in the ACLE (the
// form described in getIntOperandFromRegisterString) or is a named register
// known by the MRS SysReg mapper.
bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool ReadIs128Bit = N->getOpcode() == AArch64ISD::MRRS;

  unsigned Opcode64Bit = AArch64::MRS;
  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // No match. Use the sysreg mapper to map the remaining possible strings
    // to the value for the register to be used for the instruction operand.
    const auto *TheReg =
        AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Readable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1) {
      // Still no match; see if this is "pc" or give up.
      if (!ReadIs128Bit && RegString->getString() == "pc") {
        Opcode64Bit = AArch64::ADR;
        Imm = 0;
      } else {
        return false;
      }
    }
  }

  SDValue InChain = N->getOperand(0);
  SDValue SysRegImm = CurDAG->getTargetConstant(Imm, DL, MVT::i32);
  if (!ReadIs128Bit) {
    CurDAG->SelectNodeTo(N, Opcode64Bit, MVT::i64, MVT::Other /* Chain */,
                         {SysRegImm, InChain});
  } else {
    SDNode *MRRS = CurDAG->getMachineNode(
        AArch64::MRRS, DL,
        {MVT::Untyped /* XSeqPair */, MVT::Other /* Chain */},
        {SysRegImm, InChain});

    // Sysregs are not endian. The even register always contains the low half
    // of the register.
    SDValue Lo = CurDAG->getTargetExtractSubreg(AArch64::sube64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue Hi = CurDAG->getTargetExtractSubreg(AArch64::subo64, DL, MVT::i64,
                                                SDValue(MRRS, 0));
    SDValue OutChain = SDValue(MRRS, 1);

    ReplaceUses(SDValue(N, 0), Lo);
    ReplaceUses(SDValue(N, 1), Hi);
    ReplaceUses(SDValue(N, 2), OutChain);
  }
  return true;
}

// Lower the write_register intrinsic to an MSR instruction node if the
// special register string argument is either of the form detailed in the ACLE
// (the form described in getIntOperandFromRegisterString) or is a named
// register known by the MSR SysReg mapper.
bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  SDLoc DL(N);

  bool WriteIs128Bit = N->getOpcode() == AArch64ISD::MSRR;

  if (!WriteIs128Bit) {
    // Check if the register was one of those allowed as the pstatefield value
    // in the MSR (immediate) instruction. To accept the values allowed in the
    // pstatefield for the MSR (immediate) instruction, we also require that an
    // immediate value has been provided as an argument; we know this is the
    // case, as it has been ensured by semantic checking.
    auto trySelectPState = [&](auto PMapper, unsigned State) {
      if (PMapper) {
        assert(isa<ConstantSDNode>(N->getOperand(2)) &&
               "Expected a constant integer expression.");
        unsigned Reg = PMapper->Encoding;
        uint64_t Immed = N->getConstantOperandVal(2);
        CurDAG->SelectNodeTo(
            N, State, MVT::Other, CurDAG->getTargetConstant(Reg, DL, MVT::i32),
            CurDAG->getTargetConstant(Immed, DL, MVT::i16), N->getOperand(0));
        return true;
      }
      return false;
    };

    if (trySelectPState(
            AArch64PState::lookupPStateImm0_15ByName(RegString->getString()),
            AArch64::MSRpstateImm4))
      return true;
    if (trySelectPState(
            AArch64PState::lookupPStateImm0_1ByName(RegString->getString()),
            AArch64::MSRpstateImm1))
      return true;
  }

  int Imm = getIntOperandFromRegisterString(RegString->getString());
  if (Imm == -1) {
    // Use the sysreg mapper to attempt to map the remaining possible strings
    // to the value for the register to be used for the MSR (register)
    // instruction operand.
    auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
    if (TheReg && TheReg->Writeable &&
        TheReg->haveFeatures(Subtarget->getFeatureBits()))
      Imm = TheReg->Encoding;
    else
      Imm = AArch64SysReg::parseGenericRegister(RegString->getString());

    if (Imm == -1)
      return false;
  }

  SDValue InChain = N->getOperand(0);
  if (!WriteIs128Bit) {
    CurDAG->SelectNodeTo(N, AArch64::MSR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         N->getOperand(2), InChain);
  } else {
    // No endian swap. The lower half always goes into the even subreg, and
    // the higher half always into the odd subreg.
    SDNode *Pair = CurDAG->getMachineNode(
        TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped /* XSeqPair */,
        {CurDAG->getTargetConstant(AArch64::XSeqPairsClassRegClass.getID(), DL,
                                   MVT::i32),
         N->getOperand(2),
         CurDAG->getTargetConstant(AArch64::sube64, DL, MVT::i32),
         N->getOperand(3),
         CurDAG->getTargetConstant(AArch64::subo64, DL, MVT::i32)});

    CurDAG->SelectNodeTo(N, AArch64::MSRR, MVT::Other,
                         CurDAG->getTargetConstant(Imm, DL, MVT::i32),
                         SDValue(Pair, 0), InChain);
  }

  return true;
}
/// We've got special pseudo-instructions for these
bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
  unsigned Opcode;
  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();

  // Leave IR for LSE if subtarget supports it.
  if (Subtarget->hasLSE())
    return false;

  if (MemTy == MVT::i8)
    Opcode = AArch64::CMP_SWAP_8;
  else if (MemTy == MVT::i16)
    Opcode = AArch64::CMP_SWAP_16;
  else if (MemTy == MVT::i32)
    Opcode = AArch64::CMP_SWAP_32;
  else if (MemTy == MVT::i64)
    Opcode = AArch64::CMP_SWAP_64;
  else
    llvm_unreachable("Unknown AtomicCmpSwap type");

  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
                   N->getOperand(0)};
  SDNode *CmpSwap = CurDAG->getMachineNode(
      Opcode, SDLoc(N), CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);

  MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});

  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
  CurDAG->RemoveDeadNode(N);

  return true;
}

bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm,
                                             SDValue &Shift) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  uint64_t Val = cast<ConstantSDNode>(N)
                     ->getAPIntValue()
                     .trunc(VT.getFixedSizeInBits())
                     .getZExtValue();

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit unsigned immediates.
    if (Val <= 255) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
      return true;
    }
    // Support 16-bit unsigned immediates that are a multiple of 256.
    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}
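// Editor's illustration: Val = 4608 (0x1200) is too big for the 8-bit form but
// is a multiple of 256, so it is encoded as Imm = 18 with Shift = 8.
static_assert(4608u % 256u == 0u && (4608u >> 8) == 18u,
              "shifted-immediate split for the example value");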
bool AArch64DAGToDAGISel::SelectSVEAddSubSSatImm(SDValue N, MVT VT,
                                                 SDValue &Imm, SDValue &Shift,
                                                 bool Negate) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();

  if (Negate)
    Val = -Val;

  // Signed saturating instructions treat their immediate operand as unsigned,
  // whereas the related intrinsics define their operands to be signed. This
  // means we can only use the immediate form when the operand is non-negative.
  if (Val < 0)
    return false;

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All positive immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit positive immediates.
    if (Val <= 255) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val, DL, MVT::i32);
      return true;
    }
    // Support 16-bit positive immediates that are a multiple of 256.
    if (Val <= 65280 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val >> 8, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectSVECpyDupImm(SDValue N, MVT VT, SDValue &Imm,
                                             SDValue &Shift) {
  if (!isa<ConstantSDNode>(N))
    return false;

  SDLoc DL(N);
  int64_t Val = cast<ConstantSDNode>(N)
                    ->getAPIntValue()
                    .trunc(VT.getFixedSizeInBits())
                    .getSExtValue();

  switch (VT.SimpleTy) {
  case MVT::i8:
    // All immediates are supported.
    Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
    return true;
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    // Support 8-bit signed immediates.
    if (Val >= -128 && Val <= 127) {
      Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant(Val & 0xFF, DL, MVT::i32);
      return true;
    }
    // Support 16-bit signed immediates that are a multiple of 256.
    if (Val >= -32768 && Val <= 32512 && Val % 256 == 0) {
      Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
      Imm = CurDAG->getTargetConstant((Val >> 8) & 0xFF, DL, MVT::i32);
      return true;
    }
    break;
  default:
    break;
  }

  return false;
}

bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    int64_t ImmVal = CNode->getSExtValue();
    SDLoc DL(N);
    if (ImmVal >= -128 && ImmVal < 128) {
      Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
      return true;
    }
  }
  return false;
}

bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();

    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }

    if (ImmVal < 256) {
      Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
      return true;
    }
  }
  return false;
}
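// Editor's illustration for the signed form: Val = -3072 = -12 * 256 fits the
// shifted encoding, with (Val >> 8) & 0xFF = 0xF4 as the 8-bit payload.
static_assert((-3072 / 256) == -12 && ((-12) & 0xFF) == 0xF4,
              "shifted-immediate split for the signed example");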
bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
                                              bool Invert) {
  if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CNode->getZExtValue();
    SDLoc DL(N);

    if (Invert)
      ImmVal = ~ImmVal;

    // Replicate the immediate across the full 64 bits, depending on the
    // element size.
    switch (VT.SimpleTy) {
    case MVT::i8:
      ImmVal &= 0xFF;
      ImmVal |= ImmVal << 8;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i16:
      ImmVal &= 0xFFFF;
      ImmVal |= ImmVal << 16;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i32:
      ImmVal &= 0xFFFFFFFF;
      ImmVal |= ImmVal << 32;
      break;
    case MVT::i64:
      break;
    default:
      llvm_unreachable("Unexpected type");
    }

    uint64_t encoding;
    if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
      Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
      return true;
    }
  }
  return false;
}
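// Editor's illustration of the replication step: a 16-bit immediate 0x1234
// becomes 0x1234123412341234 before logical-immediate encoding.
static_assert(((0x1234ULL | (0x1234ULL << 16)) |
               ((0x1234ULL | (0x1234ULL << 16)) << 32)) ==
                  0x1234123412341234ULL,
              "element replication for the i16 example");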
// SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
// Rather than attempt to normalise everything, we can sometimes saturate the
// shift amount during selection. This function also allows for consistent isel
// patterns by ensuring the resulting "Imm" node is of the i32 type required by
// the instructions.
bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
                                            uint64_t High, bool AllowSaturation,
                                            SDValue &Imm) {
  if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
    uint64_t ImmVal = CN->getZExtValue();

    // Reject shift amounts that are too small.
    if (ImmVal < Low)
      return false;

    // Reject or saturate shift amounts that are too big.
    if (ImmVal > High) {
      if (!AllowSaturation)
        return false;
      ImmVal = High;
    }

    Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
    return true;
  }

  return false;
}

bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
  // tagp(FrameIndex, IRGstack, tag_offset):
  // since the offset between FrameIndex and IRGstack is a compile-time
  // constant, this can be lowered to a single ADDG instruction.
  if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
    return false;
  }

  SDValue IRG_SP = N->getOperand(2);
  if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
      IRG_SP->getConstantOperandVal(1) != Intrinsic::aarch64_irg_sp) {
    return false;
  }

  const TargetLowering *TLI = getTargetLowering();
  SDLoc DL(N);
  int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
  SDValue FiOp = CurDAG->getTargetFrameIndex(
      FI, TLI->getPointerTy(CurDAG->getDataLayout()));
  int TagOffset = N->getConstantOperandVal(3);

  SDNode *Out = CurDAG->getMachineNode(
      AArch64::TAGPstack, DL, MVT::i64,
      {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, Out);
  return true;
}

void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
  assert(isa<ConstantSDNode>(N->getOperand(3)) &&
         "llvm.aarch64.tagp third argument must be an immediate");
  if (trySelectStackSlotTagP(N))
    return;
  // FIXME: the above applies in any case when the offset between Op1 and Op2
  // is a compile-time constant, not just for stack allocations.

  // General case for unrelated pointers in Op1 and Op2.
  SDLoc DL(N);
  int TagOffset = N->getConstantOperandVal(3);
  SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
                                      {N->getOperand(1), N->getOperand(2)});
  SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
                                      {SDValue(N1, 0), N->getOperand(2)});
  SDNode *N3 = CurDAG->getMachineNode(
      AArch64::ADDG, DL, MVT::i64,
      {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
       CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
  ReplaceNode(N, N3);
}

bool AArch64DAGToDAGISel::trySelectCastFixedLengthToScalableVector(SDNode *N) {
  assert(N->getOpcode() == ISD::INSERT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like insert_subvector.
  if (N->getConstantOperandVal(2) != 0)
    return false;
  if (!N->getOperand(0).isUndef())
    return false;

  // Bail when normal isel should do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(1).getValueType();
  if (VT.isFixedLengthVector() || InVT.isScalableVector())
    return false;
  if (InVT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(VT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to insert into a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(1), RC));
  return true;
}

bool AArch64DAGToDAGISel::trySelectCastScalableToFixedLengthVector(SDNode *N) {
  assert(N->getOpcode() == ISD::EXTRACT_SUBVECTOR && "Invalid Node!");

  // Bail when not a "cast" like extract_subvector.
  if (N->getConstantOperandVal(1) != 0)
    return false;

  // Bail when normal isel can do the job.
  EVT VT = N->getValueType(0);
  EVT InVT = N->getOperand(0).getValueType();
  if (VT.isScalableVector() || InVT.isFixedLengthVector())
    return false;
  if (VT.getSizeInBits() <= 128)
    return false;

  // NOTE: We can only get here when doing fixed length SVE code generation.
  // We do manual selection because the types involved are not linked to real
  // registers (despite being legal) and must be coerced into SVE registers.

  assert(InVT.getSizeInBits().getKnownMinValue() == AArch64::SVEBitsPerBlock &&
         "Expected to extract from a packed scalable vector!");

  SDLoc DL(N);
  auto RC = CurDAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
  ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT,
                                        N->getOperand(0), RC));
  return true;
}
bool AArch64DAGToDAGISel::trySelectXAR(SDNode *N) {
  assert(N->getOpcode() == ISD::OR && "Expected OR instruction");

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // Essentially: rotr (xor(x, y), imm) -> xar (x, y, imm)
  // A rotate by a constant is a funnel shift in IR, which is expanded to
  // an OR with shifted operands.
  // We do the following transform:
  //   OR N0, N1 -> xar (x, y, imm)
  // Where:
  //   N1 = SRL_PRED true, V, splat(imm)  --> rotr amount
  //   N0 = SHL_PRED true, V, splat(bits-imm)
  //   V = (xor x, y)
  if (VT.isScalableVector() &&
      (Subtarget->hasSVE2() ||
       (Subtarget->hasSME() && Subtarget->isStreaming()))) {
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      std::swap(N0, N1);
    if (N0.getOpcode() != AArch64ISD::SHL_PRED ||
        N1.getOpcode() != AArch64ISD::SRL_PRED)
      return false;

    auto *TLI = static_cast<const AArch64TargetLowering *>(getTargetLowering());
    if (!TLI->isAllActivePredicate(*CurDAG, N0.getOperand(0)) ||
        !TLI->isAllActivePredicate(*CurDAG, N1.getOperand(0)))
      return false;

    SDValue XOR = N0.getOperand(1);
    if (XOR.getOpcode() != ISD::XOR || XOR != N1.getOperand(1))
      return false;

    APInt ShlAmt, ShrAmt;
    if (!ISD::isConstantSplatVector(N0.getOperand(2).getNode(), ShlAmt) ||
        !ISD::isConstantSplatVector(N1.getOperand(2).getNode(), ShrAmt))
      return false;

    if (ShlAmt + ShrAmt != VT.getScalarSizeInBits())
      return false;

    SDLoc DL(N);
    SDValue Imm =
        CurDAG->getTargetConstant(ShrAmt.getZExtValue(), DL, MVT::i32);

    SDValue Ops[] = {XOR.getOperand(0), XOR.getOperand(1), Imm};
    if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::Int>(
            VT, {AArch64::XAR_ZZZI_B, AArch64::XAR_ZZZI_H, AArch64::XAR_ZZZI_S,
                 AArch64::XAR_ZZZI_D})) {
      CurDAG->SelectNodeTo(N, Opc, VT, Ops);
      return true;
    }
    return false;
  }

  if (!Subtarget->hasSHA3())
    return false;

  if (N0->getOpcode() != AArch64ISD::VSHL ||
      N1->getOpcode() != AArch64ISD::VLSHR)
    return false;

  if (N0->getOperand(0) != N1->getOperand(0) ||
      N1->getOperand(0)->getOpcode() != ISD::XOR)
    return false;

  SDValue XOR = N0.getOperand(0);
  SDValue R1 = XOR.getOperand(0);
  SDValue R2 = XOR.getOperand(1);

  unsigned HsAmt = N0.getConstantOperandVal(1);
  unsigned ShAmt = N1.getConstantOperandVal(1);

  SDLoc DL = SDLoc(N0.getOperand(1));
  SDValue Imm = CurDAG->getTargetConstant(
      ShAmt, DL, N0.getOperand(1).getValueType(), false);

  if (ShAmt + HsAmt != 64)
    return false;

  SDValue Ops[] = {R1, R2, Imm};
  CurDAG->SelectNodeTo(N, AArch64::XAR, N0.getValueType(), Ops);

  return true;
}
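// Editor's illustration: rotr(xor(x, y), 19) on a 64-bit lane appears as
// (or (shl v, 45), (srl v, 19)); the two shift amounts must cover the lane
// width for the XAR selection above to fire.
static_assert(45u + 19u == 64u, "funnel-shift amounts in the XAR example");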
void AArch64DAGToDAGISel::Select(SDNode *Node) {
  // If we have a custom node, we already have selected!
  if (Node->isMachineOpcode()) {
    LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
    Node->setNodeId(-1);
    return;
  }

  // A few custom selection cases.
  EVT VT = Node->getValueType(0);

  switch (Node->getOpcode()) {
  default:
    break;

  case ISD::ATOMIC_CMP_SWAP:
    if (SelectCMP_SWAP(Node))
      return;
    break;

  case ISD::READ_REGISTER:
  case AArch64ISD::MRRS:
    if (tryReadRegister(Node))
      return;
    break;

  case ISD::WRITE_REGISTER:
  case AArch64ISD::MSRR:
    if (tryWriteRegister(Node))
      return;
    break;

  case ISD::LOAD: {
    // Try to select as an indexed load. Fall through to normal processing
    // if we can't.
    if (tryIndexedLoad(Node))
      return;
    break;
  }

  case ISD::SRL:
  case ISD::AND:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    if (tryBitfieldExtractOp(Node))
      return;
    if (tryBitfieldInsertInZeroOp(Node))
      return;
    [[fallthrough]];
  case ISD::ROTR:
  case ISD::SHL:
    if (tryShiftAmountMod(Node))
      return;
    break;

  case ISD::SIGN_EXTEND:
    if (tryBitfieldExtractOpFromSExt(Node))
      return;
    break;

  case ISD::OR:
    if (tryBitfieldInsertOp(Node))
      return;
    if (trySelectXAR(Node))
      return;
    break;

  case ISD::EXTRACT_SUBVECTOR: {
    if (trySelectCastScalableToFixedLengthVector(Node))
      return;
    break;
  }

  case ISD::INSERT_SUBVECTOR: {
    if (trySelectCastFixedLengthToScalableVector(Node))
      return;
    break;
  }

  case ISD::Constant: {
    // Materialize zero constants as copies from WZR/XZR. This allows
    // the coalescer to propagate these into other instructions.
    ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
    if (ConstNode->isZero()) {
      if (VT == MVT::i32) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
        ReplaceNode(Node, New.getNode());
        return;
      } else if (VT == MVT::i64) {
        SDValue New = CurDAG->getCopyFromReg(
            CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
        ReplaceNode(Node, New.getNode());
        return;
      }
    }
    break;
  }

  case ISD::FrameIndex: {
    // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
    unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
    const TargetLowering *TLI = getTargetLowering();
    SDValue TFI = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    SDLoc DL(Node);
    SDValue Ops[] = {TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
                     CurDAG->getTargetConstant(Shifter, DL, MVT::i32)};
    CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
    return;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_gcsss: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue Val = Node->getOperand(2);
      SDValue Zero = CurDAG->getCopyFromReg(Chain, DL, AArch64::XZR, MVT::i64);
      SDNode *SS1 =
          CurDAG->getMachineNode(AArch64::GCSSS1, DL, MVT::Other, Val, Chain);
      SDNode *SS2 = CurDAG->getMachineNode(AArch64::GCSSS2, DL, MVT::i64,
                                           MVT::Other, Zero, SDValue(SS1, 0));
      ReplaceNode(Node, SS2);
      return;
    }
    case Intrinsic::aarch64_ldaxp:
    case Intrinsic::aarch64_ldxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
      SDValue MemAddr = Node->getOperand(2);
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);

      SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
                                          MVT::Other, MemAddr, Chain);

      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
      ReplaceNode(Node, Ld);
      return;
    }
    case Intrinsic::aarch64_stlxp:
    case Intrinsic::aarch64_stxp: {
      unsigned Op =
          IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue ValLo = Node->getOperand(2);
      SDValue ValHi = Node->getOperand(3);
      SDValue MemAddr = Node->getOperand(4);

      // Place arguments in the right order.
      SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};

      SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp =
          cast<MemIntrinsicSDNode>(Node)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(Node, St);
      return;
    }
    case Intrinsic::aarch64_neon_ld1x2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld1x4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4r:
      if (VT == MVT::v8i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
        return;
      } else if (VT == MVT::v16i8) {
        SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
    case Intrinsic::aarch64_neon_ld2lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 2, AArch64::LD2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 2, AArch64::LD2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 2, AArch64::LD2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 2, AArch64::LD2i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld3lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 3, AArch64::LD3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 3, AArch64::LD3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 3, AArch64::LD3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 3, AArch64::LD3i64);
        return;
      }
      break;
    case Intrinsic::aarch64_neon_ld4lane:
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectLoadLane(Node, 4, AArch64::LD4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectLoadLane(Node, 4, AArch64::LD4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectLoadLane(Node, 4, AArch64::LD4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectLoadLane(Node, 4, AArch64::LD4i64);
        return;
      }
      break;
    case Intrinsic::aarch64_ld64b:
      SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
      return;
    case Intrinsic::aarch64_sve_ld2q_sret: {
      SelectPredicatedLoad(Node, 2, 4, AArch64::LD2Q_IMM, AArch64::LD2Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld3q_sret: {
      SelectPredicatedLoad(Node, 3, 4, AArch64::LD3Q_IMM, AArch64::LD3Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld4q_sret: {
      SelectPredicatedLoad(Node, 4, 4, AArch64::LD4Q_IMM, AArch64::LD4Q, true);
      return;
    }
    case Intrinsic::aarch64_sve_ld2_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D, true);
        return;
      }
      break;
    }
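    // Predicate-as-counter multi-vector loads: prefer the SME2 pseudos when
    // SME2 is available, otherwise use the SVE2p1 instructions; with neither
    // feature, fall through to default selection.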
    case Intrinsic::aarch64_sve_ld1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM_PSEUDO, AArch64::LD1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM_PSEUDO, AArch64::LD1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM_PSEUDO, AArch64::LD1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM_PSEUDO, AArch64::LD1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM_PSEUDO, AArch64::LD1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM_PSEUDO, AArch64::LD1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM_PSEUDO, AArch64::LD1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM_PSEUDO, AArch64::LD1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x2: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 0,
                                          AArch64::LDNT1B_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 0, AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 1,
                                          AArch64::LDNT1H_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 1, AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 2,
                                          AArch64::LDNT1W_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 2, AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 2, 3,
                                          AArch64::LDNT1D_2Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_2Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 2, 3, AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z);
        else
          break;
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ldnt1_pn_x4: {
      if (VT == MVT::nxv16i8) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 0,
                                          AArch64::LDNT1B_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1B_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 0, AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 1,
                                          AArch64::LDNT1H_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1H_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 1, AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 2,
                                          AArch64::LDNT1W_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1W_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 2, AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z);
        else
          break;
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        if (Subtarget->hasSME2())
          SelectContiguousMultiVectorLoad(Node, 4, 3,
                                          AArch64::LDNT1D_4Z_IMM_PSEUDO,
                                          AArch64::LDNT1D_4Z_PSEUDO);
        else if (Subtarget->hasSVE2p1())
          SelectContiguousMultiVectorLoad(
              Node, 4, 3, AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z);
        else
          break;
        return;
      }
      break;
    }
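    // LD3/LD4 with struct return mirror the LD2 case above; the third
    // argument is the log2 element-size scale used by the reg+imm form.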
    case Intrinsic::aarch64_sve_ld3_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D, true);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_ld4_sret: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B, true);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H, true);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W, true);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D, true);
        return;
      }
      break;
    }
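    // SME ZA tile reads (MOVA). The SelectMultiVectorMove template arguments
    // give the maximum tile-slice offset immediate and its multiplier for the
    // element size being read.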
    case Intrinsic::aarch64_sme_read_hor_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<14, 2>(Node, 2, AArch64::ZAB0,
                                     AArch64::MOVA_2ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<6, 2>(Node, 2, AArch64::ZAH0,
                                    AArch64::MOVA_2ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<2, 2>(Node, 2, AArch64::ZAS0,
                                    AArch64::MOVA_2ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 2, AArch64::ZAD0,
                                    AArch64::MOVA_2ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_hor_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_H_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_H_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_H_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 2>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_H_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_ver_vg4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMove<12, 4>(Node, 4, AArch64::ZAB0,
                                     AArch64::MOVA_4ZMXI_V_B);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMove<4, 4>(Node, 4, AArch64::ZAH0,
                                    AArch64::MOVA_4ZMXI_V_H);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAS0,
                                    AArch64::MOVA_4ZMXI_V_S);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMove<0, 4>(Node, 4, AArch64::ZAD0,
                                    AArch64::MOVA_4ZMXI_V_D);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_read_vg1x2: {
      SelectMultiVectorMove<7, 1>(Node, 2, AArch64::ZA,
                                  AArch64::MOVA_VG2_2ZMXI);
      return;
    }
    case Intrinsic::aarch64_sme_read_vg1x4: {
      SelectMultiVectorMove<7, 1>(Node, 4, AArch64::ZA,
                                  AArch64::MOVA_VG4_4ZMXI);
      return;
    }
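    // readz forms: identical dispatch, but through the zeroing MOVAZ pseudos.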
    case Intrinsic::aarch64_sme_readz_horiz_x2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_B_PSEUDO, 14, 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_H_PSEUDO, 6, 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_S_PSEUDO, 2, 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_H_D_PSEUDO, 0, 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_vert_x2: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_B_PSEUDO, 14, 2);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_H_PSEUDO, 6, 2);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_S_PSEUDO, 2, 2);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_2ZMI_V_D_PSEUDO, 0, 2);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_horiz_x4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_B_PSEUDO, 12, 4);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_H_PSEUDO, 4, 4);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_S_PSEUDO, 0, 4);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_H_D_PSEUDO, 0, 4);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_vert_x4: {
      if (VT == MVT::nxv16i8) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_B_PSEUDO, 12, 4);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_H_PSEUDO, 4, 4);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_S_PSEUDO, 0, 4);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_4ZMI_V_D_PSEUDO, 0, 4);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sme_readz_x2: {
      SelectMultiVectorMoveZ(Node, 2, AArch64::MOVAZ_VG2_2ZMXI_PSEUDO, 7, 1,
                             AArch64::ZA);
      return;
    }
    case Intrinsic::aarch64_sme_readz_x4: {
      SelectMultiVectorMoveZ(Node, 4, AArch64::MOVAZ_VG4_4ZMXI_PSEUDO, 7, 1,
                             AArch64::ZA);
      return;
    }
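    // The Swift async context is kept at FP-8; materialize that address
    // directly and record that the frame address is taken.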
    case Intrinsic::swift_async_context_addr: {
      SDLoc DL(Node);
      SDValue Chain = Node->getOperand(0);
      SDValue CopyFP = CurDAG->getCopyFromReg(Chain, DL, AArch64::FP, MVT::i64);
      SDValue Res = SDValue(
          CurDAG->getMachineNode(AArch64::SUBXri, DL, MVT::i64, CopyFP,
                                 CurDAG->getTargetConstant(8, DL, MVT::i32),
                                 CurDAG->getTargetConstant(0, DL, MVT::i32)),
          0);
      ReplaceUses(SDValue(Node, 0), Res);
      ReplaceUses(SDValue(Node, 1), CopyFP.getValue(1));
      CurDAG->RemoveDeadNode(Node);

      auto &MF = CurDAG->getMachineFunction();
      MF.getFrameInfo().setFrameAddressIsTaken(true);
      MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_4ZTZI_B, AArch64::LUTI2_4ZTZI_H,
               AArch64::LUTI2_4ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 4, Opc, 3);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x4: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {0, AArch64::LUTI4_4ZTZI_H, AArch64::LUTI4_4ZTZI_S}))
        // Second Immediate must be <= 1:
        SelectMultiVectorLuti(Node, 4, Opc, 1);
      return;
    }
    case Intrinsic::aarch64_sme_luti2_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI2_2ZTZI_B, AArch64::LUTI2_2ZTZI_H,
               AArch64::LUTI2_2ZTZI_S}))
        // Second Immediate must be <= 7:
        SelectMultiVectorLuti(Node, 2, Opc, 7);
      return;
    }
    case Intrinsic::aarch64_sme_luti4_lane_zt_x2: {
      if (auto Opc = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::LUTI4_2ZTZI_B, AArch64::LUTI4_2ZTZI_H,
               AArch64::LUTI4_2ZTZI_S}))
        // Second Immediate must be <= 3:
        SelectMultiVectorLuti(Node, 2, Opc, 3);
      return;
    }
    }
  } break;
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = Node->getConstantOperandVal(0);
    switch (IntNo) {
    default:
      break;
    case Intrinsic::aarch64_tagp:
      SelectTagP(Node);
      return;

    case Intrinsic::ptrauth_auth:
      SelectPtrauthAuth(Node);
      return;

    case Intrinsic::ptrauth_resign:
      SelectPtrauthResign(Node);
      return;

    case Intrinsic::aarch64_neon_tbl2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl3:
      SelectTable(Node, 3,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Three
                                  : AArch64::TBLv16i8Three,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbl4:
      SelectTable(Node, 4,
                  VT == MVT::v8i8 ? AArch64::TBLv8i8Four
                                  : AArch64::TBLv16i8Four,
                  false);
      return;
    case Intrinsic::aarch64_neon_tbx2:
      SelectTable(Node, 2,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx3:
      SelectTable(Node, 3,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Three
                                  : AArch64::TBXv16i8Three,
                  true);
      return;
    case Intrinsic::aarch64_neon_tbx4:
      SelectTable(Node, 4,
                  VT == MVT::v8i8 ? AArch64::TBXv8i8Four
                                  : AArch64::TBXv16i8Four,
                  true);
      return;
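    // SME2/SVE2p1 multi-vector arithmetic: SelectOpcodeFromVT picks the
    // B/H/S/D opcode from the result element type (a 0 entry marks an
    // unsupported width), and the bool selects between the vector-by-single
    // (false) and vector-by-vector (true) destructive forms.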
    case Intrinsic::aarch64_sve_srshl_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG2_2ZZ_B, AArch64::SRSHL_VG2_2ZZ_H,
               AArch64::SRSHL_VG2_2ZZ_S, AArch64::SRSHL_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG4_4ZZ_B, AArch64::SRSHL_VG4_4ZZ_H,
               AArch64::SRSHL_VG4_4ZZ_S, AArch64::SRSHL_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG2_2ZZ_B, AArch64::URSHL_VG2_2ZZ_H,
               AArch64::URSHL_VG2_2ZZ_S, AArch64::URSHL_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG4_4ZZ_B, AArch64::URSHL_VG4_4ZZ_H,
               AArch64::URSHL_VG4_4ZZ_S, AArch64::URSHL_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG2_2Z2Z_B, AArch64::SRSHL_VG2_2Z2Z_H,
               AArch64::SRSHL_VG2_2Z2Z_S, AArch64::SRSHL_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_srshl_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SRSHL_VG4_4Z4Z_B, AArch64::SRSHL_VG4_4Z4Z_H,
               AArch64::SRSHL_VG4_4Z4Z_S, AArch64::SRSHL_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG2_2Z2Z_B, AArch64::URSHL_VG2_2Z2Z_H,
               AArch64::URSHL_VG2_2Z2Z_S, AArch64::URSHL_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_urshl_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::URSHL_VG4_4Z4Z_B, AArch64::URSHL_VG4_4Z4Z_H,
               AArch64::URSHL_VG4_4Z4Z_S, AArch64::URSHL_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_single_vgx2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG2_2ZZ_B, AArch64::SQDMULH_VG2_2ZZ_H,
               AArch64::SQDMULH_VG2_2ZZ_S, AArch64::SQDMULH_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_single_vgx4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG4_4ZZ_B, AArch64::SQDMULH_VG4_4ZZ_H,
               AArch64::SQDMULH_VG4_4ZZ_S, AArch64::SQDMULH_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_vgx2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG2_2Z2Z_B, AArch64::SQDMULH_VG2_2Z2Z_H,
               AArch64::SQDMULH_VG2_2Z2Z_S, AArch64::SQDMULH_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_sqdmulh_vgx4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SQDMULH_VG4_4Z4Z_B, AArch64::SQDMULH_VG4_4Z4Z_H,
               AArch64::SQDMULH_VG4_4Z4Z_S, AArch64::SQDMULH_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
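    // Predicate-pair WHILE comparisons: one opcode per element size, with
    // SelectWhilePair expanding the two predicate results.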
    case Intrinsic::aarch64_sve_whilege_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEGE_2PXX_B, AArch64::WHILEGE_2PXX_H,
               AArch64::WHILEGE_2PXX_S, AArch64::WHILEGE_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilegt_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEGT_2PXX_B, AArch64::WHILEGT_2PXX_H,
               AArch64::WHILEGT_2PXX_S, AArch64::WHILEGT_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilehi_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEHI_2PXX_B, AArch64::WHILEHI_2PXX_H,
               AArch64::WHILEHI_2PXX_S, AArch64::WHILEHI_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilehs_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILEHS_2PXX_B, AArch64::WHILEHS_2PXX_H,
               AArch64::WHILEHS_2PXX_S, AArch64::WHILEHS_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilele_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELE_2PXX_B, AArch64::WHILELE_2PXX_H,
               AArch64::WHILELE_2PXX_S, AArch64::WHILELE_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilelo_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELO_2PXX_B, AArch64::WHILELO_2PXX_H,
               AArch64::WHILELO_2PXX_S, AArch64::WHILELO_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilels_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELS_2PXX_B, AArch64::WHILELS_2PXX_H,
               AArch64::WHILELS_2PXX_S, AArch64::WHILELS_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
    case Intrinsic::aarch64_sve_whilelt_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int1>(
              Node->getValueType(0),
              {AArch64::WHILELT_2PXX_B, AArch64::WHILELT_2PXX_H,
               AArch64::WHILELT_2PXX_S, AArch64::WHILELT_2PXX_D}))
        SelectWhilePair(Node, Op);
      return;
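    // Min/max follow the same pattern. For the FP variants the opcode table
    // is indexed as {bf16, f16, f32, f64}, so nxv8bf16 maps onto the
    // BFMAX/BFMIN forms.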
    case Intrinsic::aarch64_sve_smax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2ZZ_B, AArch64::SMAX_VG2_2ZZ_H,
               AArch64::SMAX_VG2_2ZZ_S, AArch64::SMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2ZZ_B, AArch64::UMAX_VG2_2ZZ_H,
               AArch64::UMAX_VG2_2ZZ_S, AArch64::UMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG2_2ZZ_H, AArch64::FMAX_VG2_2ZZ_H,
               AArch64::FMAX_VG2_2ZZ_S, AArch64::FMAX_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4ZZ_B, AArch64::SMAX_VG4_4ZZ_H,
               AArch64::SMAX_VG4_4ZZ_S, AArch64::SMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4ZZ_B, AArch64::UMAX_VG4_4ZZ_H,
               AArch64::UMAX_VG4_4ZZ_S, AArch64::UMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG4_4ZZ_H, AArch64::FMAX_VG4_4ZZ_H,
               AArch64::FMAX_VG4_4ZZ_S, AArch64::FMAX_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2ZZ_B, AArch64::SMIN_VG2_2ZZ_H,
               AArch64::SMIN_VG2_2ZZ_S, AArch64::SMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2ZZ_B, AArch64::UMIN_VG2_2ZZ_H,
               AArch64::UMIN_VG2_2ZZ_S, AArch64::UMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG2_2ZZ_H, AArch64::FMIN_VG2_2ZZ_H,
               AArch64::FMIN_VG2_2ZZ_S, AArch64::FMIN_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_smin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4ZZ_B, AArch64::SMIN_VG4_4ZZ_H,
               AArch64::SMIN_VG4_4ZZ_S, AArch64::SMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_umin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4ZZ_B, AArch64::UMIN_VG4_4ZZ_H,
               AArch64::UMIN_VG4_4ZZ_S, AArch64::UMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG4_4ZZ_H, AArch64::FMIN_VG4_4ZZ_H,
               AArch64::FMIN_VG4_4ZZ_S, AArch64::FMIN_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG2_2Z2Z_B, AArch64::SMAX_VG2_2Z2Z_H,
               AArch64::SMAX_VG2_2Z2Z_S, AArch64::SMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG2_2Z2Z_B, AArch64::UMAX_VG2_2Z2Z_H,
               AArch64::UMAX_VG2_2Z2Z_S, AArch64::UMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG2_2Z2Z_H, AArch64::FMAX_VG2_2Z2Z_H,
               AArch64::FMAX_VG2_2Z2Z_S, AArch64::FMAX_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMAX_VG4_4Z4Z_B, AArch64::SMAX_VG4_4Z4Z_H,
               AArch64::SMAX_VG4_4Z4Z_S, AArch64::SMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMAX_VG4_4Z4Z_B, AArch64::UMAX_VG4_4Z4Z_H,
               AArch64::UMAX_VG4_4Z4Z_S, AArch64::UMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmax_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAX_VG4_4Z2Z_H, AArch64::FMAX_VG4_4Z4Z_H,
               AArch64::FMAX_VG4_4Z4Z_S, AArch64::FMAX_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG2_2Z2Z_B, AArch64::SMIN_VG2_2Z2Z_H,
               AArch64::SMIN_VG2_2Z2Z_S, AArch64::SMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG2_2Z2Z_B, AArch64::UMIN_VG2_2Z2Z_H,
               AArch64::UMIN_VG2_2Z2Z_S, AArch64::UMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG2_2Z2Z_H, AArch64::FMIN_VG2_2Z2Z_H,
               AArch64::FMIN_VG2_2Z2Z_S, AArch64::FMIN_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_smin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SMIN_VG4_4Z4Z_B, AArch64::SMIN_VG4_4Z4Z_H,
               AArch64::SMIN_VG4_4Z4Z_S, AArch64::SMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_umin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UMIN_VG4_4Z4Z_B, AArch64::UMIN_VG4_4Z4Z_H,
               AArch64::UMIN_VG4_4Z4Z_S, AArch64::UMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmin_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMIN_VG4_4Z2Z_H, AArch64::FMIN_VG4_4Z4Z_H,
               AArch64::FMIN_VG4_4Z4Z_S, AArch64::FMIN_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG2_2ZZ_H, AArch64::FMAXNM_VG2_2ZZ_H,
               AArch64::FMAXNM_VG2_2ZZ_S, AArch64::FMAXNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG4_4ZZ_H, AArch64::FMAXNM_VG4_4ZZ_H,
               AArch64::FMAXNM_VG4_4ZZ_S, AArch64::FMAXNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG2_2ZZ_H, AArch64::FMINNM_VG2_2ZZ_H,
               AArch64::FMINNM_VG2_2ZZ_S, AArch64::FMINNM_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG4_4ZZ_H, AArch64::FMINNM_VG4_4ZZ_H,
               AArch64::FMINNM_VG4_4ZZ_S, AArch64::FMINNM_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG2_2Z2Z_H, AArch64::FMAXNM_VG2_2Z2Z_H,
               AArch64::FMAXNM_VG2_2Z2Z_S, AArch64::FMAXNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fmaxnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMAXNM_VG4_4Z2Z_H, AArch64::FMAXNM_VG4_4Z4Z_H,
               AArch64::FMAXNM_VG4_4Z4Z_S, AArch64::FMAXNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG2_2Z2Z_H, AArch64::FMINNM_VG2_2Z2Z_H,
               AArch64::FMINNM_VG2_2Z2Z_S, AArch64::FMINNM_VG2_2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op);
      return;
    case Intrinsic::aarch64_sve_fminnm_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {AArch64::BFMINNM_VG4_4Z2Z_H, AArch64::FMINNM_VG4_4Z4Z_H,
               AArch64::FMINNM_VG4_4Z4Z_S, AArch64::FMINNM_VG4_4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op);
      return;
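    // Multi-vector converts only exist in 32-bit-to-32-bit (StoS) form, so
    // no type dispatch is needed here.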
    case Intrinsic::aarch64_sve_fcvtzs_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x2:
      SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzs_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_scvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvtzu_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_ucvtf_x4:
      SelectCVTIntrinsic(Node, 4, AArch64::UCVTF_4Z4Z_StoS);
      return;
    case Intrinsic::aarch64_sve_fcvt_widen_x2:
      SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVT_2ZZ_H_S);
      return;
    case Intrinsic::aarch64_sve_fcvtl_widen_x2:
      SelectUnaryMultiIntrinsic(Node, 2, false, AArch64::FCVTL_2ZZ_H_S);
      return;
    case Intrinsic::aarch64_sve_sclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG2_2Z2Z_B, AArch64::SCLAMP_VG2_2Z2Z_H,
               AArch64::SCLAMP_VG2_2Z2Z_S, AArch64::SCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG2_2Z2Z_B, AArch64::UCLAMP_VG2_2Z2Z_H,
               AArch64::UCLAMP_VG2_2Z2Z_S, AArch64::UCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG2_2Z2Z_H, AArch64::FCLAMP_VG2_2Z2Z_S,
               AArch64::FCLAMP_VG2_2Z2Z_D}))
        SelectClamp(Node, 2, Op);
      return;
    case Intrinsic::aarch64_sve_bfclamp_single_x2:
      SelectClamp(Node, 2, AArch64::BFCLAMP_VG2_2ZZZ_H);
      return;
    case Intrinsic::aarch64_sve_sclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::SCLAMP_VG4_4Z4Z_B, AArch64::SCLAMP_VG4_4Z4Z_H,
               AArch64::SCLAMP_VG4_4Z4Z_S, AArch64::SCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_uclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::UCLAMP_VG4_4Z4Z_B, AArch64::UCLAMP_VG4_4Z4Z_H,
               AArch64::UCLAMP_VG4_4Z4Z_S, AArch64::UCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_fclamp_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::FP>(
              Node->getValueType(0),
              {0, AArch64::FCLAMP_VG4_4Z4Z_H, AArch64::FCLAMP_VG4_4Z4Z_S,
               AArch64::FCLAMP_VG4_4Z4Z_D}))
        SelectClamp(Node, 4, Op);
      return;
    case Intrinsic::aarch64_sve_bfclamp_single_x4:
      SelectClamp(Node, 4, AArch64::BFCLAMP_VG4_4ZZZ_H);
      return;
    case Intrinsic::aarch64_sve_add_single_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG2_2ZZ_B, AArch64::ADD_VG2_2ZZ_H,
               AArch64::ADD_VG2_2ZZ_S, AArch64::ADD_VG2_2ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, false, Op);
      return;
    case Intrinsic::aarch64_sve_add_single_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {AArch64::ADD_VG4_4ZZ_B, AArch64::ADD_VG4_4ZZ_H,
               AArch64::ADD_VG4_4ZZ_S, AArch64::ADD_VG4_4ZZ_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, false, Op);
      return;
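    // ZIP/UZP: the x2 forms take independent operands (IsTupleInput=false)
    // while the x4 forms consume a register tuple; the Q variants operate on
    // 128-bit quadwords and need no element-type dispatch.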
    case Intrinsic::aarch64_sve_zip_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG2_2ZZZ_B, AArch64::ZIP_VG2_2ZZZ_H,
               AArch64::ZIP_VG2_2ZZZ_S, AArch64::ZIP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::ZIP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_zip_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::ZIP_VG4_4Z4Z_B, AArch64::ZIP_VG4_4Z4Z_H,
               AArch64::ZIP_VG4_4Z4Z_S, AArch64::ZIP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_zipq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::ZIP_VG4_4Z4Z_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG2_2ZZZ_B, AArch64::UZP_VG2_2ZZZ_H,
               AArch64::UZP_VG2_2ZZZ_S, AArch64::UZP_VG2_2ZZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x2:
      SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false,
                                AArch64::UZP_VG2_2ZZZ_Q);
      return;
    case Intrinsic::aarch64_sve_uzp_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::UZP_VG4_4Z4Z_B, AArch64::UZP_VG4_4Z4Z_H,
               AArch64::UZP_VG4_4Z4Z_S, AArch64::UZP_VG4_4Z4Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uzpq_x4:
      SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true,
                                AArch64::UZP_VG4_4Z4Z_Q);
      return;
    case Intrinsic::aarch64_sve_sel_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG2_2ZC2Z2Z_B, AArch64::SEL_VG2_2ZC2Z2Z_H,
               AArch64::SEL_VG2_2ZC2Z2Z_S, AArch64::SEL_VG2_2ZC2Z2Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 2, true, Op, /*HasPred=*/true);
      return;
    case Intrinsic::aarch64_sve_sel_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::SEL_VG4_4ZC4Z4Z_B, AArch64::SEL_VG4_4ZC4Z4Z_H,
               AArch64::SEL_VG4_4ZC4Z4Z_S, AArch64::SEL_VG4_4ZC4Z4Z_D}))
        SelectDestructiveMultiIntrinsic(Node, 4, true, Op, /*HasPred=*/true);
      return;
    case Intrinsic::aarch64_sve_frinta_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTA_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frinta_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTA_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTM_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintm_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTM_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTN_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintn_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTN_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x2:
      SelectFrintFromVT(Node, 2, AArch64::FRINTP_2Z2Z_S);
      return;
    case Intrinsic::aarch64_sve_frintp_x4:
      SelectFrintFromVT(Node, 4, AArch64::FRINTP_4Z4Z_S);
      return;
    case Intrinsic::aarch64_sve_sunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG2_2ZZ_H, AArch64::SUNPK_VG2_2ZZ_S,
               AArch64::SUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x2:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG2_2ZZ_H, AArch64::UUNPK_VG2_2ZZ_S,
               AArch64::UUNPK_VG2_2ZZ_D}))
        SelectUnaryMultiIntrinsic(Node, 2, /*IsTupleInput=*/false, Op);
      return;
    case Intrinsic::aarch64_sve_sunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::SUNPK_VG4_4Z2Z_H, AArch64::SUNPK_VG4_4Z2Z_S,
               AArch64::SUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_uunpk_x4:
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::Int>(
              Node->getValueType(0),
              {0, AArch64::UUNPK_VG4_4Z2Z_H, AArch64::UUNPK_VG4_4Z2Z_S,
               AArch64::UUNPK_VG4_4Z2Z_D}))
        SelectUnaryMultiIntrinsic(Node, 4, /*IsTupleInput=*/true, Op);
      return;
    case Intrinsic::aarch64_sve_pext_x2: {
      if (auto Op = SelectOpcodeFromVT<SelectTypeKind::AnyType>(
              Node->getValueType(0),
              {AArch64::PEXT_2PCI_B, AArch64::PEXT_2PCI_H, AArch64::PEXT_2PCI_S,
               AArch64::PEXT_2PCI_D}))
        SelectPExtPair(Node, Op);
      return;
    }
    }
    break;
  }
  case ISD::INTRINSIC_VOID: {
    unsigned IntNo = Node->getConstantOperandVal(1);
    if (Node->getNumOperands() >= 3)
      VT = Node->getOperand(2)->getValueType(0);
    switch (IntNo) {
    default:
      break;
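    // NEON structured stores mirror the load selection above. Note that the
    // v1i64/v1f64 forms of ST2-ST4 degrade to multi-register ST1, since a
    // single-element structure store is just consecutive registers.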
    case Intrinsic::aarch64_neon_st1x2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST1Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST1Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST1Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST1Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST1Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST1Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST1Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST1Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST1Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST1Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST1Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST1Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st1x4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST1Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST1Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST1Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 2, AArch64::ST2Twov8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 2, AArch64::ST2Twov16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 2, AArch64::ST2Twov8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 2, AArch64::ST2Twov2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 2, AArch64::ST2Twov4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 2, AArch64::ST2Twov2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 2, AArch64::ST1Twov1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 3, AArch64::ST3Threev8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 3, AArch64::ST3Threev16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 3, AArch64::ST3Threev8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 3, AArch64::ST3Threev2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 3, AArch64::ST3Threev4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 3, AArch64::ST3Threev2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 3, AArch64::ST1Threev1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4: {
      if (VT == MVT::v8i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv8b);
        return;
      } else if (VT == MVT::v16i8) {
        SelectStore(Node, 4, AArch64::ST4Fourv16b);
        return;
      } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv4h);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
        SelectStore(Node, 4, AArch64::ST4Fourv8h);
        return;
      } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv2s);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
        SelectStore(Node, 4, AArch64::ST4Fourv4s);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
        SelectStore(Node, 4, AArch64::ST4Fourv2d);
        return;
      } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
        SelectStore(Node, 4, AArch64::ST1Fourv1d);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st2lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 2, AArch64::ST2i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 2, AArch64::ST2i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 2, AArch64::ST2i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 2, AArch64::ST2i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st3lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 3, AArch64::ST3i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 3, AArch64::ST3i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 3, AArch64::ST3i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 3, AArch64::ST3i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_neon_st4lane: {
      if (VT == MVT::v16i8 || VT == MVT::v8i8) {
        SelectStoreLane(Node, 4, AArch64::ST4i8);
        return;
      } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
                 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
        SelectStoreLane(Node, 4, AArch64::ST4i16);
        return;
      } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
                 VT == MVT::v2f32) {
        SelectStoreLane(Node, 4, AArch64::ST4i32);
        return;
      } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
                 VT == MVT::v1f64) {
        SelectStoreLane(Node, 4, AArch64::ST4i64);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st2q: {
      SelectPredicatedStore(Node, 2, 4, AArch64::ST2Q, AArch64::ST2Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st3q: {
      SelectPredicatedStore(Node, 3, 4, AArch64::ST3Q, AArch64::ST3Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st4q: {
      SelectPredicatedStore(Node, 4, 4, AArch64::ST4Q, AArch64::ST4Q_IMM);
      return;
    }
    case Intrinsic::aarch64_sve_st2: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st3: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
        return;
      }
      break;
    }
    case Intrinsic::aarch64_sve_st4: {
      if (VT == MVT::nxv16i8) {
        SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
        return;
      } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
                 VT == MVT::nxv8bf16) {
        SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
        return;
      } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
        SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
        return;
      } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
        SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
        return;
      }
      break;
    }
    }
    break;
  }
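  // Post-increment (write-back) structured loads: the _POST variants also
  // update the base register. As with the stores above, v1i64/v1f64 fall
  // back to the multi-register LD1 forms.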
  case AArch64ISD::LD2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x2post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x3post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1x4post: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
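  // Duplicating post-increment loads: the LDnR forms broadcast each loaded
  // structure element to every lane of the result vectors.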
  case AArch64ISD::LD1DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD2DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD3DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD4DUPpost: {
    if (VT == MVT::v8i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
      return;
    }
    break;
  }
  case AArch64ISD::LD1LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD2LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD3LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::LD4LANEpost: {
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
      return;
    }
    break;
  }
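  // Note (illustrative comment, an editorial addition): the DUP cases above
  // select the replicating LDnR forms, which load one element per register
  // and broadcast it to every lane, while the LANE cases select the
  // single-element LDn lane forms. Both transfer a fixed number of bytes per
  // structure member, so selection keys only on the element width; that is
  // why d- and q-form vector types (e.g. v8i8 and v16i8) share one branch.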
  case AArch64ISD::ST2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x2post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x3post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST1x4post: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v8i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
      return;
    } else if (VT == MVT::v16i8) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
      return;
    } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
      return;
    } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
      return;
    } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
      SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST2LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST3LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::ST4LANEpost: {
    VT = Node->getOperand(1).getValueType();
    if (VT == MVT::v16i8 || VT == MVT::v8i8) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
      return;
    } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
               VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
      return;
    } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
               VT == MVT::v2f32) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
      return;
    } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
               VT == MVT::v1f64) {
      SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD2_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD3_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
      return;
    }
    break;
  }
  case AArch64ISD::SVE_LD4_MERGE_ZERO: {
    if (VT == MVT::nxv16i8) {
      SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
      return;
    } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
               VT == MVT::nxv8bf16) {
      SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
      return;
    } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
      SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
      return;
    } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
      SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
      return;
    }
    break;
  }
  }

  // Select the default instruction
  SelectCode(Node);
}

/// createAArch64ISelDag - This pass converts a legalized DAG into an
/// AArch64-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
                                         CodeGenOptLevel OptLevel) {
  return new AArch64DAGToDAGISelLegacy(TM, OptLevel);
}
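// Illustrative sketch (an editorial addition, not from this file): the
// factory above is expected to be invoked from the target's pass
// configuration when the codegen pipeline is assembled. Assuming the usual
// TargetPassConfig hook on the AArch64 side, a call site would look roughly
// like:
//
//   bool AArch64PassConfig::addInstSelector() {
//     addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
//     return false;
//   }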
/// When \p PredVT is a scalable vector predicate in the form
/// MVT::nx<M>xi1, it builds the corresponding scalable vector of
/// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
/// structured vectors (NumVec > 1), the output data type is
/// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
/// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
/// EVT.
static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
                                                unsigned NumVec) {
  assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
  if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
    return EVT();

  if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
      PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
    return EVT();

  ElementCount EC = PredVT.getVectorElementCount();
  EVT ScalarVT =
      EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
  EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);

  return MemVT;
}
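// Worked examples of the mapping above (illustrative comments only, relying
// on AArch64::SVEBitsPerBlock == 128):
//   PredVT = nxv16i1, NumVec = 1  -->  nxv16i8  (128 / 16 = 8-bit elements)
//   PredVT = nxv4i1,  NumVec = 1  -->  nxv4i32  (128 / 4 = 32-bit elements)
//   PredVT = nxv8i1,  NumVec = 3  -->  nxv24i16 (element count scaled by 3)
//   PredVT = nxv4i32 (not a predicate)  -->  invalid EVT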
/// Return the EVT of the data associated with a memory operation in \p
/// Root. If such EVT cannot be retrieved, it returns an invalid EVT.
static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
  if (isa<MemSDNode>(Root))
    return cast<MemSDNode>(Root)->getMemoryVT();

  if (isa<MemIntrinsicSDNode>(Root))
    return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();

  const unsigned Opcode = Root->getOpcode();
  // For custom ISD nodes, we have to look at them individually to extract the
  // type of the data moved to/from memory.
  switch (Opcode) {
  case AArch64ISD::LD1_MERGE_ZERO:
  case AArch64ISD::LD1S_MERGE_ZERO:
  case AArch64ISD::LDNF1_MERGE_ZERO:
  case AArch64ISD::LDNF1S_MERGE_ZERO:
    return cast<VTSDNode>(Root->getOperand(3))->getVT();
  case AArch64ISD::ST1_PRED:
    return cast<VTSDNode>(Root->getOperand(4))->getVT();
  case AArch64ISD::SVE_LD2_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
  case AArch64ISD::SVE_LD3_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
  case AArch64ISD::SVE_LD4_MERGE_ZERO:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
  default:
    break;
  }

  if (Opcode != ISD::INTRINSIC_VOID && Opcode != ISD::INTRINSIC_W_CHAIN)
    return EVT();

  switch (Root->getConstantOperandVal(1)) {
  default:
    return EVT();
  case Intrinsic::aarch64_sme_ldr:
  case Intrinsic::aarch64_sme_str:
    return MVT::nxv16i8;
  case Intrinsic::aarch64_sve_prf:
    // We are using an SVE prefetch intrinsic. Type must be inferred from the
    // width of the predicate.
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
  case Intrinsic::aarch64_sve_ld2_sret:
  case Intrinsic::aarch64_sve_ld2q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_st2q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(4)->getValueType(0), /*NumVec=*/2);
  case Intrinsic::aarch64_sve_ld3_sret:
  case Intrinsic::aarch64_sve_ld3q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_st3q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(5)->getValueType(0), /*NumVec=*/3);
  case Intrinsic::aarch64_sve_ld4_sret:
  case Intrinsic::aarch64_sve_ld4q_sret:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_st4q:
    return getPackedVectorTypeFromPredicateType(
        Ctx, Root->getOperand(6)->getValueType(0), /*NumVec=*/4);
  case Intrinsic::aarch64_sve_ld1udq:
  case Intrinsic::aarch64_sve_st1dq:
    return EVT(MVT::nxv1i64);
  case Intrinsic::aarch64_sve_ld1uwq:
  case Intrinsic::aarch64_sve_st1wq:
    return EVT(MVT::nxv1i32);
  }
}

/// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
/// Base + OffImm * sizeof(MemVT) for Min <= OffImm <= Max,
/// where Root is the memory access using N for its address.
template <int64_t Min, int64_t Max>
bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
                                                   SDValue &Base,
                                                   SDValue &OffImm) {
  const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
  const DataLayout &DL = CurDAG->getDataLayout();
  const MachineFrameInfo &MFI = MF->getFrameInfo();

  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
      OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
      return true;
    }

    return false;
  }

  if (MemVT == EVT())
    return false;

  if (N.getOpcode() != ISD::ADD)
    return false;

  SDValue VScale = N.getOperand(1);
  if (VScale.getOpcode() != ISD::VSCALE)
    return false;

  TypeSize TS = MemVT.getSizeInBits();
  int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinValue()) / 8;
  int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();

  if ((MulImm % MemWidthBytes) != 0)
    return false;

  int64_t Offset = MulImm / MemWidthBytes;
  if (Offset < Min || Offset > Max)
    return false;

  Base = N.getOperand(0);
  if (Base.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    // We can only encode VL scaled offsets, so only fold in frame indexes
    // referencing SVE objects.
    if (MFI.getStackID(FI) == TargetStackID::ScalableVector)
      Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
  }

  OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
  return true;
}
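// Worked example for the routine above (illustrative comment with assumed
// values): for MemVT = nxv4i32 the known-minimum width is 128 bits, i.e.
// 16 bytes per vector-length unit, so N = ADD(Base, VSCALE(48)) gives
// MulImm = 48 and Offset = 48 / 16 = 3. Selection succeeds when
// Min <= 3 <= Max, yielding the immediate form [Base, #3, MUL VL].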
/// Select register plus register addressing mode for SVE, with scaled
/// offset.
bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
                                                  SDValue &Base,
                                                  SDValue &Offset) {
  if (N.getOpcode() != ISD::ADD)
    return false;

  // Process an ADD node.
  const SDValue LHS = N.getOperand(0);
  const SDValue RHS = N.getOperand(1);

  // 8 bit data does not come with the SHL node, so it is treated
  // separately.
  if (Scale == 0) {
    Base = LHS;
    Offset = RHS;
    return true;
  }

  if (auto *C = dyn_cast<ConstantSDNode>(RHS)) {
    int64_t ImmOff = C->getSExtValue();
    unsigned Size = 1 << Scale;

    // To use the reg+reg addressing mode, the immediate must be a multiple of
    // the vector element's byte size.
    if (ImmOff % Size)
      return false;

    SDLoc DL(N);
    Base = LHS;
    Offset = CurDAG->getTargetConstant(ImmOff >> Scale, DL, MVT::i64);
    SDValue Ops[] = {Offset};
    SDNode *MI = CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
    Offset = SDValue(MI, 0);
    return true;
  }

  // Check if the RHS is a shift node with a constant.
  if (RHS.getOpcode() != ISD::SHL)
    return false;

  const SDValue ShiftRHS = RHS.getOperand(1);
  if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
    if (C->getZExtValue() == Scale) {
      Base = LHS;
      Offset = RHS.getOperand(0);
      return true;
    }

  return false;
}

bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
  const AArch64TargetLowering *TLI =
      static_cast<const AArch64TargetLowering *>(getTargetLowering());

  return TLI->isAllActivePredicate(*CurDAG, N);
}

bool AArch64DAGToDAGISel::SelectAnyPredicate(SDValue N) {
  EVT VT = N.getValueType();
  return VT.isScalableVector() && VT.getVectorElementType() == MVT::i1;
}

bool AArch64DAGToDAGISel::SelectSMETileSlice(SDValue N, unsigned MaxSize,
                                             SDValue &Base, SDValue &Offset,
                                             unsigned Scale) {
  // Try to untangle an ADD node into a 'reg + offset'.
  if (N.getOpcode() == ISD::ADD)
    if (auto *C = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      int64_t ImmOff = C->getSExtValue();
      if (ImmOff > 0 && ImmOff <= MaxSize && (ImmOff % Scale) == 0) {
        Base = N.getOperand(0);
        Offset = CurDAG->getTargetConstant(ImmOff / Scale, SDLoc(N), MVT::i64);
        return true;
      }
    }

  // By default, just match reg + 0.
  Base = N;
  Offset = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i64);
  return true;
}
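// Worked example for SelectSMETileSlice (illustrative comment with assumed
// values): with Scale = 2 and MaxSize = 14, N = ADD(Base, 6) satisfies
// 0 < 6 <= 14 and 6 % 2 == 0, so it matches as Base plus slice offset
// 6 / 2 = 3. Any other shape falls back to matching the whole node as
// "reg + 0".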