Path: blob/main/contrib/llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp
35266 views
//===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines an instruction selector for the ARM target.9//10//===----------------------------------------------------------------------===//1112#include "ARM.h"13#include "ARMBaseInstrInfo.h"14#include "ARMTargetMachine.h"15#include "MCTargetDesc/ARMAddressingModes.h"16#include "Utils/ARMBaseInfo.h"17#include "llvm/ADT/APSInt.h"18#include "llvm/ADT/StringSwitch.h"19#include "llvm/CodeGen/MachineFrameInfo.h"20#include "llvm/CodeGen/MachineFunction.h"21#include "llvm/CodeGen/MachineInstrBuilder.h"22#include "llvm/CodeGen/MachineRegisterInfo.h"23#include "llvm/CodeGen/SelectionDAG.h"24#include "llvm/CodeGen/SelectionDAGISel.h"25#include "llvm/CodeGen/TargetLowering.h"26#include "llvm/IR/CallingConv.h"27#include "llvm/IR/Constants.h"28#include "llvm/IR/DerivedTypes.h"29#include "llvm/IR/Function.h"30#include "llvm/IR/Intrinsics.h"31#include "llvm/IR/IntrinsicsARM.h"32#include "llvm/IR/LLVMContext.h"33#include "llvm/Support/CommandLine.h"34#include "llvm/Support/Debug.h"35#include "llvm/Support/ErrorHandling.h"36#include "llvm/Target/TargetOptions.h"37#include <optional>3839using namespace llvm;4041#define DEBUG_TYPE "arm-isel"42#define PASS_NAME "ARM Instruction Selection"4344static cl::opt<bool>45DisableShifterOp("disable-shifter-op", cl::Hidden,46cl::desc("Disable isel of shifter-op"),47cl::init(false));4849//===--------------------------------------------------------------------===//50/// ARMDAGToDAGISel - ARM specific code to select ARM machine51/// instructions for SelectionDAG operations.52///53namespace {5455class ARMDAGToDAGISel : public SelectionDAGISel {56/// Subtarget - Keep a pointer to the ARMSubtarget around so that we can57/// make the right decision when generating code for different targets.58const ARMSubtarget *Subtarget;5960public:61ARMDAGToDAGISel() = delete;6263explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)64: SelectionDAGISel(tm, OptLevel) {}6566bool runOnMachineFunction(MachineFunction &MF) override {67// Reset the subtarget each time through.68Subtarget = &MF.getSubtarget<ARMSubtarget>();69SelectionDAGISel::runOnMachineFunction(MF);70return true;71}7273void PreprocessISelDAG() override;7475/// getI32Imm - Return a target constant of type i32 with the specified76/// value.77inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {78return CurDAG->getTargetConstant(Imm, dl, MVT::i32);79}8081void Select(SDNode *N) override;8283/// Return true as some complex patterns, like those that call84/// canExtractShiftFromMul can modify the DAG inplace.85bool ComplexPatternFuncMutatesDAG() const override { return true; }8687bool hasNoVMLxHazardUse(SDNode *N) const;88bool isShifterOpProfitable(const SDValue &Shift,89ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);90bool SelectRegShifterOperand(SDValue N, SDValue &A,91SDValue &B, SDValue &C,92bool CheckProfitability = true);93bool SelectImmShifterOperand(SDValue N, SDValue &A,94SDValue &B, bool CheckProfitability = true);95bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,96SDValue &C) {97// Don't apply the profitability check98return SelectRegShifterOperand(N, A, B, C, false);99}100bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {101// Don't apply the profitability check102return SelectImmShifterOperand(N, A, B, false);103}104bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {105if (!N.hasOneUse())106return false;107return SelectImmShifterOperand(N, A, B, false);108}109110bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);111112bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);113bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);114115bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) {116const ConstantSDNode *CN = cast<ConstantSDNode>(N);117Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32);118Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32);119return true;120}121122bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,123SDValue &Offset, SDValue &Opc);124bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,125SDValue &Offset, SDValue &Opc);126bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,127SDValue &Offset, SDValue &Opc);128bool SelectAddrOffsetNone(SDValue N, SDValue &Base);129bool SelectAddrMode3(SDValue N, SDValue &Base,130SDValue &Offset, SDValue &Opc);131bool SelectAddrMode3Offset(SDNode *Op, SDValue N,132SDValue &Offset, SDValue &Opc);133bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);134bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);135bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);136bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);137bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);138139bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);140141// Thumb Addressing Modes:142bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);143bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);144bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,145SDValue &OffImm);146bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,147SDValue &OffImm);148bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,149SDValue &OffImm);150bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,151SDValue &OffImm);152bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);153template <unsigned Shift>154bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);155156// Thumb 2 Addressing Modes:157bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);158template <unsigned Shift>159bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);160bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,161SDValue &OffImm);162bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,163SDValue &OffImm);164template <unsigned Shift>165bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);166bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,167unsigned Shift);168template <unsigned Shift>169bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);170bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,171SDValue &OffReg, SDValue &ShImm);172bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);173174template<int Min, int Max>175bool SelectImmediateInRange(SDValue N, SDValue &OffImm);176177inline bool is_so_imm(unsigned Imm) const {178return ARM_AM::getSOImmVal(Imm) != -1;179}180181inline bool is_so_imm_not(unsigned Imm) const {182return ARM_AM::getSOImmVal(~Imm) != -1;183}184185inline bool is_t2_so_imm(unsigned Imm) const {186return ARM_AM::getT2SOImmVal(Imm) != -1;187}188189inline bool is_t2_so_imm_not(unsigned Imm) const {190return ARM_AM::getT2SOImmVal(~Imm) != -1;191}192193// Include the pieces autogenerated from the target description.194#include "ARMGenDAGISel.inc"195196private:197void transferMemOperands(SDNode *Src, SDNode *Dst);198199/// Indexed (pre/post inc/dec) load matching code for ARM.200bool tryARMIndexedLoad(SDNode *N);201bool tryT1IndexedLoad(SDNode *N);202bool tryT2IndexedLoad(SDNode *N);203bool tryMVEIndexedLoad(SDNode *N);204bool tryFMULFixed(SDNode *N, SDLoc dl);205bool tryFP_TO_INT(SDNode *N, SDLoc dl);206bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,207bool IsUnsigned,208bool FixedToFloat);209210/// SelectVLD - Select NEON load intrinsics. NumVecs should be211/// 1, 2, 3 or 4. The opcode arrays specify the instructions used for212/// loads of D registers and even subregs and odd subregs of Q registers.213/// For NumVecs <= 2, QOpcodes1 is not used.214void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,215const uint16_t *DOpcodes, const uint16_t *QOpcodes0,216const uint16_t *QOpcodes1);217218/// SelectVST - Select NEON store intrinsics. NumVecs should219/// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for220/// stores of D registers and even subregs and odd subregs of Q registers.221/// For NumVecs <= 2, QOpcodes1 is not used.222void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,223const uint16_t *DOpcodes, const uint16_t *QOpcodes0,224const uint16_t *QOpcodes1);225226/// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should227/// be 2, 3 or 4. The opcode arrays specify the instructions used for228/// load/store of D registers and Q registers.229void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,230unsigned NumVecs, const uint16_t *DOpcodes,231const uint16_t *QOpcodes);232233/// Helper functions for setting up clusters of MVE predication operands.234template <typename SDValueVector>235void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,236SDValue PredicateMask);237template <typename SDValueVector>238void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,239SDValue PredicateMask, SDValue Inactive);240241template <typename SDValueVector>242void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);243template <typename SDValueVector>244void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);245246/// SelectMVE_WB - Select MVE writeback load/store intrinsics.247void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);248249/// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.250void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,251bool HasSaturationOperand);252253/// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.254void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,255uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);256257/// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between258/// vector lanes.259void SelectMVE_VSHLC(SDNode *N, bool Predicated);260261/// Select long MVE vector reductions with two vector operands262/// Stride is the number of vector element widths the instruction can operate263/// on:264/// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]265/// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]266/// Stride is used when addressing the OpcodesS array which contains multiple267/// opcodes for each element width.268/// TySize is the index into the list of element types listed above269void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,270const uint16_t *OpcodesS, const uint16_t *OpcodesU,271size_t Stride, size_t TySize);272273/// Select a 64-bit MVE vector reduction with two vector operands274/// arm_mve_vmlldava_[predicated]275void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,276const uint16_t *OpcodesU);277/// Select a 72-bit MVE vector rounding reduction with two vector operands278/// int_arm_mve_vrmlldavha[_predicated]279void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,280const uint16_t *OpcodesU);281282/// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs283/// should be 2 or 4. The opcode array specifies the instructions284/// used for 8, 16 and 32-bit lane sizes respectively, and each285/// pointer points to a set of NumVecs sub-opcodes used for the286/// different stages (e.g. VLD20 versus VLD21) of each load family.287void SelectMVE_VLD(SDNode *N, unsigned NumVecs,288const uint16_t *const *Opcodes, bool HasWriteback);289290/// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an291/// array of 3 elements for the 8, 16 and 32-bit lane sizes.292void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,293bool Wrapping, bool Predicated);294295/// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,296/// CX1DA, CX2D, CX2DA, CX3, CX3DA).297/// \arg \c NumExtraOps number of extra operands besides the coprocossor,298/// the accumulator and the immediate operand, i.e. 0299/// for CX1*, 1 for CX2*, 2 for CX3*300/// \arg \c HasAccum whether the instruction has an accumulator operand301void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,302bool HasAccum);303304/// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs305/// should be 1, 2, 3 or 4. The opcode array specifies the instructions used306/// for loading D registers.307void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,308unsigned NumVecs, const uint16_t *DOpcodes,309const uint16_t *QOpcodes0 = nullptr,310const uint16_t *QOpcodes1 = nullptr);311312/// Try to select SBFX/UBFX instructions for ARM.313bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);314315bool tryInsertVectorElt(SDNode *N);316317// Select special operations if node forms integer ABS pattern318bool tryABSOp(SDNode *N);319320bool tryReadRegister(SDNode *N);321bool tryWriteRegister(SDNode *N);322323bool tryInlineAsm(SDNode *N);324325void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);326327void SelectCMP_SWAP(SDNode *N);328329/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for330/// inline asm expressions.331bool SelectInlineAsmMemoryOperand(const SDValue &Op,332InlineAsm::ConstraintCode ConstraintID,333std::vector<SDValue> &OutOps) override;334335// Form pairs of consecutive R, S, D, or Q registers.336SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);337SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);338SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);339SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);340341// Form sequences of 4 consecutive S, D, or Q registers.342SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);343SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);344SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);345346// Get the alignment operand for a NEON VLD or VST instruction.347SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,348bool is64BitVector);349350/// Checks if N is a multiplication by a constant where we can extract out a351/// power of two from the constant so that it can be used in a shift, but only352/// if it simplifies the materialization of the constant. Returns true if it353/// is, and assigns to PowerOfTwo the power of two that should be extracted354/// out and to NewMulConst the new constant to be multiplied by.355bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,356unsigned &PowerOfTwo, SDValue &NewMulConst) const;357358/// Replace N with M in CurDAG, in a way that also ensures that M gets359/// selected when N would have been selected.360void replaceDAGValue(const SDValue &N, SDValue M);361};362363class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {364public:365static char ID;366ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)367: SelectionDAGISelLegacy(368ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}369};370}371372char ARMDAGToDAGISelLegacy::ID = 0;373374INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)375376/// isInt32Immediate - This method tests to see if the node is a 32-bit constant377/// operand. If so Imm will receive the 32-bit value.378static bool isInt32Immediate(SDNode *N, unsigned &Imm) {379if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {380Imm = N->getAsZExtVal();381return true;382}383return false;384}385386// isInt32Immediate - This method tests to see if a constant operand.387// If so Imm will receive the 32 bit value.388static bool isInt32Immediate(SDValue N, unsigned &Imm) {389return isInt32Immediate(N.getNode(), Imm);390}391392// isOpcWithIntImmediate - This method tests to see if the node is a specific393// opcode and that it has a immediate integer right operand.394// If so Imm will receive the 32 bit value.395static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {396return N->getOpcode() == Opc &&397isInt32Immediate(N->getOperand(1).getNode(), Imm);398}399400/// Check whether a particular node is a constant value representable as401/// (N * Scale) where (N in [\p RangeMin, \p RangeMax).402///403/// \param ScaledConstant [out] - On success, the pre-scaled constant value.404static bool isScaledConstantInRange(SDValue Node, int Scale,405int RangeMin, int RangeMax,406int &ScaledConstant) {407assert(Scale > 0 && "Invalid scale!");408409// Check that this is a constant.410const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);411if (!C)412return false;413414ScaledConstant = (int) C->getZExtValue();415if ((ScaledConstant % Scale) != 0)416return false;417418ScaledConstant /= Scale;419return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;420}421422void ARMDAGToDAGISel::PreprocessISelDAG() {423if (!Subtarget->hasV6T2Ops())424return;425426bool isThumb2 = Subtarget->isThumb();427// We use make_early_inc_range to avoid invalidation issues.428for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {429if (N.getOpcode() != ISD::ADD)430continue;431432// Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with433// leading zeros, followed by consecutive set bits, followed by 1 or 2434// trailing zeros, e.g. 1020.435// Transform the expression to436// (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number437// of trailing zeros of c2. The left shift would be folded as an shifter438// operand of 'add' and the 'and' and 'srl' would become a bits extraction439// node (UBFX).440441SDValue N0 = N.getOperand(0);442SDValue N1 = N.getOperand(1);443unsigned And_imm = 0;444if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {445if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))446std::swap(N0, N1);447}448if (!And_imm)449continue;450451// Check if the AND mask is an immediate of the form: 000.....1111111100452unsigned TZ = llvm::countr_zero(And_imm);453if (TZ != 1 && TZ != 2)454// Be conservative here. Shifter operands aren't always free. e.g. On455// Swift, left shifter operand of 1 / 2 for free but others are not.456// e.g.457// ubfx r3, r1, #16, #8458// ldr.w r3, [r0, r3, lsl #2]459// vs.460// mov.w r9, #1020461// and.w r2, r9, r1, lsr #14462// ldr r2, [r0, r2]463continue;464And_imm >>= TZ;465if (And_imm & (And_imm + 1))466continue;467468// Look for (and (srl X, c1), c2).469SDValue Srl = N1.getOperand(0);470unsigned Srl_imm = 0;471if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||472(Srl_imm <= 2))473continue;474475// Make sure first operand is not a shifter operand which would prevent476// folding of the left shift.477SDValue CPTmp0;478SDValue CPTmp1;479SDValue CPTmp2;480if (isThumb2) {481if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))482continue;483} else {484if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||485SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))486continue;487}488489// Now make the transformation.490Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,491Srl.getOperand(0),492CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),493MVT::i32));494N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,495Srl,496CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));497N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,498N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));499CurDAG->UpdateNodeOperands(&N, N0, N1);500}501}502503/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS504/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at505/// least on current ARM implementations) which should be avoidded.506bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {507if (OptLevel == CodeGenOptLevel::None)508return true;509510if (!Subtarget->hasVMLxHazards())511return true;512513if (!N->hasOneUse())514return false;515516SDNode *Use = *N->use_begin();517if (Use->getOpcode() == ISD::CopyToReg)518return true;519if (Use->isMachineOpcode()) {520const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(521CurDAG->getSubtarget().getInstrInfo());522523const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode());524if (MCID.mayStore())525return true;526unsigned Opcode = MCID.getOpcode();527if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)528return true;529// vmlx feeding into another vmlx. We actually want to unfold530// the use later in the MLxExpansion pass. e.g.531// vmla532// vmla (stall 8 cycles)533//534// vmul (5 cycles)535// vadd (5 cycles)536// vmla537// This adds up to about 18 - 19 cycles.538//539// vmla540// vmul (stall 4 cycles)541// vadd adds up to about 14 cycles.542return TII->isFpMLxInstruction(Opcode);543}544545return false;546}547548bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,549ARM_AM::ShiftOpc ShOpcVal,550unsigned ShAmt) {551if (!Subtarget->isLikeA9() && !Subtarget->isSwift())552return true;553if (Shift.hasOneUse())554return true;555// R << 2 is free.556return ShOpcVal == ARM_AM::lsl &&557(ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));558}559560bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,561unsigned MaxShift,562unsigned &PowerOfTwo,563SDValue &NewMulConst) const {564assert(N.getOpcode() == ISD::MUL);565assert(MaxShift > 0);566567// If the multiply is used in more than one place then changing the constant568// will make other uses incorrect, so don't.569if (!N.hasOneUse()) return false;570// Check if the multiply is by a constant571ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));572if (!MulConst) return false;573// If the constant is used in more than one place then modifying it will mean574// we need to materialize two constants instead of one, which is a bad idea.575if (!MulConst->hasOneUse()) return false;576unsigned MulConstVal = MulConst->getZExtValue();577if (MulConstVal == 0) return false;578579// Find the largest power of 2 that MulConstVal is a multiple of580PowerOfTwo = MaxShift;581while ((MulConstVal % (1 << PowerOfTwo)) != 0) {582--PowerOfTwo;583if (PowerOfTwo == 0) return false;584}585586// Only optimise if the new cost is better587unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);588NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);589unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);590unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);591return NewCost < OldCost;592}593594void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {595CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());596ReplaceUses(N, M);597}598599bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,600SDValue &BaseReg,601SDValue &Opc,602bool CheckProfitability) {603if (DisableShifterOp)604return false;605606// If N is a multiply-by-constant and it's profitable to extract a shift and607// use it in a shifted operand do so.608if (N.getOpcode() == ISD::MUL) {609unsigned PowerOfTwo = 0;610SDValue NewMulConst;611if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {612HandleSDNode Handle(N);613SDLoc Loc(N);614replaceDAGValue(N.getOperand(1), NewMulConst);615BaseReg = Handle.getValue();616Opc = CurDAG->getTargetConstant(617ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);618return true;619}620}621622ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());623624// Don't match base register only case. That is matched to a separate625// lower complexity pattern with explicit register operand.626if (ShOpcVal == ARM_AM::no_shift) return false;627628BaseReg = N.getOperand(0);629unsigned ShImmVal = 0;630ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));631if (!RHS) return false;632ShImmVal = RHS->getZExtValue() & 31;633Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),634SDLoc(N), MVT::i32);635return true;636}637638bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,639SDValue &BaseReg,640SDValue &ShReg,641SDValue &Opc,642bool CheckProfitability) {643if (DisableShifterOp)644return false;645646ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());647648// Don't match base register only case. That is matched to a separate649// lower complexity pattern with explicit register operand.650if (ShOpcVal == ARM_AM::no_shift) return false;651652BaseReg = N.getOperand(0);653unsigned ShImmVal = 0;654ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));655if (RHS) return false;656657ShReg = N.getOperand(1);658if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))659return false;660Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),661SDLoc(N), MVT::i32);662return true;663}664665// Determine whether an ISD::OR's operands are suitable to turn the operation666// into an addition, which often has more compact encodings.667bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {668assert(Parent->getOpcode() == ISD::OR && "unexpected parent");669Out = N;670return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));671}672673674bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,675SDValue &Base,676SDValue &OffImm) {677// Match simple R + imm12 operands.678679// Base only.680if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&681!CurDAG->isBaseWithConstantOffset(N)) {682if (N.getOpcode() == ISD::FrameIndex) {683// Match frame index.684int FI = cast<FrameIndexSDNode>(N)->getIndex();685Base = CurDAG->getTargetFrameIndex(686FI, TLI->getPointerTy(CurDAG->getDataLayout()));687OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);688return true;689}690691if (N.getOpcode() == ARMISD::Wrapper &&692N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&693N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&694N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {695Base = N.getOperand(0);696} else697Base = N;698OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);699return true;700}701702if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {703int RHSC = (int)RHS->getSExtValue();704if (N.getOpcode() == ISD::SUB)705RHSC = -RHSC;706707if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits708Base = N.getOperand(0);709if (Base.getOpcode() == ISD::FrameIndex) {710int FI = cast<FrameIndexSDNode>(Base)->getIndex();711Base = CurDAG->getTargetFrameIndex(712FI, TLI->getPointerTy(CurDAG->getDataLayout()));713}714OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);715return true;716}717}718719// Base only.720Base = N;721OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);722return true;723}724725726727bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,728SDValue &Opc) {729if (N.getOpcode() == ISD::MUL &&730((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {731if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {732// X * [3,5,9] -> X + X * [2,4,8] etc.733int RHSC = (int)RHS->getZExtValue();734if (RHSC & 1) {735RHSC = RHSC & ~1;736ARM_AM::AddrOpc AddSub = ARM_AM::add;737if (RHSC < 0) {738AddSub = ARM_AM::sub;739RHSC = - RHSC;740}741if (isPowerOf2_32(RHSC)) {742unsigned ShAmt = Log2_32(RHSC);743Base = Offset = N.getOperand(0);744Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,745ARM_AM::lsl),746SDLoc(N), MVT::i32);747return true;748}749}750}751}752753if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&754// ISD::OR that is equivalent to an ISD::ADD.755!CurDAG->isBaseWithConstantOffset(N))756return false;757758// Leave simple R +/- imm12 operands for LDRi12759if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {760int RHSC;761if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,762-0x1000+1, 0x1000, RHSC)) // 12 bits.763return false;764}765766// Otherwise this is R +/- [possibly shifted] R.767ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;768ARM_AM::ShiftOpc ShOpcVal =769ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());770unsigned ShAmt = 0;771772Base = N.getOperand(0);773Offset = N.getOperand(1);774775if (ShOpcVal != ARM_AM::no_shift) {776// Check to see if the RHS of the shift is a constant, if not, we can't fold777// it.778if (ConstantSDNode *Sh =779dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {780ShAmt = Sh->getZExtValue();781if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))782Offset = N.getOperand(1).getOperand(0);783else {784ShAmt = 0;785ShOpcVal = ARM_AM::no_shift;786}787} else {788ShOpcVal = ARM_AM::no_shift;789}790}791792// Try matching (R shl C) + (R).793if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&794!(Subtarget->isLikeA9() || Subtarget->isSwift() ||795N.getOperand(0).hasOneUse())) {796ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());797if (ShOpcVal != ARM_AM::no_shift) {798// Check to see if the RHS of the shift is a constant, if not, we can't799// fold it.800if (ConstantSDNode *Sh =801dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {802ShAmt = Sh->getZExtValue();803if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {804Offset = N.getOperand(0).getOperand(0);805Base = N.getOperand(1);806} else {807ShAmt = 0;808ShOpcVal = ARM_AM::no_shift;809}810} else {811ShOpcVal = ARM_AM::no_shift;812}813}814}815816// If Offset is a multiply-by-constant and it's profitable to extract a shift817// and use it in a shifted operand do so.818if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {819unsigned PowerOfTwo = 0;820SDValue NewMulConst;821if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {822HandleSDNode Handle(Offset);823replaceDAGValue(Offset.getOperand(1), NewMulConst);824Offset = Handle.getValue();825ShAmt = PowerOfTwo;826ShOpcVal = ARM_AM::lsl;827}828}829830Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),831SDLoc(N), MVT::i32);832return true;833}834835bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,836SDValue &Offset, SDValue &Opc) {837unsigned Opcode = Op->getOpcode();838ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)839? cast<LoadSDNode>(Op)->getAddressingMode()840: cast<StoreSDNode>(Op)->getAddressingMode();841ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)842? ARM_AM::add : ARM_AM::sub;843int Val;844if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))845return false;846847Offset = N;848ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());849unsigned ShAmt = 0;850if (ShOpcVal != ARM_AM::no_shift) {851// Check to see if the RHS of the shift is a constant, if not, we can't fold852// it.853if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {854ShAmt = Sh->getZExtValue();855if (isShifterOpProfitable(N, ShOpcVal, ShAmt))856Offset = N.getOperand(0);857else {858ShAmt = 0;859ShOpcVal = ARM_AM::no_shift;860}861} else {862ShOpcVal = ARM_AM::no_shift;863}864}865866Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),867SDLoc(N), MVT::i32);868return true;869}870871bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,872SDValue &Offset, SDValue &Opc) {873unsigned Opcode = Op->getOpcode();874ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)875? cast<LoadSDNode>(Op)->getAddressingMode()876: cast<StoreSDNode>(Op)->getAddressingMode();877ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)878? ARM_AM::add : ARM_AM::sub;879int Val;880if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.881if (AddSub == ARM_AM::sub) Val *= -1;882Offset = CurDAG->getRegister(0, MVT::i32);883Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32);884return true;885}886887return false;888}889890891bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,892SDValue &Offset, SDValue &Opc) {893unsigned Opcode = Op->getOpcode();894ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)895? cast<LoadSDNode>(Op)->getAddressingMode()896: cast<StoreSDNode>(Op)->getAddressingMode();897ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)898? ARM_AM::add : ARM_AM::sub;899int Val;900if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.901Offset = CurDAG->getRegister(0, MVT::i32);902Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,903ARM_AM::no_shift),904SDLoc(Op), MVT::i32);905return true;906}907908return false;909}910911bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {912Base = N;913return true;914}915916bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,917SDValue &Base, SDValue &Offset,918SDValue &Opc) {919if (N.getOpcode() == ISD::SUB) {920// X - C is canonicalize to X + -C, no need to handle it here.921Base = N.getOperand(0);922Offset = N.getOperand(1);923Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),924MVT::i32);925return true;926}927928if (!CurDAG->isBaseWithConstantOffset(N)) {929Base = N;930if (N.getOpcode() == ISD::FrameIndex) {931int FI = cast<FrameIndexSDNode>(N)->getIndex();932Base = CurDAG->getTargetFrameIndex(933FI, TLI->getPointerTy(CurDAG->getDataLayout()));934}935Offset = CurDAG->getRegister(0, MVT::i32);936Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),937MVT::i32);938return true;939}940941// If the RHS is +/- imm8, fold into addr mode.942int RHSC;943if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,944-256 + 1, 256, RHSC)) { // 8 bits.945Base = N.getOperand(0);946if (Base.getOpcode() == ISD::FrameIndex) {947int FI = cast<FrameIndexSDNode>(Base)->getIndex();948Base = CurDAG->getTargetFrameIndex(949FI, TLI->getPointerTy(CurDAG->getDataLayout()));950}951Offset = CurDAG->getRegister(0, MVT::i32);952953ARM_AM::AddrOpc AddSub = ARM_AM::add;954if (RHSC < 0) {955AddSub = ARM_AM::sub;956RHSC = -RHSC;957}958Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),959MVT::i32);960return true;961}962963Base = N.getOperand(0);964Offset = N.getOperand(1);965Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),966MVT::i32);967return true;968}969970bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,971SDValue &Offset, SDValue &Opc) {972unsigned Opcode = Op->getOpcode();973ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)974? cast<LoadSDNode>(Op)->getAddressingMode()975: cast<StoreSDNode>(Op)->getAddressingMode();976ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)977? ARM_AM::add : ARM_AM::sub;978int Val;979if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits.980Offset = CurDAG->getRegister(0, MVT::i32);981Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),982MVT::i32);983return true;984}985986Offset = N;987Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),988MVT::i32);989return true;990}991992bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,993bool FP16) {994if (!CurDAG->isBaseWithConstantOffset(N)) {995Base = N;996if (N.getOpcode() == ISD::FrameIndex) {997int FI = cast<FrameIndexSDNode>(N)->getIndex();998Base = CurDAG->getTargetFrameIndex(999FI, TLI->getPointerTy(CurDAG->getDataLayout()));1000} else if (N.getOpcode() == ARMISD::Wrapper &&1001N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&1002N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&1003N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {1004Base = N.getOperand(0);1005}1006Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),1007SDLoc(N), MVT::i32);1008return true;1009}10101011// If the RHS is +/- imm8, fold into addr mode.1012int RHSC;1013const int Scale = FP16 ? 2 : 4;10141015if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {1016Base = N.getOperand(0);1017if (Base.getOpcode() == ISD::FrameIndex) {1018int FI = cast<FrameIndexSDNode>(Base)->getIndex();1019Base = CurDAG->getTargetFrameIndex(1020FI, TLI->getPointerTy(CurDAG->getDataLayout()));1021}10221023ARM_AM::AddrOpc AddSub = ARM_AM::add;1024if (RHSC < 0) {1025AddSub = ARM_AM::sub;1026RHSC = -RHSC;1027}10281029if (FP16)1030Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),1031SDLoc(N), MVT::i32);1032else1033Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),1034SDLoc(N), MVT::i32);10351036return true;1037}10381039Base = N;10401041if (FP16)1042Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),1043SDLoc(N), MVT::i32);1044else1045Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),1046SDLoc(N), MVT::i32);10471048return true;1049}10501051bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,1052SDValue &Base, SDValue &Offset) {1053return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);1054}10551056bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,1057SDValue &Base, SDValue &Offset) {1058return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);1059}10601061bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,1062SDValue &Align) {1063Addr = N;10641065unsigned Alignment = 0;10661067MemSDNode *MemN = cast<MemSDNode>(Parent);10681069if (isa<LSBaseSDNode>(MemN) ||1070((MemN->getOpcode() == ARMISD::VST1_UPD ||1071MemN->getOpcode() == ARMISD::VLD1_UPD) &&1072MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {1073// This case occurs only for VLD1-lane/dup and VST1-lane instructions.1074// The maximum alignment is equal to the memory size being referenced.1075llvm::Align MMOAlign = MemN->getAlign();1076unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;1077if (MMOAlign.value() >= MemSize && MemSize > 1)1078Alignment = MemSize;1079} else {1080// All other uses of addrmode6 are for intrinsics. For now just record1081// the raw alignment value; it will be refined later based on the legal1082// alignment operands for the intrinsic.1083Alignment = MemN->getAlign().value();1084}10851086Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);1087return true;1088}10891090bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,1091SDValue &Offset) {1092LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);1093ISD::MemIndexedMode AM = LdSt->getAddressingMode();1094if (AM != ISD::POST_INC)1095return false;1096Offset = N;1097if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {1098if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())1099Offset = CurDAG->getRegister(0, MVT::i32);1100}1101return true;1102}11031104bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,1105SDValue &Offset, SDValue &Label) {1106if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {1107Offset = N.getOperand(0);1108SDValue N1 = N.getOperand(1);1109Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);1110return true;1111}11121113return false;1114}111511161117//===----------------------------------------------------------------------===//1118// Thumb Addressing Modes1119//===----------------------------------------------------------------------===//11201121static bool shouldUseZeroOffsetLdSt(SDValue N) {1122// Negative numbers are difficult to materialise in thumb1. If we are1123// selecting the add of a negative, instead try to select ri with a zero1124// offset, so create the add node directly which will become a sub.1125if (N.getOpcode() != ISD::ADD)1126return false;11271128// Look for an imm which is not legal for ld/st, but is legal for sub.1129if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))1130return C->getSExtValue() < 0 && C->getSExtValue() >= -255;11311132return false;1133}11341135bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,1136SDValue &Offset) {1137if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {1138if (!isNullConstant(N))1139return false;11401141Base = Offset = N;1142return true;1143}11441145Base = N.getOperand(0);1146Offset = N.getOperand(1);1147return true;1148}11491150bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,1151SDValue &Offset) {1152if (shouldUseZeroOffsetLdSt(N))1153return false; // Select ri instead1154return SelectThumbAddrModeRRSext(N, Base, Offset);1155}11561157bool1158ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,1159SDValue &Base, SDValue &OffImm) {1160if (shouldUseZeroOffsetLdSt(N)) {1161Base = N;1162OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1163return true;1164}11651166if (!CurDAG->isBaseWithConstantOffset(N)) {1167if (N.getOpcode() == ISD::ADD) {1168return false; // We want to select register offset instead1169} else if (N.getOpcode() == ARMISD::Wrapper &&1170N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&1171N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&1172N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&1173N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {1174Base = N.getOperand(0);1175} else {1176Base = N;1177}11781179OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1180return true;1181}11821183// If the RHS is + imm5 * scale, fold into addr mode.1184int RHSC;1185if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {1186Base = N.getOperand(0);1187OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);1188return true;1189}11901191// Offset is too large, so use register offset instead.1192return false;1193}11941195bool1196ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,1197SDValue &OffImm) {1198return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);1199}12001201bool1202ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,1203SDValue &OffImm) {1204return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);1205}12061207bool1208ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,1209SDValue &OffImm) {1210return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);1211}12121213bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,1214SDValue &Base, SDValue &OffImm) {1215if (N.getOpcode() == ISD::FrameIndex) {1216int FI = cast<FrameIndexSDNode>(N)->getIndex();1217// Only multiples of 4 are allowed for the offset, so the frame object1218// alignment must be at least 4.1219MachineFrameInfo &MFI = MF->getFrameInfo();1220if (MFI.getObjectAlign(FI) < Align(4))1221MFI.setObjectAlignment(FI, Align(4));1222Base = CurDAG->getTargetFrameIndex(1223FI, TLI->getPointerTy(CurDAG->getDataLayout()));1224OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1225return true;1226}12271228if (!CurDAG->isBaseWithConstantOffset(N))1229return false;12301231if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {1232// If the RHS is + imm8 * scale, fold into addr mode.1233int RHSC;1234if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {1235Base = N.getOperand(0);1236int FI = cast<FrameIndexSDNode>(Base)->getIndex();1237// Make sure the offset is inside the object, or we might fail to1238// allocate an emergency spill slot. (An out-of-range access is UB, but1239// it could show up anyway.)1240MachineFrameInfo &MFI = MF->getFrameInfo();1241if (RHSC * 4 < MFI.getObjectSize(FI)) {1242// For LHS+RHS to result in an offset that's a multiple of 4 the object1243// indexed by the LHS must be 4-byte aligned.1244if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))1245MFI.setObjectAlignment(FI, Align(4));1246if (MFI.getObjectAlign(FI) >= Align(4)) {1247Base = CurDAG->getTargetFrameIndex(1248FI, TLI->getPointerTy(CurDAG->getDataLayout()));1249OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);1250return true;1251}1252}1253}1254}12551256return false;1257}12581259template <unsigned Shift>1260bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,1261SDValue &OffImm) {1262if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {1263int RHSC;1264if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,1265RHSC)) {1266Base = N.getOperand(0);1267if (N.getOpcode() == ISD::SUB)1268RHSC = -RHSC;1269OffImm =1270CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);1271return true;1272}1273}12741275// Base only.1276Base = N;1277OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1278return true;1279}128012811282//===----------------------------------------------------------------------===//1283// Thumb 2 Addressing Modes1284//===----------------------------------------------------------------------===//128512861287bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,1288SDValue &Base, SDValue &OffImm) {1289// Match simple R + imm12 operands.12901291// Base only.1292if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&1293!CurDAG->isBaseWithConstantOffset(N)) {1294if (N.getOpcode() == ISD::FrameIndex) {1295// Match frame index.1296int FI = cast<FrameIndexSDNode>(N)->getIndex();1297Base = CurDAG->getTargetFrameIndex(1298FI, TLI->getPointerTy(CurDAG->getDataLayout()));1299OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1300return true;1301}13021303if (N.getOpcode() == ARMISD::Wrapper &&1304N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&1305N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&1306N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {1307Base = N.getOperand(0);1308if (Base.getOpcode() == ISD::TargetConstantPool)1309return false; // We want to select t2LDRpci instead.1310} else1311Base = N;1312OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1313return true;1314}13151316if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1317if (SelectT2AddrModeImm8(N, Base, OffImm))1318// Let t2LDRi8 handle (R - imm8).1319return false;13201321int RHSC = (int)RHS->getZExtValue();1322if (N.getOpcode() == ISD::SUB)1323RHSC = -RHSC;13241325if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)1326Base = N.getOperand(0);1327if (Base.getOpcode() == ISD::FrameIndex) {1328int FI = cast<FrameIndexSDNode>(Base)->getIndex();1329Base = CurDAG->getTargetFrameIndex(1330FI, TLI->getPointerTy(CurDAG->getDataLayout()));1331}1332OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);1333return true;1334}1335}13361337// Base only.1338Base = N;1339OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1340return true;1341}13421343template <unsigned Shift>1344bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,1345SDValue &OffImm) {1346if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {1347int RHSC;1348if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {1349Base = N.getOperand(0);1350if (Base.getOpcode() == ISD::FrameIndex) {1351int FI = cast<FrameIndexSDNode>(Base)->getIndex();1352Base = CurDAG->getTargetFrameIndex(1353FI, TLI->getPointerTy(CurDAG->getDataLayout()));1354}13551356if (N.getOpcode() == ISD::SUB)1357RHSC = -RHSC;1358OffImm =1359CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);1360return true;1361}1362}13631364// Base only.1365Base = N;1366OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1367return true;1368}13691370bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,1371SDValue &Base, SDValue &OffImm) {1372// Match simple R - imm8 operands.1373if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&1374!CurDAG->isBaseWithConstantOffset(N))1375return false;13761377if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1378int RHSC = (int)RHS->getSExtValue();1379if (N.getOpcode() == ISD::SUB)1380RHSC = -RHSC;13811382if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)1383Base = N.getOperand(0);1384if (Base.getOpcode() == ISD::FrameIndex) {1385int FI = cast<FrameIndexSDNode>(Base)->getIndex();1386Base = CurDAG->getTargetFrameIndex(1387FI, TLI->getPointerTy(CurDAG->getDataLayout()));1388}1389OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32);1390return true;1391}1392}13931394return false;1395}13961397bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,1398SDValue &OffImm){1399unsigned Opcode = Op->getOpcode();1400ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)1401? cast<LoadSDNode>(Op)->getAddressingMode()1402: cast<StoreSDNode>(Op)->getAddressingMode();1403int RHSC;1404if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.1405OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))1406? CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32)1407: CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32);1408return true;1409}14101411return false;1412}14131414template <unsigned Shift>1415bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,1416SDValue &OffImm) {1417if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {1418int RHSC;1419if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,1420RHSC)) {1421Base = N.getOperand(0);1422if (Base.getOpcode() == ISD::FrameIndex) {1423int FI = cast<FrameIndexSDNode>(Base)->getIndex();1424Base = CurDAG->getTargetFrameIndex(1425FI, TLI->getPointerTy(CurDAG->getDataLayout()));1426}14271428if (N.getOpcode() == ISD::SUB)1429RHSC = -RHSC;1430OffImm =1431CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32);1432return true;1433}1434}14351436// Base only.1437Base = N;1438OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);1439return true;1440}14411442template <unsigned Shift>1443bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,1444SDValue &OffImm) {1445return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);1446}14471448bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,1449SDValue &OffImm,1450unsigned Shift) {1451unsigned Opcode = Op->getOpcode();1452ISD::MemIndexedMode AM;1453switch (Opcode) {1454case ISD::LOAD:1455AM = cast<LoadSDNode>(Op)->getAddressingMode();1456break;1457case ISD::STORE:1458AM = cast<StoreSDNode>(Op)->getAddressingMode();1459break;1460case ISD::MLOAD:1461AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();1462break;1463case ISD::MSTORE:1464AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();1465break;1466default:1467llvm_unreachable("Unexpected Opcode for Imm7Offset");1468}14691470int RHSC;1471// 7 bit constant, shifted by Shift.1472if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {1473OffImm =1474((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))1475? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32)1476: CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N),1477MVT::i32);1478return true;1479}1480return false;1481}14821483template <int Min, int Max>1484bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {1485int Val;1486if (isScaledConstantInRange(N, 1, Min, Max, Val)) {1487OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32);1488return true;1489}1490return false;1491}14921493bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,1494SDValue &Base,1495SDValue &OffReg, SDValue &ShImm) {1496// (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.1497if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))1498return false;14991500// Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.1501if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {1502int RHSC = (int)RHS->getZExtValue();1503if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)1504return false;1505else if (RHSC < 0 && RHSC >= -255) // 8 bits1506return false;1507}15081509// Look for (R + R) or (R + (R << [1,2,3])).1510unsigned ShAmt = 0;1511Base = N.getOperand(0);1512OffReg = N.getOperand(1);15131514// Swap if it is ((R << c) + R).1515ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());1516if (ShOpcVal != ARM_AM::lsl) {1517ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());1518if (ShOpcVal == ARM_AM::lsl)1519std::swap(Base, OffReg);1520}15211522if (ShOpcVal == ARM_AM::lsl) {1523// Check to see if the RHS of the shift is a constant, if not, we can't fold1524// it.1525if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {1526ShAmt = Sh->getZExtValue();1527if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))1528OffReg = OffReg.getOperand(0);1529else {1530ShAmt = 0;1531}1532}1533}15341535// If OffReg is a multiply-by-constant and it's profitable to extract a shift1536// and use it in a shifted operand do so.1537if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {1538unsigned PowerOfTwo = 0;1539SDValue NewMulConst;1540if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {1541HandleSDNode Handle(OffReg);1542replaceDAGValue(OffReg.getOperand(1), NewMulConst);1543OffReg = Handle.getValue();1544ShAmt = PowerOfTwo;1545}1546}15471548ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);15491550return true;1551}15521553bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,1554SDValue &OffImm) {1555// This *must* succeed since it's used for the irreplaceable ldrex and strex1556// instructions.1557Base = N;1558OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);15591560if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))1561return true;15621563ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));1564if (!RHS)1565return true;15661567uint32_t RHSC = (int)RHS->getZExtValue();1568if (RHSC > 1020 || RHSC % 4 != 0)1569return true;15701571Base = N.getOperand(0);1572if (Base.getOpcode() == ISD::FrameIndex) {1573int FI = cast<FrameIndexSDNode>(Base)->getIndex();1574Base = CurDAG->getTargetFrameIndex(1575FI, TLI->getPointerTy(CurDAG->getDataLayout()));1576}15771578OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);1579return true;1580}15811582//===--------------------------------------------------------------------===//15831584/// getAL - Returns a ARMCC::AL immediate node.1585static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {1586return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);1587}15881589void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {1590MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();1591CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});1592}15931594bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {1595LoadSDNode *LD = cast<LoadSDNode>(N);1596ISD::MemIndexedMode AM = LD->getAddressingMode();1597if (AM == ISD::UNINDEXED)1598return false;15991600EVT LoadedVT = LD->getMemoryVT();1601SDValue Offset, AMOpc;1602bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);1603unsigned Opcode = 0;1604bool Match = false;1605if (LoadedVT == MVT::i32 && isPre &&1606SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {1607Opcode = ARM::LDR_PRE_IMM;1608Match = true;1609} else if (LoadedVT == MVT::i32 && !isPre &&1610SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {1611Opcode = ARM::LDR_POST_IMM;1612Match = true;1613} else if (LoadedVT == MVT::i32 &&1614SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {1615Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;1616Match = true;16171618} else if (LoadedVT == MVT::i16 &&1619SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {1620Match = true;1621Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)1622? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)1623: (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);1624} else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {1625if (LD->getExtensionType() == ISD::SEXTLOAD) {1626if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {1627Match = true;1628Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;1629}1630} else {1631if (isPre &&1632SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {1633Match = true;1634Opcode = ARM::LDRB_PRE_IMM;1635} else if (!isPre &&1636SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {1637Match = true;1638Opcode = ARM::LDRB_POST_IMM;1639} else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {1640Match = true;1641Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;1642}1643}1644}16451646if (Match) {1647if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {1648SDValue Chain = LD->getChain();1649SDValue Base = LD->getBasePtr();1650SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),1651CurDAG->getRegister(0, MVT::i32), Chain };1652SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,1653MVT::Other, Ops);1654transferMemOperands(N, New);1655ReplaceNode(N, New);1656return true;1657} else {1658SDValue Chain = LD->getChain();1659SDValue Base = LD->getBasePtr();1660SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),1661CurDAG->getRegister(0, MVT::i32), Chain };1662SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,1663MVT::Other, Ops);1664transferMemOperands(N, New);1665ReplaceNode(N, New);1666return true;1667}1668}16691670return false;1671}16721673bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {1674LoadSDNode *LD = cast<LoadSDNode>(N);1675EVT LoadedVT = LD->getMemoryVT();1676ISD::MemIndexedMode AM = LD->getAddressingMode();1677if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||1678LoadedVT.getSimpleVT().SimpleTy != MVT::i32)1679return false;16801681auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());1682if (!COffs || COffs->getZExtValue() != 4)1683return false;16841685// A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.1686// The encoding of LDM is not how the rest of ISel expects a post-inc load to1687// look however, so we use a pseudo here and switch it for a tLDMIA_UPD after1688// ISel.1689SDValue Chain = LD->getChain();1690SDValue Base = LD->getBasePtr();1691SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),1692CurDAG->getRegister(0, MVT::i32), Chain };1693SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,1694MVT::i32, MVT::Other, Ops);1695transferMemOperands(N, New);1696ReplaceNode(N, New);1697return true;1698}16991700bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {1701LoadSDNode *LD = cast<LoadSDNode>(N);1702ISD::MemIndexedMode AM = LD->getAddressingMode();1703if (AM == ISD::UNINDEXED)1704return false;17051706EVT LoadedVT = LD->getMemoryVT();1707bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;1708SDValue Offset;1709bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);1710unsigned Opcode = 0;1711bool Match = false;1712if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {1713switch (LoadedVT.getSimpleVT().SimpleTy) {1714case MVT::i32:1715Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;1716break;1717case MVT::i16:1718if (isSExtLd)1719Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;1720else1721Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;1722break;1723case MVT::i8:1724case MVT::i1:1725if (isSExtLd)1726Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;1727else1728Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;1729break;1730default:1731return false;1732}1733Match = true;1734}17351736if (Match) {1737SDValue Chain = LD->getChain();1738SDValue Base = LD->getBasePtr();1739SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),1740CurDAG->getRegister(0, MVT::i32), Chain };1741SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,1742MVT::Other, Ops);1743transferMemOperands(N, New);1744ReplaceNode(N, New);1745return true;1746}17471748return false;1749}17501751bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {1752EVT LoadedVT;1753unsigned Opcode = 0;1754bool isSExtLd, isPre;1755Align Alignment;1756ARMVCC::VPTCodes Pred;1757SDValue PredReg;1758SDValue Chain, Base, Offset;17591760if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {1761ISD::MemIndexedMode AM = LD->getAddressingMode();1762if (AM == ISD::UNINDEXED)1763return false;1764LoadedVT = LD->getMemoryVT();1765if (!LoadedVT.isVector())1766return false;17671768Chain = LD->getChain();1769Base = LD->getBasePtr();1770Offset = LD->getOffset();1771Alignment = LD->getAlign();1772isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;1773isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);1774Pred = ARMVCC::None;1775PredReg = CurDAG->getRegister(0, MVT::i32);1776} else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {1777ISD::MemIndexedMode AM = LD->getAddressingMode();1778if (AM == ISD::UNINDEXED)1779return false;1780LoadedVT = LD->getMemoryVT();1781if (!LoadedVT.isVector())1782return false;17831784Chain = LD->getChain();1785Base = LD->getBasePtr();1786Offset = LD->getOffset();1787Alignment = LD->getAlign();1788isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;1789isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);1790Pred = ARMVCC::Then;1791PredReg = LD->getMask();1792} else1793llvm_unreachable("Expected a Load or a Masked Load!");17941795// We allow LE non-masked loads to change the type (for example use a vldrb.81796// as opposed to a vldrw.32). This can allow extra addressing modes or1797// alignments for what is otherwise an equivalent instruction.1798bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);17991800SDValue NewOffset;1801if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&1802SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {1803if (isSExtLd)1804Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;1805else1806Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;1807} else if (LoadedVT == MVT::v8i8 &&1808SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {1809if (isSExtLd)1810Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;1811else1812Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;1813} else if (LoadedVT == MVT::v4i8 &&1814SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {1815if (isSExtLd)1816Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;1817else1818Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;1819} else if (Alignment >= Align(4) &&1820(CanChangeType || LoadedVT == MVT::v4i32 ||1821LoadedVT == MVT::v4f32) &&1822SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))1823Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;1824else if (Alignment >= Align(2) &&1825(CanChangeType || LoadedVT == MVT::v8i16 ||1826LoadedVT == MVT::v8f16) &&1827SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))1828Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;1829else if ((CanChangeType || LoadedVT == MVT::v16i8) &&1830SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))1831Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;1832else1833return false;18341835SDValue Ops[] = {Base,1836NewOffset,1837CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),1838PredReg,1839CurDAG->getRegister(0, MVT::i32), // tp_reg1840Chain};1841SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,1842N->getValueType(0), MVT::Other, Ops);1843transferMemOperands(N, New);1844ReplaceUses(SDValue(N, 0), SDValue(New, 1));1845ReplaceUses(SDValue(N, 1), SDValue(New, 0));1846ReplaceUses(SDValue(N, 2), SDValue(New, 2));1847CurDAG->RemoveDeadNode(N);1848return true;1849}18501851/// Form a GPRPair pseudo register from a pair of GPR regs.1852SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {1853SDLoc dl(V0.getNode());1854SDValue RegClass =1855CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);1856SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);1857SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);1858const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };1859return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1860}18611862/// Form a D register from a pair of S registers.1863SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {1864SDLoc dl(V0.getNode());1865SDValue RegClass =1866CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);1867SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);1868SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);1869const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };1870return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1871}18721873/// Form a quad register from a pair of D registers.1874SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {1875SDLoc dl(V0.getNode());1876SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,1877MVT::i32);1878SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);1879SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);1880const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };1881return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1882}18831884/// Form 4 consecutive D registers from a pair of Q registers.1885SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {1886SDLoc dl(V0.getNode());1887SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,1888MVT::i32);1889SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);1890SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);1891const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };1892return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1893}18941895/// Form 4 consecutive S registers.1896SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,1897SDValue V2, SDValue V3) {1898SDLoc dl(V0.getNode());1899SDValue RegClass =1900CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);1901SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);1902SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);1903SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);1904SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);1905const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,1906V2, SubReg2, V3, SubReg3 };1907return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1908}19091910/// Form 4 consecutive D registers.1911SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,1912SDValue V2, SDValue V3) {1913SDLoc dl(V0.getNode());1914SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,1915MVT::i32);1916SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);1917SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);1918SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);1919SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);1920const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,1921V2, SubReg2, V3, SubReg3 };1922return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1923}19241925/// Form 4 consecutive Q registers.1926SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,1927SDValue V2, SDValue V3) {1928SDLoc dl(V0.getNode());1929SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,1930MVT::i32);1931SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);1932SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);1933SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);1934SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);1935const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,1936V2, SubReg2, V3, SubReg3 };1937return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);1938}19391940/// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand1941/// of a NEON VLD or VST instruction. The supported values depend on the1942/// number of registers being loaded.1943SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,1944unsigned NumVecs, bool is64BitVector) {1945unsigned NumRegs = NumVecs;1946if (!is64BitVector && NumVecs < 3)1947NumRegs *= 2;19481949unsigned Alignment = Align->getAsZExtVal();1950if (Alignment >= 32 && NumRegs == 4)1951Alignment = 32;1952else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))1953Alignment = 16;1954else if (Alignment >= 8)1955Alignment = 8;1956else1957Alignment = 0;19581959return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);1960}19611962static bool isVLDfixed(unsigned Opc)1963{1964switch (Opc) {1965default: return false;1966case ARM::VLD1d8wb_fixed : return true;1967case ARM::VLD1d16wb_fixed : return true;1968case ARM::VLD1d64Qwb_fixed : return true;1969case ARM::VLD1d32wb_fixed : return true;1970case ARM::VLD1d64wb_fixed : return true;1971case ARM::VLD1d8TPseudoWB_fixed : return true;1972case ARM::VLD1d16TPseudoWB_fixed : return true;1973case ARM::VLD1d32TPseudoWB_fixed : return true;1974case ARM::VLD1d64TPseudoWB_fixed : return true;1975case ARM::VLD1d8QPseudoWB_fixed : return true;1976case ARM::VLD1d16QPseudoWB_fixed : return true;1977case ARM::VLD1d32QPseudoWB_fixed : return true;1978case ARM::VLD1d64QPseudoWB_fixed : return true;1979case ARM::VLD1q8wb_fixed : return true;1980case ARM::VLD1q16wb_fixed : return true;1981case ARM::VLD1q32wb_fixed : return true;1982case ARM::VLD1q64wb_fixed : return true;1983case ARM::VLD1DUPd8wb_fixed : return true;1984case ARM::VLD1DUPd16wb_fixed : return true;1985case ARM::VLD1DUPd32wb_fixed : return true;1986case ARM::VLD1DUPq8wb_fixed : return true;1987case ARM::VLD1DUPq16wb_fixed : return true;1988case ARM::VLD1DUPq32wb_fixed : return true;1989case ARM::VLD2d8wb_fixed : return true;1990case ARM::VLD2d16wb_fixed : return true;1991case ARM::VLD2d32wb_fixed : return true;1992case ARM::VLD2q8PseudoWB_fixed : return true;1993case ARM::VLD2q16PseudoWB_fixed : return true;1994case ARM::VLD2q32PseudoWB_fixed : return true;1995case ARM::VLD2DUPd8wb_fixed : return true;1996case ARM::VLD2DUPd16wb_fixed : return true;1997case ARM::VLD2DUPd32wb_fixed : return true;1998case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;1999case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;2000case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;2001}2002}20032004static bool isVSTfixed(unsigned Opc)2005{2006switch (Opc) {2007default: return false;2008case ARM::VST1d8wb_fixed : return true;2009case ARM::VST1d16wb_fixed : return true;2010case ARM::VST1d32wb_fixed : return true;2011case ARM::VST1d64wb_fixed : return true;2012case ARM::VST1q8wb_fixed : return true;2013case ARM::VST1q16wb_fixed : return true;2014case ARM::VST1q32wb_fixed : return true;2015case ARM::VST1q64wb_fixed : return true;2016case ARM::VST1d8TPseudoWB_fixed : return true;2017case ARM::VST1d16TPseudoWB_fixed : return true;2018case ARM::VST1d32TPseudoWB_fixed : return true;2019case ARM::VST1d64TPseudoWB_fixed : return true;2020case ARM::VST1d8QPseudoWB_fixed : return true;2021case ARM::VST1d16QPseudoWB_fixed : return true;2022case ARM::VST1d32QPseudoWB_fixed : return true;2023case ARM::VST1d64QPseudoWB_fixed : return true;2024case ARM::VST2d8wb_fixed : return true;2025case ARM::VST2d16wb_fixed : return true;2026case ARM::VST2d32wb_fixed : return true;2027case ARM::VST2q8PseudoWB_fixed : return true;2028case ARM::VST2q16PseudoWB_fixed : return true;2029case ARM::VST2q32PseudoWB_fixed : return true;2030}2031}20322033// Get the register stride update opcode of a VLD/VST instruction that2034// is otherwise equivalent to the given fixed stride updating instruction.2035static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {2036assert((isVLDfixed(Opc) || isVSTfixed(Opc))2037&& "Incorrect fixed stride updating instruction.");2038switch (Opc) {2039default: break;2040case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;2041case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;2042case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;2043case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;2044case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;2045case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;2046case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;2047case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;2048case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;2049case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;2050case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;2051case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;2052case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;2053case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;2054case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;2055case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;2056case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;2057case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;2058case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;2059case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;2060case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;2061case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;2062case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;2063case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;2064case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;2065case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;2066case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;20672068case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;2069case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;2070case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;2071case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;2072case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;2073case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;2074case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;2075case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;2076case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;2077case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;2078case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;2079case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;2080case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;2081case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;2082case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;2083case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;20842085case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;2086case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;2087case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;2088case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;2089case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;2090case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;20912092case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;2093case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;2094case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;2095case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;2096case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;2097case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;20982099case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;2100case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;2101case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;2102}2103return Opc; // If not one we handle, return it unchanged.2104}21052106/// Returns true if the given increment is a Constant known to be equal to the2107/// access size performed by a NEON load/store. This means the "[rN]!" form can2108/// be used.2109static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {2110auto C = dyn_cast<ConstantSDNode>(Inc);2111return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;2112}21132114void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,2115const uint16_t *DOpcodes,2116const uint16_t *QOpcodes0,2117const uint16_t *QOpcodes1) {2118assert(Subtarget->hasNEON());2119assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");2120SDLoc dl(N);21212122SDValue MemAddr, Align;2123bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating2124// nodes are not intrinsics.2125unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;2126if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))2127return;21282129SDValue Chain = N->getOperand(0);2130EVT VT = N->getValueType(0);2131bool is64BitVector = VT.is64BitVector();2132Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);21332134unsigned OpcodeIndex;2135switch (VT.getSimpleVT().SimpleTy) {2136default: llvm_unreachable("unhandled vld type");2137// Double-register operations:2138case MVT::v8i8: OpcodeIndex = 0; break;2139case MVT::v4f16:2140case MVT::v4bf16:2141case MVT::v4i16: OpcodeIndex = 1; break;2142case MVT::v2f32:2143case MVT::v2i32: OpcodeIndex = 2; break;2144case MVT::v1i64: OpcodeIndex = 3; break;2145// Quad-register operations:2146case MVT::v16i8: OpcodeIndex = 0; break;2147case MVT::v8f16:2148case MVT::v8bf16:2149case MVT::v8i16: OpcodeIndex = 1; break;2150case MVT::v4f32:2151case MVT::v4i32: OpcodeIndex = 2; break;2152case MVT::v2f64:2153case MVT::v2i64: OpcodeIndex = 3; break;2154}21552156EVT ResTy;2157if (NumVecs == 1)2158ResTy = VT;2159else {2160unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;2161if (!is64BitVector)2162ResTyElts *= 2;2163ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);2164}2165std::vector<EVT> ResTys;2166ResTys.push_back(ResTy);2167if (isUpdating)2168ResTys.push_back(MVT::i32);2169ResTys.push_back(MVT::Other);21702171SDValue Pred = getAL(CurDAG, dl);2172SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);2173SDNode *VLd;2174SmallVector<SDValue, 7> Ops;21752176// Double registers and VLD1/VLD2 quad registers are directly supported.2177if (is64BitVector || NumVecs <= 2) {2178unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :2179QOpcodes0[OpcodeIndex]);2180Ops.push_back(MemAddr);2181Ops.push_back(Align);2182if (isUpdating) {2183SDValue Inc = N->getOperand(AddrOpIdx + 1);2184bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);2185if (!IsImmUpdate) {2186// We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so2187// check for the opcode rather than the number of vector elements.2188if (isVLDfixed(Opc))2189Opc = getVLDSTRegisterUpdateOpcode(Opc);2190Ops.push_back(Inc);2191// VLD1/VLD2 fixed increment does not need Reg0 so only include it in2192// the operands if not such an opcode.2193} else if (!isVLDfixed(Opc))2194Ops.push_back(Reg0);2195}2196Ops.push_back(Pred);2197Ops.push_back(Reg0);2198Ops.push_back(Chain);2199VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);22002201} else {2202// Otherwise, quad registers are loaded with two separate instructions,2203// where one loads the even registers and the other loads the odd registers.2204EVT AddrTy = MemAddr.getValueType();22052206// Load the even subregs. This is always an updating load, so that it2207// provides the address to the second load for the odd subregs.2208SDValue ImplDef =2209SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);2210const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };2211SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,2212ResTy, AddrTy, MVT::Other, OpsA);2213Chain = SDValue(VLdA, 2);22142215// Load the odd subregs.2216Ops.push_back(SDValue(VLdA, 1));2217Ops.push_back(Align);2218if (isUpdating) {2219SDValue Inc = N->getOperand(AddrOpIdx + 1);2220assert(isa<ConstantSDNode>(Inc.getNode()) &&2221"only constant post-increment update allowed for VLD3/4");2222(void)Inc;2223Ops.push_back(Reg0);2224}2225Ops.push_back(SDValue(VLdA, 0));2226Ops.push_back(Pred);2227Ops.push_back(Reg0);2228Ops.push_back(Chain);2229VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);2230}22312232// Transfer memoperands.2233MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();2234CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});22352236if (NumVecs == 1) {2237ReplaceNode(N, VLd);2238return;2239}22402241// Extract out the subregisters.2242SDValue SuperReg = SDValue(VLd, 0);2243static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&2244ARM::qsub_3 == ARM::qsub_0 + 3,2245"Unexpected subreg numbering");2246unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);2247for (unsigned Vec = 0; Vec < NumVecs; ++Vec)2248ReplaceUses(SDValue(N, Vec),2249CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));2250ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));2251if (isUpdating)2252ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));2253CurDAG->RemoveDeadNode(N);2254}22552256void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,2257const uint16_t *DOpcodes,2258const uint16_t *QOpcodes0,2259const uint16_t *QOpcodes1) {2260assert(Subtarget->hasNEON());2261assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");2262SDLoc dl(N);22632264SDValue MemAddr, Align;2265bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating2266// nodes are not intrinsics.2267unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;2268unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)2269if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))2270return;22712272MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();22732274SDValue Chain = N->getOperand(0);2275EVT VT = N->getOperand(Vec0Idx).getValueType();2276bool is64BitVector = VT.is64BitVector();2277Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);22782279unsigned OpcodeIndex;2280switch (VT.getSimpleVT().SimpleTy) {2281default: llvm_unreachable("unhandled vst type");2282// Double-register operations:2283case MVT::v8i8: OpcodeIndex = 0; break;2284case MVT::v4f16:2285case MVT::v4bf16:2286case MVT::v4i16: OpcodeIndex = 1; break;2287case MVT::v2f32:2288case MVT::v2i32: OpcodeIndex = 2; break;2289case MVT::v1i64: OpcodeIndex = 3; break;2290// Quad-register operations:2291case MVT::v16i8: OpcodeIndex = 0; break;2292case MVT::v8f16:2293case MVT::v8bf16:2294case MVT::v8i16: OpcodeIndex = 1; break;2295case MVT::v4f32:2296case MVT::v4i32: OpcodeIndex = 2; break;2297case MVT::v2f64:2298case MVT::v2i64: OpcodeIndex = 3; break;2299}23002301std::vector<EVT> ResTys;2302if (isUpdating)2303ResTys.push_back(MVT::i32);2304ResTys.push_back(MVT::Other);23052306SDValue Pred = getAL(CurDAG, dl);2307SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);2308SmallVector<SDValue, 7> Ops;23092310// Double registers and VST1/VST2 quad registers are directly supported.2311if (is64BitVector || NumVecs <= 2) {2312SDValue SrcReg;2313if (NumVecs == 1) {2314SrcReg = N->getOperand(Vec0Idx);2315} else if (is64BitVector) {2316// Form a REG_SEQUENCE to force register allocation.2317SDValue V0 = N->getOperand(Vec0Idx + 0);2318SDValue V1 = N->getOperand(Vec0Idx + 1);2319if (NumVecs == 2)2320SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);2321else {2322SDValue V2 = N->getOperand(Vec0Idx + 2);2323// If it's a vst3, form a quad D-register and leave the last part as2324// an undef.2325SDValue V3 = (NumVecs == 3)2326? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)2327: N->getOperand(Vec0Idx + 3);2328SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);2329}2330} else {2331// Form a QQ register.2332SDValue Q0 = N->getOperand(Vec0Idx);2333SDValue Q1 = N->getOperand(Vec0Idx + 1);2334SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);2335}23362337unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :2338QOpcodes0[OpcodeIndex]);2339Ops.push_back(MemAddr);2340Ops.push_back(Align);2341if (isUpdating) {2342SDValue Inc = N->getOperand(AddrOpIdx + 1);2343bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);2344if (!IsImmUpdate) {2345// We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so2346// check for the opcode rather than the number of vector elements.2347if (isVSTfixed(Opc))2348Opc = getVLDSTRegisterUpdateOpcode(Opc);2349Ops.push_back(Inc);2350}2351// VST1/VST2 fixed increment does not need Reg0 so only include it in2352// the operands if not such an opcode.2353else if (!isVSTfixed(Opc))2354Ops.push_back(Reg0);2355}2356Ops.push_back(SrcReg);2357Ops.push_back(Pred);2358Ops.push_back(Reg0);2359Ops.push_back(Chain);2360SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);23612362// Transfer memoperands.2363CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});23642365ReplaceNode(N, VSt);2366return;2367}23682369// Otherwise, quad registers are stored with two separate instructions,2370// where one stores the even registers and the other stores the odd registers.23712372// Form the QQQQ REG_SEQUENCE.2373SDValue V0 = N->getOperand(Vec0Idx + 0);2374SDValue V1 = N->getOperand(Vec0Idx + 1);2375SDValue V2 = N->getOperand(Vec0Idx + 2);2376SDValue V3 = (NumVecs == 3)2377? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)2378: N->getOperand(Vec0Idx + 3);2379SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);23802381// Store the even D registers. This is always an updating store, so that it2382// provides the address to the second store for the odd subregs.2383const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };2384SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,2385MemAddr.getValueType(),2386MVT::Other, OpsA);2387CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});2388Chain = SDValue(VStA, 1);23892390// Store the odd D registers.2391Ops.push_back(SDValue(VStA, 0));2392Ops.push_back(Align);2393if (isUpdating) {2394SDValue Inc = N->getOperand(AddrOpIdx + 1);2395assert(isa<ConstantSDNode>(Inc.getNode()) &&2396"only constant post-increment update allowed for VST3/4");2397(void)Inc;2398Ops.push_back(Reg0);2399}2400Ops.push_back(RegSeq);2401Ops.push_back(Pred);2402Ops.push_back(Reg0);2403Ops.push_back(Chain);2404SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,2405Ops);2406CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});2407ReplaceNode(N, VStB);2408}24092410void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,2411unsigned NumVecs,2412const uint16_t *DOpcodes,2413const uint16_t *QOpcodes) {2414assert(Subtarget->hasNEON());2415assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");2416SDLoc dl(N);24172418SDValue MemAddr, Align;2419bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating2420// nodes are not intrinsics.2421unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;2422unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)2423if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))2424return;24252426MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();24272428SDValue Chain = N->getOperand(0);2429unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);2430EVT VT = N->getOperand(Vec0Idx).getValueType();2431bool is64BitVector = VT.is64BitVector();24322433unsigned Alignment = 0;2434if (NumVecs != 3) {2435Alignment = Align->getAsZExtVal();2436unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;2437if (Alignment > NumBytes)2438Alignment = NumBytes;2439if (Alignment < 8 && Alignment < NumBytes)2440Alignment = 0;2441// Alignment must be a power of two; make sure of that.2442Alignment = (Alignment & -Alignment);2443if (Alignment == 1)2444Alignment = 0;2445}2446Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);24472448unsigned OpcodeIndex;2449switch (VT.getSimpleVT().SimpleTy) {2450default: llvm_unreachable("unhandled vld/vst lane type");2451// Double-register operations:2452case MVT::v8i8: OpcodeIndex = 0; break;2453case MVT::v4f16:2454case MVT::v4bf16:2455case MVT::v4i16: OpcodeIndex = 1; break;2456case MVT::v2f32:2457case MVT::v2i32: OpcodeIndex = 2; break;2458// Quad-register operations:2459case MVT::v8f16:2460case MVT::v8bf16:2461case MVT::v8i16: OpcodeIndex = 0; break;2462case MVT::v4f32:2463case MVT::v4i32: OpcodeIndex = 1; break;2464}24652466std::vector<EVT> ResTys;2467if (IsLoad) {2468unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;2469if (!is64BitVector)2470ResTyElts *= 2;2471ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),2472MVT::i64, ResTyElts));2473}2474if (isUpdating)2475ResTys.push_back(MVT::i32);2476ResTys.push_back(MVT::Other);24772478SDValue Pred = getAL(CurDAG, dl);2479SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);24802481SmallVector<SDValue, 8> Ops;2482Ops.push_back(MemAddr);2483Ops.push_back(Align);2484if (isUpdating) {2485SDValue Inc = N->getOperand(AddrOpIdx + 1);2486bool IsImmUpdate =2487isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);2488Ops.push_back(IsImmUpdate ? Reg0 : Inc);2489}24902491SDValue SuperReg;2492SDValue V0 = N->getOperand(Vec0Idx + 0);2493SDValue V1 = N->getOperand(Vec0Idx + 1);2494if (NumVecs == 2) {2495if (is64BitVector)2496SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);2497else2498SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);2499} else {2500SDValue V2 = N->getOperand(Vec0Idx + 2);2501SDValue V3 = (NumVecs == 3)2502? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)2503: N->getOperand(Vec0Idx + 3);2504if (is64BitVector)2505SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);2506else2507SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);2508}2509Ops.push_back(SuperReg);2510Ops.push_back(getI32Imm(Lane, dl));2511Ops.push_back(Pred);2512Ops.push_back(Reg0);2513Ops.push_back(Chain);25142515unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :2516QOpcodes[OpcodeIndex]);2517SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);2518CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});2519if (!IsLoad) {2520ReplaceNode(N, VLdLn);2521return;2522}25232524// Extract the subregisters.2525SuperReg = SDValue(VLdLn, 0);2526static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&2527ARM::qsub_3 == ARM::qsub_0 + 3,2528"Unexpected subreg numbering");2529unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;2530for (unsigned Vec = 0; Vec < NumVecs; ++Vec)2531ReplaceUses(SDValue(N, Vec),2532CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));2533ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));2534if (isUpdating)2535ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));2536CurDAG->RemoveDeadNode(N);2537}25382539template <typename SDValueVector>2540void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,2541SDValue PredicateMask) {2542Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));2543Ops.push_back(PredicateMask);2544Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg2545}25462547template <typename SDValueVector>2548void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,2549SDValue PredicateMask,2550SDValue Inactive) {2551Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));2552Ops.push_back(PredicateMask);2553Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg2554Ops.push_back(Inactive);2555}25562557template <typename SDValueVector>2558void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {2559Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));2560Ops.push_back(CurDAG->getRegister(0, MVT::i32));2561Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg2562}25632564template <typename SDValueVector>2565void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,2566EVT InactiveTy) {2567Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));2568Ops.push_back(CurDAG->getRegister(0, MVT::i32));2569Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg2570Ops.push_back(SDValue(2571CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));2572}25732574void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,2575bool Predicated) {2576SDLoc Loc(N);2577SmallVector<SDValue, 8> Ops;25782579uint16_t Opcode;2580switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {2581case 32:2582Opcode = Opcodes[0];2583break;2584case 64:2585Opcode = Opcodes[1];2586break;2587default:2588llvm_unreachable("bad vector element size in SelectMVE_WB");2589}25902591Ops.push_back(N->getOperand(2)); // vector of base addresses25922593int32_t ImmValue = N->getConstantOperandVal(3);2594Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset25952596if (Predicated)2597AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));2598else2599AddEmptyMVEPredicateToOps(Ops, Loc);26002601Ops.push_back(N->getOperand(0)); // chain26022603SmallVector<EVT, 8> VTs;2604VTs.push_back(N->getValueType(1));2605VTs.push_back(N->getValueType(0));2606VTs.push_back(N->getValueType(2));26072608SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);2609ReplaceUses(SDValue(N, 0), SDValue(New, 1));2610ReplaceUses(SDValue(N, 1), SDValue(New, 0));2611ReplaceUses(SDValue(N, 2), SDValue(New, 2));2612transferMemOperands(N, New);2613CurDAG->RemoveDeadNode(N);2614}26152616void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,2617bool Immediate,2618bool HasSaturationOperand) {2619SDLoc Loc(N);2620SmallVector<SDValue, 8> Ops;26212622// Two 32-bit halves of the value to be shifted2623Ops.push_back(N->getOperand(1));2624Ops.push_back(N->getOperand(2));26252626// The shift count2627if (Immediate) {2628int32_t ImmValue = N->getConstantOperandVal(3);2629Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count2630} else {2631Ops.push_back(N->getOperand(3));2632}26332634// The immediate saturation operand, if any2635if (HasSaturationOperand) {2636int32_t SatOp = N->getConstantOperandVal(4);2637int SatBit = (SatOp == 64 ? 0 : 1);2638Ops.push_back(getI32Imm(SatBit, Loc));2639}26402641// MVE scalar shifts are IT-predicable, so include the standard2642// predicate arguments.2643Ops.push_back(getAL(CurDAG, Loc));2644Ops.push_back(CurDAG->getRegister(0, MVT::i32));26452646CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));2647}26482649void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,2650uint16_t OpcodeWithNoCarry,2651bool Add, bool Predicated) {2652SDLoc Loc(N);2653SmallVector<SDValue, 8> Ops;2654uint16_t Opcode;26552656unsigned FirstInputOp = Predicated ? 2 : 1;26572658// Two input vectors and the input carry flag2659Ops.push_back(N->getOperand(FirstInputOp));2660Ops.push_back(N->getOperand(FirstInputOp + 1));2661SDValue CarryIn = N->getOperand(FirstInputOp + 2);2662ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);2663uint32_t CarryMask = 1 << 29;2664uint32_t CarryExpected = Add ? 0 : CarryMask;2665if (CarryInConstant &&2666(CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {2667Opcode = OpcodeWithNoCarry;2668} else {2669Ops.push_back(CarryIn);2670Opcode = OpcodeWithCarry;2671}26722673if (Predicated)2674AddMVEPredicateToOps(Ops, Loc,2675N->getOperand(FirstInputOp + 3), // predicate2676N->getOperand(FirstInputOp - 1)); // inactive2677else2678AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));26792680CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));2681}26822683void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {2684SDLoc Loc(N);2685SmallVector<SDValue, 8> Ops;26862687// One vector input, followed by a 32-bit word of bits to shift in2688// and then an immediate shift count2689Ops.push_back(N->getOperand(1));2690Ops.push_back(N->getOperand(2));2691int32_t ImmValue = N->getConstantOperandVal(3);2692Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count26932694if (Predicated)2695AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));2696else2697AddEmptyMVEPredicateToOps(Ops, Loc);26982699CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));2700}27012702static bool SDValueToConstBool(SDValue SDVal) {2703assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");2704ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);2705uint64_t Value = SDValConstant->getZExtValue();2706assert((Value == 0 || Value == 1) && "expected value 0 or 1");2707return Value;2708}27092710void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,2711const uint16_t *OpcodesS,2712const uint16_t *OpcodesU,2713size_t Stride, size_t TySize) {2714assert(TySize < Stride && "Invalid TySize");2715bool IsUnsigned = SDValueToConstBool(N->getOperand(1));2716bool IsSub = SDValueToConstBool(N->getOperand(2));2717bool IsExchange = SDValueToConstBool(N->getOperand(3));2718if (IsUnsigned) {2719assert(!IsSub &&2720"Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");2721assert(!IsExchange &&2722"Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");2723}27242725auto OpIsZero = [N](size_t OpNo) {2726return isNullConstant(N->getOperand(OpNo));2727};27282729// If the input accumulator value is not zero, select an instruction with2730// accumulator, otherwise select an instruction without accumulator2731bool IsAccum = !(OpIsZero(4) && OpIsZero(5));27322733const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;2734if (IsSub)2735Opcodes += 4 * Stride;2736if (IsExchange)2737Opcodes += 2 * Stride;2738if (IsAccum)2739Opcodes += Stride;2740uint16_t Opcode = Opcodes[TySize];27412742SDLoc Loc(N);2743SmallVector<SDValue, 8> Ops;2744// Push the accumulator operands, if they are used2745if (IsAccum) {2746Ops.push_back(N->getOperand(4));2747Ops.push_back(N->getOperand(5));2748}2749// Push the two vector operands2750Ops.push_back(N->getOperand(6));2751Ops.push_back(N->getOperand(7));27522753if (Predicated)2754AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));2755else2756AddEmptyMVEPredicateToOps(Ops, Loc);27572758CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));2759}27602761void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,2762const uint16_t *OpcodesS,2763const uint16_t *OpcodesU) {2764EVT VecTy = N->getOperand(6).getValueType();2765size_t SizeIndex;2766switch (VecTy.getVectorElementType().getSizeInBits()) {2767case 16:2768SizeIndex = 0;2769break;2770case 32:2771SizeIndex = 1;2772break;2773default:2774llvm_unreachable("bad vector element size");2775}27762777SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);2778}27792780void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,2781const uint16_t *OpcodesS,2782const uint16_t *OpcodesU) {2783assert(2784N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==278532 &&2786"bad vector element size");2787SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);2788}27892790void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,2791const uint16_t *const *Opcodes,2792bool HasWriteback) {2793EVT VT = N->getValueType(0);2794SDLoc Loc(N);27952796const uint16_t *OurOpcodes;2797switch (VT.getVectorElementType().getSizeInBits()) {2798case 8:2799OurOpcodes = Opcodes[0];2800break;2801case 16:2802OurOpcodes = Opcodes[1];2803break;2804case 32:2805OurOpcodes = Opcodes[2];2806break;2807default:2808llvm_unreachable("bad vector element size in SelectMVE_VLD");2809}28102811EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);2812SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};2813unsigned PtrOperand = HasWriteback ? 1 : 2;28142815auto Data = SDValue(2816CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);2817SDValue Chain = N->getOperand(0);2818// Add a MVE_VLDn instruction for each Vec, except the last2819for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {2820SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};2821auto LoadInst =2822CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);2823Data = SDValue(LoadInst, 0);2824Chain = SDValue(LoadInst, 1);2825transferMemOperands(N, LoadInst);2826}2827// The last may need a writeback on it2828if (HasWriteback)2829ResultTys = {DataTy, MVT::i32, MVT::Other};2830SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};2831auto LoadInst =2832CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);2833transferMemOperands(N, LoadInst);28342835unsigned i;2836for (i = 0; i < NumVecs; i++)2837ReplaceUses(SDValue(N, i),2838CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,2839SDValue(LoadInst, 0)));2840if (HasWriteback)2841ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));2842ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));2843CurDAG->RemoveDeadNode(N);2844}28452846void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,2847bool Wrapping, bool Predicated) {2848EVT VT = N->getValueType(0);2849SDLoc Loc(N);28502851uint16_t Opcode;2852switch (VT.getScalarSizeInBits()) {2853case 8:2854Opcode = Opcodes[0];2855break;2856case 16:2857Opcode = Opcodes[1];2858break;2859case 32:2860Opcode = Opcodes[2];2861break;2862default:2863llvm_unreachable("bad vector element size in SelectMVE_VxDUP");2864}28652866SmallVector<SDValue, 8> Ops;2867unsigned OpIdx = 1;28682869SDValue Inactive;2870if (Predicated)2871Inactive = N->getOperand(OpIdx++);28722873Ops.push_back(N->getOperand(OpIdx++)); // base2874if (Wrapping)2875Ops.push_back(N->getOperand(OpIdx++)); // limit28762877SDValue ImmOp = N->getOperand(OpIdx++); // step2878int ImmValue = ImmOp->getAsZExtVal();2879Ops.push_back(getI32Imm(ImmValue, Loc));28802881if (Predicated)2882AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);2883else2884AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));28852886CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));2887}28882889void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,2890size_t NumExtraOps, bool HasAccum) {2891bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();2892SDLoc Loc(N);2893SmallVector<SDValue, 8> Ops;28942895unsigned OpIdx = 1;28962897// Convert and append the immediate operand designating the coprocessor.2898SDValue ImmCorpoc = N->getOperand(OpIdx++);2899uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();2900Ops.push_back(getI32Imm(ImmCoprocVal, Loc));29012902// For accumulating variants copy the low and high order parts of the2903// accumulator into a register pair and add it to the operand vector.2904if (HasAccum) {2905SDValue AccLo = N->getOperand(OpIdx++);2906SDValue AccHi = N->getOperand(OpIdx++);2907if (IsBigEndian)2908std::swap(AccLo, AccHi);2909Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));2910}29112912// Copy extra operands as-is.2913for (size_t I = 0; I < NumExtraOps; I++)2914Ops.push_back(N->getOperand(OpIdx++));29152916// Convert and append the immediate operand2917SDValue Imm = N->getOperand(OpIdx);2918uint32_t ImmVal = Imm->getAsZExtVal();2919Ops.push_back(getI32Imm(ImmVal, Loc));29202921// Accumulating variants are IT-predicable, add predicate operands.2922if (HasAccum) {2923SDValue Pred = getAL(CurDAG, Loc);2924SDValue PredReg = CurDAG->getRegister(0, MVT::i32);2925Ops.push_back(Pred);2926Ops.push_back(PredReg);2927}29282929// Create the CDE intruction2930SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);2931SDValue ResultPair = SDValue(InstrNode, 0);29322933// The original intrinsic had two outputs, and the output of the dual-register2934// CDE instruction is a register pair. We need to extract the two subregisters2935// and replace all uses of the original outputs with the extracted2936// subregisters.2937uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};2938if (IsBigEndian)2939std::swap(SubRegs[0], SubRegs[1]);29402941for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {2942if (SDValue(N, ResIdx).use_empty())2943continue;2944SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,2945MVT::i32, ResultPair);2946ReplaceUses(SDValue(N, ResIdx), SubReg);2947}29482949CurDAG->RemoveDeadNode(N);2950}29512952void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,2953bool isUpdating, unsigned NumVecs,2954const uint16_t *DOpcodes,2955const uint16_t *QOpcodes0,2956const uint16_t *QOpcodes1) {2957assert(Subtarget->hasNEON());2958assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");2959SDLoc dl(N);29602961SDValue MemAddr, Align;2962unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;2963if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))2964return;29652966SDValue Chain = N->getOperand(0);2967EVT VT = N->getValueType(0);2968bool is64BitVector = VT.is64BitVector();29692970unsigned Alignment = 0;2971if (NumVecs != 3) {2972Alignment = Align->getAsZExtVal();2973unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;2974if (Alignment > NumBytes)2975Alignment = NumBytes;2976if (Alignment < 8 && Alignment < NumBytes)2977Alignment = 0;2978// Alignment must be a power of two; make sure of that.2979Alignment = (Alignment & -Alignment);2980if (Alignment == 1)2981Alignment = 0;2982}2983Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);29842985unsigned OpcodeIndex;2986switch (VT.getSimpleVT().SimpleTy) {2987default: llvm_unreachable("unhandled vld-dup type");2988case MVT::v8i8:2989case MVT::v16i8: OpcodeIndex = 0; break;2990case MVT::v4i16:2991case MVT::v8i16:2992case MVT::v4f16:2993case MVT::v8f16:2994case MVT::v4bf16:2995case MVT::v8bf16:2996OpcodeIndex = 1; break;2997case MVT::v2f32:2998case MVT::v2i32:2999case MVT::v4f32:3000case MVT::v4i32: OpcodeIndex = 2; break;3001case MVT::v1f64:3002case MVT::v1i64: OpcodeIndex = 3; break;3003}30043005unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;3006if (!is64BitVector)3007ResTyElts *= 2;3008EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);30093010std::vector<EVT> ResTys;3011ResTys.push_back(ResTy);3012if (isUpdating)3013ResTys.push_back(MVT::i32);3014ResTys.push_back(MVT::Other);30153016SDValue Pred = getAL(CurDAG, dl);3017SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);30183019SmallVector<SDValue, 6> Ops;3020Ops.push_back(MemAddr);3021Ops.push_back(Align);3022unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex]3023: (NumVecs == 1) ? QOpcodes0[OpcodeIndex]3024: QOpcodes1[OpcodeIndex];3025if (isUpdating) {3026SDValue Inc = N->getOperand(2);3027bool IsImmUpdate =3028isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);3029if (IsImmUpdate) {3030if (!isVLDfixed(Opc))3031Ops.push_back(Reg0);3032} else {3033if (isVLDfixed(Opc))3034Opc = getVLDSTRegisterUpdateOpcode(Opc);3035Ops.push_back(Inc);3036}3037}3038if (is64BitVector || NumVecs == 1) {3039// Double registers and VLD1 quad registers are directly supported.3040} else {3041SDValue ImplDef = SDValue(3042CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);3043const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};3044SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,3045MVT::Other, OpsA);3046Ops.push_back(SDValue(VLdA, 0));3047Chain = SDValue(VLdA, 1);3048}30493050Ops.push_back(Pred);3051Ops.push_back(Reg0);3052Ops.push_back(Chain);30533054SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);30553056// Transfer memoperands.3057MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();3058CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});30593060// Extract the subregisters.3061if (NumVecs == 1) {3062ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));3063} else {3064SDValue SuperReg = SDValue(VLdDup, 0);3065static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");3066unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;3067for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {3068ReplaceUses(SDValue(N, Vec),3069CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));3070}3071}3072ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));3073if (isUpdating)3074ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));3075CurDAG->RemoveDeadNode(N);3076}30773078bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {3079if (!Subtarget->hasMVEIntegerOps())3080return false;30813082SDLoc dl(N);30833084// We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and3085// extracts of v8f16 and v8i16 vectors. Check that we have two adjacent3086// inserts of the correct type:3087SDValue Ins1 = SDValue(N, 0);3088SDValue Ins2 = N->getOperand(0);3089EVT VT = Ins1.getValueType();3090if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||3091!isa<ConstantSDNode>(Ins1.getOperand(2)) ||3092!isa<ConstantSDNode>(Ins2.getOperand(2)) ||3093(VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))3094return false;30953096unsigned Lane1 = Ins1.getConstantOperandVal(2);3097unsigned Lane2 = Ins2.getConstantOperandVal(2);3098if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)3099return false;31003101// If the inserted values will be able to use T/B already, leave it to the3102// existing tablegen patterns. For example VCVTT/VCVTB.3103SDValue Val1 = Ins1.getOperand(1);3104SDValue Val2 = Ins2.getOperand(1);3105if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)3106return false;31073108// Check if the inserted values are both extracts.3109if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||3110Val1.getOpcode() == ARMISD::VGETLANEu) &&3111(Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||3112Val2.getOpcode() == ARMISD::VGETLANEu) &&3113isa<ConstantSDNode>(Val1.getOperand(1)) &&3114isa<ConstantSDNode>(Val2.getOperand(1)) &&3115(Val1.getOperand(0).getValueType() == MVT::v8f16 ||3116Val1.getOperand(0).getValueType() == MVT::v8i16) &&3117(Val2.getOperand(0).getValueType() == MVT::v8f16 ||3118Val2.getOperand(0).getValueType() == MVT::v8i16)) {3119unsigned ExtractLane1 = Val1.getConstantOperandVal(1);3120unsigned ExtractLane2 = Val2.getConstantOperandVal(1);31213122// If the two extracted lanes are from the same place and adjacent, this3123// simplifies into a f32 lane move.3124if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&3125ExtractLane1 == ExtractLane2 + 1) {3126SDValue NewExt = CurDAG->getTargetExtractSubreg(3127ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));3128SDValue NewIns = CurDAG->getTargetInsertSubreg(3129ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),3130NewExt);3131ReplaceUses(Ins1, NewIns);3132return true;3133}31343135// Else v8i16 pattern of an extract and an insert, with a optional vmovx for3136// extracting odd lanes.3137if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {3138SDValue Inp1 = CurDAG->getTargetExtractSubreg(3139ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));3140SDValue Inp2 = CurDAG->getTargetExtractSubreg(3141ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));3142if (ExtractLane1 % 2 != 0)3143Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);3144if (ExtractLane2 % 2 != 0)3145Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);3146SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);3147SDValue NewIns =3148CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,3149Ins2.getOperand(0), SDValue(VINS, 0));3150ReplaceUses(Ins1, NewIns);3151return true;3152}3153}31543155// The inserted values are not extracted - if they are f16 then insert them3156// directly using a VINS.3157if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {3158SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);3159SDValue NewIns =3160CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,3161Ins2.getOperand(0), SDValue(VINS, 0));3162ReplaceUses(Ins1, NewIns);3163return true;3164}31653166return false;3167}31683169bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,3170SDNode *FMul,3171bool IsUnsigned,3172bool FixedToFloat) {3173auto Type = N->getValueType(0);3174unsigned ScalarBits = Type.getScalarSizeInBits();3175if (ScalarBits > 32)3176return false;31773178SDNodeFlags FMulFlags = FMul->getFlags();3179// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is3180// allowed in 16 bit unsigned floats3181if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)3182return false;31833184SDValue ImmNode = FMul->getOperand(1);3185SDValue VecVal = FMul->getOperand(0);3186if (VecVal->getOpcode() == ISD::UINT_TO_FP ||3187VecVal->getOpcode() == ISD::SINT_TO_FP)3188VecVal = VecVal->getOperand(0);31893190if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)3191return false;31923193if (ImmNode.getOpcode() == ISD::BITCAST) {3194if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)3195return false;3196ImmNode = ImmNode.getOperand(0);3197}31983199if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)3200return false;32013202APFloat ImmAPF(0.0f);3203switch (ImmNode.getOpcode()) {3204case ARMISD::VMOVIMM:3205case ARMISD::VDUP: {3206if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))3207return false;3208unsigned Imm = ImmNode.getConstantOperandVal(0);3209if (ImmNode.getOpcode() == ARMISD::VMOVIMM)3210Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);3211ImmAPF =3212APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),3213APInt(ScalarBits, Imm));3214break;3215}3216case ARMISD::VMOVFPIMM: {3217ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));3218break;3219}3220default:3221return false;3222}32233224// Where n is the number of fractional bits, multiplying by 2^n will convert3225// from float to fixed and multiplying by 2^-n will convert from fixed to3226// float. Taking log2 of the factor (after taking the inverse in the case of3227// float to fixed) will give n.3228APFloat ToConvert = ImmAPF;3229if (FixedToFloat) {3230if (!ImmAPF.getExactInverse(&ToConvert))3231return false;3232}3233APSInt Converted(64, false);3234bool IsExact;3235ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,3236&IsExact);3237if (!IsExact || !Converted.isPowerOf2())3238return false;32393240unsigned FracBits = Converted.logBase2();3241if (FracBits > ScalarBits)3242return false;32433244SmallVector<SDValue, 3> Ops{3245VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};3246AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);32473248unsigned int Opcode;3249switch (ScalarBits) {3250case 16:3251if (FixedToFloat)3252Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;3253else3254Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;3255break;3256case 32:3257if (FixedToFloat)3258Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;3259else3260Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;3261break;3262default:3263llvm_unreachable("unexpected number of scalar bits");3264break;3265}32663267ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));3268return true;3269}32703271bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {3272// Transform a floating-point to fixed-point conversion to a VCVT3273if (!Subtarget->hasMVEFloatOps())3274return false;3275EVT Type = N->getValueType(0);3276if (!Type.isVector())3277return false;3278unsigned int ScalarBits = Type.getScalarSizeInBits();32793280bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||3281N->getOpcode() == ISD::FP_TO_UINT_SAT;3282SDNode *Node = N->getOperand(0).getNode();32833284// floating-point to fixed-point with one fractional bit gets turned into an3285// FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))3286if (Node->getOpcode() == ISD::FADD) {3287if (Node->getOperand(0) != Node->getOperand(1))3288return false;3289SDNodeFlags Flags = Node->getFlags();3290// The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is3291// allowed in 16 bit unsigned floats3292if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)3293return false;32943295unsigned Opcode;3296switch (ScalarBits) {3297case 16:3298Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;3299break;3300case 32:3301Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;3302break;3303}3304SmallVector<SDValue, 3> Ops{Node->getOperand(0),3305CurDAG->getConstant(1, dl, MVT::i32)};3306AddEmptyMVEPredicateToOps(Ops, dl, Type);33073308ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));3309return true;3310}33113312if (Node->getOpcode() != ISD::FMUL)3313return false;33143315return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);3316}33173318bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {3319// Transform a fixed-point to floating-point conversion to a VCVT3320if (!Subtarget->hasMVEFloatOps())3321return false;3322auto Type = N->getValueType(0);3323if (!Type.isVector())3324return false;33253326auto LHS = N->getOperand(0);3327if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)3328return false;33293330return transformFixedFloatingPointConversion(3331N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);3332}33333334bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {3335if (!Subtarget->hasV6T2Ops())3336return false;33373338unsigned Opc = isSigned3339? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)3340: (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);3341SDLoc dl(N);33423343// For unsigned extracts, check for a shift right and mask3344unsigned And_imm = 0;3345if (N->getOpcode() == ISD::AND) {3346if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {33473348// The immediate is a mask of the low bits iff imm & (imm+1) == 03349if (And_imm & (And_imm + 1))3350return false;33513352unsigned Srl_imm = 0;3353if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,3354Srl_imm)) {3355assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");33563357// Mask off the unnecessary bits of the AND immediate; normally3358// DAGCombine will do this, but that might not happen if3359// targetShrinkDemandedConstant chooses a different immediate.3360And_imm &= -1U >> Srl_imm;33613362// Note: The width operand is encoded as width-1.3363unsigned Width = llvm::countr_one(And_imm) - 1;3364unsigned LSB = Srl_imm;33653366SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);33673368if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {3369// It's cheaper to use a right shift to extract the top bits.3370if (Subtarget->isThumb()) {3371Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;3372SDValue Ops[] = { N->getOperand(0).getOperand(0),3373CurDAG->getTargetConstant(LSB, dl, MVT::i32),3374getAL(CurDAG, dl), Reg0, Reg0 };3375CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3376return true;3377}33783379// ARM models shift instructions as MOVsi with shifter operand.3380ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);3381SDValue ShOpc =3382CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,3383MVT::i32);3384SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,3385getAL(CurDAG, dl), Reg0, Reg0 };3386CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);3387return true;3388}33893390assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");3391SDValue Ops[] = { N->getOperand(0).getOperand(0),3392CurDAG->getTargetConstant(LSB, dl, MVT::i32),3393CurDAG->getTargetConstant(Width, dl, MVT::i32),3394getAL(CurDAG, dl), Reg0 };3395CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3396return true;3397}3398}3399return false;3400}34013402// Otherwise, we're looking for a shift of a shift3403unsigned Shl_imm = 0;3404if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {3405assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");3406unsigned Srl_imm = 0;3407if (isInt32Immediate(N->getOperand(1), Srl_imm)) {3408assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");3409// Note: The width operand is encoded as width-1.3410unsigned Width = 32 - Srl_imm - 1;3411int LSB = Srl_imm - Shl_imm;3412if (LSB < 0)3413return false;3414SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);3415assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");3416SDValue Ops[] = { N->getOperand(0).getOperand(0),3417CurDAG->getTargetConstant(LSB, dl, MVT::i32),3418CurDAG->getTargetConstant(Width, dl, MVT::i32),3419getAL(CurDAG, dl), Reg0 };3420CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3421return true;3422}3423}34243425// Or we are looking for a shift of an and, with a mask operand3426if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&3427isShiftedMask_32(And_imm)) {3428unsigned Srl_imm = 0;3429unsigned LSB = llvm::countr_zero(And_imm);3430// Shift must be the same as the ands lsb3431if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {3432assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");3433unsigned MSB = llvm::Log2_32(And_imm);3434// Note: The width operand is encoded as width-1.3435unsigned Width = MSB - LSB;3436SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);3437assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");3438SDValue Ops[] = { N->getOperand(0).getOperand(0),3439CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),3440CurDAG->getTargetConstant(Width, dl, MVT::i32),3441getAL(CurDAG, dl), Reg0 };3442CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3443return true;3444}3445}34463447if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {3448unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();3449unsigned LSB = 0;3450if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&3451!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))3452return false;34533454if (LSB + Width > 32)3455return false;34563457SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);3458assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");3459SDValue Ops[] = { N->getOperand(0).getOperand(0),3460CurDAG->getTargetConstant(LSB, dl, MVT::i32),3461CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),3462getAL(CurDAG, dl), Reg0 };3463CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3464return true;3465}34663467return false;3468}34693470/// Target-specific DAG combining for ISD::SUB.3471/// Target-independent combining lowers SELECT_CC nodes of the form3472/// select_cc setg[ge] X, 0, X, -X3473/// select_cc setgt X, -1, X, -X3474/// select_cc setl[te] X, 0, -X, X3475/// select_cc setlt X, 1, -X, X3476/// which represent Integer ABS into:3477/// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)3478/// ARM instruction selection detects the latter and matches it to3479/// ARM::ABS or ARM::t2ABS machine node.3480bool ARMDAGToDAGISel::tryABSOp(SDNode *N){3481SDValue SUBSrc0 = N->getOperand(0);3482SDValue SUBSrc1 = N->getOperand(1);3483EVT VT = N->getValueType(0);34843485if (Subtarget->isThumb1Only())3486return false;34873488if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)3489return false;34903491SDValue XORSrc0 = SUBSrc0.getOperand(0);3492SDValue XORSrc1 = SUBSrc0.getOperand(1);3493SDValue SRASrc0 = SUBSrc1.getOperand(0);3494SDValue SRASrc1 = SUBSrc1.getOperand(1);3495ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1);3496EVT XType = SRASrc0.getValueType();3497unsigned Size = XType.getSizeInBits() - 1;34983499if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&3500SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {3501unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;3502CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);3503return true;3504}35053506return false;3507}35083509/// We've got special pseudo-instructions for these3510void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {3511unsigned Opcode;3512EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();3513if (MemTy == MVT::i8)3514Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;3515else if (MemTy == MVT::i16)3516Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;3517else if (MemTy == MVT::i32)3518Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;3519else3520llvm_unreachable("Unknown AtomicCmpSwap type");35213522SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),3523N->getOperand(0)};3524SDNode *CmpSwap = CurDAG->getMachineNode(3525Opcode, SDLoc(N),3526CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);35273528MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();3529CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});35303531ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));3532ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));3533CurDAG->RemoveDeadNode(N);3534}35353536static std::optional<std::pair<unsigned, unsigned>>3537getContiguousRangeOfSetBits(const APInt &A) {3538unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;3539unsigned LastOne = A.countr_zero();3540if (A.popcount() != (FirstOne - LastOne + 1))3541return std::nullopt;3542return std::make_pair(FirstOne, LastOne);3543}35443545void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {3546assert(N->getOpcode() == ARMISD::CMPZ);3547SwitchEQNEToPLMI = false;35483549if (!Subtarget->isThumb())3550// FIXME: Work out whether it is profitable to do this in A32 mode - LSL and3551// LSR don't exist as standalone instructions - they need the barrel shifter.3552return;35533554// select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))3555SDValue And = N->getOperand(0);3556if (!And->hasOneUse())3557return;35583559SDValue Zero = N->getOperand(1);3560if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)3561return;3562SDValue X = And.getOperand(0);3563auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));35643565if (!C)3566return;3567auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());3568if (!Range)3569return;35703571// There are several ways to lower this:3572SDNode *NewN;3573SDLoc dl(N);35743575auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {3576if (Subtarget->isThumb2()) {3577Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;3578SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),3579getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),3580CurDAG->getRegister(0, MVT::i32) };3581return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);3582} else {3583SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,3584CurDAG->getTargetConstant(Imm, dl, MVT::i32),3585getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};3586return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);3587}3588};35893590if (Range->second == 0) {3591// 1. Mask includes the LSB -> Simply shift the top N bits off3592NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);3593ReplaceNode(And.getNode(), NewN);3594} else if (Range->first == 31) {3595// 2. Mask includes the MSB -> Simply shift the bottom N bits off3596NewN = EmitShift(ARM::tLSRri, X, Range->second);3597ReplaceNode(And.getNode(), NewN);3598} else if (Range->first == Range->second) {3599// 3. Only one bit is set. We can shift this into the sign bit and use a3600// PL/MI comparison.3601NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);3602ReplaceNode(And.getNode(), NewN);36033604SwitchEQNEToPLMI = true;3605} else if (!Subtarget->hasV6T2Ops()) {3606// 4. Do a double shift to clear bottom and top bits, but only in3607// thumb-1 mode as in thumb-2 we can use UBFX.3608NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);3609NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),3610Range->second + (31 - Range->first));3611ReplaceNode(And.getNode(), NewN);3612}3613}36143615static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],3616unsigned Opc128[3]) {3617assert((VT.is64BitVector() || VT.is128BitVector()) &&3618"Unexpected vector shuffle length");3619switch (VT.getScalarSizeInBits()) {3620default:3621llvm_unreachable("Unexpected vector shuffle element size");3622case 8:3623return VT.is64BitVector() ? Opc64[0] : Opc128[0];3624case 16:3625return VT.is64BitVector() ? Opc64[1] : Opc128[1];3626case 32:3627return VT.is64BitVector() ? Opc64[2] : Opc128[2];3628}3629}36303631void ARMDAGToDAGISel::Select(SDNode *N) {3632SDLoc dl(N);36333634if (N->isMachineOpcode()) {3635N->setNodeId(-1);3636return; // Already selected.3637}36383639switch (N->getOpcode()) {3640default: break;3641case ISD::STORE: {3642// For Thumb1, match an sp-relative store in C++. This is a little3643// unfortunate, but I don't think I can make the chain check work3644// otherwise. (The chain of the store has to be the same as the chain3645// of the CopyFromReg, or else we can't replace the CopyFromReg with3646// a direct reference to "SP".)3647//3648// This is only necessary on Thumb1 because Thumb1 sp-relative stores use3649// a different addressing mode from other four-byte stores.3650//3651// This pattern usually comes up with call arguments.3652StoreSDNode *ST = cast<StoreSDNode>(N);3653SDValue Ptr = ST->getBasePtr();3654if (Subtarget->isThumb1Only() && ST->isUnindexed()) {3655int RHSC = 0;3656if (Ptr.getOpcode() == ISD::ADD &&3657isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))3658Ptr = Ptr.getOperand(0);36593660if (Ptr.getOpcode() == ISD::CopyFromReg &&3661cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&3662Ptr.getOperand(0) == ST->getChain()) {3663SDValue Ops[] = {ST->getValue(),3664CurDAG->getRegister(ARM::SP, MVT::i32),3665CurDAG->getTargetConstant(RHSC, dl, MVT::i32),3666getAL(CurDAG, dl),3667CurDAG->getRegister(0, MVT::i32),3668ST->getChain()};3669MachineSDNode *ResNode =3670CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);3671MachineMemOperand *MemOp = ST->getMemOperand();3672CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});3673ReplaceNode(N, ResNode);3674return;3675}3676}3677break;3678}3679case ISD::WRITE_REGISTER:3680if (tryWriteRegister(N))3681return;3682break;3683case ISD::READ_REGISTER:3684if (tryReadRegister(N))3685return;3686break;3687case ISD::INLINEASM:3688case ISD::INLINEASM_BR:3689if (tryInlineAsm(N))3690return;3691break;3692case ISD::SUB:3693// Select special operations if SUB node forms integer ABS pattern3694if (tryABSOp(N))3695return;3696// Other cases are autogenerated.3697break;3698case ISD::Constant: {3699unsigned Val = N->getAsZExtVal();3700// If we can't materialize the constant we need to use a literal pool3701if (ConstantMaterializationCost(Val, Subtarget) > 2 &&3702!Subtarget->genExecuteOnly()) {3703SDValue CPIdx = CurDAG->getTargetConstantPool(3704ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),3705TLI->getPointerTy(CurDAG->getDataLayout()));37063707SDNode *ResNode;3708if (Subtarget->isThumb()) {3709SDValue Ops[] = {3710CPIdx,3711getAL(CurDAG, dl),3712CurDAG->getRegister(0, MVT::i32),3713CurDAG->getEntryNode()3714};3715ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,3716Ops);3717} else {3718SDValue Ops[] = {3719CPIdx,3720CurDAG->getTargetConstant(0, dl, MVT::i32),3721getAL(CurDAG, dl),3722CurDAG->getRegister(0, MVT::i32),3723CurDAG->getEntryNode()3724};3725ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,3726Ops);3727}3728// Annotate the Node with memory operand information so that MachineInstr3729// queries work properly. This e.g. gives the register allocation the3730// required information for rematerialization.3731MachineFunction& MF = CurDAG->getMachineFunction();3732MachineMemOperand *MemOp =3733MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),3734MachineMemOperand::MOLoad, 4, Align(4));37353736CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});37373738ReplaceNode(N, ResNode);3739return;3740}37413742// Other cases are autogenerated.3743break;3744}3745case ISD::FrameIndex: {3746// Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.3747int FI = cast<FrameIndexSDNode>(N)->getIndex();3748SDValue TFI = CurDAG->getTargetFrameIndex(3749FI, TLI->getPointerTy(CurDAG->getDataLayout()));3750if (Subtarget->isThumb1Only()) {3751// Set the alignment of the frame object to 4, to avoid having to generate3752// more than one ADD3753MachineFrameInfo &MFI = MF->getFrameInfo();3754if (MFI.getObjectAlign(FI) < Align(4))3755MFI.setObjectAlignment(FI, Align(4));3756CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,3757CurDAG->getTargetConstant(0, dl, MVT::i32));3758return;3759} else {3760unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?3761ARM::t2ADDri : ARM::ADDri);3762SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),3763getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),3764CurDAG->getRegister(0, MVT::i32) };3765CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);3766return;3767}3768}3769case ISD::INSERT_VECTOR_ELT: {3770if (tryInsertVectorElt(N))3771return;3772break;3773}3774case ISD::SRL:3775if (tryV6T2BitfieldExtractOp(N, false))3776return;3777break;3778case ISD::SIGN_EXTEND_INREG:3779case ISD::SRA:3780if (tryV6T2BitfieldExtractOp(N, true))3781return;3782break;3783case ISD::FP_TO_UINT:3784case ISD::FP_TO_SINT:3785case ISD::FP_TO_UINT_SAT:3786case ISD::FP_TO_SINT_SAT:3787if (tryFP_TO_INT(N, dl))3788return;3789break;3790case ISD::FMUL:3791if (tryFMULFixed(N, dl))3792return;3793break;3794case ISD::MUL:3795if (Subtarget->isThumb1Only())3796break;3797if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {3798unsigned RHSV = C->getZExtValue();3799if (!RHSV) break;3800if (isPowerOf2_32(RHSV-1)) { // 2^n+1?3801unsigned ShImm = Log2_32(RHSV-1);3802if (ShImm >= 32)3803break;3804SDValue V = N->getOperand(0);3805ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);3806SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);3807SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);3808if (Subtarget->isThumb()) {3809SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };3810CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);3811return;3812} else {3813SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,3814Reg0 };3815CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);3816return;3817}3818}3819if (isPowerOf2_32(RHSV+1)) { // 2^n-1?3820unsigned ShImm = Log2_32(RHSV+1);3821if (ShImm >= 32)3822break;3823SDValue V = N->getOperand(0);3824ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);3825SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);3826SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);3827if (Subtarget->isThumb()) {3828SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };3829CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);3830return;3831} else {3832SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,3833Reg0 };3834CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);3835return;3836}3837}3838}3839break;3840case ISD::AND: {3841// Check for unsigned bitfield extract3842if (tryV6T2BitfieldExtractOp(N, false))3843return;38443845// If an immediate is used in an AND node, it is possible that the immediate3846// can be more optimally materialized when negated. If this is the case we3847// can negate the immediate and use a BIC instead.3848auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));3849if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {3850uint32_t Imm = (uint32_t) N1C->getZExtValue();38513852// In Thumb2 mode, an AND can take a 12-bit immediate. If this3853// immediate can be negated and fit in the immediate operand of3854// a t2BIC, don't do any manual transform here as this can be3855// handled by the generic ISel machinery.3856bool PreferImmediateEncoding =3857Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));3858if (!PreferImmediateEncoding &&3859ConstantMaterializationCost(Imm, Subtarget) >3860ConstantMaterializationCost(~Imm, Subtarget)) {3861// The current immediate costs more to materialize than a negated3862// immediate, so negate the immediate and use a BIC.3863SDValue NewImm =3864CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32);3865// If the new constant didn't exist before, reposition it in the topological3866// ordering so it is just before N. Otherwise, don't touch its location.3867if (NewImm->getNodeId() == -1)3868CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());38693870if (!Subtarget->hasThumb2()) {3871SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),3872N->getOperand(0), NewImm, getAL(CurDAG, dl),3873CurDAG->getRegister(0, MVT::i32)};3874ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));3875return;3876} else {3877SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),3878CurDAG->getRegister(0, MVT::i32),3879CurDAG->getRegister(0, MVT::i32)};3880ReplaceNode(N,3881CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));3882return;3883}3884}3885}38863887// (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits3888// of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits3889// are entirely contributed by c2 and lower 16-bits are entirely contributed3890// by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).3891// Select it to: "movt x, ((c1 & 0xffff) >> 16)3892EVT VT = N->getValueType(0);3893if (VT != MVT::i32)3894break;3895unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())3896? ARM::t2MOVTi163897: (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);3898if (!Opc)3899break;3900SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);3901N1C = dyn_cast<ConstantSDNode>(N1);3902if (!N1C)3903break;3904if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {3905SDValue N2 = N0.getOperand(1);3906ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);3907if (!N2C)3908break;3909unsigned N1CVal = N1C->getZExtValue();3910unsigned N2CVal = N2C->getZExtValue();3911if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&3912(N1CVal & 0xffffU) == 0xffffU &&3913(N2CVal & 0xffffU) == 0x0U) {3914SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,3915dl, MVT::i32);3916SDValue Ops[] = { N0.getOperand(0), Imm16,3917getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };3918ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));3919return;3920}3921}39223923break;3924}3925case ARMISD::UMAAL: {3926unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;3927SDValue Ops[] = { N->getOperand(0), N->getOperand(1),3928N->getOperand(2), N->getOperand(3),3929getAL(CurDAG, dl),3930CurDAG->getRegister(0, MVT::i32) };3931ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));3932return;3933}3934case ARMISD::UMLAL:{3935if (Subtarget->isThumb()) {3936SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),3937N->getOperand(3), getAL(CurDAG, dl),3938CurDAG->getRegister(0, MVT::i32)};3939ReplaceNode(3940N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));3941return;3942}else{3943SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),3944N->getOperand(3), getAL(CurDAG, dl),3945CurDAG->getRegister(0, MVT::i32),3946CurDAG->getRegister(0, MVT::i32) };3947ReplaceNode(N, CurDAG->getMachineNode(3948Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,3949MVT::i32, MVT::i32, Ops));3950return;3951}3952}3953case ARMISD::SMLAL:{3954if (Subtarget->isThumb()) {3955SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),3956N->getOperand(3), getAL(CurDAG, dl),3957CurDAG->getRegister(0, MVT::i32)};3958ReplaceNode(3959N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));3960return;3961}else{3962SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),3963N->getOperand(3), getAL(CurDAG, dl),3964CurDAG->getRegister(0, MVT::i32),3965CurDAG->getRegister(0, MVT::i32) };3966ReplaceNode(N, CurDAG->getMachineNode(3967Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,3968MVT::i32, MVT::i32, Ops));3969return;3970}3971}3972case ARMISD::SUBE: {3973if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())3974break;3975// Look for a pattern to match SMMLS3976// (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))3977if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||3978N->getOperand(2).getOpcode() != ARMISD::SUBC ||3979!SDValue(N, 1).use_empty())3980break;39813982if (Subtarget->isThumb())3983assert(Subtarget->hasThumb2() &&3984"This pattern should not be generated for Thumb");39853986SDValue SmulLoHi = N->getOperand(1);3987SDValue Subc = N->getOperand(2);3988SDValue Zero = Subc.getOperand(0);39893990if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||3991N->getOperand(1) != SmulLoHi.getValue(1) ||3992N->getOperand(2) != Subc.getValue(1))3993break;39943995unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;3996SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),3997N->getOperand(0), getAL(CurDAG, dl),3998CurDAG->getRegister(0, MVT::i32) };3999ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));4000return;4001}4002case ISD::LOAD: {4003if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))4004return;4005if (Subtarget->isThumb() && Subtarget->hasThumb2()) {4006if (tryT2IndexedLoad(N))4007return;4008} else if (Subtarget->isThumb()) {4009if (tryT1IndexedLoad(N))4010return;4011} else if (tryARMIndexedLoad(N))4012return;4013// Other cases are autogenerated.4014break;4015}4016case ISD::MLOAD:4017if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))4018return;4019// Other cases are autogenerated.4020break;4021case ARMISD::WLSSETUP: {4022SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,4023N->getOperand(0));4024ReplaceUses(N, New);4025CurDAG->RemoveDeadNode(N);4026return;4027}4028case ARMISD::WLS: {4029SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,4030N->getOperand(1), N->getOperand(2),4031N->getOperand(0));4032ReplaceUses(N, New);4033CurDAG->RemoveDeadNode(N);4034return;4035}4036case ARMISD::LE: {4037SDValue Ops[] = { N->getOperand(1),4038N->getOperand(2),4039N->getOperand(0) };4040unsigned Opc = ARM::t2LoopEnd;4041SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);4042ReplaceUses(N, New);4043CurDAG->RemoveDeadNode(N);4044return;4045}4046case ARMISD::LDRD: {4047if (Subtarget->isThumb2())4048break; // TableGen handles isel in this case.4049SDValue Base, RegOffset, ImmOffset;4050const SDValue &Chain = N->getOperand(0);4051const SDValue &Addr = N->getOperand(1);4052SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);4053if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {4054// The register-offset variant of LDRD mandates that the register4055// allocated to RegOffset is not reused in any of the remaining operands.4056// This restriction is currently not enforced. Therefore emitting this4057// variant is explicitly avoided.4058Base = Addr;4059RegOffset = CurDAG->getRegister(0, MVT::i32);4060}4061SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};4062SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,4063{MVT::Untyped, MVT::Other}, Ops);4064SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,4065SDValue(New, 0));4066SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,4067SDValue(New, 0));4068transferMemOperands(N, New);4069ReplaceUses(SDValue(N, 0), Lo);4070ReplaceUses(SDValue(N, 1), Hi);4071ReplaceUses(SDValue(N, 2), SDValue(New, 1));4072CurDAG->RemoveDeadNode(N);4073return;4074}4075case ARMISD::STRD: {4076if (Subtarget->isThumb2())4077break; // TableGen handles isel in this case.4078SDValue Base, RegOffset, ImmOffset;4079const SDValue &Chain = N->getOperand(0);4080const SDValue &Addr = N->getOperand(3);4081SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);4082if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {4083// The register-offset variant of STRD mandates that the register4084// allocated to RegOffset is not reused in any of the remaining operands.4085// This restriction is currently not enforced. Therefore emitting this4086// variant is explicitly avoided.4087Base = Addr;4088RegOffset = CurDAG->getRegister(0, MVT::i32);4089}4090SDNode *RegPair =4091createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));4092SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};4093SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);4094transferMemOperands(N, New);4095ReplaceUses(SDValue(N, 0), SDValue(New, 0));4096CurDAG->RemoveDeadNode(N);4097return;4098}4099case ARMISD::LOOP_DEC: {4100SDValue Ops[] = { N->getOperand(1),4101N->getOperand(2),4102N->getOperand(0) };4103SDNode *Dec =4104CurDAG->getMachineNode(ARM::t2LoopDec, dl,4105CurDAG->getVTList(MVT::i32, MVT::Other), Ops);4106ReplaceUses(N, Dec);4107CurDAG->RemoveDeadNode(N);4108return;4109}4110case ARMISD::BRCOND: {4111// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)4112// Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)4113// Pattern complexity = 6 cost = 1 size = 041144115// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)4116// Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)4117// Pattern complexity = 6 cost = 1 size = 041184119// Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)4120// Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)4121// Pattern complexity = 6 cost = 1 size = 041224123unsigned Opc = Subtarget->isThumb() ?4124((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;4125SDValue Chain = N->getOperand(0);4126SDValue N1 = N->getOperand(1);4127SDValue N2 = N->getOperand(2);4128SDValue N3 = N->getOperand(3);4129SDValue InGlue = N->getOperand(4);4130assert(N1.getOpcode() == ISD::BasicBlock);4131assert(N2.getOpcode() == ISD::Constant);4132assert(N3.getOpcode() == ISD::Register);41334134unsigned CC = (unsigned)N2->getAsZExtVal();41354136if (InGlue.getOpcode() == ARMISD::CMPZ) {4137if (InGlue.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {4138SDValue Int = InGlue.getOperand(0);4139uint64_t ID = Int->getConstantOperandVal(1);41404141// Handle low-overhead loops.4142if (ID == Intrinsic::loop_decrement_reg) {4143SDValue Elements = Int.getOperand(2);4144SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),4145dl, MVT::i32);41464147SDValue Args[] = { Elements, Size, Int.getOperand(0) };4148SDNode *LoopDec =4149CurDAG->getMachineNode(ARM::t2LoopDec, dl,4150CurDAG->getVTList(MVT::i32, MVT::Other),4151Args);4152ReplaceUses(Int.getNode(), LoopDec);41534154SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };4155SDNode *LoopEnd =4156CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);41574158ReplaceUses(N, LoopEnd);4159CurDAG->RemoveDeadNode(N);4160CurDAG->RemoveDeadNode(InGlue.getNode());4161CurDAG->RemoveDeadNode(Int.getNode());4162return;4163}4164}41654166bool SwitchEQNEToPLMI;4167SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);4168InGlue = N->getOperand(4);41694170if (SwitchEQNEToPLMI) {4171switch ((ARMCC::CondCodes)CC) {4172default: llvm_unreachable("CMPZ must be either NE or EQ!");4173case ARMCC::NE:4174CC = (unsigned)ARMCC::MI;4175break;4176case ARMCC::EQ:4177CC = (unsigned)ARMCC::PL;4178break;4179}4180}4181}41824183SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);4184SDValue Ops[] = { N1, Tmp2, N3, Chain, InGlue };4185SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other,4186MVT::Glue, Ops);4187Chain = SDValue(ResNode, 0);4188if (N->getNumValues() == 2) {4189InGlue = SDValue(ResNode, 1);4190ReplaceUses(SDValue(N, 1), InGlue);4191}4192ReplaceUses(SDValue(N, 0),4193SDValue(Chain.getNode(), Chain.getResNo()));4194CurDAG->RemoveDeadNode(N);4195return;4196}41974198case ARMISD::CMPZ: {4199// select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)4200// This allows us to avoid materializing the expensive negative constant.4201// The CMPZ #0 is useless and will be peepholed away but we need to keep it4202// for its glue output.4203SDValue X = N->getOperand(0);4204auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());4205if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {4206int64_t Addend = -C->getSExtValue();42074208SDNode *Add = nullptr;4209// ADDS can be better than CMN if the immediate fits in a4210// 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.4211// Outside that range we can just use a CMN which is 32-bit but has a4212// 12-bit immediate range.4213if (Addend < 1<<8) {4214if (Subtarget->isThumb2()) {4215SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),4216getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),4217CurDAG->getRegister(0, MVT::i32) };4218Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);4219} else {4220unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;4221SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,4222CurDAG->getTargetConstant(Addend, dl, MVT::i32),4223getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};4224Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);4225}4226}4227if (Add) {4228SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};4229CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2);4230}4231}4232// Other cases are autogenerated.4233break;4234}42354236case ARMISD::CMOV: {4237SDValue InGlue = N->getOperand(4);42384239if (InGlue.getOpcode() == ARMISD::CMPZ) {4240bool SwitchEQNEToPLMI;4241SelectCMPZ(InGlue.getNode(), SwitchEQNEToPLMI);42424243if (SwitchEQNEToPLMI) {4244SDValue ARMcc = N->getOperand(2);4245ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();42464247switch (CC) {4248default: llvm_unreachable("CMPZ must be either NE or EQ!");4249case ARMCC::NE:4250CC = ARMCC::MI;4251break;4252case ARMCC::EQ:4253CC = ARMCC::PL;4254break;4255}4256SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);4257SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,4258N->getOperand(3), N->getOperand(4)};4259CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);4260}42614262}4263// Other cases are autogenerated.4264break;4265}4266case ARMISD::VZIP: {4267EVT VT = N->getValueType(0);4268// vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.4269unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};4270unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};4271unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);4272SDValue Pred = getAL(CurDAG, dl);4273SDValue PredReg = CurDAG->getRegister(0, MVT::i32);4274SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};4275ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));4276return;4277}4278case ARMISD::VUZP: {4279EVT VT = N->getValueType(0);4280// vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.4281unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};4282unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};4283unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);4284SDValue Pred = getAL(CurDAG, dl);4285SDValue PredReg = CurDAG->getRegister(0, MVT::i32);4286SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};4287ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));4288return;4289}4290case ARMISD::VTRN: {4291EVT VT = N->getValueType(0);4292unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};4293unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};4294unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);4295SDValue Pred = getAL(CurDAG, dl);4296SDValue PredReg = CurDAG->getRegister(0, MVT::i32);4297SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};4298ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));4299return;4300}4301case ARMISD::BUILD_VECTOR: {4302EVT VecVT = N->getValueType(0);4303EVT EltVT = VecVT.getVectorElementType();4304unsigned NumElts = VecVT.getVectorNumElements();4305if (EltVT == MVT::f64) {4306assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");4307ReplaceNode(4308N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));4309return;4310}4311assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");4312if (NumElts == 2) {4313ReplaceNode(4314N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));4315return;4316}4317assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");4318ReplaceNode(N,4319createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),4320N->getOperand(2), N->getOperand(3)));4321return;4322}43234324case ARMISD::VLD1DUP: {4325static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,4326ARM::VLD1DUPd32 };4327static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,4328ARM::VLD1DUPq32 };4329SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);4330return;4331}43324333case ARMISD::VLD2DUP: {4334static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,4335ARM::VLD2DUPd32 };4336SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);4337return;4338}43394340case ARMISD::VLD3DUP: {4341static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,4342ARM::VLD3DUPd16Pseudo,4343ARM::VLD3DUPd32Pseudo };4344SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);4345return;4346}43474348case ARMISD::VLD4DUP: {4349static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,4350ARM::VLD4DUPd16Pseudo,4351ARM::VLD4DUPd32Pseudo };4352SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);4353return;4354}43554356case ARMISD::VLD1DUP_UPD: {4357static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,4358ARM::VLD1DUPd16wb_fixed,4359ARM::VLD1DUPd32wb_fixed };4360static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,4361ARM::VLD1DUPq16wb_fixed,4362ARM::VLD1DUPq32wb_fixed };4363SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);4364return;4365}43664367case ARMISD::VLD2DUP_UPD: {4368static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,4369ARM::VLD2DUPd16wb_fixed,4370ARM::VLD2DUPd32wb_fixed,4371ARM::VLD1q64wb_fixed };4372static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,4373ARM::VLD2DUPq16EvenPseudo,4374ARM::VLD2DUPq32EvenPseudo };4375static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,4376ARM::VLD2DUPq16OddPseudoWB_fixed,4377ARM::VLD2DUPq32OddPseudoWB_fixed };4378SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);4379return;4380}43814382case ARMISD::VLD3DUP_UPD: {4383static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,4384ARM::VLD3DUPd16Pseudo_UPD,4385ARM::VLD3DUPd32Pseudo_UPD,4386ARM::VLD1d64TPseudoWB_fixed };4387static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,4388ARM::VLD3DUPq16EvenPseudo,4389ARM::VLD3DUPq32EvenPseudo };4390static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,4391ARM::VLD3DUPq16OddPseudo_UPD,4392ARM::VLD3DUPq32OddPseudo_UPD };4393SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);4394return;4395}43964397case ARMISD::VLD4DUP_UPD: {4398static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,4399ARM::VLD4DUPd16Pseudo_UPD,4400ARM::VLD4DUPd32Pseudo_UPD,4401ARM::VLD1d64QPseudoWB_fixed };4402static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,4403ARM::VLD4DUPq16EvenPseudo,4404ARM::VLD4DUPq32EvenPseudo };4405static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,4406ARM::VLD4DUPq16OddPseudo_UPD,4407ARM::VLD4DUPq32OddPseudo_UPD };4408SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);4409return;4410}44114412case ARMISD::VLD1_UPD: {4413static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,4414ARM::VLD1d16wb_fixed,4415ARM::VLD1d32wb_fixed,4416ARM::VLD1d64wb_fixed };4417static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,4418ARM::VLD1q16wb_fixed,4419ARM::VLD1q32wb_fixed,4420ARM::VLD1q64wb_fixed };4421SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);4422return;4423}44244425case ARMISD::VLD2_UPD: {4426if (Subtarget->hasNEON()) {4427static const uint16_t DOpcodes[] = {4428ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,4429ARM::VLD1q64wb_fixed};4430static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,4431ARM::VLD2q16PseudoWB_fixed,4432ARM::VLD2q32PseudoWB_fixed};4433SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);4434} else {4435static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,4436ARM::MVE_VLD21_8_wb};4437static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,4438ARM::MVE_VLD21_16_wb};4439static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,4440ARM::MVE_VLD21_32_wb};4441static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};4442SelectMVE_VLD(N, 2, Opcodes, true);4443}4444return;4445}44464447case ARMISD::VLD3_UPD: {4448static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,4449ARM::VLD3d16Pseudo_UPD,4450ARM::VLD3d32Pseudo_UPD,4451ARM::VLD1d64TPseudoWB_fixed};4452static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,4453ARM::VLD3q16Pseudo_UPD,4454ARM::VLD3q32Pseudo_UPD };4455static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,4456ARM::VLD3q16oddPseudo_UPD,4457ARM::VLD3q32oddPseudo_UPD };4458SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);4459return;4460}44614462case ARMISD::VLD4_UPD: {4463if (Subtarget->hasNEON()) {4464static const uint16_t DOpcodes[] = {4465ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,4466ARM::VLD1d64QPseudoWB_fixed};4467static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,4468ARM::VLD4q16Pseudo_UPD,4469ARM::VLD4q32Pseudo_UPD};4470static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,4471ARM::VLD4q16oddPseudo_UPD,4472ARM::VLD4q32oddPseudo_UPD};4473SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);4474} else {4475static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,4476ARM::MVE_VLD42_8,4477ARM::MVE_VLD43_8_wb};4478static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,4479ARM::MVE_VLD42_16,4480ARM::MVE_VLD43_16_wb};4481static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,4482ARM::MVE_VLD42_32,4483ARM::MVE_VLD43_32_wb};4484static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};4485SelectMVE_VLD(N, 4, Opcodes, true);4486}4487return;4488}44894490case ARMISD::VLD1x2_UPD: {4491if (Subtarget->hasNEON()) {4492static const uint16_t DOpcodes[] = {4493ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,4494ARM::VLD1q64wb_fixed};4495static const uint16_t QOpcodes[] = {4496ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,4497ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};4498SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);4499return;4500}4501break;4502}45034504case ARMISD::VLD1x3_UPD: {4505if (Subtarget->hasNEON()) {4506static const uint16_t DOpcodes[] = {4507ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,4508ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};4509static const uint16_t QOpcodes0[] = {4510ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,4511ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};4512static const uint16_t QOpcodes1[] = {4513ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,4514ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};4515SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);4516return;4517}4518break;4519}45204521case ARMISD::VLD1x4_UPD: {4522if (Subtarget->hasNEON()) {4523static const uint16_t DOpcodes[] = {4524ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,4525ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};4526static const uint16_t QOpcodes0[] = {4527ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,4528ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};4529static const uint16_t QOpcodes1[] = {4530ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,4531ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};4532SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);4533return;4534}4535break;4536}45374538case ARMISD::VLD2LN_UPD: {4539static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,4540ARM::VLD2LNd16Pseudo_UPD,4541ARM::VLD2LNd32Pseudo_UPD };4542static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,4543ARM::VLD2LNq32Pseudo_UPD };4544SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);4545return;4546}45474548case ARMISD::VLD3LN_UPD: {4549static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,4550ARM::VLD3LNd16Pseudo_UPD,4551ARM::VLD3LNd32Pseudo_UPD };4552static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,4553ARM::VLD3LNq32Pseudo_UPD };4554SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);4555return;4556}45574558case ARMISD::VLD4LN_UPD: {4559static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,4560ARM::VLD4LNd16Pseudo_UPD,4561ARM::VLD4LNd32Pseudo_UPD };4562static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,4563ARM::VLD4LNq32Pseudo_UPD };4564SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);4565return;4566}45674568case ARMISD::VST1_UPD: {4569static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,4570ARM::VST1d16wb_fixed,4571ARM::VST1d32wb_fixed,4572ARM::VST1d64wb_fixed };4573static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,4574ARM::VST1q16wb_fixed,4575ARM::VST1q32wb_fixed,4576ARM::VST1q64wb_fixed };4577SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr);4578return;4579}45804581case ARMISD::VST2_UPD: {4582if (Subtarget->hasNEON()) {4583static const uint16_t DOpcodes[] = {4584ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,4585ARM::VST1q64wb_fixed};4586static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,4587ARM::VST2q16PseudoWB_fixed,4588ARM::VST2q32PseudoWB_fixed};4589SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);4590return;4591}4592break;4593}45944595case ARMISD::VST3_UPD: {4596static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,4597ARM::VST3d16Pseudo_UPD,4598ARM::VST3d32Pseudo_UPD,4599ARM::VST1d64TPseudoWB_fixed};4600static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,4601ARM::VST3q16Pseudo_UPD,4602ARM::VST3q32Pseudo_UPD };4603static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,4604ARM::VST3q16oddPseudo_UPD,4605ARM::VST3q32oddPseudo_UPD };4606SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);4607return;4608}46094610case ARMISD::VST4_UPD: {4611if (Subtarget->hasNEON()) {4612static const uint16_t DOpcodes[] = {4613ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,4614ARM::VST1d64QPseudoWB_fixed};4615static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,4616ARM::VST4q16Pseudo_UPD,4617ARM::VST4q32Pseudo_UPD};4618static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,4619ARM::VST4q16oddPseudo_UPD,4620ARM::VST4q32oddPseudo_UPD};4621SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);4622return;4623}4624break;4625}46264627case ARMISD::VST1x2_UPD: {4628if (Subtarget->hasNEON()) {4629static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,4630ARM::VST1q16wb_fixed,4631ARM::VST1q32wb_fixed,4632ARM::VST1q64wb_fixed};4633static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,4634ARM::VST1d16QPseudoWB_fixed,4635ARM::VST1d32QPseudoWB_fixed,4636ARM::VST1d64QPseudoWB_fixed };4637SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr);4638return;4639}4640break;4641}46424643case ARMISD::VST1x3_UPD: {4644if (Subtarget->hasNEON()) {4645static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,4646ARM::VST1d16TPseudoWB_fixed,4647ARM::VST1d32TPseudoWB_fixed,4648ARM::VST1d64TPseudoWB_fixed };4649static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,4650ARM::VST1q16LowTPseudo_UPD,4651ARM::VST1q32LowTPseudo_UPD,4652ARM::VST1q64LowTPseudo_UPD };4653static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,4654ARM::VST1q16HighTPseudo_UPD,4655ARM::VST1q32HighTPseudo_UPD,4656ARM::VST1q64HighTPseudo_UPD };4657SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);4658return;4659}4660break;4661}46624663case ARMISD::VST1x4_UPD: {4664if (Subtarget->hasNEON()) {4665static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,4666ARM::VST1d16QPseudoWB_fixed,4667ARM::VST1d32QPseudoWB_fixed,4668ARM::VST1d64QPseudoWB_fixed };4669static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,4670ARM::VST1q16LowQPseudo_UPD,4671ARM::VST1q32LowQPseudo_UPD,4672ARM::VST1q64LowQPseudo_UPD };4673static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,4674ARM::VST1q16HighQPseudo_UPD,4675ARM::VST1q32HighQPseudo_UPD,4676ARM::VST1q64HighQPseudo_UPD };4677SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1);4678return;4679}4680break;4681}4682case ARMISD::VST2LN_UPD: {4683static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,4684ARM::VST2LNd16Pseudo_UPD,4685ARM::VST2LNd32Pseudo_UPD };4686static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,4687ARM::VST2LNq32Pseudo_UPD };4688SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);4689return;4690}46914692case ARMISD::VST3LN_UPD: {4693static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,4694ARM::VST3LNd16Pseudo_UPD,4695ARM::VST3LNd32Pseudo_UPD };4696static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,4697ARM::VST3LNq32Pseudo_UPD };4698SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);4699return;4700}47014702case ARMISD::VST4LN_UPD: {4703static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,4704ARM::VST4LNd16Pseudo_UPD,4705ARM::VST4LNd32Pseudo_UPD };4706static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,4707ARM::VST4LNq32Pseudo_UPD };4708SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);4709return;4710}47114712case ISD::INTRINSIC_VOID:4713case ISD::INTRINSIC_W_CHAIN: {4714unsigned IntNo = N->getConstantOperandVal(1);4715switch (IntNo) {4716default:4717break;47184719case Intrinsic::arm_mrrc:4720case Intrinsic::arm_mrrc2: {4721SDLoc dl(N);4722SDValue Chain = N->getOperand(0);4723unsigned Opc;47244725if (Subtarget->isThumb())4726Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);4727else4728Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);47294730SmallVector<SDValue, 5> Ops;4731Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */4732Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */4733Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */47344735// The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded4736// instruction will always be '1111' but it is possible in assembly language to specify4737// AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction.4738if (Opc != ARM::MRRC2) {4739Ops.push_back(getAL(CurDAG, dl));4740Ops.push_back(CurDAG->getRegister(0, MVT::i32));4741}47424743Ops.push_back(Chain);47444745// Writes to two registers.4746const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};47474748ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));4749return;4750}4751case Intrinsic::arm_ldaexd:4752case Intrinsic::arm_ldrexd: {4753SDLoc dl(N);4754SDValue Chain = N->getOperand(0);4755SDValue MemAddr = N->getOperand(2);4756bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();47574758bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;4759unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)4760: (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);47614762// arm_ldrexd returns a i64 value in {i32, i32}4763std::vector<EVT> ResTys;4764if (isThumb) {4765ResTys.push_back(MVT::i32);4766ResTys.push_back(MVT::i32);4767} else4768ResTys.push_back(MVT::Untyped);4769ResTys.push_back(MVT::Other);47704771// Place arguments in the right order.4772SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),4773CurDAG->getRegister(0, MVT::i32), Chain};4774SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);4775// Transfer memoperands.4776MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();4777CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});47784779// Remap uses.4780SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);4781if (!SDValue(N, 0).use_empty()) {4782SDValue Result;4783if (isThumb)4784Result = SDValue(Ld, 0);4785else {4786SDValue SubRegIdx =4787CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);4788SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,4789dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);4790Result = SDValue(ResNode,0);4791}4792ReplaceUses(SDValue(N, 0), Result);4793}4794if (!SDValue(N, 1).use_empty()) {4795SDValue Result;4796if (isThumb)4797Result = SDValue(Ld, 1);4798else {4799SDValue SubRegIdx =4800CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);4801SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,4802dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);4803Result = SDValue(ResNode,0);4804}4805ReplaceUses(SDValue(N, 1), Result);4806}4807ReplaceUses(SDValue(N, 2), OutChain);4808CurDAG->RemoveDeadNode(N);4809return;4810}4811case Intrinsic::arm_stlexd:4812case Intrinsic::arm_strexd: {4813SDLoc dl(N);4814SDValue Chain = N->getOperand(0);4815SDValue Val0 = N->getOperand(2);4816SDValue Val1 = N->getOperand(3);4817SDValue MemAddr = N->getOperand(4);48184819// Store exclusive double return a i32 value which is the return status4820// of the issued store.4821const EVT ResTys[] = {MVT::i32, MVT::Other};48224823bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();4824// Place arguments in the right order.4825SmallVector<SDValue, 7> Ops;4826if (isThumb) {4827Ops.push_back(Val0);4828Ops.push_back(Val1);4829} else4830// arm_strexd uses GPRPair.4831Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));4832Ops.push_back(MemAddr);4833Ops.push_back(getAL(CurDAG, dl));4834Ops.push_back(CurDAG->getRegister(0, MVT::i32));4835Ops.push_back(Chain);48364837bool IsRelease = IntNo == Intrinsic::arm_stlexd;4838unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)4839: (IsRelease ? ARM::STLEXD : ARM::STREXD);48404841SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);4842// Transfer memoperands.4843MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();4844CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});48454846ReplaceNode(N, St);4847return;4848}48494850case Intrinsic::arm_neon_vld1: {4851static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,4852ARM::VLD1d32, ARM::VLD1d64 };4853static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,4854ARM::VLD1q32, ARM::VLD1q64};4855SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);4856return;4857}48584859case Intrinsic::arm_neon_vld1x2: {4860static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,4861ARM::VLD1q32, ARM::VLD1q64 };4862static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,4863ARM::VLD1d16QPseudo,4864ARM::VLD1d32QPseudo,4865ARM::VLD1d64QPseudo };4866SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);4867return;4868}48694870case Intrinsic::arm_neon_vld1x3: {4871static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,4872ARM::VLD1d16TPseudo,4873ARM::VLD1d32TPseudo,4874ARM::VLD1d64TPseudo };4875static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,4876ARM::VLD1q16LowTPseudo_UPD,4877ARM::VLD1q32LowTPseudo_UPD,4878ARM::VLD1q64LowTPseudo_UPD };4879static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,4880ARM::VLD1q16HighTPseudo,4881ARM::VLD1q32HighTPseudo,4882ARM::VLD1q64HighTPseudo };4883SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);4884return;4885}48864887case Intrinsic::arm_neon_vld1x4: {4888static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,4889ARM::VLD1d16QPseudo,4890ARM::VLD1d32QPseudo,4891ARM::VLD1d64QPseudo };4892static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,4893ARM::VLD1q16LowQPseudo_UPD,4894ARM::VLD1q32LowQPseudo_UPD,4895ARM::VLD1q64LowQPseudo_UPD };4896static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,4897ARM::VLD1q16HighQPseudo,4898ARM::VLD1q32HighQPseudo,4899ARM::VLD1q64HighQPseudo };4900SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);4901return;4902}49034904case Intrinsic::arm_neon_vld2: {4905static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,4906ARM::VLD2d32, ARM::VLD1q64 };4907static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,4908ARM::VLD2q32Pseudo };4909SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);4910return;4911}49124913case Intrinsic::arm_neon_vld3: {4914static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,4915ARM::VLD3d16Pseudo,4916ARM::VLD3d32Pseudo,4917ARM::VLD1d64TPseudo };4918static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,4919ARM::VLD3q16Pseudo_UPD,4920ARM::VLD3q32Pseudo_UPD };4921static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,4922ARM::VLD3q16oddPseudo,4923ARM::VLD3q32oddPseudo };4924SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);4925return;4926}49274928case Intrinsic::arm_neon_vld4: {4929static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,4930ARM::VLD4d16Pseudo,4931ARM::VLD4d32Pseudo,4932ARM::VLD1d64QPseudo };4933static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,4934ARM::VLD4q16Pseudo_UPD,4935ARM::VLD4q32Pseudo_UPD };4936static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,4937ARM::VLD4q16oddPseudo,4938ARM::VLD4q32oddPseudo };4939SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);4940return;4941}49424943case Intrinsic::arm_neon_vld2dup: {4944static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,4945ARM::VLD2DUPd32, ARM::VLD1q64 };4946static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,4947ARM::VLD2DUPq16EvenPseudo,4948ARM::VLD2DUPq32EvenPseudo };4949static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,4950ARM::VLD2DUPq16OddPseudo,4951ARM::VLD2DUPq32OddPseudo };4952SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,4953DOpcodes, QOpcodes0, QOpcodes1);4954return;4955}49564957case Intrinsic::arm_neon_vld3dup: {4958static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,4959ARM::VLD3DUPd16Pseudo,4960ARM::VLD3DUPd32Pseudo,4961ARM::VLD1d64TPseudo };4962static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,4963ARM::VLD3DUPq16EvenPseudo,4964ARM::VLD3DUPq32EvenPseudo };4965static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,4966ARM::VLD3DUPq16OddPseudo,4967ARM::VLD3DUPq32OddPseudo };4968SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,4969DOpcodes, QOpcodes0, QOpcodes1);4970return;4971}49724973case Intrinsic::arm_neon_vld4dup: {4974static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,4975ARM::VLD4DUPd16Pseudo,4976ARM::VLD4DUPd32Pseudo,4977ARM::VLD1d64QPseudo };4978static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,4979ARM::VLD4DUPq16EvenPseudo,4980ARM::VLD4DUPq32EvenPseudo };4981static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,4982ARM::VLD4DUPq16OddPseudo,4983ARM::VLD4DUPq32OddPseudo };4984SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,4985DOpcodes, QOpcodes0, QOpcodes1);4986return;4987}49884989case Intrinsic::arm_neon_vld2lane: {4990static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,4991ARM::VLD2LNd16Pseudo,4992ARM::VLD2LNd32Pseudo };4993static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,4994ARM::VLD2LNq32Pseudo };4995SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);4996return;4997}49984999case Intrinsic::arm_neon_vld3lane: {5000static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,5001ARM::VLD3LNd16Pseudo,5002ARM::VLD3LNd32Pseudo };5003static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,5004ARM::VLD3LNq32Pseudo };5005SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);5006return;5007}50085009case Intrinsic::arm_neon_vld4lane: {5010static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,5011ARM::VLD4LNd16Pseudo,5012ARM::VLD4LNd32Pseudo };5013static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,5014ARM::VLD4LNq32Pseudo };5015SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);5016return;5017}50185019case Intrinsic::arm_neon_vst1: {5020static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,5021ARM::VST1d32, ARM::VST1d64 };5022static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,5023ARM::VST1q32, ARM::VST1q64 };5024SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);5025return;5026}50275028case Intrinsic::arm_neon_vst1x2: {5029static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,5030ARM::VST1q32, ARM::VST1q64 };5031static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,5032ARM::VST1d16QPseudo,5033ARM::VST1d32QPseudo,5034ARM::VST1d64QPseudo };5035SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);5036return;5037}50385039case Intrinsic::arm_neon_vst1x3: {5040static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,5041ARM::VST1d16TPseudo,5042ARM::VST1d32TPseudo,5043ARM::VST1d64TPseudo };5044static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,5045ARM::VST1q16LowTPseudo_UPD,5046ARM::VST1q32LowTPseudo_UPD,5047ARM::VST1q64LowTPseudo_UPD };5048static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,5049ARM::VST1q16HighTPseudo,5050ARM::VST1q32HighTPseudo,5051ARM::VST1q64HighTPseudo };5052SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);5053return;5054}50555056case Intrinsic::arm_neon_vst1x4: {5057static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,5058ARM::VST1d16QPseudo,5059ARM::VST1d32QPseudo,5060ARM::VST1d64QPseudo };5061static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,5062ARM::VST1q16LowQPseudo_UPD,5063ARM::VST1q32LowQPseudo_UPD,5064ARM::VST1q64LowQPseudo_UPD };5065static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,5066ARM::VST1q16HighQPseudo,5067ARM::VST1q32HighQPseudo,5068ARM::VST1q64HighQPseudo };5069SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);5070return;5071}50725073case Intrinsic::arm_neon_vst2: {5074static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,5075ARM::VST2d32, ARM::VST1q64 };5076static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,5077ARM::VST2q32Pseudo };5078SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);5079return;5080}50815082case Intrinsic::arm_neon_vst3: {5083static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,5084ARM::VST3d16Pseudo,5085ARM::VST3d32Pseudo,5086ARM::VST1d64TPseudo };5087static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,5088ARM::VST3q16Pseudo_UPD,5089ARM::VST3q32Pseudo_UPD };5090static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,5091ARM::VST3q16oddPseudo,5092ARM::VST3q32oddPseudo };5093SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);5094return;5095}50965097case Intrinsic::arm_neon_vst4: {5098static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,5099ARM::VST4d16Pseudo,5100ARM::VST4d32Pseudo,5101ARM::VST1d64QPseudo };5102static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,5103ARM::VST4q16Pseudo_UPD,5104ARM::VST4q32Pseudo_UPD };5105static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,5106ARM::VST4q16oddPseudo,5107ARM::VST4q32oddPseudo };5108SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);5109return;5110}51115112case Intrinsic::arm_neon_vst2lane: {5113static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,5114ARM::VST2LNd16Pseudo,5115ARM::VST2LNd32Pseudo };5116static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,5117ARM::VST2LNq32Pseudo };5118SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);5119return;5120}51215122case Intrinsic::arm_neon_vst3lane: {5123static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,5124ARM::VST3LNd16Pseudo,5125ARM::VST3LNd32Pseudo };5126static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,5127ARM::VST3LNq32Pseudo };5128SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);5129return;5130}51315132case Intrinsic::arm_neon_vst4lane: {5133static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,5134ARM::VST4LNd16Pseudo,5135ARM::VST4LNd32Pseudo };5136static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,5137ARM::VST4LNq32Pseudo };5138SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);5139return;5140}51415142case Intrinsic::arm_mve_vldr_gather_base_wb:5143case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {5144static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,5145ARM::MVE_VLDRDU64_qi_pre};5146SelectMVE_WB(N, Opcodes,5147IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);5148return;5149}51505151case Intrinsic::arm_mve_vld2q: {5152static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};5153static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,5154ARM::MVE_VLD21_16};5155static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,5156ARM::MVE_VLD21_32};5157static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};5158SelectMVE_VLD(N, 2, Opcodes, false);5159return;5160}51615162case Intrinsic::arm_mve_vld4q: {5163static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,5164ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};5165static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,5166ARM::MVE_VLD42_16,5167ARM::MVE_VLD43_16};5168static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,5169ARM::MVE_VLD42_32,5170ARM::MVE_VLD43_32};5171static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};5172SelectMVE_VLD(N, 4, Opcodes, false);5173return;5174}5175}5176break;5177}51785179case ISD::INTRINSIC_WO_CHAIN: {5180unsigned IntNo = N->getConstantOperandVal(0);5181switch (IntNo) {5182default:5183break;51845185// Scalar f32 -> bf165186case Intrinsic::arm_neon_vcvtbfp2bf: {5187SDLoc dl(N);5188const SDValue &Src = N->getOperand(1);5189llvm::EVT DestTy = N->getValueType(0);5190SDValue Pred = getAL(CurDAG, dl);5191SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);5192SDValue Ops[] = { Src, Src, Pred, Reg0 };5193CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);5194return;5195}51965197// Vector v4f32 -> v4bf165198case Intrinsic::arm_neon_vcvtfp2bf: {5199SDLoc dl(N);5200const SDValue &Src = N->getOperand(1);5201SDValue Pred = getAL(CurDAG, dl);5202SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);5203SDValue Ops[] = { Src, Pred, Reg0 };5204CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);5205return;5206}52075208case Intrinsic::arm_mve_urshrl:5209SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);5210return;5211case Intrinsic::arm_mve_uqshll:5212SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);5213return;5214case Intrinsic::arm_mve_srshrl:5215SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);5216return;5217case Intrinsic::arm_mve_sqshll:5218SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);5219return;5220case Intrinsic::arm_mve_uqrshll:5221SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);5222return;5223case Intrinsic::arm_mve_sqrshrl:5224SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);5225return;52265227case Intrinsic::arm_mve_vadc:5228case Intrinsic::arm_mve_vadc_predicated:5229SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,5230IntNo == Intrinsic::arm_mve_vadc_predicated);5231return;5232case Intrinsic::arm_mve_vsbc:5233case Intrinsic::arm_mve_vsbc_predicated:5234SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true,5235IntNo == Intrinsic::arm_mve_vsbc_predicated);5236return;5237case Intrinsic::arm_mve_vshlc:5238case Intrinsic::arm_mve_vshlc_predicated:5239SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);5240return;52415242case Intrinsic::arm_mve_vmlldava:5243case Intrinsic::arm_mve_vmlldava_predicated: {5244static const uint16_t OpcodesU[] = {5245ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32,5246ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32,5247};5248static const uint16_t OpcodesS[] = {5249ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32,5250ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32,5251ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32,5252ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,5253ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32,5254ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32,5255ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32,5256ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,5257};5258SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,5259OpcodesS, OpcodesU);5260return;5261}52625263case Intrinsic::arm_mve_vrmlldavha:5264case Intrinsic::arm_mve_vrmlldavha_predicated: {5265static const uint16_t OpcodesU[] = {5266ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32,5267};5268static const uint16_t OpcodesS[] = {5269ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32,5270ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,5271ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32,5272ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,5273};5274SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,5275OpcodesS, OpcodesU);5276return;5277}52785279case Intrinsic::arm_mve_vidup:5280case Intrinsic::arm_mve_vidup_predicated: {5281static const uint16_t Opcodes[] = {5282ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,5283};5284SelectMVE_VxDUP(N, Opcodes, false,5285IntNo == Intrinsic::arm_mve_vidup_predicated);5286return;5287}52885289case Intrinsic::arm_mve_vddup:5290case Intrinsic::arm_mve_vddup_predicated: {5291static const uint16_t Opcodes[] = {5292ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,5293};5294SelectMVE_VxDUP(N, Opcodes, false,5295IntNo == Intrinsic::arm_mve_vddup_predicated);5296return;5297}52985299case Intrinsic::arm_mve_viwdup:5300case Intrinsic::arm_mve_viwdup_predicated: {5301static const uint16_t Opcodes[] = {5302ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,5303};5304SelectMVE_VxDUP(N, Opcodes, true,5305IntNo == Intrinsic::arm_mve_viwdup_predicated);5306return;5307}53085309case Intrinsic::arm_mve_vdwdup:5310case Intrinsic::arm_mve_vdwdup_predicated: {5311static const uint16_t Opcodes[] = {5312ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,5313};5314SelectMVE_VxDUP(N, Opcodes, true,5315IntNo == Intrinsic::arm_mve_vdwdup_predicated);5316return;5317}53185319case Intrinsic::arm_cde_cx1d:5320case Intrinsic::arm_cde_cx1da:5321case Intrinsic::arm_cde_cx2d:5322case Intrinsic::arm_cde_cx2da:5323case Intrinsic::arm_cde_cx3d:5324case Intrinsic::arm_cde_cx3da: {5325bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||5326IntNo == Intrinsic::arm_cde_cx2da ||5327IntNo == Intrinsic::arm_cde_cx3da;5328size_t NumExtraOps;5329uint16_t Opcode;5330switch (IntNo) {5331case Intrinsic::arm_cde_cx1d:5332case Intrinsic::arm_cde_cx1da:5333NumExtraOps = 0;5334Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;5335break;5336case Intrinsic::arm_cde_cx2d:5337case Intrinsic::arm_cde_cx2da:5338NumExtraOps = 1;5339Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;5340break;5341case Intrinsic::arm_cde_cx3d:5342case Intrinsic::arm_cde_cx3da:5343NumExtraOps = 2;5344Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;5345break;5346default:5347llvm_unreachable("Unexpected opcode");5348}5349SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);5350return;5351}5352}5353break;5354}53555356case ISD::ATOMIC_CMP_SWAP:5357SelectCMP_SWAP(N);5358return;5359}53605361SelectCode(N);5362}53635364// Inspect a register string of the form5365// cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or5366// cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string5367// and obtain the integer operands from them, adding these operands to the5368// provided vector.5369static void getIntOperandsFromRegisterString(StringRef RegString,5370SelectionDAG *CurDAG,5371const SDLoc &DL,5372std::vector<SDValue> &Ops) {5373SmallVector<StringRef, 5> Fields;5374RegString.split(Fields, ':');53755376if (Fields.size() > 1) {5377bool AllIntFields = true;53785379for (StringRef Field : Fields) {5380// Need to trim out leading 'cp' characters and get the integer field.5381unsigned IntField;5382AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);5383Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));5384}53855386assert(AllIntFields &&5387"Unexpected non-integer value in special register string.");5388(void)AllIntFields;5389}5390}53915392// Maps a Banked Register string to its mask value. The mask value returned is5393// for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register5394// mask operand, which expresses which register is to be used, e.g. r8, and in5395// which mode it is to be used, e.g. usr. Returns -1 to signify that the string5396// was invalid.5397static inline int getBankedRegisterMask(StringRef RegString) {5398auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());5399if (!TheReg)5400return -1;5401return TheReg->Encoding;5402}54035404// The flags here are common to those allowed for apsr in the A class cores and5405// those allowed for the special registers in the M class cores. Returns a5406// value representing which flags were present, -1 if invalid.5407static inline int getMClassFlagsMask(StringRef Flags) {5408return StringSwitch<int>(Flags)5409.Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is5410// correct when flags are not permitted5411.Case("g", 0x1)5412.Case("nzcvq", 0x2)5413.Case("nzcvqg", 0x3)5414.Default(-1);5415}54165417// Maps MClass special registers string to its value for use in the5418// t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.5419// Returns -1 to signify that the string was invalid.5420static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {5421auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);5422const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();5423if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))5424return -1;5425return (int)(TheReg->Encoding & 0xFFF); // SYSm value5426}54275428static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {5429// The mask operand contains the special register (R Bit) in bit 4, whether5430// the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and5431// bits 3-0 contains the fields to be accessed in the special register, set by5432// the flags provided with the register.5433int Mask = 0;5434if (Reg == "apsr") {5435// The flags permitted for apsr are the same flags that are allowed in5436// M class registers. We get the flag value and then shift the flags into5437// the correct place to combine with the mask.5438Mask = getMClassFlagsMask(Flags);5439if (Mask == -1)5440return -1;5441return Mask << 2;5442}54435444if (Reg != "cpsr" && Reg != "spsr") {5445return -1;5446}54475448// This is the same as if the flags were "fc"5449if (Flags.empty() || Flags == "all")5450return Mask | 0x9;54515452// Inspect the supplied flags string and set the bits in the mask for5453// the relevant and valid flags allowed for cpsr and spsr.5454for (char Flag : Flags) {5455int FlagVal;5456switch (Flag) {5457case 'c':5458FlagVal = 0x1;5459break;5460case 'x':5461FlagVal = 0x2;5462break;5463case 's':5464FlagVal = 0x4;5465break;5466case 'f':5467FlagVal = 0x8;5468break;5469default:5470FlagVal = 0;5471}54725473// This avoids allowing strings where the same flag bit appears twice.5474if (!FlagVal || (Mask & FlagVal))5475return -1;5476Mask |= FlagVal;5477}54785479// If the register is spsr then we need to set the R bit.5480if (Reg == "spsr")5481Mask |= 0x10;54825483return Mask;5484}54855486// Lower the read_register intrinsic to ARM specific DAG nodes5487// using the supplied metadata string to select the instruction node to use5488// and the registers/masks to construct as operands for the node.5489bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){5490const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));5491const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));5492bool IsThumb2 = Subtarget->isThumb2();5493SDLoc DL(N);54945495std::vector<SDValue> Ops;5496getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);54975498if (!Ops.empty()) {5499// If the special register string was constructed of fields (as defined5500// in the ACLE) then need to lower to MRC node (32 bit) or5501// MRRC node(64 bit), we can make the distinction based on the number of5502// operands we have.5503unsigned Opcode;5504SmallVector<EVT, 3> ResTypes;5505if (Ops.size() == 5){5506Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;5507ResTypes.append({ MVT::i32, MVT::Other });5508} else {5509assert(Ops.size() == 3 &&5510"Invalid number of fields in special register string.");5511Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;5512ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });5513}55145515Ops.push_back(getAL(CurDAG, DL));5516Ops.push_back(CurDAG->getRegister(0, MVT::i32));5517Ops.push_back(N->getOperand(0));5518ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));5519return true;5520}55215522std::string SpecialReg = RegString->getString().lower();55235524int BankedReg = getBankedRegisterMask(SpecialReg);5525if (BankedReg != -1) {5526Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),5527getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5528N->getOperand(0) };5529ReplaceNode(5530N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,5531DL, MVT::i32, MVT::Other, Ops));5532return true;5533}55345535// The VFP registers are read by creating SelectionDAG nodes with opcodes5536// corresponding to the register that is being read from. So we switch on the5537// string to find which opcode we need to use.5538unsigned Opcode = StringSwitch<unsigned>(SpecialReg)5539.Case("fpscr", ARM::VMRS)5540.Case("fpexc", ARM::VMRS_FPEXC)5541.Case("fpsid", ARM::VMRS_FPSID)5542.Case("mvfr0", ARM::VMRS_MVFR0)5543.Case("mvfr1", ARM::VMRS_MVFR1)5544.Case("mvfr2", ARM::VMRS_MVFR2)5545.Case("fpinst", ARM::VMRS_FPINST)5546.Case("fpinst2", ARM::VMRS_FPINST2)5547.Default(0);55485549// If an opcode was found then we can lower the read to a VFP instruction.5550if (Opcode) {5551if (!Subtarget->hasVFP2Base())5552return false;5553if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())5554return false;55555556Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5557N->getOperand(0) };5558ReplaceNode(N,5559CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));5560return true;5561}55625563// If the target is M Class then need to validate that the register string5564// is an acceptable value, so check that a mask can be constructed from the5565// string.5566if (Subtarget->isMClass()) {5567int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);5568if (SYSmValue == -1)5569return false;55705571SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),5572getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5573N->getOperand(0) };5574ReplaceNode(5575N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));5576return true;5577}55785579// Here we know the target is not M Class so we need to check if it is one5580// of the remaining possible values which are apsr, cpsr or spsr.5581if (SpecialReg == "apsr" || SpecialReg == "cpsr") {5582Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5583N->getOperand(0) };5584ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,5585DL, MVT::i32, MVT::Other, Ops));5586return true;5587}55885589if (SpecialReg == "spsr") {5590Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5591N->getOperand(0) };5592ReplaceNode(5593N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,5594MVT::i32, MVT::Other, Ops));5595return true;5596}55975598return false;5599}56005601// Lower the write_register intrinsic to ARM specific DAG nodes5602// using the supplied metadata string to select the instruction node to use5603// and the registers/masks to use in the nodes5604bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){5605const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));5606const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));5607bool IsThumb2 = Subtarget->isThumb2();5608SDLoc DL(N);56095610std::vector<SDValue> Ops;5611getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);56125613if (!Ops.empty()) {5614// If the special register string was constructed of fields (as defined5615// in the ACLE) then need to lower to MCR node (32 bit) or5616// MCRR node(64 bit), we can make the distinction based on the number of5617// operands we have.5618unsigned Opcode;5619if (Ops.size() == 5) {5620Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;5621Ops.insert(Ops.begin()+2, N->getOperand(2));5622} else {5623assert(Ops.size() == 3 &&5624"Invalid number of fields in special register string.");5625Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;5626SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };5627Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);5628}56295630Ops.push_back(getAL(CurDAG, DL));5631Ops.push_back(CurDAG->getRegister(0, MVT::i32));5632Ops.push_back(N->getOperand(0));56335634ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));5635return true;5636}56375638std::string SpecialReg = RegString->getString().lower();5639int BankedReg = getBankedRegisterMask(SpecialReg);5640if (BankedReg != -1) {5641Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),5642getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5643N->getOperand(0) };5644ReplaceNode(5645N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,5646DL, MVT::Other, Ops));5647return true;5648}56495650// The VFP registers are written to by creating SelectionDAG nodes with5651// opcodes corresponding to the register that is being written. So we switch5652// on the string to find which opcode we need to use.5653unsigned Opcode = StringSwitch<unsigned>(SpecialReg)5654.Case("fpscr", ARM::VMSR)5655.Case("fpexc", ARM::VMSR_FPEXC)5656.Case("fpsid", ARM::VMSR_FPSID)5657.Case("fpinst", ARM::VMSR_FPINST)5658.Case("fpinst2", ARM::VMSR_FPINST2)5659.Default(0);56605661if (Opcode) {5662if (!Subtarget->hasVFP2Base())5663return false;5664Ops = { N->getOperand(2), getAL(CurDAG, DL),5665CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };5666ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));5667return true;5668}56695670std::pair<StringRef, StringRef> Fields;5671Fields = StringRef(SpecialReg).rsplit('_');5672std::string Reg = Fields.first.str();5673StringRef Flags = Fields.second;56745675// If the target was M Class then need to validate the special register value5676// and retrieve the mask for use in the instruction node.5677if (Subtarget->isMClass()) {5678int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);5679if (SYSmValue == -1)5680return false;56815682SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),5683N->getOperand(2), getAL(CurDAG, DL),5684CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };5685ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));5686return true;5687}56885689// We then check to see if a valid mask can be constructed for one of the5690// register string values permitted for the A and R class cores. These values5691// are apsr, spsr and cpsr; these are also valid on older cores.5692int Mask = getARClassRegisterMask(Reg, Flags);5693if (Mask != -1) {5694Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),5695getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),5696N->getOperand(0) };5697ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,5698DL, MVT::Other, Ops));5699return true;5700}57015702return false;5703}57045705bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){5706std::vector<SDValue> AsmNodeOperands;5707InlineAsm::Flag Flag;5708bool Changed = false;5709unsigned NumOps = N->getNumOperands();57105711// Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.5712// However, some instrstions (e.g. ldrexd/strexd in ARM mode) require5713// (even/even+1) GPRs and use %n and %Hn to refer to the individual regs5714// respectively. Since there is no constraint to explicitly specify a5715// reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,5716// the 64-bit data may be referred by H, Q, R modifiers, so we still pack5717// them into a GPRPair.57185719SDLoc dl(N);5720SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();57215722SmallVector<bool, 8> OpChanged;5723// Glue node will be appended late.5724for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {5725SDValue op = N->getOperand(i);5726AsmNodeOperands.push_back(op);57275728if (i < InlineAsm::Op_FirstOperand)5729continue;57305731if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))5732Flag = InlineAsm::Flag(C->getZExtValue());5733else5734continue;57355736// Immediate operands to inline asm in the SelectionDAG are modeled with5737// two operands. The first is a constant of value InlineAsm::Kind::Imm, and5738// the second is a constant with the value of the immediate. If we get here5739// and we have a Kind::Imm, skip the next operand, and continue.5740if (Flag.isImmKind()) {5741SDValue op = N->getOperand(++i);5742AsmNodeOperands.push_back(op);5743continue;5744}57455746const unsigned NumRegs = Flag.getNumOperandRegisters();5747if (NumRegs)5748OpChanged.push_back(false);57495750unsigned DefIdx = 0;5751bool IsTiedToChangedOp = false;5752// If it's a use that is tied with a previous def, it has no5753// reg class constraint.5754if (Changed && Flag.isUseOperandTiedToDef(DefIdx))5755IsTiedToChangedOp = OpChanged[DefIdx];57565757// Memory operands to inline asm in the SelectionDAG are modeled with two5758// operands: a constant of value InlineAsm::Kind::Mem followed by the input5759// operand. If we get here and we have a Kind::Mem, skip the next operand5760// (so it doesn't get misinterpreted), and continue. We do this here because5761// it's important to update the OpChanged array correctly before moving on.5762if (Flag.isMemKind()) {5763SDValue op = N->getOperand(++i);5764AsmNodeOperands.push_back(op);5765continue;5766}57675768if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&5769!Flag.isRegDefEarlyClobberKind())5770continue;57715772unsigned RC;5773const bool HasRC = Flag.hasRegClassConstraint(RC);5774if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))5775|| NumRegs != 2)5776continue;57775778assert((i+2 < NumOps) && "Invalid number of operands in inline asm");5779SDValue V0 = N->getOperand(i+1);5780SDValue V1 = N->getOperand(i+2);5781Register Reg0 = cast<RegisterSDNode>(V0)->getReg();5782Register Reg1 = cast<RegisterSDNode>(V1)->getReg();5783SDValue PairedReg;5784MachineRegisterInfo &MRI = MF->getRegInfo();57855786if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {5787// Replace the two GPRs with 1 GPRPair and copy values from GPRPair to5788// the original GPRs.57895790Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);5791PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);5792SDValue Chain = SDValue(N,0);57935794SDNode *GU = N->getGluedUser();5795SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,5796Chain.getValue(1));57975798// Extract values from a GPRPair reg and copy to the original GPR reg.5799SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,5800RegCopy);5801SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,5802RegCopy);5803SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,5804RegCopy.getValue(1));5805SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));58065807// Update the original glue user.5808std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);5809Ops.push_back(T1.getValue(1));5810CurDAG->UpdateNodeOperands(GU, Ops);5811} else {5812// For Kind == InlineAsm::Kind::RegUse, we first copy two GPRs into a5813// GPRPair and then pass the GPRPair to the inline asm.5814SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];58155816// As REG_SEQ doesn't take RegisterSDNode, we copy them first.5817SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,5818Chain.getValue(1));5819SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,5820T0.getValue(1));5821SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);58225823// Copy REG_SEQ into a GPRPair-typed VR and replace the original two5824// i32 VRs of inline asm with it.5825Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);5826PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);5827Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));58285829AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;5830Glue = Chain.getValue(1);5831}58325833Changed = true;58345835if(PairedReg.getNode()) {5836OpChanged[OpChanged.size() -1 ] = true;5837Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);5838if (IsTiedToChangedOp)5839Flag.setMatchingOp(DefIdx);5840else5841Flag.setRegClass(ARM::GPRPairRegClassID);5842// Replace the current flag.5843AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(5844Flag, dl, MVT::i32);5845// Add the new register node and skip the original two GPRs.5846AsmNodeOperands.push_back(PairedReg);5847// Skip the next two GPRs.5848i += 2;5849}5850}58515852if (Glue.getNode())5853AsmNodeOperands.push_back(Glue);5854if (!Changed)5855return false;58565857SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),5858CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);5859New->setNodeId(-1);5860ReplaceNode(N, New.getNode());5861return true;5862}58635864bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(5865const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,5866std::vector<SDValue> &OutOps) {5867switch(ConstraintID) {5868default:5869llvm_unreachable("Unexpected asm memory constraint");5870case InlineAsm::ConstraintCode::m:5871case InlineAsm::ConstraintCode::o:5872case InlineAsm::ConstraintCode::Q:5873case InlineAsm::ConstraintCode::Um:5874case InlineAsm::ConstraintCode::Un:5875case InlineAsm::ConstraintCode::Uq:5876case InlineAsm::ConstraintCode::Us:5877case InlineAsm::ConstraintCode::Ut:5878case InlineAsm::ConstraintCode::Uv:5879case InlineAsm::ConstraintCode::Uy:5880// Require the address to be in a register. That is safe for all ARM5881// variants and it is hard to do anything much smarter without knowing5882// how the operand is used.5883OutOps.push_back(Op);5884return false;5885}5886return true;5887}58885889/// createARMISelDag - This pass converts a legalized DAG into a5890/// ARM-specific DAG, ready for instruction scheduling.5891///5892FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,5893CodeGenOptLevel OptLevel) {5894return new ARMDAGToDAGISelLegacy(TM, OptLevel);5895}589658975898