//===-- ARMBaseInstrInfo.cpp - ARM Instruction Information ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the Base ARM implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//

#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMFeatures.h"
#include "ARMHazardRecognizer.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "MVETailPredUtils.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DFAPacketizer.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/MultiHazardRecognizer.h"
#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/TargetParser/Triple.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <new>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "arm-instrinfo"

#define GET_INSTRINFO_CTOR_DTOR
#include "ARMGenInstrInfo.inc"

static cl::opt<bool>
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
               cl::desc("Enable ARM 2-addr to 3-addr conv"));

/// ARM_MLxEntry - Record information about MLA / MLS instructions.
struct ARM_MLxEntry {
  uint16_t MLxOpc;    // MLA / MLS opcode
  uint16_t MulOpc;    // Expanded multiplication opcode
  uint16_t AddSubOpc; // Expanded add / sub opcode
  bool NegAcc;        // True if the acc is negated before the add / sub.
  bool HasLane;       // True if instruction has an extra "lane" operand.
};

static const ARM_MLxEntry ARM_MLxTable[] = {
  // MLxOpc,       MulOpc,        AddSubOpc,   NegAcc, HasLane
  // fp scalar ops
  { ARM::VMLAS,    ARM::VMULS,    ARM::VADDS,  false,  false },
  { ARM::VMLSS,    ARM::VMULS,    ARM::VSUBS,  false,  false },
  { ARM::VMLAD,    ARM::VMULD,    ARM::VADDD,  false,  false },
  { ARM::VMLSD,    ARM::VMULD,    ARM::VSUBD,  false,  false },
  { ARM::VNMLAS,   ARM::VNMULS,   ARM::VSUBS,  true,   false },
  { ARM::VNMLSS,   ARM::VMULS,    ARM::VSUBS,  true,   false },
  { ARM::VNMLAD,   ARM::VNMULD,   ARM::VSUBD,  true,   false },
  { ARM::VNMLSD,   ARM::VMULD,    ARM::VSUBD,  true,   false },

  // fp SIMD ops
  { ARM::VMLAfd,   ARM::VMULfd,   ARM::VADDfd, false,  false },
  { ARM::VMLSfd,   ARM::VMULfd,   ARM::VSUBfd, false,  false },
  { ARM::VMLAfq,   ARM::VMULfq,   ARM::VADDfq, false,  false },
  { ARM::VMLSfq,   ARM::VMULfq,   ARM::VSUBfq, false,  false },
  { ARM::VMLAslfd, ARM::VMULslfd, ARM::VADDfd, false,  true  },
  { ARM::VMLSslfd, ARM::VMULslfd, ARM::VSUBfd, false,  true  },
  { ARM::VMLAslfq, ARM::VMULslfq, ARM::VADDfq, false,  true  },
  { ARM::VMLSslfq, ARM::VMULslfq, ARM::VSUBfq, false,  true  },
};
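// For illustration, expanding the first table entry splits a scalar fused
// multiply-accumulate into its two component operations (s3 here is a
// hypothetical scratch register):
//   VMLAS s0, s1, s2      ; s0 += s1 * s2
// becomes
//   VMULS s3, s1, s2
//   VADDS s0, s0, s3
// Entries with NegAcc set negate the accumulator before the add / sub, as
// noted in the flag description above.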
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
    : ARMGenInstrInfo(ARM::ADJCALLSTACKDOWN, ARM::ADJCALLSTACKUP),
      Subtarget(STI) {
  for (unsigned i = 0, e = std::size(ARM_MLxTable); i != e; ++i) {
    if (!MLxEntryMap.insert(std::make_pair(ARM_MLxTable[i].MLxOpc, i)).second)
      llvm_unreachable("Duplicated entries?");
    MLxHazardOpcodes.insert(ARM_MLxTable[i].AddSubOpc);
    MLxHazardOpcodes.insert(ARM_MLxTable[i].MulOpc);
  }
}

// Use a ScoreboardHazardRecognizer for prepass ARM scheduling. TargetInstrImpl
// currently defaults to no prepass hazard recognizer.
ScheduleHazardRecognizer *
ARMBaseInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,
                                               const ScheduleDAG *DAG) const {
  if (usePreRAHazardRecognizer()) {
    const InstrItineraryData *II =
        static_cast<const ARMSubtarget *>(STI)->getInstrItineraryData();
    return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched");
  }
  return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);
}

// Called during:
// - pre-RA scheduling
// - post-RA scheduling when FeatureUseMISched is set
ScheduleHazardRecognizer *ARMBaseInstrInfo::CreateTargetMIHazardRecognizer(
    const InstrItineraryData *II, const ScheduleDAGMI *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  // We would like to restrict this hazard recognizer to only
  // post-RA scheduling; we can tell that we're post-RA because we don't
  // track VRegLiveness.
  // Cortex-M7: TRM indicates that there is a single ITCM bank and two DTCM
  // banks banked on bit 2. Assume that TCMs are in use.
  if (Subtarget.isCortexM7() && !DAG->hasVRegLiveness())
    MHR->AddHazardRecognizer(
        std::make_unique<ARMBankConflictHazardRecognizer>(DAG, 0x4, true));

  // Not inserting ARMHazardRecognizerFPMLx because that would change
  // legacy behavior

  auto BHR = TargetInstrInfo::CreateTargetMIHazardRecognizer(II, DAG);
  MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

// Called during post-RA scheduling when FeatureUseMISched is not set
ScheduleHazardRecognizer *ARMBaseInstrInfo::
CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
                                   const ScheduleDAG *DAG) const {
  MultiHazardRecognizer *MHR = new MultiHazardRecognizer();

  if (Subtarget.isThumb2() || Subtarget.hasVFP2Base())
    MHR->AddHazardRecognizer(std::make_unique<ARMHazardRecognizerFPMLx>());

  auto BHR = TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG);
  if (BHR)
    MHR->AddHazardRecognizer(std::unique_ptr<ScheduleHazardRecognizer>(BHR));
  return MHR;
}

MachineInstr *
ARMBaseInstrInfo::convertToThreeAddress(MachineInstr &MI, LiveVariables *LV,
                                        LiveIntervals *LIS) const {
  // FIXME: Thumb2 support.

  if (!EnableARM3Addr)
    return nullptr;

  MachineFunction &MF = *MI.getParent()->getParent();
  uint64_t TSFlags = MI.getDesc().TSFlags;
  bool isPre = false;
  switch ((TSFlags & ARMII::IndexModeMask) >> ARMII::IndexModeShift) {
  default: return nullptr;
  case ARMII::IndexModePre:
    isPre = true;
    break;
  case ARMII::IndexModePost:
    break;
  }

  // Try splitting an indexed load/store to an un-indexed one plus an add/sub
  // operation.
  unsigned MemOpc = getUnindexedOpcode(MI.getOpcode());
  if (MemOpc == 0)
    return nullptr;

  MachineInstr *UpdateMI = nullptr;
  MachineInstr *MemMI = nullptr;
  unsigned AddrMode = (TSFlags & ARMII::AddrModeMask);
  const MCInstrDesc &MCID = MI.getDesc();
  unsigned NumOps = MCID.getNumOperands();
  bool isLoad = !MI.mayStore();
  const MachineOperand &WB = isLoad ? MI.getOperand(1) : MI.getOperand(0);
  const MachineOperand &Base = MI.getOperand(2);
  const MachineOperand &Offset = MI.getOperand(NumOps - 3);
  Register WBReg = WB.getReg();
  Register BaseReg = Base.getReg();
  Register OffReg = Offset.getReg();
  unsigned OffImm = MI.getOperand(NumOps - 2).getImm();
  ARMCC::CondCodes Pred = (ARMCC::CondCodes)MI.getOperand(NumOps - 1).getImm();
  switch (AddrMode) {
  default: llvm_unreachable("Unknown indexed op!");
  case ARMII::AddrMode2: {
    bool isSub = ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM2Offset(OffImm);
    if (OffReg == 0) {
      if (ARM_AM::getSOImmVal(Amt) == -1)
        // Can't encode it in a so_imm operand. This transformation will
        // add more than 1 instruction. Abandon!
        return nullptr;
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else if (Amt != 0) {
      ARM_AM::ShiftOpc ShOpc = ARM_AM::getAM2ShiftOpc(OffImm);
      unsigned SOOpc = ARM_AM::getSORegOpc(ShOpc, Amt);
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrsi : ARM::ADDrsi), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .addReg(0)
                     .addImm(SOOpc)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    } else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  case ARMII::AddrMode3: {
    bool isSub = ARM_AM::getAM3Op(OffImm) == ARM_AM::sub;
    unsigned Amt = ARM_AM::getAM3Offset(OffImm);
    if (OffReg == 0)
      // Immediate is 8-bits. It's guaranteed to fit in a so_imm operand.
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBri : ARM::ADDri), WBReg)
                     .addReg(BaseReg)
                     .addImm(Amt)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    else
      UpdateMI = BuildMI(MF, MI.getDebugLoc(),
                         get(isSub ? ARM::SUBrr : ARM::ADDrr), WBReg)
                     .addReg(BaseReg)
                     .addReg(OffReg)
                     .add(predOps(Pred))
                     .add(condCodeOp());
    break;
  }
  }

  std::vector<MachineInstr*> NewMIs;
  if (isPre) {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(WBReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(WBReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    NewMIs.push_back(MemMI);
    NewMIs.push_back(UpdateMI);
  } else {
    if (isLoad)
      MemMI =
          BuildMI(MF, MI.getDebugLoc(), get(MemOpc), MI.getOperand(0).getReg())
              .addReg(BaseReg)
              .addImm(0)
              .addImm(Pred);
    else
      MemMI = BuildMI(MF, MI.getDebugLoc(), get(MemOpc))
                  .addReg(MI.getOperand(1).getReg())
                  .addReg(BaseReg)
                  .addReg(0)
                  .addImm(0)
                  .addImm(Pred);
    if (WB.isDead())
      UpdateMI->getOperand(0).setIsDead();
    NewMIs.push_back(UpdateMI);
    NewMIs.push_back(MemMI);
  }

  // Transfer LiveVariables states, kill / dead info.
  if (LV) {
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isReg() && MO.getReg().isVirtual()) {
        Register Reg = MO.getReg();

        LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
        if (MO.isDef()) {
          MachineInstr *NewMI = (Reg == WBReg) ? UpdateMI : MemMI;
          if (MO.isDead())
            LV->addVirtualRegisterDead(Reg, *NewMI);
        }
        if (MO.isUse() && MO.isKill()) {
          for (unsigned j = 0; j < 2; ++j) {
            // Look at the two new MI's in reverse order.
            MachineInstr *NewMI = NewMIs[j];
            if (!NewMI->readsRegister(Reg, /*TRI=*/nullptr))
              continue;
            LV->addVirtualRegisterKilled(Reg, *NewMI);
            if (VI.removeKill(MI))
              VI.Kills.push_back(NewMI);
            break;
          }
        }
      }
    }
  }

  MachineBasicBlock &MBB = *MI.getParent();
  MBB.insert(MI, NewMIs[1]);
  MBB.insert(MI, NewMIs[0]);
  return NewMIs[0];
}
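// For illustration, an AddrMode2 pre-indexed load such as
//   ldr r0, [r1, #4]!     ; load and write back r1 += 4
// is split by the code above into
//   add r1, r1, #4
//   ldr r0, [r1]
// while the post-indexed form "ldr r0, [r1], #4" becomes the load from [r1]
// followed by the add (register names here are illustrative).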
// Branch analysis.
// Cond vector output format:
//   0 elements indicates an unconditional branch
//   2 elements indicates a conditional branch; the elements are
//     the condition to check and the CPSR.
//   3 elements indicates a hardware loop end; the elements
//     are the opcode, the operand value to test, and a dummy
//     operand used to pad out to 3 operands.
bool ARMBaseInstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;

  MachineBasicBlock::instr_iterator I = MBB.instr_end();
  if (I == MBB.instr_begin())
    return false; // Empty blocks are easy.
  --I;

  // Walk backwards from the end of the basic block until the branch is
  // analyzed or we give up.
  while (isPredicated(*I) || I->isTerminator() || I->isDebugValue()) {
    // Flag to be raised on unanalyzeable instructions. This is useful in cases
    // where we want to clean up on the end of the basic block before we bail
    // out.
    bool CantAnalyze = false;

    // Skip over DEBUG values, predicated nonterminators and speculation
    // barrier terminators.
    while (I->isDebugInstr() || !I->isTerminator() ||
           isSpeculationBarrierEndBBOpcode(I->getOpcode()) ||
           I->getOpcode() == ARM::t2DoLoopStartTP) {
      if (I == MBB.instr_begin())
        return false;
      --I;
    }

    if (isIndirectBranchOpcode(I->getOpcode()) ||
        isJumpTableBranchOpcode(I->getOpcode())) {
      // Indirect branches and jump tables can't be analyzed, but we still want
      // to clean up any instructions at the tail of the basic block.
      CantAnalyze = true;
    } else if (isUncondBranchOpcode(I->getOpcode())) {
      TBB = I->getOperand(0).getMBB();
    } else if (isCondBranchOpcode(I->getOpcode())) {
      // Bail out if we encounter multiple conditional branches.
      if (!Cond.empty())
        return true;

      assert(!FBB && "FBB should have been null.");
      FBB = TBB;
      TBB = I->getOperand(0).getMBB();
      Cond.push_back(I->getOperand(1));
      Cond.push_back(I->getOperand(2));
    } else if (I->isReturn()) {
      // Returns can't be analyzed, but we should run cleanup.
      CantAnalyze = true;
    } else if (I->getOpcode() == ARM::t2LoopEnd &&
               MBB.getParent()
                   ->getSubtarget<ARMSubtarget>()
                   .enableMachinePipeliner()) {
      if (!Cond.empty())
        return true;
      FBB = TBB;
      TBB = I->getOperand(1).getMBB();
      Cond.push_back(MachineOperand::CreateImm(I->getOpcode()));
      Cond.push_back(I->getOperand(0));
      Cond.push_back(MachineOperand::CreateImm(0));
    } else {
      // We encountered an unrecognized terminator. Bail out immediately.
      return true;
    }

    // Cleanup code - to be run for unpredicated unconditional branches and
    // returns.
    if (!isPredicated(*I) &&
        (isUncondBranchOpcode(I->getOpcode()) ||
         isIndirectBranchOpcode(I->getOpcode()) ||
         isJumpTableBranchOpcode(I->getOpcode()) ||
         I->isReturn())) {
      // Forget any previous conditional branch information - it no longer
      // applies.
      Cond.clear();
      FBB = nullptr;

      // If we can modify the function, delete everything below this
      // unconditional branch.
      if (AllowModify) {
        MachineBasicBlock::iterator DI = std::next(I);
        while (DI != MBB.instr_end()) {
          MachineInstr &InstToDelete = *DI;
          ++DI;
          // Speculation barriers must not be deleted.
          if (isSpeculationBarrierEndBBOpcode(InstToDelete.getOpcode()))
            continue;
          InstToDelete.eraseFromParent();
        }
      }
    }

    if (CantAnalyze) {
      // We may not be able to analyze the block, but we could still have
      // an unconditional branch as the last instruction in the block, which
      // just branches to layout successor. If this is the case, then just
      // remove it if we're allowed to make modifications.
      if (AllowModify && !isPredicated(MBB.back()) &&
          isUncondBranchOpcode(MBB.back().getOpcode()) &&
          TBB && MBB.isLayoutSuccessor(TBB))
        removeBranch(MBB);
      return true;
    }

    if (I == MBB.instr_begin())
      return false;

    --I;
  }

  // We made it past the terminators without bailing out - we must have
  // analyzed this branch successfully.
  return false;
}

unsigned ARMBaseInstrInfo::removeBranch(MachineBasicBlock &MBB,
                                        int *BytesRemoved) const {
  assert(!BytesRemoved && "code size not handled");

  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return 0;

  if (!isUncondBranchOpcode(I->getOpcode()) &&
      !isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
    return 0;

  // Remove the branch.
  I->eraseFromParent();

  I = MBB.end();

  if (I == MBB.begin()) return 1;
  --I;
  if (!isCondBranchOpcode(I->getOpcode()) && I->getOpcode() != ARM::t2LoopEnd)
    return 1;

  // Remove the branch.
  I->eraseFromParent();
  return 2;
}

unsigned ARMBaseInstrInfo::insertBranch(MachineBasicBlock &MBB,
                                        MachineBasicBlock *TBB,
                                        MachineBasicBlock *FBB,
                                        ArrayRef<MachineOperand> Cond,
                                        const DebugLoc &DL,
                                        int *BytesAdded) const {
  assert(!BytesAdded && "code size not handled");
  ARMFunctionInfo *AFI = MBB.getParent()->getInfo<ARMFunctionInfo>();
  int BOpc = !AFI->isThumbFunction()
                 ? ARM::B
                 : (AFI->isThumb2Function() ? ARM::t2B : ARM::tB);
  int BccOpc = !AFI->isThumbFunction()
                   ? ARM::Bcc
                   : (AFI->isThumb2Function() ? ARM::t2Bcc : ARM::tBcc);
  bool isThumb = AFI->isThumbFunction() || AFI->isThumb2Function();

  // Shouldn't be a fall through.
  assert(TBB && "insertBranch must not be told to insert a fallthrough");
  assert((Cond.size() == 2 || Cond.size() == 0 || Cond.size() == 3) &&
         "ARM branch conditions have two or three components!");

  // For conditional branches, we use addOperand to preserve CPSR flags.

  if (!FBB) {
    if (Cond.empty()) { // Unconditional branch?
      if (isThumb)
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB).add(predOps(ARMCC::AL));
      else
        BuildMI(&MBB, DL, get(BOpc)).addMBB(TBB);
    } else if (Cond.size() == 2) {
      BuildMI(&MBB, DL, get(BccOpc))
          .addMBB(TBB)
          .addImm(Cond[0].getImm())
          .add(Cond[1]);
    } else
      BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
    return 1;
  }

  // Two-way conditional branch.
  if (Cond.size() == 2)
    BuildMI(&MBB, DL, get(BccOpc))
        .addMBB(TBB)
        .addImm(Cond[0].getImm())
        .add(Cond[1]);
  else if (Cond.size() == 3)
    BuildMI(&MBB, DL, get(Cond[0].getImm())).add(Cond[1]).addMBB(TBB);
  if (isThumb)
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB).add(predOps(ARMCC::AL));
  else
    BuildMI(&MBB, DL, get(BOpc)).addMBB(FBB);
  return 2;
}

bool ARMBaseInstrInfo::
reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond.size() == 2) {
    ARMCC::CondCodes CC = (ARMCC::CondCodes)(int)Cond[0].getImm();
    Cond[0].setImm(ARMCC::getOppositeCondition(CC));
    return false;
  }
  return true;
}
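// For illustration, the three Cond shapes produced by analyzeBranch are:
//   {}                               - unconditional branch
//   { ARMCC::EQ, CPSR }              - conditional branch on EQ
//   { t2LoopEnd opcode, counter, 0 } - hardware loop end
// reverseBranchCondition above only handles the two-element form; for the
// three-element hardware loop form it returns true (cannot reverse).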
bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const {
  if (MI.isBundle()) {
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      int PIdx = I->findFirstPredOperandIdx();
      if (PIdx != -1 && I->getOperand(PIdx).getImm() != ARMCC::AL)
        return true;
    }
    return false;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL;
}

std::string ARMBaseInstrInfo::createMIROperandComment(
    const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx,
    const TargetRegisterInfo *TRI) const {

  // First, let's see if there is a generic comment for this operand
  std::string GenericComment =
      TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI);
  if (!GenericComment.empty())
    return GenericComment;

  // If not, check if we have an immediate operand.
  if (!Op.isImm())
    return std::string();

  // And print its corresponding condition code if the immediate is a
  // predicate.
  int FirstPredOp = MI.findFirstPredOperandIdx();
  if (FirstPredOp != (int) OpIdx)
    return std::string();

  std::string CC = "CC::";
  CC += ARMCondCodeToString((ARMCC::CondCodes)Op.getImm());
  return CC;
}

bool ARMBaseInstrInfo::PredicateInstruction(
    MachineInstr &MI, ArrayRef<MachineOperand> Pred) const {
  unsigned Opc = MI.getOpcode();
  if (isUncondBranchOpcode(Opc)) {
    MI.setDesc(get(getMatchingCondBranchOpcode(Opc)));
    MachineInstrBuilder(*MI.getParent()->getParent(), MI)
        .addImm(Pred[0].getImm())
        .addReg(Pred[1].getReg());
    return true;
  }

  int PIdx = MI.findFirstPredOperandIdx();
  if (PIdx != -1) {
    MachineOperand &PMO = MI.getOperand(PIdx);
    PMO.setImm(Pred[0].getImm());
    MI.getOperand(PIdx+1).setReg(Pred[1].getReg());

    // Thumb 1 arithmetic instructions do not set CPSR when executed inside an
    // IT block. This affects how they are printed.
    const MCInstrDesc &MCID = MI.getDesc();
    if (MCID.TSFlags & ARMII::ThumbArithFlagSetting) {
      assert(MCID.operands()[1].isOptionalDef() &&
             "CPSR def isn't expected operand");
      assert((MI.getOperand(1).isDead() ||
              MI.getOperand(1).getReg() != ARM::CPSR) &&
             "if conversion tried to stop defining used CPSR");
      MI.getOperand(1).setReg(ARM::NoRegister);
    }

    return true;
  }
  return false;
}

bool ARMBaseInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,
                                         ArrayRef<MachineOperand> Pred2) const {
  if (Pred1.size() > 2 || Pred2.size() > 2)
    return false;

  ARMCC::CondCodes CC1 = (ARMCC::CondCodes)Pred1[0].getImm();
  ARMCC::CondCodes CC2 = (ARMCC::CondCodes)Pred2[0].getImm();
  if (CC1 == CC2)
    return true;

  switch (CC1) {
  default:
    return false;
  case ARMCC::AL:
    return true;
  case ARMCC::HS:
    return CC2 == ARMCC::HI;
  case ARMCC::LS:
    return CC2 == ARMCC::LO || CC2 == ARMCC::EQ;
  case ARMCC::GE:
    return CC2 == ARMCC::GT;
  case ARMCC::LE:
    return CC2 == ARMCC::LT;
  }
}
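// For illustration: Pred1 subsumes Pred2 when every state satisfying Pred2
// also satisfies Pred1. For example, HS (unsigned >=) subsumes HI
// (unsigned >), and LS (unsigned <=) subsumes both LO (unsigned <) and EQ,
// matching the switch above.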
bool ARMBaseInstrInfo::ClobbersPredicate(MachineInstr &MI,
                                         std::vector<MachineOperand> &Pred,
                                         bool SkipDead) const {
  bool Found = false;
  for (const MachineOperand &MO : MI.operands()) {
    bool ClobbersCPSR = MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR);
    bool IsCPSR = MO.isReg() && MO.isDef() && MO.getReg() == ARM::CPSR;
    if (ClobbersCPSR || IsCPSR) {

      // Filter out T1 instructions that have a dead CPSR,
      // allowing IT blocks to be generated containing T1 instructions
      const MCInstrDesc &MCID = MI.getDesc();
      if (MCID.TSFlags & ARMII::ThumbArithFlagSetting && MO.isDead() &&
          SkipDead)
        continue;

      Pred.push_back(MO);
      Found = true;
    }
  }

  return Found;
}

bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
  for (const auto &MO : MI.operands())
    if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
      return true;
  return false;
}

static bool isEligibleForITBlock(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return true;
  case ARM::tADC:   // ADC (register) T1
  case ARM::tADDi3: // ADD (immediate) T1
  case ARM::tADDi8: // ADD (immediate) T2
  case ARM::tADDrr: // ADD (register) T1
  case ARM::tAND:   // AND (register) T1
  case ARM::tASRri: // ASR (immediate) T1
  case ARM::tASRrr: // ASR (register) T1
  case ARM::tBIC:   // BIC (register) T1
  case ARM::tEOR:   // EOR (register) T1
  case ARM::tLSLri: // LSL (immediate) T1
  case ARM::tLSLrr: // LSL (register) T1
  case ARM::tLSRri: // LSR (immediate) T1
  case ARM::tLSRrr: // LSR (register) T1
  case ARM::tMUL:   // MUL T1
  case ARM::tMVN:   // MVN (register) T1
  case ARM::tORR:   // ORR (register) T1
  case ARM::tROR:   // ROR (register) T1
  case ARM::tRSB:   // RSB (immediate) T1
  case ARM::tSBC:   // SBC (register) T1
  case ARM::tSUBi3: // SUB (immediate) T1
  case ARM::tSUBi8: // SUB (immediate) T2
  case ARM::tSUBrr: // SUB (register) T1
    return !ARMBaseInstrInfo::isCPSRDefined(*MI);
  }
}

/// isPredicable - Return true if the specified instruction can be predicated.
/// By default, this returns true for every instruction with a
/// PredicateOperand.
bool ARMBaseInstrInfo::isPredicable(const MachineInstr &MI) const {
  if (!MI.isPredicable())
    return false;

  if (MI.isBundle())
    return false;

  if (!isEligibleForITBlock(&MI))
    return false;

  const MachineFunction *MF = MI.getParent()->getParent();
  const ARMFunctionInfo *AFI =
      MF->getInfo<ARMFunctionInfo>();

  // Neon instructions in Thumb2 IT blocks are deprecated, see ARMARM.
  // In their ARM encoding, they can't be encoded in a conditional form.
  if ((MI.getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON)
    return false;

  // Make indirect control flow changes unpredicable when SLS mitigation is
  // enabled.
  const ARMSubtarget &ST = MF->getSubtarget<ARMSubtarget>();
  if (ST.hardenSlsRetBr() && isIndirectControlFlowNotComingBack(MI))
    return false;
  if (ST.hardenSlsBlr() && isIndirectCall(MI))
    return false;

  if (AFI->isThumb2Function()) {
    if (getSubtarget().restrictIT())
      return isV8EligibleForIT(&MI);
  }

  return true;
}

namespace llvm {

template <> bool IsCPSRDead<MachineInstr>(const MachineInstr *MI) {
  for (const MachineOperand &MO : MI->operands()) {
    if (!MO.isReg() || MO.isUndef() || MO.isUse())
      continue;
    if (MO.getReg() != ARM::CPSR)
      continue;
    if (!MO.isDead())
      return false;
  }
  // all definitions of CPSR are dead
  return true;
}

} // end namespace llvm
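// For illustration: a T1 instruction such as tADDrr normally sets flags, so
// isEligibleForITBlock above accepts it only when its CPSR def is dead.
// Inside an IT block the flag-setting behavior is suppressed, which is why
// PredicateInstruction clears the optional CPSR def operand.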
/// GetInstSize - Return the size of the specified MachineInstr.
///
unsigned ARMBaseInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  const MachineBasicBlock &MBB = *MI.getParent();
  const MachineFunction *MF = MBB.getParent();
  const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();

  const MCInstrDesc &MCID = MI.getDesc();

  switch (MI.getOpcode()) {
  default:
    // Return the size specified in .td file. If there's none, return 0, as we
    // can't define a default size (Thumb1 instructions are 2 bytes, Thumb2
    // instructions are 2-4 bytes, and ARM instructions are 4 bytes), in
    // contrast to AArch64 instructions which have a default size of 4 bytes
    // for example.
    return MCID.getSize();
  case TargetOpcode::BUNDLE:
    return getInstBundleLength(MI);
  case ARM::CONSTPOOL_ENTRY:
  case ARM::JUMPTABLE_INSTS:
  case ARM::JUMPTABLE_ADDRS:
  case ARM::JUMPTABLE_TBB:
  case ARM::JUMPTABLE_TBH:
    // If this machine instr is a constant pool entry, its size is recorded as
    // operand #2.
    return MI.getOperand(2).getImm();
  case ARM::SPACE:
    return MI.getOperand(1).getImm();
  case ARM::INLINEASM:
  case ARM::INLINEASM_BR: {
    // If this machine instr is an inline asm, measure it.
    unsigned Size = getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
    if (!MF->getInfo<ARMFunctionInfo>()->isThumbFunction())
      Size = alignTo(Size, 4);
    return Size;
  }
  }
}

unsigned ARMBaseInstrInfo::getInstBundleLength(const MachineInstr &MI) const {
  unsigned Size = 0;
  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
  while (++I != E && I->isInsideBundle()) {
    assert(!I->isBundle() && "No nested bundle!");
    Size += getInstSizeInBytes(*I);
  }
  return Size;
}

void ARMBaseInstrInfo::copyFromCPSR(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    unsigned DestReg, bool KillSrc,
                                    const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MRS_M : ARM::t2MRS_AR)
                     : ARM::MRS;

  MachineInstrBuilder MIB =
      BuildMI(MBB, I, I->getDebugLoc(), get(Opc), DestReg);

  // There is only 1 A/R class MRS instruction, and it always refers to
  // APSR. However, there are lots of other possibilities on M-class cores.
  if (Subtarget.isMClass())
    MIB.addImm(0x800);

  MIB.add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | getKillRegState(KillSrc));
}
void ARMBaseInstrInfo::copyToCPSR(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  unsigned SrcReg, bool KillSrc,
                                  const ARMSubtarget &Subtarget) const {
  unsigned Opc = Subtarget.isThumb()
                     ? (Subtarget.isMClass() ? ARM::t2MSR_M : ARM::t2MSR_AR)
                     : ARM::MSR;

  MachineInstrBuilder MIB = BuildMI(MBB, I, I->getDebugLoc(), get(Opc));

  if (Subtarget.isMClass())
    MIB.addImm(0x800);
  else
    MIB.addImm(8);

  MIB.addReg(SrcReg, getKillRegState(KillSrc))
      .add(predOps(ARMCC::AL))
      .addReg(ARM::CPSR, RegState::Implicit | RegState::Define);
}

void llvm::addUnpredicatedMveVpredNOp(MachineInstrBuilder &MIB) {
  MIB.addImm(ARMVCC::None);
  MIB.addReg(0);
  MIB.addReg(0); // tp_reg
}

void llvm::addUnpredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                      Register DestReg) {
  addUnpredicatedMveVpredNOp(MIB);
  MIB.addReg(DestReg, RegState::Undef);
}

void llvm::addPredicatedMveVpredNOp(MachineInstrBuilder &MIB, unsigned Cond) {
  MIB.addImm(Cond);
  MIB.addReg(ARM::VPR, RegState::Implicit);
  MIB.addReg(0); // tp_reg
}

void llvm::addPredicatedMveVpredROp(MachineInstrBuilder &MIB,
                                    unsigned Cond, unsigned Inactive) {
  addPredicatedMveVpredNOp(MIB, Cond);
  MIB.addReg(Inactive);
}

void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator I,
                                   const DebugLoc &DL, MCRegister DestReg,
                                   MCRegister SrcReg, bool KillSrc) const {
  bool GPRDest = ARM::GPRRegClass.contains(DestReg);
  bool GPRSrc = ARM::GPRRegClass.contains(SrcReg);

  if (GPRDest && GPRSrc) {
    BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    return;
  }

  bool SPRDest = ARM::SPRRegClass.contains(DestReg);
  bool SPRSrc = ARM::SPRRegClass.contains(SrcReg);

  unsigned Opc = 0;
  if (SPRDest && SPRSrc)
    Opc = ARM::VMOVS;
  else if (GPRDest && SPRSrc)
    Opc = ARM::VMOVRS;
  else if (SPRDest && GPRSrc)
    Opc = ARM::VMOVSR;
  else if (ARM::DPRRegClass.contains(DestReg, SrcReg) && Subtarget.hasFP64())
    Opc = ARM::VMOVD;
  else if (ARM::QPRRegClass.contains(DestReg, SrcReg))
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MQPRCopy;

  if (Opc) {
    MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opc), DestReg);
    MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR)
      MIB.addReg(SrcReg, getKillRegState(KillSrc));
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(MIB, DestReg);
    else if (Opc != ARM::MQPRCopy)
      MIB.add(predOps(ARMCC::AL));
    return;
  }

  // Handle register classes that require multiple instructions.
  unsigned BeginIdx = 0;
  unsigned SubRegs = 0;
  int Spacing = 1;
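  // For illustration, a QQPR copy (e.g. q0q1 = q2q3) is emitted below as two
  // sub-register copies, roughly
  //   VORRq q0, q2, q2
  //   VORRq q1, q3, q3
  // with implicit super-register def/kill operands added after the loop
  // (register names here are illustrative).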
  // Use VORRq when possible.
  if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 2;
  } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.hasNEON() ? ARM::VORRq : ARM::MVE_VORR;
    BeginIdx = ARM::qsub_0;
    SubRegs = 4;
  // Fall back to VMOVD.
  } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
  } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
  } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
  } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) {
    Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr;
    BeginIdx = ARM::gsub_0;
    SubRegs = 2;
  } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 2;
    Spacing = 2;
  } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 3;
    Spacing = 2;
  } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) {
    Opc = ARM::VMOVD;
    BeginIdx = ARM::dsub_0;
    SubRegs = 4;
    Spacing = 2;
  } else if (ARM::DPRRegClass.contains(DestReg, SrcReg) &&
             !Subtarget.hasFP64()) {
    Opc = ARM::VMOVS;
    BeginIdx = ARM::ssub_0;
    SubRegs = 2;
  } else if (SrcReg == ARM::CPSR) {
    copyFromCPSR(MBB, I, DestReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::CPSR) {
    copyToCPSR(MBB, I, SrcReg, KillSrc, Subtarget);
    return;
  } else if (DestReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::VPR) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_P0), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (DestReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(SrcReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMSR_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  } else if (SrcReg == ARM::FPSCR_NZCV) {
    assert(ARM::GPRRegClass.contains(DestReg));
    BuildMI(MBB, I, I->getDebugLoc(), get(ARM::VMRS_FPSCR_NZCVQC), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .add(predOps(ARMCC::AL));
    return;
  }

  assert(Opc && "Impossible reg-to-reg copy");

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  MachineInstrBuilder Mov;

  // Copy register tuples backward when the first Dest reg overlaps with
  // SrcReg.
  if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) {
    BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing);
    Spacing = -Spacing;
  }
#ifndef NDEBUG
  SmallSet<unsigned, 4> DstRegs;
#endif
  for (unsigned i = 0; i != SubRegs; ++i) {
    Register Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing);
    Register Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing);
    assert(Dst && Src && "Bad sub-register");
#ifndef NDEBUG
    assert(!DstRegs.count(Src) && "destructive vector copy");
    DstRegs.insert(Dst);
#endif
    Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src);
    // VORR (NEON or MVE) takes two source operands.
    if (Opc == ARM::VORRq || Opc == ARM::MVE_VORR) {
      Mov.addReg(Src);
    }
    // MVE VORR takes predicate operands in place of an ordinary condition.
    if (Opc == ARM::MVE_VORR)
      addUnpredicatedMveVpredROp(Mov, Dst);
    else
      Mov = Mov.add(predOps(ARMCC::AL));
    // MOVr can set CC.
    if (Opc == ARM::MOVr)
      Mov = Mov.add(condCodeOp());
  }
  // Add implicit super-register defs and kills to the last instruction.
  Mov->addRegisterDefined(DestReg, TRI);
  if (KillSrc)
    Mov->addRegisterKilled(SrcReg, TRI);
}
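// For illustration, VORRq only acts as a register copy when both source
// operands are the same register:
//   VORRq q0, q1, q1    ; q0 = q1, a copy
//   VORRq q0, q1, q2    ; q0 = q1 | q2, not a copy
// This is exactly the distinction isCopyInstrImpl below checks for.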
std::optional<DestSourcePair>
ARMBaseInstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
  // VMOVRRD is also a copy instruction, but it requires special handling.
  // It is a more complex form of copy, so we do not consider it here. The
  // isExtractSubregLike MI interface function could be used to recognize
  // such instructions.
  // VORRq is considered a move only if its two inputs are the same register.
  if (!MI.isMoveReg() ||
      (MI.getOpcode() == ARM::VORRq &&
       MI.getOperand(1).getReg() != MI.getOperand(2).getReg()))
    return std::nullopt;
  return DestSourcePair{MI.getOperand(0), MI.getOperand(1)};
}

std::optional<ParamLoadedValue>
ARMBaseInstrInfo::describeLoadedValue(const MachineInstr &MI,
                                      Register Reg) const {
  if (auto DstSrcPair = isCopyInstrImpl(MI)) {
    Register DstReg = DstSrcPair->Destination->getReg();

    // TODO: We don't handle cases where the forwarding reg is narrower/wider
    // than the copy registers. Consider for example:
    //
    //   s16 = VMOVS s0
    //   s17 = VMOVS s1
    //   call @callee(d0)
    //
    // We'd like to describe the call site value of d0 as d8, but this requires
    // gathering and merging the descriptions for the two VMOVS instructions.
    //
    // We also don't handle the reverse situation, where the forwarding reg is
    // narrower than the copy destination:
    //
    //   d8 = VMOVD d0
    //   call @callee(s1)
    //
    // We need to produce a fragment description (the call site value of s1 is
    // /not/ just d8).
    if (DstReg != Reg)
      return std::nullopt;
  }
  return TargetInstrInfo::describeLoadedValue(MI, Reg);
}

const MachineInstrBuilder &
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
                          unsigned SubIdx, unsigned State,
                          const TargetRegisterInfo *TRI) const {
  if (!SubIdx)
    return MIB.addReg(Reg, State);

  if (Register::isPhysicalRegister(Reg))
    return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
  return MIB.addReg(Reg, State, SubIdx);
}
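// For illustration, storeRegToStackSlot below dispatches on the spill size
// of the register class, e.g.
//   4 bytes:  GPR -> STRi12, SPR -> VSTRS
//   8 bytes:  DPR -> VSTRD, GPRPair -> STRD (v5TE) or STMIA
//   16 bytes: DPair -> VST1q64 when the slot is aligned and the stack can be
//             realigned, otherwise VSTMQIA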
void ARMBaseInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator I,
                                           Register SrcReg, bool isKill, int FI,
                                           const TargetRegisterClass *RC,
                                           const TargetRegisterInfo *TRI,
                                           Register VReg) const {
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align Alignment = MFI.getObjectAlign(FI);

  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRH))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::STRi12))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRS))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTR_P0_off))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::VSTRD))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      if (Subtarget.hasV5TEOps()) {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STRD));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        // Fallback to STM instruction, which has existed since the dawn of
        // time.
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::STMIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
        AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1q64))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMQIA))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::MVE_VSTRWU32));
      MIB.addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      // Use aligned spills if the stack can be realigned.
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64TPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        // FIXME: It's possible to only store part of the QQ register if the
        // spilled def has a sub-register index.
        BuildMI(MBB, I, DebugLoc(), get(ARM::VST1d64QPseudo))
            .addFrameIndex(FI)
            .addImm(16)
            .addReg(SrcReg, getKillRegState(isKill))
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DebugLoc(), get(ARM::MQQPRStore))
            .addReg(SrcReg, getKillRegState(isKill))
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(),
                                          get(ARM::VSTMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
        MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
        AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DebugLoc(), get(ARM::MQQQQPRStore))
          .addReg(SrcReg, getKillRegState(isKill))
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DebugLoc(), get(ARM::VSTMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_0, getKillRegState(isKill), TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_1, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_2, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_3, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_4, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_5, 0, TRI);
      MIB = AddDReg(MIB, SrcReg, ARM::dsub_6, 0, TRI);
      AddDReg(MIB, SrcReg, ARM::dsub_7, 0, TRI);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown reg class!");
  }
}

Register ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::STRrs:
  case ARM::t2STRs: // FIXME: don't use t2STRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::STRi12:
  case ARM::t2STRi12:
  case ARM::tSTRspi:
  case ARM::VSTRD:
  case ARM::VSTRS:
  case ARM::VSTR_P0_off:
  case ARM::MVE_VSTRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VST1q64:
  case ARM::VST1d64TPseudo:
  case ARM::VST1d64QPseudo:
    if (MI.getOperand(0).isFI() && MI.getOperand(2).getSubReg() == 0) {
      FrameIndex = MI.getOperand(0).getIndex();
      return MI.getOperand(2).getReg();
    }
    break;
  case ARM::VSTMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::MQQPRStore:
  case ARM::MQQQQPRStore:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

Register ARMBaseInstrInfo::isStoreToStackSlotPostFE(const MachineInstr &MI,
                                                    int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayStore() && hasStoreToStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return true;
  }
  return false;
}
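// For illustration, isStoreToStackSlot above only matches plain frame stores
// such as
//   STRi12 killed $r0, %fixed-stack.0, 0, 14, $noreg
// (returning r0 and the frame index); stores with a nonzero immediate or a
// register offset are rejected.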
void ARMBaseInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                            MachineBasicBlock::iterator I,
                                            Register DestReg, int FI,
                                            const TargetRegisterClass *RC,
                                            const TargetRegisterInfo *TRI,
                                            Register VReg) const {
  DebugLoc DL;
  if (I != MBB.end()) DL = I->getDebugLoc();
  MachineFunction &MF = *MBB.getParent();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const Align Alignment = MFI.getObjectAlign(FI);
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOLoad,
      MFI.getObjectSize(FI), Alignment);

  switch (TRI->getSpillSize(*RC)) {
  case 2:
    if (ARM::HPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRH), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 4:
    if (ARM::GPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::LDRi12), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::SPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::VCCRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDR_P0_off), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 8:
    if (ARM::DPRRegClass.hasSubClassEq(RC)) {
      BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
          .addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO)
          .add(predOps(ARMCC::AL));
    } else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB;

      if (Subtarget.hasV5TEOps()) {
        MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
        AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
        MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        // Fallback to LDM instruction, which has existed since the dawn of
        // time.
        MIB = BuildMI(MBB, I, DL, get(ARM::LDMIA))
                  .addFrameIndex(FI)
                  .addMemOperand(MMO)
                  .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
      }

      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 16:
    if (ARM::DPairRegClass.hasSubClassEq(RC) && Subtarget.hasNEON()) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF)) {
        BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        BuildMI(MBB, I, DL, get(ARM::VLDMQIA), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      }
    } else if (ARM::QPRRegClass.hasSubClassEq(RC) &&
               Subtarget.hasMVEIntegerOps()) {
      auto MIB = BuildMI(MBB, I, DL, get(ARM::MVE_VLDRWU32), DestReg);
      MIB.addFrameIndex(FI)
          .addImm(0)
          .addMemOperand(MMO);
      addUnpredicatedMveVpredNOp(MIB);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 24:
    if (ARM::DTripleRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64TPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .addMemOperand(MMO)
                                      .add(predOps(ARMCC::AL));
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 32:
    if (ARM::QQPRRegClass.hasSubClassEq(RC) ||
        ARM::MQQPRRegClass.hasSubClassEq(RC) ||
        ARM::DQuadRegClass.hasSubClassEq(RC)) {
      if (Alignment >= 16 && getRegisterInfo().canRealignStack(MF) &&
          Subtarget.hasNEON()) {
        BuildMI(MBB, I, DL, get(ARM::VLD1d64QPseudo), DestReg)
            .addFrameIndex(FI)
            .addImm(16)
            .addMemOperand(MMO)
            .add(predOps(ARMCC::AL));
      } else if (Subtarget.hasMVEIntegerOps()) {
        BuildMI(MBB, I, DL, get(ARM::MQQPRLoad), DestReg)
            .addFrameIndex(FI)
            .addMemOperand(MMO);
      } else {
        MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                      .addFrameIndex(FI)
                                      .add(predOps(ARMCC::AL))
                                      .addMemOperand(MMO);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
        MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
        if (DestReg.isPhysical())
          MIB.addReg(DestReg, RegState::ImplicitDefine);
      }
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  case 64:
    if (ARM::MQQQQPRRegClass.hasSubClassEq(RC) &&
        Subtarget.hasMVEIntegerOps()) {
      BuildMI(MBB, I, DL, get(ARM::MQQQQPRLoad), DestReg)
          .addFrameIndex(FI)
          .addMemOperand(MMO);
    } else if (ARM::QQQQPRRegClass.hasSubClassEq(RC)) {
      MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::VLDMDIA))
                                    .addFrameIndex(FI)
                                    .add(predOps(ARMCC::AL))
                                    .addMemOperand(MMO);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_0, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_1, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_2, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_3, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_4, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_5, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_6, RegState::DefineNoRead, TRI);
      MIB = AddDReg(MIB, DestReg, ARM::dsub_7, RegState::DefineNoRead, TRI);
      if (DestReg.isPhysical())
        MIB.addReg(DestReg, RegState::ImplicitDefine);
    } else
      llvm_unreachable("Unknown reg class!");
    break;
  default:
    llvm_unreachable("Unknown regclass!");
  }
}
Register ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                               int &FrameIndex) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::LDRrs:
  case ARM::t2LDRs: // FIXME: don't use t2LDRs to access frame.
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isReg() &&
        MI.getOperand(3).isImm() && MI.getOperand(2).getReg() == 0 &&
        MI.getOperand(3).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::LDRi12:
  case ARM::t2LDRi12:
  case ARM::tLDRspi:
  case ARM::VLDRD:
  case ARM::VLDRS:
  case ARM::VLDR_P0_off:
  case ARM::MVE_VLDRWU32:
    if (MI.getOperand(1).isFI() && MI.getOperand(2).isImm() &&
        MI.getOperand(2).getImm() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLD1q64:
  case ARM::VLD1d8TPseudo:
  case ARM::VLD1d16TPseudo:
  case ARM::VLD1d32TPseudo:
  case ARM::VLD1d64TPseudo:
  case ARM::VLD1d8QPseudo:
  case ARM::VLD1d16QPseudo:
  case ARM::VLD1d32QPseudo:
  case ARM::VLD1d64QPseudo:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::VLDMQIA:
    if (MI.getOperand(1).isFI() && MI.getOperand(0).getSubReg() == 0) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  case ARM::MQQPRLoad:
  case ARM::MQQQQPRLoad:
    if (MI.getOperand(1).isFI()) {
      FrameIndex = MI.getOperand(1).getIndex();
      return MI.getOperand(0).getReg();
    }
    break;
  }

  return 0;
}

Register ARMBaseInstrInfo::isLoadFromStackSlotPostFE(const MachineInstr &MI,
                                                     int &FrameIndex) const {
  SmallVector<const MachineMemOperand *, 1> Accesses;
  if (MI.mayLoad() && hasLoadFromStackSlot(MI, Accesses) &&
      Accesses.size() == 1) {
    FrameIndex =
        cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
            ->getFrameIndex();
    return true;
  }
  return false;
}
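// For illustration, with three scratch registers the MEMCPY pseudo expanded
// below becomes roughly
//   LDMIA_UPD r1!, {r4, r5, r6}
//   STMIA_UPD r0!, {r4, r5, r6}
// with the non-writeback LDMIA/STMIA forms used instead when the updated
// base register is dead (register names here are illustrative).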
/// Expands MEMCPY to either LDMIA/STMIA or LDMIA_UPD/STMIA_UPD
/// depending on whether the result is used.
void ARMBaseInstrInfo::expandMEMCPY(MachineBasicBlock::iterator MI) const {
  bool isThumb1 = Subtarget.isThumb1Only();
  bool isThumb2 = Subtarget.isThumb2();
  const ARMBaseInstrInfo *TII = Subtarget.getInstrInfo();

  DebugLoc dl = MI->getDebugLoc();
  MachineBasicBlock *BB = MI->getParent();

  MachineInstrBuilder LDM, STM;
  if (isThumb1 || !MI->getOperand(1).isDead()) {
    MachineOperand LDWb(MI->getOperand(1));
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA_UPD
                                        : isThumb1 ? ARM::tLDMIA_UPD
                                                   : ARM::LDMIA_UPD))
              .add(LDWb);
  } else {
    LDM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2LDMIA : ARM::LDMIA));
  }

  if (isThumb1 || !MI->getOperand(0).isDead()) {
    MachineOperand STWb(MI->getOperand(0));
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA_UPD
                                        : isThumb1 ? ARM::tSTMIA_UPD
                                                   : ARM::STMIA_UPD))
              .add(STWb);
  } else {
    STM = BuildMI(*BB, MI, dl, TII->get(isThumb2 ? ARM::t2STMIA : ARM::STMIA));
  }

  MachineOperand LDBase(MI->getOperand(3));
  LDM.add(LDBase).add(predOps(ARMCC::AL));

  MachineOperand STBase(MI->getOperand(2));
  STM.add(STBase).add(predOps(ARMCC::AL));

  // Sort the scratch registers into ascending order.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  SmallVector<unsigned, 6> ScratchRegs;
  for (MachineOperand &MO : llvm::drop_begin(MI->operands(), 5))
    ScratchRegs.push_back(MO.getReg());
  llvm::sort(ScratchRegs,
             [&TRI](const unsigned &Reg1, const unsigned &Reg2) -> bool {
               return TRI.getEncodingValue(Reg1) <
                      TRI.getEncodingValue(Reg2);
             });

  for (const auto &Reg : ScratchRegs) {
    LDM.addReg(Reg, RegState::Define);
    STM.addReg(Reg, RegState::Kill);
  }

  BB->erase(MI);
}

bool ARMBaseInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
  if (MI.getOpcode() == TargetOpcode::LOAD_STACK_GUARD) {
    expandLoadStackGuard(MI);
    MI.getParent()->erase(MI);
    return true;
  }

  if (MI.getOpcode() == ARM::MEMCPY) {
    expandMEMCPY(MI);
    return true;
  }

  // This hook gets to expand COPY instructions before they become
  // copyPhysReg() calls. Look for VMOVS instructions that can legally be
  // widened to VMOVD. We prefer the VMOVD when possible because it may be
  // changed into a VORR that can go down the NEON pipeline.
  if (!MI.isCopy() || Subtarget.dontWidenVMOVS() || !Subtarget.hasFP64())
    return false;

  // Look for a copy between even S-registers. That is where we keep floats
  // when using NEON v2f32 instructions for f32 arithmetic.
  Register DstRegS = MI.getOperand(0).getReg();
  Register SrcRegS = MI.getOperand(1).getReg();
  if (!ARM::SPRRegClass.contains(DstRegS, SrcRegS))
    return false;

  const TargetRegisterInfo *TRI = &getRegisterInfo();
  unsigned DstRegD = TRI->getMatchingSuperReg(DstRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  unsigned SrcRegD = TRI->getMatchingSuperReg(SrcRegS, ARM::ssub_0,
                                              &ARM::DPRRegClass);
  if (!DstRegD || !SrcRegD)
    return false;

  // We want to widen this into a DstRegD = VMOVD SrcRegD copy. This is only
  // legal if the COPY already defines the full DstRegD, and it isn't a
  // sub-register insertion.
  if (!MI.definesRegister(DstRegD, TRI) || MI.readsRegister(DstRegD, TRI))
    return false;

  // A dead copy shouldn't show up here, but reject it just in case.
  if (MI.getOperand(0).isDead())
    return false;

  // All clear, widen the COPY.
  LLVM_DEBUG(dbgs() << "widening: " << MI);
  MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);

  // Get rid of the old implicit-def of DstRegD. Leave it if it defines a Q-reg
  // or some other super-register.
  int ImpDefIdx = MI.findRegisterDefOperandIdx(DstRegD, /*TRI=*/nullptr);
  if (ImpDefIdx != -1)
    MI.removeOperand(ImpDefIdx);

  // Change the opcode and operands.
  MI.setDesc(get(ARM::VMOVD));
  MI.getOperand(0).setReg(DstRegD);
  MI.getOperand(1).setReg(SrcRegD);
  MIB.add(predOps(ARMCC::AL));

  // We are now reading SrcRegD instead of SrcRegS. This may upset the
  // register scavenger and machine verifier, so we need to indicate that we
  // are reading an undefined value from SrcRegD, but a proper value from
  // SrcRegS.
  MI.getOperand(1).setIsUndef();
  MIB.addReg(SrcRegS, RegState::Implicit);

  // SrcRegD may actually contain an unrelated value in the ssub_1
  // sub-register. Don't kill it. Only kill the ssub_0 sub-register.
  if (MI.getOperand(1).isKill()) {
    MI.getOperand(1).setIsKill(false);
    MI.addRegisterKilled(SrcRegS, TRI, true);
  }

  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
  return true;
}
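// For illustration, the widening above rewrites an f32 copy between even
// S-registers, e.g.
//   $s0 = COPY $s2
// into
//   $d0 = VMOVD undef $d1, implicit $s2
// so the verifier sees a well-defined read of s2 even though d1's upper
// lane may hold an unrelated value.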
  if (MI.getOperand(1).isKill()) {
    MI.getOperand(1).setIsKill(false);
    MI.addRegisterKilled(SrcRegS, TRI, true);
  }

  LLVM_DEBUG(dbgs() << "replaced by: " << MI);
  return true;
}

/// Create a copy of a const pool value. Update CPI to the new index and return
/// the label UID.
static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) {
  MachineConstantPool *MCP = MF.getConstantPool();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
  assert(MCPE.isMachineConstantPoolEntry() &&
         "Expecting a machine constantpool entry!");
  ARMConstantPoolValue *ACPV =
      static_cast<ARMConstantPoolValue *>(MCPE.Val.MachineCPVal);

  unsigned PCLabelId = AFI->createPICLabelUId();
  ARMConstantPoolValue *NewCPV = nullptr;

  // FIXME: The below assumes PIC relocation model and that the function
  // is Thumb mode (t1 or t2). PCAdjustment would be 8 for ARM mode PIC, and
  // zero for non-PIC in ARM or Thumb. The callers are all of thumb LDR
  // instructions, so that's probably OK, but is PIC always correct when
  // we get here?
  if (ACPV->isGlobalValue())
    NewCPV = ARMConstantPoolConstant::Create(
        cast<ARMConstantPoolConstant>(ACPV)->getGV(), PCLabelId, ARMCP::CPValue,
        4, ACPV->getModifier(), ACPV->mustAddCurrentAddress());
  else if (ACPV->isExtSymbol())
    NewCPV = ARMConstantPoolSymbol::Create(
        MF.getFunction().getContext(),
        cast<ARMConstantPoolSymbol>(ACPV)->getSymbol(), PCLabelId, 4);
  else if (ACPV->isBlockAddress())
    NewCPV = ARMConstantPoolConstant::Create(
        cast<ARMConstantPoolConstant>(ACPV)->getBlockAddress(), PCLabelId,
        ARMCP::CPBlockAddress, 4);
  else if (ACPV->isLSDA())
    NewCPV = ARMConstantPoolConstant::Create(&MF.getFunction(), PCLabelId,
                                             ARMCP::CPLSDA, 4);
  else if (ACPV->isMachineBasicBlock())
    NewCPV = ARMConstantPoolMBB::Create(
        MF.getFunction().getContext(),
        cast<ARMConstantPoolMBB>(ACPV)->getMBB(), PCLabelId, 4);
  else
    llvm_unreachable("Unexpected ARM constantpool value type!!");

  CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlign());
  return PCLabelId;
}

void ARMBaseInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     Register DestReg, unsigned SubIdx,
                                     const MachineInstr &Orig,
                                     const TargetRegisterInfo &TRI) const {
  unsigned Opcode = Orig.getOpcode();
  switch (Opcode) {
  default: {
    MachineInstr *MI = MBB.getParent()->CloneMachineInstr(&Orig);
    MI->substituteRegister(Orig.getOperand(0).getReg(), DestReg, SubIdx, TRI);
    MBB.insert(I, MI);
    break;
  }
  case ARM::tLDRpci_pic:
  case ARM::t2LDRpci_pic: {
    MachineFunction &MF = *MBB.getParent();
    unsigned CPI = Orig.getOperand(1).getIndex();
    unsigned PCLabelId = duplicateCPV(MF, CPI);
    BuildMI(MBB, I, Orig.getDebugLoc(), get(Opcode), DestReg)
        .addConstantPoolIndex(CPI)
        .addImm(PCLabelId)
        .cloneMemRefs(Orig);
    break;
  }
  }
}

MachineInstr &
ARMBaseInstrInfo::duplicate(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator InsertBefore,
                            const MachineInstr &Orig) const {
  MachineInstr &Cloned = TargetInstrInfo::duplicate(MBB, InsertBefore, Orig);
  MachineBasicBlock::instr_iterator I = Cloned.getIterator();
  for (;;) {
    switch (I->getOpcode()) {
    case ARM::tLDRpci_pic:
    case ARM::t2LDRpci_pic: {
      MachineFunction &MF = *MBB.getParent();
      unsigned CPI = I->getOperand(1).getIndex();
      unsigned PCLabelId = duplicateCPV(MF, CPI);
      I->getOperand(1).setIndex(CPI);
      I->getOperand(2).setImm(PCLabelId);
      break;
    }
    }
    if (!I->isBundledWithSucc())
      break;
    ++I;
  }
  return Cloned;
}

bool ARMBaseInstrInfo::produceSameValue(const MachineInstr &MI0,
                                        const MachineInstr &MI1,
                                        const MachineRegisterInfo *MRI) const {
  unsigned Opcode = MI0.getOpcode();
  if (Opcode == ARM::t2LDRpci || Opcode == ARM::t2LDRpci_pic ||
      Opcode == ARM::tLDRpci || Opcode == ARM::tLDRpci_pic ||
      Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
      Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
      Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
      Opcode == ARM::t2MOV_ga_pcrel) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    const MachineOperand &MO0 = MI0.getOperand(1);
    const MachineOperand &MO1 = MI1.getOperand(1);
    if (MO0.getOffset() != MO1.getOffset())
      return false;

    if (Opcode == ARM::LDRLIT_ga_pcrel || Opcode == ARM::LDRLIT_ga_pcrel_ldr ||
        Opcode == ARM::tLDRLIT_ga_pcrel || Opcode == ARM::t2LDRLIT_ga_pcrel ||
        Opcode == ARM::MOV_ga_pcrel || Opcode == ARM::MOV_ga_pcrel_ldr ||
        Opcode == ARM::t2MOV_ga_pcrel)
      // Ignore the PC labels.
      return MO0.getGlobal() == MO1.getGlobal();

    const MachineFunction *MF = MI0.getParent()->getParent();
    const MachineConstantPool *MCP = MF->getConstantPool();
    int CPI0 = MO0.getIndex();
    int CPI1 = MO1.getIndex();
    const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
    const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
    bool isARMCP0 = MCPE0.isMachineConstantPoolEntry();
    bool isARMCP1 = MCPE1.isMachineConstantPoolEntry();
    if (isARMCP0 && isARMCP1) {
      ARMConstantPoolValue *ACPV0 =
          static_cast<ARMConstantPoolValue *>(MCPE0.Val.MachineCPVal);
      ARMConstantPoolValue *ACPV1 =
          static_cast<ARMConstantPoolValue *>(MCPE1.Val.MachineCPVal);
      return ACPV0->hasSameValue(ACPV1);
    } else if (!isARMCP0 && !isARMCP1) {
      return MCPE0.Val.ConstVal == MCPE1.Val.ConstVal;
    }
    return false;
  } else if (Opcode == ARM::PICLDR) {
    if (MI1.getOpcode() != Opcode)
      return false;
    if (MI0.getNumOperands() != MI1.getNumOperands())
      return false;

    Register Addr0 = MI0.getOperand(1).getReg();
    Register Addr1 = MI1.getOperand(1).getReg();
    if (Addr0 != Addr1) {
      if (!MRI || !Addr0.isVirtual() || !Addr1.isVirtual())
        return false;

      // This assumes SSA form.
      MachineInstr *Def0 = MRI->getVRegDef(Addr0);
      MachineInstr *Def1 = MRI->getVRegDef(Addr1);
      // Check if the loaded values, e.g. a constantpool of a global address,
      // are the same.
      if (!produceSameValue(*Def0, *Def1, MRI))
        return false;
    }

    for (unsigned i = 3, e = MI0.getNumOperands(); i != e; ++i) {
      // %12 = PICLDR %11, 0, 14, %noreg
      const MachineOperand &MO0 = MI0.getOperand(i);
      const MachineOperand &MO1 = MI1.getOperand(i);
      if (!MO0.isIdenticalTo(MO1))
        return false;
    }
    return true;
  }

  return MI0.isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// areLoadsFromSameBasePtr - This is used by the pre-regalloc scheduler to
/// determine if two loads are loading from the same base address. It should
/// only return true if the base pointers are the same and the only
/// difference between the two addresses is the offset. It also returns the
/// offsets by reference.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2,
                                               int64_t &Offset1,
                                               int64_t &Offset2) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  if (!Load1->isMachineOpcode() || !Load2->isMachineOpcode())
    return false;

  auto IsLoadOpcode = [&](unsigned Opcode) {
    switch (Opcode) {
    default:
      return false;
    case ARM::LDRi12:
    case ARM::LDRBi12:
    case ARM::LDRD:
    case ARM::LDRH:
    case ARM::LDRSB:
    case ARM::LDRSH:
    case ARM::VLDRD:
    case ARM::VLDRS:
    case ARM::t2LDRi8:
    case ARM::t2LDRBi8:
    case ARM::t2LDRDi8:
    case ARM::t2LDRSHi8:
    case ARM::t2LDRi12:
    case ARM::t2LDRBi12:
    case ARM::t2LDRSHi12:
      return true;
    }
  };

  if (!IsLoadOpcode(Load1->getMachineOpcode()) ||
      !IsLoadOpcode(Load2->getMachineOpcode()))
    return false;

  // Check if base addresses and chain operands match.
  if (Load1->getOperand(0) != Load2->getOperand(0) ||
      Load1->getOperand(4) != Load2->getOperand(4))
    return false;

  // Index should be Reg0.
  if (Load1->getOperand(3) != Load2->getOperand(3))
    return false;

  // Determine the offsets.
  if (isa<ConstantSDNode>(Load1->getOperand(1)) &&
      isa<ConstantSDNode>(Load2->getOperand(1))) {
    Offset1 = cast<ConstantSDNode>(Load1->getOperand(1))->getSExtValue();
    Offset2 = cast<ConstantSDNode>(Load2->getOperand(1))->getSExtValue();
    return true;
  }

  return false;
}
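// For example (illustrative): two LDRi12 nodes reading [r1, #4] and
// [r1, #8] share the base, chain and a zero index register, so this returns
// true with Offset1 = 4 and Offset2 = 8; shouldScheduleLoadsNear below then
// decides whether clustering the pair is worthwhile.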
/// shouldScheduleLoadsNear - This is used by the pre-regalloc scheduler to
/// determine (in conjunction with areLoadsFromSameBasePtr) if two loads should
/// be scheduled together. On some targets if two loads are loading from
/// addresses in the same cache line, it's better if they are scheduled
/// together. This function takes two integers that represent the load offsets
/// from the common base address. It returns true if it decides it's desirable
/// to schedule the two loads together. "NumLoads" is the number of loads that
/// have already been scheduled after Load1.
///
/// FIXME: remove this in favor of the MachineInstr interface once pre-RA-sched
/// is permanently disabled.
bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2,
                                               int64_t Offset1, int64_t Offset2,
                                               unsigned NumLoads) const {
  // Don't worry about Thumb: just ARM and Thumb2.
  if (Subtarget.isThumb1Only()) return false;

  assert(Offset2 > Offset1);

  if ((Offset2 - Offset1) / 8 > 64)
    return false;

  // Check if the machine opcodes are different. If they are different
  // then we consider them to not be of the same base address,
  // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the
  // other LDRBi12. In this case, they are considered to be the same because
  // they are different encoding forms of the same basic instruction.
  if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) &&
      !((Load1->getMachineOpcode() == ARM::t2LDRBi8 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi12) ||
        (Load1->getMachineOpcode() == ARM::t2LDRBi12 &&
         Load2->getMachineOpcode() == ARM::t2LDRBi8)))
    return false; // FIXME: overly conservative?

  // Four loads in a row should be sufficient.
  if (NumLoads >= 3)
    return false;

  return true;
}

bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr &MI,
                                            const MachineBasicBlock *MBB,
                                            const MachineFunction &MF) const {
  // Debug info is never a scheduling boundary. It's necessary to be explicit
  // due to the special treatment of IT instructions below, otherwise a
  // dbg_value followed by an IT will result in the IT instruction being
  // considered a scheduling hazard, which is wrong. It should be the actual
  // instruction preceding the dbg_value instruction(s), just like it is
  // when debug info is not present.
  if (MI.isDebugInstr())
    return false;

  // Terminators and labels can't be scheduled around.
  if (MI.isTerminator() || MI.isPosition())
    return true;

  // INLINEASM_BR can jump to another block
  if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
    return true;

  if (isSEHInstruction(MI))
    return true;

  // Treat the start of the IT block as a scheduling boundary, but schedule
  // t2IT along with all instructions following it.
  // FIXME: This is a big hammer. But the alternative is to add all potential
  // true and anti dependencies to IT block instructions as implicit operands
  // to the t2IT instruction. The added compile time and complexity does not
  // seem worth it.
  MachineBasicBlock::const_iterator I = MI;
  // Make sure to skip any debug instructions
  while (++I != MBB->end() && I->isDebugInstr())
    ;
  if (I != MBB->end() && I->getOpcode() == ARM::t2IT)
    return true;

  // Don't attempt to schedule around any instruction that defines
  // a stack-oriented pointer, as it's unlikely to be profitable. This
  // saves compile time, because it doesn't require every single
  // stack slot reference to depend on the instruction that does the
  // modification.
  // Calls don't actually change the stack pointer, even if they have imp-defs.
  // No ARM calling conventions change the stack pointer. (X86 calling
  // conventions sometimes do).
  if (!MI.isCall() && MI.definesRegister(ARM::SP, /*TRI=*/nullptr))
    return true;

  return false;
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &MBB,
                    unsigned NumCycles, unsigned ExtraPredCycles,
                    BranchProbability Probability) const {
  if (!NumCycles)
    return false;

  // If we are optimizing for size, see if the branch in the predecessor can be
  // lowered to cbn?z by the constant island lowering pass, and return false if
  // so. This results in a shorter instruction sequence.
  if (MBB.getParent()->getFunction().hasOptSize()) {
    MachineBasicBlock *Pred = *MBB.pred_begin();
    if (!Pred->empty()) {
      MachineInstr *LastMI = &*Pred->rbegin();
      if (LastMI->getOpcode() == ARM::t2Bcc) {
        const TargetRegisterInfo *TRI = &getRegisterInfo();
        MachineInstr *CmpMI = findCMPToFoldIntoCBZ(LastMI, TRI);
        if (CmpMI)
          return false;
      }
    }
  }
  return isProfitableToIfCvt(MBB, NumCycles, ExtraPredCycles,
                             MBB, 0, 0, Probability);
}

bool ARMBaseInstrInfo::
isProfitableToIfCvt(MachineBasicBlock &TBB,
                    unsigned TCycles, unsigned TExtra,
                    MachineBasicBlock &FBB,
                    unsigned FCycles, unsigned FExtra,
                    BranchProbability Probability) const {
  if (!TCycles)
    return false;

  // In Thumb code we often end up trading one branch for an IT block, and
  // if we are cloning the instruction this can increase code size. Prevent
  // blocks with multiple predecessors from being ifcvted to prevent this
  // cloning.
  if (Subtarget.isThumb2() && TBB.getParent()->getFunction().hasMinSize()) {
    if (TBB.pred_size() != 1 || FBB.pred_size() != 1)
      return false;
  }

  // Attempt to estimate the relative costs of predication versus branching.
  // Here we scale up each component of UnpredCost to avoid precision issues
  // when scaling TCycles/FCycles by Probability.
  const unsigned ScalingUpFactor = 1024;

  unsigned PredCost = (TCycles + FCycles + TExtra + FExtra) * ScalingUpFactor;
  unsigned UnpredCost;
  if (!Subtarget.hasBranchPredictor()) {
    // When we don't have a branch predictor it's always cheaper to not take a
    // branch than take it, so we have to take that into account.
    unsigned NotTakenBranchCost = 1;
    unsigned TakenBranchCost = Subtarget.getMispredictionPenalty();
    unsigned TUnpredCycles, FUnpredCycles;
    if (!FCycles) {
      // Triangle: TBB is the fallthrough
      TUnpredCycles = TCycles + NotTakenBranchCost;
      FUnpredCycles = TakenBranchCost;
    } else {
      // Diamond: TBB is the block that is branched to, FBB is the fallthrough
      TUnpredCycles = TCycles + TakenBranchCost;
      FUnpredCycles = FCycles + NotTakenBranchCost;
      // The branch at the end of FBB will disappear when it's predicated, so
      // discount it from PredCost.
      PredCost -= 1 * ScalingUpFactor;
    }
    // The total cost is the cost of each path scaled by their probabilities.
    unsigned TUnpredCost = Probability.scale(TUnpredCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FUnpredCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    // When predicating assume that the first IT can be folded away but later
    // ones cost one cycle each
    if (Subtarget.isThumb2() && TCycles + FCycles > 4) {
      PredCost += ((TCycles + FCycles - 4) / 4) * ScalingUpFactor;
    }
  } else {
    unsigned TUnpredCost = Probability.scale(TCycles * ScalingUpFactor);
    unsigned FUnpredCost =
        Probability.getCompl().scale(FCycles * ScalingUpFactor);
    UnpredCost = TUnpredCost + FUnpredCost;
    UnpredCost += 1 * ScalingUpFactor; // The branch itself
    UnpredCost += Subtarget.getMispredictionPenalty() * ScalingUpFactor / 10;
  }

  return PredCost <= UnpredCost;
}
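// A worked example of the branch-predictor path above (illustrative numbers,
// TExtra = FExtra = 0): for a triangle with TCycles = 2, FCycles = 0, a
// misprediction penalty of 10 and Probability = 1/2, PredCost is
// 2 * 1024 = 2048, while UnpredCost is 1024 (scaled TBB cycles) + 1024 (the
// branch itself) + 1024 (penalty term) = 3072, so if-conversion is
// considered profitable.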
unsigned
ARMBaseInstrInfo::extraSizeToPredicateInstructions(const MachineFunction &MF,
                                                   unsigned NumInsts) const {
  // Thumb2 needs a 2-byte IT instruction to predicate up to 4 instructions.
  // ARM has a condition code field in every predicable instruction, using it
  // doesn't change code size.
  if (!Subtarget.isThumb2())
    return 0;

  // It's possible that the size of the IT is restricted to a single block.
  unsigned MaxInsts = Subtarget.restrictIT() ? 1 : 4;
  return divideCeil(NumInsts, MaxInsts) * 2;
}

unsigned
ARMBaseInstrInfo::predictBranchSizeForIfCvt(MachineInstr &MI) const {
  // If this branch is likely to be folded into the comparison to form a
  // CB(N)Z, then removing it won't reduce code size at all, because that will
  // just replace the CB(N)Z with a CMP.
  if (MI.getOpcode() == ARM::t2Bcc &&
      findCMPToFoldIntoCBZ(&MI, &getRegisterInfo()))
    return 0;

  unsigned Size = getInstSizeInBytes(MI);

  // For Thumb2, all branches are 32-bit instructions during the if conversion
  // pass, but may be replaced with 16-bit instructions during size reduction.
  // Since the branches considered by if conversion tend to be forward branches
  // over small basic blocks, they are very likely to be in range for the
  // narrow instructions, so we assume the final code size will be half what it
  // currently is.
  if (Subtarget.isThumb2())
    Size /= 2;

  return Size;
}

bool
ARMBaseInstrInfo::isProfitableToUnpredicate(MachineBasicBlock &TMBB,
                                            MachineBasicBlock &FMBB) const {
  // Reduce false anti-dependencies to let the target's out-of-order execution
  // engine do its thing.
  return Subtarget.isProfitableToUnpredicate();
}

/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
ARMCC::CondCodes llvm::getInstrPredicate(const MachineInstr &MI,
                                         Register &PredReg) {
  int PIdx = MI.findFirstPredOperandIdx();
  if (PIdx == -1) {
    PredReg = 0;
    return ARMCC::AL;
  }

  PredReg = MI.getOperand(PIdx+1).getReg();
  return (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
}

unsigned llvm::getMatchingCondBranchOpcode(unsigned Opc) {
  if (Opc == ARM::B)
    return ARM::Bcc;
  if (Opc == ARM::tB)
    return ARM::tBcc;
  if (Opc == ARM::t2B)
    return ARM::t2Bcc;

  llvm_unreachable("Unknown unconditional branch opcode!");
}
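// Commuting a MOVCC swaps its true and false uses, so the predicate must be
// inverted to preserve semantics; e.g. (illustrative):
//   %d = MOVCCr %a, %b, EQ, $cpsr  ==>  %d = MOVCCr %b, %a, NE, $cpsr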
MachineInstr *ARMBaseInstrInfo::commuteInstructionImpl(MachineInstr &MI,
                                                       bool NewMI,
                                                       unsigned OpIdx1,
                                                       unsigned OpIdx2) const {
  switch (MI.getOpcode()) {
  case ARM::MOVCCr:
  case ARM::t2MOVCCr: {
    // MOVCC can be commuted by inverting the condition.
    Register PredReg;
    ARMCC::CondCodes CC = getInstrPredicate(MI, PredReg);
    // MOVCC AL can't be inverted. Shouldn't happen.
    if (CC == ARMCC::AL || PredReg != ARM::CPSR)
      return nullptr;
    MachineInstr *CommutedMI =
        TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
    if (!CommutedMI)
      return nullptr;
    // After swapping the MOVCC operands, also invert the condition.
    CommutedMI->getOperand(CommutedMI->findFirstPredOperandIdx())
        .setImm(ARMCC::getOppositeCondition(CC));
    return CommutedMI;
  }
  }
  return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);
}

/// Identify instructions that can be folded into a MOVCC instruction, and
/// return the defining instruction.
MachineInstr *
ARMBaseInstrInfo::canFoldIntoMOVCC(Register Reg, const MachineRegisterInfo &MRI,
                                   const TargetInstrInfo *TII) const {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.hasOneNonDBGUse(Reg))
    return nullptr;
  MachineInstr *MI = MRI.getVRegDef(Reg);
  if (!MI)
    return nullptr;
  // Check if MI can be predicated and folded into the MOVCC.
  if (!isPredicable(*MI))
    return nullptr;
  // Check if MI has any non-dead defs or physreg uses. This also detects
  // predicated instructions which will be reading CPSR.
  for (const MachineOperand &MO : llvm::drop_begin(MI->operands(), 1)) {
    // Reject frame index operands, PEI can't handle the predicated pseudos.
    if (MO.isFI() || MO.isCPI() || MO.isJTI())
      return nullptr;
    if (!MO.isReg())
      continue;
    // MI can't have any tied operands, that would conflict with predication.
    if (MO.isTied())
      return nullptr;
    if (MO.getReg().isPhysical())
      return nullptr;
    if (MO.isDef() && !MO.isDead())
      return nullptr;
  }
  bool DontMoveAcrossStores = true;
  if (!MI->isSafeToMove(/* AliasAnalysis = */ nullptr, DontMoveAcrossStores))
    return nullptr;
  return MI;
}

bool ARMBaseInstrInfo::analyzeSelect(const MachineInstr &MI,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     unsigned &TrueOp, unsigned &FalseOp,
                                     bool &Optimizable) const {
  assert((MI.getOpcode() == ARM::MOVCCr || MI.getOpcode() == ARM::t2MOVCCr) &&
         "Unknown select instruction");
  // MOVCC operands:
  // 0: Def.
  // 1: True use.
  // 2: False use.
  // 3: Condition code.
  // 4: CPSR use.
  TrueOp = 1;
  FalseOp = 2;
  Cond.push_back(MI.getOperand(3));
  Cond.push_back(MI.getOperand(4));
  // We can always fold a def.
  Optimizable = true;
  return false;
}
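// optimizeSelect below folds the single definition of one MOVCC operand into
// a predicated instruction; e.g. (illustrative):
//   %t = ADDri %a, 1
//   %d = MOVCCr %f, %t, EQ, $cpsr
// becomes
//   %d = ADDri %a, 1, EQ, $cpsr, implicit %f (tied to %d)
// where the tied implicit use supplies the value when the predicate is
// false.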
1 : 2);2356Register DestReg = MI.getOperand(0).getReg();2357const TargetRegisterClass *FalseClass = MRI.getRegClass(FalseReg.getReg());2358const TargetRegisterClass *TrueClass = MRI.getRegClass(TrueReg.getReg());2359if (!MRI.constrainRegClass(DestReg, FalseClass))2360return nullptr;2361if (!MRI.constrainRegClass(DestReg, TrueClass))2362return nullptr;23632364// Create a new predicated version of DefMI.2365// Rfalse is the first use.2366MachineInstrBuilder NewMI =2367BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), DefMI->getDesc(), DestReg);23682369// Copy all the DefMI operands, excluding its (null) predicate.2370const MCInstrDesc &DefDesc = DefMI->getDesc();2371for (unsigned i = 1, e = DefDesc.getNumOperands();2372i != e && !DefDesc.operands()[i].isPredicate(); ++i)2373NewMI.add(DefMI->getOperand(i));23742375unsigned CondCode = MI.getOperand(3).getImm();2376if (Invert)2377NewMI.addImm(ARMCC::getOppositeCondition(ARMCC::CondCodes(CondCode)));2378else2379NewMI.addImm(CondCode);2380NewMI.add(MI.getOperand(4));23812382// DefMI is not the -S version that sets CPSR, so add an optional %noreg.2383if (NewMI->hasOptionalDef())2384NewMI.add(condCodeOp());23852386// The output register value when the predicate is false is an implicit2387// register operand tied to the first def.2388// The tie makes the register allocator ensure the FalseReg is allocated the2389// same register as operand 0.2390FalseReg.setImplicit();2391NewMI.add(FalseReg);2392NewMI->tieOperands(0, NewMI->getNumOperands() - 1);23932394// Update SeenMIs set: register newly created MI and erase removed DefMI.2395SeenMIs.insert(NewMI);2396SeenMIs.erase(DefMI);23972398// If MI is inside a loop, and DefMI is outside the loop, then kill flags on2399// DefMI would be invalid when tranferred inside the loop. Checking for a2400// loop is expensive, but at least remove kill flags if they are in different2401// BBs.2402if (DefMI->getParent() != MI.getParent())2403NewMI->clearKillInfo();24042405// The caller will erase MI, but not DefMI.2406DefMI->eraseFromParent();2407return NewMI;2408}24092410/// Map pseudo instructions that imply an 'S' bit onto real opcodes. 
/// Map pseudo instructions that imply an 'S' bit onto real opcodes. Whether
/// the instruction is encoded with an 'S' bit is determined by the optional
/// CPSR def operand.
///
/// This will go away once we can teach tblgen how to set the optional CPSR def
/// operand itself.
struct AddSubFlagsOpcodePair {
  uint16_t PseudoOpc;
  uint16_t MachineOpc;
};

static const AddSubFlagsOpcodePair AddSubFlagsOpcodeMap[] = {
  {ARM::ADDSri, ARM::ADDri},
  {ARM::ADDSrr, ARM::ADDrr},
  {ARM::ADDSrsi, ARM::ADDrsi},
  {ARM::ADDSrsr, ARM::ADDrsr},

  {ARM::SUBSri, ARM::SUBri},
  {ARM::SUBSrr, ARM::SUBrr},
  {ARM::SUBSrsi, ARM::SUBrsi},
  {ARM::SUBSrsr, ARM::SUBrsr},

  {ARM::RSBSri, ARM::RSBri},
  {ARM::RSBSrsi, ARM::RSBrsi},
  {ARM::RSBSrsr, ARM::RSBrsr},

  {ARM::tADDSi3, ARM::tADDi3},
  {ARM::tADDSi8, ARM::tADDi8},
  {ARM::tADDSrr, ARM::tADDrr},
  {ARM::tADCS, ARM::tADC},

  {ARM::tSUBSi3, ARM::tSUBi3},
  {ARM::tSUBSi8, ARM::tSUBi8},
  {ARM::tSUBSrr, ARM::tSUBrr},
  {ARM::tSBCS, ARM::tSBC},
  {ARM::tRSBS, ARM::tRSB},
  {ARM::tLSLSri, ARM::tLSLri},

  {ARM::t2ADDSri, ARM::t2ADDri},
  {ARM::t2ADDSrr, ARM::t2ADDrr},
  {ARM::t2ADDSrs, ARM::t2ADDrs},

  {ARM::t2SUBSri, ARM::t2SUBri},
  {ARM::t2SUBSrr, ARM::t2SUBrr},
  {ARM::t2SUBSrs, ARM::t2SUBrs},

  {ARM::t2RSBSri, ARM::t2RSBri},
  {ARM::t2RSBSrs, ARM::t2RSBrs},
};

unsigned llvm::convertAddSubFlagsOpcode(unsigned OldOpc) {
  for (const auto &Entry : AddSubFlagsOpcodeMap)
    if (OldOpc == Entry.PseudoOpc)
      return Entry.MachineOpc;
  return 0;
}

void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator &MBBI,
                                   const DebugLoc &dl, Register DestReg,
                                   Register BaseReg, int NumBytes,
                                   ARMCC::CondCodes Pred, Register PredReg,
                                   const ARMBaseInstrInfo &TII,
                                   unsigned MIFlags) {
  if (NumBytes == 0 && DestReg != BaseReg) {
    BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    return;
  }

  bool isSub = NumBytes < 0;
  if (isSub) NumBytes = -NumBytes;

  while (NumBytes) {
    unsigned RotAmt = ARM_AM::getSOImmValRotate(NumBytes);
    unsigned ThisVal = NumBytes & llvm::rotr<uint32_t>(0xFF, RotAmt);
    assert(ThisVal && "Didn't extract field correctly");

    // We will handle these bits from offset, clear them.
    NumBytes &= ~ThisVal;

    assert(ARM_AM::getSOImmVal(ThisVal) != -1 && "Bit extraction didn't work?");

    // Build the new ADD / SUB.
    unsigned Opc = isSub ? ARM::SUBri : ARM::ADDri;
    BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg)
        .addReg(BaseReg, RegState::Kill)
        .addImm(ThisVal)
        .add(predOps(Pred, PredReg))
        .add(condCodeOp())
        .setMIFlags(MIFlags);
    BaseReg = DestReg;
  }
}
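// The loop above peels off one ARM modified-immediate chunk per iteration;
// e.g. (illustrative) NumBytes = 0x10004 is not a valid so_imm, so it is
// materialized as two instructions:
//   add rD, rN, #4
//   add rD, rD, #0x10000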
bool llvm::tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget,
                                      MachineFunction &MF, MachineInstr *MI,
                                      unsigned NumBytes) {
  // This optimisation potentially adds lots of load and store
  // micro-operations, it's only really a great benefit to code-size.
  if (!Subtarget.hasMinSize())
    return false;

  // If only one register is pushed/popped, LLVM can use an LDR/STR
  // instead. We can't modify those so make sure we're dealing with an
  // instruction we understand.
  bool IsPop = isPopOpcode(MI->getOpcode());
  bool IsPush = isPushOpcode(MI->getOpcode());
  if (!IsPush && !IsPop)
    return false;

  bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD ||
                      MI->getOpcode() == ARM::VLDMDIA_UPD;
  bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH ||
                     MI->getOpcode() == ARM::tPOP ||
                     MI->getOpcode() == ARM::tPOP_RET;

  assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP &&
                          MI->getOperand(1).getReg() == ARM::SP)) &&
         "trying to fold sp update into non-sp-updating push/pop");

  // The VFP push & pop act on D-registers, so we can only fold an adjustment
  // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try
  // if this is violated.
  if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0)
    return false;

  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4. Thumb1 starts after the predicate.
  int RegListIdx = IsT1PushPop ? 2 : 4;

  // Calculate the space we'll need in terms of registers.
  unsigned RegsNeeded;
  const TargetRegisterClass *RegClass;
  if (IsVFPPushPop) {
    RegsNeeded = NumBytes / 8;
    RegClass = &ARM::DPRRegClass;
  } else {
    RegsNeeded = NumBytes / 4;
    RegClass = &ARM::GPRRegClass;
  }

  // We're going to have to strip all list operands off before
  // re-adding them since the order matters, so save the existing ones
  // for later.
  SmallVector<MachineOperand, 4> RegList;

  // We're also going to need the first register transferred by this
  // instruction, which won't necessarily be the first register in the list.
  unsigned FirstRegEnc = -1;

  const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo();
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) {
    MachineOperand &MO = MI->getOperand(i);
    RegList.push_back(MO);

    if (MO.isReg() && !MO.isImplicit() &&
        TRI->getEncodingValue(MO.getReg()) < FirstRegEnc)
      FirstRegEnc = TRI->getEncodingValue(MO.getReg());
  }

  const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF);

  // Now try to find enough space in the reglist to allocate NumBytes.
  for (int CurRegEnc = FirstRegEnc - 1; CurRegEnc >= 0 && RegsNeeded;
       --CurRegEnc) {
    unsigned CurReg = RegClass->getRegister(CurRegEnc);
    if (IsT1PushPop && CurRegEnc > TRI->getEncodingValue(ARM::R7))
      continue;
    if (!IsPop) {
      // Pushing any register is completely harmless, mark the register
      // involved as undef since we don't care about its value and must not
      // restore it during stack unwinding.
      RegList.push_back(MachineOperand::CreateReg(CurReg, false, false,
                                                  false, false, true));
      --RegsNeeded;
      continue;
    }

    // However, we can only pop an extra register if it's not live. For
    // registers live within the function we might clobber a return value
    // register; the other way a register can be live here is if it's
    // callee-saved.
    if (isCalleeSavedRegister(CurReg, CSRegs) ||
        MI->getParent()->computeRegisterLiveness(TRI, CurReg, MI) !=
            MachineBasicBlock::LQR_Dead) {
      // VFP pops don't allow holes in the register list, so any skip is fatal
      // for our transformation.
      // GPR pops do, so we should just keep looking.
      if (IsVFPPushPop)
        return false;
      else
        continue;
    }

    // Mark the unimportant registers as <def,dead> in the POP.
    RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false,
                                                true));
    --RegsNeeded;
  }

  if (RegsNeeded > 0)
    return false;

  // Finally we know we can profitably perform the optimisation so go
  // ahead: strip all existing registers off and add them back again
  // in the right order.
  for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i)
    MI->removeOperand(i);

  // Add the complete list back in.
  MachineInstrBuilder MIB(MF, &*MI);
  for (const MachineOperand &MO : llvm::reverse(RegList))
    MIB.add(MO);

  return true;
}
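// For example (illustrative), with NumBytes == 8 a prologue sequence
//   push {r4, r7, lr}
//   sub  sp, sp, #8
// can be folded into a single
//   push {r2, r3, r4, r7, lr}
// where r2 and r3 are pushed merely as undef padding.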
bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
                                Register FrameReg, int &Offset,
                                const ARMBaseInstrInfo &TII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MI.getDesc();
  unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask);
  bool isSub = false;

  // Memory operands in inline assembly always use AddrMode2.
  if (Opcode == ARM::INLINEASM || Opcode == ARM::INLINEASM_BR)
    AddrMode = ARMII::AddrMode2;

  if (Opcode == ARM::ADDri) {
    Offset += MI.getOperand(FrameRegIdx+1).getImm();
    if (Offset == 0) {
      // Turn it into a move.
      MI.setDesc(TII.get(ARM::MOVr));
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.removeOperand(FrameRegIdx+1);
      Offset = 0;
      return true;
    } else if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
      MI.setDesc(TII.get(ARM::SUBri));
    }

    // Common case: small offset, fits into instruction.
    if (ARM_AM::getSOImmVal(Offset) != -1) {
      // Replace the FrameIndex with sp / fp
      MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
      MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset);
      Offset = 0;
      return true;
    }

    // Otherwise, pull as much of the immediate into this ADDri/SUBri
    // as possible.
    unsigned RotAmt = ARM_AM::getSOImmValRotate(Offset);
    unsigned ThisImmVal = Offset & llvm::rotr<uint32_t>(0xFF, RotAmt);

    // We will handle these bits from offset, clear them.
    Offset &= ~ThisImmVal;

    // Get the properly encoded SOImmVal field.
    assert(ARM_AM::getSOImmVal(ThisImmVal) != -1 &&
           "Bit extraction didn't work?");
    MI.getOperand(FrameRegIdx+1).ChangeToImmediate(ThisImmVal);
  } else {
    unsigned ImmIdx = 0;
    int InstrOffs = 0;
    unsigned NumBits = 0;
    unsigned Scale = 1;
    switch (AddrMode) {
    case ARMII::AddrMode_i12:
      ImmIdx = FrameRegIdx + 1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 12;
      break;
    case ARMII::AddrMode2:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM2Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM2Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 12;
      break;
    case ARMII::AddrMode3:
      ImmIdx = FrameRegIdx+2;
      InstrOffs = ARM_AM::getAM3Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM3Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      break;
    case ARMII::AddrMode4:
    case ARMII::AddrMode6:
      // Can't fold any offset even if it's zero.
      return false;
    case ARMII::AddrMode5:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 4;
      break;
    case ARMII::AddrMode5FP16:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = ARM_AM::getAM5Offset(MI.getOperand(ImmIdx).getImm());
      if (ARM_AM::getAM5Op(MI.getOperand(ImmIdx).getImm()) == ARM_AM::sub)
        InstrOffs *= -1;
      NumBits = 8;
      Scale = 2;
      break;
    case ARMII::AddrModeT2_i7:
    case ARMII::AddrModeT2_i7s2:
    case ARMII::AddrModeT2_i7s4:
      ImmIdx = FrameRegIdx+1;
      InstrOffs = MI.getOperand(ImmIdx).getImm();
      NumBits = 7;
      Scale = (AddrMode == ARMII::AddrModeT2_i7s2 ? 2 :
               AddrMode == ARMII::AddrModeT2_i7s4 ? 4 : 1);
      break;
    default:
      llvm_unreachable("Unsupported addressing mode!");
    }

    Offset += InstrOffs * Scale;
    assert((Offset & (Scale-1)) == 0 && "Can't encode this offset!");
    if (Offset < 0) {
      Offset = -Offset;
      isSub = true;
    }

    // Attempt to fold the address computation if the opcode has offset bits.
    if (NumBits > 0) {
      // Common case: small offset, fits into instruction.
      MachineOperand &ImmOp = MI.getOperand(ImmIdx);
      int ImmedOffset = Offset / Scale;
      unsigned Mask = (1 << NumBits) - 1;
      if ((unsigned)Offset <= Mask * Scale) {
        // Replace the FrameIndex with sp
        MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
        // FIXME: When addrmode2 goes away, this will simplify (like the
        // T2 version), as the LDR.i12 versions don't need the encoding
        // tricks for the offset value.
        if (isSub) {
          if (AddrMode == ARMII::AddrMode_i12)
            ImmedOffset = -ImmedOffset;
          else
            ImmedOffset |= 1 << NumBits;
        }
        ImmOp.ChangeToImmediate(ImmedOffset);
        Offset = 0;
        return true;
      }

      // Otherwise, it didn't fit. Pull in what we can to simplify the immed.
      ImmedOffset = ImmedOffset & Mask;
      if (isSub) {
        if (AddrMode == ARMII::AddrMode_i12)
          ImmedOffset = -ImmedOffset;
        else
          ImmedOffset |= 1 << NumBits;
      }
      ImmOp.ChangeToImmediate(ImmedOffset);
      Offset &= ~(Mask*Scale);
    }
  }

  Offset = (isSub) ? -Offset : Offset;
  return Offset == 0;
}

/// analyzeCompare - For a comparison instruction, return the source registers
/// in SrcReg and SrcReg2 if having two register operands, and the value it
/// compares against in CmpValue. Return true if the comparison instruction
/// can be analyzed.
bool ARMBaseInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
                                      Register &SrcReg2, int64_t &CmpMask,
                                      int64_t &CmpValue) const {
  switch (MI.getOpcode()) {
  default: break;
  case ARM::CMPri:
  case ARM::t2CMPri:
  case ARM::tCMPi8:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = ~0;
    CmpValue = MI.getOperand(1).getImm();
    return true;
  case ARM::CMPrr:
  case ARM::t2CMPrr:
  case ARM::tCMPr:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = MI.getOperand(1).getReg();
    CmpMask = ~0;
    CmpValue = 0;
    return true;
  case ARM::TSTri:
  case ARM::t2TSTri:
    SrcReg = MI.getOperand(0).getReg();
    SrcReg2 = 0;
    CmpMask = MI.getOperand(1).getImm();
    CmpValue = 0;
    return true;
  }

  return false;
}
/// isSuitableForMask - Identify a suitable 'and' instruction that
/// operates on the given source register and applies the same mask
/// as a 'tst' instruction. Provide a limited look-through for copies.
/// When successful, MI will hold the found instruction.
static bool isSuitableForMask(MachineInstr *&MI, Register SrcReg,
                              int CmpMask, bool CommonUse) {
  switch (MI->getOpcode()) {
  case ARM::ANDri:
  case ARM::t2ANDri:
    if (CmpMask != MI->getOperand(2).getImm())
      return false;
    if (SrcReg == MI->getOperand(CommonUse ? 1 : 0).getReg())
      return true;
    break;
  }

  return false;
}

/// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
/// the condition code if we modify the instructions such that flags are
/// set by ADD(a,b,X).
inline static ARMCC::CondCodes getCmpToAddCondition(ARMCC::CondCodes CC) {
  switch (CC) {
  default: return ARMCC::AL;
  case ARMCC::HS: return ARMCC::LO;
  case ARMCC::LO: return ARMCC::HS;
  case ARMCC::VS: return ARMCC::VS;
  case ARMCC::VC: return ARMCC::VC;
  }
}

/// isRedundantFlagInstr - check whether the first instruction, whose only
/// purpose is to update flags, can be made redundant.
/// CMPrr can be made redundant by SUBrr if the operands are the same.
/// CMPri can be made redundant by SUBri if the operands are the same.
/// CMPrr(r0, r1) can be made redundant by ADDr[ri](r0, r1, X).
/// This function can be extended later on.
inline static bool isRedundantFlagInstr(const MachineInstr *CmpI,
                                        Register SrcReg, Register SrcReg2,
                                        int64_t ImmValue,
                                        const MachineInstr *OI,
                                        bool &IsThumb1) {
  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::SUBrr || OI->getOpcode() == ARM::t2SUBrr) &&
      ((OI->getOperand(1).getReg() == SrcReg &&
        OI->getOperand(2).getReg() == SrcReg2) ||
       (OI->getOperand(1).getReg() == SrcReg2 &&
        OI->getOperand(2).getReg() == SrcReg))) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPr && OI->getOpcode() == ARM::tSUBrr &&
      ((OI->getOperand(2).getReg() == SrcReg &&
        OI->getOperand(3).getReg() == SrcReg2) ||
       (OI->getOperand(2).getReg() == SrcReg2 &&
        OI->getOperand(3).getReg() == SrcReg))) {
    IsThumb1 = true;
    return true;
  }

  if ((CmpI->getOpcode() == ARM::CMPri || CmpI->getOpcode() == ARM::t2CMPri) &&
      (OI->getOpcode() == ARM::SUBri || OI->getOpcode() == ARM::t2SUBri) &&
      OI->getOperand(1).getReg() == SrcReg &&
      OI->getOperand(2).getImm() == ImmValue) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPi8 &&
      (OI->getOpcode() == ARM::tSUBi8 || OI->getOpcode() == ARM::tSUBi3) &&
      OI->getOperand(2).getReg() == SrcReg &&
      OI->getOperand(3).getImm() == ImmValue) {
    IsThumb1 = true;
    return true;
  }

  if ((CmpI->getOpcode() == ARM::CMPrr || CmpI->getOpcode() == ARM::t2CMPrr) &&
      (OI->getOpcode() == ARM::ADDrr || OI->getOpcode() == ARM::t2ADDrr ||
       OI->getOpcode() == ARM::ADDri || OI->getOpcode() == ARM::t2ADDri) &&
      OI->getOperand(0).isReg() && OI->getOperand(1).isReg() &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(1).getReg() == SrcReg2) {
    IsThumb1 = false;
    return true;
  }

  if (CmpI->getOpcode() == ARM::tCMPr &&
      (OI->getOpcode() == ARM::tADDi3 || OI->getOpcode() == ARM::tADDi8 ||
       OI->getOpcode() == ARM::tADDrr) &&
      OI->getOperand(0).getReg() == SrcReg &&
      OI->getOperand(2).getReg() == SrcReg2) {
    IsThumb1 = true;
    return true;
  }

  return false;
}
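// For example (illustrative): after %r0 = ADDrr %r1, %r2, a following
// CMPrr(%r0, %r1) computes r0 - r1 == r2, and its carry flag is the inverse
// of the ADD's carry-out; this is why getCmpToAddCondition above swaps HS
// and LO when such a compare is folded into a flag-setting ADD.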
static bool isOptimizeCompareCandidate(MachineInstr *MI, bool &IsThumb1) {
  switch (MI->getOpcode()) {
  default: return false;
  case ARM::tLSLri:
  case ARM::tLSRri:
  case ARM::tLSLrr:
  case ARM::tLSRrr:
  case ARM::tSUBrr:
  case ARM::tADDrr:
  case ARM::tADDi3:
  case ARM::tADDi8:
  case ARM::tSUBi3:
  case ARM::tSUBi8:
  case ARM::tMUL:
  case ARM::tADC:
  case ARM::tSBC:
  case ARM::tRSB:
  case ARM::tAND:
  case ARM::tORR:
  case ARM::tEOR:
  case ARM::tBIC:
  case ARM::tMVN:
  case ARM::tASRri:
  case ARM::tASRrr:
  case ARM::tROR:
    IsThumb1 = true;
    [[fallthrough]];
  case ARM::RSBrr:
  case ARM::RSBri:
  case ARM::RSCrr:
  case ARM::RSCri:
  case ARM::ADDrr:
  case ARM::ADDri:
  case ARM::ADCrr:
  case ARM::ADCri:
  case ARM::SUBrr:
  case ARM::SUBri:
  case ARM::SBCrr:
  case ARM::SBCri:
  case ARM::t2RSBri:
  case ARM::t2ADDrr:
  case ARM::t2ADDri:
  case ARM::t2ADCrr:
  case ARM::t2ADCri:
  case ARM::t2SUBrr:
  case ARM::t2SUBri:
  case ARM::t2SBCrr:
  case ARM::t2SBCri:
  case ARM::ANDrr:
  case ARM::ANDri:
  case ARM::ANDrsr:
  case ARM::ANDrsi:
  case ARM::t2ANDrr:
  case ARM::t2ANDri:
  case ARM::t2ANDrs:
  case ARM::ORRrr:
  case ARM::ORRri:
  case ARM::ORRrsr:
  case ARM::ORRrsi:
  case ARM::t2ORRrr:
  case ARM::t2ORRri:
  case ARM::t2ORRrs:
  case ARM::EORrr:
  case ARM::EORri:
  case ARM::EORrsr:
  case ARM::EORrsi:
  case ARM::t2EORrr:
  case ARM::t2EORri:
  case ARM::t2EORrs:
  case ARM::BICri:
  case ARM::BICrr:
  case ARM::BICrsi:
  case ARM::BICrsr:
  case ARM::t2BICri:
  case ARM::t2BICrr:
  case ARM::t2BICrs:
  case ARM::t2LSRri:
  case ARM::t2LSRrr:
  case ARM::t2LSLri:
  case ARM::t2LSLrr:
  case ARM::MOVsr:
  case ARM::MOVsi:
    return true;
  }
}
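// For example (illustrative), optimizeCompareInstr below rewrites
//   %2 = SUBrr %0, %1      ; cc_out = $noreg
//   CMPrr %0, %1           ; defines $cpsr
// by switching the SUB's optional cc_out operand to $cpsr and erasing the
// compare, since both would set identical flags.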
/// optimizeCompareInstr - Convert the instruction supplying the argument to
/// the comparison into one that sets the zero bit in the flags register;
/// Remove a redundant Compare instruction if an earlier instruction can set
/// the flags in the same way as Compare.
/// E.g. SUBrr(r1,r2) and CMPrr(r1,r2). We also handle the case where two
/// operands are swapped: SUBrr(r1,r2) and CMPrr(r2,r1), by updating the
/// condition code of instructions which use the flags.
bool ARMBaseInstrInfo::optimizeCompareInstr(
    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
    int64_t CmpValue, const MachineRegisterInfo *MRI) const {
  // Get the unique definition of SrcReg.
  MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
  if (!MI) return false;

  // Masked compares sometimes use the same register as the corresponding
  // 'and'.
  if (CmpMask != ~0) {
    if (!isSuitableForMask(MI, SrcReg, CmpMask, false) || isPredicated(*MI)) {
      MI = nullptr;
      for (MachineRegisterInfo::use_instr_iterator
           UI = MRI->use_instr_begin(SrcReg), UE = MRI->use_instr_end();
           UI != UE; ++UI) {
        if (UI->getParent() != CmpInstr.getParent())
          continue;
        MachineInstr *PotentialAND = &*UI;
        if (!isSuitableForMask(PotentialAND, SrcReg, CmpMask, true) ||
            isPredicated(*PotentialAND))
          continue;
        MI = PotentialAND;
        break;
      }
      if (!MI) return false;
    }
  }

  // Get ready to iterate backward from CmpInstr.
  MachineBasicBlock::iterator I = CmpInstr, E = MI,
                              B = CmpInstr.getParent()->begin();

  // Early exit if CmpInstr is at the beginning of the BB.
  if (I == B) return false;

  // There are two possible candidates which can be changed to set CPSR:
  // One is MI, the other is a SUB or ADD instruction.
  // For CMPrr(r1,r2), we are looking for SUB(r1,r2), SUB(r2,r1), or
  // ADDr[ri](r1, r2, X).
  // For CMPri(r1, CmpValue), we are looking for SUBri(r1, CmpValue).
  MachineInstr *SubAdd = nullptr;
  if (SrcReg2 != 0)
    // MI is not a candidate for CMPrr.
    MI = nullptr;
  else if (MI->getParent() != CmpInstr.getParent() || CmpValue != 0) {
    // Conservatively refuse to convert an instruction which isn't in the same
    // BB as the comparison.
    // For CMPri w/ CmpValue != 0, a SubAdd may still be a candidate.
    // Thus we cannot return here.
    if (CmpInstr.getOpcode() == ARM::CMPri ||
        CmpInstr.getOpcode() == ARM::t2CMPri ||
        CmpInstr.getOpcode() == ARM::tCMPi8)
      MI = nullptr;
    else
      return false;
  }

  bool IsThumb1 = false;
  if (MI && !isOptimizeCompareCandidate(MI, IsThumb1))
    return false;

  // We also want to do this peephole for cases like this: if (a*b == 0),
  // and optimise away the CMP instruction from the generated code sequence:
  // MULS, MOVS, MOVS, CMP. Here the MOVS instructions load the boolean values
  // resulting from the select instruction, but these MOVS instructions for
  // Thumb1 (V6M) are flag setting and are thus preventing this optimisation.
  // However, if we only have MOVS instructions in between the CMP and the
  // other instruction (the MULS in this example), then the CPSR is dead so we
  // can safely reorder the sequence into: MOVS, MOVS, MULS, CMP. We do this
  // reordering and then continue the analysis hoping we can eliminate the
  // CMP. This peephole works on the vregs, so is still in SSA form. As a
  // consequence, the movs won't redefine/kill the MUL operands which would
  // make this reordering illegal.
As a3083// consequence, the movs won't redefine/kill the MUL operands which would3084// make this reordering illegal.3085const TargetRegisterInfo *TRI = &getRegisterInfo();3086if (MI && IsThumb1) {3087--I;3088if (I != E && !MI->readsRegister(ARM::CPSR, TRI)) {3089bool CanReorder = true;3090for (; I != E; --I) {3091if (I->getOpcode() != ARM::tMOVi8) {3092CanReorder = false;3093break;3094}3095}3096if (CanReorder) {3097MI = MI->removeFromParent();3098E = CmpInstr;3099CmpInstr.getParent()->insert(E, MI);3100}3101}3102I = CmpInstr;3103E = MI;3104}31053106// Check that CPSR isn't set between the comparison instruction and the one we3107// want to change. At the same time, search for SubAdd.3108bool SubAddIsThumb1 = false;3109do {3110const MachineInstr &Instr = *--I;31113112// Check whether CmpInstr can be made redundant by the current instruction.3113if (isRedundantFlagInstr(&CmpInstr, SrcReg, SrcReg2, CmpValue, &Instr,3114SubAddIsThumb1)) {3115SubAdd = &*I;3116break;3117}31183119// Allow E (which was initially MI) to be SubAdd but do not search before E.3120if (I == E)3121break;31223123if (Instr.modifiesRegister(ARM::CPSR, TRI) ||3124Instr.readsRegister(ARM::CPSR, TRI))3125// This instruction modifies or uses CPSR after the one we want to3126// change. We can't do this transformation.3127return false;31283129if (I == B) {3130// In some cases, we scan the use-list of an instruction for an AND;3131// that AND is in the same BB, but may not be scheduled before the3132// corresponding TST. In that case, bail out.3133//3134// FIXME: We could try to reschedule the AND.3135return false;3136}3137} while (true);31383139// Return false if no candidates exist.3140if (!MI && !SubAdd)3141return false;31423143// If we found a SubAdd, use it as it will be closer to the CMP3144if (SubAdd) {3145MI = SubAdd;3146IsThumb1 = SubAddIsThumb1;3147}31483149// We can't use a predicated instruction - it doesn't always write the flags.3150if (isPredicated(*MI))3151return false;31523153// Scan forward for the use of CPSR3154// When checking against MI: if it's a conditional code that requires3155// checking of the V bit or C bit, then this is not safe to do.3156// It is safe to remove CmpInstr if CPSR is redefined or killed.3157// If we are done with the basic block, we need to check whether CPSR is3158// live-out.3159SmallVector<std::pair<MachineOperand*, ARMCC::CondCodes>, 4>3160OperandsToUpdate;3161bool isSafe = false;3162I = CmpInstr;3163E = CmpInstr.getParent()->end();3164while (!isSafe && ++I != E) {3165const MachineInstr &Instr = *I;3166for (unsigned IO = 0, EO = Instr.getNumOperands();3167!isSafe && IO != EO; ++IO) {3168const MachineOperand &MO = Instr.getOperand(IO);3169if (MO.isRegMask() && MO.clobbersPhysReg(ARM::CPSR)) {3170isSafe = true;3171break;3172}3173if (!MO.isReg() || MO.getReg() != ARM::CPSR)3174continue;3175if (MO.isDef()) {3176isSafe = true;3177break;3178}3179// Condition code is after the operand before CPSR except for VSELs.3180ARMCC::CondCodes CC;3181bool IsInstrVSel = true;3182switch (Instr.getOpcode()) {3183default:3184IsInstrVSel = false;3185CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm();3186break;3187case ARM::VSELEQD:3188case ARM::VSELEQS:3189case ARM::VSELEQH:3190CC = ARMCC::EQ;3191break;3192case ARM::VSELGTD:3193case ARM::VSELGTS:3194case ARM::VSELGTH:3195CC = ARMCC::GT;3196break;3197case ARM::VSELGED:3198case ARM::VSELGES:3199case ARM::VSELGEH:3200CC = ARMCC::GE;3201break;3202case ARM::VSELVSD:3203case ARM::VSELVSS:3204case ARM::VSELVSH:3205CC = ARMCC::VS;3206break;3207}32083209if 
      if (SubAdd) {
        // If we have SUB(r1, r2) and CMP(r2, r1), the condition code based
        // on CMP needs to be updated to be based on SUB.
        // If we have ADD(r1, r2, X) and CMP(r1, r2), the condition code also
        // needs to be modified.
        // Push the condition code operands to OperandsToUpdate.
        // If it is safe to remove CmpInstr, the condition code of these
        // operands will be modified.
        unsigned Opc = SubAdd->getOpcode();
        bool IsSub = Opc == ARM::SUBrr || Opc == ARM::t2SUBrr ||
                     Opc == ARM::SUBri || Opc == ARM::t2SUBri ||
                     Opc == ARM::tSUBrr || Opc == ARM::tSUBi3 ||
                     Opc == ARM::tSUBi8;
        unsigned OpI = Opc != ARM::tSUBrr ? 1 : 2;
        if (!IsSub ||
            (SrcReg2 != 0 && SubAdd->getOperand(OpI).getReg() == SrcReg2 &&
             SubAdd->getOperand(OpI + 1).getReg() == SrcReg)) {
          // VSel doesn't support condition code update.
          if (IsInstrVSel)
            return false;
          // Ensure we can swap the condition.
          ARMCC::CondCodes NewCC =
              (IsSub ? getSwappedCondition(CC) : getCmpToAddCondition(CC));
          if (NewCC == ARMCC::AL)
            return false;
          OperandsToUpdate.push_back(
              std::make_pair(&((*I).getOperand(IO - 1)), NewCC));
        }
      } else {
        // No SubAdd, so this is x = <op> y, z; cmp x, 0.
        switch (CC) {
        case ARMCC::EQ: // Z
        case ARMCC::NE: // Z
        case ARMCC::MI: // N
        case ARMCC::PL: // N
        case ARMCC::AL: // none
          // CPSR can be used multiple times, we should continue.
          break;
        case ARMCC::HS: // C
        case ARMCC::LO: // C
        case ARMCC::VS: // V
        case ARMCC::VC: // V
        case ARMCC::HI: // C Z
        case ARMCC::LS: // C Z
        case ARMCC::GE: // N V
        case ARMCC::LT: // N V
        case ARMCC::GT: // Z N V
        case ARMCC::LE: // Z N V
          // The instruction uses the V bit or C bit which is not safe.
          return false;
        }
      }
    }
  }

  // If CPSR is not killed nor re-defined, we should check whether it is
  // live-out. If it is live-out, do not optimize.
  if (!isSafe) {
    MachineBasicBlock *MBB = CmpInstr.getParent();
    for (MachineBasicBlock *Succ : MBB->successors())
      if (Succ->isLiveIn(ARM::CPSR))
        return false;
  }

  // Toggle the optional operand to CPSR (if it exists - in Thumb1 we always
  // set CPSR so this is represented as an explicit output)
  if (!IsThumb1) {
    unsigned CPSRRegNum = MI->getNumExplicitOperands() - 1;
    MI->getOperand(CPSRRegNum).setReg(ARM::CPSR);
    MI->getOperand(CPSRRegNum).setIsDef(true);
  }
  assert(!isPredicated(*MI) && "Can't use flags from predicated instruction");
  CmpInstr.eraseFromParent();

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = OperandsToUpdate.size(); i < e; i++)
    OperandsToUpdate[i].first->setImm(OperandsToUpdate[i].second);

  MI->clearRegisterDeads(ARM::CPSR);

  return true;
}

bool ARMBaseInstrInfo::shouldSink(const MachineInstr &MI) const {
  // Do not sink MI if it might be used to optimize a redundant compare.
  // We heuristically only look at the instruction immediately following MI to
  // avoid potentially searching the entire basic block.
  if (isPredicated(MI))
    return true;
  MachineBasicBlock::const_iterator Next = &MI;
  ++Next;
  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  bool IsThumb1;
  if (Next != MI.getParent()->end() &&
      analyzeCompare(*Next, SrcReg, SrcReg2, CmpMask, CmpValue) &&
      isRedundantFlagInstr(&*Next, SrcReg, SrcReg2, CmpValue, &MI, IsThumb1))
    return false;
  return true;
}
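// foldImmediate below folds a 32-bit immediate materialized by MOVi32imm
// into its single user when it splits into two modified-immediate parts;
// e.g. (illustrative) 0x00F000F0 = 0xF0 + 0x00F00000, so
//   %1 = MOVi32imm 0x00F000F0
//   %2 = ADDrr %0, %1
// becomes
//   %t = ADDri %0, 0xF0
//   %2 = ADDri %t, 0x00F00000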
bool ARMBaseInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
                                     Register Reg,
                                     MachineRegisterInfo *MRI) const {
  // Fold large immediates into add, sub, or, xor.
  unsigned DefOpc = DefMI.getOpcode();
  if (DefOpc != ARM::t2MOVi32imm && DefOpc != ARM::MOVi32imm &&
      DefOpc != ARM::tMOVi32imm)
    return false;
  if (!DefMI.getOperand(1).isImm())
    // Could be t2MOVi32imm @xx
    return false;

  if (!MRI->hasOneNonDBGUse(Reg))
    return false;

  const MCInstrDesc &DefMCID = DefMI.getDesc();
  if (DefMCID.hasOptionalDef()) {
    unsigned NumOps = DefMCID.getNumOperands();
    const MachineOperand &MO = DefMI.getOperand(NumOps - 1);
    if (MO.getReg() == ARM::CPSR && !MO.isDead())
      // If DefMI defines CPSR and it is not dead, it's obviously not safe
      // to delete DefMI.
      return false;
  }

  const MCInstrDesc &UseMCID = UseMI.getDesc();
  if (UseMCID.hasOptionalDef()) {
    unsigned NumOps = UseMCID.getNumOperands();
    if (UseMI.getOperand(NumOps - 1).getReg() == ARM::CPSR)
      // If the instruction sets the flag, do not attempt this optimization
      // since it may change the semantics of the code.
      return false;
  }

  unsigned UseOpc = UseMI.getOpcode();
  unsigned NewUseOpc = 0;
  uint32_t ImmVal = (uint32_t)DefMI.getOperand(1).getImm();
  uint32_t SOImmValV1 = 0, SOImmValV2 = 0;
  bool Commute = false;
  switch (UseOpc) {
  default: return false;
  case ARM::SUBrr:
  case ARM::ADDrr:
  case ARM::ORRrr:
  case ARM::EORrr:
  case ARM::t2SUBrr:
  case ARM::t2ADDrr:
  case ARM::t2ORRrr:
  case ARM::t2EORrr: {
    Commute = UseMI.getOperand(2).getReg() != Reg;
    switch (UseOpc) {
    default: break;
    case ARM::ADDrr:
    case ARM::SUBrr:
      if (UseOpc == ARM::SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      if (ARM_AM::isSOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::ADDri : ARM::SUBri;
      else if (ARM_AM::isSOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::ADDrr ? ARM::SUBri : ARM::ADDri;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      break;
    case ARM::ORRrr:
    case ARM::EORrr:
      if (!ARM_AM::isSOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getSOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::ORRrr: NewUseOpc = ARM::ORRri; break;
      case ARM::EORrr: NewUseOpc = ARM::EORri; break;
      }
      break;
    case ARM::t2ADDrr:
    case ARM::t2SUBrr: {
      if (UseOpc == ARM::t2SUBrr && Commute)
        return false;

      // ADD/SUB are special because they're essentially the same operation, so
      // we can handle a larger range of immediates.
      const bool ToSP = DefMI.getOperand(0).getReg() == ARM::SP;
      const unsigned t2ADD = ToSP ? ARM::t2ADDspImm : ARM::t2ADDri;
      const unsigned t2SUB = ToSP ? ARM::t2SUBspImm : ARM::t2SUBri;
      if (ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2ADD : t2SUB;
      else if (ARM_AM::isT2SOImmTwoPartVal(-ImmVal)) {
        ImmVal = -ImmVal;
        NewUseOpc = UseOpc == ARM::t2ADDrr ? t2SUB : t2ADD;
      } else
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      break;
    }
    case ARM::t2ORRrr:
    case ARM::t2EORrr:
      if (!ARM_AM::isT2SOImmTwoPartVal(ImmVal))
        return false;
      SOImmValV1 = (uint32_t)ARM_AM::getT2SOImmTwoPartFirst(ImmVal);
      SOImmValV2 = (uint32_t)ARM_AM::getT2SOImmTwoPartSecond(ImmVal);
      switch (UseOpc) {
      default: break;
      case ARM::t2ORRrr: NewUseOpc = ARM::t2ORRri; break;
      case ARM::t2EORrr: NewUseOpc = ARM::t2EORri; break;
      }
      break;
    }
  }
  }

  unsigned OpIdx = Commute ? 2 : 1;
2 : 1;
  Register Reg1 = UseMI.getOperand(OpIdx).getReg();
  bool isKill = UseMI.getOperand(OpIdx).isKill();
  const TargetRegisterClass *TRC = MRI->getRegClass(Reg);
  Register NewReg = MRI->createVirtualRegister(TRC);
  BuildMI(*UseMI.getParent(), UseMI, UseMI.getDebugLoc(), get(NewUseOpc),
          NewReg)
      .addReg(Reg1, getKillRegState(isKill))
      .addImm(SOImmValV1)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  UseMI.setDesc(get(NewUseOpc));
  UseMI.getOperand(1).setReg(NewReg);
  UseMI.getOperand(1).setIsKill();
  UseMI.getOperand(2).ChangeToImmediate(SOImmValV2);
  DefMI.eraseFromParent();
  // FIXME: t2ADDrr should be split, as different rules apply when writing to
  // SP, just as t2ADDri was split into [t2ADDri, t2ADDspImm]. Then the code
  // below will not be needed, as the input/output register classes will be
  // rgpr or gprSP. For now, we fix the UseMI operand explicitly here:
  switch (NewUseOpc) {
  case ARM::t2ADDspImm:
  case ARM::t2SUBspImm:
  case ARM::t2ADDri:
  case ARM::t2SUBri:
    MRI->constrainRegClass(UseMI.getOperand(0).getReg(), TRC);
  }
  return true;
}

static unsigned getNumMicroOpsSwiftLdSt(const InstrItineraryData *ItinData,
                                        const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  default: {
    const MCInstrDesc &Desc = MI.getDesc();
    int UOps = ItinData->getNumMicroOps(Desc.getSchedClass());
    assert(UOps >= 0 && "bad # UOps");
    return UOps;
  }

  case ARM::LDRrs:
  case ARM::LDRBrs:
  case ARM::STRrs:
  case ARM::STRBrs: {
    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRH:
  case ARM::STRH: {
    if (!MI.getOperand(2).getReg())
      return 1;

    unsigned ShOpVal = MI.getOperand(3).getImm();
    bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;
    unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);
    if (!isSub &&
        (ShImm == 0 ||
         ((ShImm == 1 || ShImm == 2 || ShImm == 3) &&
          ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))
      return 1;
    return 2;
  }

  case ARM::LDRSB:
  case ARM::LDRSH:
    return (ARM_AM::getAM3Op(MI.getOperand(3).getImm()) == ARM_AM::sub) ? 3 : 2;

  case ARM::LDRSB_POST:
  case ARM::LDRSH_POST: {
    Register Rt = MI.getOperand(0).getReg();
    Register Rm = MI.getOperand(3).getReg();
    return (Rt == Rm) ?
4 : 3;3510}35113512case ARM::LDR_PRE_REG:3513case ARM::LDRB_PRE_REG: {3514Register Rt = MI.getOperand(0).getReg();3515Register Rm = MI.getOperand(3).getReg();3516if (Rt == Rm)3517return 3;3518unsigned ShOpVal = MI.getOperand(4).getImm();3519bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;3520unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);3521if (!isSub &&3522(ShImm == 0 ||3523((ShImm == 1 || ShImm == 2 || ShImm == 3) &&3524ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))3525return 2;3526return 3;3527}35283529case ARM::STR_PRE_REG:3530case ARM::STRB_PRE_REG: {3531unsigned ShOpVal = MI.getOperand(4).getImm();3532bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;3533unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);3534if (!isSub &&3535(ShImm == 0 ||3536((ShImm == 1 || ShImm == 2 || ShImm == 3) &&3537ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))3538return 2;3539return 3;3540}35413542case ARM::LDRH_PRE:3543case ARM::STRH_PRE: {3544Register Rt = MI.getOperand(0).getReg();3545Register Rm = MI.getOperand(3).getReg();3546if (!Rm)3547return 2;3548if (Rt == Rm)3549return 3;3550return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 3 : 2;3551}35523553case ARM::LDR_POST_REG:3554case ARM::LDRB_POST_REG:3555case ARM::LDRH_POST: {3556Register Rt = MI.getOperand(0).getReg();3557Register Rm = MI.getOperand(3).getReg();3558return (Rt == Rm) ? 3 : 2;3559}35603561case ARM::LDR_PRE_IMM:3562case ARM::LDRB_PRE_IMM:3563case ARM::LDR_POST_IMM:3564case ARM::LDRB_POST_IMM:3565case ARM::STRB_POST_IMM:3566case ARM::STRB_POST_REG:3567case ARM::STRB_PRE_IMM:3568case ARM::STRH_POST:3569case ARM::STR_POST_IMM:3570case ARM::STR_POST_REG:3571case ARM::STR_PRE_IMM:3572return 2;35733574case ARM::LDRSB_PRE:3575case ARM::LDRSH_PRE: {3576Register Rm = MI.getOperand(3).getReg();3577if (Rm == 0)3578return 3;3579Register Rt = MI.getOperand(0).getReg();3580if (Rt == Rm)3581return 4;3582unsigned ShOpVal = MI.getOperand(4).getImm();3583bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;3584unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);3585if (!isSub &&3586(ShImm == 0 ||3587((ShImm == 1 || ShImm == 2 || ShImm == 3) &&3588ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))3589return 3;3590return 4;3591}35923593case ARM::LDRD: {3594Register Rt = MI.getOperand(0).getReg();3595Register Rn = MI.getOperand(2).getReg();3596Register Rm = MI.getOperand(3).getReg();3597if (Rm)3598return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 43599: 3;3600return (Rt == Rn) ? 3 : 2;3601}36023603case ARM::STRD: {3604Register Rm = MI.getOperand(3).getReg();3605if (Rm)3606return (ARM_AM::getAM3Op(MI.getOperand(4).getImm()) == ARM_AM::sub) ? 43607: 3;3608return 2;3609}36103611case ARM::LDRD_POST:3612case ARM::t2LDRD_POST:3613return 3;36143615case ARM::STRD_POST:3616case ARM::t2STRD_POST:3617return 4;36183619case ARM::LDRD_PRE: {3620Register Rt = MI.getOperand(0).getReg();3621Register Rn = MI.getOperand(3).getReg();3622Register Rm = MI.getOperand(4).getReg();3623if (Rm)3624return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 53625: 4;3626return (Rt == Rn) ? 4 : 3;3627}36283629case ARM::t2LDRD_PRE: {3630Register Rt = MI.getOperand(0).getReg();3631Register Rn = MI.getOperand(3).getReg();3632return (Rt == Rn) ? 4 : 3;3633}36343635case ARM::STRD_PRE: {3636Register Rm = MI.getOperand(4).getReg();3637if (Rm)3638return (ARM_AM::getAM3Op(MI.getOperand(5).getImm()) == ARM_AM::sub) ? 
53639: 4;3640return 3;3641}36423643case ARM::t2STRD_PRE:3644return 3;36453646case ARM::t2LDR_POST:3647case ARM::t2LDRB_POST:3648case ARM::t2LDRB_PRE:3649case ARM::t2LDRSBi12:3650case ARM::t2LDRSBi8:3651case ARM::t2LDRSBpci:3652case ARM::t2LDRSBs:3653case ARM::t2LDRH_POST:3654case ARM::t2LDRH_PRE:3655case ARM::t2LDRSBT:3656case ARM::t2LDRSB_POST:3657case ARM::t2LDRSB_PRE:3658case ARM::t2LDRSH_POST:3659case ARM::t2LDRSH_PRE:3660case ARM::t2LDRSHi12:3661case ARM::t2LDRSHi8:3662case ARM::t2LDRSHpci:3663case ARM::t2LDRSHs:3664return 2;36653666case ARM::t2LDRDi8: {3667Register Rt = MI.getOperand(0).getReg();3668Register Rn = MI.getOperand(2).getReg();3669return (Rt == Rn) ? 3 : 2;3670}36713672case ARM::t2STRB_POST:3673case ARM::t2STRB_PRE:3674case ARM::t2STRBs:3675case ARM::t2STRDi8:3676case ARM::t2STRH_POST:3677case ARM::t2STRH_PRE:3678case ARM::t2STRHs:3679case ARM::t2STR_POST:3680case ARM::t2STR_PRE:3681case ARM::t2STRs:3682return 2;3683}3684}36853686// Return the number of 32-bit words loaded by LDM or stored by STM. If this3687// can't be easily determined return 0 (missing MachineMemOperand).3688//3689// FIXME: The current MachineInstr design does not support relying on machine3690// mem operands to determine the width of a memory access. Instead, we expect3691// the target to provide this information based on the instruction opcode and3692// operands. However, using MachineMemOperand is the best solution now for3693// two reasons:3694//3695// 1) getNumMicroOps tries to infer LDM memory width from the total number of MI3696// operands. This is much more dangerous than using the MachineMemOperand3697// sizes because CodeGen passes can insert/remove optional machine operands. In3698// fact, it's totally incorrect for preRA passes and appears to be wrong for3699// postRA passes as well.3700//3701// 2) getNumLDMAddresses is only used by the scheduling machine model and any3702// machine model that calls this should handle the unknown (zero size) case.3703//3704// Long term, we should require a target hook that verifies MachineMemOperand3705// sizes during MC lowering. That target hook should be local to MC lowering3706// because we can't ensure that it is aware of other MI forms. Doing this will3707// ensure that MachineMemOperands are correctly propagated through all passes.3708unsigned ARMBaseInstrInfo::getNumLDMAddresses(const MachineInstr &MI) const {3709unsigned Size = 0;3710for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),3711E = MI.memoperands_end();3712I != E; ++I) {3713Size += (*I)->getSize().getValue();3714}3715// FIXME: The scheduler currently can't handle values larger than 16. But3716// the values can actually go up to 32 for floating-point load/store3717// multiple (VLDMIA etc.). 
Also, the way this code is reasoning about memory
// operations isn't right; we could end up with "extra" memory operands for
// various reasons, like tail merge merging two memory operations.
  return std::min(Size / 4, 16U);
}

static unsigned getNumMicroOpsSingleIssuePlusExtras(unsigned Opc,
                                                    unsigned NumRegs) {
  unsigned UOps = 1 + NumRegs; // 1 for address computation.
  switch (Opc) {
  default:
    break;
  case ARM::VLDMDIA_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VLDMSIA_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMDIA_UPD:
  case ARM::VSTMDDB_UPD:
  case ARM::VSTMSIA_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::LDMIA_UPD:
  case ARM::LDMDA_UPD:
  case ARM::LDMDB_UPD:
  case ARM::LDMIB_UPD:
  case ARM::STMIA_UPD:
  case ARM::STMDA_UPD:
  case ARM::STMDB_UPD:
  case ARM::STMIB_UPD:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_UPD:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMIA_UPD:
  case ARM::t2STMDB_UPD:
    ++UOps; // One for base register writeback.
    break;
  case ARM::LDMIA_RET:
  case ARM::tPOP_RET:
  case ARM::t2LDMIA_RET:
    UOps += 2; // One for base reg wb, one for write to pc.
    break;
  }
  return UOps;
}

unsigned ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
                                          const MachineInstr &MI) const {
  if (!ItinData || ItinData->isEmpty())
    return 1;

  const MCInstrDesc &Desc = MI.getDesc();
  unsigned Class = Desc.getSchedClass();
  int ItinUOps = ItinData->getNumMicroOps(Class);
  if (ItinUOps >= 0) {
    if (Subtarget.isSwift() && (Desc.mayLoad() || Desc.mayStore()))
      return getNumMicroOpsSwiftLdSt(ItinData, MI);

    return ItinUOps;
  }

  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  default:
    llvm_unreachable("Unexpected multi-uops instruction!");
  case ARM::VLDMQIA:
  case ARM::VSTMQIA:
    return 2;

  // The number of uOps for load / store multiple is determined by the number
  // of registers.
  //
  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
  // same cycle. The scheduling for the first load / store must be done
  // separately by assuming the address is not 64-bit aligned.
  //
  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
  // is not 64-bit aligned, then the AGU would take an extra cycle.
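  // For instance (illustrative): an LDMIA of five registers costs
  // 5 / 2 + 5 % 2 = 3 uops on Cortex-A9.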
For VFP / NEON3794// load / store multiple, the formula is (#reg / 2) + (#reg % 2) + 1.3795case ARM::VLDMDIA:3796case ARM::VLDMDIA_UPD:3797case ARM::VLDMDDB_UPD:3798case ARM::VLDMSIA:3799case ARM::VLDMSIA_UPD:3800case ARM::VLDMSDB_UPD:3801case ARM::VSTMDIA:3802case ARM::VSTMDIA_UPD:3803case ARM::VSTMDDB_UPD:3804case ARM::VSTMSIA:3805case ARM::VSTMSIA_UPD:3806case ARM::VSTMSDB_UPD: {3807unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands();3808return (NumRegs / 2) + (NumRegs % 2) + 1;3809}38103811case ARM::LDMIA_RET:3812case ARM::LDMIA:3813case ARM::LDMDA:3814case ARM::LDMDB:3815case ARM::LDMIB:3816case ARM::LDMIA_UPD:3817case ARM::LDMDA_UPD:3818case ARM::LDMDB_UPD:3819case ARM::LDMIB_UPD:3820case ARM::STMIA:3821case ARM::STMDA:3822case ARM::STMDB:3823case ARM::STMIB:3824case ARM::STMIA_UPD:3825case ARM::STMDA_UPD:3826case ARM::STMDB_UPD:3827case ARM::STMIB_UPD:3828case ARM::tLDMIA:3829case ARM::tLDMIA_UPD:3830case ARM::tSTMIA_UPD:3831case ARM::tPOP_RET:3832case ARM::tPOP:3833case ARM::tPUSH:3834case ARM::t2LDMIA_RET:3835case ARM::t2LDMIA:3836case ARM::t2LDMDB:3837case ARM::t2LDMIA_UPD:3838case ARM::t2LDMDB_UPD:3839case ARM::t2STMIA:3840case ARM::t2STMDB:3841case ARM::t2STMIA_UPD:3842case ARM::t2STMDB_UPD: {3843unsigned NumRegs = MI.getNumOperands() - Desc.getNumOperands() + 1;3844switch (Subtarget.getLdStMultipleTiming()) {3845case ARMSubtarget::SingleIssuePlusExtras:3846return getNumMicroOpsSingleIssuePlusExtras(Opc, NumRegs);3847case ARMSubtarget::SingleIssue:3848// Assume the worst.3849return NumRegs;3850case ARMSubtarget::DoubleIssue: {3851if (NumRegs < 4)3852return 2;3853// 4 registers would be issued: 2, 2.3854// 5 registers would be issued: 2, 2, 1.3855unsigned UOps = (NumRegs / 2);3856if (NumRegs % 2)3857++UOps;3858return UOps;3859}3860case ARMSubtarget::DoubleIssueCheckUnalignedAccess: {3861unsigned UOps = (NumRegs / 2);3862// If there are odd number of registers or if it's not 64-bit aligned,3863// then it takes an extra AGU (Address Generation Unit) cycle.3864if ((NumRegs % 2) || !MI.hasOneMemOperand() ||3865(*MI.memoperands_begin())->getAlign() < Align(8))3866++UOps;3867return UOps;3868}3869}3870}3871}3872llvm_unreachable("Didn't find the number of microops");3873}38743875std::optional<unsigned>3876ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,3877const MCInstrDesc &DefMCID, unsigned DefClass,3878unsigned DefIdx, unsigned DefAlign) const {3879int RegNo = (int)(DefIdx+1) - DefMCID.getNumOperands() + 1;3880if (RegNo <= 0)3881// Def is the address writeback.3882return ItinData->getOperandCycle(DefClass, DefIdx);38833884unsigned DefCycle;3885if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {3886// (regno / 2) + (regno % 2) + 13887DefCycle = RegNo / 2 + 1;3888if (RegNo % 2)3889++DefCycle;3890} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {3891DefCycle = RegNo;3892bool isSLoad = false;38933894switch (DefMCID.getOpcode()) {3895default: break;3896case ARM::VLDMSIA:3897case ARM::VLDMSIA_UPD:3898case ARM::VLDMSDB_UPD:3899isSLoad = true;3900break;3901}39023903// If there are odd number of 'S' registers or if it's not 64-bit aligned,3904// then it takes an extra cycle.3905if ((isSLoad && (RegNo % 2)) || DefAlign < 8)3906++DefCycle;3907} else {3908// Assume the worst.3909DefCycle = RegNo + 2;3910}39113912return DefCycle;3913}39143915std::optional<unsigned>3916ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,3917const MCInstrDesc &DefMCID, unsigned DefClass,3918unsigned DefIdx, unsigned DefAlign) const {3919int RegNo = 
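// RegNo converts the operand index into a 1-based position within the
// register list; a value <= 0 means DefIdx refers to the writeback operand
// rather than a loaded register.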
(int)(DefIdx+1) - DefMCID.getNumOperands() + 1;3920if (RegNo <= 0)3921// Def is the address writeback.3922return ItinData->getOperandCycle(DefClass, DefIdx);39233924unsigned DefCycle;3925if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {3926// 4 registers would be issued: 1, 2, 1.3927// 5 registers would be issued: 1, 2, 2.3928DefCycle = RegNo / 2;3929if (DefCycle < 1)3930DefCycle = 1;3931// Result latency is issue cycle + 2: E2.3932DefCycle += 2;3933} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {3934DefCycle = (RegNo / 2);3935// If there are odd number of registers or if it's not 64-bit aligned,3936// then it takes an extra AGU (Address Generation Unit) cycle.3937if ((RegNo % 2) || DefAlign < 8)3938++DefCycle;3939// Result latency is AGU cycles + 2.3940DefCycle += 2;3941} else {3942// Assume the worst.3943DefCycle = RegNo + 2;3944}39453946return DefCycle;3947}39483949std::optional<unsigned>3950ARMBaseInstrInfo::getVSTMUseCycle(const InstrItineraryData *ItinData,3951const MCInstrDesc &UseMCID, unsigned UseClass,3952unsigned UseIdx, unsigned UseAlign) const {3953int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;3954if (RegNo <= 0)3955return ItinData->getOperandCycle(UseClass, UseIdx);39563957unsigned UseCycle;3958if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {3959// (regno / 2) + (regno % 2) + 13960UseCycle = RegNo / 2 + 1;3961if (RegNo % 2)3962++UseCycle;3963} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {3964UseCycle = RegNo;3965bool isSStore = false;39663967switch (UseMCID.getOpcode()) {3968default: break;3969case ARM::VSTMSIA:3970case ARM::VSTMSIA_UPD:3971case ARM::VSTMSDB_UPD:3972isSStore = true;3973break;3974}39753976// If there are odd number of 'S' registers or if it's not 64-bit aligned,3977// then it takes an extra cycle.3978if ((isSStore && (RegNo % 2)) || UseAlign < 8)3979++UseCycle;3980} else {3981// Assume the worst.3982UseCycle = RegNo + 2;3983}39843985return UseCycle;3986}39873988std::optional<unsigned>3989ARMBaseInstrInfo::getSTMUseCycle(const InstrItineraryData *ItinData,3990const MCInstrDesc &UseMCID, unsigned UseClass,3991unsigned UseIdx, unsigned UseAlign) const {3992int RegNo = (int)(UseIdx+1) - UseMCID.getNumOperands() + 1;3993if (RegNo <= 0)3994return ItinData->getOperandCycle(UseClass, UseIdx);39953996unsigned UseCycle;3997if (Subtarget.isCortexA8() || Subtarget.isCortexA7()) {3998UseCycle = RegNo / 2;3999if (UseCycle < 2)4000UseCycle = 2;4001// Read in E3.4002UseCycle += 2;4003} else if (Subtarget.isLikeA9() || Subtarget.isSwift()) {4004UseCycle = (RegNo / 2);4005// If there are odd number of registers or if it's not 64-bit aligned,4006// then it takes an extra AGU (Address Generation Unit) cycle.4007if ((RegNo % 2) || UseAlign < 8)4008++UseCycle;4009} else {4010// Assume the worst.4011UseCycle = 1;4012}4013return UseCycle;4014}40154016std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(4017const InstrItineraryData *ItinData, const MCInstrDesc &DefMCID,4018unsigned DefIdx, unsigned DefAlign, const MCInstrDesc &UseMCID,4019unsigned UseIdx, unsigned UseAlign) const {4020unsigned DefClass = DefMCID.getSchedClass();4021unsigned UseClass = UseMCID.getSchedClass();40224023if (DefIdx < DefMCID.getNumDefs() && UseIdx < UseMCID.getNumOperands())4024return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);40254026// This may be a def / use of a variable_ops instruction, the operand4027// latency might be determinable dynamically. 
Let the target try to4028// figure it out.4029std::optional<unsigned> DefCycle;4030bool LdmBypass = false;4031switch (DefMCID.getOpcode()) {4032default:4033DefCycle = ItinData->getOperandCycle(DefClass, DefIdx);4034break;40354036case ARM::VLDMDIA:4037case ARM::VLDMDIA_UPD:4038case ARM::VLDMDDB_UPD:4039case ARM::VLDMSIA:4040case ARM::VLDMSIA_UPD:4041case ARM::VLDMSDB_UPD:4042DefCycle = getVLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);4043break;40444045case ARM::LDMIA_RET:4046case ARM::LDMIA:4047case ARM::LDMDA:4048case ARM::LDMDB:4049case ARM::LDMIB:4050case ARM::LDMIA_UPD:4051case ARM::LDMDA_UPD:4052case ARM::LDMDB_UPD:4053case ARM::LDMIB_UPD:4054case ARM::tLDMIA:4055case ARM::tLDMIA_UPD:4056case ARM::tPUSH:4057case ARM::t2LDMIA_RET:4058case ARM::t2LDMIA:4059case ARM::t2LDMDB:4060case ARM::t2LDMIA_UPD:4061case ARM::t2LDMDB_UPD:4062LdmBypass = true;4063DefCycle = getLDMDefCycle(ItinData, DefMCID, DefClass, DefIdx, DefAlign);4064break;4065}40664067if (!DefCycle)4068// We can't seem to determine the result latency of the def, assume it's 2.4069DefCycle = 2;40704071std::optional<unsigned> UseCycle;4072switch (UseMCID.getOpcode()) {4073default:4074UseCycle = ItinData->getOperandCycle(UseClass, UseIdx);4075break;40764077case ARM::VSTMDIA:4078case ARM::VSTMDIA_UPD:4079case ARM::VSTMDDB_UPD:4080case ARM::VSTMSIA:4081case ARM::VSTMSIA_UPD:4082case ARM::VSTMSDB_UPD:4083UseCycle = getVSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);4084break;40854086case ARM::STMIA:4087case ARM::STMDA:4088case ARM::STMDB:4089case ARM::STMIB:4090case ARM::STMIA_UPD:4091case ARM::STMDA_UPD:4092case ARM::STMDB_UPD:4093case ARM::STMIB_UPD:4094case ARM::tSTMIA_UPD:4095case ARM::tPOP_RET:4096case ARM::tPOP:4097case ARM::t2STMIA:4098case ARM::t2STMDB:4099case ARM::t2STMIA_UPD:4100case ARM::t2STMDB_UPD:4101UseCycle = getSTMUseCycle(ItinData, UseMCID, UseClass, UseIdx, UseAlign);4102break;4103}41044105if (!UseCycle)4106// Assume it's read in the first stage.4107UseCycle = 1;41084109if (UseCycle > *DefCycle + 1)4110return std::nullopt;41114112UseCycle = *DefCycle - *UseCycle + 1;4113if (UseCycle > 0u) {4114if (LdmBypass) {4115// It's a variable_ops instruction so we can't use DefIdx here. 
Just use4116// first def operand.4117if (ItinData->hasPipelineForwarding(DefClass, DefMCID.getNumOperands()-1,4118UseClass, UseIdx))4119UseCycle = *UseCycle - 1;4120} else if (ItinData->hasPipelineForwarding(DefClass, DefIdx,4121UseClass, UseIdx)) {4122UseCycle = *UseCycle - 1;4123}4124}41254126return UseCycle;4127}41284129static const MachineInstr *getBundledDefMI(const TargetRegisterInfo *TRI,4130const MachineInstr *MI, unsigned Reg,4131unsigned &DefIdx, unsigned &Dist) {4132Dist = 0;41334134MachineBasicBlock::const_iterator I = MI; ++I;4135MachineBasicBlock::const_instr_iterator II = std::prev(I.getInstrIterator());4136assert(II->isInsideBundle() && "Empty bundle?");41374138int Idx = -1;4139while (II->isInsideBundle()) {4140Idx = II->findRegisterDefOperandIdx(Reg, TRI, false, true);4141if (Idx != -1)4142break;4143--II;4144++Dist;4145}41464147assert(Idx != -1 && "Cannot find bundled definition!");4148DefIdx = Idx;4149return &*II;4150}41514152static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,4153const MachineInstr &MI, unsigned Reg,4154unsigned &UseIdx, unsigned &Dist) {4155Dist = 0;41564157MachineBasicBlock::const_instr_iterator II = ++MI.getIterator();4158assert(II->isInsideBundle() && "Empty bundle?");4159MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();41604161// FIXME: This doesn't properly handle multiple uses.4162int Idx = -1;4163while (II != E && II->isInsideBundle()) {4164Idx = II->findRegisterUseOperandIdx(Reg, TRI, false);4165if (Idx != -1)4166break;4167if (II->getOpcode() != ARM::t2IT)4168++Dist;4169++II;4170}41714172if (Idx == -1) {4173Dist = 0;4174return nullptr;4175}41764177UseIdx = Idx;4178return &*II;4179}41804181/// Return the number of cycles to add to (or subtract from) the static4182/// itinerary based on the def opcode and alignment. The caller will ensure that4183/// adjusted latency is at least one cycle.4184static int adjustDefLatency(const ARMSubtarget &Subtarget,4185const MachineInstr &DefMI,4186const MCInstrDesc &DefMCID, unsigned DefAlign) {4187int Adjust = 0;4188if (Subtarget.isCortexA8() || Subtarget.isLikeA9() || Subtarget.isCortexA7()) {4189// FIXME: Shifter op hack: no shift (i.e. 
[r +/- r]) or [r + r << 2]4190// variants are one cycle cheaper.4191switch (DefMCID.getOpcode()) {4192default: break;4193case ARM::LDRrs:4194case ARM::LDRBrs: {4195unsigned ShOpVal = DefMI.getOperand(3).getImm();4196unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);4197if (ShImm == 0 ||4198(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))4199--Adjust;4200break;4201}4202case ARM::t2LDRs:4203case ARM::t2LDRBs:4204case ARM::t2LDRHs:4205case ARM::t2LDRSHs: {4206// Thumb2 mode: lsl only.4207unsigned ShAmt = DefMI.getOperand(3).getImm();4208if (ShAmt == 0 || ShAmt == 2)4209--Adjust;4210break;4211}4212}4213} else if (Subtarget.isSwift()) {4214// FIXME: Properly handle all of the latency adjustments for address4215// writeback.4216switch (DefMCID.getOpcode()) {4217default: break;4218case ARM::LDRrs:4219case ARM::LDRBrs: {4220unsigned ShOpVal = DefMI.getOperand(3).getImm();4221bool isSub = ARM_AM::getAM2Op(ShOpVal) == ARM_AM::sub;4222unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);4223if (!isSub &&4224(ShImm == 0 ||4225((ShImm == 1 || ShImm == 2 || ShImm == 3) &&4226ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl)))4227Adjust -= 2;4228else if (!isSub &&4229ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)4230--Adjust;4231break;4232}4233case ARM::t2LDRs:4234case ARM::t2LDRBs:4235case ARM::t2LDRHs:4236case ARM::t2LDRSHs: {4237// Thumb2 mode: lsl only.4238unsigned ShAmt = DefMI.getOperand(3).getImm();4239if (ShAmt == 0 || ShAmt == 1 || ShAmt == 2 || ShAmt == 3)4240Adjust -= 2;4241break;4242}4243}4244}42454246if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment()) {4247switch (DefMCID.getOpcode()) {4248default: break;4249case ARM::VLD1q8:4250case ARM::VLD1q16:4251case ARM::VLD1q32:4252case ARM::VLD1q64:4253case ARM::VLD1q8wb_fixed:4254case ARM::VLD1q16wb_fixed:4255case ARM::VLD1q32wb_fixed:4256case ARM::VLD1q64wb_fixed:4257case ARM::VLD1q8wb_register:4258case ARM::VLD1q16wb_register:4259case ARM::VLD1q32wb_register:4260case ARM::VLD1q64wb_register:4261case ARM::VLD2d8:4262case ARM::VLD2d16:4263case ARM::VLD2d32:4264case ARM::VLD2q8:4265case ARM::VLD2q16:4266case ARM::VLD2q32:4267case ARM::VLD2d8wb_fixed:4268case ARM::VLD2d16wb_fixed:4269case ARM::VLD2d32wb_fixed:4270case ARM::VLD2q8wb_fixed:4271case ARM::VLD2q16wb_fixed:4272case ARM::VLD2q32wb_fixed:4273case ARM::VLD2d8wb_register:4274case ARM::VLD2d16wb_register:4275case ARM::VLD2d32wb_register:4276case ARM::VLD2q8wb_register:4277case ARM::VLD2q16wb_register:4278case ARM::VLD2q32wb_register:4279case ARM::VLD3d8:4280case ARM::VLD3d16:4281case ARM::VLD3d32:4282case ARM::VLD1d64T:4283case ARM::VLD3d8_UPD:4284case ARM::VLD3d16_UPD:4285case ARM::VLD3d32_UPD:4286case ARM::VLD1d64Twb_fixed:4287case ARM::VLD1d64Twb_register:4288case ARM::VLD3q8_UPD:4289case ARM::VLD3q16_UPD:4290case ARM::VLD3q32_UPD:4291case ARM::VLD4d8:4292case ARM::VLD4d16:4293case ARM::VLD4d32:4294case ARM::VLD1d64Q:4295case ARM::VLD4d8_UPD:4296case ARM::VLD4d16_UPD:4297case ARM::VLD4d32_UPD:4298case ARM::VLD1d64Qwb_fixed:4299case ARM::VLD1d64Qwb_register:4300case ARM::VLD4q8_UPD:4301case ARM::VLD4q16_UPD:4302case ARM::VLD4q32_UPD:4303case ARM::VLD1DUPq8:4304case ARM::VLD1DUPq16:4305case ARM::VLD1DUPq32:4306case ARM::VLD1DUPq8wb_fixed:4307case ARM::VLD1DUPq16wb_fixed:4308case ARM::VLD1DUPq32wb_fixed:4309case ARM::VLD1DUPq8wb_register:4310case ARM::VLD1DUPq16wb_register:4311case ARM::VLD1DUPq32wb_register:4312case ARM::VLD2DUPd8:4313case ARM::VLD2DUPd16:4314case ARM::VLD2DUPd32:4315case ARM::VLD2DUPd8wb_fixed:4316case ARM::VLD2DUPd16wb_fixed:4317case 
ARM::VLD2DUPd32wb_fixed:4318case ARM::VLD2DUPd8wb_register:4319case ARM::VLD2DUPd16wb_register:4320case ARM::VLD2DUPd32wb_register:4321case ARM::VLD4DUPd8:4322case ARM::VLD4DUPd16:4323case ARM::VLD4DUPd32:4324case ARM::VLD4DUPd8_UPD:4325case ARM::VLD4DUPd16_UPD:4326case ARM::VLD4DUPd32_UPD:4327case ARM::VLD1LNd8:4328case ARM::VLD1LNd16:4329case ARM::VLD1LNd32:4330case ARM::VLD1LNd8_UPD:4331case ARM::VLD1LNd16_UPD:4332case ARM::VLD1LNd32_UPD:4333case ARM::VLD2LNd8:4334case ARM::VLD2LNd16:4335case ARM::VLD2LNd32:4336case ARM::VLD2LNq16:4337case ARM::VLD2LNq32:4338case ARM::VLD2LNd8_UPD:4339case ARM::VLD2LNd16_UPD:4340case ARM::VLD2LNd32_UPD:4341case ARM::VLD2LNq16_UPD:4342case ARM::VLD2LNq32_UPD:4343case ARM::VLD4LNd8:4344case ARM::VLD4LNd16:4345case ARM::VLD4LNd32:4346case ARM::VLD4LNq16:4347case ARM::VLD4LNq32:4348case ARM::VLD4LNd8_UPD:4349case ARM::VLD4LNd16_UPD:4350case ARM::VLD4LNd32_UPD:4351case ARM::VLD4LNq16_UPD:4352case ARM::VLD4LNq32_UPD:4353// If the address is not 64-bit aligned, the latencies of these4354// instructions increases by one.4355++Adjust;4356break;4357}4358}4359return Adjust;4360}43614362std::optional<unsigned> ARMBaseInstrInfo::getOperandLatency(4363const InstrItineraryData *ItinData, const MachineInstr &DefMI,4364unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {4365// No operand latency. The caller may fall back to getInstrLatency.4366if (!ItinData || ItinData->isEmpty())4367return std::nullopt;43684369const MachineOperand &DefMO = DefMI.getOperand(DefIdx);4370Register Reg = DefMO.getReg();43714372const MachineInstr *ResolvedDefMI = &DefMI;4373unsigned DefAdj = 0;4374if (DefMI.isBundle())4375ResolvedDefMI =4376getBundledDefMI(&getRegisterInfo(), &DefMI, Reg, DefIdx, DefAdj);4377if (ResolvedDefMI->isCopyLike() || ResolvedDefMI->isInsertSubreg() ||4378ResolvedDefMI->isRegSequence() || ResolvedDefMI->isImplicitDef()) {4379return 1;4380}43814382const MachineInstr *ResolvedUseMI = &UseMI;4383unsigned UseAdj = 0;4384if (UseMI.isBundle()) {4385ResolvedUseMI =4386getBundledUseMI(&getRegisterInfo(), UseMI, Reg, UseIdx, UseAdj);4387if (!ResolvedUseMI)4388return std::nullopt;4389}43904391return getOperandLatencyImpl(4392ItinData, *ResolvedDefMI, DefIdx, ResolvedDefMI->getDesc(), DefAdj, DefMO,4393Reg, *ResolvedUseMI, UseIdx, ResolvedUseMI->getDesc(), UseAdj);4394}43954396std::optional<unsigned> ARMBaseInstrInfo::getOperandLatencyImpl(4397const InstrItineraryData *ItinData, const MachineInstr &DefMI,4398unsigned DefIdx, const MCInstrDesc &DefMCID, unsigned DefAdj,4399const MachineOperand &DefMO, unsigned Reg, const MachineInstr &UseMI,4400unsigned UseIdx, const MCInstrDesc &UseMCID, unsigned UseAdj) const {4401if (Reg == ARM::CPSR) {4402if (DefMI.getOpcode() == ARM::FMSTAT) {4403// fpscr -> cpsr stalls over 20 cycles on A8 (and earlier?)4404return Subtarget.isLikeA9() ? 1 : 20;4405}44064407// CPSR set and branch can be paired in the same cycle.4408if (UseMI.isBranch())4409return 0;44104411// Otherwise it takes the instruction latency (generally one).4412unsigned Latency = getInstrLatency(ItinData, DefMI);44134414// For Thumb2 and -Os, prefer scheduling CPSR setting instruction close to4415// its uses. 
Instructions which are otherwise scheduled between them may4416// incur a code size penalty (not able to use the CPSR setting 16-bit4417// instructions).4418if (Latency > 0 && Subtarget.isThumb2()) {4419const MachineFunction *MF = DefMI.getParent()->getParent();4420// FIXME: Use Function::hasOptSize().4421if (MF->getFunction().hasFnAttribute(Attribute::OptimizeForSize))4422--Latency;4423}4424return Latency;4425}44264427if (DefMO.isImplicit() || UseMI.getOperand(UseIdx).isImplicit())4428return std::nullopt;44294430unsigned DefAlign = DefMI.hasOneMemOperand()4431? (*DefMI.memoperands_begin())->getAlign().value()4432: 0;4433unsigned UseAlign = UseMI.hasOneMemOperand()4434? (*UseMI.memoperands_begin())->getAlign().value()4435: 0;44364437// Get the itinerary's latency if possible, and handle variable_ops.4438std::optional<unsigned> Latency = getOperandLatency(4439ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);4440// Unable to find operand latency. The caller may resort to getInstrLatency.4441if (!Latency)4442return std::nullopt;44434444// Adjust for IT block position.4445int Adj = DefAdj + UseAdj;44464447// Adjust for dynamic def-side opcode variants not captured by the itinerary.4448Adj += adjustDefLatency(Subtarget, DefMI, DefMCID, DefAlign);4449if (Adj >= 0 || (int)*Latency > -Adj) {4450return *Latency + Adj;4451}4452// Return the itinerary latency, which may be zero but not less than zero.4453return Latency;4454}44554456std::optional<unsigned>4457ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,4458SDNode *DefNode, unsigned DefIdx,4459SDNode *UseNode, unsigned UseIdx) const {4460if (!DefNode->isMachineOpcode())4461return 1;44624463const MCInstrDesc &DefMCID = get(DefNode->getMachineOpcode());44644465if (isZeroCost(DefMCID.Opcode))4466return 0;44674468if (!ItinData || ItinData->isEmpty())4469return DefMCID.mayLoad() ? 3 : 1;44704471if (!UseNode->isMachineOpcode()) {4472std::optional<unsigned> Latency =4473ItinData->getOperandCycle(DefMCID.getSchedClass(), DefIdx);4474int Adj = Subtarget.getPreISelOperandLatencyAdjustment();4475int Threshold = 1 + Adj;4476return !Latency || Latency <= (unsigned)Threshold ? 1 : *Latency - Adj;4477}44784479const MCInstrDesc &UseMCID = get(UseNode->getMachineOpcode());4480auto *DefMN = cast<MachineSDNode>(DefNode);4481unsigned DefAlign = !DefMN->memoperands_empty()4482? (*DefMN->memoperands_begin())->getAlign().value()4483: 0;4484auto *UseMN = cast<MachineSDNode>(UseNode);4485unsigned UseAlign = !UseMN->memoperands_empty()4486? (*UseMN->memoperands_begin())->getAlign().value()4487: 0;4488std::optional<unsigned> Latency = getOperandLatency(4489ItinData, DefMCID, DefIdx, DefAlign, UseMCID, UseIdx, UseAlign);4490if (!Latency)4491return std::nullopt;44924493if (Latency > 1U &&4494(Subtarget.isCortexA8() || Subtarget.isLikeA9() ||4495Subtarget.isCortexA7())) {4496// FIXME: Shifter op hack: no shift (i.e. 
[r +/- r]) or [r + r << 2]4497// variants are one cycle cheaper.4498switch (DefMCID.getOpcode()) {4499default: break;4500case ARM::LDRrs:4501case ARM::LDRBrs: {4502unsigned ShOpVal = DefNode->getConstantOperandVal(2);4503unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);4504if (ShImm == 0 ||4505(ShImm == 2 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))4506Latency = *Latency - 1;4507break;4508}4509case ARM::t2LDRs:4510case ARM::t2LDRBs:4511case ARM::t2LDRHs:4512case ARM::t2LDRSHs: {4513// Thumb2 mode: lsl only.4514unsigned ShAmt = DefNode->getConstantOperandVal(2);4515if (ShAmt == 0 || ShAmt == 2)4516Latency = *Latency - 1;4517break;4518}4519}4520} else if (DefIdx == 0 && Latency > 2U && Subtarget.isSwift()) {4521// FIXME: Properly handle all of the latency adjustments for address4522// writeback.4523switch (DefMCID.getOpcode()) {4524default: break;4525case ARM::LDRrs:4526case ARM::LDRBrs: {4527unsigned ShOpVal = DefNode->getConstantOperandVal(2);4528unsigned ShImm = ARM_AM::getAM2Offset(ShOpVal);4529if (ShImm == 0 ||4530((ShImm == 1 || ShImm == 2 || ShImm == 3) &&4531ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsl))4532Latency = *Latency - 2;4533else if (ShImm == 1 && ARM_AM::getAM2ShiftOpc(ShOpVal) == ARM_AM::lsr)4534Latency = *Latency - 1;4535break;4536}4537case ARM::t2LDRs:4538case ARM::t2LDRBs:4539case ARM::t2LDRHs:4540case ARM::t2LDRSHs:4541// Thumb2 mode: lsl 0-3 only.4542Latency = *Latency - 2;4543break;4544}4545}45464547if (DefAlign < 8 && Subtarget.checkVLDnAccessAlignment())4548switch (DefMCID.getOpcode()) {4549default: break;4550case ARM::VLD1q8:4551case ARM::VLD1q16:4552case ARM::VLD1q32:4553case ARM::VLD1q64:4554case ARM::VLD1q8wb_register:4555case ARM::VLD1q16wb_register:4556case ARM::VLD1q32wb_register:4557case ARM::VLD1q64wb_register:4558case ARM::VLD1q8wb_fixed:4559case ARM::VLD1q16wb_fixed:4560case ARM::VLD1q32wb_fixed:4561case ARM::VLD1q64wb_fixed:4562case ARM::VLD2d8:4563case ARM::VLD2d16:4564case ARM::VLD2d32:4565case ARM::VLD2q8Pseudo:4566case ARM::VLD2q16Pseudo:4567case ARM::VLD2q32Pseudo:4568case ARM::VLD2d8wb_fixed:4569case ARM::VLD2d16wb_fixed:4570case ARM::VLD2d32wb_fixed:4571case ARM::VLD2q8PseudoWB_fixed:4572case ARM::VLD2q16PseudoWB_fixed:4573case ARM::VLD2q32PseudoWB_fixed:4574case ARM::VLD2d8wb_register:4575case ARM::VLD2d16wb_register:4576case ARM::VLD2d32wb_register:4577case ARM::VLD2q8PseudoWB_register:4578case ARM::VLD2q16PseudoWB_register:4579case ARM::VLD2q32PseudoWB_register:4580case ARM::VLD3d8Pseudo:4581case ARM::VLD3d16Pseudo:4582case ARM::VLD3d32Pseudo:4583case ARM::VLD1d8TPseudo:4584case ARM::VLD1d16TPseudo:4585case ARM::VLD1d32TPseudo:4586case ARM::VLD1d64TPseudo:4587case ARM::VLD1d64TPseudoWB_fixed:4588case ARM::VLD1d64TPseudoWB_register:4589case ARM::VLD3d8Pseudo_UPD:4590case ARM::VLD3d16Pseudo_UPD:4591case ARM::VLD3d32Pseudo_UPD:4592case ARM::VLD3q8Pseudo_UPD:4593case ARM::VLD3q16Pseudo_UPD:4594case ARM::VLD3q32Pseudo_UPD:4595case ARM::VLD3q8oddPseudo:4596case ARM::VLD3q16oddPseudo:4597case ARM::VLD3q32oddPseudo:4598case ARM::VLD3q8oddPseudo_UPD:4599case ARM::VLD3q16oddPseudo_UPD:4600case ARM::VLD3q32oddPseudo_UPD:4601case ARM::VLD4d8Pseudo:4602case ARM::VLD4d16Pseudo:4603case ARM::VLD4d32Pseudo:4604case ARM::VLD1d8QPseudo:4605case ARM::VLD1d16QPseudo:4606case ARM::VLD1d32QPseudo:4607case ARM::VLD1d64QPseudo:4608case ARM::VLD1d64QPseudoWB_fixed:4609case ARM::VLD1d64QPseudoWB_register:4610case ARM::VLD1q8HighQPseudo:4611case ARM::VLD1q8LowQPseudo_UPD:4612case ARM::VLD1q8HighTPseudo:4613case ARM::VLD1q8LowTPseudo_UPD:4614case 
ARM::VLD1q16HighQPseudo:4615case ARM::VLD1q16LowQPseudo_UPD:4616case ARM::VLD1q16HighTPseudo:4617case ARM::VLD1q16LowTPseudo_UPD:4618case ARM::VLD1q32HighQPseudo:4619case ARM::VLD1q32LowQPseudo_UPD:4620case ARM::VLD1q32HighTPseudo:4621case ARM::VLD1q32LowTPseudo_UPD:4622case ARM::VLD1q64HighQPseudo:4623case ARM::VLD1q64LowQPseudo_UPD:4624case ARM::VLD1q64HighTPseudo:4625case ARM::VLD1q64LowTPseudo_UPD:4626case ARM::VLD4d8Pseudo_UPD:4627case ARM::VLD4d16Pseudo_UPD:4628case ARM::VLD4d32Pseudo_UPD:4629case ARM::VLD4q8Pseudo_UPD:4630case ARM::VLD4q16Pseudo_UPD:4631case ARM::VLD4q32Pseudo_UPD:4632case ARM::VLD4q8oddPseudo:4633case ARM::VLD4q16oddPseudo:4634case ARM::VLD4q32oddPseudo:4635case ARM::VLD4q8oddPseudo_UPD:4636case ARM::VLD4q16oddPseudo_UPD:4637case ARM::VLD4q32oddPseudo_UPD:4638case ARM::VLD1DUPq8:4639case ARM::VLD1DUPq16:4640case ARM::VLD1DUPq32:4641case ARM::VLD1DUPq8wb_fixed:4642case ARM::VLD1DUPq16wb_fixed:4643case ARM::VLD1DUPq32wb_fixed:4644case ARM::VLD1DUPq8wb_register:4645case ARM::VLD1DUPq16wb_register:4646case ARM::VLD1DUPq32wb_register:4647case ARM::VLD2DUPd8:4648case ARM::VLD2DUPd16:4649case ARM::VLD2DUPd32:4650case ARM::VLD2DUPd8wb_fixed:4651case ARM::VLD2DUPd16wb_fixed:4652case ARM::VLD2DUPd32wb_fixed:4653case ARM::VLD2DUPd8wb_register:4654case ARM::VLD2DUPd16wb_register:4655case ARM::VLD2DUPd32wb_register:4656case ARM::VLD2DUPq8EvenPseudo:4657case ARM::VLD2DUPq8OddPseudo:4658case ARM::VLD2DUPq16EvenPseudo:4659case ARM::VLD2DUPq16OddPseudo:4660case ARM::VLD2DUPq32EvenPseudo:4661case ARM::VLD2DUPq32OddPseudo:4662case ARM::VLD3DUPq8EvenPseudo:4663case ARM::VLD3DUPq8OddPseudo:4664case ARM::VLD3DUPq16EvenPseudo:4665case ARM::VLD3DUPq16OddPseudo:4666case ARM::VLD3DUPq32EvenPseudo:4667case ARM::VLD3DUPq32OddPseudo:4668case ARM::VLD4DUPd8Pseudo:4669case ARM::VLD4DUPd16Pseudo:4670case ARM::VLD4DUPd32Pseudo:4671case ARM::VLD4DUPd8Pseudo_UPD:4672case ARM::VLD4DUPd16Pseudo_UPD:4673case ARM::VLD4DUPd32Pseudo_UPD:4674case ARM::VLD4DUPq8EvenPseudo:4675case ARM::VLD4DUPq8OddPseudo:4676case ARM::VLD4DUPq16EvenPseudo:4677case ARM::VLD4DUPq16OddPseudo:4678case ARM::VLD4DUPq32EvenPseudo:4679case ARM::VLD4DUPq32OddPseudo:4680case ARM::VLD1LNq8Pseudo:4681case ARM::VLD1LNq16Pseudo:4682case ARM::VLD1LNq32Pseudo:4683case ARM::VLD1LNq8Pseudo_UPD:4684case ARM::VLD1LNq16Pseudo_UPD:4685case ARM::VLD1LNq32Pseudo_UPD:4686case ARM::VLD2LNd8Pseudo:4687case ARM::VLD2LNd16Pseudo:4688case ARM::VLD2LNd32Pseudo:4689case ARM::VLD2LNq16Pseudo:4690case ARM::VLD2LNq32Pseudo:4691case ARM::VLD2LNd8Pseudo_UPD:4692case ARM::VLD2LNd16Pseudo_UPD:4693case ARM::VLD2LNd32Pseudo_UPD:4694case ARM::VLD2LNq16Pseudo_UPD:4695case ARM::VLD2LNq32Pseudo_UPD:4696case ARM::VLD4LNd8Pseudo:4697case ARM::VLD4LNd16Pseudo:4698case ARM::VLD4LNd32Pseudo:4699case ARM::VLD4LNq16Pseudo:4700case ARM::VLD4LNq32Pseudo:4701case ARM::VLD4LNd8Pseudo_UPD:4702case ARM::VLD4LNd16Pseudo_UPD:4703case ARM::VLD4LNd32Pseudo_UPD:4704case ARM::VLD4LNq16Pseudo_UPD:4705case ARM::VLD4LNq32Pseudo_UPD:4706// If the address is not 64-bit aligned, the latencies of these4707// instructions increases by one.4708Latency = *Latency + 1;4709break;4710}47114712return Latency;4713}47144715unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {4716if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||4717MI.isImplicitDef())4718return 0;47194720if (MI.isBundle())4721return 0;47224723const MCInstrDesc &MCID = MI.getDesc();47244725if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&4726!Subtarget.cheapPredicableCPSRDef())) 
{
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions; this apparently increases their latencies.
    return 1;
  }
  return 0;
}

unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
                                           const MachineInstr &MI,
                                           unsigned *PredCost) const {
  if (MI.isCopyLike() || MI.isInsertSubreg() || MI.isRegSequence() ||
      MI.isImplicitDef())
    return 1;

  // An instruction scheduler typically runs on unbundled instructions;
  // however, other passes may query the latency of a bundled instruction.
  if (MI.isBundle()) {
    unsigned Latency = 0;
    MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    while (++I != E && I->isInsideBundle()) {
      if (I->getOpcode() != ARM::t2IT)
        Latency += getInstrLatency(ItinData, *I, PredCost);
    }
    return Latency;
  }

  const MCInstrDesc &MCID = MI.getDesc();
  if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
                                     !Subtarget.cheapPredicableCPSRDef()))) {
    // When predicated, CPSR is an additional source operand for CPSR updating
    // instructions; this apparently increases their latencies.
    *PredCost = 1;
  }
  // Be sure to call getStageLatency for an empty itinerary in case it has a
  // valid MinLatency property.
  if (!ItinData)
    return MI.mayLoad() ? 3 : 1;

  unsigned Class = MCID.getSchedClass();

  // For instructions with variable uops, use uops as latency.
  if (!ItinData->isEmpty() && ItinData->getNumMicroOps(Class) < 0)
    return getNumMicroOps(ItinData, MI);

  // For the common case, fall back on the itinerary's latency.
  unsigned Latency = ItinData->getStageLatency(Class);

  // Adjust for dynamic def-side opcode variants not captured by the itinerary.
  unsigned DefAlign =
      MI.hasOneMemOperand() ?
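  // (When the alignment is unknown, DefAlign stays 0 and conservatively
  // triggers the misalignment adjustments in adjustDefLatency.)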
(*MI.memoperands_begin())->getAlign().value() : 0;4778int Adj = adjustDefLatency(Subtarget, MI, MCID, DefAlign);4779if (Adj >= 0 || (int)Latency > -Adj) {4780return Latency + Adj;4781}4782return Latency;4783}47844785unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,4786SDNode *Node) const {4787if (!Node->isMachineOpcode())4788return 1;47894790if (!ItinData || ItinData->isEmpty())4791return 1;47924793unsigned Opcode = Node->getMachineOpcode();4794switch (Opcode) {4795default:4796return ItinData->getStageLatency(get(Opcode).getSchedClass());4797case ARM::VLDMQIA:4798case ARM::VSTMQIA:4799return 2;4800}4801}48024803bool ARMBaseInstrInfo::hasHighOperandLatency(const TargetSchedModel &SchedModel,4804const MachineRegisterInfo *MRI,4805const MachineInstr &DefMI,4806unsigned DefIdx,4807const MachineInstr &UseMI,4808unsigned UseIdx) const {4809unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;4810unsigned UDomain = UseMI.getDesc().TSFlags & ARMII::DomainMask;4811if (Subtarget.nonpipelinedVFP() &&4812(DDomain == ARMII::DomainVFP || UDomain == ARMII::DomainVFP))4813return true;48144815// Hoist VFP / NEON instructions with 4 or higher latency.4816unsigned Latency =4817SchedModel.computeOperandLatency(&DefMI, DefIdx, &UseMI, UseIdx);4818if (Latency <= 3)4819return false;4820return DDomain == ARMII::DomainVFP || DDomain == ARMII::DomainNEON ||4821UDomain == ARMII::DomainVFP || UDomain == ARMII::DomainNEON;4822}48234824bool ARMBaseInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,4825const MachineInstr &DefMI,4826unsigned DefIdx) const {4827const InstrItineraryData *ItinData = SchedModel.getInstrItineraries();4828if (!ItinData || ItinData->isEmpty())4829return false;48304831unsigned DDomain = DefMI.getDesc().TSFlags & ARMII::DomainMask;4832if (DDomain == ARMII::DomainGeneral) {4833unsigned DefClass = DefMI.getDesc().getSchedClass();4834std::optional<unsigned> DefCycle =4835ItinData->getOperandCycle(DefClass, DefIdx);4836return DefCycle && DefCycle <= 2U;4837}4838return false;4839}48404841bool ARMBaseInstrInfo::verifyInstruction(const MachineInstr &MI,4842StringRef &ErrInfo) const {4843if (convertAddSubFlagsOpcode(MI.getOpcode())) {4844ErrInfo = "Pseudo flag setting opcodes only exist in Selection DAG";4845return false;4846}4847if (MI.getOpcode() == ARM::tMOVr && !Subtarget.hasV6Ops()) {4848// Make sure we don't generate a lo-lo mov that isn't supported.4849if (!ARM::hGPRRegClass.contains(MI.getOperand(0).getReg()) &&4850!ARM::hGPRRegClass.contains(MI.getOperand(1).getReg())) {4851ErrInfo = "Non-flag-setting Thumb1 mov is v6-only";4852return false;4853}4854}4855if (MI.getOpcode() == ARM::tPUSH ||4856MI.getOpcode() == ARM::tPOP ||4857MI.getOpcode() == ARM::tPOP_RET) {4858for (const MachineOperand &MO : llvm::drop_begin(MI.operands(), 2)) {4859if (MO.isImplicit() || !MO.isReg())4860continue;4861Register Reg = MO.getReg();4862if (Reg < ARM::R0 || Reg > ARM::R7) {4863if (!(MI.getOpcode() == ARM::tPUSH && Reg == ARM::LR) &&4864!(MI.getOpcode() == ARM::tPOP_RET && Reg == ARM::PC)) {4865ErrInfo = "Unsupported register in Thumb1 push/pop";4866return false;4867}4868}4869}4870}4871if (MI.getOpcode() == ARM::MVE_VMOV_q_rr) {4872assert(MI.getOperand(4).isImm() && MI.getOperand(5).isImm());4873if ((MI.getOperand(4).getImm() != 2 && MI.getOperand(4).getImm() != 3) ||4874MI.getOperand(4).getImm() != MI.getOperand(5).getImm() + 2) {4875ErrInfo = "Incorrect array index for MVE_VMOV_q_rr";4876return false;4877}4878}48794880// Check the address model by taking the first 
Imm operand and checking it is4881// legal for that addressing mode.4882ARMII::AddrMode AddrMode =4883(ARMII::AddrMode)(MI.getDesc().TSFlags & ARMII::AddrModeMask);4884switch (AddrMode) {4885default:4886break;4887case ARMII::AddrModeT2_i7:4888case ARMII::AddrModeT2_i7s2:4889case ARMII::AddrModeT2_i7s4:4890case ARMII::AddrModeT2_i8:4891case ARMII::AddrModeT2_i8pos:4892case ARMII::AddrModeT2_i8neg:4893case ARMII::AddrModeT2_i8s4:4894case ARMII::AddrModeT2_i12: {4895uint32_t Imm = 0;4896for (auto Op : MI.operands()) {4897if (Op.isImm()) {4898Imm = Op.getImm();4899break;4900}4901}4902if (!isLegalAddressImm(MI.getOpcode(), Imm, this)) {4903ErrInfo = "Incorrect AddrMode Imm for instruction";4904return false;4905}4906break;4907}4908}4909return true;4910}49114912void ARMBaseInstrInfo::expandLoadStackGuardBase(MachineBasicBlock::iterator MI,4913unsigned LoadImmOpc,4914unsigned LoadOpc) const {4915assert(!Subtarget.isROPI() && !Subtarget.isRWPI() &&4916"ROPI/RWPI not currently supported with stack guard");49174918MachineBasicBlock &MBB = *MI->getParent();4919DebugLoc DL = MI->getDebugLoc();4920Register Reg = MI->getOperand(0).getReg();4921MachineInstrBuilder MIB;4922unsigned int Offset = 0;49234924if (LoadImmOpc == ARM::MRC || LoadImmOpc == ARM::t2MRC) {4925assert(!Subtarget.isReadTPSoft() &&4926"TLS stack protector requires hardware TLS register");49274928BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)4929.addImm(15)4930.addImm(0)4931.addImm(13)4932.addImm(0)4933.addImm(3)4934.add(predOps(ARMCC::AL));49354936Module &M = *MBB.getParent()->getFunction().getParent();4937Offset = M.getStackProtectorGuardOffset();4938if (Offset & ~0xfffU) {4939// The offset won't fit in the LDR's 12-bit immediate field, so emit an4940// extra ADD to cover the delta. This gives us a guaranteed 8 additional4941// bits, resulting in a range of 0 to +1 MiB for the guard offset.4942unsigned AddOpc = (LoadImmOpc == ARM::MRC) ? 
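      // For illustration: a guard offset of 0x1234 is materialized as
      //   ADD Reg, Reg, #0x1000
      //   LDR Reg, [Reg, #0x234]
      // so the final load's immediate stays within its 12-bit field.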
ARM::ADDri : ARM::t2ADDri;4943BuildMI(MBB, MI, DL, get(AddOpc), Reg)4944.addReg(Reg, RegState::Kill)4945.addImm(Offset & ~0xfffU)4946.add(predOps(ARMCC::AL))4947.addReg(0);4948Offset &= 0xfffU;4949}4950} else {4951const GlobalValue *GV =4952cast<GlobalValue>((*MI->memoperands_begin())->getValue());4953bool IsIndirect = Subtarget.isGVIndirectSymbol(GV);49544955unsigned TargetFlags = ARMII::MO_NO_FLAG;4956if (Subtarget.isTargetMachO()) {4957TargetFlags |= ARMII::MO_NONLAZY;4958} else if (Subtarget.isTargetCOFF()) {4959if (GV->hasDLLImportStorageClass())4960TargetFlags |= ARMII::MO_DLLIMPORT;4961else if (IsIndirect)4962TargetFlags |= ARMII::MO_COFFSTUB;4963} else if (IsIndirect) {4964TargetFlags |= ARMII::MO_GOT;4965}49664967if (LoadImmOpc == ARM::tMOVi32imm) { // Thumb-1 execute-only4968Register CPSRSaveReg = ARM::R12; // Use R12 as scratch register4969auto APSREncoding =4970ARMSysReg::lookupMClassSysRegByName("apsr_nzcvq")->Encoding;4971BuildMI(MBB, MI, DL, get(ARM::t2MRS_M), CPSRSaveReg)4972.addImm(APSREncoding)4973.add(predOps(ARMCC::AL));4974BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)4975.addGlobalAddress(GV, 0, TargetFlags);4976BuildMI(MBB, MI, DL, get(ARM::t2MSR_M))4977.addImm(APSREncoding)4978.addReg(CPSRSaveReg, RegState::Kill)4979.add(predOps(ARMCC::AL));4980} else {4981BuildMI(MBB, MI, DL, get(LoadImmOpc), Reg)4982.addGlobalAddress(GV, 0, TargetFlags);4983}49844985if (IsIndirect) {4986MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);4987MIB.addReg(Reg, RegState::Kill).addImm(0);4988auto Flags = MachineMemOperand::MOLoad |4989MachineMemOperand::MODereferenceable |4990MachineMemOperand::MOInvariant;4991MachineMemOperand *MMO = MBB.getParent()->getMachineMemOperand(4992MachinePointerInfo::getGOT(*MBB.getParent()), Flags, 4, Align(4));4993MIB.addMemOperand(MMO).add(predOps(ARMCC::AL));4994}4995}49964997MIB = BuildMI(MBB, MI, DL, get(LoadOpc), Reg);4998MIB.addReg(Reg, RegState::Kill)4999.addImm(Offset)5000.cloneMemRefs(*MI)5001.add(predOps(ARMCC::AL));5002}50035004bool5005ARMBaseInstrInfo::isFpMLxInstruction(unsigned Opcode, unsigned &MulOpc,5006unsigned &AddSubOpc,5007bool &NegAcc, bool &HasLane) const {5008DenseMap<unsigned, unsigned>::const_iterator I = MLxEntryMap.find(Opcode);5009if (I == MLxEntryMap.end())5010return false;50115012const ARM_MLxEntry &Entry = ARM_MLxTable[I->second];5013MulOpc = Entry.MulOpc;5014AddSubOpc = Entry.AddSubOpc;5015NegAcc = Entry.NegAcc;5016HasLane = Entry.HasLane;5017return true;5018}50195020//===----------------------------------------------------------------------===//5021// Execution domains.5022//===----------------------------------------------------------------------===//5023//5024// Some instructions go down the NEON pipeline, some go down the VFP pipeline,5025// and some can go down both. The vmov instructions go down the VFP pipeline,5026// but they can be changed to vorr equivalents that are executed by the NEON5027// pipeline.5028//5029// We use the following execution domain numbering:5030//5031enum ARMExeDomain {5032ExeGeneric = 0,5033ExeVFP = 1,5034ExeNEON = 25035};50365037//5038// Also see ARMInstrFormats.td and Domain* enums in ARMBaseInfo.h5039//5040std::pair<uint16_t, uint16_t>5041ARMBaseInstrInfo::getExecutionDomain(const MachineInstr &MI) const {5042// If we don't have access to NEON instructions then we won't be able5043// to swizzle anything to the NEON domain. 
Check to make sure.5044if (Subtarget.hasNEON()) {5045// VMOVD, VMOVRS and VMOVSR are VFP instructions, but can be changed to NEON5046// if they are not predicated.5047if (MI.getOpcode() == ARM::VMOVD && !isPredicated(MI))5048return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));50495050// CortexA9 is particularly picky about mixing the two and wants these5051// converted.5052if (Subtarget.useNEONForFPMovs() && !isPredicated(MI) &&5053(MI.getOpcode() == ARM::VMOVRS || MI.getOpcode() == ARM::VMOVSR ||5054MI.getOpcode() == ARM::VMOVS))5055return std::make_pair(ExeVFP, (1 << ExeVFP) | (1 << ExeNEON));5056}5057// No other instructions can be swizzled, so just determine their domain.5058unsigned Domain = MI.getDesc().TSFlags & ARMII::DomainMask;50595060if (Domain & ARMII::DomainNEON)5061return std::make_pair(ExeNEON, 0);50625063// Certain instructions can go either way on Cortex-A8.5064// Treat them as NEON instructions.5065if ((Domain & ARMII::DomainNEONA8) && Subtarget.isCortexA8())5066return std::make_pair(ExeNEON, 0);50675068if (Domain & ARMII::DomainVFP)5069return std::make_pair(ExeVFP, 0);50705071return std::make_pair(ExeGeneric, 0);5072}50735074static unsigned getCorrespondingDRegAndLane(const TargetRegisterInfo *TRI,5075unsigned SReg, unsigned &Lane) {5076unsigned DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_0, &ARM::DPRRegClass);5077Lane = 0;50785079if (DReg != ARM::NoRegister)5080return DReg;50815082Lane = 1;5083DReg = TRI->getMatchingSuperReg(SReg, ARM::ssub_1, &ARM::DPRRegClass);50845085assert(DReg && "S-register with no D super-register?");5086return DReg;5087}50885089/// getImplicitSPRUseForDPRUse - Given a use of a DPR register and lane,5090/// set ImplicitSReg to a register number that must be marked as implicit-use or5091/// zero if no register needs to be defined as implicit-use.5092///5093/// If the function cannot determine if an SPR should be marked implicit use or5094/// not, it returns false.5095///5096/// This function handles cases where an instruction is being modified from taking5097/// an SPR to a DPR[Lane]. A use of the DPR is being added, which may conflict5098/// with an earlier def of an SPR corresponding to DPR[Lane^1] (i.e. the other5099/// lane of the DPR).5100///5101/// If the other SPR is defined, an implicit-use of it should be added. Else,5102/// (including the case where the DPR itself is defined), it should not.5103///5104static bool getImplicitSPRUseForDPRUse(const TargetRegisterInfo *TRI,5105MachineInstr &MI, unsigned DReg,5106unsigned Lane, unsigned &ImplicitSReg) {5107// If the DPR is defined or used already, the other SPR lane will be chained5108// correctly, so there is nothing to be done.5109if (MI.definesRegister(DReg, TRI) || MI.readsRegister(DReg, TRI)) {5110ImplicitSReg = 0;5111return true;5112}51135114// Otherwise we need to go searching to see if the SPR is set explicitly.5115ImplicitSReg = TRI->getSubReg(DReg,5116(Lane & 1) ? 
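  // Note the lane index is inverted here: if the DPR use covers lane L, the
  // SPR that may need an implicit-use is the sibling lane L ^ 1 (ssub_1 for
  // lane 0, ssub_0 for lane 1).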
ARM::ssub_0 : ARM::ssub_1);5117MachineBasicBlock::LivenessQueryResult LQR =5118MI.getParent()->computeRegisterLiveness(TRI, ImplicitSReg, MI);51195120if (LQR == MachineBasicBlock::LQR_Live)5121return true;5122else if (LQR == MachineBasicBlock::LQR_Unknown)5123return false;51245125// If the register is known not to be live, there is no need to add an5126// implicit-use.5127ImplicitSReg = 0;5128return true;5129}51305131void ARMBaseInstrInfo::setExecutionDomain(MachineInstr &MI,5132unsigned Domain) const {5133unsigned DstReg, SrcReg, DReg;5134unsigned Lane;5135MachineInstrBuilder MIB(*MI.getParent()->getParent(), MI);5136const TargetRegisterInfo *TRI = &getRegisterInfo();5137switch (MI.getOpcode()) {5138default:5139llvm_unreachable("cannot handle opcode!");5140break;5141case ARM::VMOVD:5142if (Domain != ExeNEON)5143break;51445145// Zap the predicate operands.5146assert(!isPredicated(MI) && "Cannot predicate a VORRd");51475148// Make sure we've got NEON instructions.5149assert(Subtarget.hasNEON() && "VORRd requires NEON");51505151// Source instruction is %DDst = VMOVD %DSrc, 14, %noreg (; implicits)5152DstReg = MI.getOperand(0).getReg();5153SrcReg = MI.getOperand(1).getReg();51545155for (unsigned i = MI.getDesc().getNumOperands(); i; --i)5156MI.removeOperand(i - 1);51575158// Change to a %DDst = VORRd %DSrc, %DSrc, 14, %noreg (; implicits)5159MI.setDesc(get(ARM::VORRd));5160MIB.addReg(DstReg, RegState::Define)5161.addReg(SrcReg)5162.addReg(SrcReg)5163.add(predOps(ARMCC::AL));5164break;5165case ARM::VMOVRS:5166if (Domain != ExeNEON)5167break;5168assert(!isPredicated(MI) && "Cannot predicate a VGETLN");51695170// Source instruction is %RDst = VMOVRS %SSrc, 14, %noreg (; implicits)5171DstReg = MI.getOperand(0).getReg();5172SrcReg = MI.getOperand(1).getReg();51735174for (unsigned i = MI.getDesc().getNumOperands(); i; --i)5175MI.removeOperand(i - 1);51765177DReg = getCorrespondingDRegAndLane(TRI, SrcReg, Lane);51785179// Convert to %RDst = VGETLNi32 %DSrc, Lane, 14, %noreg (; imps)5180// Note that DSrc has been widened and the other lane may be undef, which5181// contaminates the entire register.5182MI.setDesc(get(ARM::VGETLNi32));5183MIB.addReg(DstReg, RegState::Define)5184.addReg(DReg, RegState::Undef)5185.addImm(Lane)5186.add(predOps(ARMCC::AL));51875188// The old source should be an implicit use, otherwise we might think it5189// was dead before here.5190MIB.addReg(SrcReg, RegState::Implicit);5191break;5192case ARM::VMOVSR: {5193if (Domain != ExeNEON)5194break;5195assert(!isPredicated(MI) && "Cannot predicate a VSETLN");51965197// Source instruction is %SDst = VMOVSR %RSrc, 14, %noreg (; implicits)5198DstReg = MI.getOperand(0).getReg();5199SrcReg = MI.getOperand(1).getReg();52005201DReg = getCorrespondingDRegAndLane(TRI, DstReg, Lane);52025203unsigned ImplicitSReg;5204if (!getImplicitSPRUseForDPRUse(TRI, MI, DReg, Lane, ImplicitSReg))5205break;52065207for (unsigned i = MI.getDesc().getNumOperands(); i; --i)5208MI.removeOperand(i - 1);52095210// Convert to %DDst = VSETLNi32 %DDst, %RSrc, Lane, 14, %noreg (; imps)5211// Again DDst may be undefined at the beginning of this instruction.5212MI.setDesc(get(ARM::VSETLNi32));5213MIB.addReg(DReg, RegState::Define)5214.addReg(DReg, getUndefRegState(!MI.readsRegister(DReg, TRI)))5215.addReg(SrcReg)5216.addImm(Lane)5217.add(predOps(ARMCC::AL));52185219// The narrower destination must be marked as set to keep previous chains5220// in place.5221MIB.addReg(DstReg, RegState::Define | RegState::Implicit);5222if (ImplicitSReg != 0)5223MIB.addReg(ImplicitSReg, 
RegState::Implicit);
    break;
  }
  case ARM::VMOVS: {
    if (Domain != ExeNEON)
      break;

    // Source instruction is %SDst = VMOVS %SSrc, 14, %noreg (; implicits)
    DstReg = MI.getOperand(0).getReg();
    SrcReg = MI.getOperand(1).getReg();

    unsigned DstLane = 0, SrcLane = 0, DDst, DSrc;
    DDst = getCorrespondingDRegAndLane(TRI, DstReg, DstLane);
    DSrc = getCorrespondingDRegAndLane(TRI, SrcReg, SrcLane);

    unsigned ImplicitSReg;
    if (!getImplicitSPRUseForDPRUse(TRI, MI, DSrc, SrcLane, ImplicitSReg))
      break;

    for (unsigned i = MI.getDesc().getNumOperands(); i; --i)
      MI.removeOperand(i - 1);

    if (DSrc == DDst) {
      // Destination can be:
      //   %DDst = VDUPLN32d %DDst, Lane, 14, %noreg (; implicits)
      MI.setDesc(get(ARM::VDUPLN32d));
      MIB.addReg(DDst, RegState::Define)
          .addReg(DDst, getUndefRegState(!MI.readsRegister(DDst, TRI)))
          .addImm(SrcLane)
          .add(predOps(ARMCC::AL));

      // Neither the source nor the destination is naturally represented any
      // more, so add them in manually.
      MIB.addReg(DstReg, RegState::Implicit | RegState::Define);
      MIB.addReg(SrcReg, RegState::Implicit);
      if (ImplicitSReg != 0)
        MIB.addReg(ImplicitSReg, RegState::Implicit);
      break;
    }

    // In general there's no single instruction that can perform an S <-> S
    // move in NEON space, but a pair of VEXT instructions *can* do the
    // job. It turns out that the VEXTs needed will only use DSrc once, with
    // the position based purely on the combination of lane-0 and lane-1
    // involved. For example:
    //   vmov s0, s2 -> vext.32 d0, d0, d1, #1  vext.32 d0, d0, d0, #1
    //   vmov s1, s3 -> vext.32 d0, d1, d0, #1  vext.32 d0, d0, d0, #1
    //   vmov s0, s3 -> vext.32 d0, d0, d0, #1  vext.32 d0, d1, d0, #1
    //   vmov s1, s2 -> vext.32 d0, d0, d0, #1  vext.32 d0, d0, d1, #1
    //
    // Pattern of the MachineInstrs is:
    //   %DDst = VEXTd32 %DSrc1, %DSrc2, Lane, 14, %noreg (; implicits)
    MachineInstrBuilder NewMIB;
    NewMIB = BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::VEXTd32),
                     DDst);

    // On the first instruction, both DSrc and DDst may be undef if present.
    // Specifically when the original instruction didn't have them as an
    // <imp-use>.
    unsigned CurReg = SrcLane == 1 && DstLane == 1 ? DSrc : DDst;
    bool CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 0 ? DSrc : DDst;
    CurUndef = !MI.readsRegister(CurReg, TRI);
    NewMIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));

    if (SrcLane == DstLane)
      NewMIB.addReg(SrcReg, RegState::Implicit);

    MI.setDesc(get(ARM::VEXTd32));
    MIB.addReg(DDst, RegState::Define);

    // On the second instruction, DDst has definitely been defined above, so
    // it is not undef. DSrc, if present, can be undef as above.
    CurReg = SrcLane == 1 && DstLane == 0 ? DSrc : DDst;
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef));

    CurReg = SrcLane == 0 && DstLane == 1 ?
    CurUndef = CurReg == DSrc && !MI.readsRegister(CurReg, TRI);
    MIB.addReg(CurReg, getUndefRegState(CurUndef))
        .addImm(1)
        .add(predOps(ARMCC::AL));

    if (SrcLane != DstLane)
      MIB.addReg(SrcReg, RegState::Implicit);

    // As before, the original destination is no longer represented, add it
    // implicitly.
    MIB.addReg(DstReg, RegState::Define | RegState::Implicit);
    if (ImplicitSReg != 0)
      MIB.addReg(ImplicitSReg, RegState::Implicit);
    break;
  }
  }
}

//===----------------------------------------------------------------------===//
// Partial register updates
//===----------------------------------------------------------------------===//
//
// Swift renames NEON registers with 64-bit granularity. That means any
// instruction writing an S-reg implicitly reads the containing D-reg. The
// problem is mostly avoided by translating f32 operations to v2f32 operations
// on D-registers, but f32 loads are still a problem.
//
// These instructions can load an f32 into a NEON register:
//
// VLDRS - Only writes S, partial D update.
// VLD1LNd32 - Writes all D-regs, explicit partial D update, 2 uops.
// VLD1DUPd32 - Writes all D-regs, no partial reg update, 2 uops.
//
// FCONSTD can be used as a dependency-breaking instruction.
unsigned ARMBaseInstrInfo::getPartialRegUpdateClearance(
    const MachineInstr &MI, unsigned OpNum,
    const TargetRegisterInfo *TRI) const {
  auto PartialUpdateClearance = Subtarget.getPartialUpdateClearance();
  if (!PartialUpdateClearance)
    return 0;

  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  if (MO.readsReg())
    return 0;
  Register Reg = MO.getReg();
  int UseOp = -1;

  switch (MI.getOpcode()) {
  // Normal instructions writing only an S-register.
  case ARM::VLDRS:
  case ARM::FCONSTS:
  case ARM::VMOVSR:
  case ARM::VMOVv8i8:
  case ARM::VMOVv4i16:
  case ARM::VMOVv2i32:
  case ARM::VMOVv2f32:
  case ARM::VMOVv1i64:
    UseOp = MI.findRegisterUseOperandIdx(Reg, TRI, false);
    break;

  // Explicitly reads the dependency.
  case ARM::VLD1LNd32:
    UseOp = 3;
    break;
  default:
    return 0;
  }

  // If this instruction actually reads a value from Reg, there is no unwanted
  // dependency.
  if (UseOp != -1 && MI.getOperand(UseOp).readsReg())
    return 0;

  // We must be able to clobber the whole D-reg.
  if (Reg.isVirtual()) {
    // Virtual register must be a def undef foo:ssub_0 operand.
    if (!MO.getSubReg() || MI.readsVirtualRegister(Reg))
      return 0;
  } else if (ARM::SPRRegClass.contains(Reg)) {
    // Physical register: MI must define the full D-reg.
    unsigned DReg = TRI->getMatchingSuperReg(Reg, ARM::ssub_0,
                                             &ARM::DPRRegClass);
    if (!DReg || !MI.definesRegister(DReg, TRI))
      return 0;
  }

  // MI has an unwanted D-register dependency.
  // Avoid defs in the previous N instructions.
  return PartialUpdateClearance;
}
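// Illustrative sketch (not from a real trace): on a Swift-like core, a VLDRS
// only writes an S-reg, so at 64-bit rename granularity it partially updates
// the containing D-reg and inherits a false dependency on its last writer:
//
//     %d7 = ...          ; last def of d7
//     VLDRS %s14, ...    ; writes s14 only -> waits on the def of d7 above
//
// When such a def falls within the clearance window returned above, the
// false-dependency breaking machinery calls breakPartialRegDependency()
// below, which first inserts a full-width FCONSTD def of the D-reg.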
// Break a partial register dependency after getPartialRegUpdateClearance
// returned non-zero.
void ARMBaseInstrInfo::breakPartialRegDependency(
    MachineInstr &MI, unsigned OpNum, const TargetRegisterInfo *TRI) const {
  assert(OpNum < MI.getDesc().getNumDefs() && "OpNum is not a def");
  assert(TRI && "Need TRI instance");

  const MachineOperand &MO = MI.getOperand(OpNum);
  Register Reg = MO.getReg();
  assert(Reg.isPhysical() && "Can't break virtual register dependencies.");
  unsigned DReg = Reg;

  // If MI defines an S-reg, find the corresponding D super-register.
  if (ARM::SPRRegClass.contains(Reg)) {
    DReg = ARM::D0 + (Reg - ARM::S0) / 2;
    assert(TRI->isSuperRegister(Reg, DReg) && "Register enums broken");
  }

  assert(ARM::DPRRegClass.contains(DReg) && "Can only break D-reg deps");
  assert(MI.definesRegister(DReg, TRI) && "MI doesn't clobber full D-reg");

  // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines
  // the full D-register by loading the same value to both lanes. The
  // instruction is micro-coded with 2 uops, so don't do this until we can
  // properly schedule micro-coded instructions. The dispatcher stalls cause
  // too big regressions.

  // Insert the dependency-breaking FCONSTD before MI.
  // 96 is the encoding of 0.5, but the actual value doesn't matter here.
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(ARM::FCONSTD), DReg)
      .addImm(96)
      .add(predOps(ARMCC::AL));
  MI.addRegisterKilled(DReg, TRI, true);
}

bool ARMBaseInstrInfo::hasNOP() const {
  return Subtarget.hasFeature(ARM::HasV6KOps);
}

bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const {
  if (MI->getNumOperands() < 4)
    return true;
  unsigned ShOpVal = MI->getOperand(3).getImm();
  unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal);
  // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1.
  if ((ShImm == 1 && ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsr) ||
      ((ShImm == 1 || ShImm == 2) &&
       ARM_AM::getSORegShOp(ShOpVal) == ARM_AM::lsl))
    return true;

  return false;
}
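// For example (illustrative assembly), isSwiftFastImmShift() above accepts:
//     add r0, r1, r2, lsl #2   ; ShImm == 2 with lsl -> fast on Swift
//     add r0, r1, r2, lsr #1   ; ShImm == 1 with lsr -> fast on Swift
// and rejects anything else, e.g.:
//     add r0, r1, r2, lsl #3   ; not one of the fast shift forms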
bool ARMBaseInstrInfo::getRegSequenceLikeInputs(
    const MachineInstr &MI, unsigned DefIdx,
    SmallVectorImpl<RegSubRegPairAndIdx> &InputRegs) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isRegSequenceLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVDRR:
    // dX = VMOVDRR rY, rZ
    // is the same as:
    // dX = REG_SEQUENCE rY, ssub_0, rZ, ssub_1
    // Populate the InputRegs accordingly.
    // rY
    const MachineOperand *MOReg = &MI.getOperand(1);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(
          MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_0));
    // rZ
    MOReg = &MI.getOperand(2);
    if (!MOReg->isUndef())
      InputRegs.push_back(RegSubRegPairAndIdx(
          MOReg->getReg(), MOReg->getSubReg(), ARM::ssub_1));
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

bool ARMBaseInstrInfo::getExtractSubregLikeInputs(
    const MachineInstr &MI, unsigned DefIdx,
    RegSubRegPairAndIdx &InputReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isExtractSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VMOVRRD:
    // rX, rY = VMOVRRD dZ
    // is the same as:
    // rX = EXTRACT_SUBREG dZ, ssub_0
    // rY = EXTRACT_SUBREG dZ, ssub_1
    const MachineOperand &MOReg = MI.getOperand(2);
    if (MOReg.isUndef())
      return false;
    InputReg.Reg = MOReg.getReg();
    InputReg.SubReg = MOReg.getSubReg();
    InputReg.SubIdx = DefIdx == 0 ? ARM::ssub_0 : ARM::ssub_1;
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

bool ARMBaseInstrInfo::getInsertSubregLikeInputs(
    const MachineInstr &MI, unsigned DefIdx, RegSubRegPair &BaseReg,
    RegSubRegPairAndIdx &InsertedReg) const {
  assert(DefIdx < MI.getDesc().getNumDefs() && "Invalid definition index");
  assert(MI.isInsertSubregLike() && "Invalid kind of instruction");

  switch (MI.getOpcode()) {
  case ARM::VSETLNi32:
  case ARM::MVE_VMOV_to_lane_32:
    // dX = VSETLNi32 dY, rZ, imm
    // qX = MVE_VMOV_to_lane_32 qY, rZ, imm
    const MachineOperand &MOBaseReg = MI.getOperand(1);
    const MachineOperand &MOInsertedReg = MI.getOperand(2);
    if (MOInsertedReg.isUndef())
      return false;
    const MachineOperand &MOIndex = MI.getOperand(3);
    BaseReg.Reg = MOBaseReg.getReg();
    BaseReg.SubReg = MOBaseReg.getSubReg();

    InsertedReg.Reg = MOInsertedReg.getReg();
    InsertedReg.SubReg = MOInsertedReg.getSubReg();
    InsertedReg.SubIdx = ARM::ssub_0 + MOIndex.getImm();
    return true;
  }
  llvm_unreachable("Target dependent opcode missing");
}

std::pair<unsigned, unsigned>
ARMBaseInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  const unsigned Mask = ARMII::MO_OPTION_MASK;
  return std::make_pair(TF & Mask, TF & ~Mask);
}

ArrayRef<std::pair<unsigned, const char *>>
ARMBaseInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace ARMII;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_LO16, "arm-lo16"},       {MO_HI16, "arm-hi16"},
      {MO_LO_0_7, "arm-lo-0-7"},   {MO_HI_0_7, "arm-hi-0-7"},
      {MO_LO_8_15, "arm-lo-8-15"}, {MO_HI_8_15, "arm-hi-8-15"},
  };
  return ArrayRef(TargetFlags);
}

ArrayRef<std::pair<unsigned, const char *>>
ARMBaseInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
  using namespace ARMII;

  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_COFFSTUB, "arm-coffstub"},
      {MO_GOT, "arm-got"},
      {MO_SBREL, "arm-sbrel"},
      {MO_DLLIMPORT, "arm-dllimport"},
      {MO_SECREL, "arm-secrel"},
      {MO_NONLAZY, "arm-nonlazy"}};
  return ArrayRef(TargetFlags);
}

std::optional<RegImmPair>
ARMBaseInstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
  int Sign = 1;
  unsigned Opcode = MI.getOpcode();
  int64_t Offset = 0;

  // TODO: Handle cases where Reg is a super- or sub-register of the
  // destination register.
  const MachineOperand &Op0 = MI.getOperand(0);
  if (!Op0.isReg() || Reg != Op0.getReg())
    return std::nullopt;

  // We describe SUBri or ADDri instructions.
  if (Opcode == ARM::SUBri)
    Sign = -1;
  else if (Opcode != ARM::ADDri)
    return std::nullopt;

  // TODO: The third operand can be a global address (usually some string).
  // Since strings can be relocated, we cannot calculate their offsets for
  // now.
  if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
    return std::nullopt;

  Offset = MI.getOperand(2).getImm() * Sign;
  return RegImmPair{MI.getOperand(1).getReg(), Offset};
}
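// For example (hypothetical operands), querying isAddImmediate(MI, r0) above
// for
//     %r0 = ADDri %r1, 16, 14, %noreg, %noreg
// yields RegImmPair{r1, 16}, while the corresponding SUBri form yields
// RegImmPair{r1, -16}.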
bool llvm::registerDefinedBetween(unsigned Reg,
                                  MachineBasicBlock::iterator From,
                                  MachineBasicBlock::iterator To,
                                  const TargetRegisterInfo *TRI) {
  for (auto I = From; I != To; ++I)
    if (I->modifiesRegister(Reg, TRI))
      return true;
  return false;
}

MachineInstr *llvm::findCMPToFoldIntoCBZ(MachineInstr *Br,
                                         const TargetRegisterInfo *TRI) {
  // Search backwards to the instruction that defines CPSR. This may or may
  // not be a CMP; we check that after this loop. If we find another
  // instruction that reads CPSR, we return nullptr.
  MachineBasicBlock::iterator CmpMI = Br;
  while (CmpMI != Br->getParent()->begin()) {
    --CmpMI;
    if (CmpMI->modifiesRegister(ARM::CPSR, TRI))
      break;
    if (CmpMI->readsRegister(ARM::CPSR, TRI))
      break;
  }

  // Check that this inst is a CMP r[0-7], #0 and that the register
  // is not redefined between the cmp and the br.
  if (CmpMI->getOpcode() != ARM::tCMPi8 && CmpMI->getOpcode() != ARM::t2CMPri)
    return nullptr;
  Register Reg = CmpMI->getOperand(0).getReg();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(*CmpMI, PredReg);
  if (Pred != ARMCC::AL || CmpMI->getOperand(1).getImm() != 0)
    return nullptr;
  if (!isARMLowRegister(Reg))
    return nullptr;
  if (registerDefinedBetween(Reg, CmpMI->getNextNode(), Br, TRI))
    return nullptr;

  return &*CmpMI;
}
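// For example (illustrative MIR), given
//     tCMPi8 %r3, 0, 14, %noreg
//     tBcc %bb.2, 0 /* eq */, %cpsr
// findCMPToFoldIntoCBZ() above returns the tCMPi8 so the caller can fold the
// pair into a single "cbz r3, <target>", provided r3 is a low register and
// is not redefined between the compare and the branch.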
unsigned llvm::ConstantMaterializationCost(unsigned Val,
                                           const ARMSubtarget *Subtarget,
                                           bool ForCodesize) {
  if (Subtarget->isThumb()) {
    if (Val <= 255) // MOV
      return ForCodesize ? 2 : 1;
    if (Subtarget->hasV6T2Ops() && (Val <= 0xffff ||                    // MOV
                                    ARM_AM::getT2SOImmVal(Val) != -1 || // MOVW
                                    ARM_AM::getT2SOImmVal(~Val) != -1)) // MVN
      return ForCodesize ? 4 : 1;
    if (Val <= 510) // MOV + ADDi8
      return ForCodesize ? 4 : 2;
    if (~Val <= 255) // MOV + MVN
      return ForCodesize ? 4 : 2;
    if (ARM_AM::isThumbImmShiftedVal(Val)) // MOV + LSL
      return ForCodesize ? 4 : 2;
  } else {
    if (ARM_AM::getSOImmVal(Val) != -1) // MOV
      return ForCodesize ? 4 : 1;
    if (ARM_AM::getSOImmVal(~Val) != -1) // MVN
      return ForCodesize ? 4 : 1;
    if (Subtarget->hasV6T2Ops() && Val <= 0xffff) // MOVW
      return ForCodesize ? 4 : 1;
    if (ARM_AM::isSOImmTwoPartVal(Val)) // two instrs
      return ForCodesize ? 8 : 2;
    if (ARM_AM::isSOImmTwoPartValNeg(Val)) // two instrs
      return ForCodesize ? 8 : 2;
  }
  if (Subtarget->useMovt()) // MOVW + MOVT
    return ForCodesize ? 8 : 2;
  return ForCodesize ? 8 : 3; // Literal pool load
}

bool llvm::HasLowerConstantMaterializationCost(unsigned Val1, unsigned Val2,
                                               const ARMSubtarget *Subtarget,
                                               bool ForCodesize) {
  // Check with ForCodesize
  unsigned Cost1 = ConstantMaterializationCost(Val1, Subtarget, ForCodesize);
  unsigned Cost2 = ConstantMaterializationCost(Val2, Subtarget, ForCodesize);
  if (Cost1 < Cost2)
    return true;
  if (Cost1 > Cost2)
    return false;

  // If they are equal, try with !ForCodesize
  return ConstantMaterializationCost(Val1, Subtarget, !ForCodesize) <
         ConstantMaterializationCost(Val2, Subtarget, !ForCodesize);
}
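// Worked examples for ConstantMaterializationCost() above (illustrative):
//  * Thumb2, Val = 0x00CD00CD: matches the replicated-halfword T2 modified
//    immediate pattern, so one 32-bit MOV: 4 bytes / 1 instruction.
//  * Val = 0x12345678 with MOVT available: MOVW + MOVT, 8 bytes / 2 instrs.
//  * The same value without MOVT falls through to a literal-pool load,
//    counted as 8 bytes (load plus 4-byte pool entry).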
/// Constants defining how certain sequences should be outlined.
/// This encompasses how an outlined function should be called, and what kind
/// of frame should be emitted for that outlined function.
///
/// \p MachineOutlinerTailCall implies that the function is being created from
/// a sequence of instructions ending in a return.
///
/// That is,
///
/// I1                                 OUTLINED_FUNCTION:
/// I2 --> B OUTLINED_FUNCTION         I1
/// BX LR                              I2
///                                    BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      0 |   0 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerThunk implies that the function is being created from
/// a sequence of instructions ending in a call. The outlined function is
/// called with a BL instruction, and the outlined function tail-calls the
/// original call destination.
///
/// That is,
///
/// I1                                 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION        I1
/// BL f                               I2
///                                    B f
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      0 |   0 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerNoLRSave implies that the function should be called using
/// a BL instruction, but doesn't require LR to be saved and restored. This
/// happens when LR is known to be dead.
///
/// That is,
///
/// I1                                 OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION        I1
/// I3                                 I2
///                                    I3
///                                    BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      4 |   4 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerRegSave implies that the function should be called with a
/// save and restore of LR to an available register. This allows us to avoid
/// stack fixups. Note that this outlining variant is compatible with the
/// NoLRSave case.
///
/// That is,
///
/// I1     Save LR                     OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION        I1
/// I3     Restore LR                  I2
///                                    I3
///                                    BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      8 |  12 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |     No |  No |
/// +-------------------------+--------+-----+
///
/// \p MachineOutlinerDefault implies that the function should be called with
/// a save and restore of LR to the stack.
///
/// That is,
///
/// I1     Save LR                     OUTLINED_FUNCTION:
/// I2 --> BL OUTLINED_FUNCTION        I1
/// I3     Restore LR                  I2
///                                    I3
///                                    BX LR
///
/// +-------------------------+--------+-----+
/// |                         | Thumb2 | ARM |
/// +-------------------------+--------+-----+
/// | Call overhead in Bytes  |      8 |  12 |
/// | Frame overhead in Bytes |      2 |   4 |
/// | Stack fixup required    |    Yes | Yes |
/// +-------------------------+--------+-----+

enum MachineOutlinerClass {
  MachineOutlinerTailCall,
  MachineOutlinerThunk,
  MachineOutlinerNoLRSave,
  MachineOutlinerRegSave,
  MachineOutlinerDefault
};

enum MachineOutlinerMBBFlags {
  LRUnavailableSomewhere = 0x2,
  HasCalls = 0x4,
  UnsafeRegsDead = 0x8
};

struct OutlinerCosts {
  int CallTailCall;
  int FrameTailCall;
  int CallThunk;
  int FrameThunk;
  int CallNoLRSave;
  int FrameNoLRSave;
  int CallRegSave;
  int FrameRegSave;
  int CallDefault;
  int FrameDefault;
  int SaveRestoreLROnStack;

  OutlinerCosts(const ARMSubtarget &target)
      : CallTailCall(target.isThumb() ? 4 : 4),
        FrameTailCall(target.isThumb() ? 0 : 0),
        CallThunk(target.isThumb() ? 4 : 4),
        FrameThunk(target.isThumb() ? 0 : 0),
        CallNoLRSave(target.isThumb() ? 4 : 4),
        FrameNoLRSave(target.isThumb() ? 2 : 4),
        CallRegSave(target.isThumb() ? 8 : 12),
        FrameRegSave(target.isThumb() ? 2 : 4),
        CallDefault(target.isThumb() ? 8 : 12),
        FrameDefault(target.isThumb() ? 2 : 4),
        SaveRestoreLROnStack(target.isThumb() ? 8 : 8) {}
};

Register
ARMBaseInstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
  MachineFunction *MF = C.getMF();
  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
  const ARMBaseRegisterInfo *ARI =
      static_cast<const ARMBaseRegisterInfo *>(&TRI);

  BitVector regsReserved = ARI->getReservedRegs(*MF);
  // Check if there is an available register across the sequence that we can
  // use.
  for (Register Reg : ARM::rGPRRegClass) {
    if (!(Reg < regsReserved.size() && regsReserved.test(Reg)) &&
        Reg != ARM::LR &&  // LR is not reserved, but don't use it.
        Reg != ARM::R12 && // R12 is not guaranteed to be preserved.
        C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
        C.isAvailableInsideSeq(Reg, TRI))
      return Reg;
  }
  return Register();
}
// Compute liveness of LR at the point after the interval [I, E), which
// denotes a *backward* iteration through instructions. Used only for return
// basic blocks, which do not end with a tail call.
static bool isLRAvailable(const TargetRegisterInfo &TRI,
                          MachineBasicBlock::reverse_iterator I,
                          MachineBasicBlock::reverse_iterator E) {
  // At the end of the function LR is dead.
  bool Live = false;
  for (; I != E; ++I) {
    const MachineInstr &MI = *I;

    // Check defs of LR.
    if (MI.modifiesRegister(ARM::LR, &TRI))
      Live = false;

    // Check uses of LR.
    unsigned Opcode = MI.getOpcode();
    if (Opcode == ARM::BX_RET || Opcode == ARM::MOVPCLR ||
        Opcode == ARM::SUBS_PC_LR || Opcode == ARM::tBX_RET ||
        Opcode == ARM::tBXNS_RET) {
      // These instructions use LR, but it's not an (explicit or implicit)
      // operand.
      Live = true;
      continue;
    }
    if (MI.readsRegister(ARM::LR, &TRI))
      Live = true;
  }
  return !Live;
}
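// For example (illustrative), for a return block ending in:
//     %lr = t2MOVr %r4, ...   ; redefines LR
//     tBX_RET ...             ; uses LR
// the backward scan above sees the use first (Live = true) and then the def
// (Live = false), so LR is not live before the block and isLRAvailable()
// returns true.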
std::optional<outliner::OutlinedFunction>
ARMBaseInstrInfo::getOutliningCandidateInfo(
    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
  unsigned SequenceSize = 0;
  for (auto &MI : RepeatedSequenceLocs[0])
    SequenceSize += getInstSizeInBytes(MI);

  // Properties about candidate MBBs that hold for all of them.
  unsigned FlagsSetInAll = 0xF;

  // Compute liveness information for each candidate, and set FlagsSetInAll.
  const TargetRegisterInfo &TRI = getRegisterInfo();
  for (outliner::Candidate &C : RepeatedSequenceLocs)
    FlagsSetInAll &= C.Flags;

  // According to the ARM Procedure Call Standard, the following are
  // undefined on entry/exit from a function call:
  //
  // * Register R12(IP),
  // * Condition codes (and thus the CPSR register)
  //
  // Since we control the instructions which are part of the outlined regions
  // we don't need to be fully compliant with the AAPCS, but we have to
  // guarantee that if a veneer is inserted at link time the code is still
  // correct. Because of this, we can't outline any sequence of instructions
  // where one of these registers is live into/across it. Thus, we need to
  // delete those candidates.
  auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
    // If the unsafe registers in this block are all dead, then we don't need
    // to compute liveness here.
    if (C.Flags & UnsafeRegsDead)
      return false;
    return C.isAnyUnavailableAcrossOrOutOfSeq({ARM::R12, ARM::CPSR}, TRI);
  };

  // Are there any candidates where those registers are live?
  if (!(FlagsSetInAll & UnsafeRegsDead)) {
    // Erase every candidate that violates the restrictions above. (It could
    // be true that we have viable candidates, so it's not worth bailing out
    // in the case that, say, 1 out of 20 candidates violate the
    // restrictions.)
    llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);

    // If the sequence doesn't have enough candidates left, then we're done.
    if (RepeatedSequenceLocs.size() < 2)
      return std::nullopt;
  }

  // We expect the majority of the outlining candidates to be in consensus
  // with regard to return address sign and authentication, and branch target
  // enforcement. In other words, partitioning according to all the four
  // possible combinations of PAC-RET and BTI is going to yield one big subset
  // and three small (likely empty) subsets. That allows us to cull
  // incompatible candidates separately for PAC-RET and BTI.

  // Partition the candidates in two sets: one with BTI enabled and one with
  // BTI disabled. Remove the candidates from the smaller set. If they are
  // the same number, prefer the non-BTI ones for outlining, since they have
  // less overhead.
  auto NoBTI =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        return AFI.branchTargetEnforcement();
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoBTI) >
      std::distance(NoBTI, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoBTI, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoBTI);

  if (RepeatedSequenceLocs.size() < 2)
    return std::nullopt;

  // Likewise, partition the candidates according to PAC-RET enablement.
  auto NoPAC =
      llvm::partition(RepeatedSequenceLocs, [](const outliner::Candidate &C) {
        const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
        // If the function happens to not spill the LR, do not disqualify it
        // from the outlining.
        return AFI.shouldSignReturnAddress(true);
      });
  if (std::distance(RepeatedSequenceLocs.begin(), NoPAC) >
      std::distance(NoPAC, RepeatedSequenceLocs.end()))
    RepeatedSequenceLocs.erase(NoPAC, RepeatedSequenceLocs.end());
  else
    RepeatedSequenceLocs.erase(RepeatedSequenceLocs.begin(), NoPAC);

  if (RepeatedSequenceLocs.size() < 2)
    return std::nullopt;

  // At this point, we have only "safe" candidates to outline. Figure out
  // frame + call instruction information.

  unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();

  // Helper lambda which sets call information for every candidate.
  auto SetCandidateCallInfo =
      [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
        for (outliner::Candidate &C : RepeatedSequenceLocs)
          C.setCallInfo(CallID, NumBytesForCall);
      };

  OutlinerCosts Costs(Subtarget);

  const auto &SomeMFI =
      *RepeatedSequenceLocs.front().getMF()->getInfo<ARMFunctionInfo>();
  // Adjust costs to account for the BTI instructions.
  if (SomeMFI.branchTargetEnforcement()) {
    Costs.FrameDefault += 4;
    Costs.FrameNoLRSave += 4;
    Costs.FrameRegSave += 4;
    Costs.FrameTailCall += 4;
    Costs.FrameThunk += 4;
  }

  // Adjust costs to account for sign and authentication instructions.
  if (SomeMFI.shouldSignReturnAddress(true)) {
    Costs.CallDefault += 8;          // +PAC instr, +AUT instr
    Costs.SaveRestoreLROnStack += 8; // +PAC instr, +AUT instr
  }

  unsigned FrameID = MachineOutlinerDefault;
  unsigned NumBytesToCreateFrame = Costs.FrameDefault;

  // If the last instruction in any candidate is a terminator, then we should
  // tail call all of the candidates.
  if (RepeatedSequenceLocs[0].back().isTerminator()) {
    FrameID = MachineOutlinerTailCall;
    NumBytesToCreateFrame = Costs.FrameTailCall;
    SetCandidateCallInfo(MachineOutlinerTailCall, Costs.CallTailCall);
  } else if (LastInstrOpcode == ARM::BL || LastInstrOpcode == ARM::BLX ||
             LastInstrOpcode == ARM::BLX_noip || LastInstrOpcode == ARM::tBL ||
             LastInstrOpcode == ARM::tBLXr ||
             LastInstrOpcode == ARM::tBLXr_noip ||
             LastInstrOpcode == ARM::tBLXi) {
    FrameID = MachineOutlinerThunk;
    NumBytesToCreateFrame = Costs.FrameThunk;
    SetCandidateCallInfo(MachineOutlinerThunk, Costs.CallThunk);
  } else {
    // We need to decide how to emit calls + frames. We can always emit the
    // same frame if we don't need to save to the stack. If we have to save
    // to the stack, then we need a different frame.
    unsigned NumBytesNoStackCalls = 0;
    std::vector<outliner::Candidate> CandidatesWithoutStackFixups;

    for (outliner::Candidate &C : RepeatedSequenceLocs) {
      // LR liveness is overestimated in return blocks, unless they end with
      // a tail call.
      const auto Last = C.getMBB()->rbegin();
      const bool LRIsAvailable =
          C.getMBB()->isReturnBlock() && !Last->isCall()
              ? isLRAvailable(TRI, Last,
                              (MachineBasicBlock::reverse_iterator)C.begin())
              : C.isAvailableAcrossAndOutOfSeq(ARM::LR, TRI);
      if (LRIsAvailable) {
        FrameID = MachineOutlinerNoLRSave;
        NumBytesNoStackCalls += Costs.CallNoLRSave;
        C.setCallInfo(MachineOutlinerNoLRSave, Costs.CallNoLRSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is an unused register available? If so, we won't modify the stack,
      // so we can outline with the same frame type as those that don't save
      // LR.
      else if (findRegisterToSaveLRTo(C)) {
        FrameID = MachineOutlinerRegSave;
        NumBytesNoStackCalls += Costs.CallRegSave;
        C.setCallInfo(MachineOutlinerRegSave, Costs.CallRegSave);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // Is SP used in the sequence at all? If not, we don't have to modify
      // the stack, so we are guaranteed to get the same frame.
      else if (C.isAvailableInsideSeq(ARM::SP, TRI)) {
        NumBytesNoStackCalls += Costs.CallDefault;
        C.setCallInfo(MachineOutlinerDefault, Costs.CallDefault);
        CandidatesWithoutStackFixups.push_back(C);
      }

      // If we outline this, we need to modify the stack. Pretend we don't
      // outline this by saving all of its bytes.
      else
        NumBytesNoStackCalls += SequenceSize;
    }

    // If there are no places where we have to save LR, then note that we
    // don't have to update the stack. Otherwise, give every candidate the
    // default call type.
    if (NumBytesNoStackCalls <=
        RepeatedSequenceLocs.size() * Costs.CallDefault) {
      RepeatedSequenceLocs = CandidatesWithoutStackFixups;
      FrameID = MachineOutlinerNoLRSave;
      if (RepeatedSequenceLocs.size() < 2)
        return std::nullopt;
    } else
      SetCandidateCallInfo(MachineOutlinerDefault, Costs.CallDefault);
  }
  // Does every candidate's MBB contain a call? If so, then we might have a
  // call in the range.
  if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
    // Check if the range contains a call. These require a save + restore of
    // the link register.
    outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
    if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
                    [](const MachineInstr &MI) { return MI.isCall(); }))
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;

    // Handle the last instruction separately. If it is a tail call, then the
    // last instruction is a call. We don't want to save + restore in this
    // case. However, it could be possible that the last instruction is a
    // call without it being valid to tail call this sequence. We should
    // consider this as well.
    else if (FrameID != MachineOutlinerThunk &&
             FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
      NumBytesToCreateFrame += Costs.SaveRestoreLROnStack;
  }

  return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
                                    NumBytesToCreateFrame, FrameID);
}

bool ARMBaseInstrInfo::checkAndUpdateStackOffset(MachineInstr *MI,
                                                 int64_t Fixup,
                                                 bool Updt) const {
  int SPIdx = MI->findRegisterUseOperandIdx(ARM::SP, /*TRI=*/nullptr);
  unsigned AddrMode = (MI->getDesc().TSFlags & ARMII::AddrModeMask);
  if (SPIdx < 0)
    // No SP operand
    return true;
  else if (SPIdx != 1 && (AddrMode != ARMII::AddrModeT2_i8s4 || SPIdx != 2))
    // If SP is not the base register we can't do much
    return false;

  // Stack might be involved but addressing mode doesn't handle any offset.
  // Note: AddrModeT1_[1|2|4] don't operate on SP.
  if (AddrMode == ARMII::AddrMode1 ||     // Arithmetic instructions
      AddrMode == ARMII::AddrMode4 ||     // Load/Store Multiple
      AddrMode == ARMII::AddrMode6 ||     // Neon Load/Store Multiple
      AddrMode == ARMII::AddrModeT2_so || // SP can't be used as base register
      AddrMode == ARMII::AddrModeT2_pc || // PCrel access
      AddrMode == ARMII::AddrMode2 ||     // Used by PRE and POST indexed LD/ST
      AddrMode == ARMII::AddrModeT2_i7 ||   // v8.1-M MVE
      AddrMode == ARMII::AddrModeT2_i7s2 || // v8.1-M MVE
      AddrMode == ARMII::AddrModeT2_i7s4 || // v8.1-M sys regs VLDR/VSTR
      AddrMode == ARMII::AddrModeNone ||
      AddrMode == ARMII::AddrModeT2_i8 ||  // Pre/Post inc instructions
      AddrMode == ARMII::AddrModeT2_i8neg) // Always negative imm
    return false;

  unsigned NumOps = MI->getDesc().getNumOperands();
  unsigned ImmIdx = NumOps - 3;

  const MachineOperand &Offset = MI->getOperand(ImmIdx);
  assert(Offset.isImm() && "Is not an immediate");
  int64_t OffVal = Offset.getImm();

  if (OffVal < 0)
    // Don't override data if they are below SP.
    return false;

  unsigned NumBits = 0;
  unsigned Scale = 1;

  switch (AddrMode) {
  case ARMII::AddrMode3:
    if (ARM_AM::getAM3Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM3Offset(OffVal);
    NumBits = 8;
    break;
  case ARMII::AddrMode5:
    if (ARM_AM::getAM5Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM5Offset(OffVal);
    NumBits = 8;
    Scale = 4;
    break;
  case ARMII::AddrMode5FP16:
    if (ARM_AM::getAM5FP16Op(OffVal) == ARM_AM::sub)
      return false;
    OffVal = ARM_AM::getAM5FP16Offset(OffVal);
    NumBits = 8;
    Scale = 2;
    break;
  case ARMII::AddrModeT2_i8pos:
    NumBits = 8;
    break;
  case ARMII::AddrModeT2_i8s4:
    // FIXME: Values are already scaled in this addressing mode.
    assert((Fixup & 3) == 0 && "Can't encode this offset!");
    NumBits = 10;
    break;
  case ARMII::AddrModeT2_ldrex:
    NumBits = 8;
    Scale = 4;
    break;
  case ARMII::AddrModeT2_i12:
  case ARMII::AddrMode_i12:
    NumBits = 12;
    break;
  case ARMII::AddrModeT1_s: // SP-relative LD/ST
    NumBits = 8;
    Scale = 4;
    break;
  default:
    llvm_unreachable("Unsupported addressing mode!");
  }
  // Make sure the offset is encodable for instructions that scale the
  // immediate.
  assert(((OffVal * Scale + Fixup) & (Scale - 1)) == 0 &&
         "Can't encode this offset!");
  OffVal += Fixup / Scale;

  unsigned Mask = (1 << NumBits) - 1;

  if (OffVal <= Mask) {
    if (Updt)
      MI->getOperand(ImmIdx).setImm(OffVal);
    return true;
  }

  return false;
}
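// For example (illustrative): a load of the form
//     %r0 = t2LDRi12 %sp, 8, 14, %noreg   ; AddrModeT2_i12, NumBits = 12
// checked with Fixup = 8 (one aligned LR spill slot) yields OffVal = 16,
// which still fits in 12 bits, so checkAndUpdateStackOffset() above returns
// true and, when Updt is set, rewrites the immediate in place.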
void ARMBaseInstrInfo::mergeOutliningCandidateAttributes(
    Function &F, std::vector<outliner::Candidate> &Candidates) const {
  outliner::Candidate &C = Candidates.front();
  // branch-target-enforcement is guaranteed to be consistent between all
  // candidates, so we only need to look at one.
  const Function &CFn = C.getMF()->getFunction();
  if (CFn.hasFnAttribute("branch-target-enforcement"))
    F.addFnAttr(CFn.getFnAttribute("branch-target-enforcement"));

  ARMGenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
}

bool ARMBaseInstrInfo::isFunctionSafeToOutlineFrom(
    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
  const Function &F = MF.getFunction();

  // Can F be deduplicated by the linker? If it can, don't outline from it.
  if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
    return false;

  // Don't outline from functions with section markings; the program could
  // expect that all the code is in the named section.
  // FIXME: Allow outlining from multiple functions with the same section
  // marking.
  if (F.hasSection())
    return false;

  // FIXME: Thumb1 outlining is not handled
  if (MF.getInfo<ARMFunctionInfo>()->isThumb1OnlyFunction())
    return false;

  // It's safe to outline from MF.
  return true;
}

bool ARMBaseInstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
                                              unsigned &Flags) const {
  // Check if LR is available through all of the MBB. If it's not, then set
  // a flag.
  assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
         "Suitable Machine Function for outlining must track liveness");

  LiveRegUnits LRU(getRegisterInfo());

  for (MachineInstr &MI : llvm::reverse(MBB))
    LRU.accumulate(MI);

  // Check if each of the unsafe registers are available...
  bool R12AvailableInBlock = LRU.available(ARM::R12);
  bool CPSRAvailableInBlock = LRU.available(ARM::CPSR);

  // If all of these are dead (and not live out), we know we don't have to
  // check them later.
  if (R12AvailableInBlock && CPSRAvailableInBlock)
    Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;

  // Now, add the live outs to the set.
  LRU.addLiveOuts(MBB);

  // If any of these registers is available in the MBB, but also a live out of
  // the block, then we know outlining is unsafe.
  if (R12AvailableInBlock && !LRU.available(ARM::R12))
    return false;
  if (CPSRAvailableInBlock && !LRU.available(ARM::CPSR))
    return false;

  // Check if there's a call inside this MachineBasicBlock. If there is, then
  // set a flag.
  if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
    Flags |= MachineOutlinerMBBFlags::HasCalls;

  // LR liveness is overestimated in return blocks.

  bool LRIsAvailable =
      MBB.isReturnBlock() && !MBB.back().isCall()
          ? isLRAvailable(getRegisterInfo(), MBB.rbegin(), MBB.rend())
          : LRU.available(ARM::LR);
  if (!LRIsAvailable)
    Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;

  return true;
}
outliner::InstrType
ARMBaseInstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
                                       unsigned Flags) const {
  MachineInstr &MI = *MIT;
  const TargetRegisterInfo *TRI = &getRegisterInfo();

  // PIC instructions contain labels, outlining them would break offset
  // computing.
  unsigned Opc = MI.getOpcode();
  if (Opc == ARM::tPICADD || Opc == ARM::PICADD || Opc == ARM::PICSTR ||
      Opc == ARM::PICSTRB || Opc == ARM::PICSTRH || Opc == ARM::PICLDR ||
      Opc == ARM::PICLDRB || Opc == ARM::PICLDRH || Opc == ARM::PICLDRSB ||
      Opc == ARM::PICLDRSH || Opc == ARM::t2LDRpci_pic ||
      Opc == ARM::t2MOVi16_ga_pcrel || Opc == ARM::t2MOVTi16_ga_pcrel ||
      Opc == ARM::t2MOV_ga_pcrel)
    return outliner::InstrType::Illegal;

  // Be conservative with ARMv8.1 MVE instructions.
  if (Opc == ARM::t2BF_LabelPseudo || Opc == ARM::t2DoLoopStart ||
      Opc == ARM::t2DoLoopStartTP || Opc == ARM::t2WhileLoopStart ||
      Opc == ARM::t2WhileLoopStartLR || Opc == ARM::t2WhileLoopStartTP ||
      Opc == ARM::t2LoopDec || Opc == ARM::t2LoopEnd ||
      Opc == ARM::t2LoopEndDec)
    return outliner::InstrType::Illegal;

  const MCInstrDesc &MCID = MI.getDesc();
  uint64_t MIFlags = MCID.TSFlags;
  if ((MIFlags & ARMII::DomainMask) == ARMII::DomainMVE)
    return outliner::InstrType::Illegal;

  // Is this a terminator for a basic block?
  if (MI.isTerminator())
    // TargetInstrInfo::getOutliningType has already filtered out anything
    // that would break this, so we can allow it here.
    return outliner::InstrType::Legal;

  // Don't outline if link register or program counter value are used.
  if (MI.readsRegister(ARM::LR, TRI) || MI.readsRegister(ARM::PC, TRI))
    return outliner::InstrType::Illegal;

  if (MI.isCall()) {
    // Get the function associated with the call. Look at each operand and
    // find the one that represents the callee and get its name.
    const Function *Callee = nullptr;
    for (const MachineOperand &MOP : MI.operands()) {
      if (MOP.isGlobal()) {
        Callee = dyn_cast<Function>(MOP.getGlobal());
        break;
      }
    }

    // Don't outline calls to "mcount"-like functions; in particular, Linux
    // kernel function tracing relies on them.
    if (Callee &&
        (Callee->getName() == "\01__gnu_mcount_nc" ||
         Callee->getName() == "\01mcount" || Callee->getName() == "__mcount"))
      return outliner::InstrType::Illegal;

    // If we don't know anything about the callee, assume it depends on the
    // stack layout of the caller. In that case, it's only legal to outline
    // as a tail-call. Explicitly list the call instructions we know about so
    // we don't get unexpected results with call pseudo-instructions.
    auto UnknownCallOutlineType = outliner::InstrType::Illegal;
    if (Opc == ARM::BL || Opc == ARM::tBL || Opc == ARM::BLX ||
        Opc == ARM::BLX_noip || Opc == ARM::tBLXr || Opc == ARM::tBLXr_noip ||
        Opc == ARM::tBLXi)
      UnknownCallOutlineType = outliner::InstrType::LegalTerminator;

    if (!Callee)
      return UnknownCallOutlineType;

    // We have a function we have information about. Check if it's something
    // we can safely outline.
    MachineFunction *MF = MI.getParent()->getParent();
    MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);

    // We don't know what's going on with the callee at all. Don't touch it.
    if (!CalleeMF)
      return UnknownCallOutlineType;
    // Check if we know anything about the callee saves on the function. If
    // we don't, then don't touch it, since that implies that we haven't
    // computed anything about its stack frame yet.
    MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
    if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
        MFI.getNumObjects() > 0)
      return UnknownCallOutlineType;

    // At this point, we can say that CalleeMF ought to not pass anything on
    // the stack. Therefore, we can outline it.
    return outliner::InstrType::Legal;
  }

  // Since calls are handled, don't touch LR or PC
  if (MI.modifiesRegister(ARM::LR, TRI) || MI.modifiesRegister(ARM::PC, TRI))
    return outliner::InstrType::Illegal;

  // Does this use the stack?
  if (MI.modifiesRegister(ARM::SP, TRI) || MI.readsRegister(ARM::SP, TRI)) {
    // True if there is no chance that any outlined candidate from this range
    // could require stack fixups. That is, both
    // * LR is available in the range (No save/restore around call)
    // * The range doesn't include calls (No save/restore in outlined frame)
    // are true.
    // These conditions also ensure correctness of the return address
    // authentication - we insert sign and authentication instructions only if
    // we save/restore LR on stack, but then this condition ensures that the
    // outlined range does not modify the SP, therefore the SP value used for
    // signing is the same as the one used for authentication.
    // FIXME: This is very restrictive; the flags check the whole block,
    // not just the bit we will try to outline.
    bool MightNeedStackFixUp =
        (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
                  MachineOutlinerMBBFlags::HasCalls));

    if (!MightNeedStackFixUp)
      return outliner::InstrType::Legal;

    // Any modification of SP will break our code to save/restore LR.
    // FIXME: We could handle some instructions which add a constant offset
    // to SP, with a bit more work.
    if (MI.modifiesRegister(ARM::SP, TRI))
      return outliner::InstrType::Illegal;

    // At this point, we have a stack instruction that we might need to fix
    // up. We'll handle it if it's a load or store.
    if (checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(),
                                  false))
      return outliner::InstrType::Legal;

    // We can't fix it up, so don't outline it.
    return outliner::InstrType::Illegal;
  }

  // Be conservative with IT blocks.
  if (MI.readsRegister(ARM::ITSTATE, TRI) ||
      MI.modifiesRegister(ARM::ITSTATE, TRI))
    return outliner::InstrType::Illegal;

  // Don't outline CFI instructions.
  if (MI.isCFIInstruction())
    return outliner::InstrType::Illegal;

  return outliner::InstrType::Legal;
}

void ARMBaseInstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
  for (MachineInstr &MI : MBB) {
    checkAndUpdateStackOffset(&MI, Subtarget.getStackAlignment().value(), true);
  }
}
void ARMBaseInstrInfo::saveLROnStack(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator It, bool CFI,
                                     bool Auth) const {
  int Align = std::max(Subtarget.getStackAlignment().value(), uint64_t(8));
  unsigned MIFlags = CFI ? MachineInstr::FrameSetup : 0;
  assert(Align >= 8 && Align <= 256);
  if (Auth) {
    assert(Subtarget.isThumb2());
    // Compute PAC in R12. Outlining ensures R12 is dead across the outlined
    // sequence.
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2PAC)).setMIFlags(MIFlags);
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2STRD_PRE), ARM::SP)
        .addReg(ARM::R12, RegState::Kill)
        .addReg(ARM::LR, RegState::Kill)
        .addReg(ARM::SP)
        .addImm(-Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  } else {
    unsigned Opc = Subtarget.isThumb() ? ARM::t2STR_PRE : ARM::STR_PRE_IMM;
    BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::SP)
        .addReg(ARM::LR, RegState::Kill)
        .addReg(ARM::SP)
        .addImm(-Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  }

  if (!CFI)
    return;

  MachineFunction &MF = *MBB.getParent();

  // Add a CFI, saying CFA is offset by Align bytes from SP.
  int64_t StackPosEntry =
      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, Align));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(StackPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);

  // Add a CFI saying that the LR that we want to find is now higher than
  // before.
  int LROffset = Auth ? Align - 4 : Align;
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
  int64_t LRPosEntry = MF.addFrameInst(
      MCCFIInstruction::createOffset(nullptr, DwarfLR, -LROffset));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);
  if (Auth) {
    // Add a CFI for the location of the return address PAC.
    unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
    int64_t RACPosEntry = MF.addFrameInst(
        MCCFIInstruction::createOffset(nullptr, DwarfRAC, -Align));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(RACPosEntry)
        .setMIFlags(MachineInstr::FrameSetup);
  }
}
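// Illustrative frame layout right after the authenticated (Auth) save above,
// with Align == 8 and SP growing downwards:
//     [sp, #4] : saved LR    (CFI: lr at CFA-4)
//     [sp, #0] : PAC in R12  (CFI: ra_auth_code at CFA-8)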
void ARMBaseInstrInfo::emitCFIForLRSaveToReg(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator It,
                                             Register Reg) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
  unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);

  int64_t LRPosEntry = MF.addFrameInst(
      MCCFIInstruction::createRegister(nullptr, DwarfLR, DwarfReg));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameSetup);
}

void ARMBaseInstrInfo::restoreLRFromStack(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator It,
                                          bool CFI, bool Auth) const {
  int Align = Subtarget.getStackAlignment().value();
  unsigned MIFlags = CFI ? MachineInstr::FrameDestroy : 0;
  if (Auth) {
    assert(Subtarget.isThumb2());
    // Restore return address PAC and LR.
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2LDRD_POST))
        .addReg(ARM::R12, RegState::Define)
        .addReg(ARM::LR, RegState::Define)
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .addImm(Align)
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
    // LR authentication is after the CFI instructions, below.
  } else {
    unsigned Opc = Subtarget.isThumb() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
    MachineInstrBuilder MIB = BuildMI(MBB, It, DebugLoc(), get(Opc), ARM::LR)
                                  .addReg(ARM::SP, RegState::Define)
                                  .addReg(ARM::SP);
    if (!Subtarget.isThumb())
      MIB.addReg(0);
    MIB.addImm(Subtarget.getStackAlignment().value())
        .add(predOps(ARMCC::AL))
        .setMIFlags(MIFlags);
  }

  if (CFI) {
    // Now stack has moved back up...
    MachineFunction &MF = *MBB.getParent();
    const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
    unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);
    int64_t StackPosEntry =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(StackPosEntry)
        .setMIFlags(MachineInstr::FrameDestroy);

    // ... and we have restored LR.
    int64_t LRPosEntry =
        MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
    BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
        .addCFIIndex(LRPosEntry)
        .setMIFlags(MachineInstr::FrameDestroy);

    if (Auth) {
      unsigned DwarfRAC = MRI->getDwarfRegNum(ARM::RA_AUTH_CODE, true);
      int64_t Entry = MF.addFrameInst(
          MCCFIInstruction::createUndefined(nullptr, DwarfRAC));
      BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
          .addCFIIndex(Entry)
          .setMIFlags(MachineInstr::FrameDestroy);
    }
  }

  if (Auth)
    BuildMI(MBB, It, DebugLoc(), get(ARM::t2AUT));
}
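// Taken together with saveLROnStack() above, a Thumb2 outlined frame with an
// 8-byte aligned stack and no return-address signing brackets its body with
// roughly (illustrative):
//     str lr, [sp, #-8]!   ; t2STR_PRE  + CFI: def_cfa_offset 8, lr @ CFA-8
//     ...
//     ldr lr, [sp], #8     ; t2LDR_POST + CFI: def_cfa_offset 0, restore lr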
void ARMBaseInstrInfo::emitCFIForLRRestoreFromReg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator It) const {
  MachineFunction &MF = *MBB.getParent();
  const MCRegisterInfo *MRI = Subtarget.getRegisterInfo();
  unsigned DwarfLR = MRI->getDwarfRegNum(ARM::LR, true);

  int64_t LRPosEntry =
      MF.addFrameInst(MCCFIInstruction::createRestore(nullptr, DwarfLR));
  BuildMI(MBB, It, DebugLoc(), get(ARM::CFI_INSTRUCTION))
      .addCFIIndex(LRPosEntry)
      .setMIFlags(MachineInstr::FrameDestroy);
}

void ARMBaseInstrInfo::buildOutlinedFrame(
    MachineBasicBlock &MBB, MachineFunction &MF,
    const outliner::OutlinedFunction &OF) const {
  // For thunk outlining, rewrite the last instruction from a call to a
  // tail-call.
  if (OF.FrameConstructionID == MachineOutlinerThunk) {
    MachineInstr *Call = &*--MBB.instr_end();
    bool isThumb = Subtarget.isThumb();
    unsigned FuncOp = isThumb ? 2 : 0;
    unsigned Opc = Call->getOperand(FuncOp).isReg()
                       ? isThumb ? ARM::tTAILJMPr : ARM::TAILJMPr
                       : isThumb ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd
                                                             : ARM::tTAILJMPdND
                                 : ARM::TAILJMPd;
    MachineInstrBuilder MIB = BuildMI(MBB, MBB.end(), DebugLoc(), get(Opc))
                                  .add(Call->getOperand(FuncOp));
    if (isThumb && !Call->getOperand(FuncOp).isReg())
      MIB.add(predOps(ARMCC::AL));
    Call->eraseFromParent();
  }

  // Is there a call in the outlined range?
  auto IsNonTailCall = [](MachineInstr &MI) {
    return MI.isCall() && !MI.isReturn();
  };
  if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
    MachineBasicBlock::iterator It = MBB.begin();
    MachineBasicBlock::iterator Et = MBB.end();

    if (OF.FrameConstructionID == MachineOutlinerTailCall ||
        OF.FrameConstructionID == MachineOutlinerThunk)
      Et = std::prev(MBB.end());

    // We have to save and restore LR, so we need to add it to the liveins if
    // it is not already part of the set. This is sufficient, since outlined
    // functions only have one block.
    if (!MBB.isLiveIn(ARM::LR))
      MBB.addLiveIn(ARM::LR);

    // Insert a save before the outlined region
    bool Auth = OF.Candidates.front()
                    .getMF()
                    ->getInfo<ARMFunctionInfo>()
                    ->shouldSignReturnAddress(true);
    saveLROnStack(MBB, It, true, Auth);

    // Fix up the instructions in the range, since we're going to modify the
    // stack.
    assert(OF.FrameConstructionID != MachineOutlinerDefault &&
           "Can only fix up stack references once");
    fixupPostOutline(MBB);

    // Insert a restore before the terminator for the function. Restore LR.
    restoreLRFromStack(MBB, Et, true, Auth);
  }

  // If this is a tail call outlined function, then there's already a return.
  if (OF.FrameConstructionID == MachineOutlinerTailCall ||
      OF.FrameConstructionID == MachineOutlinerThunk)
    return;

  // Here we have to insert the return ourselves. Get the correct opcode from
  // the current feature set.
  BuildMI(MBB, MBB.end(), DebugLoc(), get(Subtarget.getReturnOpcode()))
      .add(predOps(ARMCC::AL));

  // Did we have to modify the stack by saving the link register?
  if (OF.FrameConstructionID != MachineOutlinerDefault &&
      OF.Candidates[0].CallConstructionID != MachineOutlinerDefault)
    return;

  // We modified the stack.
  // Walk over the basic block and fix up all the stack accesses.
  fixupPostOutline(MBB);
}
MachineBasicBlock::iterator ARMBaseInstrInfo::insertOutlinedCall(
    Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
    MachineFunction &MF, outliner::Candidate &C) const {
  MachineInstrBuilder MIB;
  MachineBasicBlock::iterator CallPt;
  unsigned Opc;
  bool isThumb = Subtarget.isThumb();

  // Are we tail calling?
  if (C.CallConstructionID == MachineOutlinerTailCall) {
    // If yes, then we can just branch to the label.
    Opc = isThumb
              ? Subtarget.isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND
              : ARM::TAILJMPd;
    MIB = BuildMI(MF, DebugLoc(), get(Opc))
              .addGlobalAddress(M.getNamedValue(MF.getName()));
    if (isThumb)
      MIB.add(predOps(ARMCC::AL));
    It = MBB.insert(It, MIB);
    return It;
  }

  // Create the call instruction.
  Opc = isThumb ? ARM::tBL : ARM::BL;
  MachineInstrBuilder CallMIB = BuildMI(MF, DebugLoc(), get(Opc));
  if (isThumb)
    CallMIB.add(predOps(ARMCC::AL));
  CallMIB.addGlobalAddress(M.getNamedValue(MF.getName()));

  if (C.CallConstructionID == MachineOutlinerNoLRSave ||
      C.CallConstructionID == MachineOutlinerThunk) {
    // No LR save is required, so just insert the call.
    It = MBB.insert(It, CallMIB);
    return It;
  }

  const ARMFunctionInfo &AFI = *C.getMF()->getInfo<ARMFunctionInfo>();
  // Can we save to a register?
  if (C.CallConstructionID == MachineOutlinerRegSave) {
    Register Reg = findRegisterToSaveLRTo(C);
    assert(Reg != 0 && "No callee-saved register available?");

    // Save and restore LR from that register.
    copyPhysReg(MBB, It, DebugLoc(), Reg, ARM::LR, true);
    if (!AFI.isLRSpilled())
      emitCFIForLRSaveToReg(MBB, It, Reg);
    CallPt = MBB.insert(It, CallMIB);
    copyPhysReg(MBB, It, DebugLoc(), ARM::LR, Reg, true);
    if (!AFI.isLRSpilled())
      emitCFIForLRRestoreFromReg(MBB, It);
    It--;
    return CallPt;
  }
  // We have the default case. Save and restore from SP.
  if (!MBB.isLiveIn(ARM::LR))
    MBB.addLiveIn(ARM::LR);
  bool Auth = !AFI.isLRSpilled() && AFI.shouldSignReturnAddress(true);
  saveLROnStack(MBB, It, !AFI.isLRSpilled(), Auth);
  CallPt = MBB.insert(It, CallMIB);
  restoreLRFromStack(MBB, It, !AFI.isLRSpilled(), Auth);
  It--;
  return CallPt;
}

bool ARMBaseInstrInfo::shouldOutlineFromFunctionByDefault(
    MachineFunction &MF) const {
  return Subtarget.isMClass() && MF.getFunction().hasMinSize();
}

bool ARMBaseInstrInfo::isReallyTriviallyReMaterializable(
    const MachineInstr &MI) const {
  // Try hard to rematerialize any VCTPs because if we spill P0, it will block
  // the tail predication conversion. This means that the element count
  // register has to be live for longer, but that has to be better than
  // spill/restore and VPT predication.
  return (isVCTP(&MI) && !isPredicated(MI)) ||
         TargetInstrInfo::isReallyTriviallyReMaterializable(MI);
}

unsigned llvm::getBLXOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_noip
                                                          : ARM::BLX;
}

unsigned llvm::gettBLXrOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::tBLXr_noip
                                                          : ARM::tBLXr;
}

unsigned llvm::getBLXpredOpcode(const MachineFunction &MF) {
  return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
                                                          : ARM::BLX_pred;
}
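// Note (hedged summary, not asserted by this file): the three helpers above
// select the SLS-hardened "_noip" variants of the indirect-call opcodes when
// hardenSlsBlr() is enabled; those variants restrict the call-target register
// so that R12 (IP) stays free for the thunk rewriting performed by the ARM
// straight-line-speculation hardening pass.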
namespace {
class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *EndLoop, *LoopCount;
  MachineFunction *MF;
  const TargetInstrInfo *TII;

  // Bitset[0 .. MAX_STAGES-1] ... iterations needed
  //       [LAST_IS_USE] : last reference to register in schedule is a use
  //       [SEEN_AS_LIVE] : Normal pressure algorithm believes register is live
  static int constexpr MAX_STAGES = 30;
  static int constexpr LAST_IS_USE = MAX_STAGES;
  static int constexpr SEEN_AS_LIVE = MAX_STAGES + 1;
  typedef std::bitset<MAX_STAGES + 2> IterNeed;
  typedef std::map<unsigned, IterNeed> IterNeeds;

  void bumpCrossIterationPressure(RegPressureTracker &RPT,
                                  const IterNeeds &CIN);
  bool tooMuchRegisterPressure(SwingSchedulerDAG &SSD, SMSchedule &SMS);

  // Meanings of the fields for each supported loop type:
  // t2Bcc:
  //   EndLoop = branch at end of original BB that will become a kernel
  //   LoopCount = CC setter live into branch
  // t2LoopEnd:
  //   EndLoop = branch at end of original BB
  //   LoopCount = t2LoopDec
public:
  ARMPipelinerLoopInfo(MachineInstr *EndLoop, MachineInstr *LoopCount)
      : EndLoop(EndLoop), LoopCount(LoopCount),
        MF(EndLoop->getParent()->getParent()),
        TII(MF->getSubtarget().getInstrInfo()) {}

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Only ignore the terminator.
    return MI == EndLoop || MI == LoopCount;
  }

  bool shouldUseSchedule(SwingSchedulerDAG &SSD, SMSchedule &SMS) override {
    if (tooMuchRegisterPressure(SSD, SMS))
      return false;

    return true;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &Cond) override {

    if (isCondBranchOpcode(EndLoop->getOpcode())) {
      Cond.push_back(EndLoop->getOperand(1));
      Cond.push_back(EndLoop->getOperand(2));
      if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
        TII->reverseBranchCondition(Cond);
      }
      return {};
    } else if (EndLoop->getOpcode() == ARM::t2LoopEnd) {
      // General case just lets the unrolled t2LoopDec do the subtraction and
      // therefore just needs to check if zero has been reached.
      MachineInstr *LoopDec = nullptr;
      for (auto &I : MBB.instrs())
        if (I.getOpcode() == ARM::t2LoopDec)
          LoopDec = &I;
      assert(LoopDec && "Unable to find copied LoopDec");
      // Check if we're done with the loop.
      BuildMI(&MBB, LoopDec->getDebugLoc(), TII->get(ARM::t2CMPri))
          .addReg(LoopDec->getOperand(0).getReg())
          .addImm(0)
          .addImm(ARMCC::AL)
          .addReg(ARM::NoRegister);
      Cond.push_back(MachineOperand::CreateImm(ARMCC::EQ));
      Cond.push_back(MachineOperand::CreateReg(ARM::CPSR, false));
      return {};
    } else
      llvm_unreachable("Unknown EndLoop");
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {}

  void adjustTripCount(int TripCountAdjust) override {}

  void disposed() override {}
};

void ARMPipelinerLoopInfo::bumpCrossIterationPressure(RegPressureTracker &RPT,
                                                      const IterNeeds &CIN) {
  // Increase pressure by the amounts in CrossIterationNeeds
  for (const auto &N : CIN) {
    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
    for (int I = 0; I < Cnt; ++I)
      RPT.increaseRegPressure(Register(N.first), LaneBitmask::getNone(),
                              LaneBitmask::getAll());
  }
  // Decrease pressure by the amounts in CrossIterationNeeds
  for (const auto &N : CIN) {
    int Cnt = N.second.count() - N.second[SEEN_AS_LIVE] * 2;
    for (int I = 0; I < Cnt; ++I)
      RPT.decreaseRegPressure(Register(N.first), LaneBitmask::getAll(),
                              LaneBitmask::getNone());
  }
}
bool ARMPipelinerLoopInfo::tooMuchRegisterPressure(SwingSchedulerDAG &SSD,
                                                   SMSchedule &SMS) {
  IterNeeds CrossIterationNeeds;

  // Determine which values will be loop-carried after the schedule is
  // applied.

  for (auto &SU : SSD.SUnits) {
    const MachineInstr *MI = SU.getInstr();
    int Stg = SMS.stageScheduled(const_cast<SUnit *>(&SU));
    for (auto &S : SU.Succs)
      if (MI->isPHI() && S.getKind() == SDep::Anti) {
        Register Reg = S.getReg();
        if (Reg.isVirtual())
          CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
              .first->second.set(0);
      } else if (S.isAssignedRegDep()) {
        int OStg = SMS.stageScheduled(S.getSUnit());
        if (OStg >= 0 && OStg != Stg) {
          Register Reg = S.getReg();
          if (Reg.isVirtual())
            CrossIterationNeeds.insert(std::make_pair(Reg.id(), IterNeed()))
                .first->second |= ((1 << (OStg - Stg)) - 1);
        }
      }
  }

  // Determine more-or-less what the proposed schedule (reversed) is going to
  // be; it might not be quite the same because the within-cycle ordering
  // created by SMSchedule depends upon changes to help with address offsets
  // and the like.
  std::vector<SUnit *> ProposedSchedule;
  for (int Cycle = SMS.getFinalCycle(); Cycle >= SMS.getFirstCycle(); --Cycle)
    for (int Stage = 0, StageEnd = SMS.getMaxStageCount(); Stage <= StageEnd;
         ++Stage) {
      std::deque<SUnit *> Instrs =
          SMS.getInstructions(Cycle + Stage * SMS.getInitiationInterval());
      std::sort(Instrs.begin(), Instrs.end(),
                [](SUnit *A, SUnit *B) { return A->NodeNum > B->NodeNum; });
      for (SUnit *SU : Instrs)
        ProposedSchedule.push_back(SU);
    }

  // Learn whether the last use/def of each cross-iteration register is a use
  // or def. If it is a def, RegisterPressure will implicitly increase max
  // pressure and we do not have to add the pressure.
  for (auto *SU : ProposedSchedule)
    for (ConstMIBundleOperands OperI(*SU->getInstr()); OperI.isValid();
         ++OperI) {
      auto MO = *OperI;
      if (!MO.isReg() || !MO.getReg())
        continue;
      Register Reg = MO.getReg();
      auto CIter = CrossIterationNeeds.find(Reg.id());
      if (CIter == CrossIterationNeeds.end() || CIter->second[LAST_IS_USE] ||
          CIter->second[SEEN_AS_LIVE])
        continue;
      if (MO.isDef() && !MO.isDead())
        CIter->second.set(SEEN_AS_LIVE);
      else if (MO.isUse())
        CIter->second.set(LAST_IS_USE);
    }
  for (auto &CI : CrossIterationNeeds)
    CI.second.reset(LAST_IS_USE);

  RegionPressure RecRegPressure;
  RegPressureTracker RPTracker(RecRegPressure);
  RegisterClassInfo RegClassInfo;
  RegClassInfo.runOnMachineFunction(*MF);
  RPTracker.init(MF, &RegClassInfo, nullptr, EndLoop->getParent(),
                 EndLoop->getParent()->end(), false, false);
  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();

  bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);

  for (auto *SU : ProposedSchedule) {
    MachineBasicBlock::const_iterator CurInstI = SU->getInstr();
    RPTracker.setPos(std::next(CurInstI));
    RPTracker.recede();

    // Track what cross-iteration registers would be seen as live.
    for (ConstMIBundleOperands OperI(*CurInstI); OperI.isValid(); ++OperI) {
      auto MO = *OperI;
      if (!MO.isReg() || !MO.getReg())
        continue;
      Register Reg = MO.getReg();
      if (MO.isDef() && !MO.isDead()) {
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          CIter->second.reset(0);
          CIter->second.reset(SEEN_AS_LIVE);
        }
      }
    }
    for (auto &S : SU->Preds) {
      auto Stg = SMS.stageScheduled(SU);
      if (S.isAssignedRegDep()) {
        Register Reg = S.getReg();
        auto CIter = CrossIterationNeeds.find(Reg.id());
        if (CIter != CrossIterationNeeds.end()) {
          auto Stg2 = SMS.stageScheduled(const_cast<SUnit *>(S.getSUnit()));
          assert(Stg2 <= Stg && "Data dependence upon earlier stage");
          if (Stg - Stg2 < MAX_STAGES)
            CIter->second.set(Stg - Stg2);
          CIter->second.set(SEEN_AS_LIVE);
        }
      }
    }

    bumpCrossIterationPressure(RPTracker, CrossIterationNeeds);
  }

  auto &P = RPTracker.getPressure().MaxSetPressure;
  for (unsigned I = 0, E = P.size(); I < E; ++I)
    if (P[I] > TRI->getRegPressureSetLimit(*MF, I))
      return true;
  return false;
}

} // namespace
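// Note on the hook below: MachinePipeliner reaches it through
// TargetInstrInfo::analyzeLoopForPipelining, and a nullptr return simply opts
// the loop out of software pipelining. Two loop shapes are accepted: a
// generic t2Bcc loop whose condition is produced by a CPSR definition inside
// the block, and the low-overhead-loop form built from t2DoLoopStart /
// t2LoopDec / t2LoopEnd.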
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());

  if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
    // If the branch is a Bcc, then the CPSR should be set somewhere within
    // the block. We need to determine the reaching definition of CPSR so that
    // it can be marked as non-pipelineable, allowing the pipeliner to force
    // it into stage 0 or give up if it cannot or will not do so.
    MachineInstr *CCSetter = nullptr;
    for (auto &L : LoopBB->instrs()) {
      if (L.isCall())
        return nullptr;
      if (isCPSRDefined(L))
        CCSetter = &L;
    }
    if (CCSetter)
      return std::make_unique<ARMPipelinerLoopInfo>(&*I, CCSetter);
    else
      return nullptr; // Unable to find the CC setter, so unable to guarantee
                      // that pipelining will work.
  }

  // Recognize:
  //   preheader:
  //     %1 = t2DoLoopStart %0
  //   loop:
  //     %2 = phi %1, <not loop>, %..., %loop
  //     %3 = t2LoopDec %2, <imm>
  //     t2LoopEnd %3, %loop

  if (I != LoopBB->end() && I->getOpcode() == ARM::t2LoopEnd) {
    for (auto &L : LoopBB->instrs())
      if (L.isCall())
        return nullptr;
      else if (isVCTP(&L))
        return nullptr;
    Register LoopDecResult = I->getOperand(0).getReg();
    MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
    MachineInstr *LoopDec = MRI.getUniqueVRegDef(LoopDecResult);
    if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec)
      return nullptr;
    MachineInstr *LoopStart = nullptr;
    for (auto &J : Preheader->instrs())
      if (J.getOpcode() == ARM::t2DoLoopStart)
        LoopStart = &J;
    if (!LoopStart)
      return nullptr;
    return std::make_unique<ARMPipelinerLoopInfo>(&*I, LoopDec);
  }
  return nullptr;
}
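// For reference, the t2Bcc shape accepted above looks roughly like the
// following MIR (a hedged sketch; register and block names are hypothetical):
//
//   bb.1.loop:
//     ...
//     t2CMPri %counter, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr
//     t2Bcc %bb.1, 1 /* CC::ne */, $cpsr
//
// Here the t2CMPri is the CCSetter passed to ARMPipelinerLoopInfo as
// LoopCount, and the t2Bcc is EndLoop.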