Path: blob/main/contrib/llvm-project/llvm/lib/Target/ARM/ARMFrameLowering.cpp
35266 views
//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the ARM implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
//
// On ARM, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// |                                   | Higher address
// |-----------------------------------|
// |                                   |
// | arguments passed on the stack     |
// |                                   |
// |-----------------------------------| <- (sp at function entry)
// |                                   |
// | varargs from registers            |
// |                                   |
// |-----------------------------------|
// |                                   |
// | prev_lr                           |
// | prev_fp                           |
// | (a.k.a. "frame record")           |
// |                                   |
// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
// |                                   |
// | callee-saved gpr registers        |
// |                                   |
// |-----------------------------------|
// |                                   |
// | callee-saved fp/simd regs         |
// |                                   |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.8-byte.alignment.....|  compile time; if present)
// |-----------------------------------|
// |                                   |
// | local variables of fixed size     |
// | including spill slots             |
// |-----------------------------------| <- base pointer (not defined by ABI,
// |.variable-sized.local.variables....|    LLVM chooses r6)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................|  compile time)
// |-----------------------------------| <- sp
// |                                   | Lower address
//
//
// To access the data in a frame, a constant offset from one of the pointers
// (fp, bp, sp) must be computable at compile time. The size
// of the areas with a dotted background cannot be computed at compile-time
// if they are present, making it required to have all three of fp, bp and
// sp to be set up to be able to access all contents in the frame areas,
// assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitely needed when there are both VLAs and local
//   variables with more-than-default alignment requirements.
// * A frame pointer is definitely needed when there are local variables with
//   more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// The frame pointer might be chosen to be r7 or r11, depending on the target
// architecture and operating system. See ARMSubtarget::getFramePointerReg for
// details.
//
// Outgoing function arguments must be at the bottom of the stack frame when
// calling another function. If we do not have variable-sized stack objects, we
// can allocate a "reserved call frame" area at the bottom of the local
// variable area, large enough for all outgoing calls.
If we do have VLAs, then104// the stack pointer must be decremented and incremented around each call to105// make space for the arguments below the VLAs.106//107//===----------------------------------------------------------------------===//108109#include "ARMFrameLowering.h"110#include "ARMBaseInstrInfo.h"111#include "ARMBaseRegisterInfo.h"112#include "ARMConstantPoolValue.h"113#include "ARMMachineFunctionInfo.h"114#include "ARMSubtarget.h"115#include "MCTargetDesc/ARMAddressingModes.h"116#include "MCTargetDesc/ARMBaseInfo.h"117#include "Utils/ARMBaseInfo.h"118#include "llvm/ADT/BitVector.h"119#include "llvm/ADT/STLExtras.h"120#include "llvm/ADT/SmallPtrSet.h"121#include "llvm/ADT/SmallVector.h"122#include "llvm/CodeGen/MachineBasicBlock.h"123#include "llvm/CodeGen/MachineConstantPool.h"124#include "llvm/CodeGen/MachineFrameInfo.h"125#include "llvm/CodeGen/MachineFunction.h"126#include "llvm/CodeGen/MachineInstr.h"127#include "llvm/CodeGen/MachineInstrBuilder.h"128#include "llvm/CodeGen/MachineJumpTableInfo.h"129#include "llvm/CodeGen/MachineModuleInfo.h"130#include "llvm/CodeGen/MachineOperand.h"131#include "llvm/CodeGen/MachineRegisterInfo.h"132#include "llvm/CodeGen/RegisterScavenging.h"133#include "llvm/CodeGen/TargetInstrInfo.h"134#include "llvm/CodeGen/TargetOpcodes.h"135#include "llvm/CodeGen/TargetRegisterInfo.h"136#include "llvm/CodeGen/TargetSubtargetInfo.h"137#include "llvm/IR/Attributes.h"138#include "llvm/IR/CallingConv.h"139#include "llvm/IR/DebugLoc.h"140#include "llvm/IR/Function.h"141#include "llvm/MC/MCAsmInfo.h"142#include "llvm/MC/MCContext.h"143#include "llvm/MC/MCDwarf.h"144#include "llvm/MC/MCInstrDesc.h"145#include "llvm/MC/MCRegisterInfo.h"146#include "llvm/Support/CodeGen.h"147#include "llvm/Support/CommandLine.h"148#include "llvm/Support/Compiler.h"149#include "llvm/Support/Debug.h"150#include "llvm/Support/ErrorHandling.h"151#include "llvm/Support/MathExtras.h"152#include "llvm/Support/raw_ostream.h"153#include 
"llvm/Target/TargetMachine.h"154#include "llvm/Target/TargetOptions.h"155#include <algorithm>156#include <cassert>157#include <cstddef>158#include <cstdint>159#include <iterator>160#include <utility>161#include <vector>162163#define DEBUG_TYPE "arm-frame-lowering"164165using namespace llvm;166167static cl::opt<bool>168SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),169cl::desc("Align ARM NEON spills in prolog and epilog"));170171static MachineBasicBlock::iterator172skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,173unsigned NumAlignedDPRCS2Regs);174175ARMFrameLowering::ARMFrameLowering(const ARMSubtarget &sti)176: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),177STI(sti) {}178179bool ARMFrameLowering::keepFramePointer(const MachineFunction &MF) const {180// iOS always has a FP for backtracking, force other targets to keep their FP181// when doing FastISel. The emitted code is currently superior, and in cases182// like test-suite's lencod FastISel isn't quite correct when FP is eliminated.183return MF.getSubtarget<ARMSubtarget>().useFastISel();184}185186/// Returns true if the target can safely skip saving callee-saved registers187/// for noreturn nounwind functions.188bool ARMFrameLowering::enableCalleeSaveSkip(const MachineFunction &MF) const {189assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&190MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&191!MF.getFunction().hasFnAttribute(Attribute::UWTable));192193// Frame pointer and link register are not treated as normal CSR, thus we194// can always skip CSR saves for nonreturning functions.195return true;196}197198/// hasFP - Return true if the specified function should have a dedicated frame199/// pointer register. 
This is true if the function has variable sized allocas200/// or if frame pointer elimination is disabled.201bool ARMFrameLowering::hasFP(const MachineFunction &MF) const {202const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();203const MachineFrameInfo &MFI = MF.getFrameInfo();204205// ABI-required frame pointer.206if (MF.getTarget().Options.DisableFramePointerElim(MF))207return true;208209// Frame pointer required for use within this function.210return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||211MFI.isFrameAddressTaken());212}213214/// isFPReserved - Return true if the frame pointer register should be215/// considered a reserved register on the scope of the specified function.216bool ARMFrameLowering::isFPReserved(const MachineFunction &MF) const {217return hasFP(MF) || MF.getTarget().Options.FramePointerIsReserved(MF);218}219220/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is221/// not required, we reserve argument space for call sites in the function222/// immediately on entry to the current function. This eliminates the need for223/// add/sub sp brackets around call sites. Returns true if the call frame is224/// included as part of the stack frame.225bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {226const MachineFrameInfo &MFI = MF.getFrameInfo();227unsigned CFSize = MFI.getMaxCallFrameSize();228// It's not always a good idea to include the call frame as part of the229// stack frame. ARM (especially Thumb) has small immediate offset to230// address the stack frame. So a large call frame can cause poor codegen231// and may even makes it impossible to scavenge a register.232if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12233return false;234235return !MFI.hasVarSizedObjects();236}237238/// canSimplifyCallFramePseudos - If there is a reserved call frame, the239/// call frame pseudos can be simplified. 
/// Unlike most targets, having a FP
/// is not sufficient here since we still may reference some objects via SP
/// even when FP is available in Thumb2 mode.
/// Returns true when the call frame is reserved or when the function has
/// variable sized objects.
bool
ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
  return hasReservedCallFrame(MF) || MF.getFrameInfo().hasVarSizedObjects();
}

// Returns how much of the incoming argument stack area we should clean up in an
// epilogue. For the C calling convention this will be 0, for guaranteed tail
// call conventions it can be positive (a normal return or a tail call to a
// function that uses less stack space for arguments) or negative (for a tail
// call to a function that needs more stack space than us for arguments).
//
// The value is read from operand 1 of the terminating TCRETURN* instruction
// when MBB ends in one, and from ARMFunctionInfo otherwise.
static int getArgumentStackToRestore(MachineFunction &MF,
                                     MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  bool IsTailCallReturn = false;
  // MBBI == end() means the block has no non-debug instruction to inspect.
  if (MBB.end() != MBBI) {
    unsigned RetOpcode = MBBI->getOpcode();
    IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
                       RetOpcode == ARM::TCRETURNri ||
                       RetOpcode == ARM::TCRETURNrinotr12;
  }
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();

  int ArgumentPopSize = 0;
  if (IsTailCallReturn) {
    MachineOperand &StackAdjust = MBBI->getOperand(1);

    // For a tail-call in a callee-pops-arguments environment, some or all of
    // the stack may actually be in use for the call's arguments, this is
    // calculated during LowerCall and consumed here...
    ArgumentPopSize = StackAdjust.getImm();
  } else {
    // ... otherwise the amount to pop is *all* of the argument space,
    // conveniently stored in the MachineFunctionInfo by
    // LowerFormalArguments. This will, of course, be zero for the C calling
    // convention.
    ArgumentPopSize = AFI->getArgumentStackToRestore();
  }

  return ArgumentPopSize;
}

// Returns true when the target's MCAsmInfo reports Windows CFI and the
// function needs an unwind table entry.
static bool needsWinCFI(const MachineFunction &MF) {
  const Function &F = MF.getFunction();
  return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
         F.needsUnwindTableEntry();
}

// Given a load or a store instruction, generate an appropriate unwinding SEH
// code on Windows.
//
// The SEH pseudo matching the instruction at MBBI is created with Flags (plus
// MachineInstr::NoMerge) and inserted directly after that instruction; the
// returned iterator points at the inserted pseudo.
//
// Two kinds of instruction are re-encoded in place when their narrow form is
// sufficient: a t2MOVi16 whose immediate is below 256 becomes tMOVi8, and a
// t2LDMIA_RET / t2LDMIA_UPD / t2STMDB_UPD whose register list does not
// require the wide form becomes tPOP_RET / tPOP / tPUSH. In both cases the
// original instruction is erased and MBBI is redirected to the replacement.
//
// Opcodes (or operand combinations) without a mapping end in
// report_fatal_error.
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI,
                                             const TargetInstrInfo &TII,
                                             unsigned Flags) {
  unsigned Opc = MBBI->getOpcode();
  MachineBasicBlock *MBB = MBBI->getParent();
  MachineFunction &MF = *MBB->getParent();
  DebugLoc DL = MBBI->getDebugLoc();
  // Holds the not-yet-inserted SEH pseudo; it is inserted by the final return.
  MachineInstrBuilder MIB;
  const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
  const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  Flags |= MachineInstr::NoMerge;

  switch (Opc) {
  default:
    report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
    break;
  case ARM::t2ADDri:   // add.w r11, sp, #xx
  case ARM::t2ADDri12: // add.w r11, sp, #xx
  case ARM::t2MOVTi16: // movt  r4, #xx
  case ARM::tBL:       // bl __chkstk
    // These are harmless if used for just setting up a frame pointer,
    // but that frame pointer can't be relied upon for unwinding, unless
    // set up with SEH_SaveSP.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi16: { // mov(w) r4, #xx
    bool Wide = MBBI->getOperand(1).getImm() >= 256;
    if (!Wide) {
      // Rebuild the instruction as tMOVi8: destination operand first, then a
      // dead condition-code operand, then the remaining original operands.
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
      NewInstr.add(MBBI->getOperand(0));
      NewInstr.add(t1CondCodeOp(/*isDead=*/true));
      for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
        NewInstr.add(MO);
      // Insert the replacement before erasing the original so that MBBI can
      // be moved onto a live instruction.
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
    break;
  }

  case ARM::tBLXr: // blx r12 (__chkstk)
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::t2MOVi32imm: // movw+movt
    // This pseudo instruction expands into two mov instructions. If the
    // second operand is a symbol reference, this will stay as two wide
    // instructions, movw+movt. If they're immediates, the first one can
    // end up as a narrow mov though.
    // As two SEH instructions are appended here, they won't get interleaved
    // between the two final movw/movt instructions, but it doesn't make any
    // practical difference.
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    // The first nop is inserted here; the second one by the final return.
    MBB->insertAfter(MBBI, MIB);
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::t2STR_PRE:
    // Only the form with SP in operands 0 and 2 and an immediate of -4 is
    // mapped; operand 1 is the register being saved.
    if (MBBI->getOperand(0).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == -4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2STR_PRE");
    }
    break;

  case ARM::t2LDR_POST:
    // Only the form with SP in operands 1 and 2 and an immediate of 4 is
    // mapped; operand 0 is the register being restored.
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        MBBI->getOperand(2).getReg() == ARM::SP &&
        MBBI->getOperand(3).getImm() == 4) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
                .addImm(1ULL << Reg)
                .addImm(/*Wide=*/1)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No matching SEH Opcode for t2LDR_POST");
    }
    break;

  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMDB_UPD: {
    // Build the bit mask of SEH register numbers from the explicit register
    // operands, which start at operand index 4.
    unsigned Mask = 0;
    bool Wide = false;
    for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
      const MachineOperand &MO = MBBI->getOperand(i);
      if (!MO.isReg() || MO.isImplicit())
        continue;
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      // SEH register number 15 is recorded as 14 (presumably PC folded onto
      // the LR bit -- confirm against getSEHRegNum).
      if (Reg == 15)
        Reg = 14;
      // Registers numbered 8..13 force the wide form, as does number 14 in a
      // t2LDMIA_UPD.
      if (Reg >= 8 && Reg <= 13)
        Wide = true;
      else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
        Wide = true;
      Mask |= 1 << Reg;
    }
    if (!Wide) {
      // Replace the instruction by its narrow push/pop counterpart, copying
      // every operand from index 2 onwards.
      unsigned NewOpc;
      switch (Opc) {
      case ARM::t2LDMIA_RET:
        NewOpc = ARM::tPOP_RET;
        break;
      case ARM::t2LDMIA_UPD:
        NewOpc = ARM::tPOP;
        break;
      case ARM::t2STMDB_UPD:
        NewOpc = ARM::tPUSH;
        break;
      default:
        llvm_unreachable("");
      }
      MachineInstrBuilder NewInstr =
          BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
      for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
        NewInstr.add(MBBI->getOperand(i));
      MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
      MBB->erase(MBBI);
      MBBI = NewMBBI;
    }
    unsigned SEHOpc =
        (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
    MIB = BuildMI(MF, DL, TII.get(SEHOpc))
              .addImm(Mask)
              .addImm(Wide ? 1 : 0)
              .setMIFlags(Flags);
    break;
  }
  case ARM::VSTMDDB_UPD:
  case ARM::VLDMDIA_UPD: {
    // First/Last are the SEH numbers of the first and the last operand from
    // operand index 4 onwards.
    int First = -1, Last = 0;
    for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
      unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
      if (First == -1)
        First = Reg;
      Last = Reg;
    }
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
              .addImm(First)
              .addImm(Last)
              .setMIFlags(Flags);
    break;
  }
  case ARM::tSUBspi:
  case ARM::tADDspi:
    // The immediate is multiplied by 4 before being recorded (presumably
    // because these opcodes encode the adjustment in words -- confirm).
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm() * 4)
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;
  case ARM::t2SUBspImm:
  case ARM::t2SUBspImm12:
  case ARM::t2ADDspImm:
  case ARM::t2ADDspImm12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
              .addImm(MBBI->getOperand(2).getImm())
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;

  case ARM::tMOVr:
    // A move from SP during frame setup, or a move into SP during frame
    // destroy, is recorded as SEH_SaveSP with the other register's number.
    if (MBBI->getOperand(1).getReg() == ARM::SP &&
        (Flags & MachineInstr::FrameSetup)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
               (Flags & MachineInstr::FrameDestroy)) {
      unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
      MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
                .addImm(Reg)
                .setMIFlags(Flags);
    } else {
      report_fatal_error("No SEH Opcode for MOV");
    }
    break;

  case ARM::tBX_RET:
  case ARM::TCRETURNri:
  case ARM::TCRETURNrinotr12:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/0)
              .setMIFlags(Flags);
    break;

  case ARM::TCRETURNdi:
    MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
              .addImm(/*Wide=*/1)
              .setMIFlags(Flags);
    break;
  }
  return MBB->insertAfter(MBBI, MIB);
}

// Returns the iterator preceding MBBI, or a default-constructed iterator when
// MBBI is the first instruction of MBB. insertSEHRange interprets a start
// iterator for which isValid() is false as "start at MBB.begin()".
static MachineBasicBlock::iterator
initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI) {
  if (MBBI == MBB.begin())
    return MachineBasicBlock::iterator();
  return std::prev(MBBI);
}

// Calls insertSEH for every instruction after Start (or from MBB.begin() when
// Start is not valid) up to, but not including, End. An instruction that is
// already directly followed by an SEH instruction is skipped together with
// the whole run of SEH instructions that follows it.
static void insertSEHRange(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator Start,
                           const MachineBasicBlock::iterator &End,
                           const ARMBaseInstrInfo &TII, unsigned MIFlags) {
  if (Start.isValid())
    Start = std::next(Start);
  else
    Start = MBB.begin();

  for (auto MI = Start; MI != End;) {
    // Remember the successor first: insertSEH may replace and erase MI.
    auto Next = std::next(MI);
    // Check if this instruction already has got a SEH opcode added. In that
    // case, don't do this generic mapping.
    if (Next != End && isSEHInstruction(*Next)) {
      MI = std::next(Next);
      while (MI != End && isSEHInstruction(*MI))
        ++MI;
      continue;
    }
    insertSEH(MI, TII, MIFlags);
    MI = Next;
  }
}

// Forwards to emitARMRegPlusImmediate when isARM is true and to
// emitT2RegPlusImmediate otherwise, passing all remaining arguments through.
static void emitRegPlusImmediate(
    bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
    const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
    unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
    ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
  if (isARM)
    emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                            Pred, PredReg, TII, MIFlags);
  else
    emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
                           Pred, PredReg, TII, MIFlags);
}

// Convenience wrapper around emitRegPlusImmediate with SP as both the
// destination and the source register.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, int NumBytes,
                         unsigned MIFlags = MachineInstr::NoFlags,
                         ARMCC::CondCodes Pred = ARMCC::AL,
                         unsigned PredReg = 0) {
  emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
                       MIFlags, Pred, PredReg);
}

// Returns the number of bytes the given push-like instruction moves SP by:
// 4 for the single-register pre-indexed stores, otherwise the per-register
// size (8 for VSTMDDB_UPD, 4 for STMDB_UPD / t2STMDB_UPD) times the number of
// operands at index 4 and above. Any other opcode is unreachable.
static int sizeOfSPAdjustment(const MachineInstr &MI) {
  int RegSize;
  switch (MI.getOpcode()) {
  case ARM::VSTMDDB_UPD:
    RegSize = 8;
    break;
  case ARM::STMDB_UPD:
  case ARM::t2STMDB_UPD:
    RegSize = 4;
    break;
  case ARM::t2STR_PRE:
  case ARM::STR_PRE_IMM:
    return 4;
  default:
    llvm_unreachable("Unknown push or pop like instruction");
  }

  int count = 0;
  // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
  // pred) so the list starts at 4.
  for (int i = MI.getNumOperands() - 1; i >= 4; --i)
    count += RegSize;
  return count;
}

// Returns true when a stack allocation of StackSizeInBytes has to be probed.
// The threshold defaults to 4080 bytes when the frame's stack protector index
// is greater than 0 and to 4096 bytes otherwise, and can be overridden by the
// "stack-probe-size" function attribute. A function carrying the
// "no-stack-arg-probe" attribute is never probed.
static bool WindowsRequiresStackProbe(const MachineFunction &MF,
                                      size_t StackSizeInBytes) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const Function &F = MF.getFunction();
  unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;

  StackProbeSize =
      F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
  return (StackSizeInBytes >= StackProbeSize) &&
         !F.hasFnAttribute("no-stack-arg-probe");
}

namespace {

// Records the prologue instructions that adjust SP, in the order they are
// added, so that matching def_cfa_offset CFI instructions can be emitted
// after them later on.
struct StackAdjustingInsts {
  struct InstInfo {
    // The SP-adjusting instruction.
    MachineBasicBlock::iterator I;
    // Number of bytes this instruction contributes to the CFA offset.
    unsigned SPAdjust;
    // Whether the instruction was registered as preceding the frame pointer
    // setup; see emitDefCFAOffsets.
    bool BeforeFPSet;
  };

  SmallVector<InstInfo, 4> Insts;

  // Appends a new record for instruction I.
  void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
               bool BeforeFPSet = false) {
    InstInfo Info = {I, SPAdjust, BeforeFPSet};
    Insts.push_back(Info);
  }

  // Adds ExtraBytes to the adjustment recorded for instruction I. I must
  // already have been registered with addInst (asserted).
  void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
    auto Info =
        llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
    assert(Info != Insts.end() && "invalid sp adjusting instruction");
    Info->SPAdjust += ExtraBytes;
  }

  // Emits, right after each recorded instruction, a CFI_INSTRUCTION that sets
  // the CFA offset to the running total of the adjustments so far. When HasFP
  // is true, emission stops at the first record that is not marked
  // BeforeFPSet.
  void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
                         const ARMBaseInstrInfo &TII, bool HasFP) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFAOffset = 0;
    for (auto &Info : Insts) {
      if (HasFP && !Info.BeforeFPSet)
        return;

      CFAOffset += Info.SPAdjust;
      unsigned CFIIndex = MF.addFrameInst(
          MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
      BuildMI(MBB, std::next(Info.I), dl,
              TII.get(TargetOpcode::CFI_INSTRUCTION))
              .addCFIIndex(CFIIndex)
              .setMIFlags(MachineInstr::FrameSetup);
    }
  }
};

} // end anonymous namespace

/// Emit an instruction sequence that will align the address in
/// register Reg by
/// zero-ing out the lower bits. For versions of the
/// architecture that support Neon, this must be done in a single
/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
/// single instruction. That function only gets called when optimizing
/// spilling of D registers on a core with the Neon instruction set
/// present.
///
/// The instructions are built before \p MBBI in \p MBB. Thumb1-only functions
/// are not supported (asserted). \p MustBeSingleInstruction is only checked,
/// by an assertion, on the path that has to fall back to the two-instruction
/// shift sequence.
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI,
                                     const TargetInstrInfo &TII,
                                     MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     const DebugLoc &DL, const unsigned Reg,
                                     const Align Alignment,
                                     const bool MustBeSingleInstruction) {
  const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
  const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
  // Mask of the low bits that have to be cleared, and how many there are.
  const unsigned AlignMask = Alignment.value() - 1U;
  const unsigned NrBitsToZero = Log2(Alignment);
  assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
  if (!AFI->isThumbFunction()) {
    // if the BFC instruction is available, use that to zero the lower
    // bits:
    //   bfc Reg, #0, log2(Alignment)
    // otherwise use BIC, if the mask to zero the required number of bits
    // can be encoded in the bic immediate field
    //   bic Reg, Reg, Alignment-1
    // otherwise, emit
    //   lsr Reg, Reg, log2(Alignment)
    //   lsl Reg, Reg, log2(Alignment)
    if (CanUseBFC) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(~AlignMask)
          .add(predOps(ARMCC::AL));
    } else if (AlignMask <= 255) {
      BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(AlignMask)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      assert(!MustBeSingleInstruction &&
             "Shouldn't call emitAligningInstructions demanding a single "
             "instruction to be emitted for large stack alignment for a target "
             "without BFC.");
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
      BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
          .addReg(Reg, RegState::Kill)
          .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  } else {
    // Since this is only reached for Thumb-2 targets, the BFC instruction
    // should always be available.
    assert(CanUseBFC);
    BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
        .addReg(Reg, RegState::Kill)
        .addImm(~AlignMask)
        .add(predOps(ARMCC::AL));
  }
}

/// We need the offset of the frame pointer relative to other MachineFrameInfo
/// offsets which are encoded relative to SP at function begin.
/// See also emitPrologue() for how the FP is set up.
/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
/// this to produce a conservative estimate that we check in an assert() later.
///
/// The returned value is negative or zero: the argument register save size,
/// the assumed register bytes and (for CMSE non-secure entry functions on
/// v8.1-M Mainline) 4 bytes for FPCXT are all subtracted from zero.
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
                          const MachineFunction &MF) {
  // For Thumb1, push.w isn't available, so the first push will always push
  // r7 and lr onto the stack first.
  if (AFI.isThumb1OnlyFunction())
    return -AFI.getArgRegsSaveSize() - (2 * 4);
  // This is a conservative estimation: Assume the frame pointer being r7 and
  // pc("r15") up to r8 getting spilled before (= 8 registers).
  int MaxRegBytes = 8 * 4;
  if (STI.splitFramePointerPush(MF)) {
    // Here, r11 can be stored below all of r4-r15 (3 registers more than
    // above), plus d8-d15.
    MaxRegBytes = 11 * 4 + 8 * 8;
  }
  int FPCXTSaveSize =
      (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
  return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
}

void ARMFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  MCContext &Context = MF.getContext();
  const TargetMachine &TM = MF.getTarget();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
  const ARMBaseInstrInfo &TII = *STI.getInstrInfo();
  assert(!AFI->isThumb1OnlyFunction() &&
         "This emitPrologue does not support Thumb1!");
  bool isARM = !AFI->isThumbFunction();
  Align Alignment = STI.getFrameLowering()->getStackAlign();
  unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
  unsigned NumBytes = MFI.getStackSize();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
  int FPCXTSaveSize = 0;
  bool NeedsWinCFI = needsWinCFI(MF);

  // Debug location must be unknown since the first debug location is used
  // to determine the end of the prologue.
  DebugLoc dl;

  Register FramePtr = RegInfo->getFrameRegister(MF);

  // Determine the sizes of each callee-save spill areas and record which frame
  // belongs to which callee-save spill areas.
  unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
  int FramePtrSpillFI = 0;
  int D8SpillFI = 0;

  // All calls are tail calls in GHC calling conv, and functions have no
  // prologue/epilogue.
  if (MF.getFunction().getCallingConv() == CallingConv::GHC)
    return;

  StackAdjustingInsts DefCFAOffsetCandidates;
  bool HasFP = hasFP(MF);

  if (!AFI->hasStackFrame() &&
      (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
    if (NumBytes != 0) {
      emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
                   MachineInstr::FrameSetup);
      DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
    }
    if
(!NeedsWinCFI)780DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);781if (NeedsWinCFI && MBBI != MBB.begin()) {782insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup);783BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))784.setMIFlag(MachineInstr::FrameSetup);785MF.setHasWinCFI(true);786}787return;788}789790// Determine spill area sizes.791if (STI.splitFramePointerPush(MF)) {792for (const CalleeSavedInfo &I : CSI) {793Register Reg = I.getReg();794int FI = I.getFrameIdx();795switch (Reg) {796case ARM::R11:797case ARM::LR:798if (Reg == FramePtr)799FramePtrSpillFI = FI;800GPRCS2Size += 4;801break;802case ARM::R0:803case ARM::R1:804case ARM::R2:805case ARM::R3:806case ARM::R4:807case ARM::R5:808case ARM::R6:809case ARM::R7:810case ARM::R8:811case ARM::R9:812case ARM::R10:813case ARM::R12:814GPRCS1Size += 4;815break;816case ARM::FPCXTNS:817FPCXTSaveSize = 4;818break;819default:820// This is a DPR. Exclude the aligned DPRCS2 spills.821if (Reg == ARM::D8)822D8SpillFI = FI;823if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())824DPRCSSize += 8;825}826}827} else {828for (const CalleeSavedInfo &I : CSI) {829Register Reg = I.getReg();830int FI = I.getFrameIdx();831switch (Reg) {832case ARM::R8:833case ARM::R9:834case ARM::R10:835case ARM::R11:836case ARM::R12:837if (STI.splitFramePushPop(MF)) {838GPRCS2Size += 4;839break;840}841[[fallthrough]];842case ARM::R0:843case ARM::R1:844case ARM::R2:845case ARM::R3:846case ARM::R4:847case ARM::R5:848case ARM::R6:849case ARM::R7:850case ARM::LR:851if (Reg == FramePtr)852FramePtrSpillFI = FI;853GPRCS1Size += 4;854break;855case ARM::FPCXTNS:856FPCXTSaveSize = 4;857break;858default:859// This is a DPR. 
Exclude the aligned DPRCS2 spills.860if (Reg == ARM::D8)861D8SpillFI = FI;862if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())863DPRCSSize += 8;864}865}866}867868MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;869870// Move past the PAC computation.871if (AFI->shouldSignReturnAddress())872LastPush = MBBI++;873874// Move past FPCXT area.875if (FPCXTSaveSize > 0) {876LastPush = MBBI++;877DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);878}879880// Allocate the vararg register save area.881if (ArgRegsSaveSize) {882emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,883MachineInstr::FrameSetup);884LastPush = std::prev(MBBI);885DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);886}887888// Move past area 1.889if (GPRCS1Size > 0) {890GPRCS1Push = LastPush = MBBI++;891DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);892}893894// Determine starting offsets of spill areas.895unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;896unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;897unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;898Align DPRAlign = DPRCSSize ? 
std::min(Align(8), Alignment) : Align(4);899unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;900if (!STI.splitFramePointerPush(MF)) {901DPRGapSize += GPRCS2Size;902}903DPRGapSize %= DPRAlign.value();904905unsigned DPRCSOffset;906if (STI.splitFramePointerPush(MF)) {907DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;908GPRCS2Offset = DPRCSOffset - GPRCS2Size;909} else {910DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;911}912int FramePtrOffsetInPush = 0;913if (HasFP) {914int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);915assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&916"Max FP estimation is wrong");917FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;918AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +919NumBytes);920}921AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);922AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);923AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);924925// Move past area 2.926if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {927GPRCS2Push = LastPush = MBBI++;928DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);929}930931// Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our932// .cfi_offset operations will reflect that.933if (DPRGapSize) {934assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");935if (LastPush != MBB.end() &&936tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))937DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);938else {939emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,940MachineInstr::FrameSetup);941DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);942}943}944945// Move past area 3.946if (DPRCSSize > 0) {947// Since vpush register list cannot have gaps, there may be multiple vpush948// instructions in the prologue.949while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {950DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));951LastPush = 
MBBI++;952}953}954955// Move past the aligned DPRCS2 area.956if (AFI->getNumAlignedDPRCS2Regs() > 0) {957MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs());958// The code inserted by emitAlignedDPRCS2Spills realigns the stack, and959// leaves the stack pointer pointing to the DPRCS2 area.960//961// Adjust NumBytes to represent the stack slots below the DPRCS2 area.962NumBytes += MFI.getObjectOffset(D8SpillFI);963} else964NumBytes = DPRCSOffset;965966if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {967GPRCS2Push = LastPush = MBBI++;968DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);969}970971bool NeedsWinCFIStackAlloc = NeedsWinCFI;972if (STI.splitFramePointerPush(MF) && HasFP)973NeedsWinCFIStackAlloc = false;974975if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {976uint32_t NumWords = NumBytes >> 2;977978if (NumWords < 65536) {979BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)980.addImm(NumWords)981.setMIFlags(MachineInstr::FrameSetup)982.add(predOps(ARMCC::AL));983} else {984// Split into two instructions here, instead of using t2MOVi32imm,985// to allow inserting accurate SEH instructions (including accurate986// instruction size for each of them).987BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)988.addImm(NumWords & 0xffff)989.setMIFlags(MachineInstr::FrameSetup)990.add(predOps(ARMCC::AL));991BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)992.addReg(ARM::R4)993.addImm(NumWords >> 16)994.setMIFlags(MachineInstr::FrameSetup)995.add(predOps(ARMCC::AL));996}997998switch (TM.getCodeModel()) {999case CodeModel::Tiny:1000llvm_unreachable("Tiny code model not available on ARM.");1001case CodeModel::Small:1002case CodeModel::Medium:1003case CodeModel::Kernel:1004BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))1005.add(predOps(ARMCC::AL))1006.addExternalSymbol("__chkstk")1007.addReg(ARM::R4, RegState::Implicit)1008.setMIFlags(MachineInstr::FrameSetup);1009break;1010case CodeModel::Large:1011BuildMI(MBB, 
MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)1012.addExternalSymbol("__chkstk")1013.setMIFlags(MachineInstr::FrameSetup);10141015BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))1016.add(predOps(ARMCC::AL))1017.addReg(ARM::R12, RegState::Kill)1018.addReg(ARM::R4, RegState::Implicit)1019.setMIFlags(MachineInstr::FrameSetup);1020break;1021}10221023MachineInstrBuilder Instr, SEH;1024Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)1025.addReg(ARM::SP, RegState::Kill)1026.addReg(ARM::R4, RegState::Kill)1027.setMIFlags(MachineInstr::FrameSetup)1028.add(predOps(ARMCC::AL))1029.add(condCodeOp());1030if (NeedsWinCFIStackAlloc) {1031SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))1032.addImm(NumBytes)1033.addImm(/*Wide=*/1)1034.setMIFlags(MachineInstr::FrameSetup);1035MBB.insertAfter(Instr, SEH);1036}1037NumBytes = 0;1038}10391040if (NumBytes) {1041// Adjust SP after all the callee-save spills.1042if (AFI->getNumAlignedDPRCS2Regs() == 0 &&1043tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))1044DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);1045else {1046emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,1047MachineInstr::FrameSetup);1048DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);1049}10501051if (HasFP && isARM)1052// Restore from fp only in ARM mode: e.g. sub sp, r7, #241053// Note it's not safe to do this in Thumb2 mode because it would have1054// taken two instructions:1055// mov sp, r71056// sub sp, #241057// If an interrupt is taken between the two instructions, then sp is in1058// an inconsistent state (pointing to the middle of callee-saved area).1059// The interrupt handler can end up clobbering the registers.1060AFI->setShouldRestoreSPFromFP(true);1061}10621063// Set FP to point to the stack slot that contains the previous FP.1064// For iOS, FP is R7, which has now been stored in spill area 1.1065// Otherwise, if this is not iOS, all the callee-saved registers go1066// into spill area 1, including the FP in R11. 
In either case, it1067// is in area one and the adjustment needs to take place just after1068// that push.1069// FIXME: The above is not necessary true when PACBTI is enabled.1070// AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,1071// so FP ends up on area two.1072MachineBasicBlock::iterator AfterPush;1073if (HasFP) {1074AfterPush = std::next(GPRCS1Push);1075unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);1076int FPOffset = PushSize + FramePtrOffsetInPush;1077if (STI.splitFramePointerPush(MF)) {1078AfterPush = std::next(GPRCS2Push);1079emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,1080FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);1081} else {1082emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,1083FramePtr, ARM::SP, FPOffset,1084MachineInstr::FrameSetup);1085}1086if (!NeedsWinCFI) {1087if (FramePtrOffsetInPush + PushSize != 0) {1088unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(1089nullptr, MRI->getDwarfRegNum(FramePtr, true),1090FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));1091BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))1092.addCFIIndex(CFIIndex)1093.setMIFlags(MachineInstr::FrameSetup);1094} else {1095unsigned CFIIndex =1096MF.addFrameInst(MCCFIInstruction::createDefCfaRegister(1097nullptr, MRI->getDwarfRegNum(FramePtr, true)));1098BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))1099.addCFIIndex(CFIIndex)1100.setMIFlags(MachineInstr::FrameSetup);1101}1102}1103}11041105// Emit a SEH opcode indicating the prologue end. 
The rest of the prologue1106// instructions below don't need to be replayed to unwind the stack.1107if (NeedsWinCFI && MBBI != MBB.begin()) {1108MachineBasicBlock::iterator End = MBBI;1109if (HasFP && STI.splitFramePointerPush(MF))1110End = AfterPush;1111insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup);1112BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))1113.setMIFlag(MachineInstr::FrameSetup);1114MF.setHasWinCFI(true);1115}11161117// Now that the prologue's actual instructions are finalised, we can insert1118// the necessary DWARF cf instructions to describe the situation. Start by1119// recording where each register ended up:1120if (GPRCS1Size > 0 && !NeedsWinCFI) {1121MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);1122int CFIIndex;1123for (const auto &Entry : CSI) {1124Register Reg = Entry.getReg();1125int FI = Entry.getFrameIdx();1126switch (Reg) {1127case ARM::R8:1128case ARM::R9:1129case ARM::R10:1130case ARM::R11:1131case ARM::R12:1132if (STI.splitFramePushPop(MF))1133break;1134[[fallthrough]];1135case ARM::R0:1136case ARM::R1:1137case ARM::R2:1138case ARM::R3:1139case ARM::R4:1140case ARM::R5:1141case ARM::R6:1142case ARM::R7:1143case ARM::LR:1144CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(1145nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));1146BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))1147.addCFIIndex(CFIIndex)1148.setMIFlags(MachineInstr::FrameSetup);1149break;1150}1151}1152}11531154if (GPRCS2Size > 0 && !NeedsWinCFI) {1155MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);1156for (const auto &Entry : CSI) {1157Register Reg = Entry.getReg();1158int FI = Entry.getFrameIdx();1159switch (Reg) {1160case ARM::R8:1161case ARM::R9:1162case ARM::R10:1163case ARM::R11:1164case ARM::R12:1165if (STI.splitFramePushPop(MF)) {1166unsigned DwarfReg = MRI->getDwarfRegNum(1167Reg == ARM::R12 ? 
ARM::RA_AUTH_CODE : Reg, true);1168int64_t Offset = MFI.getObjectOffset(FI);1169unsigned CFIIndex = MF.addFrameInst(1170MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));1171BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))1172.addCFIIndex(CFIIndex)1173.setMIFlags(MachineInstr::FrameSetup);1174}1175break;1176}1177}1178}11791180if (DPRCSSize > 0 && !NeedsWinCFI) {1181// Since vpush register list cannot have gaps, there may be multiple vpush1182// instructions in the prologue.1183MachineBasicBlock::iterator Pos = std::next(LastPush);1184for (const auto &Entry : CSI) {1185Register Reg = Entry.getReg();1186int FI = Entry.getFrameIdx();1187if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&1188(Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {1189unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);1190int64_t Offset = MFI.getObjectOffset(FI);1191unsigned CFIIndex = MF.addFrameInst(1192MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));1193BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))1194.addCFIIndex(CFIIndex)1195.setMIFlags(MachineInstr::FrameSetup);1196}1197}1198}11991200// Now we can emit descriptions of where the canonical frame address was1201// throughout the process. If we have a frame pointer, it takes over the job1202// half-way through, so only the first few .cfi_def_cfa_offset instructions1203// actually get emitted.1204if (!NeedsWinCFI)1205DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);12061207if (STI.isTargetELF() && hasFP(MF))1208MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() -1209AFI->getFramePtrSpillOffset());12101211AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);1212AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);1213AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);1214AFI->setDPRCalleeSavedGapSize(DPRGapSize);1215AFI->setDPRCalleeSavedAreaSize(DPRCSSize);12161217// If we need dynamic stack realignment, do it here. 
Be paranoid and make1218// sure if we also have VLAs, we have a base pointer for frame access.1219// If aligned NEON registers were spilled, the stack has already been1220// realigned.1221if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {1222Align MaxAlign = MFI.getMaxAlign();1223assert(!AFI->isThumb1OnlyFunction());1224if (!AFI->isThumbFunction()) {1225emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,1226false);1227} else {1228// We cannot use sp as source/dest register here, thus we're using r4 to1229// perform the calculations. We're emitting the following sequence:1230// mov r4, sp1231// -- use emitAligningInstructions to produce best sequence to zero1232// -- out lower bits in r41233// mov sp, r41234// FIXME: It will be better just to find spare register here.1235BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)1236.addReg(ARM::SP, RegState::Kill)1237.add(predOps(ARMCC::AL));1238emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,1239false);1240BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)1241.addReg(ARM::R4, RegState::Kill)1242.add(predOps(ARMCC::AL));1243}12441245AFI->setShouldRestoreSPFromFP(true);1246}12471248// If we need a base pointer, set it up here. It's whatever the value1249// of the stack pointer is at this point. Any variable size objects1250// will be allocated after this, so we can still use the base pointer1251// to reference locals.1252// FIXME: Clarify FrameSetup flags here.1253if (RegInfo->hasBasePointer(MF)) {1254if (isARM)1255BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())1256.addReg(ARM::SP)1257.add(predOps(ARMCC::AL))1258.add(condCodeOp());1259else1260BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())1261.addReg(ARM::SP)1262.add(predOps(ARMCC::AL));1263}12641265// If the frame has variable sized objects then the epilogue must restore1266// the sp from fp. 
We can assume there's an FP here since hasFP already1267// checks for hasVarSizedObjects.1268if (MFI.hasVarSizedObjects())1269AFI->setShouldRestoreSPFromFP(true);1270}12711272void ARMFrameLowering::emitEpilogue(MachineFunction &MF,1273MachineBasicBlock &MBB) const {1274MachineFrameInfo &MFI = MF.getFrameInfo();1275ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();1276const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();1277const ARMBaseInstrInfo &TII =1278*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());1279assert(!AFI->isThumb1OnlyFunction() &&1280"This emitEpilogue does not support Thumb1!");1281bool isARM = !AFI->isThumbFunction();12821283// Amount of stack space we reserved next to incoming args for either1284// varargs registers or stack arguments in tail calls made by this function.1285unsigned ReservedArgStack = AFI->getArgRegsSaveSize();12861287// How much of the stack used by incoming arguments this function is expected1288// to restore in this particular epilogue.1289int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);1290int NumBytes = (int)MFI.getStackSize();1291Register FramePtr = RegInfo->getFrameRegister(MF);12921293// All calls are tail calls in GHC calling conv, and functions have no1294// prologue/epilogue.1295if (MF.getFunction().getCallingConv() == CallingConv::GHC)1296return;12971298// First put ourselves on the first (from top) terminator instructions.1299MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();1300DebugLoc dl = MBBI != MBB.end() ? 
MBBI->getDebugLoc() : DebugLoc();13011302MachineBasicBlock::iterator RangeStart;1303if (!AFI->hasStackFrame()) {1304if (MF.hasWinCFI()) {1305BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))1306.setMIFlag(MachineInstr::FrameDestroy);1307RangeStart = initMBBRange(MBB, MBBI);1308}13091310if (NumBytes + IncomingArgStackToRestore != 0)1311emitSPUpdate(isARM, MBB, MBBI, dl, TII,1312NumBytes + IncomingArgStackToRestore,1313MachineInstr::FrameDestroy);1314} else {1315// Unwind MBBI to point to first LDR / VLDRD.1316if (MBBI != MBB.begin()) {1317do {1318--MBBI;1319} while (MBBI != MBB.begin() &&1320MBBI->getFlag(MachineInstr::FrameDestroy));1321if (!MBBI->getFlag(MachineInstr::FrameDestroy))1322++MBBI;1323}13241325if (MF.hasWinCFI()) {1326BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))1327.setMIFlag(MachineInstr::FrameDestroy);1328RangeStart = initMBBRange(MBB, MBBI);1329}13301331// Move SP to start of FP callee save spill area.1332NumBytes -= (ReservedArgStack +1333AFI->getFPCXTSaveAreaSize() +1334AFI->getGPRCalleeSavedArea1Size() +1335AFI->getGPRCalleeSavedArea2Size() +1336AFI->getDPRCalleeSavedGapSize() +1337AFI->getDPRCalleeSavedAreaSize());13381339// Reset SP based on frame pointer only if the stack frame extends beyond1340// frame pointer stack slot or target is ELF and the function has FP.1341if (AFI->shouldRestoreSPFromFP()) {1342NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;1343if (NumBytes) {1344if (isARM)1345emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,1346ARMCC::AL, 0, TII,1347MachineInstr::FrameDestroy);1348else {1349// It's not possible to restore SP from FP in a single instruction.1350// For iOS, this looks like:1351// mov sp, r71352// sub sp, #241353// This is bad, if an interrupt is taken after the mov, sp is in an1354// inconsistent state.1355// Use the first callee-saved register as a scratch register.1356assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&1357"No scratch register to restore SP from 
FP!");1358emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,1359ARMCC::AL, 0, TII, MachineInstr::FrameDestroy);1360BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)1361.addReg(ARM::R4)1362.add(predOps(ARMCC::AL))1363.setMIFlag(MachineInstr::FrameDestroy);1364}1365} else {1366// Thumb2 or ARM.1367if (isARM)1368BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)1369.addReg(FramePtr)1370.add(predOps(ARMCC::AL))1371.add(condCodeOp())1372.setMIFlag(MachineInstr::FrameDestroy);1373else1374BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)1375.addReg(FramePtr)1376.add(predOps(ARMCC::AL))1377.setMIFlag(MachineInstr::FrameDestroy);1378}1379} else if (NumBytes &&1380!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))1381emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,1382MachineInstr::FrameDestroy);13831384// Increment past our save areas.1385if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF))1386MBBI++;13871388if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {1389MBBI++;1390// Since vpop register list cannot have gaps, there may be multiple vpop1391// instructions in the epilogue.1392while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)1393MBBI++;1394}1395if (AFI->getDPRCalleeSavedGapSize()) {1396assert(AFI->getDPRCalleeSavedGapSize() == 4 &&1397"unexpected DPR alignment gap");1398emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),1399MachineInstr::FrameDestroy);1400}14011402if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF))1403MBBI++;1404if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;14051406if (ReservedArgStack || IncomingArgStackToRestore) {1407assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&1408"attempting to restore negative stack amount");1409emitSPUpdate(isARM, MBB, MBBI, dl, TII,1410ReservedArgStack + IncomingArgStackToRestore,1411MachineInstr::FrameDestroy);1412}14131414// Validate PAC, It should have been already popped into R12. 
For CMSE entry1415// function, the validation instruction is emitted during expansion of the1416// tBXNS_RET, since the validation must use the value of SP at function1417// entry, before saving, resp. after restoring, FPCXTNS.1418if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())1419BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));1420}14211422if (MF.hasWinCFI()) {1423insertSEHRange(MBB, RangeStart, MBB.end(), TII, MachineInstr::FrameDestroy);1424BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))1425.setMIFlag(MachineInstr::FrameDestroy);1426}1427}14281429/// getFrameIndexReference - Provide a base+offset reference to an FI slot for1430/// debug info. It's the same as what we use for resolving the code-gen1431/// references for now. FIXME: This can go wrong when references are1432/// SP-relative and simple call frames aren't used.1433StackOffset ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF,1434int FI,1435Register &FrameReg) const {1436return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));1437}14381439int ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF,1440int FI, Register &FrameReg,1441int SPAdj) const {1442const MachineFrameInfo &MFI = MF.getFrameInfo();1443const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(1444MF.getSubtarget().getRegisterInfo());1445const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();1446int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();1447int FPOffset = Offset - AFI->getFramePtrSpillOffset();1448bool isFixed = MFI.isFixedObjectIndex(FI);14491450FrameReg = ARM::SP;1451Offset += SPAdj;14521453// SP can move around if there are allocas. 
We may also lose track of SP1454// when emergency spilling inside a non-reserved call frame setup.1455bool hasMovingSP = !hasReservedCallFrame(MF);14561457// When dynamically realigning the stack, use the frame pointer for1458// parameters, and the stack/base pointer for locals.1459if (RegInfo->hasStackRealignment(MF)) {1460assert(hasFP(MF) && "dynamic stack realignment without a FP!");1461if (isFixed) {1462FrameReg = RegInfo->getFrameRegister(MF);1463Offset = FPOffset;1464} else if (hasMovingSP) {1465assert(RegInfo->hasBasePointer(MF) &&1466"VLAs and dynamic stack alignment, but missing base pointer!");1467FrameReg = RegInfo->getBaseRegister();1468Offset -= SPAdj;1469}1470return Offset;1471}14721473// If there is a frame pointer, use it when we can.1474if (hasFP(MF) && AFI->hasStackFrame()) {1475// Use frame pointer to reference fixed objects. Use it for locals if1476// there are VLAs (and thus the SP isn't reliable as a base).1477if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {1478FrameReg = RegInfo->getFrameRegister(MF);1479return FPOffset;1480} else if (hasMovingSP) {1481assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");1482if (AFI->isThumb2Function()) {1483// Try to use the frame pointer if we can, else use the base pointer1484// since it's available. This is handy for the emergency spill slot, in1485// particular.1486if (FPOffset >= -255 && FPOffset < 0) {1487FrameReg = RegInfo->getFrameRegister(MF);1488return FPOffset;1489}1490}1491} else if (AFI->isThumbFunction()) {1492// Prefer SP to base pointer, if the offset is suitably aligned and in1493// range as the effective range of the immediate offset is bigger when1494// basing off SP.1495// Use add <rd>, sp, #<imm8>1496// ldr <rd>, [sp, #<imm8>]1497if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)1498return Offset;1499// In Thumb2 mode, the negative offset is very limited. Try to avoid1500// out of range references. 
ldr <rt>,[<rn>, #-<imm8>]1501if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {1502FrameReg = RegInfo->getFrameRegister(MF);1503return FPOffset;1504}1505} else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {1506// Otherwise, use SP or FP, whichever is closer to the stack slot.1507FrameReg = RegInfo->getFrameRegister(MF);1508return FPOffset;1509}1510}1511// Use the base pointer if we have one.1512// FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?1513// That can happen if we forced a base pointer for a large call frame.1514if (RegInfo->hasBasePointer(MF)) {1515FrameReg = RegInfo->getBaseRegister();1516Offset -= SPAdj;1517}1518return Offset;1519}15201521void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,1522MachineBasicBlock::iterator MI,1523ArrayRef<CalleeSavedInfo> CSI,1524unsigned StmOpc, unsigned StrOpc,1525bool NoGap, bool (*Func)(unsigned, bool),1526unsigned NumAlignedDPRCS2Regs,1527unsigned MIFlags) const {1528MachineFunction &MF = *MBB.getParent();1529const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();1530const TargetRegisterInfo &TRI = *STI.getRegisterInfo();15311532DebugLoc DL;15331534using RegAndKill = std::pair<unsigned, bool>;15351536SmallVector<RegAndKill, 4> Regs;1537unsigned i = CSI.size();1538while (i != 0) {1539unsigned LastReg = 0;1540for (; i != 0; --i) {1541Register Reg = CSI[i-1].getReg();1542if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;15431544// D-registers in the aligned area DPRCS2 are NOT spilled here.1545if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)1546continue;15471548const MachineRegisterInfo &MRI = MF.getRegInfo();1549bool isLiveIn = MRI.isLiveIn(Reg);1550if (!isLiveIn && !MRI.isReserved(Reg))1551MBB.addLiveIn(Reg);1552// If NoGap is true, push consecutive registers and then leave the rest1553// for other instructions. 
e.g.1554// vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}1555if (NoGap && LastReg && LastReg != Reg-1)1556break;1557LastReg = Reg;1558// Do not set a kill flag on values that are also marked as live-in. This1559// happens with the @llvm-returnaddress intrinsic and with arguments1560// passed in callee saved registers.1561// Omitting the kill flags is conservatively correct even if the live-in1562// is not used after all.1563Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));1564}15651566if (Regs.empty())1567continue;15681569llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {1570return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);1571});15721573if (Regs.size() > 1 || StrOpc== 0) {1574MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)1575.addReg(ARM::SP)1576.setMIFlags(MIFlags)1577.add(predOps(ARMCC::AL));1578for (unsigned i = 0, e = Regs.size(); i < e; ++i)1579MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));1580} else if (Regs.size() == 1) {1581BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)1582.addReg(Regs[0].first, getKillRegState(Regs[0].second))1583.addReg(ARM::SP)1584.setMIFlags(MIFlags)1585.addImm(-4)1586.add(predOps(ARMCC::AL));1587}1588Regs.clear();15891590// Put any subsequent vpush instructions before this one: they will refer to1591// higher register numbers so need to be pushed first in order to preserve1592// monotonicity.1593if (MI != MBB.begin())1594--MI;1595}1596}15971598void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,1599MachineBasicBlock::iterator MI,1600MutableArrayRef<CalleeSavedInfo> CSI,1601unsigned LdmOpc, unsigned LdrOpc,1602bool isVarArg, bool NoGap,1603bool (*Func)(unsigned, bool),1604unsigned NumAlignedDPRCS2Regs) const {1605MachineFunction &MF = *MBB.getParent();1606const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();1607const TargetRegisterInfo &TRI = *STI.getRegisterInfo();1608ARMFunctionInfo *AFI = 
MF.getInfo<ARMFunctionInfo>();1609bool hasPAC = AFI->shouldSignReturnAddress();1610DebugLoc DL;1611bool isTailCall = false;1612bool isInterrupt = false;1613bool isTrap = false;1614bool isCmseEntry = false;1615if (MBB.end() != MI) {1616DL = MI->getDebugLoc();1617unsigned RetOpcode = MI->getOpcode();1618isTailCall =1619(RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||1620RetOpcode == ARM::TCRETURNrinotr12);1621isInterrupt =1622RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;1623isTrap =1624RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||1625RetOpcode == ARM::tTRAP;1626isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);1627}16281629SmallVector<unsigned, 4> Regs;1630unsigned i = CSI.size();1631while (i != 0) {1632unsigned LastReg = 0;1633bool DeleteRet = false;1634for (; i != 0; --i) {1635CalleeSavedInfo &Info = CSI[i-1];1636Register Reg = Info.getReg();1637if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;16381639// The aligned reloads from area DPRCS2 are not inserted here.1640if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)1641continue;1642if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&1643!isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&1644STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&1645!STI.splitFramePointerPush(MF)) {1646Reg = ARM::PC;1647// Fold the return instruction into the LDM.1648DeleteRet = true;1649LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;1650}16511652// If NoGap is true, pop consecutive registers and then leave the rest1653// for other instructions. 
e.g.1654// vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}1655if (NoGap && LastReg && LastReg != Reg-1)1656break;16571658LastReg = Reg;1659Regs.push_back(Reg);1660}16611662if (Regs.empty())1663continue;16641665llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {1666return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);1667});16681669if (Regs.size() > 1 || LdrOpc == 0) {1670MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)1671.addReg(ARM::SP)1672.add(predOps(ARMCC::AL))1673.setMIFlags(MachineInstr::FrameDestroy);1674for (unsigned Reg : Regs)1675MIB.addReg(Reg, getDefRegState(true));1676if (DeleteRet) {1677if (MI != MBB.end()) {1678MIB.copyImplicitOps(*MI);1679MI->eraseFromParent();1680}1681}1682MI = MIB;1683} else if (Regs.size() == 1) {1684// If we adjusted the reg to PC from LR above, switch it back here. We1685// only do that for LDM.1686if (Regs[0] == ARM::PC)1687Regs[0] = ARM::LR;1688MachineInstrBuilder MIB =1689BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])1690.addReg(ARM::SP, RegState::Define)1691.addReg(ARM::SP)1692.setMIFlags(MachineInstr::FrameDestroy);1693// ARM mode needs an extra reg0 here due to addrmode2. Will go away once1694// that refactoring is complete (eventually).1695if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {1696MIB.addReg(0);1697MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift));1698} else1699MIB.addImm(4);1700MIB.add(predOps(ARMCC::AL));1701}1702Regs.clear();17031704// Put any subsequent vpop instructions after this one: they will refer to1705// higher register numbers so need to be popped afterwards.1706if (MI != MBB.end())1707++MI;1708}1709}17101711/// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers1712/// starting from d8. 
Also insert stack realignment code and leave the stack1713/// pointer pointing to the d8 spill slot.1714static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB,1715MachineBasicBlock::iterator MI,1716unsigned NumAlignedDPRCS2Regs,1717ArrayRef<CalleeSavedInfo> CSI,1718const TargetRegisterInfo *TRI) {1719MachineFunction &MF = *MBB.getParent();1720ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();1721DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();1722const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();1723MachineFrameInfo &MFI = MF.getFrameInfo();17241725// Mark the D-register spill slots as properly aligned. Since MFI computes1726// stack slot layout backwards, this can actually mean that the d-reg stack1727// slot offsets can be wrong. The offset for d8 will always be correct.1728for (const CalleeSavedInfo &I : CSI) {1729unsigned DNum = I.getReg() - ARM::D8;1730if (DNum > NumAlignedDPRCS2Regs - 1)1731continue;1732int FI = I.getFrameIdx();1733// The even-numbered registers will be 16-byte aligned, the odd-numbered1734// registers will be 8-byte aligned.1735MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));17361737// The stack slot for D8 needs to be maximally aligned because this is1738// actually the point where we align the stack pointer. MachineFrameInfo1739// computes all offsets relative to the incoming stack pointer which is a1740// bit weird when realigning the stack. Any extra padding for this1741// over-alignment is not realized because the code inserted below adjusts1742// the stack pointer by numregs * 8 before aligning the stack pointer.1743if (DNum == 0)1744MFI.setObjectAlignment(FI, MFI.getMaxAlign());1745}17461747// Move the stack pointer to the d8 spill slot, and align it at the same1748// time. 
Leave the stack slot address in the scratch register r4.1749//1750// sub r4, sp, #numregs * 81751// bic r4, r4, #align - 11752// mov sp, r41753//1754bool isThumb = AFI->isThumbFunction();1755assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");1756AFI->setShouldRestoreSPFromFP(true);17571758// sub r4, sp, #numregs * 81759// The immediate is <= 64, so it doesn't need any special encoding.1760unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;1761BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)1762.addReg(ARM::SP)1763.addImm(8 * NumAlignedDPRCS2Regs)1764.add(predOps(ARMCC::AL))1765.add(condCodeOp());17661767Align MaxAlign = MF.getFrameInfo().getMaxAlign();1768// We must set parameter MustBeSingleInstruction to true, since1769// skipAlignedDPRCS2Spills expects exactly 3 instructions to perform1770// stack alignment. Luckily, this can always be done since all ARM1771// architecture versions that support Neon also support the BFC1772// instruction.1773emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);17741775// mov sp, r41776// The stack pointer must be adjusted before spilling anything, otherwise1777// the stack slots could be clobbered by an interrupt handler.1778// Leave r4 live, it is used below.1779Opc = isThumb ? 
ARM::tMOVr : ARM::MOVr;1780MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)1781.addReg(ARM::R4)1782.add(predOps(ARMCC::AL));1783if (!isThumb)1784MIB.add(condCodeOp());17851786// Now spill NumAlignedDPRCS2Regs registers starting from d8.1787// r4 holds the stack slot address.1788unsigned NextReg = ARM::D8;17891790// 16-byte aligned vst1.64 with 4 d-regs and address writeback.1791// The writeback is only needed when emitting two vst1.64 instructions.1792if (NumAlignedDPRCS2Regs >= 6) {1793unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1794&ARM::QQPRRegClass);1795MBB.addLiveIn(SupReg);1796BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)1797.addReg(ARM::R4, RegState::Kill)1798.addImm(16)1799.addReg(NextReg)1800.addReg(SupReg, RegState::ImplicitKill)1801.add(predOps(ARMCC::AL));1802NextReg += 4;1803NumAlignedDPRCS2Regs -= 4;1804}18051806// We won't modify r4 beyond this point. It currently points to the next1807// register to be spilled.1808unsigned R4BaseReg = NextReg;18091810// 16-byte aligned vst1.64 with 4 d-regs, no writeback.1811if (NumAlignedDPRCS2Regs >= 4) {1812unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1813&ARM::QQPRRegClass);1814MBB.addLiveIn(SupReg);1815BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))1816.addReg(ARM::R4)1817.addImm(16)1818.addReg(NextReg)1819.addReg(SupReg, RegState::ImplicitKill)1820.add(predOps(ARMCC::AL));1821NextReg += 4;1822NumAlignedDPRCS2Regs -= 4;1823}18241825// 16-byte aligned vst1.64 with 2 d-regs.1826if (NumAlignedDPRCS2Regs >= 2) {1827unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1828&ARM::QPRRegClass);1829MBB.addLiveIn(SupReg);1830BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))1831.addReg(ARM::R4)1832.addImm(16)1833.addReg(SupReg)1834.add(predOps(ARMCC::AL));1835NextReg += 2;1836NumAlignedDPRCS2Regs -= 2;1837}18381839// Finally, use a vanilla vstr.64 for the odd last register.1840if (NumAlignedDPRCS2Regs) {1841MBB.addLiveIn(NextReg);1842// vstr.64 
uses addrmode5 which has an offset scale of 4.1843BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))1844.addReg(NextReg)1845.addReg(ARM::R4)1846.addImm((NextReg - R4BaseReg) * 2)1847.add(predOps(ARMCC::AL));1848}18491850// The last spill instruction inserted should kill the scratch register r4.1851std::prev(MI)->addRegisterKilled(ARM::R4, TRI);1852}18531854/// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an1855/// iterator to the following instruction.1856static MachineBasicBlock::iterator1857skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI,1858unsigned NumAlignedDPRCS2Regs) {1859// sub r4, sp, #numregs * 81860// bic r4, r4, #align - 11861// mov sp, r41862++MI; ++MI; ++MI;1863assert(MI->mayStore() && "Expecting spill instruction");18641865// These switches all fall through.1866switch(NumAlignedDPRCS2Regs) {1867case 7:1868++MI;1869assert(MI->mayStore() && "Expecting spill instruction");1870[[fallthrough]];1871default:1872++MI;1873assert(MI->mayStore() && "Expecting spill instruction");1874[[fallthrough]];1875case 1:1876case 2:1877case 4:1878assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");1879++MI;1880}1881return MI;1882}18831884/// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers1885/// starting from d8. These instructions are assumed to execute while the1886/// stack is still aligned, unlike the code inserted by emitPopInst.1887static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB,1888MachineBasicBlock::iterator MI,1889unsigned NumAlignedDPRCS2Regs,1890ArrayRef<CalleeSavedInfo> CSI,1891const TargetRegisterInfo *TRI) {1892MachineFunction &MF = *MBB.getParent();1893ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();1894DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc() : DebugLoc();1895const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();18961897// Find the frame index assigned to d8.1898int D8SpillFI = 0;1899for (const CalleeSavedInfo &I : CSI)1900if (I.getReg() == ARM::D8) {1901D8SpillFI = I.getFrameIdx();1902break;1903}19041905// Materialize the address of the d8 spill slot into the scratch register r4.1906// This can be fairly complicated if the stack frame is large, so just use1907// the normal frame index elimination mechanism to do it. This code runs as1908// the initial part of the epilog where the stack and base pointers haven't1909// been changed yet.1910bool isThumb = AFI->isThumbFunction();1911assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");19121913unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;1914BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)1915.addFrameIndex(D8SpillFI)1916.addImm(0)1917.add(predOps(ARMCC::AL))1918.add(condCodeOp());19191920// Now restore NumAlignedDPRCS2Regs registers starting from d8.1921unsigned NextReg = ARM::D8;19221923// 16-byte aligned vld1.64 with 4 d-regs and writeback.1924if (NumAlignedDPRCS2Regs >= 6) {1925unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1926&ARM::QQPRRegClass);1927BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)1928.addReg(ARM::R4, RegState::Define)1929.addReg(ARM::R4, RegState::Kill)1930.addImm(16)1931.addReg(SupReg, RegState::ImplicitDefine)1932.add(predOps(ARMCC::AL));1933NextReg += 4;1934NumAlignedDPRCS2Regs -= 4;1935}19361937// We won't modify r4 beyond this point. 
It currently points to the next1938// register to be spilled.1939unsigned R4BaseReg = NextReg;19401941// 16-byte aligned vld1.64 with 4 d-regs, no writeback.1942if (NumAlignedDPRCS2Regs >= 4) {1943unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1944&ARM::QQPRRegClass);1945BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)1946.addReg(ARM::R4)1947.addImm(16)1948.addReg(SupReg, RegState::ImplicitDefine)1949.add(predOps(ARMCC::AL));1950NextReg += 4;1951NumAlignedDPRCS2Regs -= 4;1952}19531954// 16-byte aligned vld1.64 with 2 d-regs.1955if (NumAlignedDPRCS2Regs >= 2) {1956unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,1957&ARM::QPRRegClass);1958BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)1959.addReg(ARM::R4)1960.addImm(16)1961.add(predOps(ARMCC::AL));1962NextReg += 2;1963NumAlignedDPRCS2Regs -= 2;1964}19651966// Finally, use a vanilla vldr.64 for the remaining odd register.1967if (NumAlignedDPRCS2Regs)1968BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)1969.addReg(ARM::R4)1970.addImm(2 * (NextReg - R4BaseReg))1971.add(predOps(ARMCC::AL));19721973// Last store kills r4.1974std::prev(MI)->addRegisterKilled(ARM::R4, TRI);1975}19761977bool ARMFrameLowering::spillCalleeSavedRegisters(1978MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,1979ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {1980if (CSI.empty())1981return false;19821983MachineFunction &MF = *MBB.getParent();1984ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();19851986unsigned PushOpc = AFI->isThumbFunction() ? 
ARM::t2STMDB_UPD : ARM::STMDB_UPD;1987unsigned PushOneOpc = AFI->isThumbFunction() ?1988ARM::t2STR_PRE : ARM::STR_PRE_IMM;1989unsigned FltOpc = ARM::VSTMDDB_UPD;1990unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();1991// Compute PAC in R12.1992if (AFI->shouldSignReturnAddress()) {1993BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))1994.setMIFlags(MachineInstr::FrameSetup);1995}1996// Save the non-secure floating point context.1997if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {1998return C.getReg() == ARM::FPCXTNS;1999})) {2000BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),2001ARM::SP)2002.addReg(ARM::SP)2003.addImm(-4)2004.add(predOps(ARMCC::AL));2005}2006if (STI.splitFramePointerPush(MF)) {2007emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,2008&isSplitFPArea1Register, 0, MachineInstr::FrameSetup);2009emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,2010NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);2011emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,2012&isSplitFPArea2Register, 0, MachineInstr::FrameSetup);2013} else {2014emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,20150, MachineInstr::FrameSetup);2016emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,20170, MachineInstr::FrameSetup);2018emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,2019NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);2020}20212022// The code above does not insert spill code for the aligned DPRCS2 registers.2023// The stack realignment code will be inserted between the push instructions2024// and these spills.2025if (NumAlignedDPRCS2Regs)2026emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);20272028return true;2029}20302031bool ARMFrameLowering::restoreCalleeSavedRegisters(2032MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,2033MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {2034if 
(CSI.empty())2035return false;20362037MachineFunction &MF = *MBB.getParent();2038ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();2039bool isVarArg = AFI->getArgRegsSaveSize() > 0;2040unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();20412042// The emitPopInst calls below do not insert reloads for the aligned DPRCS22043// registers. Do that here instead.2044if (NumAlignedDPRCS2Regs)2045emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);20462047unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;2048unsigned LdrOpc =2049AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;2050unsigned FltOpc = ARM::VLDMDIA_UPD;2051if (STI.splitFramePointerPush(MF)) {2052emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,2053&isSplitFPArea2Register, 0);2054emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,2055NumAlignedDPRCS2Regs);2056emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,2057&isSplitFPArea1Register, 0);2058} else {2059emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,2060NumAlignedDPRCS2Regs);2061emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,2062&isARMArea2Register, 0);2063emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,2064&isARMArea1Register, 0);2065}20662067return true;2068}20692070// FIXME: Make generic?2071static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF,2072const ARMBaseInstrInfo &TII) {2073unsigned FnSize = 0;2074for (auto &MBB : MF) {2075for (auto &MI : MBB)2076FnSize += TII.getInstSizeInBytes(MI);2077}2078if (MF.getJumpTableInfo())2079for (auto &Table: MF.getJumpTableInfo()->getJumpTables())2080FnSize += Table.MBBs.size() * 4;2081FnSize += MF.getConstantPool()->getConstants().size() * 4;2082return FnSize;2083}20842085/// estimateRSStackSizeLimit - Look at each instruction that references stack2086/// frames and return the stack size limit beyond which some of these2087/// instructions will 
require a scratch register during their expansion later.2088// FIXME: Move to TII?2089static unsigned estimateRSStackSizeLimit(MachineFunction &MF,2090const TargetFrameLowering *TFI,2091bool &HasNonSPFrameIndex) {2092const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();2093const ARMBaseInstrInfo &TII =2094*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());2095const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();2096unsigned Limit = (1 << 12) - 1;2097for (auto &MBB : MF) {2098for (auto &MI : MBB) {2099if (MI.isDebugInstr())2100continue;2101for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {2102if (!MI.getOperand(i).isFI())2103continue;21042105// When using ADDri to get the address of a stack object, 255 is the2106// largest offset guaranteed to fit in the immediate offset.2107if (MI.getOpcode() == ARM::ADDri) {2108Limit = std::min(Limit, (1U << 8) - 1);2109break;2110}2111// t2ADDri will not require an extra register, it can reuse the2112// destination.2113if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)2114break;21152116const MCInstrDesc &MCID = MI.getDesc();2117const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);2118if (RegClass && !RegClass->contains(ARM::SP))2119HasNonSPFrameIndex = true;21202121// Otherwise check the addressing mode.2122switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {2123case ARMII::AddrMode_i12:2124case ARMII::AddrMode2:2125// Default 12 bit limit.2126break;2127case ARMII::AddrMode3:2128case ARMII::AddrModeT2_i8neg:2129Limit = std::min(Limit, (1U << 8) - 1);2130break;2131case ARMII::AddrMode5FP16:2132Limit = std::min(Limit, ((1U << 8) - 1) * 2);2133break;2134case ARMII::AddrMode5:2135case ARMII::AddrModeT2_i8s4:2136case ARMII::AddrModeT2_ldrex:2137Limit = std::min(Limit, ((1U << 8) - 1) * 4);2138break;2139case ARMII::AddrModeT2_i12:2140// i12 supports only positive offset so these will be converted to2141// i8 opcodes. 
See llvm::rewriteT2FrameIndex.2142if (TFI->hasFP(MF) && AFI->hasStackFrame())2143Limit = std::min(Limit, (1U << 8) - 1);2144break;2145case ARMII::AddrMode4:2146case ARMII::AddrMode6:2147// Addressing modes 4 & 6 (load/store) instructions can't encode an2148// immediate offset for stack references.2149return 0;2150case ARMII::AddrModeT2_i7:2151Limit = std::min(Limit, ((1U << 7) - 1) * 1);2152break;2153case ARMII::AddrModeT2_i7s2:2154Limit = std::min(Limit, ((1U << 7) - 1) * 2);2155break;2156case ARMII::AddrModeT2_i7s4:2157Limit = std::min(Limit, ((1U << 7) - 1) * 4);2158break;2159default:2160llvm_unreachable("Unhandled addressing mode in stack size limit calculation");2161}2162break; // At most one FI per instruction2163}2164}2165}21662167return Limit;2168}21692170// In functions that realign the stack, it can be an advantage to spill the2171// callee-saved vector registers after realigning the stack. The vst1 and vld12172// instructions take alignment hints that can improve performance.2173static void2174checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs) {2175MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);2176if (!SpillAlignedNEONRegs)2177return;21782179// Naked functions don't spill callee-saved registers.2180if (MF.getFunction().hasFnAttribute(Attribute::Naked))2181return;21822183// We are planning to use NEON instructions vst1 / vld1.2184if (!MF.getSubtarget<ARMSubtarget>().hasNEON())2185return;21862187// Don't bother if the default stack alignment is sufficiently high.2188if (MF.getSubtarget().getFrameLowering()->getStackAlign() >= Align(8))2189return;21902191// Aligned spills require stack realignment.2192if (!static_cast<const ARMBaseRegisterInfo *>(2193MF.getSubtarget().getRegisterInfo())->canRealignStack(MF))2194return;21952196// We always spill contiguous d-registers starting from d8. Count how many2197// needs spilling. 
The register allocator will almost always use the2198// callee-saved registers in order, but it can happen that there are holes in2199// the range. Registers above the hole will be spilled to the standard DPRCS2200// area.2201unsigned NumSpills = 0;2202for (; NumSpills < 8; ++NumSpills)2203if (!SavedRegs.test(ARM::D8 + NumSpills))2204break;22052206// Don't do this for just one d-register. It's not worth it.2207if (NumSpills < 2)2208return;22092210// Spill the first NumSpills D-registers after realigning the stack.2211MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);22122213// A scratch register is required for the vst1 / vld1 instructions.2214SavedRegs.set(ARM::R4);2215}22162217bool ARMFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {2218// For CMSE entry functions, we want to save the FPCXT_NS immediately2219// upon function entry (resp. restore it immmediately before return)2220if (STI.hasV8_1MMainlineOps() &&2221MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction())2222return false;22232224// We are disabling shrinkwrapping for now when PAC is enabled, as2225// shrinkwrapping can cause clobbering of r12 when the PAC code is2226// generated. A follow-up patch will fix this in a more performant manner.2227if (MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress(2228true /* SpillsLR */))2229return false;22302231return true;2232}22332234bool ARMFrameLowering::requiresAAPCSFrameRecord(2235const MachineFunction &MF) const {2236const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();2237return Subtarget.createAAPCSFrameChain() && hasFP(MF);2238}22392240// Thumb1 may require a spill when storing to a frame index through FP (or any2241// access with execute-only), for cases where FP is a high register (R11). 
This2242// scans the function for cases where this may happen.2243static bool canSpillOnFrameIndexAccess(const MachineFunction &MF,2244const TargetFrameLowering &TFI) {2245const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();2246if (!AFI->isThumb1OnlyFunction())2247return false;22482249const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();2250for (const auto &MBB : MF)2251for (const auto &MI : MBB)2252if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||2253STI.genExecuteOnly())2254for (const auto &Op : MI.operands())2255if (Op.isFI()) {2256Register Reg;2257TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);2258if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)2259return true;2260}2261return false;2262}22632264void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF,2265BitVector &SavedRegs,2266RegScavenger *RS) const {2267TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);2268// This tells PEI to spill the FP as if it is any other callee-save register2269// to take advantage the eliminateFrameIndex machinery. 
This also ensures it2270// is spilled in the order specified by getCalleeSavedRegs() to make it easier2271// to combine multiple loads / stores.2272bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));2273bool CS1Spilled = false;2274bool LRSpilled = false;2275unsigned NumGPRSpills = 0;2276unsigned NumFPRSpills = 0;2277SmallVector<unsigned, 4> UnspilledCS1GPRs;2278SmallVector<unsigned, 4> UnspilledCS2GPRs;2279const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(2280MF.getSubtarget().getRegisterInfo());2281const ARMBaseInstrInfo &TII =2282*static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());2283ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();2284MachineFrameInfo &MFI = MF.getFrameInfo();2285MachineRegisterInfo &MRI = MF.getRegInfo();2286const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();2287(void)TRI; // Silence unused warning in non-assert builds.2288Register FramePtr = RegInfo->getFrameRegister(MF);22892290// Spill R4 if Thumb2 function requires stack realignment - it will be used as2291// scratch register. Also spill R4 if Thumb2 function has varsized objects,2292// since it's not always possible to restore sp from fp in a single2293// instruction.2294// FIXME: It will be better just to find spare register here.2295if (AFI->isThumb2Function() &&2296(MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))2297SavedRegs.set(ARM::R4);22982299// If a stack probe will be emitted, spill R4 and LR, since they are2300// clobbered by the stack probe call.2301// This estimate should be a safe, conservative estimate. 
The actual2302// stack probe is enabled based on the size of the local objects;2303// this estimate also includes the varargs store size.2304if (STI.isTargetWindows() &&2305WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {2306SavedRegs.set(ARM::R4);2307SavedRegs.set(ARM::LR);2308}23092310if (AFI->isThumb1OnlyFunction()) {2311// Spill LR if Thumb1 function uses variable length argument lists.2312if (AFI->getArgRegsSaveSize() > 0)2313SavedRegs.set(ARM::LR);23142315// Spill R4 if Thumb1 epilogue has to restore SP from FP or the function2316// requires stack alignment. We don't know for sure what the stack size2317// will be, but for this, an estimate is good enough. If there anything2318// changes it, it'll be a spill, which implies we've used all the registers2319// and so R4 is already used, so not marking it here will be OK.2320// FIXME: It will be better just to find spare register here.2321if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||2322MFI.estimateStackSize(MF) > 508)2323SavedRegs.set(ARM::R4);2324}23252326// See if we can spill vector registers to aligned stack.2327checkNumAlignedDPRCS2Regs(MF, SavedRegs);23282329// Spill the BasePtr if it's used.2330if (RegInfo->hasBasePointer(MF))2331SavedRegs.set(RegInfo->getBaseRegister());23322333// On v8.1-M.Main CMSE entry functions save/restore FPCXT.2334if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())2335CanEliminateFrame = false;23362337// When return address signing is enabled R12 is treated as callee-saved.2338if (AFI->shouldSignReturnAddress())2339CanEliminateFrame = false;23402341// Don't spill FP if the frame can be eliminated. 
This is determined2342// by scanning the callee-save registers to see if any is modified.2343const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);2344for (unsigned i = 0; CSRegs[i]; ++i) {2345unsigned Reg = CSRegs[i];2346bool Spilled = false;2347if (SavedRegs.test(Reg)) {2348Spilled = true;2349CanEliminateFrame = false;2350}23512352if (!ARM::GPRRegClass.contains(Reg)) {2353if (Spilled) {2354if (ARM::SPRRegClass.contains(Reg))2355NumFPRSpills++;2356else if (ARM::DPRRegClass.contains(Reg))2357NumFPRSpills += 2;2358else if (ARM::QPRRegClass.contains(Reg))2359NumFPRSpills += 4;2360}2361continue;2362}23632364if (Spilled) {2365NumGPRSpills++;23662367if (!STI.splitFramePushPop(MF)) {2368if (Reg == ARM::LR)2369LRSpilled = true;2370CS1Spilled = true;2371continue;2372}23732374// Keep track if LR and any of R4, R5, R6, and R7 is spilled.2375switch (Reg) {2376case ARM::LR:2377LRSpilled = true;2378[[fallthrough]];2379case ARM::R0: case ARM::R1:2380case ARM::R2: case ARM::R3:2381case ARM::R4: case ARM::R5:2382case ARM::R6: case ARM::R7:2383CS1Spilled = true;2384break;2385default:2386break;2387}2388} else {2389if (!STI.splitFramePushPop(MF)) {2390UnspilledCS1GPRs.push_back(Reg);2391continue;2392}23932394switch (Reg) {2395case ARM::R0: case ARM::R1:2396case ARM::R2: case ARM::R3:2397case ARM::R4: case ARM::R5:2398case ARM::R6: case ARM::R7:2399case ARM::LR:2400UnspilledCS1GPRs.push_back(Reg);2401break;2402default:2403UnspilledCS2GPRs.push_back(Reg);2404break;2405}2406}2407}24082409bool ForceLRSpill = false;2410if (!LRSpilled && AFI->isThumb1OnlyFunction()) {2411unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);2412// Force LR to be spilled if the Thumb function size is > 2048. 
This enables2413// use of BL to implement far jump.2414if (FnSize >= (1 << 11)) {2415CanEliminateFrame = false;2416ForceLRSpill = true;2417}2418}24192420// If any of the stack slot references may be out of range of an immediate2421// offset, make sure a register (or a spill slot) is available for the2422// register scavenger. Note that if we're indexing off the frame pointer, the2423// effective stack size is 4 bytes larger since the FP points to the stack2424// slot of the previous FP. Also, if we have variable sized objects in the2425// function, stack slot references will often be negative, and some of2426// our instructions are positive-offset only, so conservatively consider2427// that case to want a spill slot (or register) as well. Similarly, if2428// the function adjusts the stack pointer during execution and the2429// adjustments aren't already part of our stack size estimate, our offset2430// calculations may be off, so be conservative.2431// FIXME: We could add logic to be more precise about negative offsets2432// and which instructions will need a scratch register for them. 
Is it2433// worth the effort and added fragility?2434unsigned EstimatedStackSize =2435MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);24362437// Determine biggest (positive) SP offset in MachineFrameInfo.2438int MaxFixedOffset = 0;2439for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {2440int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);2441MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);2442}24432444bool HasFP = hasFP(MF);2445if (HasFP) {2446if (AFI->hasStackFrame())2447EstimatedStackSize += 4;2448} else {2449// If FP is not used, SP will be used to access arguments, so count the2450// size of arguments into the estimation.2451EstimatedStackSize += MaxFixedOffset;2452}2453EstimatedStackSize += 16; // For possible paddings.24542455unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;2456bool HasNonSPFrameIndex = false;2457if (AFI->isThumb1OnlyFunction()) {2458// For Thumb1, don't bother to iterate over the function. The only2459// instruction that requires an emergency spill slot is a store to a2460// frame index.2461//2462// tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned2463// immediate. 
tSTRi, which is used for bp- and fp-relative accesses, has2464// a 5-bit unsigned immediate.2465//2466// We could try to check if the function actually contains a tSTRspi2467// that might need the spill slot, but it's not really important.2468// Functions with VLAs or extremely large call frames are rare, and2469// if a function is allocating more than 1KB of stack, an extra 4-byte2470// slot probably isn't relevant.2471//2472// A special case is the scenario where r11 is used as FP, where accesses2473// to a frame index will require its value to be moved into a low reg.2474// This is handled later on, once we are able to determine if we have any2475// fp-relative accesses.2476if (RegInfo->hasBasePointer(MF))2477EstimatedRSStackSizeLimit = (1U << 5) * 4;2478else2479EstimatedRSStackSizeLimit = (1U << 8) * 4;2480EstimatedRSFixedSizeLimit = (1U << 5) * 4;2481} else {2482EstimatedRSStackSizeLimit =2483estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);2484EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;2485}2486// Final estimate of whether sp or bp-relative accesses might require2487// scavenging.2488bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;24892490// If the stack pointer moves and we don't have a base pointer, the2491// estimate logic doesn't work. The actual offsets might be larger when2492// we're constructing a call frame, or we might need to use negative2493// offsets from fp.2494bool HasMovingSP = MFI.hasVarSizedObjects() ||2495(MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));2496bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;24972498// If we have a frame pointer, we assume arguments will be accessed2499// relative to the frame pointer. 
Check whether fp-relative accesses to2500// arguments require scavenging.2501//2502// We could do slightly better on Thumb1; in some cases, an sp-relative2503// offset would be legal even though an fp-relative offset is not.2504int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);2505bool HasLargeArgumentList =2506HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;25072508bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||2509HasLargeArgumentList || HasNonSPFrameIndex;2510LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit2511<< "; EstimatedStack: " << EstimatedStackSize2512<< "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset2513<< "; BigFrameOffsets: " << BigFrameOffsets << "\n");2514if (BigFrameOffsets ||2515!CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {2516AFI->setHasStackFrame(true);25172518if (HasFP) {2519SavedRegs.set(FramePtr);2520// If the frame pointer is required by the ABI, also spill LR so that we2521// emit a complete frame record.2522if ((requiresAAPCSFrameRecord(MF) ||2523MF.getTarget().Options.DisableFramePointerElim(MF)) &&2524!LRSpilled) {2525SavedRegs.set(ARM::LR);2526LRSpilled = true;2527NumGPRSpills++;2528auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);2529if (LRPos != UnspilledCS1GPRs.end())2530UnspilledCS1GPRs.erase(LRPos);2531}2532auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);2533if (FPPos != UnspilledCS1GPRs.end())2534UnspilledCS1GPRs.erase(FPPos);2535NumGPRSpills++;2536if (FramePtr == ARM::R7)2537CS1Spilled = true;2538}25392540// This is the number of extra spills inserted for callee-save GPRs which2541// would not otherwise be used by the function. When greater than zero it2542// guaranteees that it is possible to scavenge a register to hold the2543// address of a stack slot. On Thumb1, the register must be a valid operand2544// to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. 
r4-r112545// or lr.2546//2547// If we don't insert a spill, we instead allocate an emergency spill2548// slot, which can be used by scavenging to spill an arbitrary register.2549//2550// We currently don't try to figure out whether any specific instruction2551// requires scavening an additional register.2552unsigned NumExtraCSSpill = 0;25532554if (AFI->isThumb1OnlyFunction()) {2555// For Thumb1-only targets, we need some low registers when we save and2556// restore the high registers (which aren't allocatable, but could be2557// used by inline assembly) because the push/pop instructions can not2558// access high registers. If necessary, we might need to push more low2559// registers to ensure that there is at least one free that can be used2560// for the saving & restoring, and preferably we should ensure that as2561// many as are needed are available so that fewer push/pop instructions2562// are required.25632564// Low registers which are not currently pushed, but could be (r4-r7).2565SmallVector<unsigned, 4> AvailableRegs;25662567// Unused argument registers (r0-r3) can be clobbered in the prologue for2568// free.2569int EntryRegDeficit = 0;2570for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {2571if (!MF.getRegInfo().isLiveIn(Reg)) {2572--EntryRegDeficit;2573LLVM_DEBUG(dbgs()2574<< printReg(Reg, TRI)2575<< " is unused argument register, EntryRegDeficit = "2576<< EntryRegDeficit << "\n");2577}2578}25792580// Unused return registers can be clobbered in the epilogue for free.2581int ExitRegDeficit = AFI->getReturnRegsCount() - 4;2582LLVM_DEBUG(dbgs() << AFI->getReturnRegsCount()2583<< " return regs used, ExitRegDeficit = "2584<< ExitRegDeficit << "\n");25852586int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);2587LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");25882589// r4-r6 can be used in the prologue if they are pushed by the first push2590// instruction.2591for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {2592if 
(SavedRegs.test(Reg)) {2593--RegDeficit;2594LLVM_DEBUG(dbgs() << printReg(Reg, TRI)2595<< " is saved low register, RegDeficit = "2596<< RegDeficit << "\n");2597} else {2598AvailableRegs.push_back(Reg);2599LLVM_DEBUG(2600dbgs()2601<< printReg(Reg, TRI)2602<< " is non-saved low register, adding to AvailableRegs\n");2603}2604}26052606// r7 can be used if it is not being used as the frame pointer.2607if (!HasFP || FramePtr != ARM::R7) {2608if (SavedRegs.test(ARM::R7)) {2609--RegDeficit;2610LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "2611<< RegDeficit << "\n");2612} else {2613AvailableRegs.push_back(ARM::R7);2614LLVM_DEBUG(2615dbgs()2616<< "%r7 is non-saved low register, adding to AvailableRegs\n");2617}2618}26192620// Each of r8-r11 needs to be copied to a low register, then pushed.2621for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {2622if (SavedRegs.test(Reg)) {2623++RegDeficit;2624LLVM_DEBUG(dbgs() << printReg(Reg, TRI)2625<< " is saved high register, RegDeficit = "2626<< RegDeficit << "\n");2627}2628}26292630// LR can only be used by PUSH, not POP, and can't be used at all if the2631// llvm.returnaddress intrinsic is used. This is only worth doing if we2632// are more limited at function entry than exit.2633if ((EntryRegDeficit > ExitRegDeficit) &&2634!(MF.getRegInfo().isLiveIn(ARM::LR) &&2635MF.getFrameInfo().isReturnAddressTaken())) {2636if (SavedRegs.test(ARM::LR)) {2637--RegDeficit;2638LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "2639<< RegDeficit << "\n");2640} else {2641AvailableRegs.push_back(ARM::LR);2642LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");2643}2644}26452646// If there are more high registers that need pushing than low registers2647// available, push some more low registers so that we can use fewer push2648// instructions. 
This might not reduce RegDeficit all the way to zero,2649// because we can only guarantee that r4-r6 are available, but r8-r11 may2650// need saving.2651LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");2652for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {2653unsigned Reg = AvailableRegs.pop_back_val();2654LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)2655<< " to make up reg deficit\n");2656SavedRegs.set(Reg);2657NumGPRSpills++;2658CS1Spilled = true;2659assert(!MRI.isReserved(Reg) && "Should not be reserved");2660if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))2661NumExtraCSSpill++;2662UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));2663if (Reg == ARM::LR)2664LRSpilled = true;2665}2666LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit2667<< "\n");2668}26692670// Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to2671// restore LR in that case.2672bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();26732674// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.2675// Spill LR as well so we can fold BX_RET to the registers restore (LDM).2676if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {2677SavedRegs.set(ARM::LR);2678NumGPRSpills++;2679SmallVectorImpl<unsigned>::iterator LRPos;2680LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);2681if (LRPos != UnspilledCS1GPRs.end())2682UnspilledCS1GPRs.erase(LRPos);26832684ForceLRSpill = false;2685if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&2686!AFI->isThumb1OnlyFunction())2687NumExtraCSSpill++;2688}26892690// If stack and double are 8-byte aligned and we are spilling an odd number2691// of GPRs, spill one extra callee save GPR so we won't have to pad between2692// the integer and double callee save areas.2693LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");2694const Align TargetAlign = getStackAlign();2695if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) 
{2696if (CS1Spilled && !UnspilledCS1GPRs.empty()) {2697for (unsigned Reg : UnspilledCS1GPRs) {2698// Don't spill high register if the function is thumb. In the case of2699// Windows on ARM, accept R11 (frame pointer)2700if (!AFI->isThumbFunction() ||2701(STI.isTargetWindows() && Reg == ARM::R11) ||2702isARMLowRegister(Reg) ||2703(Reg == ARM::LR && !ExpensiveLRRestore)) {2704SavedRegs.set(Reg);2705LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)2706<< " to make up alignment\n");2707if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&2708!(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))2709NumExtraCSSpill++;2710break;2711}2712}2713} else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {2714unsigned Reg = UnspilledCS2GPRs.front();2715SavedRegs.set(Reg);2716LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)2717<< " to make up alignment\n");2718if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))2719NumExtraCSSpill++;2720}2721}27222723// Estimate if we might need to scavenge registers at some point in order2724// to materialize a stack offset. If so, either spill one additional2725// callee-saved register or reserve a special spill slot to facilitate2726// register scavenging. Thumb1 needs a spill slot for stack pointer2727// adjustments and for frame index accesses when FP is high register,2728// even when the frame itself is small.2729unsigned RegsNeeded = 0;2730if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {2731RegsNeeded++;2732// With thumb1 execute-only we may need an additional register for saving2733// and restoring the CPSR.2734if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())2735RegsNeeded++;2736}27372738if (RegsNeeded > NumExtraCSSpill) {2739// If any non-reserved CS register isn't spilled, just spill one or two2740// extra. 
// That should take care of it!
      unsigned NumExtras = TargetAlign.value() / 4;
      SmallVector<unsigned, 2> Extras;
      while (NumExtras && !UnspilledCS1GPRs.empty()) {
        unsigned Reg = UnspilledCS1GPRs.pop_back_val();
        if (!MRI.isReserved(Reg) &&
            (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
          Extras.push_back(Reg);
          NumExtras--;
        }
      }
      // For non-Thumb1 functions, also check for hi-reg CS registers
      if (!AFI->isThumb1OnlyFunction()) {
        while (NumExtras && !UnspilledCS2GPRs.empty()) {
          unsigned Reg = UnspilledCS2GPRs.pop_back_val();
          if (!MRI.isReserved(Reg)) {
            Extras.push_back(Reg);
            NumExtras--;
          }
        }
      }
      // Only commit the extra spills if the full quota was found.
      if (NumExtras == 0) {
        for (unsigned Reg : Extras) {
          SavedRegs.set(Reg);
          if (!MRI.isPhysRegUsed(Reg))
            NumExtraCSSpill++;
        }
      }
      // Whatever is still missing is covered by emergency spill slots, one per
      // register still needed (only when a scavenger was supplied).
      while ((RegsNeeded > NumExtraCSSpill) && RS) {
        // Reserve a slot closest to SP or frame pointer.
        LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
        const TargetRegisterClass &RC = ARM::GPRRegClass;
        unsigned Size = TRI->getSpillSize(RC);
        Align Alignment = TRI->getSpillAlign(RC);
        RS->addScavengingFrameIndex(
            MFI.CreateStackObject(Size, Alignment, false));
        --RegsNeeded;
      }
    }
  }

  if (ForceLRSpill)
    SavedRegs.set(ARM::LR);
  AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
}

/// Clear the Restored flag on LR's callee-saved entry when every return in the
/// function is a load-multiple/pop return (LDMIA_RET, t2LDMIA_RET, tPOP_RET).
///
/// Does nothing if the frame info does not hold valid callee-saved info, or if
/// LR has no callee-saved entry.
void ARMFrameLowering::updateLRRestored(MachineFunction &MF) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isCalleeSavedInfoValid())
    return;

  // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
  // into PC so it is not live out of the return block: Clear the Restored bit
  // in that case.
  for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
    if (Info.getReg() != ARM::LR)
      continue;
    // Non-return terminators are ignored; every return terminator must be one
    // of the three *_RET opcodes below.
    if (all_of(MF, [](const MachineBasicBlock &MBB) {
          return all_of(MBB.terminators(), [](const MachineInstr &Term) {
            return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
                   Term.getOpcode() == ARM::t2LDMIA_RET ||
                   Term.getOpcode() == ARM::tPOP_RET;
          });
        })) {
      Info.setRestored(false);
      break;
    }
  }
}

/// Run the generic TargetFrameLowering hook, then re-evaluate whether LR must
/// stay marked as restored.
void ARMFrameLowering::processFunctionBeforeFrameFinalized(
    MachineFunction &MF, RegScavenger *RS) const {
  TargetFrameLowering::processFunctionBeforeFrameFinalized(MF, RS);
  updateLRRestored(MF);
}

/// Collect the callee-saved registers through the base implementation and
/// additionally mark R0 when the function info reports that R0 is preserved.
void ARMFrameLowering::getCalleeSaves(const MachineFunction &MF,
                                      BitVector &SavedRegs) const {
  TargetFrameLowering::getCalleeSaves(MF, SavedRegs);

  // If we have the "returned" parameter attribute which guarantees that we
  // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
  // record that fact for IPRA.
  const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  if (AFI->getPreservesR0())
    SavedRegs.set(ARM::R0);
}

/// Add the ARM-specific pseudo callee-saved entries to \p CSI.
///
/// - FPCXTNS (marked as not restored) for CMSE non-secure entry functions on
///   targets with v8.1-M Mainline ops.
/// - R12 for functions that sign their return address, inserted in front of
///   the first R8-R11 or D-register entry.
///
/// Always returns false.
/// NOTE(review): presumably false tells the caller to carry on with its
/// default slot assignment - confirm against TargetFrameLowering.
bool ARMFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  // For CMSE entry functions, handle floating-point context as if it was a
  // callee-saved register.
  if (STI.hasV8_1MMainlineOps() &&
      MF.getInfo<ARMFunctionInfo>()->isCmseNSEntryFunction()) {
    CSI.emplace_back(ARM::FPCXTNS);
    CSI.back().setRestored(false);
  }

  // For functions, which sign their return address, upon function entry, the
  // return address PAC is computed in R12. Treat R12 as a callee-saved register
  // in this case.
  const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
  if (AFI.shouldSignReturnAddress()) {
    // The order of register must match the order we push them, because the
    // PEI assigns frame indices in that order. When compiling for return
    // address sign and authentication, we use split push, therefore the orders
    // we want are:
    // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
    CSI.insert(find_if(CSI,
                       [=](const auto &CS) {
                         Register Reg = CS.getReg();
                         return Reg == ARM::R10 || Reg == ARM::R11 ||
                                Reg == ARM::R8 || Reg == ARM::R9 ||
                                ARM::DPRRegClass.contains(Reg);
                       }),
               CalleeSavedInfo(ARM::R12));
  }

  return false;
}

/// Return the table of fixed spill slots and store its length in
/// \p NumEntries. The table has a single entry pairing FPCXTNS with offset -4.
const TargetFrameLowering::SpillSlot *
ARMFrameLowering::getCalleeSavedSpillSlots(unsigned &NumEntries) const {
  static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
  NumEntries = std::size(FixedSpillOffsets);
  return FixedSpillOffsets;
}

/// Replace the call-frame setup/destroy pseudo at \p I with an explicit SP
/// update where one is required, then erase the pseudo.
///
/// Thumb1 functions are not handled here (asserted). Returns the iterator
/// produced by erasing \p I from \p MBB.
MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator I) const {
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
  bool isARM = !AFI->isThumbFunction();
  DebugLoc dl = I->getDebugLoc();
  unsigned Opc = I->getOpcode();
  bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
  // Operand 1 of the destroy pseudo; 0 for the setup pseudo.
  // NOTE(review): -1U is compared against below and looks like the "callee
  // pops nothing" sentinel - confirm against the pseudo's definition.
  unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;

  assert(!AFI->isThumb1OnlyFunction() &&
         "This eliminateCallFramePseudoInstr does not support Thumb1!");

  // Carry the pseudo's predicate (AL when it has none) over to the SP update.
  int PIdx = I->findFirstPredOperandIdx();
  ARMCC::CondCodes Pred = (PIdx == -1)
                              ? ARMCC::AL
                              : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
  unsigned PredReg = TII.getFramePred(*I);

  if (!hasReservedCallFrame(MF)) {
    // Bail early if the callee is expected to do the adjustment.
    if (IsDestroy && CalleePopAmount != -1U)
      return MBB.erase(I);

    // If we have alloca, convert as follows:
    // ADJCALLSTACKDOWN -> sub, sp, sp, amount
    // ADJCALLSTACKUP   -> add, sp, sp, amount
    unsigned Amount = TII.getFrameSize(*I);
    if (Amount != 0) {
      // We need to keep the stack aligned properly.  To do this, we round the
      // amount of space needed for the outgoing arguments up to the next
      // alignment boundary.
      Amount = alignSPAdjust(Amount);

      if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
        emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      } else {
        assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
        emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
                     Pred, PredReg);
      }
    }
  } else if (CalleePopAmount != -1U) {
    // If the calling convention demands that the callee pops arguments from the
    // stack, we want to add it back if we have a reserved call frame.
    emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
                 MachineInstr::NoFlags, Pred, PredReg);
  }
  return MBB.erase(I);
}

/// Get the minimum constant for ARM that is greater than or equal to the
/// argument.
/// In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// This routine only forms constants of the shape (8-bit value << even shift),
/// so the largest value it can produce is 0xFF000000. Inputs above that have
/// no such constant at or above them; they saturate to 0xFF000000 instead of
/// wrapping around to 0.
///
/// \param Value the value to round up.
/// \returns 0 for 0; otherwise the smallest (8-bit << even shift) constant
///          that is >= \p Value, or 0xFF000000 if \p Value exceeds it.
static uint32_t alignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Above this bound the top byte 0xFF would round up to 0x100 with no
  // normalizing shift applied, and 0x100 << 24 does not fit in 32 bits.
  const uint32_t MaxEncodable = 0xFF000000;
  if (Value > MaxEncodable)
    return MaxEncodable;

  // Normalize: shift left two bits at a time (rotations are even) until one of
  // the two most significant bits is set.
  unsigned Shifted = 0;
  while (!(Value & 0xC0000000)) {
    Value <<= 2;
    Shifted += 2;
  }

  // Keep the top byte as the mantissa and round it up if any of the discarded
  // low 24 bits were set.
  const bool Carry = (Value & 0x00FFFFFF) != 0;
  Value = (Value >> 24) + (Carry ? 1u : 0u);

  // A mantissa of 0xFF that rounds up becomes 0x100. That is still a valid
  // constant at the next even rotation (0x40 << 2), so it needs no adjustment.

  // Undo the normalization so the mantissa lands back at its original
  // position.
  if (Shifted > 24)
    return Value >> (Shifted - 24);
  return Value << (24 - Shifted);
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split stacks. This currently only
// supports android and linux.
//
// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
// must be well defined in order to allow for consistent implementations of the
// __morestack helper function.
// The ABI is also not a normal ABI in that it
// doesn't follow the normal calling conventions because this allows the
// prologue of each function to be optimized further.
//
// Currently, the ABI looks like (when calling __morestack)
//
//  * r4 holds the minimum stack size requested for this function call
//  * r5 holds the stack size of the arguments to the function
//  * the beginning of the function is 3 instructions after the call to
//    __morestack
//
// Implementations of __morestack should use r4 to allocate a new stack, r5 to
// place the arguments on to the new stack, and the 3-instruction knowledge to
// jump directly to the body of the function when working on the new stack.
//
// An old (and possibly no longer compatible) implementation of __morestack for
// ARM can be found at [1].
//
// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
//
// The emitted code is spread over five new blocks placed in front of
// PrologueMBB, in this order:
//   PrevStackMBB - push the two scratch registers
//   McrMBB       - compute SP (minus the frame size) into SR1, and read the
//                  TLS base into SR0 on the non-Thumb1 path
//   GetMBB       - load the stack limit, compare, branch to PostStackMBB
//   AllocMBB     - set up r4/r5, call __morestack, restore and return
//   PostStackMBB - pop the scratch registers and fall into PrologueMBB
void ARMFrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  unsigned Opcode;
  unsigned CFIIndex;
  const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
  bool Thumb = ST->isThumb();
  bool Thumb2 = ST->isThumb2();

  // Sadly, this currently doesn't support varargs, platforms other than
  // android/linux. Note that thumb1/thumb2 are supported for android/linux.
  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!ST->isTargetAndroid() && !ST->isTargetLinux())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineFrameInfo &MFI = MF.getFrameInfo();
  MCContext &Context = MF.getContext();
  const MCRegisterInfo *MRI = Context.getRegisterInfo();
  const ARMBaseInstrInfo &TII =
      *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
  ARMFunctionInfo *ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  // Nothing to emit when the frame info says no split-stack prologue is
  // needed.
  if (!MFI.needsSplitStackProlog())
    return;

  uint64_t StackSize = MFI.getStackSize();

  // Use R4 and R5 as scratch registers.
  // We save R4 and R5 before use and restore them before leaving the function.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
  uint64_t AlignedStackSize;

  MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *AllocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *GetMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *McrMBB = MF.CreateMachineBasicBlock();

  // Grab everything that reaches PrologueMBB to update their liveness as well.
  SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
  SmallVector<MachineBasicBlock *, 2> WalkList;
  WalkList.push_back(&PrologueMBB);

  // Worklist walk over predecessors; the set insert doubles as the visited
  // check.
  do {
    MachineBasicBlock *CurMBB = WalkList.pop_back_val();
    for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
      if (BeforePrologueRegion.insert(PredBB).second)
        WalkList.push_back(PredBB);
    }
  } while (!WalkList.empty());

  // The order in that list is important.
  // The blocks will all be inserted before PrologueMBB using that order.
  // Therefore the block that should appear first in the CFG should appear
  // first in the list.
  MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
                                      PostStackMBB};

  for (MachineBasicBlock *B : AddedBlocks)
    BeforePrologueRegion.insert(B);

  // Everything live into the prologue is made live into every block that can
  // reach it, including the new ones.
  for (const auto &LI : PrologueMBB.liveins()) {
    for (MachineBasicBlock *PredBB : BeforePrologueRegion)
      PredBB->addLiveIn(LI);
  }

  // Remove the newly added blocks from the list, since we know
  // we do not have to do the following updates for them.
  for (MachineBasicBlock *B : AddedBlocks) {
    BeforePrologueRegion.erase(B);
    MF.insert(PrologueMBB.getIterator(), B);
  }

  for (MachineBasicBlock *MBB : BeforePrologueRegion) {
    // Make sure the LiveIns are still sorted and unique.
    MBB->sortUniqueLiveIns();
    // Replace the edges to PrologueMBB by edges to the sequences
    // we are about to add, but only update for immediate predecessors.
    if (MBB->isSuccessor(&PrologueMBB))
      MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
  }

  // The required stack size that is aligned to ARM constant criterion.
  AlignedStackSize = alignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // We will use two of the callee save registers as scratch registers so we
  // need to save those registers onto the stack.
  // We will use SR0 to hold stack limit and SR1 to hold the stack size
  // requested and arguments for __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  if (Thumb) {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Emit the relevant DWARF information about the change in stack pointer as
  // well as where to find both r4 and r5 (the callee-save registers)
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
    BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // mov SR1, sp
  // In ARM mode the copy is only emitted when SP itself is compared; otherwise
  // the SUB below reads SP directly.
  if (Thumb) {
    BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));
  } else if (CompareStackPointer) {
    BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
  }

  // sub SR1, sp, #StackSize
  // NOTE(review): CompareStackPointer is (AlignedStackSize < 256), so inside
  // the !CompareStackPointer branches the `AlignedStackSize < 256` arms below
  // cannot be taken as long as kSplitStackAvailable stays at 256.
  if (!CompareStackPointer && Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      // Materialize the size in SR0 first (move-immediate or constant pool),
      // then subtract it from SR1.
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = McrMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
      BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
          .add(condCodeOp())
          .addReg(ScratchReg1)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    }
  } else if (!CompareStackPointer) {
    if (AlignedStackSize < 256) {
      BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
          .addReg(ARM::SP)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = McrMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
      BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
          .addReg(ARM::SP)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    }
  }

  // Load the stack limit into SR0. Thumb1 reads it through the __STACK_LIMIT
  // symbol; every other configuration reads it from thread-local storage.
  if (Thumb && ST->isThumb1Only()) {
    if (ST->genExecuteOnly()) {
      BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
          .addExternalSymbol("__STACK_LIMIT");
    } else {
      unsigned PCLabelId = ARMFI->createPICLabelUId();
      ARMConstantPoolValue *NewCPV = ARMConstantPoolSymbol::Create(
          MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));

      // ldr SR0, [pc, offset(STACK_LIMIT)]
      BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
          .addConstantPoolIndex(CPI)
          .add(predOps(ARMCC::AL));
    }

    // ldr SR0, [SR0]
    BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(0)
        .add(predOps(ARMCC::AL));
  } else {
    // Get TLS base address from the coprocessor
    // mrc p15, #0, SR0, c13, c0, #3
    BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
            ScratchReg0)
        .addImm(15)
        .addImm(0)
        .addImm(13)
        .addImm(0)
        .addImm(3)
        .add(predOps(ARMCC::AL));

    // Use the last tls slot on android and a private field of the TCB on linux.
    assert(ST->isTargetAndroid() || ST->isTargetLinux());
    unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;

    // Get the stack limit from the right offset
    // ldr SR0, [sr0, #4 * TlsOffset]
    BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
            ScratchReg0)
        .addReg(ScratchReg0)
        .addImm(4 * TlsOffset)
        .add(predOps(ARMCC::AL));
  }

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addReg(ScratchReg0)
      .addReg(ScratchReg1)
      .add(predOps(ARMCC::AL));

  // This jump is taken if StackLimit <= SP - stack required.
  Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(GetMBB, DL, TII.get(Opcode))
      .addMBB(PostStackMBB)
      .addImm(ARMCC::LS)
      .addReg(ARM::CPSR);

  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0(r4)
  // and the size of the stack arguments is in SR1(r5).

  // Pass first argument for the __morestack by Scratch Register #0.
  //   The amount of stack required
  if (Thumb) {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
          .add(condCodeOp())
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
            .addImm(AlignedStackSize);
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                   AlignedStackSize);
      }
    }
  } else {
    if (AlignedStackSize < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
          .addImm(AlignedStackSize)
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
                                 AlignedStackSize);
    }
  }

  // Pass second argument for the __morestack by Scratch Register #1.
  //   The amount of stack consumed to save function arguments.
  if (Thumb) {
    if (ARMFI->getArgumentStackSize() < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
          .add(condCodeOp())
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL));
    } else {
      if (Thumb2 || ST->genExecuteOnly()) {
        BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
            .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()));
      } else {
        auto MBBI = AllocMBB->end();
        auto RegInfo = STI.getRegisterInfo();
        RegInfo->emitLoadConstPool(
            *AllocMBB, MBBI, DL, ScratchReg1, 0,
            alignToARMConstant(ARMFI->getArgumentStackSize()));
      }
    }
  } else {
    if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
      BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
          .addImm(alignToARMConstant(ARMFI->getArgumentStackSize()))
          .add(predOps(ARMCC::AL))
          .add(condCodeOp());
    } else {
      auto MBBI = AllocMBB->end();
      auto RegInfo = STI.getRegisterInfo();
      RegInfo->emitLoadConstPool(
          *AllocMBB, MBBI, DL, ScratchReg1, 0,
          alignToARMConstant(ARMFI->getArgumentStackSize()));
    }
  }

  // push {lr} - Save return address of this function.
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Emit the DWARF info about the change in stack as well as where to find the
  // previous link register
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
        nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Call __morestack().
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
        .add(predOps(ARMCC::AL))
        .addExternalSymbol("__morestack");
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::BL))
        .addExternalSymbol("__morestack");
  }

  // pop {lr} - Restore return address of this original function.
  if (Thumb) {
    if (ST->isThumb1Only()) {
      // Thumb1 goes through SR0: pop the saved value, then move it into LR.
      // NOTE(review): this makes the Thumb1 sequence after the call one
      // instruction longer than the other paths - confirm against the
      // "3 instructions" statement in the header comment.
      BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
          .add(predOps(ARMCC::AL))
          .addReg(ScratchReg0);
      BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
          .addReg(ScratchReg0)
          .add(predOps(ARMCC::AL));
    } else {
      BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
          .addReg(ARM::LR, RegState::Define)
          .addReg(ARM::SP, RegState::Define)
          .addReg(ARM::SP)
          .addImm(4)
          .add(predOps(ARMCC::AL));
    }
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ARM::LR);
  }

  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip PostStackMBB block so we need to restore
  // scratch registers from here.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Return from this function.
  BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  if (Thumb) {
    BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  } else {
    BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
        .addReg(ARM::SP, RegState::Define)
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addReg(ScratchReg0)
        .addReg(ScratchReg1);
  }

  // Update the CFA offset now that we've popped
  if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);

    // Tell debuggers that r4 and r5 are now the same as they were in the
    // previous function, that they're the "Same Value".
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
    CFIIndex = MF.addFrameInst(MCCFIInstruction::createSameValue(
        nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
    BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  }

  // Organizing MBB lists
  PostStackMBB->addSuccessor(&PrologueMBB);

  AllocMBB->addSuccessor(PostStackMBB);

  GetMBB->addSuccessor(PostStackMBB);
  GetMBB->addSuccessor(AllocMBB);

  McrMBB->addSuccessor(GetMBB);

  PrevStackMBB->addSuccessor(McrMBB);

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}