// Path: blob/main/contrib/llvm-project/llvm/lib/Target/Hexagon/HexagonFrameLowering.cpp
// 35294 views
//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//7//===----------------------------------------------------------------------===//89#include "HexagonFrameLowering.h"10#include "HexagonBlockRanges.h"11#include "HexagonInstrInfo.h"12#include "HexagonMachineFunctionInfo.h"13#include "HexagonRegisterInfo.h"14#include "HexagonSubtarget.h"15#include "HexagonTargetMachine.h"16#include "MCTargetDesc/HexagonBaseInfo.h"17#include "llvm/ADT/BitVector.h"18#include "llvm/ADT/DenseMap.h"19#include "llvm/ADT/PostOrderIterator.h"20#include "llvm/ADT/SetVector.h"21#include "llvm/ADT/SmallSet.h"22#include "llvm/ADT/SmallVector.h"23#include "llvm/CodeGen/LivePhysRegs.h"24#include "llvm/CodeGen/MachineBasicBlock.h"25#include "llvm/CodeGen/MachineDominators.h"26#include "llvm/CodeGen/MachineFrameInfo.h"27#include "llvm/CodeGen/MachineFunction.h"28#include "llvm/CodeGen/MachineFunctionPass.h"29#include "llvm/CodeGen/MachineInstr.h"30#include "llvm/CodeGen/MachineInstrBuilder.h"31#include "llvm/CodeGen/MachineMemOperand.h"32#include "llvm/CodeGen/MachineModuleInfo.h"33#include "llvm/CodeGen/MachineOperand.h"34#include "llvm/CodeGen/MachinePostDominators.h"35#include "llvm/CodeGen/MachineRegisterInfo.h"36#include "llvm/CodeGen/PseudoSourceValue.h"37#include "llvm/CodeGen/RegisterScavenging.h"38#include "llvm/CodeGen/TargetRegisterInfo.h"39#include "llvm/IR/Attributes.h"40#include "llvm/IR/DebugLoc.h"41#include "llvm/IR/Function.h"42#include "llvm/MC/MCDwarf.h"43#include "llvm/MC/MCRegisterInfo.h"44#include "llvm/Pass.h"45#include "llvm/Support/CodeGen.h"46#include "llvm/Support/CommandLine.h"47#include "llvm/Support/Compiler.h"48#include "llvm/Support/Debug.h"49#include "llvm/Support/ErrorHandling.h"50#include "llvm/Support/MathExtras.h"51#include 
"llvm/Support/raw_ostream.h"52#include "llvm/Target/TargetMachine.h"53#include "llvm/Target/TargetOptions.h"54#include <algorithm>55#include <cassert>56#include <cstdint>57#include <iterator>58#include <limits>59#include <map>60#include <optional>61#include <utility>62#include <vector>6364#define DEBUG_TYPE "hexagon-pei"6566// Hexagon stack frame layout as defined by the ABI:67//68// Incoming arguments69// passed via stack70// |71// |72// SP during function's FP during function's |73// +-- runtime (top of stack) runtime (bottom) --+ |74// | | |75// --++---------------------+------------------+-----------------++-+-------76// | parameter area for | variable-size | fixed-size |LR| arg77// | called functions | local objects | local objects |FP|78// --+----------------------+------------------+-----------------+--+-------79// <- size known -> <- size unknown -> <- size known ->80//81// Low address High address82//83// <--- stack growth84//85//86// - In any circumstances, the outgoing function arguments are always accessi-87// ble using the SP, and the incoming arguments are accessible using the FP.88// - If the local objects are not aligned, they can always be accessed using89// the FP.90// - If there are no variable-sized objects, the local objects can always be91// accessed using the SP, regardless whether they are aligned or not. (The92// alignment padding will be at the bottom of the stack (highest address),93// and so the offset with respect to the SP will be known at the compile-94// -time.)95//96// The only complication occurs if there are both, local aligned objects, and97// dynamically allocated (variable-sized) objects. The alignment pad will be98// placed between the FP and the local objects, thus preventing the use of the99// FP to access the local objects. 
At the same time, the variable-sized objects100// will be between the SP and the local objects, thus introducing an unknown101// distance from the SP to the locals.102//103// To avoid this problem, a new register is created that holds the aligned104// address of the bottom of the stack, referred in the sources as AP (aligned105// pointer). The AP will be equal to "FP-p", where "p" is the smallest pad106// that aligns AP to the required boundary (a maximum of the alignments of107// all stack objects, fixed- and variable-sized). All local objects[1] will108// then use AP as the base pointer.109// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get110// their name from being allocated at fixed locations on the stack, relative111// to the FP. In the presence of dynamic allocation and local alignment, such112// objects can only be accessed through the FP.113//114// Illustration of the AP:115// FP --+116// |117// ---------------+---------------------+-----+-----------------------++-+--118// Rest of the | Local stack objects | Pad | Fixed stack objects |LR|119// stack frame | (aligned) | | (CSR, spills, etc.) |FP|120// ---------------+---------------------+-----+-----------------+-----+--+--121// |<-- Multiple of the -->|122// stack alignment +-- AP123//124// The AP is set up at the beginning of the function. Since it is not a dedi-125// cated (reserved) register, it needs to be kept live throughout the function126// to be available as the base register for local object accesses.127// Normally, an address of a stack objects is obtained by a pseudo-instruction128// PS_fi. To access local objects with the AP register present, a different129// pseudo-instruction needs to be used: PS_fia. 
The PS_fia takes one extra130// argument compared to PS_fi: the first input register is the AP register.131// This keeps the register live between its definition and its uses.132133// The AP register is originally set up using pseudo-instruction PS_aligna:134// AP = PS_aligna A135// where136// A - required stack alignment137// The alignment value must be the maximum of all alignments required by138// any stack object.139140// The dynamic allocation uses a pseudo-instruction PS_alloca:141// Rd = PS_alloca Rs, A142// where143// Rd - address of the allocated space144// Rs - minimum size (the actual allocated can be larger to accommodate145// alignment)146// A - required alignment147148using namespace llvm;149150static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",151cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));152153static cl::opt<unsigned>154NumberScavengerSlots("number-scavenger-slots", cl::Hidden,155cl::desc("Set the number of scavenger slots"),156cl::init(2));157158static cl::opt<int>159SpillFuncThreshold("spill-func-threshold", cl::Hidden,160cl::desc("Specify O2(not Os) spill func threshold"),161cl::init(6));162163static cl::opt<int>164SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden,165cl::desc("Specify Os spill func threshold"),166cl::init(1));167168static cl::opt<bool> EnableStackOVFSanitizer(169"enable-stackovf-sanitizer", cl::Hidden,170cl::desc("Enable runtime checks for stack overflow."), cl::init(false));171172static cl::opt<bool>173EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden,174cl::desc("Enable stack frame shrink wrapping"));175176static cl::opt<unsigned>177ShrinkLimit("shrink-frame-limit",178cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden,179cl::desc("Max count of stack frame shrink-wraps"));180181static cl::opt<bool>182EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden,183cl::desc("Enable long calls for save-restore stubs."),184cl::init(false));185186static 
cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),187cl::Hidden, cl::desc("Refrain from using FP whenever possible"));188189static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,190cl::init(true), cl::desc("Optimize spill slots"));191192#ifndef NDEBUG193static cl::opt<unsigned> SpillOptMax("spill-opt-max", cl::Hidden,194cl::init(std::numeric_limits<unsigned>::max()));195static unsigned SpillOptCount = 0;196#endif197198namespace llvm {199200void initializeHexagonCallFrameInformationPass(PassRegistry&);201FunctionPass *createHexagonCallFrameInformation();202203} // end namespace llvm204205namespace {206207class HexagonCallFrameInformation : public MachineFunctionPass {208public:209static char ID;210211HexagonCallFrameInformation() : MachineFunctionPass(ID) {212PassRegistry &PR = *PassRegistry::getPassRegistry();213initializeHexagonCallFrameInformationPass(PR);214}215216bool runOnMachineFunction(MachineFunction &MF) override;217218MachineFunctionProperties getRequiredProperties() const override {219return MachineFunctionProperties().set(220MachineFunctionProperties::Property::NoVRegs);221}222};223224char HexagonCallFrameInformation::ID = 0;225226} // end anonymous namespace227228bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {229auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();230bool NeedCFI = MF.needsFrameMoves();231232if (!NeedCFI)233return false;234HFI.insertCFIInstructions(MF);235return true;236}237238INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",239"Hexagon call frame information", false, false)240241FunctionPass *llvm::createHexagonCallFrameInformation() {242return new HexagonCallFrameInformation();243}244245/// Map a register pair Reg to the subregister that has the greater "number",246/// i.e. 
D3 (aka R7:6) will be mapped to R7, etc.247static Register getMax32BitSubRegister(Register Reg,248const TargetRegisterInfo &TRI,249bool hireg = true) {250if (Reg < Hexagon::D0 || Reg > Hexagon::D15)251return Reg;252253Register RegNo = 0;254for (MCPhysReg SubReg : TRI.subregs(Reg)) {255if (hireg) {256if (SubReg > RegNo)257RegNo = SubReg;258} else {259if (!RegNo || SubReg < RegNo)260RegNo = SubReg;261}262}263return RegNo;264}265266/// Returns the callee saved register with the largest id in the vector.267static Register getMaxCalleeSavedReg(ArrayRef<CalleeSavedInfo> CSI,268const TargetRegisterInfo &TRI) {269static_assert(Hexagon::R1 > 0,270"Assume physical registers are encoded as positive integers");271if (CSI.empty())272return 0;273274Register Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);275for (unsigned I = 1, E = CSI.size(); I < E; ++I) {276Register Reg = getMax32BitSubRegister(CSI[I].getReg(), TRI);277if (Reg > Max)278Max = Reg;279}280return Max;281}282283/// Checks if the basic block contains any instruction that needs a stack284/// frame to be already in place.285static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,286const HexagonRegisterInfo &HRI) {287for (const MachineInstr &MI : MBB) {288if (MI.isCall())289return true;290unsigned Opc = MI.getOpcode();291switch (Opc) {292case Hexagon::PS_alloca:293case Hexagon::PS_aligna:294return true;295default:296break;297}298// Check individual operands.299for (const MachineOperand &MO : MI.operands()) {300// While the presence of a frame index does not prove that a stack301// frame will be required, all frame indexes should be within alloc-302// frame/deallocframe. 
Otherwise, the code that translates a frame303// index into an offset would have to be aware of the placement of304// the frame creation/destruction instructions.305if (MO.isFI())306return true;307if (MO.isReg()) {308Register R = MO.getReg();309// Debug instructions may refer to $noreg.310if (!R)311continue;312// Virtual registers will need scavenging, which then may require313// a stack slot.314if (R.isVirtual())315return true;316for (MCPhysReg S : HRI.subregs_inclusive(R))317if (CSR[S])318return true;319continue;320}321if (MO.isRegMask()) {322// A regmask would normally have all callee-saved registers marked323// as preserved, so this check would not be needed, but in case of324// ever having other regmasks (for other calling conventions),325// make sure they would be processed correctly.326const uint32_t *BM = MO.getRegMask();327for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {328unsigned R = x;329// If this regmask does not preserve a CSR, a frame will be needed.330if (!(BM[R/32] & (1u << (R%32))))331return true;332}333}334}335}336return false;337}338339/// Returns true if MBB has a machine instructions that indicates a tail call340/// in the block.341static bool hasTailCall(const MachineBasicBlock &MBB) {342MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();343if (I == MBB.end())344return false;345unsigned RetOpc = I->getOpcode();346return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;347}348349/// Returns true if MBB contains an instruction that returns.350static bool hasReturn(const MachineBasicBlock &MBB) {351for (const MachineInstr &MI : MBB.terminators())352if (MI.isReturn())353return true;354return false;355}356357/// Returns the "return" instruction from this block, or nullptr if there358/// isn't any.359static MachineInstr *getReturn(MachineBasicBlock &MBB) {360for (auto &I : MBB)361if (I.isReturn())362return &I;363return nullptr;364}365366static bool isRestoreCall(unsigned Opc) {367switch (Opc) 
{368case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:369case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:370case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:371case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:372case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:373case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:374case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:375case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:376return true;377}378return false;379}380381static inline bool isOptNone(const MachineFunction &MF) {382return MF.getFunction().hasOptNone() ||383MF.getTarget().getOptLevel() == CodeGenOptLevel::None;384}385386static inline bool isOptSize(const MachineFunction &MF) {387const Function &F = MF.getFunction();388return F.hasOptSize() && !F.hasMinSize();389}390391static inline bool isMinSize(const MachineFunction &MF) {392return MF.getFunction().hasMinSize();393}394395/// Implements shrink-wrapping of the stack frame. By default, stack frame396/// is created in the function entry block, and is cleaned up in every block397/// that returns. 
This function finds alternate blocks: one for the frame398/// setup (prolog) and one for the cleanup (epilog).399void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,400MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {401static unsigned ShrinkCounter = 0;402403if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&404MF.getFunction().isVarArg())405return;406if (ShrinkLimit.getPosition()) {407if (ShrinkCounter >= ShrinkLimit)408return;409ShrinkCounter++;410}411412auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();413414MachineDominatorTree MDT;415MDT.calculate(MF);416MachinePostDominatorTree MPT;417MPT.recalculate(MF);418419using UnsignedMap = DenseMap<unsigned, unsigned>;420using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;421422UnsignedMap RPO;423RPOTType RPOT(&MF);424unsigned RPON = 0;425for (auto &I : RPOT)426RPO[I->getNumber()] = RPON++;427428// Don't process functions that have loops, at least for now. Placement429// of prolog and epilog must take loop structure into account. For simpli-430// city don't do it right now.431for (auto &I : MF) {432unsigned BN = RPO[I.getNumber()];433for (MachineBasicBlock *Succ : I.successors())434// If found a back-edge, return.435if (RPO[Succ->getNumber()] <= BN)436return;437}438439// Collect the set of blocks that need a stack frame to execute. 
Scan440// each block for uses/defs of callee-saved registers, calls, etc.441SmallVector<MachineBasicBlock*,16> SFBlocks;442BitVector CSR(Hexagon::NUM_TARGET_REGS);443for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)444for (MCPhysReg S : HRI.subregs_inclusive(*P))445CSR[S] = true;446447for (auto &I : MF)448if (needsStackFrame(I, CSR, HRI))449SFBlocks.push_back(&I);450451LLVM_DEBUG({452dbgs() << "Blocks needing SF: {";453for (auto &B : SFBlocks)454dbgs() << " " << printMBBReference(*B);455dbgs() << " }\n";456});457// No frame needed?458if (SFBlocks.empty())459return;460461// Pick a common dominator and a common post-dominator.462MachineBasicBlock *DomB = SFBlocks[0];463for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {464DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);465if (!DomB)466break;467}468MachineBasicBlock *PDomB = SFBlocks[0];469for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {470PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);471if (!PDomB)472break;473}474LLVM_DEBUG({475dbgs() << "Computed dom block: ";476if (DomB)477dbgs() << printMBBReference(*DomB);478else479dbgs() << "<null>";480dbgs() << ", computed pdom block: ";481if (PDomB)482dbgs() << printMBBReference(*PDomB);483else484dbgs() << "<null>";485dbgs() << "\n";486});487if (!DomB || !PDomB)488return;489490// Make sure that DomB dominates PDomB and PDomB post-dominates DomB.491if (!MDT.dominates(DomB, PDomB)) {492LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");493return;494}495if (!MPT.dominates(PDomB, DomB)) {496LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");497return;498}499500// Finally, everything seems right.501PrologB = DomB;502EpilogB = PDomB;503}504505/// Perform most of the PEI work here:506/// - saving/restoring of the callee-saved registers,507/// - stack frame creation and destruction.508/// Normally, this work is distributed among various functions, but doing it509/// in one place allows shrink-wrapping of 
the stack frame.510void HexagonFrameLowering::emitPrologue(MachineFunction &MF,511MachineBasicBlock &MBB) const {512auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();513514MachineFrameInfo &MFI = MF.getFrameInfo();515const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();516517MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;518if (EnableShrinkWrapping)519findShrunkPrologEpilog(MF, PrologB, EpilogB);520521bool PrologueStubs = false;522insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);523insertPrologueInBlock(*PrologB, PrologueStubs);524updateEntryPaths(MF, *PrologB);525526if (EpilogB) {527insertCSRRestoresInBlock(*EpilogB, CSI, HRI);528insertEpilogueInBlock(*EpilogB);529} else {530for (auto &B : MF)531if (B.isReturnBlock())532insertCSRRestoresInBlock(B, CSI, HRI);533534for (auto &B : MF)535if (B.isReturnBlock())536insertEpilogueInBlock(B);537538for (auto &B : MF) {539if (B.empty())540continue;541MachineInstr *RetI = getReturn(B);542if (!RetI || isRestoreCall(RetI->getOpcode()))543continue;544for (auto &R : CSI)545RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));546}547}548549if (EpilogB) {550// If there is an epilog block, it may not have a return instruction.551// In such case, we need to add the callee-saved registers as live-ins552// in all blocks on all paths from the epilog to any return block.553unsigned MaxBN = MF.getNumBlockIDs();554BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);555updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);556}557}558559/// Returns true if the target can safely skip saving callee-saved registers560/// for noreturn nounwind functions.561bool HexagonFrameLowering::enableCalleeSaveSkip(562const MachineFunction &MF) const {563const auto &F = MF.getFunction();564assert(F.hasFnAttribute(Attribute::NoReturn) &&565F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&566!F.getFunction().hasFnAttribute(Attribute::UWTable));567(void)F;568569// No need to save 
callee saved registers if the function does not return.570return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();571}572573// Helper function used to determine when to eliminate the stack frame for574// functions marked as noreturn and when the noreturn-stack-elim options are575// specified. When both these conditions are true, then a FP may not be needed576// if the function makes a call. It is very similar to enableCalleeSaveSkip,577// but it used to check if the allocframe can be eliminated as well.578static bool enableAllocFrameElim(const MachineFunction &MF) {579const auto &F = MF.getFunction();580const auto &MFI = MF.getFrameInfo();581const auto &HST = MF.getSubtarget<HexagonSubtarget>();582assert(!MFI.hasVarSizedObjects() &&583!HST.getRegisterInfo()->hasStackRealignment(MF));584return F.hasFnAttribute(Attribute::NoReturn) &&585F.hasFnAttribute(Attribute::NoUnwind) &&586!F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&587MFI.getStackSize() == 0;588}589590void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,591bool PrologueStubs) const {592MachineFunction &MF = *MBB.getParent();593MachineFrameInfo &MFI = MF.getFrameInfo();594auto &HST = MF.getSubtarget<HexagonSubtarget>();595auto &HII = *HST.getInstrInfo();596auto &HRI = *HST.getRegisterInfo();597598Align MaxAlign = std::max(MFI.getMaxAlign(), getStackAlign());599600// Calculate the total stack frame size.601// Get the number of bytes to allocate from the FrameInfo.602unsigned FrameSize = MFI.getStackSize();603// Round up the max call frame size to the max alignment on the stack.604unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);605MFI.setMaxCallFrameSize(MaxCFA);606607FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);608MFI.setStackSize(FrameSize);609610bool AlignStack = (MaxAlign > getStackAlign());611612// Get the number of bytes to allocate from the FrameInfo.613unsigned NumBytes = MFI.getStackSize();614Register SP = 
HRI.getStackRegister();615unsigned MaxCF = MFI.getMaxCallFrameSize();616MachineBasicBlock::iterator InsertPt = MBB.begin();617618SmallVector<MachineInstr *, 4> AdjustRegs;619for (auto &MBB : MF)620for (auto &MI : MBB)621if (MI.getOpcode() == Hexagon::PS_alloca)622AdjustRegs.push_back(&MI);623624for (auto *MI : AdjustRegs) {625assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");626expandAlloca(MI, HII, SP, MaxCF);627MI->eraseFromParent();628}629630DebugLoc dl = MBB.findDebugLoc(InsertPt);631632if (MF.getFunction().isVarArg() &&633MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {634// Calculate the size of register saved area.635int NumVarArgRegs = 6 - FirstVarArgSavedReg;636int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)637? NumVarArgRegs * 4638: NumVarArgRegs * 4 + 4;639if (RegisterSavedAreaSizePlusPadding > 0) {640// Decrement the stack pointer by size of register saved area plus641// padding if any.642BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)643.addReg(SP)644.addImm(-RegisterSavedAreaSizePlusPadding)645.setMIFlag(MachineInstr::FrameSetup);646647int NumBytes = 0;648// Copy all the named arguments below register saved area.649auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();650for (int i = HMFI.getFirstNamedArgFrameIndex(),651e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {652uint64_t ObjSize = MFI.getObjectSize(i);653Align ObjAlign = MFI.getObjectAlign(i);654655// Determine the kind of load/store that should be used.656unsigned LDOpc, STOpc;657uint64_t OpcodeChecker = ObjAlign.value();658659// Handle cases where alignment of an object is > its size.660if (ObjAlign > ObjSize) {661if (ObjSize <= 1)662OpcodeChecker = 1;663else if (ObjSize <= 2)664OpcodeChecker = 2;665else if (ObjSize <= 4)666OpcodeChecker = 4;667else if (ObjSize > 4)668OpcodeChecker = 8;669}670671switch (OpcodeChecker) {672case 1:673LDOpc = Hexagon::L2_loadrb_io;674STOpc = Hexagon::S2_storerb_io;675break;676case 2:677LDOpc = 
Hexagon::L2_loadrh_io;678STOpc = Hexagon::S2_storerh_io;679break;680case 4:681LDOpc = Hexagon::L2_loadri_io;682STOpc = Hexagon::S2_storeri_io;683break;684case 8:685default:686LDOpc = Hexagon::L2_loadrd_io;687STOpc = Hexagon::S2_storerd_io;688break;689}690691Register RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3692: Hexagon::R6;693int LoadStoreCount = ObjSize / OpcodeChecker;694695if (ObjSize % OpcodeChecker)696++LoadStoreCount;697698// Get the start location of the load. NumBytes is basically the699// offset from the stack pointer of previous function, which would be700// the caller in this case, as this function has variable argument701// list.702if (NumBytes != 0)703NumBytes = alignTo(NumBytes, ObjAlign);704705int Count = 0;706while (Count < LoadStoreCount) {707// Load the value of the named argument on stack.708BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)709.addReg(SP)710.addImm(RegisterSavedAreaSizePlusPadding +711ObjAlign.value() * Count + NumBytes)712.setMIFlag(MachineInstr::FrameSetup);713714// Store it below the register saved area plus padding.715BuildMI(MBB, InsertPt, dl, HII.get(STOpc))716.addReg(SP)717.addImm(ObjAlign.value() * Count + NumBytes)718.addReg(RegUsed)719.setMIFlag(MachineInstr::FrameSetup);720721Count++;722}723NumBytes += MFI.getObjectSize(i);724}725726// Make NumBytes 8 byte aligned727NumBytes = alignTo(NumBytes, 8);728729// If the number of registers having variable arguments is odd,730// leave 4 bytes of padding to get to the location where first731// variable argument which was passed through register was copied.732NumBytes = (NumVarArgRegs % 2 == 0) ? 
NumBytes : NumBytes + 4;733734for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {735BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))736.addReg(SP)737.addImm(NumBytes + 4 * i)738.addReg(Hexagon::R0 + j)739.setMIFlag(MachineInstr::FrameSetup);740}741}742}743744if (hasFP(MF)) {745insertAllocframe(MBB, InsertPt, NumBytes);746if (AlignStack) {747BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)748.addReg(SP)749.addImm(-int64_t(MaxAlign.value()));750}751// If the stack-checking is enabled, and we spilled the callee-saved752// registers inline (i.e. did not use a spill function), then call753// the stack checker directly.754if (EnableStackOVFSanitizer && !PrologueStubs)755BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))756.addExternalSymbol("__runtime_stack_check");757} else if (NumBytes > 0) {758assert(alignTo(NumBytes, 8) == NumBytes);759BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)760.addReg(SP)761.addImm(-int(NumBytes));762}763}764765void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {766MachineFunction &MF = *MBB.getParent();767auto &HST = MF.getSubtarget<HexagonSubtarget>();768auto &HII = *HST.getInstrInfo();769auto &HRI = *HST.getRegisterInfo();770Register SP = HRI.getStackRegister();771772MachineBasicBlock::iterator InsertPt = MBB.getFirstTerminator();773DebugLoc dl = MBB.findDebugLoc(InsertPt);774775if (!hasFP(MF)) {776MachineFrameInfo &MFI = MF.getFrameInfo();777unsigned NumBytes = MFI.getStackSize();778if (MF.getFunction().isVarArg() &&779MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {780// On Hexagon Linux, deallocate the stack for the register saved area.781int NumVarArgRegs = 6 - FirstVarArgSavedReg;782int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?783(NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);784NumBytes += RegisterSavedAreaSizePlusPadding;785}786if (NumBytes) {787BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), 
SP)788.addReg(SP)789.addImm(NumBytes);790}791return;792}793794MachineInstr *RetI = getReturn(MBB);795unsigned RetOpc = RetI ? RetI->getOpcode() : 0;796797// Handle EH_RETURN.798if (RetOpc == Hexagon::EH_RETURN_JMPR) {799BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))800.addDef(Hexagon::D15)801.addReg(Hexagon::R30);802BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)803.addReg(SP)804.addReg(Hexagon::R28);805return;806}807808// Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-809// frame instruction if we encounter it.810if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||811RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||812RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||813RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {814MachineBasicBlock::iterator It = RetI;815++It;816// Delete all instructions after the RESTORE (except labels).817while (It != MBB.end()) {818if (!It->isLabel())819It = MBB.erase(It);820else821++It;822}823return;824}825826// It is possible that the restoring code is a call to a library function.827// All of the restore* functions include "deallocframe", so we need to make828// sure that we don't add an extra one.829bool NeedsDeallocframe = true;830if (!MBB.empty() && InsertPt != MBB.begin()) {831MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);832unsigned COpc = PrevIt->getOpcode();833if (COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||834COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||835COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||836COpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||837COpc == Hexagon::PS_call_nr || COpc == Hexagon::PS_callr_nr)838NeedsDeallocframe = false;839}840841if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||842!MF.getFunction().isVarArg()) {843if (!NeedsDeallocframe)844return;845// If the returning instruction is PS_jmpret, replace it with846// dealloc_return, otherwise just add deallocframe. 
The function847// could be returning via a tail call.848if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet) {849BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))850.addDef(Hexagon::D15)851.addReg(Hexagon::R30);852return;853}854unsigned NewOpc = Hexagon::L4_return;855MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))856.addDef(Hexagon::D15)857.addReg(Hexagon::R30);858// Transfer the function live-out registers.859NewI->copyImplicitOps(MF, *RetI);860MBB.erase(RetI);861} else {862// L2_deallocframe instruction after it.863// Calculate the size of register saved area.864int NumVarArgRegs = 6 - FirstVarArgSavedReg;865int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?866(NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);867868MachineBasicBlock::iterator Term = MBB.getFirstTerminator();869MachineBasicBlock::iterator I = (Term == MBB.begin()) ? MBB.end()870: std::prev(Term);871if (I == MBB.end() ||872(I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT &&873I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC &&874I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 &&875I->getOpcode() != Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC))876BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))877.addDef(Hexagon::D15)878.addReg(Hexagon::R30);879if (RegisterSavedAreaSizePlusPadding != 0)880BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)881.addReg(SP)882.addImm(RegisterSavedAreaSizePlusPadding);883}884}885886void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,887MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {888MachineFunction &MF = *MBB.getParent();889auto &HST = MF.getSubtarget<HexagonSubtarget>();890auto &HII = *HST.getInstrInfo();891auto &HRI = *HST.getRegisterInfo();892893// Check for overflow.894// Hexagon_TODO: Ugh! hardcoding. 
Is there an API that can be used?895const unsigned int ALLOCFRAME_MAX = 16384;896897// Create a dummy memory operand to avoid allocframe from being treated as898// a volatile memory reference.899auto *MMO = MF.getMachineMemOperand(MachinePointerInfo::getStack(MF, 0),900MachineMemOperand::MOStore, 4, Align(4));901902DebugLoc dl = MBB.findDebugLoc(InsertPt);903Register SP = HRI.getStackRegister();904905if (NumBytes >= ALLOCFRAME_MAX) {906// Emit allocframe(#0).907BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))908.addDef(SP)909.addReg(SP)910.addImm(0)911.addMemOperand(MMO);912913// Subtract the size from the stack pointer.914Register SP = HRI.getStackRegister();915BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)916.addReg(SP)917.addImm(-int(NumBytes));918} else {919BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))920.addDef(SP)921.addReg(SP)922.addImm(NumBytes)923.addMemOperand(MMO);924}925}926927void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,928MachineBasicBlock &SaveB) const {929SetVector<unsigned> Worklist;930931MachineBasicBlock &EntryB = MF.front();932Worklist.insert(EntryB.getNumber());933934unsigned SaveN = SaveB.getNumber();935auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();936937for (unsigned i = 0; i < Worklist.size(); ++i) {938unsigned BN = Worklist[i];939MachineBasicBlock &MBB = *MF.getBlockNumbered(BN);940for (auto &R : CSI)941if (!MBB.isLiveIn(R.getReg()))942MBB.addLiveIn(R.getReg());943if (BN != SaveN)944for (auto &SB : MBB.successors())945Worklist.insert(SB->getNumber());946}947}948949bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,950MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,951BitVector &Path) const {952assert(MBB.getNumber() >= 0);953unsigned BN = MBB.getNumber();954if (Path[BN] || DoneF[BN])955return false;956if (DoneT[BN])957return true;958959auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();960961Path[BN] = true;962bool ReachedExit = 
false;963for (auto &SB : MBB.successors())964ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);965966if (!MBB.empty() && MBB.back().isReturn()) {967// Add implicit uses of all callee-saved registers to the reached968// return instructions. This is to prevent the anti-dependency breaker969// from renaming these registers.970MachineInstr &RetI = MBB.back();971if (!isRestoreCall(RetI.getOpcode()))972for (auto &R : CSI)973RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));974ReachedExit = true;975}976977// We don't want to add unnecessary live-ins to the restore block: since978// the callee-saved registers are being defined in it, the entry of the979// restore block cannot be on the path from the definitions to any exit.980if (ReachedExit && &MBB != &RestoreB) {981for (auto &R : CSI)982if (!MBB.isLiveIn(R.getReg()))983MBB.addLiveIn(R.getReg());984DoneT[BN] = true;985}986if (!ReachedExit)987DoneF[BN] = true;988989Path[BN] = false;990return ReachedExit;991}992993static std::optional<MachineBasicBlock::iterator>994findCFILocation(MachineBasicBlock &B) {995// The CFI instructions need to be inserted right after allocframe.996// An exception to this is a situation where allocframe is bundled997// with a call: then the CFI instructions need to be inserted before998// the packet with the allocframe+call (in case the call throws an999// exception).1000auto End = B.instr_end();10011002for (MachineInstr &I : B) {1003MachineBasicBlock::iterator It = I.getIterator();1004if (!I.isBundle()) {1005if (I.getOpcode() == Hexagon::S2_allocframe)1006return std::next(It);1007continue;1008}1009// I is a bundle.1010bool HasCall = false, HasAllocFrame = false;1011auto T = It.getInstrIterator();1012while (++T != End && T->isBundled()) {1013if (T->getOpcode() == Hexagon::S2_allocframe)1014HasAllocFrame = true;1015else if (T->isCall())1016HasCall = true;1017}1018if (HasAllocFrame)1019return HasCall ? 
It : std::next(It);1020}1021return std::nullopt;1022}10231024void HexagonFrameLowering::insertCFIInstructions(MachineFunction &MF) const {1025for (auto &B : MF)1026if (auto At = findCFILocation(B))1027insertCFIInstructionsAt(B, *At);1028}10291030void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,1031MachineBasicBlock::iterator At) const {1032MachineFunction &MF = *MBB.getParent();1033MachineFrameInfo &MFI = MF.getFrameInfo();1034auto &HST = MF.getSubtarget<HexagonSubtarget>();1035auto &HII = *HST.getInstrInfo();1036auto &HRI = *HST.getRegisterInfo();10371038// If CFI instructions have debug information attached, something goes1039// wrong with the final assembly generation: the prolog_end is placed1040// in a wrong location.1041DebugLoc DL;1042const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);10431044MCSymbol *FrameLabel = MF.getContext().createTempSymbol();1045bool HasFP = hasFP(MF);10461047if (HasFP) {1048unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);1049unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);10501051// Define CFA via an offset from the value of FP.1052//1053// -8 -4 0 (SP)1054// --+----+----+---------------------1055// | FP | LR | increasing addresses -->1056// --+----+----+---------------------1057// | +-- Old SP (before allocframe)1058// +-- New FP (after allocframe)1059//1060// MCCFIInstruction::cfiDefCfa adds the offset from the register.1061// MCCFIInstruction::createOffset takes the offset without sign change.1062auto DefCfa = MCCFIInstruction::cfiDefCfa(FrameLabel, DwFPReg, 8);1063BuildMI(MBB, At, DL, CFID)1064.addCFIIndex(MF.addFrameInst(DefCfa));1065// R31 (return addr) = CFA - 41066auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);1067BuildMI(MBB, At, DL, CFID)1068.addCFIIndex(MF.addFrameInst(OffR31));1069// R30 (frame ptr) = CFA - 81070auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);1071BuildMI(MBB, At, DL, 
CFID)1072.addCFIIndex(MF.addFrameInst(OffR30));1073}10741075static Register RegsToMove[] = {1076Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,1077Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,1078Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,1079Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,1080Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9,1081Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13,1082Hexagon::NoRegister1083};10841085const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();10861087for (unsigned i = 0; RegsToMove[i] != Hexagon::NoRegister; ++i) {1088Register Reg = RegsToMove[i];1089auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {1090return C.getReg() == Reg;1091};1092auto F = find_if(CSI, IfR);1093if (F == CSI.end())1094continue;10951096int64_t Offset;1097if (HasFP) {1098// If the function has a frame pointer (i.e. has an allocframe),1099// then the CFA has been defined in terms of FP. Any offsets in1100// the following CFI instructions have to be defined relative1101// to FP, which points to the bottom of the stack frame.1102// The function getFrameIndexReference can still choose to use SP1103// for the offset calculation, so we cannot simply call it here.1104// Instead, get the offset (relative to the FP) directly.1105Offset = MFI.getObjectOffset(F->getFrameIdx());1106} else {1107Register FrameReg;1108Offset =1109getFrameIndexReference(MF, F->getFrameIdx(), FrameReg).getFixed();1110}1111// Subtract 8 to make room for R30 and R31, which are added above.1112Offset -= 8;11131114if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {1115unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);1116auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,1117Offset);1118BuildMI(MBB, At, DL, CFID)1119.addCFIIndex(MF.addFrameInst(OffReg));1120} else {1121// Split the double regs into subregs, and generate appropriate1122// cfi_offsets.1123// The only reason, we are split double regs is, llvm-mc does not1124// 
understand paired registers for cfi_offset.1125// Eg .cfi_offset r1:0, -6411261127Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);1128Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);1129unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);1130unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);1131auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,1132Offset+4);1133BuildMI(MBB, At, DL, CFID)1134.addCFIIndex(MF.addFrameInst(OffHi));1135auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,1136Offset);1137BuildMI(MBB, At, DL, CFID)1138.addCFIIndex(MF.addFrameInst(OffLo));1139}1140}1141}11421143bool HexagonFrameLowering::hasFP(const MachineFunction &MF) const {1144if (MF.getFunction().hasFnAttribute(Attribute::Naked))1145return false;11461147auto &MFI = MF.getFrameInfo();1148auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();1149bool HasExtraAlign = HRI.hasStackRealignment(MF);1150bool HasAlloca = MFI.hasVarSizedObjects();11511152// Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think1153// that this shouldn't be required, but doing so now because gcc does and1154// gdb can't break at the start of the function without it. Will remove if1155// this turns out to be a gdb bug.1156//1157if (MF.getTarget().getOptLevel() == CodeGenOptLevel::None)1158return true;11591160// By default we want to use SP (since it's always there). FP requires1161// some setup (i.e. ALLOCFRAME).1162// Both, alloca and stack alignment modify the stack pointer by an1163// undetermined value, so we need to save it at the entry to the function1164// (i.e. 
use allocframe).1165if (HasAlloca || HasExtraAlign)1166return true;11671168if (MFI.getStackSize() > 0) {1169// If FP-elimination is disabled, we have to use FP at this point.1170const TargetMachine &TM = MF.getTarget();1171if (TM.Options.DisableFramePointerElim(MF) || !EliminateFramePointer)1172return true;1173if (EnableStackOVFSanitizer)1174return true;1175}11761177const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();1178if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())1179return true;11801181return false;1182}11831184enum SpillKind {1185SK_ToMem,1186SK_FromMem,1187SK_FromMemTailcall1188};11891190static const char *getSpillFunctionFor(Register MaxReg, SpillKind SpillType,1191bool Stkchk = false) {1192const char * V4SpillToMemoryFunctions[] = {1193"__save_r16_through_r17",1194"__save_r16_through_r19",1195"__save_r16_through_r21",1196"__save_r16_through_r23",1197"__save_r16_through_r25",1198"__save_r16_through_r27" };11991200const char * V4SpillToMemoryStkchkFunctions[] = {1201"__save_r16_through_r17_stkchk",1202"__save_r16_through_r19_stkchk",1203"__save_r16_through_r21_stkchk",1204"__save_r16_through_r23_stkchk",1205"__save_r16_through_r25_stkchk",1206"__save_r16_through_r27_stkchk" };12071208const char * V4SpillFromMemoryFunctions[] = {1209"__restore_r16_through_r17_and_deallocframe",1210"__restore_r16_through_r19_and_deallocframe",1211"__restore_r16_through_r21_and_deallocframe",1212"__restore_r16_through_r23_and_deallocframe",1213"__restore_r16_through_r25_and_deallocframe",1214"__restore_r16_through_r27_and_deallocframe" };12151216const char * V4SpillFromMemoryTailcallFunctions[] = 
{1217"__restore_r16_through_r17_and_deallocframe_before_tailcall",1218"__restore_r16_through_r19_and_deallocframe_before_tailcall",1219"__restore_r16_through_r21_and_deallocframe_before_tailcall",1220"__restore_r16_through_r23_and_deallocframe_before_tailcall",1221"__restore_r16_through_r25_and_deallocframe_before_tailcall",1222"__restore_r16_through_r27_and_deallocframe_before_tailcall"1223};12241225const char **SpillFunc = nullptr;12261227switch(SpillType) {1228case SK_ToMem:1229SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions1230: V4SpillToMemoryFunctions;1231break;1232case SK_FromMem:1233SpillFunc = V4SpillFromMemoryFunctions;1234break;1235case SK_FromMemTailcall:1236SpillFunc = V4SpillFromMemoryTailcallFunctions;1237break;1238}1239assert(SpillFunc && "Unknown spill kind");12401241// Spill all callee-saved registers up to the highest register used.1242switch (MaxReg) {1243case Hexagon::R17:1244return SpillFunc[0];1245case Hexagon::R19:1246return SpillFunc[1];1247case Hexagon::R21:1248return SpillFunc[2];1249case Hexagon::R23:1250return SpillFunc[3];1251case Hexagon::R25:1252return SpillFunc[4];1253case Hexagon::R27:1254return SpillFunc[5];1255default:1256llvm_unreachable("Unhandled maximum callee save register");1257}1258return nullptr;1259}12601261StackOffset1262HexagonFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,1263Register &FrameReg) const {1264auto &MFI = MF.getFrameInfo();1265auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();12661267int Offset = MFI.getObjectOffset(FI);1268bool HasAlloca = MFI.hasVarSizedObjects();1269bool HasExtraAlign = HRI.hasStackRealignment(MF);1270bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOptLevel::None;12711272auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();1273unsigned FrameSize = MFI.getStackSize();1274Register SP = HRI.getStackRegister();1275Register FP = HRI.getFrameRegister();1276Register AP = HMFI.getStackAlignBaseReg();1277// It may happen that AP will be absent 
even HasAlloca && HasExtraAlign1278// is true. HasExtraAlign may be set because of vector spills, without1279// aligned locals or aligned outgoing function arguments. Since vector1280// spills will ultimately be "unaligned", it is safe to use FP as the1281// base register.1282// In fact, in such a scenario the stack is actually not required to be1283// aligned, although it may end up being aligned anyway, since this1284// particular case is not easily detectable. The alignment will be1285// unnecessary, but not incorrect.1286// Unfortunately there is no quick way to verify that the above is1287// indeed the case (and that it's not a result of an error), so just1288// assume that missing AP will be replaced by FP.1289// (A better fix would be to rematerialize AP from FP and always align1290// vector spills.)1291bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).1292// Use FP at -O0, except when there are objects with extra alignment.1293// That additional alignment requirement may cause a pad to be inserted,1294// which will make it impossible to use FP to access objects located1295// past the pad.1296if (NoOpt && !HasExtraAlign)1297UseFP = true;1298if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {1299// Fixed and preallocated objects will be located before any padding1300// so FP must be used to access them.1301UseFP |= (HasAlloca || HasExtraAlign);1302} else {1303if (HasAlloca) {1304if (HasExtraAlign)1305UseAP = true;1306else1307UseFP = true;1308}1309}13101311// If FP was picked, then there had better be FP.1312bool HasFP = hasFP(MF);1313assert((HasFP || !UseFP) && "This function must have frame pointer");13141315// Having FP implies allocframe. Allocframe will store extra 8 bytes:1316// FP/LR. 
If the base register is used to access an object across these1317// 8 bytes, then the offset will need to be adjusted by 8.1318//1319// After allocframe:1320// HexagonISelLowering adds 8 to ---+1321// the offsets of all stack-based |1322// arguments (*) |1323// |1324// getObjectOffset < 0 0 8 getObjectOffset >= 81325// ------------------------+-----+------------------------> increasing1326// <local objects> |FP/LR| <input arguments> addresses1327// -----------------+------+-----+------------------------>1328// | |1329// SP/AP point --+ +-- FP points here (**)1330// somewhere on1331// this side of FP/LR1332//1333// (*) See LowerFormalArguments. The FP/LR is assumed to be present.1334// (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.13351336// The lowering assumes that FP/LR is present, and so the offsets of1337// the formal arguments start at 8. If FP/LR is not there we need to1338// reduce the offset by 8.1339if (Offset > 0 && !HasFP)1340Offset -= 8;13411342if (UseFP)1343FrameReg = FP;1344else if (UseAP)1345FrameReg = AP;1346else1347FrameReg = SP;13481349// Calculate the actual offset in the instruction. 
If there is no FP1350// (in other words, no allocframe), then SP will not be adjusted (i.e.1351// there will be no SP -= FrameSize), so the frame size should not be1352// added to the calculated offset.1353int RealOffset = Offset;1354if (!UseFP && !UseAP)1355RealOffset = FrameSize+Offset;1356return StackOffset::getFixed(RealOffset);1357}13581359bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,1360const CSIVect &CSI, const HexagonRegisterInfo &HRI,1361bool &PrologueStubs) const {1362if (CSI.empty())1363return true;13641365MachineBasicBlock::iterator MI = MBB.begin();1366PrologueStubs = false;1367MachineFunction &MF = *MBB.getParent();1368auto &HST = MF.getSubtarget<HexagonSubtarget>();1369auto &HII = *HST.getInstrInfo();13701371if (useSpillFunction(MF, CSI)) {1372PrologueStubs = true;1373Register MaxReg = getMaxCalleeSavedReg(CSI, HRI);1374bool StkOvrFlowEnabled = EnableStackOVFSanitizer;1375const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,1376StkOvrFlowEnabled);1377auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());1378bool IsPIC = HTM.isPositionIndependent();1379bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;13801381// Call spill function.1382DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();1383unsigned SpillOpc;1384if (StkOvrFlowEnabled) {1385if (LongCalls)1386SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC1387: Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;1388else1389SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC1390: Hexagon::SAVE_REGISTERS_CALL_V4STK;1391} else {1392if (LongCalls)1393SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC1394: Hexagon::SAVE_REGISTERS_CALL_V4_EXT;1395else1396SpillOpc = IsPIC ? 
Hexagon::SAVE_REGISTERS_CALL_V4_PIC1397: Hexagon::SAVE_REGISTERS_CALL_V4;1398}13991400MachineInstr *SaveRegsCall =1401BuildMI(MBB, MI, DL, HII.get(SpillOpc))1402.addExternalSymbol(SpillFun);14031404// Add callee-saved registers as use.1405addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);1406// Add live in registers.1407for (const CalleeSavedInfo &I : CSI)1408MBB.addLiveIn(I.getReg());1409return true;1410}14111412for (const CalleeSavedInfo &I : CSI) {1413Register Reg = I.getReg();1414// Add live in registers. We treat eh_return callee saved register r0 - r31415// specially. They are not really callee saved registers as they are not1416// supposed to be killed.1417bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);1418int FI = I.getFrameIdx();1419const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);1420HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, &HRI, Register());1421if (IsKill)1422MBB.addLiveIn(Reg);1423}1424return true;1425}14261427bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,1428const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {1429if (CSI.empty())1430return false;14311432MachineBasicBlock::iterator MI = MBB.getFirstTerminator();1433MachineFunction &MF = *MBB.getParent();1434auto &HST = MF.getSubtarget<HexagonSubtarget>();1435auto &HII = *HST.getInstrInfo();14361437if (useRestoreFunction(MF, CSI)) {1438bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);1439Register MaxR = getMaxCalleeSavedReg(CSI, HRI);1440SpillKind Kind = HasTC ? SK_FromMemTailcall : SK_FromMem;1441const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);1442auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());1443bool IsPIC = HTM.isPositionIndependent();1444bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;14451446// Call spill function.1447DebugLoc DL = MI != MBB.end() ? 
MI->getDebugLoc()1448: MBB.findDebugLoc(MBB.end());1449MachineInstr *DeallocCall = nullptr;14501451if (HasTC) {1452unsigned RetOpc;1453if (LongCalls)1454RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC1455: Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;1456else1457RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC1458: Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;1459DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))1460.addExternalSymbol(RestoreFn);1461} else {1462// The block has a return.1463MachineBasicBlock::iterator It = MBB.getFirstTerminator();1464assert(It->isReturn() && std::next(It) == MBB.end());1465unsigned RetOpc;1466if (LongCalls)1467RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC1468: Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;1469else1470RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC1471: Hexagon::RESTORE_DEALLOC_RET_JMP_V4;1472DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))1473.addExternalSymbol(RestoreFn);1474// Transfer the function live-out registers.1475DeallocCall->copyImplicitOps(MF, *It);1476}1477addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);1478return true;1479}14801481for (const CalleeSavedInfo &I : CSI) {1482Register Reg = I.getReg();1483const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);1484int FI = I.getFrameIdx();1485HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, &HRI, Register());1486}14871488return true;1489}14901491MachineBasicBlock::iterator HexagonFrameLowering::eliminateCallFramePseudoInstr(1492MachineFunction &MF, MachineBasicBlock &MBB,1493MachineBasicBlock::iterator I) const {1494MachineInstr &MI = *I;1495unsigned Opc = MI.getOpcode();1496(void)Opc; // Silence compiler warning.1497assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&1498"Cannot handle this call frame pseudo instruction");1499return MBB.erase(I);1500}15011502void HexagonFrameLowering::processFunctionBeforeFrameFinalized(1503MachineFunction &MF, 
RegScavenger *RS) const {1504// If this function has uses aligned stack and also has variable sized stack1505// objects, then we need to map all spill slots to fixed positions, so that1506// they can be accessed through FP. Otherwise they would have to be accessed1507// via AP, which may not be available at the particular place in the program.1508MachineFrameInfo &MFI = MF.getFrameInfo();1509bool HasAlloca = MFI.hasVarSizedObjects();1510bool NeedsAlign = (MFI.getMaxAlign() > getStackAlign());15111512if (!HasAlloca || !NeedsAlign)1513return;15141515// Set the physical aligned-stack base address register.1516Register AP = 0;1517if (const MachineInstr *AI = getAlignaInstr(MF))1518AP = AI->getOperand(0).getReg();1519auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();1520assert(!AP.isValid() || AP.isPhysical());1521HMFI.setStackAlignBaseReg(AP);1522}15231524/// Returns true if there are no caller-saved registers available in class RC.1525static bool needToReserveScavengingSpillSlots(MachineFunction &MF,1526const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {1527MachineRegisterInfo &MRI = MF.getRegInfo();15281529auto IsUsed = [&HRI,&MRI] (Register Reg) -> bool {1530for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)1531if (MRI.isPhysRegUsed(*AI))1532return true;1533return false;1534};15351536// Check for an unused caller-saved register. 
Callee-saved registers1537// have become pristine by now.1538for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)1539if (!IsUsed(*P))1540return false;15411542// All caller-saved registers are used.1543return true;1544}15451546#ifndef NDEBUG1547static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI) {1548dbgs() << '{';1549for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {1550Register R = x;1551dbgs() << ' ' << printReg(R, &TRI);1552}1553dbgs() << " }";1554}1555#endif15561557bool HexagonFrameLowering::assignCalleeSavedSpillSlots(MachineFunction &MF,1558const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {1559LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');1560MachineFrameInfo &MFI = MF.getFrameInfo();1561BitVector SRegs(Hexagon::NUM_TARGET_REGS);15621563// Generate a set of unique, callee-saved registers (SRegs), where each1564// register in the set is maximal in terms of sub-/super-register relation,1565// i.e. 
for each R in SRegs, no proper super-register of R is also in SRegs.15661567// (1) For each callee-saved register, add that register and all of its1568// sub-registers to SRegs.1569LLVM_DEBUG(dbgs() << "Initial CS registers: {");1570for (const CalleeSavedInfo &I : CSI) {1571Register R = I.getReg();1572LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));1573for (MCPhysReg SR : TRI->subregs_inclusive(R))1574SRegs[SR] = true;1575}1576LLVM_DEBUG(dbgs() << " }\n");1577LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);1578dbgs() << "\n");15791580// (2) For each reserved register, remove that register and all of its1581// sub- and super-registers from SRegs.1582BitVector Reserved = TRI->getReservedRegs(MF);1583// Unreserve the stack align register: it is reserved for this function1584// only, it still needs to be saved/restored.1585Register AP =1586MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();1587if (AP.isValid()) {1588Reserved[AP] = false;1589// Unreserve super-regs if no other subregisters are reserved.1590for (MCPhysReg SP : TRI->superregs(AP)) {1591bool HasResSub = false;1592for (MCPhysReg SB : TRI->subregs(SP)) {1593if (!Reserved[SB])1594continue;1595HasResSub = true;1596break;1597}1598if (!HasResSub)1599Reserved[SP] = false;1600}1601}16021603for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {1604Register R = x;1605for (MCPhysReg SR : TRI->superregs_inclusive(R))1606SRegs[SR] = false;1607}1608LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI);1609dbgs() << "\n");1610LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);1611dbgs() << "\n");16121613// (3) Collect all registers that have at least one sub-register in SRegs,1614// and also have no sub-registers that are reserved. 
These will be the can-1615// didates for saving as a whole instead of their individual sub-registers.1616// (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)1617BitVector TmpSup(Hexagon::NUM_TARGET_REGS);1618for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {1619Register R = x;1620for (MCPhysReg SR : TRI->superregs(R))1621TmpSup[SR] = true;1622}1623for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {1624Register R = x;1625for (MCPhysReg SR : TRI->subregs_inclusive(R)) {1626if (!Reserved[SR])1627continue;1628TmpSup[R] = false;1629break;1630}1631}1632LLVM_DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI);1633dbgs() << "\n");16341635// (4) Include all super-registers found in (3) into SRegs.1636SRegs |= TmpSup;1637LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);1638dbgs() << "\n");16391640// (5) For each register R in SRegs, if any super-register of R is in SRegs,1641// remove R from SRegs.1642for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {1643Register R = x;1644for (MCPhysReg SR : TRI->superregs(R)) {1645if (!SRegs[SR])1646continue;1647SRegs[R] = false;1648break;1649}1650}1651LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);1652dbgs() << "\n");16531654// Now, for each register that has a fixed stack slot, create the stack1655// object for it.1656CSI.clear();16571658using SpillSlot = TargetFrameLowering::SpillSlot;16591660unsigned NumFixed;1661int64_t MinOffset = 0; // CS offsets are negative.1662const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);1663for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {1664if (!SRegs[S->Reg])1665continue;1666const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);1667int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);1668MinOffset = std::min(MinOffset, S->Offset);1669CSI.push_back(CalleeSavedInfo(S->Reg, FI));1670SRegs[S->Reg] = false;1671}16721673// There can 
be some registers that don't have fixed slots. For example,1674// we need to store R0-R3 in functions with exception handling. For each1675// such register, create a non-fixed stack object.1676for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {1677Register R = x;1678const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);1679unsigned Size = TRI->getSpillSize(*RC);1680int64_t Off = MinOffset - Size;1681Align Alignment = std::min(TRI->getSpillAlign(*RC), getStackAlign());1682Off &= -Alignment.value();1683int FI = MFI.CreateFixedSpillStackObject(Size, Off);1684MinOffset = std::min(MinOffset, Off);1685CSI.push_back(CalleeSavedInfo(R, FI));1686SRegs[R] = false;1687}16881689LLVM_DEBUG({1690dbgs() << "CS information: {";1691for (const CalleeSavedInfo &I : CSI) {1692int FI = I.getFrameIdx();1693int Off = MFI.getObjectOffset(FI);1694dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp";1695if (Off >= 0)1696dbgs() << '+';1697dbgs() << Off;1698}1699dbgs() << " }\n";1700});17011702#ifndef NDEBUG1703// Verify that all registers were handled.1704bool MissedReg = false;1705for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {1706Register R = x;1707dbgs() << printReg(R, TRI) << ' ';1708MissedReg = true;1709}1710if (MissedReg)1711llvm_unreachable("...there are unhandled callee-saved registers!");1712#endif17131714return true;1715}17161717bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,1718MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1719const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1720MachineInstr *MI = &*It;1721DebugLoc DL = MI->getDebugLoc();1722Register DstR = MI->getOperand(0).getReg();1723Register SrcR = MI->getOperand(1).getReg();1724if (!Hexagon::ModRegsRegClass.contains(DstR) ||1725!Hexagon::ModRegsRegClass.contains(SrcR))1726return false;17271728Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);1729BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), 
TmpR).add(MI->getOperand(1));1730BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)1731.addReg(TmpR, RegState::Kill);17321733NewRegs.push_back(TmpR);1734B.erase(It);1735return true;1736}17371738bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,1739MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1740const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1741MachineInstr *MI = &*It;1742if (!MI->getOperand(0).isFI())1743return false;17441745DebugLoc DL = MI->getDebugLoc();1746unsigned Opc = MI->getOpcode();1747Register SrcR = MI->getOperand(2).getReg();1748bool IsKill = MI->getOperand(2).isKill();1749int FI = MI->getOperand(0).getIndex();17501751// TmpR = C2_tfrpr SrcR if SrcR is a predicate register1752// TmpR = A2_tfrcrr SrcR if SrcR is a modifier register1753Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);1754unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr1755: Hexagon::A2_tfrcrr;1756BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)1757.addReg(SrcR, getKillRegState(IsKill));17581759// S2_storeri_io FI, 0, TmpR1760BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))1761.addFrameIndex(FI)1762.addImm(0)1763.addReg(TmpR, RegState::Kill)1764.cloneMemRefs(*MI);17651766NewRegs.push_back(TmpR);1767B.erase(It);1768return true;1769}17701771bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,1772MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1773const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1774MachineInstr *MI = &*It;1775if (!MI->getOperand(1).isFI())1776return false;17771778DebugLoc DL = MI->getDebugLoc();1779unsigned Opc = MI->getOpcode();1780Register DstR = MI->getOperand(0).getReg();1781int FI = MI->getOperand(1).getIndex();17821783// TmpR = L2_loadri_io FI, 01784Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);1785BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), 
TmpR)1786.addFrameIndex(FI)1787.addImm(0)1788.cloneMemRefs(*MI);17891790// DstR = C2_tfrrp TmpR if DstR is a predicate register1791// DstR = A2_tfrrcr TmpR if DstR is a modifier register1792unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp1793: Hexagon::A2_tfrrcr;1794BuildMI(B, It, DL, HII.get(TfrOpc), DstR)1795.addReg(TmpR, RegState::Kill);17961797NewRegs.push_back(TmpR);1798B.erase(It);1799return true;1800}18011802bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,1803MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1804const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1805MachineInstr *MI = &*It;1806if (!MI->getOperand(0).isFI())1807return false;18081809DebugLoc DL = MI->getDebugLoc();1810Register SrcR = MI->getOperand(2).getReg();1811bool IsKill = MI->getOperand(2).isKill();1812int FI = MI->getOperand(0).getIndex();1813auto *RC = &Hexagon::HvxVRRegClass;18141815// Insert transfer to general vector register.1816// TmpR0 = A2_tfrsi 0x010101011817// TmpR1 = V6_vandqrt Qx, TmpR01818// store FI, 0, TmpR11819Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);1820Register TmpR1 = MRI.createVirtualRegister(RC);18211822BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)1823.addImm(0x01010101);18241825BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)1826.addReg(SrcR, getKillRegState(IsKill))1827.addReg(TmpR0, RegState::Kill);18281829auto *HRI = B.getParent()->getSubtarget<HexagonSubtarget>().getRegisterInfo();1830HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, HRI, Register());1831expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);18321833NewRegs.push_back(TmpR0);1834NewRegs.push_back(TmpR1);1835B.erase(It);1836return true;1837}18381839bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,1840MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1841const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1842MachineInstr *MI = &*It;1843if 
(!MI->getOperand(1).isFI())1844return false;18451846DebugLoc DL = MI->getDebugLoc();1847Register DstR = MI->getOperand(0).getReg();1848int FI = MI->getOperand(1).getIndex();1849auto *RC = &Hexagon::HvxVRRegClass;18501851// TmpR0 = A2_tfrsi 0x010101011852// TmpR1 = load FI, 01853// DstR = V6_vandvrt TmpR1, TmpR01854Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);1855Register TmpR1 = MRI.createVirtualRegister(RC);18561857BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)1858.addImm(0x01010101);1859MachineFunction &MF = *B.getParent();1860auto *HRI = MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();1861HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, HRI, Register());1862expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);18631864BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)1865.addReg(TmpR1, RegState::Kill)1866.addReg(TmpR0, RegState::Kill);18671868NewRegs.push_back(TmpR0);1869NewRegs.push_back(TmpR1);1870B.erase(It);1871return true;1872}18731874bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,1875MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1876const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1877MachineFunction &MF = *B.getParent();1878auto &MFI = MF.getFrameInfo();1879auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();1880MachineInstr *MI = &*It;1881if (!MI->getOperand(0).isFI())1882return false;18831884// It is possible that the double vector being stored is only partially1885// defined. 
From the point of view of the liveness tracking, it is ok to1886// store it as a whole, but if we break it up we may end up storing a1887// register that is entirely undefined.1888LivePhysRegs LPR(HRI);1889LPR.addLiveIns(B);1890SmallVector<std::pair<MCPhysReg, const MachineOperand*>,2> Clobbers;1891for (auto R = B.begin(); R != It; ++R) {1892Clobbers.clear();1893LPR.stepForward(*R, Clobbers);1894}18951896DebugLoc DL = MI->getDebugLoc();1897Register SrcR = MI->getOperand(2).getReg();1898Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);1899Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);1900bool IsKill = MI->getOperand(2).isKill();1901int FI = MI->getOperand(0).getIndex();19021903unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);1904Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);1905Align HasAlign = MFI.getObjectAlign(FI);1906unsigned StoreOpc;19071908// Store low part.1909if (LPR.contains(SrcLo)) {1910StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai1911: Hexagon::V6_vS32Ub_ai;1912BuildMI(B, It, DL, HII.get(StoreOpc))1913.addFrameIndex(FI)1914.addImm(0)1915.addReg(SrcLo, getKillRegState(IsKill))1916.cloneMemRefs(*MI);1917}19181919// Store high part.1920if (LPR.contains(SrcHi)) {1921StoreOpc = NeedAlign <= HasAlign ? 
Hexagon::V6_vS32b_ai1922: Hexagon::V6_vS32Ub_ai;1923BuildMI(B, It, DL, HII.get(StoreOpc))1924.addFrameIndex(FI)1925.addImm(Size)1926.addReg(SrcHi, getKillRegState(IsKill))1927.cloneMemRefs(*MI);1928}19291930B.erase(It);1931return true;1932}19331934bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,1935MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1936const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1937MachineFunction &MF = *B.getParent();1938auto &MFI = MF.getFrameInfo();1939auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();1940MachineInstr *MI = &*It;1941if (!MI->getOperand(1).isFI())1942return false;19431944DebugLoc DL = MI->getDebugLoc();1945Register DstR = MI->getOperand(0).getReg();1946Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);1947Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);1948int FI = MI->getOperand(1).getIndex();19491950unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);1951Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);1952Align HasAlign = MFI.getObjectAlign(FI);1953unsigned LoadOpc;19541955// Load low part.1956LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai1957: Hexagon::V6_vL32Ub_ai;1958BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)1959.addFrameIndex(FI)1960.addImm(0)1961.cloneMemRefs(*MI);19621963// Load high part.1964LoadOpc = NeedAlign <= HasAlign ? 
Hexagon::V6_vL32b_ai1965: Hexagon::V6_vL32Ub_ai;1966BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)1967.addFrameIndex(FI)1968.addImm(Size)1969.cloneMemRefs(*MI);19701971B.erase(It);1972return true;1973}19741975bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,1976MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,1977const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {1978MachineFunction &MF = *B.getParent();1979auto &MFI = MF.getFrameInfo();1980MachineInstr *MI = &*It;1981if (!MI->getOperand(0).isFI())1982return false;19831984auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();1985DebugLoc DL = MI->getDebugLoc();1986Register SrcR = MI->getOperand(2).getReg();1987bool IsKill = MI->getOperand(2).isKill();1988int FI = MI->getOperand(0).getIndex();19891990Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);1991Align HasAlign = MFI.getObjectAlign(FI);1992unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai1993: Hexagon::V6_vS32Ub_ai;1994BuildMI(B, It, DL, HII.get(StoreOpc))1995.addFrameIndex(FI)1996.addImm(0)1997.addReg(SrcR, getKillRegState(IsKill))1998.cloneMemRefs(*MI);19992000B.erase(It);2001return true;2002}20032004bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,2005MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,2006const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {2007MachineFunction &MF = *B.getParent();2008auto &MFI = MF.getFrameInfo();2009MachineInstr *MI = &*It;2010if (!MI->getOperand(1).isFI())2011return false;20122013auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();2014DebugLoc DL = MI->getDebugLoc();2015Register DstR = MI->getOperand(0).getReg();2016int FI = MI->getOperand(1).getIndex();20172018Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);2019Align HasAlign = MFI.getObjectAlign(FI);2020unsigned LoadOpc = NeedAlign <= HasAlign ? 
Hexagon::V6_vL32b_ai2021: Hexagon::V6_vL32Ub_ai;2022BuildMI(B, It, DL, HII.get(LoadOpc), DstR)2023.addFrameIndex(FI)2024.addImm(0)2025.cloneMemRefs(*MI);20262027B.erase(It);2028return true;2029}20302031bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,2032SmallVectorImpl<Register> &NewRegs) const {2033auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();2034MachineRegisterInfo &MRI = MF.getRegInfo();2035bool Changed = false;20362037for (auto &B : MF) {2038// Traverse the basic block.2039MachineBasicBlock::iterator NextI;2040for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {2041MachineInstr *MI = &*I;2042NextI = std::next(I);2043unsigned Opc = MI->getOpcode();20442045switch (Opc) {2046case TargetOpcode::COPY:2047Changed |= expandCopy(B, I, MRI, HII, NewRegs);2048break;2049case Hexagon::STriw_pred:2050case Hexagon::STriw_ctr:2051Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);2052break;2053case Hexagon::LDriw_pred:2054case Hexagon::LDriw_ctr:2055Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);2056break;2057case Hexagon::PS_vstorerq_ai:2058Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);2059break;2060case Hexagon::PS_vloadrq_ai:2061Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);2062break;2063case Hexagon::PS_vloadrw_ai:2064Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);2065break;2066case Hexagon::PS_vstorerw_ai:2067Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);2068break;2069}2070}2071}20722073return Changed;2074}20752076void HexagonFrameLowering::determineCalleeSaves(MachineFunction &MF,2077BitVector &SavedRegs,2078RegScavenger *RS) const {2079auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();20802081SavedRegs.resize(HRI.getNumRegs());20822083// If we have a function containing __builtin_eh_return we want to spill and2084// restore all callee saved registers. 
Pretend that they are used.2085if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())2086for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)2087SavedRegs.set(*R);20882089// Replace predicate register pseudo spill code.2090SmallVector<Register,8> NewRegs;2091expandSpillMacros(MF, NewRegs);2092if (OptimizeSpillSlots && !isOptNone(MF))2093optimizeSpillSlots(MF, NewRegs);20942095// We need to reserve a spill slot if scavenging could potentially require2096// spilling a scavenged register.2097if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {2098MachineFrameInfo &MFI = MF.getFrameInfo();2099MachineRegisterInfo &MRI = MF.getRegInfo();2100SetVector<const TargetRegisterClass*> SpillRCs;2101// Reserve an int register in any case, because it could be used to hold2102// the stack offset in case it does not fit into a spill instruction.2103SpillRCs.insert(&Hexagon::IntRegsRegClass);21042105for (Register VR : NewRegs)2106SpillRCs.insert(MRI.getRegClass(VR));21072108for (const auto *RC : SpillRCs) {2109if (!needToReserveScavengingSpillSlots(MF, HRI, RC))2110continue;2111unsigned Num = 1;2112switch (RC->getID()) {2113case Hexagon::IntRegsRegClassID:2114Num = NumberScavengerSlots;2115break;2116case Hexagon::HvxQRRegClassID:2117Num = 2; // Vector predicate spills also need a vector register.2118break;2119}2120unsigned S = HRI.getSpillSize(*RC);2121Align A = HRI.getSpillAlign(*RC);2122for (unsigned i = 0; i < Num; i++) {2123int NewFI = MFI.CreateSpillStackObject(S, A);2124RS->addScavengingFrameIndex(NewFI);2125}2126}2127}21282129TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);2130}21312132Register HexagonFrameLowering::findPhysReg(MachineFunction &MF,2133HexagonBlockRanges::IndexRange &FIR,2134HexagonBlockRanges::InstrIndexMap &IndexMap,2135HexagonBlockRanges::RegToRangeMap &DeadMap,2136const TargetRegisterClass *RC) const {2137auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();2138auto &MRI = MF.getRegInfo();21392140auto isDead = 
[&FIR,&DeadMap] (Register Reg) -> bool {2141auto F = DeadMap.find({Reg,0});2142if (F == DeadMap.end())2143return false;2144for (auto &DR : F->second)2145if (DR.contains(FIR))2146return true;2147return false;2148};21492150for (Register Reg : RC->getRawAllocationOrder(MF)) {2151bool Dead = true;2152for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {2153if (isDead(R.Reg))2154continue;2155Dead = false;2156break;2157}2158if (Dead)2159return Reg;2160}2161return 0;2162}21632164void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,2165SmallVectorImpl<Register> &VRegs) const {2166auto &HST = MF.getSubtarget<HexagonSubtarget>();2167auto &HII = *HST.getInstrInfo();2168auto &HRI = *HST.getRegisterInfo();2169auto &MRI = MF.getRegInfo();2170HexagonBlockRanges HBR(MF);21712172using BlockIndexMap =2173std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;2174using BlockRangeMap =2175std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;2176using IndexType = HexagonBlockRanges::IndexType;21772178struct SlotInfo {2179BlockRangeMap Map;2180unsigned Size = 0;2181const TargetRegisterClass *RC = nullptr;21822183SlotInfo() = default;2184};21852186BlockIndexMap BlockIndexes;2187SmallSet<int,4> BadFIs;2188std::map<int,SlotInfo> FIRangeMap;21892190// Accumulate register classes: get a common class for a pre-existing2191// class HaveRC and a new class NewRC. Return nullptr if a common class2192// cannot be found, otherwise return the resulting class. If HaveRC is2193// nullptr, assume that it is still unset.2194auto getCommonRC =2195[](const TargetRegisterClass *HaveRC,2196const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {2197if (HaveRC == nullptr || HaveRC == NewRC)2198return NewRC;2199// Different classes, both non-null. Pick the more general one.2200if (HaveRC->hasSubClassEq(NewRC))2201return HaveRC;2202if (NewRC->hasSubClassEq(HaveRC))2203return NewRC;2204return nullptr;2205};22062207// Scan all blocks in the function. 
Check all occurrences of frame indexes,2208// and collect relevant information.2209for (auto &B : MF) {2210std::map<int,IndexType> LastStore, LastLoad;2211// Emplace appears not to be supported in gcc 4.7.2-4.2212//auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));2213auto P = BlockIndexes.insert(2214std::make_pair(&B, HexagonBlockRanges::InstrIndexMap(B)));2215auto &IndexMap = P.first->second;2216LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"2217<< IndexMap << '\n');22182219for (auto &In : B) {2220int LFI, SFI;2221bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);2222bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);2223if (Load && Store) {2224// If it's both a load and a store, then we won't handle it.2225BadFIs.insert(LFI);2226BadFIs.insert(SFI);2227continue;2228}2229// Check for register classes of the register used as the source for2230// the store, and the register used as the destination for the load.2231// Also, only accept base+imm_offset addressing modes. Other addressing2232// modes can have side-effects (post-increments, etc.). For stack2233// slots they are very unlikely, so there is not much loss due to2234// this restriction.2235if (Load || Store) {2236int TFI = Load ? LFI : SFI;2237unsigned AM = HII.getAddrMode(In);2238SlotInfo &SI = FIRangeMap[TFI];2239bool Bad = (AM != HexagonII::BaseImmOffset);2240if (!Bad) {2241// If the addressing mode is ok, check the register class.2242unsigned OpNum = Load ? 
0 : 2;2243auto *RC = HII.getRegClass(In.getDesc(), OpNum, &HRI, MF);2244RC = getCommonRC(SI.RC, RC);2245if (RC == nullptr)2246Bad = true;2247else2248SI.RC = RC;2249}2250if (!Bad) {2251// Check sizes.2252unsigned S = HII.getMemAccessSize(In);2253if (SI.Size != 0 && SI.Size != S)2254Bad = true;2255else2256SI.Size = S;2257}2258if (!Bad) {2259for (auto *Mo : In.memoperands()) {2260if (!Mo->isVolatile() && !Mo->isAtomic())2261continue;2262Bad = true;2263break;2264}2265}2266if (Bad)2267BadFIs.insert(TFI);2268}22692270// Locate uses of frame indices.2271for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {2272const MachineOperand &Op = In.getOperand(i);2273if (!Op.isFI())2274continue;2275int FI = Op.getIndex();2276// Make sure that the following operand is an immediate and that2277// it is 0. This is the offset in the stack object.2278if (i+1 >= n || !In.getOperand(i+1).isImm() ||2279In.getOperand(i+1).getImm() != 0)2280BadFIs.insert(FI);2281if (BadFIs.count(FI))2282continue;22832284IndexType Index = IndexMap.getIndex(&In);2285if (Load) {2286if (LastStore[FI] == IndexType::None)2287LastStore[FI] = IndexType::Entry;2288LastLoad[FI] = Index;2289} else if (Store) {2290HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];2291if (LastStore[FI] != IndexType::None)2292RL.add(LastStore[FI], LastLoad[FI], false, false);2293else if (LastLoad[FI] != IndexType::None)2294RL.add(IndexType::Entry, LastLoad[FI], false, false);2295LastLoad[FI] = IndexType::None;2296LastStore[FI] = Index;2297} else {2298BadFIs.insert(FI);2299}2300}2301}23022303for (auto &I : LastLoad) {2304IndexType LL = I.second;2305if (LL == IndexType::None)2306continue;2307auto &RL = FIRangeMap[I.first].Map[&B];2308IndexType &LS = LastStore[I.first];2309if (LS != IndexType::None)2310RL.add(LS, LL, false, false);2311else2312RL.add(IndexType::Entry, LL, false, false);2313LS = IndexType::None;2314}2315for (auto &I : LastStore) {2316IndexType LS = I.second;2317if (LS == IndexType::None)2318continue;2319auto &RL 
= FIRangeMap[I.first].Map[&B];2320RL.add(LS, IndexType::None, false, false);2321}2322}23232324LLVM_DEBUG({2325for (auto &P : FIRangeMap) {2326dbgs() << "fi#" << P.first;2327if (BadFIs.count(P.first))2328dbgs() << " (bad)";2329dbgs() << " RC: ";2330if (P.second.RC != nullptr)2331dbgs() << HRI.getRegClassName(P.second.RC) << '\n';2332else2333dbgs() << "<null>\n";2334for (auto &R : P.second.Map)2335dbgs() << " " << printMBBReference(*R.first) << " { " << R.second2336<< "}\n";2337}2338});23392340// When a slot is loaded from in a block without being stored to in the2341// same block, it is live-on-entry to this block. To avoid CFG analysis,2342// consider this slot to be live-on-exit from all blocks.2343SmallSet<int,4> LoxFIs;23442345std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;23462347for (auto &P : FIRangeMap) {2348// P = pair(FI, map: BB->RangeList)2349if (BadFIs.count(P.first))2350continue;2351for (auto &B : MF) {2352auto F = P.second.Map.find(&B);2353// F = pair(BB, RangeList)2354if (F == P.second.Map.end() || F->second.empty())2355continue;2356HexagonBlockRanges::IndexRange &IR = F->second.front();2357if (IR.start() == IndexType::Entry)2358LoxFIs.insert(P.first);2359BlockFIMap[&B].push_back(P.first);2360}2361}23622363LLVM_DEBUG({2364dbgs() << "Block-to-FI map (* -- live-on-exit):\n";2365for (auto &P : BlockFIMap) {2366auto &FIs = P.second;2367if (FIs.empty())2368continue;2369dbgs() << " " << printMBBReference(*P.first) << ": {";2370for (auto I : FIs) {2371dbgs() << " fi#" << I;2372if (LoxFIs.count(I))2373dbgs() << '*';2374}2375dbgs() << " }\n";2376}2377});23782379#ifndef NDEBUG2380bool HasOptLimit = SpillOptMax.getPosition();2381#endif23822383// eliminate loads, when all loads eliminated, eliminate all stores.2384for (auto &B : MF) {2385auto F = BlockIndexes.find(&B);2386assert(F != BlockIndexes.end());2387HexagonBlockRanges::InstrIndexMap &IM = F->second;2388HexagonBlockRanges::RegToRangeMap LM = 
HBR.computeLiveMap(IM);2389HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);2390LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"2391<< HexagonBlockRanges::PrintRangeMap(DM, HRI));23922393for (auto FI : BlockFIMap[&B]) {2394if (BadFIs.count(FI))2395continue;2396LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');2397HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];2398for (auto &Range : RL) {2399LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');2400if (!IndexType::isInstr(Range.start()) ||2401!IndexType::isInstr(Range.end()))2402continue;2403MachineInstr &SI = *IM.getInstr(Range.start());2404MachineInstr &EI = *IM.getInstr(Range.end());2405assert(SI.mayStore() && "Unexpected start instruction");2406assert(EI.mayLoad() && "Unexpected end instruction");2407MachineOperand &SrcOp = SI.getOperand(2);24082409HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),2410SrcOp.getSubReg() };2411auto *RC = HII.getRegClass(SI.getDesc(), 2, &HRI, MF);2412// The this-> is needed to unconfuse MSVC.2413Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);2414LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)2415<< '\n');2416if (FoundR == 0)2417continue;2418#ifndef NDEBUG2419if (HasOptLimit) {2420if (SpillOptCount >= SpillOptMax)2421return;2422SpillOptCount++;2423}2424#endif24252426// Generate the copy-in: "FoundR = COPY SrcR" at the store location.2427MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;2428MachineInstr *CopyIn = nullptr;2429if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {2430const DebugLoc &DL = SI.getDebugLoc();2431CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)2432.add(SrcOp);2433}24342435++StartIt;2436// Check if this is a last store and the FI is live-on-exit.2437if (LoxFIs.count(FI) && (&Range == &RL.back())) {2438// Update store's source register.2439if (unsigned SR = SrcOp.getSubReg())2440SrcOp.setReg(HRI.getSubReg(FoundR, 
SR));2441else2442SrcOp.setReg(FoundR);2443SrcOp.setSubReg(0);2444// We are keeping this register live.2445SrcOp.setIsKill(false);2446} else {2447B.erase(&SI);2448IM.replaceInstr(&SI, CopyIn);2449}24502451auto EndIt = std::next(EI.getIterator());2452for (auto It = StartIt; It != EndIt; It = NextIt) {2453MachineInstr &MI = *It;2454NextIt = std::next(It);2455int TFI;2456if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)2457continue;2458Register DstR = MI.getOperand(0).getReg();2459assert(MI.getOperand(0).getSubReg() == 0);2460MachineInstr *CopyOut = nullptr;2461if (DstR != FoundR) {2462DebugLoc DL = MI.getDebugLoc();2463unsigned MemSize = HII.getMemAccessSize(MI);2464assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);2465unsigned CopyOpc = TargetOpcode::COPY;2466if (HII.isSignExtendingLoad(MI))2467CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;2468else if (HII.isZeroExtendingLoad(MI))2469CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;2470CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)2471.addReg(FoundR, getKillRegState(&MI == &EI));2472}2473IM.replaceInstr(&MI, CopyOut);2474B.erase(It);2475}24762477// Update the dead map.2478HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };2479for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))2480DM[RR].subtract(Range);2481} // for Range in range list2482}2483}2484}24852486void HexagonFrameLowering::expandAlloca(MachineInstr *AI,2487const HexagonInstrInfo &HII, Register SP, unsigned CF) const {2488MachineBasicBlock &MB = *AI->getParent();2489DebugLoc DL = AI->getDebugLoc();2490unsigned A = AI->getOperand(2).getImm();24912492// Have2493// Rd = alloca Rs, #A2494//2495// If Rs and Rd are different registers, use this sequence:2496// Rd = sub(r29, Rs)2497// r29 = sub(r29, Rs)2498// Rd = and(Rd, #-A) ; if necessary2499// r29 = and(r29, #-A) ; if necessary2500// Rd = add(Rd, #CF) ; CF size aligned to at most A2501// otherwise, do2502// Rd = sub(r29, Rs)2503// Rd = and(Rd, 
#-A) ; if necessary2504// r29 = Rd2505// Rd = add(Rd, #CF) ; CF size aligned to at most A25062507MachineOperand &RdOp = AI->getOperand(0);2508MachineOperand &RsOp = AI->getOperand(1);2509Register Rd = RdOp.getReg(), Rs = RsOp.getReg();25102511// Rd = sub(r29, Rs)2512BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)2513.addReg(SP)2514.addReg(Rs);2515if (Rs != Rd) {2516// r29 = sub(r29, Rs)2517BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP)2518.addReg(SP)2519.addReg(Rs);2520}2521if (A > 8) {2522// Rd = and(Rd, #-A)2523BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)2524.addReg(Rd)2525.addImm(-int64_t(A));2526if (Rs != Rd)2527BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)2528.addReg(SP)2529.addImm(-int64_t(A));2530}2531if (Rs == Rd) {2532// r29 = Rd2533BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP)2534.addReg(Rd);2535}2536if (CF > 0) {2537// Rd = add(Rd, #CF)2538BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd)2539.addReg(Rd)2540.addImm(CF);2541}2542}25432544bool HexagonFrameLowering::needsAligna(const MachineFunction &MF) const {2545const MachineFrameInfo &MFI = MF.getFrameInfo();2546if (!MFI.hasVarSizedObjects())2547return false;2548// Do not check for max stack object alignment here, because the stack2549// may not be complete yet. 
Assume that we will need PS_aligna if there2550// are variable-sized objects.2551return true;2552}25532554const MachineInstr *HexagonFrameLowering::getAlignaInstr(2555const MachineFunction &MF) const {2556for (auto &B : MF)2557for (auto &I : B)2558if (I.getOpcode() == Hexagon::PS_aligna)2559return &I;2560return nullptr;2561}25622563/// Adds all callee-saved registers as implicit uses or defs to the2564/// instruction.2565void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,2566const CSIVect &CSI, bool IsDef, bool IsKill) const {2567// Add the callee-saved registers as implicit uses.2568for (auto &R : CSI)2569MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));2570}25712572/// Determine whether the callee-saved register saves and restores should2573/// be generated via inline code. If this function returns "true", inline2574/// code will be generated. If this function returns "false", additional2575/// checks are performed, which may still lead to the inline code.2576bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,2577const CSIVect &CSI) const {2578if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl())2579return true;2580if (MF.getInfo<HexagonMachineFunctionInfo>()->hasEHReturn())2581return true;2582if (!hasFP(MF))2583return true;2584if (!isOptSize(MF) && !isMinSize(MF))2585if (MF.getTarget().getOptLevel() > CodeGenOptLevel::Default)2586return true;25872588// Check if CSI only has double registers, and if the registers form2589// a contiguous block starting from D8.2590BitVector Regs(Hexagon::NUM_TARGET_REGS);2591for (const CalleeSavedInfo &I : CSI) {2592Register R = I.getReg();2593if (!Hexagon::DoubleRegsRegClass.contains(R))2594return true;2595Regs[R] = true;2596}2597int F = Regs.find_first();2598if (F != Hexagon::D8)2599return true;2600while (F >= 0) {2601int N = Regs.find_next(F);2602if (N >= 0 && N != F+1)2603return true;2604F = N;2605}26062607return false;2608}26092610bool 
HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,2611const CSIVect &CSI) const {2612if (shouldInlineCSR(MF, CSI))2613return false;2614unsigned NumCSI = CSI.size();2615if (NumCSI <= 1)2616return false;26172618unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs2619: SpillFuncThreshold;2620return Threshold < NumCSI;2621}26222623bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,2624const CSIVect &CSI) const {2625if (shouldInlineCSR(MF, CSI))2626return false;2627// The restore functions do a bit more than just restoring registers.2628// The non-returning versions will go back directly to the caller's2629// caller, others will clean up the stack frame in preparation for2630// a tail call. Using them can still save code size even if only one2631// register is getting restores. Make the decision based on -Oz:2632// using -Os will use inline restore for a single register.2633if (isMinSize(MF))2634return true;2635unsigned NumCSI = CSI.size();2636if (NumCSI <= 1)2637return false;26382639unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-12640: SpillFuncThreshold;2641return Threshold < NumCSI;2642}26432644bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {2645unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);2646auto &HST = MF.getSubtarget<HexagonSubtarget>();2647// A fairly simplistic guess as to whether a potential load/store to a2648// stack location could require an extra register.2649if (HST.useHVXOps() && StackSize > 256)2650return true;26512652// Check if the function has store-immediate instructions that access2653// the stack. 
Since the offset field is not extendable, if the stack2654// size exceeds the offset limit (6 bits, shifted), the stores will2655// require a new base register.2656bool HasImmStack = false;2657unsigned MinLS = ~0u; // Log_2 of the memory access size.26582659for (const MachineBasicBlock &B : MF) {2660for (const MachineInstr &MI : B) {2661unsigned LS = 0;2662switch (MI.getOpcode()) {2663case Hexagon::S4_storeirit_io:2664case Hexagon::S4_storeirif_io:2665case Hexagon::S4_storeiri_io:2666++LS;2667[[fallthrough]];2668case Hexagon::S4_storeirht_io:2669case Hexagon::S4_storeirhf_io:2670case Hexagon::S4_storeirh_io:2671++LS;2672[[fallthrough]];2673case Hexagon::S4_storeirbt_io:2674case Hexagon::S4_storeirbf_io:2675case Hexagon::S4_storeirb_io:2676if (MI.getOperand(0).isFI())2677HasImmStack = true;2678MinLS = std::min(MinLS, LS);2679break;2680}2681}2682}26832684if (HasImmStack)2685return !isUInt<6>(StackSize >> MinLS);26862687return false;2688}26892690namespace {2691// Struct used by orderFrameObjects to help sort the stack objects.2692struct HexagonFrameSortingObject {2693bool IsValid = false;2694unsigned Index = 0; // Index of Object into MFI list.2695unsigned Size = 0;2696Align ObjectAlignment = Align(1); // Alignment of Object in bytes.2697};26982699struct HexagonFrameSortingComparator {2700inline bool operator()(const HexagonFrameSortingObject &A,2701const HexagonFrameSortingObject &B) const {2702return std::make_tuple(!A.IsValid, A.ObjectAlignment, A.Size) <2703std::make_tuple(!B.IsValid, B.ObjectAlignment, B.Size);2704}2705};2706} // namespace27072708// Sort objects on the stack by alignment value and then by size to minimize2709// padding.2710void HexagonFrameLowering::orderFrameObjects(2711const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {27122713if (ObjectsToAllocate.empty())2714return;27152716const MachineFrameInfo &MFI = MF.getFrameInfo();2717int NObjects = ObjectsToAllocate.size();27182719// Create an array of all MFI 
objects.2720SmallVector<HexagonFrameSortingObject> SortingObjects(2721MFI.getObjectIndexEnd());27222723for (int i = 0, j = 0, e = MFI.getObjectIndexEnd(); i < e && j != NObjects;2724++i) {2725if (i != ObjectsToAllocate[j])2726continue;2727j++;27282729// A variable size object has size equal to 0. Since Hexagon sets2730// getUseLocalStackAllocationBlock() to true, a local block is allocated2731// earlier. This case is not handled here for now.2732int Size = MFI.getObjectSize(i);2733if (Size == 0)2734return;27352736SortingObjects[i].IsValid = true;2737SortingObjects[i].Index = i;2738SortingObjects[i].Size = Size;2739SortingObjects[i].ObjectAlignment = MFI.getObjectAlign(i);2740}27412742// Sort objects by alignment and then by size.2743llvm::stable_sort(SortingObjects, HexagonFrameSortingComparator());27442745// Modify the original list to represent the final order.2746int i = NObjects;2747for (auto &Obj : SortingObjects) {2748if (i == 0)2749break;2750ObjectsToAllocate[--i] = Obj.Index;2751}2752}275327542755