Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Utils/InlineFunction.cpp
//===- InlineFunction.cpp - Code to perform function inlining ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements inlining of a function into a call site, resolving
// parameters and the return value as appropriate.
//
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BlockFrequencyInfo.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/IndirectCallVisitor.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ObjCARCAnalysisUtils.h"
#include "llvm/Analysis/ObjCARCUtil.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/AttributeMask.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ProfDataUtils.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Transforms/Utils/AssumeBundleBuilder.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <optional>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "inline-function"

using namespace llvm;
using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;

static cl::opt<bool>
    EnableNoAliasConversion("enable-noalias-to-md-conversion", cl::init(true),
                            cl::Hidden,
                            cl::desc("Convert noalias attributes to metadata during inlining."));

static cl::opt<bool>
    UseNoAliasIntrinsic("use-noalias-intrinsic-during-inlining", cl::Hidden,
                        cl::init(true),
                        cl::desc("Use the llvm.experimental.noalias.scope.decl "
                                 "intrinsic during inlining."));

// Disabled by default, because the added alignment assumptions may increase
// compile-time and block optimizations. This option is not suitable for use
// with frontends that emit comprehensive parameter alignment annotations.
static cl::opt<bool>
    PreserveAlignmentAssumptions("preserve-alignment-assumptions-during-inlining",
                                 cl::init(false), cl::Hidden,
                                 cl::desc("Convert align attributes to assumptions during inlining."));

static cl::opt<unsigned> InlinerAttributeWindow(
    "max-inst-checked-for-throw-during-inlining", cl::Hidden,
    cl::desc("the maximum number of instructions analyzed for may throw during "
             "attribute inference in inlined body"),
    cl::init(4));

namespace {

  /// A class for recording information about inlining a landing pad.
  class LandingPadInliningInfo {
    /// Destination of the invoke's unwind.
    BasicBlock *OuterResumeDest;

    /// Destination for the callee's resume.
    BasicBlock *InnerResumeDest = nullptr;

    /// LandingPadInst associated with the invoke.
    LandingPadInst *CallerLPad = nullptr;

    /// PHI for EH values from landingpad insts.
    PHINode *InnerEHValuesPHI = nullptr;

    SmallVector<Value*, 8> UnwindDestPHIValues;

  public:
    LandingPadInliningInfo(InvokeInst *II)
        : OuterResumeDest(II->getUnwindDest()) {
      // If there are PHI nodes in the unwind destination block, we need to keep
      // track of which values came into them from the invoke before removing
      // the edge from this block.
      BasicBlock *InvokeBB = II->getParent();
      BasicBlock::iterator I = OuterResumeDest->begin();
      for (; isa<PHINode>(I); ++I) {
        // Save the value to use for this edge.
        PHINode *PHI = cast<PHINode>(I);
        UnwindDestPHIValues.push_back(PHI->getIncomingValueForBlock(InvokeBB));
      }

      CallerLPad = cast<LandingPadInst>(I);
    }

    /// The outer unwind destination is the target of
    /// unwind edges introduced for calls within the inlined function.
    BasicBlock *getOuterResumeDest() const {
      return OuterResumeDest;
    }

    BasicBlock *getInnerResumeDest();

    LandingPadInst *getLandingPadInst() const { return CallerLPad; }

    /// Forward the 'resume' instruction to the caller's landing pad block.
    /// When the landing pad block has only one predecessor, this is
    /// a simple branch. When there is more than one predecessor, we need to
    /// split the landing pad block after the landingpad instruction and jump
    /// to there.
    void forwardResume(ResumeInst *RI,
                       SmallPtrSetImpl<LandingPadInst*> &InlinedLPads);

    /// Add incoming-PHI values to the unwind destination block for the given
    /// basic block, using the values for the original invoke's source block.
    void addIncomingPHIValuesFor(BasicBlock *BB) const {
      addIncomingPHIValuesForInto(BB, OuterResumeDest);
    }

    void addIncomingPHIValuesForInto(BasicBlock *src, BasicBlock *dest) const {
      BasicBlock::iterator I = dest->begin();
      for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
        PHINode *phi = cast<PHINode>(I);
        phi->addIncoming(UnwindDestPHIValues[i], src);
      }
    }
  };

} // end anonymous namespace

/// Get or create a target for the branch from ResumeInsts.
BasicBlock *LandingPadInliningInfo::getInnerResumeDest() {
  if (InnerResumeDest) return InnerResumeDest;

  // Split the landing pad.
  BasicBlock::iterator SplitPoint = ++CallerLPad->getIterator();
  InnerResumeDest =
    OuterResumeDest->splitBasicBlock(SplitPoint,
                                     OuterResumeDest->getName() + ".body");

  // The number of incoming edges we expect to the inner landing pad.
  const unsigned PHICapacity = 2;

  // Create corresponding new PHIs for all the PHIs in the outer landing pad.
  BasicBlock::iterator InsertPoint = InnerResumeDest->begin();
  BasicBlock::iterator I = OuterResumeDest->begin();
  for (unsigned i = 0, e = UnwindDestPHIValues.size(); i != e; ++i, ++I) {
    PHINode *OuterPHI = cast<PHINode>(I);
    PHINode *InnerPHI = PHINode::Create(OuterPHI->getType(), PHICapacity,
                                        OuterPHI->getName() + ".lpad-body");
    InnerPHI->insertBefore(InsertPoint);
    OuterPHI->replaceAllUsesWith(InnerPHI);
    InnerPHI->addIncoming(OuterPHI, OuterResumeDest);
  }

  // Create a PHI for the exception values.
  InnerEHValuesPHI =
      PHINode::Create(CallerLPad->getType(), PHICapacity, "eh.lpad-body");
  InnerEHValuesPHI->insertBefore(InsertPoint);
  CallerLPad->replaceAllUsesWith(InnerEHValuesPHI);
  InnerEHValuesPHI->addIncoming(CallerLPad, OuterResumeDest);

  // All done.
  return InnerResumeDest;
}

/// Forward the 'resume' instruction to the caller's landing pad block.
/// When the landing pad block has only one predecessor, this is a simple
/// branch. When there is more than one predecessor, we need to split the
/// landing pad block after the landingpad instruction and jump to there.
void LandingPadInliningInfo::forwardResume(
    ResumeInst *RI, SmallPtrSetImpl<LandingPadInst *> &InlinedLPads) {
  BasicBlock *Dest = getInnerResumeDest();
  BasicBlock *Src = RI->getParent();

  BranchInst::Create(Dest, Src);

  // Update the PHIs in the destination. They were inserted in an order which
  // makes this work.
  addIncomingPHIValuesForInto(Src, Dest);

  InnerEHValuesPHI->addIncoming(RI->getOperand(0), Src);
  RI->eraseFromParent();
}

/// Helper for getUnwindDestToken/getUnwindDestTokenHelper.
static Value *getParentPad(Value *EHPad) {
  if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
    return FPI->getParentPad();
  return cast<CatchSwitchInst>(EHPad)->getParentPad();
}

using UnwindDestMemoTy = DenseMap<Instruction *, Value *>;

/// Helper for getUnwindDestToken that does the descendant-ward part of
/// the search.
static Value *getUnwindDestTokenHelper(Instruction *EHPad,
                                       UnwindDestMemoTy &MemoMap) {
  SmallVector<Instruction *, 8> Worklist(1, EHPad);

  while (!Worklist.empty()) {
    Instruction *CurrentPad = Worklist.pop_back_val();
    // We only put pads on the worklist that aren't in the MemoMap. When
    // we find an unwind dest for a pad we may update its ancestors, but
    // the queue only ever contains uncles/great-uncles/etc. of CurrentPad,
    // so they should never get updated while queued on the worklist.
    assert(!MemoMap.count(CurrentPad));
    Value *UnwindDestToken = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(CurrentPad)) {
      if (CatchSwitch->hasUnwindDest()) {
        UnwindDestToken = CatchSwitch->getUnwindDest()->getFirstNonPHI();
      } else {
        // Catchswitch doesn't have a 'nounwind' variant, and one might be
        // annotated as "unwinds to caller" when really it's nounwind (see
        // e.g. SimplifyCFGOpt::SimplifyUnreachable), so we can't infer the
        // parent's unwind dest from this. We can check its catchpads'
        // descendants, since they might include a cleanuppad with an
        // "unwinds to caller" cleanupret, which can be trusted.
        for (auto HI = CatchSwitch->handler_begin(),
                  HE = CatchSwitch->handler_end();
             HI != HE && !UnwindDestToken; ++HI) {
          BasicBlock *HandlerBlock = *HI;
          auto *CatchPad = cast<CatchPadInst>(HandlerBlock->getFirstNonPHI());
          for (User *Child : CatchPad->users()) {
            // Intentionally ignore invokes here -- since the catchswitch is
            // marked "unwind to caller", it would be a verifier error if it
            // contained an invoke which unwinds out of it, so any invoke we'd
            // encounter must unwind to some child of the catch.
            if (!isa<CleanupPadInst>(Child) && !isa<CatchSwitchInst>(Child))
              continue;

            Instruction *ChildPad = cast<Instruction>(Child);
            auto Memo = MemoMap.find(ChildPad);
            if (Memo == MemoMap.end()) {
              // Haven't figured out this child pad yet; queue it.
              Worklist.push_back(ChildPad);
              continue;
            }
            // We've already checked this child, but might have found that
            // it offers no proof either way.
            Value *ChildUnwindDestToken = Memo->second;
            if (!ChildUnwindDestToken)
              continue;
            // We already know the child's unwind dest, which can either
            // be ConstantTokenNone to indicate unwind to caller, or can
            // be another child of the catchpad. Only the former indicates
            // the unwind dest of the catchswitch.
            if (isa<ConstantTokenNone>(ChildUnwindDestToken)) {
              UnwindDestToken = ChildUnwindDestToken;
              break;
            }
            assert(getParentPad(ChildUnwindDestToken) == CatchPad);
          }
        }
      }
    } else {
      auto *CleanupPad = cast<CleanupPadInst>(CurrentPad);
      for (User *U : CleanupPad->users()) {
        if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(U)) {
          if (BasicBlock *RetUnwindDest = CleanupRet->getUnwindDest())
            UnwindDestToken = RetUnwindDest->getFirstNonPHI();
          else
            UnwindDestToken = ConstantTokenNone::get(CleanupPad->getContext());
          break;
        }
        Value *ChildUnwindDestToken;
        if (auto *Invoke = dyn_cast<InvokeInst>(U)) {
          ChildUnwindDestToken = Invoke->getUnwindDest()->getFirstNonPHI();
        } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
          Instruction *ChildPad = cast<Instruction>(U);
          auto Memo = MemoMap.find(ChildPad);
          if (Memo == MemoMap.end()) {
            // Haven't resolved this child yet; queue it and keep searching.
            Worklist.push_back(ChildPad);
            continue;
          }
          // We've checked this child, but still need to ignore it if it
          // had no proof either way.
          ChildUnwindDestToken = Memo->second;
          if (!ChildUnwindDestToken)
            continue;
        } else {
          // Not a relevant user of the cleanuppad
          continue;
        }
        // In a well-formed program, the child/invoke must either unwind to
        // an(other) child of the cleanup, or exit the cleanup. In the
        // first case, continue searching.
        if (isa<Instruction>(ChildUnwindDestToken) &&
            getParentPad(ChildUnwindDestToken) == CleanupPad)
          continue;
        UnwindDestToken = ChildUnwindDestToken;
        break;
      }
    }
    // If we haven't found an unwind dest for CurrentPad, we may have queued its
    // children, so move on to the next in the worklist.
    if (!UnwindDestToken)
      continue;

    // Now we know that CurrentPad unwinds to UnwindDestToken. It also exits
    // any ancestors of CurrentPad up to but not including UnwindDestToken's
    // parent pad. Record this in the memo map, and check to see if the
    // original EHPad being queried is one of the ones exited.
    Value *UnwindParent;
    if (auto *UnwindPad = dyn_cast<Instruction>(UnwindDestToken))
      UnwindParent = getParentPad(UnwindPad);
    else
      UnwindParent = nullptr;
    bool ExitedOriginalPad = false;
    for (Instruction *ExitedPad = CurrentPad;
         ExitedPad && ExitedPad != UnwindParent;
         ExitedPad = dyn_cast<Instruction>(getParentPad(ExitedPad))) {
      // Skip over catchpads since they just follow their catchswitches.
      if (isa<CatchPadInst>(ExitedPad))
        continue;
      MemoMap[ExitedPad] = UnwindDestToken;
      ExitedOriginalPad |= (ExitedPad == EHPad);
    }

    if (ExitedOriginalPad)
      return UnwindDestToken;

    // Continue the search.
  }

  // No definitive information is contained within this funclet.
  return nullptr;
}

/// Given an EH pad, find where it unwinds. If it unwinds to an EH pad,
/// return that pad instruction. If it unwinds to caller, return
/// ConstantTokenNone. If it does not have a definitive unwind destination,
/// return nullptr.
///
/// This routine gets invoked for calls in funclets in inlinees when inlining
/// an invoke. Since many funclets don't have calls inside them, it's queried
/// on-demand rather than building a map of pads to unwind dests up front.
/// Determining a funclet's unwind dest may require recursively searching its
/// descendants, and also ancestors and cousins if the descendants don't provide
/// an answer. Since most funclets will have their unwind dest immediately
/// available as the unwind dest of a catchswitch or cleanupret, this routine
/// searches top-down from the given pad and then up. To avoid worst-case
/// quadratic run-time given that approach, it uses a memo map to avoid
/// re-processing funclet trees. The callers that rewrite the IR as they go
/// take advantage of this, for correctness, by checking/forcing rewritten
/// pads' entries to match the original callee view.
static Value *getUnwindDestToken(Instruction *EHPad,
                                 UnwindDestMemoTy &MemoMap) {
  // Catchpads unwind to the same place as their catchswitch;
  // redirect any queries on catchpads so the code below can
  // deal with just catchswitches and cleanuppads.
  if (auto *CPI = dyn_cast<CatchPadInst>(EHPad))
    EHPad = CPI->getCatchSwitch();

  // Check if we've already determined the unwind dest for this pad.
  auto Memo = MemoMap.find(EHPad);
  if (Memo != MemoMap.end())
    return Memo->second;

  // Search EHPad and, if necessary, its descendants.
  Value *UnwindDestToken = getUnwindDestTokenHelper(EHPad, MemoMap);
  assert((UnwindDestToken == nullptr) != (MemoMap.count(EHPad) != 0));
  if (UnwindDestToken)
    return UnwindDestToken;

  // No information is available for this EHPad from itself or any of its
  // descendants. An unwind all the way out to a pad in the caller would
  // need also to agree with the unwind dest of the parent funclet, so
  // search up the chain to try to find a funclet with information. Put
  // null entries in the memo map to avoid re-processing as we go up.
  MemoMap[EHPad] = nullptr;
#ifndef NDEBUG
  SmallPtrSet<Instruction *, 4> TempMemos;
  TempMemos.insert(EHPad);
#endif
  Instruction *LastUselessPad = EHPad;
  Value *AncestorToken;
  for (AncestorToken = getParentPad(EHPad);
       auto *AncestorPad = dyn_cast<Instruction>(AncestorToken);
       AncestorToken = getParentPad(AncestorToken)) {
    // Skip over catchpads since they just follow their catchswitches.
    if (isa<CatchPadInst>(AncestorPad))
      continue;
    // If the MemoMap had an entry mapping AncestorPad to nullptr, since we
    // haven't yet called getUnwindDestTokenHelper for AncestorPad in this
    // call to getUnwindDestToken, that would mean that AncestorPad had no
    // information in itself, its descendants, or its ancestors. If that
    // were the case, then we should also have recorded the lack of information
    // for the descendant that we're coming from. So assert that we don't
    // find a null entry in the MemoMap for AncestorPad.
    assert(!MemoMap.count(AncestorPad) || MemoMap[AncestorPad]);
    auto AncestorMemo = MemoMap.find(AncestorPad);
    if (AncestorMemo == MemoMap.end()) {
      UnwindDestToken = getUnwindDestTokenHelper(AncestorPad, MemoMap);
    } else {
      UnwindDestToken = AncestorMemo->second;
    }
    if (UnwindDestToken)
      break;
    LastUselessPad = AncestorPad;
    MemoMap[LastUselessPad] = nullptr;
#ifndef NDEBUG
    TempMemos.insert(LastUselessPad);
#endif
  }

  // We know that getUnwindDestTokenHelper was called on LastUselessPad and
  // returned nullptr (and likewise for EHPad and any of its ancestors up to
  // LastUselessPad), so LastUselessPad has no information from below. Since
  // getUnwindDestTokenHelper must investigate all downward paths through
  // no-information nodes to prove that a node has no information like this,
  // and since any time it finds information it records it in the MemoMap for
  // not just the immediately-containing funclet but also any ancestors also
  // exited, it must be the case that, walking downward from LastUselessPad,
  // visiting just those nodes which have not been mapped to an unwind dest
  // by getUnwindDestTokenHelper (the nullptr TempMemos notwithstanding, since
  // they are just used to keep getUnwindDestTokenHelper from repeating work),
  // any node visited must have been exhaustively searched with no information
  // for it found.
  SmallVector<Instruction *, 8> Worklist(1, LastUselessPad);
  while (!Worklist.empty()) {
    Instruction *UselessPad = Worklist.pop_back_val();
    auto Memo = MemoMap.find(UselessPad);
    if (Memo != MemoMap.end() && Memo->second) {
      // Here the name 'UselessPad' is a bit of a misnomer, because we've found
      // that it is a funclet that does have information about unwinding to
      // a particular destination; its parent was a useless pad.
      // Since its parent has no information, the unwind edge must not escape
      // the parent, and must target a sibling of this pad. This local unwind
      // gives us no information about EHPad. Leave it and the subtree rooted
      // at it alone.
      assert(getParentPad(Memo->second) == getParentPad(UselessPad));
      continue;
    }
    // We know we don't have information for UselessPad. If it has an entry in
    // the MemoMap (mapping it to nullptr), it must be one of the TempMemos
    // added on this invocation of getUnwindDestToken; if a previous invocation
    // recorded nullptr, it would have had to prove that the ancestors of
    // UselessPad, which include LastUselessPad, had no information, and that
    // in turn would have required proving that the descendants of
    // LastUselessPad, which include EHPad, have no information about
    // LastUselessPad, which would imply that EHPad was mapped to nullptr in
    // the MemoMap on that invocation, which isn't the case if we got here.
    assert(!MemoMap.count(UselessPad) || TempMemos.count(UselessPad));
    // Assert as we enumerate users that 'UselessPad' doesn't have any unwind
    // information that we'd be contradicting by making a map entry for it
    // (which is something that getUnwindDestTokenHelper must have proved for
    // us to get here). Just assert on its direct users here; the checks in
    // this downward walk at its descendants will verify that they don't have
    // any unwind edges that exit 'UselessPad' either (i.e. they either have no
    // unwind edges or unwind to a sibling).
    MemoMap[UselessPad] = UnwindDestToken;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UselessPad)) {
      assert(CatchSwitch->getUnwindDest() == nullptr && "Expected useless pad");
      for (BasicBlock *HandlerBlock : CatchSwitch->handlers()) {
        auto *CatchPad = HandlerBlock->getFirstNonPHI();
        for (User *U : CatchPad->users()) {
          assert(
              (!isa<InvokeInst>(U) ||
               (getParentPad(
                    cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                CatchPad)) &&
              "Expected useless pad");
          if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
            Worklist.push_back(cast<Instruction>(U));
        }
      }
    } else {
      assert(isa<CleanupPadInst>(UselessPad));
      for (User *U : UselessPad->users()) {
        assert(!isa<CleanupReturnInst>(U) && "Expected useless pad");
        assert((!isa<InvokeInst>(U) ||
                (getParentPad(
                     cast<InvokeInst>(U)->getUnwindDest()->getFirstNonPHI()) ==
                 UselessPad)) &&
               "Expected useless pad");
        if (isa<CatchSwitchInst>(U) || isa<CleanupPadInst>(U))
          Worklist.push_back(cast<Instruction>(U));
      }
    }
  }

  return UnwindDestToken;
}

/// When we inline a basic block into an invoke,
/// we have to turn all of the calls that can throw into invokes.
/// This function analyzes BB to see if there are any calls, and if so,
/// it rewrites them to be invokes that jump to InvokeDest and fills in the PHI
/// nodes in that block with the values specified in InvokeDestPHIValues.
static BasicBlock *HandleCallsInBlockInlinedThroughInvoke(
    BasicBlock *BB, BasicBlock *UnwindEdge,
    UnwindDestMemoTy *FuncletUnwindMap = nullptr) {
  for (Instruction &I : llvm::make_early_inc_range(*BB)) {
    // We only need to check for function calls: inlined invoke
    // instructions require no special handling.
    CallInst *CI = dyn_cast<CallInst>(&I);

    if (!CI || CI->doesNotThrow())
      continue;

    // We do not need to (and in fact, cannot) convert possibly throwing calls
    // to @llvm.experimental_deoptimize (resp. @llvm.experimental.guard) into
    // invokes. The caller's "segment" of the deoptimization continuation
    // attached to the newly inlined @llvm.experimental_deoptimize
    // (resp. @llvm.experimental.guard) call should contain the exception
    // handling logic, if any.
    if (auto *F = CI->getCalledFunction())
      if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize ||
          F->getIntrinsicID() == Intrinsic::experimental_guard)
        continue;

    if (auto FuncletBundle = CI->getOperandBundle(LLVMContext::OB_funclet)) {
      // This call is nested inside a funclet. If that funclet has an unwind
      // destination within the inlinee, then unwinding out of this call would
      // be UB. Rewriting this call to an invoke which targets the inlined
      // invoke's unwind dest would give the call's parent funclet multiple
      // unwind destinations, which is something that subsequent EH table
      // generation can't handle and that the verifier rejects. So when we
      // see such a call, leave it as a call.
      auto *FuncletPad = cast<Instruction>(FuncletBundle->Inputs[0]);
      Value *UnwindDestToken =
          getUnwindDestToken(FuncletPad, *FuncletUnwindMap);
      if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
        continue;
#ifndef NDEBUG
      Instruction *MemoKey;
      if (auto *CatchPad = dyn_cast<CatchPadInst>(FuncletPad))
        MemoKey = CatchPad->getCatchSwitch();
      else
        MemoKey = FuncletPad;
      assert(FuncletUnwindMap->count(MemoKey) &&
             (*FuncletUnwindMap)[MemoKey] == UnwindDestToken &&
             "must get memoized to avoid confusing later searches");
#endif // NDEBUG
    }

    changeToInvokeAndSplitBasicBlock(CI, UnwindEdge);
    return BB;
  }
  return nullptr;
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedLandingPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                                    ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *InvokeDest = II->getUnwindDest();

  Function *Caller = FirstNewBlock->getParent();

  // The inlined code is currently at the end of the function, scan from the
  // start of the inlined code to its end, checking for stuff we need to
  // rewrite.
  LandingPadInliningInfo Invoke(II);

  // Get all of the inlined landing pad instructions.
  SmallPtrSet<LandingPadInst*, 16> InlinedLPads;
  for (Function::iterator I = FirstNewBlock->getIterator(), E = Caller->end();
       I != E; ++I)
    if (InvokeInst *II = dyn_cast<InvokeInst>(I->getTerminator()))
      InlinedLPads.insert(II->getLandingPadInst());

  // Append the clauses from the outer landing pad instruction into the inlined
  // landing pad instructions.
  LandingPadInst *OuterLPad = Invoke.getLandingPadInst();
  for (LandingPadInst *InlinedLPad : InlinedLPads) {
    unsigned OuterNum = OuterLPad->getNumClauses();
    InlinedLPad->reserveClauses(OuterNum);
    for (unsigned OuterIdx = 0; OuterIdx != OuterNum; ++OuterIdx)
      InlinedLPad->addClause(OuterLPad->getClause(OuterIdx));
    if (OuterLPad->isCleanup())
      InlinedLPad->setCleanup(true);
  }

  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (InlinedCodeInfo.ContainsCalls)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, Invoke.getOuterResumeDest()))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        Invoke.addIncomingPHIValuesFor(NewBB);

    // Forward any resumes that are remaining here.
    if (ResumeInst *RI = dyn_cast<ResumeInst>(BB->getTerminator()))
      Invoke.forwardResume(RI, InlinedLPads);
  }

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  InvokeDest->removePredecessor(II->getParent());
}

/// If we inlined an invoke site, we need to convert calls
/// in the body of the inlined function into invokes.
///
/// II is the invoke instruction being inlined. FirstNewBlock is the first
/// block of the inlined code (the last block is the end of the function),
/// and InlineCodeInfo is information about the code that got inlined.
static void HandleInlinedEHPad(InvokeInst *II, BasicBlock *FirstNewBlock,
                               ClonedCodeInfo &InlinedCodeInfo) {
  BasicBlock *UnwindDest = II->getUnwindDest();
  Function *Caller = FirstNewBlock->getParent();

  assert(UnwindDest->getFirstNonPHI()->isEHPad() && "unexpected BasicBlock!");

  // If there are PHI nodes in the unwind destination block, we need to keep
  // track of which values came into them from the invoke before removing the
  // edge from this block.
  SmallVector<Value *, 8> UnwindDestPHIValues;
  BasicBlock *InvokeBB = II->getParent();
  for (PHINode &PHI : UnwindDest->phis()) {
    // Save the value to use for this edge.
    UnwindDestPHIValues.push_back(PHI.getIncomingValueForBlock(InvokeBB));
  }

  // Add incoming-PHI values to the unwind destination block for the given basic
  // block, using the values for the original invoke's source block.
  auto UpdatePHINodes = [&](BasicBlock *Src) {
    BasicBlock::iterator I = UnwindDest->begin();
    for (Value *V : UnwindDestPHIValues) {
      PHINode *PHI = cast<PHINode>(I);
      PHI->addIncoming(V, Src);
      ++I;
    }
  };

  // This connects all the instructions which 'unwind to caller' to the invoke
  // destination.
  UnwindDestMemoTy FuncletUnwindMap;
  for (Function::iterator BB = FirstNewBlock->getIterator(), E = Caller->end();
       BB != E; ++BB) {
    if (auto *CRI = dyn_cast<CleanupReturnInst>(BB->getTerminator())) {
      if (CRI->unwindsToCaller()) {
        auto *CleanupPad = CRI->getCleanupPad();
        CleanupReturnInst::Create(CleanupPad, UnwindDest, CRI->getIterator());
        CRI->eraseFromParent();
        UpdatePHINodes(&*BB);
        // Finding a cleanupret with an unwind destination would confuse
        // subsequent calls to getUnwindDestToken, so map the cleanuppad
        // to short-circuit any such calls and recognize this as an "unwind
        // to caller" cleanup.
        assert(!FuncletUnwindMap.count(CleanupPad) ||
               isa<ConstantTokenNone>(FuncletUnwindMap[CleanupPad]));
        FuncletUnwindMap[CleanupPad] =
            ConstantTokenNone::get(Caller->getContext());
      }
    }

    Instruction *I = BB->getFirstNonPHI();
    if (!I->isEHPad())
      continue;

    Instruction *Replacement = nullptr;
    if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {
      if (CatchSwitch->unwindsToCaller()) {
        Value *UnwindDestToken;
        if (auto *ParentPad =
                dyn_cast<Instruction>(CatchSwitch->getParentPad())) {
          // This catchswitch is nested inside another funclet. If that
          // funclet has an unwind destination within the inlinee, then
          // unwinding out of this catchswitch would be UB. Rewriting this
          // catchswitch to unwind to the inlined invoke's unwind dest would
          // give the parent funclet multiple unwind destinations, which is
          // something that subsequent EH table generation can't handle and
          // that the verifier rejects. So when we see such a call, leave it
          // as "unwind to caller".
          UnwindDestToken = getUnwindDestToken(ParentPad, FuncletUnwindMap);
          if (UnwindDestToken && !isa<ConstantTokenNone>(UnwindDestToken))
            continue;
        } else {
          // This catchswitch has no parent to inherit constraints from, and
          // none of its descendants can have an unwind edge that exits it and
          // targets another funclet in the inlinee. It may or may not have a
          // descendant that definitively has an unwind to caller. In either
          // case, we'll have to assume that any unwinds out of it may need to
          // be routed to the caller, so treat it as though it has a definitive
          // unwind to caller.
          UnwindDestToken = ConstantTokenNone::get(Caller->getContext());
        }
        auto *NewCatchSwitch = CatchSwitchInst::Create(
            CatchSwitch->getParentPad(), UnwindDest,
            CatchSwitch->getNumHandlers(), CatchSwitch->getName(),
            CatchSwitch->getIterator());
        for (BasicBlock *PadBB : CatchSwitch->handlers())
          NewCatchSwitch->addHandler(PadBB);
        // Propagate info for the old catchswitch over to the new one in
        // the unwind map. This also serves to short-circuit any subsequent
        // checks for the unwind dest of this catchswitch, which would get
        // confused if they found the outer handler in the callee.
        FuncletUnwindMap[NewCatchSwitch] = UnwindDestToken;
        Replacement = NewCatchSwitch;
      }
    } else if (!isa<FuncletPadInst>(I)) {
      llvm_unreachable("unexpected EHPad!");
    }

    if (Replacement) {
      Replacement->takeName(I);
      I->replaceAllUsesWith(Replacement);
      I->eraseFromParent();
      UpdatePHINodes(&*BB);
    }
  }

  if (InlinedCodeInfo.ContainsCalls)
    for (Function::iterator BB = FirstNewBlock->getIterator(),
                            E = Caller->end();
         BB != E; ++BB)
      if (BasicBlock *NewBB = HandleCallsInBlockInlinedThroughInvoke(
              &*BB, UnwindDest, &FuncletUnwindMap))
        // Update any PHI nodes in the exceptional block to indicate that there
        // is now a new entry in them.
        UpdatePHINodes(NewBB);

  // Now that everything is happy, we have one final detail. The PHI nodes in
  // the exception destination block still have entries due to the original
  // invoke instruction. Eliminate these entries (which might even delete the
  // PHI node) now.
  UnwindDest->removePredecessor(InvokeBB);
}

static bool haveCommonPrefix(MDNode *MIBStackContext,
                             MDNode *CallsiteStackContext) {
  assert(MIBStackContext->getNumOperands() > 0 &&
         CallsiteStackContext->getNumOperands() > 0);
  // Because of the context trimming performed during matching, the callsite
  // context could have more stack ids than the MIB. We match up to the end of
  // the shortest stack context.
  for (auto MIBStackIter = MIBStackContext->op_begin(),
            CallsiteStackIter = CallsiteStackContext->op_begin();
       MIBStackIter != MIBStackContext->op_end() &&
       CallsiteStackIter != CallsiteStackContext->op_end();
       MIBStackIter++, CallsiteStackIter++) {
    auto *Val1 = mdconst::dyn_extract<ConstantInt>(*MIBStackIter);
    auto *Val2 = mdconst::dyn_extract<ConstantInt>(*CallsiteStackIter);
    assert(Val1 && Val2);
    if (Val1->getZExtValue() != Val2->getZExtValue())
      return false;
  }
  return true;
}

static void removeMemProfMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_memprof, nullptr);
}

static void removeCallsiteMetadata(CallBase *Call) {
  Call->setMetadata(LLVMContext::MD_callsite, nullptr);
}

static void updateMemprofMetadata(CallBase *CI,
                                  const std::vector<Metadata *> &MIBList) {
  assert(!MIBList.empty());
  // Remove existing memprof, which will either be replaced or may not be needed
  // if we are able to use a single allocation type function attribute.
  removeMemProfMetadata(CI);
  CallStackTrie CallStack;
  for (Metadata *MIB : MIBList)
    CallStack.addCallStack(cast<MDNode>(MIB));
  bool MemprofMDAttached = CallStack.buildAndAttachMIBMetadata(CI);
  assert(MemprofMDAttached == CI->hasMetadata(LLVMContext::MD_memprof));
  if (!MemprofMDAttached)
    // If we used a function attribute remove the callsite metadata as well.
    removeCallsiteMetadata(CI);
}

// Update the metadata on the inlined copy ClonedCall of a call OrigCall in the
// inlined callee body, based on the callsite metadata InlinedCallsiteMD from
// the call that was inlined.
static void propagateMemProfHelper(const CallBase *OrigCall,
                                   CallBase *ClonedCall,
                                   MDNode *InlinedCallsiteMD) {
  MDNode *OrigCallsiteMD = ClonedCall->getMetadata(LLVMContext::MD_callsite);
  MDNode *ClonedCallsiteMD = nullptr;
  // Check if the call originally had callsite metadata, and update it for the
  // new call in the inlined body.
  if (OrigCallsiteMD) {
    // The cloned call's context is now the concatenation of the original call's
    // callsite metadata and the callsite metadata on the call where it was
    // inlined.
    ClonedCallsiteMD = MDNode::concatenate(OrigCallsiteMD, InlinedCallsiteMD);
    ClonedCall->setMetadata(LLVMContext::MD_callsite, ClonedCallsiteMD);
  }

  // Update any memprof metadata on the cloned call.
  MDNode *OrigMemProfMD = ClonedCall->getMetadata(LLVMContext::MD_memprof);
  if (!OrigMemProfMD)
    return;
  // We currently expect that allocations with memprof metadata also have
  // callsite metadata for the allocation's part of the context.
  assert(OrigCallsiteMD);

  // New call's MIB list.
  std::vector<Metadata *> NewMIBList;

  // For each MIB metadata, check if its call stack context starts with the
  // new clone's callsite metadata. If so, that MIB goes onto the cloned call in
  // the inlined body. If not, it stays on the out-of-line original call.
  for (auto &MIBOp : OrigMemProfMD->operands()) {
    MDNode *MIB = dyn_cast<MDNode>(MIBOp);
    // Stack is first operand of MIB.
    MDNode *StackMD = getMIBStackNode(MIB);
    assert(StackMD);
    // See if the new cloned callsite context matches this profiled context.
    if (haveCommonPrefix(StackMD, ClonedCallsiteMD))
      // Add it to the cloned call's MIB list.
      NewMIBList.push_back(MIB);
  }
  if (NewMIBList.empty()) {
    removeMemProfMetadata(ClonedCall);
    removeCallsiteMetadata(ClonedCall);
    return;
  }
  if (NewMIBList.size() < OrigMemProfMD->getNumOperands())
    updateMemprofMetadata(ClonedCall, NewMIBList);
}

// Update memprof related metadata (!memprof and !callsite) based on the
// inlining of Callee into the callsite at CB. The updates include merging the
// inlined callee's callsite metadata with that of the inlined call,
// and moving the subset of any memprof contexts to the inlined callee
// allocations if they match the new inlined call stack.
static void
propagateMemProfMetadata(Function *Callee, CallBase &CB,
                         bool ContainsMemProfMetadata,
                         const ValueMap<const Value *, WeakTrackingVH> &VMap) {
  MDNode *CallsiteMD = CB.getMetadata(LLVMContext::MD_callsite);
  // Only need to update if the inlined callsite had callsite metadata, or if
  // there was any memprof metadata inlined.
  if (!CallsiteMD && !ContainsMemProfMetadata)
    return;

  // Propagate metadata onto the cloned calls in the inlined callee.
  for (const auto &Entry : VMap) {
    // See if this is a call that has been inlined and remapped, and not
    // simplified away in the process.
    auto *OrigCall = dyn_cast_or_null<CallBase>(Entry.first);
    auto *ClonedCall = dyn_cast_or_null<CallBase>(Entry.second);
    if (!OrigCall || !ClonedCall)
      continue;
    // If the inlined callsite did not have any callsite metadata, then it isn't
    // involved in any profiled call contexts, and we can remove any memprof
    // metadata on the cloned call.
    if (!CallsiteMD) {
      removeMemProfMetadata(ClonedCall);
      removeCallsiteMetadata(ClonedCall);
      continue;
    }
    propagateMemProfHelper(OrigCall, ClonedCall, CallsiteMD);
  }
}

/// When inlining a call site that has !llvm.mem.parallel_loop_access,
/// !llvm.access.group, !alias.scope or !noalias metadata, that metadata should
/// be propagated to all memory-accessing cloned instructions.
static void PropagateCallSiteMetadata(CallBase &CB, Function::iterator FStart,
                                      Function::iterator FEnd) {
  MDNode *MemParallelLoopAccess =
      CB.getMetadata(LLVMContext::MD_mem_parallel_loop_access);
  MDNode *AccessGroup = CB.getMetadata(LLVMContext::MD_access_group);
  MDNode *AliasScope = CB.getMetadata(LLVMContext::MD_alias_scope);
  MDNode *NoAlias = CB.getMetadata(LLVMContext::MD_noalias);
  if (!MemParallelLoopAccess && !AccessGroup && !AliasScope && !NoAlias)
    return;

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // This metadata is only relevant for instructions that access memory.
      if (!I.mayReadOrWriteMemory())
        continue;

      if (MemParallelLoopAccess) {
        // TODO: This probably should not overwrite MemParallelLoopAccess.
        MemParallelLoopAccess = MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_mem_parallel_loop_access),
            MemParallelLoopAccess);
        I.setMetadata(LLVMContext::MD_mem_parallel_loop_access,
                      MemParallelLoopAccess);
      }

      if (AccessGroup)
        I.setMetadata(LLVMContext::MD_access_group, uniteAccessGroups(
            I.getMetadata(LLVMContext::MD_access_group), AccessGroup));

      if (AliasScope)
        I.setMetadata(LLVMContext::MD_alias_scope, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_alias_scope), AliasScope));

      if (NoAlias)
        I.setMetadata(LLVMContext::MD_noalias, MDNode::concatenate(
            I.getMetadata(LLVMContext::MD_noalias), NoAlias));
    }
  }
}

/// Bundle operands of the inlined function must be added to inlined call sites.
static void PropagateOperandBundles(Function::iterator InlinedBB,
                                    Instruction *CallSiteEHPad) {
  for (Instruction &II : llvm::make_early_inc_range(*InlinedBB)) {
    CallBase *I = dyn_cast<CallBase>(&II);
    if (!I)
      continue;
    // Skip call sites which already have a "funclet" bundle.
    if (I->getOperandBundle(LLVMContext::OB_funclet))
      continue;
    // Skip call sites which are nounwind intrinsics (as long as they don't
    // lower into regular function calls in the course of IR transformations).
    auto *CalledFn =
        dyn_cast<Function>(I->getCalledOperand()->stripPointerCasts());
    if (CalledFn && CalledFn->isIntrinsic() && I->doesNotThrow() &&
        !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID()))
      continue;

    SmallVector<OperandBundleDef, 1> OpBundles;
    I->getOperandBundlesAsDefs(OpBundles);
    OpBundles.emplace_back("funclet", CallSiteEHPad);

    Instruction *NewInst = CallBase::Create(I, OpBundles, I->getIterator());
    NewInst->takeName(I);
    I->replaceAllUsesWith(NewInst);
    I->eraseFromParent();
  }
}

namespace {
/// Utility for cloning !noalias and !alias.scope metadata. When a code region
/// using scoped alias metadata is inlined, the aliasing relationships may not
/// hold between the two versions. It is necessary to create a deep clone of the
/// metadata, putting the two versions in separate scope domains.
class ScopedAliasMetadataDeepCloner {
  using MetadataMap = DenseMap<const MDNode *, TrackingMDNodeRef>;
  SetVector<const MDNode *> MD;
  MetadataMap MDMap;
  void addRecursiveMetadataUses();

public:
  ScopedAliasMetadataDeepCloner(const Function *F);

  /// Create a new clone of the scoped alias metadata, which will be used by
  /// subsequent remap() calls.
  void clone();

  /// Remap instructions in the given range from the original to the cloned
  /// metadata.
  void remap(Function::iterator FStart, Function::iterator FEnd);
};
} // namespace

ScopedAliasMetadataDeepCloner::ScopedAliasMetadataDeepCloner(
    const Function *F) {
  for (const BasicBlock &BB : *F) {
    for (const Instruction &I : BB) {
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        MD.insert(M);
      if (const MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        MD.insert(M);

      // We also need to clone the metadata in noalias intrinsics.
      if (const auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        MD.insert(Decl->getScopeList());
    }
  }
  addRecursiveMetadataUses();
}

void ScopedAliasMetadataDeepCloner::addRecursiveMetadataUses() {
  SmallVector<const Metadata *, 16> Queue(MD.begin(), MD.end());
  while (!Queue.empty()) {
    const MDNode *M = cast<MDNode>(Queue.pop_back_val());
    for (const Metadata *Op : M->operands())
      if (const MDNode *OpMD = dyn_cast<MDNode>(Op))
        if (MD.insert(OpMD))
          Queue.push_back(OpMD);
  }
}

void ScopedAliasMetadataDeepCloner::clone() {
  assert(MDMap.empty() && "clone() already called ?");

  SmallVector<TempMDTuple, 16> DummyNodes;
  for (const MDNode *I : MD) {
    DummyNodes.push_back(MDTuple::getTemporary(I->getContext(), std::nullopt));
    MDMap[I].reset(DummyNodes.back().get());
  }

  // Create new metadata nodes to replace the dummy nodes, replacing old
  // metadata references with either a dummy node or an already-created new
  // node.
  SmallVector<Metadata *, 4> NewOps;
  for (const MDNode *I : MD) {
    for (const Metadata *Op : I->operands()) {
      if (const MDNode *M = dyn_cast<MDNode>(Op))
        NewOps.push_back(MDMap[M]);
      else
        NewOps.push_back(const_cast<Metadata *>(Op));
    }

    MDNode *NewM = MDNode::get(I->getContext(), NewOps);
    MDTuple *TempM = cast<MDTuple>(MDMap[I]);
    assert(TempM->isTemporary() && "Expected temporary node");

    TempM->replaceAllUsesWith(NewM);
    NewOps.clear();
  }
}

void ScopedAliasMetadataDeepCloner::remap(Function::iterator FStart,
                                          Function::iterator FEnd) {
  if (MDMap.empty())
    return; // Nothing to do.

  for (BasicBlock &BB : make_range(FStart, FEnd)) {
    for (Instruction &I : BB) {
      // TODO: The null checks for the MDMap.lookup() results should no longer
      // be necessary.
      if (MDNode *M = I.getMetadata(LLVMContext::MD_alias_scope))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_alias_scope, MNew);

      if (MDNode *M = I.getMetadata(LLVMContext::MD_noalias))
        if (MDNode *MNew = MDMap.lookup(M))
          I.setMetadata(LLVMContext::MD_noalias, MNew);

      if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
        if (MDNode *MNew = MDMap.lookup(Decl->getScopeList()))
          Decl->setScopeList(MNew);
    }
  }
}

/// If the inlined function has noalias arguments,
/// then add new alias scopes for each noalias argument, tag the mapped noalias
/// parameters with noalias metadata specifying the new scope, and tag all
/// non-derived loads, stores and memory intrinsics with the new alias scopes.
static void AddAliasScopeMetadata(CallBase &CB, ValueToValueMapTy &VMap,
                                  const DataLayout &DL, AAResults *CalleeAAR,
                                  ClonedCodeInfo &InlinedFunctionInfo) {
  if (!EnableNoAliasConversion)
    return;

  const Function *CalledFunc = CB.getCalledFunction();
  SmallVector<const Argument *, 4> NoAliasArgs;

  for (const Argument &Arg : CalledFunc->args())
    if (CB.paramHasAttr(Arg.getArgNo(), Attribute::NoAlias) && !Arg.use_empty())
      NoAliasArgs.push_back(&Arg);

  if (NoAliasArgs.empty())
    return;

  // To do a good job, if a noalias variable is captured, we need to know if
  // the capture point dominates the particular use we're considering.
  DominatorTree DT;
  DT.recalculate(const_cast<Function&>(*CalledFunc));

  // noalias indicates that pointer values based on the argument do not alias
  // pointer values which are not based on it. So we add a new "scope" for each
  // noalias function argument. Accesses using pointers based on that argument
  // become part of that alias scope, accesses using pointers not based on that
  // argument are tagged as noalias with that scope.

  DenseMap<const Argument *, MDNode *> NewScopes;
  MDBuilder MDB(CalledFunc->getContext());

  // Create a new scope domain for this function.
  MDNode *NewDomain =
      MDB.createAnonymousAliasScopeDomain(CalledFunc->getName());
  for (unsigned i = 0, e = NoAliasArgs.size(); i != e; ++i) {
    const Argument *A = NoAliasArgs[i];

    std::string Name = std::string(CalledFunc->getName());
    if (A->hasName()) {
      Name += ": %";
      Name += A->getName();
    } else {
      Name += ": argument ";
      Name += utostr(i);
    }

    // Note: We always create a new anonymous root here. This is true regardless
    // of the linkage of the callee because the aliasing "scope" is not just a
    // property of the callee, but also all control dependencies in the caller.
    MDNode *NewScope = MDB.createAnonymousAliasScope(NewDomain, Name);
    NewScopes.insert(std::make_pair(A, NewScope));

    if (UseNoAliasIntrinsic) {
      // Introduce a llvm.experimental.noalias.scope.decl for the noalias
      // argument.
      MDNode *AScopeList = MDNode::get(CalledFunc->getContext(), NewScope);
      auto *NoAliasDecl =
          IRBuilder<>(&CB).CreateNoAliasScopeDeclaration(AScopeList);
      // Ignore the result for now. The result will be used when the
      // llvm.noalias intrinsic is introduced.
      (void)NoAliasDecl;
    }
  }

  // Iterate over all new instructions in the map; for all memory-access
  // instructions, add the alias scope metadata.
  for (ValueToValueMapTy::iterator VMI = VMap.begin(), VMIE = VMap.end();
       VMI != VMIE; ++VMI) {
    if (const Instruction *I = dyn_cast<Instruction>(VMI->first)) {
      if (!VMI->second)
        continue;

      Instruction *NI = dyn_cast<Instruction>(VMI->second);
      if (!NI || InlinedFunctionInfo.isSimplified(I, NI))
        continue;

      bool IsArgMemOnlyCall = false, IsFuncCall = false;
      SmallVector<const Value *, 2> PtrArgs;

      if (const LoadInst *LI = dyn_cast<LoadInst>(I))
        PtrArgs.push_back(LI->getPointerOperand());
      else if (const StoreInst *SI = dyn_cast<StoreInst>(I))
        PtrArgs.push_back(SI->getPointerOperand());
      else if (const VAArgInst *VAAI = dyn_cast<VAArgInst>(I))
        PtrArgs.push_back(VAAI->getPointerOperand());
      else if (const AtomicCmpXchgInst *CXI = dyn_cast<AtomicCmpXchgInst>(I))
        PtrArgs.push_back(CXI->getPointerOperand());
      else if (const AtomicRMWInst *RMWI = dyn_cast<AtomicRMWInst>(I))
        PtrArgs.push_back(RMWI->getPointerOperand());
      else if (const auto *Call = dyn_cast<CallBase>(I)) {
        // If we know that the call does not access memory, then we'll still
        // know that about the inlined clone of this call site, and we don't
        // need to add metadata.
        if (Call->doesNotAccessMemory())
          continue;

        IsFuncCall = true;
        if (CalleeAAR) {
          MemoryEffects ME = CalleeAAR->getMemoryEffects(Call);

          // We'll retain this knowledge without additional metadata.
          if (ME.onlyAccessesInaccessibleMem())
            continue;

          if (ME.onlyAccessesArgPointees())
            IsArgMemOnlyCall = true;
        }

        for (Value *Arg : Call->args()) {
          // Only care about pointer arguments. If a noalias argument is
          // accessed through a non-pointer argument, it must be captured
          // first (e.g. via ptrtoint), and we protect against captures below.
          if (!Arg->getType()->isPointerTy())
            continue;

          PtrArgs.push_back(Arg);
        }
      }

      // If we found no pointers, then this instruction is not suitable for
      // pairing with an instruction to receive aliasing metadata.
      // However, if this is a call, we might just alias with none of the
      // noalias arguments.
      if (PtrArgs.empty() && !IsFuncCall)
        continue;

      // It is possible that there is only one underlying object, but you
      // need to go through several PHIs to see it, and thus could be
      // repeated in the Objects list.
      SmallPtrSet<const Value *, 4> ObjSet;
      SmallVector<Metadata *, 4> Scopes, NoAliases;

      for (const Value *V : PtrArgs) {
        SmallVector<const Value *, 4> Objects;
        getUnderlyingObjects(V, Objects, /* LI = */ nullptr);

        for (const Value *O : Objects)
          ObjSet.insert(O);
      }

      // Figure out if we're derived from anything that is not a noalias
      // argument.
      bool RequiresNoCaptureBefore = false, UsesAliasingPtr = false,
           UsesUnknownObject = false;
      for (const Value *V : ObjSet) {
        // Is this value a constant that cannot be derived from any pointer
        // value (we need to exclude constant expressions, for example, that
        // are formed from arithmetic on global symbols).
        bool IsNonPtrConst = isa<ConstantInt>(V) || isa<ConstantFP>(V) ||
                             isa<ConstantPointerNull>(V) ||
                             isa<ConstantDataVector>(V) || isa<UndefValue>(V);
        if (IsNonPtrConst)
          continue;

        // If this is anything other than a noalias argument, then we cannot
        // completely describe the aliasing properties using alias.scope
        // metadata (and, thus, won't add any).
        if (const Argument *A = dyn_cast<Argument>(V)) {
          if (!CB.paramHasAttr(A->getArgNo(), Attribute::NoAlias))
            UsesAliasingPtr = true;
        } else {
          UsesAliasingPtr = true;
        }

        if (isEscapeSource(V)) {
          // An escape source can only alias with a noalias argument if it has
          // been captured beforehand.
          RequiresNoCaptureBefore = true;
        } else if (!isa<Argument>(V) && !isIdentifiedObject(V)) {
          // If this is neither an escape source, nor some identified object
          // (which cannot directly alias a noalias argument), nor some other
          // argument (which, by definition, also cannot alias a noalias
          // argument), conservatively do not make any assumptions.
          UsesUnknownObject = true;
        }
      }

      // Nothing we can do if the used underlying object cannot be reliably
      // determined.
      if (UsesUnknownObject)
        continue;

      // A function call can always get captured noalias pointers (via other
      // parameters, globals, etc.).
      if (IsFuncCall && !IsArgMemOnlyCall)
        RequiresNoCaptureBefore = true;

      // First, we want to figure out all of the sets with which we definitely
      // don't alias. Iterate over all noalias sets, and add those for which:
      // 1. The noalias argument is not in the set of objects from which we
      //    definitely derive.
      // 2. The noalias argument has not yet been captured.
      // An arbitrary function that might load pointers could see captured
      // noalias arguments via other noalias arguments or globals, and so we
      // must always check for prior capture.
      for (const Argument *A : NoAliasArgs) {
        if (ObjSet.contains(A))
          continue; // May be based on a noalias argument.

        // It might be tempting to skip the PointerMayBeCapturedBefore check if
        // A->hasNoCaptureAttr() is true, but this is incorrect because
        // nocapture only guarantees that no copies outlive the function, not
        // that the value cannot be locally captured.
        if (!RequiresNoCaptureBefore ||
            !PointerMayBeCapturedBefore(A, /* ReturnCaptures */ false,
                                        /* StoreCaptures */ false, I, &DT))
          NoAliases.push_back(NewScopes[A]);
      }

      if (!NoAliases.empty())
        NI->setMetadata(LLVMContext::MD_noalias,
                        MDNode::concatenate(
                            NI->getMetadata(LLVMContext::MD_noalias),
                            MDNode::get(CalledFunc->getContext(), NoAliases)));

      // Next, we want to figure out all of the sets to which we might belong.
      // We might belong to a set if the noalias argument is in the set of
      // underlying objects. If there is some non-noalias argument in our list
      // of underlying objects, then we cannot add a scope because the fact
      // that some access does not alias with any set of our noalias arguments
      // cannot itself guarantee that it does not alias with this access
      // (because there is some pointer of unknown origin involved and the
      // other access might also depend on this pointer). We also cannot add
      // scopes to arbitrary functions unless we know they don't access any
      // non-parameter pointer-values.
      bool CanAddScopes = !UsesAliasingPtr;
      if (CanAddScopes && IsFuncCall)
        CanAddScopes = IsArgMemOnlyCall;

      if (CanAddScopes)
        for (const Argument *A : NoAliasArgs) {
          if (ObjSet.count(A))
            Scopes.push_back(NewScopes[A]);
        }

      if (!Scopes.empty())
        NI->setMetadata(
            LLVMContext::MD_alias_scope,
            MDNode::concatenate(NI->getMetadata(LLVMContext::MD_alias_scope),
                                MDNode::get(CalledFunc->getContext(), Scopes)));
    }
  }
}

static bool MayContainThrowingOrExitingCallAfterCB(CallBase *Begin,
                                                   ReturnInst *End) {

  assert(Begin->getParent() == End->getParent() &&
         "Expected to be in same basic block!");
  auto BeginIt = Begin->getIterator();
  assert(BeginIt != End->getIterator() && "Non-empty BB has empty iterator");
  return !llvm::isGuaranteedToTransferExecutionToSuccessor(
      ++BeginIt, End->getIterator(), InlinerAttributeWindow + 1);
}

// Add attributes from CB params and Fn attributes that can always be propagated
// to the corresponding argument / inner callbases.
static void AddParamAndFnBasicAttributes(const CallBase &CB,
                                         ValueToValueMapTy &VMap,
                                         ClonedCodeInfo &InlinedFunctionInfo) {
  auto *CalledFunction = CB.getCalledFunction();
  auto &Context = CalledFunction->getContext();

  // Collect valid attributes for all params.
  SmallVector<AttrBuilder> ValidParamAttrs;
  bool HasAttrToPropagate = false;

  for (unsigned I = 0, E = CB.arg_size(); I < E; ++I) {
    ValidParamAttrs.emplace_back(AttrBuilder{CB.getContext()});
    // Access attributes can be propagated to any param with the same underlying
    // object as the argument.
    if (CB.paramHasAttr(I, Attribute::ReadNone))
      ValidParamAttrs.back().addAttribute(Attribute::ReadNone);
    if (CB.paramHasAttr(I, Attribute::ReadOnly))
      ValidParamAttrs.back().addAttribute(Attribute::ReadOnly);
    HasAttrToPropagate |= ValidParamAttrs.back().hasAttributes();
  }

  // Won't be able to propagate anything.
  if (!HasAttrToPropagate)
    return;

  for (BasicBlock &BB : *CalledFunction) {
    for (Instruction &Ins : BB) {
      const auto *InnerCB = dyn_cast<CallBase>(&Ins);
      if (!InnerCB)
        continue;
      auto *NewInnerCB = dyn_cast_or_null<CallBase>(VMap.lookup(InnerCB));
      if (!NewInnerCB)
        continue;
      // The InnerCB might have been simplified during the inlining
      // process which can make propagation incorrect.
      if (InlinedFunctionInfo.isSimplified(InnerCB, NewInnerCB))
        continue;

      AttributeList AL = NewInnerCB->getAttributes();
      for (unsigned I = 0, E = InnerCB->arg_size(); I < E; ++I) {
        // Check if the underlying value for the parameter is an argument.
        const Value *UnderlyingV =
            getUnderlyingObject(InnerCB->getArgOperand(I));
        const Argument *Arg = dyn_cast<Argument>(UnderlyingV);
        if (!Arg)
          continue;

        if (NewInnerCB->paramHasAttr(I, Attribute::ByVal))
          // It's unsound to propagate memory attributes to byval arguments.
          // Even if CalledFunction doesn't e.g. write to the argument,
          // the call to NewInnerCB may write to its by-value copy.
          continue;

        unsigned ArgNo = Arg->getArgNo();
        // If so, propagate its access attributes.
        AL = AL.addParamAttributes(Context, I, ValidParamAttrs[ArgNo]);
        // We can have conflicting attributes from the inner callsite and
        // to-be-inlined callsite. In that case, choose the most
        // restrictive.

        // readonly + writeonly means we can never deref so make readnone.
        if (AL.hasParamAttr(I, Attribute::ReadOnly) &&
            AL.hasParamAttr(I, Attribute::WriteOnly))
          AL = AL.addParamAttribute(Context, I, Attribute::ReadNone);

        // If we have readnone, we need to clear readonly/writeonly.
        if (AL.hasParamAttr(I, Attribute::ReadNone)) {
          AL = AL.removeParamAttribute(Context, I, Attribute::ReadOnly);
          AL = AL.removeParamAttribute(Context, I, Attribute::WriteOnly);
        }

        // Writable cannot exist in conjunction with readonly/readnone.
        if (AL.hasParamAttr(I, Attribute::ReadOnly) ||
            AL.hasParamAttr(I, Attribute::ReadNone))
          AL = AL.removeParamAttribute(Context, I, Attribute::Writable);
      }
      NewInnerCB->setAttributes(AL);
    }
  }
}

// Only allow these whitelisted attributes to be propagated back to the
// callee. This is because other attributes may only be valid on the call
// itself, i.e. attributes such as signext and zeroext.
attributes such as signext and zeroext.14341435// Attributes that are always okay to propagate as if they are violated its1436// immediate UB.1437static AttrBuilder IdentifyValidUBGeneratingAttributes(CallBase &CB) {1438AttrBuilder Valid(CB.getContext());1439if (auto DerefBytes = CB.getRetDereferenceableBytes())1440Valid.addDereferenceableAttr(DerefBytes);1441if (auto DerefOrNullBytes = CB.getRetDereferenceableOrNullBytes())1442Valid.addDereferenceableOrNullAttr(DerefOrNullBytes);1443if (CB.hasRetAttr(Attribute::NoAlias))1444Valid.addAttribute(Attribute::NoAlias);1445if (CB.hasRetAttr(Attribute::NoUndef))1446Valid.addAttribute(Attribute::NoUndef);1447return Valid;1448}14491450// Attributes that need additional checks as propagating them may change1451// behavior or cause new UB.1452static AttrBuilder IdentifyValidPoisonGeneratingAttributes(CallBase &CB) {1453AttrBuilder Valid(CB.getContext());1454if (CB.hasRetAttr(Attribute::NonNull))1455Valid.addAttribute(Attribute::NonNull);1456if (CB.hasRetAttr(Attribute::Alignment))1457Valid.addAlignmentAttr(CB.getRetAlign());1458if (std::optional<ConstantRange> Range = CB.getRange())1459Valid.addRangeAttr(*Range);1460return Valid;1461}14621463static void AddReturnAttributes(CallBase &CB, ValueToValueMapTy &VMap,1464ClonedCodeInfo &InlinedFunctionInfo) {1465AttrBuilder ValidUB = IdentifyValidUBGeneratingAttributes(CB);1466AttrBuilder ValidPG = IdentifyValidPoisonGeneratingAttributes(CB);1467if (!ValidUB.hasAttributes() && !ValidPG.hasAttributes())1468return;1469auto *CalledFunction = CB.getCalledFunction();1470auto &Context = CalledFunction->getContext();14711472for (auto &BB : *CalledFunction) {1473auto *RI = dyn_cast<ReturnInst>(BB.getTerminator());1474if (!RI || !isa<CallBase>(RI->getOperand(0)))1475continue;1476auto *RetVal = cast<CallBase>(RI->getOperand(0));1477// Check that the cloned RetVal exists and is a call, otherwise we cannot1478// add the attributes on the cloned RetVal. Simplification during inlining1479// could have transformed the cloned instruction.1480auto *NewRetVal = dyn_cast_or_null<CallBase>(VMap.lookup(RetVal));1481if (!NewRetVal)1482continue;14831484// The RetVal might have be simplified during the inlining1485// process which can make propagation incorrect.1486if (InlinedFunctionInfo.isSimplified(RetVal, NewRetVal))1487continue;1488// Backward propagation of attributes to the returned value may be incorrect1489// if it is control flow dependent.1490// Consider:1491// @callee {1492// %rv = call @foo()1493// %rv2 = call @bar()1494// if (%rv2 != null)1495// return %rv21496// if (%rv == null)1497// exit()1498// return %rv1499// }1500// caller() {1501// %val = call nonnull @callee()1502// }1503// Here we cannot add the nonnull attribute on either foo or bar. So, we1504// limit the check to both RetVal and RI are in the same basic block and1505// there are no throwing/exiting instructions between these instructions.1506if (RI->getParent() != RetVal->getParent() ||1507MayContainThrowingOrExitingCallAfterCB(RetVal, RI))1508continue;1509// Add to the existing attributes of NewRetVal, i.e. the cloned call1510// instruction.1511// NB! When we have the same attribute already existing on NewRetVal, but1512// with a differing value, the AttributeList's merge API honours the already1513// existing attribute value (i.e. attributes such as dereferenceable,1514// dereferenceable_or_null etc). 
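// As an illustrative sketch (invented IR, not taken from a real test): if the
// to-be-inlined call site carries `dereferenceable(8)` on its return while
// the cloned inner call already carries `dereferenceable(16)`, the weaker
// callsite value is dropped by the checks below so the stronger existing
// attribute survives:
//
//   %r = call dereferenceable(16) ptr @bar()   ; stays dereferenceable(16)
//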
See AttrBuilder::merge for more details.1515AttributeList AL = NewRetVal->getAttributes();1516if (ValidUB.getDereferenceableBytes() < AL.getRetDereferenceableBytes())1517ValidUB.removeAttribute(Attribute::Dereferenceable);1518if (ValidUB.getDereferenceableOrNullBytes() <1519AL.getRetDereferenceableOrNullBytes())1520ValidUB.removeAttribute(Attribute::DereferenceableOrNull);1521AttributeList NewAL = AL.addRetAttributes(Context, ValidUB);1522// Attributes that may generate poison returns are a bit tricky. If we1523// propagate them, other uses of the callsite might have their behavior1524// change or cause UB (if they have noundef) b.c of the new potential1525// poison.1526// Take the following three cases:1527//1528// 1)1529// define nonnull ptr @foo() {1530// %p = call ptr @bar()1531// call void @use(ptr %p) willreturn nounwind1532// ret ptr %p1533// }1534//1535// 2)1536// define noundef nonnull ptr @foo() {1537// %p = call ptr @bar()1538// call void @use(ptr %p) willreturn nounwind1539// ret ptr %p1540// }1541//1542// 3)1543// define nonnull ptr @foo() {1544// %p = call noundef ptr @bar()1545// ret ptr %p1546// }1547//1548// In case 1, we can't propagate nonnull because poison value in @use may1549// change behavior or trigger UB.1550// In case 2, we don't need to be concerned about propagating nonnull, as1551// any new poison at @use will trigger UB anyways.1552// In case 3, we can never propagate nonnull because it may create UB due to1553// the noundef on @bar.1554if (ValidPG.getAlignment().valueOrOne() < AL.getRetAlignment().valueOrOne())1555ValidPG.removeAttribute(Attribute::Alignment);1556if (ValidPG.hasAttributes()) {1557Attribute CBRange = ValidPG.getAttribute(Attribute::Range);1558if (CBRange.isValid()) {1559Attribute NewRange = AL.getRetAttr(Attribute::Range);1560if (NewRange.isValid()) {1561ValidPG.addRangeAttr(1562CBRange.getRange().intersectWith(NewRange.getRange()));1563}1564}1565// Three checks.1566// If the callsite has `noundef`, then a poison due to violating the1567// return attribute will create UB anyways so we can always propagate.1568// Otherwise, if the return value (callee to be inlined) has `noundef`, we1569// can't propagate as a new poison return will cause UB.1570// Finally, check if the return value has no uses whose behavior may1571// change/may cause UB if we potentially return poison. At the moment this1572// is implemented overly conservatively with a single-use check.1573// TODO: Update the single-use check to iterate through uses and only bail1574// if we have a potentially dangerous use.15751576if (CB.hasRetAttr(Attribute::NoUndef) ||1577(RetVal->hasOneUse() && !RetVal->hasRetAttr(Attribute::NoUndef)))1578NewAL = NewAL.addRetAttributes(Context, ValidPG);1579}1580NewRetVal->setAttributes(NewAL);1581}1582}15831584/// If the inlined function has non-byval align arguments, then1585/// add @llvm.assume-based alignment assumptions to preserve this information.1586static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) {1587if (!PreserveAlignmentAssumptions || !IFI.GetAssumptionCache)1588return;15891590AssumptionCache *AC = &IFI.GetAssumptionCache(*CB.getCaller());1591auto &DL = CB.getDataLayout();15921593// To avoid inserting redundant assumptions, we should check for assumptions1594// already in the caller. 
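// For intuition, a rough sketch with made-up values: for a callee parameter
// declared `ptr align 16 %p` whose alignment cannot already be proven at the
// call site, the code below emits an operand-bundle assumption in the caller,
// approximately:
//
//   call void @llvm.assume(i1 true) [ "align"(ptr %arg, i64 16) ]
//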
To do this, we might need a DT of the caller.1595DominatorTree DT;1596bool DTCalculated = false;15971598Function *CalledFunc = CB.getCalledFunction();1599for (Argument &Arg : CalledFunc->args()) {1600if (!Arg.getType()->isPointerTy() || Arg.hasPassPointeeByValueCopyAttr() ||1601Arg.hasNUses(0))1602continue;1603MaybeAlign Alignment = Arg.getParamAlign();1604if (!Alignment)1605continue;16061607if (!DTCalculated) {1608DT.recalculate(*CB.getCaller());1609DTCalculated = true;1610}1611// If we can already prove the asserted alignment in the context of the1612// caller, then don't bother inserting the assumption.1613Value *ArgVal = CB.getArgOperand(Arg.getArgNo());1614if (getKnownAlignment(ArgVal, DL, &CB, AC, &DT) >= *Alignment)1615continue;16161617CallInst *NewAsmp = IRBuilder<>(&CB).CreateAlignmentAssumption(1618DL, ArgVal, Alignment->value());1619AC->registerAssumption(cast<AssumeInst>(NewAsmp));1620}1621}16221623static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src,1624Module *M, BasicBlock *InsertBlock,1625InlineFunctionInfo &IFI,1626Function *CalledFunc) {1627IRBuilder<> Builder(InsertBlock, InsertBlock->begin());16281629Value *Size =1630Builder.getInt64(M->getDataLayout().getTypeStoreSize(ByValType));16311632// Always generate a memcpy of alignment 1 here because we don't know1633// the alignment of the src pointer. Other optimizations can infer1634// better alignment.1635CallInst *CI = Builder.CreateMemCpy(Dst, /*DstAlign*/ Align(1), Src,1636/*SrcAlign*/ Align(1), Size);16371638// The verifier requires that all calls of debug-info-bearing functions1639// from debug-info-bearing functions have a debug location (for inlining1640// purposes). Assign a dummy location to satisfy the constraint.1641if (!CI->getDebugLoc() && InsertBlock->getParent()->getSubprogram())1642if (DISubprogram *SP = CalledFunc->getSubprogram())1643CI->setDebugLoc(DILocation::get(SP->getContext(), 0, 0, SP));1644}16451646/// When inlining a call site that has a byval argument,1647/// we have to make the implicit memcpy explicit by adding it.1648static Value *HandleByValArgument(Type *ByValType, Value *Arg,1649Instruction *TheCall,1650const Function *CalledFunc,1651InlineFunctionInfo &IFI,1652MaybeAlign ByValAlignment) {1653Function *Caller = TheCall->getFunction();1654const DataLayout &DL = Caller->getDataLayout();16551656// If the called function is readonly, then it could not mutate the caller's1657// copy of the byval'd memory. In this case, it is safe to elide the copy and1658// temporary.1659if (CalledFunc->onlyReadsMemory()) {1660// If the byval argument has a specified alignment that is greater than the1661// passed in pointer, then we either have to round up the input pointer or1662// give up on this transformation.1663if (ByValAlignment.valueOrOne() == 1)1664return Arg;16651666AssumptionCache *AC =1667IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;16681669// If the pointer is already known to be sufficiently aligned, or if we can1670// round it up to a larger alignment, then we don't need a temporary.1671if (getOrEnforceKnownAlignment(Arg, *ByValAlignment, DL, TheCall, AC) >=1672*ByValAlignment)1673return Arg;16741675// Otherwise, we have to make a memcpy to get a safe alignment. This is bad1676// for code quality, but rarely happens and is required for correctness.1677}16781679// Create the alloca. 
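// Net effect, as a hedged sketch with invented names: for a call site
//
//   call void @callee(ptr byval(%struct.S) align 8 %p)
//
// where @callee may write to its copy, inlining introduces a fresh alloca in
// the caller's entry block plus an explicit copy (emitted later by
// HandleByValArgumentInit), roughly:
//
//   %p.copy = alloca %struct.S, align 8
//   call void @llvm.memcpy.p0.p0.i64(ptr align 1 %p.copy, ptr align 1 %p,
//                                    i64 <size>, i1 false)
//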
If we have DataLayout, use nice alignment.1680Align Alignment = DL.getPrefTypeAlign(ByValType);16811682// If the byval had an alignment specified, we *must* use at least that1683// alignment, as it is required by the byval argument (and uses of the1684// pointer inside the callee).1685if (ByValAlignment)1686Alignment = std::max(Alignment, *ByValAlignment);16871688AllocaInst *NewAlloca =1689new AllocaInst(ByValType, Arg->getType()->getPointerAddressSpace(),1690nullptr, Alignment, Arg->getName());1691NewAlloca->insertBefore(Caller->begin()->begin());1692IFI.StaticAllocas.push_back(NewAlloca);16931694// Uses of the argument in the function should use our new alloca1695// instead.1696return NewAlloca;1697}16981699// Check whether this Value is used by a lifetime intrinsic.1700static bool isUsedByLifetimeMarker(Value *V) {1701for (User *U : V->users())1702if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U))1703if (II->isLifetimeStartOrEnd())1704return true;1705return false;1706}17071708// Check whether the given alloca already has1709// lifetime.start or lifetime.end intrinsics.1710static bool hasLifetimeMarkers(AllocaInst *AI) {1711Type *Ty = AI->getType();1712Type *Int8PtrTy =1713PointerType::get(Ty->getContext(), Ty->getPointerAddressSpace());1714if (Ty == Int8PtrTy)1715return isUsedByLifetimeMarker(AI);17161717// Do a scan to find all the casts to i8*.1718for (User *U : AI->users()) {1719if (U->getType() != Int8PtrTy) continue;1720if (U->stripPointerCasts() != AI) continue;1721if (isUsedByLifetimeMarker(U))1722return true;1723}1724return false;1725}17261727/// Return the result of AI->isStaticAlloca() if AI were moved to the entry1728/// block. Allocas used in inalloca calls and allocas of dynamic array size1729/// cannot be static.1730static bool allocaWouldBeStaticInEntry(const AllocaInst *AI ) {1731return isa<Constant>(AI->getArraySize()) && !AI->isUsedWithInAlloca();1732}17331734/// Returns a DebugLoc for a new DILocation which is a clone of \p OrigDL1735/// inlined at \p InlinedAt. 
/// \p IANodes is an inlined-at cache.
static DebugLoc inlineDebugLoc(DebugLoc OrigDL, DILocation *InlinedAt,
                               LLVMContext &Ctx,
                               DenseMap<const MDNode *, MDNode *> &IANodes) {
  auto IA = DebugLoc::appendInlinedAt(OrigDL, InlinedAt, Ctx, IANodes);
  return DILocation::get(Ctx, OrigDL.getLine(), OrigDL.getCol(),
                         OrigDL.getScope(), IA);
}

/// Update inlined instructions' line numbers to
/// encode the location where these instructions are inlined.
static void fixupLineNumbers(Function *Fn, Function::iterator FI,
                             Instruction *TheCall, bool CalleeHasDebugInfo) {
  const DebugLoc &TheCallDL = TheCall->getDebugLoc();
  if (!TheCallDL)
    return;

  auto &Ctx = Fn->getContext();
  DILocation *InlinedAtNode = TheCallDL;

  // Create a unique call site, not to be confused with any other call from the
  // same location.
  InlinedAtNode = DILocation::getDistinct(
      Ctx, InlinedAtNode->getLine(), InlinedAtNode->getColumn(),
      InlinedAtNode->getScope(), InlinedAtNode->getInlinedAt());

  // Cache the inlined-at nodes as they're built so they are reused; without
  // this, every instruction's inlined-at chain would become distinct from each
  // other.
  DenseMap<const MDNode *, MDNode *> IANodes;

  // Check if we are not generating inline line tables and want to use
  // the call site location instead.
  bool NoInlineLineTables = Fn->hasFnAttribute("no-inline-line-tables");

  // Helper-util for updating the metadata attached to an instruction.
  auto UpdateInst = [&](Instruction &I) {
    // Loop metadata needs to be updated so that the start and end locs
    // reference inlined-at locations.
    auto updateLoopInfoLoc = [&Ctx, &InlinedAtNode,
                              &IANodes](Metadata *MD) -> Metadata * {
      if (auto *Loc = dyn_cast_or_null<DILocation>(MD))
        return inlineDebugLoc(Loc, InlinedAtNode, Ctx, IANodes).get();
      return MD;
    };
    updateLoopMetadataDebugLocations(I, updateLoopInfoLoc);

    if (!NoInlineLineTables)
      if (DebugLoc DL = I.getDebugLoc()) {
        DebugLoc IDL =
            inlineDebugLoc(DL, InlinedAtNode, I.getContext(), IANodes);
        I.setDebugLoc(IDL);
        return;
      }

    if (CalleeHasDebugInfo && !NoInlineLineTables)
      return;

    // If the inlined instruction has no line number, or if inline info
    // is not being generated, make it look as if it originates from the call
    // location. 
This is important for ((__always_inline, __nodebug__))1796// functions which must use caller location for all instructions in their1797// function body.17981799// Don't update static allocas, as they may get moved later.1800if (auto *AI = dyn_cast<AllocaInst>(&I))1801if (allocaWouldBeStaticInEntry(AI))1802return;18031804// Do not force a debug loc for pseudo probes, since they do not need to1805// be debuggable, and also they are expected to have a zero/null dwarf1806// discriminator at this point which could be violated otherwise.1807if (isa<PseudoProbeInst>(I))1808return;18091810I.setDebugLoc(TheCallDL);1811};18121813// Helper-util for updating debug-info records attached to instructions.1814auto UpdateDVR = [&](DbgRecord *DVR) {1815assert(DVR->getDebugLoc() && "Debug Value must have debug loc");1816if (NoInlineLineTables) {1817DVR->setDebugLoc(TheCallDL);1818return;1819}1820DebugLoc DL = DVR->getDebugLoc();1821DebugLoc IDL =1822inlineDebugLoc(DL, InlinedAtNode,1823DVR->getMarker()->getParent()->getContext(), IANodes);1824DVR->setDebugLoc(IDL);1825};18261827// Iterate over all instructions, updating metadata and debug-info records.1828for (; FI != Fn->end(); ++FI) {1829for (Instruction &I : *FI) {1830UpdateInst(I);1831for (DbgRecord &DVR : I.getDbgRecordRange()) {1832UpdateDVR(&DVR);1833}1834}18351836// Remove debug info intrinsics if we're not keeping inline info.1837if (NoInlineLineTables) {1838BasicBlock::iterator BI = FI->begin();1839while (BI != FI->end()) {1840if (isa<DbgInfoIntrinsic>(BI)) {1841BI = BI->eraseFromParent();1842continue;1843} else {1844BI->dropDbgRecords();1845}1846++BI;1847}1848}1849}1850}18511852#undef DEBUG_TYPE1853#define DEBUG_TYPE "assignment-tracking"1854/// Find Alloca and linked DbgAssignIntrinsic for locals escaped by \p CB.1855static at::StorageToVarsMap collectEscapedLocals(const DataLayout &DL,1856const CallBase &CB) {1857at::StorageToVarsMap EscapedLocals;1858SmallPtrSet<const Value *, 4> SeenBases;18591860LLVM_DEBUG(1861errs() << "# Finding caller local variables escaped by callee\n");1862for (const Value *Arg : CB.args()) {1863LLVM_DEBUG(errs() << "INSPECT: " << *Arg << "\n");1864if (!Arg->getType()->isPointerTy()) {1865LLVM_DEBUG(errs() << " | SKIP: Not a pointer\n");1866continue;1867}18681869const Instruction *I = dyn_cast<Instruction>(Arg);1870if (!I) {1871LLVM_DEBUG(errs() << " | SKIP: Not result of instruction\n");1872continue;1873}18741875// Walk back to the base storage.1876assert(Arg->getType()->isPtrOrPtrVectorTy());1877APInt TmpOffset(DL.getIndexTypeSizeInBits(Arg->getType()), 0, false);1878const AllocaInst *Base = dyn_cast<AllocaInst>(1879Arg->stripAndAccumulateConstantOffsets(DL, TmpOffset, true));1880if (!Base) {1881LLVM_DEBUG(errs() << " | SKIP: Couldn't walk back to base storage\n");1882continue;1883}18841885assert(Base);1886LLVM_DEBUG(errs() << " | BASE: " << *Base << "\n");1887// We only need to process each base address once - skip any duplicates.1888if (!SeenBases.insert(Base).second)1889continue;18901891// Find all local variables associated with the backing storage.1892auto CollectAssignsForStorage = [&](auto *DbgAssign) {1893// Skip variables from inlined functions - they are not local variables.1894if (DbgAssign->getDebugLoc().getInlinedAt())1895return;1896LLVM_DEBUG(errs() << " > DEF : " << *DbgAssign << "\n");1897EscapedLocals[Base].insert(at::VarRecord(DbgAssign));1898};1899for_each(at::getAssignmentMarkers(Base), CollectAssignsForStorage);1900for_each(at::getDVRAssignmentMarkers(Base), 
CollectAssignsForStorage);1901}1902return EscapedLocals;1903}19041905static void trackInlinedStores(Function::iterator Start, Function::iterator End,1906const CallBase &CB) {1907LLVM_DEBUG(errs() << "trackInlinedStores into "1908<< Start->getParent()->getName() << " from "1909<< CB.getCalledFunction()->getName() << "\n");1910std::unique_ptr<DataLayout> DL = std::make_unique<DataLayout>(CB.getModule());1911at::trackAssignments(Start, End, collectEscapedLocals(*DL, CB), *DL);1912}19131914/// Update inlined instructions' DIAssignID metadata. We need to do this1915/// otherwise a function inlined more than once into the same function1916/// will cause DIAssignID to be shared by many instructions.1917static void fixupAssignments(Function::iterator Start, Function::iterator End) {1918DenseMap<DIAssignID *, DIAssignID *> Map;1919// Loop over all the inlined instructions. If we find a DIAssignID1920// attachment or use, replace it with a new version.1921for (auto BBI = Start; BBI != End; ++BBI) {1922for (Instruction &I : *BBI)1923at::remapAssignID(Map, I);1924}1925}1926#undef DEBUG_TYPE1927#define DEBUG_TYPE "inline-function"19281929/// Update the block frequencies of the caller after a callee has been inlined.1930///1931/// Each block cloned into the caller has its block frequency scaled by the1932/// ratio of CallSiteFreq/CalleeEntryFreq. This ensures that the cloned copy of1933/// callee's entry block gets the same frequency as the callsite block and the1934/// relative frequencies of all cloned blocks remain the same after cloning.1935static void updateCallerBFI(BasicBlock *CallSiteBlock,1936const ValueToValueMapTy &VMap,1937BlockFrequencyInfo *CallerBFI,1938BlockFrequencyInfo *CalleeBFI,1939const BasicBlock &CalleeEntryBlock) {1940SmallPtrSet<BasicBlock *, 16> ClonedBBs;1941for (auto Entry : VMap) {1942if (!isa<BasicBlock>(Entry.first) || !Entry.second)1943continue;1944auto *OrigBB = cast<BasicBlock>(Entry.first);1945auto *ClonedBB = cast<BasicBlock>(Entry.second);1946BlockFrequency Freq = CalleeBFI->getBlockFreq(OrigBB);1947if (!ClonedBBs.insert(ClonedBB).second) {1948// Multiple blocks in the callee might get mapped to one cloned block in1949// the caller since we prune the callee as we clone it. When that happens,1950// we want to use the maximum among the original blocks' frequencies.1951BlockFrequency NewFreq = CallerBFI->getBlockFreq(ClonedBB);1952if (NewFreq > Freq)1953Freq = NewFreq;1954}1955CallerBFI->setBlockFreq(ClonedBB, Freq);1956}1957BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));1958CallerBFI->setBlockFreqAndScale(1959EntryClone, CallerBFI->getBlockFreq(CallSiteBlock), ClonedBBs);1960}19611962/// Update the branch metadata for cloned call instructions.1963static void updateCallProfile(Function *Callee, const ValueToValueMapTy &VMap,1964const ProfileCount &CalleeEntryCount,1965const CallBase &TheCall, ProfileSummaryInfo *PSI,1966BlockFrequencyInfo *CallerBFI) {1967if (CalleeEntryCount.isSynthetic() || CalleeEntryCount.getCount() < 1)1968return;1969auto CallSiteCount =1970PSI ? 
PSI->getProfileCount(TheCall, CallerBFI) : std::nullopt;1971int64_t CallCount =1972std::min(CallSiteCount.value_or(0), CalleeEntryCount.getCount());1973updateProfileCallee(Callee, -CallCount, &VMap);1974}19751976void llvm::updateProfileCallee(1977Function *Callee, int64_t EntryDelta,1978const ValueMap<const Value *, WeakTrackingVH> *VMap) {1979auto CalleeCount = Callee->getEntryCount();1980if (!CalleeCount)1981return;19821983const uint64_t PriorEntryCount = CalleeCount->getCount();19841985// Since CallSiteCount is an estimate, it could exceed the original callee1986// count and has to be set to 0 so guard against underflow.1987const uint64_t NewEntryCount =1988(EntryDelta < 0 && static_cast<uint64_t>(-EntryDelta) > PriorEntryCount)1989? 01990: PriorEntryCount + EntryDelta;19911992auto updateVTableProfWeight = [](CallBase *CB, const uint64_t NewEntryCount,1993const uint64_t PriorEntryCount) {1994Instruction *VPtr = PGOIndirectCallVisitor::tryGetVTableInstruction(CB);1995if (VPtr)1996scaleProfData(*VPtr, NewEntryCount, PriorEntryCount);1997};19981999// During inlining ?2000if (VMap) {2001uint64_t CloneEntryCount = PriorEntryCount - NewEntryCount;2002for (auto Entry : *VMap) {2003if (isa<CallInst>(Entry.first))2004if (auto *CI = dyn_cast_or_null<CallInst>(Entry.second)) {2005CI->updateProfWeight(CloneEntryCount, PriorEntryCount);2006updateVTableProfWeight(CI, CloneEntryCount, PriorEntryCount);2007}20082009if (isa<InvokeInst>(Entry.first))2010if (auto *II = dyn_cast_or_null<InvokeInst>(Entry.second)) {2011II->updateProfWeight(CloneEntryCount, PriorEntryCount);2012updateVTableProfWeight(II, CloneEntryCount, PriorEntryCount);2013}2014}2015}20162017if (EntryDelta) {2018Callee->setEntryCount(NewEntryCount);20192020for (BasicBlock &BB : *Callee)2021// No need to update the callsite if it is pruned during inlining.2022if (!VMap || VMap->count(&BB))2023for (Instruction &I : BB) {2024if (CallInst *CI = dyn_cast<CallInst>(&I)) {2025CI->updateProfWeight(NewEntryCount, PriorEntryCount);2026updateVTableProfWeight(CI, NewEntryCount, PriorEntryCount);2027}2028if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {2029II->updateProfWeight(NewEntryCount, PriorEntryCount);2030updateVTableProfWeight(II, NewEntryCount, PriorEntryCount);2031}2032}2033}2034}20352036/// An operand bundle "clang.arc.attachedcall" on a call indicates the call2037/// result is implicitly consumed by a call to retainRV or claimRV immediately2038/// after the call. This function inlines the retainRV/claimRV calls.2039///2040/// There are three cases to consider:2041///2042/// 1. If there is a call to autoreleaseRV that takes a pointer to the returned2043/// object in the callee return block, the autoreleaseRV call and the2044/// retainRV/claimRV call in the caller cancel out. If the call in the caller2045/// is a claimRV call, a call to objc_release is emitted.2046///2047/// 2. If there is a call in the callee return block that doesn't have operand2048/// bundle "clang.arc.attachedcall", the operand bundle on the original call2049/// is transferred to the call in the callee.2050///2051/// 3. 
Otherwise, a call to objc_retain is inserted if the call in the caller is2052/// a retainRV call.2053static void2054inlineRetainOrClaimRVCalls(CallBase &CB, objcarc::ARCInstKind RVCallKind,2055const SmallVectorImpl<ReturnInst *> &Returns) {2056Module *Mod = CB.getModule();2057assert(objcarc::isRetainOrClaimRV(RVCallKind) && "unexpected ARC function");2058bool IsRetainRV = RVCallKind == objcarc::ARCInstKind::RetainRV,2059IsUnsafeClaimRV = !IsRetainRV;20602061for (auto *RI : Returns) {2062Value *RetOpnd = objcarc::GetRCIdentityRoot(RI->getOperand(0));2063bool InsertRetainCall = IsRetainRV;2064IRBuilder<> Builder(RI->getContext());20652066// Walk backwards through the basic block looking for either a matching2067// autoreleaseRV call or an unannotated call.2068auto InstRange = llvm::make_range(++(RI->getIterator().getReverse()),2069RI->getParent()->rend());2070for (Instruction &I : llvm::make_early_inc_range(InstRange)) {2071// Ignore casts.2072if (isa<CastInst>(I))2073continue;20742075if (auto *II = dyn_cast<IntrinsicInst>(&I)) {2076if (II->getIntrinsicID() != Intrinsic::objc_autoreleaseReturnValue ||2077!II->hasNUses(0) ||2078objcarc::GetRCIdentityRoot(II->getOperand(0)) != RetOpnd)2079break;20802081// If we've found a matching authoreleaseRV call:2082// - If claimRV is attached to the call, insert a call to objc_release2083// and erase the autoreleaseRV call.2084// - If retainRV is attached to the call, just erase the autoreleaseRV2085// call.2086if (IsUnsafeClaimRV) {2087Builder.SetInsertPoint(II);2088Function *IFn =2089Intrinsic::getDeclaration(Mod, Intrinsic::objc_release);2090Builder.CreateCall(IFn, RetOpnd, "");2091}2092II->eraseFromParent();2093InsertRetainCall = false;2094break;2095}20962097auto *CI = dyn_cast<CallInst>(&I);20982099if (!CI)2100break;21012102if (objcarc::GetRCIdentityRoot(CI) != RetOpnd ||2103objcarc::hasAttachedCallOpBundle(CI))2104break;21052106// If we've found an unannotated call that defines RetOpnd, add a2107// "clang.arc.attachedcall" operand bundle.2108Value *BundleArgs[] = {*objcarc::getAttachedARCFunction(&CB)};2109OperandBundleDef OB("clang.arc.attachedcall", BundleArgs);2110auto *NewCall = CallBase::addOperandBundle(2111CI, LLVMContext::OB_clang_arc_attachedcall, OB, CI->getIterator());2112NewCall->copyMetadata(*CI);2113CI->replaceAllUsesWith(NewCall);2114CI->eraseFromParent();2115InsertRetainCall = false;2116break;2117}21182119if (InsertRetainCall) {2120// The retainRV is attached to the call and we've failed to find a2121// matching autoreleaseRV or an annotated call in the callee. Emit a call2122// to objc_retain.2123Builder.SetInsertPoint(RI);2124Function *IFn = Intrinsic::getDeclaration(Mod, Intrinsic::objc_retain);2125Builder.CreateCall(IFn, RetOpnd, "");2126}2127}2128}21292130/// This function inlines the called function into the basic block of the2131/// caller. This returns false if it is not possible to inline this call.2132/// The program is still in a well defined state if this occurs though.2133///2134/// Note that this only does one level of inlining. For example, if the2135/// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now2136/// exists in the instruction stream. 
Similarly this will inline a recursive2137/// function by one level.2138llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI,2139bool MergeAttributes,2140AAResults *CalleeAAR,2141bool InsertLifetime,2142Function *ForwardVarArgsTo) {2143assert(CB.getParent() && CB.getFunction() && "Instruction not in function!");21442145// FIXME: we don't inline callbr yet.2146if (isa<CallBrInst>(CB))2147return InlineResult::failure("We don't inline callbr yet.");21482149// If IFI has any state in it, zap it before we fill it in.2150IFI.reset();21512152Function *CalledFunc = CB.getCalledFunction();2153if (!CalledFunc || // Can't inline external function or indirect2154CalledFunc->isDeclaration()) // call!2155return InlineResult::failure("external or indirect");21562157// The inliner does not know how to inline through calls with operand bundles2158// in general ...2159Value *ConvergenceControlToken = nullptr;2160if (CB.hasOperandBundles()) {2161for (int i = 0, e = CB.getNumOperandBundles(); i != e; ++i) {2162auto OBUse = CB.getOperandBundleAt(i);2163uint32_t Tag = OBUse.getTagID();2164// ... but it knows how to inline through "deopt" operand bundles ...2165if (Tag == LLVMContext::OB_deopt)2166continue;2167// ... and "funclet" operand bundles.2168if (Tag == LLVMContext::OB_funclet)2169continue;2170if (Tag == LLVMContext::OB_clang_arc_attachedcall)2171continue;2172if (Tag == LLVMContext::OB_kcfi)2173continue;2174if (Tag == LLVMContext::OB_convergencectrl) {2175ConvergenceControlToken = OBUse.Inputs[0].get();2176continue;2177}21782179return InlineResult::failure("unsupported operand bundle");2180}2181}21822183// FIXME: The check below is redundant and incomplete. According to spec, if a2184// convergent call is missing a token, then the caller is using uncontrolled2185// convergence. If the callee has an entry intrinsic, then the callee is using2186// controlled convergence, and the call cannot be inlined. A proper2187// implemenation of this check requires a whole new analysis that identifies2188// convergence in every function. For now, we skip that and just do this one2189// cursory check. The underlying assumption is that in a compiler flow that2190// fully implements convergence control tokens, there is no mixing of2191// controlled and uncontrolled convergent operations in the whole program.2192if (CB.isConvergent()) {2193auto *I = CalledFunc->getEntryBlock().getFirstNonPHI();2194if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {2195if (IntrinsicCall->getIntrinsicID() ==2196Intrinsic::experimental_convergence_entry) {2197if (!ConvergenceControlToken) {2198return InlineResult::failure(2199"convergent call needs convergencectrl operand");2200}2201}2202}2203}22042205// If the call to the callee cannot throw, set the 'nounwind' flag on any2206// calls that we inline.2207bool MarkNoUnwind = CB.doesNotThrow();22082209BasicBlock *OrigBB = CB.getParent();2210Function *Caller = OrigBB->getParent();22112212// GC poses two hazards to inlining, which only occur when the callee has GC:2213// 1. If the caller has no GC, then the callee's GC must be propagated to the2214// caller.2215// 2. 
If the caller has a differing GC, it is invalid to inline.2216if (CalledFunc->hasGC()) {2217if (!Caller->hasGC())2218Caller->setGC(CalledFunc->getGC());2219else if (CalledFunc->getGC() != Caller->getGC())2220return InlineResult::failure("incompatible GC");2221}22222223// Get the personality function from the callee if it contains a landing pad.2224Constant *CalledPersonality =2225CalledFunc->hasPersonalityFn()2226? CalledFunc->getPersonalityFn()->stripPointerCasts()2227: nullptr;22282229// Find the personality function used by the landing pads of the caller. If it2230// exists, then check to see that it matches the personality function used in2231// the callee.2232Constant *CallerPersonality =2233Caller->hasPersonalityFn()2234? Caller->getPersonalityFn()->stripPointerCasts()2235: nullptr;2236if (CalledPersonality) {2237if (!CallerPersonality)2238Caller->setPersonalityFn(CalledPersonality);2239// If the personality functions match, then we can perform the2240// inlining. Otherwise, we can't inline.2241// TODO: This isn't 100% true. Some personality functions are proper2242// supersets of others and can be used in place of the other.2243else if (CalledPersonality != CallerPersonality)2244return InlineResult::failure("incompatible personality");2245}22462247// We need to figure out which funclet the callsite was in so that we may2248// properly nest the callee.2249Instruction *CallSiteEHPad = nullptr;2250if (CallerPersonality) {2251EHPersonality Personality = classifyEHPersonality(CallerPersonality);2252if (isScopedEHPersonality(Personality)) {2253std::optional<OperandBundleUse> ParentFunclet =2254CB.getOperandBundle(LLVMContext::OB_funclet);2255if (ParentFunclet)2256CallSiteEHPad = cast<FuncletPadInst>(ParentFunclet->Inputs.front());22572258// OK, the inlining site is legal. What about the target function?22592260if (CallSiteEHPad) {2261if (Personality == EHPersonality::MSVC_CXX) {2262// The MSVC personality cannot tolerate catches getting inlined into2263// cleanup funclets.2264if (isa<CleanupPadInst>(CallSiteEHPad)) {2265// Ok, the call site is within a cleanuppad. 
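// For intuition, a hedged sketch of the rejected shape (labels invented):
//
//   %cp = cleanuppad within none []
//   call void @callee() [ "funclet"(token %cp) ]   ; call site in a cleanup
//
// If @callee contains a catchswitch/catchpad, inlining would nest a catch
// inside a cleanup funclet, which the MSVC C++ personality cannot handle,
// so the loop below refuses to inline ("catch in cleanup funclet").
//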
Let's check the callee2266// for catchpads.2267for (const BasicBlock &CalledBB : *CalledFunc) {2268if (isa<CatchSwitchInst>(CalledBB.getFirstNonPHI()))2269return InlineResult::failure("catch in cleanup funclet");2270}2271}2272} else if (isAsynchronousEHPersonality(Personality)) {2273// SEH is even less tolerant, there may not be any sort of exceptional2274// funclet in the callee.2275for (const BasicBlock &CalledBB : *CalledFunc) {2276if (CalledBB.isEHPad())2277return InlineResult::failure("SEH in cleanup funclet");2278}2279}2280}2281}2282}22832284// Determine if we are dealing with a call in an EHPad which does not unwind2285// to caller.2286bool EHPadForCallUnwindsLocally = false;2287if (CallSiteEHPad && isa<CallInst>(CB)) {2288UnwindDestMemoTy FuncletUnwindMap;2289Value *CallSiteUnwindDestToken =2290getUnwindDestToken(CallSiteEHPad, FuncletUnwindMap);22912292EHPadForCallUnwindsLocally =2293CallSiteUnwindDestToken &&2294!isa<ConstantTokenNone>(CallSiteUnwindDestToken);2295}22962297// Get an iterator to the last basic block in the function, which will have2298// the new function inlined after it.2299Function::iterator LastBlock = --Caller->end();23002301// Make sure to capture all of the return instructions from the cloned2302// function.2303SmallVector<ReturnInst*, 8> Returns;2304ClonedCodeInfo InlinedFunctionInfo;2305Function::iterator FirstNewBlock;23062307{ // Scope to destroy VMap after cloning.2308ValueToValueMapTy VMap;2309struct ByValInit {2310Value *Dst;2311Value *Src;2312Type *Ty;2313};2314// Keep a list of pair (dst, src) to emit byval initializations.2315SmallVector<ByValInit, 4> ByValInits;23162317// When inlining a function that contains noalias scope metadata,2318// this metadata needs to be cloned so that the inlined blocks2319// have different "unique scopes" at every call site.2320// Track the metadata that must be cloned. Do this before other changes to2321// the function, so that we do not get in trouble when inlining caller ==2322// callee.2323ScopedAliasMetadataDeepCloner SAMetadataCloner(CB.getCalledFunction());23242325auto &DL = Caller->getDataLayout();23262327// Calculate the vector of arguments to pass into the function cloner, which2328// matches up the formal to the actual argument values.2329auto AI = CB.arg_begin();2330unsigned ArgNo = 0;2331for (Function::arg_iterator I = CalledFunc->arg_begin(),2332E = CalledFunc->arg_end(); I != E; ++I, ++AI, ++ArgNo) {2333Value *ActualArg = *AI;23342335// When byval arguments actually inlined, we need to make the copy implied2336// by them explicit. However, we don't do this if the callee is readonly2337// or readnone, because the copy would be unneeded: the callee doesn't2338// modify the struct.2339if (CB.isByValArgument(ArgNo)) {2340ActualArg = HandleByValArgument(CB.getParamByValType(ArgNo), ActualArg,2341&CB, CalledFunc, IFI,2342CalledFunc->getParamAlign(ArgNo));2343if (ActualArg != *AI)2344ByValInits.push_back(2345{ActualArg, (Value *)*AI, CB.getParamByValType(ArgNo)});2346}23472348VMap[&*I] = ActualArg;2349}23502351// TODO: Remove this when users have been updated to the assume bundles.2352// Add alignment assumptions if necessary. We do this before the inlined2353// instructions are actually cloned into the caller so that we can easily2354// check what will be known at the start of the inlined code.2355AddAlignmentAssumptions(CB, IFI);23562357AssumptionCache *AC =2358IFI.GetAssumptionCache ? 
&IFI.GetAssumptionCache(*Caller) : nullptr;23592360/// Preserve all attributes on of the call and its parameters.2361salvageKnowledge(&CB, AC);23622363// We want the inliner to prune the code as it copies. We would LOVE to2364// have no dead or constant instructions leftover after inlining occurs2365// (which can happen, e.g., because an argument was constant), but we'll be2366// happy with whatever the cloner can do.2367CloneAndPruneFunctionInto(Caller, CalledFunc, VMap,2368/*ModuleLevelChanges=*/false, Returns, ".i",2369&InlinedFunctionInfo);2370// Remember the first block that is newly cloned over.2371FirstNewBlock = LastBlock; ++FirstNewBlock;23722373// Insert retainRV/clainRV runtime calls.2374objcarc::ARCInstKind RVCallKind = objcarc::getAttachedARCFunctionKind(&CB);2375if (RVCallKind != objcarc::ARCInstKind::None)2376inlineRetainOrClaimRVCalls(CB, RVCallKind, Returns);23772378// Updated caller/callee profiles only when requested. For sample loader2379// inlining, the context-sensitive inlinee profile doesn't need to be2380// subtracted from callee profile, and the inlined clone also doesn't need2381// to be scaled based on call site count.2382if (IFI.UpdateProfile) {2383if (IFI.CallerBFI != nullptr && IFI.CalleeBFI != nullptr)2384// Update the BFI of blocks cloned into the caller.2385updateCallerBFI(OrigBB, VMap, IFI.CallerBFI, IFI.CalleeBFI,2386CalledFunc->front());23872388if (auto Profile = CalledFunc->getEntryCount())2389updateCallProfile(CalledFunc, VMap, *Profile, CB, IFI.PSI,2390IFI.CallerBFI);2391}23922393// Inject byval arguments initialization.2394for (ByValInit &Init : ByValInits)2395HandleByValArgumentInit(Init.Ty, Init.Dst, Init.Src, Caller->getParent(),2396&*FirstNewBlock, IFI, CalledFunc);23972398std::optional<OperandBundleUse> ParentDeopt =2399CB.getOperandBundle(LLVMContext::OB_deopt);2400if (ParentDeopt) {2401SmallVector<OperandBundleDef, 2> OpDefs;24022403for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) {2404CallBase *ICS = dyn_cast_or_null<CallBase>(VH);2405if (!ICS)2406continue; // instruction was DCE'd or RAUW'ed to undef24072408OpDefs.clear();24092410OpDefs.reserve(ICS->getNumOperandBundles());24112412for (unsigned COBi = 0, COBe = ICS->getNumOperandBundles(); COBi < COBe;2413++COBi) {2414auto ChildOB = ICS->getOperandBundleAt(COBi);2415if (ChildOB.getTagID() != LLVMContext::OB_deopt) {2416// If the inlined call has other operand bundles, let them be2417OpDefs.emplace_back(ChildOB);2418continue;2419}24202421// It may be useful to separate this logic (of handling operand2422// bundles) out to a separate "policy" component if this gets crowded.2423// Prepend the parent's deoptimization continuation to the newly2424// inlined call's deoptimization continuation.2425std::vector<Value *> MergedDeoptArgs;2426MergedDeoptArgs.reserve(ParentDeopt->Inputs.size() +2427ChildOB.Inputs.size());24282429llvm::append_range(MergedDeoptArgs, ParentDeopt->Inputs);2430llvm::append_range(MergedDeoptArgs, ChildOB.Inputs);24312432OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs));2433}24342435Instruction *NewI = CallBase::Create(ICS, OpDefs, ICS->getIterator());24362437// Note: the RAUW does the appropriate fixup in VMap, so we need to do2438// this even if the call returns void.2439ICS->replaceAllUsesWith(NewI);24402441VH = nullptr;2442ICS->eraseFromParent();2443}2444}24452446// For 'nodebug' functions, the associated DISubprogram is always null.2447// Conservatively avoid propagating the callsite debug location to2448// instructions inlined from a function whose 
DISubprogram is not null.2449fixupLineNumbers(Caller, FirstNewBlock, &CB,2450CalledFunc->getSubprogram() != nullptr);24512452if (isAssignmentTrackingEnabled(*Caller->getParent())) {2453// Interpret inlined stores to caller-local variables as assignments.2454trackInlinedStores(FirstNewBlock, Caller->end(), CB);24552456// Update DIAssignID metadata attachments and uses so that they are2457// unique to this inlined instance.2458fixupAssignments(FirstNewBlock, Caller->end());2459}24602461// Now clone the inlined noalias scope metadata.2462SAMetadataCloner.clone();2463SAMetadataCloner.remap(FirstNewBlock, Caller->end());24642465// Add noalias metadata if necessary.2466AddAliasScopeMetadata(CB, VMap, DL, CalleeAAR, InlinedFunctionInfo);24672468// Clone return attributes on the callsite into the calls within the inlined2469// function which feed into its return value.2470AddReturnAttributes(CB, VMap, InlinedFunctionInfo);24712472// Clone attributes on the params of the callsite to calls within the2473// inlined function which use the same param.2474AddParamAndFnBasicAttributes(CB, VMap, InlinedFunctionInfo);24752476propagateMemProfMetadata(CalledFunc, CB,2477InlinedFunctionInfo.ContainsMemProfMetadata, VMap);24782479// Propagate metadata on the callsite if necessary.2480PropagateCallSiteMetadata(CB, FirstNewBlock, Caller->end());24812482// Register any cloned assumptions.2483if (IFI.GetAssumptionCache)2484for (BasicBlock &NewBlock :2485make_range(FirstNewBlock->getIterator(), Caller->end()))2486for (Instruction &I : NewBlock)2487if (auto *II = dyn_cast<AssumeInst>(&I))2488IFI.GetAssumptionCache(*Caller).registerAssumption(II);2489}24902491if (ConvergenceControlToken) {2492auto *I = FirstNewBlock->getFirstNonPHI();2493if (auto *IntrinsicCall = dyn_cast<IntrinsicInst>(I)) {2494if (IntrinsicCall->getIntrinsicID() ==2495Intrinsic::experimental_convergence_entry) {2496IntrinsicCall->replaceAllUsesWith(ConvergenceControlToken);2497IntrinsicCall->eraseFromParent();2498}2499}2500}25012502// If there are any alloca instructions in the block that used to be the entry2503// block for the callee, move them to the entry block of the caller. First2504// calculate which instruction they should be inserted before. We insert the2505// instructions at the end of the current alloca list.2506{2507BasicBlock::iterator InsertPoint = Caller->begin()->begin();2508for (BasicBlock::iterator I = FirstNewBlock->begin(),2509E = FirstNewBlock->end(); I != E; ) {2510AllocaInst *AI = dyn_cast<AllocaInst>(I++);2511if (!AI) continue;25122513// If the alloca is now dead, remove it. This often occurs due to code2514// specialization.2515if (AI->use_empty()) {2516AI->eraseFromParent();2517continue;2518}25192520if (!allocaWouldBeStaticInEntry(AI))2521continue;25222523// Keep track of the static allocas that we inline into the caller.2524IFI.StaticAllocas.push_back(AI);25252526// Scan for the block of allocas that we can move over, and move them2527// all at once.2528while (isa<AllocaInst>(I) &&2529!cast<AllocaInst>(I)->use_empty() &&2530allocaWouldBeStaticInEntry(cast<AllocaInst>(I))) {2531IFI.StaticAllocas.push_back(cast<AllocaInst>(I));2532++I;2533}25342535// Transfer all of the allocas over in a block. 
Using splice means2536// that the instructions aren't removed from the symbol table, then2537// reinserted.2538I.setTailBit(true);2539Caller->getEntryBlock().splice(InsertPoint, &*FirstNewBlock,2540AI->getIterator(), I);2541}2542}25432544SmallVector<Value*,4> VarArgsToForward;2545SmallVector<AttributeSet, 4> VarArgsAttrs;2546for (unsigned i = CalledFunc->getFunctionType()->getNumParams();2547i < CB.arg_size(); i++) {2548VarArgsToForward.push_back(CB.getArgOperand(i));2549VarArgsAttrs.push_back(CB.getAttributes().getParamAttrs(i));2550}25512552bool InlinedMustTailCalls = false, InlinedDeoptimizeCalls = false;2553if (InlinedFunctionInfo.ContainsCalls) {2554CallInst::TailCallKind CallSiteTailKind = CallInst::TCK_None;2555if (CallInst *CI = dyn_cast<CallInst>(&CB))2556CallSiteTailKind = CI->getTailCallKind();25572558// For inlining purposes, the "notail" marker is the same as no marker.2559if (CallSiteTailKind == CallInst::TCK_NoTail)2560CallSiteTailKind = CallInst::TCK_None;25612562for (Function::iterator BB = FirstNewBlock, E = Caller->end(); BB != E;2563++BB) {2564for (Instruction &I : llvm::make_early_inc_range(*BB)) {2565CallInst *CI = dyn_cast<CallInst>(&I);2566if (!CI)2567continue;25682569// Forward varargs from inlined call site to calls to the2570// ForwardVarArgsTo function, if requested, and to musttail calls.2571if (!VarArgsToForward.empty() &&2572((ForwardVarArgsTo &&2573CI->getCalledFunction() == ForwardVarArgsTo) ||2574CI->isMustTailCall())) {2575// Collect attributes for non-vararg parameters.2576AttributeList Attrs = CI->getAttributes();2577SmallVector<AttributeSet, 8> ArgAttrs;2578if (!Attrs.isEmpty() || !VarArgsAttrs.empty()) {2579for (unsigned ArgNo = 0;2580ArgNo < CI->getFunctionType()->getNumParams(); ++ArgNo)2581ArgAttrs.push_back(Attrs.getParamAttrs(ArgNo));2582}25832584// Add VarArg attributes.2585ArgAttrs.append(VarArgsAttrs.begin(), VarArgsAttrs.end());2586Attrs = AttributeList::get(CI->getContext(), Attrs.getFnAttrs(),2587Attrs.getRetAttrs(), ArgAttrs);2588// Add VarArgs to existing parameters.2589SmallVector<Value *, 6> Params(CI->args());2590Params.append(VarArgsToForward.begin(), VarArgsToForward.end());2591CallInst *NewCI = CallInst::Create(2592CI->getFunctionType(), CI->getCalledOperand(), Params, "", CI->getIterator());2593NewCI->setDebugLoc(CI->getDebugLoc());2594NewCI->setAttributes(Attrs);2595NewCI->setCallingConv(CI->getCallingConv());2596CI->replaceAllUsesWith(NewCI);2597CI->eraseFromParent();2598CI = NewCI;2599}26002601if (Function *F = CI->getCalledFunction())2602InlinedDeoptimizeCalls |=2603F->getIntrinsicID() == Intrinsic::experimental_deoptimize;26042605// We need to reduce the strength of any inlined tail calls. For2606// musttail, we have to avoid introducing potential unbounded stack2607// growth. For example, if functions 'f' and 'g' are mutually recursive2608// with musttail, we can inline 'g' into 'f' so long as we preserve2609// musttail on the cloned call to 'f'. If either the inlined call site2610// or the cloned call site is *not* musttail, the program already has2611// one frame of stack growth, so it's safe to remove musttail. 
Here is2612// a table of example transformations:2613//2614// f -> musttail g -> musttail f ==> f -> musttail f2615// f -> musttail g -> tail f ==> f -> tail f2616// f -> g -> musttail f ==> f -> f2617// f -> g -> tail f ==> f -> f2618//2619// Inlined notail calls should remain notail calls.2620CallInst::TailCallKind ChildTCK = CI->getTailCallKind();2621if (ChildTCK != CallInst::TCK_NoTail)2622ChildTCK = std::min(CallSiteTailKind, ChildTCK);2623CI->setTailCallKind(ChildTCK);2624InlinedMustTailCalls |= CI->isMustTailCall();26252626// Call sites inlined through a 'nounwind' call site should be2627// 'nounwind' as well. However, avoid marking call sites explicitly2628// where possible. This helps expose more opportunities for CSE after2629// inlining, commonly when the callee is an intrinsic.2630if (MarkNoUnwind && !CI->doesNotThrow())2631CI->setDoesNotThrow();2632}2633}2634}26352636// Leave lifetime markers for the static alloca's, scoping them to the2637// function we just inlined.2638// We need to insert lifetime intrinsics even at O0 to avoid invalid2639// access caused by multithreaded coroutines. The check2640// `Caller->isPresplitCoroutine()` would affect AlwaysInliner at O0 only.2641if ((InsertLifetime || Caller->isPresplitCoroutine()) &&2642!IFI.StaticAllocas.empty()) {2643IRBuilder<> builder(&*FirstNewBlock, FirstNewBlock->begin());2644for (AllocaInst *AI : IFI.StaticAllocas) {2645// Don't mark swifterror allocas. They can't have bitcast uses.2646if (AI->isSwiftError())2647continue;26482649// If the alloca is already scoped to something smaller than the whole2650// function then there's no need to add redundant, less accurate markers.2651if (hasLifetimeMarkers(AI))2652continue;26532654// Try to determine the size of the allocation.2655ConstantInt *AllocaSize = nullptr;2656if (ConstantInt *AIArraySize =2657dyn_cast<ConstantInt>(AI->getArraySize())) {2658auto &DL = Caller->getDataLayout();2659Type *AllocaType = AI->getAllocatedType();2660TypeSize AllocaTypeSize = DL.getTypeAllocSize(AllocaType);2661uint64_t AllocaArraySize = AIArraySize->getLimitedValue();26622663// Don't add markers for zero-sized allocas.2664if (AllocaArraySize == 0)2665continue;26662667// Check that array size doesn't saturate uint64_t and doesn't2668// overflow when it's multiplied by type size.2669if (!AllocaTypeSize.isScalable() &&2670AllocaArraySize != std::numeric_limits<uint64_t>::max() &&2671std::numeric_limits<uint64_t>::max() / AllocaArraySize >=2672AllocaTypeSize.getFixedValue()) {2673AllocaSize = ConstantInt::get(Type::getInt64Ty(AI->getContext()),2674AllocaArraySize * AllocaTypeSize);2675}2676}26772678builder.CreateLifetimeStart(AI, AllocaSize);2679for (ReturnInst *RI : Returns) {2680// Don't insert llvm.lifetime.end calls between a musttail or deoptimize2681// call and a return. 
The return kills all local allocas.2682if (InlinedMustTailCalls &&2683RI->getParent()->getTerminatingMustTailCall())2684continue;2685if (InlinedDeoptimizeCalls &&2686RI->getParent()->getTerminatingDeoptimizeCall())2687continue;2688IRBuilder<>(RI).CreateLifetimeEnd(AI, AllocaSize);2689}2690}2691}26922693// If the inlined code contained dynamic alloca instructions, wrap the inlined2694// code with llvm.stacksave/llvm.stackrestore intrinsics.2695if (InlinedFunctionInfo.ContainsDynamicAllocas) {2696// Insert the llvm.stacksave.2697CallInst *SavedPtr = IRBuilder<>(&*FirstNewBlock, FirstNewBlock->begin())2698.CreateStackSave("savedstack");26992700// Insert a call to llvm.stackrestore before any return instructions in the2701// inlined function.2702for (ReturnInst *RI : Returns) {2703// Don't insert llvm.stackrestore calls between a musttail or deoptimize2704// call and a return. The return will restore the stack pointer.2705if (InlinedMustTailCalls && RI->getParent()->getTerminatingMustTailCall())2706continue;2707if (InlinedDeoptimizeCalls && RI->getParent()->getTerminatingDeoptimizeCall())2708continue;2709IRBuilder<>(RI).CreateStackRestore(SavedPtr);2710}2711}27122713// If we are inlining for an invoke instruction, we must make sure to rewrite2714// any call instructions into invoke instructions. This is sensitive to which2715// funclet pads were top-level in the inlinee, so must be done before2716// rewriting the "parent pad" links.2717if (auto *II = dyn_cast<InvokeInst>(&CB)) {2718BasicBlock *UnwindDest = II->getUnwindDest();2719Instruction *FirstNonPHI = UnwindDest->getFirstNonPHI();2720if (isa<LandingPadInst>(FirstNonPHI)) {2721HandleInlinedLandingPad(II, &*FirstNewBlock, InlinedFunctionInfo);2722} else {2723HandleInlinedEHPad(II, &*FirstNewBlock, InlinedFunctionInfo);2724}2725}27262727// Update the lexical scopes of the new funclets and callsites.2728// Anything that had 'none' as its parent is now nested inside the callsite's2729// EHPad.2730if (CallSiteEHPad) {2731for (Function::iterator BB = FirstNewBlock->getIterator(),2732E = Caller->end();2733BB != E; ++BB) {2734// Add bundle operands to inlined call sites.2735PropagateOperandBundles(BB, CallSiteEHPad);27362737// It is problematic if the inlinee has a cleanupret which unwinds to2738// caller and we inline it into a call site which doesn't unwind but into2739// an EH pad that does. Such an edge must be dynamically unreachable.2740// As such, we replace the cleanupret with unreachable.2741if (auto *CleanupRet = dyn_cast<CleanupReturnInst>(BB->getTerminator()))2742if (CleanupRet->unwindsToCaller() && EHPadForCallUnwindsLocally)2743changeToUnreachable(CleanupRet);27442745Instruction *I = BB->getFirstNonPHI();2746if (!I->isEHPad())2747continue;27482749if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(I)) {2750if (isa<ConstantTokenNone>(CatchSwitch->getParentPad()))2751CatchSwitch->setParentPad(CallSiteEHPad);2752} else {2753auto *FPI = cast<FuncletPadInst>(I);2754if (isa<ConstantTokenNone>(FPI->getParentPad()))2755FPI->setParentPad(CallSiteEHPad);2756}2757}2758}27592760if (InlinedDeoptimizeCalls) {2761// We need to at least remove the deoptimizing returns from the Return set,2762// so that the control flow from those returns does not get merged into the2763// caller (but terminate it instead). 
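// For reference, a deoptimizing return block looks schematically like this
// (operands elided, names illustrative):
//
//   %v = call i32 @llvm.experimental.deoptimize.i32(...) [ "deopt"(...) ]
//   ret i32 %v
//
// Control flow out of such a block must terminate the caller rather than
// fall through to the code after the inlined call site.
//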
If the caller's return type does not2764// match the callee's return type, we also need to change the return type of2765// the intrinsic.2766if (Caller->getReturnType() == CB.getType()) {2767llvm::erase_if(Returns, [](ReturnInst *RI) {2768return RI->getParent()->getTerminatingDeoptimizeCall() != nullptr;2769});2770} else {2771SmallVector<ReturnInst *, 8> NormalReturns;2772Function *NewDeoptIntrinsic = Intrinsic::getDeclaration(2773Caller->getParent(), Intrinsic::experimental_deoptimize,2774{Caller->getReturnType()});27752776for (ReturnInst *RI : Returns) {2777CallInst *DeoptCall = RI->getParent()->getTerminatingDeoptimizeCall();2778if (!DeoptCall) {2779NormalReturns.push_back(RI);2780continue;2781}27822783// The calling convention on the deoptimize call itself may be bogus,2784// since the code we're inlining may have undefined behavior (and may2785// never actually execute at runtime); but all2786// @llvm.experimental.deoptimize declarations have to have the same2787// calling convention in a well-formed module.2788auto CallingConv = DeoptCall->getCalledFunction()->getCallingConv();2789NewDeoptIntrinsic->setCallingConv(CallingConv);2790auto *CurBB = RI->getParent();2791RI->eraseFromParent();27922793SmallVector<Value *, 4> CallArgs(DeoptCall->args());27942795SmallVector<OperandBundleDef, 1> OpBundles;2796DeoptCall->getOperandBundlesAsDefs(OpBundles);2797auto DeoptAttributes = DeoptCall->getAttributes();2798DeoptCall->eraseFromParent();2799assert(!OpBundles.empty() &&2800"Expected at least the deopt operand bundle");28012802IRBuilder<> Builder(CurBB);2803CallInst *NewDeoptCall =2804Builder.CreateCall(NewDeoptIntrinsic, CallArgs, OpBundles);2805NewDeoptCall->setCallingConv(CallingConv);2806NewDeoptCall->setAttributes(DeoptAttributes);2807if (NewDeoptCall->getType()->isVoidTy())2808Builder.CreateRetVoid();2809else2810Builder.CreateRet(NewDeoptCall);2811// Since the ret type is changed, remove the incompatible attributes.2812NewDeoptCall->removeRetAttrs(2813AttributeFuncs::typeIncompatible(NewDeoptCall->getType()));2814}28152816// Leave behind the normal returns so we can merge control flow.2817std::swap(Returns, NormalReturns);2818}2819}28202821// Handle any inlined musttail call sites. In order for a new call site to be2822// musttail, the source of the clone and the inlined call site must have been2823// musttail. 
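// Schematic example (invented names): a cloned return block of the form
//
//   %r = musttail call i32 @f(i32 %x)
//   ret i32 %r
//
// keeps its own ret (bitcast to the caller's return type if necessary, see
// below) instead of branching to the common <callee>.exit block.
//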
Therefore it's safe to return without merging control into the2824// phi below.2825if (InlinedMustTailCalls) {2826// Check if we need to bitcast the result of any musttail calls.2827Type *NewRetTy = Caller->getReturnType();2828bool NeedBitCast = !CB.use_empty() && CB.getType() != NewRetTy;28292830// Handle the returns preceded by musttail calls separately.2831SmallVector<ReturnInst *, 8> NormalReturns;2832for (ReturnInst *RI : Returns) {2833CallInst *ReturnedMustTail =2834RI->getParent()->getTerminatingMustTailCall();2835if (!ReturnedMustTail) {2836NormalReturns.push_back(RI);2837continue;2838}2839if (!NeedBitCast)2840continue;28412842// Delete the old return and any preceding bitcast.2843BasicBlock *CurBB = RI->getParent();2844auto *OldCast = dyn_cast_or_null<BitCastInst>(RI->getReturnValue());2845RI->eraseFromParent();2846if (OldCast)2847OldCast->eraseFromParent();28482849// Insert a new bitcast and return with the right type.2850IRBuilder<> Builder(CurBB);2851Builder.CreateRet(Builder.CreateBitCast(ReturnedMustTail, NewRetTy));2852}28532854// Leave behind the normal returns so we can merge control flow.2855std::swap(Returns, NormalReturns);2856}28572858// Now that all of the transforms on the inlined code have taken place but2859// before we splice the inlined code into the CFG and lose track of which2860// blocks were actually inlined, collect the call sites. We only do this if2861// call graph updates weren't requested, as those provide value handle based2862// tracking of inlined call sites instead. Calls to intrinsics are not2863// collected because they are not inlineable.2864if (InlinedFunctionInfo.ContainsCalls) {2865// Otherwise just collect the raw call sites that were inlined.2866for (BasicBlock &NewBB :2867make_range(FirstNewBlock->getIterator(), Caller->end()))2868for (Instruction &I : NewBB)2869if (auto *CB = dyn_cast<CallBase>(&I))2870if (!(CB->getCalledFunction() &&2871CB->getCalledFunction()->isIntrinsic()))2872IFI.InlinedCallSites.push_back(CB);2873}28742875// If we cloned in _exactly one_ basic block, and if that block ends in a2876// return instruction, we splice the body of the inlined callee directly into2877// the calling basic block.2878if (Returns.size() == 1 && std::distance(FirstNewBlock, Caller->end()) == 1) {2879// Move all of the instructions right before the call.2880OrigBB->splice(CB.getIterator(), &*FirstNewBlock, FirstNewBlock->begin(),2881FirstNewBlock->end());2882// Remove the cloned basic block.2883Caller->back().eraseFromParent();28842885// If the call site was an invoke instruction, add a branch to the normal2886// destination.2887if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {2888BranchInst *NewBr = BranchInst::Create(II->getNormalDest(), CB.getIterator());2889NewBr->setDebugLoc(Returns[0]->getDebugLoc());2890}28912892// If the return instruction returned a value, replace uses of the call with2893// uses of the returned value.2894if (!CB.use_empty()) {2895ReturnInst *R = Returns[0];2896if (&CB == R->getReturnValue())2897CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));2898else2899CB.replaceAllUsesWith(R->getReturnValue());2900}2901// Since we are now done with the Call/Invoke, we can delete it.2902CB.eraseFromParent();29032904// Since we are now done with the return instruction, delete it also.2905Returns[0]->eraseFromParent();29062907if (MergeAttributes)2908AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);29092910// We are now done with the inlining.2911return InlineResult::success();2912}29132914// Otherwise, we have the 
normal case, of more than one block to inline or2915// multiple return sites.29162917// We want to clone the entire callee function into the hole between the2918// "starter" and "ender" blocks. How we accomplish this depends on whether2919// this is an invoke instruction or a call instruction.2920BasicBlock *AfterCallBB;2921BranchInst *CreatedBranchToNormalDest = nullptr;2922if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {29232924// Add an unconditional branch to make this look like the CallInst case...2925CreatedBranchToNormalDest = BranchInst::Create(II->getNormalDest(), CB.getIterator());29262927// Split the basic block. This guarantees that no PHI nodes will have to be2928// updated due to new incoming edges, and make the invoke case more2929// symmetric to the call case.2930AfterCallBB =2931OrigBB->splitBasicBlock(CreatedBranchToNormalDest->getIterator(),2932CalledFunc->getName() + ".exit");29332934} else { // It's a call2935// If this is a call instruction, we need to split the basic block that2936// the call lives in.2937//2938AfterCallBB = OrigBB->splitBasicBlock(CB.getIterator(),2939CalledFunc->getName() + ".exit");2940}29412942if (IFI.CallerBFI) {2943// Copy original BB's block frequency to AfterCallBB2944IFI.CallerBFI->setBlockFreq(AfterCallBB,2945IFI.CallerBFI->getBlockFreq(OrigBB));2946}29472948// Change the branch that used to go to AfterCallBB to branch to the first2949// basic block of the inlined function.2950//2951Instruction *Br = OrigBB->getTerminator();2952assert(Br && Br->getOpcode() == Instruction::Br &&2953"splitBasicBlock broken!");2954Br->setOperand(0, &*FirstNewBlock);29552956// Now that the function is correct, make it a little bit nicer. In2957// particular, move the basic blocks inserted from the end of the function2958// into the space made by splitting the source basic block.2959Caller->splice(AfterCallBB->getIterator(), Caller, FirstNewBlock,2960Caller->end());29612962// Handle all of the return instructions that we just cloned in, and eliminate2963// any users of the original call/invoke instruction.2964Type *RTy = CalledFunc->getReturnType();29652966PHINode *PHI = nullptr;2967if (Returns.size() > 1) {2968// The PHI node should go at the front of the new basic block to merge all2969// possible incoming values.2970if (!CB.use_empty()) {2971PHI = PHINode::Create(RTy, Returns.size(), CB.getName());2972PHI->insertBefore(AfterCallBB->begin());2973// Anything that used the result of the function call should now use the2974// PHI node as their operand.2975CB.replaceAllUsesWith(PHI);2976}29772978// Loop over all of the return instructions adding entries to the PHI node2979// as appropriate.2980if (PHI) {2981for (ReturnInst *RI : Returns) {2982assert(RI->getReturnValue()->getType() == PHI->getType() &&2983"Ret value not consistent in function!");2984PHI->addIncoming(RI->getReturnValue(), RI->getParent());2985}2986}29872988// Add a branch to the merge points and remove return instructions.2989DebugLoc Loc;2990for (ReturnInst *RI : Returns) {2991BranchInst *BI = BranchInst::Create(AfterCallBB, RI->getIterator());2992Loc = RI->getDebugLoc();2993BI->setDebugLoc(Loc);2994RI->eraseFromParent();2995}2996// We need to set the debug location to *somewhere* inside the2997// inlined function. 
The line number may be nonsensical, but the2998// instruction will at least be associated with the right2999// function.3000if (CreatedBranchToNormalDest)3001CreatedBranchToNormalDest->setDebugLoc(Loc);3002} else if (!Returns.empty()) {3003// Otherwise, if there is exactly one return value, just replace anything3004// using the return value of the call with the computed value.3005if (!CB.use_empty()) {3006if (&CB == Returns[0]->getReturnValue())3007CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));3008else3009CB.replaceAllUsesWith(Returns[0]->getReturnValue());3010}30113012// Update PHI nodes that use the ReturnBB to use the AfterCallBB.3013BasicBlock *ReturnBB = Returns[0]->getParent();3014ReturnBB->replaceAllUsesWith(AfterCallBB);30153016// Splice the code from the return block into the block that it will return3017// to, which contains the code that was after the call.3018AfterCallBB->splice(AfterCallBB->begin(), ReturnBB);30193020if (CreatedBranchToNormalDest)3021CreatedBranchToNormalDest->setDebugLoc(Returns[0]->getDebugLoc());30223023// Delete the return instruction now and empty ReturnBB now.3024Returns[0]->eraseFromParent();3025ReturnBB->eraseFromParent();3026} else if (!CB.use_empty()) {3027// No returns, but something is using the return value of the call. Just3028// nuke the result.3029CB.replaceAllUsesWith(PoisonValue::get(CB.getType()));3030}30313032// Since we are now done with the Call/Invoke, we can delete it.3033CB.eraseFromParent();30343035// If we inlined any musttail calls and the original return is now3036// unreachable, delete it. It can only contain a bitcast and ret.3037if (InlinedMustTailCalls && pred_empty(AfterCallBB))3038AfterCallBB->eraseFromParent();30393040// We should always be able to fold the entry block of the function into the3041// single predecessor of the block...3042assert(cast<BranchInst>(Br)->isUnconditional() && "splitBasicBlock broken!");3043BasicBlock *CalleeEntry = cast<BranchInst>(Br)->getSuccessor(0);30443045// Splice the code entry block into calling block, right before the3046// unconditional branch.3047CalleeEntry->replaceAllUsesWith(OrigBB); // Update PHI nodes3048OrigBB->splice(Br->getIterator(), CalleeEntry);30493050// Remove the unconditional branch.3051Br->eraseFromParent();30523053// Now we can remove the CalleeEntry block, which is now empty.3054CalleeEntry->eraseFromParent();30553056// If we inserted a phi node, check to see if it has a single value (e.g. all3057// the entries are the same or undef). If so, remove the PHI so it doesn't3058// block other optimizations.3059if (PHI) {3060AssumptionCache *AC =3061IFI.GetAssumptionCache ? &IFI.GetAssumptionCache(*Caller) : nullptr;3062auto &DL = Caller->getDataLayout();3063if (Value *V = simplifyInstruction(PHI, {DL, nullptr, nullptr, AC})) {3064PHI->replaceAllUsesWith(V);3065PHI->eraseFromParent();3066}3067}30683069if (MergeAttributes)3070AttributeFuncs::mergeAttributesForInlining(*Caller, *CalledFunc);30713072return InlineResult::success();3073}307430753076