// Source file: contrib/llvm-project/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
//===- ArgumentPromotion.cpp - Promote by-reference arguments -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass promotes "by reference" arguments to be "by value" arguments. In9// practice, this means looking for internal functions that have pointer10// arguments. If it can prove, through the use of alias analysis, that an11// argument is *only* loaded, then it can pass the value into the function12// instead of the address of the value. This can cause recursive simplification13// of code and lead to the elimination of allocas (especially in C++ template14// code like the STL).15//16// This pass also handles aggregate arguments that are passed into a function,17// scalarizing them if the elements of the aggregate are only loaded. Note that18// by default it refuses to scalarize aggregates which would require passing in19// more than three operands to the function, because passing thousands of20// operands for a large array or structure is unprofitable! This limit can be21// configured or disabled, however.22//23// Note that this transformation could also be done for arguments that are only24// stored to (returning the value instead), but does not currently. 
This case25// would be best handled when and if LLVM begins supporting multiple return26// values from functions.27//28//===----------------------------------------------------------------------===//2930#include "llvm/Transforms/IPO/ArgumentPromotion.h"3132#include "llvm/ADT/DepthFirstIterator.h"33#include "llvm/ADT/STLExtras.h"34#include "llvm/ADT/ScopeExit.h"35#include "llvm/ADT/SmallPtrSet.h"36#include "llvm/ADT/SmallVector.h"37#include "llvm/ADT/Statistic.h"38#include "llvm/ADT/Twine.h"39#include "llvm/Analysis/AssumptionCache.h"40#include "llvm/Analysis/BasicAliasAnalysis.h"41#include "llvm/Analysis/CallGraph.h"42#include "llvm/Analysis/Loads.h"43#include "llvm/Analysis/MemoryLocation.h"44#include "llvm/Analysis/TargetTransformInfo.h"45#include "llvm/Analysis/ValueTracking.h"46#include "llvm/IR/Argument.h"47#include "llvm/IR/Attributes.h"48#include "llvm/IR/BasicBlock.h"49#include "llvm/IR/CFG.h"50#include "llvm/IR/Constants.h"51#include "llvm/IR/DataLayout.h"52#include "llvm/IR/DerivedTypes.h"53#include "llvm/IR/Dominators.h"54#include "llvm/IR/Function.h"55#include "llvm/IR/IRBuilder.h"56#include "llvm/IR/InstrTypes.h"57#include "llvm/IR/Instruction.h"58#include "llvm/IR/Instructions.h"59#include "llvm/IR/Metadata.h"60#include "llvm/IR/Module.h"61#include "llvm/IR/NoFolder.h"62#include "llvm/IR/PassManager.h"63#include "llvm/IR/Type.h"64#include "llvm/IR/Use.h"65#include "llvm/IR/User.h"66#include "llvm/IR/Value.h"67#include "llvm/Support/Casting.h"68#include "llvm/Support/Debug.h"69#include "llvm/Support/raw_ostream.h"70#include "llvm/Transforms/Utils/Local.h"71#include "llvm/Transforms/Utils/PromoteMemToReg.h"72#include <algorithm>73#include <cassert>74#include <cstdint>75#include <utility>76#include <vector>7778using namespace llvm;7980#define DEBUG_TYPE "argpromotion"8182STATISTIC(NumArgumentsPromoted, "Number of pointer arguments promoted");83STATISTIC(NumArgumentsDead, "Number of dead pointer args eliminated");8485namespace {8687struct ArgPart {88Type 
*Ty;89Align Alignment;90/// A representative guaranteed-executed load or store instruction for use by91/// metadata transfer.92Instruction *MustExecInstr;93};9495using OffsetAndArgPart = std::pair<int64_t, ArgPart>;9697} // end anonymous namespace9899static Value *createByteGEP(IRBuilderBase &IRB, const DataLayout &DL,100Value *Ptr, Type *ResElemTy, int64_t Offset) {101if (Offset != 0) {102APInt APOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), Offset);103Ptr = IRB.CreatePtrAdd(Ptr, IRB.getInt(APOffset));104}105return Ptr;106}107108/// DoPromotion - This method actually performs the promotion of the specified109/// arguments, and returns the new function. At this point, we know that it's110/// safe to do so.111static Function *112doPromotion(Function *F, FunctionAnalysisManager &FAM,113const DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>>114&ArgsToPromote) {115// Start by computing a new prototype for the function, which is the same as116// the old function, but has modified arguments.117FunctionType *FTy = F->getFunctionType();118std::vector<Type *> Params;119120// Attribute - Keep track of the parameter attributes for the arguments121// that we are *not* promoting. For the ones that we do promote, the parameter122// attributes are lost123SmallVector<AttributeSet, 8> ArgAttrVec;124// Mapping from old to new argument indices. 
-1 for promoted or removed125// arguments.126SmallVector<unsigned> NewArgIndices;127AttributeList PAL = F->getAttributes();128129// First, determine the new argument list130unsigned ArgNo = 0, NewArgNo = 0;131for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E;132++I, ++ArgNo) {133if (!ArgsToPromote.count(&*I)) {134// Unchanged argument135Params.push_back(I->getType());136ArgAttrVec.push_back(PAL.getParamAttrs(ArgNo));137NewArgIndices.push_back(NewArgNo++);138} else if (I->use_empty()) {139// Dead argument (which are always marked as promotable)140++NumArgumentsDead;141NewArgIndices.push_back((unsigned)-1);142} else {143const auto &ArgParts = ArgsToPromote.find(&*I)->second;144for (const auto &Pair : ArgParts) {145Params.push_back(Pair.second.Ty);146ArgAttrVec.push_back(AttributeSet());147}148++NumArgumentsPromoted;149NewArgIndices.push_back((unsigned)-1);150NewArgNo += ArgParts.size();151}152}153154Type *RetTy = FTy->getReturnType();155156// Construct the new function type using the new arguments.157FunctionType *NFTy = FunctionType::get(RetTy, Params, FTy->isVarArg());158159// Create the new function body and insert it into the module.160Function *NF = Function::Create(NFTy, F->getLinkage(), F->getAddressSpace(),161F->getName());162NF->copyAttributesFrom(F);163NF->copyMetadata(F, 0);164NF->setIsNewDbgInfoFormat(F->IsNewDbgInfoFormat);165166// The new function will have the !dbg metadata copied from the original167// function. 
The original function may not be deleted, and dbg metadata need168// to be unique, so we need to drop it.169F->setSubprogram(nullptr);170171LLVM_DEBUG(dbgs() << "ARG PROMOTION: Promoting to:" << *NF << "\n"172<< "From: " << *F);173174uint64_t LargestVectorWidth = 0;175for (auto *I : Params)176if (auto *VT = dyn_cast<llvm::VectorType>(I))177LargestVectorWidth = std::max(178LargestVectorWidth, VT->getPrimitiveSizeInBits().getKnownMinValue());179180// Recompute the parameter attributes list based on the new arguments for181// the function.182NF->setAttributes(AttributeList::get(F->getContext(), PAL.getFnAttrs(),183PAL.getRetAttrs(), ArgAttrVec));184185// Remap argument indices in allocsize attribute.186if (auto AllocSize = NF->getAttributes().getFnAttrs().getAllocSizeArgs()) {187unsigned Arg1 = NewArgIndices[AllocSize->first];188assert(Arg1 != (unsigned)-1 && "allocsize cannot be promoted argument");189std::optional<unsigned> Arg2;190if (AllocSize->second) {191Arg2 = NewArgIndices[*AllocSize->second];192assert(Arg2 != (unsigned)-1 && "allocsize cannot be promoted argument");193}194NF->addFnAttr(Attribute::getWithAllocSizeArgs(F->getContext(), Arg1, Arg2));195}196197AttributeFuncs::updateMinLegalVectorWidthAttr(*NF, LargestVectorWidth);198ArgAttrVec.clear();199200F->getParent()->getFunctionList().insert(F->getIterator(), NF);201NF->takeName(F);202203// Loop over all the callers of the function, transforming the call sites to204// pass in the loaded pointers.205SmallVector<Value *, 16> Args;206const DataLayout &DL = F->getDataLayout();207SmallVector<WeakTrackingVH, 16> DeadArgs;208209while (!F->use_empty()) {210CallBase &CB = cast<CallBase>(*F->user_back());211assert(CB.getCalledFunction() == F);212const AttributeList &CallPAL = CB.getAttributes();213IRBuilder<NoFolder> IRB(&CB);214215// Loop over the operands, inserting GEP and loads in the caller as216// appropriate.217auto *AI = CB.arg_begin();218ArgNo = 0;219for (Function::arg_iterator I = F->arg_begin(), E = 
F->arg_end(); I != E;220++I, ++AI, ++ArgNo) {221if (!ArgsToPromote.count(&*I)) {222Args.push_back(*AI); // Unmodified argument223ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));224} else if (!I->use_empty()) {225Value *V = *AI;226const auto &ArgParts = ArgsToPromote.find(&*I)->second;227for (const auto &Pair : ArgParts) {228LoadInst *LI = IRB.CreateAlignedLoad(229Pair.second.Ty,230createByteGEP(IRB, DL, V, Pair.second.Ty, Pair.first),231Pair.second.Alignment, V->getName() + ".val");232if (Pair.second.MustExecInstr) {233LI->setAAMetadata(Pair.second.MustExecInstr->getAAMetadata());234LI->copyMetadata(*Pair.second.MustExecInstr,235{LLVMContext::MD_dereferenceable,236LLVMContext::MD_dereferenceable_or_null,237LLVMContext::MD_noundef,238LLVMContext::MD_nontemporal});239// Only transfer poison-generating metadata if we also have240// !noundef.241// TODO: Without !noundef, we could merge this metadata across242// all promoted loads.243if (LI->hasMetadata(LLVMContext::MD_noundef))244LI->copyMetadata(*Pair.second.MustExecInstr,245{LLVMContext::MD_range, LLVMContext::MD_nonnull,246LLVMContext::MD_align});247}248Args.push_back(LI);249ArgAttrVec.push_back(AttributeSet());250}251} else {252assert(ArgsToPromote.count(&*I) && I->use_empty());253DeadArgs.emplace_back(AI->get());254}255}256257// Push any varargs arguments on the list.258for (; AI != CB.arg_end(); ++AI, ++ArgNo) {259Args.push_back(*AI);260ArgAttrVec.push_back(CallPAL.getParamAttrs(ArgNo));261}262263SmallVector<OperandBundleDef, 1> OpBundles;264CB.getOperandBundlesAsDefs(OpBundles);265266CallBase *NewCS = nullptr;267if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {268NewCS = InvokeInst::Create(NF, II->getNormalDest(), II->getUnwindDest(),269Args, OpBundles, "", CB.getIterator());270} else {271auto *NewCall =272CallInst::Create(NF, Args, OpBundles, "", CB.getIterator());273NewCall->setTailCallKind(cast<CallInst>(&CB)->getTailCallKind());274NewCS = 
NewCall;275}276NewCS->setCallingConv(CB.getCallingConv());277NewCS->setAttributes(AttributeList::get(F->getContext(),278CallPAL.getFnAttrs(),279CallPAL.getRetAttrs(), ArgAttrVec));280NewCS->copyMetadata(CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg});281Args.clear();282ArgAttrVec.clear();283284AttributeFuncs::updateMinLegalVectorWidthAttr(*CB.getCaller(),285LargestVectorWidth);286287if (!CB.use_empty()) {288CB.replaceAllUsesWith(NewCS);289NewCS->takeName(&CB);290}291292// Finally, remove the old call from the program, reducing the use-count of293// F.294CB.eraseFromParent();295}296297RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadArgs);298299// Since we have now created the new function, splice the body of the old300// function right into the new function, leaving the old rotting hulk of the301// function empty.302NF->splice(NF->begin(), F);303304// We will collect all the new created allocas to promote them into registers305// after the following loop306SmallVector<AllocaInst *, 4> Allocas;307308// Loop over the argument list, transferring uses of the old arguments over to309// the new arguments, also transferring over the names as well.310Function::arg_iterator I2 = NF->arg_begin();311for (Argument &Arg : F->args()) {312if (!ArgsToPromote.count(&Arg)) {313// If this is an unmodified argument, move the name and users over to the314// new version.315Arg.replaceAllUsesWith(&*I2);316I2->takeName(&Arg);317++I2;318continue;319}320321// There potentially are metadata uses for things like llvm.dbg.value.322// Replace them with undef, after handling the other regular uses.323auto RauwUndefMetadata = make_scope_exit(324[&]() { Arg.replaceAllUsesWith(UndefValue::get(Arg.getType())); });325326if (Arg.use_empty())327continue;328329// Otherwise, if we promoted this argument, we have to create an alloca in330// the callee for every promotable part and store each of the new incoming331// arguments into the corresponding alloca, what lets the old code (the332// store 
instructions if they are allowed especially) a chance to work as333// before.334assert(Arg.getType()->isPointerTy() &&335"Only arguments with a pointer type are promotable");336337IRBuilder<NoFolder> IRB(&NF->begin()->front());338339// Add only the promoted elements, so parts from ArgsToPromote340SmallDenseMap<int64_t, AllocaInst *> OffsetToAlloca;341for (const auto &Pair : ArgsToPromote.find(&Arg)->second) {342int64_t Offset = Pair.first;343const ArgPart &Part = Pair.second;344345Argument *NewArg = I2++;346NewArg->setName(Arg.getName() + "." + Twine(Offset) + ".val");347348AllocaInst *NewAlloca = IRB.CreateAlloca(349Part.Ty, nullptr, Arg.getName() + "." + Twine(Offset) + ".allc");350NewAlloca->setAlignment(Pair.second.Alignment);351IRB.CreateAlignedStore(NewArg, NewAlloca, Pair.second.Alignment);352353// Collect the alloca to retarget the users to354OffsetToAlloca.insert({Offset, NewAlloca});355}356357auto GetAlloca = [&](Value *Ptr) {358APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);359Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,360/* AllowNonInbounds */ true);361assert(Ptr == &Arg && "Not constant offset from arg?");362return OffsetToAlloca.lookup(Offset.getSExtValue());363};364365// Cleanup the code from the dead instructions: GEPs and BitCasts in between366// the original argument and its users: loads and stores. 
Retarget every367// user to the new created alloca.368SmallVector<Value *, 16> Worklist;369SmallVector<Instruction *, 16> DeadInsts;370append_range(Worklist, Arg.users());371while (!Worklist.empty()) {372Value *V = Worklist.pop_back_val();373if (isa<BitCastInst>(V) || isa<GetElementPtrInst>(V)) {374DeadInsts.push_back(cast<Instruction>(V));375append_range(Worklist, V->users());376continue;377}378379if (auto *LI = dyn_cast<LoadInst>(V)) {380Value *Ptr = LI->getPointerOperand();381LI->setOperand(LoadInst::getPointerOperandIndex(), GetAlloca(Ptr));382continue;383}384385if (auto *SI = dyn_cast<StoreInst>(V)) {386assert(!SI->isVolatile() && "Volatile operations can't be promoted.");387Value *Ptr = SI->getPointerOperand();388SI->setOperand(StoreInst::getPointerOperandIndex(), GetAlloca(Ptr));389continue;390}391392llvm_unreachable("Unexpected user");393}394395for (Instruction *I : DeadInsts) {396I->replaceAllUsesWith(PoisonValue::get(I->getType()));397I->eraseFromParent();398}399400// Collect the allocas for promotion401for (const auto &Pair : OffsetToAlloca) {402assert(isAllocaPromotable(Pair.second) &&403"By design, only promotable allocas should be produced.");404Allocas.push_back(Pair.second);405}406}407408LLVM_DEBUG(dbgs() << "ARG PROMOTION: " << Allocas.size()409<< " alloca(s) are promotable by Mem2Reg\n");410411if (!Allocas.empty()) {412// And we are able to call the `promoteMemoryToRegister()` function.413// Our earlier checks have ensured that PromoteMemToReg() will414// succeed.415auto &DT = FAM.getResult<DominatorTreeAnalysis>(*NF);416auto &AC = FAM.getResult<AssumptionAnalysis>(*NF);417PromoteMemToReg(Allocas, DT, &AC);418}419420return NF;421}422423/// Return true if we can prove that all callees pass in a valid pointer for the424/// specified function argument.425static bool allCallersPassValidPointerForArgument(426Argument *Arg, SmallPtrSetImpl<CallBase *> &RecursiveCalls,427Align NeededAlign, uint64_t NeededDerefBytes) {428Function *Callee = 
Arg->getParent();429const DataLayout &DL = Callee->getDataLayout();430APInt Bytes(64, NeededDerefBytes);431432// Check if the argument itself is marked dereferenceable and aligned.433if (isDereferenceableAndAlignedPointer(Arg, NeededAlign, Bytes, DL))434return true;435436// Look at all call sites of the function. At this point we know we only have437// direct callees.438return all_of(Callee->users(), [&](User *U) {439CallBase &CB = cast<CallBase>(*U);440// In case of functions with recursive calls, this check441// (isDereferenceableAndAlignedPointer) will fail when it tries to look at442// the first caller of this function. The caller may or may not have a load,443// incase it doesn't load the pointer being passed, this check will fail.444// So, it's safe to skip the check incase we know that we are dealing with a445// recursive call. For example we have a IR given below.446//447// def fun(ptr %a) {448// ...449// %loadres = load i32, ptr %a, align 4450// %res = call i32 @fun(ptr %a)451// ...452// }453//454// def bar(ptr %x) {455// ...456// %resbar = call i32 @fun(ptr %x)457// ...458// }459//460// Since we record processed recursive calls, we check if the current461// CallBase has been processed before. If yes it means that it is a462// recursive call and we can skip the check just for this call. 
So, just463// return true.464if (RecursiveCalls.contains(&CB))465return true;466467return isDereferenceableAndAlignedPointer(CB.getArgOperand(Arg->getArgNo()),468NeededAlign, Bytes, DL);469});470}471472/// Determine that this argument is safe to promote, and find the argument473/// parts it can be promoted into.474static bool findArgParts(Argument *Arg, const DataLayout &DL, AAResults &AAR,475unsigned MaxElements, bool IsRecursive,476SmallVectorImpl<OffsetAndArgPart> &ArgPartsVec) {477// Quick exit for unused arguments478if (Arg->use_empty())479return true;480481// We can only promote this argument if all the uses are loads at known482// offsets.483//484// Promoting the argument causes it to be loaded in the caller485// unconditionally. This is only safe if we can prove that either the load486// would have happened in the callee anyway (ie, there is a load in the entry487// block) or the pointer passed in at every call site is guaranteed to be488// valid.489// In the former case, invalid loads can happen, but would have happened490// anyway, in the latter case, invalid loads won't happen. 
This prevents us491// from introducing an invalid load that wouldn't have happened in the492// original code.493494SmallDenseMap<int64_t, ArgPart, 4> ArgParts;495Align NeededAlign(1);496uint64_t NeededDerefBytes = 0;497498// And if this is a byval argument we also allow to have store instructions.499// Only handle in such way arguments with specified alignment;500// if it's unspecified, the actual alignment of the argument is501// target-specific.502bool AreStoresAllowed = Arg->getParamByValType() && Arg->getParamAlign();503504// An end user of a pointer argument is a load or store instruction.505// Returns std::nullopt if this load or store is not based on the argument.506// Return true if we can promote the instruction, false otherwise.507auto HandleEndUser = [&](auto *I, Type *Ty,508bool GuaranteedToExecute) -> std::optional<bool> {509// Don't promote volatile or atomic instructions.510if (!I->isSimple())511return false;512513Value *Ptr = I->getPointerOperand();514APInt Offset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);515Ptr = Ptr->stripAndAccumulateConstantOffsets(DL, Offset,516/* AllowNonInbounds */ true);517if (Ptr != Arg)518return std::nullopt;519520if (Offset.getSignificantBits() >= 64)521return false;522523TypeSize Size = DL.getTypeStoreSize(Ty);524// Don't try to promote scalable types.525if (Size.isScalable())526return false;527528// If this is a recursive function and one of the types is a pointer,529// then promoting it might lead to recursive promotion.530if (IsRecursive && Ty->isPointerTy())531return false;532533int64_t Off = Offset.getSExtValue();534auto Pair = ArgParts.try_emplace(535Off, ArgPart{Ty, I->getAlign(), GuaranteedToExecute ? 
I : nullptr});536ArgPart &Part = Pair.first->second;537bool OffsetNotSeenBefore = Pair.second;538539// We limit promotion to only promoting up to a fixed number of elements of540// the aggregate.541if (MaxElements > 0 && ArgParts.size() > MaxElements) {542LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "543<< "more than " << MaxElements << " parts\n");544return false;545}546547// For now, we only support loading/storing one specific type at a given548// offset.549if (Part.Ty != Ty) {550LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "551<< "accessed as both " << *Part.Ty << " and " << *Ty552<< " at offset " << Off << "\n");553return false;554}555556// If this instruction is not guaranteed to execute, and we haven't seen a557// load or store at this offset before (or it had lower alignment), then we558// need to remember that requirement.559// Note that skipping instructions of previously seen offsets is only560// correct because we only allow a single type for a given offset, which561// also means that the number of accessed bytes will be the same.562if (!GuaranteedToExecute &&563(OffsetNotSeenBefore || Part.Alignment < I->getAlign())) {564// We won't be able to prove dereferenceability for negative offsets.565if (Off < 0)566return false;567568// If the offset is not aligned, an aligned base pointer won't help.569if (!isAligned(I->getAlign(), Off))570return false;571572NeededDerefBytes = std::max(NeededDerefBytes, Off + Size.getFixedValue());573NeededAlign = std::max(NeededAlign, I->getAlign());574}575576Part.Alignment = std::max(Part.Alignment, I->getAlign());577return true;578};579580// Look for loads and stores that are guaranteed to execute on entry.581for (Instruction &I : Arg->getParent()->getEntryBlock()) {582std::optional<bool> Res{};583if (LoadInst *LI = dyn_cast<LoadInst>(&I))584Res = HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ true);585else if (StoreInst *SI = dyn_cast<StoreInst>(&I))586Res = HandleEndUser(SI, 
SI->getValueOperand()->getType(),587/* GuaranteedToExecute */ true);588if (Res && !*Res)589return false;590591if (!isGuaranteedToTransferExecutionToSuccessor(&I))592break;593}594595// Now look at all loads of the argument. Remember the load instructions596// for the aliasing check below.597SmallVector<const Use *, 16> Worklist;598SmallPtrSet<const Use *, 16> Visited;599SmallVector<LoadInst *, 16> Loads;600SmallPtrSet<CallBase *, 4> RecursiveCalls;601auto AppendUses = [&](const Value *V) {602for (const Use &U : V->uses())603if (Visited.insert(&U).second)604Worklist.push_back(&U);605};606AppendUses(Arg);607while (!Worklist.empty()) {608const Use *U = Worklist.pop_back_val();609Value *V = U->getUser();610if (isa<BitCastInst>(V)) {611AppendUses(V);612continue;613}614615if (auto *GEP = dyn_cast<GetElementPtrInst>(V)) {616if (!GEP->hasAllConstantIndices())617return false;618AppendUses(V);619continue;620}621622if (auto *LI = dyn_cast<LoadInst>(V)) {623if (!*HandleEndUser(LI, LI->getType(), /* GuaranteedToExecute */ false))624return false;625Loads.push_back(LI);626continue;627}628629// Stores are allowed for byval arguments630auto *SI = dyn_cast<StoreInst>(V);631if (AreStoresAllowed && SI &&632U->getOperandNo() == StoreInst::getPointerOperandIndex()) {633if (!*HandleEndUser(SI, SI->getValueOperand()->getType(),634/* GuaranteedToExecute */ false))635return false;636continue;637// Only stores TO the argument is allowed, all the other stores are638// unknown users639}640641auto *CB = dyn_cast<CallBase>(V);642Value *PtrArg = U->get();643if (CB && CB->getCalledFunction() == CB->getFunction()) {644if (PtrArg != Arg) {645LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "646<< "pointer offset is not equal to zero\n");647return false;648}649650unsigned int ArgNo = Arg->getArgNo();651if (U->getOperandNo() != ArgNo) {652LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "653<< "arg position is different in callee\n");654return false;655}656657// We limit 
promotion to only promoting up to a fixed number of elements658// of the aggregate.659if (MaxElements > 0 && ArgParts.size() > MaxElements) {660LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "661<< "more than " << MaxElements << " parts\n");662return false;663}664665RecursiveCalls.insert(CB);666continue;667}668// Unknown user.669LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "670<< "unknown user " << *V << "\n");671return false;672}673674if (NeededDerefBytes || NeededAlign > 1) {675// Try to prove a required deref / aligned requirement.676if (!allCallersPassValidPointerForArgument(Arg, RecursiveCalls, NeededAlign,677NeededDerefBytes)) {678LLVM_DEBUG(dbgs() << "ArgPromotion of " << *Arg << " failed: "679<< "not dereferenceable or aligned\n");680return false;681}682}683684if (ArgParts.empty())685return true; // No users, this is a dead argument.686687// Sort parts by offset.688append_range(ArgPartsVec, ArgParts);689sort(ArgPartsVec, llvm::less_first());690691// Make sure the parts are non-overlapping.692int64_t Offset = ArgPartsVec[0].first;693for (const auto &Pair : ArgPartsVec) {694if (Pair.first < Offset)695return false; // Overlap with previous part.696697Offset = Pair.first + DL.getTypeStoreSize(Pair.second.Ty);698}699700// If store instructions are allowed, the path from the entry of the function701// to each load may be not free of instructions that potentially invalidate702// the load, and this is an admissible situation.703if (AreStoresAllowed)704return true;705706// Okay, now we know that the argument is only used by load instructions, and707// it is safe to unconditionally perform all of them. 
Use alias analysis to708// check to see if the pointer is guaranteed to not be modified from entry of709// the function to each of the load instructions.710711for (LoadInst *Load : Loads) {712// Check to see if the load is invalidated from the start of the block to713// the load itself.714BasicBlock *BB = Load->getParent();715716MemoryLocation Loc = MemoryLocation::get(Load);717if (AAR.canInstructionRangeModRef(BB->front(), *Load, Loc, ModRefInfo::Mod))718return false; // Pointer is invalidated!719720// Now check every path from the entry block to the load for transparency.721// To do this, we perform a depth first search on the inverse CFG from the722// loading block.723for (BasicBlock *P : predecessors(BB)) {724for (BasicBlock *TranspBB : inverse_depth_first(P))725if (AAR.canBasicBlockModify(*TranspBB, Loc))726return false;727}728}729730// If the path from the entry of the function to each load is free of731// instructions that potentially invalidate the load, we can make the732// transformation!733return true;734}735736/// Check if callers and callee agree on how promoted arguments would be737/// passed.738static bool areTypesABICompatible(ArrayRef<Type *> Types, const Function &F,739const TargetTransformInfo &TTI) {740return all_of(F.uses(), [&](const Use &U) {741CallBase *CB = dyn_cast<CallBase>(U.getUser());742if (!CB)743return false;744745const Function *Caller = CB->getCaller();746const Function *Callee = CB->getCalledFunction();747return TTI.areTypesABICompatible(Caller, Callee, Types);748});749}750751/// PromoteArguments - This method checks the specified function to see if there752/// are any promotable arguments and if it is safe to promote the function (for753/// example, all callers are direct). 
If safe to promote some arguments, it754/// calls the DoPromotion method.755static Function *promoteArguments(Function *F, FunctionAnalysisManager &FAM,756unsigned MaxElements, bool IsRecursive) {757// Don't perform argument promotion for naked functions; otherwise we can end758// up removing parameters that are seemingly 'not used' as they are referred759// to in the assembly.760if (F->hasFnAttribute(Attribute::Naked))761return nullptr;762763// Make sure that it is local to this module.764if (!F->hasLocalLinkage())765return nullptr;766767// Don't promote arguments for variadic functions. Adding, removing, or768// changing non-pack parameters can change the classification of pack769// parameters. Frontends encode that classification at the call site in the770// IR, while in the callee the classification is determined dynamically based771// on the number of registers consumed so far.772if (F->isVarArg())773return nullptr;774775// Don't transform functions that receive inallocas, as the transformation may776// not be safe depending on calling convention.777if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca))778return nullptr;779780// First check: see if there are any pointer arguments! If not, quick exit.781SmallVector<Argument *, 16> PointerArgs;782for (Argument &I : F->args())783if (I.getType()->isPointerTy())784PointerArgs.push_back(&I);785if (PointerArgs.empty())786return nullptr;787788// Second check: make sure that all callers are direct callers. We can't789// transform functions that have indirect callers. 
Also see if the function790// is self-recursive.791for (Use &U : F->uses()) {792CallBase *CB = dyn_cast<CallBase>(U.getUser());793// Must be a direct call.794if (CB == nullptr || !CB->isCallee(&U) ||795CB->getFunctionType() != F->getFunctionType())796return nullptr;797798// Can't change signature of musttail callee799if (CB->isMustTailCall())800return nullptr;801802if (CB->getFunction() == F)803IsRecursive = true;804}805806// Can't change signature of musttail caller807// FIXME: Support promoting whole chain of musttail functions808for (BasicBlock &BB : *F)809if (BB.getTerminatingMustTailCall())810return nullptr;811812const DataLayout &DL = F->getDataLayout();813auto &AAR = FAM.getResult<AAManager>(*F);814const auto &TTI = FAM.getResult<TargetIRAnalysis>(*F);815816// Check to see which arguments are promotable. If an argument is promotable,817// add it to ArgsToPromote.818DenseMap<Argument *, SmallVector<OffsetAndArgPart, 4>> ArgsToPromote;819unsigned NumArgsAfterPromote = F->getFunctionType()->getNumParams();820for (Argument *PtrArg : PointerArgs) {821// Replace sret attribute with noalias. 
This reduces register pressure by822// avoiding a register copy.823if (PtrArg->hasStructRetAttr()) {824unsigned ArgNo = PtrArg->getArgNo();825F->removeParamAttr(ArgNo, Attribute::StructRet);826F->addParamAttr(ArgNo, Attribute::NoAlias);827for (Use &U : F->uses()) {828CallBase &CB = cast<CallBase>(*U.getUser());829CB.removeParamAttr(ArgNo, Attribute::StructRet);830CB.addParamAttr(ArgNo, Attribute::NoAlias);831}832}833834// If we can promote the pointer to its value.835SmallVector<OffsetAndArgPart, 4> ArgParts;836837if (findArgParts(PtrArg, DL, AAR, MaxElements, IsRecursive, ArgParts)) {838SmallVector<Type *, 4> Types;839for (const auto &Pair : ArgParts)840Types.push_back(Pair.second.Ty);841842if (areTypesABICompatible(Types, *F, TTI)) {843NumArgsAfterPromote += ArgParts.size() - 1;844ArgsToPromote.insert({PtrArg, std::move(ArgParts)});845}846}847}848849// No promotable pointer arguments.850if (ArgsToPromote.empty())851return nullptr;852853if (NumArgsAfterPromote > TTI.getMaxNumArgs())854return nullptr;855856return doPromotion(F, FAM, ArgsToPromote);857}858859PreservedAnalyses ArgumentPromotionPass::run(LazyCallGraph::SCC &C,860CGSCCAnalysisManager &AM,861LazyCallGraph &CG,862CGSCCUpdateResult &UR) {863bool Changed = false, LocalChange;864865// Iterate until we stop promoting from this SCC.866do {867LocalChange = false;868869FunctionAnalysisManager &FAM =870AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();871872bool IsRecursive = C.size() > 1;873for (LazyCallGraph::Node &N : C) {874Function &OldF = N.getFunction();875Function *NewF = promoteArguments(&OldF, FAM, MaxElements, IsRecursive);876if (!NewF)877continue;878LocalChange = true;879880// Directly substitute the functions in the call graph. Note that this881// requires the old function to be completely dead and completely882// replaced by the new function. 
It does no call graph updates, it merely883// swaps out the particular function mapped to a particular node in the884// graph.885C.getOuterRefSCC().replaceNodeFunction(N, *NewF);886FAM.clear(OldF, OldF.getName());887OldF.eraseFromParent();888889PreservedAnalyses FuncPA;890FuncPA.preserveSet<CFGAnalyses>();891for (auto *U : NewF->users()) {892auto *UserF = cast<CallBase>(U)->getFunction();893FAM.invalidate(*UserF, FuncPA);894}895}896897Changed |= LocalChange;898} while (LocalChange);899900if (!Changed)901return PreservedAnalyses::all();902903PreservedAnalyses PA;904// We've cleared out analyses for deleted functions.905PA.preserve<FunctionAnalysisManagerCGSCCProxy>();906// We've manually invalidated analyses for functions we've modified.907PA.preserveSet<AllAnalysesOn<Function>>();908return PA;909}910911912