// Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfUse.cpp
// 213799 views
//===- MemProfUse.cpp - memory allocation profile use pass --*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the MemProfUsePass which reads memory profiling data9// and uses it to add metadata to instructions to guide optimization.10//11//===----------------------------------------------------------------------===//1213#include "llvm/Transforms/Instrumentation/MemProfUse.h"14#include "llvm/ADT/SmallVector.h"15#include "llvm/ADT/Statistic.h"16#include "llvm/ADT/StringRef.h"17#include "llvm/Analysis/MemoryProfileInfo.h"18#include "llvm/Analysis/OptimizationRemarkEmitter.h"19#include "llvm/Analysis/TargetLibraryInfo.h"20#include "llvm/IR/DiagnosticInfo.h"21#include "llvm/IR/Function.h"22#include "llvm/IR/IntrinsicInst.h"23#include "llvm/IR/Module.h"24#include "llvm/ProfileData/InstrProf.h"25#include "llvm/ProfileData/InstrProfReader.h"26#include "llvm/ProfileData/MemProfCommon.h"27#include "llvm/Support/BLAKE3.h"28#include "llvm/Support/CommandLine.h"29#include "llvm/Support/Debug.h"30#include "llvm/Support/HashBuilder.h"31#include "llvm/Support/VirtualFileSystem.h"32#include "llvm/Transforms/Utils/LongestCommonSequence.h"33#include <map>34#include <set>3536using namespace llvm;37using namespace llvm::memprof;3839#define DEBUG_TYPE "memprof"4041namespace llvm {42extern cl::opt<bool> PGOWarnMissing;43extern cl::opt<bool> NoPGOWarnMismatch;44extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;45} // namespace llvm4647// By default disable matching of allocation profiles onto operator new that48// already explicitly pass a hot/cold hint, since we don't currently49// override these hints anyway.50static cl::opt<bool> ClMemProfMatchHotColdNew(51"memprof-match-hot-cold-new",52cl::desc(53"Match 
allocation profiles onto existing hot/cold operator new calls"),54cl::Hidden, cl::init(false));5556static cl::opt<bool>57ClPrintMemProfMatchInfo("memprof-print-match-info",58cl::desc("Print matching stats for each allocation "59"context in this module's profiles"),60cl::Hidden, cl::init(false));6162static cl::opt<bool>63SalvageStaleProfile("memprof-salvage-stale-profile",64cl::desc("Salvage stale MemProf profile"),65cl::init(false), cl::Hidden);6667static cl::opt<bool> ClMemProfAttachCalleeGuids(68"memprof-attach-calleeguids",69cl::desc(70"Attach calleeguids as value profile metadata for indirect calls."),71cl::init(true), cl::Hidden);7273static cl::opt<unsigned> MinMatchedColdBytePercent(74"memprof-matching-cold-threshold", cl::init(100), cl::Hidden,75cl::desc("Min percent of cold bytes matched to hint allocation cold"));7677// Matching statistics78STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");79STATISTIC(NumOfMemProfMismatch,80"Number of functions having mismatched memory profile hash.");81STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");82STATISTIC(NumOfMemProfAllocContextProfiles,83"Number of alloc contexts in memory profile.");84STATISTIC(NumOfMemProfCallSiteProfiles,85"Number of callsites in memory profile.");86STATISTIC(NumOfMemProfMatchedAllocContexts,87"Number of matched memory profile alloc contexts.");88STATISTIC(NumOfMemProfMatchedAllocs,89"Number of matched memory profile allocs.");90STATISTIC(NumOfMemProfMatchedCallSites,91"Number of matched memory profile callsites.");9293static void addCallsiteMetadata(Instruction &I,94ArrayRef<uint64_t> InlinedCallStack,95LLVMContext &Ctx) {96I.setMetadata(LLVMContext::MD_callsite,97buildCallstackMetadata(InlinedCallStack, Ctx));98}99100static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,101uint32_t Column) {102llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>103HashBuilder;104HashBuilder.add(Function, 
LineOffset, Column);105llvm::BLAKE3Result<8> Hash = HashBuilder.final();106uint64_t Id;107std::memcpy(&Id, Hash.data(), sizeof(Hash));108return Id;109}110111static uint64_t computeStackId(const memprof::Frame &Frame) {112return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);113}114115static AllocationType addCallStack(CallStackTrie &AllocTrie,116const AllocationInfo *AllocInfo,117uint64_t FullStackId) {118SmallVector<uint64_t> StackIds;119for (const auto &StackFrame : AllocInfo->CallStack)120StackIds.push_back(computeStackId(StackFrame));121auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),122AllocInfo->Info.getAllocCount(),123AllocInfo->Info.getTotalLifetime());124std::vector<ContextTotalSize> ContextSizeInfo;125if (recordContextSizeInfoForAnalysis()) {126auto TotalSize = AllocInfo->Info.getTotalSize();127assert(TotalSize);128assert(FullStackId != 0);129ContextSizeInfo.push_back({FullStackId, TotalSize});130}131AllocTrie.addCallStack(AllocType, StackIds, std::move(ContextSizeInfo));132return AllocType;133}134135// Return true if InlinedCallStack, computed from a call instruction's debug136// info, is a prefix of ProfileCallStack, a list of Frames from profile data137// (either the allocation data or a callsite).138static bool139stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,140ArrayRef<uint64_t> InlinedCallStack) {141return ProfileCallStack.size() >= InlinedCallStack.size() &&142llvm::equal(ProfileCallStack.take_front(InlinedCallStack.size()),143InlinedCallStack, [](const Frame &F, uint64_t StackId) {144return computeStackId(F) == StackId;145});146}147148static bool isAllocationWithHotColdVariant(const Function *Callee,149const TargetLibraryInfo &TLI) {150if (!Callee)151return false;152LibFunc Func;153if (!TLI.getLibFunc(*Callee, Func))154return false;155switch (Func) {156case LibFunc_Znwm:157case LibFunc_ZnwmRKSt9nothrow_t:158case LibFunc_ZnwmSt11align_val_t:159case 
LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:160case LibFunc_Znam:161case LibFunc_ZnamRKSt9nothrow_t:162case LibFunc_ZnamSt11align_val_t:163case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:164case LibFunc_size_returning_new:165case LibFunc_size_returning_new_aligned:166return true;167case LibFunc_Znwm12__hot_cold_t:168case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:169case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:170case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:171case LibFunc_Znam12__hot_cold_t:172case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:173case LibFunc_ZnamSt11align_val_t12__hot_cold_t:174case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:175case LibFunc_size_returning_new_hot_cold:176case LibFunc_size_returning_new_aligned_hot_cold:177return ClMemProfMatchHotColdNew;178default:179return false;180}181}182183struct AllocMatchInfo {184uint64_t TotalSize = 0;185AllocationType AllocType = AllocationType::None;186};187188DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>>189memprof::extractCallsFromIR(Module &M, const TargetLibraryInfo &TLI,190function_ref<bool(uint64_t)> IsPresentInProfile) {191DenseMap<uint64_t, SmallVector<CallEdgeTy, 0>> Calls;192193auto GetOffset = [](const DILocation *DIL) {194return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &1950xffff;196};197198for (Function &F : M) {199if (F.isDeclaration())200continue;201202for (auto &BB : F) {203for (auto &I : BB) {204if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))205continue;206207auto *CB = dyn_cast<CallBase>(&I);208auto *CalledFunction = CB->getCalledFunction();209// Disregard indirect calls and intrinsics.210if (!CalledFunction || CalledFunction->isIntrinsic())211continue;212213StringRef CalleeName = CalledFunction->getName();214// True if we are calling a heap allocation function that supports215// hot/cold variants.216bool IsAlloc = isAllocationWithHotColdVariant(CalledFunction, TLI);217// True for the first iteration below, indicating that we are looking 
at218// a leaf node.219bool IsLeaf = true;220for (const DILocation *DIL = I.getDebugLoc(); DIL;221DIL = DIL->getInlinedAt()) {222StringRef CallerName = DIL->getSubprogramLinkageName();223assert(!CallerName.empty() &&224"Be sure to enable -fdebug-info-for-profiling");225uint64_t CallerGUID = memprof::getGUID(CallerName);226uint64_t CalleeGUID = memprof::getGUID(CalleeName);227// Pretend that we are calling a function with GUID == 0 if we are228// in the inline stack leading to a heap allocation function.229if (IsAlloc) {230if (IsLeaf) {231// For leaf nodes, set CalleeGUID to 0 without consulting232// IsPresentInProfile.233CalleeGUID = 0;234} else if (!IsPresentInProfile(CalleeGUID)) {235// In addition to the leaf case above, continue to set CalleeGUID236// to 0 as long as we don't see CalleeGUID in the profile.237CalleeGUID = 0;238} else {239// Once we encounter a callee that exists in the profile, stop240// setting CalleeGUID to 0.241IsAlloc = false;242}243}244245LineLocation Loc = {GetOffset(DIL), DIL->getColumn()};246Calls[CallerGUID].emplace_back(Loc, CalleeGUID);247CalleeName = CallerName;248IsLeaf = false;249}250}251}252}253254// Sort each call list by the source location.255for (auto &[CallerGUID, CallList] : Calls) {256llvm::sort(CallList);257CallList.erase(llvm::unique(CallList), CallList.end());258}259260return Calls;261}262263DenseMap<uint64_t, LocToLocMap>264memprof::computeUndriftMap(Module &M, IndexedInstrProfReader *MemProfReader,265const TargetLibraryInfo &TLI) {266DenseMap<uint64_t, LocToLocMap> UndriftMaps;267268DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromProfile =269MemProfReader->getMemProfCallerCalleePairs();270DenseMap<uint64_t, SmallVector<memprof::CallEdgeTy, 0>> CallsFromIR =271extractCallsFromIR(M, TLI, [&](uint64_t GUID) {272return CallsFromProfile.contains(GUID);273});274275// Compute an undrift map for each CallerGUID.276for (const auto &[CallerGUID, IRAnchors] : CallsFromIR) {277auto It = 
CallsFromProfile.find(CallerGUID);278if (It == CallsFromProfile.end())279continue;280const auto &ProfileAnchors = It->second;281282LocToLocMap Matchings;283longestCommonSequence<LineLocation, GlobalValue::GUID>(284ProfileAnchors, IRAnchors, std::equal_to<GlobalValue::GUID>(),285[&](LineLocation A, LineLocation B) { Matchings.try_emplace(A, B); });286[[maybe_unused]] bool Inserted =287UndriftMaps.try_emplace(CallerGUID, std::move(Matchings)).second;288289// The insertion must succeed because we visit each GUID exactly once.290assert(Inserted);291}292293return UndriftMaps;294}295296// Given a MemProfRecord, undrift all the source locations present in the297// record in place.298static void299undriftMemProfRecord(const DenseMap<uint64_t, LocToLocMap> &UndriftMaps,300memprof::MemProfRecord &MemProfRec) {301// Undrift a call stack in place.302auto UndriftCallStack = [&](std::vector<Frame> &CallStack) {303for (auto &F : CallStack) {304auto I = UndriftMaps.find(F.Function);305if (I == UndriftMaps.end())306continue;307auto J = I->second.find(LineLocation(F.LineOffset, F.Column));308if (J == I->second.end())309continue;310auto &NewLoc = J->second;311F.LineOffset = NewLoc.LineOffset;312F.Column = NewLoc.Column;313}314};315316for (auto &AS : MemProfRec.AllocSites)317UndriftCallStack(AS.CallStack);318319for (auto &CS : MemProfRec.CallSites)320UndriftCallStack(CS.Frames);321}322323// Helper function to process CalleeGuids and create value profile metadata324static void addVPMetadata(Module &M, Instruction &I,325ArrayRef<GlobalValue::GUID> CalleeGuids) {326if (!ClMemProfAttachCalleeGuids || CalleeGuids.empty())327return;328329if (I.getMetadata(LLVMContext::MD_prof)) {330uint64_t Unused;331// TODO: When merging is implemented, increase this to a typical ICP value332// (e.g., 3-6) For now, we only need to check if existing data exists, so 1333// is sufficient334auto ExistingVD = getValueProfDataFromInst(I, IPVK_IndirectCallTarget,335/*MaxNumValueData=*/1, Unused);336// We don't 
know how to merge value profile data yet.337if (!ExistingVD.empty()) {338return;339}340}341342SmallVector<InstrProfValueData, 4> VDs;343uint64_t TotalCount = 0;344345for (const GlobalValue::GUID CalleeGUID : CalleeGuids) {346InstrProfValueData VD;347VD.Value = CalleeGUID;348// For MemProf, we don't have actual call counts, so we assign349// a weight of 1 to each potential target.350// TODO: Consider making this weight configurable or increasing it to351// improve effectiveness for ICP.352VD.Count = 1;353VDs.push_back(VD);354TotalCount += VD.Count;355}356357if (!VDs.empty()) {358annotateValueSite(M, I, VDs, TotalCount, IPVK_IndirectCallTarget,359VDs.size());360}361}362363static void readMemprof(Module &M, Function &F,364IndexedInstrProfReader *MemProfReader,365const TargetLibraryInfo &TLI,366std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>367&FullStackIdToAllocMatchInfo,368std::set<std::vector<uint64_t>> &MatchedCallSites,369DenseMap<uint64_t, LocToLocMap> &UndriftMaps,370OptimizationRemarkEmitter &ORE, uint64_t MaxColdSize) {371auto &Ctx = M.getContext();372// Previously we used getIRPGOFuncName() here. If F is local linkage,373// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But374// llvm-profdata uses FuncName in dwarf to create GUID which doesn't375// contain FileName's prefix. It caused local linkage function can't376// find MemProfRecord. 
So we use getName() now.377// 'unique-internal-linkage-names' can make MemProf work better for local378// linkage function.379auto FuncName = F.getName();380auto FuncGUID = Function::getGUIDAssumingExternalLinkage(FuncName);381std::optional<memprof::MemProfRecord> MemProfRec;382auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);383if (Err) {384handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {385auto Err = IPE.get();386bool SkipWarning = false;387LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName388<< ": ");389if (Err == instrprof_error::unknown_function) {390NumOfMemProfMissing++;391SkipWarning = !PGOWarnMissing;392LLVM_DEBUG(dbgs() << "unknown function");393} else if (Err == instrprof_error::hash_mismatch) {394NumOfMemProfMismatch++;395SkipWarning =396NoPGOWarnMismatch ||397(NoPGOWarnMismatchComdatWeak &&398(F.hasComdat() ||399F.getLinkage() == GlobalValue::AvailableExternallyLinkage));400LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");401}402403if (SkipWarning)404return;405406std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +407Twine(" Hash = ") + std::to_string(FuncGUID))408.str();409410Ctx.diagnose(411DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));412});413return;414}415416NumOfMemProfFunc++;417418// If requested, undrfit MemProfRecord so that the source locations in it419// match those in the IR.420if (SalvageStaleProfile)421undriftMemProfRecord(UndriftMaps, *MemProfRec);422423// Detect if there are non-zero column numbers in the profile. If not,424// treat all column numbers as 0 when matching (i.e. ignore any non-zero425// columns in the IR). 
The profiled binary might have been built with426// column numbers disabled, for example.427bool ProfileHasColumns = false;428429// Build maps of the location hash to all profile data with that leaf location430// (allocation info and the callsites).431std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;432433// Helper struct for maintaining refs to callsite data. As an alternative we434// could store a pointer to the CallSiteInfo struct but we also need the frame435// index. Using ArrayRefs instead makes it a little easier to read.436struct CallSiteEntry {437// Subset of frames for the corresponding CallSiteInfo.438ArrayRef<Frame> Frames;439// Potential targets for indirect calls.440ArrayRef<GlobalValue::GUID> CalleeGuids;441442// Only compare Frame contents.443// Use pointer-based equality instead of ArrayRef's operator== which does444// element-wise comparison. We want to check if it's the same slice of the445// underlying array, not just equivalent content.446bool operator==(const CallSiteEntry &Other) const {447return Frames.data() == Other.Frames.data() &&448Frames.size() == Other.Frames.size();449}450};451452struct CallSiteEntryHash {453size_t operator()(const CallSiteEntry &Entry) const {454return computeFullStackId(Entry.Frames);455}456};457458// For the callsites we need to record slices of the frame array (see comments459// below where the map entries are added) along with their CalleeGuids.460std::map<uint64_t, std::unordered_set<CallSiteEntry, CallSiteEntryHash>>461LocHashToCallSites;462for (auto &AI : MemProfRec->AllocSites) {463NumOfMemProfAllocContextProfiles++;464// Associate the allocation info with the leaf frame. 
The later matching465// code will match any inlined call sequences in the IR with a longer prefix466// of call stack frames.467uint64_t StackId = computeStackId(AI.CallStack[0]);468LocHashToAllocInfo[StackId].insert(&AI);469ProfileHasColumns |= AI.CallStack[0].Column;470}471for (auto &CS : MemProfRec->CallSites) {472NumOfMemProfCallSiteProfiles++;473// Need to record all frames from leaf up to and including this function,474// as any of these may or may not have been inlined at this point.475unsigned Idx = 0;476for (auto &StackFrame : CS.Frames) {477uint64_t StackId = computeStackId(StackFrame);478ArrayRef<Frame> FrameSlice = ArrayRef<Frame>(CS.Frames).drop_front(Idx++);479ArrayRef<GlobalValue::GUID> CalleeGuids(CS.CalleeGuids);480LocHashToCallSites[StackId].insert({FrameSlice, CalleeGuids});481482ProfileHasColumns |= StackFrame.Column;483// Once we find this function, we can stop recording.484if (StackFrame.Function == FuncGUID)485break;486}487assert(Idx <= CS.Frames.size() && CS.Frames[Idx - 1].Function == FuncGUID);488}489490auto GetOffset = [](const DILocation *DIL) {491return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &4920xffff;493};494495// Now walk the instructions, looking up the associated profile data using496// debug locations.497for (auto &BB : F) {498for (auto &I : BB) {499if (I.isDebugOrPseudoInst())500continue;501// We are only interested in calls (allocation or interior call stack502// context calls).503auto *CI = dyn_cast<CallBase>(&I);504if (!CI)505continue;506auto *CalledFunction = CI->getCalledFunction();507if (CalledFunction && CalledFunction->isIntrinsic())508continue;509// List of call stack ids computed from the location hashes on debug510// locations (leaf to inlined at root).511SmallVector<uint64_t, 8> InlinedCallStack;512// Was the leaf location found in one of the profile maps?513bool LeafFound = false;514// If leaf was found in a map, iterators pointing to its location in both515// of the maps. 
It might exist in neither, one, or both (the latter case516// can happen because we don't currently have discriminators to517// distinguish the case when a single line/col maps to both an allocation518// and another callsite).519auto AllocInfoIter = LocHashToAllocInfo.end();520auto CallSitesIter = LocHashToCallSites.end();521for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;522DIL = DIL->getInlinedAt()) {523// Use C++ linkage name if possible. Need to compile with524// -fdebug-info-for-profiling to get linkage name.525StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();526if (Name.empty())527Name = DIL->getScope()->getSubprogram()->getName();528auto CalleeGUID = Function::getGUIDAssumingExternalLinkage(Name);529auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),530ProfileHasColumns ? DIL->getColumn() : 0);531// Check if we have found the profile's leaf frame. If yes, collect532// the rest of the call's inlined context starting here. If not, see if533// we find a match further up the inlined context (in case the profile534// was missing debug frames at the leaf).535if (!LeafFound) {536AllocInfoIter = LocHashToAllocInfo.find(StackId);537CallSitesIter = LocHashToCallSites.find(StackId);538if (AllocInfoIter != LocHashToAllocInfo.end() ||539CallSitesIter != LocHashToCallSites.end())540LeafFound = true;541}542if (LeafFound)543InlinedCallStack.push_back(StackId);544}545// If leaf not in either of the maps, skip inst.546if (!LeafFound)547continue;548549// First add !memprof metadata from allocation info, if we found the550// instruction's leaf location in that map, and if the rest of the551// instruction's locations match the prefix Frame locations on an552// allocation context with the same leaf.553if (AllocInfoIter != LocHashToAllocInfo.end() &&554// Only consider allocations which support hinting.555isAllocationWithHotColdVariant(CI->getCalledFunction(), TLI)) {556// We may match this instruction's location list to multiple MIB557// 
contexts. Add them to a Trie specialized for trimming the contexts to558// the minimal needed to disambiguate contexts with unique behavior.559CallStackTrie AllocTrie(&ORE, MaxColdSize);560uint64_t TotalSize = 0;561uint64_t TotalColdSize = 0;562for (auto *AllocInfo : AllocInfoIter->second) {563// Check the full inlined call stack against this one.564// If we found and thus matched all frames on the call, include565// this MIB.566if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,567InlinedCallStack)) {568NumOfMemProfMatchedAllocContexts++;569uint64_t FullStackId = 0;570if (ClPrintMemProfMatchInfo || recordContextSizeInfoForAnalysis())571FullStackId = computeFullStackId(AllocInfo->CallStack);572auto AllocType = addCallStack(AllocTrie, AllocInfo, FullStackId);573TotalSize += AllocInfo->Info.getTotalSize();574if (AllocType == AllocationType::Cold)575TotalColdSize += AllocInfo->Info.getTotalSize();576// Record information about the allocation if match info printing577// was requested.578if (ClPrintMemProfMatchInfo) {579assert(FullStackId != 0);580FullStackIdToAllocMatchInfo[std::make_pair(581FullStackId, InlinedCallStack.size())] = {582AllocInfo->Info.getTotalSize(), AllocType};583}584}585}586// If the threshold for the percent of cold bytes is less than 100%,587// and not all bytes are cold, see if we should still hint this588// allocation as cold without context sensitivity.589if (TotalColdSize < TotalSize && MinMatchedColdBytePercent < 100 &&590TotalColdSize * 100 >= MinMatchedColdBytePercent * TotalSize) {591AllocTrie.addSingleAllocTypeAttribute(CI, AllocationType::Cold,592"dominant");593continue;594}595596// We might not have matched any to the full inlined call stack.597// But if we did, create and attach metadata, or a function attribute if598// all contexts have identical profiled behavior.599if (!AllocTrie.empty()) {600NumOfMemProfMatchedAllocs++;601// MemprofMDAttached will be false if a function attribute was602// attached.603bool MemprofMDAttached 
= AllocTrie.buildAndAttachMIBMetadata(CI);604assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));605if (MemprofMDAttached) {606// Add callsite metadata for the instruction's location list so that607// it simpler later on to identify which part of the MIB contexts608// are from this particular instruction (including during inlining,609// when the callsite metadata will be updated appropriately).610// FIXME: can this be changed to strip out the matching stack611// context ids from the MIB contexts and not add any callsite612// metadata here to save space?613addCallsiteMetadata(I, InlinedCallStack, Ctx);614}615}616continue;617}618619if (CallSitesIter == LocHashToCallSites.end())620continue;621622// Otherwise, add callsite metadata. If we reach here then we found the623// instruction's leaf location in the callsites map and not the allocation624// map.625for (const auto &CallSiteEntry : CallSitesIter->second) {626// If we found and thus matched all frames on the call, create and627// attach call stack metadata.628if (stackFrameIncludesInlinedCallStack(CallSiteEntry.Frames,629InlinedCallStack)) {630NumOfMemProfMatchedCallSites++;631addCallsiteMetadata(I, InlinedCallStack, Ctx);632633// Try to attach indirect call metadata if possible.634if (!CalledFunction)635addVPMetadata(M, I, CallSiteEntry.CalleeGuids);636637// Only need to find one with a matching call stack and add a single638// callsite metadata.639640// Accumulate call site matching information upon request.641if (ClPrintMemProfMatchInfo) {642std::vector<uint64_t> CallStack;643append_range(CallStack, InlinedCallStack);644MatchedCallSites.insert(std::move(CallStack));645}646break;647}648}649}650}651}652653MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,654IntrusiveRefCntPtr<vfs::FileSystem> FS)655: MemoryProfileFileName(MemoryProfileFile), FS(FS) {656if (!FS)657this->FS = vfs::getRealFileSystem();658}659660PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) 
{661// Return immediately if the module doesn't contain any function.662if (M.empty())663return PreservedAnalyses::all();664665LLVM_DEBUG(dbgs() << "Read in memory profile:");666auto &Ctx = M.getContext();667auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);668if (Error E = ReaderOrErr.takeError()) {669handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {670Ctx.diagnose(671DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));672});673return PreservedAnalyses::all();674}675676std::unique_ptr<IndexedInstrProfReader> MemProfReader =677std::move(ReaderOrErr.get());678if (!MemProfReader) {679Ctx.diagnose(DiagnosticInfoPGOProfile(680MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));681return PreservedAnalyses::all();682}683684if (!MemProfReader->hasMemoryProfile()) {685Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),686"Not a memory profile"));687return PreservedAnalyses::all();688}689690auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();691692TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(*M.begin());693DenseMap<uint64_t, LocToLocMap> UndriftMaps;694if (SalvageStaleProfile)695UndriftMaps = computeUndriftMap(M, MemProfReader.get(), TLI);696697// Map from the stack hash and matched frame count of each allocation context698// in the function profiles to the total profiled size (bytes) and allocation699// type.700std::map<std::pair<uint64_t, unsigned>, AllocMatchInfo>701FullStackIdToAllocMatchInfo;702703// Set of the matched call sites, each expressed as a sequence of an inline704// call stack.705std::set<std::vector<uint64_t>> MatchedCallSites;706707uint64_t MaxColdSize = 0;708if (auto *MemProfSum = MemProfReader->getMemProfSummary())709MaxColdSize = MemProfSum->getMaxColdTotalSize();710711for (auto &F : M) {712if (F.isDeclaration())713continue;714715const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);716auto &ORE = 
FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);717readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo,718MatchedCallSites, UndriftMaps, ORE, MaxColdSize);719}720721if (ClPrintMemProfMatchInfo) {722for (const auto &[IdLengthPair, Info] : FullStackIdToAllocMatchInfo) {723auto [Id, Length] = IdLengthPair;724errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)725<< " context with id " << Id << " has total profiled size "726<< Info.TotalSize << " is matched with " << Length << " frames\n";727}728729for (const auto &CallStack : MatchedCallSites) {730errs() << "MemProf callsite match for inline call stack";731for (uint64_t StackId : CallStack)732errs() << " " << StackId;733errs() << "\n";734}735}736737return PreservedAnalyses::none();738}739740741