// Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
// 35266 views
//===- MemProfiler.cpp - memory allocation and access profiler ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file is a part of MemProfiler. Memory accesses are instrumented9// to increment the access count held in a shadow memory location, or10// alternatively to call into the runtime. Memory intrinsic calls (memmove,11// memcpy, memset) are changed to call the memory profiling runtime version12// instead.13//14//===----------------------------------------------------------------------===//1516#include "llvm/Transforms/Instrumentation/MemProfiler.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/ADT/Statistic.h"19#include "llvm/ADT/StringRef.h"20#include "llvm/Analysis/MemoryBuiltins.h"21#include "llvm/Analysis/MemoryProfileInfo.h"22#include "llvm/Analysis/ValueTracking.h"23#include "llvm/IR/Constant.h"24#include "llvm/IR/DataLayout.h"25#include "llvm/IR/DiagnosticInfo.h"26#include "llvm/IR/Function.h"27#include "llvm/IR/GlobalValue.h"28#include "llvm/IR/IRBuilder.h"29#include "llvm/IR/Instruction.h"30#include "llvm/IR/IntrinsicInst.h"31#include "llvm/IR/Module.h"32#include "llvm/IR/Type.h"33#include "llvm/IR/Value.h"34#include "llvm/ProfileData/InstrProf.h"35#include "llvm/ProfileData/InstrProfReader.h"36#include "llvm/Support/BLAKE3.h"37#include "llvm/Support/CommandLine.h"38#include "llvm/Support/Debug.h"39#include "llvm/Support/HashBuilder.h"40#include "llvm/Support/VirtualFileSystem.h"41#include "llvm/TargetParser/Triple.h"42#include "llvm/Transforms/Utils/BasicBlockUtils.h"43#include "llvm/Transforms/Utils/ModuleUtils.h"44#include <map>45#include <set>4647using namespace llvm;48using namespace llvm::memprof;4950#define DEBUG_TYPE "memprof"5152namespace llvm {53extern cl::opt<bool> 
PGOWarnMissing;54extern cl::opt<bool> NoPGOWarnMismatch;55extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;56} // namespace llvm5758constexpr int LLVM_MEM_PROFILER_VERSION = 1;5960// Size of memory mapped to a single shadow location.61constexpr uint64_t DefaultMemGranularity = 64;6263// Scale from granularity down to shadow size.64constexpr uint64_t DefaultShadowScale = 3;6566constexpr char MemProfModuleCtorName[] = "memprof.module_ctor";67constexpr uint64_t MemProfCtorAndDtorPriority = 1;68// On Emscripten, the system needs more than one priorities for constructors.69constexpr uint64_t MemProfEmscriptenCtorAndDtorPriority = 50;70constexpr char MemProfInitName[] = "__memprof_init";71constexpr char MemProfVersionCheckNamePrefix[] =72"__memprof_version_mismatch_check_v";7374constexpr char MemProfShadowMemoryDynamicAddress[] =75"__memprof_shadow_memory_dynamic_address";7677constexpr char MemProfFilenameVar[] = "__memprof_profile_filename";7879constexpr char MemProfHistogramFlagVar[] = "__memprof_histogram";8081// Command-line flags.8283static cl::opt<bool> ClInsertVersionCheck(84"memprof-guard-against-version-mismatch",85cl::desc("Guard against compiler/runtime version mismatch."), cl::Hidden,86cl::init(true));8788// This flag may need to be replaced with -f[no-]memprof-reads.89static cl::opt<bool> ClInstrumentReads("memprof-instrument-reads",90cl::desc("instrument read instructions"),91cl::Hidden, cl::init(true));9293static cl::opt<bool>94ClInstrumentWrites("memprof-instrument-writes",95cl::desc("instrument write instructions"), cl::Hidden,96cl::init(true));9798static cl::opt<bool> ClInstrumentAtomics(99"memprof-instrument-atomics",100cl::desc("instrument atomic instructions (rmw, cmpxchg)"), cl::Hidden,101cl::init(true));102103static cl::opt<bool> ClUseCalls(104"memprof-use-callbacks",105cl::desc("Use callbacks instead of inline instrumentation sequences."),106cl::Hidden, cl::init(false));107108static 
cl::opt<std::string>109ClMemoryAccessCallbackPrefix("memprof-memory-access-callback-prefix",110cl::desc("Prefix for memory access callbacks"),111cl::Hidden, cl::init("__memprof_"));112113// These flags allow to change the shadow mapping.114// The shadow mapping looks like115// Shadow = ((Mem & mask) >> scale) + offset116117static cl::opt<int> ClMappingScale("memprof-mapping-scale",118cl::desc("scale of memprof shadow mapping"),119cl::Hidden, cl::init(DefaultShadowScale));120121static cl::opt<int>122ClMappingGranularity("memprof-mapping-granularity",123cl::desc("granularity of memprof shadow mapping"),124cl::Hidden, cl::init(DefaultMemGranularity));125126static cl::opt<bool> ClStack("memprof-instrument-stack",127cl::desc("Instrument scalar stack variables"),128cl::Hidden, cl::init(false));129130// Debug flags.131132static cl::opt<int> ClDebug("memprof-debug", cl::desc("debug"), cl::Hidden,133cl::init(0));134135static cl::opt<std::string> ClDebugFunc("memprof-debug-func", cl::Hidden,136cl::desc("Debug func"));137138static cl::opt<int> ClDebugMin("memprof-debug-min", cl::desc("Debug min inst"),139cl::Hidden, cl::init(-1));140141static cl::opt<int> ClDebugMax("memprof-debug-max", cl::desc("Debug max inst"),142cl::Hidden, cl::init(-1));143144// By default disable matching of allocation profiles onto operator new that145// already explicitly pass a hot/cold hint, since we don't currently146// override these hints anyway.147static cl::opt<bool> ClMemProfMatchHotColdNew(148"memprof-match-hot-cold-new",149cl::desc(150"Match allocation profiles onto existing hot/cold operator new calls"),151cl::Hidden, cl::init(false));152153static cl::opt<bool> ClHistogram("memprof-histogram",154cl::desc("Collect access count histograms"),155cl::Hidden, cl::init(false));156157static cl::opt<bool>158ClPrintMemProfMatchInfo("memprof-print-match-info",159cl::desc("Print matching stats for each allocation "160"context in this module's profiles"),161cl::Hidden, cl::init(false));162163extern 
cl::opt<bool> MemProfReportHintedSizes;164165// Instrumentation statistics166STATISTIC(NumInstrumentedReads, "Number of instrumented reads");167STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");168STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");169STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");170171// Matching statistics172STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");173STATISTIC(NumOfMemProfMismatch,174"Number of functions having mismatched memory profile hash.");175STATISTIC(NumOfMemProfFunc, "Number of functions having valid memory profile.");176STATISTIC(NumOfMemProfAllocContextProfiles,177"Number of alloc contexts in memory profile.");178STATISTIC(NumOfMemProfCallSiteProfiles,179"Number of callsites in memory profile.");180STATISTIC(NumOfMemProfMatchedAllocContexts,181"Number of matched memory profile alloc contexts.");182STATISTIC(NumOfMemProfMatchedAllocs,183"Number of matched memory profile allocs.");184STATISTIC(NumOfMemProfMatchedCallSites,185"Number of matched memory profile callsites.");186187namespace {188189/// This struct defines the shadow mapping using the rule:190/// shadow = ((mem & mask) >> Scale) ADD DynamicShadowOffset.191struct ShadowMapping {192ShadowMapping() {193Scale = ClMappingScale;194Granularity = ClMappingGranularity;195Mask = ~(Granularity - 1);196}197198int Scale;199int Granularity;200uint64_t Mask; // Computed as ~(Granularity-1)201};202203static uint64_t getCtorAndDtorPriority(Triple &TargetTriple) {204return TargetTriple.isOSEmscripten() ? 
MemProfEmscriptenCtorAndDtorPriority205: MemProfCtorAndDtorPriority;206}207208struct InterestingMemoryAccess {209Value *Addr = nullptr;210bool IsWrite;211Type *AccessTy;212Value *MaybeMask = nullptr;213};214215/// Instrument the code in module to profile memory accesses.216class MemProfiler {217public:218MemProfiler(Module &M) {219C = &(M.getContext());220LongSize = M.getDataLayout().getPointerSizeInBits();221IntptrTy = Type::getIntNTy(*C, LongSize);222PtrTy = PointerType::getUnqual(*C);223}224225/// If it is an interesting memory access, populate information226/// about the access and return a InterestingMemoryAccess struct.227/// Otherwise return std::nullopt.228std::optional<InterestingMemoryAccess>229isInterestingMemoryAccess(Instruction *I) const;230231void instrumentMop(Instruction *I, const DataLayout &DL,232InterestingMemoryAccess &Access);233void instrumentAddress(Instruction *OrigIns, Instruction *InsertBefore,234Value *Addr, bool IsWrite);235void instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,236Instruction *I, Value *Addr, Type *AccessTy,237bool IsWrite);238void instrumentMemIntrinsic(MemIntrinsic *MI);239Value *memToShadow(Value *Shadow, IRBuilder<> &IRB);240bool instrumentFunction(Function &F);241bool maybeInsertMemProfInitAtFunctionEntry(Function &F);242bool insertDynamicShadowAtFunctionEntry(Function &F);243244private:245void initializeCallbacks(Module &M);246247LLVMContext *C;248int LongSize;249Type *IntptrTy;250PointerType *PtrTy;251ShadowMapping Mapping;252253// These arrays is indexed by AccessIsWrite254FunctionCallee MemProfMemoryAccessCallback[2];255256FunctionCallee MemProfMemmove, MemProfMemcpy, MemProfMemset;257Value *DynamicShadowOffset = nullptr;258};259260class ModuleMemProfiler {261public:262ModuleMemProfiler(Module &M) { TargetTriple = Triple(M.getTargetTriple()); }263264bool instrumentModule(Module &);265266private:267Triple TargetTriple;268ShadowMapping Mapping;269Function *MemProfCtorFunction = nullptr;270};271272} // 
end anonymous namespace273274MemProfilerPass::MemProfilerPass() = default;275276PreservedAnalyses MemProfilerPass::run(Function &F,277AnalysisManager<Function> &AM) {278Module &M = *F.getParent();279MemProfiler Profiler(M);280if (Profiler.instrumentFunction(F))281return PreservedAnalyses::none();282return PreservedAnalyses::all();283}284285ModuleMemProfilerPass::ModuleMemProfilerPass() = default;286287PreservedAnalyses ModuleMemProfilerPass::run(Module &M,288AnalysisManager<Module> &AM) {289290assert((!ClHistogram || (ClHistogram && ClUseCalls)) &&291"Cannot use -memprof-histogram without Callbacks. Set "292"memprof-use-callbacks");293294ModuleMemProfiler Profiler(M);295if (Profiler.instrumentModule(M))296return PreservedAnalyses::none();297return PreservedAnalyses::all();298}299300Value *MemProfiler::memToShadow(Value *Shadow, IRBuilder<> &IRB) {301// (Shadow & mask) >> scale302Shadow = IRB.CreateAnd(Shadow, Mapping.Mask);303Shadow = IRB.CreateLShr(Shadow, Mapping.Scale);304// (Shadow >> scale) | offset305assert(DynamicShadowOffset);306return IRB.CreateAdd(Shadow, DynamicShadowOffset);307}308309// Instrument memset/memmove/memcpy310void MemProfiler::instrumentMemIntrinsic(MemIntrinsic *MI) {311IRBuilder<> IRB(MI);312if (isa<MemTransferInst>(MI)) {313IRB.CreateCall(isa<MemMoveInst>(MI) ? 
MemProfMemmove : MemProfMemcpy,314{MI->getOperand(0), MI->getOperand(1),315IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});316} else if (isa<MemSetInst>(MI)) {317IRB.CreateCall(318MemProfMemset,319{MI->getOperand(0),320IRB.CreateIntCast(MI->getOperand(1), IRB.getInt32Ty(), false),321IRB.CreateIntCast(MI->getOperand(2), IntptrTy, false)});322}323MI->eraseFromParent();324}325326std::optional<InterestingMemoryAccess>327MemProfiler::isInterestingMemoryAccess(Instruction *I) const {328// Do not instrument the load fetching the dynamic shadow address.329if (DynamicShadowOffset == I)330return std::nullopt;331332InterestingMemoryAccess Access;333334if (LoadInst *LI = dyn_cast<LoadInst>(I)) {335if (!ClInstrumentReads)336return std::nullopt;337Access.IsWrite = false;338Access.AccessTy = LI->getType();339Access.Addr = LI->getPointerOperand();340} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {341if (!ClInstrumentWrites)342return std::nullopt;343Access.IsWrite = true;344Access.AccessTy = SI->getValueOperand()->getType();345Access.Addr = SI->getPointerOperand();346} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {347if (!ClInstrumentAtomics)348return std::nullopt;349Access.IsWrite = true;350Access.AccessTy = RMW->getValOperand()->getType();351Access.Addr = RMW->getPointerOperand();352} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {353if (!ClInstrumentAtomics)354return std::nullopt;355Access.IsWrite = true;356Access.AccessTy = XCHG->getCompareOperand()->getType();357Access.Addr = XCHG->getPointerOperand();358} else if (auto *CI = dyn_cast<CallInst>(I)) {359auto *F = CI->getCalledFunction();360if (F && (F->getIntrinsicID() == Intrinsic::masked_load ||361F->getIntrinsicID() == Intrinsic::masked_store)) {362unsigned OpOffset = 0;363if (F->getIntrinsicID() == Intrinsic::masked_store) {364if (!ClInstrumentWrites)365return std::nullopt;366// Masked store has an initial operand for the value.367OpOffset = 1;368Access.AccessTy = 
CI->getArgOperand(0)->getType();369Access.IsWrite = true;370} else {371if (!ClInstrumentReads)372return std::nullopt;373Access.AccessTy = CI->getType();374Access.IsWrite = false;375}376377auto *BasePtr = CI->getOperand(0 + OpOffset);378Access.MaybeMask = CI->getOperand(2 + OpOffset);379Access.Addr = BasePtr;380}381}382383if (!Access.Addr)384return std::nullopt;385386// Do not instrument accesses from different address spaces; we cannot deal387// with them.388Type *PtrTy = cast<PointerType>(Access.Addr->getType()->getScalarType());389if (PtrTy->getPointerAddressSpace() != 0)390return std::nullopt;391392// Ignore swifterror addresses.393// swifterror memory addresses are mem2reg promoted by instruction394// selection. As such they cannot have regular uses like an instrumentation395// function and it makes no sense to track them as memory.396if (Access.Addr->isSwiftError())397return std::nullopt;398399// Peel off GEPs and BitCasts.400auto *Addr = Access.Addr->stripInBoundsOffsets();401402if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {403// Do not instrument PGO counter updates.404if (GV->hasSection()) {405StringRef SectionName = GV->getSection();406// Check if the global is in the PGO counters section.407auto OF = Triple(I->getModule()->getTargetTriple()).getObjectFormat();408if (SectionName.ends_with(409getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false)))410return std::nullopt;411}412413// Do not instrument accesses to LLVM internal variables.414if (GV->getName().starts_with("__llvm"))415return std::nullopt;416}417418return Access;419}420421void MemProfiler::instrumentMaskedLoadOrStore(const DataLayout &DL, Value *Mask,422Instruction *I, Value *Addr,423Type *AccessTy, bool IsWrite) {424auto *VTy = cast<FixedVectorType>(AccessTy);425unsigned Num = VTy->getNumElements();426auto *Zero = ConstantInt::get(IntptrTy, 0);427for (unsigned Idx = 0; Idx < Num; ++Idx) {428Value *InstrumentedAddress = nullptr;429Instruction *InsertBefore = I;430if (auto 
*Vector = dyn_cast<ConstantVector>(Mask)) {431// dyn_cast as we might get UndefValue432if (auto *Masked = dyn_cast<ConstantInt>(Vector->getOperand(Idx))) {433if (Masked->isZero())434// Mask is constant false, so no instrumentation needed.435continue;436// If we have a true or undef value, fall through to instrumentAddress.437// with InsertBefore == I438}439} else {440IRBuilder<> IRB(I);441Value *MaskElem = IRB.CreateExtractElement(Mask, Idx);442Instruction *ThenTerm = SplitBlockAndInsertIfThen(MaskElem, I, false);443InsertBefore = ThenTerm;444}445446IRBuilder<> IRB(InsertBefore);447InstrumentedAddress =448IRB.CreateGEP(VTy, Addr, {Zero, ConstantInt::get(IntptrTy, Idx)});449instrumentAddress(I, InsertBefore, InstrumentedAddress, IsWrite);450}451}452453void MemProfiler::instrumentMop(Instruction *I, const DataLayout &DL,454InterestingMemoryAccess &Access) {455// Skip instrumentation of stack accesses unless requested.456if (!ClStack && isa<AllocaInst>(getUnderlyingObject(Access.Addr))) {457if (Access.IsWrite)458++NumSkippedStackWrites;459else460++NumSkippedStackReads;461return;462}463464if (Access.IsWrite)465NumInstrumentedWrites++;466else467NumInstrumentedReads++;468469if (Access.MaybeMask) {470instrumentMaskedLoadOrStore(DL, Access.MaybeMask, I, Access.Addr,471Access.AccessTy, Access.IsWrite);472} else {473// Since the access counts will be accumulated across the entire allocation,474// we only update the shadow access count for the first location and thus475// don't need to worry about alignment and type size.476instrumentAddress(I, I, Access.Addr, Access.IsWrite);477}478}479480void MemProfiler::instrumentAddress(Instruction *OrigIns,481Instruction *InsertBefore, Value *Addr,482bool IsWrite) {483IRBuilder<> IRB(InsertBefore);484Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy);485486if (ClUseCalls) {487IRB.CreateCall(MemProfMemoryAccessCallback[IsWrite], AddrLong);488return;489}490491// Create an inline sequence to compute shadow location, and increment 
the492// value by one.493Type *ShadowTy = Type::getInt64Ty(*C);494Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);495Value *ShadowPtr = memToShadow(AddrLong, IRB);496Value *ShadowAddr = IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy);497Value *ShadowValue = IRB.CreateLoad(ShadowTy, ShadowAddr);498Value *Inc = ConstantInt::get(Type::getInt64Ty(*C), 1);499ShadowValue = IRB.CreateAdd(ShadowValue, Inc);500IRB.CreateStore(ShadowValue, ShadowAddr);501}502503// Create the variable for the profile file name.504void createProfileFileNameVar(Module &M) {505const MDString *MemProfFilename =506dyn_cast_or_null<MDString>(M.getModuleFlag("MemProfProfileFilename"));507if (!MemProfFilename)508return;509assert(!MemProfFilename->getString().empty() &&510"Unexpected MemProfProfileFilename metadata with empty string");511Constant *ProfileNameConst = ConstantDataArray::getString(512M.getContext(), MemProfFilename->getString(), true);513GlobalVariable *ProfileNameVar = new GlobalVariable(514M, ProfileNameConst->getType(), /*isConstant=*/true,515GlobalValue::WeakAnyLinkage, ProfileNameConst, MemProfFilenameVar);516Triple TT(M.getTargetTriple());517if (TT.supportsCOMDAT()) {518ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);519ProfileNameVar->setComdat(M.getOrInsertComdat(MemProfFilenameVar));520}521}522523// Set MemprofHistogramFlag as a Global veriable in IR. 
This makes it accessible524// to the runtime, changing shadow count behavior.525void createMemprofHistogramFlagVar(Module &M) {526const StringRef VarName(MemProfHistogramFlagVar);527Type *IntTy1 = Type::getInt1Ty(M.getContext());528auto MemprofHistogramFlag = new GlobalVariable(529M, IntTy1, true, GlobalValue::WeakAnyLinkage,530Constant::getIntegerValue(IntTy1, APInt(1, ClHistogram)), VarName);531Triple TT(M.getTargetTriple());532if (TT.supportsCOMDAT()) {533MemprofHistogramFlag->setLinkage(GlobalValue::ExternalLinkage);534MemprofHistogramFlag->setComdat(M.getOrInsertComdat(VarName));535}536appendToCompilerUsed(M, MemprofHistogramFlag);537}538539bool ModuleMemProfiler::instrumentModule(Module &M) {540541// Create a module constructor.542std::string MemProfVersion = std::to_string(LLVM_MEM_PROFILER_VERSION);543std::string VersionCheckName =544ClInsertVersionCheck ? (MemProfVersionCheckNamePrefix + MemProfVersion)545: "";546std::tie(MemProfCtorFunction, std::ignore) =547createSanitizerCtorAndInitFunctions(M, MemProfModuleCtorName,548MemProfInitName, /*InitArgTypes=*/{},549/*InitArgs=*/{}, VersionCheckName);550551const uint64_t Priority = getCtorAndDtorPriority(TargetTriple);552appendToGlobalCtors(M, MemProfCtorFunction, Priority);553554createProfileFileNameVar(M);555556createMemprofHistogramFlagVar(M);557558return true;559}560561void MemProfiler::initializeCallbacks(Module &M) {562IRBuilder<> IRB(*C);563564for (size_t AccessIsWrite = 0; AccessIsWrite <= 1; AccessIsWrite++) {565const std::string TypeStr = AccessIsWrite ? "store" : "load";566const std::string HistPrefix = ClHistogram ? 
"hist_" : "";567568SmallVector<Type *, 2> Args1{1, IntptrTy};569MemProfMemoryAccessCallback[AccessIsWrite] = M.getOrInsertFunction(570ClMemoryAccessCallbackPrefix + HistPrefix + TypeStr,571FunctionType::get(IRB.getVoidTy(), Args1, false));572}573MemProfMemmove = M.getOrInsertFunction(574ClMemoryAccessCallbackPrefix + "memmove", PtrTy, PtrTy, PtrTy, IntptrTy);575MemProfMemcpy = M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memcpy",576PtrTy, PtrTy, PtrTy, IntptrTy);577MemProfMemset =578M.getOrInsertFunction(ClMemoryAccessCallbackPrefix + "memset", PtrTy,579PtrTy, IRB.getInt32Ty(), IntptrTy);580}581582bool MemProfiler::maybeInsertMemProfInitAtFunctionEntry(Function &F) {583// For each NSObject descendant having a +load method, this method is invoked584// by the ObjC runtime before any of the static constructors is called.585// Therefore we need to instrument such methods with a call to __memprof_init586// at the beginning in order to initialize our runtime before any access to587// the shadow memory.588// We cannot just ignore these methods, because they may call other589// instrumented functions.590if (F.getName().contains(" load]")) {591FunctionCallee MemProfInitFunction =592declareSanitizerInitFunction(*F.getParent(), MemProfInitName, {});593IRBuilder<> IRB(&F.front(), F.front().begin());594IRB.CreateCall(MemProfInitFunction, {});595return true;596}597return false;598}599600bool MemProfiler::insertDynamicShadowAtFunctionEntry(Function &F) {601IRBuilder<> IRB(&F.front().front());602Value *GlobalDynamicAddress = F.getParent()->getOrInsertGlobal(603MemProfShadowMemoryDynamicAddress, IntptrTy);604if (F.getParent()->getPICLevel() == PICLevel::NotPIC)605cast<GlobalVariable>(GlobalDynamicAddress)->setDSOLocal(true);606DynamicShadowOffset = IRB.CreateLoad(IntptrTy, GlobalDynamicAddress);607return true;608}609610bool MemProfiler::instrumentFunction(Function &F) {611if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)612return false;613if (ClDebugFunc == 
F.getName())614return false;615if (F.getName().starts_with("__memprof_"))616return false;617618bool FunctionModified = false;619620// If needed, insert __memprof_init.621// This function needs to be called even if the function body is not622// instrumented.623if (maybeInsertMemProfInitAtFunctionEntry(F))624FunctionModified = true;625626LLVM_DEBUG(dbgs() << "MEMPROF instrumenting:\n" << F << "\n");627628initializeCallbacks(*F.getParent());629630SmallVector<Instruction *, 16> ToInstrument;631632// Fill the set of memory operations to instrument.633for (auto &BB : F) {634for (auto &Inst : BB) {635if (isInterestingMemoryAccess(&Inst) || isa<MemIntrinsic>(Inst))636ToInstrument.push_back(&Inst);637}638}639640if (ToInstrument.empty()) {641LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified642<< " " << F << "\n");643644return FunctionModified;645}646647FunctionModified |= insertDynamicShadowAtFunctionEntry(F);648649int NumInstrumented = 0;650for (auto *Inst : ToInstrument) {651if (ClDebugMin < 0 || ClDebugMax < 0 ||652(NumInstrumented >= ClDebugMin && NumInstrumented <= ClDebugMax)) {653std::optional<InterestingMemoryAccess> Access =654isInterestingMemoryAccess(Inst);655if (Access)656instrumentMop(Inst, F.getDataLayout(), *Access);657else658instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));659}660NumInstrumented++;661}662663if (NumInstrumented > 0)664FunctionModified = true;665666LLVM_DEBUG(dbgs() << "MEMPROF done instrumenting: " << FunctionModified << " "667<< F << "\n");668669return FunctionModified;670}671672static void addCallsiteMetadata(Instruction &I,673std::vector<uint64_t> &InlinedCallStack,674LLVMContext &Ctx) {675I.setMetadata(LLVMContext::MD_callsite,676buildCallstackMetadata(InlinedCallStack, Ctx));677}678679static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,680uint32_t Column) {681llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>682HashBuilder;683HashBuilder.add(Function, LineOffset, 
Column);684llvm::BLAKE3Result<8> Hash = HashBuilder.final();685uint64_t Id;686std::memcpy(&Id, Hash.data(), sizeof(Hash));687return Id;688}689690static uint64_t computeStackId(const memprof::Frame &Frame) {691return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);692}693694// Helper to generate a single hash id for a given callstack, used for emitting695// matching statistics and useful for uniquing such statistics across modules.696static uint64_t697computeFullStackId(const std::vector<memprof::Frame> &CallStack) {698llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::endianness::little>699HashBuilder;700for (auto &F : CallStack)701HashBuilder.add(F.Function, F.LineOffset, F.Column);702llvm::BLAKE3Result<8> Hash = HashBuilder.final();703uint64_t Id;704std::memcpy(&Id, Hash.data(), sizeof(Hash));705return Id;706}707708static AllocationType addCallStack(CallStackTrie &AllocTrie,709const AllocationInfo *AllocInfo) {710SmallVector<uint64_t> StackIds;711for (const auto &StackFrame : AllocInfo->CallStack)712StackIds.push_back(computeStackId(StackFrame));713auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),714AllocInfo->Info.getAllocCount(),715AllocInfo->Info.getTotalLifetime());716uint64_t TotalSize = 0;717if (MemProfReportHintedSizes) {718TotalSize = AllocInfo->Info.getTotalSize();719assert(TotalSize);720}721AllocTrie.addCallStack(AllocType, StackIds, TotalSize);722return AllocType;723}724725// Helper to compare the InlinedCallStack computed from an instruction's debug726// info to a list of Frames from profile data (either the allocation data or a727// callsite). 
For callsites, the StartIndex to use in the Frame array may be728// non-zero.729static bool730stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,731ArrayRef<uint64_t> InlinedCallStack,732unsigned StartIndex = 0) {733auto StackFrame = ProfileCallStack.begin() + StartIndex;734auto InlCallStackIter = InlinedCallStack.begin();735for (; StackFrame != ProfileCallStack.end() &&736InlCallStackIter != InlinedCallStack.end();737++StackFrame, ++InlCallStackIter) {738uint64_t StackId = computeStackId(*StackFrame);739if (StackId != *InlCallStackIter)740return false;741}742// Return true if we found and matched all stack ids from the call743// instruction.744return InlCallStackIter == InlinedCallStack.end();745}746747static bool isNewWithHotColdVariant(Function *Callee,748const TargetLibraryInfo &TLI) {749if (!Callee)750return false;751LibFunc Func;752if (!TLI.getLibFunc(*Callee, Func))753return false;754switch (Func) {755case LibFunc_Znwm:756case LibFunc_ZnwmRKSt9nothrow_t:757case LibFunc_ZnwmSt11align_val_t:758case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t:759case LibFunc_Znam:760case LibFunc_ZnamRKSt9nothrow_t:761case LibFunc_ZnamSt11align_val_t:762case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t:763return true;764case LibFunc_Znwm12__hot_cold_t:765case LibFunc_ZnwmRKSt9nothrow_t12__hot_cold_t:766case LibFunc_ZnwmSt11align_val_t12__hot_cold_t:767case LibFunc_ZnwmSt11align_val_tRKSt9nothrow_t12__hot_cold_t:768case LibFunc_Znam12__hot_cold_t:769case LibFunc_ZnamRKSt9nothrow_t12__hot_cold_t:770case LibFunc_ZnamSt11align_val_t12__hot_cold_t:771case LibFunc_ZnamSt11align_val_tRKSt9nothrow_t12__hot_cold_t:772return ClMemProfMatchHotColdNew;773default:774return false;775}776}777778struct AllocMatchInfo {779uint64_t TotalSize = 0;780AllocationType AllocType = AllocationType::None;781bool Matched = false;782};783784static void785readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,786const TargetLibraryInfo &TLI,787std::map<uint64_t, AllocMatchInfo> 
&FullStackIdToAllocMatchInfo) {788auto &Ctx = M.getContext();789// Previously we used getIRPGOFuncName() here. If F is local linkage,790// getIRPGOFuncName() returns FuncName with prefix 'FileName;'. But791// llvm-profdata uses FuncName in dwarf to create GUID which doesn't792// contain FileName's prefix. It caused local linkage function can't793// find MemProfRecord. So we use getName() now.794// 'unique-internal-linkage-names' can make MemProf work better for local795// linkage function.796auto FuncName = F.getName();797auto FuncGUID = Function::getGUID(FuncName);798std::optional<memprof::MemProfRecord> MemProfRec;799auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);800if (Err) {801handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {802auto Err = IPE.get();803bool SkipWarning = false;804LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName805<< ": ");806if (Err == instrprof_error::unknown_function) {807NumOfMemProfMissing++;808SkipWarning = !PGOWarnMissing;809LLVM_DEBUG(dbgs() << "unknown function");810} else if (Err == instrprof_error::hash_mismatch) {811NumOfMemProfMismatch++;812SkipWarning =813NoPGOWarnMismatch ||814(NoPGOWarnMismatchComdatWeak &&815(F.hasComdat() ||816F.getLinkage() == GlobalValue::AvailableExternallyLinkage));817LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");818}819820if (SkipWarning)821return;822823std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +824Twine(" Hash = ") + std::to_string(FuncGUID))825.str();826827Ctx.diagnose(828DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));829});830return;831}832833NumOfMemProfFunc++;834835// Detect if there are non-zero column numbers in the profile. If not,836// treat all column numbers as 0 when matching (i.e. ignore any non-zero837// columns in the IR). 
The profiled binary might have been built with838// column numbers disabled, for example.839bool ProfileHasColumns = false;840841// Build maps of the location hash to all profile data with that leaf location842// (allocation info and the callsites).843std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;844// For the callsites we need to record the index of the associated frame in845// the frame array (see comments below where the map entries are added).846std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *, unsigned>>>847LocHashToCallSites;848for (auto &AI : MemProfRec->AllocSites) {849NumOfMemProfAllocContextProfiles++;850// Associate the allocation info with the leaf frame. The later matching851// code will match any inlined call sequences in the IR with a longer prefix852// of call stack frames.853uint64_t StackId = computeStackId(AI.CallStack[0]);854LocHashToAllocInfo[StackId].insert(&AI);855ProfileHasColumns |= AI.CallStack[0].Column;856}857for (auto &CS : MemProfRec->CallSites) {858NumOfMemProfCallSiteProfiles++;859// Need to record all frames from leaf up to and including this function,860// as any of these may or may not have been inlined at this point.861unsigned Idx = 0;862for (auto &StackFrame : CS) {863uint64_t StackId = computeStackId(StackFrame);864LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));865ProfileHasColumns |= StackFrame.Column;866// Once we find this function, we can stop recording.867if (StackFrame.Function == FuncGUID)868break;869}870assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);871}872873auto GetOffset = [](const DILocation *DIL) {874return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &8750xffff;876};877878// Now walk the instructions, looking up the associated profile data using879// debug locations.880for (auto &BB : F) {881for (auto &I : BB) {882if (I.isDebugOrPseudoInst())883continue;884// We are only interested in calls (allocation or interior call 
stack885// context calls).886auto *CI = dyn_cast<CallBase>(&I);887if (!CI)888continue;889auto *CalledFunction = CI->getCalledFunction();890if (CalledFunction && CalledFunction->isIntrinsic())891continue;892// List of call stack ids computed from the location hashes on debug893// locations (leaf to inlined at root).894std::vector<uint64_t> InlinedCallStack;895// Was the leaf location found in one of the profile maps?896bool LeafFound = false;897// If leaf was found in a map, iterators pointing to its location in both898// of the maps. It might exist in neither, one, or both (the latter case899// can happen because we don't currently have discriminators to900// distinguish the case when a single line/col maps to both an allocation901// and another callsite).902std::map<uint64_t, std::set<const AllocationInfo *>>::iterator903AllocInfoIter;904std::map<uint64_t, std::set<std::pair<const std::vector<Frame> *,905unsigned>>>::iterator CallSitesIter;906for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;907DIL = DIL->getInlinedAt()) {908// Use C++ linkage name if possible. Need to compile with909// -fdebug-info-for-profiling to get linkage name.910StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();911if (Name.empty())912Name = DIL->getScope()->getSubprogram()->getName();913auto CalleeGUID = Function::getGUID(Name);914auto StackId = computeStackId(CalleeGUID, GetOffset(DIL),915ProfileHasColumns ? DIL->getColumn() : 0);916// Check if we have found the profile's leaf frame. If yes, collect917// the rest of the call's inlined context starting here. 
If not, see if918// we find a match further up the inlined context (in case the profile919// was missing debug frames at the leaf).920if (!LeafFound) {921AllocInfoIter = LocHashToAllocInfo.find(StackId);922CallSitesIter = LocHashToCallSites.find(StackId);923if (AllocInfoIter != LocHashToAllocInfo.end() ||924CallSitesIter != LocHashToCallSites.end())925LeafFound = true;926}927if (LeafFound)928InlinedCallStack.push_back(StackId);929}930// If leaf not in either of the maps, skip inst.931if (!LeafFound)932continue;933934// First add !memprof metadata from allocation info, if we found the935// instruction's leaf location in that map, and if the rest of the936// instruction's locations match the prefix Frame locations on an937// allocation context with the same leaf.938if (AllocInfoIter != LocHashToAllocInfo.end()) {939// Only consider allocations via new, to reduce unnecessary metadata,940// since those are the only allocations that will be targeted initially.941if (!isNewWithHotColdVariant(CI->getCalledFunction(), TLI))942continue;943// We may match this instruction's location list to multiple MIB944// contexts. 
Add them to a Trie specialized for trimming the contexts to945// the minimal needed to disambiguate contexts with unique behavior.946CallStackTrie AllocTrie;947for (auto *AllocInfo : AllocInfoIter->second) {948// Check the full inlined call stack against this one.949// If we found and thus matched all frames on the call, include950// this MIB.951if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,952InlinedCallStack)) {953NumOfMemProfMatchedAllocContexts++;954auto AllocType = addCallStack(AllocTrie, AllocInfo);955// Record information about the allocation if match info printing956// was requested.957if (ClPrintMemProfMatchInfo) {958auto FullStackId = computeFullStackId(AllocInfo->CallStack);959FullStackIdToAllocMatchInfo[FullStackId] = {960AllocInfo->Info.getTotalSize(), AllocType, /*Matched=*/true};961}962}963}964// We might not have matched any to the full inlined call stack.965// But if we did, create and attach metadata, or a function attribute if966// all contexts have identical profiled behavior.967if (!AllocTrie.empty()) {968NumOfMemProfMatchedAllocs++;969// MemprofMDAttached will be false if a function attribute was970// attached.971bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);972assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));973if (MemprofMDAttached) {974// Add callsite metadata for the instruction's location list so that975// it simpler later on to identify which part of the MIB contexts976// are from this particular instruction (including during inlining,977// when the callsite metadata will be updated appropriately).978// FIXME: can this be changed to strip out the matching stack979// context ids from the MIB contexts and not add any callsite980// metadata here to save space?981addCallsiteMetadata(I, InlinedCallStack, Ctx);982}983}984continue;985}986987// Otherwise, add callsite metadata. 
If we reach here then we found the988// instruction's leaf location in the callsites map and not the allocation989// map.990assert(CallSitesIter != LocHashToCallSites.end());991for (auto CallStackIdx : CallSitesIter->second) {992// If we found and thus matched all frames on the call, create and993// attach call stack metadata.994if (stackFrameIncludesInlinedCallStack(995*CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {996NumOfMemProfMatchedCallSites++;997addCallsiteMetadata(I, InlinedCallStack, Ctx);998// Only need to find one with a matching call stack and add a single999// callsite metadata.1000break;1001}1002}1003}1004}1005}10061007MemProfUsePass::MemProfUsePass(std::string MemoryProfileFile,1008IntrusiveRefCntPtr<vfs::FileSystem> FS)1009: MemoryProfileFileName(MemoryProfileFile), FS(FS) {1010if (!FS)1011this->FS = vfs::getRealFileSystem();1012}10131014PreservedAnalyses MemProfUsePass::run(Module &M, ModuleAnalysisManager &AM) {1015LLVM_DEBUG(dbgs() << "Read in memory profile:");1016auto &Ctx = M.getContext();1017auto ReaderOrErr = IndexedInstrProfReader::create(MemoryProfileFileName, *FS);1018if (Error E = ReaderOrErr.takeError()) {1019handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {1020Ctx.diagnose(1021DiagnosticInfoPGOProfile(MemoryProfileFileName.data(), EI.message()));1022});1023return PreservedAnalyses::all();1024}10251026std::unique_ptr<IndexedInstrProfReader> MemProfReader =1027std::move(ReaderOrErr.get());1028if (!MemProfReader) {1029Ctx.diagnose(DiagnosticInfoPGOProfile(1030MemoryProfileFileName.data(), StringRef("Cannot get MemProfReader")));1031return PreservedAnalyses::all();1032}10331034if (!MemProfReader->hasMemoryProfile()) {1035Ctx.diagnose(DiagnosticInfoPGOProfile(MemoryProfileFileName.data(),1036"Not a memory profile"));1037return PreservedAnalyses::all();1038}10391040auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();10411042// Map from the stack has of each allocation context in the function 
profiles1043// to the total profiled size (bytes), allocation type, and whether we matched1044// it to an allocation in the IR.1045std::map<uint64_t, AllocMatchInfo> FullStackIdToAllocMatchInfo;10461047for (auto &F : M) {1048if (F.isDeclaration())1049continue;10501051const TargetLibraryInfo &TLI = FAM.getResult<TargetLibraryAnalysis>(F);1052readMemprof(M, F, MemProfReader.get(), TLI, FullStackIdToAllocMatchInfo);1053}10541055if (ClPrintMemProfMatchInfo) {1056for (const auto &[Id, Info] : FullStackIdToAllocMatchInfo)1057errs() << "MemProf " << getAllocTypeAttributeString(Info.AllocType)1058<< " context with id " << Id << " has total profiled size "1059<< Info.TotalSize << (Info.Matched ? " is" : " not")1060<< " matched\n";1061}10621063return PreservedAnalyses::none();1064}106510661067