Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/InstrProfiling.cpp
35266 views
//===-- InstrProfiling.cpp - Frontend instrumentation based profiling -----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass lowers instrprof_* intrinsics emitted by an instrumentor.9// It also builds the data structures and initialization code needed for10// updating execution counts and emitting the profile at runtime.11//12//===----------------------------------------------------------------------===//1314#include "llvm/Transforms/Instrumentation/InstrProfiling.h"15#include "llvm/ADT/ArrayRef.h"16#include "llvm/ADT/STLExtras.h"17#include "llvm/ADT/SmallVector.h"18#include "llvm/ADT/StringRef.h"19#include "llvm/ADT/Twine.h"20#include "llvm/Analysis/BlockFrequencyInfo.h"21#include "llvm/Analysis/BranchProbabilityInfo.h"22#include "llvm/Analysis/LoopInfo.h"23#include "llvm/Analysis/TargetLibraryInfo.h"24#include "llvm/IR/Attributes.h"25#include "llvm/IR/BasicBlock.h"26#include "llvm/IR/CFG.h"27#include "llvm/IR/Constant.h"28#include "llvm/IR/Constants.h"29#include "llvm/IR/DIBuilder.h"30#include "llvm/IR/DerivedTypes.h"31#include "llvm/IR/DiagnosticInfo.h"32#include "llvm/IR/Dominators.h"33#include "llvm/IR/Function.h"34#include "llvm/IR/GlobalValue.h"35#include "llvm/IR/GlobalVariable.h"36#include "llvm/IR/IRBuilder.h"37#include "llvm/IR/Instruction.h"38#include "llvm/IR/Instructions.h"39#include "llvm/IR/IntrinsicInst.h"40#include "llvm/IR/MDBuilder.h"41#include "llvm/IR/Module.h"42#include "llvm/IR/Type.h"43#include "llvm/InitializePasses.h"44#include "llvm/Pass.h"45#include "llvm/ProfileData/InstrProf.h"46#include "llvm/ProfileData/InstrProfCorrelator.h"47#include "llvm/Support/Casting.h"48#include "llvm/Support/CommandLine.h"49#include "llvm/Support/Error.h"50#include 
"llvm/Support/ErrorHandling.h"51#include "llvm/TargetParser/Triple.h"52#include "llvm/Transforms/Instrumentation.h"53#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"54#include "llvm/Transforms/Utils/BasicBlockUtils.h"55#include "llvm/Transforms/Utils/ModuleUtils.h"56#include "llvm/Transforms/Utils/SSAUpdater.h"57#include <algorithm>58#include <cassert>59#include <cstdint>60#include <string>6162using namespace llvm;6364#define DEBUG_TYPE "instrprof"6566namespace llvm {67// Command line option to enable vtable value profiling. Defined in68// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=69extern cl::opt<bool> EnableVTableValueProfiling;70// TODO: Remove -debug-info-correlate in next LLVM release, in favor of71// -profile-correlate=debug-info.72cl::opt<bool> DebugInfoCorrelate(73"debug-info-correlate",74cl::desc("Use debug info to correlate profiles. (Deprecated, use "75"-profile-correlate=debug-info)"),76cl::init(false));7778cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate(79"profile-correlate",80cl::desc("Use debug info or binary file to correlate profiles."),81cl::init(InstrProfCorrelator::NONE),82cl::values(clEnumValN(InstrProfCorrelator::NONE, "",83"No profile correlation"),84clEnumValN(InstrProfCorrelator::DEBUG_INFO, "debug-info",85"Use debug info to correlate"),86clEnumValN(InstrProfCorrelator::BINARY, "binary",87"Use binary to correlate")));88} // namespace llvm8990namespace {9192cl::opt<bool> DoHashBasedCounterSplit(93"hash-based-counter-split",94cl::desc("Rename counter variable of a comdat function based on cfg hash"),95cl::init(true));9697cl::opt<bool>98RuntimeCounterRelocation("runtime-counter-relocation",99cl::desc("Enable relocating counters at runtime."),100cl::init(false));101102cl::opt<bool> ValueProfileStaticAlloc(103"vp-static-alloc",104cl::desc("Do static counter allocation for value profiler"),105cl::init(true));106107cl::opt<double> NumCountersPerValueSite(108"vp-counters-per-site",109cl::desc("The 
average number of profile counters allocated "110"per value profiling site."),111// This is set to a very small value because in real programs, only112// a very small percentage of value sites have non-zero targets, e.g, 1/30.113// For those sites with non-zero profile, the average number of targets114// is usually smaller than 2.115cl::init(1.0));116117cl::opt<bool> AtomicCounterUpdateAll(118"instrprof-atomic-counter-update-all",119cl::desc("Make all profile counter updates atomic (for testing only)"),120cl::init(false));121122cl::opt<bool> AtomicCounterUpdatePromoted(123"atomic-counter-update-promoted",124cl::desc("Do counter update using atomic fetch add "125" for promoted counters only"),126cl::init(false));127128cl::opt<bool> AtomicFirstCounter(129"atomic-first-counter",130cl::desc("Use atomic fetch add for first counter in a function (usually "131"the entry counter)"),132cl::init(false));133134// If the option is not specified, the default behavior about whether135// counter promotion is done depends on how instrumentaiton lowering136// pipeline is setup, i.e., the default value of true of this option137// does not mean the promotion will be done by default. 
Explicitly138// setting this option can override the default behavior.139cl::opt<bool> DoCounterPromotion("do-counter-promotion",140cl::desc("Do counter register promotion"),141cl::init(false));142cl::opt<unsigned> MaxNumOfPromotionsPerLoop(143"max-counter-promotions-per-loop", cl::init(20),144cl::desc("Max number counter promotions per loop to avoid"145" increasing register pressure too much"));146147// A debug option148cl::opt<int>149MaxNumOfPromotions("max-counter-promotions", cl::init(-1),150cl::desc("Max number of allowed counter promotions"));151152cl::opt<unsigned> SpeculativeCounterPromotionMaxExiting(153"speculative-counter-promotion-max-exiting", cl::init(3),154cl::desc("The max number of exiting blocks of a loop to allow "155" speculative counter promotion"));156157cl::opt<bool> SpeculativeCounterPromotionToLoop(158"speculative-counter-promotion-to-loop",159cl::desc("When the option is false, if the target block is in a loop, "160"the promotion will be disallowed unless the promoted counter "161" update can be further/iteratively promoted into an acyclic "162" region."));163164cl::opt<bool> IterativeCounterPromotion(165"iterative-counter-promotion", cl::init(true),166cl::desc("Allow counter promotion across the whole loop nest."));167168cl::opt<bool> SkipRetExitBlock(169"skip-ret-exit-block", cl::init(true),170cl::desc("Suppress counter promotion if exit blocks contain ret."));171172static cl::opt<bool> SampledInstr("sampled-instrumentation", cl::ZeroOrMore,173cl::init(false),174cl::desc("Do PGO instrumentation sampling"));175176static cl::opt<unsigned> SampledInstrPeriod(177"sampled-instr-period",178cl::desc("Set the profile instrumentation sample period. For each sample "179"period, a fixed number of consecutive samples will be recorded. "180"The number is controlled by 'sampled-instr-burst-duration' flag. 
"181"The default sample period of 65535 is optimized for generating "182"efficient code that leverages unsigned integer wrapping in "183"overflow."),184cl::init(65535));185186static cl::opt<unsigned> SampledInstrBurstDuration(187"sampled-instr-burst-duration",188cl::desc("Set the profile instrumentation burst duration, which can range "189"from 0 to one less than the value of 'sampled-instr-period'. "190"This number of samples will be recorded for each "191"'sampled-instr-period' count update. Setting to 1 enables "192"simple sampling, in which case it is recommended to set "193"'sampled-instr-period' to a prime number."),194cl::init(200));195196using LoadStorePair = std::pair<Instruction *, Instruction *>;197198static uint64_t getIntModuleFlagOrZero(const Module &M, StringRef Flag) {199auto *MD = dyn_cast_or_null<ConstantAsMetadata>(M.getModuleFlag(Flag));200if (!MD)201return 0;202203// If the flag is a ConstantAsMetadata, it should be an integer representable204// in 64-bits.205return cast<ConstantInt>(MD->getValue())->getZExtValue();206}207208static bool enablesValueProfiling(const Module &M) {209return isIRPGOFlagSet(&M) ||210getIntModuleFlagOrZero(M, "EnableValueProfiling") != 0;211}212213// Conservatively returns true if value profiling is enabled.214static bool profDataReferencedByCode(const Module &M) {215return enablesValueProfiling(M);216}217218class InstrLowerer final {219public:220InstrLowerer(Module &M, const InstrProfOptions &Options,221std::function<const TargetLibraryInfo &(Function &F)> GetTLI,222bool IsCS)223: M(M), Options(Options), TT(Triple(M.getTargetTriple())), IsCS(IsCS),224GetTLI(GetTLI), DataReferencedByCode(profDataReferencedByCode(M)) {}225226bool lower();227228private:229Module &M;230const InstrProfOptions Options;231const Triple TT;232// Is this lowering for the context-sensitive instrumentation.233const bool IsCS;234235std::function<const TargetLibraryInfo &(Function &F)> GetTLI;236237const bool DataReferencedByCode;238239struct 
PerFunctionProfileData {240uint32_t NumValueSites[IPVK_Last + 1] = {};241GlobalVariable *RegionCounters = nullptr;242GlobalVariable *DataVar = nullptr;243GlobalVariable *RegionBitmaps = nullptr;244uint32_t NumBitmapBytes = 0;245246PerFunctionProfileData() = default;247};248DenseMap<GlobalVariable *, PerFunctionProfileData> ProfileDataMap;249// Key is virtual table variable, value is 'VTableProfData' in the form of250// GlobalVariable.251DenseMap<GlobalVariable *, GlobalVariable *> VTableDataMap;252/// If runtime relocation is enabled, this maps functions to the load253/// instruction that produces the profile relocation bias.254DenseMap<const Function *, LoadInst *> FunctionToProfileBiasMap;255std::vector<GlobalValue *> CompilerUsedVars;256std::vector<GlobalValue *> UsedVars;257std::vector<GlobalVariable *> ReferencedNames;258// The list of virtual table variables of which the VTableProfData is259// collected.260std::vector<GlobalVariable *> ReferencedVTables;261GlobalVariable *NamesVar = nullptr;262size_t NamesSize = 0;263264/// The instance of [[alwaysinline]] rmw_or(ptr, i8).265/// This is name-insensitive.266Function *RMWOrFunc = nullptr;267268// vector of counter load/store pairs to be register promoted.269std::vector<LoadStorePair> PromotionCandidates;270271int64_t TotalCountersPromoted = 0;272273/// Lower instrumentation intrinsics in the function. 
Returns true if there274/// any lowering.275bool lowerIntrinsics(Function *F);276277/// Register-promote counter loads and stores in loops.278void promoteCounterLoadStores(Function *F);279280/// Returns true if relocating counters at runtime is enabled.281bool isRuntimeCounterRelocationEnabled() const;282283/// Returns true if profile counter update register promotion is enabled.284bool isCounterPromotionEnabled() const;285286/// Return true if profile sampling is enabled.287bool isSamplingEnabled() const;288289/// Count the number of instrumented value sites for the function.290void computeNumValueSiteCounts(InstrProfValueProfileInst *Ins);291292/// Replace instrprof.value.profile with a call to runtime library.293void lowerValueProfileInst(InstrProfValueProfileInst *Ins);294295/// Replace instrprof.cover with a store instruction to the coverage byte.296void lowerCover(InstrProfCoverInst *Inc);297298/// Replace instrprof.timestamp with a call to299/// INSTR_PROF_PROFILE_SET_TIMESTAMP.300void lowerTimestamp(InstrProfTimestampInst *TimestampInstruction);301302/// Replace instrprof.increment with an increment of the appropriate value.303void lowerIncrement(InstrProfIncrementInst *Inc);304305/// Force emitting of name vars for unused functions.306void lowerCoverageData(GlobalVariable *CoverageNamesVar);307308/// Replace instrprof.mcdc.tvbitmask.update with a shift and or instruction309/// using the index represented by the a temp value into a bitmap.310void lowerMCDCTestVectorBitmapUpdate(InstrProfMCDCTVBitmapUpdate *Ins);311312/// Get the Bias value for data to access mmap-ed area.313/// Create it if it hasn't been seen.314GlobalVariable *getOrCreateBiasVar(StringRef VarName);315316/// Compute the address of the counter value that this profiling instruction317/// acts on.318Value *getCounterAddress(InstrProfCntrInstBase *I);319320/// Lower the incremental instructions under profile sampling predicates.321void doSampling(Instruction *I);322323/// Get the region 
counters for an increment, creating them if necessary.324///325/// If the counter array doesn't yet exist, the profile data variables326/// referring to them will also be created.327GlobalVariable *getOrCreateRegionCounters(InstrProfCntrInstBase *Inc);328329/// Create the region counters.330GlobalVariable *createRegionCounters(InstrProfCntrInstBase *Inc,331StringRef Name,332GlobalValue::LinkageTypes Linkage);333334/// Create [[alwaysinline]] rmw_or(ptr, i8).335/// This doesn't update `RMWOrFunc`.336Function *createRMWOrFunc();337338/// Get the call to `rmw_or`.339/// Create the instance if it is unknown.340CallInst *getRMWOrCall(Value *Addr, Value *Val);341342/// Compute the address of the test vector bitmap that this profiling343/// instruction acts on.344Value *getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I);345346/// Get the region bitmaps for an increment, creating them if necessary.347///348/// If the bitmap array doesn't yet exist, the profile data variables349/// referring to them will also be created.350GlobalVariable *getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc);351352/// Create the MC/DC bitmap as a byte-aligned array of bytes associated with353/// an MC/DC Decision region. The number of bytes required is indicated by354/// the intrinsic used (type InstrProfMCDCBitmapInstBase). 
This is called355/// as part of setupProfileSection() and is conceptually very similar to356/// what is done for profile data counters in createRegionCounters().357GlobalVariable *createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,358StringRef Name,359GlobalValue::LinkageTypes Linkage);360361/// Set Comdat property of GV, if required.362void maybeSetComdat(GlobalVariable *GV, GlobalObject *GO, StringRef VarName);363364/// Setup the sections into which counters and bitmaps are allocated.365GlobalVariable *setupProfileSection(InstrProfInstBase *Inc,366InstrProfSectKind IPSK);367368/// Create INSTR_PROF_DATA variable for counters and bitmaps.369void createDataVariable(InstrProfCntrInstBase *Inc);370371/// Get the counters for virtual table values, creating them if necessary.372void getOrCreateVTableProfData(GlobalVariable *GV);373374/// Emit the section with compressed function names.375void emitNameData();376377/// Emit the section with compressed vtable names.378void emitVTableNames();379380/// Emit value nodes section for value profiling.381void emitVNodes();382383/// Emit runtime registration functions for each profile data variable.384void emitRegistration();385386/// Emit the necessary plumbing to pull in the runtime initialization.387/// Returns true if a change was made.388bool emitRuntimeHook();389390/// Add uses of our data variables and runtime hook.391void emitUses();392393/// Create a static initializer for our data, on platforms that need it,394/// and for any profile output file that was specified.395void emitInitialization();396};397398///399/// A helper class to promote one counter RMW operation in the loop400/// into register update.401///402/// RWM update for the counter will be sinked out of the loop after403/// the transformation.404///405class PGOCounterPromoterHelper : public LoadAndStorePromoter {406public:407PGOCounterPromoterHelper(408Instruction *L, Instruction *S, SSAUpdater &SSA, Value *Init,409BasicBlock *PH, ArrayRef<BasicBlock *> 
ExitBlocks,410ArrayRef<Instruction *> InsertPts,411DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,412LoopInfo &LI)413: LoadAndStorePromoter({L, S}, SSA), Store(S), ExitBlocks(ExitBlocks),414InsertPts(InsertPts), LoopToCandidates(LoopToCands), LI(LI) {415assert(isa<LoadInst>(L));416assert(isa<StoreInst>(S));417SSA.AddAvailableValue(PH, Init);418}419420void doExtraRewritesBeforeFinalDeletion() override {421for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) {422BasicBlock *ExitBlock = ExitBlocks[i];423Instruction *InsertPos = InsertPts[i];424// Get LiveIn value into the ExitBlock. If there are multiple425// predecessors, the value is defined by a PHI node in this426// block.427Value *LiveInValue = SSA.GetValueInMiddleOfBlock(ExitBlock);428Value *Addr = cast<StoreInst>(Store)->getPointerOperand();429Type *Ty = LiveInValue->getType();430IRBuilder<> Builder(InsertPos);431if (auto *AddrInst = dyn_cast_or_null<IntToPtrInst>(Addr)) {432// If isRuntimeCounterRelocationEnabled() is true then the address of433// the store instruction is computed with two instructions in434// InstrProfiling::getCounterAddress(). 
We need to copy those435// instructions to this block to compute Addr correctly.436// %BiasAdd = add i64 ptrtoint <__profc_>, <__llvm_profile_counter_bias>437// %Addr = inttoptr i64 %BiasAdd to i64*438auto *OrigBiasInst = dyn_cast<BinaryOperator>(AddrInst->getOperand(0));439assert(OrigBiasInst->getOpcode() == Instruction::BinaryOps::Add);440Value *BiasInst = Builder.Insert(OrigBiasInst->clone());441Addr = Builder.CreateIntToPtr(BiasInst,442PointerType::getUnqual(Ty->getContext()));443}444if (AtomicCounterUpdatePromoted)445// automic update currently can only be promoted across the current446// loop, not the whole loop nest.447Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, LiveInValue,448MaybeAlign(),449AtomicOrdering::SequentiallyConsistent);450else {451LoadInst *OldVal = Builder.CreateLoad(Ty, Addr, "pgocount.promoted");452auto *NewVal = Builder.CreateAdd(OldVal, LiveInValue);453auto *NewStore = Builder.CreateStore(NewVal, Addr);454455// Now update the parent loop's candidate list:456if (IterativeCounterPromotion) {457auto *TargetLoop = LI.getLoopFor(ExitBlock);458if (TargetLoop)459LoopToCandidates[TargetLoop].emplace_back(OldVal, NewStore);460}461}462}463}464465private:466Instruction *Store;467ArrayRef<BasicBlock *> ExitBlocks;468ArrayRef<Instruction *> InsertPts;469DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;470LoopInfo &LI;471};472473/// A helper class to do register promotion for all profile counter474/// updates in a loop.475///476class PGOCounterPromoter {477public:478PGOCounterPromoter(479DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCands,480Loop &CurLoop, LoopInfo &LI, BlockFrequencyInfo *BFI)481: LoopToCandidates(LoopToCands), L(CurLoop), LI(LI), BFI(BFI) {482483// Skip collection of ExitBlocks and InsertPts for loops that will not be484// able to have counters promoted.485SmallVector<BasicBlock *, 8> LoopExitBlocks;486SmallPtrSet<BasicBlock *, 8> BlockSet;487488L.getExitBlocks(LoopExitBlocks);489if 
(!isPromotionPossible(&L, LoopExitBlocks))490return;491492for (BasicBlock *ExitBlock : LoopExitBlocks) {493if (BlockSet.insert(ExitBlock).second &&494llvm::none_of(predecessors(ExitBlock), [&](const BasicBlock *Pred) {495return llvm::isPresplitCoroSuspendExitEdge(*Pred, *ExitBlock);496})) {497ExitBlocks.push_back(ExitBlock);498InsertPts.push_back(&*ExitBlock->getFirstInsertionPt());499}500}501}502503bool run(int64_t *NumPromoted) {504// Skip 'infinite' loops:505if (ExitBlocks.size() == 0)506return false;507508// Skip if any of the ExitBlocks contains a ret instruction.509// This is to prevent dumping of incomplete profile -- if the510// the loop is a long running loop and dump is called in the middle511// of the loop, the result profile is incomplete.512// FIXME: add other heuristics to detect long running loops.513if (SkipRetExitBlock) {514for (auto *BB : ExitBlocks)515if (isa<ReturnInst>(BB->getTerminator()))516return false;517}518519unsigned MaxProm = getMaxNumOfPromotionsInLoop(&L);520if (MaxProm == 0)521return false;522523unsigned Promoted = 0;524for (auto &Cand : LoopToCandidates[&L]) {525526SmallVector<PHINode *, 4> NewPHIs;527SSAUpdater SSA(&NewPHIs);528Value *InitVal = ConstantInt::get(Cand.first->getType(), 0);529530// If BFI is set, we will use it to guide the promotions.531if (BFI) {532auto *BB = Cand.first->getParent();533auto InstrCount = BFI->getBlockProfileCount(BB);534if (!InstrCount)535continue;536auto PreheaderCount = BFI->getBlockProfileCount(L.getLoopPreheader());537// If the average loop trip count is not greater than 1.5, we skip538// promotion.539if (PreheaderCount && (*PreheaderCount * 3) >= (*InstrCount * 2))540continue;541}542543PGOCounterPromoterHelper Promoter(Cand.first, Cand.second, SSA, InitVal,544L.getLoopPreheader(), ExitBlocks,545InsertPts, LoopToCandidates, LI);546Promoter.run(SmallVector<Instruction *, 2>({Cand.first, Cand.second}));547Promoted++;548if (Promoted >= MaxProm)549break;550551(*NumPromoted)++;552if 
(MaxNumOfPromotions != -1 && *NumPromoted >= MaxNumOfPromotions)553break;554}555556LLVM_DEBUG(dbgs() << Promoted << " counters promoted for loop (depth="557<< L.getLoopDepth() << ")\n");558return Promoted != 0;559}560561private:562bool allowSpeculativeCounterPromotion(Loop *LP) {563SmallVector<BasicBlock *, 8> ExitingBlocks;564L.getExitingBlocks(ExitingBlocks);565// Not considierered speculative.566if (ExitingBlocks.size() == 1)567return true;568if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)569return false;570return true;571}572573// Check whether the loop satisfies the basic conditions needed to perform574// Counter Promotions.575bool576isPromotionPossible(Loop *LP,577const SmallVectorImpl<BasicBlock *> &LoopExitBlocks) {578// We can't insert into a catchswitch.579if (llvm::any_of(LoopExitBlocks, [](BasicBlock *Exit) {580return isa<CatchSwitchInst>(Exit->getTerminator());581}))582return false;583584if (!LP->hasDedicatedExits())585return false;586587BasicBlock *PH = LP->getLoopPreheader();588if (!PH)589return false;590591return true;592}593594// Returns the max number of Counter Promotions for LP.595unsigned getMaxNumOfPromotionsInLoop(Loop *LP) {596SmallVector<BasicBlock *, 8> LoopExitBlocks;597LP->getExitBlocks(LoopExitBlocks);598if (!isPromotionPossible(LP, LoopExitBlocks))599return 0;600601SmallVector<BasicBlock *, 8> ExitingBlocks;602LP->getExitingBlocks(ExitingBlocks);603604// If BFI is set, we do more aggressive promotions based on BFI.605if (BFI)606return (unsigned)-1;607608// Not considierered speculative.609if (ExitingBlocks.size() == 1)610return MaxNumOfPromotionsPerLoop;611612if (ExitingBlocks.size() > SpeculativeCounterPromotionMaxExiting)613return 0;614615// Whether the target block is in a loop does not matter:616if (SpeculativeCounterPromotionToLoop)617return MaxNumOfPromotionsPerLoop;618619// Now check the target block:620unsigned MaxProm = MaxNumOfPromotionsPerLoop;621for (auto *TargetBlock : LoopExitBlocks) {622auto *TargetLoop 
= LI.getLoopFor(TargetBlock);623if (!TargetLoop)624continue;625unsigned MaxPromForTarget = getMaxNumOfPromotionsInLoop(TargetLoop);626unsigned PendingCandsInTarget = LoopToCandidates[TargetLoop].size();627MaxProm =628std::min(MaxProm, std::max(MaxPromForTarget, PendingCandsInTarget) -629PendingCandsInTarget);630}631return MaxProm;632}633634DenseMap<Loop *, SmallVector<LoadStorePair, 8>> &LoopToCandidates;635SmallVector<BasicBlock *, 8> ExitBlocks;636SmallVector<Instruction *, 8> InsertPts;637Loop &L;638LoopInfo &LI;639BlockFrequencyInfo *BFI;640};641642enum class ValueProfilingCallType {643// Individual values are tracked. Currently used for indiret call target644// profiling.645Default,646647// MemOp: the memop size value profiling.648MemOp649};650651} // end anonymous namespace652653PreservedAnalyses InstrProfilingLoweringPass::run(Module &M,654ModuleAnalysisManager &AM) {655FunctionAnalysisManager &FAM =656AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();657auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {658return FAM.getResult<TargetLibraryAnalysis>(F);659};660InstrLowerer Lowerer(M, Options, GetTLI, IsCS);661if (!Lowerer.lower())662return PreservedAnalyses::all();663664return PreservedAnalyses::none();665}666667//668// Perform instrumentation sampling.669//670// There are 3 favors of sampling:671// (1) Full burst sampling: We transform:672// Increment_Instruction;673// to:674// if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {675// Increment_Instruction;676// }677// __llvm_profile_sampling__ += 1;678// if (__llvm_profile_sampling__ >= SampledInstrPeriod) {679// __llvm_profile_sampling__ = 0;680// }681//682// "__llvm_profile_sampling__" is a thread-local global shared by all PGO683// counters (value-instrumentation and edge instrumentation).684//685// (2) Fast burst sampling:686// "__llvm_profile_sampling__" variable is an unsigned type, meaning it will687// wrap around to zero when overflows. 
// In this case, the second check is
// unnecessary, so we won't generate check2 when the SampledInstrPeriod is
// set to 65535 (64K - 1). The code after the transformation:
//   if (__llvm_profile_sampling__ < SampledInstrBurstDuration) {
//     Increment_Instruction;
//   }
//   __llvm_profile_sampling__ += 1;
//
// (3) Simple sampling:
// When SampledInstrBurstDuration is set to 1, we do a simple sampling:
//   __llvm_profile_sampling__ += 1;
//   if (__llvm_profile_sampling__ >= SampledInstrPeriod) {
//     __llvm_profile_sampling__ = 0;
//     Increment_Instruction;
//   }
//
// Note that the code snippet after the transformation can still be counter
// promoted. However, with sampling enabled, counter updates are expected to
// be infrequent, making the benefits of counter promotion negligible.
// Moreover, counter promotion can potentially cause issues in server
// applications, particularly when the counters are dumped without a clean
// exit. To mitigate this risk, counter promotion is disabled by default when
// sampling is enabled.
This behavior can be overridden using the internal710// option.711void InstrLowerer::doSampling(Instruction *I) {712if (!isSamplingEnabled())713return;714715unsigned SampledBurstDuration = SampledInstrBurstDuration.getValue();716unsigned SampledPeriod = SampledInstrPeriod.getValue();717if (SampledBurstDuration >= SampledPeriod) {718report_fatal_error(719"SampledPeriod needs to be greater than SampledBurstDuration");720}721bool UseShort = (SampledPeriod <= USHRT_MAX);722bool IsSimpleSampling = (SampledBurstDuration == 1);723// If (SampledBurstDuration == 1 && SampledPeriod == 65535), generate724// the simple sampling style code.725bool IsFastSampling = (!IsSimpleSampling && SampledPeriod == 65535);726727auto GetConstant = [UseShort](IRBuilder<> &Builder, uint32_t C) {728if (UseShort)729return Builder.getInt16(C);730else731return Builder.getInt32(C);732};733734IntegerType *SamplingVarTy;735if (UseShort)736SamplingVarTy = Type::getInt16Ty(M.getContext());737else738SamplingVarTy = Type::getInt32Ty(M.getContext());739auto *SamplingVar =740M.getGlobalVariable(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));741assert(SamplingVar && "SamplingVar not set properly");742743// Create the condition for checking the burst duration.744Instruction *SamplingVarIncr;745Value *NewSamplingVarVal;746MDBuilder MDB(I->getContext());747MDNode *BranchWeight;748IRBuilder<> CondBuilder(I);749auto *LoadSamplingVar = CondBuilder.CreateLoad(SamplingVarTy, SamplingVar);750if (IsSimpleSampling) {751// For the simple sampling, just create the load and increments.752IRBuilder<> IncBuilder(I);753NewSamplingVarVal =754IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));755SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);756} else {757// For the bust-sampling, create the conditonal update.758auto *DurationCond = CondBuilder.CreateICmpULE(759LoadSamplingVar, GetConstant(CondBuilder, SampledBurstDuration));760BranchWeight = 
MDB.createBranchWeights(761SampledBurstDuration, SampledPeriod + 1 - SampledBurstDuration);762Instruction *ThenTerm = SplitBlockAndInsertIfThen(763DurationCond, I, /* Unreachable */ false, BranchWeight);764IRBuilder<> IncBuilder(I);765NewSamplingVarVal =766IncBuilder.CreateAdd(LoadSamplingVar, GetConstant(IncBuilder, 1));767SamplingVarIncr = IncBuilder.CreateStore(NewSamplingVarVal, SamplingVar);768I->moveBefore(ThenTerm);769}770771if (IsFastSampling)772return;773774// Create the condtion for checking the period.775Instruction *ThenTerm, *ElseTerm;776IRBuilder<> PeriodCondBuilder(SamplingVarIncr);777auto *PeriodCond = PeriodCondBuilder.CreateICmpUGE(778NewSamplingVarVal, GetConstant(PeriodCondBuilder, SampledPeriod));779BranchWeight = MDB.createBranchWeights(1, SampledPeriod);780SplitBlockAndInsertIfThenElse(PeriodCond, SamplingVarIncr, &ThenTerm,781&ElseTerm, BranchWeight);782783// For the simple sampling, the counter update happens in sampling var reset.784if (IsSimpleSampling)785I->moveBefore(ThenTerm);786787IRBuilder<> ResetBuilder(ThenTerm);788ResetBuilder.CreateStore(GetConstant(ResetBuilder, 0), SamplingVar);789SamplingVarIncr->moveBefore(ElseTerm);790}791792bool InstrLowerer::lowerIntrinsics(Function *F) {793bool MadeChange = false;794PromotionCandidates.clear();795SmallVector<InstrProfInstBase *, 8> InstrProfInsts;796797// To ensure compatibility with sampling, we save the intrinsics into798// a buffer to prevent potential breakage of the iterator (as the799// intrinsics will be moved to a different BB).800for (BasicBlock &BB : *F) {801for (Instruction &Instr : llvm::make_early_inc_range(BB)) {802if (auto *IP = dyn_cast<InstrProfInstBase>(&Instr))803InstrProfInsts.push_back(IP);804}805}806807for (auto *Instr : InstrProfInsts) {808doSampling(Instr);809if (auto *IPIS = dyn_cast<InstrProfIncrementInstStep>(Instr)) {810lowerIncrement(IPIS);811MadeChange = true;812} else if (auto *IPI = dyn_cast<InstrProfIncrementInst>(Instr)) 
{813lowerIncrement(IPI);814MadeChange = true;815} else if (auto *IPC = dyn_cast<InstrProfTimestampInst>(Instr)) {816lowerTimestamp(IPC);817MadeChange = true;818} else if (auto *IPC = dyn_cast<InstrProfCoverInst>(Instr)) {819lowerCover(IPC);820MadeChange = true;821} else if (auto *IPVP = dyn_cast<InstrProfValueProfileInst>(Instr)) {822lowerValueProfileInst(IPVP);823MadeChange = true;824} else if (auto *IPMP = dyn_cast<InstrProfMCDCBitmapParameters>(Instr)) {825IPMP->eraseFromParent();826MadeChange = true;827} else if (auto *IPBU = dyn_cast<InstrProfMCDCTVBitmapUpdate>(Instr)) {828lowerMCDCTestVectorBitmapUpdate(IPBU);829MadeChange = true;830}831}832833if (!MadeChange)834return false;835836promoteCounterLoadStores(F);837return true;838}839840bool InstrLowerer::isRuntimeCounterRelocationEnabled() const {841// Mach-O don't support weak external references.842if (TT.isOSBinFormatMachO())843return false;844845if (RuntimeCounterRelocation.getNumOccurrences() > 0)846return RuntimeCounterRelocation;847848// Fuchsia uses runtime counter relocation by default.849return TT.isOSFuchsia();850}851852bool InstrLowerer::isSamplingEnabled() const {853if (SampledInstr.getNumOccurrences() > 0)854return SampledInstr;855return Options.Sampling;856}857858bool InstrLowerer::isCounterPromotionEnabled() const {859if (DoCounterPromotion.getNumOccurrences() > 0)860return DoCounterPromotion;861862return Options.DoCounterPromotion;863}864865void InstrLowerer::promoteCounterLoadStores(Function *F) {866if (!isCounterPromotionEnabled())867return;868869DominatorTree DT(*F);870LoopInfo LI(DT);871DenseMap<Loop *, SmallVector<LoadStorePair, 8>> LoopPromotionCandidates;872873std::unique_ptr<BlockFrequencyInfo> BFI;874if (Options.UseBFIInPromotion) {875std::unique_ptr<BranchProbabilityInfo> BPI;876BPI.reset(new BranchProbabilityInfo(*F, LI, &GetTLI(*F)));877BFI.reset(new BlockFrequencyInfo(*F, *BPI, LI));878}879880for (const auto &LoadStore : PromotionCandidates) {881auto *CounterLoad = 
LoadStore.first;882auto *CounterStore = LoadStore.second;883BasicBlock *BB = CounterLoad->getParent();884Loop *ParentLoop = LI.getLoopFor(BB);885if (!ParentLoop)886continue;887LoopPromotionCandidates[ParentLoop].emplace_back(CounterLoad, CounterStore);888}889890SmallVector<Loop *, 4> Loops = LI.getLoopsInPreorder();891892// Do a post-order traversal of the loops so that counter updates can be893// iteratively hoisted outside the loop nest.894for (auto *Loop : llvm::reverse(Loops)) {895PGOCounterPromoter Promoter(LoopPromotionCandidates, *Loop, LI, BFI.get());896Promoter.run(&TotalCountersPromoted);897}898}899900static bool needsRuntimeHookUnconditionally(const Triple &TT) {901// On Fuchsia, we only need runtime hook if any counters are present.902if (TT.isOSFuchsia())903return false;904905return true;906}907908/// Check if the module contains uses of any profiling intrinsics.909static bool containsProfilingIntrinsics(Module &M) {910auto containsIntrinsic = [&](int ID) {911if (auto *F = M.getFunction(Intrinsic::getName(ID)))912return !F->use_empty();913return false;914};915return containsIntrinsic(llvm::Intrinsic::instrprof_cover) ||916containsIntrinsic(llvm::Intrinsic::instrprof_increment) ||917containsIntrinsic(llvm::Intrinsic::instrprof_increment_step) ||918containsIntrinsic(llvm::Intrinsic::instrprof_timestamp) ||919containsIntrinsic(llvm::Intrinsic::instrprof_value_profile);920}921922bool InstrLowerer::lower() {923bool MadeChange = false;924bool NeedsRuntimeHook = needsRuntimeHookUnconditionally(TT);925if (NeedsRuntimeHook)926MadeChange = emitRuntimeHook();927928if (!IsCS && isSamplingEnabled())929createProfileSamplingVar(M);930931bool ContainsProfiling = containsProfilingIntrinsics(M);932GlobalVariable *CoverageNamesVar =933M.getNamedGlobal(getCoverageUnusedNamesVarName());934// Improve compile time by avoiding linear scans when there is no work.935if (!ContainsProfiling && !CoverageNamesVar)936return MadeChange;937938// We did not know how many value sites 
there would be inside939// the instrumented function. This is counting the number of instrumented940// target value sites to enter it as field in the profile data variable.941for (Function &F : M) {942InstrProfCntrInstBase *FirstProfInst = nullptr;943for (BasicBlock &BB : F) {944for (auto I = BB.begin(), E = BB.end(); I != E; I++) {945if (auto *Ind = dyn_cast<InstrProfValueProfileInst>(I))946computeNumValueSiteCounts(Ind);947else {948if (FirstProfInst == nullptr &&949(isa<InstrProfIncrementInst>(I) || isa<InstrProfCoverInst>(I)))950FirstProfInst = dyn_cast<InstrProfCntrInstBase>(I);951// If the MCDCBitmapParameters intrinsic seen, create the bitmaps.952if (const auto &Params = dyn_cast<InstrProfMCDCBitmapParameters>(I))953static_cast<void>(getOrCreateRegionBitmaps(Params));954}955}956}957958// Use a profile intrinsic to create the region counters and data variable.959// Also create the data variable based on the MCDCParams.960if (FirstProfInst != nullptr) {961static_cast<void>(getOrCreateRegionCounters(FirstProfInst));962}963}964965if (EnableVTableValueProfiling)966for (GlobalVariable &GV : M.globals())967// Global variables with type metadata are virtual table variables.968if (GV.hasMetadata(LLVMContext::MD_type))969getOrCreateVTableProfData(&GV);970971for (Function &F : M)972MadeChange |= lowerIntrinsics(&F);973974if (CoverageNamesVar) {975lowerCoverageData(CoverageNamesVar);976MadeChange = true;977}978979if (!MadeChange)980return false;981982emitVNodes();983emitNameData();984emitVTableNames();985986// Emit runtime hook for the cases where the target does not unconditionally987// require pulling in profile runtime, and coverage is enabled on code that is988// not eliminated by the front-end, e.g. 
unused functions with internal989// linkage.990if (!NeedsRuntimeHook && ContainsProfiling)991emitRuntimeHook();992993emitRegistration();994emitUses();995emitInitialization();996return true;997}998999static FunctionCallee getOrInsertValueProfilingCall(1000Module &M, const TargetLibraryInfo &TLI,1001ValueProfilingCallType CallType = ValueProfilingCallType::Default) {1002LLVMContext &Ctx = M.getContext();1003auto *ReturnTy = Type::getVoidTy(M.getContext());10041005AttributeList AL;1006if (auto AK = TLI.getExtAttrForI32Param(false))1007AL = AL.addParamAttribute(M.getContext(), 2, AK);10081009assert((CallType == ValueProfilingCallType::Default ||1010CallType == ValueProfilingCallType::MemOp) &&1011"Must be Default or MemOp");1012Type *ParamTypes[] = {1013#define VALUE_PROF_FUNC_PARAM(ParamType, ParamName, ParamLLVMType) ParamLLVMType1014#include "llvm/ProfileData/InstrProfData.inc"1015};1016auto *ValueProfilingCallTy =1017FunctionType::get(ReturnTy, ArrayRef(ParamTypes), false);1018StringRef FuncName = CallType == ValueProfilingCallType::Default1019? getInstrProfValueProfFuncName()1020: getInstrProfValueProfMemOpFuncName();1021return M.getOrInsertFunction(FuncName, ValueProfilingCallTy, AL);1022}10231024void InstrLowerer::computeNumValueSiteCounts(InstrProfValueProfileInst *Ind) {1025GlobalVariable *Name = Ind->getName();1026uint64_t ValueKind = Ind->getValueKind()->getZExtValue();1027uint64_t Index = Ind->getIndex()->getZExtValue();1028auto &PD = ProfileDataMap[Name];1029PD.NumValueSites[ValueKind] =1030std::max(PD.NumValueSites[ValueKind], (uint32_t)(Index + 1));1031}10321033void InstrLowerer::lowerValueProfileInst(InstrProfValueProfileInst *Ind) {1034// TODO: Value profiling heavily depends on the data section which is omitted1035// in lightweight mode. 
We need to move the value profile pointer to the1036// Counter struct to get this working.1037assert(1038!DebugInfoCorrelate && ProfileCorrelate == InstrProfCorrelator::NONE &&1039"Value profiling is not yet supported with lightweight instrumentation");1040GlobalVariable *Name = Ind->getName();1041auto It = ProfileDataMap.find(Name);1042assert(It != ProfileDataMap.end() && It->second.DataVar &&1043"value profiling detected in function with no counter incerement");10441045GlobalVariable *DataVar = It->second.DataVar;1046uint64_t ValueKind = Ind->getValueKind()->getZExtValue();1047uint64_t Index = Ind->getIndex()->getZExtValue();1048for (uint32_t Kind = IPVK_First; Kind < ValueKind; ++Kind)1049Index += It->second.NumValueSites[Kind];10501051IRBuilder<> Builder(Ind);1052bool IsMemOpSize = (Ind->getValueKind()->getZExtValue() ==1053llvm::InstrProfValueKind::IPVK_MemOPSize);1054CallInst *Call = nullptr;1055auto *TLI = &GetTLI(*Ind->getFunction());10561057// To support value profiling calls within Windows exception handlers, funclet1058// information contained within operand bundles needs to be copied over to1059// the library call. 
This is required for the IR to be processed by the1060// WinEHPrepare pass.1061SmallVector<OperandBundleDef, 1> OpBundles;1062Ind->getOperandBundlesAsDefs(OpBundles);1063if (!IsMemOpSize) {1064Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};1065Call = Builder.CreateCall(getOrInsertValueProfilingCall(M, *TLI), Args,1066OpBundles);1067} else {1068Value *Args[3] = {Ind->getTargetValue(), DataVar, Builder.getInt32(Index)};1069Call = Builder.CreateCall(1070getOrInsertValueProfilingCall(M, *TLI, ValueProfilingCallType::MemOp),1071Args, OpBundles);1072}1073if (auto AK = TLI->getExtAttrForI32Param(false))1074Call->addParamAttr(2, AK);1075Ind->replaceAllUsesWith(Call);1076Ind->eraseFromParent();1077}10781079GlobalVariable *InstrLowerer::getOrCreateBiasVar(StringRef VarName) {1080GlobalVariable *Bias = M.getGlobalVariable(VarName);1081if (Bias)1082return Bias;10831084Type *Int64Ty = Type::getInt64Ty(M.getContext());10851086// Compiler must define this variable when runtime counter relocation1087// is being used. Runtime has a weak external reference that is used1088// to check whether that's the case or not.1089Bias = new GlobalVariable(M, Int64Ty, false, GlobalValue::LinkOnceODRLinkage,1090Constant::getNullValue(Int64Ty), VarName);1091Bias->setVisibility(GlobalVariable::HiddenVisibility);1092// A definition that's weak (linkonce_odr) without being in a COMDAT1093// section wouldn't lead to link errors, but it would lead to a dead1094// data word from every TU but one. 
Putting it in COMDAT ensures there1095// will be exactly one data slot in the link.1096if (TT.supportsCOMDAT())1097Bias->setComdat(M.getOrInsertComdat(VarName));10981099return Bias;1100}11011102Value *InstrLowerer::getCounterAddress(InstrProfCntrInstBase *I) {1103auto *Counters = getOrCreateRegionCounters(I);1104IRBuilder<> Builder(I);11051106if (isa<InstrProfTimestampInst>(I))1107Counters->setAlignment(Align(8));11081109auto *Addr = Builder.CreateConstInBoundsGEP2_32(1110Counters->getValueType(), Counters, 0, I->getIndex()->getZExtValue());11111112if (!isRuntimeCounterRelocationEnabled())1113return Addr;11141115Type *Int64Ty = Type::getInt64Ty(M.getContext());1116Function *Fn = I->getParent()->getParent();1117LoadInst *&BiasLI = FunctionToProfileBiasMap[Fn];1118if (!BiasLI) {1119IRBuilder<> EntryBuilder(&Fn->getEntryBlock().front());1120auto *Bias = getOrCreateBiasVar(getInstrProfCounterBiasVarName());1121BiasLI = EntryBuilder.CreateLoad(Int64Ty, Bias, "profc_bias");1122// Bias doesn't change after startup.1123BiasLI->setMetadata(LLVMContext::MD_invariant_load,1124MDNode::get(M.getContext(), std::nullopt));1125}1126auto *Add = Builder.CreateAdd(Builder.CreatePtrToInt(Addr, Int64Ty), BiasLI);1127return Builder.CreateIntToPtr(Add, Addr->getType());1128}11291130/// Create `void [[alwaysinline]] rmw_or(uint8_t *ArgAddr, uint8_t ArgVal)`1131/// "Basic" sequence is `*ArgAddr |= ArgVal`1132Function *InstrLowerer::createRMWOrFunc() {1133auto &Ctx = M.getContext();1134auto *Int8Ty = Type::getInt8Ty(Ctx);1135Function *Fn = Function::Create(1136FunctionType::get(Type::getVoidTy(Ctx),1137{PointerType::getUnqual(Ctx), Int8Ty}, false),1138Function::LinkageTypes::PrivateLinkage, "rmw_or", M);1139Fn->addFnAttr(Attribute::AlwaysInline);1140auto *ArgAddr = Fn->getArg(0);1141auto *ArgVal = Fn->getArg(1);1142IRBuilder<> Builder(BasicBlock::Create(Ctx, "", Fn));11431144// Load profile bitmap byte.1145// %mcdc.bits = load i8, ptr %4, align 11146auto *Bitmap = Builder.CreateLoad(Int8Ty, 
ArgAddr, "mcdc.bits");11471148if (Options.Atomic || AtomicCounterUpdateAll) {1149// If ((Bitmap & Val) != Val), then execute atomic (Bitmap |= Val).1150// Note, just-loaded Bitmap might not be up-to-date. Use it just for1151// early testing.1152auto *Masked = Builder.CreateAnd(Bitmap, ArgVal);1153auto *ShouldStore = Builder.CreateICmpNE(Masked, ArgVal);1154auto *ThenTerm = BasicBlock::Create(Ctx, "", Fn);1155auto *ElseTerm = BasicBlock::Create(Ctx, "", Fn);1156// Assume updating will be rare.1157auto *Unlikely = MDBuilder(Ctx).createUnlikelyBranchWeights();1158Builder.CreateCondBr(ShouldStore, ThenTerm, ElseTerm, Unlikely);11591160IRBuilder<> ThenBuilder(ThenTerm);1161ThenBuilder.CreateAtomicRMW(AtomicRMWInst::Or, ArgAddr, ArgVal,1162MaybeAlign(), AtomicOrdering::Monotonic);1163ThenBuilder.CreateRetVoid();11641165IRBuilder<> ElseBuilder(ElseTerm);1166ElseBuilder.CreateRetVoid();11671168return Fn;1169}11701171// Perform logical OR of profile bitmap byte and shifted bit offset.1172// %8 = or i8 %mcdc.bits, %71173auto *Result = Builder.CreateOr(Bitmap, ArgVal);11741175// Store the updated profile bitmap byte.1176// store i8 %8, ptr %3, align 11177Builder.CreateStore(Result, ArgAddr);11781179// Terminator1180Builder.CreateRetVoid();11811182return Fn;1183}11841185CallInst *InstrLowerer::getRMWOrCall(Value *Addr, Value *Val) {1186if (!RMWOrFunc)1187RMWOrFunc = createRMWOrFunc();11881189return CallInst::Create(RMWOrFunc, {Addr, Val});1190}11911192Value *InstrLowerer::getBitmapAddress(InstrProfMCDCTVBitmapUpdate *I) {1193auto *Bitmaps = getOrCreateRegionBitmaps(I);1194IRBuilder<> Builder(I);11951196if (isRuntimeCounterRelocationEnabled()) {1197LLVMContext &Ctx = M.getContext();1198Ctx.diagnose(DiagnosticInfoPGOProfile(1199M.getName().data(),1200Twine("Runtime counter relocation is presently not supported for MC/DC "1201"bitmaps."),1202DS_Warning));1203}12041205return Bitmaps;1206}12071208void InstrLowerer::lowerCover(InstrProfCoverInst *CoverInstruction) {1209auto *Addr = 
getCounterAddress(CoverInstruction);1210IRBuilder<> Builder(CoverInstruction);1211// We store zero to represent that this block is covered.1212Builder.CreateStore(Builder.getInt8(0), Addr);1213CoverInstruction->eraseFromParent();1214}12151216void InstrLowerer::lowerTimestamp(1217InstrProfTimestampInst *TimestampInstruction) {1218assert(TimestampInstruction->getIndex()->isZeroValue() &&1219"timestamp probes are always the first probe for a function");1220auto &Ctx = M.getContext();1221auto *TimestampAddr = getCounterAddress(TimestampInstruction);1222IRBuilder<> Builder(TimestampInstruction);1223auto *CalleeTy =1224FunctionType::get(Type::getVoidTy(Ctx), TimestampAddr->getType(), false);1225auto Callee = M.getOrInsertFunction(1226INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SET_TIMESTAMP), CalleeTy);1227Builder.CreateCall(Callee, {TimestampAddr});1228TimestampInstruction->eraseFromParent();1229}12301231void InstrLowerer::lowerIncrement(InstrProfIncrementInst *Inc) {1232auto *Addr = getCounterAddress(Inc);12331234IRBuilder<> Builder(Inc);1235if (Options.Atomic || AtomicCounterUpdateAll ||1236(Inc->getIndex()->isZeroValue() && AtomicFirstCounter)) {1237Builder.CreateAtomicRMW(AtomicRMWInst::Add, Addr, Inc->getStep(),1238MaybeAlign(), AtomicOrdering::Monotonic);1239} else {1240Value *IncStep = Inc->getStep();1241Value *Load = Builder.CreateLoad(IncStep->getType(), Addr, "pgocount");1242auto *Count = Builder.CreateAdd(Load, Inc->getStep());1243auto *Store = Builder.CreateStore(Count, Addr);1244if (isCounterPromotionEnabled())1245PromotionCandidates.emplace_back(cast<Instruction>(Load), Store);1246}1247Inc->eraseFromParent();1248}12491250void InstrLowerer::lowerCoverageData(GlobalVariable *CoverageNamesVar) {1251ConstantArray *Names =1252cast<ConstantArray>(CoverageNamesVar->getInitializer());1253for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {1254Constant *NC = Names->getOperand(I);1255Value *V = NC->stripPointerCasts();1256assert(isa<GlobalVariable>(V) && "Missing 
reference to function name");1257GlobalVariable *Name = cast<GlobalVariable>(V);12581259Name->setLinkage(GlobalValue::PrivateLinkage);1260ReferencedNames.push_back(Name);1261if (isa<ConstantExpr>(NC))1262NC->dropAllReferences();1263}1264CoverageNamesVar->eraseFromParent();1265}12661267void InstrLowerer::lowerMCDCTestVectorBitmapUpdate(1268InstrProfMCDCTVBitmapUpdate *Update) {1269IRBuilder<> Builder(Update);1270auto *Int8Ty = Type::getInt8Ty(M.getContext());1271auto *Int32Ty = Type::getInt32Ty(M.getContext());1272auto *MCDCCondBitmapAddr = Update->getMCDCCondBitmapAddr();1273auto *BitmapAddr = getBitmapAddress(Update);12741275// Load Temp Val + BitmapIdx.1276// %mcdc.temp = load i32, ptr %mcdc.addr, align 41277auto *Temp = Builder.CreateAdd(1278Builder.CreateLoad(Int32Ty, MCDCCondBitmapAddr, "mcdc.temp"),1279Update->getBitmapIndex());12801281// Calculate byte offset using div8.1282// %1 = lshr i32 %mcdc.temp, 31283auto *BitmapByteOffset = Builder.CreateLShr(Temp, 0x3);12841285// Add byte offset to section base byte address.1286// %4 = getelementptr inbounds i8, ptr @__profbm_test, i32 %11287auto *BitmapByteAddr =1288Builder.CreateInBoundsPtrAdd(BitmapAddr, BitmapByteOffset);12891290// Calculate bit offset into bitmap byte by using div8 remainder (AND ~8)1291// %5 = and i32 %mcdc.temp, 71292// %6 = trunc i32 %5 to i81293auto *BitToSet = Builder.CreateTrunc(Builder.CreateAnd(Temp, 0x7), Int8Ty);12941295// Shift bit offset left to form a bitmap.1296// %7 = shl i8 1, %61297auto *ShiftedVal = Builder.CreateShl(Builder.getInt8(0x1), BitToSet);12981299Builder.Insert(getRMWOrCall(BitmapByteAddr, ShiftedVal));1300Update->eraseFromParent();1301}13021303/// Get the name of a profiling variable for a particular function.1304static std::string getVarName(InstrProfInstBase *Inc, StringRef Prefix,1305bool &Renamed) {1306StringRef NamePrefix = getInstrProfNameVarPrefix();1307StringRef Name = Inc->getName()->getName().substr(NamePrefix.size());1308Function *F = 
Inc->getParent()->getParent();1309Module *M = F->getParent();1310if (!DoHashBasedCounterSplit || !isIRPGOFlagSet(M) ||1311!canRenameComdatFunc(*F)) {1312Renamed = false;1313return (Prefix + Name).str();1314}1315Renamed = true;1316uint64_t FuncHash = Inc->getHash()->getZExtValue();1317SmallVector<char, 24> HashPostfix;1318if (Name.ends_with((Twine(".") + Twine(FuncHash)).toStringRef(HashPostfix)))1319return (Prefix + Name).str();1320return (Prefix + Name + "." + Twine(FuncHash)).str();1321}13221323static inline bool shouldRecordFunctionAddr(Function *F) {1324// Only record function addresses if IR PGO is enabled or if clang value1325// profiling is enabled. Recording function addresses greatly increases object1326// file size, because it prevents the inliner from deleting functions that1327// have been inlined everywhere.1328if (!profDataReferencedByCode(*F->getParent()))1329return false;13301331// Check the linkage1332bool HasAvailableExternallyLinkage = F->hasAvailableExternallyLinkage();1333if (!F->hasLinkOnceLinkage() && !F->hasLocalLinkage() &&1334!HasAvailableExternallyLinkage)1335return true;13361337// A function marked 'alwaysinline' with available_externally linkage can't1338// have its address taken. Doing so would create an undefined external ref to1339// the function, which would fail to link.1340if (HasAvailableExternallyLinkage &&1341F->hasFnAttribute(Attribute::AlwaysInline))1342return false;13431344// Prohibit function address recording if the function is both internal and1345// COMDAT. This avoids the profile data variable referencing internal symbols1346// in COMDAT.1347if (F->hasLocalLinkage() && F->hasComdat())1348return false;13491350// Check uses of this function for other than direct calls or invokes to it.1351// Inline virtual functions have linkeOnceODR linkage. When a key method1352// exists, the vtable will only be emitted in the TU where the key method1353// is defined. 
In a TU where vtable is not available, the function won't1354// be 'addresstaken'. If its address is not recorded here, the profile data1355// with missing address may be picked by the linker leading to missing1356// indirect call target info.1357return F->hasAddressTaken() || F->hasLinkOnceLinkage();1358}13591360static inline bool shouldUsePublicSymbol(Function *Fn) {1361// It isn't legal to make an alias of this function at all1362if (Fn->isDeclarationForLinker())1363return true;13641365// Symbols with local linkage can just use the symbol directly without1366// introducing relocations1367if (Fn->hasLocalLinkage())1368return true;13691370// PGO + ThinLTO + CFI cause duplicate symbols to be introduced due to some1371// unfavorable interaction between the new alias and the alias renaming done1372// in LowerTypeTests under ThinLTO. For comdat functions that would normally1373// be deduplicated, but the renaming scheme ends up preventing renaming, since1374// it creates unique names for each alias, resulting in duplicated symbols. In1375// the future, we should update the CFI related passes to migrate these1376// aliases to the same module as the jump-table they refer to will be defined.1377if (Fn->hasMetadata(LLVMContext::MD_type))1378return true;13791380// For comdat functions, an alias would need the same linkage as the original1381// function and hidden visibility. 
There is no point in adding an alias with1382// identical linkage an visibility to avoid introducing symbolic relocations.1383if (Fn->hasComdat() &&1384(Fn->getVisibility() == GlobalValue::VisibilityTypes::HiddenVisibility))1385return true;13861387// its OK to use an alias1388return false;1389}13901391static inline Constant *getFuncAddrForProfData(Function *Fn) {1392auto *Int8PtrTy = PointerType::getUnqual(Fn->getContext());1393// Store a nullptr in __llvm_profd, if we shouldn't use a real address1394if (!shouldRecordFunctionAddr(Fn))1395return ConstantPointerNull::get(Int8PtrTy);13961397// If we can't use an alias, we must use the public symbol, even though this1398// may require a symbolic relocation.1399if (shouldUsePublicSymbol(Fn))1400return Fn;14011402// When possible use a private alias to avoid symbolic relocations.1403auto *GA = GlobalAlias::create(GlobalValue::LinkageTypes::PrivateLinkage,1404Fn->getName() + ".local", Fn);14051406// When the instrumented function is a COMDAT function, we cannot use a1407// private alias. If we did, we would create reference to a local label in1408// this function's section. If this version of the function isn't selected by1409// the linker, then the metadata would introduce a reference to a discarded1410// section. So, for COMDAT functions, we need to adjust the linkage of the1411// alias. 
Using hidden visibility avoids a dynamic relocation and an entry in1412// the dynamic symbol table.1413//1414// Note that this handles COMDAT functions with visibility other than Hidden,1415// since that case is covered in shouldUsePublicSymbol()1416if (Fn->hasComdat()) {1417GA->setLinkage(Fn->getLinkage());1418GA->setVisibility(GlobalValue::VisibilityTypes::HiddenVisibility);1419}14201421// appendToCompilerUsed(*Fn->getParent(), {GA});14221423return GA;1424}14251426static bool needsRuntimeRegistrationOfSectionRange(const Triple &TT) {1427// compiler-rt uses linker support to get data/counters/name start/end for1428// ELF, COFF, Mach-O and XCOFF.1429if (TT.isOSBinFormatELF() || TT.isOSBinFormatCOFF() ||1430TT.isOSBinFormatMachO() || TT.isOSBinFormatXCOFF())1431return false;14321433return true;1434}14351436void InstrLowerer::maybeSetComdat(GlobalVariable *GV, GlobalObject *GO,1437StringRef CounterGroupName) {1438// Place lowered global variables in a comdat group if the associated function1439// or global variable is a COMDAT. This will make sure that only one copy of1440// global variable (e.g. function counters) of the COMDAT function will be1441// emitted after linking.1442bool NeedComdat = needsComdatForCounter(*GO, M);1443bool UseComdat = (NeedComdat || TT.isOSBinFormatELF());14441445if (!UseComdat)1446return;14471448// Keep in mind that this pass may run before the inliner, so we need to1449// create a new comdat group (for counters, profiling data, etc). 
If we use1450// the comdat of the parent function, that will result in relocations against1451// discarded sections.1452//1453// If the data variable is referenced by code, non-counter variables (notably1454// profiling data) and counters have to be in different comdats for COFF1455// because the Visual C++ linker will report duplicate symbol errors if there1456// are multiple external symbols with the same name marked1457// IMAGE_COMDAT_SELECT_ASSOCIATIVE.1458StringRef GroupName = TT.isOSBinFormatCOFF() && DataReferencedByCode1459? GV->getName()1460: CounterGroupName;1461Comdat *C = M.getOrInsertComdat(GroupName);14621463if (!NeedComdat) {1464// Object file format must be ELF since `UseComdat && !NeedComdat` is true.1465//1466// For ELF, when not using COMDAT, put counters, data and values into a1467// nodeduplicate COMDAT which is lowered to a zero-flag section group. This1468// allows -z start-stop-gc to discard the entire group when the function is1469// discarded.1470C->setSelectionKind(Comdat::NoDeduplicate);1471}1472GV->setComdat(C);1473// COFF doesn't allow the comdat group leader to have private linkage, so1474// upgrade private linkage to internal linkage to produce a symbol table1475// entry.1476if (TT.isOSBinFormatCOFF() && GV->hasPrivateLinkage())1477GV->setLinkage(GlobalValue::InternalLinkage);1478}14791480static inline bool shouldRecordVTableAddr(GlobalVariable *GV) {1481if (!profDataReferencedByCode(*GV->getParent()))1482return false;14831484if (!GV->hasLinkOnceLinkage() && !GV->hasLocalLinkage() &&1485!GV->hasAvailableExternallyLinkage())1486return true;14871488// This avoids the profile data from referencing internal symbols in1489// COMDAT.1490if (GV->hasLocalLinkage() && GV->hasComdat())1491return false;14921493return true;1494}14951496// FIXME: Introduce an internal alias like what's done for functions to reduce1497// the number of relocation entries.1498static inline Constant *getVTableAddrForProfData(GlobalVariable *GV) {1499auto *Int8PtrTy = 
PointerType::getUnqual(GV->getContext());15001501// Store a nullptr in __profvt_ if a real address shouldn't be used.1502if (!shouldRecordVTableAddr(GV))1503return ConstantPointerNull::get(Int8PtrTy);15041505return ConstantExpr::getBitCast(GV, Int8PtrTy);1506}15071508void InstrLowerer::getOrCreateVTableProfData(GlobalVariable *GV) {1509assert(!DebugInfoCorrelate &&1510"Value profiling is not supported with lightweight instrumentation");1511if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())1512return;15131514// Skip llvm internal global variable or __prof variables.1515if (GV->getName().starts_with("llvm.") ||1516GV->getName().starts_with("__llvm") ||1517GV->getName().starts_with("__prof"))1518return;15191520// VTableProfData already created1521auto It = VTableDataMap.find(GV);1522if (It != VTableDataMap.end() && It->second)1523return;15241525GlobalValue::LinkageTypes Linkage = GV->getLinkage();1526GlobalValue::VisibilityTypes Visibility = GV->getVisibility();15271528// This is to keep consistent with per-function profile data1529// for correctness.1530if (TT.isOSBinFormatXCOFF()) {1531Linkage = GlobalValue::InternalLinkage;1532Visibility = GlobalValue::DefaultVisibility;1533}15341535LLVMContext &Ctx = M.getContext();1536Type *DataTypes[] = {1537#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) LLVMType,1538#include "llvm/ProfileData/InstrProfData.inc"1539#undef INSTR_PROF_VTABLE_DATA1540};15411542auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));15431544// Used by INSTR_PROF_VTABLE_DATA MACRO1545Constant *VTableAddr = getVTableAddrForProfData(GV);1546const std::string PGOVTableName = getPGOName(*GV);1547// Record the length of the vtable. 
This is needed since vtable pointers1548// loaded from C++ objects might be from the middle of a vtable definition.1549uint32_t VTableSizeVal =1550M.getDataLayout().getTypeAllocSize(GV->getValueType());15511552Constant *DataVals[] = {1553#define INSTR_PROF_VTABLE_DATA(Type, LLVMType, Name, Init) Init,1554#include "llvm/ProfileData/InstrProfData.inc"1555#undef INSTR_PROF_VTABLE_DATA1556};15571558auto *Data =1559new GlobalVariable(M, DataTy, /*constant=*/false, Linkage,1560ConstantStruct::get(DataTy, DataVals),1561getInstrProfVTableVarPrefix() + PGOVTableName);15621563Data->setVisibility(Visibility);1564Data->setSection(getInstrProfSectionName(IPSK_vtab, TT.getObjectFormat()));1565Data->setAlignment(Align(8));15661567maybeSetComdat(Data, GV, Data->getName());15681569VTableDataMap[GV] = Data;15701571ReferencedVTables.push_back(GV);15721573// VTable <Hash, Addr> is used by runtime but not referenced by other1574// sections. Conservatively mark it linker retained.1575UsedVars.push_back(Data);1576}15771578GlobalVariable *InstrLowerer::setupProfileSection(InstrProfInstBase *Inc,1579InstrProfSectKind IPSK) {1580GlobalVariable *NamePtr = Inc->getName();15811582// Match the linkage and visibility of the name global.1583Function *Fn = Inc->getParent()->getParent();1584GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();1585GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();15861587// Use internal rather than private linkage so the counter variable shows up1588// in the symbol table when using debug info for correlation.1589if ((DebugInfoCorrelate ||1590ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO) &&1591TT.isOSBinFormatMachO() && Linkage == GlobalValue::PrivateLinkage)1592Linkage = GlobalValue::InternalLinkage;15931594// Due to the limitation of binder as of 2021/09/28, the duplicate weak1595// symbols in the same csect won't be discarded. 
When there are duplicate weak1596// symbols, we can NOT guarantee that the relocations get resolved to the1597// intended weak symbol, so we can not ensure the correctness of the relative1598// CounterPtr, so we have to use private linkage for counter and data symbols.1599if (TT.isOSBinFormatXCOFF()) {1600Linkage = GlobalValue::PrivateLinkage;1601Visibility = GlobalValue::DefaultVisibility;1602}1603// Move the name variable to the right section.1604bool Renamed;1605GlobalVariable *Ptr;1606StringRef VarPrefix;1607std::string VarName;1608if (IPSK == IPSK_cnts) {1609VarPrefix = getInstrProfCountersVarPrefix();1610VarName = getVarName(Inc, VarPrefix, Renamed);1611InstrProfCntrInstBase *CntrIncrement = dyn_cast<InstrProfCntrInstBase>(Inc);1612Ptr = createRegionCounters(CntrIncrement, VarName, Linkage);1613} else if (IPSK == IPSK_bitmap) {1614VarPrefix = getInstrProfBitmapVarPrefix();1615VarName = getVarName(Inc, VarPrefix, Renamed);1616InstrProfMCDCBitmapInstBase *BitmapUpdate =1617dyn_cast<InstrProfMCDCBitmapInstBase>(Inc);1618Ptr = createRegionBitmaps(BitmapUpdate, VarName, Linkage);1619} else {1620llvm_unreachable("Profile Section must be for Counters or Bitmaps");1621}16221623Ptr->setVisibility(Visibility);1624// Put the counters and bitmaps in their own sections so linkers can1625// remove unneeded sections.1626Ptr->setSection(getInstrProfSectionName(IPSK, TT.getObjectFormat()));1627Ptr->setLinkage(Linkage);1628maybeSetComdat(Ptr, Fn, VarName);1629return Ptr;1630}16311632GlobalVariable *1633InstrLowerer::createRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc,1634StringRef Name,1635GlobalValue::LinkageTypes Linkage) {1636uint64_t NumBytes = Inc->getNumBitmapBytes();1637auto *BitmapTy = ArrayType::get(Type::getInt8Ty(M.getContext()), NumBytes);1638auto GV = new GlobalVariable(M, BitmapTy, false, Linkage,1639Constant::getNullValue(BitmapTy), Name);1640GV->setAlignment(Align(1));1641return GV;1642}16431644GlobalVariable 
*1645InstrLowerer::getOrCreateRegionBitmaps(InstrProfMCDCBitmapInstBase *Inc) {1646GlobalVariable *NamePtr = Inc->getName();1647auto &PD = ProfileDataMap[NamePtr];1648if (PD.RegionBitmaps)1649return PD.RegionBitmaps;16501651// If RegionBitmaps doesn't already exist, create it by first setting up1652// the corresponding profile section.1653auto *BitmapPtr = setupProfileSection(Inc, IPSK_bitmap);1654PD.RegionBitmaps = BitmapPtr;1655PD.NumBitmapBytes = Inc->getNumBitmapBytes();1656return PD.RegionBitmaps;1657}16581659GlobalVariable *1660InstrLowerer::createRegionCounters(InstrProfCntrInstBase *Inc, StringRef Name,1661GlobalValue::LinkageTypes Linkage) {1662uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();1663auto &Ctx = M.getContext();1664GlobalVariable *GV;1665if (isa<InstrProfCoverInst>(Inc)) {1666auto *CounterTy = Type::getInt8Ty(Ctx);1667auto *CounterArrTy = ArrayType::get(CounterTy, NumCounters);1668// TODO: `Constant::getAllOnesValue()` does not yet accept an array type.1669std::vector<Constant *> InitialValues(NumCounters,1670Constant::getAllOnesValue(CounterTy));1671GV = new GlobalVariable(M, CounterArrTy, false, Linkage,1672ConstantArray::get(CounterArrTy, InitialValues),1673Name);1674GV->setAlignment(Align(1));1675} else {1676auto *CounterTy = ArrayType::get(Type::getInt64Ty(Ctx), NumCounters);1677GV = new GlobalVariable(M, CounterTy, false, Linkage,1678Constant::getNullValue(CounterTy), Name);1679GV->setAlignment(Align(8));1680}1681return GV;1682}16831684GlobalVariable *1685InstrLowerer::getOrCreateRegionCounters(InstrProfCntrInstBase *Inc) {1686GlobalVariable *NamePtr = Inc->getName();1687auto &PD = ProfileDataMap[NamePtr];1688if (PD.RegionCounters)1689return PD.RegionCounters;16901691// If RegionCounters doesn't already exist, create it by first setting up1692// the corresponding profile section.1693auto *CounterPtr = setupProfileSection(Inc, IPSK_cnts);1694PD.RegionCounters = CounterPtr;16951696if (DebugInfoCorrelate ||1697ProfileCorrelate == 
InstrProfCorrelator::DEBUG_INFO) {1698LLVMContext &Ctx = M.getContext();1699Function *Fn = Inc->getParent()->getParent();1700if (auto *SP = Fn->getSubprogram()) {1701DIBuilder DB(M, true, SP->getUnit());1702Metadata *FunctionNameAnnotation[] = {1703MDString::get(Ctx, InstrProfCorrelator::FunctionNameAttributeName),1704MDString::get(Ctx, getPGOFuncNameVarInitializer(NamePtr)),1705};1706Metadata *CFGHashAnnotation[] = {1707MDString::get(Ctx, InstrProfCorrelator::CFGHashAttributeName),1708ConstantAsMetadata::get(Inc->getHash()),1709};1710Metadata *NumCountersAnnotation[] = {1711MDString::get(Ctx, InstrProfCorrelator::NumCountersAttributeName),1712ConstantAsMetadata::get(Inc->getNumCounters()),1713};1714auto Annotations = DB.getOrCreateArray({1715MDNode::get(Ctx, FunctionNameAnnotation),1716MDNode::get(Ctx, CFGHashAnnotation),1717MDNode::get(Ctx, NumCountersAnnotation),1718});1719auto *DICounter = DB.createGlobalVariableExpression(1720SP, CounterPtr->getName(), /*LinkageName=*/StringRef(), SP->getFile(),1721/*LineNo=*/0, DB.createUnspecifiedType("Profile Data Type"),1722CounterPtr->hasLocalLinkage(), /*IsDefined=*/true, /*Expr=*/nullptr,1723/*Decl=*/nullptr, /*TemplateParams=*/nullptr, /*AlignInBits=*/0,1724Annotations);1725CounterPtr->addDebugInfo(DICounter);1726DB.finalize();1727}17281729// Mark the counter variable as used so that it isn't optimized out.1730CompilerUsedVars.push_back(PD.RegionCounters);1731}17321733// Create the data variable (if it doesn't already exist).1734createDataVariable(Inc);17351736return PD.RegionCounters;1737}17381739void InstrLowerer::createDataVariable(InstrProfCntrInstBase *Inc) {1740// When debug information is correlated to profile data, a data variable1741// is not needed.1742if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)1743return;17441745GlobalVariable *NamePtr = Inc->getName();1746auto &PD = ProfileDataMap[NamePtr];17471748// Return if data variable was already created.1749if 
(PD.DataVar)1750return;17511752LLVMContext &Ctx = M.getContext();17531754Function *Fn = Inc->getParent()->getParent();1755GlobalValue::LinkageTypes Linkage = NamePtr->getLinkage();1756GlobalValue::VisibilityTypes Visibility = NamePtr->getVisibility();17571758// Due to the limitation of binder as of 2021/09/28, the duplicate weak1759// symbols in the same csect won't be discarded. When there are duplicate weak1760// symbols, we can NOT guarantee that the relocations get resolved to the1761// intended weak symbol, so we can not ensure the correctness of the relative1762// CounterPtr, so we have to use private linkage for counter and data symbols.1763if (TT.isOSBinFormatXCOFF()) {1764Linkage = GlobalValue::PrivateLinkage;1765Visibility = GlobalValue::DefaultVisibility;1766}17671768bool NeedComdat = needsComdatForCounter(*Fn, M);1769bool Renamed;17701771// The Data Variable section is anchored to profile counters.1772std::string CntsVarName =1773getVarName(Inc, getInstrProfCountersVarPrefix(), Renamed);1774std::string DataVarName =1775getVarName(Inc, getInstrProfDataVarPrefix(), Renamed);17761777auto *Int8PtrTy = PointerType::getUnqual(Ctx);1778// Allocate statically the array of pointers to value profile nodes for1779// the current function.1780Constant *ValuesPtrExpr = ConstantPointerNull::get(Int8PtrTy);1781uint64_t NS = 0;1782for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)1783NS += PD.NumValueSites[Kind];1784if (NS > 0 && ValueProfileStaticAlloc &&1785!needsRuntimeRegistrationOfSectionRange(TT)) {1786ArrayType *ValuesTy = ArrayType::get(Type::getInt64Ty(Ctx), NS);1787auto *ValuesVar = new GlobalVariable(1788M, ValuesTy, false, Linkage, Constant::getNullValue(ValuesTy),1789getVarName(Inc, getInstrProfValuesVarPrefix(), Renamed));1790ValuesVar->setVisibility(Visibility);1791setGlobalVariableLargeSection(TT, *ValuesVar);1792ValuesVar->setSection(1793getInstrProfSectionName(IPSK_vals, 
TT.getObjectFormat()));1794ValuesVar->setAlignment(Align(8));1795maybeSetComdat(ValuesVar, Fn, CntsVarName);1796ValuesPtrExpr = ValuesVar;1797}17981799uint64_t NumCounters = Inc->getNumCounters()->getZExtValue();1800auto *CounterPtr = PD.RegionCounters;18011802uint64_t NumBitmapBytes = PD.NumBitmapBytes;18031804// Create data variable.1805auto *IntPtrTy = M.getDataLayout().getIntPtrType(M.getContext());1806auto *Int16Ty = Type::getInt16Ty(Ctx);1807auto *Int16ArrayTy = ArrayType::get(Int16Ty, IPVK_Last + 1);1808Type *DataTypes[] = {1809#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) LLVMType,1810#include "llvm/ProfileData/InstrProfData.inc"1811};1812auto *DataTy = StructType::get(Ctx, ArrayRef(DataTypes));18131814Constant *FunctionAddr = getFuncAddrForProfData(Fn);18151816Constant *Int16ArrayVals[IPVK_Last + 1];1817for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)1818Int16ArrayVals[Kind] = ConstantInt::get(Int16Ty, PD.NumValueSites[Kind]);18191820// If the data variable is not referenced by code (if we don't emit1821// @llvm.instrprof.value.profile, NS will be 0), and the counter keeps the1822// data variable live under linker GC, the data variable can be private. 
This1823// optimization applies to ELF.1824//1825// On COFF, a comdat leader cannot be local so we require DataReferencedByCode1826// to be false.1827//1828// If profd is in a deduplicate comdat, NS==0 with a hash suffix guarantees1829// that other copies must have the same CFG and cannot have value profiling.1830// If no hash suffix, other profd copies may be referenced by code.1831if (NS == 0 && !(DataReferencedByCode && NeedComdat && !Renamed) &&1832(TT.isOSBinFormatELF() ||1833(!DataReferencedByCode && TT.isOSBinFormatCOFF()))) {1834Linkage = GlobalValue::PrivateLinkage;1835Visibility = GlobalValue::DefaultVisibility;1836}1837auto *Data =1838new GlobalVariable(M, DataTy, false, Linkage, nullptr, DataVarName);1839Constant *RelativeCounterPtr;1840GlobalVariable *BitmapPtr = PD.RegionBitmaps;1841Constant *RelativeBitmapPtr = ConstantInt::get(IntPtrTy, 0);1842InstrProfSectKind DataSectionKind;1843// With binary profile correlation, profile data is not loaded into memory.1844// profile data must reference profile counter with an absolute relocation.1845if (ProfileCorrelate == InstrProfCorrelator::BINARY) {1846DataSectionKind = IPSK_covdata;1847RelativeCounterPtr = ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy);1848if (BitmapPtr != nullptr)1849RelativeBitmapPtr = ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy);1850} else {1851// Reference the counter variable with a label difference (link-time1852// constant).1853DataSectionKind = IPSK_data;1854RelativeCounterPtr =1855ConstantExpr::getSub(ConstantExpr::getPtrToInt(CounterPtr, IntPtrTy),1856ConstantExpr::getPtrToInt(Data, IntPtrTy));1857if (BitmapPtr != nullptr)1858RelativeBitmapPtr =1859ConstantExpr::getSub(ConstantExpr::getPtrToInt(BitmapPtr, IntPtrTy),1860ConstantExpr::getPtrToInt(Data, IntPtrTy));1861}18621863Constant *DataVals[] = {1864#define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Init,1865#include "llvm/ProfileData/InstrProfData.inc"1866};1867Data->setInitializer(ConstantStruct::get(DataTy, 
DataVals));18681869Data->setVisibility(Visibility);1870Data->setSection(1871getInstrProfSectionName(DataSectionKind, TT.getObjectFormat()));1872Data->setAlignment(Align(INSTR_PROF_DATA_ALIGNMENT));1873maybeSetComdat(Data, Fn, CntsVarName);18741875PD.DataVar = Data;18761877// Mark the data variable as used so that it isn't stripped out.1878CompilerUsedVars.push_back(Data);1879// Now that the linkage set by the FE has been passed to the data and counter1880// variables, reset Name variable's linkage and visibility to private so that1881// it can be removed later by the compiler.1882NamePtr->setLinkage(GlobalValue::PrivateLinkage);1883// Collect the referenced names to be used by emitNameData.1884ReferencedNames.push_back(NamePtr);1885}18861887void InstrLowerer::emitVNodes() {1888if (!ValueProfileStaticAlloc)1889return;18901891// For now only support this on platforms that do1892// not require runtime registration to discover1893// named section start/end.1894if (needsRuntimeRegistrationOfSectionRange(TT))1895return;18961897size_t TotalNS = 0;1898for (auto &PD : ProfileDataMap) {1899for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)1900TotalNS += PD.second.NumValueSites[Kind];1901}19021903if (!TotalNS)1904return;19051906uint64_t NumCounters = TotalNS * NumCountersPerValueSite;1907// Heuristic for small programs with very few total value sites.1908// The default value of vp-counters-per-site is chosen based on1909// the observation that large apps usually have a low percentage1910// of value sites that actually have any profile data, and thus1911// the average number of counters per site is low. For small1912// apps with very few sites, this may not be true. 
Bump up the1913// number of counters in this case.1914#define INSTR_PROF_MIN_VAL_COUNTS 101915if (NumCounters < INSTR_PROF_MIN_VAL_COUNTS)1916NumCounters = std::max(INSTR_PROF_MIN_VAL_COUNTS, (int)NumCounters * 2);19171918auto &Ctx = M.getContext();1919Type *VNodeTypes[] = {1920#define INSTR_PROF_VALUE_NODE(Type, LLVMType, Name, Init) LLVMType,1921#include "llvm/ProfileData/InstrProfData.inc"1922};1923auto *VNodeTy = StructType::get(Ctx, ArrayRef(VNodeTypes));19241925ArrayType *VNodesTy = ArrayType::get(VNodeTy, NumCounters);1926auto *VNodesVar = new GlobalVariable(1927M, VNodesTy, false, GlobalValue::PrivateLinkage,1928Constant::getNullValue(VNodesTy), getInstrProfVNodesVarName());1929setGlobalVariableLargeSection(TT, *VNodesVar);1930VNodesVar->setSection(1931getInstrProfSectionName(IPSK_vnodes, TT.getObjectFormat()));1932VNodesVar->setAlignment(M.getDataLayout().getABITypeAlign(VNodesTy));1933// VNodesVar is used by runtime but not referenced via relocation by other1934// sections. Conservatively make it linker retained.1935UsedVars.push_back(VNodesVar);1936}19371938void InstrLowerer::emitNameData() {1939std::string UncompressedData;19401941if (ReferencedNames.empty())1942return;19431944std::string CompressedNameStr;1945if (Error E = collectPGOFuncNameStrings(ReferencedNames, CompressedNameStr,1946DoInstrProfNameCompression)) {1947report_fatal_error(Twine(toString(std::move(E))), false);1948}19491950auto &Ctx = M.getContext();1951auto *NamesVal =1952ConstantDataArray::getString(Ctx, StringRef(CompressedNameStr), false);1953NamesVar = new GlobalVariable(M, NamesVal->getType(), true,1954GlobalValue::PrivateLinkage, NamesVal,1955getInstrProfNamesVarName());1956NamesSize = CompressedNameStr.size();1957setGlobalVariableLargeSection(TT, *NamesVar);1958NamesVar->setSection(1959ProfileCorrelate == InstrProfCorrelator::BINARY1960? 
getInstrProfSectionName(IPSK_covname, TT.getObjectFormat())1961: getInstrProfSectionName(IPSK_name, TT.getObjectFormat()));1962// On COFF, it's important to reduce the alignment down to 1 to prevent the1963// linker from inserting padding before the start of the names section or1964// between names entries.1965NamesVar->setAlignment(Align(1));1966// NamesVar is used by runtime but not referenced via relocation by other1967// sections. Conservatively make it linker retained.1968UsedVars.push_back(NamesVar);19691970for (auto *NamePtr : ReferencedNames)1971NamePtr->eraseFromParent();1972}19731974void InstrLowerer::emitVTableNames() {1975if (!EnableVTableValueProfiling || ReferencedVTables.empty())1976return;19771978// Collect the PGO names of referenced vtables and compress them.1979std::string CompressedVTableNames;1980if (Error E = collectVTableStrings(ReferencedVTables, CompressedVTableNames,1981DoInstrProfNameCompression)) {1982report_fatal_error(Twine(toString(std::move(E))), false);1983}19841985auto &Ctx = M.getContext();1986auto *VTableNamesVal = ConstantDataArray::getString(1987Ctx, StringRef(CompressedVTableNames), false /* AddNull */);1988GlobalVariable *VTableNamesVar =1989new GlobalVariable(M, VTableNamesVal->getType(), true /* constant */,1990GlobalValue::PrivateLinkage, VTableNamesVal,1991getInstrProfVTableNamesVarName());1992VTableNamesVar->setSection(1993getInstrProfSectionName(IPSK_vname, TT.getObjectFormat()));1994VTableNamesVar->setAlignment(Align(1));1995// Make VTableNames linker retained.1996UsedVars.push_back(VTableNamesVar);1997}19981999void InstrLowerer::emitRegistration() {2000if (!needsRuntimeRegistrationOfSectionRange(TT))2001return;20022003// Construct the function.2004auto *VoidTy = Type::getVoidTy(M.getContext());2005auto *VoidPtrTy = PointerType::getUnqual(M.getContext());2006auto *Int64Ty = Type::getInt64Ty(M.getContext());2007auto *RegisterFTy = FunctionType::get(VoidTy, false);2008auto *RegisterF = Function::Create(RegisterFTy, 
GlobalValue::InternalLinkage,2009getInstrProfRegFuncsName(), M);2010RegisterF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);2011if (Options.NoRedZone)2012RegisterF->addFnAttr(Attribute::NoRedZone);20132014auto *RuntimeRegisterTy = FunctionType::get(VoidTy, VoidPtrTy, false);2015auto *RuntimeRegisterF =2016Function::Create(RuntimeRegisterTy, GlobalVariable::ExternalLinkage,2017getInstrProfRegFuncName(), M);20182019IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", RegisterF));2020for (Value *Data : CompilerUsedVars)2021if (!isa<Function>(Data))2022IRB.CreateCall(RuntimeRegisterF, Data);2023for (Value *Data : UsedVars)2024if (Data != NamesVar && !isa<Function>(Data))2025IRB.CreateCall(RuntimeRegisterF, Data);20262027if (NamesVar) {2028Type *ParamTypes[] = {VoidPtrTy, Int64Ty};2029auto *NamesRegisterTy =2030FunctionType::get(VoidTy, ArrayRef(ParamTypes), false);2031auto *NamesRegisterF =2032Function::Create(NamesRegisterTy, GlobalVariable::ExternalLinkage,2033getInstrProfNamesRegFuncName(), M);2034IRB.CreateCall(NamesRegisterF, {NamesVar, IRB.getInt64(NamesSize)});2035}20362037IRB.CreateRetVoid();2038}20392040bool InstrLowerer::emitRuntimeHook() {2041// We expect the linker to be invoked with -u<hook_var> flag for Linux2042// in which case there is no need to emit the external variable.2043if (TT.isOSLinux() || TT.isOSAIX())2044return false;20452046// If the module's provided its own runtime, we don't need to do anything.2047if (M.getGlobalVariable(getInstrProfRuntimeHookVarName()))2048return false;20492050// Declare an external variable that will pull in the runtime initialization.2051auto *Int32Ty = Type::getInt32Ty(M.getContext());2052auto *Var =2053new GlobalVariable(M, Int32Ty, false, GlobalValue::ExternalLinkage,2054nullptr, getInstrProfRuntimeHookVarName());2055Var->setVisibility(GlobalValue::HiddenVisibility);20562057if (TT.isOSBinFormatELF() && !TT.isPS()) {2058// Mark the user variable as used so that it isn't stripped 
out.2059CompilerUsedVars.push_back(Var);2060} else {2061// Make a function that uses it.2062auto *User = Function::Create(FunctionType::get(Int32Ty, false),2063GlobalValue::LinkOnceODRLinkage,2064getInstrProfRuntimeHookVarUseFuncName(), M);2065User->addFnAttr(Attribute::NoInline);2066if (Options.NoRedZone)2067User->addFnAttr(Attribute::NoRedZone);2068User->setVisibility(GlobalValue::HiddenVisibility);2069if (TT.supportsCOMDAT())2070User->setComdat(M.getOrInsertComdat(User->getName()));20712072IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", User));2073auto *Load = IRB.CreateLoad(Int32Ty, Var);2074IRB.CreateRet(Load);20752076// Mark the function as used so that it isn't stripped out.2077CompilerUsedVars.push_back(User);2078}2079return true;2080}20812082void InstrLowerer::emitUses() {2083// The metadata sections are parallel arrays. Optimizers (e.g.2084// GlobalOpt/ConstantMerge) may not discard associated sections as a unit, so2085// we conservatively retain all unconditionally in the compiler.2086//2087// On ELF and Mach-O, the linker can guarantee the associated sections will be2088// retained or discarded as a unit, so llvm.compiler.used is sufficient.2089// Similarly on COFF, if prof data is not referenced by code we use one comdat2090// and ensure this GC property as well. Otherwise, we have to conservatively2091// make all of the sections retained by the linker.2092if (TT.isOSBinFormatELF() || TT.isOSBinFormatMachO() ||2093(TT.isOSBinFormatCOFF() && !DataReferencedByCode))2094appendToCompilerUsed(M, CompilerUsedVars);2095else2096appendToUsed(M, CompilerUsedVars);20972098// We do not add proper references from used metadata sections to NamesVar and2099// VNodesVar, so we have to be conservative and place them in llvm.used2100// regardless of the target,2101appendToUsed(M, UsedVars);2102}21032104void InstrLowerer::emitInitialization() {2105// Create ProfileFileName variable. 
Don't don't this for the2106// context-sensitive instrumentation lowering: This lowering is after2107// LTO/ThinLTO linking. Pass PGOInstrumentationGenCreateVar should2108// have already create the variable before LTO/ThinLTO linking.2109if (!IsCS)2110createProfileFileNameVar(M, Options.InstrProfileOutput);2111Function *RegisterF = M.getFunction(getInstrProfRegFuncsName());2112if (!RegisterF)2113return;21142115// Create the initialization function.2116auto *VoidTy = Type::getVoidTy(M.getContext());2117auto *F = Function::Create(FunctionType::get(VoidTy, false),2118GlobalValue::InternalLinkage,2119getInstrProfInitFuncName(), M);2120F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);2121F->addFnAttr(Attribute::NoInline);2122if (Options.NoRedZone)2123F->addFnAttr(Attribute::NoRedZone);21242125// Add the basic block and the necessary calls.2126IRBuilder<> IRB(BasicBlock::Create(M.getContext(), "", F));2127IRB.CreateCall(RegisterF, {});2128IRB.CreateRetVoid();21292130appendToGlobalCtors(M, F, 0);2131}21322133namespace llvm {2134// Create the variable for profile sampling.2135void createProfileSamplingVar(Module &M) {2136const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_SAMPLING_VAR));2137IntegerType *SamplingVarTy;2138Constant *ValueZero;2139if (SampledInstrPeriod.getValue() <= USHRT_MAX) {2140SamplingVarTy = Type::getInt16Ty(M.getContext());2141ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(16, 0));2142} else {2143SamplingVarTy = Type::getInt32Ty(M.getContext());2144ValueZero = Constant::getIntegerValue(SamplingVarTy, APInt(32, 0));2145}2146auto SamplingVar = new GlobalVariable(2147M, SamplingVarTy, false, GlobalValue::WeakAnyLinkage, ValueZero, VarName);2148SamplingVar->setVisibility(GlobalValue::DefaultVisibility);2149SamplingVar->setThreadLocal(true);2150Triple TT(M.getTargetTriple());2151if (TT.supportsCOMDAT()) 
{2152SamplingVar->setLinkage(GlobalValue::ExternalLinkage);2153SamplingVar->setComdat(M.getOrInsertComdat(VarName));2154}2155appendToCompilerUsed(M, SamplingVar);2156}2157} // namespace llvm215821592160