Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/CtxProfAnalysis.cpp
213765 views
//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Implementation of the contextual profile analysis, which maintains contextual9// profiling info through IPO passes.10//11//===----------------------------------------------------------------------===//1213#include "llvm/Analysis/CtxProfAnalysis.h"14#include "llvm/ADT/APInt.h"15#include "llvm/ADT/STLExtras.h"16#include "llvm/Analysis/CFG.h"17#include "llvm/IR/Analysis.h"18#include "llvm/IR/Dominators.h"19#include "llvm/IR/IntrinsicInst.h"20#include "llvm/IR/Module.h"21#include "llvm/IR/PassManager.h"22#include "llvm/ProfileData/PGOCtxProfReader.h"23#include "llvm/Support/CommandLine.h"24#include "llvm/Support/MemoryBuffer.h"25#include "llvm/Support/Path.h"26#include <deque>27#include <memory>2829#define DEBUG_TYPE "ctx_prof"3031using namespace llvm;32cl::opt<std::string>33UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,34cl::desc("Use the specified contextual profile file"));3536static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(37"ctx-profile-printer-level",38cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,39cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,40"everything", "print everything - most verbose"),41clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",42"just the yaml representation of the profile")),43cl::desc("Verbosity level of the contextual profile printer pass."));4445static cl::opt<bool> ForceIsInSpecializedModule(46"ctx-profile-force-is-specialized", cl::init(false),47cl::desc("Treat the given module as-if it were containing the "48"post-thinlink module containing the root"));4950const char *AssignGUIDPass::GUIDMetadataName = "guid";5152namespace llvm {53class ProfileAnnotatorImpl final {54friend class ProfileAnnotator;55class BBInfo;56struct EdgeInfo {57BBInfo *const Src;58BBInfo *const Dest;59std::optional<uint64_t> Count;6061explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}62};6364class BBInfo {65std::optional<uint64_t> Count;66// OutEdges is dimensioned to match the number of terminator operands.67// Entries in the vector match the index in the terminator operand list. In68// some cases - see `shouldExcludeEdge` and its implementation - an entry69// will be nullptr.70// InEdges doesn't have the above constraint.71SmallVector<EdgeInfo *> OutEdges;72SmallVector<EdgeInfo *> InEdges;73size_t UnknownCountOutEdges = 0;74size_t UnknownCountInEdges = 0;7576// Pass AssumeAllKnown when we try to propagate counts from edges to BBs -77// because all the edge counters must be known.78// Return std::nullopt if there were no edges to sum. The user can decide79// how to interpret that.80std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,81bool AssumeAllKnown) const {82std::optional<uint64_t> Sum;83for (const auto *E : Edges) {84// `Edges` may be `OutEdges`, case in which `E` could be nullptr.85if (E) {86if (!Sum.has_value())87Sum = 0;88*Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));89}90}91return Sum;92}9394bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {95assert(!Count.has_value());96Count = getEdgeSum(Edges, true);97return Count.has_value();98}99100void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {101uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);102uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;103EdgeInfo *E = nullptr;104for (auto *I : Edges)105if (I && !I->Count.has_value()) {106E = I;107#ifdef NDEBUG108break;109#else110assert((!E || E == I) &&111"Expected exactly one edge to have an unknown count, "112"found a second one");113continue;114#endif115}116assert(E && "Expected exactly one edge to have an unknown count");117assert(!E->Count.has_value());118E->Count = EdgeVal;119assert(E->Src->UnknownCountOutEdges > 0);120assert(E->Dest->UnknownCountInEdges > 0);121--E->Src->UnknownCountOutEdges;122--E->Dest->UnknownCountInEdges;123}124125public:126BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)127: Count(Count) {128// For in edges, we just want to pre-allocate enough space, since we know129// it at this stage. For out edges, we will insert edges at the indices130// corresponding to positions in this BB's terminator instruction, so we131// construct a default (nullptr values)-initialized vector. A nullptr edge132// corresponds to those that are excluded (see shouldExcludeEdge).133InEdges.reserve(NumInEdges);134OutEdges.resize(NumOutEdges);135}136137bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {138if (!UnknownCountOutEdges) {139return computeCountFrom(OutEdges);140}141return false;142}143144bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {145if (!UnknownCountInEdges) {146return computeCountFrom(InEdges);147}148return false;149}150151void addInEdge(EdgeInfo &Info) {152InEdges.push_back(&Info);153++UnknownCountInEdges;154}155156// For the out edges, we care about the position we place them in, which is157// the position in terminator instruction's list (at construction). Later,158// we build branch_weights metadata with edge frequency values matching159// these positions.160void addOutEdge(size_t Index, EdgeInfo &Info) {161OutEdges[Index] = &Info;162++UnknownCountOutEdges;163}164165bool hasCount() const { return Count.has_value(); }166167uint64_t getCount() const { return *Count; }168169bool trySetSingleUnknownInEdgeCount() {170if (UnknownCountInEdges == 1) {171setSingleUnknownEdgeCount(InEdges);172return true;173}174return false;175}176177bool trySetSingleUnknownOutEdgeCount() {178if (UnknownCountOutEdges == 1) {179setSingleUnknownEdgeCount(OutEdges);180return true;181}182return false;183}184size_t getNumOutEdges() const { return OutEdges.size(); }185186uint64_t getEdgeCount(size_t Index) const {187if (auto *E = OutEdges[Index])188return *E->Count;189return 0U;190}191};192193const Function &F;194ArrayRef<uint64_t> Counters;195// To be accessed through getBBInfo() after construction.196std::map<const BasicBlock *, BBInfo> BBInfos;197std::vector<EdgeInfo> EdgeInfos;198199// The only criteria for exclusion is faux suspend -> exit edges in presplit200// coroutines. The API serves for readability, currently.201bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {202return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);203}204205BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }206207const BBInfo &getBBInfo(const BasicBlock &BB) const {208return BBInfos.find(&BB)->second;209}210211// validation function after we propagate the counters: all BBs and edges'212// counters must have a value.213bool allCountersAreAssigned() const {214for (const auto &BBInfo : BBInfos)215if (!BBInfo.second.hasCount())216return false;217for (const auto &EdgeInfo : EdgeInfos)218if (!EdgeInfo.Count.has_value())219return false;220return true;221}222223/// Check that all paths from the entry basic block that use edges with224/// non-zero counts arrive at a basic block with no successors (i.e. "exit")225bool allTakenPathsExit() const {226std::deque<const BasicBlock *> Worklist;227DenseSet<const BasicBlock *> Visited;228Worklist.push_back(&F.getEntryBlock());229bool HitExit = false;230while (!Worklist.empty()) {231const auto *BB = Worklist.front();232Worklist.pop_front();233if (!Visited.insert(BB).second)234continue;235if (succ_size(BB) == 0) {236if (isa<UnreachableInst>(BB->getTerminator()))237return false;238HitExit = true;239continue;240}241if (succ_size(BB) == 1) {242Worklist.push_back(BB->getUniqueSuccessor());243continue;244}245const auto &BBInfo = getBBInfo(*BB);246bool HasAWayOut = false;247for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {248const auto *Succ = BB->getTerminator()->getSuccessor(I);249if (!shouldExcludeEdge(*BB, *Succ)) {250if (BBInfo.getEdgeCount(I) > 0) {251HasAWayOut = true;252Worklist.push_back(Succ);253}254}255}256if (!HasAWayOut)257return false;258}259return HitExit;260}261262bool allNonColdSelectsHaveProfile() const {263for (const auto &BB : F) {264if (getBBInfo(BB).getCount() > 0) {265for (const auto &I : BB) {266if (const auto *SI = dyn_cast<SelectInst>(&I)) {267if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(268*const_cast<SelectInst *>(SI))) {269auto Index = Inst->getIndex()->getZExtValue();270assert(Index < Counters.size());271if (Counters[Index] == 0)272return false;273}274}275}276}277}278return true;279}280281// This is an adaptation of PGOUseFunc::populateCounters.282// FIXME(mtrofin): look into factoring the code to share one implementation.283void propagateCounterValues() {284bool KeepGoing = true;285while (KeepGoing) {286KeepGoing = false;287for (const auto &BB : F) {288auto &Info = getBBInfo(BB);289if (!Info.hasCount())290KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||291Info.tryTakeCountFromKnownInEdges(BB);292if (Info.hasCount()) {293KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();294KeepGoing |= Info.trySetSingleUnknownInEdgeCount();295}296}297}298assert(allCountersAreAssigned() &&299"[ctx-prof] Expected all counters have been assigned.");300assert(allTakenPathsExit() &&301"[ctx-prof] Encountered a BB with more than one successor, where "302"all outgoing edges have a 0 count. This occurs in non-exiting "303"functions (message pumps, usually) which are not supported in the "304"contextual profiling case");305assert(allNonColdSelectsHaveProfile() &&306"[ctx-prof] All non-cold select instructions were expected to have "307"a profile.");308}309310public:311ProfileAnnotatorImpl(const Function &F, ArrayRef<uint64_t> Counters)312: F(F), Counters(Counters) {313assert(!F.isDeclaration());314assert(!Counters.empty());315size_t NrEdges = 0;316for (const auto &BB : F) {317std::optional<uint64_t> Count;318if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(319const_cast<BasicBlock &>(BB))) {320auto Index = Ins->getIndex()->getZExtValue();321assert(Index < Counters.size() &&322"The index must be inside the counters vector by construction - "323"tripping this assertion indicates a bug in how the contextual "324"profile is managed by IPO transforms");325(void)Index;326Count = Counters[Ins->getIndex()->getZExtValue()];327} else if (isa<UnreachableInst>(BB.getTerminator())) {328// The program presumably didn't crash.329Count = 0;330}331auto [It, Ins] =332BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});333(void)Ins;334assert(Ins && "We iterate through the function's BBs, no reason to "335"insert one more than once");336NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {337return !shouldExcludeEdge(BB, *Succ);338});339}340// Pre-allocate the vector, we want references to its contents to be stable.341EdgeInfos.reserve(NrEdges);342for (const auto &BB : F) {343auto &Info = getBBInfo(BB);344for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {345const auto *Succ = BB.getTerminator()->getSuccessor(I);346if (!shouldExcludeEdge(BB, *Succ)) {347auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));348Info.addOutEdge(I, EI);349getBBInfo(*Succ).addInEdge(EI);350}351}352}353assert(EdgeInfos.capacity() == NrEdges &&354"The capacity of EdgeInfos should have stayed unchanged it was "355"populated, because we need pointers to its contents to be stable");356propagateCounterValues();357}358359uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }360};361362} // namespace llvm363364ProfileAnnotator::ProfileAnnotator(const Function &F,365ArrayRef<uint64_t> RawCounters)366: PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}367368ProfileAnnotator::~ProfileAnnotator() = default;369370uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const {371return PImpl->getBBCount(BB);372}373374bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI,375uint64_t &TrueCount,376uint64_t &FalseCount) const {377const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());378TrueCount = FalseCount = 0;379if (BBInfo.getCount() == 0)380return false;381382auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI);383if (!Step)384return false;385auto Index = Step->getIndex()->getZExtValue();386assert(Index < PImpl->Counters.size() &&387"The index of the step instruction must be inside the "388"counters vector by "389"construction - tripping this assertion indicates a bug in "390"how the contextual profile is managed by IPO transforms");391auto TotalCount = BBInfo.getCount();392TrueCount = PImpl->Counters[Index];393FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);394return true;395}396397bool ProfileAnnotator::getOutgoingBranchWeights(398BasicBlock &BB, SmallVectorImpl<uint64_t> &Profile,399uint64_t &MaxCount) const {400Profile.clear();401402if (succ_size(&BB) < 2)403return false;404405auto *Term = BB.getTerminator();406Profile.resize(Term->getNumSuccessors());407408const auto &BBInfo = PImpl->getBBInfo(BB);409MaxCount = 0;410for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;411++SuccIdx) {412uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);413if (EdgeCount > MaxCount)414MaxCount = EdgeCount;415Profile[SuccIdx] = EdgeCount;416}417return MaxCount > 0;418}419420PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {421for (auto &F : M.functions()) {422if (F.isDeclaration())423continue;424if (F.getMetadata(GUIDMetadataName))425continue;426const GlobalValue::GUID GUID = F.getGUID();427F.setMetadata(GUIDMetadataName,428MDNode::get(M.getContext(),429{ConstantAsMetadata::get(ConstantInt::get(430Type::getInt64Ty(M.getContext()), GUID))}));431}432return PreservedAnalyses::none();433}434435GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {436if (F.isDeclaration()) {437assert(GlobalValue::isExternalLinkage(F.getLinkage()));438return F.getGUID();439}440auto *MD = F.getMetadata(GUIDMetadataName);441assert(MD && "guid not found for defined function");442return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))443->getValue()444->stripPointerCasts())445->getZExtValue();446}447AnalysisKey CtxProfAnalysis::Key;448449CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)450: Profile([&]() -> std::optional<StringRef> {451if (Profile)452return *Profile;453if (UseCtxProfile.getNumOccurrences())454return UseCtxProfile;455return std::nullopt;456}()) {}457458PGOContextualProfile CtxProfAnalysis::run(Module &M,459ModuleAnalysisManager &MAM) {460if (!Profile)461return {};462ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFile(*Profile);463if (auto EC = MB.getError()) {464M.getContext().emitError("could not open contextual profile file: " +465EC.message());466return {};467}468PGOCtxProfileReader Reader(MB.get()->getBuffer());469auto MaybeProfiles = Reader.loadProfiles();470if (!MaybeProfiles) {471M.getContext().emitError("contextual profile file is invalid: " +472toString(MaybeProfiles.takeError()));473return {};474}475476// FIXME: We should drive this from ThinLTO, but for the time being, use the477// module name as indicator.478// We want to *only* keep the contextual profiles in modules that capture479// context trees. That allows us to compute specific PSIs, for example.480auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {481DenseSet<GlobalValue::GUID> ProfileRootsInModule;482auto ModName = M.getName();483auto Filename = sys::path::filename(ModName);484// Drop the file extension.485Filename = Filename.substr(0, Filename.find_last_of('.'));486// See if it parses487APInt Guid;488// getAsInteger returns true if there are more chars to read other than the489// integer. So the "false" test is what we want.490if (!Filename.getAsInteger(0, Guid))491ProfileRootsInModule.insert(Guid.getZExtValue());492return ProfileRootsInModule;493};494const auto ProfileRootsInModule = DetermineRootsInModule();495PGOContextualProfile Result;496497// the logic from here on allows for modules that contain - by design - more498// than one root. We currently don't support that, because the determination499// happens based on the module name matching the root guid, but the logic can500// avoid assuming that.501if (!ProfileRootsInModule.empty()) {502Result.IsInSpecializedModule = true;503// Trim first the roots that aren't in this module.504for (auto &[RootGuid, _] :505llvm::make_early_inc_range(MaybeProfiles->Contexts))506if (!ProfileRootsInModule.contains(RootGuid))507MaybeProfiles->Contexts.erase(RootGuid);508// we can also drop the flat profiles509MaybeProfiles->FlatProfiles.clear();510}511512for (const auto &F : M) {513if (F.isDeclaration())514continue;515auto GUID = AssignGUIDPass::getGUID(F);516assert(GUID && "guid not found for defined function");517const auto &Entry = F.begin();518uint32_t MaxCounters = 0; // we expect at least a counter.519for (const auto &I : *Entry)520if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {521MaxCounters =522static_cast<uint32_t>(C->getNumCounters()->getZExtValue());523break;524}525if (!MaxCounters)526continue;527uint32_t MaxCallsites = 0;528for (const auto &BB : F)529for (const auto &I : BB)530if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {531MaxCallsites =532static_cast<uint32_t>(C->getNumCounters()->getZExtValue());533break;534}535auto [It, Ins] = Result.FuncInfo.insert(536{GUID, PGOContextualProfile::FunctionInfo(F.getName())});537(void)Ins;538assert(Ins);539It->second.NextCallsiteIndex = MaxCallsites;540It->second.NextCounterIndex = MaxCounters;541}542// If we made it this far, the Result is valid - which we mark by setting543// .Profiles.544Result.Profiles = std::move(*MaybeProfiles);545Result.initIndex();546return Result;547}548549GlobalValue::GUID550PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {551if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())552return It->first;553return 0;554}555556CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS)557: OS(OS), Mode(PrintLevel) {}558559PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,560ModuleAnalysisManager &MAM) {561CtxProfAnalysis::Result &C = MAM.getResult<CtxProfAnalysis>(M);562if (C.contexts().empty()) {563OS << "No contextual profile was provided.\n";564return PreservedAnalyses::all();565}566567if (Mode == PrintMode::Everything) {568OS << "Function Info:\n";569for (const auto &[Guid, FuncInfo] : C.FuncInfo)570OS << Guid << " : " << FuncInfo.Name571<< ". MaxCounterID: " << FuncInfo.NextCounterIndex572<< ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";573}574575if (Mode == PrintMode::Everything)576OS << "\nCurrent Profile:\n";577convertCtxProfToYaml(OS, C.profiles());578OS << "\n";579if (Mode == PrintMode::YAML)580return PreservedAnalyses::all();581582OS << "\nFlat Profile:\n";583auto Flat = C.flatten();584for (const auto &[Guid, Counters] : Flat) {585OS << Guid << " : ";586for (auto V : Counters)587OS << V << " ";588OS << "\n";589}590return PreservedAnalyses::all();591}592593InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {594if (!InstrProfCallsite::canInstrumentCallsite(CB))595return nullptr;596for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {597if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))598return IPC;599assert(!isa<CallBase>(Prev) &&600"didn't expect to find another call, that's not the callsite "601"instrumentation, before an instrumentable callsite");602}603return nullptr;604}605606InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {607for (auto &I : BB)608if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))609if (!isa<InstrProfIncrementInstStep>(&I))610return Incr;611return nullptr;612}613614InstrProfIncrementInstStep *615CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {616Instruction *Prev = &SI;617while ((Prev = Prev->getPrevNode()))618if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))619return Step;620return nullptr;621}622623template <class ProfTy>624static void preorderVisitOneRoot(ProfTy &Profile,625function_ref<void(ProfTy &)> Visitor) {626std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {627Visitor(Ctx);628for (auto &[_, SubCtxSet] : Ctx.callsites())629for (auto &[__, Subctx] : SubCtxSet)630Traverser(Subctx);631};632Traverser(Profile);633}634635template <class ProfilesTy, class ProfTy>636static void preorderVisit(ProfilesTy &Profiles,637function_ref<void(ProfTy &)> Visitor) {638for (auto &[_, P] : Profiles)639preorderVisitOneRoot<ProfTy>(P, Visitor);640}641642void PGOContextualProfile::initIndex() {643// Initialize the head of the index list for each function. We don't need it644// after this point.645DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;646for (auto &[Guid, FI] : FuncInfo)647InsertionPoints[Guid] = &FI.Index;648preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(649Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {650auto InsertIt = InsertionPoints.find(Ctx.guid());651if (InsertIt == InsertionPoints.end())652return;653// Insert at the end of the list. Since we traverse in preorder, it654// means that when we iterate the list from the beginning, we'd655// encounter the contexts in the order we would have, should we have656// performed a full preorder traversal.657InsertIt->second->Next = &Ctx;658Ctx.Previous = InsertIt->second;659InsertIt->second = &Ctx;660});661}662663bool PGOContextualProfile::isInSpecializedModule() const {664return ForceIsInSpecializedModule.getNumOccurrences() > 0665? ForceIsInSpecializedModule666: IsInSpecializedModule;667}668669void PGOContextualProfile::update(Visitor V, const Function &F) {670assert(isFunctionKnown(F));671GlobalValue::GUID G = getDefinedFunctionGUID(F);672for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;673Node = Node->Next)674V(*reinterpret_cast<PGOCtxProfContext *>(Node));675}676677void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {678if (!F)679return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,680const PGOCtxProfContext>(Profiles.Contexts, V);681assert(isFunctionKnown(*F));682GlobalValue::GUID G = getDefinedFunctionGUID(*F);683for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;684Node = Node->Next)685V(*reinterpret_cast<const PGOCtxProfContext *>(Node));686}687688const CtxProfFlatProfile PGOContextualProfile::flatten() const {689CtxProfFlatProfile Flat;690auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,691const SmallVectorImpl<uint64_t> &From,692uint64_t SamplingRate) {693if (Into.empty())694Into.resize(From.size());695assert(Into.size() == From.size() &&696"All contexts corresponding to a function should have the exact "697"same number of counters.");698for (size_t I = 0, E = Into.size(); I < E; ++I)699Into[I] += From[I] * SamplingRate;700};701702for (const auto &[_, CtxRoot] : Profiles.Contexts) {703const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();704preorderVisitOneRoot<const PGOCtxProfContext>(705CtxRoot, [&](const PGOCtxProfContext &Ctx) {706Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);707});708709for (const auto &[G, Unh] : CtxRoot.getUnhandled())710Accummulate(Flat[G], Unh, SamplingFactor);711}712// We don't sample "Flat" currently, so sampling rate is 1.713for (const auto &[G, FC] : Profiles.FlatProfiles)714Accummulate(Flat[G], FC, /*SamplingRate=*/1);715return Flat;716}717718const CtxProfFlatIndirectCallProfile719PGOContextualProfile::flattenVirtCalls() const {720CtxProfFlatIndirectCallProfile Ret;721for (const auto &[_, CtxRoot] : Profiles.Contexts) {722const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();723preorderVisitOneRoot<const PGOCtxProfContext>(724CtxRoot, [&](const PGOCtxProfContext &Ctx) {725auto &Targets = Ret[Ctx.guid()];726for (const auto &[ID, SubctxSet] : Ctx.callsites())727for (const auto &Subctx : SubctxSet)728Targets[ID][Subctx.first] +=729Subctx.second.getEntrycount() * TotalRootEntryCount;730});731}732return Ret;733}734735void CtxProfAnalysis::collectIndirectCallPromotionList(736CallBase &IC, Result &Profile,737SetVector<std::pair<CallBase *, Function *>> &Candidates) {738const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);739if (!Instr)740return;741Module &M = *IC.getParent()->getModule();742const uint32_t CallID = Instr->getIndex()->getZExtValue();743Profile.visit(744[&](const PGOCtxProfContext &Ctx) {745const auto &Targets = Ctx.callsites().find(CallID);746if (Targets == Ctx.callsites().end())747return;748for (const auto &[Guid, _] : Targets->second)749if (auto Name = Profile.getFunctionName(Guid); !Name.empty())750if (auto *Target = M.getFunction(Name))751if (Target->hasFnAttribute(Attribute::AlwaysInline))752Candidates.insert({&IC, Target});753},754IC.getCaller());755}756757758