// Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
// 35269 views
//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements PGO instrumentation using a minimum spanning tree based9// on the following paper:10// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points11// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,12// Issue 3, pp 313-32213// The idea of the algorithm based on the fact that for each node (except for14// the entry and exit), the sum of incoming edge counts equals the sum of15// outgoing edge counts. The count of edge on spanning tree can be derived from16// those edges not on the spanning tree. Knuth proves this method instruments17// the minimum number of edges.18//19// The minimal spanning tree here is actually a maximum weight tree -- on-tree20// edges have higher frequencies (more likely to execute). The idea is to21// instrument those less frequently executed edges to reduce the runtime22// overhead of instrumented binaries.23//24// This file contains two passes:25// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge26// count profile, and generates the instrumentation for indirect call27// profiling.28// (2) Pass PGOInstrumentationUse which reads the edge count profile and29// annotates the branch weights. It also reads the indirect call value30// profiling records and annotate the indirect call instructions.31//32// To get the precise counter information, These two passes need to invoke at33// the same compilation point (so they see the same IR). For pass34// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). 
For35// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and36// the profile is opened in module level and passed to each PGOUseFunc instance.37// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put38// in class FuncPGOInstrumentation.39//40// Class PGOEdge represents a CFG edge and some auxiliary information. Class41// BBInfo contains auxiliary information for each BB. These two classes are used42// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived43// class of PGOEdge and BBInfo, respectively. They contains extra data structure44// used in populating profile counters.45// The MST implementation is in Class CFGMST (CFGMST.h).46//47//===----------------------------------------------------------------------===//4849#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"50#include "ValueProfileCollector.h"51#include "llvm/ADT/APInt.h"52#include "llvm/ADT/ArrayRef.h"53#include "llvm/ADT/STLExtras.h"54#include "llvm/ADT/SmallVector.h"55#include "llvm/ADT/Statistic.h"56#include "llvm/ADT/StringRef.h"57#include "llvm/ADT/Twine.h"58#include "llvm/ADT/iterator.h"59#include "llvm/ADT/iterator_range.h"60#include "llvm/Analysis/BlockFrequencyInfo.h"61#include "llvm/Analysis/BranchProbabilityInfo.h"62#include "llvm/Analysis/CFG.h"63#include "llvm/Analysis/LoopInfo.h"64#include "llvm/Analysis/OptimizationRemarkEmitter.h"65#include "llvm/Analysis/ProfileSummaryInfo.h"66#include "llvm/Analysis/TargetLibraryInfo.h"67#include "llvm/IR/Attributes.h"68#include "llvm/IR/BasicBlock.h"69#include "llvm/IR/CFG.h"70#include "llvm/IR/Comdat.h"71#include "llvm/IR/Constant.h"72#include "llvm/IR/Constants.h"73#include "llvm/IR/DiagnosticInfo.h"74#include "llvm/IR/Dominators.h"75#include "llvm/IR/EHPersonalities.h"76#include "llvm/IR/Function.h"77#include "llvm/IR/GlobalAlias.h"78#include "llvm/IR/GlobalValue.h"79#include "llvm/IR/GlobalVariable.h"80#include "llvm/IR/IRBuilder.h"81#include 
"llvm/IR/InstVisitor.h"82#include "llvm/IR/InstrTypes.h"83#include "llvm/IR/Instruction.h"84#include "llvm/IR/Instructions.h"85#include "llvm/IR/IntrinsicInst.h"86#include "llvm/IR/Intrinsics.h"87#include "llvm/IR/LLVMContext.h"88#include "llvm/IR/MDBuilder.h"89#include "llvm/IR/Module.h"90#include "llvm/IR/PassManager.h"91#include "llvm/IR/ProfDataUtils.h"92#include "llvm/IR/ProfileSummary.h"93#include "llvm/IR/Type.h"94#include "llvm/IR/Value.h"95#include "llvm/ProfileData/InstrProf.h"96#include "llvm/ProfileData/InstrProfReader.h"97#include "llvm/Support/BranchProbability.h"98#include "llvm/Support/CRC.h"99#include "llvm/Support/Casting.h"100#include "llvm/Support/CommandLine.h"101#include "llvm/Support/DOTGraphTraits.h"102#include "llvm/Support/Debug.h"103#include "llvm/Support/Error.h"104#include "llvm/Support/ErrorHandling.h"105#include "llvm/Support/GraphWriter.h"106#include "llvm/Support/VirtualFileSystem.h"107#include "llvm/Support/raw_ostream.h"108#include "llvm/TargetParser/Triple.h"109#include "llvm/Transforms/Instrumentation.h"110#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"111#include "llvm/Transforms/Instrumentation/CFGMST.h"112#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"113#include "llvm/Transforms/Utils/BasicBlockUtils.h"114#include "llvm/Transforms/Utils/MisExpect.h"115#include "llvm/Transforms/Utils/ModuleUtils.h"116#include <algorithm>117#include <cassert>118#include <cstdint>119#include <memory>120#include <numeric>121#include <optional>122#include <stack>123#include <string>124#include <unordered_map>125#include <utility>126#include <vector>127128using namespace llvm;129using ProfileCount = Function::ProfileCount;130using VPCandidateInfo = ValueProfileCollector::CandidateInfo;131132#define DEBUG_TYPE "pgo-instrumentation"133134STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");135STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");136STATISTIC(NumOfPGOMemIntrinsics, 
"Number of mem intrinsics instrumented.");137STATISTIC(NumOfPGOEdge, "Number of edges.");138STATISTIC(NumOfPGOBB, "Number of basic-blocks.");139STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");140STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");141STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");142STATISTIC(NumOfPGOMissing, "Number of functions without profile.");143STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");144STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");145STATISTIC(NumOfCSPGOSelectInsts,146"Number of select instruction instrumented in CSPGO.");147STATISTIC(NumOfCSPGOMemIntrinsics,148"Number of mem intrinsics instrumented in CSPGO.");149STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");150STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");151STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");152STATISTIC(NumOfCSPGOFunc,153"Number of functions having valid profile counts in CSPGO.");154STATISTIC(NumOfCSPGOMismatch,155"Number of functions having mismatch profile in CSPGO.");156STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");157STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");158159// Command line option to specify the file to read profile from. This is160// mainly used for testing.161static cl::opt<std::string>162PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,163cl::value_desc("filename"),164cl::desc("Specify the path of profile data file. This is"165"mainly for test purpose."));166static cl::opt<std::string> PGOTestProfileRemappingFile(167"pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,168cl::value_desc("filename"),169cl::desc("Specify the path of profile remapping file. This is mainly for "170"test purpose."));171172// Command line option to disable value profiling. The default is false:173// i.e. 
value profiling is enabled by default. This is for debug purpose.174static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),175cl::Hidden,176cl::desc("Disable Value Profiling"));177178// Command line option to set the maximum number of VP annotations to write to179// the metadata for a single indirect call callsite.180static cl::opt<unsigned> MaxNumAnnotations(181"icp-max-annotations", cl::init(3), cl::Hidden,182cl::desc("Max number of annotations for a single indirect "183"call callsite"));184185// Command line option to set the maximum number of value annotations186// to write to the metadata for a single memop intrinsic.187static cl::opt<unsigned> MaxNumMemOPAnnotations(188"memop-max-annotations", cl::init(4), cl::Hidden,189cl::desc("Max number of preicise value annotations for a single memop"190"intrinsic"));191192// Command line option to control appending FunctionHash to the name of a COMDAT193// function. This is to avoid the hash mismatch caused by the preinliner.194static cl::opt<bool> DoComdatRenaming(195"do-comdat-renaming", cl::init(false), cl::Hidden,196cl::desc("Append function hash to the name of COMDAT function to avoid "197"function hash mismatch due to the preinliner"));198199namespace llvm {200// Command line option to enable/disable the warning about missing profile201// information.202cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),203cl::Hidden,204cl::desc("Use this option to turn on/off "205"warnings about missing profile data for "206"functions."));207208// Command line option to enable/disable the warning about a hash mismatch in209// the profile data.210cl::opt<bool>211NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,212cl::desc("Use this option to turn off/on "213"warnings about profile cfg mismatch."));214215// Command line option to enable/disable the warning about a hash mismatch in216// the profile data for Comdat functions, which often turns out to be false217// positive 
due to the pre-instrumentation inline.218cl::opt<bool> NoPGOWarnMismatchComdatWeak(219"no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,220cl::desc("The option is used to turn on/off "221"warnings about hash mismatch for comdat "222"or weak functions."));223} // namespace llvm224225// Command line option to enable/disable select instruction instrumentation.226static cl::opt<bool>227PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,228cl::desc("Use this option to turn on/off SELECT "229"instruction instrumentation. "));230231// Command line option to turn on CFG dot or text dump of raw profile counts232static cl::opt<PGOViewCountsType> PGOViewRawCounts(233"pgo-view-raw-counts", cl::Hidden,234cl::desc("A boolean option to show CFG dag or text "235"with raw profile counts from "236"profile data. See also option "237"-pgo-view-counts. To limit graph "238"display to only one function, use "239"filtering option -view-bfi-func-name."),240cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),241clEnumValN(PGOVCT_Graph, "graph", "show a graph."),242clEnumValN(PGOVCT_Text, "text", "show in text.")));243244// Command line option to enable/disable memop intrinsic call.size profiling.245static cl::opt<bool>246PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,247cl::desc("Use this option to turn on/off "248"memory intrinsic size profiling."));249250// Emit branch probability as optimization remarks.251static cl::opt<bool>252EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,253cl::desc("When this option is on, the annotated "254"branch probability will be emitted as "255"optimization remarks: -{Rpass|"256"pass-remarks}=pgo-instrumentation"));257258static cl::opt<bool> PGOInstrumentEntry(259"pgo-instrument-entry", cl::init(false), cl::Hidden,260cl::desc("Force to instrument function entry basicblock."));261262static cl::opt<bool> PGOFunctionEntryCoverage(263"pgo-function-entry-coverage", cl::Hidden,264cl::desc(265"Use 
this option to enable function entry coverage instrumentation."));266267static cl::opt<bool> PGOBlockCoverage(268"pgo-block-coverage",269cl::desc("Use this option to enable basic block coverage instrumentation"));270271static cl::opt<bool>272PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",273cl::desc("Create a dot file of CFGs with block "274"coverage inference information"));275276static cl::opt<bool> PGOTemporalInstrumentation(277"pgo-temporal-instrumentation",278cl::desc("Use this option to enable temporal instrumentation"));279280static cl::opt<bool>281PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,282cl::desc("Fix function entry count in profile use."));283284static cl::opt<bool> PGOVerifyHotBFI(285"pgo-verify-hot-bfi", cl::init(false), cl::Hidden,286cl::desc("Print out the non-match BFI count if a hot raw profile count "287"becomes non-hot, or a cold raw profile count becomes hot. "288"The print is enabled under -Rpass-analysis=pgo, or "289"internal option -pass-remakrs-analysis=pgo."));290291static cl::opt<bool> PGOVerifyBFI(292"pgo-verify-bfi", cl::init(false), cl::Hidden,293cl::desc("Print out mismatched BFI counts after setting profile metadata "294"The print is enabled under -Rpass-analysis=pgo, or "295"internal option -pass-remakrs-analysis=pgo."));296297static cl::opt<unsigned> PGOVerifyBFIRatio(298"pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,299cl::desc("Set the threshold for pgo-verify-bfi: only print out "300"mismatched BFI if the difference percentage is greater than "301"this value (in percentage)."));302303static cl::opt<unsigned> PGOVerifyBFICutoff(304"pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,305cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "306"profile count value is below."));307308static cl::opt<std::string> PGOTraceFuncHash(309"pgo-trace-func-hash", cl::init("-"), cl::Hidden,310cl::value_desc("function name"),311cl::desc("Trace the hash of the function with this 
name."));312313static cl::opt<unsigned> PGOFunctionSizeThreshold(314"pgo-function-size-threshold", cl::Hidden,315cl::desc("Do not instrument functions smaller than this threshold."));316317static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(318"pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,319cl::desc("Do not instrument functions with the number of critical edges "320" greater than this threshold."));321322extern cl::opt<unsigned> MaxNumVTableAnnotations;323324namespace llvm {325// Command line option to turn on CFG dot dump after profile annotation.326// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts327extern cl::opt<PGOViewCountsType> PGOViewCounts;328329// Command line option to specify the name of the function for CFG dump330// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=331extern cl::opt<std::string> ViewBlockFreqFuncName;332333// Command line option to enable vtable value profiling. Defined in334// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=335extern cl::opt<bool> EnableVTableValueProfiling;336extern cl::opt<bool> EnableVTableProfileUse;337extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;338} // namespace llvm339340bool shouldInstrumentEntryBB() {341return PGOInstrumentEntry ||342PGOCtxProfLoweringPass::isContextualIRPGOEnabled();343}344345// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. 
Ctx346// profiling implicitly captures indirect call cases, but not other values.347// Supporting other values is relatively straight-forward - just another counter348// range within the context.349bool isValueProfilingDisabled() {350return DisableValueProfiling ||351PGOCtxProfLoweringPass::isContextualIRPGOEnabled();352}353354// Return a string describing the branch condition that can be355// used in static branch probability heuristics:356static std::string getBranchCondString(Instruction *TI) {357BranchInst *BI = dyn_cast<BranchInst>(TI);358if (!BI || !BI->isConditional())359return std::string();360361Value *Cond = BI->getCondition();362ICmpInst *CI = dyn_cast<ICmpInst>(Cond);363if (!CI)364return std::string();365366std::string result;367raw_string_ostream OS(result);368OS << CI->getPredicate() << "_";369CI->getOperand(0)->getType()->print(OS, true);370371Value *RHS = CI->getOperand(1);372ConstantInt *CV = dyn_cast<ConstantInt>(RHS);373if (CV) {374if (CV->isZero())375OS << "_Zero";376else if (CV->isOne())377OS << "_One";378else if (CV->isMinusOne())379OS << "_MinusOne";380else381OS << "_Const";382}383OS.flush();384return result;385}386387static const char *ValueProfKindDescr[] = {388#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,389#include "llvm/ProfileData/InstrProfData.inc"390};391392// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime393// aware this is an ir_level profile so it can set the version flag.394static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {395const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));396Type *IntTy64 = Type::getInt64Ty(M.getContext());397uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);398if (IsCS)399ProfileVersion |= VARIANT_MASK_CSIR_PROF;400if (shouldInstrumentEntryBB())401ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;402if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)403ProfileVersion |= 
VARIANT_MASK_DBG_CORRELATE;404if (PGOFunctionEntryCoverage)405ProfileVersion |=406VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;407if (PGOBlockCoverage)408ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;409if (PGOTemporalInstrumentation)410ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;411auto IRLevelVersionVariable = new GlobalVariable(412M, IntTy64, true, GlobalValue::WeakAnyLinkage,413Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);414IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);415Triple TT(M.getTargetTriple());416if (TT.supportsCOMDAT()) {417IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);418IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));419}420return IRLevelVersionVariable;421}422423namespace {424425/// The select instruction visitor plays three roles specified426/// by the mode. In \c VM_counting mode, it simply counts the number of427/// select instructions. In \c VM_instrument mode, it inserts code to count428/// the number times TrueValue of select is taken. 
In \c VM_annotate mode,429/// it reads the profile data and annotate the select instruction with metadata.430enum VisitMode { VM_counting, VM_instrument, VM_annotate };431class PGOUseFunc;432433/// Instruction Visitor class to visit select instructions.434struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {435Function &F;436unsigned NSIs = 0; // Number of select instructions instrumented.437VisitMode Mode = VM_counting; // Visiting mode.438unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.439unsigned TotalNumCtrs = 0; // Total number of counters440GlobalVariable *FuncNameVar = nullptr;441uint64_t FuncHash = 0;442PGOUseFunc *UseFunc = nullptr;443bool HasSingleByteCoverage;444445SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)446: F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}447448void countSelects() {449NSIs = 0;450Mode = VM_counting;451visit(F);452}453454// Visit the IR stream and instrument all select instructions. \p455// Ind is a pointer to the counter index variable; \p TotalNC456// is the total number of counters; \p FNV is the pointer to the457// PGO function name var; \p FHash is the function hash.458void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,459uint64_t FHash) {460Mode = VM_instrument;461CurCtrIdx = Ind;462TotalNumCtrs = TotalNC;463FuncHash = FHash;464FuncNameVar = FNV;465visit(F);466}467468// Visit the IR stream and annotate all select instructions.469void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {470Mode = VM_annotate;471UseFunc = UF;472CurCtrIdx = Ind;473visit(F);474}475476void instrumentOneSelectInst(SelectInst &SI);477void annotateOneSelectInst(SelectInst &SI);478479// Visit \p SI instruction and perform tasks according to visit mode.480void visitSelectInst(SelectInst &SI);481482// Return the number of select instructions. 
This needs be called after483// countSelects().484unsigned getNumOfSelectInsts() const { return NSIs; }485};486487/// This class implements the CFG edges for the Minimum Spanning Tree (MST)488/// based instrumentation.489/// Note that the CFG can be a multi-graph. So there might be multiple edges490/// with the same SrcBB and DestBB.491struct PGOEdge {492BasicBlock *SrcBB;493BasicBlock *DestBB;494uint64_t Weight;495bool InMST = false;496bool Removed = false;497bool IsCritical = false;498499PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)500: SrcBB(Src), DestBB(Dest), Weight(W) {}501502/// Return the information string of an edge.503std::string infoString() const {504return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +505(IsCritical ? "c" : " ") + " W=" + Twine(Weight))506.str();507}508};509510/// This class stores the auxiliary information for each BB in the MST.511struct PGOBBInfo {512PGOBBInfo *Group;513uint32_t Index;514uint32_t Rank = 0;515516PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}517518/// Return the information string of this object.519std::string infoString() const {520return (Twine("Index=") + Twine(Index)).str();521}522};523524// This class implements the CFG edges. 
Note the CFG can be a multi-graph.525template <class Edge, class BBInfo> class FuncPGOInstrumentation {526private:527Function &F;528529// Is this is context-sensitive instrumentation.530bool IsCS;531532// A map that stores the Comdat group in function F.533std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;534535ValueProfileCollector VPC;536537void computeCFGHash();538void renameComdatFunction();539540public:541const TargetLibraryInfo &TLI;542std::vector<std::vector<VPCandidateInfo>> ValueSites;543SelectInstVisitor SIVisitor;544std::string FuncName;545std::string DeprecatedFuncName;546GlobalVariable *FuncNameVar;547548// CFG hash value for this function.549uint64_t FunctionHash = 0;550551// The Minimum Spanning Tree of function CFG.552CFGMST<Edge, BBInfo> MST;553554const std::optional<BlockCoverageInference> BCI;555556static std::optional<BlockCoverageInference>557constructBCI(Function &Func, bool HasSingleByteCoverage,558bool InstrumentFuncEntry) {559if (HasSingleByteCoverage)560return BlockCoverageInference(Func, InstrumentFuncEntry);561return {};562}563564// Collect all the BBs that will be instrumented, and store them in565// InstrumentBBs.566void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);567568// Give an edge, find the BB that will be instrumented.569// Return nullptr if there is no BB to be instrumented.570BasicBlock *getInstrBB(Edge *E);571572// Return the auxiliary BB information.573BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }574575// Return the auxiliary BB information if available.576BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }577578// Dump edges and BB information.579void dumpInfo(StringRef Str = "") const {580MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +581" Hash: " + Twine(FunctionHash) + "\t" + Str);582}583584FuncPGOInstrumentation(585Function &Func, TargetLibraryInfo &TLI,586std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,587bool 
CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,588BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,589bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)590: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),591TLI(TLI), ValueSites(IPVK_Last + 1),592SIVisitor(Func, HasSingleByteCoverage),593MST(F, InstrumentFuncEntry, BPI, BFI),594BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {595if (BCI && PGOViewBlockCoverageGraph)596BCI->viewBlockCoverageGraph();597// This should be done before CFG hash computation.598SIVisitor.countSelects();599ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);600if (!IsCS) {601NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();602NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();603NumOfPGOBB += MST.bbInfoSize();604ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);605if (EnableVTableValueProfiling)606ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);607} else {608NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();609NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();610NumOfCSPGOBB += MST.bbInfoSize();611}612613FuncName = getIRPGOFuncName(F);614DeprecatedFuncName = getPGOFuncName(F);615computeCFGHash();616if (!ComdatMembers.empty())617renameComdatFunction();618LLVM_DEBUG(dumpInfo("after CFGMST"));619620for (const auto &E : MST.allEdges()) {621if (E->Removed)622continue;623IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;624if (!E->InMST)625IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;626}627628if (CreateGlobalVar)629FuncNameVar = createPGOFuncNameVar(F, FuncName);630}631};632633} // end anonymous namespace634635// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index636// value of each BB in the CFG. 
The higher 32 bits are the CRC32 of the numbers637// of selects, indirect calls, mem ops and edges.638template <class Edge, class BBInfo>639void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {640std::vector<uint8_t> Indexes;641JamCRC JC;642for (auto &BB : F) {643for (BasicBlock *Succ : successors(&BB)) {644auto BI = findBBInfo(Succ);645if (BI == nullptr)646continue;647uint32_t Index = BI->Index;648for (int J = 0; J < 4; J++)649Indexes.push_back((uint8_t)(Index >> (J * 8)));650}651}652JC.update(Indexes);653654JamCRC JCH;655// The higher 32 bits.656auto updateJCH = [&JCH](uint64_t Num) {657uint8_t Data[8];658support::endian::write64le(Data, Num);659JCH.update(Data);660};661updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());662updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());663updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());664if (BCI) {665updateJCH(BCI->getInstrumentedBlocksHash());666} else {667updateJCH((uint64_t)MST.numEdges());668}669670// Hash format for context sensitive profile. 
Reserve 4 bits for other671// information.672FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();673674// Reserve bit 60-63 for other information purpose.675FunctionHash &= 0x0FFFFFFFFFFFFFFF;676if (IsCS)677NamedInstrProfRecord::setCSFlagInHash(FunctionHash);678LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"679<< " CRC = " << JC.getCRC()680<< ", Selects = " << SIVisitor.getNumOfSelectInsts()681<< ", Edges = " << MST.numEdges() << ", ICSites = "682<< ValueSites[IPVK_IndirectCallTarget].size()683<< ", Memops = " << ValueSites[IPVK_MemOPSize].size()684<< ", High32 CRC = " << JCH.getCRC()685<< ", Hash = " << FunctionHash << "\n";);686687if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))688dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash689<< " in building " << F.getParent()->getSourceFileName() << "\n";690}691692// Check if we can safely rename this Comdat function.693static bool canRenameComdat(694Function &F,695std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {696if (!DoComdatRenaming || !canRenameComdatFunc(F, true))697return false;698699// FIXME: Current only handle those Comdat groups that only containing one700// function.701// (1) For a Comdat group containing multiple functions, we need to have a702// unique postfix based on the hashes for each function. 
There is a703// non-trivial code refactoring to do this efficiently.704// (2) Variables can not be renamed, so we can not rename Comdat function in a705// group including global vars.706Comdat *C = F.getComdat();707for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {708assert(!isa<GlobalAlias>(CM.second));709Function *FM = dyn_cast<Function>(CM.second);710if (FM != &F)711return false;712}713return true;714}715716// Append the CFGHash to the Comdat function name.717template <class Edge, class BBInfo>718void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {719if (!canRenameComdat(F, ComdatMembers))720return;721std::string OrigName = F.getName().str();722std::string NewFuncName =723Twine(F.getName() + "." + Twine(FunctionHash)).str();724F.setName(Twine(NewFuncName));725GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);726FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();727Comdat *NewComdat;728Module *M = F.getParent();729// For AvailableExternallyLinkage functions, change the linkage to730// LinkOnceODR and put them into comdat. This is because after renaming, there731// is no backup external copy available for the function.732if (!F.hasComdat()) {733assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);734NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));735F.setLinkage(GlobalValue::LinkOnceODRLinkage);736F.setComdat(NewComdat);737return;738}739740// This function belongs to a single function Comdat group.741Comdat *OrigComdat = F.getComdat();742std::string NewComdatName =743Twine(OrigComdat->getName() + "." 
+ Twine(FunctionHash)).str();744NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));745NewComdat->setSelectionKind(OrigComdat->getSelectionKind());746747for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {748// Must be a function.749cast<Function>(CM.second)->setComdat(NewComdat);750}751}752753/// Collect all the BBs that will be instruments and add them to754/// `InstrumentBBs`.755template <class Edge, class BBInfo>756void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(757std::vector<BasicBlock *> &InstrumentBBs) {758if (BCI) {759for (auto &BB : F)760if (BCI->shouldInstrumentBlock(BB))761InstrumentBBs.push_back(&BB);762return;763}764765// Use a worklist as we will update the vector during the iteration.766std::vector<Edge *> EdgeList;767EdgeList.reserve(MST.numEdges());768for (const auto &E : MST.allEdges())769EdgeList.push_back(E.get());770771for (auto &E : EdgeList) {772BasicBlock *InstrBB = getInstrBB(E);773if (InstrBB)774InstrumentBBs.push_back(InstrBB);775}776}777778// Given a CFG E to be instrumented, find which BB to place the instrumented779// code. 
The function will split the critical edge if necessary.780template <class Edge, class BBInfo>781BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {782if (E->InMST || E->Removed)783return nullptr;784785BasicBlock *SrcBB = E->SrcBB;786BasicBlock *DestBB = E->DestBB;787// For a fake edge, instrument the real BB.788if (SrcBB == nullptr)789return DestBB;790if (DestBB == nullptr)791return SrcBB;792793auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {794// There are basic blocks (such as catchswitch) cannot be instrumented.795// If the returned first insertion point is the end of BB, skip this BB.796if (BB->getFirstInsertionPt() == BB->end())797return nullptr;798return BB;799};800801// Instrument the SrcBB if it has a single successor,802// otherwise, the DestBB if this is not a critical edge.803Instruction *TI = SrcBB->getTerminator();804if (TI->getNumSuccessors() <= 1)805return canInstrument(SrcBB);806if (!E->IsCritical)807return canInstrument(DestBB);808809// Some IndirectBr critical edges cannot be split by the previous810// SplitIndirectBrCriticalEdges call. Bail out.811unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);812BasicBlock *InstrBB =813isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);814if (!InstrBB) {815LLVM_DEBUG(816dbgs() << "Fail to split critical edge: not instrument this edge.\n");817return nullptr;818}819// For a critical edge, we have to split. Instrument the newly820// created BB.821IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;822LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index823<< " --> " << getBBInfo(DestBB).Index << "\n");824// Need to add two new edges. 
First one: Add new edge of SrcBB->InstrBB.825MST.addEdge(SrcBB, InstrBB, 0);826// Second one: Add new edge of InstrBB->DestBB.827Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);828NewEdge1.InMST = true;829E->Removed = true;830831return canInstrument(InstrBB);832}833834// When generating value profiling calls on Windows routines that make use of835// handler funclets for exception processing an operand bundle needs to attached836// to the called function. This routine will set \p OpBundles to contain the837// funclet information, if any is needed, that should be placed on the generated838// value profiling call for the value profile candidate call.839static void840populateEHOperandBundle(VPCandidateInfo &Cand,841DenseMap<BasicBlock *, ColorVector> &BlockColors,842SmallVectorImpl<OperandBundleDef> &OpBundles) {843auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);844if (!OrigCall)845return;846847if (!isa<IntrinsicInst>(OrigCall)) {848// The instrumentation call should belong to the same funclet as a849// non-intrinsic call, so just copy the operand bundle, if any exists.850std::optional<OperandBundleUse> ParentFunclet =851OrigCall->getOperandBundle(LLVMContext::OB_funclet);852if (ParentFunclet)853OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));854} else {855// Intrinsics or other instructions do not get funclet information from the856// front-end. 
Need to use the BlockColors that was computed by the routine857// colorEHFunclets to determine whether a funclet is needed.858if (!BlockColors.empty()) {859const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;860assert(CV.size() == 1 && "non-unique color for block!");861Instruction *EHPad = CV.front()->getFirstNonPHI();862if (EHPad->isEHPad())863OpBundles.emplace_back("funclet", EHPad);864}865}866}867868// Visit all edge and instrument the edges not in MST, and do value profiling.869// Critical edges will be split.870static void instrumentOneFunc(871Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,872BlockFrequencyInfo *BFI,873std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,874bool IsCS) {875if (!PGOBlockCoverage) {876// Split indirectbr critical edges here before computing the MST rather than877// later in getInstrBB() to avoid invalidating it.878SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);879}880881FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(882F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),883PGOBlockCoverage);884885auto Name = FuncInfo.FuncNameVar;886auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),887FuncInfo.FunctionHash);888if (PGOFunctionEntryCoverage) {889auto &EntryBB = F.getEntryBlock();890IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());891// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,892// i32 <index>)893Builder.CreateCall(894Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),895{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});896return;897}898899std::vector<BasicBlock *> InstrumentBBs;900FuncInfo.getInstrumentBBs(InstrumentBBs);901unsigned NumCounters =902InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();903904if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {905auto *CSIntrinsic =906Intrinsic::getDeclaration(M, 
Intrinsic::instrprof_callsite);907// We want to count the instrumentable callsites, then instrument them. This908// is because the llvm.instrprof.callsite intrinsic has an argument (like909// the other instrprof intrinsics) capturing the total number of910// instrumented objects (counters, or callsites, in this case). In this911// case, we want that value so we can readily pass it to the compiler-rt912// APIs that may have to allocate memory based on the nr of callsites.913// The traversal logic is the same for both counting and instrumentation,914// just needs to be done in succession.915auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {916for (auto &BB : F)917for (auto &Instr : BB)918if (auto *CS = dyn_cast<CallBase>(&Instr)) {919if ((CS->getCalledFunction() &&920CS->getCalledFunction()->isIntrinsic()) ||921dyn_cast<InlineAsm>(CS->getCalledOperand()))922continue;923Visitor(CS);924}925};926// First, count callsites.927uint32_t TotalNrCallsites = 0;928Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });929930// Now instrument.931uint32_t CallsiteIndex = 0;932Visit([&](auto *CB) {933IRBuilder<> Builder(CB);934Builder.CreateCall(CSIntrinsic,935{Name, CFGHash, Builder.getInt32(TotalNrCallsites),936Builder.getInt32(CallsiteIndex++),937CB->getCalledOperand()});938});939}940941uint32_t I = 0;942if (PGOTemporalInstrumentation) {943NumCounters += PGOBlockCoverage ? 8 : 1;944auto &EntryBB = F.getEntryBlock();945IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());946// llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,947// i32 <index>)948Builder.CreateCall(949Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),950{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});951I += PGOBlockCoverage ? 
8 : 1;952}953954for (auto *InstrBB : InstrumentBBs) {955IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());956assert(Builder.GetInsertPoint() != InstrBB->end() &&957"Cannot get the Instrumentation point");958// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,959// i32 <index>)960Builder.CreateCall(961Intrinsic::getDeclaration(M, PGOBlockCoverage962? Intrinsic::instrprof_cover963: Intrinsic::instrprof_increment),964{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});965}966967// Now instrument select instructions:968FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,969FuncInfo.FunctionHash);970assert(I == NumCounters);971972if (isValueProfilingDisabled())973return;974975NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();976977// Intrinsic function calls do not have funclet operand bundles needed for978// Windows exception handling attached to them. However, if value profiling is979// inserted for one of these calls, then a funclet value will need to be set980// on the instrumentation call based on the funclet coloring.981DenseMap<BasicBlock *, ColorVector> BlockColors;982if (F.hasPersonalityFn() &&983isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))984BlockColors = colorEHFunclets(F);985986// For each VP Kind, walk the VP candidates and instrument each one.987for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {988unsigned SiteIndex = 0;989if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)990continue;991992for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {993LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]994<< " site: CallSite Index = " << SiteIndex << "\n");995996IRBuilder<> Builder(Cand.InsertPt);997assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&998"Cannot get the Instrumentation point");9991000Value *ToProfile = nullptr;1001if (Cand.V->getType()->isIntegerTy())1002ToProfile = 
Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());1003else if (Cand.V->getType()->isPointerTy())1004ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());1005assert(ToProfile && "value profiling Value is of unexpected type");10061007SmallVector<OperandBundleDef, 1> OpBundles;1008populateEHOperandBundle(Cand, BlockColors, OpBundles);1009Builder.CreateCall(1010Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),1011{FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),1012ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},1013OpBundles);1014}1015} // IPVK_First <= Kind <= IPVK_Last1016}10171018namespace {10191020// This class represents a CFG edge in profile use compilation.1021struct PGOUseEdge : public PGOEdge {1022using PGOEdge::PGOEdge;10231024std::optional<uint64_t> Count;10251026// Set edge count value1027void setEdgeCount(uint64_t Value) { Count = Value; }10281029// Return the information string for this object.1030std::string infoString() const {1031if (!Count)1032return PGOEdge::infoString();1033return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();1034}1035};10361037using DirectEdges = SmallVector<PGOUseEdge *, 2>;10381039// This class stores the auxiliary information for each BB.1040struct PGOUseBBInfo : public PGOBBInfo {1041std::optional<uint64_t> Count;1042int32_t UnknownCountInEdge = 0;1043int32_t UnknownCountOutEdge = 0;1044DirectEdges InEdges;1045DirectEdges OutEdges;10461047PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}10481049// Set the profile count value for this BB.1050void setBBInfoCount(uint64_t Value) { Count = Value; }10511052// Return the information string of this object.1053std::string infoString() const {1054if (!Count)1055return PGOBBInfo::infoString();1056return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();1057}10581059// Add an OutEdge and update the edge count.1060void addOutEdge(PGOUseEdge *E) 
{1061OutEdges.push_back(E);1062UnknownCountOutEdge++;1063}10641065// Add an InEdge and update the edge count.1066void addInEdge(PGOUseEdge *E) {1067InEdges.push_back(E);1068UnknownCountInEdge++;1069}1070};10711072} // end anonymous namespace10731074// Sum up the count values for all the edges.1075static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {1076uint64_t Total = 0;1077for (const auto &E : Edges) {1078if (E->Removed)1079continue;1080if (E->Count)1081Total += *E->Count;1082}1083return Total;1084}10851086namespace {10871088class PGOUseFunc {1089public:1090PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,1091std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,1092BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,1093ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,1094bool HasSingleByteCoverage)1095: F(Func), M(Modu), BFI(BFIin), PSI(PSI),1096FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,1097InstrumentFuncEntry, HasSingleByteCoverage),1098FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}10991100void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);11011102// Read counts for the instrumented BB from profile.1103bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,1104InstrProfRecord::CountPseudoKind &PseudoKind);11051106// Populate the counts for all BBs.1107void populateCounters();11081109// Set block coverage based on profile coverage values.1110void populateCoverage(IndexedInstrProfReader *PGOReader);11111112// Set the branch weights based on the count values.1113void setBranchWeights();11141115// Annotate the value profile call sites for all value kind.1116void annotateValueSites();11171118// Annotate the value profile call sites for one value kind.1119void annotateValueSites(uint32_t Kind);11201121// Annotate the irreducible loop header weights.1122void annotateIrrLoopHeaderWeights();11231124// The hotness of the function from the profile count.1125enum 
FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };11261127// Return the function hotness from the profile.1128FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }11291130// Return the function hash.1131uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }11321133// Return the profile record for this function;1134InstrProfRecord &getProfileRecord() { return ProfileRecord; }11351136// Return the auxiliary BB information.1137PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {1138return FuncInfo.getBBInfo(BB);1139}11401141// Return the auxiliary BB information if available.1142PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {1143return FuncInfo.findBBInfo(BB);1144}11451146Function &getFunc() const { return F; }11471148void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }11491150uint64_t getProgramMaxCount() const { return ProgramMaxCount; }11511152private:1153Function &F;1154Module *M;1155BlockFrequencyInfo *BFI;1156ProfileSummaryInfo *PSI;11571158// This member stores the shared information with class PGOGenFunc.1159FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;11601161// The maximum count value in the profile. This is only used in PGO use1162// compilation.1163uint64_t ProgramMaxCount;11641165// Position of counter that remains to be read.1166uint32_t CountPosition = 0;11671168// Total size of the profile count for this function.1169uint32_t ProfileCountSize = 0;11701171// ProfileRecord for this function.1172InstrProfRecord ProfileRecord;11731174// Function hotness info derived from profile.1175FuncFreqAttr FreqAttr;11761177// Is to use the context sensitive profile.1178bool IsCS;11791180ValueProfileCollector VPC;11811182// Find the Instrumented BB and set the value. 
Return false on error.1183bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);11841185// Set the edge counter value for the unknown edge -- there should be only1186// one unknown edge.1187void setEdgeCount(DirectEdges &Edges, uint64_t Value);11881189// Set the hot/cold inline hints based on the count values.1190// FIXME: This function should be removed once the functionality in1191// the inliner is implemented.1192void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {1193if (PSI->isHotCount(EntryCount))1194FreqAttr = FFA_Hot;1195else if (PSI->isColdCount(MaxCount))1196FreqAttr = FFA_Cold;1197}1198};11991200} // end anonymous namespace12011202/// Set up InEdges/OutEdges for all BBs in the MST.1203static void setupBBInfoEdges(1204const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {1205// This is not required when there is block coverage inference.1206if (FuncInfo.BCI)1207return;1208for (const auto &E : FuncInfo.MST.allEdges()) {1209if (E->Removed)1210continue;1211const BasicBlock *SrcBB = E->SrcBB;1212const BasicBlock *DestBB = E->DestBB;1213PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);1214PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);1215SrcInfo.addOutEdge(E.get());1216DestInfo.addInEdge(E.get());1217}1218}12191220// Visit all the edges and assign the count value for the instrumented1221// edges and the BB. Return false on error.1222bool PGOUseFunc::setInstrumentedCounts(1223const std::vector<uint64_t> &CountFromProfile) {12241225std::vector<BasicBlock *> InstrumentBBs;1226FuncInfo.getInstrumentBBs(InstrumentBBs);12271228setupBBInfoEdges(FuncInfo);12291230unsigned NumCounters =1231InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();1232// The number of counters here should match the number of counters1233// in profile. 
Return if they mismatch.1234if (NumCounters != CountFromProfile.size()) {1235return false;1236}1237auto *FuncEntry = &*F.begin();12381239// Set the profile count to the Instrumented BBs.1240uint32_t I = 0;1241for (BasicBlock *InstrBB : InstrumentBBs) {1242uint64_t CountValue = CountFromProfile[I++];1243PGOUseBBInfo &Info = getBBInfo(InstrBB);1244// If we reach here, we know that we have some nonzero count1245// values in this function. The entry count should not be 0.1246// Fix it if necessary.1247if (InstrBB == FuncEntry && CountValue == 0)1248CountValue = 1;1249Info.setBBInfoCount(CountValue);1250}1251ProfileCountSize = CountFromProfile.size();1252CountPosition = I;12531254// Set the edge count and update the count of unknown edges for BBs.1255auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {1256E->setEdgeCount(Value);1257this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;1258this->getBBInfo(E->DestBB).UnknownCountInEdge--;1259};12601261// Set the profile count the Instrumented edges. There are BBs that not in1262// MST but not instrumented. Need to set the edge count value so that we can1263// populate the profile counts later.1264for (const auto &E : FuncInfo.MST.allEdges()) {1265if (E->Removed || E->InMST)1266continue;1267const BasicBlock *SrcBB = E->SrcBB;1268PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);12691270// If only one out-edge, the edge profile count should be the same as BB1271// profile count.1272if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)1273setEdgeCount(E.get(), *SrcInfo.Count);1274else {1275const BasicBlock *DestBB = E->DestBB;1276PGOUseBBInfo &DestInfo = getBBInfo(DestBB);1277// If only one in-edge, the edge profile count should be the same as BB1278// profile count.1279if (DestInfo.Count && DestInfo.InEdges.size() == 1)1280setEdgeCount(E.get(), *DestInfo.Count);1281}1282if (E->Count)1283continue;1284// E's count should have been set from profile. If not, this meenas E skips1285// the instrumentation. 
We set the count to 0.1286setEdgeCount(E.get(), 0);1287}1288return true;1289}12901291// Set the count value for the unknown edge. There should be one and only one1292// unknown edge in Edges vector.1293void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {1294for (auto &E : Edges) {1295if (E->Count)1296continue;1297E->setEdgeCount(Value);12981299getBBInfo(E->SrcBB).UnknownCountOutEdge--;1300getBBInfo(E->DestBB).UnknownCountInEdge--;1301return;1302}1303llvm_unreachable("Cannot find the unknown count edge");1304}13051306// Emit function metadata indicating PGO profile mismatch.1307static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {1308const char MetadataName[] = "instr_prof_hash_mismatch";1309SmallVector<Metadata *, 2> Names;1310// If this metadata already exists, ignore.1311auto *Existing = F.getMetadata(LLVMContext::MD_annotation);1312if (Existing) {1313MDTuple *Tuple = cast<MDTuple>(Existing);1314for (const auto &N : Tuple->operands()) {1315if (N.equalsStr(MetadataName))1316return;1317Names.push_back(N.get());1318}1319}13201321MDBuilder MDB(ctx);1322Names.push_back(MDB.createString(MetadataName));1323MDNode *MD = MDTuple::get(ctx, Names);1324F.setMetadata(LLVMContext::MD_annotation, MD);1325}13261327void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {1328handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {1329auto &Ctx = M->getContext();1330auto Err = IPE.get();1331bool SkipWarning = false;1332LLVM_DEBUG(dbgs() << "Error in reading profile for Func "1333<< FuncInfo.FuncName << ": ");1334if (Err == instrprof_error::unknown_function) {1335IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;1336SkipWarning = !PGOWarnMissing;1337LLVM_DEBUG(dbgs() << "unknown function");1338} else if (Err == instrprof_error::hash_mismatch ||1339Err == instrprof_error::malformed) {1340IsCS ? 
NumOfCSPGOMismatch++ : NumOfPGOMismatch++;1341SkipWarning =1342NoPGOWarnMismatch ||1343(NoPGOWarnMismatchComdatWeak &&1344(F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||1345F.getLinkage() == GlobalValue::AvailableExternallyLinkage));1346LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash1347<< " skip=" << SkipWarning << ")");1348// Emit function metadata indicating PGO profile mismatch.1349annotateFunctionWithHashMismatch(F, M->getContext());1350}13511352LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");1353if (SkipWarning)1354return;13551356std::string Msg =1357IPE.message() + std::string(" ") + F.getName().str() +1358std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +1359std::string(" up to ") + std::to_string(MismatchedFuncSum) +1360std::string(" count discarded");13611362Ctx.diagnose(1363DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));1364});1365}13661367// Read the profile from ProfileFileName and assign the value to the1368// instrumented BB and the edges. This function also updates ProgramMaxCount.1369// Return true if the profile are successfully read, and false on errors.1370bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,1371InstrProfRecord::CountPseudoKind &PseudoKind) {1372auto &Ctx = M->getContext();1373uint64_t MismatchedFuncSum = 0;1374Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(1375FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,1376&MismatchedFuncSum);1377if (Error E = Result.takeError()) {1378handleInstrProfError(std::move(E), MismatchedFuncSum);1379return false;1380}1381ProfileRecord = std::move(Result.get());1382PseudoKind = ProfileRecord.getCountPseudoKind();1383if (PseudoKind != InstrProfRecord::NotPseudo) {1384return true;1385}1386std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;13871388IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++;1389LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");13901391uint64_t ValueSum = 0;1392for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {1393LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");1394ValueSum += CountFromProfile[I];1395}1396AllZeros = (ValueSum == 0);13971398LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");13991400getBBInfo(nullptr).UnknownCountOutEdge = 2;1401getBBInfo(nullptr).UnknownCountInEdge = 2;14021403if (!setInstrumentedCounts(CountFromProfile)) {1404LLVM_DEBUG(1405dbgs() << "Inconsistent number of counts, skipping this function");1406Ctx.diagnose(DiagnosticInfoPGOProfile(1407M->getName().data(),1408Twine("Inconsistent number of counts in ") + F.getName().str() +1409Twine(": the profile may be stale or there is a function name "1410"collision."),1411DS_Warning));1412return false;1413}1414ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);1415return true;1416}14171418void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {1419uint64_t MismatchedFuncSum = 0;1420Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(1421FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,1422&MismatchedFuncSum);1423if (auto Err = Result.takeError()) {1424handleInstrProfError(std::move(Err), MismatchedFuncSum);1425return;1426}1427IsCS ? 
NumOfCSPGOFunc++ : NumOfPGOFunc++;14281429std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;1430DenseMap<const BasicBlock *, bool> Coverage;1431unsigned Index = 0;1432for (auto &BB : F)1433if (FuncInfo.BCI->shouldInstrumentBlock(BB))1434Coverage[&BB] = (CountsFromProfile[Index++] != 0);1435assert(Index == CountsFromProfile.size());14361437// For each B in InverseDependencies[A], if A is covered then B is covered.1438DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>1439InverseDependencies;1440for (auto &BB : F) {1441for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {1442// If Dep is covered then BB is covered.1443InverseDependencies[Dep].insert(&BB);1444}1445}14461447// Infer coverage of the non-instrumented blocks using a flood-fill algorithm.1448std::stack<const BasicBlock *> CoveredBlocksToProcess;1449for (auto &[BB, IsCovered] : Coverage)1450if (IsCovered)1451CoveredBlocksToProcess.push(BB);14521453while (!CoveredBlocksToProcess.empty()) {1454auto *CoveredBlock = CoveredBlocksToProcess.top();1455assert(Coverage[CoveredBlock]);1456CoveredBlocksToProcess.pop();1457for (auto *BB : InverseDependencies[CoveredBlock]) {1458// If CoveredBlock is covered then BB is covered.1459if (Coverage[BB])1460continue;1461Coverage[BB] = true;1462CoveredBlocksToProcess.push(BB);1463}1464}14651466// Annotate block coverage.1467MDBuilder MDB(F.getContext());1468// We set the entry count to 10000 if the entry block is covered so that BFI1469// can propagate a fraction of this count to the other covered blocks.1470F.setEntryCount(Coverage[&F.getEntryBlock()] ? 
10000 : 0);1471for (auto &BB : F) {1472// For a block A and its successor B, we set the edge weight as follows:1473// If A is covered and B is covered, set weight=1.1474// If A is covered and B is uncovered, set weight=0.1475// If A is uncovered, set weight=1.1476// This setup will allow BFI to give nonzero profile counts to only covered1477// blocks.1478SmallVector<uint32_t, 4> Weights;1479for (auto *Succ : successors(&BB))1480Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);1481if (Weights.size() >= 2)1482llvm::setBranchWeights(*BB.getTerminator(), Weights,1483/*IsExpected=*/false);1484}14851486unsigned NumCorruptCoverage = 0;1487DominatorTree DT(F);1488LoopInfo LI(DT);1489BranchProbabilityInfo BPI(F, LI);1490BlockFrequencyInfo BFI(F, BPI, LI);1491auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {1492if (auto C = BFI.getBlockProfileCount(&BB))1493return C == 0;1494return {};1495};1496LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");1497for (auto &BB : F) {1498LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")1499<< (Coverage[&BB] ? "X " : " ") << " " << BB.getName()1500<< "\n");1501// In some cases it is possible to find a covered block that has no covered1502// successors, e.g., when a block calls a function that may call exit(). In1503// those cases, BFI could find its successor to be covered while BCI could1504// find its successor to be dead.1505if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {1506LLVM_DEBUG(1507dbgs() << "Found inconsistent block covearge for " << BB.getName()1508<< ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="1509<< (IsBlockDead(BB).value() ? 
"Dead" : "Covered") << "\n");1510++NumCorruptCoverage;1511}1512if (Coverage[&BB])1513++NumCoveredBlocks;1514}1515if (PGOVerifyBFI && NumCorruptCoverage) {1516auto &Ctx = M->getContext();1517Ctx.diagnose(DiagnosticInfoPGOProfile(1518M->getName().data(),1519Twine("Found inconsistent block coverage for function ") + F.getName() +1520" in " + Twine(NumCorruptCoverage) + " blocks.",1521DS_Warning));1522}1523if (PGOViewBlockCoverageGraph)1524FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);1525}15261527// Populate the counters from instrumented BBs to all BBs.1528// In the end of this operation, all BBs should have a valid count value.1529void PGOUseFunc::populateCounters() {1530bool Changes = true;1531unsigned NumPasses = 0;1532while (Changes) {1533NumPasses++;1534Changes = false;15351536// For efficient traversal, it's better to start from the end as most1537// of the instrumented edges are at the end.1538for (auto &BB : reverse(F)) {1539PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);1540if (UseBBInfo == nullptr)1541continue;1542if (!UseBBInfo->Count) {1543if (UseBBInfo->UnknownCountOutEdge == 0) {1544UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);1545Changes = true;1546} else if (UseBBInfo->UnknownCountInEdge == 0) {1547UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);1548Changes = true;1549}1550}1551if (UseBBInfo->Count) {1552if (UseBBInfo->UnknownCountOutEdge == 1) {1553uint64_t Total = 0;1554uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);1555// If the one of the successor block can early terminate (no-return),1556// we can end up with situation where out edge sum count is larger as1557// the source BB's count is collected by a post-dominated block.1558if (*UseBBInfo->Count > OutSum)1559Total = *UseBBInfo->Count - OutSum;1560setEdgeCount(UseBBInfo->OutEdges, Total);1561Changes = true;1562}1563if (UseBBInfo->UnknownCountInEdge == 1) {1564uint64_t Total = 0;1565uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);1566if (*UseBBInfo->Count > InSum)1567Total = 
*UseBBInfo->Count - InSum;1568setEdgeCount(UseBBInfo->InEdges, Total);1569Changes = true;1570}1571}1572}1573}15741575LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");1576(void)NumPasses;1577#ifndef NDEBUG1578// Assert every BB has a valid counter.1579for (auto &BB : F) {1580auto BI = findBBInfo(&BB);1581if (BI == nullptr)1582continue;1583assert(BI->Count && "BB count is not valid");1584}1585#endif1586uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;1587uint64_t FuncMaxCount = FuncEntryCount;1588for (auto &BB : F) {1589auto BI = findBBInfo(&BB);1590if (BI == nullptr)1591continue;1592FuncMaxCount = std::max(FuncMaxCount, *BI->Count);1593}15941595// Fix the obviously inconsistent entry count.1596if (FuncMaxCount > 0 && FuncEntryCount == 0)1597FuncEntryCount = 1;1598F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));1599markFunctionAttributes(FuncEntryCount, FuncMaxCount);16001601// Now annotate select instructions1602FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);1603assert(CountPosition == ProfileCountSize);16041605LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));1606}16071608// Assign the scaled count values to the BB with multiple out edges.1609void PGOUseFunc::setBranchWeights() {1610// Generate MD_prof metadata for every branch instruction.1611LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()1612<< " IsCS=" << IsCS << "\n");1613for (auto &BB : F) {1614Instruction *TI = BB.getTerminator();1615if (TI->getNumSuccessors() < 2)1616continue;1617if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||1618isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||1619isa<CallBrInst>(TI)))1620continue;16211622const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);1623if (!*BBCountInfo.Count)1624continue;16251626// We have a non-zero Branch BB.16271628// SuccessorCount can be greater than OutEdgesCount, because1629// removed edges don't appear in OutEdges.1630unsigned OutEdgesCount = 
BBCountInfo.OutEdges.size();1631unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();1632assert(OutEdgesCount <= SuccessorCount);16331634SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);1635uint64_t MaxCount = 0;1636for (unsigned It = 0; It < OutEdgesCount; It++) {1637const PGOUseEdge *E = BBCountInfo.OutEdges[It];1638const BasicBlock *SrcBB = E->SrcBB;1639const BasicBlock *DestBB = E->DestBB;1640if (DestBB == nullptr)1641continue;1642unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);1643uint64_t EdgeCount = *E->Count;1644if (EdgeCount > MaxCount)1645MaxCount = EdgeCount;1646EdgeCounts[SuccNum] = EdgeCount;1647}16481649if (MaxCount)1650setProfMetadata(M, TI, EdgeCounts, MaxCount);1651else {1652// A zero MaxCount can come about when we have a BB with a positive1653// count, and whose successor blocks all have 0 count. This can happen1654// when there is no exit block and the code exits via a noreturn function.1655auto &Ctx = M->getContext();1656Ctx.diagnose(DiagnosticInfoPGOProfile(1657M->getName().data(),1658Twine("Profile in ") + F.getName().str() +1659Twine(" partially ignored") +1660Twine(", possibly due to the lack of a return path."),1661DS_Warning));1662}1663}1664}16651666static bool isIndirectBrTarget(BasicBlock *BB) {1667for (BasicBlock *Pred : predecessors(BB)) {1668if (isa<IndirectBrInst>(Pred->getTerminator()))1669return true;1670}1671return false;1672}16731674void PGOUseFunc::annotateIrrLoopHeaderWeights() {1675LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");1676// Find irr loop headers1677for (auto &BB : F) {1678// As a heuristic also annotate indrectbr targets as they have a high chance1679// to become an irreducible loop header after the indirectbr tail1680// duplication.1681if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {1682Instruction *TI = BB.getTerminator();1683const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);1684setIrrLoopHeaderMetadata(M, TI, 
*BBCountInfo.Count);1685}1686}1687}16881689void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {1690Module *M = F.getParent();1691IRBuilder<> Builder(&SI);1692Type *Int64Ty = Builder.getInt64Ty();1693auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);1694Builder.CreateCall(1695Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),1696{FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),1697Builder.getInt32(*CurCtrIdx), Step});1698++(*CurCtrIdx);1699}17001701void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {1702std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;1703assert(*CurCtrIdx < CountFromProfile.size() &&1704"Out of bound access of counters");1705uint64_t SCounts[2];1706SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count1707++(*CurCtrIdx);1708uint64_t TotalCount = 0;1709auto BI = UseFunc->findBBInfo(SI.getParent());1710if (BI != nullptr)1711TotalCount = *BI->Count;1712// False Count1713SCounts[1] = (TotalCount > SCounts[0] ? 
TotalCount - SCounts[0] : 0);1714uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);1715if (MaxCount)1716setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);1717}17181719void SelectInstVisitor::visitSelectInst(SelectInst &SI) {1720if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)1721return;1722// FIXME: do not handle this yet.1723if (SI.getCondition()->getType()->isVectorTy())1724return;17251726switch (Mode) {1727case VM_counting:1728NSIs++;1729return;1730case VM_instrument:1731instrumentOneSelectInst(SI);1732return;1733case VM_annotate:1734annotateOneSelectInst(SI);1735return;1736}17371738llvm_unreachable("Unknown visiting mode");1739}17401741static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {1742if (ValueProfKind == IPVK_MemOPSize)1743return MaxNumMemOPAnnotations;1744if (ValueProfKind == llvm::IPVK_VTableTarget)1745return MaxNumVTableAnnotations;1746return MaxNumAnnotations;1747}17481749// Traverse all valuesites and annotate the instructions for all value kind.1750void PGOUseFunc::annotateValueSites() {1751if (isValueProfilingDisabled())1752return;17531754// Create the PGOFuncName meta data.1755createPGOFuncNameMetadata(F, FuncInfo.FuncName);17561757for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)1758annotateValueSites(Kind);1759}17601761// Annotate the instructions for a specific value kind.1762void PGOUseFunc::annotateValueSites(uint32_t Kind) {1763assert(Kind <= IPVK_Last);1764unsigned ValueSiteIndex = 0;17651766unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);17671768// Since there isn't a reliable or fast way for profile reader to tell if a1769// profile is generated with `-enable-vtable-value-profiling` on, we run the1770// value profile collector over the function IR to find the instrumented sites1771// iff function profile records shows the number of instrumented vtable sites1772// is not zero. 
Function cfg already takes the number of instrumented1773// indirect call sites into account so it doesn't hash the number of1774// instrumented vtables; as a side effect it makes it easier to enable1775// profiling and profile use in two steps if needed.1776// TODO: Remove this if/when -enable-vtable-value-profiling is on by default.1777if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&1778NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&1779MaxNumVTableAnnotations != 0)1780FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);1781auto &ValueSites = FuncInfo.ValueSites[Kind];1782if (NumValueSites != ValueSites.size()) {1783auto &Ctx = M->getContext();1784Ctx.diagnose(DiagnosticInfoPGOProfile(1785M->getName().data(),1786Twine("Inconsistent number of value sites for ") +1787Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +1788F.getName().str() +1789Twine("\", possibly due to the use of a stale profile."),1790DS_Warning));1791return;1792}17931794for (VPCandidateInfo &I : ValueSites) {1795LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind1796<< "): Index = " << ValueSiteIndex << " out of "1797<< NumValueSites << "\n");1798annotateValueSite(1799*M, *I.AnnotatedInst, ProfileRecord,1800static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,1801getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));1802ValueSiteIndex++;1803}1804}18051806// Collect the set of members for each Comdat in module M and store1807// in ComdatMembers.1808static void collectComdatMembers(1809Module &M,1810std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {1811if (!DoComdatRenaming)1812return;1813for (Function &F : M)1814if (Comdat *C = F.getComdat())1815ComdatMembers.insert(std::make_pair(C, &F));1816for (GlobalVariable &GV : M.globals())1817if (Comdat *C = GV.getComdat())1818ComdatMembers.insert(std::make_pair(C, &GV));1819for (GlobalAlias &GA : M.aliases())1820if (Comdat *C = 
GA.getComdat())1821ComdatMembers.insert(std::make_pair(C, &GA));1822}18231824// Return true if we should not find instrumentation data for this function1825static bool skipPGOUse(const Function &F) {1826if (F.isDeclaration())1827return true;1828// If there are too many critical edges, PGO might cause1829// compiler time problem. Skip PGO if the number of1830// critical edges execeed the threshold.1831unsigned NumCriticalEdges = 0;1832for (auto &BB : F) {1833const Instruction *TI = BB.getTerminator();1834for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {1835if (isCriticalEdge(TI, I))1836NumCriticalEdges++;1837}1838}1839if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {1840LLVM_DEBUG(dbgs() << "In func " << F.getName()1841<< ", NumCriticalEdges=" << NumCriticalEdges1842<< " exceed the threshold. Skip PGO.\n");1843return true;1844}1845return false;1846}18471848// Return true if we should not instrument this function1849static bool skipPGOGen(const Function &F) {1850if (skipPGOUse(F))1851return true;1852if (F.hasFnAttribute(llvm::Attribute::Naked))1853return true;1854if (F.hasFnAttribute(llvm::Attribute::NoProfile))1855return true;1856if (F.hasFnAttribute(llvm::Attribute::SkipProfile))1857return true;1858if (F.getInstructionCount() < PGOFunctionSizeThreshold)1859return true;1860return false;1861}18621863static bool InstrumentAllFunctions(1864Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,1865function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,1866function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {1867// For the context-sensitve instrumentation, we should have a separated pass1868// (before LTO/ThinLTO linking) to create these variables.1869if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())1870createIRLevelProfileFlagVar(M, /*IsCS=*/false);18711872Triple TT(M.getTargetTriple());1873LLVMContext &Ctx = M.getContext();1874if (!TT.isOSBinFormatELF() && 
EnableVTableValueProfiling)1875Ctx.diagnose(DiagnosticInfoPGOProfile(1876M.getName().data(),1877Twine("VTable value profiling is presently not "1878"supported for non-ELF object formats"),1879DS_Warning));1880std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;1881collectComdatMembers(M, ComdatMembers);18821883for (auto &F : M) {1884if (skipPGOGen(F))1885continue;1886auto &TLI = LookupTLI(F);1887auto *BPI = LookupBPI(F);1888auto *BFI = LookupBFI(F);1889instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);1890}1891return true;1892}18931894PreservedAnalyses1895PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) {1896createProfileFileNameVar(M, CSInstrName);1897// The variable in a comdat may be discarded by LTO. Ensure the declaration1898// will be retained.1899appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));1900if (ProfileSampling)1901createProfileSamplingVar(M);1902PreservedAnalyses PA;1903PA.preserve<FunctionAnalysisManagerModuleProxy>();1904PA.preserveSet<AllAnalysesOn<Function>>();1905return PA;1906}19071908PreservedAnalyses PGOInstrumentationGen::run(Module &M,1909ModuleAnalysisManager &MAM) {1910auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();1911auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {1912return FAM.getResult<TargetLibraryAnalysis>(F);1913};1914auto LookupBPI = [&FAM](Function &F) {1915return &FAM.getResult<BranchProbabilityAnalysis>(F);1916};1917auto LookupBFI = [&FAM](Function &F) {1918return &FAM.getResult<BlockFrequencyAnalysis>(F);1919};19201921if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))1922return PreservedAnalyses::all();19231924return PreservedAnalyses::none();1925}19261927// Using the ratio b/w sums of profile count values and BFI count values to1928// adjust the func entry count.1929static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,1930BranchProbabilityInfo &NBPI) {1931Function &F = 
Func.getFunc();1932BlockFrequencyInfo NBFI(F, NBPI, LI);1933#ifndef NDEBUG1934auto BFIEntryCount = F.getEntryCount();1935assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&1936"Invalid BFI Entrycount");1937#endif1938auto SumCount = APFloat::getZero(APFloat::IEEEdouble());1939auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());1940for (auto &BBI : F) {1941uint64_t CountValue = 0;1942uint64_t BFICountValue = 0;1943if (!Func.findBBInfo(&BBI))1944continue;1945auto BFICount = NBFI.getBlockProfileCount(&BBI);1946CountValue = *Func.getBBInfo(&BBI).Count;1947BFICountValue = *BFICount;1948SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);1949SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);1950}1951if (SumCount.isZero())1952return;19531954assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&1955"Incorrect sum of BFI counts");1956if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)1957return;1958double Scale = (SumCount / SumBFICount).convertToDouble();1959if (Scale < 1.001 && Scale > 0.999)1960return;19611962uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;1963uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;1964if (NewEntryCount == 0)1965NewEntryCount = 1;1966if (NewEntryCount != FuncEntryCount) {1967F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));1968LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()1969<< ", entry_count " << FuncEntryCount << " --> "1970<< NewEntryCount << "\n");1971}1972}19731974// Compare the profile count values with BFI count values, and print out1975// the non-matching ones.1976static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,1977BranchProbabilityInfo &NBPI,1978uint64_t HotCountThreshold,1979uint64_t ColdCountThreshold) {1980Function &F = Func.getFunc();1981BlockFrequencyInfo NBFI(F, NBPI, LI);1982// bool PrintFunc = false;1983bool HotBBOnly = PGOVerifyHotBFI;1984StringRef Msg;1985OptimizationRemarkEmitter 
// Read the indexed profile named by ProfileFileName (optionally remapped via
// ProfileRemappingFileName) through FS and annotate every function of M that
// is not rejected by skipPGOUse().
//
// Returns false without annotating anything when the reader cannot be
// created, when a context-sensitive run (IsCS) is requested but the profile
// has no CS IR-level data, when the profile is not an IR-level profile, or
// when it only contains function-entry data. All of these except the CS case
// emit a diagnostic. Returns true otherwise.
//
// LookupTLI / LookupBPI / LookupBFI provide the per-function analyses; PSI is
// refreshed after the module's profile summary has been installed.
static bool annotateAllFunctions(
    Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
    vfs::FileSystem &FS,
    function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
    function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
    ProfileSummaryInfo *PSI, bool IsCS) {
  LLVM_DEBUG(dbgs() << "Read in profile counters: ");
  auto &Ctx = M.getContext();
  // Read the counter array from file.
  auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
                                                    ProfileRemappingFileName);
  if (Error E = ReaderOrErr.takeError()) {
    // Report every error carried by E as a PGO profile diagnostic.
    handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
    });
    return false;
  }

  std::unique_ptr<IndexedInstrProfReader> PGOReader =
      std::move(ReaderOrErr.get());
  if (!PGOReader) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
                                          StringRef("Cannot get PGOReader")));
    return false;
  }
  // A context-sensitive use pass needs a profile that has CS IR-level data;
  // this case bails out silently.
  if (!PGOReader->hasCSIRLevelProfile() && IsCS)
    return false;

  // TODO: might need to change the warning once the clang option is finalized.
  if (!PGOReader->isIRLevelProfile()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        ProfileFileName.data(), "Not an IR level instrumentation profile"));
    return false;
  }
  if (PGOReader->functionEntryOnly()) {
    Ctx.diagnose(DiagnosticInfoPGOProfile(
        ProfileFileName.data(),
        "Function entry profiles are not yet supported for optimization"));
    return false;
  }

  if (EnableVTableProfileUse) {
    // Only named globals carrying !type metadata get PGO name metadata.
    for (GlobalVariable &G : M.globals()) {
      if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
        continue;

      // Create the PGOFuncName meta data.
      createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
    }
  }

  // Add the profile summary (read from the header of the indexed summary) here
  // so that we can use it below when reading counters (which checks if the
  // function should be marked with a cold or inlinehint attribute).
  M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
                      IsCS ? ProfileSummary::PSK_CSInstr
                           : ProfileSummary::PSK_Instr);
  PSI->refresh();

  std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  collectComdatMembers(M, ComdatMembers);
  // Functions whose hot/cold attribute is applied after the main loop.
  std::vector<Function *> HotFunctions;
  std::vector<Function *> ColdFunctions;

  // If the profile marked as always instrument the entry BB, do the
  // same. Note this can be overwritten by the internal option in CFGMST.h
  bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
  // An explicitly given PGOInstrumentEntry option overrides the profile flag.
  if (PGOInstrumentEntry.getNumOccurrences() > 0)
    InstrumentFuncEntry = PGOInstrumentEntry;
  InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();

  bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
  for (auto &F : M) {
    if (skipPGOUse(F))
      continue;
    auto &TLI = LookupTLI(F);
    auto *BPI = LookupBPI(F);
    auto *BFI = LookupBFI(F);
    if (!HasSingleByteCoverage) {
      // Split indirectbr critical edges here before computing the MST rather
      // than later in getInstrBB() to avoid invalidating it.
      SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
                                   BFI);
    }
    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
                    InstrumentFuncEntry, HasSingleByteCoverage);
    if (HasSingleByteCoverage) {
      // Coverage-only profiles: populate coverage and skip the counter-based
      // annotation below.
      Func.populateCoverage(PGOReader.get());
      continue;
    }
    // When PseudoKind is set to a value other than InstrProfRecord::NotPseudo,
    // it means the profile for the function is unrepresentative and this
    // function is actually hot / warm. We will reset the function hot / cold
    // attribute and drop all the profile counters.
    InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo;
    bool AllZeros = false;
    if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
      continue;
    if (AllZeros) {
      // All counters are zero: record a zero entry count and, if the program
      // max count is non-zero, queue the function to be marked cold.
      F.setEntryCount(ProfileCount(0, Function::PCT_Real));
      if (Func.getProgramMaxCount() != 0)
        ColdFunctions.push_back(&F);
      continue;
    }
    if (PseudoKind != InstrProfRecord::NotPseudo) {
      // Clear function attribute cold.
      if (F.hasFnAttribute(Attribute::Cold))
        F.removeFnAttr(Attribute::Cold);
      // Set function attribute as hot.
      if (PseudoKind == InstrProfRecord::PseudoHot)
        F.addFnAttr(Attribute::Hot);
      continue;
    }
    // Regular path: derive all counts, then annotate branches, value sites
    // and irreducible loop headers.
    Func.populateCounters();
    Func.setBranchWeights();
    Func.annotateValueSites();
    Func.annotateIrrLoopHeaderWeights();
    PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
    if (FreqAttr == PGOUseFunc::FFA_Cold)
      ColdFunctions.push_back(&F);
    else if (FreqAttr == PGOUseFunc::FFA_Hot)
      HotFunctions.push_back(&F);
    // Optional debugging aid: view or print block frequencies recomputed
    // from the annotated IR, for all functions or only the named one.
    if (PGOViewCounts != PGOVCT_None &&
        (ViewBlockFreqFuncName.empty() ||
         F.getName() == ViewBlockFreqFuncName)) {
      LoopInfo LI{DominatorTree(F)};
      std::unique_ptr<BranchProbabilityInfo> NewBPI =
          std::make_unique<BranchProbabilityInfo>(F, LI);
      std::unique_ptr<BlockFrequencyInfo> NewBFI =
          std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
      if (PGOViewCounts == PGOVCT_Graph)
        NewBFI->view();
      else if (PGOViewCounts == PGOVCT_Text) {
        dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
        NewBFI->print(dbgs());
      }
    }
    // Optional debugging aid: view, write or dump the raw profile counts.
    if (PGOViewRawCounts != PGOVCT_None &&
        (ViewBlockFreqFuncName.empty() ||
         F.getName() == ViewBlockFreqFuncName)) {
      // NOTE: the first `else` below pairs with the inner `if`, the
      // `else if` with the outer one. Graph mode writes the graph when no
      // function name filter is set and views it otherwise.
      if (PGOViewRawCounts == PGOVCT_Graph)
        if (ViewBlockFreqFuncName.empty())
          WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
        else
          ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
      else if (PGOViewRawCounts == PGOVCT_Text) {
        dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
        Func.dumpInfo();
      }
    }

    if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
      LoopInfo LI{DominatorTree(F)};
      BranchProbabilityInfo NBPI(F, LI);

      // Fix func entry count.
      if (PGOFixEntryCount)
        fixFuncEntryCount(Func, LI, NBPI);

      // Verify BlockFrequency information.
      // NOTE(review): verifyFuncBFI is also reached when only
      // PGOFixEntryCount is set; the thresholds then stay 0.
      uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
      if (PGOVerifyHotBFI) {
        HotCountThreshold = PSI->getOrCompHotCountThreshold();
        ColdCountThreshold = PSI->getOrCompColdCountThreshold();
      }
      verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
    }
  }

  // Set function hotness attribute from the profile.
  // We have to apply these attributes at the end because their presence
  // can affect the BranchProbabilityInfo of any callers, resulting in an
  // inconsistent MST between prof-gen and prof-use.
  for (auto &F : HotFunctions) {
    F->addFnAttr(Attribute::InlineHint);
    LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
                      << "\n");
  }
  for (auto &F : ColdFunctions) {
    // Only set when there is no Attribute::Hot set by the user. For Hot
    // attribute, user's annotation has the precedence over the profile.
    if (F->hasFnAttribute(Attribute::Hot)) {
      auto &Ctx = M.getContext();
      std::string Msg = std::string("Function ") + F->getName().str() +
                        std::string(" is annotated as a hot function but"
                                    " the profile is cold");
      Ctx.diagnose(
          DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
      continue;
    }
    F->addFnAttr(Attribute::Cold);
    LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
                      << "\n");
  }
  return true;
}
Node->getName().str();22872288std::string SimpleNodeName;2289raw_string_ostream OS(SimpleNodeName);2290Node->printAsOperand(OS, false);2291return SimpleNodeName;2292}22932294void llvm::setProfMetadata(Module *M, Instruction *TI,2295ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {2296assert(MaxCount > 0 && "Bad max count");2297uint64_t Scale = calculateCountScale(MaxCount);2298SmallVector<unsigned, 4> Weights;2299for (const auto &ECI : EdgeCounts)2300Weights.push_back(scaleBranchCount(ECI, Scale));23012302LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W2303: Weights) {2304dbgs() << W << " ";2305} dbgs() << "\n";);23062307misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);23082309setBranchWeights(*TI, Weights, /*IsExpected=*/false);2310if (EmitBranchProbability) {2311std::string BrCondStr = getBranchCondString(TI);2312if (BrCondStr.empty())2313return;23142315uint64_t WSum =2316std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,2317[](uint64_t w1, uint64_t w2) { return w1 + w2; });2318uint64_t TotalCount =2319std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,2320[](uint64_t c1, uint64_t c2) { return c1 + c2; });2321Scale = calculateCountScale(WSum);2322BranchProbability BP(scaleBranchCount(Weights[0], Scale),2323scaleBranchCount(WSum, Scale));2324std::string BranchProbStr;2325raw_string_ostream OS(BranchProbStr);2326OS << BP;2327OS << " (total count : " << TotalCount << ")";2328OS.flush();2329Function *F = TI->getParent()->getParent();2330OptimizationRemarkEmitter ORE(F);2331ORE.emit([&]() {2332return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)2333<< BrCondStr << " is true with probability : " << BranchProbStr;2334});2335}2336}23372338namespace llvm {23392340void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {2341MDBuilder MDB(M->getContext());2342TI->setMetadata(llvm::LLVMContext::MD_irr_loop,2343MDB.createIrrLoopHeaderWeight(Count));2344}23452346template <> struct 
GraphTraits<PGOUseFunc *> {2347using NodeRef = const BasicBlock *;2348using ChildIteratorType = const_succ_iterator;2349using nodes_iterator = pointer_iterator<Function::const_iterator>;23502351static NodeRef getEntryNode(const PGOUseFunc *G) {2352return &G->getFunc().front();2353}23542355static ChildIteratorType child_begin(const NodeRef N) {2356return succ_begin(N);2357}23582359static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }23602361static nodes_iterator nodes_begin(const PGOUseFunc *G) {2362return nodes_iterator(G->getFunc().begin());2363}23642365static nodes_iterator nodes_end(const PGOUseFunc *G) {2366return nodes_iterator(G->getFunc().end());2367}2368};23692370template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {2371explicit DOTGraphTraits(bool isSimple = false)2372: DefaultDOTGraphTraits(isSimple) {}23732374static std::string getGraphName(const PGOUseFunc *G) {2375return std::string(G->getFunc().getName());2376}23772378std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {2379std::string Result;2380raw_string_ostream OS(Result);23812382OS << getSimpleNodeName(Node) << ":\\l";2383PGOUseBBInfo *BI = Graph->findBBInfo(Node);2384OS << "Count : ";2385if (BI && BI->Count)2386OS << *BI->Count << "\\l";2387else2388OS << "Unknown\\l";23892390if (!PGOInstrSelect)2391return Result;23922393for (const Instruction &I : *Node) {2394if (!isa<SelectInst>(&I))2395continue;2396// Display scaled counts for SELECT instruction:2397OS << "SELECT : { T = ";2398uint64_t TC, FC;2399bool HasProf = extractBranchWeights(I, TC, FC);2400if (!HasProf)2401OS << "Unknown, F = Unknown }\\l";2402else2403OS << TC << ", F = " << FC << " }\\l";2404}2405return Result;2406}2407};24082409} // end namespace llvm241024112412