Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
35266 views
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// Our algorithm works by first identifying a subset of nodes that must always9// be instrumented. We call these nodes ambiguous because knowing the coverage10// of all remaining nodes is not enough to infer their coverage status.11//12// In general a node v is ambiguous if there exists two entry-to-terminal paths13// P_1 and P_2 such that:14// 1. v not in P_1 but P_1 visits a predecessor of v, and15// 2. v not in P_2 but P_2 visits a successor of v.16//17// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s18// coverage from the coverage of its predecessors, or if condition 2 fails, we19// can infer v’s coverage from the coverage of its successors.20//21// Sadly, there are example CFGs where it is not possible to infer all nodes22// from the ambiguous nodes alone. Our algorithm selects a minimum number of23// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.24//25// Details on this algorithm can be found in https://arxiv.org/abs/2208.1390726//27//===----------------------------------------------------------------------===//2829#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"30#include "llvm/ADT/DepthFirstIterator.h"31#include "llvm/ADT/Statistic.h"32#include "llvm/Support/CRC.h"33#include "llvm/Support/Debug.h"34#include "llvm/Support/GraphWriter.h"35#include "llvm/Support/raw_ostream.h"36#include "llvm/Transforms/Utils/BasicBlockUtils.h"3738using namespace llvm;3940#define DEBUG_TYPE "pgo-block-coverage"4142STATISTIC(NumFunctions, "Number of total functions that BCI has processed");43STATISTIC(NumIneligibleFunctions,44"Number of functions for which BCI cannot run on");45STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");46STATISTIC(NumInstrumentedBlocks,47"Number of basic blocks instrumented for coverage");4849BlockCoverageInference::BlockCoverageInference(const Function &F,50bool ForceInstrumentEntry)51: F(F), ForceInstrumentEntry(ForceInstrumentEntry) {52findDependencies();53assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));5455++NumFunctions;56for (auto &BB : F) {57++NumBlocks;58if (shouldInstrumentBlock(BB))59++NumInstrumentedBlocks;60}61}6263BlockCoverageInference::BlockSet64BlockCoverageInference::getDependencies(const BasicBlock &BB) const {65assert(BB.getParent() == &F);66BlockSet Dependencies;67auto It = PredecessorDependencies.find(&BB);68if (It != PredecessorDependencies.end())69Dependencies.set_union(It->second);70It = SuccessorDependencies.find(&BB);71if (It != SuccessorDependencies.end())72Dependencies.set_union(It->second);73return Dependencies;74}7576uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {77JamCRC JC;78uint64_t Index = 0;79for (auto &BB : F) {80if (shouldInstrumentBlock(BB)) {81uint8_t Data[8];82support::endian::write64le(Data, Index);83JC.update(Data);84}85Index++;86}87return JC.getCRC();88}8990bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {91assert(BB.getParent() == &F);92auto It = PredecessorDependencies.find(&BB);93if (It != PredecessorDependencies.end() && It->second.size())94return false;95It = SuccessorDependencies.find(&BB);96if (It != SuccessorDependencies.end() && It->second.size())97return false;98return true;99}100101void BlockCoverageInference::findDependencies() {102assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());103// Empirical analysis shows that this algorithm finishes within 5 seconds for104// functions with fewer than 1.5K blocks.105if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {106++NumIneligibleFunctions;107return;108}109110SmallVector<const BasicBlock *, 4> TerminalBlocks;111for (auto &BB : F)112if (succ_empty(&BB))113TerminalBlocks.push_back(&BB);114115// Traverse the CFG backwards from the terminal blocks to make sure every116// block can reach some terminal block. Otherwise this algorithm will not work117// and we must fall back to instrumenting every block.118df_iterator_default_set<const BasicBlock *> Visited;119for (auto *BB : TerminalBlocks)120for (auto *N : inverse_depth_first_ext(BB, Visited))121(void)N;122if (F.size() != Visited.size()) {123++NumIneligibleFunctions;124return;125}126127// The current implementation for computing `PredecessorDependencies` and128// `SuccessorDependencies` runs in quadratic time with respect to the number129// of basic blocks. While we do have a more complicated linear time algorithm130// in https://arxiv.org/abs/2208.13907 we do not know if it will give a131// significant speedup in practice given that most functions tend to be132// relatively small in size for intended use cases.133auto &EntryBlock = F.getEntryBlock();134for (auto &BB : F) {135// The set of blocks that are reachable while avoiding BB.136BlockSet ReachableFromEntry, ReachableFromTerminal;137getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,138ReachableFromEntry);139for (auto *TerminalBlock : TerminalBlocks)140getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,141ReachableFromTerminal);142143auto Preds = predecessors(&BB);144bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {145return ReachableFromEntry.count(Pred) &&146ReachableFromTerminal.count(Pred);147});148if (!HasSuperReachablePred)149for (auto *Pred : Preds)150if (ReachableFromEntry.count(Pred))151PredecessorDependencies[&BB].insert(Pred);152153auto Succs = successors(&BB);154bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {155return ReachableFromEntry.count(Succ) &&156ReachableFromTerminal.count(Succ);157});158if (!HasSuperReachableSucc)159for (auto *Succ : Succs)160if (ReachableFromTerminal.count(Succ))161SuccessorDependencies[&BB].insert(Succ);162}163164if (ForceInstrumentEntry) {165// Force the entry block to be instrumented by clearing the blocks it can166// infer coverage from.167PredecessorDependencies[&EntryBlock].clear();168SuccessorDependencies[&EntryBlock].clear();169}170171// Construct a graph where blocks are connected if there is a mutual172// dependency between them. This graph has a special property that it contains173// only paths.174DenseMap<const BasicBlock *, BlockSet> AdjacencyList;175for (auto &BB : F) {176for (auto *Succ : successors(&BB)) {177if (SuccessorDependencies[&BB].count(Succ) &&178PredecessorDependencies[Succ].count(&BB)) {179AdjacencyList[&BB].insert(Succ);180AdjacencyList[Succ].insert(&BB);181}182}183}184185// Given a path with at least one node, return the next node on the path.186auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {187assert(Path.size());188auto &Neighbors = AdjacencyList[Path.back()];189if (Path.size() == 1) {190// This is the first node on the path, return its neighbor.191assert(Neighbors.size() == 1);192return Neighbors.front();193} else if (Neighbors.size() == 2) {194// This is the middle of the path, find the neighbor that is not on the195// path already.196assert(Path.size() >= 2);197return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];198}199// This is the end of the path.200assert(Neighbors.size() == 1);201return nullptr;202};203204// Remove all cycles in the inferencing graph.205for (auto &BB : F) {206if (AdjacencyList[&BB].size() == 1) {207// We found the head of some path.208BlockSet Path;209Path.insert(&BB);210while (const BasicBlock *Next = getNextOnPath(Path))211Path.insert(Next);212LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");213214// Remove these nodes from the graph so we don't discover this path again.215for (auto *BB : Path)216AdjacencyList[BB].clear();217218// Finally, remove the cycles.219if (PredecessorDependencies[Path.front()].size()) {220for (auto *BB : Path)221if (BB != Path.back())222SuccessorDependencies[BB].clear();223} else {224for (auto *BB : Path)225if (BB != Path.front())226PredecessorDependencies[BB].clear();227}228}229}230LLVM_DEBUG(dump(dbgs()));231}232233void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,234const BasicBlock &Avoid,235bool IsForward,236BlockSet &Reachable) const {237df_iterator_default_set<const BasicBlock *> Visited;238Visited.insert(&Avoid);239if (IsForward) {240auto Range = depth_first_ext(&Start, Visited);241Reachable.insert(Range.begin(), Range.end());242} else {243auto Range = inverse_depth_first_ext(&Start, Visited);244Reachable.insert(Range.begin(), Range.end());245}246}247248namespace llvm {249class DotFuncBCIInfo {250private:251const BlockCoverageInference *BCI;252const DenseMap<const BasicBlock *, bool> *Coverage;253254public:255DotFuncBCIInfo(const BlockCoverageInference *BCI,256const DenseMap<const BasicBlock *, bool> *Coverage)257: BCI(BCI), Coverage(Coverage) {}258259const Function &getFunction() { return BCI->F; }260261bool isInstrumented(const BasicBlock *BB) const {262return BCI->shouldInstrumentBlock(*BB);263}264265bool isCovered(const BasicBlock *BB) const {266return Coverage && Coverage->lookup(BB);267}268269bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {270return BCI->getDependencies(*Src).count(Dest);271}272};273274template <>275struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {276static NodeRef getEntryNode(DotFuncBCIInfo *Info) {277return &(Info->getFunction().getEntryBlock());278}279280// nodes_iterator/begin/end - Allow iteration over all nodes in the graph281using nodes_iterator = pointer_iterator<Function::const_iterator>;282283static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {284return nodes_iterator(Info->getFunction().begin());285}286287static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {288return nodes_iterator(Info->getFunction().end());289}290291static size_t size(DotFuncBCIInfo *Info) {292return Info->getFunction().size();293}294};295296template <>297struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {298299DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}300301static std::string getGraphName(DotFuncBCIInfo *Info) {302return "BCI CFG for " + Info->getFunction().getName().str();303}304305std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {306return Node->getName().str();307}308309std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,310DotFuncBCIInfo *Info) {311const BasicBlock *Dest = *I;312if (Info->isDependent(Src, Dest))313return "color=red";314if (Info->isDependent(Dest, Src))315return "color=blue";316return "";317}318319std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {320std::string Result;321if (Info->isInstrumented(Node))322Result += "style=filled,fillcolor=gray";323if (Info->isCovered(Node))324Result += std::string(Result.empty() ? "" : ",") + "color=red";325return Result;326}327};328329} // namespace llvm330331void BlockCoverageInference::viewBlockCoverageGraph(332const DenseMap<const BasicBlock *, bool> *Coverage) const {333DotFuncBCIInfo Info(this, Coverage);334WriteGraph(&Info, "BCI", false,335"Block Coverage Inference for " + F.getName());336}337338void BlockCoverageInference::dump(raw_ostream &OS) const {339OS << "Minimal block coverage for function \'" << F.getName()340<< "\' (Instrumented=*)\n";341for (auto &BB : F) {342OS << (shouldInstrumentBlock(BB) ? "* " : " ") << BB.getName() << "\n";343auto It = PredecessorDependencies.find(&BB);344if (It != PredecessorDependencies.end() && It->second.size())345OS << " PredDeps = " << getBlockNames(It->second) << "\n";346It = SuccessorDependencies.find(&BB);347if (It != SuccessorDependencies.end() && It->second.size())348OS << " SuccDeps = " << getBlockNames(It->second) << "\n";349}350OS << " Instrumented Blocks Hash = 0x"351<< Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";352}353354std::string355BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {356std::string Result;357raw_string_ostream OS(Result);358OS << "[";359if (!BBs.empty()) {360OS << BBs.front()->getName();361BBs = BBs.drop_front();362}363for (auto *BB : BBs)364OS << ", " << BB->getName();365OS << "]";366return OS.str();367}368369370