CoCalc -- BlockCoverageInference.cpp

GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
³⁵²⁶⁶ views
1
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Our algorithm works by first identifying a subset of nodes that must always
10
// be instrumented. We call these nodes ambiguous because knowing the coverage
11
// of all remaining nodes is not enough to infer their coverage status.
12
//
13
// In general a node v is ambiguous if there exists two entry-to-terminal paths
14
// P_1 and P_2 such that:
15
//   1. v not in P_1 but P_1 visits a predecessor of v, and
16
//   2. v not in P_2 but P_2 visits a successor of v.
17
//
18
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19
// coverage from the coverage of its predecessors, or if condition 2 fails, we
20
// can infer v’s coverage from the coverage of its successors.
21
//
22
// Sadly, there are example CFGs where it is not possible to infer all nodes
23
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
24
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
25
//
26
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
27
//
28
//===----------------------------------------------------------------------===//
29

30
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31
#include "llvm/ADT/DepthFirstIterator.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Support/CRC.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/GraphWriter.h"
36
#include "llvm/Support/raw_ostream.h"
37
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38

39
using namespace llvm;
40

41
#define DEBUG_TYPE "pgo-block-coverage"
42

43
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
44
STATISTIC(NumIneligibleFunctions,
45
          "Number of functions for which BCI cannot run on");
46
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
47
STATISTIC(NumInstrumentedBlocks,
48
          "Number of basic blocks instrumented for coverage");
49

50
BlockCoverageInference::BlockCoverageInference(const Function &F,
51
                                               bool ForceInstrumentEntry)
52
    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
53
  findDependencies();
54
  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));
55

56
  ++NumFunctions;
57
  for (auto &BB : F) {
58
    ++NumBlocks;
59
    if (shouldInstrumentBlock(BB))
60
      ++NumInstrumentedBlocks;
61
  }
62
}
63

64
BlockCoverageInference::BlockSet
65
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
66
  assert(BB.getParent() == &F);
67
  BlockSet Dependencies;
68
  auto It = PredecessorDependencies.find(&BB);
69
  if (It != PredecessorDependencies.end())
70
    Dependencies.set_union(It->second);
71
  It = SuccessorDependencies.find(&BB);
72
  if (It != SuccessorDependencies.end())
73
    Dependencies.set_union(It->second);
74
  return Dependencies;
75
}
76

77
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
78
  JamCRC JC;
79
  uint64_t Index = 0;
80
  for (auto &BB : F) {
81
    if (shouldInstrumentBlock(BB)) {
82
      uint8_t Data[8];
83
      support::endian::write64le(Data, Index);
84
      JC.update(Data);
85
    }
86
    Index++;
87
  }
88
  return JC.getCRC();
89
}
90

91
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
92
  assert(BB.getParent() == &F);
93
  auto It = PredecessorDependencies.find(&BB);
94
  if (It != PredecessorDependencies.end() && It->second.size())
95
    return false;
96
  It = SuccessorDependencies.find(&BB);
97
  if (It != SuccessorDependencies.end() && It->second.size())
98
    return false;
99
  return true;
100
}
101

102
void BlockCoverageInference::findDependencies() {
103
  assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
104
  // Empirical analysis shows that this algorithm finishes within 5 seconds for
105
  // functions with fewer than 1.5K blocks.
106
  if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
107
    ++NumIneligibleFunctions;
108
    return;
109
  }
110

111
  SmallVector<const BasicBlock *, 4> TerminalBlocks;
112
  for (auto &BB : F)
113
    if (succ_empty(&BB))
114
      TerminalBlocks.push_back(&BB);
115

116
  // Traverse the CFG backwards from the terminal blocks to make sure every
117
  // block can reach some terminal block. Otherwise this algorithm will not work
118
  // and we must fall back to instrumenting every block.
119
  df_iterator_default_set<const BasicBlock *> Visited;
120
  for (auto *BB : TerminalBlocks)
121
    for (auto *N : inverse_depth_first_ext(BB, Visited))
122
      (void)N;
123
  if (F.size() != Visited.size()) {
124
    ++NumIneligibleFunctions;
125
    return;
126
  }
127

128
  // The current implementation for computing `PredecessorDependencies` and
129
  // `SuccessorDependencies` runs in quadratic time with respect to the number
130
  // of basic blocks. While we do have a more complicated linear time algorithm
131
  // in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132
  // significant speedup in practice given that most functions tend to be
133
  // relatively small in size for intended use cases.
134
  auto &EntryBlock = F.getEntryBlock();
135
  for (auto &BB : F) {
136
    // The set of blocks that are reachable while avoiding BB.
137
    BlockSet ReachableFromEntry, ReachableFromTerminal;
138
    getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
139
                         ReachableFromEntry);
140
    for (auto *TerminalBlock : TerminalBlocks)
141
      getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
142
                           ReachableFromTerminal);
143

144
    auto Preds = predecessors(&BB);
145
    bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
146
      return ReachableFromEntry.count(Pred) &&
147
             ReachableFromTerminal.count(Pred);
148
    });
149
    if (!HasSuperReachablePred)
150
      for (auto *Pred : Preds)
151
        if (ReachableFromEntry.count(Pred))
152
          PredecessorDependencies[&BB].insert(Pred);
153

154
    auto Succs = successors(&BB);
155
    bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
156
      return ReachableFromEntry.count(Succ) &&
157
             ReachableFromTerminal.count(Succ);
158
    });
159
    if (!HasSuperReachableSucc)
160
      for (auto *Succ : Succs)
161
        if (ReachableFromTerminal.count(Succ))
162
          SuccessorDependencies[&BB].insert(Succ);
163
  }
164

165
  if (ForceInstrumentEntry) {
166
    // Force the entry block to be instrumented by clearing the blocks it can
167
    // infer coverage from.
168
    PredecessorDependencies[&EntryBlock].clear();
169
    SuccessorDependencies[&EntryBlock].clear();
170
  }
171

172
  // Construct a graph where blocks are connected if there is a mutual
173
  // dependency between them. This graph has a special property that it contains
174
  // only paths.
175
  DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
176
  for (auto &BB : F) {
177
    for (auto *Succ : successors(&BB)) {
178
      if (SuccessorDependencies[&BB].count(Succ) &&
179
          PredecessorDependencies[Succ].count(&BB)) {
180
        AdjacencyList[&BB].insert(Succ);
181
        AdjacencyList[Succ].insert(&BB);
182
      }
183
    }
184
  }
185

186
  // Given a path with at least one node, return the next node on the path.
187
  auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
188
    assert(Path.size());
189
    auto &Neighbors = AdjacencyList[Path.back()];
190
    if (Path.size() == 1) {
191
      // This is the first node on the path, return its neighbor.
192
      assert(Neighbors.size() == 1);
193
      return Neighbors.front();
194
    } else if (Neighbors.size() == 2) {
195
      // This is the middle of the path, find the neighbor that is not on the
196
      // path already.
197
      assert(Path.size() >= 2);
198
      return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
199
    }
200
    // This is the end of the path.
201
    assert(Neighbors.size() == 1);
202
    return nullptr;
203
  };
204

205
  // Remove all cycles in the inferencing graph.
206
  for (auto &BB : F) {
207
    if (AdjacencyList[&BB].size() == 1) {
208
      // We found the head of some path.
209
      BlockSet Path;
210
      Path.insert(&BB);
211
      while (const BasicBlock *Next = getNextOnPath(Path))
212
        Path.insert(Next);
213
      LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
214

215
      // Remove these nodes from the graph so we don't discover this path again.
216
      for (auto *BB : Path)
217
        AdjacencyList[BB].clear();
218

219
      // Finally, remove the cycles.
220
      if (PredecessorDependencies[Path.front()].size()) {
221
        for (auto *BB : Path)
222
          if (BB != Path.back())
223
            SuccessorDependencies[BB].clear();
224
      } else {
225
        for (auto *BB : Path)
226
          if (BB != Path.front())
227
            PredecessorDependencies[BB].clear();
228
      }
229
    }
230
  }
231
  LLVM_DEBUG(dump(dbgs()));
232
}
233

234
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
235
                                                  const BasicBlock &Avoid,
236
                                                  bool IsForward,
237
                                                  BlockSet &Reachable) const {
238
  df_iterator_default_set<const BasicBlock *> Visited;
239
  Visited.insert(&Avoid);
240
  if (IsForward) {
241
    auto Range = depth_first_ext(&Start, Visited);
242
    Reachable.insert(Range.begin(), Range.end());
243
  } else {
244
    auto Range = inverse_depth_first_ext(&Start, Visited);
245
    Reachable.insert(Range.begin(), Range.end());
246
  }
247
}
248

249
namespace llvm {
250
class DotFuncBCIInfo {
251
private:
252
  const BlockCoverageInference *BCI;
253
  const DenseMap<const BasicBlock *, bool> *Coverage;
254

255
public:
256
  DotFuncBCIInfo(const BlockCoverageInference *BCI,
257
                 const DenseMap<const BasicBlock *, bool> *Coverage)
258
      : BCI(BCI), Coverage(Coverage) {}
259

260
  const Function &getFunction() { return BCI->F; }
261

262
  bool isInstrumented(const BasicBlock *BB) const {
263
    return BCI->shouldInstrumentBlock(*BB);
264
  }
265

266
  bool isCovered(const BasicBlock *BB) const {
267
    return Coverage && Coverage->lookup(BB);
268
  }
269

270
  bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
271
    return BCI->getDependencies(*Src).count(Dest);
272
  }
273
};
274

275
template <>
276
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
277
  static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
278
    return &(Info->getFunction().getEntryBlock());
279
  }
280

281
  // nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282
  using nodes_iterator = pointer_iterator<Function::const_iterator>;
283

284
  static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
285
    return nodes_iterator(Info->getFunction().begin());
286
  }
287

288
  static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
289
    return nodes_iterator(Info->getFunction().end());
290
  }
291

292
  static size_t size(DotFuncBCIInfo *Info) {
293
    return Info->getFunction().size();
294
  }
295
};
296

297
template <>
298
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
299

300
  DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
301

302
  static std::string getGraphName(DotFuncBCIInfo *Info) {
303
    return "BCI CFG for " + Info->getFunction().getName().str();
304
  }
305

306
  std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
307
    return Node->getName().str();
308
  }
309

310
  std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
311
                                DotFuncBCIInfo *Info) {
312
    const BasicBlock *Dest = *I;
313
    if (Info->isDependent(Src, Dest))
314
      return "color=red";
315
    if (Info->isDependent(Dest, Src))
316
      return "color=blue";
317
    return "";
318
  }
319

320
  std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
321
    std::string Result;
322
    if (Info->isInstrumented(Node))
323
      Result += "style=filled,fillcolor=gray";
324
    if (Info->isCovered(Node))
325
      Result += std::string(Result.empty() ? "" : ",") + "color=red";
326
    return Result;
327
  }
328
};
329

330
} // namespace llvm
331

332
void BlockCoverageInference::viewBlockCoverageGraph(
333
    const DenseMap<const BasicBlock *, bool> *Coverage) const {
334
  DotFuncBCIInfo Info(this, Coverage);
335
  WriteGraph(&Info, "BCI", false,
336
             "Block Coverage Inference for " + F.getName());
337
}
338

339
void BlockCoverageInference::dump(raw_ostream &OS) const {
340
  OS << "Minimal block coverage for function \'" << F.getName()
341
     << "\' (Instrumented=*)\n";
342
  for (auto &BB : F) {
343
    OS << (shouldInstrumentBlock(BB) ? "* " : "  ") << BB.getName() << "\n";
344
    auto It = PredecessorDependencies.find(&BB);
345
    if (It != PredecessorDependencies.end() && It->second.size())
346
      OS << "    PredDeps = " << getBlockNames(It->second) << "\n";
347
    It = SuccessorDependencies.find(&BB);
348
    if (It != SuccessorDependencies.end() && It->second.size())
349
      OS << "    SuccDeps = " << getBlockNames(It->second) << "\n";
350
  }
351
  OS << "  Instrumented Blocks Hash = 0x"
352
     << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
353
}
354

355
std::string
356
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
357
  std::string Result;
358
  raw_string_ostream OS(Result);
359
  OS << "[";
360
  if (!BBs.empty()) {
361
    OS << BBs.front()->getName();
362
    BBs = BBs.drop_front();
363
  }
364
  for (auto *BB : BBs)
365
    OS << ", " << BB->getName();
366
  OS << "]";
367
  return OS.str();
368
}
369

370
Product

Resources

Company