Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/BlockCoverageInference.cpp
35266 views
1
//===-- BlockCoverageInference.cpp - Minimal Execution Coverage -*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Our algorithm works by first identifying a subset of nodes that must always
10
// be instrumented. We call these nodes ambiguous because knowing the coverage
11
// of all remaining nodes is not enough to infer their coverage status.
12
//
13
// In general a node v is ambiguous if there exist two entry-to-terminal paths
14
// P_1 and P_2 such that:
15
// 1. v not in P_1 but P_1 visits a predecessor of v, and
16
// 2. v not in P_2 but P_2 visits a successor of v.
17
//
18
// If a node v is not ambiguous, then if condition 1 fails, we can infer v’s
19
// coverage from the coverage of its predecessors, or if condition 2 fails, we
20
// can infer v’s coverage from the coverage of its successors.
21
//
22
// Sadly, there are example CFGs where it is not possible to infer all nodes
23
// from the ambiguous nodes alone. Our algorithm selects a minimum number of
24
// extra nodes to add to the ambiguous nodes to form a valid instrumentation S.
25
//
26
// Details on this algorithm can be found in https://arxiv.org/abs/2208.13907
27
//
28
//===----------------------------------------------------------------------===//
29
30
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
31
#include "llvm/ADT/DepthFirstIterator.h"
32
#include "llvm/ADT/Statistic.h"
33
#include "llvm/Support/CRC.h"
34
#include "llvm/Support/Debug.h"
35
#include "llvm/Support/GraphWriter.h"
36
#include "llvm/Support/raw_ostream.h"
37
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
38
39
using namespace llvm;
40
41
#define DEBUG_TYPE "pgo-block-coverage"
42
43
// Counters updated while a BlockCoverageInference object is constructed: one
// tick per function, per function that had to be skipped, per basic block and
// per basic block that ends up instrumented.
STATISTIC(NumFunctions, "Number of total functions that BCI has processed");
STATISTIC(NumIneligibleFunctions,
          "Number of functions on which BCI cannot run");
STATISTIC(NumBlocks, "Number of total basic blocks that BCI has processed");
STATISTIC(NumInstrumentedBlocks,
          "Number of basic blocks instrumented for coverage");
49
50
/// Runs the dependency analysis for \p F and updates the statistics counters.
///
/// \param F the function whose basic blocks are analysed.
/// \param ForceInstrumentEntry when true, the entry block is expected to end
///        up instrumented once the analysis has run (checked by the assert).
BlockCoverageInference::BlockCoverageInference(const Function &F,
                                               bool ForceInstrumentEntry)
    : F(F), ForceInstrumentEntry(ForceInstrumentEntry) {
  // The dependency maps must be populated before any query below.
  findDependencies();
  assert(!ForceInstrumentEntry || shouldInstrumentBlock(F.getEntryBlock()));

  // Bookkeeping: one function, every block, and the instrumented subset.
  ++NumFunctions;
  for (const BasicBlock &Block : F) {
    ++NumBlocks;
    if (!shouldInstrumentBlock(Block))
      continue;
    ++NumInstrumentedBlocks;
  }
}
63
64
/// Returns the union of the predecessor and successor dependencies recorded
/// for \p BB. The result is empty when neither map has an entry for \p BB.
BlockCoverageInference::BlockSet
BlockCoverageInference::getDependencies(const BasicBlock &BB) const {
  assert(BB.getParent() == &F);

  BlockSet Result;

  // Predecessor dependencies come first, matching the order of insertion into
  // the returned set.
  auto PredIt = PredecessorDependencies.find(&BB);
  if (PredIt != PredecessorDependencies.end())
    Result.set_union(PredIt->second);

  auto SuccIt = SuccessorDependencies.find(&BB);
  if (SuccIt != SuccessorDependencies.end())
    Result.set_union(SuccIt->second);

  return Result;
}
76
77
uint64_t BlockCoverageInference::getInstrumentedBlocksHash() const {
78
JamCRC JC;
79
uint64_t Index = 0;
80
for (auto &BB : F) {
81
if (shouldInstrumentBlock(BB)) {
82
uint8_t Data[8];
83
support::endian::write64le(Data, Index);
84
JC.update(Data);
85
}
86
Index++;
87
}
88
return JC.getCRC();
89
}
90
91
bool BlockCoverageInference::shouldInstrumentBlock(const BasicBlock &BB) const {
92
assert(BB.getParent() == &F);
93
auto It = PredecessorDependencies.find(&BB);
94
if (It != PredecessorDependencies.end() && It->second.size())
95
return false;
96
It = SuccessorDependencies.find(&BB);
97
if (It != SuccessorDependencies.end() && It->second.size())
98
return false;
99
return true;
100
}
101
102
void BlockCoverageInference::findDependencies() {
103
assert(PredecessorDependencies.empty() && SuccessorDependencies.empty());
104
// Empirical analysis shows that this algorithm finishes within 5 seconds for
105
// functions with fewer than 1.5K blocks.
106
if (F.hasFnAttribute(Attribute::NoReturn) || F.size() > 1500) {
107
++NumIneligibleFunctions;
108
return;
109
}
110
111
SmallVector<const BasicBlock *, 4> TerminalBlocks;
112
for (auto &BB : F)
113
if (succ_empty(&BB))
114
TerminalBlocks.push_back(&BB);
115
116
// Traverse the CFG backwards from the terminal blocks to make sure every
117
// block can reach some terminal block. Otherwise this algorithm will not work
118
// and we must fall back to instrumenting every block.
119
df_iterator_default_set<const BasicBlock *> Visited;
120
for (auto *BB : TerminalBlocks)
121
for (auto *N : inverse_depth_first_ext(BB, Visited))
122
(void)N;
123
if (F.size() != Visited.size()) {
124
++NumIneligibleFunctions;
125
return;
126
}
127
128
// The current implementation for computing `PredecessorDependencies` and
129
// `SuccessorDependencies` runs in quadratic time with respect to the number
130
// of basic blocks. While we do have a more complicated linear time algorithm
131
// in https://arxiv.org/abs/2208.13907 we do not know if it will give a
132
// significant speedup in practice given that most functions tend to be
133
// relatively small in size for intended use cases.
134
auto &EntryBlock = F.getEntryBlock();
135
for (auto &BB : F) {
136
// The set of blocks that are reachable while avoiding BB.
137
BlockSet ReachableFromEntry, ReachableFromTerminal;
138
getReachableAvoiding(EntryBlock, BB, /*IsForward=*/true,
139
ReachableFromEntry);
140
for (auto *TerminalBlock : TerminalBlocks)
141
getReachableAvoiding(*TerminalBlock, BB, /*IsForward=*/false,
142
ReachableFromTerminal);
143
144
auto Preds = predecessors(&BB);
145
bool HasSuperReachablePred = llvm::any_of(Preds, [&](auto *Pred) {
146
return ReachableFromEntry.count(Pred) &&
147
ReachableFromTerminal.count(Pred);
148
});
149
if (!HasSuperReachablePred)
150
for (auto *Pred : Preds)
151
if (ReachableFromEntry.count(Pred))
152
PredecessorDependencies[&BB].insert(Pred);
153
154
auto Succs = successors(&BB);
155
bool HasSuperReachableSucc = llvm::any_of(Succs, [&](auto *Succ) {
156
return ReachableFromEntry.count(Succ) &&
157
ReachableFromTerminal.count(Succ);
158
});
159
if (!HasSuperReachableSucc)
160
for (auto *Succ : Succs)
161
if (ReachableFromTerminal.count(Succ))
162
SuccessorDependencies[&BB].insert(Succ);
163
}
164
165
if (ForceInstrumentEntry) {
166
// Force the entry block to be instrumented by clearing the blocks it can
167
// infer coverage from.
168
PredecessorDependencies[&EntryBlock].clear();
169
SuccessorDependencies[&EntryBlock].clear();
170
}
171
172
// Construct a graph where blocks are connected if there is a mutual
173
// dependency between them. This graph has a special property that it contains
174
// only paths.
175
DenseMap<const BasicBlock *, BlockSet> AdjacencyList;
176
for (auto &BB : F) {
177
for (auto *Succ : successors(&BB)) {
178
if (SuccessorDependencies[&BB].count(Succ) &&
179
PredecessorDependencies[Succ].count(&BB)) {
180
AdjacencyList[&BB].insert(Succ);
181
AdjacencyList[Succ].insert(&BB);
182
}
183
}
184
}
185
186
// Given a path with at least one node, return the next node on the path.
187
auto getNextOnPath = [&](BlockSet &Path) -> const BasicBlock * {
188
assert(Path.size());
189
auto &Neighbors = AdjacencyList[Path.back()];
190
if (Path.size() == 1) {
191
// This is the first node on the path, return its neighbor.
192
assert(Neighbors.size() == 1);
193
return Neighbors.front();
194
} else if (Neighbors.size() == 2) {
195
// This is the middle of the path, find the neighbor that is not on the
196
// path already.
197
assert(Path.size() >= 2);
198
return Path.count(Neighbors[0]) ? Neighbors[1] : Neighbors[0];
199
}
200
// This is the end of the path.
201
assert(Neighbors.size() == 1);
202
return nullptr;
203
};
204
205
// Remove all cycles in the inferencing graph.
206
for (auto &BB : F) {
207
if (AdjacencyList[&BB].size() == 1) {
208
// We found the head of some path.
209
BlockSet Path;
210
Path.insert(&BB);
211
while (const BasicBlock *Next = getNextOnPath(Path))
212
Path.insert(Next);
213
LLVM_DEBUG(dbgs() << "Found path: " << getBlockNames(Path) << "\n");
214
215
// Remove these nodes from the graph so we don't discover this path again.
216
for (auto *BB : Path)
217
AdjacencyList[BB].clear();
218
219
// Finally, remove the cycles.
220
if (PredecessorDependencies[Path.front()].size()) {
221
for (auto *BB : Path)
222
if (BB != Path.back())
223
SuccessorDependencies[BB].clear();
224
} else {
225
for (auto *BB : Path)
226
if (BB != Path.front())
227
PredecessorDependencies[BB].clear();
228
}
229
}
230
}
231
LLVM_DEBUG(dump(dbgs()));
232
}
233
234
void BlockCoverageInference::getReachableAvoiding(const BasicBlock &Start,
235
const BasicBlock &Avoid,
236
bool IsForward,
237
BlockSet &Reachable) const {
238
df_iterator_default_set<const BasicBlock *> Visited;
239
Visited.insert(&Avoid);
240
if (IsForward) {
241
auto Range = depth_first_ext(&Start, Visited);
242
Reachable.insert(Range.begin(), Range.end());
243
} else {
244
auto Range = inverse_depth_first_ext(&Start, Visited);
245
Reachable.insert(Range.begin(), Range.end());
246
}
247
}
248
249
namespace llvm {
250
class DotFuncBCIInfo {
251
private:
252
const BlockCoverageInference *BCI;
253
const DenseMap<const BasicBlock *, bool> *Coverage;
254
255
public:
256
DotFuncBCIInfo(const BlockCoverageInference *BCI,
257
const DenseMap<const BasicBlock *, bool> *Coverage)
258
: BCI(BCI), Coverage(Coverage) {}
259
260
const Function &getFunction() { return BCI->F; }
261
262
bool isInstrumented(const BasicBlock *BB) const {
263
return BCI->shouldInstrumentBlock(*BB);
264
}
265
266
bool isCovered(const BasicBlock *BB) const {
267
return Coverage && Coverage->lookup(BB);
268
}
269
270
bool isDependent(const BasicBlock *Src, const BasicBlock *Dest) const {
271
return BCI->getDependencies(*Src).count(Dest);
272
}
273
};
274
275
template <>
276
struct GraphTraits<DotFuncBCIInfo *> : public GraphTraits<const BasicBlock *> {
277
static NodeRef getEntryNode(DotFuncBCIInfo *Info) {
278
return &(Info->getFunction().getEntryBlock());
279
}
280
281
// nodes_iterator/begin/end - Allow iteration over all nodes in the graph
282
using nodes_iterator = pointer_iterator<Function::const_iterator>;
283
284
static nodes_iterator nodes_begin(DotFuncBCIInfo *Info) {
285
return nodes_iterator(Info->getFunction().begin());
286
}
287
288
static nodes_iterator nodes_end(DotFuncBCIInfo *Info) {
289
return nodes_iterator(Info->getFunction().end());
290
}
291
292
static size_t size(DotFuncBCIInfo *Info) {
293
return Info->getFunction().size();
294
}
295
};
296
297
template <>
298
struct DOTGraphTraits<DotFuncBCIInfo *> : public DefaultDOTGraphTraits {
299
300
DOTGraphTraits(bool IsSimple = false) : DefaultDOTGraphTraits(IsSimple) {}
301
302
static std::string getGraphName(DotFuncBCIInfo *Info) {
303
return "BCI CFG for " + Info->getFunction().getName().str();
304
}
305
306
std::string getNodeLabel(const BasicBlock *Node, DotFuncBCIInfo *Info) {
307
return Node->getName().str();
308
}
309
310
std::string getEdgeAttributes(const BasicBlock *Src, const_succ_iterator I,
311
DotFuncBCIInfo *Info) {
312
const BasicBlock *Dest = *I;
313
if (Info->isDependent(Src, Dest))
314
return "color=red";
315
if (Info->isDependent(Dest, Src))
316
return "color=blue";
317
return "";
318
}
319
320
std::string getNodeAttributes(const BasicBlock *Node, DotFuncBCIInfo *Info) {
321
std::string Result;
322
if (Info->isInstrumented(Node))
323
Result += "style=filled,fillcolor=gray";
324
if (Info->isCovered(Node))
325
Result += std::string(Result.empty() ? "" : ",") + "color=red";
326
return Result;
327
}
328
};
329
330
} // namespace llvm
331
332
void BlockCoverageInference::viewBlockCoverageGraph(
333
const DenseMap<const BasicBlock *, bool> *Coverage) const {
334
DotFuncBCIInfo Info(this, Coverage);
335
WriteGraph(&Info, "BCI", false,
336
"Block Coverage Inference for " + F.getName());
337
}
338
339
/// Prints, for each block of the function, whether it is instrumented (marked
/// with '*') and its non-empty dependency sets, followed by the hash of the
/// instrumented blocks.
void BlockCoverageInference::dump(raw_ostream &OS) const {
  OS << "Minimal block coverage for function \'" << F.getName()
     << "\' (Instrumented=*)\n";

  // Emits one labelled dependency line for Block if DepMap has a non-empty
  // set for it.
  auto PrintDeps = [&OS](const auto &DepMap, const BasicBlock &Block,
                         const char *Label) {
    auto It = DepMap.find(&Block);
    if (It == DepMap.end() || !It->second.size())
      return;
    OS << Label << getBlockNames(It->second) << "\n";
  };

  for (const BasicBlock &Block : F) {
    if (shouldInstrumentBlock(Block))
      OS << "* ";
    else
      OS << " ";
    OS << Block.getName() << "\n";

    PrintDeps(PredecessorDependencies, Block, " PredDeps = ");
    PrintDeps(SuccessorDependencies, Block, " SuccDeps = ");
  }

  OS << " Instrumented Blocks Hash = 0x"
     << Twine::utohexstr(getInstrumentedBlocksHash()) << "\n";
}
354
355
std::string
356
BlockCoverageInference::getBlockNames(ArrayRef<const BasicBlock *> BBs) {
357
std::string Result;
358
raw_string_ostream OS(Result);
359
OS << "[";
360
if (!BBs.empty()) {
361
OS << BBs.front()->getName();
362
BBs = BBs.drop_front();
363
}
364
for (auto *BB : BBs)
365
OS << ", " << BB->getName();
366
OS << "]";
367
return OS.str();
368
}
369
370