Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
35269 views
1
//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements PGO instrumentation using a minimum spanning tree based
10
// on the following paper:
11
// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12
// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13
// Issue 3, pp 313-322
14
// The idea of the algorithm based on the fact that for each node (except for
15
// the entry and exit), the sum of incoming edge counts equals the sum of
16
// outgoing edge counts. The count of edge on spanning tree can be derived from
17
// those edges not on the spanning tree. Knuth proves this method instruments
18
// the minimum number of edges.
19
//
20
// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21
// edges have higher frequencies (more likely to execute). The idea is to
22
// instrument those less frequently executed edges to reduce the runtime
23
// overhead of instrumented binaries.
24
//
25
// This file contains two passes:
26
// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27
// count profile, and generates the instrumentation for indirect call
28
// profiling.
29
// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30
// annotates the branch weights. It also reads the indirect call value
31
// profiling records and annotate the indirect call instructions.
32
//
33
// To get the precise counter information, These two passes need to invoke at
34
// the same compilation point (so they see the same IR). For pass
35
// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36
// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
37
// the profile is opened in module level and passed to each PGOUseFunc instance.
38
// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39
// in class FuncPGOInstrumentation.
40
//
41
// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42
// BBInfo contains auxiliary information for each BB. These two classes are used
43
// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
44
// class of PGOEdge and BBInfo, respectively. They contains extra data structure
45
// used in populating profile counters.
46
// The MST implementation is in Class CFGMST (CFGMST.h).
47
//
48
//===----------------------------------------------------------------------===//
49
50
#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51
#include "ValueProfileCollector.h"
52
#include "llvm/ADT/APInt.h"
53
#include "llvm/ADT/ArrayRef.h"
54
#include "llvm/ADT/STLExtras.h"
55
#include "llvm/ADT/SmallVector.h"
56
#include "llvm/ADT/Statistic.h"
57
#include "llvm/ADT/StringRef.h"
58
#include "llvm/ADT/Twine.h"
59
#include "llvm/ADT/iterator.h"
60
#include "llvm/ADT/iterator_range.h"
61
#include "llvm/Analysis/BlockFrequencyInfo.h"
62
#include "llvm/Analysis/BranchProbabilityInfo.h"
63
#include "llvm/Analysis/CFG.h"
64
#include "llvm/Analysis/LoopInfo.h"
65
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
66
#include "llvm/Analysis/ProfileSummaryInfo.h"
67
#include "llvm/Analysis/TargetLibraryInfo.h"
68
#include "llvm/IR/Attributes.h"
69
#include "llvm/IR/BasicBlock.h"
70
#include "llvm/IR/CFG.h"
71
#include "llvm/IR/Comdat.h"
72
#include "llvm/IR/Constant.h"
73
#include "llvm/IR/Constants.h"
74
#include "llvm/IR/DiagnosticInfo.h"
75
#include "llvm/IR/Dominators.h"
76
#include "llvm/IR/EHPersonalities.h"
77
#include "llvm/IR/Function.h"
78
#include "llvm/IR/GlobalAlias.h"
79
#include "llvm/IR/GlobalValue.h"
80
#include "llvm/IR/GlobalVariable.h"
81
#include "llvm/IR/IRBuilder.h"
82
#include "llvm/IR/InstVisitor.h"
83
#include "llvm/IR/InstrTypes.h"
84
#include "llvm/IR/Instruction.h"
85
#include "llvm/IR/Instructions.h"
86
#include "llvm/IR/IntrinsicInst.h"
87
#include "llvm/IR/Intrinsics.h"
88
#include "llvm/IR/LLVMContext.h"
89
#include "llvm/IR/MDBuilder.h"
90
#include "llvm/IR/Module.h"
91
#include "llvm/IR/PassManager.h"
92
#include "llvm/IR/ProfDataUtils.h"
93
#include "llvm/IR/ProfileSummary.h"
94
#include "llvm/IR/Type.h"
95
#include "llvm/IR/Value.h"
96
#include "llvm/ProfileData/InstrProf.h"
97
#include "llvm/ProfileData/InstrProfReader.h"
98
#include "llvm/Support/BranchProbability.h"
99
#include "llvm/Support/CRC.h"
100
#include "llvm/Support/Casting.h"
101
#include "llvm/Support/CommandLine.h"
102
#include "llvm/Support/DOTGraphTraits.h"
103
#include "llvm/Support/Debug.h"
104
#include "llvm/Support/Error.h"
105
#include "llvm/Support/ErrorHandling.h"
106
#include "llvm/Support/GraphWriter.h"
107
#include "llvm/Support/VirtualFileSystem.h"
108
#include "llvm/Support/raw_ostream.h"
109
#include "llvm/TargetParser/Triple.h"
110
#include "llvm/Transforms/Instrumentation.h"
111
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
112
#include "llvm/Transforms/Instrumentation/CFGMST.h"
113
#include "llvm/Transforms/Instrumentation/PGOCtxProfLowering.h"
114
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
115
#include "llvm/Transforms/Utils/MisExpect.h"
116
#include "llvm/Transforms/Utils/ModuleUtils.h"
117
#include <algorithm>
118
#include <cassert>
119
#include <cstdint>
120
#include <memory>
121
#include <numeric>
122
#include <optional>
123
#include <stack>
124
#include <string>
125
#include <unordered_map>
126
#include <utility>
127
#include <vector>
128
129
using namespace llvm;
130
using ProfileCount = Function::ProfileCount;
131
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
132
133
#define DEBUG_TYPE "pgo-instrumentation"
134
135
STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
136
STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
137
STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
138
STATISTIC(NumOfPGOEdge, "Number of edges.");
139
STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
140
STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
141
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
142
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
143
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
144
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
145
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
146
STATISTIC(NumOfCSPGOSelectInsts,
147
"Number of select instruction instrumented in CSPGO.");
148
STATISTIC(NumOfCSPGOMemIntrinsics,
149
"Number of mem intrinsics instrumented in CSPGO.");
150
STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
151
STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
152
STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
153
STATISTIC(NumOfCSPGOFunc,
154
"Number of functions having valid profile counts in CSPGO.");
155
STATISTIC(NumOfCSPGOMismatch,
156
"Number of functions having mismatch profile in CSPGO.");
157
STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
158
STATISTIC(NumCoveredBlocks, "Number of basic blocks that were executed");
159
160
// Command line option to specify the file to read profile from. This is
161
// mainly used for testing.
162
static cl::opt<std::string>
163
PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
164
cl::value_desc("filename"),
165
cl::desc("Specify the path of profile data file. This is"
166
"mainly for test purpose."));
167
static cl::opt<std::string> PGOTestProfileRemappingFile(
168
"pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
169
cl::value_desc("filename"),
170
cl::desc("Specify the path of profile remapping file. This is mainly for "
171
"test purpose."));
172
173
// Command line option to disable value profiling. The default is false:
174
// i.e. value profiling is enabled by default. This is for debug purpose.
175
static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
176
cl::Hidden,
177
cl::desc("Disable Value Profiling"));
178
179
// Command line option to set the maximum number of VP annotations to write to
180
// the metadata for a single indirect call callsite.
181
static cl::opt<unsigned> MaxNumAnnotations(
182
"icp-max-annotations", cl::init(3), cl::Hidden,
183
cl::desc("Max number of annotations for a single indirect "
184
"call callsite"));
185
186
// Command line option to set the maximum number of value annotations
187
// to write to the metadata for a single memop intrinsic.
188
static cl::opt<unsigned> MaxNumMemOPAnnotations(
189
"memop-max-annotations", cl::init(4), cl::Hidden,
190
cl::desc("Max number of preicise value annotations for a single memop"
191
"intrinsic"));
192
193
// Command line option to control appending FunctionHash to the name of a COMDAT
194
// function. This is to avoid the hash mismatch caused by the preinliner.
195
static cl::opt<bool> DoComdatRenaming(
196
"do-comdat-renaming", cl::init(false), cl::Hidden,
197
cl::desc("Append function hash to the name of COMDAT function to avoid "
198
"function hash mismatch due to the preinliner"));
199
200
namespace llvm {
201
// Command line option to enable/disable the warning about missing profile
202
// information.
203
cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
204
cl::Hidden,
205
cl::desc("Use this option to turn on/off "
206
"warnings about missing profile data for "
207
"functions."));
208
209
// Command line option to enable/disable the warning about a hash mismatch in
210
// the profile data.
211
cl::opt<bool>
212
NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
213
cl::desc("Use this option to turn off/on "
214
"warnings about profile cfg mismatch."));
215
216
// Command line option to enable/disable the warning about a hash mismatch in
217
// the profile data for Comdat functions, which often turns out to be false
218
// positive due to the pre-instrumentation inline.
219
cl::opt<bool> NoPGOWarnMismatchComdatWeak(
220
"no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
221
cl::desc("The option is used to turn on/off "
222
"warnings about hash mismatch for comdat "
223
"or weak functions."));
224
} // namespace llvm
225
226
// Command line option to enable/disable select instruction instrumentation.
227
static cl::opt<bool>
228
PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
229
cl::desc("Use this option to turn on/off SELECT "
230
"instruction instrumentation. "));
231
232
// Command line option to turn on CFG dot or text dump of raw profile counts
233
static cl::opt<PGOViewCountsType> PGOViewRawCounts(
234
"pgo-view-raw-counts", cl::Hidden,
235
cl::desc("A boolean option to show CFG dag or text "
236
"with raw profile counts from "
237
"profile data. See also option "
238
"-pgo-view-counts. To limit graph "
239
"display to only one function, use "
240
"filtering option -view-bfi-func-name."),
241
cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
242
clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
243
clEnumValN(PGOVCT_Text, "text", "show in text.")));
244
245
// Command line option to enable/disable memop intrinsic call.size profiling.
246
static cl::opt<bool>
247
PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
248
cl::desc("Use this option to turn on/off "
249
"memory intrinsic size profiling."));
250
251
// Emit branch probability as optimization remarks.
252
static cl::opt<bool>
253
EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
254
cl::desc("When this option is on, the annotated "
255
"branch probability will be emitted as "
256
"optimization remarks: -{Rpass|"
257
"pass-remarks}=pgo-instrumentation"));
258
259
static cl::opt<bool> PGOInstrumentEntry(
260
"pgo-instrument-entry", cl::init(false), cl::Hidden,
261
cl::desc("Force to instrument function entry basicblock."));
262
263
static cl::opt<bool> PGOFunctionEntryCoverage(
264
"pgo-function-entry-coverage", cl::Hidden,
265
cl::desc(
266
"Use this option to enable function entry coverage instrumentation."));
267
268
static cl::opt<bool> PGOBlockCoverage(
269
"pgo-block-coverage",
270
cl::desc("Use this option to enable basic block coverage instrumentation"));
271
272
static cl::opt<bool>
273
PGOViewBlockCoverageGraph("pgo-view-block-coverage-graph",
274
cl::desc("Create a dot file of CFGs with block "
275
"coverage inference information"));
276
277
static cl::opt<bool> PGOTemporalInstrumentation(
278
"pgo-temporal-instrumentation",
279
cl::desc("Use this option to enable temporal instrumentation"));
280
281
static cl::opt<bool>
282
PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
283
cl::desc("Fix function entry count in profile use."));
284
285
static cl::opt<bool> PGOVerifyHotBFI(
286
"pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
287
cl::desc("Print out the non-match BFI count if a hot raw profile count "
288
"becomes non-hot, or a cold raw profile count becomes hot. "
289
"The print is enabled under -Rpass-analysis=pgo, or "
290
"internal option -pass-remakrs-analysis=pgo."));
291
292
static cl::opt<bool> PGOVerifyBFI(
293
"pgo-verify-bfi", cl::init(false), cl::Hidden,
294
cl::desc("Print out mismatched BFI counts after setting profile metadata "
295
"The print is enabled under -Rpass-analysis=pgo, or "
296
"internal option -pass-remakrs-analysis=pgo."));
297
298
static cl::opt<unsigned> PGOVerifyBFIRatio(
299
"pgo-verify-bfi-ratio", cl::init(2), cl::Hidden,
300
cl::desc("Set the threshold for pgo-verify-bfi: only print out "
301
"mismatched BFI if the difference percentage is greater than "
302
"this value (in percentage)."));
303
304
static cl::opt<unsigned> PGOVerifyBFICutoff(
305
"pgo-verify-bfi-cutoff", cl::init(5), cl::Hidden,
306
cl::desc("Set the threshold for pgo-verify-bfi: skip the counts whose "
307
"profile count value is below."));
308
309
static cl::opt<std::string> PGOTraceFuncHash(
310
"pgo-trace-func-hash", cl::init("-"), cl::Hidden,
311
cl::value_desc("function name"),
312
cl::desc("Trace the hash of the function with this name."));
313
314
static cl::opt<unsigned> PGOFunctionSizeThreshold(
315
"pgo-function-size-threshold", cl::Hidden,
316
cl::desc("Do not instrument functions smaller than this threshold."));
317
318
static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
319
"pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
320
cl::desc("Do not instrument functions with the number of critical edges "
321
" greater than this threshold."));
322
323
extern cl::opt<unsigned> MaxNumVTableAnnotations;
324
325
namespace llvm {
326
// Command line option to turn on CFG dot dump after profile annotation.
327
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
328
extern cl::opt<PGOViewCountsType> PGOViewCounts;
329
330
// Command line option to specify the name of the function for CFG dump
331
// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
332
extern cl::opt<std::string> ViewBlockFreqFuncName;
333
334
// Command line option to enable vtable value profiling. Defined in
335
// ProfileData/InstrProf.cpp: -enable-vtable-value-profiling=
336
extern cl::opt<bool> EnableVTableValueProfiling;
337
extern cl::opt<bool> EnableVTableProfileUse;
338
extern cl::opt<InstrProfCorrelator::ProfCorrelatorKind> ProfileCorrelate;
339
} // namespace llvm
340
341
bool shouldInstrumentEntryBB() {
342
return PGOInstrumentEntry ||
343
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
344
}
345
346
// FIXME(mtrofin): re-enable this for ctx profiling, for non-indirect calls. Ctx
347
// profiling implicitly captures indirect call cases, but not other values.
348
// Supporting other values is relatively straight-forward - just another counter
349
// range within the context.
350
bool isValueProfilingDisabled() {
351
return DisableValueProfiling ||
352
PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
353
}
354
355
// Return a string describing the branch condition that can be
356
// used in static branch probability heuristics:
357
static std::string getBranchCondString(Instruction *TI) {
358
BranchInst *BI = dyn_cast<BranchInst>(TI);
359
if (!BI || !BI->isConditional())
360
return std::string();
361
362
Value *Cond = BI->getCondition();
363
ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
364
if (!CI)
365
return std::string();
366
367
std::string result;
368
raw_string_ostream OS(result);
369
OS << CI->getPredicate() << "_";
370
CI->getOperand(0)->getType()->print(OS, true);
371
372
Value *RHS = CI->getOperand(1);
373
ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
374
if (CV) {
375
if (CV->isZero())
376
OS << "_Zero";
377
else if (CV->isOne())
378
OS << "_One";
379
else if (CV->isMinusOne())
380
OS << "_MinusOne";
381
else
382
OS << "_Const";
383
}
384
OS.flush();
385
return result;
386
}
387
388
static const char *ValueProfKindDescr[] = {
389
#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
390
#include "llvm/ProfileData/InstrProfData.inc"
391
};
392
393
// Create a COMDAT variable INSTR_PROF_RAW_VERSION_VAR to make the runtime
394
// aware this is an ir_level profile so it can set the version flag.
395
static GlobalVariable *createIRLevelProfileFlagVar(Module &M, bool IsCS) {
396
const StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_RAW_VERSION_VAR));
397
Type *IntTy64 = Type::getInt64Ty(M.getContext());
398
uint64_t ProfileVersion = (INSTR_PROF_RAW_VERSION | VARIANT_MASK_IR_PROF);
399
if (IsCS)
400
ProfileVersion |= VARIANT_MASK_CSIR_PROF;
401
if (shouldInstrumentEntryBB())
402
ProfileVersion |= VARIANT_MASK_INSTR_ENTRY;
403
if (DebugInfoCorrelate || ProfileCorrelate == InstrProfCorrelator::DEBUG_INFO)
404
ProfileVersion |= VARIANT_MASK_DBG_CORRELATE;
405
if (PGOFunctionEntryCoverage)
406
ProfileVersion |=
407
VARIANT_MASK_BYTE_COVERAGE | VARIANT_MASK_FUNCTION_ENTRY_ONLY;
408
if (PGOBlockCoverage)
409
ProfileVersion |= VARIANT_MASK_BYTE_COVERAGE;
410
if (PGOTemporalInstrumentation)
411
ProfileVersion |= VARIANT_MASK_TEMPORAL_PROF;
412
auto IRLevelVersionVariable = new GlobalVariable(
413
M, IntTy64, true, GlobalValue::WeakAnyLinkage,
414
Constant::getIntegerValue(IntTy64, APInt(64, ProfileVersion)), VarName);
415
IRLevelVersionVariable->setVisibility(GlobalValue::HiddenVisibility);
416
Triple TT(M.getTargetTriple());
417
if (TT.supportsCOMDAT()) {
418
IRLevelVersionVariable->setLinkage(GlobalValue::ExternalLinkage);
419
IRLevelVersionVariable->setComdat(M.getOrInsertComdat(VarName));
420
}
421
return IRLevelVersionVariable;
422
}
423
424
namespace {
425
426
/// The select instruction visitor plays three roles specified
427
/// by the mode. In \c VM_counting mode, it simply counts the number of
428
/// select instructions. In \c VM_instrument mode, it inserts code to count
429
/// the number times TrueValue of select is taken. In \c VM_annotate mode,
430
/// it reads the profile data and annotate the select instruction with metadata.
431
enum VisitMode { VM_counting, VM_instrument, VM_annotate };
432
class PGOUseFunc;
433
434
/// Instruction Visitor class to visit select instructions.
435
struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
436
Function &F;
437
unsigned NSIs = 0; // Number of select instructions instrumented.
438
VisitMode Mode = VM_counting; // Visiting mode.
439
unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
440
unsigned TotalNumCtrs = 0; // Total number of counters
441
GlobalVariable *FuncNameVar = nullptr;
442
uint64_t FuncHash = 0;
443
PGOUseFunc *UseFunc = nullptr;
444
bool HasSingleByteCoverage;
445
446
SelectInstVisitor(Function &Func, bool HasSingleByteCoverage)
447
: F(Func), HasSingleByteCoverage(HasSingleByteCoverage) {}
448
449
void countSelects() {
450
NSIs = 0;
451
Mode = VM_counting;
452
visit(F);
453
}
454
455
// Visit the IR stream and instrument all select instructions. \p
456
// Ind is a pointer to the counter index variable; \p TotalNC
457
// is the total number of counters; \p FNV is the pointer to the
458
// PGO function name var; \p FHash is the function hash.
459
void instrumentSelects(unsigned *Ind, unsigned TotalNC, GlobalVariable *FNV,
460
uint64_t FHash) {
461
Mode = VM_instrument;
462
CurCtrIdx = Ind;
463
TotalNumCtrs = TotalNC;
464
FuncHash = FHash;
465
FuncNameVar = FNV;
466
visit(F);
467
}
468
469
// Visit the IR stream and annotate all select instructions.
470
void annotateSelects(PGOUseFunc *UF, unsigned *Ind) {
471
Mode = VM_annotate;
472
UseFunc = UF;
473
CurCtrIdx = Ind;
474
visit(F);
475
}
476
477
void instrumentOneSelectInst(SelectInst &SI);
478
void annotateOneSelectInst(SelectInst &SI);
479
480
// Visit \p SI instruction and perform tasks according to visit mode.
481
void visitSelectInst(SelectInst &SI);
482
483
// Return the number of select instructions. This needs be called after
484
// countSelects().
485
unsigned getNumOfSelectInsts() const { return NSIs; }
486
};
487
488
/// This class implements the CFG edges for the Minimum Spanning Tree (MST)
489
/// based instrumentation.
490
/// Note that the CFG can be a multi-graph. So there might be multiple edges
491
/// with the same SrcBB and DestBB.
492
struct PGOEdge {
493
BasicBlock *SrcBB;
494
BasicBlock *DestBB;
495
uint64_t Weight;
496
bool InMST = false;
497
bool Removed = false;
498
bool IsCritical = false;
499
500
PGOEdge(BasicBlock *Src, BasicBlock *Dest, uint64_t W = 1)
501
: SrcBB(Src), DestBB(Dest), Weight(W) {}
502
503
/// Return the information string of an edge.
504
std::string infoString() const {
505
return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
506
(IsCritical ? "c" : " ") + " W=" + Twine(Weight))
507
.str();
508
}
509
};
510
511
/// This class stores the auxiliary information for each BB in the MST.
512
struct PGOBBInfo {
513
PGOBBInfo *Group;
514
uint32_t Index;
515
uint32_t Rank = 0;
516
517
PGOBBInfo(unsigned IX) : Group(this), Index(IX) {}
518
519
/// Return the information string of this object.
520
std::string infoString() const {
521
return (Twine("Index=") + Twine(Index)).str();
522
}
523
};
524
525
// This class implements the CFG edges. Note the CFG can be a multi-graph.
526
template <class Edge, class BBInfo> class FuncPGOInstrumentation {
527
private:
528
Function &F;
529
530
// Is this is context-sensitive instrumentation.
531
bool IsCS;
532
533
// A map that stores the Comdat group in function F.
534
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
535
536
ValueProfileCollector VPC;
537
538
void computeCFGHash();
539
void renameComdatFunction();
540
541
public:
542
const TargetLibraryInfo &TLI;
543
std::vector<std::vector<VPCandidateInfo>> ValueSites;
544
SelectInstVisitor SIVisitor;
545
std::string FuncName;
546
std::string DeprecatedFuncName;
547
GlobalVariable *FuncNameVar;
548
549
// CFG hash value for this function.
550
uint64_t FunctionHash = 0;
551
552
// The Minimum Spanning Tree of function CFG.
553
CFGMST<Edge, BBInfo> MST;
554
555
const std::optional<BlockCoverageInference> BCI;
556
557
static std::optional<BlockCoverageInference>
558
constructBCI(Function &Func, bool HasSingleByteCoverage,
559
bool InstrumentFuncEntry) {
560
if (HasSingleByteCoverage)
561
return BlockCoverageInference(Func, InstrumentFuncEntry);
562
return {};
563
}
564
565
// Collect all the BBs that will be instrumented, and store them in
566
// InstrumentBBs.
567
void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
568
569
// Give an edge, find the BB that will be instrumented.
570
// Return nullptr if there is no BB to be instrumented.
571
BasicBlock *getInstrBB(Edge *E);
572
573
// Return the auxiliary BB information.
574
BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
575
576
// Return the auxiliary BB information if available.
577
BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
578
579
// Dump edges and BB information.
580
void dumpInfo(StringRef Str = "") const {
581
MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName +
582
" Hash: " + Twine(FunctionHash) + "\t" + Str);
583
}
584
585
FuncPGOInstrumentation(
586
Function &Func, TargetLibraryInfo &TLI,
587
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
588
bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
589
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
590
bool InstrumentFuncEntry = true, bool HasSingleByteCoverage = false)
591
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
592
TLI(TLI), ValueSites(IPVK_Last + 1),
593
SIVisitor(Func, HasSingleByteCoverage),
594
MST(F, InstrumentFuncEntry, BPI, BFI),
595
BCI(constructBCI(Func, HasSingleByteCoverage, InstrumentFuncEntry)) {
596
if (BCI && PGOViewBlockCoverageGraph)
597
BCI->viewBlockCoverageGraph();
598
// This should be done before CFG hash computation.
599
SIVisitor.countSelects();
600
ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
601
if (!IsCS) {
602
NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
603
NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
604
NumOfPGOBB += MST.bbInfoSize();
605
ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
606
if (EnableVTableValueProfiling)
607
ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
608
} else {
609
NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
610
NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
611
NumOfCSPGOBB += MST.bbInfoSize();
612
}
613
614
FuncName = getIRPGOFuncName(F);
615
DeprecatedFuncName = getPGOFuncName(F);
616
computeCFGHash();
617
if (!ComdatMembers.empty())
618
renameComdatFunction();
619
LLVM_DEBUG(dumpInfo("after CFGMST"));
620
621
for (const auto &E : MST.allEdges()) {
622
if (E->Removed)
623
continue;
624
IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
625
if (!E->InMST)
626
IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
627
}
628
629
if (CreateGlobalVar)
630
FuncNameVar = createPGOFuncNameVar(F, FuncName);
631
}
632
};
633
634
} // end anonymous namespace
635
636
// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
637
// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
638
// of selects, indirect calls, mem ops and edges.
639
template <class Edge, class BBInfo>
640
void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
641
std::vector<uint8_t> Indexes;
642
JamCRC JC;
643
for (auto &BB : F) {
644
for (BasicBlock *Succ : successors(&BB)) {
645
auto BI = findBBInfo(Succ);
646
if (BI == nullptr)
647
continue;
648
uint32_t Index = BI->Index;
649
for (int J = 0; J < 4; J++)
650
Indexes.push_back((uint8_t)(Index >> (J * 8)));
651
}
652
}
653
JC.update(Indexes);
654
655
JamCRC JCH;
656
// The higher 32 bits.
657
auto updateJCH = [&JCH](uint64_t Num) {
658
uint8_t Data[8];
659
support::endian::write64le(Data, Num);
660
JCH.update(Data);
661
};
662
updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
663
updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
664
updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
665
if (BCI) {
666
updateJCH(BCI->getInstrumentedBlocksHash());
667
} else {
668
updateJCH((uint64_t)MST.numEdges());
669
}
670
671
// Hash format for context sensitive profile. Reserve 4 bits for other
672
// information.
673
FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
674
675
// Reserve bit 60-63 for other information purpose.
676
FunctionHash &= 0x0FFFFFFFFFFFFFFF;
677
if (IsCS)
678
NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
679
LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
680
<< " CRC = " << JC.getCRC()
681
<< ", Selects = " << SIVisitor.getNumOfSelectInsts()
682
<< ", Edges = " << MST.numEdges() << ", ICSites = "
683
<< ValueSites[IPVK_IndirectCallTarget].size()
684
<< ", Memops = " << ValueSites[IPVK_MemOPSize].size()
685
<< ", High32 CRC = " << JCH.getCRC()
686
<< ", Hash = " << FunctionHash << "\n";);
687
688
if (PGOTraceFuncHash != "-" && F.getName().contains(PGOTraceFuncHash))
689
dbgs() << "Funcname=" << F.getName() << ", Hash=" << FunctionHash
690
<< " in building " << F.getParent()->getSourceFileName() << "\n";
691
}
692
693
// Check if we can safely rename this Comdat function.
694
static bool canRenameComdat(
695
Function &F,
696
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
697
if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
698
return false;
699
700
// FIXME: Current only handle those Comdat groups that only containing one
701
// function.
702
// (1) For a Comdat group containing multiple functions, we need to have a
703
// unique postfix based on the hashes for each function. There is a
704
// non-trivial code refactoring to do this efficiently.
705
// (2) Variables can not be renamed, so we can not rename Comdat function in a
706
// group including global vars.
707
Comdat *C = F.getComdat();
708
for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
709
assert(!isa<GlobalAlias>(CM.second));
710
Function *FM = dyn_cast<Function>(CM.second);
711
if (FM != &F)
712
return false;
713
}
714
return true;
715
}
716
717
// Append the CFGHash to the Comdat function name.
718
template <class Edge, class BBInfo>
719
void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
720
if (!canRenameComdat(F, ComdatMembers))
721
return;
722
std::string OrigName = F.getName().str();
723
std::string NewFuncName =
724
Twine(F.getName() + "." + Twine(FunctionHash)).str();
725
F.setName(Twine(NewFuncName));
726
GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
727
FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
728
Comdat *NewComdat;
729
Module *M = F.getParent();
730
// For AvailableExternallyLinkage functions, change the linkage to
731
// LinkOnceODR and put them into comdat. This is because after renaming, there
732
// is no backup external copy available for the function.
733
if (!F.hasComdat()) {
734
assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
735
NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
736
F.setLinkage(GlobalValue::LinkOnceODRLinkage);
737
F.setComdat(NewComdat);
738
return;
739
}
740
741
// This function belongs to a single function Comdat group.
742
Comdat *OrigComdat = F.getComdat();
743
std::string NewComdatName =
744
Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
745
NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
746
NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
747
748
for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
749
// Must be a function.
750
cast<Function>(CM.second)->setComdat(NewComdat);
751
}
752
}
753
754
/// Collect all the BBs that will be instruments and add them to
755
/// `InstrumentBBs`.
756
template <class Edge, class BBInfo>
757
void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
758
std::vector<BasicBlock *> &InstrumentBBs) {
759
if (BCI) {
760
for (auto &BB : F)
761
if (BCI->shouldInstrumentBlock(BB))
762
InstrumentBBs.push_back(&BB);
763
return;
764
}
765
766
// Use a worklist as we will update the vector during the iteration.
767
std::vector<Edge *> EdgeList;
768
EdgeList.reserve(MST.numEdges());
769
for (const auto &E : MST.allEdges())
770
EdgeList.push_back(E.get());
771
772
for (auto &E : EdgeList) {
773
BasicBlock *InstrBB = getInstrBB(E);
774
if (InstrBB)
775
InstrumentBBs.push_back(InstrBB);
776
}
777
}
778
779
// Given a CFG E to be instrumented, find which BB to place the instrumented
780
// code. The function will split the critical edge if necessary.
781
template <class Edge, class BBInfo>
782
BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
783
if (E->InMST || E->Removed)
784
return nullptr;
785
786
BasicBlock *SrcBB = E->SrcBB;
787
BasicBlock *DestBB = E->DestBB;
788
// For a fake edge, instrument the real BB.
789
if (SrcBB == nullptr)
790
return DestBB;
791
if (DestBB == nullptr)
792
return SrcBB;
793
794
auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
795
// There are basic blocks (such as catchswitch) cannot be instrumented.
796
// If the returned first insertion point is the end of BB, skip this BB.
797
if (BB->getFirstInsertionPt() == BB->end())
798
return nullptr;
799
return BB;
800
};
801
802
// Instrument the SrcBB if it has a single successor,
803
// otherwise, the DestBB if this is not a critical edge.
804
Instruction *TI = SrcBB->getTerminator();
805
if (TI->getNumSuccessors() <= 1)
806
return canInstrument(SrcBB);
807
if (!E->IsCritical)
808
return canInstrument(DestBB);
809
810
// Some IndirectBr critical edges cannot be split by the previous
811
// SplitIndirectBrCriticalEdges call. Bail out.
812
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
813
BasicBlock *InstrBB =
814
isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
815
if (!InstrBB) {
816
LLVM_DEBUG(
817
dbgs() << "Fail to split critical edge: not instrument this edge.\n");
818
return nullptr;
819
}
820
// For a critical edge, we have to split. Instrument the newly
821
// created BB.
822
IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
823
LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
824
<< " --> " << getBBInfo(DestBB).Index << "\n");
825
// Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
826
MST.addEdge(SrcBB, InstrBB, 0);
827
// Second one: Add new edge of InstrBB->DestBB.
828
Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
829
NewEdge1.InMST = true;
830
E->Removed = true;
831
832
return canInstrument(InstrBB);
833
}
834
835
// When generating value profiling calls on Windows routines that make use of
836
// handler funclets for exception processing an operand bundle needs to attached
837
// to the called function. This routine will set \p OpBundles to contain the
838
// funclet information, if any is needed, that should be placed on the generated
839
// value profiling call for the value profile candidate call.
840
static void
841
populateEHOperandBundle(VPCandidateInfo &Cand,
842
DenseMap<BasicBlock *, ColorVector> &BlockColors,
843
SmallVectorImpl<OperandBundleDef> &OpBundles) {
844
auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
845
if (!OrigCall)
846
return;
847
848
if (!isa<IntrinsicInst>(OrigCall)) {
849
// The instrumentation call should belong to the same funclet as a
850
// non-intrinsic call, so just copy the operand bundle, if any exists.
851
std::optional<OperandBundleUse> ParentFunclet =
852
OrigCall->getOperandBundle(LLVMContext::OB_funclet);
853
if (ParentFunclet)
854
OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
855
} else {
856
// Intrinsics or other instructions do not get funclet information from the
857
// front-end. Need to use the BlockColors that was computed by the routine
858
// colorEHFunclets to determine whether a funclet is needed.
859
if (!BlockColors.empty()) {
860
const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
861
assert(CV.size() == 1 && "non-unique color for block!");
862
Instruction *EHPad = CV.front()->getFirstNonPHI();
863
if (EHPad->isEHPad())
864
OpBundles.emplace_back("funclet", EHPad);
865
}
866
}
867
}
868
869
// Visit all edge and instrument the edges not in MST, and do value profiling.
870
// Critical edges will be split.
871
static void instrumentOneFunc(
872
Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
873
BlockFrequencyInfo *BFI,
874
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
875
bool IsCS) {
876
if (!PGOBlockCoverage) {
877
// Split indirectbr critical edges here before computing the MST rather than
878
// later in getInstrBB() to avoid invalidating it.
879
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
880
}
881
882
FuncPGOInstrumentation<PGOEdge, PGOBBInfo> FuncInfo(
883
F, TLI, ComdatMembers, true, BPI, BFI, IsCS, shouldInstrumentEntryBB(),
884
PGOBlockCoverage);
885
886
auto Name = FuncInfo.FuncNameVar;
887
auto CFGHash = ConstantInt::get(Type::getInt64Ty(M->getContext()),
888
FuncInfo.FunctionHash);
889
if (PGOFunctionEntryCoverage) {
890
auto &EntryBB = F.getEntryBlock();
891
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
892
// llvm.instrprof.cover(i8* <name>, i64 <hash>, i32 <num-counters>,
893
// i32 <index>)
894
Builder.CreateCall(
895
Intrinsic::getDeclaration(M, Intrinsic::instrprof_cover),
896
{Name, CFGHash, Builder.getInt32(1), Builder.getInt32(0)});
897
return;
898
}
899
900
std::vector<BasicBlock *> InstrumentBBs;
901
FuncInfo.getInstrumentBBs(InstrumentBBs);
902
unsigned NumCounters =
903
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
904
905
if (PGOCtxProfLoweringPass::isContextualIRPGOEnabled()) {
906
auto *CSIntrinsic =
907
Intrinsic::getDeclaration(M, Intrinsic::instrprof_callsite);
908
// We want to count the instrumentable callsites, then instrument them. This
909
// is because the llvm.instrprof.callsite intrinsic has an argument (like
910
// the other instrprof intrinsics) capturing the total number of
911
// instrumented objects (counters, or callsites, in this case). In this
912
// case, we want that value so we can readily pass it to the compiler-rt
913
// APIs that may have to allocate memory based on the nr of callsites.
914
// The traversal logic is the same for both counting and instrumentation,
915
// just needs to be done in succession.
916
auto Visit = [&](llvm::function_ref<void(CallBase * CB)> Visitor) {
917
for (auto &BB : F)
918
for (auto &Instr : BB)
919
if (auto *CS = dyn_cast<CallBase>(&Instr)) {
920
if ((CS->getCalledFunction() &&
921
CS->getCalledFunction()->isIntrinsic()) ||
922
dyn_cast<InlineAsm>(CS->getCalledOperand()))
923
continue;
924
Visitor(CS);
925
}
926
};
927
// First, count callsites.
928
uint32_t TotalNrCallsites = 0;
929
Visit([&TotalNrCallsites](auto *) { ++TotalNrCallsites; });
930
931
// Now instrument.
932
uint32_t CallsiteIndex = 0;
933
Visit([&](auto *CB) {
934
IRBuilder<> Builder(CB);
935
Builder.CreateCall(CSIntrinsic,
936
{Name, CFGHash, Builder.getInt32(TotalNrCallsites),
937
Builder.getInt32(CallsiteIndex++),
938
CB->getCalledOperand()});
939
});
940
}
941
942
uint32_t I = 0;
943
if (PGOTemporalInstrumentation) {
944
NumCounters += PGOBlockCoverage ? 8 : 1;
945
auto &EntryBB = F.getEntryBlock();
946
IRBuilder<> Builder(&EntryBB, EntryBB.getFirstInsertionPt());
947
// llvm.instrprof.timestamp(i8* <name>, i64 <hash>, i32 <num-counters>,
948
// i32 <index>)
949
Builder.CreateCall(
950
Intrinsic::getDeclaration(M, Intrinsic::instrprof_timestamp),
951
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I)});
952
I += PGOBlockCoverage ? 8 : 1;
953
}
954
955
for (auto *InstrBB : InstrumentBBs) {
956
IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
957
assert(Builder.GetInsertPoint() != InstrBB->end() &&
958
"Cannot get the Instrumentation point");
959
// llvm.instrprof.increment(i8* <name>, i64 <hash>, i32 <num-counters>,
960
// i32 <index>)
961
Builder.CreateCall(
962
Intrinsic::getDeclaration(M, PGOBlockCoverage
963
? Intrinsic::instrprof_cover
964
: Intrinsic::instrprof_increment),
965
{Name, CFGHash, Builder.getInt32(NumCounters), Builder.getInt32(I++)});
966
}
967
968
// Now instrument select instructions:
969
FuncInfo.SIVisitor.instrumentSelects(&I, NumCounters, FuncInfo.FuncNameVar,
970
FuncInfo.FunctionHash);
971
assert(I == NumCounters);
972
973
if (isValueProfilingDisabled())
974
return;
975
976
NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
977
978
// Intrinsic function calls do not have funclet operand bundles needed for
979
// Windows exception handling attached to them. However, if value profiling is
980
// inserted for one of these calls, then a funclet value will need to be set
981
// on the instrumentation call based on the funclet coloring.
982
DenseMap<BasicBlock *, ColorVector> BlockColors;
983
if (F.hasPersonalityFn() &&
984
isScopedEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
985
BlockColors = colorEHFunclets(F);
986
987
// For each VP Kind, walk the VP candidates and instrument each one.
988
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
989
unsigned SiteIndex = 0;
990
if (Kind == IPVK_MemOPSize && !PGOInstrMemOP)
991
continue;
992
993
for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
994
LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]
995
<< " site: CallSite Index = " << SiteIndex << "\n");
996
997
IRBuilder<> Builder(Cand.InsertPt);
998
assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&
999
"Cannot get the Instrumentation point");
1000
1001
Value *ToProfile = nullptr;
1002
if (Cand.V->getType()->isIntegerTy())
1003
ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
1004
else if (Cand.V->getType()->isPointerTy())
1005
ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
1006
assert(ToProfile && "value profiling Value is of unexpected type");
1007
1008
SmallVector<OperandBundleDef, 1> OpBundles;
1009
populateEHOperandBundle(Cand, BlockColors, OpBundles);
1010
Builder.CreateCall(
1011
Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
1012
{FuncInfo.FuncNameVar, Builder.getInt64(FuncInfo.FunctionHash),
1013
ToProfile, Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
1014
OpBundles);
1015
}
1016
} // IPVK_First <= Kind <= IPVK_Last
1017
}
1018
1019
namespace {
1020
1021
// This class represents a CFG edge in profile use compilation.
1022
struct PGOUseEdge : public PGOEdge {
1023
using PGOEdge::PGOEdge;
1024
1025
std::optional<uint64_t> Count;
1026
1027
// Set edge count value
1028
void setEdgeCount(uint64_t Value) { Count = Value; }
1029
1030
// Return the information string for this object.
1031
std::string infoString() const {
1032
if (!Count)
1033
return PGOEdge::infoString();
1034
return (Twine(PGOEdge::infoString()) + " Count=" + Twine(*Count)).str();
1035
}
1036
};
1037
1038
using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1039
1040
// This class stores the auxiliary information for each BB.
1041
struct PGOUseBBInfo : public PGOBBInfo {
1042
std::optional<uint64_t> Count;
1043
int32_t UnknownCountInEdge = 0;
1044
int32_t UnknownCountOutEdge = 0;
1045
DirectEdges InEdges;
1046
DirectEdges OutEdges;
1047
1048
PGOUseBBInfo(unsigned IX) : PGOBBInfo(IX) {}
1049
1050
// Set the profile count value for this BB.
1051
void setBBInfoCount(uint64_t Value) { Count = Value; }
1052
1053
// Return the information string of this object.
1054
std::string infoString() const {
1055
if (!Count)
1056
return PGOBBInfo::infoString();
1057
return (Twine(PGOBBInfo::infoString()) + " Count=" + Twine(*Count)).str();
1058
}
1059
1060
// Add an OutEdge and update the edge count.
1061
void addOutEdge(PGOUseEdge *E) {
1062
OutEdges.push_back(E);
1063
UnknownCountOutEdge++;
1064
}
1065
1066
// Add an InEdge and update the edge count.
1067
void addInEdge(PGOUseEdge *E) {
1068
InEdges.push_back(E);
1069
UnknownCountInEdge++;
1070
}
1071
};
1072
1073
} // end anonymous namespace
1074
1075
// Sum up the count values for all the edges.
1076
static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
1077
uint64_t Total = 0;
1078
for (const auto &E : Edges) {
1079
if (E->Removed)
1080
continue;
1081
if (E->Count)
1082
Total += *E->Count;
1083
}
1084
return Total;
1085
}
1086
1087
namespace {
1088
1089
class PGOUseFunc {
1090
public:
1091
PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1092
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1093
BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1094
ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry,
1095
bool HasSingleByteCoverage)
1096
: F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1097
FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1098
InstrumentFuncEntry, HasSingleByteCoverage),
1099
FreqAttr(FFA_Normal), IsCS(IsCS), VPC(Func, TLI) {}
1100
1101
void handleInstrProfError(Error Err, uint64_t MismatchedFuncSum);
1102
1103
// Read counts for the instrumented BB from profile.
1104
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1105
InstrProfRecord::CountPseudoKind &PseudoKind);
1106
1107
// Populate the counts for all BBs.
1108
void populateCounters();
1109
1110
// Set block coverage based on profile coverage values.
1111
void populateCoverage(IndexedInstrProfReader *PGOReader);
1112
1113
// Set the branch weights based on the count values.
1114
void setBranchWeights();
1115
1116
// Annotate the value profile call sites for all value kind.
1117
void annotateValueSites();
1118
1119
// Annotate the value profile call sites for one value kind.
1120
void annotateValueSites(uint32_t Kind);
1121
1122
// Annotate the irreducible loop header weights.
1123
void annotateIrrLoopHeaderWeights();
1124
1125
// The hotness of the function from the profile count.
1126
enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1127
1128
// Return the function hotness from the profile.
1129
FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1130
1131
// Return the function hash.
1132
uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1133
1134
// Return the profile record for this function;
1135
InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1136
1137
// Return the auxiliary BB information.
1138
PGOUseBBInfo &getBBInfo(const BasicBlock *BB) const {
1139
return FuncInfo.getBBInfo(BB);
1140
}
1141
1142
// Return the auxiliary BB information if available.
1143
PGOUseBBInfo *findBBInfo(const BasicBlock *BB) const {
1144
return FuncInfo.findBBInfo(BB);
1145
}
1146
1147
Function &getFunc() const { return F; }
1148
1149
void dumpInfo(StringRef Str = "") const { FuncInfo.dumpInfo(Str); }
1150
1151
uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1152
1153
private:
1154
Function &F;
1155
Module *M;
1156
BlockFrequencyInfo *BFI;
1157
ProfileSummaryInfo *PSI;
1158
1159
// This member stores the shared information with class PGOGenFunc.
1160
FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> FuncInfo;
1161
1162
// The maximum count value in the profile. This is only used in PGO use
1163
// compilation.
1164
uint64_t ProgramMaxCount;
1165
1166
// Position of counter that remains to be read.
1167
uint32_t CountPosition = 0;
1168
1169
// Total size of the profile count for this function.
1170
uint32_t ProfileCountSize = 0;
1171
1172
// ProfileRecord for this function.
1173
InstrProfRecord ProfileRecord;
1174
1175
// Function hotness info derived from profile.
1176
FuncFreqAttr FreqAttr;
1177
1178
// Is to use the context sensitive profile.
1179
bool IsCS;
1180
1181
ValueProfileCollector VPC;
1182
1183
// Find the Instrumented BB and set the value. Return false on error.
1184
bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1185
1186
// Set the edge counter value for the unknown edge -- there should be only
1187
// one unknown edge.
1188
void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1189
1190
// Set the hot/cold inline hints based on the count values.
1191
// FIXME: This function should be removed once the functionality in
1192
// the inliner is implemented.
1193
void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1194
if (PSI->isHotCount(EntryCount))
1195
FreqAttr = FFA_Hot;
1196
else if (PSI->isColdCount(MaxCount))
1197
FreqAttr = FFA_Cold;
1198
}
1199
};
1200
1201
} // end anonymous namespace
1202
1203
/// Set up InEdges/OutEdges for all BBs in the MST.
1204
static void setupBBInfoEdges(
1205
const FuncPGOInstrumentation<PGOUseEdge, PGOUseBBInfo> &FuncInfo) {
1206
// This is not required when there is block coverage inference.
1207
if (FuncInfo.BCI)
1208
return;
1209
for (const auto &E : FuncInfo.MST.allEdges()) {
1210
if (E->Removed)
1211
continue;
1212
const BasicBlock *SrcBB = E->SrcBB;
1213
const BasicBlock *DestBB = E->DestBB;
1214
PGOUseBBInfo &SrcInfo = FuncInfo.getBBInfo(SrcBB);
1215
PGOUseBBInfo &DestInfo = FuncInfo.getBBInfo(DestBB);
1216
SrcInfo.addOutEdge(E.get());
1217
DestInfo.addInEdge(E.get());
1218
}
1219
}
1220
1221
// Visit all the edges and assign the count value for the instrumented
1222
// edges and the BB. Return false on error.
1223
bool PGOUseFunc::setInstrumentedCounts(
1224
const std::vector<uint64_t> &CountFromProfile) {
1225
1226
std::vector<BasicBlock *> InstrumentBBs;
1227
FuncInfo.getInstrumentBBs(InstrumentBBs);
1228
1229
setupBBInfoEdges(FuncInfo);
1230
1231
unsigned NumCounters =
1232
InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1233
// The number of counters here should match the number of counters
1234
// in profile. Return if they mismatch.
1235
if (NumCounters != CountFromProfile.size()) {
1236
return false;
1237
}
1238
auto *FuncEntry = &*F.begin();
1239
1240
// Set the profile count to the Instrumented BBs.
1241
uint32_t I = 0;
1242
for (BasicBlock *InstrBB : InstrumentBBs) {
1243
uint64_t CountValue = CountFromProfile[I++];
1244
PGOUseBBInfo &Info = getBBInfo(InstrBB);
1245
// If we reach here, we know that we have some nonzero count
1246
// values in this function. The entry count should not be 0.
1247
// Fix it if necessary.
1248
if (InstrBB == FuncEntry && CountValue == 0)
1249
CountValue = 1;
1250
Info.setBBInfoCount(CountValue);
1251
}
1252
ProfileCountSize = CountFromProfile.size();
1253
CountPosition = I;
1254
1255
// Set the edge count and update the count of unknown edges for BBs.
1256
auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1257
E->setEdgeCount(Value);
1258
this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1259
this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1260
};
1261
1262
// Set the profile count the Instrumented edges. There are BBs that not in
1263
// MST but not instrumented. Need to set the edge count value so that we can
1264
// populate the profile counts later.
1265
for (const auto &E : FuncInfo.MST.allEdges()) {
1266
if (E->Removed || E->InMST)
1267
continue;
1268
const BasicBlock *SrcBB = E->SrcBB;
1269
PGOUseBBInfo &SrcInfo = getBBInfo(SrcBB);
1270
1271
// If only one out-edge, the edge profile count should be the same as BB
1272
// profile count.
1273
if (SrcInfo.Count && SrcInfo.OutEdges.size() == 1)
1274
setEdgeCount(E.get(), *SrcInfo.Count);
1275
else {
1276
const BasicBlock *DestBB = E->DestBB;
1277
PGOUseBBInfo &DestInfo = getBBInfo(DestBB);
1278
// If only one in-edge, the edge profile count should be the same as BB
1279
// profile count.
1280
if (DestInfo.Count && DestInfo.InEdges.size() == 1)
1281
setEdgeCount(E.get(), *DestInfo.Count);
1282
}
1283
if (E->Count)
1284
continue;
1285
// E's count should have been set from profile. If not, this meenas E skips
1286
// the instrumentation. We set the count to 0.
1287
setEdgeCount(E.get(), 0);
1288
}
1289
return true;
1290
}
1291
1292
// Set the count value for the unknown edge. There should be one and only one
1293
// unknown edge in Edges vector.
1294
void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1295
for (auto &E : Edges) {
1296
if (E->Count)
1297
continue;
1298
E->setEdgeCount(Value);
1299
1300
getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1301
getBBInfo(E->DestBB).UnknownCountInEdge--;
1302
return;
1303
}
1304
llvm_unreachable("Cannot find the unknown count edge");
1305
}
1306
1307
// Emit function metadata indicating PGO profile mismatch.
1308
static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {
1309
const char MetadataName[] = "instr_prof_hash_mismatch";
1310
SmallVector<Metadata *, 2> Names;
1311
// If this metadata already exists, ignore.
1312
auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1313
if (Existing) {
1314
MDTuple *Tuple = cast<MDTuple>(Existing);
1315
for (const auto &N : Tuple->operands()) {
1316
if (N.equalsStr(MetadataName))
1317
return;
1318
Names.push_back(N.get());
1319
}
1320
}
1321
1322
MDBuilder MDB(ctx);
1323
Names.push_back(MDB.createString(MetadataName));
1324
MDNode *MD = MDTuple::get(ctx, Names);
1325
F.setMetadata(LLVMContext::MD_annotation, MD);
1326
}
1327
1328
void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
1329
handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
1330
auto &Ctx = M->getContext();
1331
auto Err = IPE.get();
1332
bool SkipWarning = false;
1333
LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
1334
<< FuncInfo.FuncName << ": ");
1335
if (Err == instrprof_error::unknown_function) {
1336
IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1337
SkipWarning = !PGOWarnMissing;
1338
LLVM_DEBUG(dbgs() << "unknown function");
1339
} else if (Err == instrprof_error::hash_mismatch ||
1340
Err == instrprof_error::malformed) {
1341
IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1342
SkipWarning =
1343
NoPGOWarnMismatch ||
1344
(NoPGOWarnMismatchComdatWeak &&
1345
(F.hasComdat() || F.getLinkage() == GlobalValue::WeakAnyLinkage ||
1346
F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1347
LLVM_DEBUG(dbgs() << "hash mismatch (hash= " << FuncInfo.FunctionHash
1348
<< " skip=" << SkipWarning << ")");
1349
// Emit function metadata indicating PGO profile mismatch.
1350
annotateFunctionWithHashMismatch(F, M->getContext());
1351
}
1352
1353
LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
1354
if (SkipWarning)
1355
return;
1356
1357
std::string Msg =
1358
IPE.message() + std::string(" ") + F.getName().str() +
1359
std::string(" Hash = ") + std::to_string(FuncInfo.FunctionHash) +
1360
std::string(" up to ") + std::to_string(MismatchedFuncSum) +
1361
std::string(" count discarded");
1362
1363
Ctx.diagnose(
1364
DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1365
});
1366
}
1367
1368
// Read the profile from ProfileFileName and assign the value to the
1369
// instrumented BB and the edges. This function also updates ProgramMaxCount.
1370
// Return true if the profile are successfully read, and false on errors.
1371
bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1372
InstrProfRecord::CountPseudoKind &PseudoKind) {
1373
auto &Ctx = M->getContext();
1374
uint64_t MismatchedFuncSum = 0;
1375
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1376
FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1377
&MismatchedFuncSum);
1378
if (Error E = Result.takeError()) {
1379
handleInstrProfError(std::move(E), MismatchedFuncSum);
1380
return false;
1381
}
1382
ProfileRecord = std::move(Result.get());
1383
PseudoKind = ProfileRecord.getCountPseudoKind();
1384
if (PseudoKind != InstrProfRecord::NotPseudo) {
1385
return true;
1386
}
1387
std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1388
1389
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1390
LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
1391
1392
uint64_t ValueSum = 0;
1393
for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1394
LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
1395
ValueSum += CountFromProfile[I];
1396
}
1397
AllZeros = (ValueSum == 0);
1398
1399
LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
1400
1401
getBBInfo(nullptr).UnknownCountOutEdge = 2;
1402
getBBInfo(nullptr).UnknownCountInEdge = 2;
1403
1404
if (!setInstrumentedCounts(CountFromProfile)) {
1405
LLVM_DEBUG(
1406
dbgs() << "Inconsistent number of counts, skipping this function");
1407
Ctx.diagnose(DiagnosticInfoPGOProfile(
1408
M->getName().data(),
1409
Twine("Inconsistent number of counts in ") + F.getName().str() +
1410
Twine(": the profile may be stale or there is a function name "
1411
"collision."),
1412
DS_Warning));
1413
return false;
1414
}
1415
ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1416
return true;
1417
}
1418
1419
void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
1420
uint64_t MismatchedFuncSum = 0;
1421
Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
1422
FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
1423
&MismatchedFuncSum);
1424
if (auto Err = Result.takeError()) {
1425
handleInstrProfError(std::move(Err), MismatchedFuncSum);
1426
return;
1427
}
1428
IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1429
1430
std::vector<uint64_t> &CountsFromProfile = Result.get().Counts;
1431
DenseMap<const BasicBlock *, bool> Coverage;
1432
unsigned Index = 0;
1433
for (auto &BB : F)
1434
if (FuncInfo.BCI->shouldInstrumentBlock(BB))
1435
Coverage[&BB] = (CountsFromProfile[Index++] != 0);
1436
assert(Index == CountsFromProfile.size());
1437
1438
// For each B in InverseDependencies[A], if A is covered then B is covered.
1439
DenseMap<const BasicBlock *, DenseSet<const BasicBlock *>>
1440
InverseDependencies;
1441
for (auto &BB : F) {
1442
for (auto *Dep : FuncInfo.BCI->getDependencies(BB)) {
1443
// If Dep is covered then BB is covered.
1444
InverseDependencies[Dep].insert(&BB);
1445
}
1446
}
1447
1448
// Infer coverage of the non-instrumented blocks using a flood-fill algorithm.
1449
std::stack<const BasicBlock *> CoveredBlocksToProcess;
1450
for (auto &[BB, IsCovered] : Coverage)
1451
if (IsCovered)
1452
CoveredBlocksToProcess.push(BB);
1453
1454
while (!CoveredBlocksToProcess.empty()) {
1455
auto *CoveredBlock = CoveredBlocksToProcess.top();
1456
assert(Coverage[CoveredBlock]);
1457
CoveredBlocksToProcess.pop();
1458
for (auto *BB : InverseDependencies[CoveredBlock]) {
1459
// If CoveredBlock is covered then BB is covered.
1460
if (Coverage[BB])
1461
continue;
1462
Coverage[BB] = true;
1463
CoveredBlocksToProcess.push(BB);
1464
}
1465
}
1466
1467
// Annotate block coverage.
1468
MDBuilder MDB(F.getContext());
1469
// We set the entry count to 10000 if the entry block is covered so that BFI
1470
// can propagate a fraction of this count to the other covered blocks.
1471
F.setEntryCount(Coverage[&F.getEntryBlock()] ? 10000 : 0);
1472
for (auto &BB : F) {
1473
// For a block A and its successor B, we set the edge weight as follows:
1474
// If A is covered and B is covered, set weight=1.
1475
// If A is covered and B is uncovered, set weight=0.
1476
// If A is uncovered, set weight=1.
1477
// This setup will allow BFI to give nonzero profile counts to only covered
1478
// blocks.
1479
SmallVector<uint32_t, 4> Weights;
1480
for (auto *Succ : successors(&BB))
1481
Weights.push_back((Coverage[Succ] || !Coverage[&BB]) ? 1 : 0);
1482
if (Weights.size() >= 2)
1483
llvm::setBranchWeights(*BB.getTerminator(), Weights,
1484
/*IsExpected=*/false);
1485
}
1486
1487
unsigned NumCorruptCoverage = 0;
1488
DominatorTree DT(F);
1489
LoopInfo LI(DT);
1490
BranchProbabilityInfo BPI(F, LI);
1491
BlockFrequencyInfo BFI(F, BPI, LI);
1492
auto IsBlockDead = [&](const BasicBlock &BB) -> std::optional<bool> {
1493
if (auto C = BFI.getBlockProfileCount(&BB))
1494
return C == 0;
1495
return {};
1496
};
1497
LLVM_DEBUG(dbgs() << "Block Coverage: (Instrumented=*, Covered=X)\n");
1498
for (auto &BB : F) {
1499
LLVM_DEBUG(dbgs() << (FuncInfo.BCI->shouldInstrumentBlock(BB) ? "* " : " ")
1500
<< (Coverage[&BB] ? "X " : " ") << " " << BB.getName()
1501
<< "\n");
1502
// In some cases it is possible to find a covered block that has no covered
1503
// successors, e.g., when a block calls a function that may call exit(). In
1504
// those cases, BFI could find its successor to be covered while BCI could
1505
// find its successor to be dead.
1506
if (Coverage[&BB] == IsBlockDead(BB).value_or(false)) {
1507
LLVM_DEBUG(
1508
dbgs() << "Found inconsistent block covearge for " << BB.getName()
1509
<< ": BCI=" << (Coverage[&BB] ? "Covered" : "Dead") << " BFI="
1510
<< (IsBlockDead(BB).value() ? "Dead" : "Covered") << "\n");
1511
++NumCorruptCoverage;
1512
}
1513
if (Coverage[&BB])
1514
++NumCoveredBlocks;
1515
}
1516
if (PGOVerifyBFI && NumCorruptCoverage) {
1517
auto &Ctx = M->getContext();
1518
Ctx.diagnose(DiagnosticInfoPGOProfile(
1519
M->getName().data(),
1520
Twine("Found inconsistent block coverage for function ") + F.getName() +
1521
" in " + Twine(NumCorruptCoverage) + " blocks.",
1522
DS_Warning));
1523
}
1524
if (PGOViewBlockCoverageGraph)
1525
FuncInfo.BCI->viewBlockCoverageGraph(&Coverage);
1526
}
1527
1528
// Populate the counters from instrumented BBs to all BBs.
1529
// In the end of this operation, all BBs should have a valid count value.
1530
void PGOUseFunc::populateCounters() {
1531
bool Changes = true;
1532
unsigned NumPasses = 0;
1533
while (Changes) {
1534
NumPasses++;
1535
Changes = false;
1536
1537
// For efficient traversal, it's better to start from the end as most
1538
// of the instrumented edges are at the end.
1539
for (auto &BB : reverse(F)) {
1540
PGOUseBBInfo *UseBBInfo = findBBInfo(&BB);
1541
if (UseBBInfo == nullptr)
1542
continue;
1543
if (!UseBBInfo->Count) {
1544
if (UseBBInfo->UnknownCountOutEdge == 0) {
1545
UseBBInfo->Count = sumEdgeCount(UseBBInfo->OutEdges);
1546
Changes = true;
1547
} else if (UseBBInfo->UnknownCountInEdge == 0) {
1548
UseBBInfo->Count = sumEdgeCount(UseBBInfo->InEdges);
1549
Changes = true;
1550
}
1551
}
1552
if (UseBBInfo->Count) {
1553
if (UseBBInfo->UnknownCountOutEdge == 1) {
1554
uint64_t Total = 0;
1555
uint64_t OutSum = sumEdgeCount(UseBBInfo->OutEdges);
1556
// If the one of the successor block can early terminate (no-return),
1557
// we can end up with situation where out edge sum count is larger as
1558
// the source BB's count is collected by a post-dominated block.
1559
if (*UseBBInfo->Count > OutSum)
1560
Total = *UseBBInfo->Count - OutSum;
1561
setEdgeCount(UseBBInfo->OutEdges, Total);
1562
Changes = true;
1563
}
1564
if (UseBBInfo->UnknownCountInEdge == 1) {
1565
uint64_t Total = 0;
1566
uint64_t InSum = sumEdgeCount(UseBBInfo->InEdges);
1567
if (*UseBBInfo->Count > InSum)
1568
Total = *UseBBInfo->Count - InSum;
1569
setEdgeCount(UseBBInfo->InEdges, Total);
1570
Changes = true;
1571
}
1572
}
1573
}
1574
}
1575
1576
LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
1577
(void)NumPasses;
1578
#ifndef NDEBUG
1579
// Assert every BB has a valid counter.
1580
for (auto &BB : F) {
1581
auto BI = findBBInfo(&BB);
1582
if (BI == nullptr)
1583
continue;
1584
assert(BI->Count && "BB count is not valid");
1585
}
1586
#endif
1587
uint64_t FuncEntryCount = *getBBInfo(&*F.begin()).Count;
1588
uint64_t FuncMaxCount = FuncEntryCount;
1589
for (auto &BB : F) {
1590
auto BI = findBBInfo(&BB);
1591
if (BI == nullptr)
1592
continue;
1593
FuncMaxCount = std::max(FuncMaxCount, *BI->Count);
1594
}
1595
1596
// Fix the obviously inconsistent entry count.
1597
if (FuncMaxCount > 0 && FuncEntryCount == 0)
1598
FuncEntryCount = 1;
1599
F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1600
markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1601
1602
// Now annotate select instructions
1603
FuncInfo.SIVisitor.annotateSelects(this, &CountPosition);
1604
assert(CountPosition == ProfileCountSize);
1605
1606
LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
1607
}
1608
1609
// Assign the scaled count values to the BB with multiple out edges.
1610
void PGOUseFunc::setBranchWeights() {
1611
// Generate MD_prof metadata for every branch instruction.
1612
LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
1613
<< " IsCS=" << IsCS << "\n");
1614
for (auto &BB : F) {
1615
Instruction *TI = BB.getTerminator();
1616
if (TI->getNumSuccessors() < 2)
1617
continue;
1618
if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1619
isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI) ||
1620
isa<CallBrInst>(TI)))
1621
continue;
1622
1623
const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1624
if (!*BBCountInfo.Count)
1625
continue;
1626
1627
// We have a non-zero Branch BB.
1628
1629
// SuccessorCount can be greater than OutEdgesCount, because
1630
// removed edges don't appear in OutEdges.
1631
unsigned OutEdgesCount = BBCountInfo.OutEdges.size();
1632
unsigned SuccessorCount = BB.getTerminator()->getNumSuccessors();
1633
assert(OutEdgesCount <= SuccessorCount);
1634
1635
SmallVector<uint64_t, 2> EdgeCounts(SuccessorCount, 0);
1636
uint64_t MaxCount = 0;
1637
for (unsigned It = 0; It < OutEdgesCount; It++) {
1638
const PGOUseEdge *E = BBCountInfo.OutEdges[It];
1639
const BasicBlock *SrcBB = E->SrcBB;
1640
const BasicBlock *DestBB = E->DestBB;
1641
if (DestBB == nullptr)
1642
continue;
1643
unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1644
uint64_t EdgeCount = *E->Count;
1645
if (EdgeCount > MaxCount)
1646
MaxCount = EdgeCount;
1647
EdgeCounts[SuccNum] = EdgeCount;
1648
}
1649
1650
if (MaxCount)
1651
setProfMetadata(M, TI, EdgeCounts, MaxCount);
1652
else {
1653
// A zero MaxCount can come about when we have a BB with a positive
1654
// count, and whose successor blocks all have 0 count. This can happen
1655
// when there is no exit block and the code exits via a noreturn function.
1656
auto &Ctx = M->getContext();
1657
Ctx.diagnose(DiagnosticInfoPGOProfile(
1658
M->getName().data(),
1659
Twine("Profile in ") + F.getName().str() +
1660
Twine(" partially ignored") +
1661
Twine(", possibly due to the lack of a return path."),
1662
DS_Warning));
1663
}
1664
}
1665
}
1666
1667
static bool isIndirectBrTarget(BasicBlock *BB) {
1668
for (BasicBlock *Pred : predecessors(BB)) {
1669
if (isa<IndirectBrInst>(Pred->getTerminator()))
1670
return true;
1671
}
1672
return false;
1673
}
1674
1675
void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1676
LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
1677
// Find irr loop headers
1678
for (auto &BB : F) {
1679
// As a heuristic also annotate indrectbr targets as they have a high chance
1680
// to become an irreducible loop header after the indirectbr tail
1681
// duplication.
1682
if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1683
Instruction *TI = BB.getTerminator();
1684
const PGOUseBBInfo &BBCountInfo = getBBInfo(&BB);
1685
setIrrLoopHeaderMetadata(M, TI, *BBCountInfo.Count);
1686
}
1687
}
1688
}
1689
1690
void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1691
Module *M = F.getParent();
1692
IRBuilder<> Builder(&SI);
1693
Type *Int64Ty = Builder.getInt64Ty();
1694
auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1695
Builder.CreateCall(
1696
Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1697
{FuncNameVar, Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1698
Builder.getInt32(*CurCtrIdx), Step});
1699
++(*CurCtrIdx);
1700
}
1701
1702
void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1703
std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1704
assert(*CurCtrIdx < CountFromProfile.size() &&
1705
"Out of bound access of counters");
1706
uint64_t SCounts[2];
1707
SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1708
++(*CurCtrIdx);
1709
uint64_t TotalCount = 0;
1710
auto BI = UseFunc->findBBInfo(SI.getParent());
1711
if (BI != nullptr)
1712
TotalCount = *BI->Count;
1713
// False Count
1714
SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1715
uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1716
if (MaxCount)
1717
setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1718
}
1719
1720
void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1721
if (!PGOInstrSelect || PGOFunctionEntryCoverage || HasSingleByteCoverage)
1722
return;
1723
// FIXME: do not handle this yet.
1724
if (SI.getCondition()->getType()->isVectorTy())
1725
return;
1726
1727
switch (Mode) {
1728
case VM_counting:
1729
NSIs++;
1730
return;
1731
case VM_instrument:
1732
instrumentOneSelectInst(SI);
1733
return;
1734
case VM_annotate:
1735
annotateOneSelectInst(SI);
1736
return;
1737
}
1738
1739
llvm_unreachable("Unknown visiting mode");
1740
}
1741
1742
static uint32_t getMaxNumAnnotations(InstrProfValueKind ValueProfKind) {
1743
if (ValueProfKind == IPVK_MemOPSize)
1744
return MaxNumMemOPAnnotations;
1745
if (ValueProfKind == llvm::IPVK_VTableTarget)
1746
return MaxNumVTableAnnotations;
1747
return MaxNumAnnotations;
1748
}
1749
1750
// Traverse all valuesites and annotate the instructions for all value kind.
1751
void PGOUseFunc::annotateValueSites() {
1752
if (isValueProfilingDisabled())
1753
return;
1754
1755
// Create the PGOFuncName meta data.
1756
createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1757
1758
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1759
annotateValueSites(Kind);
1760
}
1761
1762
// Annotate the instructions for a specific value kind.
1763
void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1764
assert(Kind <= IPVK_Last);
1765
unsigned ValueSiteIndex = 0;
1766
1767
unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1768
1769
// Since there isn't a reliable or fast way for profile reader to tell if a
1770
// profile is generated with `-enable-vtable-value-profiling` on, we run the
1771
// value profile collector over the function IR to find the instrumented sites
1772
// iff function profile records shows the number of instrumented vtable sites
1773
// is not zero. Function cfg already takes the number of instrumented
1774
// indirect call sites into account so it doesn't hash the number of
1775
// instrumented vtables; as a side effect it makes it easier to enable
1776
// profiling and profile use in two steps if needed.
1777
// TODO: Remove this if/when -enable-vtable-value-profiling is on by default.
1778
if (NumValueSites > 0 && Kind == IPVK_VTableTarget &&
1779
NumValueSites != FuncInfo.ValueSites[IPVK_VTableTarget].size() &&
1780
MaxNumVTableAnnotations != 0)
1781
FuncInfo.ValueSites[IPVK_VTableTarget] = VPC.get(IPVK_VTableTarget);
1782
auto &ValueSites = FuncInfo.ValueSites[Kind];
1783
if (NumValueSites != ValueSites.size()) {
1784
auto &Ctx = M->getContext();
1785
Ctx.diagnose(DiagnosticInfoPGOProfile(
1786
M->getName().data(),
1787
Twine("Inconsistent number of value sites for ") +
1788
Twine(ValueProfKindDescr[Kind]) + Twine(" profiling in \"") +
1789
F.getName().str() +
1790
Twine("\", possibly due to the use of a stale profile."),
1791
DS_Warning));
1792
return;
1793
}
1794
1795
for (VPCandidateInfo &I : ValueSites) {
1796
LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
1797
<< "): Index = " << ValueSiteIndex << " out of "
1798
<< NumValueSites << "\n");
1799
annotateValueSite(
1800
*M, *I.AnnotatedInst, ProfileRecord,
1801
static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1802
getMaxNumAnnotations(static_cast<InstrProfValueKind>(Kind)));
1803
ValueSiteIndex++;
1804
}
1805
}
1806
1807
// Collect the set of members for each Comdat in module M and store
1808
// in ComdatMembers.
1809
static void collectComdatMembers(
1810
Module &M,
1811
std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1812
if (!DoComdatRenaming)
1813
return;
1814
for (Function &F : M)
1815
if (Comdat *C = F.getComdat())
1816
ComdatMembers.insert(std::make_pair(C, &F));
1817
for (GlobalVariable &GV : M.globals())
1818
if (Comdat *C = GV.getComdat())
1819
ComdatMembers.insert(std::make_pair(C, &GV));
1820
for (GlobalAlias &GA : M.aliases())
1821
if (Comdat *C = GA.getComdat())
1822
ComdatMembers.insert(std::make_pair(C, &GA));
1823
}
1824
1825
// Return true if we should not find instrumentation data for this function
1826
static bool skipPGOUse(const Function &F) {
1827
if (F.isDeclaration())
1828
return true;
1829
// If there are too many critical edges, PGO might cause
1830
// compiler time problem. Skip PGO if the number of
1831
// critical edges execeed the threshold.
1832
unsigned NumCriticalEdges = 0;
1833
for (auto &BB : F) {
1834
const Instruction *TI = BB.getTerminator();
1835
for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
1836
if (isCriticalEdge(TI, I))
1837
NumCriticalEdges++;
1838
}
1839
}
1840
if (NumCriticalEdges > PGOFunctionCriticalEdgeThreshold) {
1841
LLVM_DEBUG(dbgs() << "In func " << F.getName()
1842
<< ", NumCriticalEdges=" << NumCriticalEdges
1843
<< " exceed the threshold. Skip PGO.\n");
1844
return true;
1845
}
1846
return false;
1847
}
1848
1849
// Return true if we should not instrument this function
1850
static bool skipPGOGen(const Function &F) {
1851
if (skipPGOUse(F))
1852
return true;
1853
if (F.hasFnAttribute(llvm::Attribute::Naked))
1854
return true;
1855
if (F.hasFnAttribute(llvm::Attribute::NoProfile))
1856
return true;
1857
if (F.hasFnAttribute(llvm::Attribute::SkipProfile))
1858
return true;
1859
if (F.getInstructionCount() < PGOFunctionSizeThreshold)
1860
return true;
1861
return false;
1862
}
1863
1864
static bool InstrumentAllFunctions(
1865
Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1866
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1867
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1868
// For the context-sensitve instrumentation, we should have a separated pass
1869
// (before LTO/ThinLTO linking) to create these variables.
1870
if (!IsCS && !PGOCtxProfLoweringPass::isContextualIRPGOEnabled())
1871
createIRLevelProfileFlagVar(M, /*IsCS=*/false);
1872
1873
Triple TT(M.getTargetTriple());
1874
LLVMContext &Ctx = M.getContext();
1875
if (!TT.isOSBinFormatELF() && EnableVTableValueProfiling)
1876
Ctx.diagnose(DiagnosticInfoPGOProfile(
1877
M.getName().data(),
1878
Twine("VTable value profiling is presently not "
1879
"supported for non-ELF object formats"),
1880
DS_Warning));
1881
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1882
collectComdatMembers(M, ComdatMembers);
1883
1884
for (auto &F : M) {
1885
if (skipPGOGen(F))
1886
continue;
1887
auto &TLI = LookupTLI(F);
1888
auto *BPI = LookupBPI(F);
1889
auto *BFI = LookupBFI(F);
1890
instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
1891
}
1892
return true;
1893
}
1894
1895
PreservedAnalyses
1896
PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &MAM) {
1897
createProfileFileNameVar(M, CSInstrName);
1898
// The variable in a comdat may be discarded by LTO. Ensure the declaration
1899
// will be retained.
1900
appendToCompilerUsed(M, createIRLevelProfileFlagVar(M, /*IsCS=*/true));
1901
if (ProfileSampling)
1902
createProfileSamplingVar(M);
1903
PreservedAnalyses PA;
1904
PA.preserve<FunctionAnalysisManagerModuleProxy>();
1905
PA.preserveSet<AllAnalysesOn<Function>>();
1906
return PA;
1907
}
1908
1909
PreservedAnalyses PGOInstrumentationGen::run(Module &M,
1910
ModuleAnalysisManager &MAM) {
1911
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1912
auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1913
return FAM.getResult<TargetLibraryAnalysis>(F);
1914
};
1915
auto LookupBPI = [&FAM](Function &F) {
1916
return &FAM.getResult<BranchProbabilityAnalysis>(F);
1917
};
1918
auto LookupBFI = [&FAM](Function &F) {
1919
return &FAM.getResult<BlockFrequencyAnalysis>(F);
1920
};
1921
1922
if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1923
return PreservedAnalyses::all();
1924
1925
return PreservedAnalyses::none();
1926
}
1927
1928
// Using the ratio b/w sums of profile count values and BFI count values to
1929
// adjust the func entry count.
1930
static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1931
BranchProbabilityInfo &NBPI) {
1932
Function &F = Func.getFunc();
1933
BlockFrequencyInfo NBFI(F, NBPI, LI);
1934
#ifndef NDEBUG
1935
auto BFIEntryCount = F.getEntryCount();
1936
assert(BFIEntryCount && (BFIEntryCount->getCount() > 0) &&
1937
"Invalid BFI Entrycount");
1938
#endif
1939
auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1940
auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1941
for (auto &BBI : F) {
1942
uint64_t CountValue = 0;
1943
uint64_t BFICountValue = 0;
1944
if (!Func.findBBInfo(&BBI))
1945
continue;
1946
auto BFICount = NBFI.getBlockProfileCount(&BBI);
1947
CountValue = *Func.getBBInfo(&BBI).Count;
1948
BFICountValue = *BFICount;
1949
SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1950
SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1951
}
1952
if (SumCount.isZero())
1953
return;
1954
1955
assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&
1956
"Incorrect sum of BFI counts");
1957
if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1958
return;
1959
double Scale = (SumCount / SumBFICount).convertToDouble();
1960
if (Scale < 1.001 && Scale > 0.999)
1961
return;
1962
1963
uint64_t FuncEntryCount = *Func.getBBInfo(&*F.begin()).Count;
1964
uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1965
if (NewEntryCount == 0)
1966
NewEntryCount = 1;
1967
if (NewEntryCount != FuncEntryCount) {
1968
F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1969
LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()
1970
<< ", entry_count " << FuncEntryCount << " --> "
1971
<< NewEntryCount << "\n");
1972
}
1973
}
1974
1975
// Compare the profile count values with BFI count values, and print out
1976
// the non-matching ones.
1977
static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1978
BranchProbabilityInfo &NBPI,
1979
uint64_t HotCountThreshold,
1980
uint64_t ColdCountThreshold) {
1981
Function &F = Func.getFunc();
1982
BlockFrequencyInfo NBFI(F, NBPI, LI);
1983
// bool PrintFunc = false;
1984
bool HotBBOnly = PGOVerifyHotBFI;
1985
StringRef Msg;
1986
OptimizationRemarkEmitter ORE(&F);
1987
1988
unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1989
for (auto &BBI : F) {
1990
uint64_t CountValue = 0;
1991
uint64_t BFICountValue = 0;
1992
1993
CountValue = Func.getBBInfo(&BBI).Count.value_or(CountValue);
1994
1995
BBNum++;
1996
if (CountValue)
1997
NonZeroBBNum++;
1998
auto BFICount = NBFI.getBlockProfileCount(&BBI);
1999
if (BFICount)
2000
BFICountValue = *BFICount;
2001
2002
if (HotBBOnly) {
2003
bool rawIsHot = CountValue >= HotCountThreshold;
2004
bool BFIIsHot = BFICountValue >= HotCountThreshold;
2005
bool rawIsCold = CountValue <= ColdCountThreshold;
2006
bool ShowCount = false;
2007
if (rawIsHot && !BFIIsHot) {
2008
Msg = "raw-Hot to BFI-nonHot";
2009
ShowCount = true;
2010
} else if (rawIsCold && BFIIsHot) {
2011
Msg = "raw-Cold to BFI-Hot";
2012
ShowCount = true;
2013
}
2014
if (!ShowCount)
2015
continue;
2016
} else {
2017
if ((CountValue < PGOVerifyBFICutoff) &&
2018
(BFICountValue < PGOVerifyBFICutoff))
2019
continue;
2020
uint64_t Diff = (BFICountValue >= CountValue)
2021
? BFICountValue - CountValue
2022
: CountValue - BFICountValue;
2023
if (Diff <= CountValue / 100 * PGOVerifyBFIRatio)
2024
continue;
2025
}
2026
BBMisMatchNum++;
2027
2028
ORE.emit([&]() {
2029
OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "bfi-verify",
2030
F.getSubprogram(), &BBI);
2031
Remark << "BB " << ore::NV("Block", BBI.getName())
2032
<< " Count=" << ore::NV("Count", CountValue)
2033
<< " BFI_Count=" << ore::NV("Count", BFICountValue);
2034
if (!Msg.empty())
2035
Remark << " (" << Msg << ")";
2036
return Remark;
2037
});
2038
}
2039
if (BBMisMatchNum)
2040
ORE.emit([&]() {
2041
return OptimizationRemarkAnalysis(DEBUG_TYPE, "bfi-verify",
2042
F.getSubprogram(), &F.getEntryBlock())
2043
<< "In Func " << ore::NV("Function", F.getName())
2044
<< ": Num_of_BB=" << ore::NV("Count", BBNum)
2045
<< ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
2046
<< ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
2047
});
2048
}
2049
2050
static bool annotateAllFunctions(
2051
Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
2052
vfs::FileSystem &FS,
2053
function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
2054
function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
2055
function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
2056
ProfileSummaryInfo *PSI, bool IsCS) {
2057
LLVM_DEBUG(dbgs() << "Read in profile counters: ");
2058
auto &Ctx = M.getContext();
2059
// Read the counter array from file.
2060
auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName, FS,
2061
ProfileRemappingFileName);
2062
if (Error E = ReaderOrErr.takeError()) {
2063
handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
2064
Ctx.diagnose(
2065
DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
2066
});
2067
return false;
2068
}
2069
2070
std::unique_ptr<IndexedInstrProfReader> PGOReader =
2071
std::move(ReaderOrErr.get());
2072
if (!PGOReader) {
2073
Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
2074
StringRef("Cannot get PGOReader")));
2075
return false;
2076
}
2077
if (!PGOReader->hasCSIRLevelProfile() && IsCS)
2078
return false;
2079
2080
// TODO: might need to change the warning once the clang option is finalized.
2081
if (!PGOReader->isIRLevelProfile()) {
2082
Ctx.diagnose(DiagnosticInfoPGOProfile(
2083
ProfileFileName.data(), "Not an IR level instrumentation profile"));
2084
return false;
2085
}
2086
if (PGOReader->functionEntryOnly()) {
2087
Ctx.diagnose(DiagnosticInfoPGOProfile(
2088
ProfileFileName.data(),
2089
"Function entry profiles are not yet supported for optimization"));
2090
return false;
2091
}
2092
2093
if (EnableVTableProfileUse) {
2094
for (GlobalVariable &G : M.globals()) {
2095
if (!G.hasName() || !G.hasMetadata(LLVMContext::MD_type))
2096
continue;
2097
2098
// Create the PGOFuncName meta data.
2099
createPGONameMetadata(G, getPGOName(G, false /* InLTO*/));
2100
}
2101
}
2102
2103
// Add the profile summary (read from the header of the indexed summary) here
2104
// so that we can use it below when reading counters (which checks if the
2105
// function should be marked with a cold or inlinehint attribute).
2106
M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
2107
IsCS ? ProfileSummary::PSK_CSInstr
2108
: ProfileSummary::PSK_Instr);
2109
PSI->refresh();
2110
2111
std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
2112
collectComdatMembers(M, ComdatMembers);
2113
std::vector<Function *> HotFunctions;
2114
std::vector<Function *> ColdFunctions;
2115
2116
// If the profile marked as always instrument the entry BB, do the
2117
// same. Note this can be overwritten by the internal option in CFGMST.h
2118
bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
2119
if (PGOInstrumentEntry.getNumOccurrences() > 0)
2120
InstrumentFuncEntry = PGOInstrumentEntry;
2121
InstrumentFuncEntry |= PGOCtxProfLoweringPass::isContextualIRPGOEnabled();
2122
2123
bool HasSingleByteCoverage = PGOReader->hasSingleByteCoverage();
2124
for (auto &F : M) {
2125
if (skipPGOUse(F))
2126
continue;
2127
auto &TLI = LookupTLI(F);
2128
auto *BPI = LookupBPI(F);
2129
auto *BFI = LookupBFI(F);
2130
if (!HasSingleByteCoverage) {
2131
// Split indirectbr critical edges here before computing the MST rather
2132
// than later in getInstrBB() to avoid invalidating it.
2133
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI,
2134
BFI);
2135
}
2136
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
2137
InstrumentFuncEntry, HasSingleByteCoverage);
2138
if (HasSingleByteCoverage) {
2139
Func.populateCoverage(PGOReader.get());
2140
continue;
2141
}
2142
// When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
2143
// it means the profile for the function is unrepresentative and this
2144
// function is actually hot / warm. We will reset the function hot / cold
2145
// attribute and drop all the profile counters.
2146
InstrProfRecord::CountPseudoKind PseudoKind = InstrProfRecord::NotPseudo;
2147
bool AllZeros = false;
2148
if (!Func.readCounters(PGOReader.get(), AllZeros, PseudoKind))
2149
continue;
2150
if (AllZeros) {
2151
F.setEntryCount(ProfileCount(0, Function::PCT_Real));
2152
if (Func.getProgramMaxCount() != 0)
2153
ColdFunctions.push_back(&F);
2154
continue;
2155
}
2156
if (PseudoKind != InstrProfRecord::NotPseudo) {
2157
// Clear function attribute cold.
2158
if (F.hasFnAttribute(Attribute::Cold))
2159
F.removeFnAttr(Attribute::Cold);
2160
// Set function attribute as hot.
2161
if (PseudoKind == InstrProfRecord::PseudoHot)
2162
F.addFnAttr(Attribute::Hot);
2163
continue;
2164
}
2165
Func.populateCounters();
2166
Func.setBranchWeights();
2167
Func.annotateValueSites();
2168
Func.annotateIrrLoopHeaderWeights();
2169
PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
2170
if (FreqAttr == PGOUseFunc::FFA_Cold)
2171
ColdFunctions.push_back(&F);
2172
else if (FreqAttr == PGOUseFunc::FFA_Hot)
2173
HotFunctions.push_back(&F);
2174
if (PGOViewCounts != PGOVCT_None &&
2175
(ViewBlockFreqFuncName.empty() ||
2176
F.getName() == ViewBlockFreqFuncName)) {
2177
LoopInfo LI{DominatorTree(F)};
2178
std::unique_ptr<BranchProbabilityInfo> NewBPI =
2179
std::make_unique<BranchProbabilityInfo>(F, LI);
2180
std::unique_ptr<BlockFrequencyInfo> NewBFI =
2181
std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
2182
if (PGOViewCounts == PGOVCT_Graph)
2183
NewBFI->view();
2184
else if (PGOViewCounts == PGOVCT_Text) {
2185
dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
2186
NewBFI->print(dbgs());
2187
}
2188
}
2189
if (PGOViewRawCounts != PGOVCT_None &&
2190
(ViewBlockFreqFuncName.empty() ||
2191
F.getName() == ViewBlockFreqFuncName)) {
2192
if (PGOViewRawCounts == PGOVCT_Graph)
2193
if (ViewBlockFreqFuncName.empty())
2194
WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2195
else
2196
ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
2197
else if (PGOViewRawCounts == PGOVCT_Text) {
2198
dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
2199
Func.dumpInfo();
2200
}
2201
}
2202
2203
if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
2204
LoopInfo LI{DominatorTree(F)};
2205
BranchProbabilityInfo NBPI(F, LI);
2206
2207
// Fix func entry count.
2208
if (PGOFixEntryCount)
2209
fixFuncEntryCount(Func, LI, NBPI);
2210
2211
// Verify BlockFrequency information.
2212
uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
2213
if (PGOVerifyHotBFI) {
2214
HotCountThreshold = PSI->getOrCompHotCountThreshold();
2215
ColdCountThreshold = PSI->getOrCompColdCountThreshold();
2216
}
2217
verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
2218
}
2219
}
2220
2221
// Set function hotness attribute from the profile.
2222
// We have to apply these attributes at the end because their presence
2223
// can affect the BranchProbabilityInfo of any callers, resulting in an
2224
// inconsistent MST between prof-gen and prof-use.
2225
for (auto &F : HotFunctions) {
2226
F->addFnAttr(Attribute::InlineHint);
2227
LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
2228
<< "\n");
2229
}
2230
for (auto &F : ColdFunctions) {
2231
// Only set when there is no Attribute::Hot set by the user. For Hot
2232
// attribute, user's annotation has the precedence over the profile.
2233
if (F->hasFnAttribute(Attribute::Hot)) {
2234
auto &Ctx = M.getContext();
2235
std::string Msg = std::string("Function ") + F->getName().str() +
2236
std::string(" is annotated as a hot function but"
2237
" the profile is cold");
2238
Ctx.diagnose(
2239
DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
2240
continue;
2241
}
2242
F->addFnAttr(Attribute::Cold);
2243
LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
2244
<< "\n");
2245
}
2246
return true;
2247
}
2248
2249
PGOInstrumentationUse::PGOInstrumentationUse(
2250
std::string Filename, std::string RemappingFilename, bool IsCS,
2251
IntrusiveRefCntPtr<vfs::FileSystem> VFS)
2252
: ProfileFileName(std::move(Filename)),
2253
ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS),
2254
FS(std::move(VFS)) {
2255
if (!PGOTestProfileFile.empty())
2256
ProfileFileName = PGOTestProfileFile;
2257
if (!PGOTestProfileRemappingFile.empty())
2258
ProfileRemappingFileName = PGOTestProfileRemappingFile;
2259
if (!FS)
2260
FS = vfs::getRealFileSystem();
2261
}
2262
2263
PreservedAnalyses PGOInstrumentationUse::run(Module &M,
2264
ModuleAnalysisManager &MAM) {
2265
2266
auto &FAM = MAM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
2267
auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
2268
return FAM.getResult<TargetLibraryAnalysis>(F);
2269
};
2270
auto LookupBPI = [&FAM](Function &F) {
2271
return &FAM.getResult<BranchProbabilityAnalysis>(F);
2272
};
2273
auto LookupBFI = [&FAM](Function &F) {
2274
return &FAM.getResult<BlockFrequencyAnalysis>(F);
2275
};
2276
2277
auto *PSI = &MAM.getResult<ProfileSummaryAnalysis>(M);
2278
if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName, *FS,
2279
LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
2280
return PreservedAnalyses::all();
2281
2282
return PreservedAnalyses::none();
2283
}
2284
2285
static std::string getSimpleNodeName(const BasicBlock *Node) {
2286
if (!Node->getName().empty())
2287
return Node->getName().str();
2288
2289
std::string SimpleNodeName;
2290
raw_string_ostream OS(SimpleNodeName);
2291
Node->printAsOperand(OS, false);
2292
return SimpleNodeName;
2293
}
2294
2295
void llvm::setProfMetadata(Module *M, Instruction *TI,
2296
ArrayRef<uint64_t> EdgeCounts, uint64_t MaxCount) {
2297
assert(MaxCount > 0 && "Bad max count");
2298
uint64_t Scale = calculateCountScale(MaxCount);
2299
SmallVector<unsigned, 4> Weights;
2300
for (const auto &ECI : EdgeCounts)
2301
Weights.push_back(scaleBranchCount(ECI, Scale));
2302
2303
LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
2304
: Weights) {
2305
dbgs() << W << " ";
2306
} dbgs() << "\n";);
2307
2308
misexpect::checkExpectAnnotations(*TI, Weights, /*IsFrontend=*/false);
2309
2310
setBranchWeights(*TI, Weights, /*IsExpected=*/false);
2311
if (EmitBranchProbability) {
2312
std::string BrCondStr = getBranchCondString(TI);
2313
if (BrCondStr.empty())
2314
return;
2315
2316
uint64_t WSum =
2317
std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2318
[](uint64_t w1, uint64_t w2) { return w1 + w2; });
2319
uint64_t TotalCount =
2320
std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2321
[](uint64_t c1, uint64_t c2) { return c1 + c2; });
2322
Scale = calculateCountScale(WSum);
2323
BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2324
scaleBranchCount(WSum, Scale));
2325
std::string BranchProbStr;
2326
raw_string_ostream OS(BranchProbStr);
2327
OS << BP;
2328
OS << " (total count : " << TotalCount << ")";
2329
OS.flush();
2330
Function *F = TI->getParent()->getParent();
2331
OptimizationRemarkEmitter ORE(F);
2332
ORE.emit([&]() {
2333
return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
2334
<< BrCondStr << " is true with probability : " << BranchProbStr;
2335
});
2336
}
2337
}
2338
2339
namespace llvm {
2340
2341
void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2342
MDBuilder MDB(M->getContext());
2343
TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2344
MDB.createIrrLoopHeaderWeight(Count));
2345
}
2346
2347
template <> struct GraphTraits<PGOUseFunc *> {
2348
using NodeRef = const BasicBlock *;
2349
using ChildIteratorType = const_succ_iterator;
2350
using nodes_iterator = pointer_iterator<Function::const_iterator>;
2351
2352
static NodeRef getEntryNode(const PGOUseFunc *G) {
2353
return &G->getFunc().front();
2354
}
2355
2356
static ChildIteratorType child_begin(const NodeRef N) {
2357
return succ_begin(N);
2358
}
2359
2360
static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2361
2362
static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2363
return nodes_iterator(G->getFunc().begin());
2364
}
2365
2366
static nodes_iterator nodes_end(const PGOUseFunc *G) {
2367
return nodes_iterator(G->getFunc().end());
2368
}
2369
};
2370
2371
template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2372
explicit DOTGraphTraits(bool isSimple = false)
2373
: DefaultDOTGraphTraits(isSimple) {}
2374
2375
static std::string getGraphName(const PGOUseFunc *G) {
2376
return std::string(G->getFunc().getName());
2377
}
2378
2379
std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2380
std::string Result;
2381
raw_string_ostream OS(Result);
2382
2383
OS << getSimpleNodeName(Node) << ":\\l";
2384
PGOUseBBInfo *BI = Graph->findBBInfo(Node);
2385
OS << "Count : ";
2386
if (BI && BI->Count)
2387
OS << *BI->Count << "\\l";
2388
else
2389
OS << "Unknown\\l";
2390
2391
if (!PGOInstrSelect)
2392
return Result;
2393
2394
for (const Instruction &I : *Node) {
2395
if (!isa<SelectInst>(&I))
2396
continue;
2397
// Display scaled counts for SELECT instruction:
2398
OS << "SELECT : { T = ";
2399
uint64_t TC, FC;
2400
bool HasProf = extractBranchWeights(I, TC, FC);
2401
if (!HasProf)
2402
OS << "Unknown, F = Unknown }\\l";
2403
else
2404
OS << TC << ", F = " << FC << " }\\l";
2405
}
2406
return Result;
2407
}
2408
};
2409
2410
} // end namespace llvm
2411
2412