Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Analysis/CtxProfAnalysis.cpp
213765 views
1
//===- CtxProfAnalysis.cpp - contextual profile analysis ------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// Implementation of the contextual profile analysis, which maintains contextual
10
// profiling info through IPO passes.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Analysis/CtxProfAnalysis.h"
15
#include "llvm/ADT/APInt.h"
16
#include "llvm/ADT/STLExtras.h"
17
#include "llvm/Analysis/CFG.h"
18
#include "llvm/IR/Analysis.h"
19
#include "llvm/IR/Dominators.h"
20
#include "llvm/IR/IntrinsicInst.h"
21
#include "llvm/IR/Module.h"
22
#include "llvm/IR/PassManager.h"
23
#include "llvm/ProfileData/PGOCtxProfReader.h"
24
#include "llvm/Support/CommandLine.h"
25
#include "llvm/Support/MemoryBuffer.h"
26
#include "llvm/Support/Path.h"
27
#include <deque>
28
#include <memory>
29
30
#define DEBUG_TYPE "ctx_prof"
31
32
using namespace llvm;
33
cl::opt<std::string>
34
UseCtxProfile("use-ctx-profile", cl::init(""), cl::Hidden,
35
cl::desc("Use the specified contextual profile file"));
36
37
static cl::opt<CtxProfAnalysisPrinterPass::PrintMode> PrintLevel(
38
"ctx-profile-printer-level",
39
cl::init(CtxProfAnalysisPrinterPass::PrintMode::YAML), cl::Hidden,
40
cl::values(clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::Everything,
41
"everything", "print everything - most verbose"),
42
clEnumValN(CtxProfAnalysisPrinterPass::PrintMode::YAML, "yaml",
43
"just the yaml representation of the profile")),
44
cl::desc("Verbosity level of the contextual profile printer pass."));
45
46
static cl::opt<bool> ForceIsInSpecializedModule(
47
"ctx-profile-force-is-specialized", cl::init(false),
48
cl::desc("Treat the given module as-if it were containing the "
49
"post-thinlink module containing the root"));
50
51
const char *AssignGUIDPass::GUIDMetadataName = "guid";
52
53
namespace llvm {
54
class ProfileAnnotatorImpl final {
55
friend class ProfileAnnotator;
56
class BBInfo;
57
struct EdgeInfo {
58
BBInfo *const Src;
59
BBInfo *const Dest;
60
std::optional<uint64_t> Count;
61
62
explicit EdgeInfo(BBInfo &Src, BBInfo &Dest) : Src(&Src), Dest(&Dest) {}
63
};
64
65
class BBInfo {
66
std::optional<uint64_t> Count;
67
// OutEdges is dimensioned to match the number of terminator operands.
68
// Entries in the vector match the index in the terminator operand list. In
69
// some cases - see `shouldExcludeEdge` and its implementation - an entry
70
// will be nullptr.
71
// InEdges doesn't have the above constraint.
72
SmallVector<EdgeInfo *> OutEdges;
73
SmallVector<EdgeInfo *> InEdges;
74
size_t UnknownCountOutEdges = 0;
75
size_t UnknownCountInEdges = 0;
76
77
// Pass AssumeAllKnown when we try to propagate counts from edges to BBs -
78
// because all the edge counters must be known.
79
// Return std::nullopt if there were no edges to sum. The user can decide
80
// how to interpret that.
81
std::optional<uint64_t> getEdgeSum(const SmallVector<EdgeInfo *> &Edges,
82
bool AssumeAllKnown) const {
83
std::optional<uint64_t> Sum;
84
for (const auto *E : Edges) {
85
// `Edges` may be `OutEdges`, case in which `E` could be nullptr.
86
if (E) {
87
if (!Sum.has_value())
88
Sum = 0;
89
*Sum += (AssumeAllKnown ? *E->Count : E->Count.value_or(0U));
90
}
91
}
92
return Sum;
93
}
94
95
bool computeCountFrom(const SmallVector<EdgeInfo *> &Edges) {
96
assert(!Count.has_value());
97
Count = getEdgeSum(Edges, true);
98
return Count.has_value();
99
}
100
101
void setSingleUnknownEdgeCount(SmallVector<EdgeInfo *> &Edges) {
102
uint64_t KnownSum = getEdgeSum(Edges, false).value_or(0U);
103
uint64_t EdgeVal = *Count > KnownSum ? *Count - KnownSum : 0U;
104
EdgeInfo *E = nullptr;
105
for (auto *I : Edges)
106
if (I && !I->Count.has_value()) {
107
E = I;
108
#ifdef NDEBUG
109
break;
110
#else
111
assert((!E || E == I) &&
112
"Expected exactly one edge to have an unknown count, "
113
"found a second one");
114
continue;
115
#endif
116
}
117
assert(E && "Expected exactly one edge to have an unknown count");
118
assert(!E->Count.has_value());
119
E->Count = EdgeVal;
120
assert(E->Src->UnknownCountOutEdges > 0);
121
assert(E->Dest->UnknownCountInEdges > 0);
122
--E->Src->UnknownCountOutEdges;
123
--E->Dest->UnknownCountInEdges;
124
}
125
126
public:
127
BBInfo(size_t NumInEdges, size_t NumOutEdges, std::optional<uint64_t> Count)
128
: Count(Count) {
129
// For in edges, we just want to pre-allocate enough space, since we know
130
// it at this stage. For out edges, we will insert edges at the indices
131
// corresponding to positions in this BB's terminator instruction, so we
132
// construct a default (nullptr values)-initialized vector. A nullptr edge
133
// corresponds to those that are excluded (see shouldExcludeEdge).
134
InEdges.reserve(NumInEdges);
135
OutEdges.resize(NumOutEdges);
136
}
137
138
bool tryTakeCountFromKnownOutEdges(const BasicBlock &BB) {
139
if (!UnknownCountOutEdges) {
140
return computeCountFrom(OutEdges);
141
}
142
return false;
143
}
144
145
bool tryTakeCountFromKnownInEdges(const BasicBlock &BB) {
146
if (!UnknownCountInEdges) {
147
return computeCountFrom(InEdges);
148
}
149
return false;
150
}
151
152
void addInEdge(EdgeInfo &Info) {
153
InEdges.push_back(&Info);
154
++UnknownCountInEdges;
155
}
156
157
// For the out edges, we care about the position we place them in, which is
158
// the position in terminator instruction's list (at construction). Later,
159
// we build branch_weights metadata with edge frequency values matching
160
// these positions.
161
void addOutEdge(size_t Index, EdgeInfo &Info) {
162
OutEdges[Index] = &Info;
163
++UnknownCountOutEdges;
164
}
165
166
bool hasCount() const { return Count.has_value(); }
167
168
uint64_t getCount() const { return *Count; }
169
170
bool trySetSingleUnknownInEdgeCount() {
171
if (UnknownCountInEdges == 1) {
172
setSingleUnknownEdgeCount(InEdges);
173
return true;
174
}
175
return false;
176
}
177
178
bool trySetSingleUnknownOutEdgeCount() {
179
if (UnknownCountOutEdges == 1) {
180
setSingleUnknownEdgeCount(OutEdges);
181
return true;
182
}
183
return false;
184
}
185
size_t getNumOutEdges() const { return OutEdges.size(); }
186
187
uint64_t getEdgeCount(size_t Index) const {
188
if (auto *E = OutEdges[Index])
189
return *E->Count;
190
return 0U;
191
}
192
};
193
194
const Function &F;
195
ArrayRef<uint64_t> Counters;
196
// To be accessed through getBBInfo() after construction.
197
std::map<const BasicBlock *, BBInfo> BBInfos;
198
std::vector<EdgeInfo> EdgeInfos;
199
200
// The only criteria for exclusion is faux suspend -> exit edges in presplit
201
// coroutines. The API serves for readability, currently.
202
bool shouldExcludeEdge(const BasicBlock &Src, const BasicBlock &Dest) const {
203
return llvm::isPresplitCoroSuspendExitEdge(Src, Dest);
204
}
205
206
BBInfo &getBBInfo(const BasicBlock &BB) { return BBInfos.find(&BB)->second; }
207
208
const BBInfo &getBBInfo(const BasicBlock &BB) const {
209
return BBInfos.find(&BB)->second;
210
}
211
212
// validation function after we propagate the counters: all BBs and edges'
213
// counters must have a value.
214
bool allCountersAreAssigned() const {
215
for (const auto &BBInfo : BBInfos)
216
if (!BBInfo.second.hasCount())
217
return false;
218
for (const auto &EdgeInfo : EdgeInfos)
219
if (!EdgeInfo.Count.has_value())
220
return false;
221
return true;
222
}
223
224
/// Check that all paths from the entry basic block that use edges with
225
/// non-zero counts arrive at a basic block with no successors (i.e. "exit")
226
bool allTakenPathsExit() const {
227
std::deque<const BasicBlock *> Worklist;
228
DenseSet<const BasicBlock *> Visited;
229
Worklist.push_back(&F.getEntryBlock());
230
bool HitExit = false;
231
while (!Worklist.empty()) {
232
const auto *BB = Worklist.front();
233
Worklist.pop_front();
234
if (!Visited.insert(BB).second)
235
continue;
236
if (succ_size(BB) == 0) {
237
if (isa<UnreachableInst>(BB->getTerminator()))
238
return false;
239
HitExit = true;
240
continue;
241
}
242
if (succ_size(BB) == 1) {
243
Worklist.push_back(BB->getUniqueSuccessor());
244
continue;
245
}
246
const auto &BBInfo = getBBInfo(*BB);
247
bool HasAWayOut = false;
248
for (auto I = 0U; I < BB->getTerminator()->getNumSuccessors(); ++I) {
249
const auto *Succ = BB->getTerminator()->getSuccessor(I);
250
if (!shouldExcludeEdge(*BB, *Succ)) {
251
if (BBInfo.getEdgeCount(I) > 0) {
252
HasAWayOut = true;
253
Worklist.push_back(Succ);
254
}
255
}
256
}
257
if (!HasAWayOut)
258
return false;
259
}
260
return HitExit;
261
}
262
263
bool allNonColdSelectsHaveProfile() const {
264
for (const auto &BB : F) {
265
if (getBBInfo(BB).getCount() > 0) {
266
for (const auto &I : BB) {
267
if (const auto *SI = dyn_cast<SelectInst>(&I)) {
268
if (const auto *Inst = CtxProfAnalysis::getSelectInstrumentation(
269
*const_cast<SelectInst *>(SI))) {
270
auto Index = Inst->getIndex()->getZExtValue();
271
assert(Index < Counters.size());
272
if (Counters[Index] == 0)
273
return false;
274
}
275
}
276
}
277
}
278
}
279
return true;
280
}
281
282
// This is an adaptation of PGOUseFunc::populateCounters.
283
// FIXME(mtrofin): look into factoring the code to share one implementation.
284
void propagateCounterValues() {
285
bool KeepGoing = true;
286
while (KeepGoing) {
287
KeepGoing = false;
288
for (const auto &BB : F) {
289
auto &Info = getBBInfo(BB);
290
if (!Info.hasCount())
291
KeepGoing |= Info.tryTakeCountFromKnownOutEdges(BB) ||
292
Info.tryTakeCountFromKnownInEdges(BB);
293
if (Info.hasCount()) {
294
KeepGoing |= Info.trySetSingleUnknownOutEdgeCount();
295
KeepGoing |= Info.trySetSingleUnknownInEdgeCount();
296
}
297
}
298
}
299
assert(allCountersAreAssigned() &&
300
"[ctx-prof] Expected all counters have been assigned.");
301
assert(allTakenPathsExit() &&
302
"[ctx-prof] Encountered a BB with more than one successor, where "
303
"all outgoing edges have a 0 count. This occurs in non-exiting "
304
"functions (message pumps, usually) which are not supported in the "
305
"contextual profiling case");
306
assert(allNonColdSelectsHaveProfile() &&
307
"[ctx-prof] All non-cold select instructions were expected to have "
308
"a profile.");
309
}
310
311
public:
312
ProfileAnnotatorImpl(const Function &F, ArrayRef<uint64_t> Counters)
313
: F(F), Counters(Counters) {
314
assert(!F.isDeclaration());
315
assert(!Counters.empty());
316
size_t NrEdges = 0;
317
for (const auto &BB : F) {
318
std::optional<uint64_t> Count;
319
if (auto *Ins = CtxProfAnalysis::getBBInstrumentation(
320
const_cast<BasicBlock &>(BB))) {
321
auto Index = Ins->getIndex()->getZExtValue();
322
assert(Index < Counters.size() &&
323
"The index must be inside the counters vector by construction - "
324
"tripping this assertion indicates a bug in how the contextual "
325
"profile is managed by IPO transforms");
326
(void)Index;
327
Count = Counters[Ins->getIndex()->getZExtValue()];
328
} else if (isa<UnreachableInst>(BB.getTerminator())) {
329
// The program presumably didn't crash.
330
Count = 0;
331
}
332
auto [It, Ins] =
333
BBInfos.insert({&BB, {pred_size(&BB), succ_size(&BB), Count}});
334
(void)Ins;
335
assert(Ins && "We iterate through the function's BBs, no reason to "
336
"insert one more than once");
337
NrEdges += llvm::count_if(successors(&BB), [&](const auto *Succ) {
338
return !shouldExcludeEdge(BB, *Succ);
339
});
340
}
341
// Pre-allocate the vector, we want references to its contents to be stable.
342
EdgeInfos.reserve(NrEdges);
343
for (const auto &BB : F) {
344
auto &Info = getBBInfo(BB);
345
for (auto I = 0U; I < BB.getTerminator()->getNumSuccessors(); ++I) {
346
const auto *Succ = BB.getTerminator()->getSuccessor(I);
347
if (!shouldExcludeEdge(BB, *Succ)) {
348
auto &EI = EdgeInfos.emplace_back(getBBInfo(BB), getBBInfo(*Succ));
349
Info.addOutEdge(I, EI);
350
getBBInfo(*Succ).addInEdge(EI);
351
}
352
}
353
}
354
assert(EdgeInfos.capacity() == NrEdges &&
355
"The capacity of EdgeInfos should have stayed unchanged it was "
356
"populated, because we need pointers to its contents to be stable");
357
propagateCounterValues();
358
}
359
360
uint64_t getBBCount(const BasicBlock &BB) { return getBBInfo(BB).getCount(); }
361
};
362
363
} // namespace llvm
364
365
ProfileAnnotator::ProfileAnnotator(const Function &F,
366
ArrayRef<uint64_t> RawCounters)
367
: PImpl(std::make_unique<ProfileAnnotatorImpl>(F, RawCounters)) {}
368
369
ProfileAnnotator::~ProfileAnnotator() = default;
370
371
uint64_t ProfileAnnotator::getBBCount(const BasicBlock &BB) const {
372
return PImpl->getBBCount(BB);
373
}
374
375
bool ProfileAnnotator::getSelectInstrProfile(SelectInst &SI,
376
uint64_t &TrueCount,
377
uint64_t &FalseCount) const {
378
const auto &BBInfo = PImpl->getBBInfo(*SI.getParent());
379
TrueCount = FalseCount = 0;
380
if (BBInfo.getCount() == 0)
381
return false;
382
383
auto *Step = CtxProfAnalysis::getSelectInstrumentation(SI);
384
if (!Step)
385
return false;
386
auto Index = Step->getIndex()->getZExtValue();
387
assert(Index < PImpl->Counters.size() &&
388
"The index of the step instruction must be inside the "
389
"counters vector by "
390
"construction - tripping this assertion indicates a bug in "
391
"how the contextual profile is managed by IPO transforms");
392
auto TotalCount = BBInfo.getCount();
393
TrueCount = PImpl->Counters[Index];
394
FalseCount = (TotalCount > TrueCount ? TotalCount - TrueCount : 0U);
395
return true;
396
}
397
398
bool ProfileAnnotator::getOutgoingBranchWeights(
399
BasicBlock &BB, SmallVectorImpl<uint64_t> &Profile,
400
uint64_t &MaxCount) const {
401
Profile.clear();
402
403
if (succ_size(&BB) < 2)
404
return false;
405
406
auto *Term = BB.getTerminator();
407
Profile.resize(Term->getNumSuccessors());
408
409
const auto &BBInfo = PImpl->getBBInfo(BB);
410
MaxCount = 0;
411
for (unsigned SuccIdx = 0, Size = BBInfo.getNumOutEdges(); SuccIdx < Size;
412
++SuccIdx) {
413
uint64_t EdgeCount = BBInfo.getEdgeCount(SuccIdx);
414
if (EdgeCount > MaxCount)
415
MaxCount = EdgeCount;
416
Profile[SuccIdx] = EdgeCount;
417
}
418
return MaxCount > 0;
419
}
420
421
PreservedAnalyses AssignGUIDPass::run(Module &M, ModuleAnalysisManager &MAM) {
422
for (auto &F : M.functions()) {
423
if (F.isDeclaration())
424
continue;
425
if (F.getMetadata(GUIDMetadataName))
426
continue;
427
const GlobalValue::GUID GUID = F.getGUID();
428
F.setMetadata(GUIDMetadataName,
429
MDNode::get(M.getContext(),
430
{ConstantAsMetadata::get(ConstantInt::get(
431
Type::getInt64Ty(M.getContext()), GUID))}));
432
}
433
return PreservedAnalyses::none();
434
}
435
436
GlobalValue::GUID AssignGUIDPass::getGUID(const Function &F) {
437
if (F.isDeclaration()) {
438
assert(GlobalValue::isExternalLinkage(F.getLinkage()));
439
return F.getGUID();
440
}
441
auto *MD = F.getMetadata(GUIDMetadataName);
442
assert(MD && "guid not found for defined function");
443
return cast<ConstantInt>(cast<ConstantAsMetadata>(MD->getOperand(0))
444
->getValue()
445
->stripPointerCasts())
446
->getZExtValue();
447
}
448
AnalysisKey CtxProfAnalysis::Key;
449
450
CtxProfAnalysis::CtxProfAnalysis(std::optional<StringRef> Profile)
451
: Profile([&]() -> std::optional<StringRef> {
452
if (Profile)
453
return *Profile;
454
if (UseCtxProfile.getNumOccurrences())
455
return UseCtxProfile;
456
return std::nullopt;
457
}()) {}
458
459
PGOContextualProfile CtxProfAnalysis::run(Module &M,
460
ModuleAnalysisManager &MAM) {
461
if (!Profile)
462
return {};
463
ErrorOr<std::unique_ptr<MemoryBuffer>> MB = MemoryBuffer::getFile(*Profile);
464
if (auto EC = MB.getError()) {
465
M.getContext().emitError("could not open contextual profile file: " +
466
EC.message());
467
return {};
468
}
469
PGOCtxProfileReader Reader(MB.get()->getBuffer());
470
auto MaybeProfiles = Reader.loadProfiles();
471
if (!MaybeProfiles) {
472
M.getContext().emitError("contextual profile file is invalid: " +
473
toString(MaybeProfiles.takeError()));
474
return {};
475
}
476
477
// FIXME: We should drive this from ThinLTO, but for the time being, use the
478
// module name as indicator.
479
// We want to *only* keep the contextual profiles in modules that capture
480
// context trees. That allows us to compute specific PSIs, for example.
481
auto DetermineRootsInModule = [&M]() -> const DenseSet<GlobalValue::GUID> {
482
DenseSet<GlobalValue::GUID> ProfileRootsInModule;
483
auto ModName = M.getName();
484
auto Filename = sys::path::filename(ModName);
485
// Drop the file extension.
486
Filename = Filename.substr(0, Filename.find_last_of('.'));
487
// See if it parses
488
APInt Guid;
489
// getAsInteger returns true if there are more chars to read other than the
490
// integer. So the "false" test is what we want.
491
if (!Filename.getAsInteger(0, Guid))
492
ProfileRootsInModule.insert(Guid.getZExtValue());
493
return ProfileRootsInModule;
494
};
495
const auto ProfileRootsInModule = DetermineRootsInModule();
496
PGOContextualProfile Result;
497
498
// the logic from here on allows for modules that contain - by design - more
499
// than one root. We currently don't support that, because the determination
500
// happens based on the module name matching the root guid, but the logic can
501
// avoid assuming that.
502
if (!ProfileRootsInModule.empty()) {
503
Result.IsInSpecializedModule = true;
504
// Trim first the roots that aren't in this module.
505
for (auto &[RootGuid, _] :
506
llvm::make_early_inc_range(MaybeProfiles->Contexts))
507
if (!ProfileRootsInModule.contains(RootGuid))
508
MaybeProfiles->Contexts.erase(RootGuid);
509
// we can also drop the flat profiles
510
MaybeProfiles->FlatProfiles.clear();
511
}
512
513
for (const auto &F : M) {
514
if (F.isDeclaration())
515
continue;
516
auto GUID = AssignGUIDPass::getGUID(F);
517
assert(GUID && "guid not found for defined function");
518
const auto &Entry = F.begin();
519
uint32_t MaxCounters = 0; // we expect at least a counter.
520
for (const auto &I : *Entry)
521
if (auto *C = dyn_cast<InstrProfIncrementInst>(&I)) {
522
MaxCounters =
523
static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
524
break;
525
}
526
if (!MaxCounters)
527
continue;
528
uint32_t MaxCallsites = 0;
529
for (const auto &BB : F)
530
for (const auto &I : BB)
531
if (auto *C = dyn_cast<InstrProfCallsite>(&I)) {
532
MaxCallsites =
533
static_cast<uint32_t>(C->getNumCounters()->getZExtValue());
534
break;
535
}
536
auto [It, Ins] = Result.FuncInfo.insert(
537
{GUID, PGOContextualProfile::FunctionInfo(F.getName())});
538
(void)Ins;
539
assert(Ins);
540
It->second.NextCallsiteIndex = MaxCallsites;
541
It->second.NextCounterIndex = MaxCounters;
542
}
543
// If we made it this far, the Result is valid - which we mark by setting
544
// .Profiles.
545
Result.Profiles = std::move(*MaybeProfiles);
546
Result.initIndex();
547
return Result;
548
}
549
550
GlobalValue::GUID
551
PGOContextualProfile::getDefinedFunctionGUID(const Function &F) const {
552
if (auto It = FuncInfo.find(AssignGUIDPass::getGUID(F)); It != FuncInfo.end())
553
return It->first;
554
return 0;
555
}
556
557
CtxProfAnalysisPrinterPass::CtxProfAnalysisPrinterPass(raw_ostream &OS)
558
: OS(OS), Mode(PrintLevel) {}
559
560
PreservedAnalyses CtxProfAnalysisPrinterPass::run(Module &M,
561
ModuleAnalysisManager &MAM) {
562
CtxProfAnalysis::Result &C = MAM.getResult<CtxProfAnalysis>(M);
563
if (C.contexts().empty()) {
564
OS << "No contextual profile was provided.\n";
565
return PreservedAnalyses::all();
566
}
567
568
if (Mode == PrintMode::Everything) {
569
OS << "Function Info:\n";
570
for (const auto &[Guid, FuncInfo] : C.FuncInfo)
571
OS << Guid << " : " << FuncInfo.Name
572
<< ". MaxCounterID: " << FuncInfo.NextCounterIndex
573
<< ". MaxCallsiteID: " << FuncInfo.NextCallsiteIndex << "\n";
574
}
575
576
if (Mode == PrintMode::Everything)
577
OS << "\nCurrent Profile:\n";
578
convertCtxProfToYaml(OS, C.profiles());
579
OS << "\n";
580
if (Mode == PrintMode::YAML)
581
return PreservedAnalyses::all();
582
583
OS << "\nFlat Profile:\n";
584
auto Flat = C.flatten();
585
for (const auto &[Guid, Counters] : Flat) {
586
OS << Guid << " : ";
587
for (auto V : Counters)
588
OS << V << " ";
589
OS << "\n";
590
}
591
return PreservedAnalyses::all();
592
}
593
594
InstrProfCallsite *CtxProfAnalysis::getCallsiteInstrumentation(CallBase &CB) {
595
if (!InstrProfCallsite::canInstrumentCallsite(CB))
596
return nullptr;
597
for (auto *Prev = CB.getPrevNode(); Prev; Prev = Prev->getPrevNode()) {
598
if (auto *IPC = dyn_cast<InstrProfCallsite>(Prev))
599
return IPC;
600
assert(!isa<CallBase>(Prev) &&
601
"didn't expect to find another call, that's not the callsite "
602
"instrumentation, before an instrumentable callsite");
603
}
604
return nullptr;
605
}
606
607
InstrProfIncrementInst *CtxProfAnalysis::getBBInstrumentation(BasicBlock &BB) {
608
for (auto &I : BB)
609
if (auto *Incr = dyn_cast<InstrProfIncrementInst>(&I))
610
if (!isa<InstrProfIncrementInstStep>(&I))
611
return Incr;
612
return nullptr;
613
}
614
615
InstrProfIncrementInstStep *
616
CtxProfAnalysis::getSelectInstrumentation(SelectInst &SI) {
617
Instruction *Prev = &SI;
618
while ((Prev = Prev->getPrevNode()))
619
if (auto *Step = dyn_cast<InstrProfIncrementInstStep>(Prev))
620
return Step;
621
return nullptr;
622
}
623
624
template <class ProfTy>
625
static void preorderVisitOneRoot(ProfTy &Profile,
626
function_ref<void(ProfTy &)> Visitor) {
627
std::function<void(ProfTy &)> Traverser = [&](auto &Ctx) {
628
Visitor(Ctx);
629
for (auto &[_, SubCtxSet] : Ctx.callsites())
630
for (auto &[__, Subctx] : SubCtxSet)
631
Traverser(Subctx);
632
};
633
Traverser(Profile);
634
}
635
636
template <class ProfilesTy, class ProfTy>
637
static void preorderVisit(ProfilesTy &Profiles,
638
function_ref<void(ProfTy &)> Visitor) {
639
for (auto &[_, P] : Profiles)
640
preorderVisitOneRoot<ProfTy>(P, Visitor);
641
}
642
643
void PGOContextualProfile::initIndex() {
644
// Initialize the head of the index list for each function. We don't need it
645
// after this point.
646
DenseMap<GlobalValue::GUID, PGOCtxProfContext *> InsertionPoints;
647
for (auto &[Guid, FI] : FuncInfo)
648
InsertionPoints[Guid] = &FI.Index;
649
preorderVisit<PGOCtxProfContext::CallTargetMapTy, PGOCtxProfContext>(
650
Profiles.Contexts, [&](PGOCtxProfContext &Ctx) {
651
auto InsertIt = InsertionPoints.find(Ctx.guid());
652
if (InsertIt == InsertionPoints.end())
653
return;
654
// Insert at the end of the list. Since we traverse in preorder, it
655
// means that when we iterate the list from the beginning, we'd
656
// encounter the contexts in the order we would have, should we have
657
// performed a full preorder traversal.
658
InsertIt->second->Next = &Ctx;
659
Ctx.Previous = InsertIt->second;
660
InsertIt->second = &Ctx;
661
});
662
}
663
664
bool PGOContextualProfile::isInSpecializedModule() const {
665
return ForceIsInSpecializedModule.getNumOccurrences() > 0
666
? ForceIsInSpecializedModule
667
: IsInSpecializedModule;
668
}
669
670
void PGOContextualProfile::update(Visitor V, const Function &F) {
671
assert(isFunctionKnown(F));
672
GlobalValue::GUID G = getDefinedFunctionGUID(F);
673
for (auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
674
Node = Node->Next)
675
V(*reinterpret_cast<PGOCtxProfContext *>(Node));
676
}
677
678
void PGOContextualProfile::visit(ConstVisitor V, const Function *F) const {
679
if (!F)
680
return preorderVisit<const PGOCtxProfContext::CallTargetMapTy,
681
const PGOCtxProfContext>(Profiles.Contexts, V);
682
assert(isFunctionKnown(*F));
683
GlobalValue::GUID G = getDefinedFunctionGUID(*F);
684
for (const auto *Node = FuncInfo.find(G)->second.Index.Next; Node;
685
Node = Node->Next)
686
V(*reinterpret_cast<const PGOCtxProfContext *>(Node));
687
}
688
689
const CtxProfFlatProfile PGOContextualProfile::flatten() const {
690
CtxProfFlatProfile Flat;
691
auto Accummulate = [](SmallVectorImpl<uint64_t> &Into,
692
const SmallVectorImpl<uint64_t> &From,
693
uint64_t SamplingRate) {
694
if (Into.empty())
695
Into.resize(From.size());
696
assert(Into.size() == From.size() &&
697
"All contexts corresponding to a function should have the exact "
698
"same number of counters.");
699
for (size_t I = 0, E = Into.size(); I < E; ++I)
700
Into[I] += From[I] * SamplingRate;
701
};
702
703
for (const auto &[_, CtxRoot] : Profiles.Contexts) {
704
const uint64_t SamplingFactor = CtxRoot.getTotalRootEntryCount();
705
preorderVisitOneRoot<const PGOCtxProfContext>(
706
CtxRoot, [&](const PGOCtxProfContext &Ctx) {
707
Accummulate(Flat[Ctx.guid()], Ctx.counters(), SamplingFactor);
708
});
709
710
for (const auto &[G, Unh] : CtxRoot.getUnhandled())
711
Accummulate(Flat[G], Unh, SamplingFactor);
712
}
713
// We don't sample "Flat" currently, so sampling rate is 1.
714
for (const auto &[G, FC] : Profiles.FlatProfiles)
715
Accummulate(Flat[G], FC, /*SamplingRate=*/1);
716
return Flat;
717
}
718
719
const CtxProfFlatIndirectCallProfile
720
PGOContextualProfile::flattenVirtCalls() const {
721
CtxProfFlatIndirectCallProfile Ret;
722
for (const auto &[_, CtxRoot] : Profiles.Contexts) {
723
const uint64_t TotalRootEntryCount = CtxRoot.getTotalRootEntryCount();
724
preorderVisitOneRoot<const PGOCtxProfContext>(
725
CtxRoot, [&](const PGOCtxProfContext &Ctx) {
726
auto &Targets = Ret[Ctx.guid()];
727
for (const auto &[ID, SubctxSet] : Ctx.callsites())
728
for (const auto &Subctx : SubctxSet)
729
Targets[ID][Subctx.first] +=
730
Subctx.second.getEntrycount() * TotalRootEntryCount;
731
});
732
}
733
return Ret;
734
}
735
736
void CtxProfAnalysis::collectIndirectCallPromotionList(
737
CallBase &IC, Result &Profile,
738
SetVector<std::pair<CallBase *, Function *>> &Candidates) {
739
const auto *Instr = CtxProfAnalysis::getCallsiteInstrumentation(IC);
740
if (!Instr)
741
return;
742
Module &M = *IC.getParent()->getModule();
743
const uint32_t CallID = Instr->getIndex()->getZExtValue();
744
Profile.visit(
745
[&](const PGOCtxProfContext &Ctx) {
746
const auto &Targets = Ctx.callsites().find(CallID);
747
if (Targets == Ctx.callsites().end())
748
return;
749
for (const auto &[Guid, _] : Targets->second)
750
if (auto Name = Profile.getFunctionName(Guid); !Name.empty())
751
if (auto *Target = M.getFunction(Name))
752
if (Target->hasFnAttribute(Attribute::AlwaysInline))
753
Candidates.insert({&IC, Target});
754
},
755
IC.getCaller());
756
}
757
758