Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
35266 views
1
//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file implements the SampleProfileMatcher used for stale
10
// profile matching.
11
//
12
//===----------------------------------------------------------------------===//
13
14
#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
15
#include "llvm/IR/IntrinsicInst.h"
16
#include "llvm/IR/MDBuilder.h"
17
#include "llvm/Support/CommandLine.h"
18
19
using namespace llvm;
20
using namespace sampleprof;
21
22
#define DEBUG_TYPE "sample-profile-matcher"
23
24
static cl::opt<unsigned> FuncProfileSimilarityThreshold(
25
"func-profile-similarity-threshold", cl::Hidden, cl::init(80),
26
cl::desc("Consider a profile matches a function if the similarity of their "
27
"callee sequences is above the specified percentile."));
28
29
static cl::opt<unsigned> MinFuncCountForCGMatching(
30
"min-func-count-for-cg-matching", cl::Hidden, cl::init(5),
31
cl::desc("The minimum number of basic blocks required for a function to "
32
"run stale profile call graph matching."));
33
34
static cl::opt<unsigned> MinCallCountForCGMatching(
35
"min-call-count-for-cg-matching", cl::Hidden, cl::init(3),
36
cl::desc("The minimum number of call anchors required for a function to "
37
"run stale profile call graph matching."));
38
39
extern cl::opt<bool> SalvageStaleProfile;
40
extern cl::opt<bool> SalvageUnusedProfile;
41
extern cl::opt<bool> PersistProfileStaleness;
42
extern cl::opt<bool> ReportProfileStaleness;
43
44
static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(
45
"salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),
46
cl::desc("The maximum number of callsites in a function, above which stale "
47
"profile matching will be skipped."));
48
49
void SampleProfileMatcher::findIRAnchors(const Function &F,
50
AnchorMap &IRAnchors) const {
51
// For inlined code, recover the original callsite and callee by finding the
52
// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
53
// top-level frame is "main:1", the callsite is "1" and the callee is "foo".
54
auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
55
assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
56
const DILocation *PrevDIL = nullptr;
57
do {
58
PrevDIL = DIL;
59
DIL = DIL->getInlinedAt();
60
} while (DIL->getInlinedAt());
61
62
LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
63
DIL, FunctionSamples::ProfileIsFS);
64
StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
65
return std::make_pair(Callsite, FunctionId(CalleeName));
66
};
67
68
auto GetCanonicalCalleeName = [](const CallBase *CB) {
69
StringRef CalleeName = UnknownIndirectCallee;
70
if (Function *Callee = CB->getCalledFunction())
71
CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
72
return CalleeName;
73
};
74
75
// Extract profile matching anchors in the IR.
76
for (auto &BB : F) {
77
for (auto &I : BB) {
78
DILocation *DIL = I.getDebugLoc();
79
if (!DIL)
80
continue;
81
82
if (FunctionSamples::ProfileIsProbeBased) {
83
if (auto Probe = extractProbe(I)) {
84
// Flatten inlined IR for the matching.
85
if (DIL->getInlinedAt()) {
86
IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
87
} else {
88
// Use empty StringRef for basic block probe.
89
StringRef CalleeName;
90
if (const auto *CB = dyn_cast<CallBase>(&I)) {
91
// Skip the probe inst whose callee name is "llvm.pseudoprobe".
92
if (!isa<IntrinsicInst>(&I))
93
CalleeName = GetCanonicalCalleeName(CB);
94
}
95
LineLocation Loc = LineLocation(Probe->Id, 0);
96
IRAnchors.emplace(Loc, FunctionId(CalleeName));
97
}
98
}
99
} else {
100
// TODO: For line-number based profile(AutoFDO), currently only support
101
// find callsite anchors. In future, we need to parse all the non-call
102
// instructions to extract the line locations for profile matching.
103
if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
104
continue;
105
106
if (DIL->getInlinedAt()) {
107
IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
108
} else {
109
LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(
110
DIL, FunctionSamples::ProfileIsFS);
111
StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
112
IRAnchors.emplace(Callsite, FunctionId(CalleeName));
113
}
114
}
115
}
116
}
117
}
118
119
void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
120
AnchorMap &ProfileAnchors) const {
121
auto isInvalidLineOffset = [](uint32_t LineOffset) {
122
return LineOffset & 0x8000;
123
};
124
125
auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
126
AnchorMap &ProfileAnchors) {
127
auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
128
if (!Ret.second) {
129
// For multiple callees, which indicates it's an indirect call, we use a
130
// dummy name(UnknownIndirectCallee) as the indrect callee name.
131
Ret.first->second = FunctionId(UnknownIndirectCallee);
132
}
133
};
134
135
for (const auto &I : FS.getBodySamples()) {
136
const LineLocation &Loc = I.first;
137
if (isInvalidLineOffset(Loc.LineOffset))
138
continue;
139
for (const auto &C : I.second.getCallTargets())
140
InsertAnchor(Loc, C.first, ProfileAnchors);
141
}
142
143
for (const auto &I : FS.getCallsiteSamples()) {
144
const LineLocation &Loc = I.first;
145
if (isInvalidLineOffset(Loc.LineOffset))
146
continue;
147
for (const auto &C : I.second)
148
InsertAnchor(Loc, C.first, ProfileAnchors);
149
}
150
}
151
152
bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,
153
Function *&FuncWithoutProfile) {
154
FuncWithoutProfile = nullptr;
155
auto R = FunctionsWithoutProfile.find(IRFuncName);
156
if (R != FunctionsWithoutProfile.end())
157
FuncWithoutProfile = R->second;
158
return !FuncWithoutProfile;
159
}
160
161
bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {
162
return SymbolMap->find(ProfileFuncName) == SymbolMap->end();
163
}
164
165
bool SampleProfileMatcher::functionMatchesProfile(
166
const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,
167
bool FindMatchedProfileOnly) {
168
if (IRFuncName == ProfileFuncName)
169
return true;
170
if (!SalvageUnusedProfile)
171
return false;
172
173
// If IR function doesn't have profile and the profile is unused, try
174
// matching them.
175
Function *IRFunc = nullptr;
176
if (functionHasProfile(IRFuncName, IRFunc) ||
177
!isProfileUnused(ProfileFuncName))
178
return false;
179
180
assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&
181
"IR function should be different from profile function to match");
182
return functionMatchesProfile(*IRFunc, ProfileFuncName,
183
FindMatchedProfileOnly);
184
}
185
186
LocToLocMap
187
SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,
188
const AnchorList &AnchorList2,
189
bool MatchUnusedFunction) {
190
int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
191
MaxDepth = Size1 + Size2;
192
auto Index = [&](int32_t I) { return I + MaxDepth; };
193
194
LocToLocMap EqualLocations;
195
if (MaxDepth == 0)
196
return EqualLocations;
197
198
// Backtrack the SES result.
199
auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
200
const AnchorList &AnchorList1,
201
const AnchorList &AnchorList2,
202
LocToLocMap &EqualLocations) {
203
int32_t X = Size1, Y = Size2;
204
for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
205
const auto &P = Trace[Depth];
206
int32_t K = X - Y;
207
int32_t PrevK = K;
208
if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
209
PrevK = K + 1;
210
else
211
PrevK = K - 1;
212
213
int32_t PrevX = P[Index(PrevK)];
214
int32_t PrevY = PrevX - PrevK;
215
while (X > PrevX && Y > PrevY) {
216
X--;
217
Y--;
218
EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
219
}
220
221
if (Depth == 0)
222
break;
223
224
if (Y == PrevY)
225
X--;
226
else if (X == PrevX)
227
Y--;
228
X = PrevX;
229
Y = PrevY;
230
}
231
};
232
233
// The greedy LCS/SES algorithm.
234
235
// An array contains the endpoints of the furthest reaching D-paths.
236
std::vector<int32_t> V(2 * MaxDepth + 1, -1);
237
V[Index(1)] = 0;
238
// Trace is used to backtrack the SES result.
239
std::vector<std::vector<int32_t>> Trace;
240
for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
241
Trace.push_back(V);
242
for (int32_t K = -Depth; K <= Depth; K += 2) {
243
int32_t X = 0, Y = 0;
244
if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
245
X = V[Index(K + 1)];
246
else
247
X = V[Index(K - 1)] + 1;
248
Y = X - K;
249
while (X < Size1 && Y < Size2 &&
250
functionMatchesProfile(
251
AnchorList1[X].second, AnchorList2[Y].second,
252
!MatchUnusedFunction /* Find matched function only */))
253
X++, Y++;
254
255
V[Index(K)] = X;
256
257
if (X >= Size1 && Y >= Size2) {
258
// Length of an SES is D.
259
Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
260
return EqualLocations;
261
}
262
}
263
}
264
// Length of an SES is greater than MaxDepth.
265
return EqualLocations;
266
}
267
268
void SampleProfileMatcher::matchNonCallsiteLocs(
269
const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
270
LocToLocMap &IRToProfileLocationMap) {
271
auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
272
// Skip the unchanged location mapping to save memory.
273
if (From != To)
274
IRToProfileLocationMap.insert({From, To});
275
};
276
277
// Use function's beginning location as the initial anchor.
278
int32_t LocationDelta = 0;
279
SmallVector<LineLocation> LastMatchedNonAnchors;
280
for (const auto &IR : IRAnchors) {
281
const auto &Loc = IR.first;
282
bool IsMatchedAnchor = false;
283
// Match the anchor location in lexical order.
284
auto R = MatchedAnchors.find(Loc);
285
if (R != MatchedAnchors.end()) {
286
const auto &Candidate = R->second;
287
InsertMatching(Loc, Candidate);
288
LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
289
<< " is matched from " << Loc << " to " << Candidate
290
<< "\n");
291
LocationDelta = Candidate.LineOffset - Loc.LineOffset;
292
293
// Match backwards for non-anchor locations.
294
// The locations in LastMatchedNonAnchors have been matched forwards
295
// based on the previous anchor, spilt it evenly and overwrite the
296
// second half based on the current anchor.
297
for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
298
I < LastMatchedNonAnchors.size(); I++) {
299
const auto &L = LastMatchedNonAnchors[I];
300
uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
301
LineLocation Candidate(CandidateLineOffset, L.Discriminator);
302
InsertMatching(L, Candidate);
303
LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
304
<< " to " << Candidate << "\n");
305
}
306
307
IsMatchedAnchor = true;
308
LastMatchedNonAnchors.clear();
309
}
310
311
// Match forwards for non-anchor locations.
312
if (!IsMatchedAnchor) {
313
uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
314
LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
315
InsertMatching(Loc, Candidate);
316
LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
317
<< Candidate << "\n");
318
LastMatchedNonAnchors.emplace_back(Loc);
319
}
320
}
321
}
322
323
// Filter the non-call locations from IRAnchors and ProfileAnchors and write
324
// them into a list for random access later.
325
void SampleProfileMatcher::getFilteredAnchorList(
326
const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,
327
AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {
328
for (const auto &I : IRAnchors) {
329
if (I.second.stringRef().empty())
330
continue;
331
FilteredIRAnchorsList.emplace_back(I);
332
}
333
334
for (const auto &I : ProfileAnchors)
335
FilteredProfileAnchorList.emplace_back(I);
336
}
337
338
// Call target name anchor based profile fuzzy matching.
339
// Input:
340
// For IR locations, the anchor is the callee name of direct callsite; For
341
// profile locations, it's the call target name for BodySamples or inlinee's
342
// profile name for CallsiteSamples.
343
// Matching heuristic:
344
// First match all the anchors using the diff algorithm, then split the
345
// non-anchor locations between the two anchors evenly, first half are matched
346
// based on the start anchor, second half are matched based on the end anchor.
347
// For example, given:
348
// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
349
// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
350
// The matching gives:
351
// [1, 2(foo), 3, 5, 6(bar), 7]
352
// | | | | | |
353
// [1, 2, 3(foo), 4, 7, 8(bar), 9]
354
// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
355
void SampleProfileMatcher::runStaleProfileMatching(
356
const Function &F, const AnchorMap &IRAnchors,
357
const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,
358
bool RunCFGMatching, bool RunCGMatching) {
359
if (!RunCFGMatching && !RunCGMatching)
360
return;
361
LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
362
<< "\n");
363
assert(IRToProfileLocationMap.empty() &&
364
"Run stale profile matching only once per function");
365
366
AnchorList FilteredProfileAnchorList;
367
AnchorList FilteredIRAnchorsList;
368
getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
369
FilteredProfileAnchorList);
370
371
if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
372
return;
373
374
if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||
375
FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {
376
LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()
377
<< " because the number of callsites in the IR is "
378
<< FilteredIRAnchorsList.size()
379
<< " and in the profile is "
380
<< FilteredProfileAnchorList.size() << "\n");
381
return;
382
}
383
384
// Match the callsite anchors by finding the longest common subsequence
385
// between IR and profile.
386
// Define a match between two anchors as follows:
387
// 1) The function names of anchors are the same.
388
// 2) The similarity between the anchor functions is above a threshold if
389
// RunCGMatching is set.
390
// For 2), we only consider the anchor functions from IR and profile don't
391
// appear on either side to reduce the matching scope. Note that we need to
392
// use IR anchor as base(A side) to align with the order of
393
// IRToProfileLocationMap.
394
LocToLocMap MatchedAnchors =
395
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
396
RunCGMatching /* Match unused functions */);
397
398
// CFG level matching:
399
// Apply the callsite matchings to infer matching for the basic
400
// block(non-callsite) locations and write the result to
401
// IRToProfileLocationMap.
402
if (RunCFGMatching)
403
matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
404
}
405
406
void SampleProfileMatcher::runOnFunction(Function &F) {
407
// We need to use flattened function samples for matching.
408
// Unlike IR, which includes all callsites from the source code, the callsites
409
// in profile only show up when they are hit by samples, i,e. the profile
410
// callsites in one context may differ from those in another context. To get
411
// the maximum number of callsites, we merge the function profiles from all
412
// contexts, aka, the flattened profile to find profile anchors.
413
const auto *FSFlattened = getFlattenedSamplesFor(F);
414
if (SalvageUnusedProfile && !FSFlattened) {
415
// Apply the matching in place to find the new function's matched profile.
416
// TODO: For extended profile format, if a function profile is unused and
417
// it's top-level, even if the profile is matched, it's not found in the
418
// profile. This is because sample reader only read the used profile at the
419
// beginning, we need to support loading the profile on-demand in future.
420
auto R = FuncToProfileNameMap.find(&F);
421
if (R != FuncToProfileNameMap.end())
422
FSFlattened = getFlattenedSamplesFor(R->second);
423
}
424
if (!FSFlattened)
425
return;
426
427
// Anchors for IR. It's a map from IR location to callee name, callee name is
428
// empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
429
// for unknown indrect callee name.
430
AnchorMap IRAnchors;
431
findIRAnchors(F, IRAnchors);
432
// Anchors for profile. It's a map from callsite location to a set of callee
433
// name.
434
AnchorMap ProfileAnchors;
435
findProfileAnchors(*FSFlattened, ProfileAnchors);
436
437
// Compute the callsite match states for profile staleness report.
438
if (ReportProfileStaleness || PersistProfileStaleness)
439
recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
440
441
if (!SalvageStaleProfile)
442
return;
443
// For probe-based profiles, run matching only when profile checksum is
444
// mismatched.
445
bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
446
!ProbeManager->profileIsValid(F, *FSFlattened);
447
bool RunCFGMatching =
448
!FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
449
bool RunCGMatching = SalvageUnusedProfile;
450
// For imported functions, the checksum metadata(pseudo_probe_desc) are
451
// dropped, so we leverage function attribute(profile-checksum-mismatch) to
452
// transfer the info: add the attribute during pre-link phase and check it
453
// during post-link phase(see "profileIsValid").
454
if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
455
F.addFnAttr("profile-checksum-mismatch");
456
457
// The matching result will be saved to IRToProfileLocationMap, create a
458
// new map for each function.
459
auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
460
runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,
461
RunCFGMatching, RunCGMatching);
462
// Find and update callsite match states after matching.
463
if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))
464
recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
465
&IRToProfileLocationMap);
466
}
467
468
void SampleProfileMatcher::recordCallsiteMatchStates(
469
const Function &F, const AnchorMap &IRAnchors,
470
const AnchorMap &ProfileAnchors,
471
const LocToLocMap *IRToProfileLocationMap) {
472
bool IsPostMatch = IRToProfileLocationMap != nullptr;
473
auto &CallsiteMatchStates =
474
FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
475
476
auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
477
// IRToProfileLocationMap is null in pre-match phrase.
478
if (!IRToProfileLocationMap)
479
return IRLoc;
480
const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);
481
if (ProfileLoc != IRToProfileLocationMap->end())
482
return ProfileLoc->second;
483
else
484
return IRLoc;
485
};
486
487
for (const auto &I : IRAnchors) {
488
// After fuzzy profile matching, use the matching result to remap the
489
// current IR callsite.
490
const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
491
const auto &IRCalleeId = I.second;
492
const auto &It = ProfileAnchors.find(ProfileLoc);
493
if (It == ProfileAnchors.end())
494
continue;
495
const auto &ProfCalleeId = It->second;
496
if (IRCalleeId == ProfCalleeId) {
497
auto It = CallsiteMatchStates.find(ProfileLoc);
498
if (It == CallsiteMatchStates.end())
499
CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
500
else if (IsPostMatch) {
501
if (It->second == MatchState::InitialMatch)
502
It->second = MatchState::UnchangedMatch;
503
else if (It->second == MatchState::InitialMismatch)
504
It->second = MatchState::RecoveredMismatch;
505
}
506
}
507
}
508
509
// Check if there are any callsites in the profile that does not match to any
510
// IR callsites.
511
for (const auto &I : ProfileAnchors) {
512
const auto &Loc = I.first;
513
assert(!I.second.stringRef().empty() && "Callees should not be empty");
514
auto It = CallsiteMatchStates.find(Loc);
515
if (It == CallsiteMatchStates.end())
516
CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);
517
else if (IsPostMatch) {
518
// Update the state if it's not matched(UnchangedMatch or
519
// RecoveredMismatch).
520
if (It->second == MatchState::InitialMismatch)
521
It->second = MatchState::UnchangedMismatch;
522
else if (It->second == MatchState::InitialMatch)
523
It->second = MatchState::RemovedMatch;
524
}
525
}
526
}
527
528
void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,
529
bool IsTopLevel) {
530
const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
531
// Skip the function that is external or renamed.
532
if (!FuncDesc)
533
return;
534
535
if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
536
if (IsTopLevel)
537
NumStaleProfileFunc++;
538
// Given currently all probe ids are after block probe ids, once the
539
// checksum is mismatched, it's likely all the callites are mismatched and
540
// dropped. We conservatively count all the samples as mismatched and stop
541
// counting the inlinees' profiles.
542
MismatchedFunctionSamples += FS.getTotalSamples();
543
return;
544
}
545
546
// Even the current-level function checksum is matched, it's possible that the
547
// nested inlinees' checksums are mismatched that affect the inlinee's sample
548
// loading, we need to go deeper to check the inlinees' function samples.
549
// Similarly, count all the samples as mismatched if the inlinee's checksum is
550
// mismatched using this recursive function.
551
for (const auto &I : FS.getCallsiteSamples())
552
for (const auto &CS : I.second)
553
countMismatchedFuncSamples(CS.second, false);
554
}
555
556
void SampleProfileMatcher::countMismatchedCallsiteSamples(
557
const FunctionSamples &FS) {
558
auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
559
// Skip it if no mismatched callsite or this is an external function.
560
if (It == FuncCallsiteMatchStates.end() || It->second.empty())
561
return;
562
const auto &CallsiteMatchStates = It->second;
563
564
auto findMatchState = [&](const LineLocation &Loc) {
565
auto It = CallsiteMatchStates.find(Loc);
566
if (It == CallsiteMatchStates.end())
567
return MatchState::Unknown;
568
return It->second;
569
};
570
571
auto AttributeMismatchedSamples = [&](const enum MatchState &State,
572
uint64_t Samples) {
573
if (isMismatchState(State))
574
MismatchedCallsiteSamples += Samples;
575
else if (State == MatchState::RecoveredMismatch)
576
RecoveredCallsiteSamples += Samples;
577
};
578
579
// The non-inlined callsites are saved in the body samples of function
580
// profile, go through it to count the non-inlined callsite samples.
581
for (const auto &I : FS.getBodySamples())
582
AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());
583
584
// Count the inlined callsite samples.
585
for (const auto &I : FS.getCallsiteSamples()) {
586
auto State = findMatchState(I.first);
587
uint64_t CallsiteSamples = 0;
588
for (const auto &CS : I.second)
589
CallsiteSamples += CS.second.getTotalSamples();
590
AttributeMismatchedSamples(State, CallsiteSamples);
591
592
if (isMismatchState(State))
593
continue;
594
595
// When the current level of inlined call site matches the profiled call
596
// site, we need to go deeper along the inline tree to count mismatches from
597
// lower level inlinees.
598
for (const auto &CS : I.second)
599
countMismatchedCallsiteSamples(CS.second);
600
}
601
}
602
603
void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {
604
auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
605
// Skip it if no mismatched callsite or this is an external function.
606
if (It == FuncCallsiteMatchStates.end() || It->second.empty())
607
return;
608
const auto &MatchStates = It->second;
609
[[maybe_unused]] bool OnInitialState =
610
isInitialState(MatchStates.begin()->second);
611
for (const auto &I : MatchStates) {
612
TotalProfiledCallsites++;
613
assert(
614
(OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&
615
"Profile matching state is inconsistent");
616
617
if (isMismatchState(I.second))
618
NumMismatchedCallsites++;
619
else if (I.second == MatchState::RecoveredMismatch)
620
NumRecoveredCallsites++;
621
}
622
}
623
624
void SampleProfileMatcher::countCallGraphRecoveredSamples(
625
const FunctionSamples &FS,
626
std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {
627
if (CallGraphRecoveredProfiles.count(FS.getFunction())) {
628
NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();
629
return;
630
}
631
632
for (const auto &CM : FS.getCallsiteSamples()) {
633
for (const auto &CS : CM.second) {
634
countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);
635
}
636
}
637
}
638
639
void SampleProfileMatcher::computeAndReportProfileStaleness() {
640
if (!ReportProfileStaleness && !PersistProfileStaleness)
641
return;
642
643
std::unordered_set<FunctionId> CallGraphRecoveredProfiles;
644
if (SalvageUnusedProfile) {
645
for (const auto &I : FuncToProfileNameMap) {
646
CallGraphRecoveredProfiles.insert(I.second);
647
if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))
648
continue;
649
NumCallGraphRecoveredProfiledFunc++;
650
}
651
}
652
653
// Count profile mismatches for profile staleness report.
654
for (const auto &F : M) {
655
if (skipProfileForFunction(F))
656
continue;
657
// As the stats will be merged by linker, skip reporting the metrics for
658
// imported functions to avoid repeated counting.
659
if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
660
continue;
661
const auto *FS = Reader.getSamplesFor(F);
662
if (!FS)
663
continue;
664
TotalProfiledFunc++;
665
TotalFunctionSamples += FS->getTotalSamples();
666
667
if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())
668
countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);
669
670
// Checksum mismatch is only used in pseudo-probe mode.
671
if (FunctionSamples::ProfileIsProbeBased)
672
countMismatchedFuncSamples(*FS, true);
673
674
// Count mismatches and samples for calliste.
675
countMismatchCallsites(*FS);
676
countMismatchedCallsiteSamples(*FS);
677
}
678
679
if (ReportProfileStaleness) {
680
if (FunctionSamples::ProfileIsProbeBased) {
681
errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc
682
<< ") of functions' profile are invalid and ("
683
<< MismatchedFunctionSamples << "/" << TotalFunctionSamples
684
<< ") of samples are discarded due to function hash mismatch.\n";
685
}
686
if (SalvageUnusedProfile) {
687
errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"
688
<< TotalProfiledFunc << ") of functions' profile are matched and ("
689
<< NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples
690
<< ") of samples are reused by call graph matching.\n";
691
}
692
693
errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"
694
<< TotalProfiledCallsites
695
<< ") of callsites' profile are invalid and ("
696
<< (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"
697
<< TotalFunctionSamples
698
<< ") of samples are discarded due to callsite location mismatch.\n";
699
errs() << "(" << NumRecoveredCallsites << "/"
700
<< (NumRecoveredCallsites + NumMismatchedCallsites)
701
<< ") of callsites and (" << RecoveredCallsiteSamples << "/"
702
<< (RecoveredCallsiteSamples + MismatchedCallsiteSamples)
703
<< ") of samples are recovered by stale profile matching.\n";
704
}
705
706
if (PersistProfileStaleness) {
707
LLVMContext &Ctx = M.getContext();
708
MDBuilder MDB(Ctx);
709
710
SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
711
if (FunctionSamples::ProfileIsProbeBased) {
712
ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);
713
ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
714
ProfStatsVec.emplace_back("MismatchedFunctionSamples",
715
MismatchedFunctionSamples);
716
ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);
717
}
718
719
if (SalvageUnusedProfile) {
720
ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",
721
NumCallGraphRecoveredProfiledFunc);
722
ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",
723
NumCallGraphRecoveredFuncSamples);
724
}
725
726
ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
727
ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);
728
ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
729
ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
730
MismatchedCallsiteSamples);
731
ProfStatsVec.emplace_back("RecoveredCallsiteSamples",
732
RecoveredCallsiteSamples);
733
734
auto *MD = MDB.createLLVMStats(ProfStatsVec);
735
auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
736
NMD->addOperand(MD);
737
}
738
}
739
740
void SampleProfileMatcher::findFunctionsWithoutProfile() {
741
// TODO: Support MD5 profile.
742
if (FunctionSamples::UseMD5)
743
return;
744
StringSet<> NamesInProfile;
745
if (auto NameTable = Reader.getNameTable()) {
746
for (auto Name : *NameTable)
747
NamesInProfile.insert(Name.stringRef());
748
}
749
750
for (auto &F : M) {
751
// Skip declarations, as even if the function can be matched, we have
752
// nothing to do with it.
753
if (F.isDeclaration())
754
continue;
755
756
StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());
757
const auto *FS = getFlattenedSamplesFor(F);
758
if (FS)
759
continue;
760
761
// For extended binary, functions fully inlined may not be loaded in the
762
// top-level profile, so check the NameTable which has the all symbol names
763
// in profile.
764
if (NamesInProfile.count(CanonFName))
765
continue;
766
767
// For extended binary, non-profiled function symbols are in the profile
768
// symbol list table.
769
if (PSL && PSL->contains(CanonFName))
770
continue;
771
772
LLVM_DEBUG(dbgs() << "Function " << CanonFName
773
<< " is not in profile or profile symbol list.\n");
774
FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;
775
}
776
}
777
778
bool SampleProfileMatcher::functionMatchesProfileHelper(
779
const Function &IRFunc, const FunctionId &ProfFunc) {
780
// The value is in the range [0, 1]. The bigger the value is, the more similar
781
// two sequences are.
782
float Similarity = 0.0;
783
784
const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
785
if (!FSFlattened)
786
return false;
787
// The check for similarity or checksum may not be reliable if the function is
788
// tiny, we use the number of basic block as a proxy for the function
789
// complexity and skip the matching if it's too small.
790
if (IRFunc.size() < MinFuncCountForCGMatching ||
791
FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
792
return false;
793
794
// For probe-based function, we first trust the checksum info. If the checksum
795
// doesn't match, we continue checking for similarity.
796
if (FunctionSamples::ProfileIsProbeBased) {
797
const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
798
if (FuncDesc &&
799
!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
800
LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
801
<< "(IR) and " << ProfFunc << "(Profile) match.\n");
802
803
return true;
804
}
805
}
806
807
AnchorMap IRAnchors;
808
findIRAnchors(IRFunc, IRAnchors);
809
AnchorMap ProfileAnchors;
810
findProfileAnchors(*FSFlattened, ProfileAnchors);
811
812
AnchorList FilteredIRAnchorsList;
813
AnchorList FilteredProfileAnchorList;
814
getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,
815
FilteredProfileAnchorList);
816
817
// Similarly skip the matching if the num of anchors is not enough.
818
if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||
819
FilteredProfileAnchorList.size() < MinCallCountForCGMatching)
820
return false;
821
822
// Use the diff algorithm to find the LCS between IR and profile.
823
824
// Don't recursively match the callee function to avoid infinite matching,
825
// callee functions will be handled later since it's processed in top-down
826
// order .
827
LocToLocMap MatchedAnchors =
828
longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,
829
false /* Match unused functions */);
830
831
Similarity =
832
static_cast<float>(MatchedAnchors.size()) * 2 /
833
(FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());
834
835
LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()
836
<< "(IR) and " << ProfFunc << "(profile) is "
837
<< format("%.2f", Similarity) << "\n");
838
assert((Similarity >= 0 && Similarity <= 1.0) &&
839
"Similarity value should be in [0, 1]");
840
return Similarity * 100 > FuncProfileSimilarityThreshold;
841
}
842
843
// If FindMatchedProfileOnly is set to true, only use the processed function
844
// results. This is used for skipping the repeated recursive matching.
845
bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
846
const FunctionId &ProfFunc,
847
bool FindMatchedProfileOnly) {
848
auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});
849
if (R != FuncProfileMatchCache.end())
850
return R->second;
851
852
if (FindMatchedProfileOnly)
853
return false;
854
855
bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);
856
FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;
857
if (Matched) {
858
FuncToProfileNameMap[&IRFunc] = ProfFunc;
859
LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()
860
<< " matches profile:" << ProfFunc << "\n");
861
}
862
863
return Matched;
864
}
865
866
void SampleProfileMatcher::runOnModule() {
867
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
868
FunctionSamples::ProfileIsCS);
869
if (SalvageUnusedProfile)
870
findFunctionsWithoutProfile();
871
872
// Process the matching in top-down order so that the caller matching result
873
// can be used to the callee matching.
874
std::vector<Function *> TopDownFunctionList;
875
TopDownFunctionList.reserve(M.size());
876
buildTopDownFuncOrder(CG, TopDownFunctionList);
877
for (auto *F : TopDownFunctionList) {
878
if (skipProfileForFunction(*F))
879
continue;
880
runOnFunction(*F);
881
}
882
883
// Update the data in SampleLoader.
884
if (SalvageUnusedProfile)
885
for (auto &I : FuncToProfileNameMap) {
886
assert(I.first && "New function is null");
887
FunctionId FuncName(I.first->getName());
888
FuncNameToProfNameMap->emplace(FuncName, I.second);
889
// We need to remove the old entry to avoid duplicating the function
890
// processing.
891
SymbolMap->erase(FuncName);
892
SymbolMap->emplace(I.second, I.first);
893
}
894
895
if (SalvageStaleProfile)
896
distributeIRToProfileLocationMap();
897
898
computeAndReportProfileStaleness();
899
}
900
901
void SampleProfileMatcher::distributeIRToProfileLocationMap(
902
FunctionSamples &FS) {
903
const auto ProfileMappings = FuncMappings.find(FS.getFuncName());
904
if (ProfileMappings != FuncMappings.end()) {
905
FS.setIRToProfileLocationMap(&(ProfileMappings->second));
906
}
907
908
for (auto &Callees :
909
const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
910
for (auto &FS : Callees.second) {
911
distributeIRToProfileLocationMap(FS.second);
912
}
913
}
914
}
915
916
// Use a central place to distribute the matching results. Outlined and inlined
917
// profile with the function name will be set to the same pointer.
918
void SampleProfileMatcher::distributeIRToProfileLocationMap() {
919
for (auto &I : Reader.getProfiles()) {
920
distributeIRToProfileLocationMap(I.second);
921
}
922
}
923
924