Path: blob/main/contrib/llvm-project/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
35266 views
//===- SampleProfileMatcher.cpp - Sampling-based Stale Profile Matcher ----===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements the SampleProfileMatcher used for stale9// profile matching.10//11//===----------------------------------------------------------------------===//1213#include "llvm/Transforms/IPO/SampleProfileMatcher.h"14#include "llvm/IR/IntrinsicInst.h"15#include "llvm/IR/MDBuilder.h"16#include "llvm/Support/CommandLine.h"1718using namespace llvm;19using namespace sampleprof;2021#define DEBUG_TYPE "sample-profile-matcher"2223static cl::opt<unsigned> FuncProfileSimilarityThreshold(24"func-profile-similarity-threshold", cl::Hidden, cl::init(80),25cl::desc("Consider a profile matches a function if the similarity of their "26"callee sequences is above the specified percentile."));2728static cl::opt<unsigned> MinFuncCountForCGMatching(29"min-func-count-for-cg-matching", cl::Hidden, cl::init(5),30cl::desc("The minimum number of basic blocks required for a function to "31"run stale profile call graph matching."));3233static cl::opt<unsigned> MinCallCountForCGMatching(34"min-call-count-for-cg-matching", cl::Hidden, cl::init(3),35cl::desc("The minimum number of call anchors required for a function to "36"run stale profile call graph matching."));3738extern cl::opt<bool> SalvageStaleProfile;39extern cl::opt<bool> SalvageUnusedProfile;40extern cl::opt<bool> PersistProfileStaleness;41extern cl::opt<bool> ReportProfileStaleness;4243static cl::opt<unsigned> SalvageStaleProfileMaxCallsites(44"salvage-stale-profile-max-callsites", cl::Hidden, cl::init(UINT_MAX),45cl::desc("The maximum number of callsites in a function, above which stale "46"profile matching will be skipped."));4748void SampleProfileMatcher::findIRAnchors(const Function &F,49AnchorMap &IRAnchors) const {50// For inlined code, recover the original callsite and callee by finding the51// top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the52// top-level frame is "main:1", the callsite is "1" and the callee is "foo".53auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {54assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");55const DILocation *PrevDIL = nullptr;56do {57PrevDIL = DIL;58DIL = DIL->getInlinedAt();59} while (DIL->getInlinedAt());6061LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(62DIL, FunctionSamples::ProfileIsFS);63StringRef CalleeName = PrevDIL->getSubprogramLinkageName();64return std::make_pair(Callsite, FunctionId(CalleeName));65};6667auto GetCanonicalCalleeName = [](const CallBase *CB) {68StringRef CalleeName = UnknownIndirectCallee;69if (Function *Callee = CB->getCalledFunction())70CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());71return CalleeName;72};7374// Extract profile matching anchors in the IR.75for (auto &BB : F) {76for (auto &I : BB) {77DILocation *DIL = I.getDebugLoc();78if (!DIL)79continue;8081if (FunctionSamples::ProfileIsProbeBased) {82if (auto Probe = extractProbe(I)) {83// Flatten inlined IR for the matching.84if (DIL->getInlinedAt()) {85IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));86} else {87// Use empty StringRef for basic block probe.88StringRef CalleeName;89if (const auto *CB = dyn_cast<CallBase>(&I)) {90// Skip the probe inst whose callee name is "llvm.pseudoprobe".91if (!isa<IntrinsicInst>(&I))92CalleeName = GetCanonicalCalleeName(CB);93}94LineLocation Loc = LineLocation(Probe->Id, 0);95IRAnchors.emplace(Loc, FunctionId(CalleeName));96}97}98} else {99// TODO: For line-number based profile(AutoFDO), currently only support100// find callsite anchors. In future, we need to parse all the non-call101// instructions to extract the line locations for profile matching.102if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))103continue;104105if (DIL->getInlinedAt()) {106IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));107} else {108LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(109DIL, FunctionSamples::ProfileIsFS);110StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));111IRAnchors.emplace(Callsite, FunctionId(CalleeName));112}113}114}115}116}117118void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,119AnchorMap &ProfileAnchors) const {120auto isInvalidLineOffset = [](uint32_t LineOffset) {121return LineOffset & 0x8000;122};123124auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,125AnchorMap &ProfileAnchors) {126auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);127if (!Ret.second) {128// For multiple callees, which indicates it's an indirect call, we use a129// dummy name(UnknownIndirectCallee) as the indrect callee name.130Ret.first->second = FunctionId(UnknownIndirectCallee);131}132};133134for (const auto &I : FS.getBodySamples()) {135const LineLocation &Loc = I.first;136if (isInvalidLineOffset(Loc.LineOffset))137continue;138for (const auto &C : I.second.getCallTargets())139InsertAnchor(Loc, C.first, ProfileAnchors);140}141142for (const auto &I : FS.getCallsiteSamples()) {143const LineLocation &Loc = I.first;144if (isInvalidLineOffset(Loc.LineOffset))145continue;146for (const auto &C : I.second)147InsertAnchor(Loc, C.first, ProfileAnchors);148}149}150151bool SampleProfileMatcher::functionHasProfile(const FunctionId &IRFuncName,152Function *&FuncWithoutProfile) {153FuncWithoutProfile = nullptr;154auto R = FunctionsWithoutProfile.find(IRFuncName);155if (R != FunctionsWithoutProfile.end())156FuncWithoutProfile = R->second;157return !FuncWithoutProfile;158}159160bool SampleProfileMatcher::isProfileUnused(const FunctionId &ProfileFuncName) {161return SymbolMap->find(ProfileFuncName) == SymbolMap->end();162}163164bool SampleProfileMatcher::functionMatchesProfile(165const FunctionId &IRFuncName, const FunctionId &ProfileFuncName,166bool FindMatchedProfileOnly) {167if (IRFuncName == ProfileFuncName)168return true;169if (!SalvageUnusedProfile)170return false;171172// If IR function doesn't have profile and the profile is unused, try173// matching them.174Function *IRFunc = nullptr;175if (functionHasProfile(IRFuncName, IRFunc) ||176!isProfileUnused(ProfileFuncName))177return false;178179assert(FunctionId(IRFunc->getName()) != ProfileFuncName &&180"IR function should be different from profile function to match");181return functionMatchesProfile(*IRFunc, ProfileFuncName,182FindMatchedProfileOnly);183}184185LocToLocMap186SampleProfileMatcher::longestCommonSequence(const AnchorList &AnchorList1,187const AnchorList &AnchorList2,188bool MatchUnusedFunction) {189int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),190MaxDepth = Size1 + Size2;191auto Index = [&](int32_t I) { return I + MaxDepth; };192193LocToLocMap EqualLocations;194if (MaxDepth == 0)195return EqualLocations;196197// Backtrack the SES result.198auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,199const AnchorList &AnchorList1,200const AnchorList &AnchorList2,201LocToLocMap &EqualLocations) {202int32_t X = Size1, Y = Size2;203for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {204const auto &P = Trace[Depth];205int32_t K = X - Y;206int32_t PrevK = K;207if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))208PrevK = K + 1;209else210PrevK = K - 1;211212int32_t PrevX = P[Index(PrevK)];213int32_t PrevY = PrevX - PrevK;214while (X > PrevX && Y > PrevY) {215X--;216Y--;217EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});218}219220if (Depth == 0)221break;222223if (Y == PrevY)224X--;225else if (X == PrevX)226Y--;227X = PrevX;228Y = PrevY;229}230};231232// The greedy LCS/SES algorithm.233234// An array contains the endpoints of the furthest reaching D-paths.235std::vector<int32_t> V(2 * MaxDepth + 1, -1);236V[Index(1)] = 0;237// Trace is used to backtrack the SES result.238std::vector<std::vector<int32_t>> Trace;239for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {240Trace.push_back(V);241for (int32_t K = -Depth; K <= Depth; K += 2) {242int32_t X = 0, Y = 0;243if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))244X = V[Index(K + 1)];245else246X = V[Index(K - 1)] + 1;247Y = X - K;248while (X < Size1 && Y < Size2 &&249functionMatchesProfile(250AnchorList1[X].second, AnchorList2[Y].second,251!MatchUnusedFunction /* Find matched function only */))252X++, Y++;253254V[Index(K)] = X;255256if (X >= Size1 && Y >= Size2) {257// Length of an SES is D.258Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);259return EqualLocations;260}261}262}263// Length of an SES is greater than MaxDepth.264return EqualLocations;265}266267void SampleProfileMatcher::matchNonCallsiteLocs(268const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,269LocToLocMap &IRToProfileLocationMap) {270auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {271// Skip the unchanged location mapping to save memory.272if (From != To)273IRToProfileLocationMap.insert({From, To});274};275276// Use function's beginning location as the initial anchor.277int32_t LocationDelta = 0;278SmallVector<LineLocation> LastMatchedNonAnchors;279for (const auto &IR : IRAnchors) {280const auto &Loc = IR.first;281bool IsMatchedAnchor = false;282// Match the anchor location in lexical order.283auto R = MatchedAnchors.find(Loc);284if (R != MatchedAnchors.end()) {285const auto &Candidate = R->second;286InsertMatching(Loc, Candidate);287LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()288<< " is matched from " << Loc << " to " << Candidate289<< "\n");290LocationDelta = Candidate.LineOffset - Loc.LineOffset;291292// Match backwards for non-anchor locations.293// The locations in LastMatchedNonAnchors have been matched forwards294// based on the previous anchor, spilt it evenly and overwrite the295// second half based on the current anchor.296for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;297I < LastMatchedNonAnchors.size(); I++) {298const auto &L = LastMatchedNonAnchors[I];299uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;300LineLocation Candidate(CandidateLineOffset, L.Discriminator);301InsertMatching(L, Candidate);302LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L303<< " to " << Candidate << "\n");304}305306IsMatchedAnchor = true;307LastMatchedNonAnchors.clear();308}309310// Match forwards for non-anchor locations.311if (!IsMatchedAnchor) {312uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;313LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);314InsertMatching(Loc, Candidate);315LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "316<< Candidate << "\n");317LastMatchedNonAnchors.emplace_back(Loc);318}319}320}321322// Filter the non-call locations from IRAnchors and ProfileAnchors and write323// them into a list for random access later.324void SampleProfileMatcher::getFilteredAnchorList(325const AnchorMap &IRAnchors, const AnchorMap &ProfileAnchors,326AnchorList &FilteredIRAnchorsList, AnchorList &FilteredProfileAnchorList) {327for (const auto &I : IRAnchors) {328if (I.second.stringRef().empty())329continue;330FilteredIRAnchorsList.emplace_back(I);331}332333for (const auto &I : ProfileAnchors)334FilteredProfileAnchorList.emplace_back(I);335}336337// Call target name anchor based profile fuzzy matching.338// Input:339// For IR locations, the anchor is the callee name of direct callsite; For340// profile locations, it's the call target name for BodySamples or inlinee's341// profile name for CallsiteSamples.342// Matching heuristic:343// First match all the anchors using the diff algorithm, then split the344// non-anchor locations between the two anchors evenly, first half are matched345// based on the start anchor, second half are matched based on the end anchor.346// For example, given:347// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]348// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]349// The matching gives:350// [1, 2(foo), 3, 5, 6(bar), 7]351// | | | | | |352// [1, 2, 3(foo), 4, 7, 8(bar), 9]353// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].354void SampleProfileMatcher::runStaleProfileMatching(355const Function &F, const AnchorMap &IRAnchors,356const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap,357bool RunCFGMatching, bool RunCGMatching) {358if (!RunCFGMatching && !RunCGMatching)359return;360LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()361<< "\n");362assert(IRToProfileLocationMap.empty() &&363"Run stale profile matching only once per function");364365AnchorList FilteredProfileAnchorList;366AnchorList FilteredIRAnchorsList;367getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,368FilteredProfileAnchorList);369370if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())371return;372373if (FilteredIRAnchorsList.size() > SalvageStaleProfileMaxCallsites ||374FilteredProfileAnchorList.size() > SalvageStaleProfileMaxCallsites) {375LLVM_DEBUG(dbgs() << "Skip stale profile matching for " << F.getName()376<< " because the number of callsites in the IR is "377<< FilteredIRAnchorsList.size()378<< " and in the profile is "379<< FilteredProfileAnchorList.size() << "\n");380return;381}382383// Match the callsite anchors by finding the longest common subsequence384// between IR and profile.385// Define a match between two anchors as follows:386// 1) The function names of anchors are the same.387// 2) The similarity between the anchor functions is above a threshold if388// RunCGMatching is set.389// For 2), we only consider the anchor functions from IR and profile don't390// appear on either side to reduce the matching scope. Note that we need to391// use IR anchor as base(A side) to align with the order of392// IRToProfileLocationMap.393LocToLocMap MatchedAnchors =394longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,395RunCGMatching /* Match unused functions */);396397// CFG level matching:398// Apply the callsite matchings to infer matching for the basic399// block(non-callsite) locations and write the result to400// IRToProfileLocationMap.401if (RunCFGMatching)402matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);403}404405void SampleProfileMatcher::runOnFunction(Function &F) {406// We need to use flattened function samples for matching.407// Unlike IR, which includes all callsites from the source code, the callsites408// in profile only show up when they are hit by samples, i,e. the profile409// callsites in one context may differ from those in another context. To get410// the maximum number of callsites, we merge the function profiles from all411// contexts, aka, the flattened profile to find profile anchors.412const auto *FSFlattened = getFlattenedSamplesFor(F);413if (SalvageUnusedProfile && !FSFlattened) {414// Apply the matching in place to find the new function's matched profile.415// TODO: For extended profile format, if a function profile is unused and416// it's top-level, even if the profile is matched, it's not found in the417// profile. This is because sample reader only read the used profile at the418// beginning, we need to support loading the profile on-demand in future.419auto R = FuncToProfileNameMap.find(&F);420if (R != FuncToProfileNameMap.end())421FSFlattened = getFlattenedSamplesFor(R->second);422}423if (!FSFlattened)424return;425426// Anchors for IR. It's a map from IR location to callee name, callee name is427// empty for non-call instruction and use a dummy name(UnknownIndirectCallee)428// for unknown indrect callee name.429AnchorMap IRAnchors;430findIRAnchors(F, IRAnchors);431// Anchors for profile. It's a map from callsite location to a set of callee432// name.433AnchorMap ProfileAnchors;434findProfileAnchors(*FSFlattened, ProfileAnchors);435436// Compute the callsite match states for profile staleness report.437if (ReportProfileStaleness || PersistProfileStaleness)438recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);439440if (!SalvageStaleProfile)441return;442// For probe-based profiles, run matching only when profile checksum is443// mismatched.444bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&445!ProbeManager->profileIsValid(F, *FSFlattened);446bool RunCFGMatching =447!FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;448bool RunCGMatching = SalvageUnusedProfile;449// For imported functions, the checksum metadata(pseudo_probe_desc) are450// dropped, so we leverage function attribute(profile-checksum-mismatch) to451// transfer the info: add the attribute during pre-link phase and check it452// during post-link phase(see "profileIsValid").453if (ChecksumMismatch && LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)454F.addFnAttr("profile-checksum-mismatch");455456// The matching result will be saved to IRToProfileLocationMap, create a457// new map for each function.458auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);459runStaleProfileMatching(F, IRAnchors, ProfileAnchors, IRToProfileLocationMap,460RunCFGMatching, RunCGMatching);461// Find and update callsite match states after matching.462if (RunCFGMatching && (ReportProfileStaleness || PersistProfileStaleness))463recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors,464&IRToProfileLocationMap);465}466467void SampleProfileMatcher::recordCallsiteMatchStates(468const Function &F, const AnchorMap &IRAnchors,469const AnchorMap &ProfileAnchors,470const LocToLocMap *IRToProfileLocationMap) {471bool IsPostMatch = IRToProfileLocationMap != nullptr;472auto &CallsiteMatchStates =473FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];474475auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {476// IRToProfileLocationMap is null in pre-match phrase.477if (!IRToProfileLocationMap)478return IRLoc;479const auto &ProfileLoc = IRToProfileLocationMap->find(IRLoc);480if (ProfileLoc != IRToProfileLocationMap->end())481return ProfileLoc->second;482else483return IRLoc;484};485486for (const auto &I : IRAnchors) {487// After fuzzy profile matching, use the matching result to remap the488// current IR callsite.489const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);490const auto &IRCalleeId = I.second;491const auto &It = ProfileAnchors.find(ProfileLoc);492if (It == ProfileAnchors.end())493continue;494const auto &ProfCalleeId = It->second;495if (IRCalleeId == ProfCalleeId) {496auto It = CallsiteMatchStates.find(ProfileLoc);497if (It == CallsiteMatchStates.end())498CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);499else if (IsPostMatch) {500if (It->second == MatchState::InitialMatch)501It->second = MatchState::UnchangedMatch;502else if (It->second == MatchState::InitialMismatch)503It->second = MatchState::RecoveredMismatch;504}505}506}507508// Check if there are any callsites in the profile that does not match to any509// IR callsites.510for (const auto &I : ProfileAnchors) {511const auto &Loc = I.first;512assert(!I.second.stringRef().empty() && "Callees should not be empty");513auto It = CallsiteMatchStates.find(Loc);514if (It == CallsiteMatchStates.end())515CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);516else if (IsPostMatch) {517// Update the state if it's not matched(UnchangedMatch or518// RecoveredMismatch).519if (It->second == MatchState::InitialMismatch)520It->second = MatchState::UnchangedMismatch;521else if (It->second == MatchState::InitialMatch)522It->second = MatchState::RemovedMatch;523}524}525}526527void SampleProfileMatcher::countMismatchedFuncSamples(const FunctionSamples &FS,528bool IsTopLevel) {529const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());530// Skip the function that is external or renamed.531if (!FuncDesc)532return;533534if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {535if (IsTopLevel)536NumStaleProfileFunc++;537// Given currently all probe ids are after block probe ids, once the538// checksum is mismatched, it's likely all the callites are mismatched and539// dropped. We conservatively count all the samples as mismatched and stop540// counting the inlinees' profiles.541MismatchedFunctionSamples += FS.getTotalSamples();542return;543}544545// Even the current-level function checksum is matched, it's possible that the546// nested inlinees' checksums are mismatched that affect the inlinee's sample547// loading, we need to go deeper to check the inlinees' function samples.548// Similarly, count all the samples as mismatched if the inlinee's checksum is549// mismatched using this recursive function.550for (const auto &I : FS.getCallsiteSamples())551for (const auto &CS : I.second)552countMismatchedFuncSamples(CS.second, false);553}554555void SampleProfileMatcher::countMismatchedCallsiteSamples(556const FunctionSamples &FS) {557auto It = FuncCallsiteMatchStates.find(FS.getFuncName());558// Skip it if no mismatched callsite or this is an external function.559if (It == FuncCallsiteMatchStates.end() || It->second.empty())560return;561const auto &CallsiteMatchStates = It->second;562563auto findMatchState = [&](const LineLocation &Loc) {564auto It = CallsiteMatchStates.find(Loc);565if (It == CallsiteMatchStates.end())566return MatchState::Unknown;567return It->second;568};569570auto AttributeMismatchedSamples = [&](const enum MatchState &State,571uint64_t Samples) {572if (isMismatchState(State))573MismatchedCallsiteSamples += Samples;574else if (State == MatchState::RecoveredMismatch)575RecoveredCallsiteSamples += Samples;576};577578// The non-inlined callsites are saved in the body samples of function579// profile, go through it to count the non-inlined callsite samples.580for (const auto &I : FS.getBodySamples())581AttributeMismatchedSamples(findMatchState(I.first), I.second.getSamples());582583// Count the inlined callsite samples.584for (const auto &I : FS.getCallsiteSamples()) {585auto State = findMatchState(I.first);586uint64_t CallsiteSamples = 0;587for (const auto &CS : I.second)588CallsiteSamples += CS.second.getTotalSamples();589AttributeMismatchedSamples(State, CallsiteSamples);590591if (isMismatchState(State))592continue;593594// When the current level of inlined call site matches the profiled call595// site, we need to go deeper along the inline tree to count mismatches from596// lower level inlinees.597for (const auto &CS : I.second)598countMismatchedCallsiteSamples(CS.second);599}600}601602void SampleProfileMatcher::countMismatchCallsites(const FunctionSamples &FS) {603auto It = FuncCallsiteMatchStates.find(FS.getFuncName());604// Skip it if no mismatched callsite or this is an external function.605if (It == FuncCallsiteMatchStates.end() || It->second.empty())606return;607const auto &MatchStates = It->second;608[[maybe_unused]] bool OnInitialState =609isInitialState(MatchStates.begin()->second);610for (const auto &I : MatchStates) {611TotalProfiledCallsites++;612assert(613(OnInitialState ? isInitialState(I.second) : isFinalState(I.second)) &&614"Profile matching state is inconsistent");615616if (isMismatchState(I.second))617NumMismatchedCallsites++;618else if (I.second == MatchState::RecoveredMismatch)619NumRecoveredCallsites++;620}621}622623void SampleProfileMatcher::countCallGraphRecoveredSamples(624const FunctionSamples &FS,625std::unordered_set<FunctionId> &CallGraphRecoveredProfiles) {626if (CallGraphRecoveredProfiles.count(FS.getFunction())) {627NumCallGraphRecoveredFuncSamples += FS.getTotalSamples();628return;629}630631for (const auto &CM : FS.getCallsiteSamples()) {632for (const auto &CS : CM.second) {633countCallGraphRecoveredSamples(CS.second, CallGraphRecoveredProfiles);634}635}636}637638void SampleProfileMatcher::computeAndReportProfileStaleness() {639if (!ReportProfileStaleness && !PersistProfileStaleness)640return;641642std::unordered_set<FunctionId> CallGraphRecoveredProfiles;643if (SalvageUnusedProfile) {644for (const auto &I : FuncToProfileNameMap) {645CallGraphRecoveredProfiles.insert(I.second);646if (GlobalValue::isAvailableExternallyLinkage(I.first->getLinkage()))647continue;648NumCallGraphRecoveredProfiledFunc++;649}650}651652// Count profile mismatches for profile staleness report.653for (const auto &F : M) {654if (skipProfileForFunction(F))655continue;656// As the stats will be merged by linker, skip reporting the metrics for657// imported functions to avoid repeated counting.658if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))659continue;660const auto *FS = Reader.getSamplesFor(F);661if (!FS)662continue;663TotalProfiledFunc++;664TotalFunctionSamples += FS->getTotalSamples();665666if (SalvageUnusedProfile && !CallGraphRecoveredProfiles.empty())667countCallGraphRecoveredSamples(*FS, CallGraphRecoveredProfiles);668669// Checksum mismatch is only used in pseudo-probe mode.670if (FunctionSamples::ProfileIsProbeBased)671countMismatchedFuncSamples(*FS, true);672673// Count mismatches and samples for calliste.674countMismatchCallsites(*FS);675countMismatchedCallsiteSamples(*FS);676}677678if (ReportProfileStaleness) {679if (FunctionSamples::ProfileIsProbeBased) {680errs() << "(" << NumStaleProfileFunc << "/" << TotalProfiledFunc681<< ") of functions' profile are invalid and ("682<< MismatchedFunctionSamples << "/" << TotalFunctionSamples683<< ") of samples are discarded due to function hash mismatch.\n";684}685if (SalvageUnusedProfile) {686errs() << "(" << NumCallGraphRecoveredProfiledFunc << "/"687<< TotalProfiledFunc << ") of functions' profile are matched and ("688<< NumCallGraphRecoveredFuncSamples << "/" << TotalFunctionSamples689<< ") of samples are reused by call graph matching.\n";690}691692errs() << "(" << (NumMismatchedCallsites + NumRecoveredCallsites) << "/"693<< TotalProfiledCallsites694<< ") of callsites' profile are invalid and ("695<< (MismatchedCallsiteSamples + RecoveredCallsiteSamples) << "/"696<< TotalFunctionSamples697<< ") of samples are discarded due to callsite location mismatch.\n";698errs() << "(" << NumRecoveredCallsites << "/"699<< (NumRecoveredCallsites + NumMismatchedCallsites)700<< ") of callsites and (" << RecoveredCallsiteSamples << "/"701<< (RecoveredCallsiteSamples + MismatchedCallsiteSamples)702<< ") of samples are recovered by stale profile matching.\n";703}704705if (PersistProfileStaleness) {706LLVMContext &Ctx = M.getContext();707MDBuilder MDB(Ctx);708709SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;710if (FunctionSamples::ProfileIsProbeBased) {711ProfStatsVec.emplace_back("NumStaleProfileFunc", NumStaleProfileFunc);712ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);713ProfStatsVec.emplace_back("MismatchedFunctionSamples",714MismatchedFunctionSamples);715ProfStatsVec.emplace_back("TotalFunctionSamples", TotalFunctionSamples);716}717718if (SalvageUnusedProfile) {719ProfStatsVec.emplace_back("NumCallGraphRecoveredProfiledFunc",720NumCallGraphRecoveredProfiledFunc);721ProfStatsVec.emplace_back("NumCallGraphRecoveredFuncSamples",722NumCallGraphRecoveredFuncSamples);723}724725ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);726ProfStatsVec.emplace_back("NumRecoveredCallsites", NumRecoveredCallsites);727ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);728ProfStatsVec.emplace_back("MismatchedCallsiteSamples",729MismatchedCallsiteSamples);730ProfStatsVec.emplace_back("RecoveredCallsiteSamples",731RecoveredCallsiteSamples);732733auto *MD = MDB.createLLVMStats(ProfStatsVec);734auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");735NMD->addOperand(MD);736}737}738739void SampleProfileMatcher::findFunctionsWithoutProfile() {740// TODO: Support MD5 profile.741if (FunctionSamples::UseMD5)742return;743StringSet<> NamesInProfile;744if (auto NameTable = Reader.getNameTable()) {745for (auto Name : *NameTable)746NamesInProfile.insert(Name.stringRef());747}748749for (auto &F : M) {750// Skip declarations, as even if the function can be matched, we have751// nothing to do with it.752if (F.isDeclaration())753continue;754755StringRef CanonFName = FunctionSamples::getCanonicalFnName(F.getName());756const auto *FS = getFlattenedSamplesFor(F);757if (FS)758continue;759760// For extended binary, functions fully inlined may not be loaded in the761// top-level profile, so check the NameTable which has the all symbol names762// in profile.763if (NamesInProfile.count(CanonFName))764continue;765766// For extended binary, non-profiled function symbols are in the profile767// symbol list table.768if (PSL && PSL->contains(CanonFName))769continue;770771LLVM_DEBUG(dbgs() << "Function " << CanonFName772<< " is not in profile or profile symbol list.\n");773FunctionsWithoutProfile[FunctionId(CanonFName)] = &F;774}775}776777bool SampleProfileMatcher::functionMatchesProfileHelper(778const Function &IRFunc, const FunctionId &ProfFunc) {779// The value is in the range [0, 1]. The bigger the value is, the more similar780// two sequences are.781float Similarity = 0.0;782783const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);784if (!FSFlattened)785return false;786// The check for similarity or checksum may not be reliable if the function is787// tiny, we use the number of basic block as a proxy for the function788// complexity and skip the matching if it's too small.789if (IRFunc.size() < MinFuncCountForCGMatching ||790FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)791return false;792793// For probe-based function, we first trust the checksum info. If the checksum794// doesn't match, we continue checking for similarity.795if (FunctionSamples::ProfileIsProbeBased) {796const auto *FuncDesc = ProbeManager->getDesc(IRFunc);797if (FuncDesc &&798!ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {799LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()800<< "(IR) and " << ProfFunc << "(Profile) match.\n");801802return true;803}804}805806AnchorMap IRAnchors;807findIRAnchors(IRFunc, IRAnchors);808AnchorMap ProfileAnchors;809findProfileAnchors(*FSFlattened, ProfileAnchors);810811AnchorList FilteredIRAnchorsList;812AnchorList FilteredProfileAnchorList;813getFilteredAnchorList(IRAnchors, ProfileAnchors, FilteredIRAnchorsList,814FilteredProfileAnchorList);815816// Similarly skip the matching if the num of anchors is not enough.817if (FilteredIRAnchorsList.size() < MinCallCountForCGMatching ||818FilteredProfileAnchorList.size() < MinCallCountForCGMatching)819return false;820821// Use the diff algorithm to find the LCS between IR and profile.822823// Don't recursively match the callee function to avoid infinite matching,824// callee functions will be handled later since it's processed in top-down825// order .826LocToLocMap MatchedAnchors =827longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList,828false /* Match unused functions */);829830Similarity =831static_cast<float>(MatchedAnchors.size()) * 2 /832(FilteredIRAnchorsList.size() + FilteredProfileAnchorList.size());833834LLVM_DEBUG(dbgs() << "The similarity between " << IRFunc.getName()835<< "(IR) and " << ProfFunc << "(profile) is "836<< format("%.2f", Similarity) << "\n");837assert((Similarity >= 0 && Similarity <= 1.0) &&838"Similarity value should be in [0, 1]");839return Similarity * 100 > FuncProfileSimilarityThreshold;840}841842// If FindMatchedProfileOnly is set to true, only use the processed function843// results. This is used for skipping the repeated recursive matching.844bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,845const FunctionId &ProfFunc,846bool FindMatchedProfileOnly) {847auto R = FuncProfileMatchCache.find({&IRFunc, ProfFunc});848if (R != FuncProfileMatchCache.end())849return R->second;850851if (FindMatchedProfileOnly)852return false;853854bool Matched = functionMatchesProfileHelper(IRFunc, ProfFunc);855FuncProfileMatchCache[{&IRFunc, ProfFunc}] = Matched;856if (Matched) {857FuncToProfileNameMap[&IRFunc] = ProfFunc;858LLVM_DEBUG(dbgs() << "Function:" << IRFunc.getName()859<< " matches profile:" << ProfFunc << "\n");860}861862return Matched;863}864865void SampleProfileMatcher::runOnModule() {866ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,867FunctionSamples::ProfileIsCS);868if (SalvageUnusedProfile)869findFunctionsWithoutProfile();870871// Process the matching in top-down order so that the caller matching result872// can be used to the callee matching.873std::vector<Function *> TopDownFunctionList;874TopDownFunctionList.reserve(M.size());875buildTopDownFuncOrder(CG, TopDownFunctionList);876for (auto *F : TopDownFunctionList) {877if (skipProfileForFunction(*F))878continue;879runOnFunction(*F);880}881882// Update the data in SampleLoader.883if (SalvageUnusedProfile)884for (auto &I : FuncToProfileNameMap) {885assert(I.first && "New function is null");886FunctionId FuncName(I.first->getName());887FuncNameToProfNameMap->emplace(FuncName, I.second);888// We need to remove the old entry to avoid duplicating the function889// processing.890SymbolMap->erase(FuncName);891SymbolMap->emplace(I.second, I.first);892}893894if (SalvageStaleProfile)895distributeIRToProfileLocationMap();896897computeAndReportProfileStaleness();898}899900void SampleProfileMatcher::distributeIRToProfileLocationMap(901FunctionSamples &FS) {902const auto ProfileMappings = FuncMappings.find(FS.getFuncName());903if (ProfileMappings != FuncMappings.end()) {904FS.setIRToProfileLocationMap(&(ProfileMappings->second));905}906907for (auto &Callees :908const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {909for (auto &FS : Callees.second) {910distributeIRToProfileLocationMap(FS.second);911}912}913}914915// Use a central place to distribute the matching results. Outlined and inlined916// profile with the function name will be set to the same pointer.917void SampleProfileMatcher::distributeIRToProfileLocationMap() {918for (auto &I : Reader.getProfiles()) {919distributeIRToProfileLocationMap(I.second);920}921}922923924