// Path: blob/main/contrib/llvm-project/llvm/lib/ProfileData/SampleProf.cpp
// 35233 views
//=-- SampleProf.cpp - Sample profiling format support --------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains common definitions used in the reading and writing of9// sample profile data.10//11//===----------------------------------------------------------------------===//1213#include "llvm/ProfileData/SampleProf.h"14#include "llvm/Config/llvm-config.h"15#include "llvm/IR/DebugInfoMetadata.h"16#include "llvm/IR/PseudoProbe.h"17#include "llvm/ProfileData/SampleProfReader.h"18#include "llvm/Support/CommandLine.h"19#include "llvm/Support/Compiler.h"20#include "llvm/Support/Debug.h"21#include "llvm/Support/ErrorHandling.h"22#include "llvm/Support/raw_ostream.h"23#include <string>24#include <system_error>2526using namespace llvm;27using namespace sampleprof;2829static cl::opt<uint64_t> ProfileSymbolListCutOff(30"profile-symbol-list-cutoff", cl::Hidden, cl::init(-1),31cl::desc("Cutoff value about how many symbols in profile symbol list "32"will be used. 
This is very useful for performance debugging"));3334static cl::opt<bool> GenerateMergedBaseProfiles(35"generate-merged-base-profiles",36cl::desc("When generating nested context-sensitive profiles, always "37"generate extra base profile for function with all its context "38"profiles merged into it."));3940namespace llvm {41namespace sampleprof {42bool FunctionSamples::ProfileIsProbeBased = false;43bool FunctionSamples::ProfileIsCS = false;44bool FunctionSamples::ProfileIsPreInlined = false;45bool FunctionSamples::UseMD5 = false;46bool FunctionSamples::HasUniqSuffix = true;47bool FunctionSamples::ProfileIsFS = false;48} // namespace sampleprof49} // namespace llvm5051namespace {5253// FIXME: This class is only here to support the transition to llvm::Error. It54// will be removed once this transition is complete. Clients should prefer to55// deal with the Error value directly, rather than converting to error_code.56class SampleProfErrorCategoryType : public std::error_category {57const char *name() const noexcept override { return "llvm.sampleprof"; }5859std::string message(int IE) const override {60sampleprof_error E = static_cast<sampleprof_error>(IE);61switch (E) {62case sampleprof_error::success:63return "Success";64case sampleprof_error::bad_magic:65return "Invalid sample profile data (bad magic)";66case sampleprof_error::unsupported_version:67return "Unsupported sample profile format version";68case sampleprof_error::too_large:69return "Too much profile data";70case sampleprof_error::truncated:71return "Truncated profile data";72case sampleprof_error::malformed:73return "Malformed sample profile data";74case sampleprof_error::unrecognized_format:75return "Unrecognized sample profile encoding format";76case sampleprof_error::unsupported_writing_format:77return "Profile encoding format unsupported for writing operations";78case sampleprof_error::truncated_name_table:79return "Truncated function name table";80case sampleprof_error::not_implemented:81return 
"Unimplemented feature";82case sampleprof_error::counter_overflow:83return "Counter overflow";84case sampleprof_error::ostream_seek_unsupported:85return "Ostream does not support seek";86case sampleprof_error::uncompress_failed:87return "Uncompress failure";88case sampleprof_error::zlib_unavailable:89return "Zlib is unavailable";90case sampleprof_error::hash_mismatch:91return "Function hash mismatch";92}93llvm_unreachable("A value of sampleprof_error has no message.");94}95};9697} // end anonymous namespace9899const std::error_category &llvm::sampleprof_category() {100static SampleProfErrorCategoryType ErrorCategory;101return ErrorCategory;102}103104void LineLocation::print(raw_ostream &OS) const {105OS << LineOffset;106if (Discriminator > 0)107OS << "." << Discriminator;108}109110raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,111const LineLocation &Loc) {112Loc.print(OS);113return OS;114}115116/// Merge the samples in \p Other into this record.117/// Optionally scale sample counts by \p Weight.118sampleprof_error SampleRecord::merge(const SampleRecord &Other,119uint64_t Weight) {120sampleprof_error Result;121Result = addSamples(Other.getSamples(), Weight);122for (const auto &I : Other.getCallTargets()) {123mergeSampleProfErrors(Result, addCalledTarget(I.first, I.second, Weight));124}125return Result;126}127128#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)129LLVM_DUMP_METHOD void LineLocation::dump() const { print(dbgs()); }130#endif131132/// Print the sample record to the stream \p OS indented by \p Indent.133void SampleRecord::print(raw_ostream &OS, unsigned Indent) const {134OS << NumSamples;135if (hasCalls()) {136OS << ", calls:";137for (const auto &I : getSortedCallTargets())138OS << " " << I.first << ":" << I.second;139}140OS << "\n";141}142143#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)144LLVM_DUMP_METHOD void SampleRecord::dump() const { print(dbgs(), 0); }145#endif146147raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,148const 
SampleRecord &Sample) {149Sample.print(OS, 0);150return OS;151}152153/// Print the samples collected for a function on stream \p OS.154void FunctionSamples::print(raw_ostream &OS, unsigned Indent) const {155if (getFunctionHash())156OS << "CFG checksum " << getFunctionHash() << "\n";157158OS << TotalSamples << ", " << TotalHeadSamples << ", " << BodySamples.size()159<< " sampled lines\n";160161OS.indent(Indent);162if (!BodySamples.empty()) {163OS << "Samples collected in the function's body {\n";164SampleSorter<LineLocation, SampleRecord> SortedBodySamples(BodySamples);165for (const auto &SI : SortedBodySamples.get()) {166OS.indent(Indent + 2);167OS << SI->first << ": " << SI->second;168}169OS.indent(Indent);170OS << "}\n";171} else {172OS << "No samples collected in the function's body\n";173}174175OS.indent(Indent);176if (!CallsiteSamples.empty()) {177OS << "Samples collected in inlined callsites {\n";178SampleSorter<LineLocation, FunctionSamplesMap> SortedCallsiteSamples(179CallsiteSamples);180for (const auto &CS : SortedCallsiteSamples.get()) {181for (const auto &FS : CS->second) {182OS.indent(Indent + 2);183OS << CS->first << ": inlined callee: " << FS.second.getFunction()184<< ": ";185FS.second.print(OS, Indent + 4);186}187}188OS.indent(Indent);189OS << "}\n";190} else {191OS << "No inlined callsites in this function\n";192}193}194195raw_ostream &llvm::sampleprof::operator<<(raw_ostream &OS,196const FunctionSamples &FS) {197FS.print(OS);198return OS;199}200201void sampleprof::sortFuncProfiles(202const SampleProfileMap &ProfileMap,203std::vector<NameFunctionSamples> &SortedProfiles) {204for (const auto &I : ProfileMap) {205SortedProfiles.push_back(std::make_pair(I.first, &I.second));206}207llvm::stable_sort(SortedProfiles, [](const NameFunctionSamples &A,208const NameFunctionSamples &B) {209if (A.second->getTotalSamples() == B.second->getTotalSamples())210return A.second->getContext() < B.second->getContext();211return A.second->getTotalSamples() > 
B.second->getTotalSamples();212});213}214215unsigned FunctionSamples::getOffset(const DILocation *DIL) {216return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &2170xffff;218}219220LineLocation FunctionSamples::getCallSiteIdentifier(const DILocation *DIL,221bool ProfileIsFS) {222if (FunctionSamples::ProfileIsProbeBased) {223// In a pseudo-probe based profile, a callsite is simply represented by the224// ID of the probe associated with the call instruction. The probe ID is225// encoded in the Discriminator field of the call instruction's debug226// metadata.227return LineLocation(PseudoProbeDwarfDiscriminator::extractProbeIndex(228DIL->getDiscriminator()),2290);230} else {231unsigned Discriminator =232ProfileIsFS ? DIL->getDiscriminator() : DIL->getBaseDiscriminator();233return LineLocation(FunctionSamples::getOffset(DIL), Discriminator);234}235}236237const FunctionSamples *FunctionSamples::findFunctionSamples(238const DILocation *DIL, SampleProfileReaderItaniumRemapper *Remapper,239const HashKeyMap<std::unordered_map, FunctionId, FunctionId>240*FuncNameToProfNameMap) const {241assert(DIL);242SmallVector<std::pair<LineLocation, StringRef>, 10> S;243244const DILocation *PrevDIL = DIL;245for (DIL = DIL->getInlinedAt(); DIL; DIL = DIL->getInlinedAt()) {246// Use C++ linkage name if possible.247StringRef Name = PrevDIL->getScope()->getSubprogram()->getLinkageName();248if (Name.empty())249Name = PrevDIL->getScope()->getSubprogram()->getName();250S.emplace_back(FunctionSamples::getCallSiteIdentifier(251DIL, FunctionSamples::ProfileIsFS),252Name);253PrevDIL = DIL;254}255256if (S.size() == 0)257return this;258const FunctionSamples *FS = this;259for (int i = S.size() - 1; i >= 0 && FS != nullptr; i--) {260FS = FS->findFunctionSamplesAt(S[i].first, S[i].second, Remapper,261FuncNameToProfNameMap);262}263return FS;264}265266void FunctionSamples::findAllNames(DenseSet<FunctionId> &NameSet) const {267NameSet.insert(getFunction());268for (const auto &BS : 
BodySamples)269for (const auto &TS : BS.second.getCallTargets())270NameSet.insert(TS.first);271272for (const auto &CS : CallsiteSamples) {273for (const auto &NameFS : CS.second) {274NameSet.insert(NameFS.first);275NameFS.second.findAllNames(NameSet);276}277}278}279280const FunctionSamples *FunctionSamples::findFunctionSamplesAt(281const LineLocation &Loc, StringRef CalleeName,282SampleProfileReaderItaniumRemapper *Remapper,283const HashKeyMap<std::unordered_map, FunctionId, FunctionId>284*FuncNameToProfNameMap) const {285CalleeName = getCanonicalFnName(CalleeName);286287auto I = CallsiteSamples.find(mapIRLocToProfileLoc(Loc));288if (I == CallsiteSamples.end())289return nullptr;290auto FS = I->second.find(getRepInFormat(CalleeName));291if (FS != I->second.end())292return &FS->second;293294if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {295auto R = FuncNameToProfNameMap->find(FunctionId(CalleeName));296if (R != FuncNameToProfNameMap->end()) {297CalleeName = R->second.stringRef();298auto FS = I->second.find(getRepInFormat(CalleeName));299if (FS != I->second.end())300return &FS->second;301}302}303304if (Remapper) {305if (auto NameInProfile = Remapper->lookUpNameInProfile(CalleeName)) {306auto FS = I->second.find(getRepInFormat(*NameInProfile));307if (FS != I->second.end())308return &FS->second;309}310}311// If we cannot find exact match of the callee name, return the FS with312// the max total count. 
Only do this when CalleeName is not provided,313// i.e., only for indirect calls.314if (!CalleeName.empty())315return nullptr;316uint64_t MaxTotalSamples = 0;317const FunctionSamples *R = nullptr;318for (const auto &NameFS : I->second)319if (NameFS.second.getTotalSamples() >= MaxTotalSamples) {320MaxTotalSamples = NameFS.second.getTotalSamples();321R = &NameFS.second;322}323return R;324}325326#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)327LLVM_DUMP_METHOD void FunctionSamples::dump() const { print(dbgs(), 0); }328#endif329330std::error_code ProfileSymbolList::read(const uint8_t *Data,331uint64_t ListSize) {332const char *ListStart = reinterpret_cast<const char *>(Data);333uint64_t Size = 0;334uint64_t StrNum = 0;335while (Size < ListSize && StrNum < ProfileSymbolListCutOff) {336StringRef Str(ListStart + Size);337add(Str);338Size += Str.size() + 1;339StrNum++;340}341if (Size != ListSize && StrNum != ProfileSymbolListCutOff)342return sampleprof_error::malformed;343return sampleprof_error::success;344}345346void SampleContextTrimmer::trimAndMergeColdContextProfiles(347uint64_t ColdCountThreshold, bool TrimColdContext, bool MergeColdContext,348uint32_t ColdContextFrameLength, bool TrimBaseProfileOnly) {349if (!TrimColdContext && !MergeColdContext)350return;351352// Nothing to merge if sample threshold is zero353if (ColdCountThreshold == 0)354return;355356// Trimming base profiles only is mainly to honor the preinliner decsion. 
When357// MergeColdContext is true preinliner decsion is not honored anyway so turn358// off TrimBaseProfileOnly.359if (MergeColdContext)360TrimBaseProfileOnly = false;361362// Filter the cold profiles from ProfileMap and move them into a tmp363// container364std::vector<std::pair<hash_code, const FunctionSamples *>> ColdProfiles;365for (const auto &I : ProfileMap) {366const SampleContext &Context = I.second.getContext();367const FunctionSamples &FunctionProfile = I.second;368if (FunctionProfile.getTotalSamples() < ColdCountThreshold &&369(!TrimBaseProfileOnly || Context.isBaseContext()))370ColdProfiles.emplace_back(I.first, &I.second);371}372373// Remove the cold profile from ProfileMap and merge them into374// MergedProfileMap by the last K frames of context375SampleProfileMap MergedProfileMap;376for (const auto &I : ColdProfiles) {377if (MergeColdContext) {378auto MergedContext = I.second->getContext().getContextFrames();379if (ColdContextFrameLength < MergedContext.size())380MergedContext = MergedContext.take_back(ColdContextFrameLength);381// Need to set MergedProfile's context here otherwise it will be lost.382FunctionSamples &MergedProfile = MergedProfileMap.create(MergedContext);383MergedProfile.merge(*I.second);384}385ProfileMap.erase(I.first);386}387388// Move the merged profiles into ProfileMap;389for (const auto &I : MergedProfileMap) {390// Filter the cold merged profile391if (TrimColdContext && I.second.getTotalSamples() < ColdCountThreshold &&392ProfileMap.find(I.second.getContext()) == ProfileMap.end())393continue;394// Merge the profile if the original profile exists, otherwise just insert395// as a new profile. 
If inserted as a new profile from MergedProfileMap, it396// already has the right context.397auto Ret = ProfileMap.emplace(I.second.getContext(), FunctionSamples());398FunctionSamples &OrigProfile = Ret.first->second;399OrigProfile.merge(I.second);400}401}402403std::error_code ProfileSymbolList::write(raw_ostream &OS) {404// Sort the symbols before output. If doing compression.405// It will make the compression much more effective.406std::vector<StringRef> SortedList(Syms.begin(), Syms.end());407llvm::sort(SortedList);408409std::string OutputString;410for (auto &Sym : SortedList) {411OutputString.append(Sym.str());412OutputString.append(1, '\0');413}414415OS << OutputString;416return sampleprof_error::success;417}418419void ProfileSymbolList::dump(raw_ostream &OS) const {420OS << "======== Dump profile symbol list ========\n";421std::vector<StringRef> SortedList(Syms.begin(), Syms.end());422llvm::sort(SortedList);423424for (auto &Sym : SortedList)425OS << Sym << "\n";426}427428ProfileConverter::FrameNode *429ProfileConverter::FrameNode::getOrCreateChildFrame(const LineLocation &CallSite,430FunctionId CalleeName) {431uint64_t Hash = FunctionSamples::getCallSiteHash(CalleeName, CallSite);432auto It = AllChildFrames.find(Hash);433if (It != AllChildFrames.end()) {434assert(It->second.FuncName == CalleeName &&435"Hash collision for child context node");436return &It->second;437}438439AllChildFrames[Hash] = FrameNode(CalleeName, nullptr, CallSite);440return &AllChildFrames[Hash];441}442443ProfileConverter::ProfileConverter(SampleProfileMap &Profiles)444: ProfileMap(Profiles) {445for (auto &FuncSample : Profiles) {446FunctionSamples *FSamples = &FuncSample.second;447auto *NewNode = getOrCreateContextPath(FSamples->getContext());448assert(!NewNode->FuncSamples && "New node cannot have sample profile");449NewNode->FuncSamples = FSamples;450}451}452453ProfileConverter::FrameNode *454ProfileConverter::getOrCreateContextPath(const SampleContext &Context) {455auto Node = 
&RootFrame;456LineLocation CallSiteLoc(0, 0);457for (auto &Callsite : Context.getContextFrames()) {458Node = Node->getOrCreateChildFrame(CallSiteLoc, Callsite.Func);459CallSiteLoc = Callsite.Location;460}461return Node;462}463464void ProfileConverter::convertCSProfiles(ProfileConverter::FrameNode &Node) {465// Process each child profile. Add each child profile to callsite profile map466// of the current node `Node` if `Node` comes with a profile. Otherwise467// promote the child profile to a standalone profile.468auto *NodeProfile = Node.FuncSamples;469for (auto &It : Node.AllChildFrames) {470auto &ChildNode = It.second;471convertCSProfiles(ChildNode);472auto *ChildProfile = ChildNode.FuncSamples;473if (!ChildProfile)474continue;475SampleContext OrigChildContext = ChildProfile->getContext();476uint64_t OrigChildContextHash = OrigChildContext.getHashCode();477// Reset the child context to be contextless.478ChildProfile->getContext().setFunction(OrigChildContext.getFunction());479if (NodeProfile) {480// Add child profile to the callsite profile map.481auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);482SamplesMap.emplace(OrigChildContext.getFunction(), *ChildProfile);483NodeProfile->addTotalSamples(ChildProfile->getTotalSamples());484// Remove the corresponding body sample for the callsite and update the485// total weight.486auto Count = NodeProfile->removeCalledTargetAndBodySample(487ChildNode.CallSiteLoc.LineOffset, ChildNode.CallSiteLoc.Discriminator,488OrigChildContext.getFunction());489NodeProfile->removeTotalSamples(Count);490}491492uint64_t NewChildProfileHash = 0;493// Separate child profile to be a standalone profile, if the current parent494// profile doesn't exist. This is a duplicating operation when the child495// profile is already incorporated into the parent which is still useful and496// thus done optionally. 
It is seen that duplicating context profiles into497// base profiles improves the code quality for thinlto build by allowing a498// profile in the prelink phase for to-be-fully-inlined functions.499if (!NodeProfile) {500ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);501NewChildProfileHash = ChildProfile->getContext().getHashCode();502} else if (GenerateMergedBaseProfiles) {503ProfileMap[ChildProfile->getContext()].merge(*ChildProfile);504NewChildProfileHash = ChildProfile->getContext().getHashCode();505auto &SamplesMap = NodeProfile->functionSamplesAt(ChildNode.CallSiteLoc);506SamplesMap[ChildProfile->getFunction()].getContext().setAttribute(507ContextDuplicatedIntoBase);508}509510// Remove the original child profile. Check if MD5 of new child profile511// collides with old profile, in this case the [] operator already512// overwritten it without the need of erase.513if (NewChildProfileHash != OrigChildContextHash)514ProfileMap.erase(OrigChildContextHash);515}516}517518void ProfileConverter::convertCSProfiles() { convertCSProfiles(RootFrame); }519520521