Path: blob/main/contrib/llvm-project/llvm/tools/llvm-xray/xray-account.cpp
35231 views
//===- xray-account.h - XRay Function Call Accounting ---------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements basic function call accounting from an XRay trace.9//10//===----------------------------------------------------------------------===//1112#include <algorithm>13#include <cassert>14#include <numeric>15#include <system_error>16#include <utility>1718#include "xray-account.h"19#include "xray-registry.h"20#include "llvm/Support/ErrorHandling.h"21#include "llvm/Support/FormatVariadic.h"22#include "llvm/XRay/InstrumentationMap.h"23#include "llvm/XRay/Trace.h"2425#include <cmath>2627using namespace llvm;28using namespace llvm::xray;2930static cl::SubCommand Account("account", "Function call accounting");31static cl::opt<std::string> AccountInput(cl::Positional,32cl::desc("<xray log file>"),33cl::Required, cl::sub(Account));34static cl::opt<bool>35AccountKeepGoing("keep-going", cl::desc("Keep going on errors encountered"),36cl::sub(Account), cl::init(false));37static cl::alias AccountKeepGoing2("k", cl::aliasopt(AccountKeepGoing),38cl::desc("Alias for -keep_going"));39static cl::opt<bool> AccountRecursiveCallsOnly(40"recursive-calls-only", cl::desc("Only count the calls that are recursive"),41cl::sub(Account), cl::init(false));42static cl::opt<bool> AccountDeduceSiblingCalls(43"deduce-sibling-calls",44cl::desc("Deduce sibling calls when unrolling function call stacks"),45cl::sub(Account), cl::init(false));46static cl::alias47AccountDeduceSiblingCalls2("d", cl::aliasopt(AccountDeduceSiblingCalls),48cl::desc("Alias for -deduce_sibling_calls"));49static cl::opt<std::string>50AccountOutput("output", cl::value_desc("output file"), cl::init("-"),51cl::desc("output file; use '-' for stdout"),52cl::sub(Account));53static cl::alias AccountOutput2("o", cl::aliasopt(AccountOutput),54cl::desc("Alias for -output"));55enum class AccountOutputFormats { TEXT, CSV };56static cl::opt<AccountOutputFormats>57AccountOutputFormat("format", cl::desc("output format"),58cl::values(clEnumValN(AccountOutputFormats::TEXT,59"text", "report stats in text"),60clEnumValN(AccountOutputFormats::CSV, "csv",61"report stats in csv")),62cl::sub(Account));63static cl::alias AccountOutputFormat2("f", cl::desc("Alias of -format"),64cl::aliasopt(AccountOutputFormat));6566enum class SortField {67FUNCID,68COUNT,69MIN,70MED,71PCT90,72PCT99,73MAX,74SUM,75FUNC,76};7778static cl::opt<SortField> AccountSortOutput(79"sort", cl::desc("sort output by this field"), cl::value_desc("field"),80cl::sub(Account), cl::init(SortField::FUNCID),81cl::values(clEnumValN(SortField::FUNCID, "funcid", "function id"),82clEnumValN(SortField::COUNT, "count", "function call counts"),83clEnumValN(SortField::MIN, "min", "minimum function durations"),84clEnumValN(SortField::MED, "med", "median function durations"),85clEnumValN(SortField::PCT90, "90p", "90th percentile durations"),86clEnumValN(SortField::PCT99, "99p", "99th percentile durations"),87clEnumValN(SortField::MAX, "max", "maximum function durations"),88clEnumValN(SortField::SUM, "sum", "sum of call durations"),89clEnumValN(SortField::FUNC, "func", "function names")));90static cl::alias AccountSortOutput2("s", cl::aliasopt(AccountSortOutput),91cl::desc("Alias for -sort"));9293enum class SortDirection {94ASCENDING,95DESCENDING,96};97static cl::opt<SortDirection> AccountSortOrder(98"sortorder", cl::desc("sort ordering"), cl::init(SortDirection::ASCENDING),99cl::values(clEnumValN(SortDirection::ASCENDING, "asc", "ascending"),100clEnumValN(SortDirection::DESCENDING, "dsc", "descending")),101cl::sub(Account));102static cl::alias AccountSortOrder2("r", cl::aliasopt(AccountSortOrder),103cl::desc("Alias for -sortorder"));104105static cl::opt<int> AccountTop("top", cl::desc("only show the top N results"),106cl::value_desc("N"), cl::sub(Account),107cl::init(-1));108static cl::alias AccountTop2("p", cl::desc("Alias for -top"),109cl::aliasopt(AccountTop));110111static cl::opt<std::string>112AccountInstrMap("instr_map",113cl::desc("binary with the instrumentation map, or "114"a separate instrumentation map"),115cl::value_desc("binary with xray_instr_map"),116cl::sub(Account), cl::init(""));117static cl::alias AccountInstrMap2("m", cl::aliasopt(AccountInstrMap),118cl::desc("Alias for -instr_map"));119120namespace {121122template <class T, class U> void setMinMax(std::pair<T, T> &MM, U &&V) {123if (MM.first == 0 || MM.second == 0)124MM = std::make_pair(std::forward<U>(V), std::forward<U>(V));125else126MM = std::make_pair(std::min(MM.first, V), std::max(MM.second, V));127}128129template <class T> T diff(T L, T R) { return std::max(L, R) - std::min(L, R); }130131} // namespace132133using RecursionStatus = LatencyAccountant::FunctionStack::RecursionStatus;134RecursionStatus &RecursionStatus::operator++() {135auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);136assert(Depth >= 0 && Depth < std::numeric_limits<decltype(Depth)>::max());137++Depth;138Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // ++Storage139// Did this function just (maybe indirectly) call itself the first time?140if (!isRecursive() && Depth == 2) // Storage == 2 / Storage s> 1141Bitfield::set<RecursionStatus::IsRecursive>(Storage,142true); // Storage |= INT_MIN143return *this;144}145RecursionStatus &RecursionStatus::operator--() {146auto Depth = Bitfield::get<RecursionStatus::Depth>(Storage);147assert(Depth > 0);148--Depth;149Bitfield::set<RecursionStatus::Depth>(Storage, Depth); // --Storage150// Did we leave a function that previouly (maybe indirectly) called itself?151if (isRecursive() && Depth == 0) // Storage == INT_MIN152Bitfield::set<RecursionStatus::IsRecursive>(Storage, false); // Storage = 0153return *this;154}155bool RecursionStatus::isRecursive() const {156return Bitfield::get<RecursionStatus::IsRecursive>(Storage); // Storage s< 0157}158159bool LatencyAccountant::accountRecord(const XRayRecord &Record) {160setMinMax(PerThreadMinMaxTSC[Record.TId], Record.TSC);161setMinMax(PerCPUMinMaxTSC[Record.CPU], Record.TSC);162163if (CurrentMaxTSC == 0)164CurrentMaxTSC = Record.TSC;165166if (Record.TSC < CurrentMaxTSC)167return false;168169auto &ThreadStack = PerThreadFunctionStack[Record.TId];170if (RecursiveCallsOnly && !ThreadStack.RecursionDepth)171ThreadStack.RecursionDepth.emplace();172switch (Record.Type) {173case RecordTypes::CUSTOM_EVENT:174case RecordTypes::TYPED_EVENT:175// TODO: Support custom and typed event accounting in the future.176return true;177case RecordTypes::ENTER:178case RecordTypes::ENTER_ARG: {179ThreadStack.Stack.emplace_back(Record.FuncId, Record.TSC);180if (ThreadStack.RecursionDepth)181++(*ThreadStack.RecursionDepth)[Record.FuncId];182break;183}184case RecordTypes::EXIT:185case RecordTypes::TAIL_EXIT: {186if (ThreadStack.Stack.empty())187return false;188189if (ThreadStack.Stack.back().first == Record.FuncId) {190const auto &Top = ThreadStack.Stack.back();191if (!ThreadStack.RecursionDepth ||192(*ThreadStack.RecursionDepth)[Top.first].isRecursive())193recordLatency(Top.first, diff(Top.second, Record.TSC));194if (ThreadStack.RecursionDepth)195--(*ThreadStack.RecursionDepth)[Top.first];196ThreadStack.Stack.pop_back();197break;198}199200if (!DeduceSiblingCalls)201return false;202203// Look for the parent up the stack.204auto Parent =205llvm::find_if(llvm::reverse(ThreadStack.Stack),206[&](const std::pair<const int32_t, uint64_t> &E) {207return E.first == Record.FuncId;208});209if (Parent == ThreadStack.Stack.rend())210return false;211212// Account time for this apparently sibling call exit up the stack.213// Considering the following case:214//215// f()216// g()217// h()218//219// We might only ever see the following entries:220//221// -> f()222// -> g()223// -> h()224// <- h()225// <- f()226//227// Now we don't see the exit to g() because some older version of the XRay228// runtime wasn't instrumenting tail exits. If we don't deduce tail calls,229// we may potentially never account time for g() -- and this code would have230// already bailed out, because `<- f()` doesn't match the current "top" of231// stack where we're waiting for the exit to `g()` instead. This is not232// ideal and brittle -- so instead we provide a potentially inaccurate233// accounting of g() instead, computing it from the exit of f().234//235// While it might be better that we account the time between `-> g()` and236// `-> h()` as the proper accounting of time for g() here, this introduces237// complexity to do correctly (need to backtrack, etc.).238//239// FIXME: Potentially implement the more complex deduction algorithm?240auto R = make_range(std::next(Parent).base(), ThreadStack.Stack.end());241for (auto &E : R) {242if (!ThreadStack.RecursionDepth ||243(*ThreadStack.RecursionDepth)[E.first].isRecursive())244recordLatency(E.first, diff(E.second, Record.TSC));245}246for (auto &Top : reverse(R)) {247if (ThreadStack.RecursionDepth)248--(*ThreadStack.RecursionDepth)[Top.first];249ThreadStack.Stack.pop_back();250}251break;252}253}254255return true;256}257258namespace {259260// We consolidate the data into a struct which we can output in various forms.261struct ResultRow {262uint64_t Count;263double Min;264double Median;265double Pct90;266double Pct99;267double Max;268double Sum;269std::string DebugInfo;270std::string Function;271};272273ResultRow getStats(MutableArrayRef<uint64_t> Timings) {274assert(!Timings.empty());275ResultRow R;276R.Sum = std::accumulate(Timings.begin(), Timings.end(), 0.0);277auto MinMax = std::minmax_element(Timings.begin(), Timings.end());278R.Min = *MinMax.first;279R.Max = *MinMax.second;280R.Count = Timings.size();281282auto MedianOff = Timings.size() / 2;283std::nth_element(Timings.begin(), Timings.begin() + MedianOff, Timings.end());284R.Median = Timings[MedianOff];285286auto Pct90Off = std::floor(Timings.size() * 0.9);287std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct90Off,288Timings.end());289R.Pct90 = Timings[Pct90Off];290291auto Pct99Off = std::floor(Timings.size() * 0.99);292std::nth_element(Timings.begin(), Timings.begin() + (uint64_t)Pct99Off,293Timings.end());294R.Pct99 = Timings[Pct99Off];295return R;296}297298} // namespace299300using TupleType = std::tuple<int32_t, uint64_t, ResultRow>;301302template <typename F>303static void sortByKey(std::vector<TupleType> &Results, F Fn) {304bool ASC = AccountSortOrder == SortDirection::ASCENDING;305llvm::sort(Results, [=](const TupleType &L, const TupleType &R) {306return ASC ? Fn(L) < Fn(R) : Fn(L) > Fn(R);307});308}309310template <class F>311void LatencyAccountant::exportStats(const XRayFileHeader &Header, F Fn) const {312std::vector<TupleType> Results;313Results.reserve(FunctionLatencies.size());314for (auto FT : FunctionLatencies) {315const auto &FuncId = FT.first;316auto &Timings = FT.second;317Results.emplace_back(FuncId, Timings.size(), getStats(Timings));318auto &Row = std::get<2>(Results.back());319if (Header.CycleFrequency) {320double CycleFrequency = Header.CycleFrequency;321Row.Min /= CycleFrequency;322Row.Median /= CycleFrequency;323Row.Pct90 /= CycleFrequency;324Row.Pct99 /= CycleFrequency;325Row.Max /= CycleFrequency;326Row.Sum /= CycleFrequency;327}328329Row.Function = FuncIdHelper.SymbolOrNumber(FuncId);330Row.DebugInfo = FuncIdHelper.FileLineAndColumn(FuncId);331}332333// Sort the data according to user-provided flags.334switch (AccountSortOutput) {335case SortField::FUNCID:336sortByKey(Results, [](const TupleType &X) { return std::get<0>(X); });337break;338case SortField::COUNT:339sortByKey(Results, [](const TupleType &X) { return std::get<1>(X); });340break;341case SortField::MIN:342sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Min; });343break;344case SortField::MED:345sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Median; });346break;347case SortField::PCT90:348sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct90; });349break;350case SortField::PCT99:351sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Pct99; });352break;353case SortField::MAX:354sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Max; });355break;356case SortField::SUM:357sortByKey(Results, [](const TupleType &X) { return std::get<2>(X).Sum; });358break;359case SortField::FUNC:360llvm_unreachable("Not implemented");361}362363if (AccountTop > 0) {364auto MaxTop =365std::min(AccountTop.getValue(), static_cast<int>(Results.size()));366Results.erase(Results.begin() + MaxTop, Results.end());367}368369for (const auto &R : Results)370Fn(std::get<0>(R), std::get<1>(R), std::get<2>(R));371}372373void LatencyAccountant::exportStatsAsText(raw_ostream &OS,374const XRayFileHeader &Header) const {375OS << "Functions with latencies: " << FunctionLatencies.size() << "\n";376377// We spend some effort to make the text output more readable, so we do the378// following formatting decisions for each of the fields:379//380// - funcid: 32-bit, but we can determine the largest number and be381// between382// a minimum of 5 characters, up to 9 characters, right aligned.383// - count: 64-bit, but we can determine the largest number and be384// between385// a minimum of 5 characters, up to 9 characters, right aligned.386// - min, median, 90pct, 99pct, max: double precision, but we want to keep387// the values in seconds, with microsecond precision (0.000'001), so we388// have at most 6 significant digits, with the whole number part to be389// at390// least 1 character. For readability we'll right-align, with full 9391// characters each.392// - debug info, function name: we format this as a concatenation of the393// debug info and the function name.394//395static constexpr char StatsHeaderFormat[] =396"{0,+9} {1,+10} [{2,+9}, {3,+9}, {4,+9}, {5,+9}, {6,+9}] {7,+9}";397static constexpr char StatsFormat[] =398R"({0,+9} {1,+10} [{2,+9:f6}, {3,+9:f6}, {4,+9:f6}, {5,+9:f6}, {6,+9:f6}] {7,+9:f6})";399OS << llvm::formatv(StatsHeaderFormat, "funcid", "count", "min", "med", "90p",400"99p", "max", "sum")401<< llvm::formatv(" {0,-12}\n", "function");402exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {403OS << llvm::formatv(StatsFormat, FuncId, Count, Row.Min, Row.Median,404Row.Pct90, Row.Pct99, Row.Max, Row.Sum)405<< " " << Row.DebugInfo << ": " << Row.Function << "\n";406});407}408409void LatencyAccountant::exportStatsAsCSV(raw_ostream &OS,410const XRayFileHeader &Header) const {411OS << "funcid,count,min,median,90%ile,99%ile,max,sum,debug,function\n";412exportStats(Header, [&](int32_t FuncId, size_t Count, const ResultRow &Row) {413OS << FuncId << ',' << Count << ',' << Row.Min << ',' << Row.Median << ','414<< Row.Pct90 << ',' << Row.Pct99 << ',' << Row.Max << "," << Row.Sum415<< ",\"" << Row.DebugInfo << "\",\"" << Row.Function << "\"\n";416});417}418419using namespace llvm::xray;420421namespace llvm {422template <> struct format_provider<llvm::xray::RecordTypes> {423static void format(const llvm::xray::RecordTypes &T, raw_ostream &Stream,424StringRef Style) {425switch (T) {426case RecordTypes::ENTER:427Stream << "enter";428break;429case RecordTypes::ENTER_ARG:430Stream << "enter-arg";431break;432case RecordTypes::EXIT:433Stream << "exit";434break;435case RecordTypes::TAIL_EXIT:436Stream << "tail-exit";437break;438case RecordTypes::CUSTOM_EVENT:439Stream << "custom-event";440break;441case RecordTypes::TYPED_EVENT:442Stream << "typed-event";443break;444}445}446};447} // namespace llvm448449static CommandRegistration Unused(&Account, []() -> Error {450InstrumentationMap Map;451if (!AccountInstrMap.empty()) {452auto InstrumentationMapOrError = loadInstrumentationMap(AccountInstrMap);453if (!InstrumentationMapOrError)454return joinErrors(make_error<StringError>(455Twine("Cannot open instrumentation map '") +456AccountInstrMap + "'",457std::make_error_code(std::errc::invalid_argument)),458InstrumentationMapOrError.takeError());459Map = std::move(*InstrumentationMapOrError);460}461462std::error_code EC;463raw_fd_ostream OS(AccountOutput, EC, sys::fs::OpenFlags::OF_TextWithCRLF);464if (EC)465return make_error<StringError>(466Twine("Cannot open file '") + AccountOutput + "' for writing.", EC);467468const auto &FunctionAddresses = Map.getFunctionAddresses();469symbolize::LLVMSymbolizer Symbolizer;470llvm::xray::FuncIdConversionHelper FuncIdHelper(AccountInstrMap, Symbolizer,471FunctionAddresses);472xray::LatencyAccountant FCA(FuncIdHelper, AccountRecursiveCallsOnly,473AccountDeduceSiblingCalls);474auto TraceOrErr = loadTraceFile(AccountInput);475if (!TraceOrErr)476return joinErrors(477make_error<StringError>(478Twine("Failed loading input file '") + AccountInput + "'",479std::make_error_code(std::errc::executable_format_error)),480TraceOrErr.takeError());481482auto &T = *TraceOrErr;483for (const auto &Record : T) {484if (FCA.accountRecord(Record))485continue;486errs()487<< "Error processing record: "488<< llvm::formatv(489R"({{type: {0}; cpu: {1}; record-type: {2}; function-id: {3}; tsc: {4}; thread-id: {5}; process-id: {6}}})",490Record.RecordType, Record.CPU, Record.Type, Record.FuncId,491Record.TSC, Record.TId, Record.PId)492<< '\n';493for (const auto &ThreadStack : FCA.getPerThreadFunctionStack()) {494errs() << "Thread ID: " << ThreadStack.first << "\n";495if (ThreadStack.second.Stack.empty()) {496errs() << " (empty stack)\n";497continue;498}499auto Level = ThreadStack.second.Stack.size();500for (const auto &Entry : llvm::reverse(ThreadStack.second.Stack))501errs() << " #" << Level-- << "\t"502<< FuncIdHelper.SymbolOrNumber(Entry.first) << '\n';503}504if (!AccountKeepGoing)505return make_error<StringError>(506Twine("Failed accounting function calls in file '") + AccountInput +507"'.",508std::make_error_code(std::errc::executable_format_error));509}510switch (AccountOutputFormat) {511case AccountOutputFormats::TEXT:512FCA.exportStatsAsText(OS, T.getFileHeader());513break;514case AccountOutputFormats::CSV:515FCA.exportStatsAsCSV(OS, T.getFileHeader());516break;517}518519return Error::success();520});521522523