Path: blob/main/contrib/llvm-project/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
35231 views
//===-- llvm-symbolizer.cpp - Simple addr2line-like symbolizer ------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This utility works much like "addr2line". It is able of transforming9// tuples (module name, module offset) to code locations (function name,10// file, line number, column number). It is targeted for compiler-rt tools11// (especially AddressSanitizer and ThreadSanitizer) that can use it12// to symbolize stack traces in their error reports.13//14//===----------------------------------------------------------------------===//1516#include "Opts.inc"17#include "llvm/ADT/StringExtras.h"18#include "llvm/ADT/StringRef.h"19#include "llvm/Config/config.h"20#include "llvm/DebugInfo/Symbolize/DIPrinter.h"21#include "llvm/DebugInfo/Symbolize/Markup.h"22#include "llvm/DebugInfo/Symbolize/MarkupFilter.h"23#include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"24#include "llvm/DebugInfo/Symbolize/Symbolize.h"25#include "llvm/Debuginfod/BuildIDFetcher.h"26#include "llvm/Debuginfod/Debuginfod.h"27#include "llvm/Debuginfod/HTTPClient.h"28#include "llvm/Option/Arg.h"29#include "llvm/Option/ArgList.h"30#include "llvm/Option/Option.h"31#include "llvm/Support/COM.h"32#include "llvm/Support/CommandLine.h"33#include "llvm/Support/Debug.h"34#include "llvm/Support/Errc.h"35#include "llvm/Support/FileSystem.h"36#include "llvm/Support/LLVMDriver.h"37#include "llvm/Support/Path.h"38#include "llvm/Support/StringSaver.h"39#include "llvm/Support/WithColor.h"40#include "llvm/Support/raw_ostream.h"41#include <algorithm>42#include <cstdio>43#include <cstring>44#include <iostream>45#include <string>4647using namespace llvm;48using namespace symbolize;4950namespace {51enum ID {52OPT_INVALID = 0, // This is not an option ID.53#define OPTION(...) LLVM_MAKE_OPT_ID(__VA_ARGS__),54#include "Opts.inc"55#undef OPTION56};5758#define PREFIX(NAME, VALUE) \59static constexpr StringLiteral NAME##_init[] = VALUE; \60static constexpr ArrayRef<StringLiteral> NAME(NAME##_init, \61std::size(NAME##_init) - 1);62#include "Opts.inc"63#undef PREFIX6465using namespace llvm::opt;66static constexpr opt::OptTable::Info InfoTable[] = {67#define OPTION(...) LLVM_CONSTRUCT_OPT_INFO(__VA_ARGS__),68#include "Opts.inc"69#undef OPTION70};7172class SymbolizerOptTable : public opt::GenericOptTable {73public:74SymbolizerOptTable() : GenericOptTable(InfoTable) {75setGroupedShortOptions(true);76}77};78} // namespace7980static std::string ToolName;8182static void printError(const ErrorInfoBase &EI, StringRef AuxInfo) {83WithColor::error(errs(), ToolName);84if (!AuxInfo.empty())85errs() << "'" << AuxInfo << "': ";86EI.log(errs());87errs() << '\n';88}8990template <typename T>91static void print(const Request &Request, Expected<T> &ResOrErr,92DIPrinter &Printer) {93if (ResOrErr) {94// No error, print the result.95Printer.print(Request, *ResOrErr);96return;97}9899// Handle the error.100bool PrintEmpty = true;101handleAllErrors(std::move(ResOrErr.takeError()),102[&](const ErrorInfoBase &EI) {103PrintEmpty = Printer.printError(Request, EI);104});105106if (PrintEmpty)107Printer.print(Request, T());108}109110enum class OutputStyle { LLVM, GNU, JSON };111112enum class Command {113Code,114Data,115Frame,116};117118static void enableDebuginfod(LLVMSymbolizer &Symbolizer,119const opt::ArgList &Args) {120static bool IsEnabled = false;121if (IsEnabled)122return;123IsEnabled = true;124// Look up symbols using the debuginfod client.125Symbolizer.setBuildIDFetcher(std::make_unique<DebuginfodFetcher>(126Args.getAllArgValues(OPT_debug_file_directory_EQ)));127// The HTTPClient must be initialized for use by the debuginfod client.128HTTPClient::initialize();129}130131static StringRef getSpaceDelimitedWord(StringRef &Source) {132const char kDelimiters[] = " \n\r";133const char *Pos = Source.data();134StringRef Result;135Pos += strspn(Pos, kDelimiters);136if (*Pos == '"' || *Pos == '\'') {137char Quote = *Pos;138Pos++;139const char *End = strchr(Pos, Quote);140if (!End)141return StringRef();142Result = StringRef(Pos, End - Pos);143Pos = End + 1;144} else {145int NameLength = strcspn(Pos, kDelimiters);146Result = StringRef(Pos, NameLength);147Pos += NameLength;148}149Source = StringRef(Pos, Source.end() - Pos);150return Result;151}152153static Error makeStringError(StringRef Msg) {154return make_error<StringError>(Msg, inconvertibleErrorCode());155}156157static Error parseCommand(StringRef BinaryName, bool IsAddr2Line,158StringRef InputString, Command &Cmd,159std::string &ModuleName, object::BuildID &BuildID,160StringRef &Symbol, uint64_t &Offset) {161ModuleName = BinaryName;162if (InputString.consume_front("CODE ")) {163Cmd = Command::Code;164} else if (InputString.consume_front("DATA ")) {165Cmd = Command::Data;166} else if (InputString.consume_front("FRAME ")) {167Cmd = Command::Frame;168} else {169// If no cmd, assume it's CODE.170Cmd = Command::Code;171}172173// Parse optional input file specification.174bool HasFilePrefix = false;175bool HasBuildIDPrefix = false;176while (!InputString.empty()) {177InputString = InputString.ltrim();178if (InputString.consume_front("FILE:")) {179if (HasFilePrefix || HasBuildIDPrefix)180return makeStringError("duplicate input file specification prefix");181HasFilePrefix = true;182continue;183}184if (InputString.consume_front("BUILDID:")) {185if (HasBuildIDPrefix || HasFilePrefix)186return makeStringError("duplicate input file specification prefix");187HasBuildIDPrefix = true;188continue;189}190break;191}192193// If an input file is not specified on the command line, try to extract it194// from the command.195if (HasBuildIDPrefix || HasFilePrefix) {196InputString = InputString.ltrim();197if (InputString.empty()) {198if (HasFilePrefix)199return makeStringError("must be followed by an input file");200else201return makeStringError("must be followed by a hash");202}203204if (!BinaryName.empty() || !BuildID.empty())205return makeStringError("input file has already been specified");206207StringRef Name = getSpaceDelimitedWord(InputString);208if (Name.empty())209return makeStringError("unbalanced quotes in input file name");210if (HasBuildIDPrefix) {211BuildID = parseBuildID(Name);212if (BuildID.empty())213return makeStringError("wrong format of build-id");214} else {215ModuleName = Name;216}217} else if (BinaryName.empty() && BuildID.empty()) {218// No input file has been specified. If the input string contains at least219// two items, assume that the first item is a file name.220ModuleName = getSpaceDelimitedWord(InputString);221if (ModuleName.empty())222return makeStringError("no input filename has been specified");223}224225// Parse address specification, which can be an offset in module or a226// symbol with optional offset.227InputString = InputString.trim();228if (InputString.empty())229return makeStringError("no module offset has been specified");230231// If input string contains a space, ignore everything after it. This behavior232// is consistent with GNU addr2line.233int AddrSpecLength = InputString.find_first_of(" \n\r");234StringRef AddrSpec = InputString.substr(0, AddrSpecLength);235bool StartsWithDigit = std::isdigit(AddrSpec.front());236237// GNU addr2line assumes the address is hexadecimal and allows a redundant238// "0x" or "0X" prefix; do the same for compatibility.239if (IsAddr2Line)240AddrSpec.consume_front("0x") || AddrSpec.consume_front("0X");241242// If address specification is a number, treat it as a module offset.243if (!AddrSpec.getAsInteger(IsAddr2Line ? 16 : 0, Offset)) {244// Module offset is an address.245Symbol = StringRef();246return Error::success();247}248249// If address specification starts with a digit, but is not a number, consider250// it as invalid.251if (StartsWithDigit || AddrSpec.empty())252return makeStringError("expected a number as module offset");253254// Otherwise it is a symbol name, potentially with an offset.255Symbol = AddrSpec;256Offset = 0;257258// If the address specification contains '+', try treating it as259// "symbol + offset".260size_t Plus = AddrSpec.rfind('+');261if (Plus != StringRef::npos) {262StringRef SymbolStr = AddrSpec.take_front(Plus);263StringRef OffsetStr = AddrSpec.substr(Plus + 1);264if (!SymbolStr.empty() && !OffsetStr.empty() &&265!OffsetStr.getAsInteger(0, Offset)) {266Symbol = SymbolStr;267return Error::success();268}269// The found '+' is not an offset delimiter.270}271272return Error::success();273}274275template <typename T>276void executeCommand(StringRef ModuleName, const T &ModuleSpec, Command Cmd,277StringRef Symbol, uint64_t Offset, uint64_t AdjustVMA,278bool ShouldInline, OutputStyle Style,279LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {280uint64_t AdjustedOffset = Offset - AdjustVMA;281object::SectionedAddress Address = {AdjustedOffset,282object::SectionedAddress::UndefSection};283Request SymRequest = {284ModuleName, Symbol.empty() ? std::make_optional(Offset) : std::nullopt,285Symbol};286if (Cmd == Command::Data) {287Expected<DIGlobal> ResOrErr = Symbolizer.symbolizeData(ModuleSpec, Address);288print(SymRequest, ResOrErr, Printer);289} else if (Cmd == Command::Frame) {290Expected<std::vector<DILocal>> ResOrErr =291Symbolizer.symbolizeFrame(ModuleSpec, Address);292print(SymRequest, ResOrErr, Printer);293} else if (!Symbol.empty()) {294Expected<std::vector<DILineInfo>> ResOrErr =295Symbolizer.findSymbol(ModuleSpec, Symbol, Offset);296print(SymRequest, ResOrErr, Printer);297} else if (ShouldInline) {298Expected<DIInliningInfo> ResOrErr =299Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);300print(SymRequest, ResOrErr, Printer);301} else if (Style == OutputStyle::GNU) {302// With PrintFunctions == FunctionNameKind::LinkageName (default)303// and UseSymbolTable == true (also default), Symbolizer.symbolizeCode()304// may override the name of an inlined function with the name of the topmost305// caller function in the inlining chain. This contradicts the existing306// behavior of addr2line. Symbolizer.symbolizeInlinedCode() overrides only307// the topmost function, which suits our needs better.308Expected<DIInliningInfo> ResOrErr =309Symbolizer.symbolizeInlinedCode(ModuleSpec, Address);310Expected<DILineInfo> Res0OrErr =311!ResOrErr312? Expected<DILineInfo>(ResOrErr.takeError())313: ((ResOrErr->getNumberOfFrames() == 0) ? DILineInfo()314: ResOrErr->getFrame(0));315print(SymRequest, Res0OrErr, Printer);316} else {317Expected<DILineInfo> ResOrErr =318Symbolizer.symbolizeCode(ModuleSpec, Address);319print(SymRequest, ResOrErr, Printer);320}321Symbolizer.pruneCache();322}323324static void printUnknownLineInfo(std::string ModuleName, DIPrinter &Printer) {325Request SymRequest = {ModuleName, std::nullopt, StringRef()};326Printer.print(SymRequest, DILineInfo());327}328329static void symbolizeInput(const opt::InputArgList &Args,330object::BuildIDRef IncomingBuildID,331uint64_t AdjustVMA, bool IsAddr2Line,332OutputStyle Style, StringRef InputString,333LLVMSymbolizer &Symbolizer, DIPrinter &Printer) {334Command Cmd;335std::string ModuleName;336object::BuildID BuildID(IncomingBuildID.begin(), IncomingBuildID.end());337uint64_t Offset = 0;338StringRef Symbol;339340// An empty input string may be used to check if the process is alive and341// responding to input. Do not emit a message on stderr in this case but342// respond on stdout.343if (InputString.empty()) {344printUnknownLineInfo(ModuleName, Printer);345return;346}347if (Error E = parseCommand(Args.getLastArgValue(OPT_obj_EQ), IsAddr2Line,348StringRef(InputString), Cmd, ModuleName, BuildID,349Symbol, Offset)) {350handleAllErrors(std::move(E), [&](const StringError &EI) {351printError(EI, InputString);352printUnknownLineInfo(ModuleName, Printer);353});354return;355}356bool ShouldInline = Args.hasFlag(OPT_inlines, OPT_no_inlines, !IsAddr2Line);357if (!BuildID.empty()) {358assert(ModuleName.empty());359if (!Args.hasArg(OPT_no_debuginfod))360enableDebuginfod(Symbolizer, Args);361std::string BuildIDStr = toHex(BuildID);362executeCommand(BuildIDStr, BuildID, Cmd, Symbol, Offset, AdjustVMA,363ShouldInline, Style, Symbolizer, Printer);364} else {365executeCommand(ModuleName, ModuleName, Cmd, Symbol, Offset, AdjustVMA,366ShouldInline, Style, Symbolizer, Printer);367}368}369370static void printHelp(StringRef ToolName, const SymbolizerOptTable &Tbl,371raw_ostream &OS) {372const char HelpText[] = " [options] addresses...";373Tbl.printHelp(OS, (ToolName + HelpText).str().c_str(),374ToolName.str().c_str());375// TODO Replace this with OptTable API once it adds extrahelp support.376OS << "\nPass @FILE as argument to read options from FILE.\n";377}378379static opt::InputArgList parseOptions(int Argc, char *Argv[], bool IsAddr2Line,380StringSaver &Saver,381SymbolizerOptTable &Tbl) {382StringRef ToolName = IsAddr2Line ? "llvm-addr2line" : "llvm-symbolizer";383// The environment variable specifies initial options which can be overridden384// by commnad line options.385Tbl.setInitialOptionsFromEnvironment(IsAddr2Line ? "LLVM_ADDR2LINE_OPTS"386: "LLVM_SYMBOLIZER_OPTS");387bool HasError = false;388opt::InputArgList Args =389Tbl.parseArgs(Argc, Argv, OPT_UNKNOWN, Saver, [&](StringRef Msg) {390errs() << ("error: " + Msg + "\n");391HasError = true;392});393if (HasError)394exit(1);395if (Args.hasArg(OPT_help)) {396printHelp(ToolName, Tbl, outs());397exit(0);398}399if (Args.hasArg(OPT_version)) {400outs() << ToolName << '\n';401cl::PrintVersionMessage();402exit(0);403}404405return Args;406}407408template <typename T>409static void parseIntArg(const opt::InputArgList &Args, int ID, T &Value) {410if (const opt::Arg *A = Args.getLastArg(ID)) {411StringRef V(A->getValue());412if (!llvm::to_integer(V, Value, 0)) {413errs() << A->getSpelling() +414": expected a non-negative integer, but got '" + V + "'";415exit(1);416}417} else {418Value = 0;419}420}421422static FunctionNameKind decideHowToPrintFunctions(const opt::InputArgList &Args,423bool IsAddr2Line) {424if (Args.hasArg(OPT_functions))425return FunctionNameKind::LinkageName;426if (const opt::Arg *A = Args.getLastArg(OPT_functions_EQ))427return StringSwitch<FunctionNameKind>(A->getValue())428.Case("none", FunctionNameKind::None)429.Case("short", FunctionNameKind::ShortName)430.Default(FunctionNameKind::LinkageName);431return IsAddr2Line ? FunctionNameKind::None : FunctionNameKind::LinkageName;432}433434static std::optional<bool> parseColorArg(const opt::InputArgList &Args) {435if (Args.hasArg(OPT_color))436return true;437if (const opt::Arg *A = Args.getLastArg(OPT_color_EQ))438return StringSwitch<std::optional<bool>>(A->getValue())439.Case("always", true)440.Case("never", false)441.Case("auto", std::nullopt);442return std::nullopt;443}444445static object::BuildID parseBuildIDArg(const opt::InputArgList &Args, int ID) {446const opt::Arg *A = Args.getLastArg(ID);447if (!A)448return {};449450StringRef V(A->getValue());451object::BuildID BuildID = parseBuildID(V);452if (BuildID.empty()) {453errs() << A->getSpelling() + ": expected a build ID, but got '" + V + "'\n";454exit(1);455}456return BuildID;457}458459// Symbolize markup from stdin and write the result to stdout.460static void filterMarkup(const opt::InputArgList &Args, LLVMSymbolizer &Symbolizer) {461MarkupFilter Filter(outs(), Symbolizer, parseColorArg(Args));462std::string InputString;463while (std::getline(std::cin, InputString)) {464InputString += '\n';465Filter.filter(std::move(InputString));466}467Filter.finish();468}469470int llvm_symbolizer_main(int argc, char **argv, const llvm::ToolContext &) {471sys::InitializeCOMRAII COM(sys::COMThreadingMode::MultiThreaded);472473ToolName = argv[0];474bool IsAddr2Line = sys::path::stem(ToolName).contains("addr2line");475BumpPtrAllocator A;476StringSaver Saver(A);477SymbolizerOptTable Tbl;478opt::InputArgList Args = parseOptions(argc, argv, IsAddr2Line, Saver, Tbl);479480LLVMSymbolizer::Options Opts;481uint64_t AdjustVMA;482PrinterConfig Config;483parseIntArg(Args, OPT_adjust_vma_EQ, AdjustVMA);484if (const opt::Arg *A = Args.getLastArg(OPT_basenames, OPT_relativenames)) {485Opts.PathStyle =486A->getOption().matches(OPT_basenames)487? DILineInfoSpecifier::FileLineInfoKind::BaseNameOnly488: DILineInfoSpecifier::FileLineInfoKind::RelativeFilePath;489} else {490Opts.PathStyle = DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath;491}492Opts.DebugFileDirectory = Args.getAllArgValues(OPT_debug_file_directory_EQ);493Opts.DefaultArch = Args.getLastArgValue(OPT_default_arch_EQ).str();494Opts.Demangle = Args.hasFlag(OPT_demangle, OPT_no_demangle, !IsAddr2Line);495Opts.DWPName = Args.getLastArgValue(OPT_dwp_EQ).str();496Opts.FallbackDebugPath =497Args.getLastArgValue(OPT_fallback_debug_path_EQ).str();498Opts.PrintFunctions = decideHowToPrintFunctions(Args, IsAddr2Line);499parseIntArg(Args, OPT_print_source_context_lines_EQ,500Config.SourceContextLines);501Opts.RelativeAddresses = Args.hasArg(OPT_relative_address);502Opts.UntagAddresses =503Args.hasFlag(OPT_untag_addresses, OPT_no_untag_addresses, !IsAddr2Line);504Opts.UseDIA = Args.hasArg(OPT_use_dia);505#if !defined(LLVM_ENABLE_DIA_SDK)506if (Opts.UseDIA) {507WithColor::warning() << "DIA not available; using native PDB reader\n";508Opts.UseDIA = false;509}510#endif511Opts.UseSymbolTable = true;512if (Args.hasArg(OPT_cache_size_EQ))513parseIntArg(Args, OPT_cache_size_EQ, Opts.MaxCacheSize);514Config.PrintAddress = Args.hasArg(OPT_addresses);515Config.PrintFunctions = Opts.PrintFunctions != FunctionNameKind::None;516Config.Pretty = Args.hasArg(OPT_pretty_print);517Config.Verbose = Args.hasArg(OPT_verbose);518519for (const opt::Arg *A : Args.filtered(OPT_dsym_hint_EQ)) {520StringRef Hint(A->getValue());521if (sys::path::extension(Hint) == ".dSYM") {522Opts.DsymHints.emplace_back(Hint);523} else {524errs() << "Warning: invalid dSYM hint: \"" << Hint525<< "\" (must have the '.dSYM' extension).\n";526}527}528529LLVMSymbolizer Symbolizer(Opts);530531if (Args.hasFlag(OPT_debuginfod, OPT_no_debuginfod, canUseDebuginfod()))532enableDebuginfod(Symbolizer, Args);533534if (Args.hasArg(OPT_filter_markup)) {535filterMarkup(Args, Symbolizer);536return 0;537}538539auto Style = IsAddr2Line ? OutputStyle::GNU : OutputStyle::LLVM;540if (const opt::Arg *A = Args.getLastArg(OPT_output_style_EQ)) {541if (strcmp(A->getValue(), "GNU") == 0)542Style = OutputStyle::GNU;543else if (strcmp(A->getValue(), "JSON") == 0)544Style = OutputStyle::JSON;545else546Style = OutputStyle::LLVM;547}548549if (Args.hasArg(OPT_build_id_EQ) && Args.hasArg(OPT_obj_EQ)) {550errs() << "error: cannot specify both --build-id and --obj\n";551return EXIT_FAILURE;552}553object::BuildID BuildID = parseBuildIDArg(Args, OPT_build_id_EQ);554555std::unique_ptr<DIPrinter> Printer;556if (Style == OutputStyle::GNU)557Printer = std::make_unique<GNUPrinter>(outs(), printError, Config);558else if (Style == OutputStyle::JSON)559Printer = std::make_unique<JSONPrinter>(outs(), Config);560else561Printer = std::make_unique<LLVMPrinter>(outs(), printError, Config);562563// When an input file is specified, exit immediately if the file cannot be564// read. If getOrCreateModuleInfo succeeds, symbolizeInput will reuse the565// cached file handle.566if (auto *Arg = Args.getLastArg(OPT_obj_EQ); Arg) {567auto Status = Symbolizer.getOrCreateModuleInfo(Arg->getValue());568if (!Status) {569Request SymRequest = {Arg->getValue(), 0, StringRef()};570handleAllErrors(Status.takeError(), [&](const ErrorInfoBase &EI) {571Printer->printError(SymRequest, EI);572});573return EXIT_FAILURE;574}575}576577std::vector<std::string> InputAddresses = Args.getAllArgValues(OPT_INPUT);578if (InputAddresses.empty()) {579const int kMaxInputStringLength = 1024;580char InputString[kMaxInputStringLength];581582while (fgets(InputString, sizeof(InputString), stdin)) {583// Strip newline characters.584std::string StrippedInputString(InputString);585llvm::erase_if(StrippedInputString,586[](char c) { return c == '\r' || c == '\n'; });587symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style,588StrippedInputString, Symbolizer, *Printer);589outs().flush();590}591} else {592Printer->listBegin();593for (StringRef Address : InputAddresses)594symbolizeInput(Args, BuildID, AdjustVMA, IsAddr2Line, Style, Address,595Symbolizer, *Printer);596Printer->listEnd();597}598599return 0;600}601602603