Path: blob/main/contrib/llvm-project/llvm/lib/Debuginfod/Debuginfod.cpp
35233 views
//===-- llvm/Debuginfod/Debuginfod.cpp - Debuginfod client library --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7///8/// \file9///10/// This file contains several definitions for the debuginfod client and server.11/// For the client, this file defines the fetchInfo function. For the server,12/// this file defines the DebuginfodLogEntry and DebuginfodServer structs, as13/// well as the DebuginfodLog, DebuginfodCollection classes. The fetchInfo14/// function retrieves any of the three supported artifact types: (executable,15/// debuginfo, source file) associated with a build-id from debuginfod servers.16/// If a source file is to be fetched, its absolute path must be specified in17/// the Description argument to fetchInfo. The DebuginfodLogEntry,18/// DebuginfodLog, and DebuginfodCollection are used by the DebuginfodServer to19/// scan the local filesystem for binaries and serve the debuginfod protocol.20///21//===----------------------------------------------------------------------===//2223#include "llvm/Debuginfod/Debuginfod.h"24#include "llvm/ADT/StringExtras.h"25#include "llvm/ADT/StringRef.h"26#include "llvm/BinaryFormat/Magic.h"27#include "llvm/DebugInfo/DWARF/DWARFContext.h"28#include "llvm/DebugInfo/Symbolize/Symbolize.h"29#include "llvm/Debuginfod/HTTPClient.h"30#include "llvm/Object/BuildID.h"31#include "llvm/Object/ELFObjectFile.h"32#include "llvm/Support/CachePruning.h"33#include "llvm/Support/Caching.h"34#include "llvm/Support/Errc.h"35#include "llvm/Support/Error.h"36#include "llvm/Support/FileUtilities.h"37#include "llvm/Support/MemoryBuffer.h"38#include "llvm/Support/Path.h"39#include "llvm/Support/ThreadPool.h"40#include "llvm/Support/xxhash.h"4142#include <atomic>43#include <optional>44#include <thread>4546namespace llvm {4748using llvm::object::BuildIDRef;4950namespace {51std::optional<SmallVector<StringRef>> DebuginfodUrls;52// Many Readers/Single Writer lock protecting the global debuginfod URL list.53llvm::sys::RWMutex UrlsMutex;54} // namespace5556std::string getDebuginfodCacheKey(llvm::StringRef S) {57return utostr(xxh3_64bits(S));58}5960// Returns a binary BuildID as a normalized hex string.61// Uses lowercase for compatibility with common debuginfod servers.62static std::string buildIDToString(BuildIDRef ID) {63return llvm::toHex(ID, /*LowerCase=*/true);64}6566bool canUseDebuginfod() {67return HTTPClient::isAvailable() && !getDefaultDebuginfodUrls().empty();68}6970SmallVector<StringRef> getDefaultDebuginfodUrls() {71std::shared_lock<llvm::sys::RWMutex> ReadGuard(UrlsMutex);72if (!DebuginfodUrls) {73// Only read from the environment variable if the user hasn't already74// set the value.75ReadGuard.unlock();76std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);77DebuginfodUrls = SmallVector<StringRef>();78if (const char *DebuginfodUrlsEnv = std::getenv("DEBUGINFOD_URLS")) {79StringRef(DebuginfodUrlsEnv)80.split(DebuginfodUrls.value(), " ", -1, false);81}82WriteGuard.unlock();83ReadGuard.lock();84}85return DebuginfodUrls.value();86}8788// Set the default debuginfod URL list, override the environment variable.89void setDefaultDebuginfodUrls(const SmallVector<StringRef> &URLs) {90std::unique_lock<llvm::sys::RWMutex> WriteGuard(UrlsMutex);91DebuginfodUrls = URLs;92}9394/// Finds a default local file caching directory for the debuginfod client,95/// first checking DEBUGINFOD_CACHE_PATH.96Expected<std::string> getDefaultDebuginfodCacheDirectory() {97if (const char *CacheDirectoryEnv = std::getenv("DEBUGINFOD_CACHE_PATH"))98return CacheDirectoryEnv;99100SmallString<64> CacheDirectory;101if (!sys::path::cache_directory(CacheDirectory))102return createStringError(103errc::io_error, "Unable to determine appropriate cache directory.");104sys::path::append(CacheDirectory, "llvm-debuginfod", "client");105return std::string(CacheDirectory);106}107108std::chrono::milliseconds getDefaultDebuginfodTimeout() {109long Timeout;110const char *DebuginfodTimeoutEnv = std::getenv("DEBUGINFOD_TIMEOUT");111if (DebuginfodTimeoutEnv &&112to_integer(StringRef(DebuginfodTimeoutEnv).trim(), Timeout, 10))113return std::chrono::milliseconds(Timeout * 1000);114115return std::chrono::milliseconds(90 * 1000);116}117118/// The following functions fetch a debuginfod artifact to a file in a local119/// cache and return the cached file path. They first search the local cache,120/// followed by the debuginfod servers.121122std::string getDebuginfodSourceUrlPath(BuildIDRef ID,123StringRef SourceFilePath) {124SmallString<64> UrlPath;125sys::path::append(UrlPath, sys::path::Style::posix, "buildid",126buildIDToString(ID), "source",127sys::path::convert_to_slash(SourceFilePath));128return std::string(UrlPath);129}130131Expected<std::string> getCachedOrDownloadSource(BuildIDRef ID,132StringRef SourceFilePath) {133std::string UrlPath = getDebuginfodSourceUrlPath(ID, SourceFilePath);134return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);135}136137std::string getDebuginfodExecutableUrlPath(BuildIDRef ID) {138SmallString<64> UrlPath;139sys::path::append(UrlPath, sys::path::Style::posix, "buildid",140buildIDToString(ID), "executable");141return std::string(UrlPath);142}143144Expected<std::string> getCachedOrDownloadExecutable(BuildIDRef ID) {145std::string UrlPath = getDebuginfodExecutableUrlPath(ID);146return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);147}148149std::string getDebuginfodDebuginfoUrlPath(BuildIDRef ID) {150SmallString<64> UrlPath;151sys::path::append(UrlPath, sys::path::Style::posix, "buildid",152buildIDToString(ID), "debuginfo");153return std::string(UrlPath);154}155156Expected<std::string> getCachedOrDownloadDebuginfo(BuildIDRef ID) {157std::string UrlPath = getDebuginfodDebuginfoUrlPath(ID);158return getCachedOrDownloadArtifact(getDebuginfodCacheKey(UrlPath), UrlPath);159}160161// General fetching function.162Expected<std::string> getCachedOrDownloadArtifact(StringRef UniqueKey,163StringRef UrlPath) {164SmallString<10> CacheDir;165166Expected<std::string> CacheDirOrErr = getDefaultDebuginfodCacheDirectory();167if (!CacheDirOrErr)168return CacheDirOrErr.takeError();169CacheDir = *CacheDirOrErr;170171return getCachedOrDownloadArtifact(UniqueKey, UrlPath, CacheDir,172getDefaultDebuginfodUrls(),173getDefaultDebuginfodTimeout());174}175176namespace {177178/// A simple handler which streams the returned data to a cache file. The cache179/// file is only created if a 200 OK status is observed.180class StreamedHTTPResponseHandler : public HTTPResponseHandler {181using CreateStreamFn =182std::function<Expected<std::unique_ptr<CachedFileStream>>()>;183CreateStreamFn CreateStream;184HTTPClient &Client;185std::unique_ptr<CachedFileStream> FileStream;186187public:188StreamedHTTPResponseHandler(CreateStreamFn CreateStream, HTTPClient &Client)189: CreateStream(CreateStream), Client(Client) {}190virtual ~StreamedHTTPResponseHandler() = default;191192Error handleBodyChunk(StringRef BodyChunk) override;193};194195} // namespace196197Error StreamedHTTPResponseHandler::handleBodyChunk(StringRef BodyChunk) {198if (!FileStream) {199unsigned Code = Client.responseCode();200if (Code && Code != 200)201return Error::success();202Expected<std::unique_ptr<CachedFileStream>> FileStreamOrError =203CreateStream();204if (!FileStreamOrError)205return FileStreamOrError.takeError();206FileStream = std::move(*FileStreamOrError);207}208*FileStream->OS << BodyChunk;209return Error::success();210}211212// An over-accepting simplification of the HTTP RFC 7230 spec.213static bool isHeader(StringRef S) {214StringRef Name;215StringRef Value;216std::tie(Name, Value) = S.split(':');217if (Name.empty() || Value.empty())218return false;219return all_of(Name, [](char C) { return llvm::isPrint(C) && C != ' '; }) &&220all_of(Value, [](char C) { return llvm::isPrint(C) || C == '\t'; });221}222223static SmallVector<std::string, 0> getHeaders() {224const char *Filename = getenv("DEBUGINFOD_HEADERS_FILE");225if (!Filename)226return {};227ErrorOr<std::unique_ptr<MemoryBuffer>> HeadersFile =228MemoryBuffer::getFile(Filename, /*IsText=*/true);229if (!HeadersFile)230return {};231232SmallVector<std::string, 0> Headers;233uint64_t LineNumber = 0;234for (StringRef Line : llvm::split((*HeadersFile)->getBuffer(), '\n')) {235LineNumber++;236if (!Line.empty() && Line.back() == '\r')237Line = Line.drop_back();238if (!isHeader(Line)) {239if (!all_of(Line, llvm::isSpace))240WithColor::warning()241<< "could not parse debuginfod header: " << Filename << ':'242<< LineNumber << '\n';243continue;244}245Headers.emplace_back(Line);246}247return Headers;248}249250Expected<std::string> getCachedOrDownloadArtifact(251StringRef UniqueKey, StringRef UrlPath, StringRef CacheDirectoryPath,252ArrayRef<StringRef> DebuginfodUrls, std::chrono::milliseconds Timeout) {253SmallString<64> AbsCachedArtifactPath;254sys::path::append(AbsCachedArtifactPath, CacheDirectoryPath,255"llvmcache-" + UniqueKey);256257Expected<FileCache> CacheOrErr =258localCache("Debuginfod-client", ".debuginfod-client", CacheDirectoryPath);259if (!CacheOrErr)260return CacheOrErr.takeError();261262FileCache Cache = *CacheOrErr;263// We choose an arbitrary Task parameter as we do not make use of it.264unsigned Task = 0;265Expected<AddStreamFn> CacheAddStreamOrErr = Cache(Task, UniqueKey, "");266if (!CacheAddStreamOrErr)267return CacheAddStreamOrErr.takeError();268AddStreamFn &CacheAddStream = *CacheAddStreamOrErr;269if (!CacheAddStream)270return std::string(AbsCachedArtifactPath);271// The artifact was not found in the local cache, query the debuginfod272// servers.273if (!HTTPClient::isAvailable())274return createStringError(errc::io_error,275"No working HTTP client is available.");276277if (!HTTPClient::IsInitialized)278return createStringError(279errc::io_error,280"A working HTTP client is available, but it is not initialized. To "281"allow Debuginfod to make HTTP requests, call HTTPClient::initialize() "282"at the beginning of main.");283284HTTPClient Client;285Client.setTimeout(Timeout);286for (StringRef ServerUrl : DebuginfodUrls) {287SmallString<64> ArtifactUrl;288sys::path::append(ArtifactUrl, sys::path::Style::posix, ServerUrl, UrlPath);289290// Perform the HTTP request and if successful, write the response body to291// the cache.292{293StreamedHTTPResponseHandler Handler(294[&]() { return CacheAddStream(Task, ""); }, Client);295HTTPRequest Request(ArtifactUrl);296Request.Headers = getHeaders();297Error Err = Client.perform(Request, Handler);298if (Err)299return std::move(Err);300301unsigned Code = Client.responseCode();302if (Code && Code != 200)303continue;304}305306Expected<CachePruningPolicy> PruningPolicyOrErr =307parseCachePruningPolicy(std::getenv("DEBUGINFOD_CACHE_POLICY"));308if (!PruningPolicyOrErr)309return PruningPolicyOrErr.takeError();310pruneCache(CacheDirectoryPath, *PruningPolicyOrErr);311312// Return the path to the artifact on disk.313return std::string(AbsCachedArtifactPath);314}315316return createStringError(errc::argument_out_of_domain, "build id not found");317}318319DebuginfodLogEntry::DebuginfodLogEntry(const Twine &Message)320: Message(Message.str()) {}321322void DebuginfodLog::push(const Twine &Message) {323push(DebuginfodLogEntry(Message));324}325326void DebuginfodLog::push(DebuginfodLogEntry Entry) {327{328std::lock_guard<std::mutex> Guard(QueueMutex);329LogEntryQueue.push(Entry);330}331QueueCondition.notify_one();332}333334DebuginfodLogEntry DebuginfodLog::pop() {335{336std::unique_lock<std::mutex> Guard(QueueMutex);337// Wait for messages to be pushed into the queue.338QueueCondition.wait(Guard, [&] { return !LogEntryQueue.empty(); });339}340std::lock_guard<std::mutex> Guard(QueueMutex);341if (!LogEntryQueue.size())342llvm_unreachable("Expected message in the queue.");343344DebuginfodLogEntry Entry = LogEntryQueue.front();345LogEntryQueue.pop();346return Entry;347}348349DebuginfodCollection::DebuginfodCollection(ArrayRef<StringRef> PathsRef,350DebuginfodLog &Log,351ThreadPoolInterface &Pool,352double MinInterval)353: Log(Log), Pool(Pool), MinInterval(MinInterval) {354for (StringRef Path : PathsRef)355Paths.push_back(Path.str());356}357358Error DebuginfodCollection::update() {359std::lock_guard<sys::Mutex> Guard(UpdateMutex);360if (UpdateTimer.isRunning())361UpdateTimer.stopTimer();362UpdateTimer.clear();363for (const std::string &Path : Paths) {364Log.push("Updating binaries at path " + Path);365if (Error Err = findBinaries(Path))366return Err;367}368Log.push("Updated collection");369UpdateTimer.startTimer();370return Error::success();371}372373Expected<bool> DebuginfodCollection::updateIfStale() {374if (!UpdateTimer.isRunning())375return false;376UpdateTimer.stopTimer();377double Time = UpdateTimer.getTotalTime().getWallTime();378UpdateTimer.startTimer();379if (Time < MinInterval)380return false;381if (Error Err = update())382return std::move(Err);383return true;384}385386Error DebuginfodCollection::updateForever(std::chrono::milliseconds Interval) {387while (true) {388if (Error Err = update())389return Err;390std::this_thread::sleep_for(Interval);391}392llvm_unreachable("updateForever loop should never end");393}394395static bool hasELFMagic(StringRef FilePath) {396file_magic Type;397std::error_code EC = identify_magic(FilePath, Type);398if (EC)399return false;400switch (Type) {401case file_magic::elf:402case file_magic::elf_relocatable:403case file_magic::elf_executable:404case file_magic::elf_shared_object:405case file_magic::elf_core:406return true;407default:408return false;409}410}411412Error DebuginfodCollection::findBinaries(StringRef Path) {413std::error_code EC;414sys::fs::recursive_directory_iterator I(Twine(Path), EC), E;415std::mutex IteratorMutex;416ThreadPoolTaskGroup IteratorGroup(Pool);417for (unsigned WorkerIndex = 0; WorkerIndex < Pool.getMaxConcurrency();418WorkerIndex++) {419IteratorGroup.async([&, this]() -> void {420std::string FilePath;421while (true) {422{423// Check if iteration is over or there is an error during iteration424std::lock_guard<std::mutex> Guard(IteratorMutex);425if (I == E || EC)426return;427// Grab a file path from the directory iterator and advance the428// iterator.429FilePath = I->path();430I.increment(EC);431}432433// Inspect the file at this path to determine if it is debuginfo.434if (!hasELFMagic(FilePath))435continue;436437Expected<object::OwningBinary<object::Binary>> BinOrErr =438object::createBinary(FilePath);439440if (!BinOrErr) {441consumeError(BinOrErr.takeError());442continue;443}444object::Binary *Bin = std::move(BinOrErr.get().getBinary());445if (!Bin->isObject())446continue;447448// TODO: Support non-ELF binaries449object::ELFObjectFileBase *Object =450dyn_cast<object::ELFObjectFileBase>(Bin);451if (!Object)452continue;453454BuildIDRef ID = getBuildID(Object);455if (ID.empty())456continue;457458std::string IDString = buildIDToString(ID);459if (Object->hasDebugInfo()) {460std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);461(void)DebugBinaries.try_emplace(IDString, std::move(FilePath));462} else {463std::lock_guard<sys::RWMutex> BinariesGuard(BinariesMutex);464(void)Binaries.try_emplace(IDString, std::move(FilePath));465}466}467});468}469IteratorGroup.wait();470std::unique_lock<std::mutex> Guard(IteratorMutex);471if (EC)472return errorCodeToError(EC);473return Error::success();474}475476Expected<std::optional<std::string>>477DebuginfodCollection::getBinaryPath(BuildIDRef ID) {478Log.push("getting binary path of ID " + buildIDToString(ID));479std::shared_lock<sys::RWMutex> Guard(BinariesMutex);480auto Loc = Binaries.find(buildIDToString(ID));481if (Loc != Binaries.end()) {482std::string Path = Loc->getValue();483return Path;484}485return std::nullopt;486}487488Expected<std::optional<std::string>>489DebuginfodCollection::getDebugBinaryPath(BuildIDRef ID) {490Log.push("getting debug binary path of ID " + buildIDToString(ID));491std::shared_lock<sys::RWMutex> Guard(DebugBinariesMutex);492auto Loc = DebugBinaries.find(buildIDToString(ID));493if (Loc != DebugBinaries.end()) {494std::string Path = Loc->getValue();495return Path;496}497return std::nullopt;498}499500Expected<std::string> DebuginfodCollection::findBinaryPath(BuildIDRef ID) {501{502// Check collection; perform on-demand update if stale.503Expected<std::optional<std::string>> PathOrErr = getBinaryPath(ID);504if (!PathOrErr)505return PathOrErr.takeError();506std::optional<std::string> Path = *PathOrErr;507if (!Path) {508Expected<bool> UpdatedOrErr = updateIfStale();509if (!UpdatedOrErr)510return UpdatedOrErr.takeError();511if (*UpdatedOrErr) {512// Try once more.513PathOrErr = getBinaryPath(ID);514if (!PathOrErr)515return PathOrErr.takeError();516Path = *PathOrErr;517}518}519if (Path)520return *Path;521}522523// Try federation.524Expected<std::string> PathOrErr = getCachedOrDownloadExecutable(ID);525if (!PathOrErr)526consumeError(PathOrErr.takeError());527528// Fall back to debug binary.529return findDebugBinaryPath(ID);530}531532Expected<std::string> DebuginfodCollection::findDebugBinaryPath(BuildIDRef ID) {533// Check collection; perform on-demand update if stale.534Expected<std::optional<std::string>> PathOrErr = getDebugBinaryPath(ID);535if (!PathOrErr)536return PathOrErr.takeError();537std::optional<std::string> Path = *PathOrErr;538if (!Path) {539Expected<bool> UpdatedOrErr = updateIfStale();540if (!UpdatedOrErr)541return UpdatedOrErr.takeError();542if (*UpdatedOrErr) {543// Try once more.544PathOrErr = getBinaryPath(ID);545if (!PathOrErr)546return PathOrErr.takeError();547Path = *PathOrErr;548}549}550if (Path)551return *Path;552553// Try federation.554return getCachedOrDownloadDebuginfo(ID);555}556557DebuginfodServer::DebuginfodServer(DebuginfodLog &Log,558DebuginfodCollection &Collection)559: Log(Log), Collection(Collection) {560cantFail(561Server.get(R"(/buildid/(.*)/debuginfo)", [&](HTTPServerRequest Request) {562Log.push("GET " + Request.UrlPath);563std::string IDString;564if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {565Request.setResponse(566{404, "text/plain", "Build ID is not a hex string\n"});567return;568}569object::BuildID ID(IDString.begin(), IDString.end());570Expected<std::string> PathOrErr = Collection.findDebugBinaryPath(ID);571if (Error Err = PathOrErr.takeError()) {572consumeError(std::move(Err));573Request.setResponse({404, "text/plain", "Build ID not found\n"});574return;575}576streamFile(Request, *PathOrErr);577}));578cantFail(579Server.get(R"(/buildid/(.*)/executable)", [&](HTTPServerRequest Request) {580Log.push("GET " + Request.UrlPath);581std::string IDString;582if (!tryGetFromHex(Request.UrlPathMatches[0], IDString)) {583Request.setResponse(584{404, "text/plain", "Build ID is not a hex string\n"});585return;586}587object::BuildID ID(IDString.begin(), IDString.end());588Expected<std::string> PathOrErr = Collection.findBinaryPath(ID);589if (Error Err = PathOrErr.takeError()) {590consumeError(std::move(Err));591Request.setResponse({404, "text/plain", "Build ID not found\n"});592return;593}594streamFile(Request, *PathOrErr);595}));596}597598} // namespace llvm599600601