Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
35266 views
//===- GsymReader.cpp -----------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "llvm/DebugInfo/GSYM/GsymReader.h"910#include <assert.h>11#include <inttypes.h>12#include <stdio.h>13#include <stdlib.h>1415#include "llvm/DebugInfo/GSYM/GsymCreator.h"16#include "llvm/DebugInfo/GSYM/InlineInfo.h"17#include "llvm/DebugInfo/GSYM/LineTable.h"18#include "llvm/Support/BinaryStreamReader.h"19#include "llvm/Support/DataExtractor.h"20#include "llvm/Support/MemoryBuffer.h"2122using namespace llvm;23using namespace gsym;2425GsymReader::GsymReader(std::unique_ptr<MemoryBuffer> Buffer)26: MemBuffer(std::move(Buffer)), Endian(llvm::endianness::native) {}2728GsymReader::GsymReader(GsymReader &&RHS) = default;2930GsymReader::~GsymReader() = default;3132llvm::Expected<GsymReader> GsymReader::openFile(StringRef Filename) {33// Open the input file and return an appropriate error if needed.34ErrorOr<std::unique_ptr<MemoryBuffer>> BuffOrErr =35MemoryBuffer::getFileOrSTDIN(Filename);36auto Err = BuffOrErr.getError();37if (Err)38return llvm::errorCodeToError(Err);39return create(BuffOrErr.get());40}4142llvm::Expected<GsymReader> GsymReader::copyBuffer(StringRef Bytes) {43auto MemBuffer = MemoryBuffer::getMemBufferCopy(Bytes, "GSYM bytes");44return create(MemBuffer);45}4647llvm::Expected<llvm::gsym::GsymReader>48GsymReader::create(std::unique_ptr<MemoryBuffer> &MemBuffer) {49if (!MemBuffer)50return createStringError(std::errc::invalid_argument,51"invalid memory buffer");52GsymReader GR(std::move(MemBuffer));53llvm::Error Err = GR.parse();54if (Err)55return std::move(Err);56return std::move(GR);57}5859llvm::Error60GsymReader::parse() {61BinaryStreamReader FileData(MemBuffer->getBuffer(), llvm::endianness::native);62// Check for the magic bytes. This file format is designed to be mmap'ed63// into a process and accessed as read only. This is done for performance64// and efficiency for symbolicating and parsing GSYM data.65if (FileData.readObject(Hdr))66return createStringError(std::errc::invalid_argument,67"not enough data for a GSYM header");6869const auto HostByteOrder = llvm::endianness::native;70switch (Hdr->Magic) {71case GSYM_MAGIC:72Endian = HostByteOrder;73break;74case GSYM_CIGAM:75// This is a GSYM file, but not native endianness.76Endian = sys::IsBigEndianHost ? llvm::endianness::little77: llvm::endianness::big;78Swap.reset(new SwappedData);79break;80default:81return createStringError(std::errc::invalid_argument,82"not a GSYM file");83}8485bool DataIsLittleEndian = HostByteOrder != llvm::endianness::little;86// Read a correctly byte swapped header if we need to.87if (Swap) {88DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);89if (auto ExpectedHdr = Header::decode(Data))90Swap->Hdr = ExpectedHdr.get();91else92return ExpectedHdr.takeError();93Hdr = &Swap->Hdr;94}9596// Detect errors in the header and report any that are found. If we make it97// past this without errors, we know we have a good magic value, a supported98// version number, verified address offset size and a valid UUID size.99if (Error Err = Hdr->checkForError())100return Err;101102if (!Swap) {103// This is the native endianness case that is most common and optimized for104// efficient lookups. Here we just grab pointers to the native data and105// use ArrayRef objects to allow efficient read only access.106107// Read the address offsets.108if (FileData.padToAlignment(Hdr->AddrOffSize) ||109FileData.readArray(AddrOffsets,110Hdr->NumAddresses * Hdr->AddrOffSize))111return createStringError(std::errc::invalid_argument,112"failed to read address table");113114// Read the address info offsets.115if (FileData.padToAlignment(4) ||116FileData.readArray(AddrInfoOffsets, Hdr->NumAddresses))117return createStringError(std::errc::invalid_argument,118"failed to read address info offsets table");119120// Read the file table.121uint32_t NumFiles = 0;122if (FileData.readInteger(NumFiles) || FileData.readArray(Files, NumFiles))123return createStringError(std::errc::invalid_argument,124"failed to read file table");125126// Get the string table.127FileData.setOffset(Hdr->StrtabOffset);128if (FileData.readFixedString(StrTab.Data, Hdr->StrtabSize))129return createStringError(std::errc::invalid_argument,130"failed to read string table");131} else {132// This is the non native endianness case that is not common and not133// optimized for lookups. Here we decode the important tables into local134// storage and then set the ArrayRef objects to point to these swapped135// copies of the read only data so lookups can be as efficient as possible.136DataExtractor Data(MemBuffer->getBuffer(), DataIsLittleEndian, 4);137138// Read the address offsets.139uint64_t Offset = alignTo(sizeof(Header), Hdr->AddrOffSize);140Swap->AddrOffsets.resize(Hdr->NumAddresses * Hdr->AddrOffSize);141switch (Hdr->AddrOffSize) {142case 1:143if (!Data.getU8(&Offset, Swap->AddrOffsets.data(), Hdr->NumAddresses))144return createStringError(std::errc::invalid_argument,145"failed to read address table");146break;147case 2:148if (!Data.getU16(&Offset,149reinterpret_cast<uint16_t *>(Swap->AddrOffsets.data()),150Hdr->NumAddresses))151return createStringError(std::errc::invalid_argument,152"failed to read address table");153break;154case 4:155if (!Data.getU32(&Offset,156reinterpret_cast<uint32_t *>(Swap->AddrOffsets.data()),157Hdr->NumAddresses))158return createStringError(std::errc::invalid_argument,159"failed to read address table");160break;161case 8:162if (!Data.getU64(&Offset,163reinterpret_cast<uint64_t *>(Swap->AddrOffsets.data()),164Hdr->NumAddresses))165return createStringError(std::errc::invalid_argument,166"failed to read address table");167}168AddrOffsets = ArrayRef<uint8_t>(Swap->AddrOffsets);169170// Read the address info offsets.171Offset = alignTo(Offset, 4);172Swap->AddrInfoOffsets.resize(Hdr->NumAddresses);173if (Data.getU32(&Offset, Swap->AddrInfoOffsets.data(), Hdr->NumAddresses))174AddrInfoOffsets = ArrayRef<uint32_t>(Swap->AddrInfoOffsets);175else176return createStringError(std::errc::invalid_argument,177"failed to read address table");178// Read the file table.179const uint32_t NumFiles = Data.getU32(&Offset);180if (NumFiles > 0) {181Swap->Files.resize(NumFiles);182if (Data.getU32(&Offset, &Swap->Files[0].Dir, NumFiles*2))183Files = ArrayRef<FileEntry>(Swap->Files);184else185return createStringError(std::errc::invalid_argument,186"failed to read file table");187}188// Get the string table.189StrTab.Data = MemBuffer->getBuffer().substr(Hdr->StrtabOffset,190Hdr->StrtabSize);191if (StrTab.Data.empty())192return createStringError(std::errc::invalid_argument,193"failed to read string table");194}195return Error::success();196197}198199const Header &GsymReader::getHeader() const {200// The only way to get a GsymReader is from GsymReader::openFile(...) or201// GsymReader::copyBuffer() and the header must be valid and initialized to202// a valid pointer value, so the assert below should not trigger.203assert(Hdr);204return *Hdr;205}206207std::optional<uint64_t> GsymReader::getAddress(size_t Index) const {208switch (Hdr->AddrOffSize) {209case 1: return addressForIndex<uint8_t>(Index);210case 2: return addressForIndex<uint16_t>(Index);211case 4: return addressForIndex<uint32_t>(Index);212case 8: return addressForIndex<uint64_t>(Index);213}214return std::nullopt;215}216217std::optional<uint64_t> GsymReader::getAddressInfoOffset(size_t Index) const {218const auto NumAddrInfoOffsets = AddrInfoOffsets.size();219if (Index < NumAddrInfoOffsets)220return AddrInfoOffsets[Index];221return std::nullopt;222}223224Expected<uint64_t>225GsymReader::getAddressIndex(const uint64_t Addr) const {226if (Addr >= Hdr->BaseAddress) {227const uint64_t AddrOffset = Addr - Hdr->BaseAddress;228std::optional<uint64_t> AddrOffsetIndex;229switch (Hdr->AddrOffSize) {230case 1:231AddrOffsetIndex = getAddressOffsetIndex<uint8_t>(AddrOffset);232break;233case 2:234AddrOffsetIndex = getAddressOffsetIndex<uint16_t>(AddrOffset);235break;236case 4:237AddrOffsetIndex = getAddressOffsetIndex<uint32_t>(AddrOffset);238break;239case 8:240AddrOffsetIndex = getAddressOffsetIndex<uint64_t>(AddrOffset);241break;242default:243return createStringError(std::errc::invalid_argument,244"unsupported address offset size %u",245Hdr->AddrOffSize);246}247if (AddrOffsetIndex)248return *AddrOffsetIndex;249}250return createStringError(std::errc::invalid_argument,251"address 0x%" PRIx64 " is not in GSYM", Addr);252253}254255llvm::Expected<DataExtractor>256GsymReader::getFunctionInfoDataForAddress(uint64_t Addr,257uint64_t &FuncStartAddr) const {258Expected<uint64_t> ExpectedAddrIdx = getAddressIndex(Addr);259if (!ExpectedAddrIdx)260return ExpectedAddrIdx.takeError();261const uint64_t FirstAddrIdx = *ExpectedAddrIdx;262// The AddrIdx is the first index of the function info entries that match263// \a Addr. We need to iterate over all function info objects that start with264// the same address until we find a range that contains \a Addr.265std::optional<uint64_t> FirstFuncStartAddr;266const size_t NumAddresses = getNumAddresses();267for (uint64_t AddrIdx = FirstAddrIdx; AddrIdx < NumAddresses; ++AddrIdx) {268auto ExpextedData = getFunctionInfoDataAtIndex(AddrIdx, FuncStartAddr);269// If there was an error, return the error.270if (!ExpextedData)271return ExpextedData;272273// Remember the first function start address if it hasn't already been set.274// If it is already valid, check to see if it matches the first function275// start address and only continue if it matches.276if (FirstFuncStartAddr.has_value()) {277if (*FirstFuncStartAddr != FuncStartAddr)278break; // Done with consecutive function entries with same address.279} else {280FirstFuncStartAddr = FuncStartAddr;281}282// Make sure the current function address ranges contains \a Addr.283// Some symbols on Darwin don't have valid sizes, so if we run into a284// symbol with zero size, then we have found a match for our address.285286// The first thing the encoding of a FunctionInfo object is the function287// size.288uint64_t Offset = 0;289uint32_t FuncSize = ExpextedData->getU32(&Offset);290if (FuncSize == 0 ||291AddressRange(FuncStartAddr, FuncStartAddr + FuncSize).contains(Addr))292return ExpextedData;293}294return createStringError(std::errc::invalid_argument,295"address 0x%" PRIx64 " is not in GSYM", Addr);296}297298llvm::Expected<DataExtractor>299GsymReader::getFunctionInfoDataAtIndex(uint64_t AddrIdx,300uint64_t &FuncStartAddr) const {301if (AddrIdx >= getNumAddresses())302return createStringError(std::errc::invalid_argument,303"invalid address index %" PRIu64, AddrIdx);304const uint32_t AddrInfoOffset = AddrInfoOffsets[AddrIdx];305assert((Endian == endianness::big || Endian == endianness::little) &&306"Endian must be either big or little");307StringRef Bytes = MemBuffer->getBuffer().substr(AddrInfoOffset);308if (Bytes.empty())309return createStringError(std::errc::invalid_argument,310"invalid address info offset 0x%" PRIx32,311AddrInfoOffset);312std::optional<uint64_t> OptFuncStartAddr = getAddress(AddrIdx);313if (!OptFuncStartAddr)314return createStringError(std::errc::invalid_argument,315"failed to extract address[%" PRIu64 "]", AddrIdx);316FuncStartAddr = *OptFuncStartAddr;317return DataExtractor(Bytes, Endian == llvm::endianness::little, 4);318}319320llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {321uint64_t FuncStartAddr = 0;322if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))323return FunctionInfo::decode(*ExpectedData, FuncStartAddr);324else325return ExpectedData.takeError();326}327328llvm::Expected<FunctionInfo>329GsymReader::getFunctionInfoAtIndex(uint64_t Idx) const {330uint64_t FuncStartAddr = 0;331if (auto ExpectedData = getFunctionInfoDataAtIndex(Idx, FuncStartAddr))332return FunctionInfo::decode(*ExpectedData, FuncStartAddr);333else334return ExpectedData.takeError();335}336337llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {338uint64_t FuncStartAddr = 0;339if (auto ExpectedData = getFunctionInfoDataForAddress(Addr, FuncStartAddr))340return FunctionInfo::lookup(*ExpectedData, *this, FuncStartAddr, Addr);341else342return ExpectedData.takeError();343}344345void GsymReader::dump(raw_ostream &OS) {346const auto &Header = getHeader();347// Dump the GSYM header.348OS << Header << "\n";349// Dump the address table.350OS << "Address Table:\n";351OS << "INDEX OFFSET";352353switch (Hdr->AddrOffSize) {354case 1: OS << "8 "; break;355case 2: OS << "16"; break;356case 4: OS << "32"; break;357case 8: OS << "64"; break;358default: OS << "??"; break;359}360OS << " (ADDRESS)\n";361OS << "====== =============================== \n";362for (uint32_t I = 0; I < Header.NumAddresses; ++I) {363OS << format("[%4u] ", I);364switch (Hdr->AddrOffSize) {365case 1: OS << HEX8(getAddrOffsets<uint8_t>()[I]); break;366case 2: OS << HEX16(getAddrOffsets<uint16_t>()[I]); break;367case 4: OS << HEX32(getAddrOffsets<uint32_t>()[I]); break;368case 8: OS << HEX32(getAddrOffsets<uint64_t>()[I]); break;369default: break;370}371OS << " (" << HEX64(*getAddress(I)) << ")\n";372}373// Dump the address info offsets table.374OS << "\nAddress Info Offsets:\n";375OS << "INDEX Offset\n";376OS << "====== ==========\n";377for (uint32_t I = 0; I < Header.NumAddresses; ++I)378OS << format("[%4u] ", I) << HEX32(AddrInfoOffsets[I]) << "\n";379// Dump the file table.380OS << "\nFiles:\n";381OS << "INDEX DIRECTORY BASENAME PATH\n";382OS << "====== ========== ========== ==============================\n";383for (uint32_t I = 0; I < Files.size(); ++I) {384OS << format("[%4u] ", I) << HEX32(Files[I].Dir) << ' '385<< HEX32(Files[I].Base) << ' ';386dump(OS, getFile(I));387OS << "\n";388}389OS << "\n" << StrTab << "\n";390391for (uint32_t I = 0; I < Header.NumAddresses; ++I) {392OS << "FunctionInfo @ " << HEX32(AddrInfoOffsets[I]) << ": ";393if (auto FI = getFunctionInfoAtIndex(I))394dump(OS, *FI);395else396logAllUnhandledErrors(FI.takeError(), OS, "FunctionInfo:");397}398}399400void GsymReader::dump(raw_ostream &OS, const FunctionInfo &FI) {401OS << FI.Range << " \"" << getString(FI.Name) << "\"\n";402if (FI.OptLineTable)403dump(OS, *FI.OptLineTable);404if (FI.Inline)405dump(OS, *FI.Inline);406}407408void GsymReader::dump(raw_ostream &OS, const LineTable <) {409OS << "LineTable:\n";410for (auto &LE: LT) {411OS << " " << HEX64(LE.Addr) << ' ';412if (LE.File)413dump(OS, getFile(LE.File));414OS << ':' << LE.Line << '\n';415}416}417418void GsymReader::dump(raw_ostream &OS, const InlineInfo &II, uint32_t Indent) {419if (Indent == 0)420OS << "InlineInfo:\n";421else422OS.indent(Indent);423OS << II.Ranges << ' ' << getString(II.Name);424if (II.CallFile != 0) {425if (auto File = getFile(II.CallFile)) {426OS << " called from ";427dump(OS, File);428OS << ':' << II.CallLine;429}430}431OS << '\n';432for (const auto &ChildII: II.Children)433dump(OS, ChildII, Indent + 2);434}435436void GsymReader::dump(raw_ostream &OS, std::optional<FileEntry> FE) {437if (FE) {438// IF we have the file from index 0, then don't print anything439if (FE->Dir == 0 && FE->Base == 0)440return;441StringRef Dir = getString(FE->Dir);442StringRef Base = getString(FE->Base);443if (!Dir.empty()) {444OS << Dir;445if (Dir.contains('\\') && !Dir.contains('/'))446OS << '\\';447else448OS << '/';449}450if (!Base.empty()) {451OS << Base;452}453if (!Dir.empty() || !Base.empty())454return;455}456OS << "<invalid-file>";457}458459460