Path: blob/main/contrib/llvm-project/llvm/lib/DebugInfo/GSYM/GsymCreator.cpp
35269 views
//===- GsymCreator.cpp ----------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//===----------------------------------------------------------------------===//67#include "llvm/DebugInfo/GSYM/GsymCreator.h"8#include "llvm/DebugInfo/GSYM/FileWriter.h"9#include "llvm/DebugInfo/GSYM/Header.h"10#include "llvm/DebugInfo/GSYM/LineTable.h"11#include "llvm/DebugInfo/GSYM/OutputAggregator.h"12#include "llvm/MC/StringTableBuilder.h"13#include "llvm/Support/raw_ostream.h"1415#include <algorithm>16#include <cassert>17#include <functional>18#include <vector>1920using namespace llvm;21using namespace gsym;2223GsymCreator::GsymCreator(bool Quiet)24: StrTab(StringTableBuilder::ELF), Quiet(Quiet) {25insertFile(StringRef());26}2728uint32_t GsymCreator::insertFile(StringRef Path, llvm::sys::path::Style Style) {29llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);30llvm::StringRef filename = llvm::sys::path::filename(Path, Style);31// We must insert the strings first, then call the FileEntry constructor.32// If we inline the insertString() function call into the constructor, the33// call order is undefined due to parameter lists not having any ordering34// requirements.35const uint32_t Dir = insertString(directory);36const uint32_t Base = insertString(filename);37return insertFileEntry(FileEntry(Dir, Base));38}3940uint32_t GsymCreator::insertFileEntry(FileEntry FE) {41std::lock_guard<std::mutex> Guard(Mutex);42const auto NextIndex = Files.size();43// Find FE in hash map and insert if not present.44auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));45if (R.second)46Files.emplace_back(FE);47return R.first->second;48}4950uint32_t GsymCreator::copyFile(const GsymCreator &SrcGC, uint32_t FileIdx) {51// File index zero is reserved for a FileEntry with no directory and no52// filename. Any other file and we need to copy the strings for the directory53// and filename.54if (FileIdx == 0)55return 0;56const FileEntry SrcFE = SrcGC.Files[FileIdx];57// Copy the strings for the file and then add the newly converted file entry.58uint32_t Dir =59SrcFE.Dir == 060? 061: StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Dir)->second);62uint32_t Base = StrTab.add(SrcGC.StringOffsetMap.find(SrcFE.Base)->second);63FileEntry DstFE(Dir, Base);64return insertFileEntry(DstFE);65}6667llvm::Error GsymCreator::save(StringRef Path, llvm::endianness ByteOrder,68std::optional<uint64_t> SegmentSize) const {69if (SegmentSize)70return saveSegments(Path, ByteOrder, *SegmentSize);71std::error_code EC;72raw_fd_ostream OutStrm(Path, EC);73if (EC)74return llvm::errorCodeToError(EC);75FileWriter O(OutStrm, ByteOrder);76return encode(O);77}7879llvm::Error GsymCreator::encode(FileWriter &O) const {80std::lock_guard<std::mutex> Guard(Mutex);81if (Funcs.empty())82return createStringError(std::errc::invalid_argument,83"no functions to encode");84if (!Finalized)85return createStringError(std::errc::invalid_argument,86"GsymCreator wasn't finalized prior to encoding");8788if (Funcs.size() > UINT32_MAX)89return createStringError(std::errc::invalid_argument,90"too many FunctionInfos");9192std::optional<uint64_t> BaseAddress = getBaseAddress();93// Base address should be valid if we have any functions.94if (!BaseAddress)95return createStringError(std::errc::invalid_argument,96"invalid base address");97Header Hdr;98Hdr.Magic = GSYM_MAGIC;99Hdr.Version = GSYM_VERSION;100Hdr.AddrOffSize = getAddressOffsetSize();101Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());102Hdr.BaseAddress = *BaseAddress;103Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());104Hdr.StrtabOffset = 0; // We will fix this up later.105Hdr.StrtabSize = 0; // We will fix this up later.106memset(Hdr.UUID, 0, sizeof(Hdr.UUID));107if (UUID.size() > sizeof(Hdr.UUID))108return createStringError(std::errc::invalid_argument,109"invalid UUID size %u", (uint32_t)UUID.size());110// Copy the UUID value if we have one.111if (UUID.size() > 0)112memcpy(Hdr.UUID, UUID.data(), UUID.size());113// Write out the header.114llvm::Error Err = Hdr.encode(O);115if (Err)116return Err;117118const uint64_t MaxAddressOffset = getMaxAddressOffset();119// Write out the address offsets.120O.alignTo(Hdr.AddrOffSize);121for (const auto &FuncInfo : Funcs) {122uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;123// Make sure we calculated the address offsets byte size correctly by124// verifying the current address offset is within ranges. We have seen bugs125// introduced when the code changes that can cause problems here so it is126// good to catch this during testing.127assert(AddrOffset <= MaxAddressOffset);128(void)MaxAddressOffset;129switch (Hdr.AddrOffSize) {130case 1:131O.writeU8(static_cast<uint8_t>(AddrOffset));132break;133case 2:134O.writeU16(static_cast<uint16_t>(AddrOffset));135break;136case 4:137O.writeU32(static_cast<uint32_t>(AddrOffset));138break;139case 8:140O.writeU64(AddrOffset);141break;142}143}144145// Write out all zeros for the AddrInfoOffsets.146O.alignTo(4);147const off_t AddrInfoOffsetsOffset = O.tell();148for (size_t i = 0, n = Funcs.size(); i < n; ++i)149O.writeU32(0);150151// Write out the file table152O.alignTo(4);153assert(!Files.empty());154assert(Files[0].Dir == 0);155assert(Files[0].Base == 0);156size_t NumFiles = Files.size();157if (NumFiles > UINT32_MAX)158return createStringError(std::errc::invalid_argument, "too many files");159O.writeU32(static_cast<uint32_t>(NumFiles));160for (auto File : Files) {161O.writeU32(File.Dir);162O.writeU32(File.Base);163}164165// Write out the string table.166const off_t StrtabOffset = O.tell();167StrTab.write(O.get_stream());168const off_t StrtabSize = O.tell() - StrtabOffset;169std::vector<uint32_t> AddrInfoOffsets;170171// Write out the address infos for each function info.172for (const auto &FuncInfo : Funcs) {173if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))174AddrInfoOffsets.push_back(OffsetOrErr.get());175else176return OffsetOrErr.takeError();177}178// Fixup the string table offset and size in the header179O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));180O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));181182// Fixup all address info offsets183uint64_t Offset = 0;184for (auto AddrInfoOffset : AddrInfoOffsets) {185O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);186Offset += 4;187}188return ErrorSuccess();189}190191llvm::Error GsymCreator::finalize(OutputAggregator &Out) {192std::lock_guard<std::mutex> Guard(Mutex);193if (Finalized)194return createStringError(std::errc::invalid_argument, "already finalized");195Finalized = true;196197// Don't let the string table indexes change by finalizing in order.198StrTab.finalizeInOrder();199200// Remove duplicates function infos that have both entries from debug info201// (DWARF or Breakpad) and entries from the SymbolTable.202//203// Also handle overlapping function. Usually there shouldn't be any, but they204// can and do happen in some rare cases.205//206// (a) (b) (c)207// ^ ^ ^ ^208// |X |Y |X ^ |X209// | | | |Y | ^210// | | | v v |Y211// v v v v212//213// In (a) and (b), Y is ignored and X will be reported for the full range.214// In (c), both functions will be included in the result and lookups for an215// address in the intersection will return Y because of binary search.216//217// Note that in case of (b), we cannot include Y in the result because then218// we wouldn't find any function for range (end of Y, end of X)219// with binary search220221const auto NumBefore = Funcs.size();222// Only sort and unique if this isn't a segment. If this is a segment we223// already finalized the main GsymCreator with all of the function infos224// and then the already sorted and uniqued function infos were added to this225// object.226if (!IsSegment) {227if (NumBefore > 1) {228// Sort function infos so we can emit sorted functions.229llvm::sort(Funcs);230std::vector<FunctionInfo> FinalizedFuncs;231FinalizedFuncs.reserve(Funcs.size());232FinalizedFuncs.emplace_back(std::move(Funcs.front()));233for (size_t Idx=1; Idx < NumBefore; ++Idx) {234FunctionInfo &Prev = FinalizedFuncs.back();235FunctionInfo &Curr = Funcs[Idx];236// Empty ranges won't intersect, but we still need to237// catch the case where we have multiple symbols at the238// same address and coalesce them.239const bool ranges_equal = Prev.Range == Curr.Range;240if (ranges_equal || Prev.Range.intersects(Curr.Range)) {241// Overlapping ranges or empty identical ranges.242if (ranges_equal) {243// Same address range. Check if one is from debug244// info and the other is from a symbol table. If245// so, then keep the one with debug info. Our246// sorting guarantees that entries with matching247// address ranges that have debug info are last in248// the sort.249if (!(Prev == Curr)) {250if (Prev.hasRichInfo() && Curr.hasRichInfo())251Out.Report(252"Duplicate address ranges with different debug info.",253[&](raw_ostream &OS) {254OS << "warning: same address range contains "255"different debug "256<< "info. Removing:\n"257<< Prev << "\nIn favor of this one:\n"258<< Curr << "\n";259});260261// We want to swap the current entry with the previous since262// later entries with the same range always have more debug info263// or different debug info.264std::swap(Prev, Curr);265}266} else {267Out.Report("Overlapping function ranges", [&](raw_ostream &OS) {268// print warnings about overlaps269OS << "warning: function ranges overlap:\n"270<< Prev << "\n"271<< Curr << "\n";272});273FinalizedFuncs.emplace_back(std::move(Curr));274}275} else {276if (Prev.Range.size() == 0 && Curr.Range.contains(Prev.Range.start())) {277// Symbols on macOS don't have address ranges, so if the range278// doesn't match and the size is zero, then we replace the empty279// symbol function info with the current one.280std::swap(Prev, Curr);281} else {282FinalizedFuncs.emplace_back(std::move(Curr));283}284}285}286std::swap(Funcs, FinalizedFuncs);287}288// If our last function info entry doesn't have a size and if we have valid289// text ranges, we should set the size of the last entry since any search for290// a high address might match our last entry. By fixing up this size, we can291// help ensure we don't cause lookups to always return the last symbol that292// has no size when doing lookups.293if (!Funcs.empty() && Funcs.back().Range.size() == 0 && ValidTextRanges) {294if (auto Range =295ValidTextRanges->getRangeThatContains(Funcs.back().Range.start())) {296Funcs.back().Range = {Funcs.back().Range.start(), Range->end()};297}298}299Out << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "300<< Funcs.size() << " total\n";301}302return Error::success();303}304305uint32_t GsymCreator::copyString(const GsymCreator &SrcGC, uint32_t StrOff) {306// String offset at zero is always the empty string, no copying needed.307if (StrOff == 0)308return 0;309return StrTab.add(SrcGC.StringOffsetMap.find(StrOff)->second);310}311312uint32_t GsymCreator::insertString(StringRef S, bool Copy) {313if (S.empty())314return 0;315316// The hash can be calculated outside the lock.317CachedHashStringRef CHStr(S);318std::lock_guard<std::mutex> Guard(Mutex);319if (Copy) {320// We need to provide backing storage for the string if requested321// since StringTableBuilder stores references to strings. Any string322// that comes from a section in an object file doesn't need to be323// copied, but any string created by code will need to be copied.324// This allows GsymCreator to be really fast when parsing DWARF and325// other object files as most strings don't need to be copied.326if (!StrTab.contains(CHStr))327CHStr = CachedHashStringRef{StringStorage.insert(S).first->getKey(),328CHStr.hash()};329}330const uint32_t StrOff = StrTab.add(CHStr);331// Save a mapping of string offsets to the cached string reference in case332// we need to segment the GSYM file and copy string from one string table to333// another.334if (StringOffsetMap.count(StrOff) == 0)335StringOffsetMap.insert(std::make_pair(StrOff, CHStr));336return StrOff;337}338339void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {340std::lock_guard<std::mutex> Guard(Mutex);341Funcs.emplace_back(std::move(FI));342}343344void GsymCreator::forEachFunctionInfo(345std::function<bool(FunctionInfo &)> const &Callback) {346std::lock_guard<std::mutex> Guard(Mutex);347for (auto &FI : Funcs) {348if (!Callback(FI))349break;350}351}352353void GsymCreator::forEachFunctionInfo(354std::function<bool(const FunctionInfo &)> const &Callback) const {355std::lock_guard<std::mutex> Guard(Mutex);356for (const auto &FI : Funcs) {357if (!Callback(FI))358break;359}360}361362size_t GsymCreator::getNumFunctionInfos() const {363std::lock_guard<std::mutex> Guard(Mutex);364return Funcs.size();365}366367bool GsymCreator::IsValidTextAddress(uint64_t Addr) const {368if (ValidTextRanges)369return ValidTextRanges->contains(Addr);370return true; // No valid text ranges has been set, so accept all ranges.371}372373std::optional<uint64_t> GsymCreator::getFirstFunctionAddress() const {374// If we have finalized then Funcs are sorted. If we are a segment then375// Funcs will be sorted as well since function infos get added from an376// already finalized GsymCreator object where its functions were sorted and377// uniqued.378if ((Finalized || IsSegment) && !Funcs.empty())379return std::optional<uint64_t>(Funcs.front().startAddress());380return std::nullopt;381}382383std::optional<uint64_t> GsymCreator::getLastFunctionAddress() const {384// If we have finalized then Funcs are sorted. If we are a segment then385// Funcs will be sorted as well since function infos get added from an386// already finalized GsymCreator object where its functions were sorted and387// uniqued.388if ((Finalized || IsSegment) && !Funcs.empty())389return std::optional<uint64_t>(Funcs.back().startAddress());390return std::nullopt;391}392393std::optional<uint64_t> GsymCreator::getBaseAddress() const {394if (BaseAddress)395return BaseAddress;396return getFirstFunctionAddress();397}398399uint64_t GsymCreator::getMaxAddressOffset() const {400switch (getAddressOffsetSize()) {401case 1: return UINT8_MAX;402case 2: return UINT16_MAX;403case 4: return UINT32_MAX;404case 8: return UINT64_MAX;405}406llvm_unreachable("invalid address offset");407}408409uint8_t GsymCreator::getAddressOffsetSize() const {410const std::optional<uint64_t> BaseAddress = getBaseAddress();411const std::optional<uint64_t> LastFuncAddr = getLastFunctionAddress();412if (BaseAddress && LastFuncAddr) {413const uint64_t AddrDelta = *LastFuncAddr - *BaseAddress;414if (AddrDelta <= UINT8_MAX)415return 1;416else if (AddrDelta <= UINT16_MAX)417return 2;418else if (AddrDelta <= UINT32_MAX)419return 4;420return 8;421}422return 1;423}424425uint64_t GsymCreator::calculateHeaderAndTableSize() const {426uint64_t Size = sizeof(Header);427const size_t NumFuncs = Funcs.size();428// Add size of address offset table429Size += NumFuncs * getAddressOffsetSize();430// Add size of address info offsets which are 32 bit integers in version 1.431Size += NumFuncs * sizeof(uint32_t);432// Add file table size433Size += Files.size() * sizeof(FileEntry);434// Add string table size435Size += StrTab.getSize();436437return Size;438}439440// This function takes a InlineInfo class that was copy constructed from an441// InlineInfo from the \a SrcGC and updates all members that point to strings442// and files to point to strings and files from this GsymCreator.443void GsymCreator::fixupInlineInfo(const GsymCreator &SrcGC, InlineInfo &II) {444II.Name = copyString(SrcGC, II.Name);445II.CallFile = copyFile(SrcGC, II.CallFile);446for (auto &ChildII: II.Children)447fixupInlineInfo(SrcGC, ChildII);448}449450uint64_t GsymCreator::copyFunctionInfo(const GsymCreator &SrcGC, size_t FuncIdx) {451// To copy a function info we need to copy any files and strings over into452// this GsymCreator and then copy the function info and update the string453// table offsets to match the new offsets.454const FunctionInfo &SrcFI = SrcGC.Funcs[FuncIdx];455456FunctionInfo DstFI;457DstFI.Range = SrcFI.Range;458DstFI.Name = copyString(SrcGC, SrcFI.Name);459// Copy the line table if there is one.460if (SrcFI.OptLineTable) {461// Copy the entire line table.462DstFI.OptLineTable = LineTable(SrcFI.OptLineTable.value());463// Fixup all LineEntry::File entries which are indexes in the the file table464// from SrcGC and must be converted to file indexes from this GsymCreator.465LineTable &DstLT = DstFI.OptLineTable.value();466const size_t NumLines = DstLT.size();467for (size_t I=0; I<NumLines; ++I) {468LineEntry &LE = DstLT.get(I);469LE.File = copyFile(SrcGC, LE.File);470}471}472// Copy the inline information if needed.473if (SrcFI.Inline) {474// Make a copy of the source inline information.475DstFI.Inline = SrcFI.Inline.value();476// Fixup all strings and files in the copied inline information.477fixupInlineInfo(SrcGC, *DstFI.Inline);478}479std::lock_guard<std::mutex> Guard(Mutex);480Funcs.emplace_back(DstFI);481return Funcs.back().cacheEncoding();482}483484llvm::Error GsymCreator::saveSegments(StringRef Path,485llvm::endianness ByteOrder,486uint64_t SegmentSize) const {487if (SegmentSize == 0)488return createStringError(std::errc::invalid_argument,489"invalid segment size zero");490491size_t FuncIdx = 0;492const size_t NumFuncs = Funcs.size();493while (FuncIdx < NumFuncs) {494llvm::Expected<std::unique_ptr<GsymCreator>> ExpectedGC =495createSegment(SegmentSize, FuncIdx);496if (ExpectedGC) {497GsymCreator *GC = ExpectedGC->get();498if (GC == NULL)499break; // We had not more functions to encode.500// Don't collect any messages at all501OutputAggregator Out(nullptr);502llvm::Error Err = GC->finalize(Out);503if (Err)504return Err;505std::string SegmentedGsymPath;506raw_string_ostream SGP(SegmentedGsymPath);507std::optional<uint64_t> FirstFuncAddr = GC->getFirstFunctionAddress();508if (FirstFuncAddr) {509SGP << Path << "-" << llvm::format_hex(*FirstFuncAddr, 1);510SGP.flush();511Err = GC->save(SegmentedGsymPath, ByteOrder, std::nullopt);512if (Err)513return Err;514}515} else {516return ExpectedGC.takeError();517}518}519return Error::success();520}521522llvm::Expected<std::unique_ptr<GsymCreator>>523GsymCreator::createSegment(uint64_t SegmentSize, size_t &FuncIdx) const {524// No function entries, return empty unique pointer525if (FuncIdx >= Funcs.size())526return std::unique_ptr<GsymCreator>();527528std::unique_ptr<GsymCreator> GC(new GsymCreator(/*Quiet=*/true));529530// Tell the creator that this is a segment.531GC->setIsSegment();532533// Set the base address if there is one.534if (BaseAddress)535GC->setBaseAddress(*BaseAddress);536// Copy the UUID value from this object into the new creator.537GC->setUUID(UUID);538const size_t NumFuncs = Funcs.size();539// Track how big the function infos are for the current segment so we can540// emit segments that are close to the requested size. It is quick math to541// determine the current header and tables sizes, so we can do that each loop.542uint64_t SegmentFuncInfosSize = 0;543for (; FuncIdx < NumFuncs; ++FuncIdx) {544const uint64_t HeaderAndTableSize = GC->calculateHeaderAndTableSize();545if (HeaderAndTableSize + SegmentFuncInfosSize >= SegmentSize) {546if (SegmentFuncInfosSize == 0)547return createStringError(std::errc::invalid_argument,548"a segment size of %" PRIu64 " is to small to "549"fit any function infos, specify a larger value",550SegmentSize);551552break;553}554SegmentFuncInfosSize += alignTo(GC->copyFunctionInfo(*this, FuncIdx), 4);555}556return std::move(GC);557}558559560