// Path: blob/main/contrib/llvm-project/lld/MachO/InputFiles.h
// 34870 views
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLD_MACHO_INPUT_FILES_H9#define LLD_MACHO_INPUT_FILES_H1011#include "MachOStructs.h"12#include "Target.h"1314#include "lld/Common/DWARF.h"15#include "lld/Common/LLVM.h"16#include "lld/Common/Memory.h"17#include "llvm/ADT/CachedHashString.h"18#include "llvm/ADT/DenseSet.h"19#include "llvm/ADT/SetVector.h"20#include "llvm/BinaryFormat/MachO.h"21#include "llvm/DebugInfo/DWARF/DWARFUnit.h"22#include "llvm/Object/Archive.h"23#include "llvm/Support/MemoryBuffer.h"24#include "llvm/Support/Threading.h"25#include "llvm/TextAPI/TextAPIReader.h"2627#include <vector>2829namespace llvm {30namespace lto {31class InputFile;32} // namespace lto33namespace MachO {34class InterfaceFile;35} // namespace MachO36class TarWriter;37} // namespace llvm3839namespace lld {40namespace macho {4142struct PlatformInfo;43class ConcatInputSection;44class Symbol;45class Defined;46class AliasSymbol;47struct Reloc;48enum class RefState : uint8_t;4950// If --reproduce option is given, all input files are written51// to this tar archive.52extern std::unique_ptr<llvm::TarWriter> tar;5354// If .subsections_via_symbols is set, each InputSection will be split along55// symbol boundaries. 
The field offset represents the offset of the subsection56// from the start of the original pre-split InputSection.57struct Subsection {58uint64_t offset = 0;59InputSection *isec = nullptr;60};6162using Subsections = std::vector<Subsection>;63class InputFile;6465class Section {66public:67InputFile *file;68StringRef segname;69StringRef name;70uint32_t flags;71uint64_t addr;72Subsections subsections;7374Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,75uint64_t addr)76: file(file), segname(segname), name(name), flags(flags), addr(addr) {}77// Ensure pointers to Sections are never invalidated.78Section(const Section &) = delete;79Section &operator=(const Section &) = delete;80Section(Section &&) = delete;81Section &operator=(Section &&) = delete;8283private:84// Whether we have already split this section into individual subsections.85// For sections that cannot be split (e.g. literal sections), this is always86// false.87bool doneSplitting = false;88friend class ObjFile;89};9091// Represents a call graph profile edge.92struct CallGraphEntry {93// The index of the caller in the symbol table.94uint32_t fromIndex;95// The index of the callee in the symbol table.96uint32_t toIndex;97// Number of calls from callee to caller in the profile.98uint64_t count;99100CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)101: fromIndex(fromIndex), toIndex(toIndex), count(count) {}102};103104class InputFile {105public:106enum Kind {107ObjKind,108OpaqueKind,109DylibKind,110ArchiveKind,111BitcodeKind,112};113114virtual ~InputFile() = default;115Kind kind() const { return fileKind; }116StringRef getName() const { return name; }117static void resetIdCount() { idCount = 0; }118119MemoryBufferRef mb;120121std::vector<Symbol *> symbols;122std::vector<Section *> sections;123ArrayRef<uint8_t> objCImageInfo;124125// If not empty, this stores the name of the archive containing this file.126// We use this string for creating error 
messages.127std::string archiveName;128129// Provides an easy way to sort InputFiles deterministically.130const int id;131132// True if this is a lazy ObjFile or BitcodeFile.133bool lazy = false;134135protected:136InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)137: mb(mb), id(idCount++), lazy(lazy), fileKind(kind),138name(mb.getBufferIdentifier()) {}139140InputFile(Kind, const llvm::MachO::InterfaceFile &);141142// If true, this input's arch is compatible with target.143bool compatArch = true;144145private:146const Kind fileKind;147const StringRef name;148149static int idCount;150};151152struct FDE {153uint32_t funcLength;154Symbol *personality;155InputSection *lsda;156};157158// .o file159class ObjFile final : public InputFile {160public:161ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,162bool lazy = false, bool forceHidden = false, bool compatArch = true,163bool builtFromBitcode = false);164ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;165ArrayRef<uint8_t> getOptimizationHints() const;166template <class LP> void parse();167template <class LP>168void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions);169170static bool classof(const InputFile *f) { return f->kind() == ObjKind; }171172std::string sourceFile() const;173// Parses line table information for diagnostics. 
compileUnit should be used174// for other purposes.175lld::DWARFCache *getDwarf();176177llvm::DWARFUnit *compileUnit = nullptr;178std::unique_ptr<lld::DWARFCache> dwarfCache;179Section *addrSigSection = nullptr;180const uint32_t modTime;181bool forceHidden;182bool builtFromBitcode;183std::vector<ConcatInputSection *> debugSections;184std::vector<CallGraphEntry> callGraph;185llvm::DenseMap<ConcatInputSection *, FDE> fdes;186std::vector<AliasSymbol *> aliases;187188private:189llvm::once_flag initDwarf;190template <class LP> void parseLazy();191template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);192template <class LP>193void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,194ArrayRef<typename LP::nlist> nList, const char *strtab,195bool subsectionsViaSymbols);196template <class NList>197Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);198template <class SectionHeader>199void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,200const SectionHeader &, Section &);201void parseDebugInfo();202void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);203void registerCompactUnwind(Section &compactUnwindSection);204void registerEhFrames(Section &ehFrameSection);205};206207// command-line -sectcreate file208class OpaqueFile final : public InputFile {209public:210OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);211static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }212};213214// .dylib or .tbd file215class DylibFile final : public InputFile {216public:217// Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the218// symbols in those sub-libraries will be available under the umbrella219// library's namespace. Those sub-libraries can also have their own220// re-exports. When loading a re-exported dylib, `umbrella` should be set to221// the root dylib to ensure symbols in the child library are correctly bound222// to the root. 
On the other hand, if a dylib is being directly loaded223// (through an -lfoo flag), then `umbrella` should be a nullptr.224explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,225bool isBundleLoader, bool explicitlyLinked);226explicit DylibFile(const llvm::MachO::InterfaceFile &interface,227DylibFile *umbrella, bool isBundleLoader,228bool explicitlyLinked);229explicit DylibFile(DylibFile *umbrella);230231void parseLoadCommands(MemoryBufferRef mb);232void parseReexports(const llvm::MachO::InterfaceFile &interface);233bool isReferenced() const { return numReferencedSymbols > 0; }234bool isExplicitlyLinked() const;235void setExplicitlyLinked() { explicitlyLinked = true; }236237static bool classof(const InputFile *f) { return f->kind() == DylibKind; }238239StringRef installName;240DylibFile *exportingFile = nullptr;241DylibFile *umbrella;242SmallVector<StringRef, 2> rpaths;243uint32_t compatibilityVersion = 0;244uint32_t currentVersion = 0;245int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel246unsigned numReferencedSymbols = 0;247RefState refState;248bool reexport = false;249bool forceNeeded = false;250bool forceWeakImport = false;251bool deadStrippable = false;252253private:254bool explicitlyLinked = false; // Access via isExplicitlyLinked().255256public:257// An executable can be used as a bundle loader that will load the output258// file being linked, and that contains symbols referenced, but not259// implemented in the bundle. When used like this, it is very similar260// to a dylib, so we've used the same class to represent it.261bool isBundleLoader;262263// Synthetic Dylib objects created by $ld$previous symbols in this dylib.264// Usually empty. 
These synthetic dylibs won't have synthetic dylibs265// themselves.266SmallVector<DylibFile *, 2> extraDylibs;267268private:269DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,270uint32_t compatVersion);271272bool handleLDSymbol(StringRef originalName);273void handleLDPreviousSymbol(StringRef name, StringRef originalName);274void handleLDInstallNameSymbol(StringRef name, StringRef originalName);275void handleLDHideSymbol(StringRef name, StringRef originalName);276void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;277void parseExportedSymbols(uint32_t offset, uint32_t size);278void loadReexport(StringRef path, DylibFile *umbrella,279const llvm::MachO::InterfaceFile *currentTopLevelTapi);280281llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;282};283284// .a file285class ArchiveFile final : public InputFile {286public:287explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,288bool forceHidden);289void addLazySymbols();290void fetch(const llvm::object::Archive::Symbol &);291// LLD normally doesn't use Error for error-handling, but the underlying292// Archive library does, so this is the cleanest way to wrap it.293Error fetch(const llvm::object::Archive::Child &, StringRef reason);294const llvm::object::Archive &getArchive() const { return *file; };295static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }296297private:298std::unique_ptr<llvm::object::Archive> file;299// Keep track of children fetched from the archive by tracking300// which address offsets have been fetched already.301llvm::DenseSet<uint64_t> seen;302// Load all symbols with hidden visibility (-load_hidden).303bool forceHidden;304};305306class BitcodeFile final : public InputFile {307public:308explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,309uint64_t offsetInArchive, bool lazy = false,310bool forceHidden = false, bool compatArch = true);311static bool classof(const InputFile *f) { return f->kind() == 
BitcodeKind; }312void parse();313314std::unique_ptr<llvm::lto::InputFile> obj;315bool forceHidden;316317private:318void parseLazy();319};320321extern llvm::SetVector<InputFile *> inputFiles;322extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;323extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions;324325std::optional<MemoryBufferRef> readFile(StringRef path);326327void extract(InputFile &file, StringRef reason);328329namespace detail {330331template <class CommandType, class... Types>332std::vector<const CommandType *>333findCommands(const void *anyHdr, size_t maxCommands, Types... types) {334std::vector<const CommandType *> cmds;335std::initializer_list<uint32_t> typesList{types...};336const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);337const uint8_t *p =338reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;339for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {340auto *cmd = reinterpret_cast<const CommandType *>(p);341if (llvm::is_contained(typesList, cmd->cmd)) {342cmds.push_back(cmd);343if (cmds.size() == maxCommands)344return cmds;345}346p += cmd->cmdsize;347}348return cmds;349}350351} // namespace detail352353// anyHdr should be a pointer to either mach_header or mach_header_64354template <class CommandType = llvm::MachO::load_command, class... Types>355const CommandType *findCommand(const void *anyHdr, Types... types) {356std::vector<const CommandType *> cmds =357detail::findCommands<CommandType>(anyHdr, 1, types...);358return cmds.size() ? cmds[0] : nullptr;359}360361template <class CommandType = llvm::MachO::load_command, class... Types>362std::vector<const CommandType *> findCommands(const void *anyHdr,363Types... 
types) {364return detail::findCommands<CommandType>(anyHdr, 0, types...);365}366367std::string replaceThinLTOSuffix(StringRef path);368} // namespace macho369370std::string toString(const macho::InputFile *file);371std::string toString(const macho::Section &);372} // namespace lld373374#endif375376377