Path: blob/main/contrib/llvm-project/lld/COFF/Symbols.h
34870 views
//===- Symbols.h ------------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLD_COFF_SYMBOLS_H9#define LLD_COFF_SYMBOLS_H1011#include "Chunks.h"12#include "Config.h"13#include "lld/Common/LLVM.h"14#include "lld/Common/Memory.h"15#include "llvm/ADT/ArrayRef.h"16#include "llvm/Object/Archive.h"17#include "llvm/Object/COFF.h"18#include <atomic>19#include <memory>20#include <vector>2122namespace lld {2324namespace coff {2526using llvm::object::Archive;27using llvm::object::COFFSymbolRef;28using llvm::object::coff_import_header;29using llvm::object::coff_symbol_generic;3031class ArchiveFile;32class COFFLinkerContext;33class InputFile;34class ObjFile;35class SymbolTable;3637// The base class for real symbol classes.38class Symbol {39public:40enum Kind {41// The order of these is significant. We start with the regular defined42// symbols as those are the most prevalent and the zero tag is the cheapest43// to set. Among the defined kinds, the lower the kind is preferred over44// the higher kind when testing whether one symbol should take precedence45// over another.46DefinedRegularKind = 0,47DefinedCommonKind,48DefinedLocalImportKind,49DefinedImportThunkKind,50DefinedImportDataKind,51DefinedAbsoluteKind,52DefinedSyntheticKind,5354UndefinedKind,55LazyArchiveKind,56LazyObjectKind,57LazyDLLSymbolKind,5859LastDefinedCOFFKind = DefinedCommonKind,60LastDefinedKind = DefinedSyntheticKind,61};6263Kind kind() const { return static_cast<Kind>(symbolKind); }6465// Returns the symbol name.66StringRef getName() {67// COFF symbol names are read lazily for a performance reason.68// Non-external symbol names are never used by the linker except for logging69// or debugging. Their internal references are resolved not by name but by70// symbol index. And because they are not external, no one can refer them by71// name. Object files contain lots of non-external symbols, and creating72// StringRefs for them (which involves lots of strlen() on the string table)73// is a waste of time.74if (nameData == nullptr)75computeName();76return StringRef(nameData, nameSize);77}7879void replaceKeepingName(Symbol *other, size_t size);8081// Returns the file from which this symbol was created.82InputFile *getFile();8384// Indicates that this symbol will be included in the final image. Only valid85// after calling markLive.86bool isLive() const;8788bool isLazy() const {89return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind ||90symbolKind == LazyDLLSymbolKind;91}9293private:94void computeName();9596protected:97friend SymbolTable;98explicit Symbol(Kind k, StringRef n = "")99: symbolKind(k), isExternal(true), isCOMDAT(false),100writtenToSymtab(false), isUsedInRegularObj(false),101pendingArchiveLoad(false), isGCRoot(false), isRuntimePseudoReloc(false),102deferUndefined(false), canInline(true), isWeak(false),103nameSize(n.size()), nameData(n.empty() ? nullptr : n.data()) {104assert((!n.empty() || k <= LastDefinedCOFFKind) &&105"If the name is empty, the Symbol must be a DefinedCOFF.");106}107108unsigned symbolKind : 8;109unsigned isExternal : 1;110111public:112// This bit is used by the \c DefinedRegular subclass.113unsigned isCOMDAT : 1;114115// This bit is used by Writer::createSymbolAndStringTable() to prevent116// symbols from being written to the symbol table more than once.117unsigned writtenToSymtab : 1;118119// True if this symbol was referenced by a regular (non-bitcode) object.120unsigned isUsedInRegularObj : 1;121122// True if we've seen both a lazy and an undefined symbol with this symbol123// name, which means that we have enqueued an archive member load and should124// not load any more archive members to resolve the same symbol.125unsigned pendingArchiveLoad : 1;126127/// True if we've already added this symbol to the list of GC roots.128unsigned isGCRoot : 1;129130unsigned isRuntimePseudoReloc : 1;131132// True if we want to allow this symbol to be undefined in the early133// undefined check pass in SymbolTable::reportUnresolvable(), as it134// might be fixed up later.135unsigned deferUndefined : 1;136137// False if LTO shouldn't inline whatever this symbol points to. If a symbol138// is overwritten after LTO, LTO shouldn't inline the symbol because it139// doesn't know the final contents of the symbol.140unsigned canInline : 1;141142// True if the symbol is weak. This is only tracked for bitcode/LTO symbols.143// This information isn't written to the output; rather, it's used for144// managing weak symbol overrides.145unsigned isWeak : 1;146147protected:148// Symbol name length. Assume symbol lengths fit in a 32-bit integer.149uint32_t nameSize;150151const char *nameData;152};153154// The base class for any defined symbols, including absolute symbols,155// etc.156class Defined : public Symbol {157public:158Defined(Kind k, StringRef n) : Symbol(k, n) {}159160static bool classof(const Symbol *s) { return s->kind() <= LastDefinedKind; }161162// Returns the RVA (relative virtual address) of this symbol. The163// writer sets and uses RVAs.164uint64_t getRVA();165166// Returns the chunk containing this symbol. Absolute symbols and __ImageBase167// do not have chunks, so this may return null.168Chunk *getChunk();169};170171// Symbols defined via a COFF object file or bitcode file. For COFF files, this172// stores a coff_symbol_generic*, and names of internal symbols are lazily173// loaded through that. For bitcode files, Sym is nullptr and the name is stored174// as a decomposed StringRef.175class DefinedCOFF : public Defined {176friend Symbol;177178public:179DefinedCOFF(Kind k, InputFile *f, StringRef n, const coff_symbol_generic *s)180: Defined(k, n), file(f), sym(s) {}181182static bool classof(const Symbol *s) {183return s->kind() <= LastDefinedCOFFKind;184}185186InputFile *getFile() { return file; }187188COFFSymbolRef getCOFFSymbol();189190InputFile *file;191192protected:193const coff_symbol_generic *sym;194};195196// Regular defined symbols read from object file symbol tables.197class DefinedRegular : public DefinedCOFF {198public:199DefinedRegular(InputFile *f, StringRef n, bool isCOMDAT,200bool isExternal = false,201const coff_symbol_generic *s = nullptr,202SectionChunk *c = nullptr, bool isWeak = false)203: DefinedCOFF(DefinedRegularKind, f, n, s), data(c ? &c->repl : nullptr) {204this->isExternal = isExternal;205this->isCOMDAT = isCOMDAT;206this->isWeak = isWeak;207}208209static bool classof(const Symbol *s) {210return s->kind() == DefinedRegularKind;211}212213uint64_t getRVA() const { return (*data)->getRVA() + sym->Value; }214SectionChunk *getChunk() const { return *data; }215uint32_t getValue() const { return sym->Value; }216217SectionChunk **data;218};219220class DefinedCommon : public DefinedCOFF {221public:222DefinedCommon(InputFile *f, StringRef n, uint64_t size,223const coff_symbol_generic *s = nullptr,224CommonChunk *c = nullptr)225: DefinedCOFF(DefinedCommonKind, f, n, s), data(c), size(size) {226this->isExternal = true;227}228229static bool classof(const Symbol *s) {230return s->kind() == DefinedCommonKind;231}232233uint64_t getRVA() { return data->getRVA(); }234CommonChunk *getChunk() { return data; }235236private:237friend SymbolTable;238uint64_t getSize() const { return size; }239CommonChunk *data;240uint64_t size;241};242243// Absolute symbols.244class DefinedAbsolute : public Defined {245public:246DefinedAbsolute(const COFFLinkerContext &c, StringRef n, COFFSymbolRef s)247: Defined(DefinedAbsoluteKind, n), va(s.getValue()), ctx(c) {248isExternal = s.isExternal();249}250251DefinedAbsolute(const COFFLinkerContext &c, StringRef n, uint64_t v)252: Defined(DefinedAbsoluteKind, n), va(v), ctx(c) {}253254static bool classof(const Symbol *s) {255return s->kind() == DefinedAbsoluteKind;256}257258uint64_t getRVA();259void setVA(uint64_t v) { va = v; }260uint64_t getVA() const { return va; }261262private:263uint64_t va;264const COFFLinkerContext &ctx;265};266267// This symbol is used for linker-synthesized symbols like __ImageBase and268// __safe_se_handler_table.269class DefinedSynthetic : public Defined {270public:271explicit DefinedSynthetic(StringRef name, Chunk *c, uint32_t offset = 0)272: Defined(DefinedSyntheticKind, name), c(c), offset(offset) {}273274static bool classof(const Symbol *s) {275return s->kind() == DefinedSyntheticKind;276}277278// A null chunk indicates that this is __ImageBase. Otherwise, this is some279// other synthesized chunk, like SEHTableChunk.280uint32_t getRVA() { return c ? c->getRVA() + offset : 0; }281Chunk *getChunk() { return c; }282283private:284Chunk *c;285uint32_t offset;286};287288// This class represents a symbol defined in an archive file. It is289// created from an archive file header, and it knows how to load an290// object file from an archive to replace itself with a defined291// symbol. If the resolver finds both Undefined and LazyArchive for292// the same name, it will ask the LazyArchive to load a file.293class LazyArchive : public Symbol {294public:295LazyArchive(ArchiveFile *f, const Archive::Symbol s)296: Symbol(LazyArchiveKind, s.getName()), file(f), sym(s) {}297298static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }299300MemoryBufferRef getMemberBuffer();301302ArchiveFile *file;303const Archive::Symbol sym;304};305306class LazyObject : public Symbol {307public:308LazyObject(InputFile *f, StringRef n) : Symbol(LazyObjectKind, n), file(f) {}309static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }310InputFile *file;311};312313// MinGW only.314class LazyDLLSymbol : public Symbol {315public:316LazyDLLSymbol(DLLFile *f, DLLFile::Symbol *s, StringRef n)317: Symbol(LazyDLLSymbolKind, n), file(f), sym(s) {}318static bool classof(const Symbol *s) {319return s->kind() == LazyDLLSymbolKind;320}321322DLLFile *file;323DLLFile::Symbol *sym;324};325326// Undefined symbols.327class Undefined : public Symbol {328public:329explicit Undefined(StringRef n) : Symbol(UndefinedKind, n) {}330331static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }332333// An undefined symbol can have a fallback symbol which gives an334// undefined symbol a second chance if it would remain undefined.335// If it remains undefined, it'll be replaced with whatever the336// Alias pointer points to.337Symbol *weakAlias = nullptr;338339// If this symbol is external weak, try to resolve it to a defined340// symbol by searching the chain of fallback symbols. Returns the symbol if341// successful, otherwise returns null.342Defined *getWeakAlias();343};344345// Windows-specific classes.346347// This class represents a symbol imported from a DLL. This has two348// names for internal use and external use. The former is used for349// name resolution, and the latter is used for the import descriptor350// table in an output. The former has "__imp_" prefix.351class DefinedImportData : public Defined {352public:353DefinedImportData(StringRef n, ImportFile *f)354: Defined(DefinedImportDataKind, n), file(f) {355}356357static bool classof(const Symbol *s) {358return s->kind() == DefinedImportDataKind;359}360361uint64_t getRVA() { return file->location->getRVA(); }362Chunk *getChunk() { return file->location; }363void setLocation(Chunk *addressTable) { file->location = addressTable; }364365StringRef getDLLName() { return file->dllName; }366StringRef getExternalName() { return file->externalName; }367uint16_t getOrdinal() { return file->hdr->OrdinalHint; }368369ImportFile *file;370371// This is a pointer to the synthetic symbol associated with the load thunk372// for this symbol that will be called if the DLL is delay-loaded. This is373// needed for Control Flow Guard because if this DefinedImportData symbol is a374// valid call target, the corresponding load thunk must also be marked as a375// valid call target.376DefinedSynthetic *loadThunkSym = nullptr;377};378379// This class represents a symbol for a jump table entry which jumps380// to a function in a DLL. Linker are supposed to create such symbols381// without "__imp_" prefix for all function symbols exported from382// DLLs, so that you can call DLL functions as regular functions with383// a regular name. A function pointer is given as a DefinedImportData.384class DefinedImportThunk : public Defined {385public:386DefinedImportThunk(COFFLinkerContext &ctx, StringRef name,387DefinedImportData *s, uint16_t machine);388389static bool classof(const Symbol *s) {390return s->kind() == DefinedImportThunkKind;391}392393uint64_t getRVA() { return data->getRVA(); }394Chunk *getChunk() { return data; }395396DefinedImportData *wrappedSym;397398private:399Chunk *data;400};401402// If you have a symbol "foo" in your object file, a symbol name403// "__imp_foo" becomes automatically available as a pointer to "foo".404// This class is for such automatically-created symbols.405// Yes, this is an odd feature. We didn't intend to implement that.406// This is here just for compatibility with MSVC.407class DefinedLocalImport : public Defined {408public:409DefinedLocalImport(COFFLinkerContext &ctx, StringRef n, Defined *s)410: Defined(DefinedLocalImportKind, n),411data(make<LocalImportChunk>(ctx, s)) {}412413static bool classof(const Symbol *s) {414return s->kind() == DefinedLocalImportKind;415}416417uint64_t getRVA() { return data->getRVA(); }418Chunk *getChunk() { return data; }419420private:421LocalImportChunk *data;422};423424inline uint64_t Defined::getRVA() {425switch (kind()) {426case DefinedAbsoluteKind:427return cast<DefinedAbsolute>(this)->getRVA();428case DefinedSyntheticKind:429return cast<DefinedSynthetic>(this)->getRVA();430case DefinedImportDataKind:431return cast<DefinedImportData>(this)->getRVA();432case DefinedImportThunkKind:433return cast<DefinedImportThunk>(this)->getRVA();434case DefinedLocalImportKind:435return cast<DefinedLocalImport>(this)->getRVA();436case DefinedCommonKind:437return cast<DefinedCommon>(this)->getRVA();438case DefinedRegularKind:439return cast<DefinedRegular>(this)->getRVA();440case LazyArchiveKind:441case LazyObjectKind:442case LazyDLLSymbolKind:443case UndefinedKind:444llvm_unreachable("Cannot get the address for an undefined symbol.");445}446llvm_unreachable("unknown symbol kind");447}448449inline Chunk *Defined::getChunk() {450switch (kind()) {451case DefinedRegularKind:452return cast<DefinedRegular>(this)->getChunk();453case DefinedAbsoluteKind:454return nullptr;455case DefinedSyntheticKind:456return cast<DefinedSynthetic>(this)->getChunk();457case DefinedImportDataKind:458return cast<DefinedImportData>(this)->getChunk();459case DefinedImportThunkKind:460return cast<DefinedImportThunk>(this)->getChunk();461case DefinedLocalImportKind:462return cast<DefinedLocalImport>(this)->getChunk();463case DefinedCommonKind:464return cast<DefinedCommon>(this)->getChunk();465case LazyArchiveKind:466case LazyObjectKind:467case LazyDLLSymbolKind:468case UndefinedKind:469llvm_unreachable("Cannot get the chunk of an undefined symbol.");470}471llvm_unreachable("unknown symbol kind");472}473474// A buffer class that is large enough to hold any Symbol-derived475// object. We allocate memory using this class and instantiate a symbol476// using the placement new.477union SymbolUnion {478alignas(DefinedRegular) char a[sizeof(DefinedRegular)];479alignas(DefinedCommon) char b[sizeof(DefinedCommon)];480alignas(DefinedAbsolute) char c[sizeof(DefinedAbsolute)];481alignas(DefinedSynthetic) char d[sizeof(DefinedSynthetic)];482alignas(LazyArchive) char e[sizeof(LazyArchive)];483alignas(Undefined) char f[sizeof(Undefined)];484alignas(DefinedImportData) char g[sizeof(DefinedImportData)];485alignas(DefinedImportThunk) char h[sizeof(DefinedImportThunk)];486alignas(DefinedLocalImport) char i[sizeof(DefinedLocalImport)];487alignas(LazyObject) char j[sizeof(LazyObject)];488alignas(LazyDLLSymbol) char k[sizeof(LazyDLLSymbol)];489};490491template <typename T, typename... ArgT>492void replaceSymbol(Symbol *s, ArgT &&... arg) {493static_assert(std::is_trivially_destructible<T>(),494"Symbol types must be trivially destructible");495static_assert(sizeof(T) <= sizeof(SymbolUnion), "Symbol too small");496static_assert(alignof(T) <= alignof(SymbolUnion),497"SymbolUnion not aligned enough");498assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&499"Not a Symbol");500bool canInline = s->canInline;501bool isUsedInRegularObj = s->isUsedInRegularObj;502new (s) T(std::forward<ArgT>(arg)...);503s->canInline = canInline;504s->isUsedInRegularObj = isUsedInRegularObj;505}506} // namespace coff507508std::string toString(const coff::COFFLinkerContext &ctx, coff::Symbol &b);509std::string toCOFFString(const coff::COFFLinkerContext &ctx,510const llvm::object::Archive::Symbol &b);511512} // namespace lld513514#endif515516517