// Path: blob/main/contrib/llvm-project/lld/ELF/Symbols.h
// 34879 views
//===- Symbols.h ------------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines various types of Symbols.9//10//===----------------------------------------------------------------------===//1112#ifndef LLD_ELF_SYMBOLS_H13#define LLD_ELF_SYMBOLS_H1415#include "Config.h"16#include "lld/Common/LLVM.h"17#include "lld/Common/Memory.h"18#include "llvm/ADT/DenseMap.h"19#include "llvm/Object/ELF.h"20#include "llvm/Support/Compiler.h"21#include <tuple>2223namespace lld {24namespace elf {25class Symbol;26}27// Returns a string representation for a symbol for diagnostics.28std::string toString(const elf::Symbol &);2930namespace elf {31class CommonSymbol;32class Defined;33class OutputSection;34class SectionBase;35class InputSectionBase;36class SharedSymbol;37class Symbol;38class Undefined;39class LazySymbol;40class InputFile;4142void printTraceSymbol(const Symbol &sym, StringRef name);4344enum {45NEEDS_GOT = 1 << 0,46NEEDS_PLT = 1 << 1,47HAS_DIRECT_RELOC = 1 << 2,48// True if this symbol needs a canonical PLT entry, or (during49// postScanRelocations) a copy relocation.50NEEDS_COPY = 1 << 3,51NEEDS_TLSDESC = 1 << 4,52NEEDS_TLSGD = 1 << 5,53NEEDS_TLSGD_TO_IE = 1 << 6,54NEEDS_GOT_DTPREL = 1 << 7,55NEEDS_TLSIE = 1 << 8,56};5758// Some index properties of a symbol are stored separately in this auxiliary59// struct to decrease sizeof(SymbolUnion) in the majority of cases.60struct SymbolAux {61uint32_t gotIdx = -1;62uint32_t pltIdx = -1;63uint32_t tlsDescIdx = -1;64uint32_t tlsGdIdx = -1;65};6667LLVM_LIBRARY_VISIBILITY extern SmallVector<SymbolAux, 0> symAux;6869// The base class for real symbol classes.70class Symbol {71public:72enum Kind 
{73PlaceholderKind,74DefinedKind,75CommonKind,76SharedKind,77UndefinedKind,78LazyKind,79};8081Kind kind() const { return static_cast<Kind>(symbolKind); }8283// The file from which this symbol was created.84InputFile *file;8586// The default copy constructor is deleted due to atomic flags. Define one for87// places where no atomic is needed.88Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }8990protected:91const char *nameData;92// 32-bit size saves space.93uint32_t nameSize;9495public:96// The next three fields have the same meaning as the ELF symbol attributes.97// type and binding are placed in this order to optimize generating st_info,98// which is defined as (binding << 4) + (type & 0xf), on a little-endian99// system.100uint8_t type : 4; // symbol type101102// Symbol binding. This is not overwritten by replace() to track103// changes during resolution. In particular:104// - An undefined weak is still weak when it resolves to a shared library.105// - An undefined weak will not extract archive members, but we have to106// remember it is weak.107uint8_t binding : 4;108109uint8_t stOther; // st_other field value110111uint8_t symbolKind;112113// The partition whose dynamic symbol table contains this symbol's definition.114uint8_t partition;115116// True if this symbol is preemptible at load time.117LLVM_PREFERRED_TYPE(bool)118uint8_t isPreemptible : 1;119120// True if the symbol was used for linking and thus need to be added to the121// output file's symbol table. 
This is true for all symbols except for122// unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that123// are unreferenced except by other bitcode objects.124LLVM_PREFERRED_TYPE(bool)125uint8_t isUsedInRegularObj : 1;126127// True if an undefined or shared symbol is used from a live section.128//129// NOTE: In Writer.cpp the field is used to mark local defined symbols130// which are referenced by relocations when -r or --emit-relocs is given.131LLVM_PREFERRED_TYPE(bool)132uint8_t used : 1;133134// Used by a Defined symbol with protected or default visibility, to record135// whether it is required to be exported into .dynsym. This is set when any of136// the following conditions hold:137//138// - If there is an interposable symbol from a DSO. Note: We also do this for139// STV_PROTECTED symbols which can't be interposed (to match BFD behavior).140// - If -shared or --export-dynamic is specified, any symbol in an object141// file/bitcode sets this property, unless suppressed by LTO142// canBeOmittedFromSymbolTable().143LLVM_PREFERRED_TYPE(bool)144uint8_t exportDynamic : 1;145146// True if the symbol is in the --dynamic-list file. A Defined symbol with147// protected or default visibility with this property is required to be148// exported into .dynsym.149LLVM_PREFERRED_TYPE(bool)150uint8_t inDynamicList : 1;151152// Used to track if there has been at least one undefined reference to the153// symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK154// if the first undefined reference from a non-shared object is weak.155LLVM_PREFERRED_TYPE(bool)156uint8_t referenced : 1;157158// Used to track if this symbol will be referenced after wrapping is performed159// (i.e. 
this will be true for foo if __real_foo is referenced, and will be160// true for __wrap_foo if foo is referenced).161LLVM_PREFERRED_TYPE(bool)162uint8_t referencedAfterWrap : 1;163164// True if this symbol is specified by --trace-symbol option.165LLVM_PREFERRED_TYPE(bool)166uint8_t traced : 1;167168// True if the name contains '@'.169LLVM_PREFERRED_TYPE(bool)170uint8_t hasVersionSuffix : 1;171172// Symbol visibility. This is the computed minimum visibility of all173// observed non-DSO symbols.174uint8_t visibility() const { return stOther & 3; }175void setVisibility(uint8_t visibility) {176stOther = (stOther & ~3) | visibility;177}178179bool includeInDynsym() const;180uint8_t computeBinding() const;181bool isGlobal() const { return binding == llvm::ELF::STB_GLOBAL; }182bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }183184bool isUndefined() const { return symbolKind == UndefinedKind; }185bool isCommon() const { return symbolKind == CommonKind; }186bool isDefined() const { return symbolKind == DefinedKind; }187bool isShared() const { return symbolKind == SharedKind; }188bool isPlaceholder() const { return symbolKind == PlaceholderKind; }189190bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }191192bool isLazy() const { return symbolKind == LazyKind; }193194// True if this is an undefined weak symbol. This only works once195// all input files have been added.196bool isUndefWeak() const { return isWeak() && isUndefined(); }197198StringRef getName() const { return {nameData, nameSize}; }199200void setName(StringRef s) {201nameData = s.data();202nameSize = s.size();203}204205void parseSymbolVersion();206207// Get the NUL-terminated version suffix ("", "@...", or "@@...").208//209// For @@, the name has been truncated by insert(). 
For @, the name has been210// truncated by Symbol::parseSymbolVersion().211const char *getVersionSuffix() const { return nameData + nameSize; }212213uint32_t getGotIdx() const { return symAux[auxIdx].gotIdx; }214uint32_t getPltIdx() const { return symAux[auxIdx].pltIdx; }215uint32_t getTlsDescIdx() const { return symAux[auxIdx].tlsDescIdx; }216uint32_t getTlsGdIdx() const { return symAux[auxIdx].tlsGdIdx; }217218bool isInGot() const { return getGotIdx() != uint32_t(-1); }219bool isInPlt() const { return getPltIdx() != uint32_t(-1); }220221uint64_t getVA(int64_t addend = 0) const;222223uint64_t getGotOffset() const;224uint64_t getGotVA() const;225uint64_t getGotPltOffset() const;226uint64_t getGotPltVA() const;227uint64_t getPltVA() const;228uint64_t getSize() const;229OutputSection *getOutputSection() const;230231// The following two functions are used for symbol resolution.232//233// You are expected to call mergeProperties for all symbols in input234// files so that attributes that are attached to names rather than235// indivisual symbol (such as visibility) are merged together.236//237// Every time you read a new symbol from an input, you are supposed238// to call resolve() with the new symbol. That function replaces239// "this" object as a result of name resolution if the new symbol is240// more appropriate to be included in the output.241//242// For example, if "this" is an undefined symbol and a new symbol is243// a defined symbol, "this" is replaced with the new symbol.244void mergeProperties(const Symbol &other);245void resolve(const Undefined &other);246void resolve(const CommonSymbol &other);247void resolve(const Defined &other);248void resolve(const LazySymbol &other);249void resolve(const SharedSymbol &other);250251// If this is a lazy symbol, extract an input file and add the symbol252// in the file to the symbol table. 
Calling this function on253// non-lazy object causes a runtime error.254void extract() const;255256void checkDuplicate(const Defined &other) const;257258private:259bool shouldReplace(const Defined &other) const;260261protected:262Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding,263uint8_t stOther, uint8_t type)264: file(file), nameData(name.data()), nameSize(name.size()), type(type),265binding(binding), stOther(stOther), symbolKind(k), exportDynamic(false),266archSpecificBit(false) {}267268void overwrite(Symbol &sym, Kind k) const {269if (sym.traced)270printTraceSymbol(*this, sym.getName());271sym.file = file;272sym.type = type;273sym.binding = binding;274sym.stOther = (stOther & ~3) | sym.visibility();275sym.symbolKind = k;276}277278public:279// True if this symbol is in the Iplt sub-section of the Plt and the Igot280// sub-section of the .got.plt or .got.281LLVM_PREFERRED_TYPE(bool)282uint8_t isInIplt : 1;283284// True if this symbol needs a GOT entry and its GOT entry is actually in285// Igot. 
This will be true only for certain non-preemptible ifuncs.286LLVM_PREFERRED_TYPE(bool)287uint8_t gotInIgot : 1;288289// True if defined relative to a section discarded by ICF.290LLVM_PREFERRED_TYPE(bool)291uint8_t folded : 1;292293// Allow reuse of a bit between architecture-exclusive symbol flags.294// - needsTocRestore(): On PPC64, true if a call to this symbol needs to be295// followed by a restore of the toc pointer.296// - isTagged(): On AArch64, true if the symbol needs special relocation and297// metadata semantics because it's tagged, under the AArch64 MemtagABI.298LLVM_PREFERRED_TYPE(bool)299uint8_t archSpecificBit : 1;300bool needsTocRestore() const { return archSpecificBit; }301bool isTagged() const { return archSpecificBit; }302void setNeedsTocRestore(bool v) { archSpecificBit = v; }303void setIsTagged(bool v) {304archSpecificBit = v;305}306307// True if this symbol is defined by a symbol assignment or wrapped by --wrap.308//309// LTO shouldn't inline the symbol because it doesn't know the final content310// of the symbol.311LLVM_PREFERRED_TYPE(bool)312uint8_t scriptDefined : 1;313314// True if defined in a DSO. There may also be a definition in a relocatable315// object file.316LLVM_PREFERRED_TYPE(bool)317uint8_t dsoDefined : 1;318319// True if defined in a DSO as protected visibility.320LLVM_PREFERRED_TYPE(bool)321uint8_t dsoProtected : 1;322323// Temporary flags used to communicate which symbol entries need PLT and GOT324// entries during postScanRelocations();325std::atomic<uint16_t> flags;326327// A symAux index used to access GOT/PLT entry indexes. This is allocated in328// postScanRelocations().329uint32_t auxIdx;330uint32_t dynsymIndex;331332// If `file` is SharedFile (for SharedSymbol or copy-relocated Defined), this333// represents the Verdef index within the input DSO, which will be converted334// to a Verneed index in the output. 
Otherwise, this represents the Verdef335// index (VER_NDX_LOCAL, VER_NDX_GLOBAL, or a named version).336uint16_t versionId;337LLVM_PREFERRED_TYPE(bool)338uint8_t versionScriptAssigned : 1;339340// True if targeted by a range extension thunk.341LLVM_PREFERRED_TYPE(bool)342uint8_t thunkAccessed : 1;343344void setFlags(uint16_t bits) {345flags.fetch_or(bits, std::memory_order_relaxed);346}347bool hasFlag(uint16_t bit) const {348assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");349return flags.load(std::memory_order_relaxed) & bit;350}351352bool needsDynReloc() const {353return flags.load(std::memory_order_relaxed) &354(NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |355NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);356}357void allocateAux() {358assert(auxIdx == 0);359auxIdx = symAux.size();360symAux.emplace_back();361}362363bool isSection() const { return type == llvm::ELF::STT_SECTION; }364bool isTls() const { return type == llvm::ELF::STT_TLS; }365bool isFunc() const { return type == llvm::ELF::STT_FUNC; }366bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }367bool isObject() const { return type == llvm::ELF::STT_OBJECT; }368bool isFile() const { return type == llvm::ELF::STT_FILE; }369};370371// Represents a symbol that is defined in the current output file.372class Defined : public Symbol {373public:374Defined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,375uint8_t type, uint64_t value, uint64_t size, SectionBase *section)376: Symbol(DefinedKind, file, name, binding, stOther, type), value(value),377size(size), section(section) {378exportDynamic = config->exportDynamic;379}380void overwrite(Symbol &sym) const;381382static bool classof(const Symbol *s) { return s->isDefined(); }383384uint64_t value;385uint64_t size;386SectionBase *section;387};388389// Represents a common symbol.390//391// On Unix, it is traditionally allowed to write variable definitions392// without initialization 
expressions (such as "int foo;") to header393// files. Such definition is called "tentative definition".394//395// Using tentative definition is usually considered a bad practice396// because you should write only declarations (such as "extern int397// foo;") to header files. Nevertheless, the linker and the compiler398// have to do something to support bad code by allowing duplicate399// definitions for this particular case.400//401// Common symbols represent variable definitions without initializations.402// The compiler creates common symbols when it sees variable definitions403// without initialization (you can suppress this behavior and let the404// compiler create a regular defined symbol by -fno-common).405//406// The linker allows common symbols to be replaced by regular defined407// symbols. If there are remaining common symbols after name resolution is408// complete, they are converted to regular defined symbols in a .bss409// section. (Therefore, the later passes don't see any CommonSymbols.)410class CommonSymbol : public Symbol {411public:412CommonSymbol(InputFile *file, StringRef name, uint8_t binding,413uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)414: Symbol(CommonKind, file, name, binding, stOther, type),415alignment(alignment), size(size) {416exportDynamic = config->exportDynamic;417}418void overwrite(Symbol &sym) const {419Symbol::overwrite(sym, CommonKind);420auto &s = static_cast<CommonSymbol &>(sym);421s.alignment = alignment;422s.size = size;423}424425static bool classof(const Symbol *s) { return s->isCommon(); }426427uint32_t alignment;428uint64_t size;429};430431class Undefined : public Symbol {432public:433Undefined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,434uint8_t type, uint32_t discardedSecIdx = 0)435: Symbol(UndefinedKind, file, name, binding, stOther, type),436discardedSecIdx(discardedSecIdx) {}437void overwrite(Symbol &sym) const {438Symbol::overwrite(sym, UndefinedKind);439auto &s = 
static_cast<Undefined &>(sym);440s.discardedSecIdx = discardedSecIdx;441s.nonPrevailing = nonPrevailing;442}443444static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }445446// The section index if in a discarded section, 0 otherwise.447uint32_t discardedSecIdx;448bool nonPrevailing = false;449};450451class SharedSymbol : public Symbol {452public:453static bool classof(const Symbol *s) { return s->kind() == SharedKind; }454455SharedSymbol(InputFile &file, StringRef name, uint8_t binding,456uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,457uint32_t alignment)458: Symbol(SharedKind, &file, name, binding, stOther, type), value(value),459size(size), alignment(alignment) {460exportDynamic = true;461dsoProtected = visibility() == llvm::ELF::STV_PROTECTED;462// GNU ifunc is a mechanism to allow user-supplied functions to463// resolve PLT slot values at load-time. This is contrary to the464// regular symbol resolution scheme in which symbols are resolved just465// by name. Using this hook, you can program how symbols are solved466// for you program. For example, you can make "memcpy" to be resolved467// to a SSE-enabled version of memcpy only when a machine running the468// program supports the SSE instruction set.469//470// Naturally, such symbols should always be called through their PLT471// slots. What GNU ifunc symbols point to are resolver functions, and472// calling them directly doesn't make sense (unless you are writing a473// loader).474//475// For DSO symbols, we always call them through PLT slots anyway.476// So there's no difference between GNU ifunc and regular function477// symbols if they are in DSOs. 
So we can handle GNU_IFUNC as FUNC.478if (this->type == llvm::ELF::STT_GNU_IFUNC)479this->type = llvm::ELF::STT_FUNC;480}481void overwrite(Symbol &sym) const {482Symbol::overwrite(sym, SharedKind);483auto &s = static_cast<SharedSymbol &>(sym);484s.dsoProtected = dsoProtected;485s.value = value;486s.size = size;487s.alignment = alignment;488}489490uint64_t value; // st_value491uint64_t size; // st_size492uint32_t alignment;493};494495// LazySymbol symbols represent symbols in object files between --start-lib and496// --end-lib options. LLD also handles traditional archives as if all the files497// in the archive are surrounded by --start-lib and --end-lib.498//499// A special complication is the handling of weak undefined symbols. They should500// not load a file, but we have to remember we have seen both the weak undefined501// and the lazy. We represent that with a lazy symbol with a weak binding. This502// means that code looking for undefined symbols normally also has to take lazy503// symbols into consideration.504class LazySymbol : public Symbol {505public:506LazySymbol(InputFile &file)507: Symbol(LazyKind, &file, {}, llvm::ELF::STB_GLOBAL,508llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}509void overwrite(Symbol &sym) const { Symbol::overwrite(sym, LazyKind); }510511static bool classof(const Symbol *s) { return s->kind() == LazyKind; }512};513514// Some linker-generated symbols need to be created as515// Defined symbols.516struct ElfSym {517// __bss_start518static Defined *bss;519520// etext and _etext521static Defined *etext1;522static Defined *etext2;523524// edata and _edata525static Defined *edata1;526static Defined *edata2;527528// end and _end529static Defined *end1;530static Defined *end2;531532// The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to533// be at some offset from the base of the .got section, usually 0 or534// the end of the .got.535static Defined *globalOffsetTable;536537// _gp, _gp_disp and __gnu_local_gp symbols. 
Only for MIPS.538static Defined *mipsGp;539static Defined *mipsGpDisp;540static Defined *mipsLocalGp;541542// __global_pointer$ for RISC-V.543static Defined *riscvGlobalPointer;544545// __rel{,a}_iplt_{start,end} symbols.546static Defined *relaIpltStart;547static Defined *relaIpltEnd;548549// _TLS_MODULE_BASE_ on targets that support TLSDESC.550static Defined *tlsModuleBase;551};552553// A buffer class that is large enough to hold any Symbol-derived554// object. We allocate memory using this class and instantiate a symbol555// using the placement new.556557// It is important to keep the size of SymbolUnion small for performance and558// memory usage reasons. 64 bytes is a soft limit based on the size of Defined559// on a 64-bit system. This is enforced by a static_assert in Symbols.cpp.560union SymbolUnion {561alignas(Defined) char a[sizeof(Defined)];562alignas(CommonSymbol) char b[sizeof(CommonSymbol)];563alignas(Undefined) char c[sizeof(Undefined)];564alignas(SharedSymbol) char d[sizeof(SharedSymbol)];565alignas(LazySymbol) char e[sizeof(LazySymbol)];566};567568template <typename... T> Defined *makeDefined(T &&...args) {569auto *sym = getSpecificAllocSingleton<SymbolUnion>().Allocate();570memset(sym, 0, sizeof(Symbol));571auto &s = *new (reinterpret_cast<Defined *>(sym)) Defined(std::forward<T>(args)...);572return &s;573}574575void reportDuplicate(const Symbol &sym, const InputFile *newFile,576InputSectionBase *errSec, uint64_t errOffset);577void maybeWarnUnorderableSymbol(const Symbol *sym);578bool computeIsPreemptible(const Symbol &sym);579580} // namespace elf581} // namespace lld582583#endif584585586