// Path: blob/main/contrib/llvm-project/lld/MachO/Symbols.h
// 34889 views
//===- Symbols.h ------------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLD_MACHO_SYMBOLS_H9#define LLD_MACHO_SYMBOLS_H1011#include "Config.h"12#include "InputFiles.h"13#include "Target.h"1415#include "llvm/Object/Archive.h"16#include "llvm/Support/MathExtras.h"1718namespace lld {19namespace macho {2021class MachHeaderSection;2223struct StringRefZ {24StringRefZ(const char *s) : data(s), size(-1) {}25StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}2627const char *data;28const uint32_t size;29};3031class Symbol {32public:33enum Kind {34DefinedKind,35UndefinedKind,36CommonKind,37DylibKind,38LazyArchiveKind,39LazyObjectKind,40AliasKind,41};4243virtual ~Symbol() {}4445Kind kind() const { return symbolKind; }4647StringRef getName() const {48if (nameSize == (uint32_t)-1)49nameSize = strlen(nameData);50return {nameData, nameSize};51}5253bool isLive() const { return used; }54bool isLazy() const {55return symbolKind == LazyArchiveKind || symbolKind == LazyObjectKind;56}5758virtual uint64_t getVA() const { return 0; }5960virtual bool isWeakDef() const { return false; }6162// Only undefined or dylib symbols can be weak references. A weak reference63// need not be satisfied at runtime, e.g. 
due to the symbol not being64// available on a given target platform.65virtual bool isWeakRef() const { return false; }6667virtual bool isTlv() const { return false; }6869// Whether this symbol is in the GOT or TLVPointer sections.70bool isInGot() const { return gotIndex != UINT32_MAX; }7172// Whether this symbol is in the StubsSection.73bool isInStubs() const { return stubsIndex != UINT32_MAX; }7475uint64_t getStubVA() const;76uint64_t getLazyPtrVA() const;77uint64_t getGotVA() const;78uint64_t getTlvVA() const;79uint64_t resolveBranchVA() const {80assert(isa<Defined>(this) || isa<DylibSymbol>(this));81return isInStubs() ? getStubVA() : getVA();82}83uint64_t resolveGotVA() const { return isInGot() ? getGotVA() : getVA(); }84uint64_t resolveTlvVA() const { return isInGot() ? getTlvVA() : getVA(); }8586// The index of this symbol in the GOT or the TLVPointer section, depending87// on whether it is a thread-local. A given symbol cannot be referenced by88// both these sections at once.89uint32_t gotIndex = UINT32_MAX;90uint32_t lazyBindOffset = UINT32_MAX;91uint32_t stubsHelperIndex = UINT32_MAX;92uint32_t stubsIndex = UINT32_MAX;93uint32_t symtabIndex = UINT32_MAX;9495InputFile *getFile() const { return file; }9697protected:98Symbol(Kind k, StringRefZ name, InputFile *file)99: symbolKind(k), nameData(name.data), file(file), nameSize(name.size),100isUsedInRegularObj(!file || isa<ObjFile>(file)),101used(!config->deadStrip) {}102103Kind symbolKind;104const char *nameData;105InputFile *file;106mutable uint32_t nameSize;107108public:109// True if this symbol was referenced by a regular (non-bitcode) object.110bool isUsedInRegularObj : 1;111112// True if this symbol is used from a live section.113bool used : 1;114};115116class Defined : public Symbol {117public:118Defined(StringRefZ name, InputFile *file, InputSection *isec, uint64_t value,119uint64_t size, bool isWeakDef, bool isExternal, bool isPrivateExtern,120bool includeInSymtab, bool isReferencedDynamically, bool 
noDeadStrip,121bool canOverrideWeakDef = false, bool isWeakDefCanBeHidden = false,122bool interposable = false);123124bool isWeakDef() const override { return weakDef; }125bool isExternalWeakDef() const {126return isWeakDef() && isExternal() && !privateExtern;127}128bool isTlv() const override;129130bool isExternal() const { return external; }131bool isAbsolute() const { return originalIsec == nullptr; }132133uint64_t getVA() const override;134135// Returns the object file that this symbol was defined in. This value differs136// from `getFile()` if the symbol originated from a bitcode file.137ObjFile *getObjectFile() const;138139std::string getSourceLocation();140141// Get the canonical InputSection of the symbol.142InputSection *isec() const;143144// Get the canonical unwind entry of the symbol.145ConcatInputSection *unwindEntry() const;146147static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }148149// Place the bitfields first so that they can get placed in the tail padding150// of the parent class, on platforms which support it.151bool overridesWeakDef : 1;152// Whether this symbol should appear in the output binary's export trie.153bool privateExtern : 1;154// Whether this symbol should appear in the output symbol table.155bool includeInSymtab : 1;156// Whether this symbol was folded into a different symbol during ICF.157bool wasIdenticalCodeFolded : 1;158// Symbols marked referencedDynamically won't be removed from the output's159// symbol table by tools like strip. In theory, this could be set on arbitrary160// symbols in input object files. 
In practice, it's used solely for the161// synthetic __mh_execute_header symbol.162// This is information for the static linker, and it's also written to the163// output file's symbol table for tools running later (such as `strip`).164bool referencedDynamically : 1;165// Set on symbols that should not be removed by dead code stripping.166// Set for example on `__attribute__((used))` globals, or on some Objective-C167// metadata. This is information only for the static linker and not written168// to the output.169bool noDeadStrip : 1;170// Whether references to this symbol can be interposed at runtime to point to171// a different symbol definition (with the same name). For example, if both172// dylib A and B define an interposable symbol _foo, and we load A before B at173// runtime, then all references to _foo within dylib B will point to the174// definition in dylib A.175//176// Only extern symbols may be interposable.177bool interposable : 1;178179bool weakDefCanBeHidden : 1;180181private:182const bool weakDef : 1;183const bool external : 1;184185public:186// The native InputSection of the symbol. The symbol may be moved to another187// InputSection in which case originalIsec->canonical() will point to the new188// InputSection189InputSection *originalIsec;190// Contains the offset from the containing subsection. Note that this is191// different from nlist::n_value, which is the absolute address of the symbol.192uint64_t value;193// size is only calculated for regular (non-bitcode) symbols.194uint64_t size;195// This can be a subsection of either __compact_unwind or __eh_frame.196ConcatInputSection *originalUnwindEntry = nullptr;197};198199// This enum does double-duty: as a symbol property, it indicates whether & how200// a dylib symbol is referenced. As a DylibFile property, it indicates the kind201// of referenced symbols contained within the file. 
If there are both weak202// and strong references to the same file, we will count the file as203// strongly-referenced.204enum class RefState : uint8_t { Unreferenced = 0, Weak = 1, Strong = 2 };205206class Undefined : public Symbol {207public:208Undefined(StringRefZ name, InputFile *file, RefState refState,209bool wasBitcodeSymbol)210: Symbol(UndefinedKind, name, file), refState(refState),211wasBitcodeSymbol(wasBitcodeSymbol) {212assert(refState != RefState::Unreferenced);213}214215bool isWeakRef() const override { return refState == RefState::Weak; }216217static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }218219RefState refState : 2;220bool wasBitcodeSymbol;221};222223// On Unix, it is traditionally allowed to write variable definitions without224// initialization expressions (such as "int foo;") to header files. These are225// called tentative definitions.226//227// Using tentative definitions is usually considered a bad practice; you should228// write only declarations (such as "extern int foo;") to header files.229// Nevertheless, the linker and the compiler have to do something to support230// bad code by allowing duplicate definitions for this particular case.231//232// The compiler creates common symbols when it sees tentative definitions.233// (You can suppress this behavior and let the compiler create a regular234// defined symbol by passing -fno-common. -fno-common is the default in clang235// as of LLVM 11.0.) When linking the final binary, if there are remaining236// common symbols after name resolution is complete, the linker converts them237// to regular defined symbols in a __common section.238class CommonSymbol : public Symbol {239public:240CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align,241bool isPrivateExtern)242: Symbol(CommonKind, name, file), size(size),243align(align != 1 ? 
align : llvm::PowerOf2Ceil(size)),244privateExtern(isPrivateExtern) {245// TODO: cap maximum alignment246}247248static bool classof(const Symbol *s) { return s->kind() == CommonKind; }249250const uint64_t size;251const uint32_t align;252const bool privateExtern;253};254255class DylibSymbol : public Symbol {256public:257DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef,258RefState refState, bool isTlv)259: Symbol(DylibKind, name, file), shouldReexport(false),260refState(refState), weakDef(isWeakDef), tlv(isTlv) {261if (file && refState > RefState::Unreferenced)262file->numReferencedSymbols++;263}264265uint64_t getVA() const override;266bool isWeakDef() const override { return weakDef; }267268// Symbols from weak libraries/frameworks are also weakly-referenced.269bool isWeakRef() const override {270return refState == RefState::Weak ||271(file && getFile()->umbrella->forceWeakImport);272}273bool isReferenced() const { return refState != RefState::Unreferenced; }274bool isTlv() const override { return tlv; }275bool isDynamicLookup() const { return file == nullptr; }276bool hasStubsHelper() const { return stubsHelperIndex != UINT32_MAX; }277278DylibFile *getFile() const {279assert(!isDynamicLookup());280return cast<DylibFile>(file);281}282283static bool classof(const Symbol *s) { return s->kind() == DylibKind; }284285RefState getRefState() const { return refState; }286287void reference(RefState newState) {288assert(newState > RefState::Unreferenced);289if (refState == RefState::Unreferenced && file)290getFile()->numReferencedSymbols++;291refState = std::max(refState, newState);292}293294void unreference() {295// dynamic_lookup symbols have no file.296if (refState > RefState::Unreferenced && file) {297assert(getFile()->numReferencedSymbols > 0);298getFile()->numReferencedSymbols--;299}300}301302bool shouldReexport : 1;303private:304RefState refState : 2;305const bool weakDef : 1;306const bool tlv : 1;307};308309class LazyArchive : public Symbol 
{310public:311LazyArchive(ArchiveFile *file, const llvm::object::Archive::Symbol &sym)312: Symbol(LazyArchiveKind, sym.getName(), file), sym(sym) {}313314ArchiveFile *getFile() const { return cast<ArchiveFile>(file); }315void fetchArchiveMember();316317static bool classof(const Symbol *s) { return s->kind() == LazyArchiveKind; }318319private:320const llvm::object::Archive::Symbol sym;321};322323// A defined symbol in an ObjFile/BitcodeFile surrounded by --start-lib and324// --end-lib.325class LazyObject : public Symbol {326public:327LazyObject(InputFile &file, StringRef name)328: Symbol(LazyObjectKind, name, &file) {329isUsedInRegularObj = false;330}331332static bool classof(const Symbol *s) { return s->kind() == LazyObjectKind; }333};334335// Represents N_INDR symbols. Note that if we are given valid, linkable inputs,336// then all AliasSymbol instances will be converted into one of the other Symbol337// types after `createAliases()` runs.338class AliasSymbol final : public Symbol {339public:340AliasSymbol(InputFile *file, StringRef name, StringRef aliasedName,341bool isPrivateExtern)342: Symbol(AliasKind, name, file), privateExtern(isPrivateExtern),343aliasedName(aliasedName) {}344345StringRef getAliasedName() const { return aliasedName; }346347static bool classof(const Symbol *s) { return s->kind() == AliasKind; }348349const bool privateExtern;350351private:352StringRef aliasedName;353};354355union SymbolUnion {356alignas(Defined) char a[sizeof(Defined)];357alignas(Undefined) char b[sizeof(Undefined)];358alignas(CommonSymbol) char c[sizeof(CommonSymbol)];359alignas(DylibSymbol) char d[sizeof(DylibSymbol)];360alignas(LazyArchive) char e[sizeof(LazyArchive)];361alignas(LazyObject) char f[sizeof(LazyObject)];362alignas(AliasSymbol) char g[sizeof(AliasSymbol)];363};364365template <typename T, typename... 
ArgT>366T *replaceSymbol(Symbol *s, ArgT &&...arg) {367static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");368static_assert(alignof(T) <= alignof(SymbolUnion),369"SymbolUnion not aligned enough");370assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&371"Not a Symbol");372373bool isUsedInRegularObj = s->isUsedInRegularObj;374bool used = s->used;375T *sym = new (s) T(std::forward<ArgT>(arg)...);376sym->isUsedInRegularObj |= isUsedInRegularObj;377sym->used |= used;378return sym;379}380381// Can a symbol's address only be resolved at runtime?382inline bool needsBinding(const Symbol *sym) {383if (isa<DylibSymbol>(sym))384return true;385if (const auto *defined = dyn_cast<Defined>(sym))386return defined->isExternalWeakDef() || defined->interposable;387return false;388}389390// Symbols with `l` or `L` as a prefix are linker-private and never appear in391// the output.392inline bool isPrivateLabel(StringRef name) {393return name.starts_with("l") || name.starts_with("L");394}395} // namespace macho396397std::string toString(const macho::Symbol &);398std::string toMachOString(const llvm::object::Archive::Symbol &);399400} // namespace lld401402#endif403404405