// Path: blob/main/contrib/llvm-project/lld/ELF/Symbols.cpp
// 34878 views
//===- Symbols.cpp --------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "Symbols.h"9#include "Driver.h"10#include "InputFiles.h"11#include "InputSection.h"12#include "OutputSections.h"13#include "SyntheticSections.h"14#include "Target.h"15#include "Writer.h"16#include "lld/Common/ErrorHandler.h"17#include "llvm/Demangle/Demangle.h"18#include "llvm/Support/Compiler.h"19#include <cstring>2021using namespace llvm;22using namespace llvm::object;23using namespace llvm::ELF;24using namespace lld;25using namespace lld::elf;2627static_assert(sizeof(SymbolUnion) <= 64, "SymbolUnion too large");2829template <typename T> struct AssertSymbol {30static_assert(std::is_trivially_destructible<T>(),31"Symbol types must be trivially destructible");32static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");33static_assert(alignof(T) <= alignof(SymbolUnion),34"SymbolUnion not aligned enough");35};3637LLVM_ATTRIBUTE_UNUSED static inline void assertSymbols() {38AssertSymbol<Defined>();39AssertSymbol<CommonSymbol>();40AssertSymbol<Undefined>();41AssertSymbol<SharedSymbol>();42AssertSymbol<LazySymbol>();43}4445// Returns a symbol for an error message.46static std::string maybeDemangleSymbol(StringRef symName) {47return elf::config->demangle ? 
demangle(symName.str()) : symName.str();48}4950std::string lld::toString(const elf::Symbol &sym) {51StringRef name = sym.getName();52std::string ret = maybeDemangleSymbol(name);5354const char *suffix = sym.getVersionSuffix();55if (*suffix == '@')56ret += suffix;57return ret;58}5960Defined *ElfSym::bss;61Defined *ElfSym::etext1;62Defined *ElfSym::etext2;63Defined *ElfSym::edata1;64Defined *ElfSym::edata2;65Defined *ElfSym::end1;66Defined *ElfSym::end2;67Defined *ElfSym::globalOffsetTable;68Defined *ElfSym::mipsGp;69Defined *ElfSym::mipsGpDisp;70Defined *ElfSym::mipsLocalGp;71Defined *ElfSym::riscvGlobalPointer;72Defined *ElfSym::relaIpltStart;73Defined *ElfSym::relaIpltEnd;74Defined *ElfSym::tlsModuleBase;75SmallVector<SymbolAux, 0> elf::symAux;7677static uint64_t getSymVA(const Symbol &sym, int64_t addend) {78switch (sym.kind()) {79case Symbol::DefinedKind: {80auto &d = cast<Defined>(sym);81SectionBase *isec = d.section;8283// This is an absolute symbol.84if (!isec)85return d.value;8687assert(isec != &InputSection::discarded);8889uint64_t offset = d.value;9091// An object in an SHF_MERGE section might be referenced via a92// section symbol (as a hack for reducing the number of local93// symbols).94// Depending on the addend, the reference via a section symbol95// refers to a different object in the merge section.96// Since the objects in the merge section are not necessarily97// contiguous in the output, the addend can thus affect the final98// VA in a non-linear way.99// To make this work, we incorporate the addend into the section100// offset (and zero out the addend for later processing) so that101// we find the right object in the section.102if (d.isSection())103offset += addend;104105// In the typical case, this is actually very simple and boils106// down to adding together 3 numbers:107// 1. The address of the output section.108// 2. The offset of the input section within the output section.109// 3. 
The offset within the input section (this addition happens110// inside InputSection::getOffset).111//112// If you understand the data structures involved with this next113// line (and how they get built), then you have a pretty good114// understanding of the linker.115uint64_t va = isec->getVA(offset);116if (d.isSection())117va -= addend;118119// MIPS relocatable files can mix regular and microMIPS code.120// Linker needs to distinguish such code. To do so microMIPS121// symbols has the `STO_MIPS_MICROMIPS` flag in the `st_other`122// field. Unfortunately, the `MIPS::relocate()` method has123// a symbol value only. To pass type of the symbol (regular/microMIPS)124// to that routine as well as other places where we write125// a symbol value as-is (.dynamic section, `Elf_Ehdr::e_entry`126// field etc) do the same trick as compiler uses to mark microMIPS127// for CPU - set the less-significant bit.128if (config->emachine == EM_MIPS && isMicroMips() &&129((sym.stOther & STO_MIPS_MICROMIPS) || sym.hasFlag(NEEDS_COPY)))130va |= 1;131132if (d.isTls() && !config->relocatable) {133// Use the address of the TLS segment's first section rather than the134// segment's address, because segment addresses aren't initialized until135// after sections are finalized. (e.g. 
Measuring the size of .rela.dyn136// for Android relocation packing requires knowing TLS symbol addresses137// during section finalization.)138if (!Out::tlsPhdr || !Out::tlsPhdr->firstSec)139fatal(toString(d.file) +140" has an STT_TLS symbol but doesn't have an SHF_TLS section");141return va - Out::tlsPhdr->firstSec->addr;142}143return va;144}145case Symbol::SharedKind:146case Symbol::UndefinedKind:147return 0;148case Symbol::LazyKind:149llvm_unreachable("lazy symbol reached writer");150case Symbol::CommonKind:151llvm_unreachable("common symbol reached writer");152case Symbol::PlaceholderKind:153llvm_unreachable("placeholder symbol reached writer");154}155llvm_unreachable("invalid symbol kind");156}157158uint64_t Symbol::getVA(int64_t addend) const {159return getSymVA(*this, addend) + addend;160}161162uint64_t Symbol::getGotVA() const {163if (gotInIgot)164return in.igotPlt->getVA() + getGotPltOffset();165return in.got->getVA() + getGotOffset();166}167168uint64_t Symbol::getGotOffset() const {169return getGotIdx() * target->gotEntrySize;170}171172uint64_t Symbol::getGotPltVA() const {173if (isInIplt)174return in.igotPlt->getVA() + getGotPltOffset();175return in.gotPlt->getVA() + getGotPltOffset();176}177178uint64_t Symbol::getGotPltOffset() const {179if (isInIplt)180return getPltIdx() * target->gotEntrySize;181return (getPltIdx() + target->gotPltHeaderEntriesNum) * target->gotEntrySize;182}183184uint64_t Symbol::getPltVA() const {185uint64_t outVA = isInIplt186? in.iplt->getVA() + getPltIdx() * target->ipltEntrySize187: in.plt->getVA() + in.plt->headerSize +188getPltIdx() * target->pltEntrySize;189190// While linking microMIPS code PLT code are always microMIPS191// code. 
Set the less-significant bit to track that fact.192// See detailed comment in the `getSymVA` function.193if (config->emachine == EM_MIPS && isMicroMips())194outVA |= 1;195return outVA;196}197198uint64_t Symbol::getSize() const {199if (const auto *dr = dyn_cast<Defined>(this))200return dr->size;201return cast<SharedSymbol>(this)->size;202}203204OutputSection *Symbol::getOutputSection() const {205if (auto *s = dyn_cast<Defined>(this)) {206if (auto *sec = s->section)207return sec->getOutputSection();208return nullptr;209}210return nullptr;211}212213// If a symbol name contains '@', the characters after that is214// a symbol version name. This function parses that.215void Symbol::parseSymbolVersion() {216// Return if localized by a local: pattern in a version script.217if (versionId == VER_NDX_LOCAL)218return;219StringRef s = getName();220size_t pos = s.find('@');221if (pos == StringRef::npos)222return;223StringRef verstr = s.substr(pos + 1);224225// Truncate the symbol name so that it doesn't include the version string.226nameSize = pos;227228if (verstr.empty())229return;230231// If this is not in this DSO, it is not a definition.232if (!isDefined())233return;234235// '@@' in a symbol name means the default version.236// It is usually the most recent one.237bool isDefault = (verstr[0] == '@');238if (isDefault)239verstr = verstr.substr(1);240241for (const VersionDefinition &ver : namedVersionDefs()) {242if (ver.name != verstr)243continue;244245if (isDefault)246versionId = ver.id;247else248versionId = ver.id | VERSYM_HIDDEN;249return;250}251252// It is an error if the specified version is not defined.253// Usually version script is not provided when linking executable,254// but we may still want to override a versioned symbol from DSO,255// so we do not report error in this case. 
We also do not error256// if the symbol has a local version as it won't be in the dynamic257// symbol table.258if (config->shared && versionId != VER_NDX_LOCAL)259error(toString(file) + ": symbol " + s + " has undefined version " +260verstr);261}262263void Symbol::extract() const {264if (file->lazy) {265file->lazy = false;266parseFile(file);267}268}269270uint8_t Symbol::computeBinding() const {271auto v = visibility();272if ((v != STV_DEFAULT && v != STV_PROTECTED) || versionId == VER_NDX_LOCAL)273return STB_LOCAL;274if (binding == STB_GNU_UNIQUE && !config->gnuUnique)275return STB_GLOBAL;276return binding;277}278279bool Symbol::includeInDynsym() const {280if (computeBinding() == STB_LOCAL)281return false;282if (!isDefined() && !isCommon())283// This should unconditionally return true, unfortunately glibc -static-pie284// expects undefined weak symbols not to exist in .dynsym, e.g.285// __pthread_mutex_lock reference in _dl_add_to_namespace_list,286// __pthread_initialize_minimal reference in csu/libc-start.c.287return !(isUndefWeak() && config->noDynamicLinker);288289return exportDynamic || inDynamicList;290}291292// Print out a log message for --trace-symbol.293void elf::printTraceSymbol(const Symbol &sym, StringRef name) {294std::string s;295if (sym.isUndefined())296s = ": reference to ";297else if (sym.isLazy())298s = ": lazy definition of ";299else if (sym.isShared())300s = ": shared definition of ";301else if (sym.isCommon())302s = ": common definition of ";303else304s = ": definition of ";305306message(toString(sym.file) + s + name);307}308309static void recordWhyExtract(const InputFile *reference,310const InputFile &extracted, const Symbol &sym) {311ctx.whyExtractRecords.emplace_back(toString(reference), &extracted, sym);312}313314void elf::maybeWarnUnorderableSymbol(const Symbol *sym) {315if (!config->warnSymbolOrdering)316return;317318// If UnresolvedPolicy::Ignore is used, no "undefined symbol" error/warning is319// emitted. 
It makes sense to not warn on undefined symbols (excluding those320// demoted by demoteSymbols).321//322// Note, ld.bfd --symbol-ordering-file= does not warn on undefined symbols,323// but we don't have to be compatible here.324if (sym->isUndefined() && !cast<Undefined>(sym)->discardedSecIdx &&325config->unresolvedSymbols == UnresolvedPolicy::Ignore)326return;327328const InputFile *file = sym->file;329auto *d = dyn_cast<Defined>(sym);330331auto report = [&](StringRef s) { warn(toString(file) + s + sym->getName()); };332333if (sym->isUndefined()) {334if (cast<Undefined>(sym)->discardedSecIdx)335report(": unable to order discarded symbol: ");336else337report(": unable to order undefined symbol: ");338} else if (sym->isShared())339report(": unable to order shared symbol: ");340else if (d && !d->section)341report(": unable to order absolute symbol: ");342else if (d && isa<OutputSection>(d->section))343report(": unable to order synthetic symbol: ");344else if (d && !d->section->isLive())345report(": unable to order discarded symbol: ");346}347348// Returns true if a symbol can be replaced at load-time by a symbol349// with the same name defined in other ELF executable or DSO.350bool elf::computeIsPreemptible(const Symbol &sym) {351assert(!sym.isLocal() || sym.isPlaceholder());352353// Only symbols with default visibility that appear in dynsym can be354// preempted. Symbols with protected visibility cannot be preempted.355if (!sym.includeInDynsym() || sym.visibility() != STV_DEFAULT)356return false;357358// At this point copy relocations have not been created yet, so any359// symbol that is not defined locally is preemptible.360if (!sym.isDefined())361return true;362363if (!config->shared)364return false;365366// If -Bsymbolic or --dynamic-list is specified, or -Bsymbolic-functions is367// specified and the symbol is STT_FUNC, the symbol is preemptible iff it is368// in the dynamic list. 
-Bsymbolic-non-weak-functions is a non-weak subset of369// -Bsymbolic-functions.370if (config->symbolic ||371(config->bsymbolic == BsymbolicKind::NonWeak &&372sym.binding != STB_WEAK) ||373(config->bsymbolic == BsymbolicKind::Functions && sym.isFunc()) ||374(config->bsymbolic == BsymbolicKind::NonWeakFunctions && sym.isFunc() &&375sym.binding != STB_WEAK))376return sym.inDynamicList;377return true;378}379380// Merge symbol properties.381//382// When we have many symbols of the same name, we choose one of them,383// and that's the result of symbol resolution. However, symbols that384// were not chosen still affect some symbol properties.385void Symbol::mergeProperties(const Symbol &other) {386if (other.exportDynamic)387exportDynamic = true;388389// DSO symbols do not affect visibility in the output.390if (!other.isShared() && other.visibility() != STV_DEFAULT) {391uint8_t v = visibility(), ov = other.visibility();392setVisibility(v == STV_DEFAULT ? ov : std::min(v, ov));393}394}395396void Symbol::resolve(const Undefined &other) {397if (other.visibility() != STV_DEFAULT) {398uint8_t v = visibility(), ov = other.visibility();399setVisibility(v == STV_DEFAULT ? ov : std::min(v, ov));400}401// An undefined symbol with non default visibility must be satisfied402// in the same DSO.403//404// If this is a non-weak defined symbol in a discarded section, override the405// existing undefined symbol for better error message later.406if (isPlaceholder() || (isShared() && other.visibility() != STV_DEFAULT) ||407(isUndefined() && other.binding != STB_WEAK && other.discardedSecIdx)) {408other.overwrite(*this);409return;410}411412if (traced)413printTraceSymbol(other, getName());414415if (isLazy()) {416// An undefined weak will not extract archive members. 
See comment on Lazy417// in Symbols.h for the details.418if (other.binding == STB_WEAK) {419binding = STB_WEAK;420type = other.type;421return;422}423424// Do extra check for --warn-backrefs.425//426// --warn-backrefs is an option to prevent an undefined reference from427// extracting an archive member written earlier in the command line. It can428// be used to keep compatibility with GNU linkers to some degree. I'll429// explain the feature and why you may find it useful in this comment.430//431// lld's symbol resolution semantics is more relaxed than traditional Unix432// linkers. For example,433//434// ld.lld foo.a bar.o435//436// succeeds even if bar.o contains an undefined symbol that has to be437// resolved by some object file in foo.a. Traditional Unix linkers don't438// allow this kind of backward reference, as they visit each file only once439// from left to right in the command line while resolving all undefined440// symbols at the moment of visiting.441//442// In the above case, since there's no undefined symbol when a linker visits443// foo.a, no files are pulled out from foo.a, and because the linker forgets444// about foo.a after visiting, it can't resolve undefined symbols in bar.o445// that could have been resolved otherwise.446//447// That lld accepts more relaxed form means that (besides it'd make more448// sense) you can accidentally write a command line or a build file that449// works only with lld, even if you have a plan to distribute it to wider450// users who may be using GNU linkers. With --warn-backrefs, you can detect451// a library order that doesn't work with other Unix linkers.452//453// The option is also useful to detect cyclic dependencies between static454// archives. Again, lld accepts455//456// ld.lld foo.a bar.a457//458// even if foo.a and bar.a depend on each other. With --warn-backrefs, it is459// handled as an error.460//461// Here is how the option works. We assign a group ID to each file. 
A file462// with a smaller group ID can pull out object files from an archive file463// with an equal or greater group ID. Otherwise, it is a reverse dependency464// and an error.465//466// A file outside --{start,end}-group gets a fresh ID when instantiated. All467// files within the same --{start,end}-group get the same group ID. E.g.468//469// ld.lld A B --start-group C D --end-group E470//471// A forms group 0. B form group 1. C and D (including their member object472// files) form group 2. E forms group 3. I think that you can see how this473// group assignment rule simulates the traditional linker's semantics.474bool backref = config->warnBackrefs && other.file &&475file->groupId < other.file->groupId;476extract();477478if (!config->whyExtract.empty())479recordWhyExtract(other.file, *file, *this);480481// We don't report backward references to weak symbols as they can be482// overridden later.483//484// A traditional linker does not error for -ldef1 -lref -ldef2 (linking485// sandwich), where def2 may or may not be the same as def1. We don't want486// to warn for this case, so dismiss the warning if we see a subsequent lazy487// definition. this->file needs to be saved because in the case of LTO it488// may be reset to nullptr or be replaced with a file named lto.tmp.489if (backref && !isWeak())490ctx.backwardReferences.try_emplace(this,491std::make_pair(other.file, file));492return;493}494495// Undefined symbols in a SharedFile do not change the binding.496if (isa_and_nonnull<SharedFile>(other.file))497return;498499if (isUndefined() || isShared()) {500// The binding will be weak if there is at least one reference and all are501// weak. The binding has one opportunity to change to weak: if the first502// reference is weak.503if (other.binding != STB_WEAK || !referenced)504binding = other.binding;505}506}507508// Compare two symbols. 
Return true if the new symbol should win.509bool Symbol::shouldReplace(const Defined &other) const {510if (LLVM_UNLIKELY(isCommon())) {511if (config->warnCommon)512warn("common " + getName() + " is overridden");513return !other.isWeak();514}515if (!isDefined())516return true;517518// Incoming STB_GLOBAL overrides STB_WEAK/STB_GNU_UNIQUE. -fgnu-unique changes519// some vague linkage data in COMDAT from STB_WEAK to STB_GNU_UNIQUE. Treat520// STB_GNU_UNIQUE like STB_WEAK so that we prefer the first among all521// STB_WEAK/STB_GNU_UNIQUE copies. If we prefer an incoming STB_GNU_UNIQUE to522// an existing STB_WEAK, there may be discarded section errors because the523// selected copy may be in a non-prevailing COMDAT.524return !isGlobal() && other.isGlobal();525}526527void elf::reportDuplicate(const Symbol &sym, const InputFile *newFile,528InputSectionBase *errSec, uint64_t errOffset) {529if (config->allowMultipleDefinition)530return;531// In glibc<2.32, crti.o has .gnu.linkonce.t.__x86.get_pc_thunk.bx, which532// is sort of proto-comdat. 
There is actually no duplicate if we have533// full support for .gnu.linkonce.534const Defined *d = dyn_cast<Defined>(&sym);535if (!d || d->getName() == "__x86.get_pc_thunk.bx")536return;537// Allow absolute symbols with the same value for GNU ld compatibility.538if (!d->section && !errSec && errOffset && d->value == errOffset)539return;540if (!d->section || !errSec) {541errorOrWarn("duplicate symbol: " + toString(sym) + "\n>>> defined in " +542toString(sym.file) + "\n>>> defined in " + toString(newFile));543return;544}545546// Construct and print an error message in the form of:547//548// ld.lld: error: duplicate symbol: foo549// >>> defined at bar.c:30550// >>> bar.o (/home/alice/src/bar.o)551// >>> defined at baz.c:563552// >>> baz.o in archive libbaz.a553auto *sec1 = cast<InputSectionBase>(d->section);554std::string src1 = sec1->getSrcMsg(sym, d->value);555std::string obj1 = sec1->getObjMsg(d->value);556std::string src2 = errSec->getSrcMsg(sym, errOffset);557std::string obj2 = errSec->getObjMsg(errOffset);558559std::string msg = "duplicate symbol: " + toString(sym) + "\n>>> defined at ";560if (!src1.empty())561msg += src1 + "\n>>> ";562msg += obj1 + "\n>>> defined at ";563if (!src2.empty())564msg += src2 + "\n>>> ";565msg += obj2;566errorOrWarn(msg);567}568569void Symbol::checkDuplicate(const Defined &other) const {570if (isDefined() && !isWeak() && !other.isWeak())571reportDuplicate(*this, other.file,572dyn_cast_or_null<InputSectionBase>(other.section),573other.value);574}575576void Symbol::resolve(const CommonSymbol &other) {577if (other.exportDynamic)578exportDynamic = true;579if (other.visibility() != STV_DEFAULT) {580uint8_t v = visibility(), ov = other.visibility();581setVisibility(v == STV_DEFAULT ? 
ov : std::min(v, ov));582}583if (isDefined() && !isWeak()) {584if (config->warnCommon)585warn("common " + getName() + " is overridden");586return;587}588589if (CommonSymbol *oldSym = dyn_cast<CommonSymbol>(this)) {590if (config->warnCommon)591warn("multiple common of " + getName());592oldSym->alignment = std::max(oldSym->alignment, other.alignment);593if (oldSym->size < other.size) {594oldSym->file = other.file;595oldSym->size = other.size;596}597return;598}599600if (auto *s = dyn_cast<SharedSymbol>(this)) {601// Increase st_size if the shared symbol has a larger st_size. The shared602// symbol may be created from common symbols. The fact that some object603// files were linked into a shared object first should not change the604// regular rule that picks the largest st_size.605uint64_t size = s->size;606other.overwrite(*this);607if (size > cast<CommonSymbol>(this)->size)608cast<CommonSymbol>(this)->size = size;609} else {610other.overwrite(*this);611}612}613614void Symbol::resolve(const Defined &other) {615if (other.exportDynamic)616exportDynamic = true;617if (other.visibility() != STV_DEFAULT) {618uint8_t v = visibility(), ov = other.visibility();619setVisibility(v == STV_DEFAULT ? ov : std::min(v, ov));620}621if (shouldReplace(other))622other.overwrite(*this);623}624625void Symbol::resolve(const LazySymbol &other) {626if (isPlaceholder()) {627other.overwrite(*this);628return;629}630631// For common objects, we want to look for global or weak definitions that632// should be extracted as the canonical definition instead.633if (LLVM_UNLIKELY(isCommon()) && elf::config->fortranCommon &&634other.file->shouldExtractForCommon(getName())) {635ctx.backwardReferences.erase(this);636other.overwrite(*this);637other.extract();638return;639}640641if (!isUndefined()) {642// See the comment in resolveUndefined().643if (isDefined())644ctx.backwardReferences.erase(this);645return;646}647648// An undefined weak will not extract archive members. 
See comment on Lazy in649// Symbols.h for the details.650if (isWeak()) {651uint8_t ty = type;652other.overwrite(*this);653type = ty;654binding = STB_WEAK;655return;656}657658const InputFile *oldFile = file;659other.extract();660if (!config->whyExtract.empty())661recordWhyExtract(oldFile, *file, *this);662}663664void Symbol::resolve(const SharedSymbol &other) {665exportDynamic = true;666if (isPlaceholder()) {667other.overwrite(*this);668return;669}670if (isCommon()) {671// See the comment in resolveCommon() above.672if (other.size > cast<CommonSymbol>(this)->size)673cast<CommonSymbol>(this)->size = other.size;674return;675}676if (visibility() == STV_DEFAULT && (isUndefined() || isLazy())) {677// An undefined symbol with non default visibility must be satisfied678// in the same DSO.679uint8_t bind = binding;680other.overwrite(*this);681binding = bind;682} else if (traced)683printTraceSymbol(other, getName());684}685686void Defined::overwrite(Symbol &sym) const {687if (isa_and_nonnull<SharedFile>(sym.file))688sym.versionId = VER_NDX_GLOBAL;689Symbol::overwrite(sym, DefinedKind);690auto &s = static_cast<Defined &>(sym);691s.value = value;692s.size = size;693s.section = section;694}695696697