// Path: blob/main/contrib/llvm-project/lld/ELF/MarkLive.cpp
// 34869 views
//===- MarkLive.cpp -------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements --gc-sections, which is a feature to remove unused9// sections from output. Unused sections are sections that are not reachable10// from known GC-root symbols or sections. Naturally the feature is11// implemented as a mark-sweep garbage collector.12//13// Here's how it works. Each InputSectionBase has a "Live" bit. The bit is off14// by default. Starting with GC-root symbols or sections, markLive function15// defined in this file visits all reachable sections to set their Live16// bits. Writer will then ignore sections whose Live bits are off, so that17// such sections are not included into output.18//19//===----------------------------------------------------------------------===//2021#include "MarkLive.h"22#include "InputFiles.h"23#include "InputSection.h"24#include "LinkerScript.h"25#include "SymbolTable.h"26#include "Symbols.h"27#include "SyntheticSections.h"28#include "Target.h"29#include "lld/Common/CommonLinkerContext.h"30#include "lld/Common/Strings.h"31#include "llvm/ADT/STLExtras.h"32#include "llvm/Object/ELF.h"33#include "llvm/Support/TimeProfiler.h"34#include <vector>3536using namespace llvm;37using namespace llvm::ELF;38using namespace llvm::object;39using namespace llvm::support::endian;40using namespace lld;41using namespace lld::elf;4243namespace {44template <class ELFT> class MarkLive {45public:46MarkLive(unsigned partition) : partition(partition) {}4748void run();49void moveToMain();5051private:52void enqueue(InputSectionBase *sec, uint64_t offset);53void markSymbol(Symbol *sym);54void mark();5556template <class RelTy>57void resolveReloc(InputSectionBase 
&sec, RelTy &rel, bool fromFDE);5859template <class RelTy>60void scanEhFrameSection(EhInputSection &eh, ArrayRef<RelTy> rels);6162// The index of the partition that we are currently processing.63unsigned partition;6465// A list of sections to visit.66SmallVector<InputSection *, 0> queue;6768// There are normally few input sections whose names are valid C69// identifiers, so we just store a SmallVector instead of a multimap.70DenseMap<StringRef, SmallVector<InputSectionBase *, 0>> cNamedSections;71};72} // namespace7374template <class ELFT>75static uint64_t getAddend(InputSectionBase &sec,76const typename ELFT::Rel &rel) {77return target->getImplicitAddend(sec.content().begin() + rel.r_offset,78rel.getType(config->isMips64EL));79}8081template <class ELFT>82static uint64_t getAddend(InputSectionBase &sec,83const typename ELFT::Rela &rel) {84return rel.r_addend;85}8687// Currently, we assume all input CREL relocations have an explicit addend.88template <class ELFT>89static uint64_t getAddend(InputSectionBase &sec,90const typename ELFT::Crel &rel) {91return rel.r_addend;92}9394template <class ELFT>95template <class RelTy>96void MarkLive<ELFT>::resolveReloc(InputSectionBase &sec, RelTy &rel,97bool fromFDE) {98// If a symbol is referenced in a live section, it is used.99Symbol &sym = sec.file->getRelocTargetSym(rel);100sym.used = true;101102if (auto *d = dyn_cast<Defined>(&sym)) {103auto *relSec = dyn_cast_or_null<InputSectionBase>(d->section);104if (!relSec)105return;106107uint64_t offset = d->value;108if (d->isSection())109offset += getAddend<ELFT>(sec, rel);110111// fromFDE being true means this is referenced by a FDE in a .eh_frame112// piece. The relocation points to the described function or to a LSDA. We113// only need to keep the LSDA live, so ignore anything that points to114// executable sections. 
If the LSDA is in a section group or has the115// SHF_LINK_ORDER flag, we ignore the relocation as well because (a) if the116// associated text section is live, the LSDA will be retained due to section117// group/SHF_LINK_ORDER rules (b) if the associated text section should be118// discarded, marking the LSDA will unnecessarily retain the text section.119if (!(fromFDE && ((relSec->flags & (SHF_EXECINSTR | SHF_LINK_ORDER)) ||120relSec->nextInSectionGroup)))121enqueue(relSec, offset);122return;123}124125if (auto *ss = dyn_cast<SharedSymbol>(&sym))126if (!ss->isWeak())127cast<SharedFile>(ss->file)->isNeeded = true;128129for (InputSectionBase *sec : cNamedSections.lookup(sym.getName()))130enqueue(sec, 0);131}132133// The .eh_frame section is an unfortunate special case.134// The section is divided in CIEs and FDEs and the relocations it can have are135// * CIEs can refer to a personality function.136// * FDEs can refer to a LSDA137// * FDEs refer to the function they contain information about138// The last kind of relocation cannot keep the referred section alive, or they139// would keep everything alive in a common object file. In fact, each FDE is140// alive if the section it refers to is alive.141// To keep things simple, in here we just ignore the last relocation kind. The142// other two keep the referred section alive.143//144// A possible improvement would be to fully process .eh_frame in the middle of145// the gc pass. 
With that we would be able to also gc some sections holding146// LSDAs and personality functions if we found that they were unused.147template <class ELFT>148template <class RelTy>149void MarkLive<ELFT>::scanEhFrameSection(EhInputSection &eh,150ArrayRef<RelTy> rels) {151for (const EhSectionPiece &cie : eh.cies)152if (cie.firstRelocation != unsigned(-1))153resolveReloc(eh, rels[cie.firstRelocation], false);154for (const EhSectionPiece &fde : eh.fdes) {155size_t firstRelI = fde.firstRelocation;156if (firstRelI == (unsigned)-1)157continue;158uint64_t pieceEnd = fde.inputOff + fde.size;159for (size_t j = firstRelI, end2 = rels.size();160j < end2 && rels[j].r_offset < pieceEnd; ++j)161resolveReloc(eh, rels[j], true);162}163}164165// Some sections are used directly by the loader, so they should never be166// garbage-collected. This function returns true if a given section is such167// section.168static bool isReserved(InputSectionBase *sec) {169switch (sec->type) {170case SHT_FINI_ARRAY:171case SHT_INIT_ARRAY:172case SHT_PREINIT_ARRAY:173return true;174case SHT_NOTE:175// SHT_NOTE sections in a group are subject to garbage collection.176return !sec->nextInSectionGroup;177default:178// Support SHT_PROGBITS .init_array (https://golang.org/issue/50295) and179// .init_array.N (https://github.com/rust-lang/rust/issues/92181) for a180// while.181StringRef s = sec->name;182return s == ".init" || s == ".fini" || s.starts_with(".init_array") ||183s == ".jcr" || s.starts_with(".ctors") || s.starts_with(".dtors");184}185}186187template <class ELFT>188void MarkLive<ELFT>::enqueue(InputSectionBase *sec, uint64_t offset) {189// Usually, a whole section is marked as live or dead, but in mergeable190// (splittable) sections, each piece of data has independent liveness bit.191// So we explicitly tell it which offset is in use.192if (auto *ms = dyn_cast<MergeInputSection>(sec))193ms->getSectionPiece(offset).live = true;194195// Set Sec->Partition to the meet (i.e. 
the "minimum") of Partition and196// Sec->Partition in the following lattice: 1 < other < 0. If Sec->Partition197// doesn't change, we don't need to do anything.198if (sec->partition == 1 || sec->partition == partition)199return;200sec->partition = sec->partition ? 1 : partition;201202// Add input section to the queue.203if (InputSection *s = dyn_cast<InputSection>(sec))204queue.push_back(s);205}206207template <class ELFT> void MarkLive<ELFT>::markSymbol(Symbol *sym) {208if (auto *d = dyn_cast_or_null<Defined>(sym))209if (auto *isec = dyn_cast_or_null<InputSectionBase>(d->section))210enqueue(isec, d->value);211}212213// This is the main function of the garbage collector.214// Starting from GC-root sections, this function visits all reachable215// sections to set their "Live" bits.216template <class ELFT> void MarkLive<ELFT>::run() {217// Add GC root symbols.218219// Preserve externally-visible symbols if the symbols defined by this220// file can interpose other ELF file's symbols at runtime.221for (Symbol *sym : symtab.getSymbols())222if (sym->includeInDynsym() && sym->partition == partition)223markSymbol(sym);224225// If this isn't the main partition, that's all that we need to preserve.226if (partition != 1) {227mark();228return;229}230231markSymbol(symtab.find(config->entry));232markSymbol(symtab.find(config->init));233markSymbol(symtab.find(config->fini));234for (StringRef s : config->undefined)235markSymbol(symtab.find(s));236for (StringRef s : script->referencedSymbols)237markSymbol(symtab.find(s));238for (auto [symName, _] : symtab.cmseSymMap) {239markSymbol(symtab.cmseSymMap[symName].sym);240markSymbol(symtab.cmseSymMap[symName].acleSeSym);241}242243// Mark .eh_frame sections as live because there are usually no relocations244// that point to .eh_frames. Otherwise, the garbage collector would drop245// all of them. 
We also want to preserve personality routines and LSDA246// referenced by .eh_frame sections, so we scan them for that here.247for (EhInputSection *eh : ctx.ehInputSections) {248const RelsOrRelas<ELFT> rels =249eh->template relsOrRelas<ELFT>(/*supportsCrel=*/false);250if (rels.areRelocsRel())251scanEhFrameSection(*eh, rels.rels);252else if (rels.relas.size())253scanEhFrameSection(*eh, rels.relas);254}255for (InputSectionBase *sec : ctx.inputSections) {256if (sec->flags & SHF_GNU_RETAIN) {257enqueue(sec, 0);258continue;259}260if (sec->flags & SHF_LINK_ORDER)261continue;262263// Usually, non-SHF_ALLOC sections are not removed even if they are264// unreachable through relocations because reachability is not a good signal265// whether they are garbage or not (e.g. there is usually no section266// referring to a .comment section, but we want to keep it.) When a267// non-SHF_ALLOC section is retained, we also retain sections dependent on268// it.269//270// Note on SHF_LINK_ORDER: Such sections contain metadata and they271// have a reverse dependency on the InputSection they are linked with.272// We are able to garbage collect them.273//274// Note on SHF_REL{,A}: Such sections reach here only when -r275// or --emit-reloc were given. And they are subject of garbage276// collection because, if we remove a text section, we also277// remove its relocation section.278//279// Note on nextInSectionGroup: The ELF spec says that group sections are280// included or omitted as a unit. 
We take the interpretation that:281//282// - Group members (nextInSectionGroup != nullptr) are subject to garbage283// collection.284// - Groups members are retained or discarded as a unit.285if (!(sec->flags & SHF_ALLOC)) {286if (!isStaticRelSecType(sec->type) && !sec->nextInSectionGroup) {287sec->markLive();288for (InputSection *isec : sec->dependentSections)289isec->markLive();290}291}292293// Preserve special sections and those which are specified in linker294// script KEEP command.295if (isReserved(sec) || script->shouldKeep(sec)) {296enqueue(sec, 0);297} else if ((!config->zStartStopGC || sec->name.starts_with("__libc_")) &&298isValidCIdentifier(sec->name)) {299// As a workaround for glibc libc.a before 2.34300// (https://sourceware.org/PR27492), retain __libc_atexit and similar301// sections regardless of zStartStopGC.302cNamedSections[saver().save("__start_" + sec->name)].push_back(sec);303cNamedSections[saver().save("__stop_" + sec->name)].push_back(sec);304}305}306307mark();308}309310template <class ELFT> void MarkLive<ELFT>::mark() {311// Mark all reachable sections.312while (!queue.empty()) {313InputSectionBase &sec = *queue.pop_back_val();314315const RelsOrRelas<ELFT> rels = sec.template relsOrRelas<ELFT>();316for (const typename ELFT::Rel &rel : rels.rels)317resolveReloc(sec, rel, false);318for (const typename ELFT::Rela &rel : rels.relas)319resolveReloc(sec, rel, false);320for (const typename ELFT::Crel &rel : rels.crels)321resolveReloc(sec, rel, false);322323for (InputSectionBase *isec : sec.dependentSections)324enqueue(isec, 0);325326// Mark the next group member.327if (sec.nextInSectionGroup)328enqueue(sec.nextInSectionGroup, 0);329}330}331332// Move the sections for some symbols to the main partition, specifically ifuncs333// (because they can result in an IRELATIVE being added to the main partition's334// GOT, which means that the ifunc must be available when the main partition is335// loaded) and TLS symbols (because we only know how to 
correctly process TLS336// relocations for the main partition).337//338// We also need to move sections whose names are C identifiers that are referred339// to from __start_/__stop_ symbols because there will only be one set of340// symbols for the whole program.341template <class ELFT> void MarkLive<ELFT>::moveToMain() {342for (ELFFileBase *file : ctx.objectFiles)343for (Symbol *s : file->getSymbols())344if (auto *d = dyn_cast<Defined>(s))345if ((d->type == STT_GNU_IFUNC || d->type == STT_TLS) && d->section &&346d->section->isLive())347markSymbol(s);348349for (InputSectionBase *sec : ctx.inputSections) {350if (!sec->isLive() || !isValidCIdentifier(sec->name))351continue;352if (symtab.find(("__start_" + sec->name).str()) ||353symtab.find(("__stop_" + sec->name).str()))354enqueue(sec, 0);355}356357mark();358}359360// Before calling this function, Live bits are off for all361// input sections. This function make some or all of them on362// so that they are emitted to the output file.363template <class ELFT> void elf::markLive() {364llvm::TimeTraceScope timeScope("markLive");365// If --gc-sections is not given, retain all input sections.366if (!config->gcSections) {367// If a DSO defines a symbol referenced in a regular object, it is needed.368for (Symbol *sym : symtab.getSymbols())369if (auto *s = dyn_cast<SharedSymbol>(sym))370if (s->isUsedInRegularObj && !s->isWeak())371cast<SharedFile>(s->file)->isNeeded = true;372return;373}374375for (InputSectionBase *sec : ctx.inputSections)376sec->markDead();377378// Follow the graph to mark all live sections.379for (unsigned curPart = 1; curPart <= partitions.size(); ++curPart)380MarkLive<ELFT>(curPart).run();381382// If we have multiple partitions, some sections need to live in the main383// partition even if they were allocated to a loadable partition. 
Move them384// there now.385if (partitions.size() != 1)386MarkLive<ELFT>(1).moveToMain();387388// Report garbage-collected sections.389if (config->printGcSections)390for (InputSectionBase *sec : ctx.inputSections)391if (!sec->isLive())392message("removing unused section " + toString(sec));393}394395template void elf::markLive<ELF32LE>();396template void elf::markLive<ELF32BE>();397template void elf::markLive<ELF64LE>();398template void elf::markLive<ELF64BE>();399400401