// Path: blob/main/contrib/llvm-project/lld/ELF/InputSection.h
// 34878 views
//===- InputSection.h -------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLD_ELF_INPUT_SECTION_H9#define LLD_ELF_INPUT_SECTION_H1011#include "Config.h"12#include "Relocations.h"13#include "lld/Common/CommonLinkerContext.h"14#include "lld/Common/LLVM.h"15#include "lld/Common/Memory.h"16#include "llvm/ADT/CachedHashString.h"17#include "llvm/ADT/DenseSet.h"18#include "llvm/ADT/StringExtras.h"19#include "llvm/ADT/TinyPtrVector.h"20#include "llvm/Object/ELF.h"21#include "llvm/Support/Compiler.h"2223namespace lld {24namespace elf {2526class InputFile;27class Symbol;2829class Defined;30struct Partition;31class SyntheticSection;32template <class ELFT> class ObjFile;33class OutputSection;3435LLVM_LIBRARY_VISIBILITY extern std::vector<Partition> partitions;3637// Returned by InputSectionBase::relsOrRelas. At most one member is empty.38template <class ELFT> struct RelsOrRelas {39Relocs<typename ELFT::Rel> rels;40Relocs<typename ELFT::Rela> relas;41Relocs<typename ELFT::Crel> crels;42bool areRelocsRel() const { return rels.size(); }43bool areRelocsCrel() const { return crels.size(); }44};4546#define invokeOnRelocs(sec, f, ...) \47{ \48const RelsOrRelas<ELFT> rs = (sec).template relsOrRelas<ELFT>(); \49if (rs.areRelocsCrel()) \50f(__VA_ARGS__, rs.crels); \51else if (rs.areRelocsRel()) \52f(__VA_ARGS__, rs.rels); \53else \54f(__VA_ARGS__, rs.relas); \55}5657// This is the base class of all sections that lld handles. 
Some are sections in58// input files, some are sections in the produced output file and some exist59// just as a convenience for implementing special ways of combining some60// sections.61class SectionBase {62public:63enum Kind { Regular, Synthetic, Spill, EHFrame, Merge, Output };6465Kind kind() const { return (Kind)sectionKind; }6667LLVM_PREFERRED_TYPE(Kind)68uint8_t sectionKind : 3;6970// The next two bit fields are only used by InputSectionBase, but we71// put them here so the struct packs better.7273LLVM_PREFERRED_TYPE(bool)74uint8_t bss : 1;7576// Set for sections that should not be folded by ICF.77LLVM_PREFERRED_TYPE(bool)78uint8_t keepUnique : 1;7980uint8_t partition = 1;81uint32_t type;82StringRef name;8384// The 1-indexed partition that this section is assigned to by the garbage85// collector, or 0 if this section is dead. Normally there is only one86// partition, so this will either be 0 or 1.87elf::Partition &getPartition() const;8889// These corresponds to the fields in Elf_Shdr.90uint64_t flags;91uint32_t addralign;92uint32_t entsize;93uint32_t link;94uint32_t info;9596OutputSection *getOutputSection();97const OutputSection *getOutputSection() const {98return const_cast<SectionBase *>(this)->getOutputSection();99}100101// Translate an offset in the input section to an offset in the output102// section.103uint64_t getOffset(uint64_t offset) const;104105uint64_t getVA(uint64_t offset = 0) const;106107bool isLive() const { return partition != 0; }108void markLive() { partition = 1; }109void markDead() { partition = 0; }110111protected:112constexpr SectionBase(Kind sectionKind, StringRef name, uint64_t flags,113uint32_t entsize, uint32_t addralign, uint32_t type,114uint32_t info, uint32_t link)115: sectionKind(sectionKind), bss(false), keepUnique(false), type(type),116name(name), flags(flags), addralign(addralign), entsize(entsize),117link(link), info(info) {}118};119120struct SymbolAnchor {121uint64_t offset;122Defined *d;123bool end; // true for the 
anchor of st_value+st_size124};125126struct RelaxAux {127// This records symbol start and end offsets which will be adjusted according128// to the nearest relocDeltas element.129SmallVector<SymbolAnchor, 0> anchors;130// For relocations[i], the actual offset is131// r_offset - (i ? relocDeltas[i-1] : 0).132std::unique_ptr<uint32_t[]> relocDeltas;133// For relocations[i], the actual type is relocTypes[i].134std::unique_ptr<RelType[]> relocTypes;135SmallVector<uint32_t, 0> writes;136};137138// This corresponds to a section of an input file.139class InputSectionBase : public SectionBase {140public:141template <class ELFT>142InputSectionBase(ObjFile<ELFT> &file, const typename ELFT::Shdr &header,143StringRef name, Kind sectionKind);144145InputSectionBase(InputFile *file, uint64_t flags, uint32_t type,146uint64_t entsize, uint32_t link, uint32_t info,147uint32_t addralign, ArrayRef<uint8_t> data, StringRef name,148Kind sectionKind);149150static bool classof(const SectionBase *s) { return s->kind() != Output; }151152// The file which contains this section. Its dynamic type is usually153// ObjFile<ELFT>, but may be an InputFile of InternalKind (for a synthetic154// section).155InputFile *file;156157// Input sections are part of an output section. Special sections158// like .eh_frame and merge sections are first combined into a159// synthetic section that is then added to an output section. In all160// cases this points one level up.161SectionBase *parent = nullptr;162163// Section index of the relocation section if exists.164uint32_t relSecIdx = 0;165166// Getter when the dynamic type is ObjFile<ELFT>.167template <class ELFT> ObjFile<ELFT> *getFile() const {168return cast<ObjFile<ELFT>>(file);169}170171// Used by --optimize-bb-jumps and RISC-V linker relaxation temporarily to172// indicate the number of bytes which is not counted in the size. 
This should173// be reset to zero after uses.174uint32_t bytesDropped = 0;175176mutable bool compressed = false;177178// Whether this section is SHT_CREL and has been decoded to RELA by179// relsOrRelas.180bool decodedCrel = false;181182// Whether the section needs to be padded with a NOP filler due to183// deleteFallThruJmpInsn.184bool nopFiller = false;185186void drop_back(unsigned num) {187assert(bytesDropped + num < 256);188bytesDropped += num;189}190191void push_back(uint64_t num) {192assert(bytesDropped >= num);193bytesDropped -= num;194}195196mutable const uint8_t *content_;197uint64_t size;198199void trim() {200if (bytesDropped) {201size -= bytesDropped;202bytesDropped = 0;203}204}205206ArrayRef<uint8_t> content() const {207return ArrayRef<uint8_t>(content_, size);208}209ArrayRef<uint8_t> contentMaybeDecompress() const {210if (compressed)211decompress();212return content();213}214215// The next member in the section group if this section is in a group. This is216// used by --gc-sections.217InputSectionBase *nextInSectionGroup = nullptr;218219template <class ELFT>220RelsOrRelas<ELFT> relsOrRelas(bool supportsCrel = true) const;221222// InputSections that are dependent on us (reverse dependency for GC)223llvm::TinyPtrVector<InputSection *> dependentSections;224225// Returns the size of this section (even if this is a common or BSS.)226size_t getSize() const;227228InputSection *getLinkOrderDep() const;229230// Get a symbol that encloses this offset from within the section. If type is231// not zero, return a symbol with the specified type.232Defined *getEnclosingSymbol(uint64_t offset, uint8_t type = 0) const;233Defined *getEnclosingFunction(uint64_t offset) const {234return getEnclosingSymbol(offset, llvm::ELF::STT_FUNC);235}236237// Returns a source location string. 
Used to construct an error message.238std::string getLocation(uint64_t offset) const;239std::string getSrcMsg(const Symbol &sym, uint64_t offset) const;240std::string getObjMsg(uint64_t offset) const;241242// Each section knows how to relocate itself. These functions apply243// relocations, assuming that Buf points to this section's copy in244// the mmap'ed output buffer.245template <class ELFT> void relocate(uint8_t *buf, uint8_t *bufEnd);246static uint64_t getRelocTargetVA(const InputFile *File, RelType Type,247int64_t A, uint64_t P, const Symbol &Sym,248RelExpr Expr);249250// The native ELF reloc data type is not very convenient to handle.251// So we convert ELF reloc records to our own records in Relocations.cpp.252// This vector contains such "cooked" relocations.253SmallVector<Relocation, 0> relocations;254255void addReloc(const Relocation &r) { relocations.push_back(r); }256MutableArrayRef<Relocation> relocs() { return relocations; }257ArrayRef<Relocation> relocs() const { return relocations; }258259union {260// These are modifiers to jump instructions that are necessary when basic261// block sections are enabled. Basic block sections creates opportunities262// to relax jump instructions at basic block boundaries after reordering the263// basic blocks.264JumpInstrMod *jumpInstrMod = nullptr;265266// Auxiliary information for RISC-V and LoongArch linker relaxation.267// They do not use jumpInstrMod.268RelaxAux *relaxAux;269270// The compressed content size when `compressed` is true.271size_t compressedSize;272};273274// A function compiled with -fsplit-stack calling a function275// compiled without -fsplit-stack needs its prologue adjusted. Find276// such functions and adjust their prologues. This is very similar277// to relocation. 
See https://gcc.gnu.org/wiki/SplitStacks for more278// information.279template <typename ELFT>280void adjustSplitStackFunctionPrologues(uint8_t *buf, uint8_t *end);281282283template <typename T> llvm::ArrayRef<T> getDataAs() const {284size_t s = content().size();285assert(s % sizeof(T) == 0);286return llvm::ArrayRef<T>((const T *)content().data(), s / sizeof(T));287}288289protected:290template <typename ELFT>291void parseCompressedHeader();292void decompress() const;293};294295// SectionPiece represents a piece of splittable section contents.296// We allocate a lot of these and binary search on them. This means that they297// have to be as compact as possible, which is why we don't store the size (can298// be found by looking at the next one).299struct SectionPiece {300SectionPiece() = default;301SectionPiece(size_t off, uint32_t hash, bool live)302: inputOff(off), live(live), hash(hash >> 1) {}303304uint32_t inputOff;305LLVM_PREFERRED_TYPE(bool)306uint32_t live : 1;307uint32_t hash : 31;308uint64_t outputOff = 0;309};310311static_assert(sizeof(SectionPiece) == 16, "SectionPiece is too big");312313// This corresponds to a SHF_MERGE section of an input file.314class MergeInputSection : public InputSectionBase {315public:316template <class ELFT>317MergeInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,318StringRef name);319MergeInputSection(uint64_t flags, uint32_t type, uint64_t entsize,320ArrayRef<uint8_t> data, StringRef name);321322static bool classof(const SectionBase *s) { return s->kind() == Merge; }323void splitIntoPieces();324325// Translate an offset in the input section to an offset in the parent326// MergeSyntheticSection.327uint64_t getParentOffset(uint64_t offset) const;328329// Splittable sections are handled as a sequence of data330// rather than a single large blob of data.331SmallVector<SectionPiece, 0> pieces;332333// Returns I'th piece's data. 
This function is very hot when334// string merging is enabled, so we want to inline.335LLVM_ATTRIBUTE_ALWAYS_INLINE336llvm::CachedHashStringRef getData(size_t i) const {337size_t begin = pieces[i].inputOff;338size_t end =339(pieces.size() - 1 == i) ? content().size() : pieces[i + 1].inputOff;340return {toStringRef(content().slice(begin, end - begin)), pieces[i].hash};341}342343// Returns the SectionPiece at a given input section offset.344SectionPiece &getSectionPiece(uint64_t offset);345const SectionPiece &getSectionPiece(uint64_t offset) const {346return const_cast<MergeInputSection *>(this)->getSectionPiece(offset);347}348349SyntheticSection *getParent() const {350return cast_or_null<SyntheticSection>(parent);351}352353private:354void splitStrings(StringRef s, size_t size);355void splitNonStrings(ArrayRef<uint8_t> a, size_t size);356};357358struct EhSectionPiece {359EhSectionPiece(size_t off, InputSectionBase *sec, uint32_t size,360unsigned firstRelocation)361: inputOff(off), sec(sec), size(size), firstRelocation(firstRelocation) {}362363ArrayRef<uint8_t> data() const {364return {sec->content().data() + this->inputOff, size};365}366367size_t inputOff;368ssize_t outputOff = -1;369InputSectionBase *sec;370uint32_t size;371unsigned firstRelocation;372};373374// This corresponds to a .eh_frame section of an input file.375class EhInputSection : public InputSectionBase {376public:377template <class ELFT>378EhInputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,379StringRef name);380static bool classof(const SectionBase *s) { return s->kind() == EHFrame; }381template <class ELFT> void split();382template <class ELFT, class RelTy> void split(ArrayRef<RelTy> rels);383384// Splittable sections are handled as a sequence of data385// rather than a single large blob of data.386SmallVector<EhSectionPiece, 0> cies, fdes;387388SyntheticSection *getParent() const;389uint64_t getParentOffset(uint64_t offset) const;390};391392// This is a section that is added directly 
to an output section393// instead of needing special combination via a synthetic section. This394// includes all input sections with the exceptions of SHF_MERGE and395// .eh_frame. It also includes the synthetic sections themselves.396class InputSection : public InputSectionBase {397public:398InputSection(InputFile *f, uint64_t flags, uint32_t type, uint32_t addralign,399ArrayRef<uint8_t> data, StringRef name, Kind k = Regular);400template <class ELFT>401InputSection(ObjFile<ELFT> &f, const typename ELFT::Shdr &header,402StringRef name);403404static bool classof(const SectionBase *s) {405return s->kind() == SectionBase::Regular ||406s->kind() == SectionBase::Synthetic ||407s->kind() == SectionBase::Spill;408}409410// Write this section to a mmap'ed file, assuming Buf is pointing to411// beginning of the output section.412template <class ELFT> void writeTo(uint8_t *buf);413414OutputSection *getParent() const {415return reinterpret_cast<OutputSection *>(parent);416}417418// This variable has two usages. Initially, it represents an index in the419// OutputSection's InputSection list, and is used when ordering SHF_LINK_ORDER420// sections. After assignAddresses is called, it represents the offset from421// the beginning of the output section this section was assigned to.422uint64_t outSecOff = 0;423424InputSectionBase *getRelocatedSection() const;425426template <class ELFT, class RelTy>427void relocateNonAlloc(uint8_t *buf, Relocs<RelTy> rels);428429// Points to the canonical section. 
If ICF folds two sections, repl pointer of430// one section points to the other.431InputSection *repl = this;432433// Used by ICF.434uint32_t eqClass[2] = {0, 0};435436// Called by ICF to merge two input sections.437void replace(InputSection *other);438439static InputSection discarded;440441private:442template <class ELFT, class RelTy> void copyRelocations(uint8_t *buf);443444template <class ELFT, class RelTy, class RelIt>445void copyRelocations(uint8_t *buf, llvm::iterator_range<RelIt> rels);446447template <class ELFT> void copyShtGroup(uint8_t *buf);448};449450// A marker for a potential spill location for another input section. This451// broadly acts as if it were the original section until address assignment.452// Then it is either replaced with the real input section or removed.453class PotentialSpillSection : public InputSection {454public:455// The containing input section description; used to quickly replace this stub456// with the actual section.457InputSectionDescription *isd;458459// Next potential spill location for the same source input section.460PotentialSpillSection *next = nullptr;461462PotentialSpillSection(const InputSectionBase &source,463InputSectionDescription &isd);464465static bool classof(const SectionBase *sec) {466return sec->kind() == InputSectionBase::Spill;467}468};469470static_assert(sizeof(InputSection) <= 160, "InputSection is too big");471472class SyntheticSection : public InputSection {473public:474SyntheticSection(uint64_t flags, uint32_t type, uint32_t addralign,475StringRef name)476: InputSection(ctx.internalFile, flags, type, addralign, {}, name,477InputSectionBase::Synthetic) {}478479virtual ~SyntheticSection() = default;480virtual size_t getSize() const = 0;481virtual bool updateAllocSize() { return false; }482// If the section has the SHF_ALLOC flag and the size may be changed if483// thunks are added, update the section size.484virtual bool isNeeded() const { return true; }485virtual void finalizeContents() {}486virtual 
void writeTo(uint8_t *buf) = 0;487488static bool classof(const SectionBase *sec) {489return sec->kind() == InputSectionBase::Synthetic;490}491};492493inline bool isStaticRelSecType(uint32_t type) {494return type == llvm::ELF::SHT_RELA || type == llvm::ELF::SHT_CREL ||495type == llvm::ELF::SHT_REL;496}497498inline bool isDebugSection(const InputSectionBase &sec) {499return (sec.flags & llvm::ELF::SHF_ALLOC) == 0 &&500sec.name.starts_with(".debug");501}502503// The set of TOC entries (.toc + addend) for which we should not apply504// toc-indirect to toc-relative relaxation. const Symbol * refers to the505// STT_SECTION symbol associated to the .toc input section.506extern llvm::DenseSet<std::pair<const Symbol *, uint64_t>> ppc64noTocRelax;507508} // namespace elf509510std::string toString(const elf::InputSectionBase *);511} // namespace lld512513#endif514515516