Path: blob/main/contrib/llvm-project/lld/MachO/InputSection.h
34870 views
//===- InputSection.h -------------------------------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#ifndef LLD_MACHO_INPUT_SECTION_H9#define LLD_MACHO_INPUT_SECTION_H1011#include "Config.h"12#include "Relocations.h"13#include "Symbols.h"1415#include "lld/Common/LLVM.h"16#include "lld/Common/Memory.h"17#include "llvm/ADT/ArrayRef.h"18#include "llvm/ADT/BitVector.h"19#include "llvm/ADT/CachedHashString.h"20#include "llvm/ADT/TinyPtrVector.h"21#include "llvm/BinaryFormat/MachO.h"2223namespace lld {24namespace macho {2526class InputFile;27class OutputSection;2829class InputSection {30public:31enum Kind : uint8_t {32ConcatKind,33CStringLiteralKind,34WordLiteralKind,35};3637Kind kind() const { return sectionKind; }38virtual ~InputSection() = default;39virtual uint64_t getSize() const { return data.size(); }40virtual bool empty() const { return data.empty(); }41InputFile *getFile() const { return section.file; }42StringRef getName() const { return section.name; }43StringRef getSegName() const { return section.segname; }44uint32_t getFlags() const { return section.flags; }45uint64_t getFileSize() const;46// Translates \p off -- an offset relative to this InputSection -- into an47// offset from the beginning of its parent OutputSection.48virtual uint64_t getOffset(uint64_t off) const = 0;49// The offset from the beginning of the file.50uint64_t getVA(uint64_t off) const;51// Return a user-friendly string for use in diagnostics.52// Format: /path/to/object.o:(symbol _func+0x123)53std::string getLocation(uint64_t off) const;54// Return the source line corresponding to an address, or the empty string.55// Format: Source.cpp:123 (/path/to/Source.cpp:123)56std::string getSourceLocation(uint64_t off) const;57// Return the relocation at \p off, if it exists. This does a linear search.58const Reloc *getRelocAt(uint32_t off) const;59// Whether the data at \p off in this InputSection is live.60virtual bool isLive(uint64_t off) const = 0;61virtual void markLive(uint64_t off) = 0;62virtual InputSection *canonical() { return this; }63virtual const InputSection *canonical() const { return this; }6465protected:66InputSection(Kind kind, const Section §ion, ArrayRef<uint8_t> data,67uint32_t align)68: sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),69data(data), section(section) {}7071InputSection(const InputSection &rhs)72: sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),73align(rhs.align), data(rhs.data), section(rhs.section) {}7475Kind sectionKind;7677public:78// is address assigned?79bool isFinal = false;80// keep the address of the symbol(s) in this section unique in the final81// binary ?82bool keepUnique : 1;83// Does this section have symbols at offsets other than zero? (NOTE: only84// applies to ConcatInputSections.)85bool hasAltEntry : 1;86uint32_t align = 1;8788OutputSection *parent = nullptr;89ArrayRef<uint8_t> data;90std::vector<Reloc> relocs;91// The symbols that belong to this InputSection, sorted by value. With92// .subsections_via_symbols, there is typically only one element here.93llvm::TinyPtrVector<Defined *> symbols;9495const Section §ion;9697protected:98const Defined *getContainingSymbol(uint64_t off) const;99};100101// ConcatInputSections are combined into (Concat)OutputSections through simple102// concatenation, in contrast with literal sections which may have their103// contents merged before output.104class ConcatInputSection final : public InputSection {105public:106ConcatInputSection(const Section §ion, ArrayRef<uint8_t> data,107uint32_t align = 1)108: InputSection(ConcatKind, section, data, align) {}109110uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }111uint64_t getVA() const { return InputSection::getVA(0); }112// ConcatInputSections are entirely live or dead, so the offset is irrelevant.113bool isLive(uint64_t off) const override { return live; }114void markLive(uint64_t off) override { live = true; }115bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }116bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }117void writeTo(uint8_t *buf);118119void foldIdentical(ConcatInputSection *redundant);120ConcatInputSection *canonical() override {121return replacement ? replacement : this;122}123const InputSection *canonical() const override {124return replacement ? replacement : this;125}126127static bool classof(const InputSection *isec) {128return isec->kind() == ConcatKind;129}130131// Points to the surviving section after this one is folded by ICF132ConcatInputSection *replacement = nullptr;133// Equivalence-class ID for ICF134uint32_t icfEqClass[2] = {0, 0};135136// With subsections_via_symbols, most symbols have their own InputSection,137// and for weak symbols (e.g. from inline functions), only the138// InputSection from one translation unit will make it to the output,139// while all copies in other translation units are coalesced into the140// first and not copied to the output.141bool wasCoalesced = false;142bool live = !config->deadStrip;143bool hasCallSites = false;144// This variable has two usages. Initially, it represents the input order.145// After assignAddresses is called, it represents the offset from the146// beginning of the output section this section was assigned to.147uint64_t outSecOff = 0;148};149150// Initialize a fake InputSection that does not belong to any InputFile.151// The created ConcatInputSection will always have 'live=true'152ConcatInputSection *makeSyntheticInputSection(StringRef segName,153StringRef sectName,154uint32_t flags = 0,155ArrayRef<uint8_t> data = {},156uint32_t align = 1);157158// Helper functions to make it easy to sprinkle asserts.159160inline bool shouldOmitFromOutput(InputSection *isec) {161return isa<ConcatInputSection>(isec) &&162cast<ConcatInputSection>(isec)->shouldOmitFromOutput();163}164165inline bool isCoalescedWeak(InputSection *isec) {166return isa<ConcatInputSection>(isec) &&167cast<ConcatInputSection>(isec)->isCoalescedWeak();168}169170// We allocate a lot of these and binary search on them, so they should be as171// compact as possible. Hence the use of 31 rather than 64 bits for the hash.172struct StringPiece {173// Offset from the start of the containing input section.174uint32_t inSecOff;175uint32_t live : 1;176// Only set if deduplicating literals177uint32_t hash : 31;178// Offset from the start of the containing output section.179uint64_t outSecOff = 0;180181StringPiece(uint64_t off, uint32_t hash)182: inSecOff(off), live(!config->deadStrip), hash(hash) {}183};184185static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");186187// CStringInputSections are composed of multiple null-terminated string188// literals, which we represent using StringPieces. These literals can be189// deduplicated and tail-merged, so translating offsets between the input and190// outputs sections is more complicated.191//192// NOTE: One significant difference between LLD and ld64 is that we merge all193// cstring literals, even those referenced directly by non-private symbols.194// ld64 is more conservative and does not do that. This was mostly done for195// implementation simplicity; if we find programs that need the more196// conservative behavior we can certainly implement that.197class CStringInputSection final : public InputSection {198public:199CStringInputSection(const Section §ion, ArrayRef<uint8_t> data,200uint32_t align, bool dedupLiterals)201: InputSection(CStringLiteralKind, section, data, align),202deduplicateLiterals(dedupLiterals) {}203204uint64_t getOffset(uint64_t off) const override;205bool isLive(uint64_t off) const override { return getStringPiece(off).live; }206void markLive(uint64_t off) override { getStringPiece(off).live = true; }207// Find the StringPiece that contains this offset.208StringPiece &getStringPiece(uint64_t off);209const StringPiece &getStringPiece(uint64_t off) const;210// Split at each null byte.211void splitIntoPieces();212213LLVM_ATTRIBUTE_ALWAYS_INLINE214StringRef getStringRef(size_t i) const {215size_t begin = pieces[i].inSecOff;216// The endpoint should be *at* the null terminator, not after. This matches217// the behavior of StringRef(const char *Str).218size_t end =219((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;220return toStringRef(data.slice(begin, end - begin));221}222223StringRef getStringRefAtOffset(uint64_t off) const {224return getStringRef(getStringPieceIndex(off));225}226227// Returns i'th piece as a CachedHashStringRef. This function is very hot when228// string merging is enabled, so we want to inline.229LLVM_ATTRIBUTE_ALWAYS_INLINE230llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {231assert(deduplicateLiterals);232return {getStringRef(i), pieces[i].hash};233}234235static bool classof(const InputSection *isec) {236return isec->kind() == CStringLiteralKind;237}238239bool deduplicateLiterals = false;240std::vector<StringPiece> pieces;241242private:243size_t getStringPieceIndex(uint64_t off) const;244};245246class WordLiteralInputSection final : public InputSection {247public:248WordLiteralInputSection(const Section §ion, ArrayRef<uint8_t> data,249uint32_t align);250uint64_t getOffset(uint64_t off) const override;251bool isLive(uint64_t off) const override {252return live[off >> power2LiteralSize];253}254void markLive(uint64_t off) override {255live[off >> power2LiteralSize] = true;256}257258static bool classof(const InputSection *isec) {259return isec->kind() == WordLiteralKind;260}261262private:263unsigned power2LiteralSize;264// The liveness of data[off] is tracked by live[off >> power2LiteralSize].265llvm::BitVector live;266};267268inline uint8_t sectionType(uint32_t flags) {269return flags & llvm::MachO::SECTION_TYPE;270}271272inline bool isZeroFill(uint32_t flags) {273return llvm::MachO::isVirtualSection(sectionType(flags));274}275276inline bool isThreadLocalVariables(uint32_t flags) {277return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;278}279280// These sections contain the data for initializing thread-local variables.281inline bool isThreadLocalData(uint32_t flags) {282return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||283sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;284}285286inline bool isDebugSection(uint32_t flags) {287return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==288llvm::MachO::S_ATTR_DEBUG;289}290291inline bool isWordLiteralSection(uint32_t flags) {292return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||293sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||294sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;295}296297bool isCodeSection(const InputSection *);298bool isCfStringSection(const InputSection *);299bool isClassRefsSection(const InputSection *);300bool isSelRefsSection(const InputSection *);301bool isEhFrameSection(const InputSection *);302bool isGccExceptTabSection(const InputSection *);303304extern std::vector<ConcatInputSection *> inputSections;305// This is used as a counter for specyfing input order for input sections306extern int inputSectionsOrder;307308namespace section_names {309310constexpr const char authGot[] = "__auth_got";311constexpr const char authPtr[] = "__auth_ptr";312constexpr const char binding[] = "__binding";313constexpr const char bitcodeBundle[] = "__bundle";314constexpr const char cString[] = "__cstring";315constexpr const char cfString[] = "__cfstring";316constexpr const char cgProfile[] = "__cg_profile";317constexpr const char chainFixups[] = "__chainfixups";318constexpr const char codeSignature[] = "__code_signature";319constexpr const char common[] = "__common";320constexpr const char compactUnwind[] = "__compact_unwind";321constexpr const char data[] = "__data";322constexpr const char debugAbbrev[] = "__debug_abbrev";323constexpr const char debugInfo[] = "__debug_info";324constexpr const char debugLine[] = "__debug_line";325constexpr const char debugStr[] = "__debug_str";326constexpr const char debugStrOffs[] = "__debug_str_offs";327constexpr const char ehFrame[] = "__eh_frame";328constexpr const char gccExceptTab[] = "__gcc_except_tab";329constexpr const char export_[] = "__export";330constexpr const char dataInCode[] = "__data_in_code";331constexpr const char functionStarts[] = "__func_starts";332constexpr const char got[] = "__got";333constexpr const char header[] = "__mach_header";334constexpr const char indirectSymbolTable[] = "__ind_sym_tab";335constexpr const char initOffsets[] = "__init_offsets";336constexpr const char const_[] = "__const";337constexpr const char lazySymbolPtr[] = "__la_symbol_ptr";338constexpr const char lazyBinding[] = "__lazy_binding";339constexpr const char literals[] = "__literals";340constexpr const char moduleInitFunc[] = "__mod_init_func";341constexpr const char moduleTermFunc[] = "__mod_term_func";342constexpr const char nonLazySymbolPtr[] = "__nl_symbol_ptr";343constexpr const char objcCatList[] = "__objc_catlist";344constexpr const char objcClassList[] = "__objc_classlist";345constexpr const char objcMethList[] = "__objc_methlist";346constexpr const char objcClassRefs[] = "__objc_classrefs";347constexpr const char objcConst[] = "__objc_const";348constexpr const char objCImageInfo[] = "__objc_imageinfo";349constexpr const char objcStubs[] = "__objc_stubs";350constexpr const char objcSelrefs[] = "__objc_selrefs";351constexpr const char objcMethname[] = "__objc_methname";352constexpr const char objcNonLazyCatList[] = "__objc_nlcatlist";353constexpr const char objcNonLazyClassList[] = "__objc_nlclslist";354constexpr const char objcProtoList[] = "__objc_protolist";355constexpr const char pageZero[] = "__pagezero";356constexpr const char pointers[] = "__pointers";357constexpr const char rebase[] = "__rebase";358constexpr const char staticInit[] = "__StaticInit";359constexpr const char stringTable[] = "__string_table";360constexpr const char stubHelper[] = "__stub_helper";361constexpr const char stubs[] = "__stubs";362constexpr const char swift[] = "__swift";363constexpr const char symbolTable[] = "__symbol_table";364constexpr const char textCoalNt[] = "__textcoal_nt";365constexpr const char text[] = "__text";366constexpr const char threadPtrs[] = "__thread_ptrs";367constexpr const char threadVars[] = "__thread_vars";368constexpr const char unwindInfo[] = "__unwind_info";369constexpr const char weakBinding[] = "__weak_binding";370constexpr const char zeroFill[] = "__zerofill";371constexpr const char addrSig[] = "__llvm_addrsig";372373} // namespace section_names374375void addInputSection(InputSection *inputSection);376} // namespace macho377378std::string toString(const macho::InputSection *);379380} // namespace lld381382#endif383384385