Path: blob/main/contrib/llvm-project/lld/MachO/InputSection.cpp
34878 views
//===- InputSection.cpp ---------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "InputSection.h"9#include "ConcatOutputSection.h"10#include "Config.h"11#include "InputFiles.h"12#include "OutputSegment.h"13#include "Symbols.h"14#include "SyntheticSections.h"15#include "Target.h"16#include "UnwindInfoSection.h"17#include "Writer.h"1819#include "lld/Common/ErrorHandler.h"20#include "lld/Common/Memory.h"21#include "llvm/Support/Endian.h"22#include "llvm/Support/xxhash.h"2324using namespace llvm;25using namespace llvm::MachO;26using namespace llvm::support;27using namespace lld;28using namespace lld::macho;2930// Verify ConcatInputSection's size on 64-bit builds. The size of std::vector31// can differ based on STL debug levels (e.g. iterator debugging on MSVC's STL),32// so account for that.33static_assert(sizeof(void *) != 8 ||34sizeof(ConcatInputSection) == sizeof(std::vector<Reloc>) + 88,35"Try to minimize ConcatInputSection's size, we create many "36"instances of it");3738std::vector<ConcatInputSection *> macho::inputSections;39int macho::inputSectionsOrder = 0;4041// Call this function to add a new InputSection and have it routed to the42// appropriate container. Depending on its type and current config, it will43// either be added to 'inputSections' vector or to a synthetic section.44void lld::macho::addInputSection(InputSection *inputSection) {45if (auto *isec = dyn_cast<ConcatInputSection>(inputSection)) {46if (isec->isCoalescedWeak())47return;48if (config->emitRelativeMethodLists &&49ObjCMethListSection::isMethodList(isec)) {50if (in.objcMethList->inputOrder == UnspecifiedInputOrder)51in.objcMethList->inputOrder = inputSectionsOrder++;52in.objcMethList->addInput(isec);53isec->parent = in.objcMethList;54return;55}56if (config->emitInitOffsets &&57sectionType(isec->getFlags()) == S_MOD_INIT_FUNC_POINTERS) {58in.initOffsets->addInput(isec);59return;60}61isec->outSecOff = inputSectionsOrder++;62auto *osec = ConcatOutputSection::getOrCreateForInput(isec);63isec->parent = osec;64inputSections.push_back(isec);65} else if (auto *isec = dyn_cast<CStringInputSection>(inputSection)) {66if (isec->getName() == section_names::objcMethname) {67if (in.objcMethnameSection->inputOrder == UnspecifiedInputOrder)68in.objcMethnameSection->inputOrder = inputSectionsOrder++;69in.objcMethnameSection->addInput(isec);70} else {71if (in.cStringSection->inputOrder == UnspecifiedInputOrder)72in.cStringSection->inputOrder = inputSectionsOrder++;73in.cStringSection->addInput(isec);74}75} else if (auto *isec = dyn_cast<WordLiteralInputSection>(inputSection)) {76if (in.wordLiteralSection->inputOrder == UnspecifiedInputOrder)77in.wordLiteralSection->inputOrder = inputSectionsOrder++;78in.wordLiteralSection->addInput(isec);79} else {80llvm_unreachable("unexpected input section kind");81}8283assert(inputSectionsOrder <= UnspecifiedInputOrder);84}8586uint64_t InputSection::getFileSize() const {87return isZeroFill(getFlags()) ? 0 : getSize();88}8990uint64_t InputSection::getVA(uint64_t off) const {91return parent->addr + getOffset(off);92}9394static uint64_t resolveSymbolVA(const Symbol *sym, uint8_t type) {95const RelocAttrs &relocAttrs = target->getRelocAttrs(type);96if (relocAttrs.hasAttr(RelocAttrBits::BRANCH))97return sym->resolveBranchVA();98if (relocAttrs.hasAttr(RelocAttrBits::GOT))99return sym->resolveGotVA();100if (relocAttrs.hasAttr(RelocAttrBits::TLV))101return sym->resolveTlvVA();102return sym->getVA();103}104105const Defined *InputSection::getContainingSymbol(uint64_t off) const {106auto *nextSym = llvm::upper_bound(107symbols, off, [](uint64_t a, const Defined *b) { return a < b->value; });108if (nextSym == symbols.begin())109return nullptr;110return *std::prev(nextSym);111}112113std::string InputSection::getLocation(uint64_t off) const {114// First, try to find a symbol that's near the offset. Use it as a reference115// point.116if (auto *sym = getContainingSymbol(off))117return (toString(getFile()) + ":(symbol " + toString(*sym) + "+0x" +118Twine::utohexstr(off - sym->value) + ")")119.str();120121// If that fails, use the section itself as a reference point.122for (const Subsection &subsec : section.subsections) {123if (subsec.isec == this) {124off += subsec.offset;125break;126}127}128129return (toString(getFile()) + ":(" + getName() + "+0x" +130Twine::utohexstr(off) + ")")131.str();132}133134std::string InputSection::getSourceLocation(uint64_t off) const {135auto *obj = dyn_cast_or_null<ObjFile>(getFile());136if (!obj)137return {};138139DWARFCache *dwarf = obj->getDwarf();140if (!dwarf)141return std::string();142143for (const Subsection &subsec : section.subsections) {144if (subsec.isec == this) {145off += subsec.offset;146break;147}148}149150auto createMsg = [&](StringRef path, unsigned line) {151std::string filename = sys::path::filename(path).str();152std::string lineStr = (":" + Twine(line)).str();153if (filename == path)154return filename + lineStr;155return (filename + lineStr + " (" + path + lineStr + ")").str();156};157158// First, look up a function for a given offset.159if (std::optional<DILineInfo> li = dwarf->getDILineInfo(160section.addr + off, object::SectionedAddress::UndefSection))161return createMsg(li->FileName, li->Line);162163// If it failed, look up again as a variable.164if (const Defined *sym = getContainingSymbol(off)) {165// Symbols are generally prefixed with an underscore, which is not included166// in the debug information.167StringRef symName = sym->getName();168if (!symName.empty() && symName[0] == '_')169symName = symName.substr(1);170171if (std::optional<std::pair<std::string, unsigned>> fileLine =172dwarf->getVariableLoc(symName))173return createMsg(fileLine->first, fileLine->second);174}175176// Try to get the source file's name from the DWARF information.177if (obj->compileUnit)178return obj->sourceFile();179180return {};181}182183const Reloc *InputSection::getRelocAt(uint32_t off) const {184auto it = llvm::find_if(185relocs, [=](const macho::Reloc &r) { return r.offset == off; });186if (it == relocs.end())187return nullptr;188return &*it;189}190191void ConcatInputSection::foldIdentical(ConcatInputSection *copy) {192align = std::max(align, copy->align);193copy->live = false;194copy->wasCoalesced = true;195copy->replacement = this;196for (auto ©Sym : copy->symbols)197copySym->wasIdenticalCodeFolded = true;198199symbols.insert(symbols.end(), copy->symbols.begin(), copy->symbols.end());200copy->symbols.clear();201202// Remove duplicate compact unwind info for symbols at the same address.203if (symbols.empty())204return;205for (auto it = symbols.begin() + 1; it != symbols.end(); ++it) {206assert((*it)->value == 0);207(*it)->originalUnwindEntry = nullptr;208}209}210211void ConcatInputSection::writeTo(uint8_t *buf) {212assert(!shouldOmitFromOutput());213214if (getFileSize() == 0)215return;216217memcpy(buf, data.data(), data.size());218219for (size_t i = 0; i < relocs.size(); i++) {220const Reloc &r = relocs[i];221uint8_t *loc = buf + r.offset;222uint64_t referentVA = 0;223224const bool needsFixup = config->emitChainedFixups &&225target->hasAttr(r.type, RelocAttrBits::UNSIGNED);226if (target->hasAttr(r.type, RelocAttrBits::SUBTRAHEND)) {227const Symbol *fromSym = r.referent.get<Symbol *>();228const Reloc &minuend = relocs[++i];229uint64_t minuendVA;230if (const Symbol *toSym = minuend.referent.dyn_cast<Symbol *>())231minuendVA = toSym->getVA() + minuend.addend;232else {233auto *referentIsec = minuend.referent.get<InputSection *>();234assert(!::shouldOmitFromOutput(referentIsec));235minuendVA = referentIsec->getVA(minuend.addend);236}237referentVA = minuendVA - fromSym->getVA();238} else if (auto *referentSym = r.referent.dyn_cast<Symbol *>()) {239if (target->hasAttr(r.type, RelocAttrBits::LOAD) &&240!referentSym->isInGot())241target->relaxGotLoad(loc, r.type);242// For dtrace symbols, do not handle them as normal undefined symbols243if (referentSym->getName().starts_with("___dtrace_")) {244// Change dtrace call site to pre-defined instructions245target->handleDtraceReloc(referentSym, r, loc);246continue;247}248referentVA = resolveSymbolVA(referentSym, r.type) + r.addend;249250if (isThreadLocalVariables(getFlags()) && isa<Defined>(referentSym)) {251// References from thread-local variable sections are treated as offsets252// relative to the start of the thread-local data memory area, which253// is initialized via copying all the TLV data sections (which are all254// contiguous).255referentVA -= firstTLVDataSection->addr;256} else if (needsFixup) {257writeChainedFixup(loc, referentSym, r.addend);258continue;259}260} else if (auto *referentIsec = r.referent.dyn_cast<InputSection *>()) {261assert(!::shouldOmitFromOutput(referentIsec));262referentVA = referentIsec->getVA(r.addend);263264if (needsFixup) {265writeChainedRebase(loc, referentVA);266continue;267}268}269target->relocateOne(loc, r, referentVA, getVA() + r.offset);270}271}272273ConcatInputSection *macho::makeSyntheticInputSection(StringRef segName,274StringRef sectName,275uint32_t flags,276ArrayRef<uint8_t> data,277uint32_t align) {278Section §ion =279*make<Section>(/*file=*/nullptr, segName, sectName, flags, /*addr=*/0);280auto isec = make<ConcatInputSection>(section, data, align);281// Since this is an explicitly created 'fake' input section,282// it should not be dead stripped.283isec->live = true;284section.subsections.push_back({0, isec});285return isec;286}287288void CStringInputSection::splitIntoPieces() {289size_t off = 0;290StringRef s = toStringRef(data);291while (!s.empty()) {292size_t end = s.find(0);293if (end == StringRef::npos)294fatal(getLocation(off) + ": string is not null terminated");295uint32_t hash = deduplicateLiterals ? xxh3_64bits(s.take_front(end)) : 0;296pieces.emplace_back(off, hash);297size_t size = end + 1; // include null terminator298s = s.substr(size);299off += size;300}301}302303StringPiece &CStringInputSection::getStringPiece(uint64_t off) {304if (off >= data.size())305fatal(toString(this) + ": offset is outside the section");306307auto it =308partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; });309return it[-1];310}311312const StringPiece &CStringInputSection::getStringPiece(uint64_t off) const {313return const_cast<CStringInputSection *>(this)->getStringPiece(off);314}315316size_t CStringInputSection::getStringPieceIndex(uint64_t off) const {317if (off >= data.size())318fatal(toString(this) + ": offset is outside the section");319320auto it =321partition_point(pieces, [=](StringPiece p) { return p.inSecOff <= off; });322return std::distance(pieces.begin(), it) - 1;323}324325uint64_t CStringInputSection::getOffset(uint64_t off) const {326const StringPiece &piece = getStringPiece(off);327uint64_t addend = off - piece.inSecOff;328return piece.outSecOff + addend;329}330331WordLiteralInputSection::WordLiteralInputSection(const Section §ion,332ArrayRef<uint8_t> data,333uint32_t align)334: InputSection(WordLiteralKind, section, data, align) {335switch (sectionType(getFlags())) {336case S_4BYTE_LITERALS:337power2LiteralSize = 2;338break;339case S_8BYTE_LITERALS:340power2LiteralSize = 3;341break;342case S_16BYTE_LITERALS:343power2LiteralSize = 4;344break;345default:346llvm_unreachable("invalid literal section type");347}348349live.resize(data.size() >> power2LiteralSize, !config->deadStrip);350}351352uint64_t WordLiteralInputSection::getOffset(uint64_t off) const {353auto *osec = cast<WordLiteralSection>(parent);354const uintptr_t buf = reinterpret_cast<uintptr_t>(data.data());355switch (sectionType(getFlags())) {356case S_4BYTE_LITERALS:357return osec->getLiteral4Offset(buf + (off & ~3LLU)) | (off & 3);358case S_8BYTE_LITERALS:359return osec->getLiteral8Offset(buf + (off & ~7LLU)) | (off & 7);360case S_16BYTE_LITERALS:361return osec->getLiteral16Offset(buf + (off & ~15LLU)) | (off & 15);362default:363llvm_unreachable("invalid literal section type");364}365}366367bool macho::isCodeSection(const InputSection *isec) {368uint32_t type = sectionType(isec->getFlags());369if (type != S_REGULAR && type != S_COALESCED)370return false;371372uint32_t attr = isec->getFlags() & SECTION_ATTRIBUTES_USR;373if (attr == S_ATTR_PURE_INSTRUCTIONS)374return true;375376if (isec->getSegName() == segment_names::text)377return StringSwitch<bool>(isec->getName())378.Cases(section_names::textCoalNt, section_names::staticInit, true)379.Default(false);380381return false;382}383384bool macho::isCfStringSection(const InputSection *isec) {385return isec->getName() == section_names::cfString &&386isec->getSegName() == segment_names::data;387}388389bool macho::isClassRefsSection(const InputSection *isec) {390return isec->getName() == section_names::objcClassRefs &&391isec->getSegName() == segment_names::data;392}393394bool macho::isSelRefsSection(const InputSection *isec) {395return isec->getName() == section_names::objcSelrefs &&396isec->getSegName() == segment_names::data;397}398399bool macho::isEhFrameSection(const InputSection *isec) {400return isec->getName() == section_names::ehFrame &&401isec->getSegName() == segment_names::text;402}403404bool macho::isGccExceptTabSection(const InputSection *isec) {405return isec->getName() == section_names::gccExceptTab &&406isec->getSegName() == segment_names::text;407}408409std::string lld::toString(const InputSection *isec) {410return (toString(isec->getFile()) + ":(" + isec->getName() + ")").str();411}412413414