Path: blob/main/contrib/llvm-project/lld/MachO/ICF.cpp
34914 views
//===- ICF.cpp ------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "ICF.h"9#include "ConcatOutputSection.h"10#include "Config.h"11#include "InputSection.h"12#include "SymbolTable.h"13#include "Symbols.h"14#include "UnwindInfoSection.h"1516#include "lld/Common/CommonLinkerContext.h"17#include "llvm/Support/LEB128.h"18#include "llvm/Support/Parallel.h"19#include "llvm/Support/TimeProfiler.h"20#include "llvm/Support/xxhash.h"2122#include <atomic>2324using namespace llvm;25using namespace lld;26using namespace lld::macho;2728static constexpr bool verboseDiagnostics = false;2930class ICF {31public:32ICF(std::vector<ConcatInputSection *> &inputs);33void run();3435using EqualsFn = bool (ICF::*)(const ConcatInputSection *,36const ConcatInputSection *);37void segregate(size_t begin, size_t end, EqualsFn);38size_t findBoundary(size_t begin, size_t end);39void forEachClassRange(size_t begin, size_t end,40llvm::function_ref<void(size_t, size_t)> func);41void forEachClass(llvm::function_ref<void(size_t, size_t)> func);4243bool equalsConstant(const ConcatInputSection *ia,44const ConcatInputSection *ib);45bool equalsVariable(const ConcatInputSection *ia,46const ConcatInputSection *ib);4748// ICF needs a copy of the inputs vector because its equivalence-class49// segregation algorithm destroys the proper sequence.50std::vector<ConcatInputSection *> icfInputs;5152unsigned icfPass = 0;53std::atomic<bool> icfRepeat{false};54std::atomic<uint64_t> equalsConstantCount{0};55std::atomic<uint64_t> equalsVariableCount{0};56};5758ICF::ICF(std::vector<ConcatInputSection *> &inputs) {59icfInputs.assign(inputs.begin(), inputs.end());60}6162// ICF = Identical Code Folding63//64// We only fold __TEXT,__text, so this is really "code" folding, and not65// "COMDAT" folding. String and scalar constant literals are deduplicated66// elsewhere.67//68// Summary of segments & sections:69//70// The __TEXT segment is readonly at the MMU. Some sections are already71// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are72// synthetic and inherently free of duplicates (__TEXT,__stubs &73// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,74// because doing so induces many test failures.75//76// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and77// thus ineligible for ICF.78//79// The __DATA_CONST segment is read/write at the MMU, but is logically const to80// the application after dyld applies fixups to pointer data. We currently81// fold only the __DATA_CONST,__cfstring section.82//83// The __DATA segment is read/write at the MMU, and as application-writeable84// data, none of its sections are eligible for ICF.85//86// Please see the large block comment in lld/ELF/ICF.cpp for an explanation87// of the segregation algorithm.88//89// FIXME(gkm): implement keep-unique attributes90// FIXME(gkm): implement address-significance tables for MachO object files9192// Compare "non-moving" parts of two ConcatInputSections, namely everything93// except references to other ConcatInputSections.94bool ICF::equalsConstant(const ConcatInputSection *ia,95const ConcatInputSection *ib) {96if (verboseDiagnostics)97++equalsConstantCount;98// We can only fold within the same OutputSection.99if (ia->parent != ib->parent)100return false;101if (ia->data.size() != ib->data.size())102return false;103if (ia->data != ib->data)104return false;105if (ia->relocs.size() != ib->relocs.size())106return false;107auto f = [](const Reloc &ra, const Reloc &rb) {108if (ra.type != rb.type)109return false;110if (ra.pcrel != rb.pcrel)111return false;112if (ra.length != rb.length)113return false;114if (ra.offset != rb.offset)115return false;116if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())117return false;118119InputSection *isecA, *isecB;120121uint64_t valueA = 0;122uint64_t valueB = 0;123if (ra.referent.is<Symbol *>()) {124const auto *sa = ra.referent.get<Symbol *>();125const auto *sb = rb.referent.get<Symbol *>();126if (sa->kind() != sb->kind())127return false;128// ICF runs before Undefineds are treated (and potentially converted into129// DylibSymbols).130if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))131return sa == sb && ra.addend == rb.addend;132assert(isa<Defined>(sa));133const auto *da = cast<Defined>(sa);134const auto *db = cast<Defined>(sb);135if (!da->isec() || !db->isec()) {136assert(da->isAbsolute() && db->isAbsolute());137return da->value + ra.addend == db->value + rb.addend;138}139isecA = da->isec();140valueA = da->value;141isecB = db->isec();142valueB = db->value;143} else {144isecA = ra.referent.get<InputSection *>();145isecB = rb.referent.get<InputSection *>();146}147148if (isecA->parent != isecB->parent)149return false;150// Sections with identical parents should be of the same kind.151assert(isecA->kind() == isecB->kind());152// We will compare ConcatInputSection contents in equalsVariable.153if (isa<ConcatInputSection>(isecA))154return ra.addend == rb.addend;155// Else we have two literal sections. References to them are equal iff their156// offsets in the output section are equal.157if (ra.referent.is<Symbol *>())158// For symbol relocs, we compare the contents at the symbol address. We159// don't do `getOffset(value + addend)` because value + addend may not be160// a valid offset in the literal section.161return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&162ra.addend == rb.addend;163else {164assert(valueA == 0 && valueB == 0);165// For section relocs, we compare the content at the section offset.166return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);167}168};169return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),170f);171}172173// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not174// handled by equalsConstant().175bool ICF::equalsVariable(const ConcatInputSection *ia,176const ConcatInputSection *ib) {177if (verboseDiagnostics)178++equalsVariableCount;179assert(ia->relocs.size() == ib->relocs.size());180auto f = [this](const Reloc &ra, const Reloc &rb) {181// We already filtered out mismatching values/addends in equalsConstant.182if (ra.referent == rb.referent)183return true;184const ConcatInputSection *isecA, *isecB;185if (ra.referent.is<Symbol *>()) {186// Matching DylibSymbols are already filtered out by the187// identical-referent check above. Non-matching DylibSymbols were filtered188// out in equalsConstant(). So we can safely cast to Defined here.189const auto *da = cast<Defined>(ra.referent.get<Symbol *>());190const auto *db = cast<Defined>(rb.referent.get<Symbol *>());191if (da->isAbsolute())192return true;193isecA = dyn_cast<ConcatInputSection>(da->isec());194if (!isecA)195return true; // literal sections were checked in equalsConstant.196isecB = cast<ConcatInputSection>(db->isec());197} else {198const auto *sa = ra.referent.get<InputSection *>();199const auto *sb = rb.referent.get<InputSection *>();200isecA = dyn_cast<ConcatInputSection>(sa);201if (!isecA)202return true;203isecB = cast<ConcatInputSection>(sb);204}205return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];206};207if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))208return false;209210// If there are symbols with associated unwind info, check that the unwind211// info matches. For simplicity, we only handle the case where there are only212// symbols at offset zero within the section (which is typically the case with213// .subsections_via_symbols.)214auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };215const auto *itA = llvm::find_if(ia->symbols, hasUnwind);216const auto *itB = llvm::find_if(ib->symbols, hasUnwind);217if (itA == ia->symbols.end())218return itB == ib->symbols.end();219if (itB == ib->symbols.end())220return false;221const Defined *da = *itA;222const Defined *db = *itB;223if (da->unwindEntry()->icfEqClass[icfPass % 2] !=224db->unwindEntry()->icfEqClass[icfPass % 2] ||225da->value != 0 || db->value != 0)226return false;227auto isZero = [](Defined *d) { return d->value == 0; };228return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==229ia->symbols.end() &&230std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==231ib->symbols.end();232}233234// Find the first InputSection after BEGIN whose equivalence class differs235size_t ICF::findBoundary(size_t begin, size_t end) {236uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];237for (size_t i = begin + 1; i < end; ++i)238if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])239return i;240return end;241}242243// Invoke FUNC on subranges with matching equivalence class244void ICF::forEachClassRange(size_t begin, size_t end,245llvm::function_ref<void(size_t, size_t)> func) {246while (begin < end) {247size_t mid = findBoundary(begin, end);248func(begin, mid);249begin = mid;250}251}252253// Split icfInputs into shards, then parallelize invocation of FUNC on subranges254// with matching equivalence class255void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {256// Only use threads when the benefits outweigh the overhead.257const size_t threadingThreshold = 1024;258if (icfInputs.size() < threadingThreshold) {259forEachClassRange(0, icfInputs.size(), func);260++icfPass;261return;262}263264// Shard into non-overlapping intervals, and call FUNC in parallel. The265// sharding must be completed before any calls to FUNC are made so that FUNC266// can modify the InputSection in its shard without causing data races.267const size_t shards = 256;268size_t step = icfInputs.size() / shards;269size_t boundaries[shards + 1];270boundaries[0] = 0;271boundaries[shards] = icfInputs.size();272parallelFor(1, shards, [&](size_t i) {273boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());274});275parallelFor(1, shards + 1, [&](size_t i) {276if (boundaries[i - 1] < boundaries[i]) {277forEachClassRange(boundaries[i - 1], boundaries[i], func);278}279});280++icfPass;281}282283void ICF::run() {284// Into each origin-section hash, combine all reloc referent section hashes.285for (icfPass = 0; icfPass < 2; ++icfPass) {286parallelForEach(icfInputs, [&](ConcatInputSection *isec) {287uint32_t hash = isec->icfEqClass[icfPass % 2];288for (const Reloc &r : isec->relocs) {289if (auto *sym = r.referent.dyn_cast<Symbol *>()) {290if (auto *defined = dyn_cast<Defined>(sym)) {291if (defined->isec()) {292if (auto *referentIsec =293dyn_cast<ConcatInputSection>(defined->isec()))294hash += defined->value + referentIsec->icfEqClass[icfPass % 2];295else296hash += defined->isec()->kind() +297defined->isec()->getOffset(defined->value);298} else {299hash += defined->value;300}301} else {302// ICF runs before Undefined diags303assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));304}305}306}307// Set MSB to 1 to avoid collisions with non-hashed classes.308isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);309});310}311312llvm::stable_sort(313icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {314return a->icfEqClass[0] < b->icfEqClass[0];315});316forEachClass([&](size_t begin, size_t end) {317segregate(begin, end, &ICF::equalsConstant);318});319320// Split equivalence groups by comparing relocations until convergence321do {322icfRepeat = false;323forEachClass([&](size_t begin, size_t end) {324segregate(begin, end, &ICF::equalsVariable);325});326} while (icfRepeat);327log("ICF needed " + Twine(icfPass) + " iterations");328if (verboseDiagnostics) {329log("equalsConstant() called " + Twine(equalsConstantCount) + " times");330log("equalsVariable() called " + Twine(equalsVariableCount) + " times");331}332333// Fold sections within equivalence classes334forEachClass([&](size_t begin, size_t end) {335if (end - begin < 2)336return;337ConcatInputSection *beginIsec = icfInputs[begin];338for (size_t i = begin + 1; i < end; ++i)339beginIsec->foldIdentical(icfInputs[i]);340});341}342343// Split an equivalence class into smaller classes.344void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {345while (begin < end) {346// Divide [begin, end) into two. Let mid be the start index of the347// second group.348auto bound = std::stable_partition(349icfInputs.begin() + begin + 1, icfInputs.begin() + end,350[&](ConcatInputSection *isec) {351return (this->*equals)(icfInputs[begin], isec);352});353size_t mid = bound - icfInputs.begin();354355// Split [begin, end) into [begin, mid) and [mid, end). We use mid as an356// equivalence class ID because every group ends with a unique index.357for (size_t i = begin; i < mid; ++i)358icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;359360// If we created a group, we need to iterate the main loop again.361if (mid != end)362icfRepeat = true;363364begin = mid;365}366}367368void macho::markSymAsAddrSig(Symbol *s) {369if (auto *d = dyn_cast_or_null<Defined>(s))370if (d->isec())371d->isec()->keepUnique = true;372}373374void macho::markAddrSigSymbols() {375TimeTraceScope timeScope("Mark addrsig symbols");376for (InputFile *file : inputFiles) {377ObjFile *obj = dyn_cast<ObjFile>(file);378if (!obj)379continue;380381Section *addrSigSection = obj->addrSigSection;382if (!addrSigSection)383continue;384assert(addrSigSection->subsections.size() == 1);385386const InputSection *isec = addrSigSection->subsections[0].isec;387388for (const Reloc &r : isec->relocs) {389if (auto *sym = r.referent.dyn_cast<Symbol *>())390markSymAsAddrSig(sym);391else392error(toString(isec) + ": unexpected section relocation");393}394}395}396397void macho::foldIdenticalSections(bool onlyCfStrings) {398TimeTraceScope timeScope("Fold Identical Code Sections");399// The ICF equivalence-class segregation algorithm relies on pre-computed400// hashes of InputSection::data for the ConcatOutputSection::inputs and all401// sections referenced by their relocs. We could recursively traverse the402// relocs to find every referenced InputSection, but that precludes easy403// parallelization. Therefore, we hash every InputSection here where we have404// them all accessible as simple vectors.405406// If an InputSection is ineligible for ICF, we give it a unique ID to force407// it into an unfoldable singleton equivalence class. Begin the unique-ID408// space at inputSections.size(), so that it will never intersect with409// equivalence-class IDs which begin at 0. Since hashes & unique IDs never410// coexist with equivalence-class IDs, this is not necessary, but might help411// someone keep the numbers straight in case we ever need to debug the412// ICF::segregate()413std::vector<ConcatInputSection *> foldable;414uint64_t icfUniqueID = inputSections.size();415for (ConcatInputSection *isec : inputSections) {416bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||417isClassRefsSection(isec) ||418isSelRefsSection(isec);419// NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we420// can still fold it.421bool hasFoldableFlags = (isSelRefsSection(isec) ||422sectionType(isec->getFlags()) == MachO::S_REGULAR);423// FIXME: consider non-code __text sections as foldable?424bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&425(isCodeSection(isec) || isFoldableWithAddendsRemoved ||426isGccExceptTabSection(isec)) &&427!isec->keepUnique && !isec->hasAltEntry &&428!isec->shouldOmitFromOutput() && hasFoldableFlags;429if (isFoldable) {430foldable.push_back(isec);431for (Defined *d : isec->symbols)432if (d->unwindEntry())433foldable.push_back(d->unwindEntry());434435// Some sections have embedded addends that foil ICF's hashing / equality436// checks. (We can ignore embedded addends when doing ICF because the same437// information gets recorded in our Reloc structs.) We therefore create a438// mutable copy of the section data and zero out the embedded addends439// before performing any hashing / equality checks.440if (isFoldableWithAddendsRemoved) {441// We have to do this copying serially as the BumpPtrAllocator is not442// thread-safe. FIXME: Make a thread-safe allocator.443MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());444for (const Reloc &r : isec->relocs)445target->relocateOne(copy.data() + r.offset, r, /*va=*/0,446/*relocVA=*/0);447isec->data = copy;448}449} else if (!isEhFrameSection(isec)) {450// EH frames are gathered as foldables from unwindEntry above; give a451// unique ID to everything else.452isec->icfEqClass[0] = ++icfUniqueID;453}454}455parallelForEach(foldable, [](ConcatInputSection *isec) {456assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!457// Turn-on the top bit to guarantee that valid hashes have no collisions458// with the small-integer unique IDs for ICF-ineligible sections459isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);460});461// Now that every input section is either hashed or marked as unique, run the462// segregation algorithm to detect foldable subsections.463ICF(foldable).run();464}465466467