Path: blob/main/contrib/llvm-project/lld/ELF/ICF.cpp
34878 views
//===- ICF.cpp ------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// ICF is short for Identical Code Folding. This is a size optimization to9// identify and merge two or more read-only sections (typically functions)10// that happened to have the same contents. It usually reduces output size11// by a few percent.12//13// In ICF, two sections are considered identical if they have the same14// section flags, section data, and relocations. Relocations are tricky,15// because two relocations are considered the same if they have the same16// relocation types, values, and if they point to the same sections *in17// terms of ICF*.18//19// Here is an example. If foo and bar defined below are compiled to the20// same machine instructions, ICF can and should merge the two, although21// their relocations point to each other.22//23// void foo() { bar(); }24// void bar() { foo(); }25//26// If you merge the two, their relocations point to the same section and27// thus you know they are mergeable, but how do you know they are28// mergeable in the first place? This is not an easy problem to solve.29//30// What we are doing in LLD is to partition sections into equivalence31// classes. Sections in the same equivalence class when the algorithm32// terminates are considered identical. Here are details:33//34// 1. First, we partition sections using their hash values as keys. Hash35// values contain section types, section contents and numbers of36// relocations. During this step, relocation targets are not taken into37// account. We just put sections that apparently differ into different38// equivalence classes.39//40// 2. Next, for each equivalence class, we visit sections to compare41// relocation targets. Relocation targets are considered equivalent if42// their targets are in the same equivalence class. Sections with43// different relocation targets are put into different equivalence44// classes.45//46// 3. If we split an equivalence class in step 2, two relocations47// previously target the same equivalence class may now target48// different equivalence classes. Therefore, we repeat step 2 until a49// convergence is obtained.50//51// 4. For each equivalence class C, pick an arbitrary section in C, and52// merge all the other sections in C with it.53//54// For small programs, this algorithm needs 3-5 iterations. For large55// programs such as Chromium, it takes more than 20 iterations.56//57// This algorithm was mentioned as an "optimistic algorithm" in [1],58// though gold implements a different algorithm than this.59//60// We parallelize each step so that multiple threads can work on different61// equivalence classes concurrently. That gave us a large performance62// boost when applying ICF on large programs. For example, MSVC link.exe63// or GNU gold takes 10-20 seconds to apply ICF on Chromium, whose output64// size is about 1.5 GB, but LLD can finish it in less than 2 seconds on a65// 2.8 GHz 40 core machine. Even without threading, LLD's ICF is still66// faster than MSVC or gold though.67//68// [1] Safe ICF: Pointer Safe and Unwinding aware Identical Code Folding69// in the Gold Linker70// http://static.googleusercontent.com/media/research.google.com/en//pubs/archive/36912.pdf71//72//===----------------------------------------------------------------------===//7374#include "ICF.h"75#include "Config.h"76#include "InputFiles.h"77#include "LinkerScript.h"78#include "OutputSections.h"79#include "SymbolTable.h"80#include "Symbols.h"81#include "SyntheticSections.h"82#include "llvm/BinaryFormat/ELF.h"83#include "llvm/Object/ELF.h"84#include "llvm/Support/Parallel.h"85#include "llvm/Support/TimeProfiler.h"86#include "llvm/Support/xxhash.h"87#include <algorithm>88#include <atomic>8990using namespace llvm;91using namespace llvm::ELF;92using namespace llvm::object;93using namespace lld;94using namespace lld::elf;9596namespace {97template <class ELFT> class ICF {98public:99void run();100101private:102void segregate(size_t begin, size_t end, uint32_t eqClassBase, bool constant);103104template <class RelTy>105bool constantEq(const InputSection *a, Relocs<RelTy> relsA,106const InputSection *b, Relocs<RelTy> relsB);107108template <class RelTy>109bool variableEq(const InputSection *a, Relocs<RelTy> relsA,110const InputSection *b, Relocs<RelTy> relsB);111112bool equalsConstant(const InputSection *a, const InputSection *b);113bool equalsVariable(const InputSection *a, const InputSection *b);114115size_t findBoundary(size_t begin, size_t end);116117void forEachClassRange(size_t begin, size_t end,118llvm::function_ref<void(size_t, size_t)> fn);119120void forEachClass(llvm::function_ref<void(size_t, size_t)> fn);121122SmallVector<InputSection *, 0> sections;123124// We repeat the main loop while `Repeat` is true.125std::atomic<bool> repeat;126127// The main loop counter.128int cnt = 0;129130// We have two locations for equivalence classes. On the first iteration131// of the main loop, Class[0] has a valid value, and Class[1] contains132// garbage. We read equivalence classes from slot 0 and write to slot 1.133// So, Class[0] represents the current class, and Class[1] represents134// the next class. On each iteration, we switch their roles and use them135// alternately.136//137// Why are we doing this? Recall that other threads may be working on138// other equivalence classes in parallel. They may read sections that we139// are updating. We cannot update equivalence classes in place because140// it breaks the invariance that all possibly-identical sections must be141// in the same equivalence class at any moment. In other words, the for142// loop to update equivalence classes is not atomic, and that is143// observable from other threads. By writing new classes to other144// places, we can keep the invariance.145//146// Below, `Current` has the index of the current class, and `Next` has147// the index of the next class. If threading is enabled, they are either148// (0, 1) or (1, 0).149//150// Note on single-thread: if that's the case, they are always (0, 0)151// because we can safely read the next class without worrying about race152// conditions. Using the same location makes this algorithm converge153// faster because it uses results of the same iteration earlier.154int current = 0;155int next = 0;156};157}158159// Returns true if section S is subject of ICF.160static bool isEligible(InputSection *s) {161if (!s->isLive() || s->keepUnique || !(s->flags & SHF_ALLOC))162return false;163164// Don't merge writable sections. .data.rel.ro sections are marked as writable165// but are semantically read-only.166if ((s->flags & SHF_WRITE) && s->name != ".data.rel.ro" &&167!s->name.starts_with(".data.rel.ro."))168return false;169170// SHF_LINK_ORDER sections are ICF'd as a unit with their dependent sections,171// so we don't consider them for ICF individually.172if (s->flags & SHF_LINK_ORDER)173return false;174175// Don't merge synthetic sections as their Data member is not valid and empty.176// The Data member needs to be valid for ICF as it is used by ICF to determine177// the equality of section contents.178if (isa<SyntheticSection>(s))179return false;180181// .init and .fini contains instructions that must be executed to initialize182// and finalize the process. They cannot and should not be merged.183if (s->name == ".init" || s->name == ".fini")184return false;185186// A user program may enumerate sections named with a C identifier using187// __start_* and __stop_* symbols. We cannot ICF any such sections because188// that could change program semantics.189if (isValidCIdentifier(s->name))190return false;191192return true;193}194195// Split an equivalence class into smaller classes.196template <class ELFT>197void ICF<ELFT>::segregate(size_t begin, size_t end, uint32_t eqClassBase,198bool constant) {199// This loop rearranges sections in [Begin, End) so that all sections200// that are equal in terms of equals{Constant,Variable} are contiguous201// in [Begin, End).202//203// The algorithm is quadratic in the worst case, but that is not an204// issue in practice because the number of the distinct sections in205// each range is usually very small.206207while (begin < end) {208// Divide [Begin, End) into two. Let Mid be the start index of the209// second group.210auto bound =211std::stable_partition(sections.begin() + begin + 1,212sections.begin() + end, [&](InputSection *s) {213if (constant)214return equalsConstant(sections[begin], s);215return equalsVariable(sections[begin], s);216});217size_t mid = bound - sections.begin();218219// Now we split [Begin, End) into [Begin, Mid) and [Mid, End) by220// updating the sections in [Begin, Mid). We use Mid as the basis for221// the equivalence class ID because every group ends with a unique index.222// Add this to eqClassBase to avoid equality with unique IDs.223for (size_t i = begin; i < mid; ++i)224sections[i]->eqClass[next] = eqClassBase + mid;225226// If we created a group, we need to iterate the main loop again.227if (mid != end)228repeat = true;229230begin = mid;231}232}233234// Compare two lists of relocations.235template <class ELFT>236template <class RelTy>237bool ICF<ELFT>::constantEq(const InputSection *secA, Relocs<RelTy> ra,238const InputSection *secB, Relocs<RelTy> rb) {239if (ra.size() != rb.size())240return false;241auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();242for (; rai != rae; ++rai, ++rbi) {243if (rai->r_offset != rbi->r_offset ||244rai->getType(config->isMips64EL) != rbi->getType(config->isMips64EL))245return false;246247uint64_t addA = getAddend<ELFT>(*rai);248uint64_t addB = getAddend<ELFT>(*rbi);249250Symbol &sa = secA->file->getRelocTargetSym(*rai);251Symbol &sb = secB->file->getRelocTargetSym(*rbi);252if (&sa == &sb) {253if (addA == addB)254continue;255return false;256}257258auto *da = dyn_cast<Defined>(&sa);259auto *db = dyn_cast<Defined>(&sb);260261// Placeholder symbols generated by linker scripts look the same now but262// may have different values later.263if (!da || !db || da->scriptDefined || db->scriptDefined)264return false;265266// When comparing a pair of relocations, if they refer to different symbols,267// and either symbol is preemptible, the containing sections should be268// considered different. This is because even if the sections are identical269// in this DSO, they may not be after preemption.270if (da->isPreemptible || db->isPreemptible)271return false;272273// Relocations referring to absolute symbols are constant-equal if their274// values are equal.275if (!da->section && !db->section && da->value + addA == db->value + addB)276continue;277if (!da->section || !db->section)278return false;279280if (da->section->kind() != db->section->kind())281return false;282283// Relocations referring to InputSections are constant-equal if their284// section offsets are equal.285if (isa<InputSection>(da->section)) {286if (da->value + addA == db->value + addB)287continue;288return false;289}290291// Relocations referring to MergeInputSections are constant-equal if their292// offsets in the output section are equal.293auto *x = dyn_cast<MergeInputSection>(da->section);294if (!x)295return false;296auto *y = cast<MergeInputSection>(db->section);297if (x->getParent() != y->getParent())298return false;299300uint64_t offsetA =301sa.isSection() ? x->getOffset(addA) : x->getOffset(da->value) + addA;302uint64_t offsetB =303sb.isSection() ? y->getOffset(addB) : y->getOffset(db->value) + addB;304if (offsetA != offsetB)305return false;306}307308return true;309}310311// Compare "non-moving" part of two InputSections, namely everything312// except relocation targets.313template <class ELFT>314bool ICF<ELFT>::equalsConstant(const InputSection *a, const InputSection *b) {315if (a->flags != b->flags || a->getSize() != b->getSize() ||316a->content() != b->content())317return false;318319// If two sections have different output sections, we cannot merge them.320assert(a->getParent() && b->getParent());321if (a->getParent() != b->getParent())322return false;323324const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();325const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();326if (ra.areRelocsCrel() || rb.areRelocsCrel())327return constantEq(a, ra.crels, b, rb.crels);328return ra.areRelocsRel() || rb.areRelocsRel()329? constantEq(a, ra.rels, b, rb.rels)330: constantEq(a, ra.relas, b, rb.relas);331}332333// Compare two lists of relocations. Returns true if all pairs of334// relocations point to the same section in terms of ICF.335template <class ELFT>336template <class RelTy>337bool ICF<ELFT>::variableEq(const InputSection *secA, Relocs<RelTy> ra,338const InputSection *secB, Relocs<RelTy> rb) {339assert(ra.size() == rb.size());340341auto rai = ra.begin(), rae = ra.end(), rbi = rb.begin();342for (; rai != rae; ++rai, ++rbi) {343// The two sections must be identical.344Symbol &sa = secA->file->getRelocTargetSym(*rai);345Symbol &sb = secB->file->getRelocTargetSym(*rbi);346if (&sa == &sb)347continue;348349auto *da = cast<Defined>(&sa);350auto *db = cast<Defined>(&sb);351352// We already dealt with absolute and non-InputSection symbols in353// constantEq, and for InputSections we have already checked everything354// except the equivalence class.355if (!da->section)356continue;357auto *x = dyn_cast<InputSection>(da->section);358if (!x)359continue;360auto *y = cast<InputSection>(db->section);361362// Sections that are in the special equivalence class 0, can never be the363// same in terms of the equivalence class.364if (x->eqClass[current] == 0)365return false;366if (x->eqClass[current] != y->eqClass[current])367return false;368};369370return true;371}372373// Compare "moving" part of two InputSections, namely relocation targets.374template <class ELFT>375bool ICF<ELFT>::equalsVariable(const InputSection *a, const InputSection *b) {376const RelsOrRelas<ELFT> ra = a->template relsOrRelas<ELFT>();377const RelsOrRelas<ELFT> rb = b->template relsOrRelas<ELFT>();378if (ra.areRelocsCrel() || rb.areRelocsCrel())379return variableEq(a, ra.crels, b, rb.crels);380return ra.areRelocsRel() || rb.areRelocsRel()381? variableEq(a, ra.rels, b, rb.rels)382: variableEq(a, ra.relas, b, rb.relas);383}384385template <class ELFT> size_t ICF<ELFT>::findBoundary(size_t begin, size_t end) {386uint32_t eqClass = sections[begin]->eqClass[current];387for (size_t i = begin + 1; i < end; ++i)388if (eqClass != sections[i]->eqClass[current])389return i;390return end;391}392393// Sections in the same equivalence class are contiguous in Sections394// vector. Therefore, Sections vector can be considered as contiguous395// groups of sections, grouped by the class.396//397// This function calls Fn on every group within [Begin, End).398template <class ELFT>399void ICF<ELFT>::forEachClassRange(size_t begin, size_t end,400llvm::function_ref<void(size_t, size_t)> fn) {401while (begin < end) {402size_t mid = findBoundary(begin, end);403fn(begin, mid);404begin = mid;405}406}407408// Call Fn on each equivalence class.409template <class ELFT>410void ICF<ELFT>::forEachClass(llvm::function_ref<void(size_t, size_t)> fn) {411// If threading is disabled or the number of sections are412// too small to use threading, call Fn sequentially.413if (parallel::strategy.ThreadsRequested == 1 || sections.size() < 1024) {414forEachClassRange(0, sections.size(), fn);415++cnt;416return;417}418419current = cnt % 2;420next = (cnt + 1) % 2;421422// Shard into non-overlapping intervals, and call Fn in parallel.423// The sharding must be completed before any calls to Fn are made424// so that Fn can modify the Chunks in its shard without causing data425// races.426const size_t numShards = 256;427size_t step = sections.size() / numShards;428size_t boundaries[numShards + 1];429boundaries[0] = 0;430boundaries[numShards] = sections.size();431432parallelFor(1, numShards, [&](size_t i) {433boundaries[i] = findBoundary((i - 1) * step, sections.size());434});435436parallelFor(1, numShards + 1, [&](size_t i) {437if (boundaries[i - 1] < boundaries[i])438forEachClassRange(boundaries[i - 1], boundaries[i], fn);439});440++cnt;441}442443// Combine the hashes of the sections referenced by the given section into its444// hash.445template <class RelTy>446static void combineRelocHashes(unsigned cnt, InputSection *isec,447Relocs<RelTy> rels) {448uint32_t hash = isec->eqClass[cnt % 2];449for (RelTy rel : rels) {450Symbol &s = isec->file->getRelocTargetSym(rel);451if (auto *d = dyn_cast<Defined>(&s))452if (auto *relSec = dyn_cast_or_null<InputSection>(d->section))453hash += relSec->eqClass[cnt % 2];454}455// Set MSB to 1 to avoid collisions with unique IDs.456isec->eqClass[(cnt + 1) % 2] = hash | (1U << 31);457}458459static void print(const Twine &s) {460if (config->printIcfSections)461message(s);462}463464// The main function of ICF.465template <class ELFT> void ICF<ELFT>::run() {466// Compute isPreemptible early. We may add more symbols later, so this loop467// cannot be merged with the later computeIsPreemptible() pass which is used468// by scanRelocations().469if (config->hasDynSymTab)470for (Symbol *sym : symtab.getSymbols())471sym->isPreemptible = computeIsPreemptible(*sym);472473// Two text sections may have identical content and relocations but different474// LSDA, e.g. the two functions may have catch blocks of different types. If a475// text section is referenced by a .eh_frame FDE with LSDA, it is not476// eligible. This is implemented by iterating over CIE/FDE and setting477// eqClass[0] to the referenced text section from a live FDE.478//479// If two .gcc_except_table have identical semantics (usually identical480// content with PC-relative encoding), we will lose folding opportunity.481uint32_t uniqueId = 0;482for (Partition &part : partitions)483part.ehFrame->iterateFDEWithLSDA<ELFT>(484[&](InputSection &s) { s.eqClass[0] = s.eqClass[1] = ++uniqueId; });485486// Collect sections to merge.487for (InputSectionBase *sec : ctx.inputSections) {488auto *s = dyn_cast<InputSection>(sec);489if (s && s->eqClass[0] == 0) {490if (isEligible(s))491sections.push_back(s);492else493// Ineligible sections are assigned unique IDs, i.e. each section494// belongs to an equivalence class of its own.495s->eqClass[0] = s->eqClass[1] = ++uniqueId;496}497}498499// Initially, we use hash values to partition sections.500parallelForEach(sections, [&](InputSection *s) {501// Set MSB to 1 to avoid collisions with unique IDs.502s->eqClass[0] = xxh3_64bits(s->content()) | (1U << 31);503});504505// Perform 2 rounds of relocation hash propagation. 2 is an empirical value to506// reduce the average sizes of equivalence classes, i.e. segregate() which has507// a large time complexity will have less work to do.508for (unsigned cnt = 0; cnt != 2; ++cnt) {509parallelForEach(sections, [&](InputSection *s) {510const RelsOrRelas<ELFT> rels = s->template relsOrRelas<ELFT>();511if (rels.areRelocsCrel())512combineRelocHashes(cnt, s, rels.crels);513else if (rels.areRelocsRel())514combineRelocHashes(cnt, s, rels.rels);515else516combineRelocHashes(cnt, s, rels.relas);517});518}519520// From now on, sections in Sections vector are ordered so that sections521// in the same equivalence class are consecutive in the vector.522llvm::stable_sort(sections, [](const InputSection *a, const InputSection *b) {523return a->eqClass[0] < b->eqClass[0];524});525526// Compare static contents and assign unique equivalence class IDs for each527// static content. Use a base offset for these IDs to ensure no overlap with528// the unique IDs already assigned.529uint32_t eqClassBase = ++uniqueId;530forEachClass([&](size_t begin, size_t end) {531segregate(begin, end, eqClassBase, true);532});533534// Split groups by comparing relocations until convergence is obtained.535do {536repeat = false;537forEachClass([&](size_t begin, size_t end) {538segregate(begin, end, eqClassBase, false);539});540} while (repeat);541542log("ICF needed " + Twine(cnt) + " iterations");543544// Merge sections by the equivalence class.545forEachClassRange(0, sections.size(), [&](size_t begin, size_t end) {546if (end - begin == 1)547return;548print("selected section " + toString(sections[begin]));549for (size_t i = begin + 1; i < end; ++i) {550print(" removing identical section " + toString(sections[i]));551sections[begin]->replace(sections[i]);552553// At this point we know sections merged are fully identical and hence554// we want to remove duplicate implicit dependencies such as link order555// and relocation sections.556for (InputSection *isec : sections[i]->dependentSections)557isec->markDead();558}559});560561// Change Defined symbol's section field to the canonical one.562auto fold = [](Symbol *sym) {563if (auto *d = dyn_cast<Defined>(sym))564if (auto *sec = dyn_cast_or_null<InputSection>(d->section))565if (sec->repl != d->section) {566d->section = sec->repl;567d->folded = true;568}569};570for (Symbol *sym : symtab.getSymbols())571fold(sym);572parallelForEach(ctx.objectFiles, [&](ELFFileBase *file) {573for (Symbol *sym : file->getLocalSymbols())574fold(sym);575});576577// InputSectionDescription::sections is populated by processSectionCommands().578// ICF may fold some input sections assigned to output sections. Remove them.579for (SectionCommand *cmd : script->sectionCommands)580if (auto *osd = dyn_cast<OutputDesc>(cmd))581for (SectionCommand *subCmd : osd->osec.commands)582if (auto *isd = dyn_cast<InputSectionDescription>(subCmd))583llvm::erase_if(isd->sections,584[](InputSection *isec) { return !isec->isLive(); });585}586587// ICF entry point function.588template <class ELFT> void elf::doIcf() {589llvm::TimeTraceScope timeScope("ICF");590ICF<ELFT>().run();591}592593template void elf::doIcf<ELF32LE>();594template void elf::doIcf<ELF32BE>();595template void elf::doIcf<ELF64LE>();596template void elf::doIcf<ELF64BE>();597598599