Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/MachO/ICF.cpp
34914 views
1
//===- ICF.cpp ------------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "ICF.h"
10
#include "ConcatOutputSection.h"
11
#include "Config.h"
12
#include "InputSection.h"
13
#include "SymbolTable.h"
14
#include "Symbols.h"
15
#include "UnwindInfoSection.h"
16
17
#include "lld/Common/CommonLinkerContext.h"
18
#include "llvm/Support/LEB128.h"
19
#include "llvm/Support/Parallel.h"
20
#include "llvm/Support/TimeProfiler.h"
21
#include "llvm/Support/xxhash.h"
22
23
#include <atomic>
24
25
using namespace llvm;
26
using namespace lld;
27
using namespace lld::macho;
28
29
static constexpr bool verboseDiagnostics = false;
30
31
class ICF {
32
public:
33
ICF(std::vector<ConcatInputSection *> &inputs);
34
void run();
35
36
using EqualsFn = bool (ICF::*)(const ConcatInputSection *,
37
const ConcatInputSection *);
38
void segregate(size_t begin, size_t end, EqualsFn);
39
size_t findBoundary(size_t begin, size_t end);
40
void forEachClassRange(size_t begin, size_t end,
41
llvm::function_ref<void(size_t, size_t)> func);
42
void forEachClass(llvm::function_ref<void(size_t, size_t)> func);
43
44
bool equalsConstant(const ConcatInputSection *ia,
45
const ConcatInputSection *ib);
46
bool equalsVariable(const ConcatInputSection *ia,
47
const ConcatInputSection *ib);
48
49
// ICF needs a copy of the inputs vector because its equivalence-class
50
// segregation algorithm destroys the proper sequence.
51
std::vector<ConcatInputSection *> icfInputs;
52
53
unsigned icfPass = 0;
54
std::atomic<bool> icfRepeat{false};
55
std::atomic<uint64_t> equalsConstantCount{0};
56
std::atomic<uint64_t> equalsVariableCount{0};
57
};
58
59
ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
60
icfInputs.assign(inputs.begin(), inputs.end());
61
}
62
63
// ICF = Identical Code Folding
64
//
65
// We only fold __TEXT,__text, so this is really "code" folding, and not
66
// "COMDAT" folding. String and scalar constant literals are deduplicated
67
// elsewhere.
68
//
69
// Summary of segments & sections:
70
//
71
// The __TEXT segment is readonly at the MMU. Some sections are already
72
// deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
73
// synthetic and inherently free of duplicates (__TEXT,__stubs &
74
// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
75
// because doing so induces many test failures.
76
//
77
// The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
78
// thus ineligible for ICF.
79
//
80
// The __DATA_CONST segment is read/write at the MMU, but is logically const to
81
// the application after dyld applies fixups to pointer data. We currently
82
// fold only the __DATA_CONST,__cfstring section.
83
//
84
// The __DATA segment is read/write at the MMU, and as application-writeable
85
// data, none of its sections are eligible for ICF.
86
//
87
// Please see the large block comment in lld/ELF/ICF.cpp for an explanation
88
// of the segregation algorithm.
89
//
90
// FIXME(gkm): implement keep-unique attributes
91
// FIXME(gkm): implement address-significance tables for MachO object files
92
93
// Compare "non-moving" parts of two ConcatInputSections, namely everything
94
// except references to other ConcatInputSections.
95
bool ICF::equalsConstant(const ConcatInputSection *ia,
96
const ConcatInputSection *ib) {
97
if (verboseDiagnostics)
98
++equalsConstantCount;
99
// We can only fold within the same OutputSection.
100
if (ia->parent != ib->parent)
101
return false;
102
if (ia->data.size() != ib->data.size())
103
return false;
104
if (ia->data != ib->data)
105
return false;
106
if (ia->relocs.size() != ib->relocs.size())
107
return false;
108
auto f = [](const Reloc &ra, const Reloc &rb) {
109
if (ra.type != rb.type)
110
return false;
111
if (ra.pcrel != rb.pcrel)
112
return false;
113
if (ra.length != rb.length)
114
return false;
115
if (ra.offset != rb.offset)
116
return false;
117
if (ra.referent.is<Symbol *>() != rb.referent.is<Symbol *>())
118
return false;
119
120
InputSection *isecA, *isecB;
121
122
uint64_t valueA = 0;
123
uint64_t valueB = 0;
124
if (ra.referent.is<Symbol *>()) {
125
const auto *sa = ra.referent.get<Symbol *>();
126
const auto *sb = rb.referent.get<Symbol *>();
127
if (sa->kind() != sb->kind())
128
return false;
129
// ICF runs before Undefineds are treated (and potentially converted into
130
// DylibSymbols).
131
if (isa<DylibSymbol>(sa) || isa<Undefined>(sa))
132
return sa == sb && ra.addend == rb.addend;
133
assert(isa<Defined>(sa));
134
const auto *da = cast<Defined>(sa);
135
const auto *db = cast<Defined>(sb);
136
if (!da->isec() || !db->isec()) {
137
assert(da->isAbsolute() && db->isAbsolute());
138
return da->value + ra.addend == db->value + rb.addend;
139
}
140
isecA = da->isec();
141
valueA = da->value;
142
isecB = db->isec();
143
valueB = db->value;
144
} else {
145
isecA = ra.referent.get<InputSection *>();
146
isecB = rb.referent.get<InputSection *>();
147
}
148
149
if (isecA->parent != isecB->parent)
150
return false;
151
// Sections with identical parents should be of the same kind.
152
assert(isecA->kind() == isecB->kind());
153
// We will compare ConcatInputSection contents in equalsVariable.
154
if (isa<ConcatInputSection>(isecA))
155
return ra.addend == rb.addend;
156
// Else we have two literal sections. References to them are equal iff their
157
// offsets in the output section are equal.
158
if (ra.referent.is<Symbol *>())
159
// For symbol relocs, we compare the contents at the symbol address. We
160
// don't do `getOffset(value + addend)` because value + addend may not be
161
// a valid offset in the literal section.
162
return isecA->getOffset(valueA) == isecB->getOffset(valueB) &&
163
ra.addend == rb.addend;
164
else {
165
assert(valueA == 0 && valueB == 0);
166
// For section relocs, we compare the content at the section offset.
167
return isecA->getOffset(ra.addend) == isecB->getOffset(rb.addend);
168
}
169
};
170
return std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(),
171
f);
172
}
173
174
// Compare the "moving" parts of two ConcatInputSections -- i.e. everything not
175
// handled by equalsConstant().
176
bool ICF::equalsVariable(const ConcatInputSection *ia,
177
const ConcatInputSection *ib) {
178
if (verboseDiagnostics)
179
++equalsVariableCount;
180
assert(ia->relocs.size() == ib->relocs.size());
181
auto f = [this](const Reloc &ra, const Reloc &rb) {
182
// We already filtered out mismatching values/addends in equalsConstant.
183
if (ra.referent == rb.referent)
184
return true;
185
const ConcatInputSection *isecA, *isecB;
186
if (ra.referent.is<Symbol *>()) {
187
// Matching DylibSymbols are already filtered out by the
188
// identical-referent check above. Non-matching DylibSymbols were filtered
189
// out in equalsConstant(). So we can safely cast to Defined here.
190
const auto *da = cast<Defined>(ra.referent.get<Symbol *>());
191
const auto *db = cast<Defined>(rb.referent.get<Symbol *>());
192
if (da->isAbsolute())
193
return true;
194
isecA = dyn_cast<ConcatInputSection>(da->isec());
195
if (!isecA)
196
return true; // literal sections were checked in equalsConstant.
197
isecB = cast<ConcatInputSection>(db->isec());
198
} else {
199
const auto *sa = ra.referent.get<InputSection *>();
200
const auto *sb = rb.referent.get<InputSection *>();
201
isecA = dyn_cast<ConcatInputSection>(sa);
202
if (!isecA)
203
return true;
204
isecB = cast<ConcatInputSection>(sb);
205
}
206
return isecA->icfEqClass[icfPass % 2] == isecB->icfEqClass[icfPass % 2];
207
};
208
if (!std::equal(ia->relocs.begin(), ia->relocs.end(), ib->relocs.begin(), f))
209
return false;
210
211
// If there are symbols with associated unwind info, check that the unwind
212
// info matches. For simplicity, we only handle the case where there are only
213
// symbols at offset zero within the section (which is typically the case with
214
// .subsections_via_symbols.)
215
auto hasUnwind = [](Defined *d) { return d->unwindEntry() != nullptr; };
216
const auto *itA = llvm::find_if(ia->symbols, hasUnwind);
217
const auto *itB = llvm::find_if(ib->symbols, hasUnwind);
218
if (itA == ia->symbols.end())
219
return itB == ib->symbols.end();
220
if (itB == ib->symbols.end())
221
return false;
222
const Defined *da = *itA;
223
const Defined *db = *itB;
224
if (da->unwindEntry()->icfEqClass[icfPass % 2] !=
225
db->unwindEntry()->icfEqClass[icfPass % 2] ||
226
da->value != 0 || db->value != 0)
227
return false;
228
auto isZero = [](Defined *d) { return d->value == 0; };
229
return std::find_if_not(std::next(itA), ia->symbols.end(), isZero) ==
230
ia->symbols.end() &&
231
std::find_if_not(std::next(itB), ib->symbols.end(), isZero) ==
232
ib->symbols.end();
233
}
234
235
// Find the first InputSection after BEGIN whose equivalence class differs
236
size_t ICF::findBoundary(size_t begin, size_t end) {
237
uint64_t beginHash = icfInputs[begin]->icfEqClass[icfPass % 2];
238
for (size_t i = begin + 1; i < end; ++i)
239
if (beginHash != icfInputs[i]->icfEqClass[icfPass % 2])
240
return i;
241
return end;
242
}
243
244
// Invoke FUNC on subranges with matching equivalence class
245
void ICF::forEachClassRange(size_t begin, size_t end,
246
llvm::function_ref<void(size_t, size_t)> func) {
247
while (begin < end) {
248
size_t mid = findBoundary(begin, end);
249
func(begin, mid);
250
begin = mid;
251
}
252
}
253
254
// Split icfInputs into shards, then parallelize invocation of FUNC on subranges
255
// with matching equivalence class
256
void ICF::forEachClass(llvm::function_ref<void(size_t, size_t)> func) {
257
// Only use threads when the benefits outweigh the overhead.
258
const size_t threadingThreshold = 1024;
259
if (icfInputs.size() < threadingThreshold) {
260
forEachClassRange(0, icfInputs.size(), func);
261
++icfPass;
262
return;
263
}
264
265
// Shard into non-overlapping intervals, and call FUNC in parallel. The
266
// sharding must be completed before any calls to FUNC are made so that FUNC
267
// can modify the InputSection in its shard without causing data races.
268
const size_t shards = 256;
269
size_t step = icfInputs.size() / shards;
270
size_t boundaries[shards + 1];
271
boundaries[0] = 0;
272
boundaries[shards] = icfInputs.size();
273
parallelFor(1, shards, [&](size_t i) {
274
boundaries[i] = findBoundary((i - 1) * step, icfInputs.size());
275
});
276
parallelFor(1, shards + 1, [&](size_t i) {
277
if (boundaries[i - 1] < boundaries[i]) {
278
forEachClassRange(boundaries[i - 1], boundaries[i], func);
279
}
280
});
281
++icfPass;
282
}
283
284
void ICF::run() {
285
// Into each origin-section hash, combine all reloc referent section hashes.
286
for (icfPass = 0; icfPass < 2; ++icfPass) {
287
parallelForEach(icfInputs, [&](ConcatInputSection *isec) {
288
uint32_t hash = isec->icfEqClass[icfPass % 2];
289
for (const Reloc &r : isec->relocs) {
290
if (auto *sym = r.referent.dyn_cast<Symbol *>()) {
291
if (auto *defined = dyn_cast<Defined>(sym)) {
292
if (defined->isec()) {
293
if (auto *referentIsec =
294
dyn_cast<ConcatInputSection>(defined->isec()))
295
hash += defined->value + referentIsec->icfEqClass[icfPass % 2];
296
else
297
hash += defined->isec()->kind() +
298
defined->isec()->getOffset(defined->value);
299
} else {
300
hash += defined->value;
301
}
302
} else {
303
// ICF runs before Undefined diags
304
assert(isa<Undefined>(sym) || isa<DylibSymbol>(sym));
305
}
306
}
307
}
308
// Set MSB to 1 to avoid collisions with non-hashed classes.
309
isec->icfEqClass[(icfPass + 1) % 2] = hash | (1ull << 31);
310
});
311
}
312
313
llvm::stable_sort(
314
icfInputs, [](const ConcatInputSection *a, const ConcatInputSection *b) {
315
return a->icfEqClass[0] < b->icfEqClass[0];
316
});
317
forEachClass([&](size_t begin, size_t end) {
318
segregate(begin, end, &ICF::equalsConstant);
319
});
320
321
// Split equivalence groups by comparing relocations until convergence
322
do {
323
icfRepeat = false;
324
forEachClass([&](size_t begin, size_t end) {
325
segregate(begin, end, &ICF::equalsVariable);
326
});
327
} while (icfRepeat);
328
log("ICF needed " + Twine(icfPass) + " iterations");
329
if (verboseDiagnostics) {
330
log("equalsConstant() called " + Twine(equalsConstantCount) + " times");
331
log("equalsVariable() called " + Twine(equalsVariableCount) + " times");
332
}
333
334
// Fold sections within equivalence classes
335
forEachClass([&](size_t begin, size_t end) {
336
if (end - begin < 2)
337
return;
338
ConcatInputSection *beginIsec = icfInputs[begin];
339
for (size_t i = begin + 1; i < end; ++i)
340
beginIsec->foldIdentical(icfInputs[i]);
341
});
342
}
343
344
// Split an equivalence class into smaller classes.
345
void ICF::segregate(size_t begin, size_t end, EqualsFn equals) {
346
while (begin < end) {
347
// Divide [begin, end) into two. Let mid be the start index of the
348
// second group.
349
auto bound = std::stable_partition(
350
icfInputs.begin() + begin + 1, icfInputs.begin() + end,
351
[&](ConcatInputSection *isec) {
352
return (this->*equals)(icfInputs[begin], isec);
353
});
354
size_t mid = bound - icfInputs.begin();
355
356
// Split [begin, end) into [begin, mid) and [mid, end). We use mid as an
357
// equivalence class ID because every group ends with a unique index.
358
for (size_t i = begin; i < mid; ++i)
359
icfInputs[i]->icfEqClass[(icfPass + 1) % 2] = mid;
360
361
// If we created a group, we need to iterate the main loop again.
362
if (mid != end)
363
icfRepeat = true;
364
365
begin = mid;
366
}
367
}
368
369
void macho::markSymAsAddrSig(Symbol *s) {
370
if (auto *d = dyn_cast_or_null<Defined>(s))
371
if (d->isec())
372
d->isec()->keepUnique = true;
373
}
374
375
void macho::markAddrSigSymbols() {
376
TimeTraceScope timeScope("Mark addrsig symbols");
377
for (InputFile *file : inputFiles) {
378
ObjFile *obj = dyn_cast<ObjFile>(file);
379
if (!obj)
380
continue;
381
382
Section *addrSigSection = obj->addrSigSection;
383
if (!addrSigSection)
384
continue;
385
assert(addrSigSection->subsections.size() == 1);
386
387
const InputSection *isec = addrSigSection->subsections[0].isec;
388
389
for (const Reloc &r : isec->relocs) {
390
if (auto *sym = r.referent.dyn_cast<Symbol *>())
391
markSymAsAddrSig(sym);
392
else
393
error(toString(isec) + ": unexpected section relocation");
394
}
395
}
396
}
397
398
void macho::foldIdenticalSections(bool onlyCfStrings) {
399
TimeTraceScope timeScope("Fold Identical Code Sections");
400
// The ICF equivalence-class segregation algorithm relies on pre-computed
401
// hashes of InputSection::data for the ConcatOutputSection::inputs and all
402
// sections referenced by their relocs. We could recursively traverse the
403
// relocs to find every referenced InputSection, but that precludes easy
404
// parallelization. Therefore, we hash every InputSection here where we have
405
// them all accessible as simple vectors.
406
407
// If an InputSection is ineligible for ICF, we give it a unique ID to force
408
// it into an unfoldable singleton equivalence class. Begin the unique-ID
409
// space at inputSections.size(), so that it will never intersect with
410
// equivalence-class IDs which begin at 0. Since hashes & unique IDs never
411
// coexist with equivalence-class IDs, this is not necessary, but might help
412
// someone keep the numbers straight in case we ever need to debug the
413
// ICF::segregate()
414
std::vector<ConcatInputSection *> foldable;
415
uint64_t icfUniqueID = inputSections.size();
416
for (ConcatInputSection *isec : inputSections) {
417
bool isFoldableWithAddendsRemoved = isCfStringSection(isec) ||
418
isClassRefsSection(isec) ||
419
isSelRefsSection(isec);
420
// NOTE: __objc_selrefs is typically marked as no_dead_strip by MC, but we
421
// can still fold it.
422
bool hasFoldableFlags = (isSelRefsSection(isec) ||
423
sectionType(isec->getFlags()) == MachO::S_REGULAR);
424
// FIXME: consider non-code __text sections as foldable?
425
bool isFoldable = (!onlyCfStrings || isCfStringSection(isec)) &&
426
(isCodeSection(isec) || isFoldableWithAddendsRemoved ||
427
isGccExceptTabSection(isec)) &&
428
!isec->keepUnique && !isec->hasAltEntry &&
429
!isec->shouldOmitFromOutput() && hasFoldableFlags;
430
if (isFoldable) {
431
foldable.push_back(isec);
432
for (Defined *d : isec->symbols)
433
if (d->unwindEntry())
434
foldable.push_back(d->unwindEntry());
435
436
// Some sections have embedded addends that foil ICF's hashing / equality
437
// checks. (We can ignore embedded addends when doing ICF because the same
438
// information gets recorded in our Reloc structs.) We therefore create a
439
// mutable copy of the section data and zero out the embedded addends
440
// before performing any hashing / equality checks.
441
if (isFoldableWithAddendsRemoved) {
442
// We have to do this copying serially as the BumpPtrAllocator is not
443
// thread-safe. FIXME: Make a thread-safe allocator.
444
MutableArrayRef<uint8_t> copy = isec->data.copy(bAlloc());
445
for (const Reloc &r : isec->relocs)
446
target->relocateOne(copy.data() + r.offset, r, /*va=*/0,
447
/*relocVA=*/0);
448
isec->data = copy;
449
}
450
} else if (!isEhFrameSection(isec)) {
451
// EH frames are gathered as foldables from unwindEntry above; give a
452
// unique ID to everything else.
453
isec->icfEqClass[0] = ++icfUniqueID;
454
}
455
}
456
parallelForEach(foldable, [](ConcatInputSection *isec) {
457
assert(isec->icfEqClass[0] == 0); // don't overwrite a unique ID!
458
// Turn-on the top bit to guarantee that valid hashes have no collisions
459
// with the small-integer unique IDs for ICF-ineligible sections
460
isec->icfEqClass[0] = xxh3_64bits(isec->data) | (1ull << 31);
461
});
462
// Now that every input section is either hashed or marked as unique, run the
463
// segregation algorithm to detect foldable subsections.
464
ICF(foldable).run();
465
}
466
467