Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/MachO/SyntheticSections.h
34878 views
1
//===- SyntheticSections.h -------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#ifndef LLD_MACHO_SYNTHETIC_SECTIONS_H
10
#define LLD_MACHO_SYNTHETIC_SECTIONS_H
11
12
#include "Config.h"
13
#include "ExportTrie.h"
14
#include "InputSection.h"
15
#include "OutputSection.h"
16
#include "OutputSegment.h"
17
#include "Target.h"
18
#include "Writer.h"
19
20
#include "llvm/ADT/DenseMap.h"
21
#include "llvm/ADT/Hashing.h"
22
#include "llvm/ADT/MapVector.h"
23
#include "llvm/ADT/SetVector.h"
24
#include "llvm/BinaryFormat/MachO.h"
25
#include "llvm/Support/MathExtras.h"
26
#include "llvm/Support/raw_ostream.h"
27
28
#include <unordered_map>
29
30
namespace llvm {
31
class DWARFUnit;
32
} // namespace llvm
33
34
namespace lld::macho {
35
36
class Defined;
37
class DylibSymbol;
38
class LoadCommand;
39
class ObjFile;
40
class UnwindInfoSection;
41
42
class SyntheticSection : public OutputSection {
43
public:
44
SyntheticSection(const char *segname, const char *name);
45
virtual ~SyntheticSection() = default;
46
47
static bool classof(const OutputSection *sec) {
48
return sec->kind() == SyntheticKind;
49
}
50
51
StringRef segname;
52
// This fake InputSection makes it easier for us to write code that applies
53
// generically to both user inputs and synthetics.
54
InputSection *isec;
55
};
56
57
// All sections in __LINKEDIT should inherit from this.
58
class LinkEditSection : public SyntheticSection {
59
public:
60
LinkEditSection(const char *segname, const char *name)
61
: SyntheticSection(segname, name) {
62
align = target->wordSize;
63
}
64
65
// Implementations of this method can assume that the regular (non-__LINKEDIT)
66
// sections already have their addresses assigned.
67
virtual void finalizeContents() {}
68
69
// Sections in __LINKEDIT are special: their offsets are recorded in the
70
// load commands like LC_DYLD_INFO_ONLY and LC_SYMTAB, instead of in section
71
// headers.
72
bool isHidden() const final { return true; }
73
74
virtual uint64_t getRawSize() const = 0;
75
76
// codesign (or more specifically libstuff) checks that each section in
77
// __LINKEDIT ends where the next one starts -- no gaps are permitted. We
78
// therefore align every section's start and end points to WordSize.
79
//
80
// NOTE: This assumes that the extra bytes required for alignment can be
81
// zero-valued bytes.
82
uint64_t getSize() const final { return llvm::alignTo(getRawSize(), align); }
83
};
84
85
// The header of the Mach-O file, which must have a file offset of zero.
86
class MachHeaderSection final : public SyntheticSection {
87
public:
88
MachHeaderSection();
89
bool isHidden() const override { return true; }
90
uint64_t getSize() const override;
91
void writeTo(uint8_t *buf) const override;
92
93
void addLoadCommand(LoadCommand *);
94
95
protected:
96
std::vector<LoadCommand *> loadCommands;
97
uint32_t sizeOfCmds = 0;
98
};
99
100
// A hidden section that exists solely for the purpose of creating the
101
// __PAGEZERO segment, which is used to catch null pointer dereferences.
102
class PageZeroSection final : public SyntheticSection {
103
public:
104
PageZeroSection();
105
bool isHidden() const override { return true; }
106
bool isNeeded() const override { return target->pageZeroSize != 0; }
107
uint64_t getSize() const override { return target->pageZeroSize; }
108
uint64_t getFileSize() const override { return 0; }
109
void writeTo(uint8_t *buf) const override {}
110
};
111
112
// This is the base class for the GOT and TLVPointer sections, which are nearly
113
// functionally identical -- they will both be populated by dyld with addresses
114
// to non-lazily-loaded dylib symbols. The main difference is that the
115
// TLVPointerSection stores references to thread-local variables.
116
class NonLazyPointerSectionBase : public SyntheticSection {
117
public:
118
NonLazyPointerSectionBase(const char *segname, const char *name);
119
const llvm::SetVector<const Symbol *> &getEntries() const { return entries; }
120
bool isNeeded() const override { return !entries.empty(); }
121
uint64_t getSize() const override {
122
return entries.size() * target->wordSize;
123
}
124
void writeTo(uint8_t *buf) const override;
125
void addEntry(Symbol *sym);
126
uint64_t getVA(uint32_t gotIndex) const {
127
return addr + gotIndex * target->wordSize;
128
}
129
130
private:
131
llvm::SetVector<const Symbol *> entries;
132
};
133
134
// The GOT flavour of NonLazyPointerSectionBase; adds only a constructor.
class GotSection final : public NonLazyPointerSectionBase {
public:
  GotSection();
};
138
139
// The thread-local-variable pointer flavour of NonLazyPointerSectionBase; adds
// only a constructor.
class TlvPointerSection final : public NonLazyPointerSectionBase {
public:
  TlvPointerSection();
};
143
144
struct Location {
145
const InputSection *isec;
146
uint64_t offset;
147
148
Location(const InputSection *isec, uint64_t offset)
149
: isec(isec), offset(offset) {}
150
uint64_t getVA() const { return isec->getVA(offset); }
151
};
152
153
// Stores rebase opcodes, which tell dyld where absolute addresses have been
154
// encoded in the binary. If the binary is not loaded at its preferred address,
155
// dyld has to rebase these addresses by adding an offset to them.
156
class RebaseSection final : public LinkEditSection {
157
public:
158
RebaseSection();
159
void finalizeContents() override;
160
uint64_t getRawSize() const override { return contents.size(); }
161
bool isNeeded() const override { return !locations.empty(); }
162
void writeTo(uint8_t *buf) const override;
163
164
void addEntry(const InputSection *isec, uint64_t offset) {
165
if (config->isPic)
166
locations.emplace_back(isec, offset);
167
}
168
169
private:
170
std::vector<Location> locations;
171
SmallVector<char, 128> contents;
172
};
173
174
struct BindingEntry {
175
int64_t addend;
176
Location target;
177
BindingEntry(int64_t addend, Location target)
178
: addend(addend), target(target) {}
179
};
180
181
template <class Sym>
182
using BindingsMap = llvm::DenseMap<Sym, std::vector<BindingEntry>>;
183
184
// Stores bind opcodes for telling dyld which symbols to load non-lazily.
185
class BindingSection final : public LinkEditSection {
186
public:
187
BindingSection();
188
void finalizeContents() override;
189
uint64_t getRawSize() const override { return contents.size(); }
190
bool isNeeded() const override { return !bindingsMap.empty(); }
191
void writeTo(uint8_t *buf) const override;
192
193
void addEntry(const Symbol *dysym, const InputSection *isec, uint64_t offset,
194
int64_t addend = 0) {
195
bindingsMap[dysym].emplace_back(addend, Location(isec, offset));
196
}
197
198
private:
199
BindingsMap<const Symbol *> bindingsMap;
200
SmallVector<char, 128> contents;
201
};
202
203
// Stores bind opcodes for telling dyld which weak symbols need coalescing.
204
// There are two types of entries in this section:
205
//
206
// 1) Non-weak definitions: This is a symbol definition that weak symbols in
207
// other dylibs should coalesce to.
208
//
209
// 2) Weak bindings: These tell dyld that a given symbol reference should
210
// coalesce to a non-weak definition if one is found. Note that unlike the
211
// entries in the BindingSection, the bindings here only refer to these
212
// symbols by name, but do not specify which dylib to load them from.
213
class WeakBindingSection final : public LinkEditSection {
214
public:
215
WeakBindingSection();
216
void finalizeContents() override;
217
uint64_t getRawSize() const override { return contents.size(); }
218
bool isNeeded() const override {
219
return !bindingsMap.empty() || !definitions.empty();
220
}
221
222
void writeTo(uint8_t *buf) const override;
223
224
void addEntry(const Symbol *symbol, const InputSection *isec, uint64_t offset,
225
int64_t addend = 0) {
226
bindingsMap[symbol].emplace_back(addend, Location(isec, offset));
227
}
228
229
bool hasEntry() const { return !bindingsMap.empty(); }
230
231
void addNonWeakDefinition(const Defined *defined) {
232
definitions.emplace_back(defined);
233
}
234
235
bool hasNonWeakDefinition() const { return !definitions.empty(); }
236
237
private:
238
BindingsMap<const Symbol *> bindingsMap;
239
std::vector<const Defined *> definitions;
240
SmallVector<char, 128> contents;
241
};
242
243
// The following sections implement lazy symbol binding -- very similar to the
244
// PLT mechanism in ELF.
245
//
246
// ELF's .plt section is broken up into two sections in Mach-O: StubsSection
247
// and StubHelperSection. Calls to functions in dylibs will end up calling into
248
// StubsSection, which contains indirect jumps to addresses stored in the
249
// LazyPointerSection (the counterpart to ELF's .plt.got).
250
//
251
// We will first describe how non-weak symbols are handled.
252
//
253
// At program start, the LazyPointerSection contains addresses that point into
254
// one of the entry points in the middle of the StubHelperSection. The code in
255
// StubHelperSection will push on the stack an offset into the
256
// LazyBindingSection. The push is followed by a jump to the beginning of the
257
// StubHelperSection (similar to PLT0), which then calls into dyld_stub_binder.
258
// dyld_stub_binder is a non-lazily-bound symbol, so this call looks it up in
259
// the GOT.
260
//
261
// The stub binder will look up the bind opcodes in the LazyBindingSection at
262
// the given offset. The bind opcodes will tell the binder to update the
263
// address in the LazyPointerSection to point to the symbol, so that subsequent
264
// calls don't have to redo the symbol resolution. The binder will then jump to
265
// the resolved symbol.
266
//
267
// With weak symbols, the situation is slightly different. Since there is no
268
// "weak lazy" lookup, function calls to weak symbols are always non-lazily
269
// bound. We emit both regular non-lazy bindings as well as weak bindings, in
270
// order that the weak bindings may overwrite the non-lazy bindings if an
271
// appropriate symbol is found at runtime. However, the bound addresses will
272
// still be written (non-lazily) into the LazyPointerSection.
273
//
274
// Symbols are always bound eagerly when chained fixups are used. In that case,
275
// StubsSection contains indirect jumps to addresses stored in the GotSection.
276
// The GOT directly contains the fixup entries, which will be replaced by the
277
// address of the target symbols on load. LazyPointerSection and
278
// StubHelperSection are not used.
279
280
class StubsSection final : public SyntheticSection {
281
public:
282
StubsSection();
283
uint64_t getSize() const override;
284
bool isNeeded() const override { return !entries.empty(); }
285
void finalize() override;
286
void writeTo(uint8_t *buf) const override;
287
const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
288
// Creates a stub for the symbol and the corresponding entry in the
289
// LazyPointerSection.
290
void addEntry(Symbol *);
291
uint64_t getVA(uint32_t stubsIndex) const {
292
assert(isFinal || target->usesThunks());
293
// ConcatOutputSection::finalize() can seek the address of a
294
// stub before its address is assigned. Before __stubs is
295
// finalized, return a contrived out-of-range address.
296
return isFinal ? addr + stubsIndex * target->stubSize
297
: TargetInfo::outOfRangeVA;
298
}
299
300
bool isFinal = false; // is address assigned?
301
302
private:
303
llvm::SetVector<Symbol *> entries;
304
};
305
306
class StubHelperSection final : public SyntheticSection {
307
public:
308
StubHelperSection();
309
uint64_t getSize() const override;
310
bool isNeeded() const override;
311
void writeTo(uint8_t *buf) const override;
312
313
void setUp();
314
315
DylibSymbol *stubBinder = nullptr;
316
Defined *dyldPrivate = nullptr;
317
};
318
319
class ObjCSelRefsHelper {
320
public:
321
static void initialize();
322
static void cleanup();
323
324
static ConcatInputSection *getSelRef(StringRef methname);
325
static ConcatInputSection *makeSelRef(StringRef methname);
326
327
private:
328
static llvm::DenseMap<llvm::CachedHashStringRef, ConcatInputSection *>
329
methnameToSelref;
330
};
331
332
// Objective-C stubs are hoisted objc_msgSend calls per selector called in the
333
// program. Apple Clang produces undefined symbols to each stub, such as
334
// '_objc_msgSend$foo', which are then synthesized by the linker. The stubs
335
// load the particular selector 'foo' from __objc_selrefs, setting it to the
336
// first argument of the objc_msgSend call, and then jumps to objc_msgSend. The
337
// actual stub contents are mirrored from ld64.
338
class ObjCStubsSection final : public SyntheticSection {
339
public:
340
ObjCStubsSection();
341
void addEntry(Symbol *sym);
342
uint64_t getSize() const override;
343
bool isNeeded() const override { return !symbols.empty(); }
344
void finalize() override { isec->isFinal = true; }
345
void writeTo(uint8_t *buf) const override;
346
void setUp();
347
348
static constexpr llvm::StringLiteral symbolPrefix = "_objc_msgSend$";
349
static bool isObjCStubSymbol(Symbol *sym);
350
static StringRef getMethname(Symbol *sym);
351
352
private:
353
std::vector<Defined *> symbols;
354
Symbol *objcMsgSend = nullptr;
355
};
356
357
// Note that this section may also be targeted by non-lazy bindings. In
358
// particular, this happens when branch relocations target weak symbols.
359
class LazyPointerSection final : public SyntheticSection {
360
public:
361
LazyPointerSection();
362
uint64_t getSize() const override;
363
bool isNeeded() const override;
364
void writeTo(uint8_t *buf) const override;
365
uint64_t getVA(uint32_t index) const {
366
return addr + (index << target->p2WordSize);
367
}
368
};
369
370
class LazyBindingSection final : public LinkEditSection {
371
public:
372
LazyBindingSection();
373
void finalizeContents() override;
374
uint64_t getRawSize() const override { return contents.size(); }
375
bool isNeeded() const override { return !entries.empty(); }
376
void writeTo(uint8_t *buf) const override;
377
// Note that every entry here will by referenced by a corresponding entry in
378
// the StubHelperSection.
379
void addEntry(Symbol *dysym);
380
const llvm::SetVector<Symbol *> &getEntries() const { return entries; }
381
382
private:
383
uint32_t encode(const Symbol &);
384
385
llvm::SetVector<Symbol *> entries;
386
SmallVector<char, 128> contents;
387
llvm::raw_svector_ostream os{contents};
388
};
389
390
// Stores a trie that describes the set of exported symbols.
391
class ExportSection final : public LinkEditSection {
392
public:
393
ExportSection();
394
void finalizeContents() override;
395
uint64_t getRawSize() const override { return size; }
396
bool isNeeded() const override { return size; }
397
void writeTo(uint8_t *buf) const override;
398
399
bool hasWeakSymbol = false;
400
401
private:
402
TrieBuilder trieBuilder;
403
size_t size = 0;
404
};
405
406
// Stores 'data in code' entries that describe the locations of data regions
407
// inside code sections. This is used by llvm-objdump to distinguish jump tables
408
// and stop them from being disassembled as instructions.
409
class DataInCodeSection final : public LinkEditSection {
410
public:
411
DataInCodeSection();
412
void finalizeContents() override;
413
uint64_t getRawSize() const override {
414
return sizeof(llvm::MachO::data_in_code_entry) * entries.size();
415
}
416
void writeTo(uint8_t *buf) const override;
417
418
private:
419
std::vector<llvm::MachO::data_in_code_entry> entries;
420
};
421
422
// Stores ULEB128 delta encoded addresses of functions.
423
class FunctionStartsSection final : public LinkEditSection {
424
public:
425
FunctionStartsSection();
426
void finalizeContents() override;
427
uint64_t getRawSize() const override { return contents.size(); }
428
void writeTo(uint8_t *buf) const override;
429
430
private:
431
SmallVector<char, 128> contents;
432
};
433
434
// Stores the strings referenced by the symbol table.
435
class StringTableSection final : public LinkEditSection {
436
public:
437
StringTableSection();
438
// Returns the start offset of the added string.
439
uint32_t addString(StringRef);
440
uint64_t getRawSize() const override { return size; }
441
void writeTo(uint8_t *buf) const override;
442
443
static constexpr size_t emptyStringIndex = 1;
444
445
private:
446
// ld64 emits string tables which start with a space and a zero byte. We
447
// match its behavior here since some tools depend on it.
448
// Consequently, the empty string will be at index 1, not zero.
449
std::vector<StringRef> strings{" "};
450
size_t size = 2;
451
};
452
453
struct SymtabEntry {
454
Symbol *sym;
455
size_t strx;
456
};
457
458
struct StabsEntry {
459
uint8_t type = 0;
460
uint32_t strx = StringTableSection::emptyStringIndex;
461
uint8_t sect = 0;
462
uint16_t desc = 0;
463
uint64_t value = 0;
464
465
StabsEntry() = default;
466
explicit StabsEntry(uint8_t type) : type(type) {}
467
};
468
469
// Symbols of the same type must be laid out contiguously: we choose to emit
470
// all local symbols first, then external symbols, and finally undefined
471
// symbols. For each symbol type, the LC_DYSYMTAB load command will record the
472
// range (start index and total number) of those symbols in the symbol table.
473
class SymtabSection : public LinkEditSection {
474
public:
475
void finalizeContents() override;
476
uint32_t getNumSymbols() const;
477
uint32_t getNumLocalSymbols() const {
478
return stabs.size() + localSymbols.size();
479
}
480
uint32_t getNumExternalSymbols() const { return externalSymbols.size(); }
481
uint32_t getNumUndefinedSymbols() const { return undefinedSymbols.size(); }
482
483
private:
484
void emitBeginSourceStab(StringRef);
485
void emitEndSourceStab();
486
void emitObjectFileStab(ObjFile *);
487
void emitEndFunStab(Defined *);
488
void emitStabs();
489
490
protected:
491
SymtabSection(StringTableSection &);
492
493
StringTableSection &stringTableSection;
494
// STABS symbols are always local symbols, but we represent them with special
495
// entries because they may use fields like n_sect and n_desc differently.
496
std::vector<StabsEntry> stabs;
497
std::vector<SymtabEntry> localSymbols;
498
std::vector<SymtabEntry> externalSymbols;
499
std::vector<SymtabEntry> undefinedSymbols;
500
};
501
502
template <class LP> SymtabSection *makeSymtabSection(StringTableSection &);
503
504
// The indirect symbol table is a list of 32-bit integers that serve as indices
505
// into the (actual) symbol table. The indirect symbol table is a
506
// concatenation of several sub-arrays of indices, each sub-array belonging to
507
// a separate section. The starting offset of each sub-array is stored in the
508
// reserved1 header field of the respective section.
509
//
510
// These sub-arrays provide symbol information for sections that store
511
// contiguous sequences of symbol references. These references can be pointers
512
// (e.g. those in the GOT and TLVP sections) or assembly sequences (e.g.
513
// function stubs).
514
class IndirectSymtabSection final : public LinkEditSection {
515
public:
516
IndirectSymtabSection();
517
void finalizeContents() override;
518
uint32_t getNumSymbols() const;
519
uint64_t getRawSize() const override {
520
return getNumSymbols() * sizeof(uint32_t);
521
}
522
bool isNeeded() const override;
523
void writeTo(uint8_t *buf) const override;
524
};
525
526
// The code signature comes at the very end of the linked output file.
527
class CodeSignatureSection final : public LinkEditSection {
528
public:
529
// NOTE: These values are duplicated in llvm-objcopy's MachO/Object.h file
530
// and any changes here, should be repeated there.
531
static constexpr uint8_t blockSizeShift = 12;
532
static constexpr size_t blockSize = (1 << blockSizeShift); // 4 KiB
533
static constexpr size_t hashSize = 256 / 8;
534
static constexpr size_t blobHeadersSize = llvm::alignTo<8>(
535
sizeof(llvm::MachO::CS_SuperBlob) + sizeof(llvm::MachO::CS_BlobIndex));
536
static constexpr uint32_t fixedHeadersSize =
537
blobHeadersSize + sizeof(llvm::MachO::CS_CodeDirectory);
538
539
uint32_t fileNamePad = 0;
540
uint32_t allHeadersSize = 0;
541
StringRef fileName;
542
543
CodeSignatureSection();
544
uint64_t getRawSize() const override;
545
bool isNeeded() const override { return true; }
546
void writeTo(uint8_t *buf) const override;
547
uint32_t getBlockCount() const;
548
void writeHashes(uint8_t *buf) const;
549
};
550
551
class CStringSection : public SyntheticSection {
552
public:
553
CStringSection(const char *name);
554
void addInput(CStringInputSection *);
555
uint64_t getSize() const override { return size; }
556
virtual void finalizeContents();
557
bool isNeeded() const override { return !inputs.empty(); }
558
void writeTo(uint8_t *buf) const override;
559
560
std::vector<CStringInputSection *> inputs;
561
562
private:
563
uint64_t size;
564
};
565
566
class DeduplicatedCStringSection final : public CStringSection {
567
public:
568
DeduplicatedCStringSection(const char *name) : CStringSection(name){};
569
uint64_t getSize() const override { return size; }
570
void finalizeContents() override;
571
void writeTo(uint8_t *buf) const override;
572
573
struct StringOffset {
574
uint8_t trailingZeros;
575
uint64_t outSecOff = UINT64_MAX;
576
577
explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
578
};
579
580
StringOffset getStringOffset(StringRef str) const;
581
582
private:
583
llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
584
size_t size = 0;
585
};
586
587
/*
588
* This section contains deduplicated literal values. The 16-byte values are
589
* laid out first, followed by the 8- and then the 4-byte ones.
590
*/
591
class WordLiteralSection final : public SyntheticSection {
592
public:
593
using UInt128 = std::pair<uint64_t, uint64_t>;
594
// I don't think the standard guarantees the size of a pair, so let's make
595
// sure it's exact -- that way we can construct it via `mmap`.
596
static_assert(sizeof(UInt128) == 16);
597
598
WordLiteralSection();
599
void addInput(WordLiteralInputSection *);
600
void finalizeContents();
601
void writeTo(uint8_t *buf) const override;
602
603
uint64_t getSize() const override {
604
return literal16Map.size() * 16 + literal8Map.size() * 8 +
605
literal4Map.size() * 4;
606
}
607
608
bool isNeeded() const override {
609
return !literal16Map.empty() || !literal4Map.empty() ||
610
!literal8Map.empty();
611
}
612
613
uint64_t getLiteral16Offset(uintptr_t buf) const {
614
return literal16Map.at(*reinterpret_cast<const UInt128 *>(buf)) * 16;
615
}
616
617
uint64_t getLiteral8Offset(uintptr_t buf) const {
618
return literal16Map.size() * 16 +
619
literal8Map.at(*reinterpret_cast<const uint64_t *>(buf)) * 8;
620
}
621
622
uint64_t getLiteral4Offset(uintptr_t buf) const {
623
return literal16Map.size() * 16 + literal8Map.size() * 8 +
624
literal4Map.at(*reinterpret_cast<const uint32_t *>(buf)) * 4;
625
}
626
627
private:
628
std::vector<WordLiteralInputSection *> inputs;
629
630
template <class T> struct Hasher {
631
llvm::hash_code operator()(T v) const { return llvm::hash_value(v); }
632
};
633
// We're using unordered_map instead of DenseMap here because we need to
634
// support all possible integer values -- there are no suitable tombstone
635
// values for DenseMap.
636
std::unordered_map<UInt128, uint64_t, Hasher<UInt128>> literal16Map;
637
std::unordered_map<uint64_t, uint64_t> literal8Map;
638
std::unordered_map<uint32_t, uint64_t> literal4Map;
639
};
640
641
class ObjCImageInfoSection final : public SyntheticSection {
642
public:
643
ObjCImageInfoSection();
644
bool isNeeded() const override { return !files.empty(); }
645
uint64_t getSize() const override { return 8; }
646
void addFile(const InputFile *file) {
647
assert(!file->objCImageInfo.empty());
648
files.push_back(file);
649
}
650
void finalizeContents();
651
void writeTo(uint8_t *buf) const override;
652
653
private:
654
struct ImageInfo {
655
uint8_t swiftVersion = 0;
656
bool hasCategoryClassProperties = false;
657
} info;
658
static ImageInfo parseImageInfo(const InputFile *);
659
std::vector<const InputFile *> files; // files with image info
660
};
661
662
// This section stores 32-bit __TEXT segment offsets of initializer functions.
663
//
664
// The compiler stores pointers to initializers in __mod_init_func. These need
665
// to be fixed up at load time, which takes time and dirties memory. By
666
// synthesizing InitOffsetsSection from them, this data can live in the
667
// read-only __TEXT segment instead. This section is used by default when
668
// chained fixups are enabled.
669
//
670
// There is no similar counterpart to __mod_term_func, as that section is
671
// deprecated, and static destructors are instead handled by registering them
672
// via __cxa_atexit from an autogenerated initializer function (see D121736).
673
class InitOffsetsSection final : public SyntheticSection {
674
public:
675
InitOffsetsSection();
676
bool isNeeded() const override { return !sections.empty(); }
677
uint64_t getSize() const override;
678
void writeTo(uint8_t *buf) const override;
679
void setUp();
680
681
void addInput(ConcatInputSection *isec) { sections.push_back(isec); }
682
const std::vector<ConcatInputSection *> &inputs() const { return sections; }
683
684
private:
685
std::vector<ConcatInputSection *> sections;
686
};
687
688
// This SyntheticSection is for the __objc_methlist section, which contains
689
// relative method lists if the -objc_relative_method_lists option is enabled.
690
class ObjCMethListSection final : public SyntheticSection {
691
public:
692
ObjCMethListSection();
693
694
static bool isMethodList(const ConcatInputSection *isec);
695
void addInput(ConcatInputSection *isec) { inputs.push_back(isec); }
696
std::vector<ConcatInputSection *> getInputs() { return inputs; }
697
698
void setUp();
699
void finalize() override;
700
bool isNeeded() const override { return !inputs.empty(); }
701
uint64_t getSize() const override { return sectionSize; }
702
void writeTo(uint8_t *bufStart) const override;
703
704
private:
705
void readMethodListHeader(const uint8_t *buf, uint32_t &structSizeAndFlags,
706
uint32_t &structCount) const;
707
void writeMethodListHeader(uint8_t *buf, uint32_t structSizeAndFlags,
708
uint32_t structCount) const;
709
uint32_t computeRelativeMethodListSize(uint32_t absoluteMethodListSize) const;
710
void writeRelativeOffsetForIsec(const ConcatInputSection *isec, uint8_t *buf,
711
uint32_t &inSecOff, uint32_t &outSecOff,
712
bool useSelRef) const;
713
uint32_t writeRelativeMethodList(const ConcatInputSection *isec,
714
uint8_t *buf) const;
715
716
static constexpr uint32_t methodListHeaderSize =
717
/*structSizeAndFlags*/ sizeof(uint32_t) +
718
/*structCount*/ sizeof(uint32_t);
719
// Relative method lists are supported only for 3-pointer method lists
720
static constexpr uint32_t pointersPerStruct = 3;
721
// The runtime identifies relative method lists via this magic value
722
static constexpr uint32_t relMethodHeaderFlag = 0x80000000;
723
// In the method list header, the first 2 bytes are the size of struct
724
static constexpr uint32_t structSizeMask = 0x0000FFFF;
725
// In the method list header, the last 2 bytes are the flags for the struct
726
static constexpr uint32_t structFlagsMask = 0xFFFF0000;
727
// Relative method lists have 4 byte alignment as all data in the InputSection
728
// is 4 byte
729
static constexpr uint32_t relativeOffsetSize = sizeof(uint32_t);
730
731
// The output size of the __objc_methlist section, computed during finalize()
732
uint32_t sectionSize = 0;
733
std::vector<ConcatInputSection *> inputs;
734
};
735
736
// Chained fixups are a replacement for classic dyld opcodes. In this format,
737
// most of the metadata necessary for binding symbols and rebasing addresses is
738
// stored directly in the memory location that will have the fixup applied.
739
//
740
// The fixups form singly linked lists; each one covering a single page in
741
// memory. The __LINKEDIT,__chainfixups section stores the page offset of the
742
// first fixup of each page; the rest can be found by walking the chain using
743
// the offset that is embedded in each entry.
744
//
745
// This setup allows pages to be relocated lazily at page-in time and without
746
// being dirtied. The kernel can discard and load them again as needed. This
747
// technique, called page-in linking, was introduced in macOS 13.
748
//
749
// The benefits of this format are:
750
// - smaller __LINKEDIT segment, as most of the fixup information is stored in
751
// the data segment
752
// - faster startup, since not all relocations need to be done upfront
753
// - slightly lower memory usage, as fewer pages are dirtied
754
//
755
// Userspace x86_64 and arm64 binaries have two types of fixup entries:
756
// - Rebase entries contain an absolute address, to which the object's load
757
// address will be added to get the final value. This is used for loading
758
// the address of a symbol defined in the same binary.
759
// - Binding entries are mostly used for symbols imported from other dylibs,
760
// but for weakly bound and interposable symbols as well. They are looked up
761
// by a (symbol name, library) pair stored in __chainfixups. This import
762
// entry also encodes whether the import is weak (i.e. if the symbol is
763
// missing, it should be set to null instead of producing a load error).
764
// The fixup encodes an ordinal associated with the import, and an optional
765
// addend.
766
//
767
// The entries are tightly packed 64-bit bitfields. One of the bits specifies
768
// which kind of fixup to interpret them as.
769
//
770
// LLD generates the fixup data in 5 stages:
771
// 1. While scanning relocations, we make a note of each location that needs
772
// a fixup by calling addRebase() or addBinding(). During this, we assign
773
// a unique ordinal for each (symbol name, library, addend) import tuple.
774
// 2. After addresses have been assigned to all sections, and thus the memory
775
// layout of the linked image is final; finalizeContents() is called. Here,
776
// the page offsets of the chain start entries are calculated.
777
// 3. ChainedFixupsSection::writeTo() writes the page start offsets and the
778
// imports table to the output file.
779
// 4. Each section's fixup entries are encoded and written to disk in
780
// ConcatInputSection::writeTo(), but without writing the offsets that form
781
// the chain.
782
// 5. Finally, each page's (which might correspond to multiple sections)
783
// fixups are linked together in Writer::buildFixupChains().
784
class ChainedFixupsSection final : public LinkEditSection {
785
public:
786
ChainedFixupsSection();
787
void finalizeContents() override;
788
uint64_t getRawSize() const override { return size; }
789
bool isNeeded() const override;
790
void writeTo(uint8_t *buf) const override;
791
792
void addRebase(const InputSection *isec, uint64_t offset) {
793
locations.emplace_back(isec, offset);
794
}
795
void addBinding(const Symbol *dysym, const InputSection *isec,
796
uint64_t offset, int64_t addend = 0);
797
798
void setHasNonWeakDefinition() { hasNonWeakDef = true; }
799
800
// Returns an (ordinal, inline addend) tuple used by dyld_chained_ptr_64_bind.
801
std::pair<uint32_t, uint8_t> getBinding(const Symbol *sym,
802
int64_t addend) const;
803
804
const std::vector<Location> &getLocations() const { return locations; }
805
806
bool hasWeakBinding() const { return hasWeakBind; }
807
bool hasNonWeakDefinition() const { return hasNonWeakDef; }
808
809
private:
810
// Location::offset initially stores the offset within an InputSection, but
811
// contains output segment offsets after finalizeContents().
812
std::vector<Location> locations;
813
// (target symbol, addend) => import ordinal
814
llvm::MapVector<std::pair<const Symbol *, int64_t>, uint32_t> bindings;
815
816
struct SegmentInfo {
817
SegmentInfo(const OutputSegment *oseg) : oseg(oseg) {}
818
819
const OutputSegment *oseg;
820
// (page index, fixup starts offset)
821
llvm::SmallVector<std::pair<uint16_t, uint16_t>> pageStarts;
822
823
size_t getSize() const;
824
size_t writeTo(uint8_t *buf) const;
825
};
826
llvm::SmallVector<SegmentInfo, 4> fixupSegments;
827
828
size_t symtabSize = 0;
829
size_t size = 0;
830
831
bool needsAddend = false;
832
bool needsLargeAddend = false;
833
bool hasWeakBind = false;
834
bool hasNonWeakDef = false;
835
llvm::MachO::ChainedImportFormat importFormat;
836
};
837
838
void writeChainedRebase(uint8_t *buf, uint64_t targetVA);
839
void writeChainedFixup(uint8_t *buf, const Symbol *sym, int64_t addend);
840
841
struct InStruct {
842
const uint8_t *bufferStart = nullptr;
843
MachHeaderSection *header = nullptr;
844
CStringSection *cStringSection = nullptr;
845
DeduplicatedCStringSection *objcMethnameSection = nullptr;
846
WordLiteralSection *wordLiteralSection = nullptr;
847
RebaseSection *rebase = nullptr;
848
BindingSection *binding = nullptr;
849
WeakBindingSection *weakBinding = nullptr;
850
LazyBindingSection *lazyBinding = nullptr;
851
ExportSection *exports = nullptr;
852
GotSection *got = nullptr;
853
TlvPointerSection *tlvPointers = nullptr;
854
LazyPointerSection *lazyPointers = nullptr;
855
StubsSection *stubs = nullptr;
856
StubHelperSection *stubHelper = nullptr;
857
ObjCStubsSection *objcStubs = nullptr;
858
UnwindInfoSection *unwindInfo = nullptr;
859
ObjCImageInfoSection *objCImageInfo = nullptr;
860
ConcatInputSection *imageLoaderCache = nullptr;
861
InitOffsetsSection *initOffsets = nullptr;
862
ObjCMethListSection *objcMethList = nullptr;
863
ChainedFixupsSection *chainedFixups = nullptr;
864
};
865
866
extern InStruct in;
867
extern std::vector<SyntheticSection *> syntheticSections;
868
869
void createSyntheticSymbols();
870
871
} // namespace lld::macho
872
873
#endif
874
875