Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/MachO/InputSection.h
34870 views
1
//===- InputSection.h -------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#ifndef LLD_MACHO_INPUT_SECTION_H
10
#define LLD_MACHO_INPUT_SECTION_H
11
12
#include "Config.h"
13
#include "Relocations.h"
14
#include "Symbols.h"
15
16
#include "lld/Common/LLVM.h"
17
#include "lld/Common/Memory.h"
18
#include "llvm/ADT/ArrayRef.h"
19
#include "llvm/ADT/BitVector.h"
20
#include "llvm/ADT/CachedHashString.h"
21
#include "llvm/ADT/TinyPtrVector.h"
22
#include "llvm/BinaryFormat/MachO.h"
23
24
namespace lld {
25
namespace macho {
26
27
class InputFile;
28
class OutputSection;
29
30
class InputSection {
31
public:
32
enum Kind : uint8_t {
33
ConcatKind,
34
CStringLiteralKind,
35
WordLiteralKind,
36
};
37
38
Kind kind() const { return sectionKind; }
39
virtual ~InputSection() = default;
40
virtual uint64_t getSize() const { return data.size(); }
41
virtual bool empty() const { return data.empty(); }
42
InputFile *getFile() const { return section.file; }
43
StringRef getName() const { return section.name; }
44
StringRef getSegName() const { return section.segname; }
45
uint32_t getFlags() const { return section.flags; }
46
uint64_t getFileSize() const;
47
// Translates \p off -- an offset relative to this InputSection -- into an
48
// offset from the beginning of its parent OutputSection.
49
virtual uint64_t getOffset(uint64_t off) const = 0;
50
// The offset from the beginning of the file.
51
uint64_t getVA(uint64_t off) const;
52
// Return a user-friendly string for use in diagnostics.
53
// Format: /path/to/object.o:(symbol _func+0x123)
54
std::string getLocation(uint64_t off) const;
55
// Return the source line corresponding to an address, or the empty string.
56
// Format: Source.cpp:123 (/path/to/Source.cpp:123)
57
std::string getSourceLocation(uint64_t off) const;
58
// Return the relocation at \p off, if it exists. This does a linear search.
59
const Reloc *getRelocAt(uint32_t off) const;
60
// Whether the data at \p off in this InputSection is live.
61
virtual bool isLive(uint64_t off) const = 0;
62
virtual void markLive(uint64_t off) = 0;
63
virtual InputSection *canonical() { return this; }
64
virtual const InputSection *canonical() const { return this; }
65
66
protected:
67
InputSection(Kind kind, const Section &section, ArrayRef<uint8_t> data,
68
uint32_t align)
69
: sectionKind(kind), keepUnique(false), hasAltEntry(false), align(align),
70
data(data), section(section) {}
71
72
InputSection(const InputSection &rhs)
73
: sectionKind(rhs.sectionKind), keepUnique(false), hasAltEntry(false),
74
align(rhs.align), data(rhs.data), section(rhs.section) {}
75
76
Kind sectionKind;
77
78
public:
79
// is address assigned?
80
bool isFinal = false;
81
// keep the address of the symbol(s) in this section unique in the final
82
// binary ?
83
bool keepUnique : 1;
84
// Does this section have symbols at offsets other than zero? (NOTE: only
85
// applies to ConcatInputSections.)
86
bool hasAltEntry : 1;
87
uint32_t align = 1;
88
89
OutputSection *parent = nullptr;
90
ArrayRef<uint8_t> data;
91
std::vector<Reloc> relocs;
92
// The symbols that belong to this InputSection, sorted by value. With
93
// .subsections_via_symbols, there is typically only one element here.
94
llvm::TinyPtrVector<Defined *> symbols;
95
96
const Section &section;
97
98
protected:
99
const Defined *getContainingSymbol(uint64_t off) const;
100
};
101
102
// ConcatInputSections are combined into (Concat)OutputSections through simple
103
// concatenation, in contrast with literal sections which may have their
104
// contents merged before output.
105
class ConcatInputSection final : public InputSection {
106
public:
107
ConcatInputSection(const Section &section, ArrayRef<uint8_t> data,
108
uint32_t align = 1)
109
: InputSection(ConcatKind, section, data, align) {}
110
111
uint64_t getOffset(uint64_t off) const override { return outSecOff + off; }
112
uint64_t getVA() const { return InputSection::getVA(0); }
113
// ConcatInputSections are entirely live or dead, so the offset is irrelevant.
114
bool isLive(uint64_t off) const override { return live; }
115
void markLive(uint64_t off) override { live = true; }
116
bool isCoalescedWeak() const { return wasCoalesced && symbols.empty(); }
117
bool shouldOmitFromOutput() const { return !live || isCoalescedWeak(); }
118
void writeTo(uint8_t *buf);
119
120
void foldIdentical(ConcatInputSection *redundant);
121
ConcatInputSection *canonical() override {
122
return replacement ? replacement : this;
123
}
124
const InputSection *canonical() const override {
125
return replacement ? replacement : this;
126
}
127
128
static bool classof(const InputSection *isec) {
129
return isec->kind() == ConcatKind;
130
}
131
132
// Points to the surviving section after this one is folded by ICF
133
ConcatInputSection *replacement = nullptr;
134
// Equivalence-class ID for ICF
135
uint32_t icfEqClass[2] = {0, 0};
136
137
// With subsections_via_symbols, most symbols have their own InputSection,
138
// and for weak symbols (e.g. from inline functions), only the
139
// InputSection from one translation unit will make it to the output,
140
// while all copies in other translation units are coalesced into the
141
// first and not copied to the output.
142
bool wasCoalesced = false;
143
bool live = !config->deadStrip;
144
bool hasCallSites = false;
145
// This variable has two usages. Initially, it represents the input order.
146
// After assignAddresses is called, it represents the offset from the
147
// beginning of the output section this section was assigned to.
148
uint64_t outSecOff = 0;
149
};
150
151
// Initialize a fake InputSection that does not belong to any InputFile.
152
// The created ConcatInputSection will always have 'live=true'
153
ConcatInputSection *makeSyntheticInputSection(StringRef segName,
154
StringRef sectName,
155
uint32_t flags = 0,
156
ArrayRef<uint8_t> data = {},
157
uint32_t align = 1);
158
159
// Helper functions to make it easy to sprinkle asserts.
160
161
// Free-function form of ConcatInputSection::shouldOmitFromOutput() that
// accepts any InputSection; anything that is not a ConcatInputSection yields
// false.
inline bool shouldOmitFromOutput(InputSection *isec) {
  if (!isa<ConcatInputSection>(isec))
    return false;
  return cast<ConcatInputSection>(isec)->shouldOmitFromOutput();
}
165
166
// Free-function form of ConcatInputSection::isCoalescedWeak() that accepts
// any InputSection; anything that is not a ConcatInputSection yields false.
inline bool isCoalescedWeak(InputSection *isec) {
  if (!isa<ConcatInputSection>(isec))
    return false;
  return cast<ConcatInputSection>(isec)->isCoalescedWeak();
}
170
171
// We allocate a lot of these and binary search on them, so they should be as
172
// compact as possible. Hence the use of 31 rather than 64 bits for the hash.
173
struct StringPiece {
174
// Offset from the start of the containing input section.
175
uint32_t inSecOff;
176
uint32_t live : 1;
177
// Only set if deduplicating literals
178
uint32_t hash : 31;
179
// Offset from the start of the containing output section.
180
uint64_t outSecOff = 0;
181
182
StringPiece(uint64_t off, uint32_t hash)
183
: inSecOff(off), live(!config->deadStrip), hash(hash) {}
184
};
185
186
static_assert(sizeof(StringPiece) == 16, "StringPiece is too big!");
187
188
// CStringInputSections are composed of multiple null-terminated string
189
// literals, which we represent using StringPieces. These literals can be
190
// deduplicated and tail-merged, so translating offsets between the input and
191
// outputs sections is more complicated.
192
//
193
// NOTE: One significant difference between LLD and ld64 is that we merge all
194
// cstring literals, even those referenced directly by non-private symbols.
195
// ld64 is more conservative and does not do that. This was mostly done for
196
// implementation simplicity; if we find programs that need the more
197
// conservative behavior we can certainly implement that.
198
class CStringInputSection final : public InputSection {
199
public:
200
CStringInputSection(const Section &section, ArrayRef<uint8_t> data,
201
uint32_t align, bool dedupLiterals)
202
: InputSection(CStringLiteralKind, section, data, align),
203
deduplicateLiterals(dedupLiterals) {}
204
205
uint64_t getOffset(uint64_t off) const override;
206
bool isLive(uint64_t off) const override { return getStringPiece(off).live; }
207
void markLive(uint64_t off) override { getStringPiece(off).live = true; }
208
// Find the StringPiece that contains this offset.
209
StringPiece &getStringPiece(uint64_t off);
210
const StringPiece &getStringPiece(uint64_t off) const;
211
// Split at each null byte.
212
void splitIntoPieces();
213
214
LLVM_ATTRIBUTE_ALWAYS_INLINE
215
StringRef getStringRef(size_t i) const {
216
size_t begin = pieces[i].inSecOff;
217
// The endpoint should be *at* the null terminator, not after. This matches
218
// the behavior of StringRef(const char *Str).
219
size_t end =
220
((pieces.size() - 1 == i) ? data.size() : pieces[i + 1].inSecOff) - 1;
221
return toStringRef(data.slice(begin, end - begin));
222
}
223
224
StringRef getStringRefAtOffset(uint64_t off) const {
225
return getStringRef(getStringPieceIndex(off));
226
}
227
228
// Returns i'th piece as a CachedHashStringRef. This function is very hot when
229
// string merging is enabled, so we want to inline.
230
LLVM_ATTRIBUTE_ALWAYS_INLINE
231
llvm::CachedHashStringRef getCachedHashStringRef(size_t i) const {
232
assert(deduplicateLiterals);
233
return {getStringRef(i), pieces[i].hash};
234
}
235
236
static bool classof(const InputSection *isec) {
237
return isec->kind() == CStringLiteralKind;
238
}
239
240
bool deduplicateLiterals = false;
241
std::vector<StringPiece> pieces;
242
243
private:
244
size_t getStringPieceIndex(uint64_t off) const;
245
};
246
247
class WordLiteralInputSection final : public InputSection {
248
public:
249
WordLiteralInputSection(const Section &section, ArrayRef<uint8_t> data,
250
uint32_t align);
251
uint64_t getOffset(uint64_t off) const override;
252
bool isLive(uint64_t off) const override {
253
return live[off >> power2LiteralSize];
254
}
255
void markLive(uint64_t off) override {
256
live[off >> power2LiteralSize] = true;
257
}
258
259
static bool classof(const InputSection *isec) {
260
return isec->kind() == WordLiteralKind;
261
}
262
263
private:
264
unsigned power2LiteralSize;
265
// The liveness of data[off] is tracked by live[off >> power2LiteralSize].
266
llvm::BitVector live;
267
};
268
269
inline uint8_t sectionType(uint32_t flags) {
270
return flags & llvm::MachO::SECTION_TYPE;
271
}
272
273
inline bool isZeroFill(uint32_t flags) {
274
return llvm::MachO::isVirtualSection(sectionType(flags));
275
}
276
277
inline bool isThreadLocalVariables(uint32_t flags) {
278
return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_VARIABLES;
279
}
280
281
// These sections contain the data for initializing thread-local variables.
282
inline bool isThreadLocalData(uint32_t flags) {
283
return sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_REGULAR ||
284
sectionType(flags) == llvm::MachO::S_THREAD_LOCAL_ZEROFILL;
285
}
286
287
inline bool isDebugSection(uint32_t flags) {
288
return (flags & llvm::MachO::SECTION_ATTRIBUTES_USR) ==
289
llvm::MachO::S_ATTR_DEBUG;
290
}
291
292
inline bool isWordLiteralSection(uint32_t flags) {
293
return sectionType(flags) == llvm::MachO::S_4BYTE_LITERALS ||
294
sectionType(flags) == llvm::MachO::S_8BYTE_LITERALS ||
295
sectionType(flags) == llvm::MachO::S_16BYTE_LITERALS;
296
}
297
298
bool isCodeSection(const InputSection *);
299
bool isCfStringSection(const InputSection *);
300
bool isClassRefsSection(const InputSection *);
301
bool isSelRefsSection(const InputSection *);
302
bool isEhFrameSection(const InputSection *);
303
bool isGccExceptTabSection(const InputSection *);
304
305
extern std::vector<ConcatInputSection *> inputSections;
306
// This is used as a counter for specifying the input order of input sections.
307
extern int inputSectionsOrder;
308
309
// Section-name string constants. Each one is a constexpr character array
// (constexpr already makes the elements const), so the declared type of every
// constant is `const char[N]`.
namespace section_names {

constexpr char authGot[] = "__auth_got";
constexpr char authPtr[] = "__auth_ptr";
constexpr char binding[] = "__binding";
constexpr char bitcodeBundle[] = "__bundle";
constexpr char cString[] = "__cstring";
constexpr char cfString[] = "__cfstring";
constexpr char cgProfile[] = "__cg_profile";
constexpr char chainFixups[] = "__chainfixups";
constexpr char codeSignature[] = "__code_signature";
constexpr char common[] = "__common";
constexpr char compactUnwind[] = "__compact_unwind";
constexpr char data[] = "__data";
constexpr char debugAbbrev[] = "__debug_abbrev";
constexpr char debugInfo[] = "__debug_info";
constexpr char debugLine[] = "__debug_line";
constexpr char debugStr[] = "__debug_str";
constexpr char debugStrOffs[] = "__debug_str_offs";
constexpr char ehFrame[] = "__eh_frame";
constexpr char gccExceptTab[] = "__gcc_except_tab";
constexpr char export_[] = "__export";
constexpr char dataInCode[] = "__data_in_code";
constexpr char functionStarts[] = "__func_starts";
constexpr char got[] = "__got";
constexpr char header[] = "__mach_header";
constexpr char indirectSymbolTable[] = "__ind_sym_tab";
constexpr char initOffsets[] = "__init_offsets";
constexpr char const_[] = "__const";
constexpr char lazySymbolPtr[] = "__la_symbol_ptr";
constexpr char lazyBinding[] = "__lazy_binding";
constexpr char literals[] = "__literals";
constexpr char moduleInitFunc[] = "__mod_init_func";
constexpr char moduleTermFunc[] = "__mod_term_func";
constexpr char nonLazySymbolPtr[] = "__nl_symbol_ptr";
constexpr char objcCatList[] = "__objc_catlist";
constexpr char objcClassList[] = "__objc_classlist";
constexpr char objcMethList[] = "__objc_methlist";
constexpr char objcClassRefs[] = "__objc_classrefs";
constexpr char objcConst[] = "__objc_const";
constexpr char objCImageInfo[] = "__objc_imageinfo";
constexpr char objcStubs[] = "__objc_stubs";
constexpr char objcSelrefs[] = "__objc_selrefs";
constexpr char objcMethname[] = "__objc_methname";
constexpr char objcNonLazyCatList[] = "__objc_nlcatlist";
constexpr char objcNonLazyClassList[] = "__objc_nlclslist";
constexpr char objcProtoList[] = "__objc_protolist";
constexpr char pageZero[] = "__pagezero";
constexpr char pointers[] = "__pointers";
constexpr char rebase[] = "__rebase";
constexpr char staticInit[] = "__StaticInit";
constexpr char stringTable[] = "__string_table";
constexpr char stubHelper[] = "__stub_helper";
constexpr char stubs[] = "__stubs";
constexpr char swift[] = "__swift";
constexpr char symbolTable[] = "__symbol_table";
constexpr char textCoalNt[] = "__textcoal_nt";
constexpr char text[] = "__text";
constexpr char threadPtrs[] = "__thread_ptrs";
constexpr char threadVars[] = "__thread_vars";
constexpr char unwindInfo[] = "__unwind_info";
constexpr char weakBinding[] = "__weak_binding";
constexpr char zeroFill[] = "__zerofill";
constexpr char addrSig[] = "__llvm_addrsig";

} // namespace section_names
375
376
void addInputSection(InputSection *inputSection);
377
} // namespace macho
378
379
std::string toString(const macho::InputSection *);
380
381
} // namespace lld
382
383
#endif
384
385