Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/MachO/InputFiles.h
34870 views
1
//===- InputFiles.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8
9
#ifndef LLD_MACHO_INPUT_FILES_H
10
#define LLD_MACHO_INPUT_FILES_H
11
12
#include "MachOStructs.h"
13
#include "Target.h"
14
15
#include "lld/Common/DWARF.h"
16
#include "lld/Common/LLVM.h"
17
#include "lld/Common/Memory.h"
18
#include "llvm/ADT/CachedHashString.h"
19
#include "llvm/ADT/DenseSet.h"
20
#include "llvm/ADT/SetVector.h"
21
#include "llvm/BinaryFormat/MachO.h"
22
#include "llvm/DebugInfo/DWARF/DWARFUnit.h"
23
#include "llvm/Object/Archive.h"
24
#include "llvm/Support/MemoryBuffer.h"
25
#include "llvm/Support/Threading.h"
26
#include "llvm/TextAPI/TextAPIReader.h"
27
28
#include <vector>
29
30
// Forward declarations of the LLVM types that this header names without
// needing their full definitions.
namespace llvm {
namespace lto {
class InputFile;
} // namespace lto
namespace MachO {
class InterfaceFile;
} // namespace MachO
class TarWriter;
} // namespace llvm
39
40
namespace lld {
41
namespace macho {
42
43
// Forward declarations of linker types that are not defined in this header.
struct PlatformInfo;
class ConcatInputSection;
class Symbol;
class Defined;
class AliasSymbol;
struct Reloc;
enum class RefState : uint8_t;
50
51
// If --reproduce option is given, all input files are written
52
// to this tar archive.
53
extern std::unique_ptr<llvm::TarWriter> tar;
54
55
// If .subsections_via_symbols is set, each InputSection will be split along
56
// symbol boundaries. The field offset represents the offset of the subsection
57
// from the start of the original pre-split InputSection.
58
struct Subsection {
59
uint64_t offset = 0;
60
InputSection *isec = nullptr;
61
};
62
63
// The subsections belonging to one Section.
using Subsections = std::vector<Subsection>;
class InputFile;
65
66
class Section {
67
public:
68
InputFile *file;
69
StringRef segname;
70
StringRef name;
71
uint32_t flags;
72
uint64_t addr;
73
Subsections subsections;
74
75
Section(InputFile *file, StringRef segname, StringRef name, uint32_t flags,
76
uint64_t addr)
77
: file(file), segname(segname), name(name), flags(flags), addr(addr) {}
78
// Ensure pointers to Sections are never invalidated.
79
Section(const Section &) = delete;
80
Section &operator=(const Section &) = delete;
81
Section(Section &&) = delete;
82
Section &operator=(Section &&) = delete;
83
84
private:
85
// Whether we have already split this section into individual subsections.
86
// For sections that cannot be split (e.g. literal sections), this is always
87
// false.
88
bool doneSplitting = false;
89
friend class ObjFile;
90
};
91
92
// Represents a call graph profile edge.
struct CallGraphEntry {
  // The index of the caller in the symbol table.
  uint32_t fromIndex;
  // The index of the callee in the symbol table.
  uint32_t toIndex;
  // Number of calls from caller to callee in the profile.
  uint64_t count;

  CallGraphEntry(uint32_t fromIndex, uint32_t toIndex, uint64_t count)
      : fromIndex(fromIndex), toIndex(toIndex), count(count) {}
};
104
105
class InputFile {
106
public:
107
enum Kind {
108
ObjKind,
109
OpaqueKind,
110
DylibKind,
111
ArchiveKind,
112
BitcodeKind,
113
};
114
115
virtual ~InputFile() = default;
116
Kind kind() const { return fileKind; }
117
StringRef getName() const { return name; }
118
static void resetIdCount() { idCount = 0; }
119
120
MemoryBufferRef mb;
121
122
std::vector<Symbol *> symbols;
123
std::vector<Section *> sections;
124
ArrayRef<uint8_t> objCImageInfo;
125
126
// If not empty, this stores the name of the archive containing this file.
127
// We use this string for creating error messages.
128
std::string archiveName;
129
130
// Provides an easy way to sort InputFiles deterministically.
131
const int id;
132
133
// True if this is a lazy ObjFile or BitcodeFile.
134
bool lazy = false;
135
136
protected:
137
InputFile(Kind kind, MemoryBufferRef mb, bool lazy = false)
138
: mb(mb), id(idCount++), lazy(lazy), fileKind(kind),
139
name(mb.getBufferIdentifier()) {}
140
141
InputFile(Kind, const llvm::MachO::InterfaceFile &);
142
143
// If true, this input's arch is compatible with target.
144
bool compatArch = true;
145
146
private:
147
const Kind fileKind;
148
const StringRef name;
149
150
static int idCount;
151
};
152
153
struct FDE {
154
uint32_t funcLength;
155
Symbol *personality;
156
InputSection *lsda;
157
};
158
159
// .o file
160
class ObjFile final : public InputFile {
161
public:
162
ObjFile(MemoryBufferRef mb, uint32_t modTime, StringRef archiveName,
163
bool lazy = false, bool forceHidden = false, bool compatArch = true,
164
bool builtFromBitcode = false);
165
ArrayRef<llvm::MachO::data_in_code_entry> getDataInCode() const;
166
ArrayRef<uint8_t> getOptimizationHints() const;
167
template <class LP> void parse();
168
template <class LP>
169
void parseLinkerOptions(llvm::SmallVectorImpl<StringRef> &LinkerOptions);
170
171
static bool classof(const InputFile *f) { return f->kind() == ObjKind; }
172
173
std::string sourceFile() const;
174
// Parses line table information for diagnostics. compileUnit should be used
175
// for other purposes.
176
lld::DWARFCache *getDwarf();
177
178
llvm::DWARFUnit *compileUnit = nullptr;
179
std::unique_ptr<lld::DWARFCache> dwarfCache;
180
Section *addrSigSection = nullptr;
181
const uint32_t modTime;
182
bool forceHidden;
183
bool builtFromBitcode;
184
std::vector<ConcatInputSection *> debugSections;
185
std::vector<CallGraphEntry> callGraph;
186
llvm::DenseMap<ConcatInputSection *, FDE> fdes;
187
std::vector<AliasSymbol *> aliases;
188
189
private:
190
llvm::once_flag initDwarf;
191
template <class LP> void parseLazy();
192
template <class SectionHeader> void parseSections(ArrayRef<SectionHeader>);
193
template <class LP>
194
void parseSymbols(ArrayRef<typename LP::section> sectionHeaders,
195
ArrayRef<typename LP::nlist> nList, const char *strtab,
196
bool subsectionsViaSymbols);
197
template <class NList>
198
Symbol *parseNonSectionSymbol(const NList &sym, const char *strtab);
199
template <class SectionHeader>
200
void parseRelocations(ArrayRef<SectionHeader> sectionHeaders,
201
const SectionHeader &, Section &);
202
void parseDebugInfo();
203
void splitEhFrames(ArrayRef<uint8_t> dataArr, Section &ehFrameSection);
204
void registerCompactUnwind(Section &compactUnwindSection);
205
void registerEhFrames(Section &ehFrameSection);
206
};
207
208
// command-line -sectcreate file
209
class OpaqueFile final : public InputFile {
210
public:
211
OpaqueFile(MemoryBufferRef mb, StringRef segName, StringRef sectName);
212
static bool classof(const InputFile *f) { return f->kind() == OpaqueKind; }
213
};
214
215
// .dylib or .tbd file
216
class DylibFile final : public InputFile {
217
public:
218
// Mach-O dylibs can re-export other dylibs as sub-libraries, meaning that the
219
// symbols in those sub-libraries will be available under the umbrella
220
// library's namespace. Those sub-libraries can also have their own
221
// re-exports. When loading a re-exported dylib, `umbrella` should be set to
222
// the root dylib to ensure symbols in the child library are correctly bound
223
// to the root. On the other hand, if a dylib is being directly loaded
224
// (through an -lfoo flag), then `umbrella` should be a nullptr.
225
explicit DylibFile(MemoryBufferRef mb, DylibFile *umbrella,
226
bool isBundleLoader, bool explicitlyLinked);
227
explicit DylibFile(const llvm::MachO::InterfaceFile &interface,
228
DylibFile *umbrella, bool isBundleLoader,
229
bool explicitlyLinked);
230
explicit DylibFile(DylibFile *umbrella);
231
232
void parseLoadCommands(MemoryBufferRef mb);
233
void parseReexports(const llvm::MachO::InterfaceFile &interface);
234
bool isReferenced() const { return numReferencedSymbols > 0; }
235
bool isExplicitlyLinked() const;
236
void setExplicitlyLinked() { explicitlyLinked = true; }
237
238
static bool classof(const InputFile *f) { return f->kind() == DylibKind; }
239
240
StringRef installName;
241
DylibFile *exportingFile = nullptr;
242
DylibFile *umbrella;
243
SmallVector<StringRef, 2> rpaths;
244
uint32_t compatibilityVersion = 0;
245
uint32_t currentVersion = 0;
246
int64_t ordinal = 0; // Ordinal numbering starts from 1, so 0 is a sentinel
247
unsigned numReferencedSymbols = 0;
248
RefState refState;
249
bool reexport = false;
250
bool forceNeeded = false;
251
bool forceWeakImport = false;
252
bool deadStrippable = false;
253
254
private:
255
bool explicitlyLinked = false; // Access via isExplicitlyLinked().
256
257
public:
258
// An executable can be used as a bundle loader that will load the output
259
// file being linked, and that contains symbols referenced, but not
260
// implemented in the bundle. When used like this, it is very similar
261
// to a dylib, so we've used the same class to represent it.
262
bool isBundleLoader;
263
264
// Synthetic Dylib objects created by $ld$previous symbols in this dylib.
265
// Usually empty. These synthetic dylibs won't have synthetic dylibs
266
// themselves.
267
SmallVector<DylibFile *, 2> extraDylibs;
268
269
private:
270
DylibFile *getSyntheticDylib(StringRef installName, uint32_t currentVersion,
271
uint32_t compatVersion);
272
273
bool handleLDSymbol(StringRef originalName);
274
void handleLDPreviousSymbol(StringRef name, StringRef originalName);
275
void handleLDInstallNameSymbol(StringRef name, StringRef originalName);
276
void handleLDHideSymbol(StringRef name, StringRef originalName);
277
void checkAppExtensionSafety(bool dylibIsAppExtensionSafe) const;
278
void parseExportedSymbols(uint32_t offset, uint32_t size);
279
void loadReexport(StringRef path, DylibFile *umbrella,
280
const llvm::MachO::InterfaceFile *currentTopLevelTapi);
281
282
llvm::DenseSet<llvm::CachedHashStringRef> hiddenSymbols;
283
};
284
285
// .a file
286
class ArchiveFile final : public InputFile {
287
public:
288
explicit ArchiveFile(std::unique_ptr<llvm::object::Archive> &&file,
289
bool forceHidden);
290
void addLazySymbols();
291
void fetch(const llvm::object::Archive::Symbol &);
292
// LLD normally doesn't use Error for error-handling, but the underlying
293
// Archive library does, so this is the cleanest way to wrap it.
294
Error fetch(const llvm::object::Archive::Child &, StringRef reason);
295
const llvm::object::Archive &getArchive() const { return *file; };
296
static bool classof(const InputFile *f) { return f->kind() == ArchiveKind; }
297
298
private:
299
std::unique_ptr<llvm::object::Archive> file;
300
// Keep track of children fetched from the archive by tracking
301
// which address offsets have been fetched already.
302
llvm::DenseSet<uint64_t> seen;
303
// Load all symbols with hidden visibility (-load_hidden).
304
bool forceHidden;
305
};
306
307
class BitcodeFile final : public InputFile {
308
public:
309
explicit BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
310
uint64_t offsetInArchive, bool lazy = false,
311
bool forceHidden = false, bool compatArch = true);
312
static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
313
void parse();
314
315
std::unique_ptr<llvm::lto::InputFile> obj;
316
bool forceHidden;
317
318
private:
319
void parseLazy();
320
};
321
322
extern llvm::SetVector<InputFile *> inputFiles;
323
extern llvm::DenseMap<llvm::CachedHashStringRef, MemoryBufferRef> cachedReads;
324
extern llvm::SmallVector<StringRef> unprocessedLCLinkerOptions;
325
326
std::optional<MemoryBufferRef> readFile(StringRef path);
327
328
void extract(InputFile &file, StringRef reason);
329
330
namespace detail {
331
332
template <class CommandType, class... Types>
333
std::vector<const CommandType *>
334
findCommands(const void *anyHdr, size_t maxCommands, Types... types) {
335
std::vector<const CommandType *> cmds;
336
std::initializer_list<uint32_t> typesList{types...};
337
const auto *hdr = reinterpret_cast<const llvm::MachO::mach_header *>(anyHdr);
338
const uint8_t *p =
339
reinterpret_cast<const uint8_t *>(hdr) + target->headerSize;
340
for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) {
341
auto *cmd = reinterpret_cast<const CommandType *>(p);
342
if (llvm::is_contained(typesList, cmd->cmd)) {
343
cmds.push_back(cmd);
344
if (cmds.size() == maxCommands)
345
return cmds;
346
}
347
p += cmd->cmdsize;
348
}
349
return cmds;
350
}
351
352
} // namespace detail
353
354
// anyHdr should be a pointer to either mach_header or mach_header_64
355
template <class CommandType = llvm::MachO::load_command, class... Types>
356
const CommandType *findCommand(const void *anyHdr, Types... types) {
357
std::vector<const CommandType *> cmds =
358
detail::findCommands<CommandType>(anyHdr, 1, types...);
359
return cmds.size() ? cmds[0] : nullptr;
360
}
361
362
template <class CommandType = llvm::MachO::load_command, class... Types>
363
std::vector<const CommandType *> findCommands(const void *anyHdr,
364
Types... types) {
365
return detail::findCommands<CommandType>(anyHdr, 0, types...);
366
}
367
368
// Defined out of line. NOTE(review): presumably returns `path` with a
// configured ThinLTO object-file suffix replaced -- confirm against the
// definition.
std::string replaceThinLTOSuffix(StringRef path);
369
} // namespace macho
370
371
std::string toString(const macho::InputFile *file);
372
std::string toString(const macho::Section &);
373
} // namespace lld
374
375
#endif
376
377