Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/ELF/InputFiles.cpp
34869 views
1
//===- InputFiles.cpp -----------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "InputFiles.h"
10
#include "Config.h"
11
#include "DWARF.h"
12
#include "Driver.h"
13
#include "InputSection.h"
14
#include "LinkerScript.h"
15
#include "SymbolTable.h"
16
#include "Symbols.h"
17
#include "SyntheticSections.h"
18
#include "Target.h"
19
#include "lld/Common/CommonLinkerContext.h"
20
#include "lld/Common/DWARF.h"
21
#include "llvm/ADT/CachedHashString.h"
22
#include "llvm/ADT/STLExtras.h"
23
#include "llvm/LTO/LTO.h"
24
#include "llvm/Object/IRObjectFile.h"
25
#include "llvm/Support/ARMAttributeParser.h"
26
#include "llvm/Support/ARMBuildAttributes.h"
27
#include "llvm/Support/Endian.h"
28
#include "llvm/Support/FileSystem.h"
29
#include "llvm/Support/Path.h"
30
#include "llvm/Support/RISCVAttributeParser.h"
31
#include "llvm/Support/TarWriter.h"
32
#include "llvm/Support/TimeProfiler.h"
33
#include "llvm/Support/raw_ostream.h"
34
#include <optional>
35
36
using namespace llvm;
37
using namespace llvm::ELF;
38
using namespace llvm::object;
39
using namespace llvm::sys;
40
using namespace llvm::sys::fs;
41
using namespace llvm::support::endian;
42
using namespace lld;
43
using namespace lld::elf;
44
45
// This function is explicitly instantiated in ARM.cpp, don't do it here to
46
// avoid warnings with MSVC.
47
extern template void ObjFile<ELF32LE>::importCmseSymbols();
48
extern template void ObjFile<ELF32BE>::importCmseSymbols();
49
extern template void ObjFile<ELF64LE>::importCmseSymbols();
50
extern template void ObjFile<ELF64BE>::importCmseSymbols();
51
52
bool InputFile::isInGroup;
53
uint32_t InputFile::nextGroupId;
54
55
std::unique_ptr<TarWriter> elf::tar;
56
57
// Returns "<internal>", "foo.a(bar.o)" or "baz.o".
58
std::string lld::toString(const InputFile *f) {
59
static std::mutex mu;
60
if (!f)
61
return "<internal>";
62
63
{
64
std::lock_guard<std::mutex> lock(mu);
65
if (f->toStringCache.empty()) {
66
if (f->archiveName.empty())
67
f->toStringCache = f->getName();
68
else
69
(f->archiveName + "(" + f->getName() + ")").toVector(f->toStringCache);
70
}
71
}
72
return std::string(f->toStringCache);
73
}
74
75
// Classifies an ELF buffer by class (32/64-bit) and data encoding, reporting
// a fatal error for anything that does not look like a well-formed ELF header.
// `archiveName`, when non-empty, is used to prefix diagnostics as
// "archive(member): ...".
static ELFKind getELFKind(MemoryBufferRef mb, StringRef archiveName) {
  const StringRef buf = mb.getBuffer();
  const auto arch = getElfArchType(buf);
  const unsigned char size = arch.first;
  const unsigned char endian = arch.second;

  // Emits a fatal diagnostic naming the offending file.
  const auto failWith = [&](StringRef msg) {
    const StringRef filename = mb.getBufferIdentifier();
    if (!archiveName.empty())
      fatal(archiveName + "(" + filename + "): " + msg);
    else
      fatal(filename + ": " + msg);
  };

  if (!buf.starts_with(ElfMagic))
    failWith("not an ELF file");
  if (endian != ELFDATA2LSB && endian != ELFDATA2MSB)
    failWith("corrupted ELF file: invalid data encoding");
  if (size != ELFCLASS32 && size != ELFCLASS64)
    failWith("corrupted ELF file: invalid file class");

  // The buffer must be large enough to hold the ELF header of its class.
  const size_t bufSize = buf.size();
  const bool short32 = size == ELFCLASS32 && bufSize < sizeof(Elf32_Ehdr);
  const bool short64 = size == ELFCLASS64 && bufSize < sizeof(Elf64_Ehdr);
  if (short32 || short64)
    failWith("corrupted ELF file: file is too short");

  const bool isLittle = endian == ELFDATA2LSB;
  if (size == ELFCLASS32)
    return isLittle ? ELF32LEKind : ELF32BEKind;
  return isLittle ? ELF64LEKind : ELF64BEKind;
}
104
105
// For ARM only, to set the EF_ARM_ABI_FLOAT_SOFT or EF_ARM_ABI_FLOAT_HARD
106
// flag in the ELF Header we need to look at Tag_ABI_VFP_args to find out how
107
// the input objects have been compiled.
108
static void updateARMVFPArgs(const ARMAttributeParser &attributes,
109
const InputFile *f) {
110
std::optional<unsigned> attr =
111
attributes.getAttributeValue(ARMBuildAttrs::ABI_VFP_args);
112
if (!attr)
113
// If an ABI tag isn't present then it is implicitly given the value of 0
114
// which maps to ARMBuildAttrs::BaseAAPCS. However many assembler files,
115
// including some in glibc that don't use FP args (and should have value 3)
116
// don't have the attribute so we do not consider an implicit value of 0
117
// as a clash.
118
return;
119
120
unsigned vfpArgs = *attr;
121
ARMVFPArgKind arg;
122
switch (vfpArgs) {
123
case ARMBuildAttrs::BaseAAPCS:
124
arg = ARMVFPArgKind::Base;
125
break;
126
case ARMBuildAttrs::HardFPAAPCS:
127
arg = ARMVFPArgKind::VFP;
128
break;
129
case ARMBuildAttrs::ToolChainFPPCS:
130
// Tool chain specific convention that conforms to neither AAPCS variant.
131
arg = ARMVFPArgKind::ToolChain;
132
break;
133
case ARMBuildAttrs::CompatibleFPAAPCS:
134
// Object compatible with all conventions.
135
return;
136
default:
137
error(toString(f) + ": unknown Tag_ABI_VFP_args value: " + Twine(vfpArgs));
138
return;
139
}
140
// Follow ld.bfd and error if there is a mix of calling conventions.
141
if (config->armVFPArgs != arg && config->armVFPArgs != ARMVFPArgKind::Default)
142
error(toString(f) + ": incompatible Tag_ABI_VFP_args");
143
else
144
config->armVFPArgs = arg;
145
}
146
147
// The ARM support in lld makes some use of instructions that are not available
148
// on all ARM architectures. Namely:
149
// - Use of BLX instruction for interworking between ARM and Thumb state.
150
// - Use of the extended Thumb branch encoding in relocation.
151
// - Use of the MOVT/MOVW instructions in Thumb Thunks.
152
// The ARM Attributes section contains information about the architecture chosen
153
// at compile time. We follow the convention that if at least one input object
154
// is compiled with an architecture that supports these features then lld is
155
// permitted to use them.
156
static void updateSupportedARMFeatures(const ARMAttributeParser &attributes) {
157
std::optional<unsigned> attr =
158
attributes.getAttributeValue(ARMBuildAttrs::CPU_arch);
159
if (!attr)
160
return;
161
auto arch = *attr;
162
switch (arch) {
163
case ARMBuildAttrs::Pre_v4:
164
case ARMBuildAttrs::v4:
165
case ARMBuildAttrs::v4T:
166
// Architectures prior to v5 do not support BLX instruction
167
break;
168
case ARMBuildAttrs::v5T:
169
case ARMBuildAttrs::v5TE:
170
case ARMBuildAttrs::v5TEJ:
171
case ARMBuildAttrs::v6:
172
case ARMBuildAttrs::v6KZ:
173
case ARMBuildAttrs::v6K:
174
config->armHasBlx = true;
175
// Architectures used in pre-Cortex processors do not support
176
// The J1 = 1 J2 = 1 Thumb branch range extension, with the exception
177
// of Architecture v6T2 (arm1156t2-s and arm1156t2f-s) that do.
178
break;
179
default:
180
// All other Architectures have BLX and extended branch encoding
181
config->armHasBlx = true;
182
config->armJ1J2BranchEncoding = true;
183
if (arch != ARMBuildAttrs::v6_M && arch != ARMBuildAttrs::v6S_M)
184
// All Architectures used in Cortex processors with the exception
185
// of v6-M and v6S-M have the MOVT and MOVW instructions.
186
config->armHasMovtMovw = true;
187
break;
188
}
189
190
// Only ARMv8-M or later architectures have CMSE support.
191
std::optional<unsigned> profile =
192
attributes.getAttributeValue(ARMBuildAttrs::CPU_arch_profile);
193
if (!profile)
194
return;
195
if (arch >= ARMBuildAttrs::CPUArch::v8_M_Base &&
196
profile == ARMBuildAttrs::MicroControllerProfile)
197
config->armCMSESupport = true;
198
199
// The thumb PLT entries require Thumb2 which can be used on multiple archs.
200
// For now, let's limit it to ones where ARM isn't available and we know have
201
// Thumb2.
202
std::optional<unsigned> armISA =
203
attributes.getAttributeValue(ARMBuildAttrs::ARM_ISA_use);
204
std::optional<unsigned> thumb =
205
attributes.getAttributeValue(ARMBuildAttrs::THUMB_ISA_use);
206
config->armHasArmISA |= armISA && *armISA >= ARMBuildAttrs::Allowed;
207
config->armHasThumb2ISA |= thumb && *thumb >= ARMBuildAttrs::AllowThumb32;
208
}
209
210
// Records the buffer and kind, and stamps the file with the current group ID.
InputFile::InputFile(Kind k, MemoryBufferRef m)
    : mb(m), groupId(nextGroupId), fileKind(k) {
  // Files inside the same --{start,end}-group share one group ID, so the
  // counter only advances for files outside of a group.
  if (isInGroup)
    return;
  ++nextGroupId;
}
217
218
// Opens `path` (after applying --chroot and input remapping), records it as a
// dependency, and returns a reference to its contents. The buffer itself is
// owned by ctx.memoryBuffers. Returns std::nullopt, after reporting an error,
// if the file cannot be opened.
std::optional<MemoryBufferRef> elf::readFile(StringRef path) {
  llvm::TimeTraceScope timeScope("Load input files", path);

  // The --chroot option changes our virtual root directory.
  // This is useful when you are dealing with files created by --reproduce.
  if (!config->chroot.empty() && path.starts_with("/"))
    path = saver().save(config->chroot + path);

  // Exact remappings take precedence; otherwise the first matching wildcard
  // pattern wins.
  bool remapped = false;
  if (auto exact = config->remapInputs.find(path);
      exact != config->remapInputs.end()) {
    path = exact->second;
    remapped = true;
  } else {
    for (const auto &[pattern, replacement] : config->remapInputsWildcards) {
      if (!pattern.match(path))
        continue;
      path = replacement;
      remapped = true;
      break;
    }
  }
  if (remapped) {
    // Use /dev/null to indicate an input file that should be ignored. Change
    // the path to NUL on Windows.
#ifdef _WIN32
    if (path == "/dev/null")
      path = "NUL";
#endif
  }

  log(path);
  config->dependencyFiles.insert(llvm::CachedHashString(path));

  auto bufferOrErr = MemoryBuffer::getFile(path, /*IsText=*/false,
                                           /*RequiresNullTerminator=*/false);
  if (auto ec = bufferOrErr.getError()) {
    error("cannot open " + path + ": " + ec.message());
    return std::nullopt;
  }

  // Grab the reference first, then hand ownership of the buffer to ctx.
  MemoryBufferRef ref = (*bufferOrErr)->getMemBufferRef();
  ctx.memoryBuffers.push_back(std::move(*bufferOrErr));

  if (tar)
    tar->append(relativeToRoot(path), ref.getBuffer());
  return ref;
}
266
267
// All input object files must be for the same architecture
268
// (e.g. it does not make sense to link x86 object files with
269
// MIPS object files.) This function checks for that error.
270
static bool isCompatible(InputFile *file) {
  // Only ELF and bitcode inputs carry a target to compare against.
  if (!(file->isElf() || isa<BitcodeFile>(file)))
    return true;

  const bool sameTarget =
      file->ekind == config->ekind && file->emachine == config->emachine;
  // For MIPS, the N32 ABI setting must agree as well.
  if (sameTarget && (config->emachine != EM_MIPS ||
                     isMipsN32Abi(file) == config->mipsN32Abi))
    return true;

  // Prefer naming the explicitly requested target in the diagnostic.
  const StringRef target =
      config->bfdname.empty() ? config->emulation : config->bfdname;
  if (!target.empty()) {
    error(toString(file) + " is incompatible with " + target);
    return false;
  }

  // Otherwise, name the first previously added file, if there is one.
  InputFile *existing = nullptr;
  if (!ctx.objectFiles.empty())
    existing = ctx.objectFiles.front();
  else if (!ctx.sharedFiles.empty())
    existing = ctx.sharedFiles.front();
  else if (!ctx.bitcodeFiles.empty())
    existing = ctx.bitcodeFiles.front();

  std::string with;
  if (existing != nullptr)
    with = " with " + toString(existing);
  error(toString(file) + " is incompatible" + with);
  return false;
}
301
302
// Parses a single input according to its kind and registers it in the
// matching ctx list. Incompatible files are skipped; isCompatible() has
// already reported the error.
template <class ELFT> static void doParseFile(InputFile *file) {
  if (!isCompatible(file))
    return;

  // Lazy files only go through parseLazy().
  if (file->lazy) {
    auto *bitcode = dyn_cast<BitcodeFile>(file);
    if (bitcode == nullptr) {
      cast<ObjFile<ELFT>>(file)->parseLazy();
      return;
    }
    ctx.lazyBitcodeFiles.push_back(bitcode);
    bitcode->parseLazy();
    return;
  }

  if (config->trace)
    message(toString(file));

  if (file->kind() == InputFile::ObjKind) {
    ctx.objectFiles.push_back(cast<ELFFileBase>(file));
    cast<ObjFile<ELFT>>(file)->parse();
    return;
  }
  if (auto *shared = dyn_cast<SharedFile>(file)) {
    shared->parse<ELFT>();
    return;
  }
  if (auto *bitcode = dyn_cast<BitcodeFile>(file)) {
    ctx.bitcodeFiles.push_back(bitcode);
    bitcode->parse();
    return;
  }

  // Anything else is treated as a binary file.
  auto *binary = cast<BinaryFile>(file);
  ctx.binaryFiles.push_back(binary);
  binary->parse();
}
333
334
// Add symbols in File to the symbol table.
335
void elf::parseFile(InputFile *file) {
  // Dispatch to the doParseFile instantiation selected by invokeELFT.
  invokeELFT(doParseFile, file);
}
336
337
// This function is explicitly instantiated in ARM.cpp. Mark it extern here,
338
// to avoid warnings when building with MSVC.
339
extern template void ObjFile<ELF32LE>::importCmseSymbols();
340
extern template void ObjFile<ELF32BE>::importCmseSymbols();
341
extern template void ObjFile<ELF64LE>::importCmseSymbols();
342
extern template void ObjFile<ELF64BE>::importCmseSymbols();
343
344
template <class ELFT>
345
static void doParseFiles(const std::vector<InputFile *> &files,
346
InputFile *armCmseImpLib) {
347
// Add all files to the symbol table. This will add almost all symbols that we
348
// need to the symbol table. This process might add files to the link due to
349
// addDependentLibrary.
350
for (size_t i = 0; i < files.size(); ++i) {
351
llvm::TimeTraceScope timeScope("Parse input files", files[i]->getName());
352
doParseFile<ELFT>(files[i]);
353
}
354
if (armCmseImpLib)
355
cast<ObjFile<ELFT>>(*armCmseImpLib).importCmseSymbols();
356
}
357
358
void elf::parseFiles(const std::vector<InputFile *> &files,
359
InputFile *armCmseImpLib) {
360
llvm::TimeTraceScope timeScope("Parse input files");
361
invokeELFT(doParseFiles, files, armCmseImpLib);
362
}
363
364
// Concatenates arguments to construct a string representing an error location.
365
static std::string createFileLineMsg(StringRef path, unsigned line) {
  const std::string lineSuffix = ":" + std::to_string(line);
  const std::string baseName = std::string(path::filename(path));

  // Always lead with "<basename>:<line>".
  std::string msg = baseName + lineSuffix;
  if (baseName == path)
    return msg;

  // The path has more than a bare file name; append the full location in
  // parentheses as well.
  return msg + " (" + path.str() + lineSuffix + ")";
}
372
373
template <class ELFT>
374
static std::string getSrcMsgAux(ObjFile<ELFT> &file, const Symbol &sym,
375
const InputSectionBase &sec, uint64_t offset) {
376
// In DWARF, functions and variables are stored to different places.
377
// First, look up a function for a given offset.
378
if (std::optional<DILineInfo> info = file.getDILineInfo(&sec, offset))
379
return createFileLineMsg(info->FileName, info->Line);
380
381
// If it failed, look up again as a variable.
382
if (std::optional<std::pair<std::string, unsigned>> fileLine =
383
file.getVariableLoc(sym.getName()))
384
return createFileLineMsg(fileLine->first, fileLine->second);
385
386
// File.sourceFile contains STT_FILE symbol, and that is a last resort.
387
return std::string(file.sourceFile);
388
}
389
390
std::string InputFile::getSrcMsg(const Symbol &sym, const InputSectionBase &sec,
391
uint64_t offset) {
392
if (kind() != ObjKind)
393
return "";
394
switch (ekind) {
395
default:
396
llvm_unreachable("Invalid kind");
397
case ELF32LEKind:
398
return getSrcMsgAux(cast<ObjFile<ELF32LE>>(*this), sym, sec, offset);
399
case ELF32BEKind:
400
return getSrcMsgAux(cast<ObjFile<ELF32BE>>(*this), sym, sec, offset);
401
case ELF64LEKind:
402
return getSrcMsgAux(cast<ObjFile<ELF64LE>>(*this), sym, sec, offset);
403
case ELF64BEKind:
404
return getSrcMsgAux(cast<ObjFile<ELF64BE>>(*this), sym, sec, offset);
405
}
406
}
407
408
// Returns the plain file name for standalone files, or "archive:member" for
// archive members. The combined form is built once and cached.
StringRef InputFile::getNameForScript() const {
  if (!archiveName.empty()) {
    if (nameForScriptCache.empty())
      nameForScriptCache = (archiveName + Twine(':') + getName()).str();
    return nameForScriptCache;
  }
  return getName();
}
417
418
// An ELF object file may contain a `.deplibs` section. If it exists, the
419
// section contains a list of library specifiers such as `m` for libm. This
420
// function resolves a given name by finding the first matching library checking
421
// the various ways that a library can be specified to LLD. This ELF extension
422
// is a form of autolinking and is called `dependent libraries`. It is currently
423
// unique to LLVM and lld.
424
static void addDependentLibrary(StringRef specifier, const InputFile *f) {
  if (!config->dependentLibraries)
    return;

  // Resolution order: library base name, then the search paths, then the
  // specifier taken as a path that exists on disk.
  if (std::optional<std::string> byBaseName =
          searchLibraryBaseName(specifier)) {
    ctx.driver.addFile(saver().save(*byBaseName), /*withLOption=*/true);
    return;
  }
  if (std::optional<std::string> bySearchPath =
          findFromSearchPaths(specifier)) {
    ctx.driver.addFile(saver().save(*bySearchPath), /*withLOption=*/true);
    return;
  }
  if (fs::exists(specifier)) {
    ctx.driver.addFile(specifier, /*withLOption=*/false);
    return;
  }

  error(toString(f) +
        ": unable to find library from dependent library specifier: " +
        specifier);
}
438
439
// Record the membership of a section group so that in the garbage collection
440
// pass, section group members are kept or discarded as a unit.
441
template <class ELFT>
442
static void handleSectionGroup(ArrayRef<InputSectionBase *> sections,
443
ArrayRef<typename ELFT::Word> entries) {
444
bool hasAlloc = false;
445
for (uint32_t index : entries.slice(1)) {
446
if (index >= sections.size())
447
return;
448
if (InputSectionBase *s = sections[index])
449
if (s != &InputSection::discarded && s->flags & SHF_ALLOC)
450
hasAlloc = true;
451
}
452
453
// If any member has the SHF_ALLOC flag, the whole group is subject to garbage
454
// collection. See the comment in markLive(). This rule retains .debug_types
455
// and .rela.debug_types.
456
if (!hasAlloc)
457
return;
458
459
// Connect the members in a circular doubly-linked list via
460
// nextInSectionGroup.
461
InputSectionBase *head;
462
InputSectionBase *prev = nullptr;
463
for (uint32_t index : entries.slice(1)) {
464
InputSectionBase *s = sections[index];
465
if (!s || s == &InputSection::discarded)
466
continue;
467
if (prev)
468
prev->nextInSectionGroup = s;
469
else
470
head = s;
471
prev = s;
472
}
473
if (prev)
474
prev->nextInSectionGroup = head;
475
}
476
477
// Returns the DWARF cache for this object, constructing it on first use.
// Construction is guarded by initDwarf so it happens at most once.
template <class ELFT> DWARFCache *ObjFile<ELFT>::getDwarf() {
  llvm::call_once(initDwarf, [this]() {
    // Both handlers passed to DWARFContext report the problem as a linker
    // warning prefixed with this file's name.
    auto reportAsWarning = [this](Error err) {
      warn(getName() + ": " + toString(std::move(err)));
    };
    auto dwarfObj = std::make_unique<LLDDwarfObj<ELFT>>(this);
    auto context = std::make_unique<DWARFContext>(
        std::move(dwarfObj), "", reportAsWarning, reportAsWarning);
    dwarf = std::make_unique<DWARFCache>(std::move(context));
  });

  return dwarf.get();
}
489
490
// Returns the pair of file name and line number describing location of data
491
// object (variable, array, etc) definition.
492
template <class ELFT>
493
std::optional<std::pair<std::string, unsigned>>
494
ObjFile<ELFT>::getVariableLoc(StringRef name) {
495
return getDwarf()->getVariableLoc(name);
496
}
497
498
// Returns source line information for a given offset
499
// using DWARF debug info.
500
template <class ELFT>
501
std::optional<DILineInfo>
502
ObjFile<ELFT>::getDILineInfo(const InputSectionBase *s, uint64_t offset) {
503
// Detect SectionIndex for specified section.
504
uint64_t sectionIndex = object::SectionedAddress::UndefSection;
505
ArrayRef<InputSectionBase *> sections = s->file->getSections();
506
for (uint64_t curIndex = 0; curIndex < sections.size(); ++curIndex) {
507
if (s == sections[curIndex]) {
508
sectionIndex = curIndex;
509
break;
510
}
511
}
512
513
return getDwarf()->getDILineInfo(offset, sectionIndex);
514
}
515
516
// Constructs the InputFile base and records the ELF kind of the buffer.
ELFFileBase::ELFFileBase(Kind k, ELFKind ekind, MemoryBufferRef mb)
    : InputFile(k, mb) {
  // The parameter shadows the member, hence the explicit `this->`.
  this->ekind = ekind;
}
520
521
template <typename Elf_Shdr>
522
static const Elf_Shdr *findSection(ArrayRef<Elf_Shdr> sections, uint32_t type) {
523
for (const Elf_Shdr &sec : sections)
524
if (sec.sh_type == type)
525
return &sec;
526
return nullptr;
527
}
528
529
void ELFFileBase::init() {
530
switch (ekind) {
531
case ELF32LEKind:
532
init<ELF32LE>(fileKind);
533
break;
534
case ELF32BEKind:
535
init<ELF32BE>(fileKind);
536
break;
537
case ELF64LEKind:
538
init<ELF64LE>(fileKind);
539
break;
540
case ELF64BEKind:
541
init<ELF64BE>(fileKind);
542
break;
543
default:
544
llvm_unreachable("getELFKind");
545
}
546
}
547
548
// Reads header fields, the section header table and, when present, the symbol
// table of the underlying ELF file. Shared objects use SHT_DYNSYM; every
// other kind uses SHT_SYMTAB.
template <class ELFT> void ELFFileBase::init(InputFile::Kind k) {
  using Elf_Shdr = typename ELFT::Shdr;
  using Elf_Sym = typename ELFT::Sym;

  // Initialize trivial attributes from the ELF header.
  const ELFFile<ELFT> &obj = getObj<ELFT>();
  const auto &header = obj.getHeader();
  emachine = header.e_machine;
  osabi = header.e_ident[llvm::ELF::EI_OSABI];
  abiVersion = header.e_ident[llvm::ELF::EI_ABIVERSION];

  ArrayRef<Elf_Shdr> sections = CHECK(obj.sections(), this);
  elfShdrs = sections.data();
  numELFShdrs = sections.size();

  // Find a symbol table; without one there is nothing more to set up.
  const uint32_t symtabType = k == SharedKind ? SHT_DYNSYM : SHT_SYMTAB;
  const Elf_Shdr *symtabSec = findSection(sections, symtabType);
  if (symtabSec == nullptr)
    return;

  // Initialize members corresponding to a symbol table.
  firstGlobal = symtabSec->sh_info;

  ArrayRef<Elf_Sym> eSyms = CHECK(obj.symbols(symtabSec), this);
  const bool badInfo = firstGlobal == 0 || firstGlobal > eSyms.size();
  if (badInfo)
    fatal(toString(this) + ": invalid sh_info in symbol table");

  elfSyms = reinterpret_cast<const void *>(eSyms.data());
  numELFSyms = uint32_t(eSyms.size());
  stringTable = CHECK(obj.getStringTableForSymtab(*symtabSec, sections), this);
}
580
581
template <class ELFT>
582
uint32_t ObjFile<ELFT>::getSectionIndex(const Elf_Sym &sym) const {
583
return CHECK(
584
this->getObj().getSectionIndex(sym, getELFSyms<ELFT>(), shndxTable),
585
this);
586
}
587
588
// First pass over the section headers of an object file: handles dependent
// libraries, ARM attributes and section-group (comdat) selection, then reads
// the symbol table. `ignoreComdats` keeps comdat groups without consulting
// the comdat group table.
template <class ELFT> void ObjFile<ELFT>::parse(bool ignoreComdats) {
  object::ELFFile<ELFT> obj = this->getObj();

  // justSymbols is usually false. When set, only size the section table and
  // read the symbols.
  if (this->justSymbols) {
    initializeJustSymbols();
    initializeSymbols(obj);
    return;
  }

  // Handle dependent libraries and selection of section groups here, as these
  // are not done in parallel.
  ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
  StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
  const uint64_t numSections = objSections.size();
  sections.resize(numSections);

  for (size_t i = 0; i != numSections; ++i) {
    const Elf_Shdr &sec = objSections[i];

    if (sec.sh_type == SHT_LLVM_DEPENDENT_LIBRARIES && !config->relocatable) {
      StringRef name = check(obj.getSectionName(sec, shstrtab));
      ArrayRef<char> data = CHECK(
          this->getObj().template getSectionContentsAsArray<char>(sec), this);
      const bool unterminated = !data.empty() && data.back() != '\0';
      if (unterminated) {
        error(
            toString(this) +
            ": corrupted dependent libraries section (unterminated string): " +
            name);
      } else {
        // The section is a sequence of NUL-terminated library specifiers.
        const char *cur = data.begin();
        const char *const end = data.end();
        while (cur < end) {
          StringRef specifier(cur);
          addDependentLibrary(specifier, this);
          cur += specifier.size() + 1;
        }
      }
      this->sections[i] = &InputSection::discarded;
      continue;
    }

    if (sec.sh_type == SHT_ARM_ATTRIBUTES && config->emachine == EM_ARM) {
      ARMAttributeParser attributes;
      ArrayRef<uint8_t> contents =
          check(this->getObj().getSectionContents(sec));
      StringRef name = check(obj.getSectionName(sec, shstrtab));
      this->sections[i] = &InputSection::discarded;

      const llvm::endianness endian = ekind == ELF32LEKind
                                          ? llvm::endianness::little
                                          : llvm::endianness::big;
      if (Error e = attributes.parse(contents, endian)) {
        // A temporary section object is built only to name the section in
        // the warning.
        InputSection isec(*this, sec, name);
        warn(toString(&isec) + ": " + llvm::toString(std::move(e)));
      } else {
        updateSupportedARMFeatures(attributes);
        updateARMVFPArgs(attributes, this);

        // FIXME: Retain the first attribute section we see. The eglibc ARM
        // dynamic loaders require the presence of an attribute section for
        // dlopen to work. In a full implementation we would merge all
        // attribute sections.
        if (in.attributes == nullptr) {
          in.attributes = std::make_unique<InputSection>(*this, sec, name);
          this->sections[i] = in.attributes.get();
        }
      }
    }

    // Producing a static binary with MTE globals is not currently supported,
    // remove all SHT_AARCH64_MEMTAG_GLOBALS_STATIC sections as they're unused
    // metadata, and we don't want them to end up in the output file for
    // static executables.
    if (sec.sh_type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC &&
        !canHaveMemtagGlobals()) {
      this->sections[i] = &InputSection::discarded;
      continue;
    }

    // Everything below deals with section groups only.
    if (sec.sh_type != SHT_GROUP)
      continue;

    StringRef signature = getShtGroupSignature(objSections, sec);
    ArrayRef<Elf_Word> entries =
        CHECK(obj.template getSectionContentsAsArray<Elf_Word>(sec), this);
    if (entries.empty())
      fatal(toString(this) + ": empty SHT_GROUP");

    Elf_Word flag = entries[0];
    if (flag && flag != GRP_COMDAT)
      fatal(toString(this) + ": unsupported SHT_GROUP format");

    // Non-comdat groups are always kept. A comdat group is kept when comdats
    // are ignored or when this file is the first to register its signature.
    bool keepGroup = true;
    if ((flag & GRP_COMDAT) != 0 && !ignoreComdats)
      keepGroup =
          symtab.comdatGroups.try_emplace(CachedHashStringRef(signature), this)
              .second;
    if (keepGroup) {
      if (!config->resolveGroups)
        this->sections[i] = createInputSection(
            i, sec, check(obj.getSectionName(sec, shstrtab)));
      continue;
    }

    // Otherwise, discard group members.
    for (uint32_t secIndex : entries.slice(1)) {
      if (secIndex >= numSections)
        fatal(toString(this) +
              ": invalid section index in group: " + Twine(secIndex));
      this->sections[secIndex] = &InputSection::discarded;
    }
  }

  // Read a symbol table.
  initializeSymbols(obj);
}
696
697
// Sections with SHT_GROUP and comdat bits define comdat section groups.
698
// They are identified and deduplicated by group name. This function
699
// returns a group name.
700
template <class ELFT>
701
StringRef ObjFile<ELFT>::getShtGroupSignature(ArrayRef<Elf_Shdr> sections,
702
const Elf_Shdr &sec) {
703
typename ELFT::SymRange symbols = this->getELFSyms<ELFT>();
704
if (sec.sh_info >= symbols.size())
705
fatal(toString(this) + ": invalid symbol index");
706
const typename ELFT::Sym &sym = symbols[sec.sh_info];
707
return CHECK(sym.getName(this->stringTable), this);
708
}
709
710
template <class ELFT>
711
bool ObjFile<ELFT>::shouldMerge(const Elf_Shdr &sec, StringRef name) {
712
// On a regular link we don't merge sections if -O0 (default is -O1). This
713
// sometimes makes the linker significantly faster, although the output will
714
// be bigger.
715
//
716
// Doing the same for -r would create a problem as it would combine sections
717
// with different sh_entsize. One option would be to just copy every SHF_MERGE
718
// section as is to the output. While this would produce a valid ELF file with
719
// usable SHF_MERGE sections, tools like (llvm-)?dwarfdump get confused when
720
// they see two .debug_str. We could have separate logic for combining
721
// SHF_MERGE sections based both on their name and sh_entsize, but that seems
722
// to be more trouble than it is worth. Instead, we just use the regular (-O1)
723
// logic for -r.
724
if (config->optimize == 0 && !config->relocatable)
725
return false;
726
727
// A mergeable section with size 0 is useless because they don't have
728
// any data to merge. A mergeable string section with size 0 can be
729
// argued as invalid because it doesn't end with a null character.
730
// We'll avoid a mess by handling them as if they were non-mergeable.
731
if (sec.sh_size == 0)
732
return false;
733
734
// Check for sh_entsize. The ELF spec is not clear about the zero
735
// sh_entsize. It says that "the member [sh_entsize] contains 0 if
736
// the section does not hold a table of fixed-size entries". We know
737
// that Rust 1.13 produces a string mergeable section with a zero
738
// sh_entsize. Here we just accept it rather than being picky about it.
739
uint64_t entSize = sec.sh_entsize;
740
if (entSize == 0)
741
return false;
742
if (sec.sh_size % entSize)
743
fatal(toString(this) + ":(" + name + "): SHF_MERGE section size (" +
744
Twine(sec.sh_size) + ") must be a multiple of sh_entsize (" +
745
Twine(entSize) + ")");
746
747
if (sec.sh_flags & SHF_WRITE)
748
fatal(toString(this) + ":(" + name +
749
"): writable SHF_MERGE section is not supported");
750
751
return true;
752
}
753
754
// This is for --just-symbols.
755
//
756
// --just-symbols is a very minor feature that allows you to link your
757
// output against other existing program, so that if you load both your
758
// program and the other program into memory, your output can refer the
759
// other program's symbols.
760
//
761
// When the option is given, we link "just symbols". The section table is
762
// initialized with null pointers.
763
template <class ELFT> void ObjFile<ELFT>::initializeJustSymbols() {
  // One null entry per section header; no input sections are created.
  this->sections.resize(this->numELFShdrs);
}
766
767
static bool isKnownSpecificSectionType(uint32_t t, uint32_t flags) {
768
if (SHT_LOUSER <= t && t <= SHT_HIUSER && !(flags & SHF_ALLOC))
769
return true;
770
if (SHT_LOOS <= t && t <= SHT_HIOS && !(flags & SHF_OS_NONCONFORMING))
771
return true;
772
// Allow all processor-specific types. This is different from GNU ld.
773
return SHT_LOPROC <= t && t <= SHT_HIPROC;
774
}
775
776
template <class ELFT>
777
void ObjFile<ELFT>::initializeSections(bool ignoreComdats,
778
const llvm::object::ELFFile<ELFT> &obj) {
779
ArrayRef<Elf_Shdr> objSections = getELFShdrs<ELFT>();
780
StringRef shstrtab = CHECK(obj.getSectionStringTable(objSections), this);
781
uint64_t size = objSections.size();
782
SmallVector<ArrayRef<Elf_Word>, 0> selectedGroups;
783
for (size_t i = 0; i != size; ++i) {
784
if (this->sections[i] == &InputSection::discarded)
785
continue;
786
const Elf_Shdr &sec = objSections[i];
787
const uint32_t type = sec.sh_type;
788
789
// SHF_EXCLUDE'ed sections are discarded by the linker. However,
790
// if -r is given, we'll let the final link discard such sections.
791
// This is compatible with GNU.
792
if ((sec.sh_flags & SHF_EXCLUDE) && !config->relocatable) {
793
if (type == SHT_LLVM_CALL_GRAPH_PROFILE)
794
cgProfileSectionIndex = i;
795
if (type == SHT_LLVM_ADDRSIG) {
796
// We ignore the address-significance table if we know that the object
797
// file was created by objcopy or ld -r. This is because these tools
798
// will reorder the symbols in the symbol table, invalidating the data
799
// in the address-significance table, which refers to symbols by index.
800
if (sec.sh_link != 0)
801
this->addrsigSec = &sec;
802
else if (config->icf == ICFLevel::Safe)
803
warn(toString(this) +
804
": --icf=safe conservatively ignores "
805
"SHT_LLVM_ADDRSIG [index " +
806
Twine(i) +
807
"] with sh_link=0 "
808
"(likely created using objcopy or ld -r)");
809
}
810
this->sections[i] = &InputSection::discarded;
811
continue;
812
}
813
814
switch (type) {
815
case SHT_GROUP: {
816
if (!config->relocatable)
817
sections[i] = &InputSection::discarded;
818
StringRef signature =
819
cantFail(this->getELFSyms<ELFT>()[sec.sh_info].getName(stringTable));
820
ArrayRef<Elf_Word> entries =
821
cantFail(obj.template getSectionContentsAsArray<Elf_Word>(sec));
822
if ((entries[0] & GRP_COMDAT) == 0 || ignoreComdats ||
823
symtab.comdatGroups.find(CachedHashStringRef(signature))->second ==
824
this)
825
selectedGroups.push_back(entries);
826
break;
827
}
828
case SHT_SYMTAB_SHNDX:
829
shndxTable = CHECK(obj.getSHNDXTable(sec, objSections), this);
830
break;
831
case SHT_SYMTAB:
832
case SHT_STRTAB:
833
case SHT_REL:
834
case SHT_RELA:
835
case SHT_CREL:
836
case SHT_NULL:
837
break;
838
case SHT_PROGBITS:
839
case SHT_NOTE:
840
case SHT_NOBITS:
841
case SHT_INIT_ARRAY:
842
case SHT_FINI_ARRAY:
843
case SHT_PREINIT_ARRAY:
844
this->sections[i] =
845
createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
846
break;
847
case SHT_LLVM_LTO:
848
// Discard .llvm.lto in a relocatable link that does not use the bitcode.
849
// The concatenated output does not properly reflect the linking
850
// semantics. In addition, since we do not use the bitcode wrapper format,
851
// the concatenated raw bitcode would be invalid.
852
if (config->relocatable && !config->fatLTOObjects) {
853
sections[i] = &InputSection::discarded;
854
break;
855
}
856
[[fallthrough]];
857
default:
858
this->sections[i] =
859
createInputSection(i, sec, check(obj.getSectionName(sec, shstrtab)));
860
if (type == SHT_LLVM_SYMPART)
861
ctx.hasSympart.store(true, std::memory_order_relaxed);
862
else if (config->rejectMismatch &&
863
!isKnownSpecificSectionType(type, sec.sh_flags))
864
errorOrWarn(toString(this->sections[i]) + ": unknown section type 0x" +
865
Twine::utohexstr(type));
866
break;
867
}
868
}
869
870
// We have a second loop. It is used to:
871
// 1) handle SHF_LINK_ORDER sections.
872
// 2) create relocation sections. In some cases the section header index of a
873
// relocation section may be smaller than that of the relocated section. In
874
// such cases, the relocation section would attempt to reference a target
875
// section that has not yet been created. For simplicity, delay creation of
876
// relocation sections until now.
877
for (size_t i = 0; i != size; ++i) {
878
if (this->sections[i] == &InputSection::discarded)
879
continue;
880
const Elf_Shdr &sec = objSections[i];
881
882
if (isStaticRelSecType(sec.sh_type)) {
883
// Find a relocation target section and associate this section with that.
884
// Target may have been discarded if it is in a different section group
885
// and the group is discarded, even though it's a violation of the spec.
886
// We handle that situation gracefully by discarding dangling relocation
887
// sections.
888
const uint32_t info = sec.sh_info;
889
InputSectionBase *s = getRelocTarget(i, info);
890
if (!s)
891
continue;
892
893
// ELF spec allows mergeable sections with relocations, but they are rare,
894
// and it is in practice hard to merge such sections by contents, because
895
// applying relocations at end of linking changes section contents. So, we
896
// simply handle such sections as non-mergeable ones. Degrading like this
897
// is acceptable because section merging is optional.
898
if (auto *ms = dyn_cast<MergeInputSection>(s)) {
899
s = makeThreadLocal<InputSection>(
900
ms->file, ms->flags, ms->type, ms->addralign,
901
ms->contentMaybeDecompress(), ms->name);
902
sections[info] = s;
903
}
904
905
if (s->relSecIdx != 0)
906
error(
907
toString(s) +
908
": multiple relocation sections to one section are not supported");
909
s->relSecIdx = i;
910
911
// Relocation sections are usually removed from the output, so return
912
// `nullptr` for the normal case. However, if -r or --emit-relocs is
913
// specified, we need to copy them to the output. (Some post link analysis
914
// tools specify --emit-relocs to obtain the information.)
915
if (config->copyRelocs) {
916
auto *isec = makeThreadLocal<InputSection>(
917
*this, sec, check(obj.getSectionName(sec, shstrtab)));
918
// If the relocated section is discarded (due to /DISCARD/ or
919
// --gc-sections), the relocation section should be discarded as well.
920
s->dependentSections.push_back(isec);
921
sections[i] = isec;
922
}
923
continue;
924
}
925
926
// A SHF_LINK_ORDER section with sh_link=0 is handled as if it did not have
927
// the flag.
928
if (!sec.sh_link || !(sec.sh_flags & SHF_LINK_ORDER))
929
continue;
930
931
InputSectionBase *linkSec = nullptr;
932
if (sec.sh_link < size)
933
linkSec = this->sections[sec.sh_link];
934
if (!linkSec)
935
fatal(toString(this) + ": invalid sh_link index: " + Twine(sec.sh_link));
936
937
// A SHF_LINK_ORDER section is discarded if its linked-to section is
938
// discarded.
939
InputSection *isec = cast<InputSection>(this->sections[i]);
940
linkSec->dependentSections.push_back(isec);
941
if (!isa<InputSection>(linkSec))
942
error("a section " + isec->name +
943
" with SHF_LINK_ORDER should not refer a non-regular section: " +
944
toString(linkSec));
945
}
946
947
for (ArrayRef<Elf_Word> entries : selectedGroups)
948
handleSectionGroup<ELFT>(this->sections, entries);
949
}
950
951
// Read the following info from the .note.gnu.property section and write it to
952
// the corresponding fields in `ObjFile`:
953
// - Feature flags (32 bits) representing x86 or AArch64 features for
954
// hardware-assisted call flow control;
955
// - AArch64 PAuth ABI core info (16 bytes).
956
template <class ELFT>
957
void readGnuProperty(const InputSection &sec, ObjFile<ELFT> &f) {
958
using Elf_Nhdr = typename ELFT::Nhdr;
959
using Elf_Note = typename ELFT::Note;
960
961
ArrayRef<uint8_t> data = sec.content();
962
auto reportFatal = [&](const uint8_t *place, const Twine &msg) {
963
fatal(toString(sec.file) + ":(" + sec.name + "+0x" +
964
Twine::utohexstr(place - sec.content().data()) + "): " + msg);
965
};
966
while (!data.empty()) {
967
// Read one NOTE record.
968
auto *nhdr = reinterpret_cast<const Elf_Nhdr *>(data.data());
969
if (data.size() < sizeof(Elf_Nhdr) ||
970
data.size() < nhdr->getSize(sec.addralign))
971
reportFatal(data.data(), "data is too short");
972
973
Elf_Note note(*nhdr);
974
if (nhdr->n_type != NT_GNU_PROPERTY_TYPE_0 || note.getName() != "GNU") {
975
data = data.slice(nhdr->getSize(sec.addralign));
976
continue;
977
}
978
979
uint32_t featureAndType = config->emachine == EM_AARCH64
980
? GNU_PROPERTY_AARCH64_FEATURE_1_AND
981
: GNU_PROPERTY_X86_FEATURE_1_AND;
982
983
// Read a body of a NOTE record, which consists of type-length-value fields.
984
ArrayRef<uint8_t> desc = note.getDesc(sec.addralign);
985
while (!desc.empty()) {
986
const uint8_t *place = desc.data();
987
if (desc.size() < 8)
988
reportFatal(place, "program property is too short");
989
uint32_t type = read32<ELFT::Endianness>(desc.data());
990
uint32_t size = read32<ELFT::Endianness>(desc.data() + 4);
991
desc = desc.slice(8);
992
if (desc.size() < size)
993
reportFatal(place, "program property is too short");
994
995
if (type == featureAndType) {
996
// We found a FEATURE_1_AND field. There may be more than one of these
997
// in a .note.gnu.property section, for a relocatable object we
998
// accumulate the bits set.
999
if (size < 4)
1000
reportFatal(place, "FEATURE_1_AND entry is too short");
1001
f.andFeatures |= read32<ELFT::Endianness>(desc.data());
1002
} else if (config->emachine == EM_AARCH64 &&
1003
type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) {
1004
if (!f.aarch64PauthAbiCoreInfo.empty()) {
1005
reportFatal(data.data(),
1006
"multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are "
1007
"not supported");
1008
} else if (size != 16) {
1009
reportFatal(data.data(), "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry "
1010
"is invalid: expected 16 bytes, but got " +
1011
Twine(size));
1012
}
1013
f.aarch64PauthAbiCoreInfo = desc;
1014
}
1015
1016
// Padding is present in the note descriptor, if necessary.
1017
desc = desc.slice(alignTo<(ELFT::Is64Bits ? 8 : 4)>(size));
1018
}
1019
1020
// Go to next NOTE record to look for more FEATURE_1_AND descriptions.
1021
data = data.slice(nhdr->getSize(sec.addralign));
1022
}
1023
}
1024
1025
template <class ELFT>
1026
InputSectionBase *ObjFile<ELFT>::getRelocTarget(uint32_t idx, uint32_t info) {
1027
if (info < this->sections.size()) {
1028
InputSectionBase *target = this->sections[info];
1029
1030
// Strictly speaking, a relocation section must be included in the
1031
// group of the section it relocates. However, LLVM 3.3 and earlier
1032
// would fail to do so, so we gracefully handle that case.
1033
if (target == &InputSection::discarded)
1034
return nullptr;
1035
1036
if (target != nullptr)
1037
return target;
1038
}
1039
1040
error(toString(this) + Twine(": relocation section (index ") + Twine(idx) +
1041
") has invalid sh_info (" + Twine(info) + ")");
1042
return nullptr;
1043
}
1044
1045
// The function may be called concurrently for different input files. For
1046
// allocation, prefer makeThreadLocal which does not require holding a lock.
1047
template <class ELFT>
1048
InputSectionBase *ObjFile<ELFT>::createInputSection(uint32_t idx,
1049
const Elf_Shdr &sec,
1050
StringRef name) {
1051
if (name.starts_with(".n")) {
1052
// The GNU linker uses .note.GNU-stack section as a marker indicating
1053
// that the code in the object file does not expect that the stack is
1054
// executable (in terms of NX bit). If all input files have the marker,
1055
// the GNU linker adds a PT_GNU_STACK segment to tells the loader to
1056
// make the stack non-executable. Most object files have this section as
1057
// of 2017.
1058
//
1059
// But making the stack non-executable is a norm today for security
1060
// reasons. Failure to do so may result in a serious security issue.
1061
// Therefore, we make LLD always add PT_GNU_STACK unless it is
1062
// explicitly told to do otherwise (by -z execstack). Because the stack
1063
// executable-ness is controlled solely by command line options,
1064
// .note.GNU-stack sections are simply ignored.
1065
if (name == ".note.GNU-stack")
1066
return &InputSection::discarded;
1067
1068
// Object files that use processor features such as Intel Control-Flow
1069
// Enforcement (CET) or AArch64 Branch Target Identification BTI, use a
1070
// .note.gnu.property section containing a bitfield of feature bits like the
1071
// GNU_PROPERTY_X86_FEATURE_1_IBT flag. Read a bitmap containing the flag.
1072
//
1073
// Since we merge bitmaps from multiple object files to create a new
1074
// .note.gnu.property containing a single AND'ed bitmap, we discard an input
1075
// file's .note.gnu.property section.
1076
if (name == ".note.gnu.property") {
1077
readGnuProperty<ELFT>(InputSection(*this, sec, name), *this);
1078
return &InputSection::discarded;
1079
}
1080
1081
// Split stacks is a feature to support a discontiguous stack,
1082
// commonly used in the programming language Go. For the details,
1083
// see https://gcc.gnu.org/wiki/SplitStacks. An object file compiled
1084
// for split stack will include a .note.GNU-split-stack section.
1085
if (name == ".note.GNU-split-stack") {
1086
if (config->relocatable) {
1087
error(
1088
"cannot mix split-stack and non-split-stack in a relocatable link");
1089
return &InputSection::discarded;
1090
}
1091
this->splitStack = true;
1092
return &InputSection::discarded;
1093
}
1094
1095
// An object file compiled for split stack, but where some of the
1096
// functions were compiled with the no_split_stack_attribute will
1097
// include a .note.GNU-no-split-stack section.
1098
if (name == ".note.GNU-no-split-stack") {
1099
this->someNoSplitStack = true;
1100
return &InputSection::discarded;
1101
}
1102
1103
// Strip existing .note.gnu.build-id sections so that the output won't have
1104
// more than one build-id. This is not usually a problem because input
1105
// object files normally don't have .build-id sections, but you can create
1106
// such files by "ld.{bfd,gold,lld} -r --build-id", and we want to guard
1107
// against it.
1108
if (name == ".note.gnu.build-id")
1109
return &InputSection::discarded;
1110
}
1111
1112
// The linker merges EH (exception handling) frames and creates a
1113
// .eh_frame_hdr section for runtime. So we handle them with a special
1114
// class. For relocatable outputs, they are just passed through.
1115
if (name == ".eh_frame" && !config->relocatable)
1116
return makeThreadLocal<EhInputSection>(*this, sec, name);
1117
1118
if ((sec.sh_flags & SHF_MERGE) && shouldMerge(sec, name))
1119
return makeThreadLocal<MergeInputSection>(*this, sec, name);
1120
return makeThreadLocal<InputSection>(*this, sec, name);
1121
}
1122
1123
// Initialize symbols. symbols is a parallel array to the corresponding ELF
1124
// symbol table.
1125
template <class ELFT>
1126
void ObjFile<ELFT>::initializeSymbols(const object::ELFFile<ELFT> &obj) {
1127
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1128
if (numSymbols == 0) {
1129
numSymbols = eSyms.size();
1130
symbols = std::make_unique<Symbol *[]>(numSymbols);
1131
}
1132
1133
// Some entries have been filled by LazyObjFile.
1134
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i)
1135
if (!symbols[i])
1136
symbols[i] = symtab.insert(CHECK(eSyms[i].getName(stringTable), this));
1137
1138
// Perform symbol resolution on non-local symbols.
1139
SmallVector<unsigned, 32> undefineds;
1140
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1141
const Elf_Sym &eSym = eSyms[i];
1142
uint32_t secIdx = eSym.st_shndx;
1143
if (secIdx == SHN_UNDEF) {
1144
undefineds.push_back(i);
1145
continue;
1146
}
1147
1148
uint8_t binding = eSym.getBinding();
1149
uint8_t stOther = eSym.st_other;
1150
uint8_t type = eSym.getType();
1151
uint64_t value = eSym.st_value;
1152
uint64_t size = eSym.st_size;
1153
1154
Symbol *sym = symbols[i];
1155
sym->isUsedInRegularObj = true;
1156
if (LLVM_UNLIKELY(eSym.st_shndx == SHN_COMMON)) {
1157
if (value == 0 || value >= UINT32_MAX)
1158
fatal(toString(this) + ": common symbol '" + sym->getName() +
1159
"' has invalid alignment: " + Twine(value));
1160
hasCommonSyms = true;
1161
sym->resolve(
1162
CommonSymbol{this, StringRef(), binding, stOther, type, value, size});
1163
continue;
1164
}
1165
1166
// Handle global defined symbols. Defined::section will be set in postParse.
1167
sym->resolve(Defined{this, StringRef(), binding, stOther, type, value, size,
1168
nullptr});
1169
}
1170
1171
// Undefined symbols (excluding those defined relative to non-prevailing
1172
// sections) can trigger recursive extract. Process defined symbols first so
1173
// that the relative order between a defined symbol and an undefined symbol
1174
// does not change the symbol resolution behavior. In addition, a set of
1175
// interconnected symbols will all be resolved to the same file, instead of
1176
// being resolved to different files.
1177
for (unsigned i : undefineds) {
1178
const Elf_Sym &eSym = eSyms[i];
1179
Symbol *sym = symbols[i];
1180
sym->resolve(Undefined{this, StringRef(), eSym.getBinding(), eSym.st_other,
1181
eSym.getType()});
1182
sym->isUsedInRegularObj = true;
1183
sym->referenced = true;
1184
}
1185
}
1186
1187
template <class ELFT>
1188
void ObjFile<ELFT>::initSectionsAndLocalSyms(bool ignoreComdats) {
1189
if (!justSymbols)
1190
initializeSections(ignoreComdats, getObj());
1191
1192
if (!firstGlobal)
1193
return;
1194
SymbolUnion *locals = makeThreadLocalN<SymbolUnion>(firstGlobal);
1195
memset(locals, 0, sizeof(SymbolUnion) * firstGlobal);
1196
1197
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1198
for (size_t i = 0, end = firstGlobal; i != end; ++i) {
1199
const Elf_Sym &eSym = eSyms[i];
1200
uint32_t secIdx = eSym.st_shndx;
1201
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1202
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1203
else if (secIdx >= SHN_LORESERVE)
1204
secIdx = 0;
1205
if (LLVM_UNLIKELY(secIdx >= sections.size()))
1206
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1207
if (LLVM_UNLIKELY(eSym.getBinding() != STB_LOCAL))
1208
error(toString(this) + ": non-local symbol (" + Twine(i) +
1209
") found at index < .symtab's sh_info (" + Twine(end) + ")");
1210
1211
InputSectionBase *sec = sections[secIdx];
1212
uint8_t type = eSym.getType();
1213
if (type == STT_FILE)
1214
sourceFile = CHECK(eSym.getName(stringTable), this);
1215
if (LLVM_UNLIKELY(stringTable.size() <= eSym.st_name))
1216
fatal(toString(this) + ": invalid symbol name offset");
1217
StringRef name(stringTable.data() + eSym.st_name);
1218
1219
symbols[i] = reinterpret_cast<Symbol *>(locals + i);
1220
if (eSym.st_shndx == SHN_UNDEF || sec == &InputSection::discarded)
1221
new (symbols[i]) Undefined(this, name, STB_LOCAL, eSym.st_other, type,
1222
/*discardedSecIdx=*/secIdx);
1223
else
1224
new (symbols[i]) Defined(this, name, STB_LOCAL, eSym.st_other, type,
1225
eSym.st_value, eSym.st_size, sec);
1226
symbols[i]->partition = 1;
1227
symbols[i]->isUsedInRegularObj = true;
1228
}
1229
}
1230
1231
// Called after all ObjFile::parse is called for all ObjFiles. This checks
1232
// duplicate symbols and may do symbol property merge in the future.
1233
template <class ELFT> void ObjFile<ELFT>::postParse() {
1234
static std::mutex mu;
1235
ArrayRef<Elf_Sym> eSyms = this->getELFSyms<ELFT>();
1236
for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
1237
const Elf_Sym &eSym = eSyms[i];
1238
Symbol &sym = *symbols[i];
1239
uint32_t secIdx = eSym.st_shndx;
1240
uint8_t binding = eSym.getBinding();
1241
if (LLVM_UNLIKELY(binding != STB_GLOBAL && binding != STB_WEAK &&
1242
binding != STB_GNU_UNIQUE))
1243
errorOrWarn(toString(this) + ": symbol (" + Twine(i) +
1244
") has invalid binding: " + Twine((int)binding));
1245
1246
// st_value of STT_TLS represents the assigned offset, not the actual
1247
// address which is used by STT_FUNC and STT_OBJECT. STT_TLS symbols can
1248
// only be referenced by special TLS relocations. It is usually an error if
1249
// a STT_TLS symbol is replaced by a non-STT_TLS symbol, vice versa.
1250
if (LLVM_UNLIKELY(sym.isTls()) && eSym.getType() != STT_TLS &&
1251
eSym.getType() != STT_NOTYPE)
1252
errorOrWarn("TLS attribute mismatch: " + toString(sym) + "\n>>> in " +
1253
toString(sym.file) + "\n>>> in " + toString(this));
1254
1255
// Handle non-COMMON defined symbol below. !sym.file allows a symbol
1256
// assignment to redefine a symbol without an error.
1257
if (!sym.file || !sym.isDefined() || secIdx == SHN_UNDEF ||
1258
secIdx == SHN_COMMON)
1259
continue;
1260
1261
if (LLVM_UNLIKELY(secIdx == SHN_XINDEX))
1262
secIdx = check(getExtendedSymbolTableIndex<ELFT>(eSym, i, shndxTable));
1263
else if (secIdx >= SHN_LORESERVE)
1264
secIdx = 0;
1265
if (LLVM_UNLIKELY(secIdx >= sections.size()))
1266
fatal(toString(this) + ": invalid section index: " + Twine(secIdx));
1267
InputSectionBase *sec = sections[secIdx];
1268
if (sec == &InputSection::discarded) {
1269
if (sym.traced) {
1270
printTraceSymbol(Undefined{this, sym.getName(), sym.binding,
1271
sym.stOther, sym.type, secIdx},
1272
sym.getName());
1273
}
1274
if (sym.file == this) {
1275
std::lock_guard<std::mutex> lock(mu);
1276
ctx.nonPrevailingSyms.emplace_back(&sym, secIdx);
1277
}
1278
continue;
1279
}
1280
1281
if (sym.file == this) {
1282
cast<Defined>(sym).section = sec;
1283
continue;
1284
}
1285
1286
if (sym.binding == STB_WEAK || binding == STB_WEAK)
1287
continue;
1288
std::lock_guard<std::mutex> lock(mu);
1289
ctx.duplicates.push_back({&sym, this, sec, eSym.st_value});
1290
}
1291
}
1292
1293
// The handling of tentative definitions (COMMON symbols) in archives is murky.
1294
// A tentative definition will be promoted to a global definition if there are
1295
// no non-tentative definitions to dominate it. When we hold a tentative
1296
// definition to a symbol and are inspecting archive members for inclusion
1297
// there are 2 ways we can proceed:
1298
//
1299
// 1) Consider the tentative definition a 'real' definition (ie promotion from
1300
// tentative to real definition has already happened) and not inspect
1301
// archive members for Global/Weak definitions to replace the tentative
1302
// definition. An archive member would only be included if it satisfies some
1303
// other undefined symbol. This is the behavior Gold uses.
1304
//
1305
// 2) Consider the tentative definition as still undefined (ie the promotion to
1306
// a real definition happens only after all symbol resolution is done).
1307
// The linker searches archive members for STB_GLOBAL definitions to
1308
// replace the tentative definition with. This is the behavior used by
1309
// GNU ld.
1310
//
1311
// The second behavior is inherited from SysVR4, which based it on the FORTRAN
1312
// COMMON BLOCK model. This behavior is needed for proper initialization in old
1313
// (pre F90) FORTRAN code that is packaged into an archive.
1314
//
1315
// The following functions search archive members for definitions to replace
1316
// tentative definitions (implementing behavior 2).
1317
static bool isBitcodeNonCommonDef(MemoryBufferRef mb, StringRef symName,
1318
StringRef archiveName) {
1319
IRSymtabFile symtabFile = check(readIRSymtab(mb));
1320
for (const irsymtab::Reader::SymbolRef &sym :
1321
symtabFile.TheReader.symbols()) {
1322
if (sym.isGlobal() && sym.getName() == symName)
1323
return !sym.isUndefined() && !sym.isWeak() && !sym.isCommon();
1324
}
1325
return false;
1326
}
1327
1328
template <class ELFT>
1329
static bool isNonCommonDef(ELFKind ekind, MemoryBufferRef mb, StringRef symName,
1330
StringRef archiveName) {
1331
ObjFile<ELFT> *obj = make<ObjFile<ELFT>>(ekind, mb, archiveName);
1332
obj->init();
1333
StringRef stringtable = obj->getStringTable();
1334
1335
for (auto sym : obj->template getGlobalELFSyms<ELFT>()) {
1336
Expected<StringRef> name = sym.getName(stringtable);
1337
if (name && name.get() == symName)
1338
return sym.isDefined() && sym.getBinding() == STB_GLOBAL &&
1339
!sym.isCommon();
1340
}
1341
return false;
1342
}
1343
1344
// Detects the ELF class and endianness of `mb` and forwards to the matching
// isNonCommonDef<ELFT> instantiation.
static bool isNonCommonDef(MemoryBufferRef mb, StringRef symName,
                           StringRef archiveName) {
  const ELFKind kind = getELFKind(mb, archiveName);
  switch (kind) {
  case ELF32LEKind:
    return isNonCommonDef<ELF32LE>(ELF32LEKind, mb, symName, archiveName);
  case ELF32BEKind:
    return isNonCommonDef<ELF32BE>(ELF32BEKind, mb, symName, archiveName);
  case ELF64LEKind:
    return isNonCommonDef<ELF64LE>(ELF64LEKind, mb, symName, archiveName);
  case ELF64BEKind:
    return isNonCommonDef<ELF64BE>(ELF64BEKind, mb, symName, archiveName);
  default:
    llvm_unreachable("getELFKind");
  }
}
1359
1360
unsigned SharedFile::vernauxNum;
1361
1362
SharedFile::SharedFile(MemoryBufferRef m, StringRef defaultSoName)
1363
: ELFFileBase(SharedKind, getELFKind(m, ""), m), soName(defaultSoName),
1364
isNeeded(!config->asNeeded) {}
1365
1366
// Parse the version definitions in the object file if present, and return a
1367
// vector whose nth element contains a pointer to the Elf_Verdef for version
1368
// identifier n. Version identifiers that are not definitions map to nullptr.
1369
template <typename ELFT>
1370
static SmallVector<const void *, 0>
1371
parseVerdefs(const uint8_t *base, const typename ELFT::Shdr *sec) {
1372
if (!sec)
1373
return {};
1374
1375
// Build the Verdefs array by following the chain of Elf_Verdef objects
1376
// from the start of the .gnu.version_d section.
1377
SmallVector<const void *, 0> verdefs;
1378
const uint8_t *verdef = base + sec->sh_offset;
1379
for (unsigned i = 0, e = sec->sh_info; i != e; ++i) {
1380
auto *curVerdef = reinterpret_cast<const typename ELFT::Verdef *>(verdef);
1381
verdef += curVerdef->vd_next;
1382
unsigned verdefIndex = curVerdef->vd_ndx;
1383
if (verdefIndex >= verdefs.size())
1384
verdefs.resize(verdefIndex + 1);
1385
verdefs[verdefIndex] = curVerdef;
1386
}
1387
return verdefs;
1388
}
1389
1390
// Parse SHT_GNU_verneed to properly set the name of a versioned undefined
1391
// symbol. We detect fatal issues which would cause vulnerabilities, but do not
1392
// implement sophisticated error checking like in llvm-readobj because the value
1393
// of such diagnostics is low.
1394
template <typename ELFT>
1395
std::vector<uint32_t> SharedFile::parseVerneed(const ELFFile<ELFT> &obj,
1396
const typename ELFT::Shdr *sec) {
1397
if (!sec)
1398
return {};
1399
std::vector<uint32_t> verneeds;
1400
ArrayRef<uint8_t> data = CHECK(obj.getSectionContents(*sec), this);
1401
const uint8_t *verneedBuf = data.begin();
1402
for (unsigned i = 0; i != sec->sh_info; ++i) {
1403
if (verneedBuf + sizeof(typename ELFT::Verneed) > data.end())
1404
fatal(toString(this) + " has an invalid Verneed");
1405
auto *vn = reinterpret_cast<const typename ELFT::Verneed *>(verneedBuf);
1406
const uint8_t *vernauxBuf = verneedBuf + vn->vn_aux;
1407
for (unsigned j = 0; j != vn->vn_cnt; ++j) {
1408
if (vernauxBuf + sizeof(typename ELFT::Vernaux) > data.end())
1409
fatal(toString(this) + " has an invalid Vernaux");
1410
auto *aux = reinterpret_cast<const typename ELFT::Vernaux *>(vernauxBuf);
1411
if (aux->vna_name >= this->stringTable.size())
1412
fatal(toString(this) + " has a Vernaux with an invalid vna_name");
1413
uint16_t version = aux->vna_other & VERSYM_VERSION;
1414
if (version >= verneeds.size())
1415
verneeds.resize(version + 1);
1416
verneeds[version] = aux->vna_name;
1417
vernauxBuf += aux->vna_next;
1418
}
1419
verneedBuf += vn->vn_next;
1420
}
1421
return verneeds;
1422
}
1423
1424
// We do not usually care about alignments of data in shared object
1425
// files because the loader takes care of it. However, if we promote a
1426
// DSO symbol to point to .bss due to copy relocation, we need to keep
1427
// the original alignment requirements. We infer it in this function.
1428
template <typename ELFT>
1429
static uint64_t getAlignment(ArrayRef<typename ELFT::Shdr> sections,
1430
const typename ELFT::Sym &sym) {
1431
uint64_t ret = UINT64_MAX;
1432
if (sym.st_value)
1433
ret = 1ULL << llvm::countr_zero((uint64_t)sym.st_value);
1434
if (0 < sym.st_shndx && sym.st_shndx < sections.size())
1435
ret = std::min<uint64_t>(ret, sections[sym.st_shndx].sh_addralign);
1436
return (ret > UINT32_MAX) ? 0 : ret;
1437
}
1438
1439
// Fully parse the shared object file.
1440
//
1441
// This function parses symbol versions. If a DSO has version information,
1442
// the file has a ".gnu.version_d" section which contains symbol version
1443
// definitions. Each symbol is associated to one version through a table in
1444
// ".gnu.version" section. That table is a parallel array for the symbol
1445
// table, and each table entry contains an index in ".gnu.version_d".
1446
//
1447
// The special index 0 is reserved for VERF_NDX_LOCAL and 1 is for
1448
// VER_NDX_GLOBAL. There's no table entry for these special versions in
1449
// ".gnu.version_d".
1450
//
1451
// The file format for symbol versioning is perhaps a bit more complicated
1452
// than necessary, but you can easily understand the code if you wrap your
1453
// head around the data structure described above.
1454
template <class ELFT> void SharedFile::parse() {
1455
using Elf_Dyn = typename ELFT::Dyn;
1456
using Elf_Shdr = typename ELFT::Shdr;
1457
using Elf_Sym = typename ELFT::Sym;
1458
using Elf_Verdef = typename ELFT::Verdef;
1459
using Elf_Versym = typename ELFT::Versym;
1460
1461
ArrayRef<Elf_Dyn> dynamicTags;
1462
const ELFFile<ELFT> obj = this->getObj<ELFT>();
1463
ArrayRef<Elf_Shdr> sections = getELFShdrs<ELFT>();
1464
1465
const Elf_Shdr *versymSec = nullptr;
1466
const Elf_Shdr *verdefSec = nullptr;
1467
const Elf_Shdr *verneedSec = nullptr;
1468
1469
// Search for .dynsym, .dynamic, .symtab, .gnu.version and .gnu.version_d.
1470
for (const Elf_Shdr &sec : sections) {
1471
switch (sec.sh_type) {
1472
default:
1473
continue;
1474
case SHT_DYNAMIC:
1475
dynamicTags =
1476
CHECK(obj.template getSectionContentsAsArray<Elf_Dyn>(sec), this);
1477
break;
1478
case SHT_GNU_versym:
1479
versymSec = &sec;
1480
break;
1481
case SHT_GNU_verdef:
1482
verdefSec = &sec;
1483
break;
1484
case SHT_GNU_verneed:
1485
verneedSec = &sec;
1486
break;
1487
}
1488
}
1489
1490
if (versymSec && numELFSyms == 0) {
1491
error("SHT_GNU_versym should be associated with symbol table");
1492
return;
1493
}
1494
1495
// Search for a DT_SONAME tag to initialize this->soName.
1496
for (const Elf_Dyn &dyn : dynamicTags) {
1497
if (dyn.d_tag == DT_NEEDED) {
1498
uint64_t val = dyn.getVal();
1499
if (val >= this->stringTable.size())
1500
fatal(toString(this) + ": invalid DT_NEEDED entry");
1501
dtNeeded.push_back(this->stringTable.data() + val);
1502
} else if (dyn.d_tag == DT_SONAME) {
1503
uint64_t val = dyn.getVal();
1504
if (val >= this->stringTable.size())
1505
fatal(toString(this) + ": invalid DT_SONAME entry");
1506
soName = this->stringTable.data() + val;
1507
}
1508
}
1509
1510
// DSOs are uniquified not by filename but by soname.
1511
DenseMap<CachedHashStringRef, SharedFile *>::iterator it;
1512
bool wasInserted;
1513
std::tie(it, wasInserted) =
1514
symtab.soNames.try_emplace(CachedHashStringRef(soName), this);
1515
1516
// If a DSO appears more than once on the command line with and without
1517
// --as-needed, --no-as-needed takes precedence over --as-needed because a
1518
// user can add an extra DSO with --no-as-needed to force it to be added to
1519
// the dependency list.
1520
it->second->isNeeded |= isNeeded;
1521
if (!wasInserted)
1522
return;
1523
1524
ctx.sharedFiles.push_back(this);
1525
1526
verdefs = parseVerdefs<ELFT>(obj.base(), verdefSec);
1527
std::vector<uint32_t> verneeds = parseVerneed<ELFT>(obj, verneedSec);
1528
1529
// Parse ".gnu.version" section which is a parallel array for the symbol
1530
// table. If a given file doesn't have a ".gnu.version" section, we use
1531
// VER_NDX_GLOBAL.
1532
size_t size = numELFSyms - firstGlobal;
1533
std::vector<uint16_t> versyms(size, VER_NDX_GLOBAL);
1534
if (versymSec) {
1535
ArrayRef<Elf_Versym> versym =
1536
CHECK(obj.template getSectionContentsAsArray<Elf_Versym>(*versymSec),
1537
this)
1538
.slice(firstGlobal);
1539
for (size_t i = 0; i < size; ++i)
1540
versyms[i] = versym[i].vs_index;
1541
}
1542
1543
// System libraries can have a lot of symbols with versions. Using a
1544
// fixed buffer for computing the versions name (foo@ver) can save a
1545
// lot of allocations.
1546
SmallString<0> versionedNameBuffer;
1547
1548
// Add symbols to the symbol table.
1549
ArrayRef<Elf_Sym> syms = this->getGlobalELFSyms<ELFT>();
1550
for (size_t i = 0, e = syms.size(); i != e; ++i) {
1551
const Elf_Sym &sym = syms[i];
1552
1553
// ELF spec requires that all local symbols precede weak or global
1554
// symbols in each symbol table, and the index of first non-local symbol
1555
// is stored to sh_info. If a local symbol appears after some non-local
1556
// symbol, that's a violation of the spec.
1557
StringRef name = CHECK(sym.getName(stringTable), this);
1558
if (sym.getBinding() == STB_LOCAL) {
1559
errorOrWarn(toString(this) + ": invalid local symbol '" + name +
1560
"' in global part of symbol table");
1561
continue;
1562
}
1563
1564
const uint16_t ver = versyms[i], idx = ver & ~VERSYM_HIDDEN;
1565
if (sym.isUndefined()) {
1566
// For unversioned undefined symbols, VER_NDX_GLOBAL makes more sense but
1567
// as of binutils 2.34, GNU ld produces VER_NDX_LOCAL.
1568
if (ver != VER_NDX_LOCAL && ver != VER_NDX_GLOBAL) {
1569
if (idx >= verneeds.size()) {
1570
error("corrupt input file: version need index " + Twine(idx) +
1571
" for symbol " + name + " is out of bounds\n>>> defined in " +
1572
toString(this));
1573
continue;
1574
}
1575
StringRef verName = stringTable.data() + verneeds[idx];
1576
versionedNameBuffer.clear();
1577
name = saver().save(
1578
(name + "@" + verName).toStringRef(versionedNameBuffer));
1579
}
1580
Symbol *s = symtab.addSymbol(
1581
Undefined{this, name, sym.getBinding(), sym.st_other, sym.getType()});
1582
s->exportDynamic = true;
1583
if (sym.getBinding() != STB_WEAK &&
1584
config->unresolvedSymbolsInShlib != UnresolvedPolicy::Ignore)
1585
requiredSymbols.push_back(s);
1586
continue;
1587
}
1588
1589
if (ver == VER_NDX_LOCAL ||
1590
(ver != VER_NDX_GLOBAL && idx >= verdefs.size())) {
1591
// In GNU ld < 2.31 (before 3be08ea4728b56d35e136af4e6fd3086ade17764), the
1592
// MIPS port puts _gp_disp symbol into DSO files and incorrectly assigns
1593
// VER_NDX_LOCAL. Workaround this bug.
1594
if (config->emachine == EM_MIPS && name == "_gp_disp")
1595
continue;
1596
error("corrupt input file: version definition index " + Twine(idx) +
1597
" for symbol " + name + " is out of bounds\n>>> defined in " +
1598
toString(this));
1599
continue;
1600
}
1601
1602
uint32_t alignment = getAlignment<ELFT>(sections, sym);
1603
if (ver == idx) {
1604
auto *s = symtab.addSymbol(
1605
SharedSymbol{*this, name, sym.getBinding(), sym.st_other,
1606
sym.getType(), sym.st_value, sym.st_size, alignment});
1607
s->dsoDefined = true;
1608
if (s->file == this)
1609
s->versionId = ver;
1610
}
1611
1612
// Also add the symbol with the versioned name to handle undefined symbols
1613
// with explicit versions.
1614
if (ver == VER_NDX_GLOBAL)
1615
continue;
1616
1617
StringRef verName =
1618
stringTable.data() +
1619
reinterpret_cast<const Elf_Verdef *>(verdefs[idx])->getAux()->vda_name;
1620
versionedNameBuffer.clear();
1621
name = (name + "@" + verName).toStringRef(versionedNameBuffer);
1622
auto *s = symtab.addSymbol(
1623
SharedSymbol{*this, saver().save(name), sym.getBinding(), sym.st_other,
1624
sym.getType(), sym.st_value, sym.st_size, alignment});
1625
s->dsoDefined = true;
1626
if (s->file == this)
1627
s->versionId = idx;
1628
}
1629
}
1630
1631
// Pick the ELFKind for a bitcode file from its target triple: the triple's
// pointer width selects 32 vs 64 and its endianness selects LE vs BE.
static ELFKind getBitcodeELFKind(const Triple &t) {
  const bool is64 = t.isArch64Bit();
  if (!t.isLittleEndian())
    return is64 ? ELF64BEKind : ELF32BEKind;
  return is64 ? ELF64LEKind : ELF32LEKind;
}
1636
1637
// Translate the architecture of a bitcode file's target triple into the
// matching ELF e_machine value.
//
// `path` is only used for the diagnostic. When the architecture is not one of
// the cases below, an error naming `path` and the triple is reported and
// EM_NONE is returned.
static uint16_t getBitcodeMachineKind(StringRef path, const Triple &t) {
  switch (t.getArch()) {
  case Triple::aarch64:
  case Triple::aarch64_be:
    return EM_AARCH64;

  case Triple::amdgcn:
  case Triple::r600:
    return EM_AMDGPU;

  case Triple::arm:
  case Triple::armeb:
  case Triple::thumb:
  case Triple::thumbeb:
    return EM_ARM;

  case Triple::avr:
    return EM_AVR;

  case Triple::hexagon:
    return EM_HEXAGON;

  case Triple::loongarch32:
  case Triple::loongarch64:
    return EM_LOONGARCH;

  case Triple::mips:
  case Triple::mipsel:
  case Triple::mips64:
  case Triple::mips64el:
    return EM_MIPS;

  case Triple::msp430:
    return EM_MSP430;

  case Triple::ppc:
  case Triple::ppcle:
    return EM_PPC;

  case Triple::ppc64:
  case Triple::ppc64le:
    return EM_PPC64;

  case Triple::riscv32:
  case Triple::riscv64:
    return EM_RISCV;

  case Triple::sparcv9:
    return EM_SPARCV9;

  case Triple::systemz:
    return EM_S390;

  case Triple::x86:
    // 32-bit x86 has a dedicated machine type when the OS is IAMCU.
    if (t.isOSIAMCU())
      return EM_IAMCU;
    return EM_386;

  case Triple::x86_64:
    return EM_X86_64;

  default:
    break;
  }

  // No mapping for this architecture.
  error(path + ": could not infer e_machine from bitcode target triple " +
        t.str());
  return EM_NONE;
}
1687
1688
static uint8_t getOsAbi(const Triple &t) {
1689
switch (t.getOS()) {
1690
case Triple::AMDHSA:
1691
return ELF::ELFOSABI_AMDGPU_HSA;
1692
case Triple::AMDPAL:
1693
return ELF::ELFOSABI_AMDGPU_PAL;
1694
case Triple::Mesa3D:
1695
return ELF::ELFOSABI_AMDGPU_MESA3D;
1696
default:
1697
return ELF::ELFOSABI_NONE;
1698
}
1699
}
1700
1701
// Wrap the bitcode buffer `mb` as an LTO input file and derive ekind,
// emachine and osabi from its target triple.
BitcodeFile::BitcodeFile(MemoryBufferRef mb, StringRef archiveName,
                         uint64_t offsetInArchive, bool lazy)
    : InputFile(BitcodeKind, mb) {
  this->archiveName = archiveName;
  this->lazy = lazy;

  // With thinLTOIndexOnly set, the object suffix replacement is applied to
  // the buffer identifier; otherwise the identifier is used unchanged.
  const StringRef bufferId = mb.getBufferIdentifier();
  std::string bufferPath = config->thinLTOIndexOnly
                               ? replaceThinLTOSuffix(bufferId)
                               : bufferId.str();

  // ThinLTO assumes that every MemoryBufferRef handed to it has a unique
  // name. If two archives contain members with the same name, the names
  // collide and only one of the objects is taken into consideration at LTO
  // time (which very likely causes undefined symbols later in the link
  // stage). Appending the member's offset in the archive makes the name
  // unique.
  StringRef uniqueName;
  if (archiveName.empty())
    uniqueName = saver().save(bufferPath);
  else
    uniqueName =
        saver().save(archiveName + "(" + path::filename(bufferPath) + " at " +
                     utostr(offsetInArchive) + ")");
  MemoryBufferRef namedBuffer(mb.getBuffer(), uniqueName);

  obj = CHECK(lto::InputFile::create(namedBuffer), this);

  Triple triple(obj->getTargetTriple());
  ekind = getBitcodeELFKind(triple);
  emachine = getBitcodeMachineKind(mb.getBufferIdentifier(), triple);
  osabi = getOsAbi(triple);
}
1730
1731
// Convert an IR-level visibility into the corresponding ELF STV_* value.
static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
  if (gvVisibility == GlobalValue::DefaultVisibility)
    return STV_DEFAULT;
  if (gvVisibility == GlobalValue::HiddenVisibility)
    return STV_HIDDEN;
  if (gvVisibility == GlobalValue::ProtectedVisibility)
    return STV_PROTECTED;
  llvm_unreachable("unknown visibility");
}
1742
1743
static void
1744
createBitcodeSymbol(Symbol *&sym, const std::vector<bool> &keptComdats,
1745
const lto::InputFile::Symbol &objSym, BitcodeFile &f) {
1746
uint8_t binding = objSym.isWeak() ? STB_WEAK : STB_GLOBAL;
1747
uint8_t type = objSym.isTLS() ? STT_TLS : STT_NOTYPE;
1748
uint8_t visibility = mapVisibility(objSym.getVisibility());
1749
1750
if (!sym)
1751
sym = symtab.insert(saver().save(objSym.getName()));
1752
1753
int c = objSym.getComdatIndex();
1754
if (objSym.isUndefined() || (c != -1 && !keptComdats[c])) {
1755
Undefined newSym(&f, StringRef(), binding, visibility, type);
1756
sym->resolve(newSym);
1757
sym->referenced = true;
1758
return;
1759
}
1760
1761
if (objSym.isCommon()) {
1762
sym->resolve(CommonSymbol{&f, StringRef(), binding, visibility, STT_OBJECT,
1763
objSym.getCommonAlignment(),
1764
objSym.getCommonSize()});
1765
} else {
1766
Defined newSym(&f, StringRef(), binding, visibility, type, 0, 0, nullptr);
1767
if (objSym.canBeOmittedFromSymbolTable())
1768
newSym.exportDynamic = false;
1769
sym->resolve(newSym);
1770
}
1771
}
1772
1773
void BitcodeFile::parse() {
1774
for (std::pair<StringRef, Comdat::SelectionKind> s : obj->getComdatTable()) {
1775
keptComdats.push_back(
1776
s.second == Comdat::NoDeduplicate ||
1777
symtab.comdatGroups.try_emplace(CachedHashStringRef(s.first), this)
1778
.second);
1779
}
1780
1781
if (numSymbols == 0) {
1782
numSymbols = obj->symbols().size();
1783
symbols = std::make_unique<Symbol *[]>(numSymbols);
1784
}
1785
// Process defined symbols first. See the comment in
1786
// ObjFile<ELFT>::initializeSymbols.
1787
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1788
if (!irSym.isUndefined())
1789
createBitcodeSymbol(symbols[i], keptComdats, irSym, *this);
1790
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1791
if (irSym.isUndefined())
1792
createBitcodeSymbol(symbols[i], keptComdats, irSym, *this);
1793
1794
for (auto l : obj->getDependentLibraries())
1795
addDependentLibrary(l, this);
1796
}
1797
1798
void BitcodeFile::parseLazy() {
1799
numSymbols = obj->symbols().size();
1800
symbols = std::make_unique<Symbol *[]>(numSymbols);
1801
for (auto [i, irSym] : llvm::enumerate(obj->symbols()))
1802
if (!irSym.isUndefined()) {
1803
auto *sym = symtab.insert(saver().save(irSym.getName()));
1804
sym->resolve(LazySymbol{*this});
1805
symbols[i] = sym;
1806
}
1807
}
1808
1809
void BitcodeFile::postParse() {
1810
for (auto [i, irSym] : llvm::enumerate(obj->symbols())) {
1811
const Symbol &sym = *symbols[i];
1812
if (sym.file == this || !sym.isDefined() || irSym.isUndefined() ||
1813
irSym.isCommon() || irSym.isWeak())
1814
continue;
1815
int c = irSym.getComdatIndex();
1816
if (c != -1 && !keptComdats[c])
1817
continue;
1818
reportDuplicate(sym, this, nullptr, 0);
1819
}
1820
}
1821
1822
void BinaryFile::parse() {
1823
ArrayRef<uint8_t> data = arrayRefFromStringRef(mb.getBuffer());
1824
auto *section = make<InputSection>(this, SHF_ALLOC | SHF_WRITE, SHT_PROGBITS,
1825
8, data, ".data");
1826
sections.push_back(section);
1827
1828
// For each input file foo that is embedded to a result as a binary
1829
// blob, we define _binary_foo_{start,end,size} symbols, so that
1830
// user programs can access blobs by name. Non-alphanumeric
1831
// characters in a filename are replaced with underscore.
1832
std::string s = "_binary_" + mb.getBufferIdentifier().str();
1833
for (char &c : s)
1834
if (!isAlnum(c))
1835
c = '_';
1836
1837
llvm::StringSaver &saver = lld::saver();
1838
1839
symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_start"),
1840
STB_GLOBAL, STV_DEFAULT, STT_OBJECT, 0, 0,
1841
section});
1842
symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_end"), STB_GLOBAL,
1843
STV_DEFAULT, STT_OBJECT, data.size(), 0,
1844
section});
1845
symtab.addAndCheckDuplicate(Defined{this, saver.save(s + "_size"), STB_GLOBAL,
1846
STV_DEFAULT, STT_OBJECT, data.size(), 0,
1847
nullptr});
1848
}
1849
1850
// Create an InputFile of InternalKind backed by an empty buffer that carries
// `name` as its identifier.
InputFile *elf::createInternalFile(StringRef name) {
  MemoryBufferRef emptyBuffer("", name);
  InputFile *internal = make<InputFile>(InputFile::InternalKind, emptyBuffer);
  // References from an internal file do not lead to --warn-backrefs
  // diagnostics.
  internal->groupId = 0;
  return internal;
}
1858
1859
// Create the ObjFile specialization matching the ELF kind of `mb`, run its
// init(), and record whether it is lazy.
ELFFileBase *elf::createObjFile(MemoryBufferRef mb, StringRef archiveName,
                                bool lazy) {
  // Instantiate the ObjFile for the detected ELF class and endianness.
  auto construct = [&]() -> ELFFileBase * {
    switch (getELFKind(mb, archiveName)) {
    case ELF32LEKind:
      return make<ObjFile<ELF32LE>>(ELF32LEKind, mb, archiveName);
    case ELF32BEKind:
      return make<ObjFile<ELF32BE>>(ELF32BEKind, mb, archiveName);
    case ELF64LEKind:
      return make<ObjFile<ELF64LE>>(ELF64LEKind, mb, archiveName);
    case ELF64BEKind:
      return make<ObjFile<ELF64BE>>(ELF64BEKind, mb, archiveName);
    default:
      llvm_unreachable("getELFKind");
    }
  };

  ELFFileBase *objFile = construct();
  objFile->init();
  objFile->lazy = lazy;
  return objFile;
}
1882
1883
// Allocate the symbol array and resolve a LazySymbol for each global symbol
// this object defines, without parsing the file in full.
template <class ELFT> void ObjFile<ELFT>::parseLazy() {
  const ArrayRef<typename ELFT::Sym> eSyms = this->getELFSyms<ELFT>();
  numSymbols = eSyms.size();
  symbols = std::make_unique<Symbol *[]>(numSymbols);

  // resolve() may trigger this->extract() if an existing symbol is an
  // undefined symbol. Once that has happened this function has served its
  // purpose, so the loop stops as soon as the file is no longer lazy.
  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
    const typename ELFT::Sym &eSym = eSyms[i];
    if (eSym.st_shndx == SHN_UNDEF)
      continue;

    // Store the symbol in its slot before resolving it.
    Symbol *lazySym = symtab.insert(CHECK(eSym.getName(stringTable), this));
    symbols[i] = lazySym;
    lazySym->resolve(LazySymbol{*this});
    if (!lazy)
      break;
  }
}
1900
1901
// Decide whether this file should be extracted for `name`, using the
// bitcode-specific query when this is a BitcodeFile and the ELF query
// otherwise.
bool InputFile::shouldExtractForCommon(StringRef name) const {
  return isa<BitcodeFile>(this) ? isBitcodeNonCommonDef(mb, name, archiveName)
                                : isNonCommonDef(mb, name, archiveName);
}
1907
1908
// Apply config->thinLTOObjectSuffixReplace to `path`: if `path` ends with the
// first element of the pair, that suffix is replaced by the second element;
// otherwise `path` is returned unchanged.
std::string elf::replaceThinLTOSuffix(StringRef path) {
  const auto &rule = config->thinLTOObjectSuffixReplace;
  if (!path.consume_back(rule.first))
    return path.str();
  return (path + rule.second).str();
}
1914
1915
template class elf::ObjFile<ELF32LE>;
1916
template class elf::ObjFile<ELF32BE>;
1917
template class elf::ObjFile<ELF64LE>;
1918
template class elf::ObjFile<ELF64BE>;
1919
1920
template void SharedFile::parse<ELF32LE>();
1921
template void SharedFile::parse<ELF32BE>();
1922
template void SharedFile::parse<ELF64LE>();
1923
template void SharedFile::parse<ELF64BE>();
1924
1925