Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/ELF/Symbols.h
34879 views
1
//===- Symbols.h ------------------------------------------------*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines various types of Symbols.
10
//
11
//===----------------------------------------------------------------------===//
12
13
#ifndef LLD_ELF_SYMBOLS_H
14
#define LLD_ELF_SYMBOLS_H
15
16
#include "Config.h"
17
#include "lld/Common/LLVM.h"
18
#include "lld/Common/Memory.h"
19
#include "llvm/ADT/DenseMap.h"
20
#include "llvm/Object/ELF.h"
21
#include "llvm/Support/Compiler.h"
22
#include <tuple>
23
24
namespace lld {
25
namespace elf {
26
class Symbol;
27
}
28
// Returns a string representation for a symbol for diagnostics.
29
std::string toString(const elf::Symbol &);
30
31
namespace elf {
32
// Forward declarations of the types that the declarations below only use by
// pointer or reference.
class CommonSymbol;
class Defined;
class OutputSection;
class SectionBase;
class InputSectionBase;
class SharedSymbol;
class Symbol;
class Undefined;
class LazySymbol;
class InputFile;
42
43
// Trace hook for symbols selected by --trace-symbol. Symbol::overwrite() calls
// this with the incoming symbol and the existing symbol's name whenever the
// existing symbol has its `traced` bit set. Defined outside this header.
void printTraceSymbol(const Symbol &sym, StringRef name);
44
45
// Bit values for Symbol::flags. Each enumerator is a distinct single bit so
// that several of them can be OR-ed together (see Symbol::setFlags,
// Symbol::hasFlag and Symbol::needsDynReloc). All nine bits fit in the
// 16-bit flags word.
enum {
  NEEDS_GOT = 1 << 0,
  NEEDS_PLT = 1 << 1,
  HAS_DIRECT_RELOC = 1 << 2,
  // True if this symbol needs a canonical PLT entry, or (during
  // postScanRelocations) a copy relocation.
  NEEDS_COPY = 1 << 3,
  NEEDS_TLSDESC = 1 << 4,
  NEEDS_TLSGD = 1 << 5,
  NEEDS_TLSGD_TO_IE = 1 << 6,
  NEEDS_GOT_DTPREL = 1 << 7,
  NEEDS_TLSIE = 1 << 8,
};
58
59
// Some index properties of a symbol are stored separately in this auxiliary
// struct to decrease sizeof(SymbolUnion) in the majority of cases.
//
// Every index defaults to uint32_t(-1) (all bits set), which is the "not
// assigned" sentinel that Symbol::isInGot() and Symbol::isInPlt() test for.
struct SymbolAux {
  uint32_t gotIdx = -1;
  uint32_t pltIdx = -1;
  uint32_t tlsDescIdx = -1;
  uint32_t tlsGdIdx = -1;
};
67
68
// Table of SymbolAux entries. A symbol reads its own entry through
// Symbol::auxIdx (see Symbol::getGotIdx() and friends); entries are appended
// by Symbol::allocateAux().
LLVM_LIBRARY_VISIBILITY extern SmallVector<SymbolAux, 0> symAux;
69
70
// The base class for real symbol classes.
71
class Symbol {
72
public:
73
enum Kind {
74
PlaceholderKind,
75
DefinedKind,
76
CommonKind,
77
SharedKind,
78
UndefinedKind,
79
LazyKind,
80
};
81
82
Kind kind() const { return static_cast<Kind>(symbolKind); }
83
84
// The file from which this symbol was created.
85
InputFile *file;
86
87
// The default copy constructor is deleted due to atomic flags. Define one for
88
// places where no atomic is needed.
89
Symbol(const Symbol &o) { memcpy(this, &o, sizeof(o)); }
90
91
protected:
92
const char *nameData;
93
// 32-bit size saves space.
94
uint32_t nameSize;
95
96
public:
97
// The next three fields have the same meaning as the ELF symbol attributes.
98
// type and binding are placed in this order to optimize generating st_info,
99
// which is defined as (binding << 4) + (type & 0xf), on a little-endian
100
// system.
101
uint8_t type : 4; // symbol type
102
103
// Symbol binding. This is not overwritten by replace() to track
104
// changes during resolution. In particular:
105
// - An undefined weak is still weak when it resolves to a shared library.
106
// - An undefined weak will not extract archive members, but we have to
107
// remember it is weak.
108
uint8_t binding : 4;
109
110
uint8_t stOther; // st_other field value
111
112
uint8_t symbolKind;
113
114
// The partition whose dynamic symbol table contains this symbol's definition.
115
uint8_t partition;
116
117
// True if this symbol is preemptible at load time.
118
LLVM_PREFERRED_TYPE(bool)
119
uint8_t isPreemptible : 1;
120
121
// True if the symbol was used for linking and thus need to be added to the
122
// output file's symbol table. This is true for all symbols except for
123
// unreferenced DSO symbols, lazy (archive) symbols, and bitcode symbols that
124
// are unreferenced except by other bitcode objects.
125
LLVM_PREFERRED_TYPE(bool)
126
uint8_t isUsedInRegularObj : 1;
127
128
// True if an undefined or shared symbol is used from a live section.
129
//
130
// NOTE: In Writer.cpp the field is used to mark local defined symbols
131
// which are referenced by relocations when -r or --emit-relocs is given.
132
LLVM_PREFERRED_TYPE(bool)
133
uint8_t used : 1;
134
135
// Used by a Defined symbol with protected or default visibility, to record
136
// whether it is required to be exported into .dynsym. This is set when any of
137
// the following conditions hold:
138
//
139
// - If there is an interposable symbol from a DSO. Note: We also do this for
140
// STV_PROTECTED symbols which can't be interposed (to match BFD behavior).
141
// - If -shared or --export-dynamic is specified, any symbol in an object
142
// file/bitcode sets this property, unless suppressed by LTO
143
// canBeOmittedFromSymbolTable().
144
LLVM_PREFERRED_TYPE(bool)
145
uint8_t exportDynamic : 1;
146
147
// True if the symbol is in the --dynamic-list file. A Defined symbol with
148
// protected or default visibility with this property is required to be
149
// exported into .dynsym.
150
LLVM_PREFERRED_TYPE(bool)
151
uint8_t inDynamicList : 1;
152
153
// Used to track if there has been at least one undefined reference to the
154
// symbol. For Undefined and SharedSymbol, the binding may change to STB_WEAK
155
// if the first undefined reference from a non-shared object is weak.
156
LLVM_PREFERRED_TYPE(bool)
157
uint8_t referenced : 1;
158
159
// Used to track if this symbol will be referenced after wrapping is performed
160
// (i.e. this will be true for foo if __real_foo is referenced, and will be
161
// true for __wrap_foo if foo is referenced).
162
LLVM_PREFERRED_TYPE(bool)
163
uint8_t referencedAfterWrap : 1;
164
165
// True if this symbol is specified by --trace-symbol option.
166
LLVM_PREFERRED_TYPE(bool)
167
uint8_t traced : 1;
168
169
// True if the name contains '@'.
170
LLVM_PREFERRED_TYPE(bool)
171
uint8_t hasVersionSuffix : 1;
172
173
// Symbol visibility. This is the computed minimum visibility of all
174
// observed non-DSO symbols.
175
uint8_t visibility() const { return stOther & 3; }
176
void setVisibility(uint8_t visibility) {
177
stOther = (stOther & ~3) | visibility;
178
}
179
180
bool includeInDynsym() const;
181
uint8_t computeBinding() const;
182
bool isGlobal() const { return binding == llvm::ELF::STB_GLOBAL; }
183
bool isWeak() const { return binding == llvm::ELF::STB_WEAK; }
184
185
bool isUndefined() const { return symbolKind == UndefinedKind; }
186
bool isCommon() const { return symbolKind == CommonKind; }
187
bool isDefined() const { return symbolKind == DefinedKind; }
188
bool isShared() const { return symbolKind == SharedKind; }
189
bool isPlaceholder() const { return symbolKind == PlaceholderKind; }
190
191
bool isLocal() const { return binding == llvm::ELF::STB_LOCAL; }
192
193
bool isLazy() const { return symbolKind == LazyKind; }
194
195
// True if this is an undefined weak symbol. This only works once
196
// all input files have been added.
197
bool isUndefWeak() const { return isWeak() && isUndefined(); }
198
199
StringRef getName() const { return {nameData, nameSize}; }
200
201
void setName(StringRef s) {
202
nameData = s.data();
203
nameSize = s.size();
204
}
205
206
void parseSymbolVersion();
207
208
// Get the NUL-terminated version suffix ("", "@...", or "@@...").
209
//
210
// For @@, the name has been truncated by insert(). For @, the name has been
211
// truncated by Symbol::parseSymbolVersion().
212
const char *getVersionSuffix() const { return nameData + nameSize; }
213
214
uint32_t getGotIdx() const { return symAux[auxIdx].gotIdx; }
215
uint32_t getPltIdx() const { return symAux[auxIdx].pltIdx; }
216
uint32_t getTlsDescIdx() const { return symAux[auxIdx].tlsDescIdx; }
217
uint32_t getTlsGdIdx() const { return symAux[auxIdx].tlsGdIdx; }
218
219
bool isInGot() const { return getGotIdx() != uint32_t(-1); }
220
bool isInPlt() const { return getPltIdx() != uint32_t(-1); }
221
222
uint64_t getVA(int64_t addend = 0) const;
223
224
uint64_t getGotOffset() const;
225
uint64_t getGotVA() const;
226
uint64_t getGotPltOffset() const;
227
uint64_t getGotPltVA() const;
228
uint64_t getPltVA() const;
229
uint64_t getSize() const;
230
OutputSection *getOutputSection() const;
231
232
// The following two functions are used for symbol resolution.
233
//
234
// You are expected to call mergeProperties for all symbols in input
235
// files so that attributes that are attached to names rather than
236
// indivisual symbol (such as visibility) are merged together.
237
//
238
// Every time you read a new symbol from an input, you are supposed
239
// to call resolve() with the new symbol. That function replaces
240
// "this" object as a result of name resolution if the new symbol is
241
// more appropriate to be included in the output.
242
//
243
// For example, if "this" is an undefined symbol and a new symbol is
244
// a defined symbol, "this" is replaced with the new symbol.
245
void mergeProperties(const Symbol &other);
246
void resolve(const Undefined &other);
247
void resolve(const CommonSymbol &other);
248
void resolve(const Defined &other);
249
void resolve(const LazySymbol &other);
250
void resolve(const SharedSymbol &other);
251
252
// If this is a lazy symbol, extract an input file and add the symbol
253
// in the file to the symbol table. Calling this function on
254
// non-lazy object causes a runtime error.
255
void extract() const;
256
257
void checkDuplicate(const Defined &other) const;
258
259
private:
260
bool shouldReplace(const Defined &other) const;
261
262
protected:
263
Symbol(Kind k, InputFile *file, StringRef name, uint8_t binding,
264
uint8_t stOther, uint8_t type)
265
: file(file), nameData(name.data()), nameSize(name.size()), type(type),
266
binding(binding), stOther(stOther), symbolKind(k), exportDynamic(false),
267
archSpecificBit(false) {}
268
269
void overwrite(Symbol &sym, Kind k) const {
270
if (sym.traced)
271
printTraceSymbol(*this, sym.getName());
272
sym.file = file;
273
sym.type = type;
274
sym.binding = binding;
275
sym.stOther = (stOther & ~3) | sym.visibility();
276
sym.symbolKind = k;
277
}
278
279
public:
280
// True if this symbol is in the Iplt sub-section of the Plt and the Igot
281
// sub-section of the .got.plt or .got.
282
LLVM_PREFERRED_TYPE(bool)
283
uint8_t isInIplt : 1;
284
285
// True if this symbol needs a GOT entry and its GOT entry is actually in
286
// Igot. This will be true only for certain non-preemptible ifuncs.
287
LLVM_PREFERRED_TYPE(bool)
288
uint8_t gotInIgot : 1;
289
290
// True if defined relative to a section discarded by ICF.
291
LLVM_PREFERRED_TYPE(bool)
292
uint8_t folded : 1;
293
294
// Allow reuse of a bit between architecture-exclusive symbol flags.
295
// - needsTocRestore(): On PPC64, true if a call to this symbol needs to be
296
// followed by a restore of the toc pointer.
297
// - isTagged(): On AArch64, true if the symbol needs special relocation and
298
// metadata semantics because it's tagged, under the AArch64 MemtagABI.
299
LLVM_PREFERRED_TYPE(bool)
300
uint8_t archSpecificBit : 1;
301
bool needsTocRestore() const { return archSpecificBit; }
302
bool isTagged() const { return archSpecificBit; }
303
void setNeedsTocRestore(bool v) { archSpecificBit = v; }
304
void setIsTagged(bool v) {
305
archSpecificBit = v;
306
}
307
308
// True if this symbol is defined by a symbol assignment or wrapped by --wrap.
309
//
310
// LTO shouldn't inline the symbol because it doesn't know the final content
311
// of the symbol.
312
LLVM_PREFERRED_TYPE(bool)
313
uint8_t scriptDefined : 1;
314
315
// True if defined in a DSO. There may also be a definition in a relocatable
316
// object file.
317
LLVM_PREFERRED_TYPE(bool)
318
uint8_t dsoDefined : 1;
319
320
// True if defined in a DSO as protected visibility.
321
LLVM_PREFERRED_TYPE(bool)
322
uint8_t dsoProtected : 1;
323
324
// Temporary flags used to communicate which symbol entries need PLT and GOT
325
// entries during postScanRelocations();
326
std::atomic<uint16_t> flags;
327
328
// A symAux index used to access GOT/PLT entry indexes. This is allocated in
329
// postScanRelocations().
330
uint32_t auxIdx;
331
uint32_t dynsymIndex;
332
333
// If `file` is SharedFile (for SharedSymbol or copy-relocated Defined), this
334
// represents the Verdef index within the input DSO, which will be converted
335
// to a Verneed index in the output. Otherwise, this represents the Verdef
336
// index (VER_NDX_LOCAL, VER_NDX_GLOBAL, or a named version).
337
uint16_t versionId;
338
LLVM_PREFERRED_TYPE(bool)
339
uint8_t versionScriptAssigned : 1;
340
341
// True if targeted by a range extension thunk.
342
LLVM_PREFERRED_TYPE(bool)
343
uint8_t thunkAccessed : 1;
344
345
void setFlags(uint16_t bits) {
346
flags.fetch_or(bits, std::memory_order_relaxed);
347
}
348
bool hasFlag(uint16_t bit) const {
349
assert(bit && (bit & (bit - 1)) == 0 && "bit must be a power of 2");
350
return flags.load(std::memory_order_relaxed) & bit;
351
}
352
353
bool needsDynReloc() const {
354
return flags.load(std::memory_order_relaxed) &
355
(NEEDS_COPY | NEEDS_GOT | NEEDS_PLT | NEEDS_TLSDESC | NEEDS_TLSGD |
356
NEEDS_TLSGD_TO_IE | NEEDS_GOT_DTPREL | NEEDS_TLSIE);
357
}
358
void allocateAux() {
359
assert(auxIdx == 0);
360
auxIdx = symAux.size();
361
symAux.emplace_back();
362
}
363
364
bool isSection() const { return type == llvm::ELF::STT_SECTION; }
365
bool isTls() const { return type == llvm::ELF::STT_TLS; }
366
bool isFunc() const { return type == llvm::ELF::STT_FUNC; }
367
bool isGnuIFunc() const { return type == llvm::ELF::STT_GNU_IFUNC; }
368
bool isObject() const { return type == llvm::ELF::STT_OBJECT; }
369
bool isFile() const { return type == llvm::ELF::STT_FILE; }
370
};
371
372
// Represents a symbol that is defined in the current output file.
373
class Defined : public Symbol {
374
public:
375
Defined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
376
uint8_t type, uint64_t value, uint64_t size, SectionBase *section)
377
: Symbol(DefinedKind, file, name, binding, stOther, type), value(value),
378
size(size), section(section) {
379
exportDynamic = config->exportDynamic;
380
}
381
void overwrite(Symbol &sym) const;
382
383
static bool classof(const Symbol *s) { return s->isDefined(); }
384
385
uint64_t value;
386
uint64_t size;
387
SectionBase *section;
388
};
389
390
// Represents a common symbol.
391
//
392
// On Unix, it is traditionally allowed to write variable definitions
393
// without initialization expressions (such as "int foo;") to header
394
// files. Such definition is called "tentative definition".
395
//
396
// Using tentative definition is usually considered a bad practice
397
// because you should write only declarations (such as "extern int
398
// foo;") to header files. Nevertheless, the linker and the compiler
399
// have to do something to support bad code by allowing duplicate
400
// definitions for this particular case.
401
//
402
// Common symbols represent variable definitions without initializations.
403
// The compiler creates common symbols when it sees variable definitions
404
// without initialization (you can suppress this behavior and let the
405
// compiler create a regular defined symbol by -fno-common).
406
//
407
// The linker allows common symbols to be replaced by regular defined
408
// symbols. If there are remaining common symbols after name resolution is
409
// complete, they are converted to regular defined symbols in a .bss
410
// section. (Therefore, the later passes don't see any CommonSymbols.)
411
class CommonSymbol : public Symbol {
412
public:
413
CommonSymbol(InputFile *file, StringRef name, uint8_t binding,
414
uint8_t stOther, uint8_t type, uint64_t alignment, uint64_t size)
415
: Symbol(CommonKind, file, name, binding, stOther, type),
416
alignment(alignment), size(size) {
417
exportDynamic = config->exportDynamic;
418
}
419
void overwrite(Symbol &sym) const {
420
Symbol::overwrite(sym, CommonKind);
421
auto &s = static_cast<CommonSymbol &>(sym);
422
s.alignment = alignment;
423
s.size = size;
424
}
425
426
static bool classof(const Symbol *s) { return s->isCommon(); }
427
428
uint32_t alignment;
429
uint64_t size;
430
};
431
432
class Undefined : public Symbol {
433
public:
434
Undefined(InputFile *file, StringRef name, uint8_t binding, uint8_t stOther,
435
uint8_t type, uint32_t discardedSecIdx = 0)
436
: Symbol(UndefinedKind, file, name, binding, stOther, type),
437
discardedSecIdx(discardedSecIdx) {}
438
void overwrite(Symbol &sym) const {
439
Symbol::overwrite(sym, UndefinedKind);
440
auto &s = static_cast<Undefined &>(sym);
441
s.discardedSecIdx = discardedSecIdx;
442
s.nonPrevailing = nonPrevailing;
443
}
444
445
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
446
447
// The section index if in a discarded section, 0 otherwise.
448
uint32_t discardedSecIdx;
449
bool nonPrevailing = false;
450
};
451
452
class SharedSymbol : public Symbol {
453
public:
454
static bool classof(const Symbol *s) { return s->kind() == SharedKind; }
455
456
SharedSymbol(InputFile &file, StringRef name, uint8_t binding,
457
uint8_t stOther, uint8_t type, uint64_t value, uint64_t size,
458
uint32_t alignment)
459
: Symbol(SharedKind, &file, name, binding, stOther, type), value(value),
460
size(size), alignment(alignment) {
461
exportDynamic = true;
462
dsoProtected = visibility() == llvm::ELF::STV_PROTECTED;
463
// GNU ifunc is a mechanism to allow user-supplied functions to
464
// resolve PLT slot values at load-time. This is contrary to the
465
// regular symbol resolution scheme in which symbols are resolved just
466
// by name. Using this hook, you can program how symbols are solved
467
// for you program. For example, you can make "memcpy" to be resolved
468
// to a SSE-enabled version of memcpy only when a machine running the
469
// program supports the SSE instruction set.
470
//
471
// Naturally, such symbols should always be called through their PLT
472
// slots. What GNU ifunc symbols point to are resolver functions, and
473
// calling them directly doesn't make sense (unless you are writing a
474
// loader).
475
//
476
// For DSO symbols, we always call them through PLT slots anyway.
477
// So there's no difference between GNU ifunc and regular function
478
// symbols if they are in DSOs. So we can handle GNU_IFUNC as FUNC.
479
if (this->type == llvm::ELF::STT_GNU_IFUNC)
480
this->type = llvm::ELF::STT_FUNC;
481
}
482
void overwrite(Symbol &sym) const {
483
Symbol::overwrite(sym, SharedKind);
484
auto &s = static_cast<SharedSymbol &>(sym);
485
s.dsoProtected = dsoProtected;
486
s.value = value;
487
s.size = size;
488
s.alignment = alignment;
489
}
490
491
uint64_t value; // st_value
492
uint64_t size; // st_size
493
uint32_t alignment;
494
};
495
496
// LazySymbol symbols represent symbols in object files between --start-lib and
497
// --end-lib options. LLD also handles traditional archives as if all the files
498
// in the archive are surrounded by --start-lib and --end-lib.
499
//
500
// A special complication is the handling of weak undefined symbols. They should
501
// not load a file, but we have to remember we have seen both the weak undefined
502
// and the lazy. We represent that with a lazy symbol with a weak binding. This
503
// means that code looking for undefined symbols normally also has to take lazy
504
// symbols into consideration.
505
class LazySymbol : public Symbol {
506
public:
507
LazySymbol(InputFile &file)
508
: Symbol(LazyKind, &file, {}, llvm::ELF::STB_GLOBAL,
509
llvm::ELF::STV_DEFAULT, llvm::ELF::STT_NOTYPE) {}
510
void overwrite(Symbol &sym) const { Symbol::overwrite(sym, LazyKind); }
511
512
static bool classof(const Symbol *s) { return s->kind() == LazyKind; }
513
};
514
515
// Some linker-generated symbols need to be created as
516
// Defined symbols.
517
struct ElfSym {
518
// __bss_start
519
static Defined *bss;
520
521
// etext and _etext
522
static Defined *etext1;
523
static Defined *etext2;
524
525
// edata and _edata
526
static Defined *edata1;
527
static Defined *edata2;
528
529
// end and _end
530
static Defined *end1;
531
static Defined *end2;
532
533
// The _GLOBAL_OFFSET_TABLE_ symbol is defined by target convention to
534
// be at some offset from the base of the .got section, usually 0 or
535
// the end of the .got.
536
static Defined *globalOffsetTable;
537
538
// _gp, _gp_disp and __gnu_local_gp symbols. Only for MIPS.
539
static Defined *mipsGp;
540
static Defined *mipsGpDisp;
541
static Defined *mipsLocalGp;
542
543
// __global_pointer$ for RISC-V.
544
static Defined *riscvGlobalPointer;
545
546
// __rel{,a}_iplt_{start,end} symbols.
547
static Defined *relaIpltStart;
548
static Defined *relaIpltEnd;
549
550
// _TLS_MODULE_BASE_ on targets that support TLSDESC.
551
static Defined *tlsModuleBase;
552
};
553
554
// A buffer class that is large enough to hold any Symbol-derived
555
// object. We allocate memory using this class and instantiate a symbol
556
// using the placement new.
557
558
// It is important to keep the size of SymbolUnion small for performance and
559
// memory usage reasons. 64 bytes is a soft limit based on the size of Defined
560
// on a 64-bit system. This is enforced by a static_assert in Symbols.cpp.
561
union SymbolUnion {
562
alignas(Defined) char a[sizeof(Defined)];
563
alignas(CommonSymbol) char b[sizeof(CommonSymbol)];
564
alignas(Undefined) char c[sizeof(Undefined)];
565
alignas(SharedSymbol) char d[sizeof(SharedSymbol)];
566
alignas(LazySymbol) char e[sizeof(LazySymbol)];
567
};
568
569
// Allocates SymbolUnion-sized storage from the per-type allocator singleton
// and constructs a Defined in it, forwarding `args` to the Defined
// constructor.
//
// The leading sizeof(Symbol) bytes are zero-filled first because the Symbol
// constructor initializes only some of its members. The Defined-specific
// members are all set by the Defined constructor.
//
// Returns a pointer to the new symbol, which lives in storage obtained from
// the allocator rather than from operator new.
template <typename... T> Defined *makeDefined(T &&...args) {
  auto *sym = getSpecificAllocSingleton<SymbolUnion>().Allocate();
  memset(sym, 0, sizeof(Symbol));
  return new (reinterpret_cast<Defined *>(sym))
      Defined(std::forward<T>(args)...);
}
575
576
void reportDuplicate(const Symbol &sym, const InputFile *newFile,
577
InputSectionBase *errSec, uint64_t errOffset);
578
void maybeWarnUnorderableSymbol(const Symbol *sym);
579
bool computeIsPreemptible(const Symbol &sym);
580
581
} // namespace elf
582
} // namespace lld
583
584
#endif
585
586