GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/ELF/Arch/AArch64.cpp
//===- AArch64.cpp --------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "lld/Common/ErrorHandler.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/Support/Endian.h"

using namespace llvm;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;

// Page(Expr) is the page address of the expression Expr, defined
// as (Expr & ~0xFFF). (This applies even if the machine page size
// supported by the platform has a different value.)
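// For example, getAArch64Page(0x123456789) == 0x123456000: only the low
// 12 bits of the address are cleared.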
uint64_t elf::getAArch64Page(uint64_t expr) {
  return expr & ~static_cast<uint64_t>(0xFFF);
}

namespace {
class AArch64 : public TargetInfo {
public:
  AArch64();
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
  RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;
  void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;

private:
  void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
  void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;
};

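// Helper that pairs up adjacent relocations and applies the relaxations
// implemented in tryRelaxAdrpAdd() and tryRelaxAdrpLdr() below: rewriting an
// ADRP+ADD pair to NOP+ADR, and a GOT-indirect ADRP+LDR pair to ADRP+ADD.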
struct AArch64Relaxer {
  bool safeToRelaxAdrpLdr = false;

  AArch64Relaxer(ArrayRef<Relocation> relocs);
  bool tryRelaxAdrpAdd(const Relocation &adrpRel, const Relocation &addRel,
                       uint64_t secAddr, uint8_t *buf) const;
  bool tryRelaxAdrpLdr(const Relocation &adrpRel, const Relocation &ldrRel,
                       uint64_t secAddr, uint8_t *buf) const;
};
} // namespace

// Return the bits [Start, End] from Val shifted Start bits.
// For instance, getBits(0xF0, 4, 8) returns 0xF.
static uint64_t getBits(uint64_t val, int start, int end) {
  uint64_t mask = ((uint64_t)1 << (end + 1 - start)) - 1;
  return (val >> start) & mask;
}

AArch64::AArch64() {
  copyRel = R_AARCH64_COPY;
  relativeRel = R_AARCH64_RELATIVE;
  iRelativeRel = R_AARCH64_IRELATIVE;
  gotRel = R_AARCH64_GLOB_DAT;
  pltRel = R_AARCH64_JUMP_SLOT;
  symbolicRel = R_AARCH64_ABS64;
  tlsDescRel = R_AARCH64_TLSDESC;
  tlsGotRel = R_AARCH64_TLS_TPREL64;
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  defaultMaxPageSize = 65536;

  // Align to the 2 MiB page size (known as a superpage or huge page).
  // FreeBSD automatically promotes 2 MiB-aligned allocations.
  defaultImageBase = 0x200000;

  needsThunks = true;
}

RelExpr AArch64::getRelExpr(RelType type, const Symbol &s,
                            const uint8_t *loc) const {
  switch (type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_ABS32:
  case R_AARCH64_ABS64:
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return R_ABS;
  case R_AARCH64_AUTH_ABS64:
    return R_AARCH64_AUTH;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    return R_AARCH64_TLSDESC_PAGE;
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSDESC_ADD_LO12:
    return R_TLSDESC;
  case R_AARCH64_TLSDESC_CALL:
    return R_TLSDESC_CALL;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    return R_TPREL;
  case R_AARCH64_CALL26:
  case R_AARCH64_CONDBR19:
  case R_AARCH64_JUMP26:
  case R_AARCH64_TSTBR14:
    return R_PLT_PC;
  case R_AARCH64_PLT32:
    const_cast<Symbol &>(s).thunkAccessed = true;
    return R_PLT_PC;
  case R_AARCH64_PREL16:
  case R_AARCH64_PREL32:
  case R_AARCH64_PREL64:
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_PREL_G2_NC:
  case R_AARCH64_MOVW_PREL_G3:
    return R_PC;
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return R_AARCH64_PAGE_PC;
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return R_GOT;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    return R_AARCH64_GOT_PAGE;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
    return R_AARCH64_GOT_PAGE_PC;
  case R_AARCH64_GOTPCREL32:
  case R_AARCH64_GOT_LD_PREL19:
    return R_GOT_PC;
  case R_AARCH64_NONE:
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}

RelExpr AArch64::adjustTlsExpr(RelType type, RelExpr expr) const {
  if (expr == R_RELAX_TLS_GD_TO_IE) {
    if (type == R_AARCH64_TLSDESC_ADR_PAGE21)
      return R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC;
    return R_RELAX_TLS_GD_TO_IE_ABS;
  }
  return expr;
}

bool AArch64::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_AARCH64_ADD_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_LD64_LO12:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
    return true;
  }
}

RelType AArch64::getDynRel(RelType type) const {
  if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64)
    return type;
  return R_AARCH64_NONE;
}

int64_t AArch64::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  case R_AARCH64_TLSDESC:
    return read64(buf + 8);
  case R_AARCH64_NONE:
  case R_AARCH64_GLOB_DAT:
  case R_AARCH64_JUMP_SLOT:
    return 0;
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    return SignExtend64<16>(read16(buf));
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    return SignExtend64<32>(read32(buf));
  case R_AARCH64_ABS64:
  case R_AARCH64_PREL64:
  case R_AARCH64_RELATIVE:
  case R_AARCH64_IRELATIVE:
  case R_AARCH64_TLS_TPREL64:
    return read64(buf);

  // The following relocation types all point at instructions, and
  // relocate an immediate field in the instruction.
  //
  // The general rule, from AAELF64 §5.7.2 "Addends and PC-bias",
  // says: "If the relocation relocates an instruction the immediate
  // field of the instruction is extracted, scaled as required by
  // the instruction field encoding, and sign-extended to 64 bits".

  // The R_AARCH64_MOVW family operates on wide MOV/MOVK/MOVZ
  // instructions, which have a 16-bit immediate field with its low
  // bit in bit 5 of the instruction encoding. When the immediate
  // field is used as an implicit addend for REL-type relocations,
  // it is treated as added to the low bits of the output value, not
  // shifted depending on the relocation type.
  //
  // This allows REL relocations to express the requirement 'please
  // add 12345 to this symbol value and give me the four 16-bit
  // chunks of the result', by putting the same addend 12345 in all
  // four instructions. Carries between the 16-bit chunks are
  // handled correctly, because the whole 64-bit addition is done
  // once per relocation.
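  //
  // For example, if all four movz/movk instructions of such a sequence carry
  // the addend 12345 in their imm16 fields, this function returns 12345 for
  // each of them; relocate() below then writes bits [0:15], [16:31], [32:47]
  // and [48:63] of the single 64-bit result into the G0, G1, G2 and G3
  // instructions respectively.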
  case R_AARCH64_MOVW_UABS_G0:
  case R_AARCH64_MOVW_UABS_G0_NC:
  case R_AARCH64_MOVW_UABS_G1:
  case R_AARCH64_MOVW_UABS_G1_NC:
  case R_AARCH64_MOVW_UABS_G2:
  case R_AARCH64_MOVW_UABS_G2_NC:
  case R_AARCH64_MOVW_UABS_G3:
    return SignExtend64<16>(getBits(read32(buf), 5, 20));

  // R_AARCH64_TSTBR14 points at a TBZ or TBNZ instruction, which
  // has a 14-bit offset measured in instructions, i.e. shifted left
  // by 2.
  case R_AARCH64_TSTBR14:
    return SignExtend64<16>(getBits(read32(buf), 5, 18) << 2);

  // R_AARCH64_CONDBR19 operates on the ordinary B.cond instruction,
  // which has a 19-bit offset measured in instructions.
  //
  // R_AARCH64_LD_PREL_LO19 operates on the LDR (literal)
  // instruction, which also has a 19-bit offset, measured in 4-byte
  // chunks. So the calculation is the same as for
  // R_AARCH64_CONDBR19.
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
    return SignExtend64<21>(getBits(read32(buf), 5, 23) << 2);

  // R_AARCH64_ADD_ABS_LO12_NC operates on ADD (immediate). The
  // immediate can optionally be shifted left by 12 bits, but this
  // relocation is intended for the case where it is not.
  case R_AARCH64_ADD_ABS_LO12_NC:
    return SignExtend64<12>(getBits(read32(buf), 10, 21));

  // R_AARCH64_ADR_PREL_LO21 operates on an ADR instruction, whose
  // 21-bit immediate is split between two bits high up in the word
  // (in fact the two _lowest_ order bits of the value) and 19 bits
  // lower down.
  //
  // R_AARCH64_ADR_PREL_PG_HI21[_NC] operate on an ADRP instruction,
  // which encodes the immediate in the same way, but will shift it
  // left by 12 bits when the instruction executes. For the same
  // reason as the MOVW family, we don't apply that left shift here.
  case R_AARCH64_ADR_PREL_LO21:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    return SignExtend64<21>((getBits(read32(buf), 5, 23) << 2) |
                            getBits(read32(buf), 29, 30));

  // R_AARCH64_{JUMP,CALL}26 operate on B and BL, which have a
  // 26-bit offset measured in instructions.
  case R_AARCH64_JUMP26:
  case R_AARCH64_CALL26:
    return SignExtend64<28>(getBits(read32(buf), 0, 25) << 2);

  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  }
}

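// Each .got.plt entry initially holds the address of the PLT header, so the
// first call to a lazily bound function enters the resolver through it.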
void AArch64::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write64(buf, in.plt->getVA());
}

void AArch64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (config->writeAddends)
    write64(buf, s.getVA());
}

void AArch64::writePltHeader(uint8_t *buf) const {
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  memcpy(buf, pltData, sizeof(pltData));

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();
  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 4));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
}

void AArch64::writePlt(uint8_t *buf, const Symbol &sym,
                       uint64_t pltEntryAddr) const {
  const uint8_t inst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[n]))
      0x20, 0x02, 0x1f, 0xd6  // br x17
  };
  memcpy(buf, inst, sizeof(inst));

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);
}

bool AArch64::needsThunk(RelExpr expr, RelType type, const InputFile *file,
                         uint64_t branchAddr, const Symbol &s,
                         int64_t a) const {
  // If s is an undefined weak symbol and does not have a PLT entry then it
  // will be resolved as a branch to the next instruction. If it is hidden, its
  // binding has been converted to local, so we just check isUndefined() here.
  // An undefined non-weak symbol will have been errored.
  if (s.isUndefined() && !s.isInPlt())
    return false;
  // ELF for the ARM 64-bit architecture, section "Call and Jump relocations",
  // only permits range extension thunks for R_AARCH64_CALL26 and
  // R_AARCH64_JUMP26 relocation types.
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return false;
  uint64_t dst = expr == R_PLT_PC ? s.getPltVA() : s.getVA(a);
  return !inBranchRange(type, branchAddr, dst);
}

uint32_t AArch64::getThunkSectionSpacing() const {
  // See comment in Arch/ARM.cpp for a more detailed explanation of
  // getThunkSectionSpacing(). For AArch64 the only branches we are permitted
  // to thunk have a range of +/- 128 MiB.
  return (128 * 1024 * 1024) - 0x30000;
}

bool AArch64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (type != R_AARCH64_CALL26 && type != R_AARCH64_JUMP26 &&
      type != R_AARCH64_PLT32)
    return true;
  // The AArch64 call and unconditional branch instructions have a range of
  // +/- 128 MiB. The PLT32 relocation supports a range up to +/- 2 GiB.
  uint64_t range =
      type == R_AARCH64_PLT32 ? (UINT64_C(1) << 31) : (128 * 1024 * 1024);
  if (dst > src) {
    // The immediate of the branch is signed, so the maximum forward distance
    // is 4 bytes shorter than the maximum backward distance.
    range -= 4;
    return dst - src <= range;
  }
  return src - dst <= range;
}

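// Write the 21-bit immediate of an ADR or ADRP instruction: the low two bits
// of imm go into the immlo field (bits 30:29) and the remaining 19 bits into
// the immhi field (bits 23:5). All other instruction bits are preserved.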
static void write32AArch64Addr(uint8_t *l, uint64_t imm) {
  uint32_t immLo = (imm & 0x3) << 29;
  uint32_t immHi = (imm & 0x1FFFFC) << 3;
  uint64_t mask = (0x3 << 29) | (0x1FFFFC << 3);
  write32le(l, (read32le(l) & ~mask) | immLo | immHi);
}

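// Write the bits of v selected by mask into the 32-bit little-endian word at
// p, leaving the bits outside the mask unchanged.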
static void writeMaskedBits32le(uint8_t *p, int32_t v, uint32_t mask) {
  write32le(p, (read32le(p) & ~mask) | v);
}

// Update the immediate field in an AArch64 ldr, str, or add instruction.
static void write32Imm12(uint8_t *l, uint64_t imm) {
  writeMaskedBits32le(l, (imm & 0xFFF) << 10, 0xFFF << 10);
}

// Update the immediate field in an AArch64 movk, movn or movz instruction
// for a signed relocation, and update the opcode of a movn or movz instruction
// to match the sign of the operand.
static void writeSMovWImm(uint8_t *loc, uint32_t imm) {
  uint32_t inst = read32le(loc);
  // Opcode field is bits 30, 29, with 10 = movz, 00 = movn and 11 = movk.
  if (!(inst & (1 << 29))) {
    // movn or movz.
    if (imm & 0x10000) {
      // Change opcode to movn, which takes an inverted operand.
      imm ^= 0xFFFF;
      inst &= ~(1 << 30);
    } else {
      // Change opcode to movz.
      inst |= 1 << 30;
    }
  }
  write32le(loc, inst | ((imm & 0xFFFF) << 5));
}

void AArch64::relocate(uint8_t *loc, const Relocation &rel,
                       uint64_t val) const {
  switch (rel.type) {
  case R_AARCH64_ABS16:
  case R_AARCH64_PREL16:
    checkIntUInt(loc, val, 16, rel);
    write16(loc, val);
    break;
  case R_AARCH64_ABS32:
  case R_AARCH64_PREL32:
    checkIntUInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_PLT32:
  case R_AARCH64_GOTPCREL32:
    checkInt(loc, val, 32, rel);
    write32(loc, val);
    break;
  case R_AARCH64_ABS64:
    // AArch64 relocations to tagged symbols have extended semantics, as
    // described here:
    // https://github.com/ARM-software/abi-aa/blob/main/memtagabielf64/memtagabielf64.rst#841extended-semantics-of-r_aarch64_relative.
    // tl;dr: encode the symbol's special addend in the place, which is an
    // offset to the point where the logical tag is derived from. As a quick
    // hack, if the addend is within the symbol's bounds, there is no need to
    // encode the tag derivation offset.
    if (rel.sym && rel.sym->isTagged() &&
        (rel.addend < 0 ||
         rel.addend >= static_cast<int64_t>(rel.sym->getSize())))
      write64(loc, -rel.addend);
    else
      write64(loc, val);
    break;
  case R_AARCH64_PREL64:
    write64(loc, val);
    break;
  case R_AARCH64_AUTH_ABS64:
    // If val is wider than 32 bits, the relocation must have been moved from
    // .relr.auth.dyn to .rela.dyn, and the addend write is not needed.
    //
    // If val fits in 32 bits, we have two potential scenarios:
    // * True RELR: Write the 32-bit `val`.
    // * RELA: Even if the value now fits in 32 bits, it might have been
    //   converted from RELR during an iteration in
    //   finalizeAddressDependentContent(). Writing the value is harmless
    //   because dynamic linking ignores it.
    if (isInt<32>(val))
      write32(loc, val);
    break;
  case R_AARCH64_ADD_ABS_LO12_NC:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_ADR_GOT_PAGE:
  case R_AARCH64_ADR_PREL_PG_HI21:
  case R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21:
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    checkInt(loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_ADR_PREL_PG_HI21_NC:
    write32AArch64Addr(loc, val >> 12);
    break;
  case R_AARCH64_ADR_PREL_LO21:
    checkInt(loc, val, 21, rel);
    write32AArch64Addr(loc, val);
    break;
  case R_AARCH64_JUMP26:
    // Normally we would just write the bits of the immediate field, however
    // when patching instructions for the cpu errata fix -fix-cortex-a53-843419
    // we want to replace a non-branch instruction with a branch immediate
    // instruction. By writing all the bits of the instruction including the
    // opcode and the immediate (0 001 | 01 imm26) we can do this
    // transformation by placing a R_AARCH64_JUMP26 relocation at the offset of
    // the instruction we want to patch.
    write32le(loc, 0x14000000);
    [[fallthrough]];
  case R_AARCH64_CALL26:
    checkInt(loc, val, 28, rel);
    writeMaskedBits32le(loc, (val & 0x0FFFFFFC) >> 2, 0x0FFFFFFC >> 2);
    break;
  case R_AARCH64_CONDBR19:
  case R_AARCH64_LD_PREL_LO19:
  case R_AARCH64_GOT_LD_PREL19:
    checkAlignment(loc, val, 4, rel);
    checkInt(loc, val, 21, rel);
    writeMaskedBits32le(loc, (val & 0x1FFFFC) << 3, 0x1FFFFC << 3);
    break;
  case R_AARCH64_LDST8_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC:
    write32Imm12(loc, getBits(val, 0, 11));
    break;
  case R_AARCH64_LDST16_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC:
    checkAlignment(loc, val, 2, rel);
    write32Imm12(loc, getBits(val, 1, 11));
    break;
  case R_AARCH64_LDST32_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC:
    checkAlignment(loc, val, 4, rel);
    write32Imm12(loc, getBits(val, 2, 11));
    break;
  case R_AARCH64_LDST64_ABS_LO12_NC:
  case R_AARCH64_LD64_GOT_LO12_NC:
  case R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC:
  case R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_LD64_LO12:
    checkAlignment(loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 11));
    break;
  case R_AARCH64_LDST128_ABS_LO12_NC:
  case R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC:
    checkAlignment(loc, val, 16, rel);
    write32Imm12(loc, getBits(val, 4, 11));
    break;
  case R_AARCH64_LD64_GOTPAGE_LO15:
    checkAlignment(loc, val, 8, rel);
    write32Imm12(loc, getBits(val, 3, 14));
    break;
  case R_AARCH64_MOVW_UABS_G0:
    checkUInt(loc, val, 16, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G0_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF) << 5, 0xFFFF << 5);
    break;
  case R_AARCH64_MOVW_UABS_G1:
    checkUInt(loc, val, 32, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G1_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF0000) >> 11, 0xFFFF0000 >> 11);
    break;
  case R_AARCH64_MOVW_UABS_G2:
    checkUInt(loc, val, 48, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_UABS_G2_NC:
    writeMaskedBits32le(loc, (val & 0xFFFF00000000) >> 27,
                        0xFFFF00000000 >> 27);
    break;
  case R_AARCH64_MOVW_UABS_G3:
    writeMaskedBits32le(loc, (val & 0xFFFF000000000000) >> 43,
                        0xFFFF000000000000 >> 43);
    break;
  case R_AARCH64_MOVW_PREL_G0:
  case R_AARCH64_MOVW_SABS_G0:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0:
    checkInt(loc, val, 17, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G0_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G0_NC:
    writeSMovWImm(loc, val);
    break;
  case R_AARCH64_MOVW_PREL_G1:
  case R_AARCH64_MOVW_SABS_G1:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1:
    checkInt(loc, val, 33, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G1_NC:
  case R_AARCH64_TLSLE_MOVW_TPREL_G1_NC:
    writeSMovWImm(loc, val >> 16);
    break;
  case R_AARCH64_MOVW_PREL_G2:
  case R_AARCH64_MOVW_SABS_G2:
  case R_AARCH64_TLSLE_MOVW_TPREL_G2:
    checkInt(loc, val, 49, rel);
    [[fallthrough]];
  case R_AARCH64_MOVW_PREL_G2_NC:
    writeSMovWImm(loc, val >> 32);
    break;
  case R_AARCH64_MOVW_PREL_G3:
    writeSMovWImm(loc, val >> 48);
    break;
  case R_AARCH64_TSTBR14:
    checkInt(loc, val, 16, rel);
    writeMaskedBits32le(loc, (val & 0xFFFC) << 3, 0xFFFC << 3);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_HI12:
    checkUInt(loc, val, 24, rel);
    write32Imm12(loc, val >> 12);
    break;
  case R_AARCH64_TLSLE_ADD_TPREL_LO12_NC:
  case R_AARCH64_TLSDESC_ADD_LO12:
    write32Imm12(loc, val);
    break;
  case R_AARCH64_TLSDESC:
    // For R_AARCH64_TLSDESC the addend is stored in the second 64-bit word.
    write64(loc + 8, val);
    break;
  default:
    llvm_unreachable("unknown relocation");
  }
}

void AArch64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // And it can be optimized to:
  //   movz x0, #0x0, lsl #16
  //   movk x0, #0x10
  //   nop
  //   nop
  checkUInt(loc, val, 32, rel);

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    return;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0xd2a00000 | (((val >> 16) & 0xffff) << 5)); // movz
    return;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf2800000 | ((val & 0xffff) << 5)); // movk
    return;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");
  }
}

void AArch64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  // TLSDESC Global-Dynamic relocations are of the form:
  //   adrp x0, :tlsdesc:v              [R_AARCH64_TLSDESC_ADR_PAGE21]
  //   ldr  x1, [x0, #:tlsdesc_lo12:v]  [R_AARCH64_TLSDESC_LD64_LO12]
  //   add  x0, x0, :tlsdesc_lo12:v     [R_AARCH64_TLSDESC_ADD_LO12]
  //   .tlsdesccall                     [R_AARCH64_TLSDESC_CALL]
  //   blr  x1
  // And it can be optimized to:
  //   adrp x0, :gottprel:v
  //   ldr  x0, [x0, :gottprel_lo12:v]
  //   nop
  //   nop

  switch (rel.type) {
  case R_AARCH64_TLSDESC_ADD_LO12:
  case R_AARCH64_TLSDESC_CALL:
    write32le(loc, 0xd503201f); // nop
    break;
  case R_AARCH64_TLSDESC_ADR_PAGE21:
    write32le(loc, 0x90000000); // adrp
    relocateNoSym(loc, R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, val);
    break;
  case R_AARCH64_TLSDESC_LD64_LO12:
    write32le(loc, 0xf9400000); // ldr
    relocateNoSym(loc, R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, val);
    break;
  default:
    llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");
  }
}

void AArch64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,
                             uint64_t val) const {
  checkUInt(loc, val, 32, rel);

  if (rel.type == R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21) {
    // Generate MOVZ.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xd2a00000 | regNo) | (((val >> 16) & 0xffff) << 5));
    return;
  }
  if (rel.type == R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC) {
    // Generate MOVK.
    uint32_t regNo = read32le(loc) & 0x1f;
    write32le(loc, (0xf2800000 | regNo) | ((val & 0xffff) << 5));
    return;
  }
  llvm_unreachable("invalid relocation for TLS IE to LE relaxation");
}

AArch64Relaxer::AArch64Relaxer(ArrayRef<Relocation> relocs) {
  if (!config->relax)
    return;
  // Check if R_AARCH64_ADR_GOT_PAGE and R_AARCH64_LD64_GOT_LO12_NC
  // always appear in pairs.
  size_t i = 0;
  const size_t size = relocs.size();
  for (; i != size; ++i) {
    if (relocs[i].type == R_AARCH64_ADR_GOT_PAGE) {
      if (i + 1 < size && relocs[i + 1].type == R_AARCH64_LD64_GOT_LO12_NC) {
        ++i;
        continue;
      }
      break;
    } else if (relocs[i].type == R_AARCH64_LD64_GOT_LO12_NC) {
      break;
    }
  }
  safeToRelaxAdrpLdr = i == size;
}

bool AArch64Relaxer::tryRelaxAdrpAdd(const Relocation &adrpRel,
                                     const Relocation &addRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  // When the address of sym is within the range of ADR then
  // we may relax
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym
  // to
  //   NOP
  //   ADR xn, sym
  if (!config->relax || adrpRel.type != R_AARCH64_ADR_PREL_PG_HI21 ||
      addRel.type != R_AARCH64_ADD_ABS_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != addRel.offset)
    return false;
  if (adrpRel.sym != addRel.sym)
    return false;
  if (adrpRel.addend != 0 || addRel.addend != 0)
    return false;

  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t addInstr = read32le(buf + addRel.offset);
  // Check if the first instruction is ADRP and the second instruction is ADD.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (addInstr & 0xffc00000) != 0x91000000)
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t addDestReg = addInstr & 0x1f;
  uint32_t addSrcReg = (addInstr >> 5) & 0x1f;
  if (adrpDestReg != addDestReg || adrpDestReg != addSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // Check if the address difference is within 1MiB range.
  int64_t val = sym.getVA() - (secAddr + addRel.offset);
  if (val < -1024 * 1024 || val >= 1024 * 1024)
    return false;

  Relocation adrRel = {R_ABS, R_AARCH64_ADR_PREL_LO21, addRel.offset,
                       /*addend=*/0, &sym};
  // nop
  write32le(buf + adrpRel.offset, 0xd503201f);
  // adr x_<dest_reg>
  write32le(buf + adrRel.offset, 0x10000000 | adrpDestReg);
  target->relocate(buf + adrRel.offset, adrRel, val);
  return true;
}

bool AArch64Relaxer::tryRelaxAdrpLdr(const Relocation &adrpRel,
                                     const Relocation &ldrRel, uint64_t secAddr,
                                     uint8_t *buf) const {
  if (!safeToRelaxAdrpLdr)
    return false;

  // When the definition of sym is not preemptible then we may
  // be able to relax
  //   ADRP xn, :got: sym
  //   LDR  xn, [ xn :got_lo12: sym]
  // to
  //   ADRP xn, sym
  //   ADD  xn, xn, :lo12: sym

  if (adrpRel.type != R_AARCH64_ADR_GOT_PAGE ||
      ldrRel.type != R_AARCH64_LD64_GOT_LO12_NC)
    return false;
  // Check if the relocations apply to consecutive instructions.
  if (adrpRel.offset + 4 != ldrRel.offset)
    return false;
  // Check if the relocations reference the same symbol and
  // skip undefined, preemptible and STT_GNU_IFUNC symbols.
  if (!adrpRel.sym || adrpRel.sym != ldrRel.sym || !adrpRel.sym->isDefined() ||
      adrpRel.sym->isPreemptible || adrpRel.sym->isGnuIFunc())
    return false;
  // Check if the addends of both relocations are zero.
  if (adrpRel.addend != 0 || ldrRel.addend != 0)
    return false;
  uint32_t adrpInstr = read32le(buf + adrpRel.offset);
  uint32_t ldrInstr = read32le(buf + ldrRel.offset);
  // Check if the first instruction is ADRP and the second instruction is LDR.
  if ((adrpInstr & 0x9f000000) != 0x90000000 ||
      (ldrInstr & 0x3b000000) != 0x39000000)
    return false;
  // Check the value of the sf bit.
  if (!(ldrInstr >> 31))
    return false;
  uint32_t adrpDestReg = adrpInstr & 0x1f;
  uint32_t ldrDestReg = ldrInstr & 0x1f;
  uint32_t ldrSrcReg = (ldrInstr >> 5) & 0x1f;
  // Check if ADRP and LDR use the same register.
  if (adrpDestReg != ldrDestReg || adrpDestReg != ldrSrcReg)
    return false;

  Symbol &sym = *adrpRel.sym;
  // GOT references to absolute symbols can't be relaxed to use ADRP/ADD in
  // position-independent code because these instructions produce a relative
  // address.
  if (config->isPic && !cast<Defined>(sym).section)
    return false;
  // Check if the address difference is within 4GB range.
  int64_t val =
      getAArch64Page(sym.getVA()) - getAArch64Page(secAddr + adrpRel.offset);
  if (val != llvm::SignExtend64(val, 33))
    return false;

  Relocation adrpSymRel = {R_AARCH64_PAGE_PC, R_AARCH64_ADR_PREL_PG_HI21,
                           adrpRel.offset, /*addend=*/0, &sym};
  Relocation addRel = {R_ABS, R_AARCH64_ADD_ABS_LO12_NC, ldrRel.offset,
                       /*addend=*/0, &sym};

  // adrp x_<dest_reg>
  write32le(buf + adrpSymRel.offset, 0x90000000 | adrpDestReg);
  // add x_<dest reg>, x_<dest reg>
  write32le(buf + addRel.offset, 0x91000000 | adrpDestReg | (adrpDestReg << 5));

  target->relocate(buf + adrpSymRel.offset, adrpSymRel,
                   SignExtend64(getAArch64Page(sym.getVA()) -
                                    getAArch64Page(secAddr + adrpSymRel.offset),
                                64));
  target->relocate(buf + addRel.offset, addRel, SignExtend64(sym.getVA(), 64));
  tryRelaxAdrpAdd(adrpSymRel, addRel, secAddr, buf);
  return true;
}

// Tagged symbols have upper address bits that are added by the dynamic loader,
// and thus need the full 64-bit GOT entry. Do not relax such symbols.
static bool needsGotForMemtag(const Relocation &rel) {
  return rel.sym->isTagged() && needsGot(rel.expr);
}

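// Apply all relocations of an allocatable section. Adjacent ADRP+ADD and
// GOT-indirect ADRP+LDR pairs are first offered to AArch64Relaxer, and TLS
// relaxations are dispatched based on the relocation expression.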
void AArch64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
  uint64_t secAddr = sec.getOutputSection()->addr;
  if (auto *s = dyn_cast<InputSection>(&sec))
    secAddr += s->outSecOff;
  else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))
    secAddr += ehIn->getParent()->outSecOff;
  AArch64Relaxer relaxer(sec.relocs());
  for (size_t i = 0, size = sec.relocs().size(); i != size; ++i) {
    const Relocation &rel = sec.relocs()[i];
    uint8_t *loc = buf + rel.offset;
    const uint64_t val =
        sec.getRelocTargetVA(sec.file, rel.type, rel.addend,
                             secAddr + rel.offset, *rel.sym, rel.expr);

    if (needsGotForMemtag(rel)) {
      relocate(loc, rel, val);
      continue;
    }

    switch (rel.expr) {
    case R_AARCH64_GOT_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpLdr(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case R_AARCH64_PAGE_PC:
      if (i + 1 < size &&
          relaxer.tryRelaxAdrpAdd(rel, sec.relocs()[i + 1], secAddr, buf)) {
        ++i;
        continue;
      }
      break;
    case R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC:
    case R_RELAX_TLS_GD_TO_IE_ABS:
      relaxTlsGdToIe(loc, rel, val);
      continue;
    case R_RELAX_TLS_GD_TO_LE:
      relaxTlsGdToLe(loc, rel, val);
      continue;
    case R_RELAX_TLS_IE_TO_LE:
      relaxTlsIeToLe(loc, rel, val);
      continue;
    default:
      break;
    }
    relocate(loc, rel, val);
  }
}

// AArch64 may use security features in variant PLT sequences. These are:
// Pointer Authentication (PAC), introduced in armv8.3-a, and Branch Target
// Indicator (BTI), introduced in armv8.5-a. The additional instructions used
// in the variant PLT sequences are encoded in the Hint space so they can be
// deployed on older architectures, which treat the instructions as a nop.
// PAC and BTI can be combined leading to the following combinations:
// writePltHeader
// writePltHeaderBti (no PAC Header needed)
// writePlt
// writePltBti (BTI only)
// writePltPac (PAC only)
// writePltBtiPac (BTI and PAC)
//
// When PAC is enabled the dynamic loader encrypts the address that it places
// in the .got.plt using the pacia1716 instruction which encrypts the value in
// x17 using the modifier in x16. The static linker places autia1716 before the
// indirect branch to x17 to authenticate the address in x17 with the modifier
// in x16. This makes it more difficult for an attacker to modify the value in
// the .got.plt.
//
// When BTI is enabled all indirect branches must land on a bti instruction.
// The static linker must place a bti instruction at the start of any PLT entry
// that may be the target of an indirect branch. As the PLT entries call the
// lazy resolver indirectly, the PLT header must have a bti instruction at its
// start. In general a bti instruction is not needed for a PLT entry, as
// indirect calls are resolved to the function address and not the PLT entry
// for the function. There are a small number of cases where the PLT address
// can escape, such as taking the address of a function or ifunc via a
// non-GOT-generating relocation while a shared library refers to that symbol.
//
// We use the bti c variant of the instruction, which permits indirect branches
// (br) via x16/x17 and indirect function calls (blr) via any register. The ABI
// guarantees that all indirect branches from code requiring BTI protection
// will go via x16/x17.

namespace {
class AArch64BtiPac final : public AArch64 {
public:
  AArch64BtiPac();
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;

private:
  bool btiHeader; // bti instruction needed in PLT Header and Entry
  bool pacEntry;  // autia1716 instruction needed in PLT Entry
};
} // namespace

AArch64BtiPac::AArch64BtiPac() {
  btiHeader = (config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI);
  // A BTI (Branch Target Indicator) PLT entry is only required if the
  // address of the PLT entry can be taken by the program, which permits an
  // indirect jump to the PLT entry. This can happen when the address
  // of the PLT entry for a function is canonicalised due to the address of
  // the function in an executable being taken by a shared library, or a
  // non-preemptible ifunc is referenced by non-GOT-generating,
  // non-PLT-generating relocations.
  // The PAC PLT entries require dynamic loader support and this isn't known
  // from properties in the objects, so we use the command line flag.
  pacEntry = config->zPacPlt;

  if (btiHeader || pacEntry) {
    pltEntrySize = 24;
    ipltEntrySize = 24;
  }
}

void AArch64BtiPac::writePltHeader(uint8_t *buf) const {
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t pltData[] = {
      0xf0, 0x7b, 0xbf, 0xa9, // stp x16, x30, [sp,#-16]!
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[2]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[2]))]
      0x10, 0x02, 0x00, 0x91, // add x16, x16, Offset(&(.got.plt[2]))
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5, // nop
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  uint64_t got = in.gotPlt->getVA();
  uint64_t plt = in.plt->getVA();

  if (btiHeader) {
    // PltHeader is called indirectly by plt[N]. Prefix pltData with a BTI C
    // instruction.
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    plt += sizeof(btiData);
  }
  memcpy(buf, pltData, sizeof(pltData));

  relocateNoSym(buf + 4, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(got + 16) - getAArch64Page(plt + 8));
  relocateNoSym(buf + 8, R_AARCH64_LDST64_ABS_LO12_NC, got + 16);
  relocateNoSym(buf + 12, R_AARCH64_ADD_ABS_LO12_NC, got + 16);
  if (!btiHeader)
    // We didn't add the BTI c instruction, so round out the size with a NOP.
    memcpy(buf + sizeof(pltData), nopData, sizeof(nopData));
}

void AArch64BtiPac::writePlt(uint8_t *buf, const Symbol &sym,
                             uint64_t pltEntryAddr) const {
  // The PLT entry is of the form:
  // [btiData] addrInst (pacBr | stdBr) [nopData]
  const uint8_t btiData[] = { 0x5f, 0x24, 0x03, 0xd5 }; // bti c
  const uint8_t addrInst[] = {
      0x10, 0x00, 0x00, 0x90, // adrp x16, Page(&(.got.plt[n]))
      0x11, 0x02, 0x40, 0xf9, // ldr x17, [x16, Offset(&(.got.plt[n]))]
      0x10, 0x02, 0x00, 0x91  // add x16, x16, Offset(&(.got.plt[n]))
  };
  const uint8_t pacBr[] = {
      0x9f, 0x21, 0x03, 0xd5, // autia1716
      0x20, 0x02, 0x1f, 0xd6  // br x17
  };
  const uint8_t stdBr[] = {
      0x20, 0x02, 0x1f, 0xd6, // br x17
      0x1f, 0x20, 0x03, 0xd5  // nop
  };
  const uint8_t nopData[] = { 0x1f, 0x20, 0x03, 0xd5 }; // nop

  // NEEDS_COPY indicates a non-ifunc canonical PLT entry whose address may
  // escape to shared objects. isInIplt indicates a non-preemptible ifunc. Its
  // address may escape if referenced by a direct relocation. If relative
  // vtables are used and the vtable is in a shared object, the offsets will
  // be to the PLT entry. The condition is conservative.
  bool hasBti = btiHeader &&
                (sym.hasFlag(NEEDS_COPY) || sym.isInIplt || sym.thunkAccessed);
  if (hasBti) {
    memcpy(buf, btiData, sizeof(btiData));
    buf += sizeof(btiData);
    pltEntryAddr += sizeof(btiData);
  }

  uint64_t gotPltEntryAddr = sym.getGotPltVA();
  memcpy(buf, addrInst, sizeof(addrInst));
  relocateNoSym(buf, R_AARCH64_ADR_PREL_PG_HI21,
                getAArch64Page(gotPltEntryAddr) - getAArch64Page(pltEntryAddr));
  relocateNoSym(buf + 4, R_AARCH64_LDST64_ABS_LO12_NC, gotPltEntryAddr);
  relocateNoSym(buf + 8, R_AARCH64_ADD_ABS_LO12_NC, gotPltEntryAddr);

  if (pacEntry)
    memcpy(buf + sizeof(addrInst), pacBr, sizeof(pacBr));
  else
    memcpy(buf + sizeof(addrInst), stdBr, sizeof(stdBr));
  if (!hasBti)
    // We didn't add the BTI c instruction, so round out the size with a NOP.
    memcpy(buf + sizeof(addrInst) + sizeof(stdBr), nopData, sizeof(nopData));
}

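// Return the singleton TargetInfo for AArch64. The BTI/PAC variant is used
// when every input object advertises the BTI feature (config->andFeatures) or
// when PAC PLT entries are requested (config->zPacPlt).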
static TargetInfo *getTargetInfo() {
  if ((config->andFeatures & GNU_PROPERTY_AARCH64_FEATURE_1_BTI) ||
      config->zPacPlt) {
    static AArch64BtiPac t;
    return &t;
  }
  static AArch64 t;
  return &t;
}

TargetInfo *elf::getAArch64TargetInfo() { return getTargetInfo(); }

template <class ELFT>
static void
addTaggedSymbolReferences(InputSectionBase &sec,
                          DenseMap<Symbol *, unsigned> &referenceCount) {
  assert(sec.type == SHT_AARCH64_MEMTAG_GLOBALS_STATIC);

  const RelsOrRelas<ELFT> rels = sec.relsOrRelas<ELFT>();
  if (rels.areRelocsRel())
    error("non-RELA relocations are not allowed with memtag globals");

  for (const typename ELFT::Rela &rel : rels.relas) {
    Symbol &sym = sec.file->getRelocTargetSym(rel);
    // Linker-synthesized symbols such as __executable_start may be referenced
    // as tagged in input objfiles, and we don't want them to be tagged. A
    // cheap way to exclude them is the type check, but their type is
    // STT_NOTYPE. In addition, this saves us from checking untaggable symbols,
    // like functions or TLS symbols.
    if (sym.type != STT_OBJECT)
      continue;
    // STB_LOCAL symbols can't be referenced from outside the object file, and
    // thus don't need to be checked for references from other object files.
    if (sym.binding == STB_LOCAL) {
      sym.setIsTagged(true);
      continue;
    }
    ++referenceCount[&sym];
  }
  sec.markDead();
}

// A tagged symbol must be denoted as being tagged by all references and the
// chosen definition. For simplicity, here, it must also be denoted as tagged
// for all definitions. Otherwise:
//
// 1. A tagged definition can be used by an untagged declaration, in which case
//    the untagged access may be PC-relative, causing a tag mismatch at
//    runtime.
// 2. An untagged definition can be used by a tagged declaration, where the
//    compiler has taken advantage of the increased alignment of the tagged
//    declaration, but the alignment at runtime is wrong, causing a fault.
//
// Ideally, this isn't a problem, as any TU that imports or exports tagged
// symbols should also be built with tagging. But, to handle these cases, we
// demote the symbol to be untagged.
void lld::elf::createTaggedSymbols(const SmallVector<ELFFileBase *, 0> &files) {
  assert(hasMemtag());

  // First, collect all symbols that are marked as tagged, and count how many
  // times they're marked as tagged.
  DenseMap<Symbol *, unsigned> taggedSymbolReferenceCount;
  for (InputFile* file : files) {
    if (file->kind() != InputFile::ObjKind)
      continue;
    for (InputSectionBase *section : file->getSections()) {
      if (!section || section->type != SHT_AARCH64_MEMTAG_GLOBALS_STATIC ||
          section == &InputSection::discarded)
        continue;
      invokeELFT(addTaggedSymbolReferences, *section,
                 taggedSymbolReferenceCount);
    }
  }

  // Now, go through all the symbols. If the number of declarations +
  // definitions to a symbol exceeds the amount of times they're marked as
  // tagged, it means we have an objfile that uses the untagged variant of the
  // symbol.
  for (InputFile *file : files) {
    if (file->kind() != InputFile::BinaryKind &&
        file->kind() != InputFile::ObjKind)
      continue;

    for (Symbol *symbol : file->getSymbols()) {
      // See `addTaggedSymbolReferences` for more details.
      if (symbol->type != STT_OBJECT ||
          symbol->binding == STB_LOCAL)
        continue;
      auto it = taggedSymbolReferenceCount.find(symbol);
      if (it == taggedSymbolReferenceCount.end()) continue;
      unsigned &remainingAllowedTaggedRefs = it->second;
      if (remainingAllowedTaggedRefs == 0) {
        taggedSymbolReferenceCount.erase(it);
        continue;
      }
      --remainingAllowedTaggedRefs;
    }
  }

  // `addTaggedSymbolReferences` has already checked that we have RELA
  // relocations; the only other way to get written addends is with
  // --apply-dynamic-relocs.
  if (!taggedSymbolReferenceCount.empty() && config->writeAddends)
    error("--apply-dynamic-relocs cannot be used with MTE globals");

  // Now, `taggedSymbolReferenceCount` should only contain symbols that are
  // defined as tagged exactly as many times as they're referenced, meaning all
  // uses are tagged.
  for (auto &[symbol, remainingTaggedRefs] : taggedSymbolReferenceCount) {
    assert(remainingTaggedRefs == 0 &&
           "Symbol is defined as tagged more times than it's used");
    symbol->setIsTagged(true);
  }
}