Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/ELF/Arch/ARM.cpp
34878 views
1
//===- ARM.cpp ------------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "InputFiles.h"
10
#include "OutputSections.h"
11
#include "SymbolTable.h"
12
#include "Symbols.h"
13
#include "SyntheticSections.h"
14
#include "Target.h"
15
#include "lld/Common/ErrorHandler.h"
16
#include "lld/Common/Filesystem.h"
17
#include "llvm/BinaryFormat/ELF.h"
18
#include "llvm/Support/Endian.h"
19
20
using namespace llvm;
21
using namespace llvm::support::endian;
22
using namespace llvm::support;
23
using namespace llvm::ELF;
24
using namespace lld;
25
using namespace lld::elf;
26
using namespace llvm::object;
27
28
namespace {
// Target hooks for 32-bit Arm (AArch32): relocation classification and
// application, PLT generation, and Thumb/ARM interworking thunk decisions.
class ARM final : public TargetInfo {
public:
  ARM();
  uint32_t calcEFlags() const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  RelType getDynRel(RelType type) const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  void addPltSymbols(InputSection &isec, uint64_t off) const override;
  void addPltHeaderSymbols(InputSection &isd) const override;
  bool needsThunk(RelExpr expr, RelType type, const InputFile *file,
                  uint64_t branchAddr, const Symbol &s,
                  int64_t a) const override;
  uint32_t getThunkSectionSpacing() const override;
  bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
};
// Classification of a byte range within a section, as indicated by Arm
// mapping symbols: literal data ($d), Thumb code ($t), or ARM code ($a).
enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };
} // namespace
54
55
// Per-section list of Defined symbols; presumably the mapping symbols
// ($a/$t/$d) used to classify code/data ranges — TODO confirm against the
// users of this map elsewhere in the file.
static DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap{};
56
57
// Configure the generic TargetInfo knobs for AArch32.
ARM::ARM() {
  // Dynamic relocation types defined by the ELF for the Arm Architecture ABI.
  copyRel = R_ARM_COPY;
  relativeRel = R_ARM_RELATIVE;
  iRelativeRel = R_ARM_IRELATIVE;
  gotRel = R_ARM_GLOB_DAT;
  pltRel = R_ARM_JUMP_SLOT;
  symbolicRel = R_ARM_ABS32;
  tlsGotRel = R_ARM_TLS_TPOFF32;
  tlsModuleIndexRel = R_ARM_TLS_DTPMOD32;
  tlsOffsetRel = R_ARM_TLS_DTPOFF32;
  // PLT layout produced by writePltHeader()/writePlt(): 32-byte header,
  // 16-byte entries.
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
  // Filler bytes used to pad PLT code (see the memcpy(trapInstr) calls in
  // writePltHeader()/writePlt()).
  trapInstr = {0xd4, 0xd4, 0xd4, 0xd4};
  // Arm may need range-extension/interworking thunks (see needsThunk()).
  needsThunks = true;
  defaultMaxPageSize = 65536;
}
74
75
// Compute the e_flags field for the output ELF header.
uint32_t ARM::calcEFlags() const {
  // The ABI float type is used by loaders to detect the floating point
  // calling convention.
  uint32_t abiFloatType;
  switch (config->armVFPArgs) {
  case ARMVFPArgKind::Base:
  case ARMVFPArgKind::Default:
    abiFloatType = EF_ARM_ABI_FLOAT_SOFT;
    break;
  case ARMVFPArgKind::VFP:
    abiFloatType = EF_ARM_ABI_FLOAT_HARD;
    break;
  default:
    abiFloatType = 0;
    break;
  }

  // Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian
  // with BE-8 code.
  const uint32_t armBE8 = (!config->isLE && config->armBe8) ? EF_ARM_BE8 : 0;

  // We don't currently use any features incompatible with EF_ARM_EABI_VER5,
  // but we don't have any firm guarantees of conformance. Linux AArch64
  // kernels (as of 2016) require an EABI version to be set.
  return EF_ARM_EABI_VER5 | abiFloatType | armBE8;
}
98
99
// Map an ARM relocation type to lld's internal relocation expression, which
// determines how the relocated value is computed (absolute, PC-relative,
// GOT-relative, TLS, ...). Unknown types are reported as errors and treated
// as R_NONE.
RelExpr ARM::getRelExpr(RelType type, const Symbol &s,
                        const uint8_t *loc) const {
  switch (type) {
  case R_ARM_ABS32:
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_ABS:
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    return R_ABS;
  case R_ARM_THM_JUMP8:
  case R_ARM_THM_JUMP11:
    return R_PC;
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
  case R_ARM_PREL31:
  case R_ARM_THM_JUMP19:
  case R_ARM_THM_JUMP24:
  case R_ARM_THM_CALL:
    return R_PLT_PC;
  case R_ARM_GOTOFF32:
    // (S + A) - GOT_ORG
    return R_GOTREL;
  case R_ARM_GOT_BREL:
    // GOT(S) + A - GOT_ORG
    return R_GOT_OFF;
  case R_ARM_GOT_PREL:
  case R_ARM_TLS_IE32:
    // GOT(S) + A - P
    return R_GOT_PC;
  case R_ARM_SBREL32:
    return R_ARM_SBREL;
  case R_ARM_TARGET1:
    // R_ARM_TARGET1 is rel or abs depending on the --target1-* option.
    return config->target1Rel ? R_PC : R_ABS;
  case R_ARM_TARGET2:
    if (config->target2 == Target2Policy::Rel)
      return R_PC;
    if (config->target2 == Target2Policy::Abs)
      return R_ABS;
    return R_GOT_PC;
  case R_ARM_TLS_GD32:
    return R_TLSGD_PC;
  case R_ARM_TLS_LDM32:
    return R_TLSLD_PC;
  case R_ARM_TLS_LDO32:
    return R_DTPREL;
  case R_ARM_BASE_PREL:
    // B(S) + A - P
    // FIXME: currently B(S) assumed to be .got, this may not hold for all
    // platforms.
    return R_GOTONLY_PC;
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_REL32:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
    return R_PC;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2:
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2:
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2:
  case R_ARM_THM_ALU_PREL_11_0:
  case R_ARM_THM_PC8:
  case R_ARM_THM_PC12:
    return R_ARM_PCA;
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVW_BREL:
  case R_ARM_MOVT_BREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVW_BREL:
  case R_ARM_THM_MOVT_BREL:
    return R_ARM_SBREL;
  case R_ARM_NONE:
    return R_NONE;
  case R_ARM_TLS_LE32:
    return R_TPREL;
  case R_ARM_V4BX:
    // V4BX is just a marker to indicate there's a "bx rN" instruction at the
    // given address. It can be used to implement a special linker mode which
    // rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and
    // not ARMv4 output, we can just ignore it.
    return R_NONE;
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}
199
200
// Return the dynamic relocation to emit for `type`, or R_ARM_NONE if the
// type cannot be converted to a dynamic relocation.
RelType ARM::getDynRel(RelType type) const {
  if (type == R_ARM_ABS32)
    return R_ARM_ABS32;
  // R_ARM_TARGET1 behaves like R_ARM_ABS32 unless --target1-rel is in effect.
  if (type == R_ARM_TARGET1 && !config->target1Rel)
    return R_ARM_ABS32;
  return R_ARM_NONE;
}
205
206
// Write the initial value of a .got.plt entry: the start of the PLT
// (the PLT header), regardless of the symbol.
void ARM::writeGotPlt(uint8_t *buf, const Symbol &) const {
  write32(buf, in.plt->getVA());
}
209
210
// Write the initial value of an .igot.plt entry (for ifuncs).
void ARM::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  // An ARM entry is the address of the ifunc resolver function.
  write32(buf, s.getVA());
}
214
215
// Long form PLT Header that does not have any restrictions on the displacement
216
// of the .plt from the .got.plt.
217
static void writePltHeaderLong(uint8_t *buf) {
218
write32(buf + 0, 0xe52de004); // str lr, [sp,#-4]!
219
write32(buf + 4, 0xe59fe004); // ldr lr, L2
220
write32(buf + 8, 0xe08fe00e); // L1: add lr, pc, lr
221
write32(buf + 12, 0xe5bef008); // ldr pc, [lr, #8]
222
write32(buf + 16, 0x00000000); // L2: .word &(.got.plt) - L1 - 8
223
write32(buf + 20, 0xd4d4d4d4); // Pad to 32-byte boundary
224
write32(buf + 24, 0xd4d4d4d4); // Pad to 32-byte boundary
225
write32(buf + 28, 0xd4d4d4d4);
226
uint64_t gotPlt = in.gotPlt->getVA();
227
uint64_t l1 = in.plt->getVA() + 8;
228
write32(buf + 16, gotPlt - l1 - 8);
229
}
230
231
// True if we should use Thumb PLTs, which currently require Thumb2, and are
// only used if the target does not have the ARM ISA.
static bool useThumbPLTs() {
  return config->armHasThumb2ISA && !config->armHasArmISA;
}
236
237
// The default PLT header requires the .got.plt to be within 128 Mb of the
// .plt in the positive direction.
void ARM::writePltHeader(uint8_t *buf) const {
  if (useThumbPLTs()) {
    // The instruction sequence for thumb:
    //
    // 0: b500          push {lr}
    // 2: f8df e008     ldr.w lr, [pc, #0x8] @ 0xe <func+0xe>
    // 6: 44fe          add lr, pc
    // 8: f85e ff08     ldr pc, [lr, #8]!
    // e:               .word .got.plt - .plt - 16
    //
    // At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from
    // `pc` in the add instruction and 8 bytes for the `lr` adjustment.
    //
    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;
    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");
    write16(buf + 0, 0xb500);
    // Split into two halves to support endianness correctly.
    write16(buf + 2, 0xf8df);
    write16(buf + 4, 0xe008);
    write16(buf + 6, 0x44fe);
    // Split into two halves to support endianness correctly.
    write16(buf + 8, 0xf85e);
    write16(buf + 10, 0xff08);
    write32(buf + 12, offset);

    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
    memcpy(buf + 20, trapInstr.data(), 4);
    memcpy(buf + 24, trapInstr.data(), 4);
    memcpy(buf + 28, trapInstr.data(), 4);
  } else {
    // Use a similar sequence to that in writePlt(), the difference is the
    // calling conventions mean we use lr instead of ip. The PLT entry is
    // responsible for saving lr on the stack, the dynamic loader is responsible
    // for reloading it.
    const uint32_t pltData[] = {
        0xe52de004, // L1: str lr, [sp,#-4]!
        0xe28fe600, //     add lr, pc,  #0x0NN00000 &(.got.plt - L1 - 4)
        0xe28eea00, //     add lr, lr,  #0x000NN000 &(.got.plt - L1 - 4)
        0xe5bef000, //     ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)
    };

    uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;
    if (!llvm::isUInt<27>(offset)) {
      // We cannot encode the Offset, use the long form.
      writePltHeaderLong(buf);
      return;
    }
    // Patch the offset into the add/add/ldr immediates 8 bits at a time.
    write32(buf + 0, pltData[0]);
    write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));
    write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));
    write32(buf + 12, pltData[3] | (offset & 0xfff));
    memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary
    memcpy(buf + 20, trapInstr.data(), 4);
    memcpy(buf + 24, trapInstr.data(), 4);
    memcpy(buf + 28, trapInstr.data(), 4);
  }
}
296
297
// Add mapping symbols describing the PLT header: $t/$a marks the start of
// Thumb/ARM code, $d marks the start of the literal data.
void ARM::addPltHeaderSymbols(InputSection &isec) const {
  const bool thumb = useThumbPLTs();
  addSyntheticLocal(thumb ? "$t" : "$a", STT_NOTYPE, 0, 0, isec);
  addSyntheticLocal("$d", STT_NOTYPE, thumb ? 12 : 16, 0, isec);
}
306
307
// Long form PLT entries that do not have any restrictions on the displacement
308
// of the .plt from the .got.plt.
309
static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,
310
uint64_t pltEntryAddr) {
311
write32(buf + 0, 0xe59fc004); // ldr ip, L2
312
write32(buf + 4, 0xe08cc00f); // L1: add ip, ip, pc
313
write32(buf + 8, 0xe59cf000); // ldr pc, [ip]
314
write32(buf + 12, 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8
315
uint64_t l1 = pltEntryAddr + 4;
316
write32(buf + 12, gotPltEntryAddr - l1 - 8);
317
}
318
319
// Write one PLT entry for `sym` at `pltEntryAddr`. The default PLT entries
// require the .got.plt to be within 128 Mb of the .plt in the positive
// direction.
void ARM::writePlt(uint8_t *buf, const Symbol &sym,
                   uint64_t pltEntryAddr) const {

  if (!useThumbPLTs()) {
    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;

    // The PLT entry is similar to the example given in Appendix A of ELF for
    // the Arm Architecture. Instead of using the Group Relocations to find the
    // optimal rotation for the 8-bit immediate used in the add instructions we
    // hard code the most compact rotations for simplicity. This saves a load
    // instruction over the long plt sequences.
    const uint32_t pltData[] = {
        0xe28fc600, // L1: add ip, pc,  #0x0NN00000  Offset(&(.got.plt) - L1 - 8
        0xe28cca00, //     add ip, ip,  #0x000NN000  Offset(&(.got.plt) - L1 - 8
        0xe5bcf000, //     ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8
    };
    if (!llvm::isUInt<27>(offset)) {
      // We cannot encode the Offset, use the long form.
      writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);
      return;
    }
    // Patch the offset into the immediates 8 bits at a time.
    write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));
    write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));
    write32(buf + 8, pltData[2] | (offset & 0xfff));
    memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary
  } else {
    uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;
    assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");

    // A PLT entry will be:
    //
    //       movw ip, #<lower 16 bits>
    //       movt ip, #<upper 16 bits>
    //       add ip, pc
    //   L1: ldr.w pc, [ip]
    //       b L1
    //
    // where ip = r12 = 0xc

    // movw ip, #<lower 16 bits>
    write16(buf + 2, 0x0c00); // use `ip`
    relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);

    // movt ip, #<upper 16 bits>
    write16(buf + 6, 0x0c00); // use `ip`
    relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);

    write16(buf + 8, 0x44fc);  // add ip, pc
    write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)
    write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)
    write16(buf + 14, 0xe7fc); // Branch to previous instruction
  }
}
374
375
// Add mapping symbols for a single PLT entry starting at offset `off`.
void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {
  if (useThumbPLTs()) {
    // The Thumb entry is entirely code; no $d needed.
    addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);
    return;
  }
  addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);
  addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);
}
383
384
bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,
385
uint64_t branchAddr, const Symbol &s,
386
int64_t a) const {
387
// If s is an undefined weak symbol and does not have a PLT entry then it will
388
// be resolved as a branch to the next instruction. If it is hidden, its
389
// binding has been converted to local, so we just check isUndefined() here. A
390
// undefined non-weak symbol will have been errored.
391
if (s.isUndefined() && !s.isInPlt())
392
return false;
393
// A state change from ARM to Thumb and vice versa must go through an
394
// interworking thunk if the relocation type is not R_ARM_CALL or
395
// R_ARM_THM_CALL.
396
switch (type) {
397
case R_ARM_PC24:
398
case R_ARM_PLT32:
399
case R_ARM_JUMP24:
400
// Source is ARM, all PLT entries are ARM so no interworking required.
401
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).
402
assert(!useThumbPLTs() &&
403
"If the source is ARM, we should not need Thumb PLTs");
404
if (s.isFunc() && expr == R_PC && (s.getVA() & 1))
405
return true;
406
[[fallthrough]];
407
case R_ARM_CALL: {
408
uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
409
return !inBranchRange(type, branchAddr, dst + a) ||
410
(!config->armHasBlx && (s.getVA() & 1));
411
}
412
case R_ARM_THM_JUMP19:
413
case R_ARM_THM_JUMP24:
414
// Source is Thumb, when all PLT entries are ARM interworking is required.
415
// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).
416
if ((expr == R_PLT_PC && !useThumbPLTs()) ||
417
(s.isFunc() && (s.getVA() & 1) == 0))
418
return true;
419
[[fallthrough]];
420
case R_ARM_THM_CALL: {
421
uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();
422
return !inBranchRange(type, branchAddr, dst + a) ||
423
(!config->armHasBlx && (s.getVA() & 1) == 0);;
424
}
425
}
426
return false;
427
}
428
429
// Distance between pre-created ThunkSections. The aim is to place each
// ThunkSection so that all branches from the InputSections preceding it can
// reach a Thunk placed at its end:
//   | up to thunkSectionSpacing .text input sections |
//   | ThunkSection                                   |
//   | up to thunkSectionSpacing .text input sections |
//   | ThunkSection                                   |
// If a branch cannot reach a pre-created ThunkSection a new one will be
// created, handling the rare cases such as a Thumb 2 conditional branch
// (B<cc>.W has only +/- 1MiB range).
uint32_t ARM::getThunkSectionSpacing() const {
  // The spacing is deliberately smaller than the maximum branch range so the
  // end of a ThunkSection stays reachable from the furthest-away branch. The
  // subtracted slack conservatively allows 16,384 12-byte Thunks at any
  // offset in a ThunkSection without a branch to one going out of range.
  if (config->armJ1J2BranchEncoding) {
    // ARMv6T2 and later: match the most common expected case of a Thumb 2
    // encoded BL, BLX or B.W:
    //   ARM   B, BL, BLX  range +/- 32MiB
    //   Thumb B.W, BL, BLX range +/- 16MiB
    return 0x1000000 - 0x30000;
  }
  // Earlier architectures (ARMv4/v5/v6 except ARMv6T2) have a Thumb branch
  // range of only +/- 4MiB.
  return 0x400000 - 0x7500;
}
462
463
// Return true if a branch of relocation `type` from `src` can reach `dst`
// without a range-extension thunk.
bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {
  if (dst & 0x1) {
    // Bit 0 == 1 denotes Thumb state, it is not part of the range.
    dst &= ~0x1;
  } else {
    // Destination is ARM, if ARM caller then Src is already 4-byte aligned.
    // If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to
    // ensure destination will be 4 byte aligned.
    src &= ~0x3;
  }

  const int64_t distance = dst - src;
  switch (type) {
  case R_ARM_PC24:
  case R_ARM_PLT32:
  case R_ARM_JUMP24:
  case R_ARM_CALL:
    return llvm::isInt<26>(distance);
  case R_ARM_THM_JUMP19:
    return llvm::isInt<21>(distance);
  case R_ARM_THM_JUMP24:
  case R_ARM_THM_CALL:
    // ARMv6T2+ (J1/J2 encoding) extends the Thumb branch range.
    if (config->armJ1J2BranchEncoding)
      return llvm::isInt<25>(distance);
    return llvm::isInt<23>(distance);
  default:
    return true;
  }
}
490
491
// Helper to produce message text when LLD detects that a CALL relocation to
// a non STT_FUNC symbol that may result in incorrect interworking between ARM
// or Thumb.
static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {
  assert(!s.isFunc());
  const ErrorPlace place = getErrorPlace(loc);
  std::string hint;
  if (!place.srcLoc.empty())
    hint = "; " + place.srcLoc;
  if (s.isSection()) {
    // Section symbols must be defined and in a section. Users cannot change
    // the type. Use the section name as getName() returns an empty string.
    warn(place.loc + "branch and link relocation: " + toString(relt) +
         " to STT_SECTION symbol " + cast<Defined>(s).section->name +
         " ; interworking not performed" + hint);
  } else {
    // Warn with hint on how to alter the symbol type.
    warn(getErrorLocation(loc) + "branch and link relocation: " +
         toString(relt) + " to non STT_FUNC symbol: " + s.getName() +
         " interworking not performed; consider using directive '.type " +
         s.getName() +
         ", %function' to give symbol type STT_FUNC if interworking between "
         "ARM and Thumb is required" +
         hint);
  }
}
517
518
// Rotate a 32-bit unsigned value right by a specified amt of bits.
static uint32_t rotr32(uint32_t val, uint32_t amt) {
  assert(amt < 32 && "Invalid rotate amount");
  const uint32_t low = val >> amt;
  // Mask the shift with 31 so that amt == 0 does not shift by 32 (UB).
  const uint32_t high = val << ((32 - amt) & 31);
  return low | high;
}
523
524
// Compute the {remainder, leading-zero count} pair used to encode ARM group
// relocation number `group` of `val`. Each iteration peels off the
// most-significant 8-bit chunk (at an even bit position, as required by the
// ARM modified-immediate rotate encoding) until the requested group is
// reached.
static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group,
                                                         uint32_t val) {
  uint32_t rem, lz;
  do {
    // Round the leading-zero count down to an even value: the rotate field
    // only encodes even rotations.
    lz = llvm::countl_zero(val) & ~1;
    rem = val;
    if (lz == 32) // implies rem == 0
      break;
    // Clear the top 8 significant bits, leaving the remainder for the next
    // group.
    val &= 0xffffff >> lz;
  } while (group--);
  return {rem, lz};
}
536
537
// Patch an ADD/SUB (immediate) instruction at `loc` with group relocation
// number `group` of `val`. When `check` is set, report an error if the
// selected chunk does not fit the 8-bit immediate.
static void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                           int group, bool check) {
  // ADD/SUB (immediate) add = bit23, sub = bit22
  // immediate field carries is a 12-bit modified immediate, made up of a 4-bit
  // even rotate right and an 8-bit immediate.
  uint32_t opcode = 0x00800000;
  // A negative 64-bit value (sign bit set) selects SUB with the magnitude.
  if (val >> 63) {
    opcode = 0x00400000;
    val = -val;
  }
  uint32_t imm, lz;
  std::tie(imm, lz) = getRemAndLZForGroup(group, val);
  uint32_t rot = 0;
  if (lz < 24) {
    // Rotate the 8-bit chunk into the low byte and record the rotation in the
    // 4-bit rotate field (bits 11:8, hence << 7 on the doubled amount).
    imm = rotr32(imm, 24 - lz);
    rot = (lz + 8) << 7;
  }
  if (check && imm > 0xff)
    error(getErrorLocation(loc) + "unencodeable immediate " + Twine(val).str() +
          " for relocation " + toString(rel.type));
  write32(loc, (read32(loc) & 0xff3ff000) | opcode | rot | (imm & 0xff));
}
559
560
// Patch an LDR (literal) instruction at `loc` with group relocation number
// `group` of `val`.
static void encodeLdrGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                           int group) {
  // R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
  // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
  // bottom bit to recover S + A - P.
  if (rel.sym->isFunc())
    val &= ~0x1;
  // LDR (literal) u = bit23
  uint32_t opcode = 0x00800000;
  // A negative value clears the U (add) bit and uses the magnitude.
  if (val >> 63) {
    opcode = 0x0;
    val = -val;
  }
  uint32_t imm = getRemAndLZForGroup(group, val).first;
  checkUInt(loc, imm, 12, rel);
  write32(loc, (read32(loc) & 0xff7ff000) | opcode | imm);
}
577
578
// Patch an LDRD/LDRH/LDRSB/LDRSH (literal) instruction at `loc` with group
// relocation number `group` of `val`. The 8-bit immediate is split into two
// nibbles (bits 11:8 and 3:0).
static void encodeLdrsGroup(uint8_t *loc, const Relocation &rel, uint64_t val,
                            int group) {
  // R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a
  // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
  // bottom bit to recover S + A - P.
  if (rel.sym->isFunc())
    val &= ~0x1;
  // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23
  uint32_t opcode = 0x00800000;
  // A negative value clears the U (add) bit and uses the magnitude.
  if (val >> 63) {
    opcode = 0x0;
    val = -val;
  }
  uint32_t imm = getRemAndLZForGroup(group, val).first;
  checkUInt(loc, imm, 8, rel);
  write32(loc, (read32(loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) |
                   (imm & 0xf));
}
596
597
// Apply relocation `rel` with computed value `val` to the instruction or
// data word at `loc`, patching the instruction encoding in place.
void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {
  switch (rel.type) {
  // Plain 32-bit data words: store the value directly.
  case R_ARM_ABS32:
  case R_ARM_BASE_PREL:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_BREL:
  case R_ARM_GOT_PREL:
  case R_ARM_REL32:
  case R_ARM_RELATIVE:
  case R_ARM_SBREL32:
  case R_ARM_TARGET1:
  case R_ARM_TARGET2:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_TPOFF32:
  case R_ARM_TLS_DTPOFF32:
    write32(loc, val);
    break;
  case R_ARM_PREL31:
    // Bit 31 is preserved; the low 31 bits hold the signed offset.
    checkInt(loc, val, 31, rel);
    write32(loc, (read32(loc) & 0x80000000) | (val & ~0x80000000));
    break;
  case R_ARM_CALL: {
    // R_ARM_CALL is used for BL and BLX instructions, for symbols of type
    // STT_FUNC we choose whether to write a BL or BLX depending on the
    // value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is
    // not of type STT_FUNC then we must preserve the original instruction.
    assert(rel.sym); // R_ARM_CALL is always reached via relocate().
    bool bit0Thumb = val & 1;
    bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;
    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
    // even when type not STT_FUNC.
    if (!rel.sym->isFunc() && isBlx != bit0Thumb)
      stateChangeWarning(loc, rel.type, *rel.sym);
    if (rel.sym->isFunc() ? bit0Thumb : isBlx) {
      // The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'
      checkInt(loc, val, 26, rel);
      write32(loc, 0xfa000000 |                    // opcode
                       ((val & 2) << 23) |         // H
                       ((val >> 2) & 0x00ffffff)); // imm24
      break;
    }
    // BLX (always unconditional) instruction to an ARM Target, select an
    // unconditional BL.
    write32(loc, 0xeb000000 | (read32(loc) & 0x00ffffff));
    // fall through as BL encoding is shared with B
  }
    [[fallthrough]];
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
    checkInt(loc, val, 26, rel);
    write32(loc, (read32(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff));
    break;
  case R_ARM_THM_JUMP8:
    // We do a 9 bit check because val is right-shifted by 1 bit.
    checkInt(loc, val, 9, rel);
    write16(loc, (read32(loc) & 0xff00) | ((val >> 1) & 0x00ff));
    break;
  case R_ARM_THM_JUMP11:
    // We do a 12 bit check because val is right-shifted by 1 bit.
    checkInt(loc, val, 12, rel);
    write16(loc, (read32(loc) & 0xf800) | ((val >> 1) & 0x07ff));
    break;
  case R_ARM_THM_JUMP19:
    // Encoding T3: Val = S:J2:J1:imm6:imm11:0
    checkInt(loc, val, 21, rel);
    write16(loc,
            (read16(loc) & 0xfbc0) |   // opcode cond
                ((val >> 10) & 0x0400) | // S
                ((val >> 12) & 0x003f)); // imm6
    write16(loc + 2,
            0x8000 |                    // opcode
                ((val >> 8) & 0x0800) | // J2
                ((val >> 5) & 0x2000) | // J1
                ((val >> 1) & 0x07ff)); // imm11
    break;
  case R_ARM_THM_CALL: {
    // R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type
    // STT_FUNC we choose whether to write a BL or BLX depending on the
    // value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is
    // not of type STT_FUNC then we must preserve the original instruction.
    // PLT entries are always ARM state so we know we need to interwork.
    assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().
    bool bit0Thumb = val & 1;
    bool useThumb = bit0Thumb || useThumbPLTs();
    bool isBlx = (read16(loc + 2) & 0x1000) == 0;
    // lld 10.0 and before always used bit0Thumb when deciding to write a BLX
    // even when type not STT_FUNC.
    if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)
      stateChangeWarning(loc, rel.type, *rel.sym);
    if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {
      // We are writing a BLX. Ensure BLX destination is 4-byte aligned. As
      // the BLX instruction may only be two byte aligned. This must be done
      // before overflow check.
      val = alignTo(val, 4);
      write16(loc + 2, read16(loc + 2) & ~0x1000);
    } else {
      write16(loc + 2, (read16(loc + 2) & ~0x1000) | 1 << 12);
    }
    if (!config->armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      checkInt(loc, val, 23, rel);
      write16(loc,
              0xf000 |                     // opcode
                  ((val >> 12) & 0x07ff)); // imm11
      write16(loc + 2,
              (read16(loc + 2) & 0xd000) | // opcode
                  0x2800 |                 // J1 == J2 == 1
                  ((val >> 1) & 0x07ff));  // imm11
      break;
    }
  }
    // Fall through as rest of encoding is the same as B.W
    [[fallthrough]];
  case R_ARM_THM_JUMP24:
    // Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0
    checkInt(loc, val, 25, rel);
    write16(loc,
            0xf000 |                     // opcode
                ((val >> 14) & 0x0400) | // S
                ((val >> 12) & 0x03ff)); // imm10
    write16(loc + 2,
            (read16(loc + 2) & 0xd000) |                  // opcode
                (((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1
                (((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2
                ((val >> 1) & 0x07ff));                     // imm11
    break;
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVW_BREL_NC:
    // MOVW: low 16 bits of val split as imm4:imm12.
    write32(loc, (read32(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) |
                     (val & 0x0fff));
    break;
  case R_ARM_MOVT_ABS:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVT_BREL:
    // MOVT: high 16 bits of val split as imm4:imm12.
    write32(loc, (read32(loc) & ~0x000f0fff) |
                     (((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff));
    break;
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVT_BREL:
    // Encoding T1: A = imm4:i:imm3:imm8

    write16(loc,
            0xf2c0 |                     // opcode
                ((val >> 17) & 0x0400) | // i
                ((val >> 28) & 0x000f)); // imm4

    write16(loc + 2,
            (read16(loc + 2) & 0x8f00) | // opcode
                ((val >> 12) & 0x7000) | // imm3
                ((val >> 16) & 0x00ff)); // imm8
    break;
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVW_BREL_NC:
    // Encoding T3: A = imm4:i:imm3:imm8
    write16(loc,
            0xf240 |                     // opcode
                ((val >> 1) & 0x0400) |  // i
                ((val >> 12) & 0x000f)); // imm4
    write16(loc + 2,
            (read16(loc + 2) & 0x8f00) | // opcode
                ((val << 4) & 0x7000) |  // imm3
                (val & 0x00ff));         // imm8
    break;
  // THM_ALU_ABS_Gn: write one byte of the address into the 8-bit immediate.
  case R_ARM_THM_ALU_ABS_G3:
    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 24) & 0x00ff));
    break;
  case R_ARM_THM_ALU_ABS_G2_NC:
    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 16) & 0x00ff));
    break;
  case R_ARM_THM_ALU_ABS_G1_NC:
    write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 8) & 0x00ff));
    break;
  case R_ARM_THM_ALU_ABS_G0_NC:
    write16(loc, (read16(loc) &~ 0x00ff) | (val & 0x00ff));
    break;
  // Group relocations: delegate to the encode*Group helpers. The final group
  // in a chain (non-_NC) is overflow-checked.
  case R_ARM_ALU_PC_G0:
    encodeAluGroup(loc, rel, val, 0, true);
    break;
  case R_ARM_ALU_PC_G0_NC:
    encodeAluGroup(loc, rel, val, 0, false);
    break;
  case R_ARM_ALU_PC_G1:
    encodeAluGroup(loc, rel, val, 1, true);
    break;
  case R_ARM_ALU_PC_G1_NC:
    encodeAluGroup(loc, rel, val, 1, false);
    break;
  case R_ARM_ALU_PC_G2:
    encodeAluGroup(loc, rel, val, 2, true);
    break;
  case R_ARM_LDR_PC_G0:
    encodeLdrGroup(loc, rel, val, 0);
    break;
  case R_ARM_LDR_PC_G1:
    encodeLdrGroup(loc, rel, val, 1);
    break;
  case R_ARM_LDR_PC_G2:
    encodeLdrGroup(loc, rel, val, 2);
    break;
  case R_ARM_LDRS_PC_G0:
    encodeLdrsGroup(loc, rel, val, 0);
    break;
  case R_ARM_LDRS_PC_G1:
    encodeLdrsGroup(loc, rel, val, 1);
    break;
  case R_ARM_LDRS_PC_G2:
    encodeLdrsGroup(loc, rel, val, 2);
    break;
  case R_ARM_THM_ALU_PREL_11_0: {
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    int64_t imm = val;
    uint16_t sub = 0;
    if (imm < 0) {
      imm = -imm;
      sub = 0x00a0;
    }
    checkUInt(loc, imm, 12, rel);
    write16(loc, (read16(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);
    write16(loc + 2,
            (read16(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff));
    break;
  }
  case R_ARM_THM_PC8:
    // ADR and LDR literal encoding T1 positive offset only imm8:00
    // R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
    // bottom bit to recover S + A - Pa.
    if (rel.sym->isFunc())
      val &= ~0x1;
    checkUInt(loc, val, 10, rel);
    checkAlignment(loc, val, 4, rel);
    write16(loc, (read16(loc) & 0xff00) | (val & 0x3fc) >> 2);
    break;
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    // imm12 is unsigned
    // R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a
    // function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear
    // bottom bit to recover S + A - Pa.
    if (rel.sym->isFunc())
      val &= ~0x1;
    int64_t imm12 = val;
    uint16_t u = 0x0080;
    if (imm12 < 0) {
      imm12 = -imm12;
      u = 0;
    }
    checkUInt(loc, imm12, 12, rel);
    write16(loc, read16(loc) | u);
    write16(loc + 2, (read16(loc + 2) & 0xf000) | imm12);
    break;
  }
  default:
    llvm_unreachable("unknown relocation");
  }
}
862
863
// Reads the addend already encoded in the instruction or data word at `buf`
// for relocation `type`. This is the inverse of the field extraction that
// ARM::relocate() performs when writing; field layouts come from "ELF for
// the Arm Architecture" (AAELF32) and the Arm ARM instruction encodings.
int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  case R_ARM_ABS32:
  case R_ARM_BASE_PREL:
  case R_ARM_GLOB_DAT:
  case R_ARM_GOTOFF32:
  case R_ARM_GOT_BREL:
  case R_ARM_GOT_PREL:
  case R_ARM_IRELATIVE:
  case R_ARM_REL32:
  case R_ARM_RELATIVE:
  case R_ARM_SBREL32:
  case R_ARM_TARGET1:
  case R_ARM_TARGET2:
  case R_ARM_TLS_DTPMOD32:
  case R_ARM_TLS_DTPOFF32:
  case R_ARM_TLS_GD32:
  case R_ARM_TLS_IE32:
  case R_ARM_TLS_LDM32:
  case R_ARM_TLS_LE32:
  case R_ARM_TLS_LDO32:
  case R_ARM_TLS_TPOFF32:
    // Data relocations: the addend is the whole 32-bit word.
    return SignExtend64<32>(read32(buf));
  case R_ARM_PREL31:
    // The top bit of the word is not part of the 31-bit addend.
    return SignExtend64<31>(read32(buf));
  case R_ARM_CALL:
  case R_ARM_JUMP24:
  case R_ARM_PC24:
  case R_ARM_PLT32:
    // Arm branches: signed 24-bit immediate scaled by 4.
    return SignExtend64<26>(read32(buf) << 2);
  case R_ARM_THM_JUMP8:
    // Thumb conditional branch (encoding T1): 8-bit immediate scaled by 2.
    return SignExtend64<9>(read16(buf) << 1);
  case R_ARM_THM_JUMP11:
    // Thumb unconditional branch (encoding T2): 11-bit immediate scaled by 2.
    return SignExtend64<12>(read16(buf) << 1);
  case R_ARM_THM_JUMP19: {
    // Encoding T3: A = S:J2:J1:imm6:imm11:0
    uint16_t hi = read16(buf);
    uint16_t lo = read16(buf + 2);
    return SignExtend64<20>(((hi & 0x0400) << 10) | // S
                            ((lo & 0x0800) << 8) |  // J2
                            ((lo & 0x2000) << 5) |  // J1
                            ((hi & 0x003f) << 12) | // imm6
                            ((lo & 0x07ff) << 1));  // imm11:0
  }
  case R_ARM_THM_CALL:
    if (!config->armJ1J2BranchEncoding) {
      // Older Arm architectures do not support R_ARM_THM_JUMP24 and have
      // different encoding rules and range due to J1 and J2 always being 1.
      uint16_t hi = read16(buf);
      uint16_t lo = read16(buf + 2);
      return SignExtend64<22>(((hi & 0x7ff) << 12) | // imm11
                              ((lo & 0x7ff) << 1));  // imm11:0
      break; // unreachable after the return above
    }
    [[fallthrough]];
  case R_ARM_THM_JUMP24: {
    // Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0
    // I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)
    uint16_t hi = read16(buf);
    uint16_t lo = read16(buf + 2);
    return SignExtend64<24>(((hi & 0x0400) << 14) |                    // S
                            (~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1
                            (~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2
                            ((hi & 0x003ff) << 12) |                   // imm10
                            ((lo & 0x007ff) << 1));                    // imm11:0
  }
  // ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and
  // MOVT is in the range -32768 <= A < 32768
  case R_ARM_MOVW_ABS_NC:
  case R_ARM_MOVT_ABS:
  case R_ARM_MOVW_PREL_NC:
  case R_ARM_MOVT_PREL:
  case R_ARM_MOVW_BREL_NC:
  case R_ARM_MOVT_BREL: {
    // Arm MOVW/MOVT: A = imm4:imm12.
    uint64_t val = read32(buf) & 0x000f0fff;
    return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff));
  }
  case R_ARM_THM_MOVW_ABS_NC:
  case R_ARM_THM_MOVT_ABS:
  case R_ARM_THM_MOVW_PREL_NC:
  case R_ARM_THM_MOVT_PREL:
  case R_ARM_THM_MOVW_BREL_NC:
  case R_ARM_THM_MOVT_BREL: {
    // Encoding T3: A = imm4:i:imm3:imm8
    uint16_t hi = read16(buf);
    uint16_t lo = read16(buf + 2);
    return SignExtend64<16>(((hi & 0x000f) << 12) | // imm4
                            ((hi & 0x0400) << 1) |  // i
                            ((lo & 0x7000) >> 4) |  // imm3
                            (lo & 0x00ff));         // imm8
  }
  case R_ARM_THM_ALU_ABS_G0_NC:
  case R_ARM_THM_ALU_ABS_G1_NC:
  case R_ARM_THM_ALU_ABS_G2_NC:
  case R_ARM_THM_ALU_ABS_G3:
    // 8-bit immediate holding one byte of the address.
    return read16(buf) & 0xff;
  case R_ARM_ALU_PC_G0:
  case R_ARM_ALU_PC_G0_NC:
  case R_ARM_ALU_PC_G1:
  case R_ARM_ALU_PC_G1_NC:
  case R_ARM_ALU_PC_G2: {
    // 12-bit immediate is a modified immediate made up of a 4-bit even
    // right rotation and 8-bit constant. After the rotation the value
    // is zero-extended. When bit 23 is set the instruction is an add, when
    // bit 22 is set it is a sub.
    uint32_t instr = read32(buf);
    uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);
    return (instr & 0x00400000) ? -val : val;
  }
  case R_ARM_LDR_PC_G0:
  case R_ARM_LDR_PC_G1:
  case R_ARM_LDR_PC_G2: {
    // ADR (literal) add = bit23, sub = bit22
    // LDR (literal) u = bit23 unsigned imm12
    bool u = read32(buf) & 0x00800000;
    uint32_t imm12 = read32(buf) & 0xfff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_LDRS_PC_G0:
  case R_ARM_LDRS_PC_G1:
  case R_ARM_LDRS_PC_G2: {
    // LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8
    uint32_t opcode = read32(buf);
    bool u = opcode & 0x00800000;
    uint32_t imm4l = opcode & 0xf;
    uint32_t imm4h = (opcode & 0xf00) >> 4;
    return u ? (imm4h | imm4l) : -(imm4h | imm4l);
  }
  case R_ARM_THM_ALU_PREL_11_0: {
    // Thumb2 ADR, which is an alias for a sub or add instruction with an
    // unsigned immediate.
    // ADR encoding T2 (sub), T3 (add) i:imm3:imm8
    uint16_t hi = read16(buf);
    uint16_t lo = read16(buf + 2);
    uint64_t imm = (hi & 0x0400) << 1 | // i
                   (lo & 0x7000) >> 4 | // imm3
                   (lo & 0x00ff);       // imm8
    // For sub, addend is negative, add is positive.
    return (hi & 0x00f0) ? -imm : imm;
  }
  case R_ARM_THM_PC8:
    // ADR and LDR (literal) encoding T1
    // From ELF for the ARM Architecture the initial signed addend is formed
    // from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4)
    // this trick permits the PC bias of -4 to be encoded using imm8 = 0xff
    return ((((read16(buf) & 0xff) << 2) + 4) & 0x3ff) - 4;
  case R_ARM_THM_PC12: {
    // LDR (literal) encoding T2, add = (U == '1') imm12
    bool u = read16(buf) & 0x0080;
    uint64_t imm12 = read16(buf + 2) & 0x0fff;
    return u ? imm12 : -imm12;
  }
  case R_ARM_NONE:
  case R_ARM_V4BX:
  case R_ARM_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  }
}
static bool isArmMapSymbol(const Symbol *b) {
1028
return b->getName() == "$a" || b->getName().starts_with("$a.");
1029
}
1030
1031
static bool isThumbMapSymbol(const Symbol *s) {
1032
return s->getName() == "$t" || s->getName().starts_with("$t.");
1033
}
1034
1035
static bool isDataMapSymbol(const Symbol *b) {
1036
return b->getName() == "$d" || b->getName().starts_with("$d.");
1037
}
1038
1039
void elf::sortArmMappingSymbols() {
1040
// For each input section make sure the mapping symbols are sorted in
1041
// ascending order.
1042
for (auto &kv : sectionMap) {
1043
SmallVector<const Defined *, 0> &mapSyms = kv.second;
1044
llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {
1045
return a->value < b->value;
1046
});
1047
}
1048
}
1049
1050
void elf::addArmInputSectionMappingSymbols() {
  // Collect mapping symbols ($a/$t/$d) for every executable input section.
  // Mapping symbols that the linker generates for synthetic sections are
  // added to sectionMap by addArmSyntheticSectionMappingSymbol() instead.
  for (ELFFileBase *file : ctx.objectFiles) {
    for (Symbol *sym : file->getLocalSymbols()) {
      auto *def = dyn_cast<Defined>(sym);
      if (!def)
        continue;
      if (!isArmMapSymbol(def) && !isDataMapSymbol(def) &&
          !isThumbMapSymbol(def))
        continue;
      // Only symbols placed in executable sections are recorded.
      if (auto *sec = cast_if_present<InputSection>(def->section))
        if (sec->flags & SHF_EXECINSTR)
          sectionMap[sec].push_back(def);
    }
  }
}
// Synthetic sections are not backed by an ELF file where we can access the
1071
// symbol table, instead mapping symbols added to synthetic sections are stored
1072
// in the synthetic symbol table. Due to the presence of strip (--strip-all),
1073
// we can not rely on the synthetic symbol table retaining the mapping symbols.
1074
// Instead we record the mapping symbols locally.
1075
void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) {
1076
if (!isArmMapSymbol(sym) && !isDataMapSymbol(sym) && !isThumbMapSymbol(sym))
1077
return;
1078
if (auto *sec = cast_if_present<InputSection>(sym->section))
1079
if (sec->flags & SHF_EXECINSTR)
1080
sectionMap[sec].push_back(sym);
1081
}
1082
1083
static void toLittleEndianInstructions(uint8_t *buf, uint64_t start,
1084
uint64_t end, uint64_t width) {
1085
CodeState curState = static_cast<CodeState>(width);
1086
if (curState == CodeState::Arm)
1087
for (uint64_t i = start; i < end; i += width)
1088
write32le(buf + i, read32(buf + i));
1089
1090
if (curState == CodeState::Thumb)
1091
for (uint64_t i = start; i < end; i += width)
1092
write16le(buf + i, read16(buf + i));
1093
}
1094
1095
// Arm BE8 big endian format requires instructions to be little endian, with
1096
// the initial contents big-endian. Convert the big-endian instructions to
1097
// little endian leaving literal data untouched. We use mapping symbols to
1098
// identify half open intervals of Arm code [$a, non $a) and Thumb code
1099
// [$t, non $t) and convert these to little endian a word or half word at a
1100
// time respectively.
1101
void elf::convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf) {
1102
if (!sectionMap.contains(sec))
1103
return;
1104
1105
SmallVector<const Defined *, 0> &mapSyms = sectionMap[sec];
1106
1107
if (mapSyms.empty())
1108
return;
1109
1110
CodeState curState = CodeState::Data;
1111
uint64_t start = 0, width = 0, size = sec->getSize();
1112
for (auto &msym : mapSyms) {
1113
CodeState newState = CodeState::Data;
1114
if (isThumbMapSymbol(msym))
1115
newState = CodeState::Thumb;
1116
else if (isArmMapSymbol(msym))
1117
newState = CodeState::Arm;
1118
1119
if (newState == curState)
1120
continue;
1121
1122
if (curState != CodeState::Data) {
1123
width = static_cast<uint64_t>(curState);
1124
toLittleEndianInstructions(buf, start, msym->value, width);
1125
}
1126
start = msym->value;
1127
curState = newState;
1128
}
1129
1130
// Passed last mapping symbol, may need to reverse
1131
// up to end of section.
1132
if (curState != CodeState::Data) {
1133
width = static_cast<uint64_t>(curState);
1134
toLittleEndianInstructions(buf, start, size, width);
1135
}
1136
}
1137
1138
// The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts;
1139
// the non-secure and secure states with the secure state inaccessible from the
1140
// non-secure state, apart from an area of memory in secure state called the
1141
// secure gateway which is accessible from non-secure state. The secure gateway
1142
// contains one or more entry points which must start with a landing pad
1143
// instruction SG. Arm recommends that the secure gateway consists only of
1144
// secure gateway veneers, which are made up of a SG instruction followed by a
1145
// branch to the destination in secure state. Full details can be found in Arm
1146
// v8-M Security Extensions Requirements on Development Tools.
1147
//
1148
// The CMSE model of software development requires the non-secure and secure
1149
// states to be developed as two separate programs. The non-secure developer is
1150
// provided with an import library defining symbols describing the entry points
1151
// in the secure gateway. No additional linker support is required for the
1152
// non-secure state.
1153
//
1154
// Development of the secure state requires linker support to manage the secure
1155
// gateway veneers. The management consists of:
1156
// - Creation of new secure gateway veneers based on symbol conventions.
1157
// - Checking the address of existing secure gateway veneers.
1158
// - Warning when existing secure gateway veneers are removed.
1159
//
1160
// The secure gateway veneers are created in an import library, which is just an
1161
// ELF object with a symbol table. The import library is controlled by two
1162
// command line options:
1163
// --in-implib (specify an input import library from a previous revision of the
1164
// program).
1165
// --out-implib (specify an output import library to be created by the linker).
1166
//
1167
// The input import library is used to manage consistency of the secure entry
1168
// points. The output import library is for new and updated secure entry points.
1169
//
1170
// The symbol convention that identifies secure entry functions is the
// __acle_se_ prefix: for a symbol called <name>, the linker is expected to
// create a secure gateway veneer if the symbols __acle_se_<name> and <name>
// have the same address.
1173
// After creating a secure gateway veneer the symbol name labels the secure
1174
// gateway veneer and the __acle_se_name labels the function definition.
1175
//
1176
// The LLD implementation:
1177
// - Reads an existing import library with importCmseSymbols().
1178
// - Determines which new secure gateway veneers to create and redirects calls
1179
// within the secure state to the __acle_se_ prefixed symbol with
1180
// processArmCmseSymbols().
1181
// - Models the SG veneers as a synthetic section.
1182
1183
// Reads the secure gateway symbols from an input import library (--in-implib)
// produced by a previous link, validating each one and recording it in
// symtab.cmseImportLib keyed by name. Symbols must be global, absolute
// (SHN_ABS), and Thumb function definitions (bit 0 of st_value set).
template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {
  ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>();
  // Error for local symbols. The symbol at index 0 is LOCAL. So skip it.
  for (size_t i = 1, end = firstGlobal; i != end; ++i) {
    errorOrWarn("CMSE symbol '" + CHECK(eSyms[i].getName(stringTable), this) +
                "' in import library '" + toString(this) + "' is not global");
  }

  for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {
    const Elf_Sym &eSym = eSyms[i];
    // Allocate a Defined in the generic SymbolUnion storage; the memset
    // below zero-initializes all Symbol fields before they are set.
    Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());

    // Initialize symbol fields.
    memset(sym, 0, sizeof(Symbol));
    sym->setName(CHECK(eSyms[i].getName(stringTable), this));
    sym->value = eSym.st_value;
    sym->size = eSym.st_size;
    sym->type = eSym.getType();
    sym->binding = eSym.getBinding();
    sym->stOther = eSym.st_other;

    // Import-library symbols carry a fixed address, so they must be absolute.
    if (eSym.st_shndx != SHN_ABS) {
      error("CMSE symbol '" + sym->getName() + "' in import library '" +
            toString(this) + "' is not absolute");
      continue;
    }

    // Secure gateway veneers are Thumb code: bit 0 of the address is set.
    if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) {
      error("CMSE symbol '" + sym->getName() + "' in import library '" +
            toString(this) + "' is not a Thumb function definition");
      continue;
    }

    if (symtab.cmseImportLib.count(sym->getName())) {
      error("CMSE symbol '" + sym->getName() +
            "' is multiply defined in import library '" + toString(this) + "'");
      continue;
    }

    // A size mismatch is suspicious but not fatal; warn only.
    if (eSym.st_size != ACLESESYM_SIZE) {
      warn("CMSE symbol '" + sym->getName() + "' in import library '" +
           toString(this) + "' does not have correct size of " +
           Twine(ACLESESYM_SIZE) + " bytes");
    }

    symtab.cmseImportLib[sym->getName()] = sym;
  }
}
// Check symbol attributes of the acleSeSym, sym pair.
1235
// Both symbols should be global/weak Thumb code symbol definitions.
1236
static std::string checkCmseSymAttributes(Symbol *acleSeSym, Symbol *sym) {
1237
auto check = [](Symbol *s, StringRef type) -> std::optional<std::string> {
1238
auto d = dyn_cast_or_null<Defined>(s);
1239
if (!(d && d->isFunc() && (d->value & 1)))
1240
return (Twine(toString(s->file)) + ": cmse " + type + " symbol '" +
1241
s->getName() + "' is not a Thumb function definition")
1242
.str();
1243
if (!d->section)
1244
return (Twine(toString(s->file)) + ": cmse " + type + " symbol '" +
1245
s->getName() + "' cannot be an absolute symbol")
1246
.str();
1247
return std::nullopt;
1248
};
1249
for (auto [sym, type] :
1250
{std::make_pair(acleSeSym, "special"), std::make_pair(sym, "entry")})
1251
if (auto err = check(sym, type))
1252
return *err;
1253
return "";
1254
}
1255
1256
// Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M
// Security Extensions specification.
// 1) <sym> : A standard function name.
// 2) __acle_se_<sym> : A special symbol that prefixes the standard function
// name with __acle_se_.
// Both these symbols are Thumb function symbols with external linkage.
// <sym> may be redefined in .gnu.sgstubs.
void elf::processArmCmseSymbols() {
  if (!config->cmseImplib)
    return;
  // Only symbols with external linkage end up in symtab, so no need to do
  // linkage checks. Only check symbol type.
  for (Symbol *acleSeSym : symtab.getSymbols()) {
    if (!acleSeSym->getName().starts_with(ACLESESYM_PREFIX))
      continue;
    // If input object build attributes do not support CMSE, error and disable
    // further scanning for <sym>, __acle_se_<sym> pairs.
    if (!config->armCMSESupport) {
      error("CMSE is only supported by ARMv8-M architecture or later");
      config->cmseImplib = false;
      break;
    }

    // Try to find the associated symbol definition.
    // Symbol must have external linkage.
    StringRef name = acleSeSym->getName().substr(std::strlen(ACLESESYM_PREFIX));
    Symbol *sym = symtab.find(name);
    if (!sym) {
      error(toString(acleSeSym->file) + ": cmse special symbol '" +
            acleSeSym->getName() +
            "' detected, but no associated entry function definition '" + name +
            "' with external linkage found");
      continue;
    }

    // Both symbols must be Thumb function definitions (checked below).
    std::string errMsg = checkCmseSymAttributes(acleSeSym, sym);
    if (!errMsg.empty()) {
      error(errMsg);
      continue;
    }

    // <sym> may be redefined later in the link in .gnu.sgstubs
    symtab.cmseSymMap[name] = {acleSeSym, sym};
  }

  // If this is an Arm CMSE secure app, replace references to entry symbol <sym>
  // with its corresponding special symbol __acle_se_<sym>.
  parallelForEach(ctx.objectFiles, [&](InputFile *file) {
    MutableArrayRef<Symbol *> syms = file->getMutableSymbols();
    for (size_t i = 0, e = syms.size(); i != e; ++i) {
      StringRef symName = syms[i]->getName();
      if (symtab.cmseSymMap.count(symName))
        syms[i] = symtab.cmseSymMap[symName].acleSeSym;
    }
  });
}
// One secure gateway veneer in .gnu.sgstubs. `sym` is the entry function
// symbol visible to the non-secure state; `acleSeSym` is the __acle_se_
// prefixed definition in secure code that the veneer branches to.
class elf::ArmCmseSGVeneer {
public:
  ArmCmseSGVeneer(Symbol *sym, Symbol *acleSeSym,
                  std::optional<uint64_t> addr = std::nullopt)
      : sym(sym), acleSeSym(acleSeSym), entAddr{addr} {}
  // Every veneer has the same fixed size.
  static const size_t size{ACLESESYM_SIZE};
  // Address carried over from the input import library, if the entry
  // function existed in a previous link; std::nullopt for new veneers.
  const std::optional<uint64_t> getAddr() const { return entAddr; };

  Symbol *sym;
  Symbol *acleSeSym;
  // Offset of this veneer within .gnu.sgstubs; assigned in
  // ArmCmseSGSection::finalizeContents().
  uint64_t offset = 0;

private:
  const std::optional<uint64_t> entAddr;
};
// Builds the .gnu.sgstubs section holding the secure gateway veneers, and
// diagnoses inconsistencies between this link and the input import library.
ArmCmseSGSection::ArmCmseSGSection()
    : SyntheticSection(llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR,
                       llvm::ELF::SHT_PROGBITS,
                       /*alignment=*/32, ".gnu.sgstubs") {
  entsize = ACLESESYM_SIZE;
  // The range of addresses used in the CMSE import library should be fixed.
  for (auto &[_, sym] : symtab.cmseImportLib) {
    if (impLibMaxAddr <= sym->value)
      impLibMaxAddr = sym->value + sym->size;
  }
  if (symtab.cmseSymMap.empty())
    return;
  addMappingSymbol();
  for (auto &[_, entryFunc] : symtab.cmseSymMap)
    addSGVeneer(cast<Defined>(entryFunc.acleSeSym),
                cast<Defined>(entryFunc.sym));
  // Warn about entry functions recorded in the import library that are no
  // longer present in this application.
  for (auto &[_, sym] : symtab.cmseImportLib) {
    if (!symtab.inCMSEOutImpLib.count(sym->getName()))
      warn("entry function '" + sym->getName() +
           "' from CMSE import library is not present in secure application");
  }

  // Without an output import library (--out-implib), newly introduced entry
  // functions cannot be communicated to the non-secure side; warn.
  if (!symtab.cmseImportLib.empty() && config->cmseOutputLib.empty()) {
    for (auto &[_, entryFunc] : symtab.cmseSymMap) {
      Symbol *sym = entryFunc.sym;
      if (!symtab.inCMSEOutImpLib.count(sym->getName()))
        warn("new entry function '" + sym->getName() +
             "' introduced but no output import library specified");
    }
  }
}
// Records the (__acle_se_<sym>, <sym>) entry-function pair, and creates a
// secure gateway veneer for it when the two symbols still share an address.
// Veneers whose entry function appeared in the input import library keep
// their previous address; others are counted as new entries.
void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {
  entries.emplace_back(acleSeSym, sym);
  if (symtab.cmseImportLib.count(sym->getName()))
    symtab.inCMSEOutImpLib[sym->getName()] = true;
  // Symbol addresses different, nothing to do.
  if (acleSeSym->file != sym->file ||
      cast<Defined>(*acleSeSym).value != cast<Defined>(*sym).value)
    return;
  // Only secure symbols with values equal to those of their non-secure
  // counterparts need to be in the .gnu.sgstubs section.
  ArmCmseSGVeneer *ss = nullptr;
  if (symtab.cmseImportLib.count(sym->getName())) {
    // Reuse the address assigned by the previous link.
    Defined *impSym = symtab.cmseImportLib[sym->getName()];
    ss = make<ArmCmseSGVeneer>(sym, acleSeSym, impSym->value);
  } else {
    ss = make<ArmCmseSGVeneer>(sym, acleSeSym);
    ++newEntries;
  }
  sgVeneers.emplace_back(ss);
}
// Writes each secure gateway veneer: an SG instruction followed by a B.W
// branch to the __acle_se_ prefixed function in secure code.
void ArmCmseSGSection::writeTo(uint8_t *buf) {
  for (ArmCmseSGVeneer *s : sgVeneers) {
    uint8_t *p = buf + s->offset;
    write16(p + 0, 0xe97f); // SG
    write16(p + 2, 0xe97f);
    write16(p + 4, 0xf000); // B.W S
    write16(p + 6, 0xb000);
    // Branch target is the __acle_se_ symbol. NOTE(review): the subtrahend
    // uses offset + s->size rather than the branch address offset + 4 —
    // presumably this equals the Thumb branch's PC (instruction address + 4,
    // with ACLESESYM_SIZE == 8); confirm against ACLESESYM_SIZE.
    target->relocateNoSym(p + 4, R_ARM_THM_JUMP24,
                          s->acleSeSym->getVA() -
                              (getVA() + s->offset + s->size));
  }
}
// The veneers are Thumb code, so mark the start of the section with a "$t"
// mapping symbol.
void ArmCmseSGSection::addMappingSymbol() {
  addSyntheticLocal("$t", STT_NOTYPE, /*off=*/0, /*size=*/0, *this);
}
size_t ArmCmseSGSection::getSize() const {
1400
if (sgVeneers.empty())
1401
return (impLibMaxAddr ? impLibMaxAddr - getVA() : 0) + newEntries * entsize;
1402
1403
return entries.size() * entsize;
1404
}
1405
1406
// Assigns offsets to the veneers: veneers whose address was fixed by the
// input import library come first, sorted by that address, followed by newly
// created veneers. Each entry-function symbol <sym> is then redefined to
// label its veneer in this section.
void ArmCmseSGSection::finalizeContents() {
  if (sgVeneers.empty())
    return;

  // Partition: [begin, it) have addresses from the import library.
  auto it =
      std::stable_partition(sgVeneers.begin(), sgVeneers.end(),
                            [](auto *i) { return i->getAddr().has_value(); });
  std::sort(sgVeneers.begin(), it, [](auto *a, auto *b) {
    return a->getAddr().value() < b->getAddr().value();
  });
  // This is the partition of the veneers with fixed addresses.
  uint64_t addr = (*sgVeneers.begin())->getAddr().has_value()
                      ? (*sgVeneers.begin())->getAddr().value()
                      : getVA();
  // Check if the start address of '.gnu.sgstubs' corresponds to the
  // veneer with the lowest address (ignoring the Thumb bit, bit 0).
  if ((getVA() & ~1) != (addr & ~1)) {
    error("start address of '.gnu.sgstubs' is different from previous link");
    return;
  }

  for (size_t i = 0; i < sgVeneers.size(); ++i) {
    ArmCmseSGVeneer *s = sgVeneers[i];
    s->offset = i * s->size;
    // Redefine <sym> to label the veneer; bit 0 set marks it as Thumb code.
    Defined(file, StringRef(), s->sym->binding, s->sym->stOther, s->sym->type,
            s->offset | 1, s->size, this)
        .overwrite(*s->sym);
  }
}
// Write the CMSE import library to disk.
// The CMSE import library is a relocatable object with only a symbol table.
// The symbols are copies of the (absolute) symbols of the secure gateways
// in the executable output by this link.
// See Arm® v8-M Security Extensions: Requirements on Development Tools
// https://developer.arm.com/documentation/ecm0359818/latest
template <typename ELFT> void elf::writeARMCmseImportLib() {
  StringTableSection *shstrtab =
      make<StringTableSection>(".shstrtab", /*dynamic=*/false);
  StringTableSection *strtab =
      make<StringTableSection>(".strtab", /*dynamic=*/false);
  SymbolTableBaseSection *impSymTab = make<SymbolTableSection<ELFT>>(*strtab);

  // The output file holds exactly three sections: .strtab, .symtab, .shstrtab.
  SmallVector<std::pair<OutputSection *, SyntheticSection *>, 0> osIsPairs;
  osIsPairs.emplace_back(make<OutputSection>(strtab->name, 0, 0), strtab);
  osIsPairs.emplace_back(make<OutputSection>(impSymTab->name, 0, 0), impSymTab);
  osIsPairs.emplace_back(make<OutputSection>(shstrtab->name, 0, 0), shstrtab);

  // Emit the entry-function symbols in ascending address order.
  std::sort(symtab.cmseSymMap.begin(), symtab.cmseSymMap.end(),
            [](const auto &a, const auto &b) -> bool {
              return a.second.sym->getVA() < b.second.sym->getVA();
            });
  // Copy the secure gateway entry symbols to the import library symbol table.
  for (auto &p : symtab.cmseSymMap) {
    Defined *d = cast<Defined>(p.second.sym);
    impSymTab->addSymbol(makeDefined(
        ctx.internalFile, d->getName(), d->computeBinding(),
        /*stOther=*/0, STT_FUNC, d->getVA(), d->getSize(), nullptr));
  }

  // Lay the sections out sequentially after the ELF header.
  size_t idx = 0;
  uint64_t off = sizeof(typename ELFT::Ehdr);
  for (auto &[osec, isec] : osIsPairs) {
    osec->sectionIndex = ++idx;
    osec->recordSection(isec);
    osec->finalizeInputSections();
    osec->shName = shstrtab->addString(osec->name);
    osec->size = isec->getSize();
    isec->finalizeContents();
    osec->offset = alignToPowerOf2(off, osec->addralign);
    off = osec->offset + osec->size;
  }

  const uint64_t sectionHeaderOff = alignToPowerOf2(off, config->wordsize);
  // +1 for the initial null section header (index 0).
  const auto shnum = osIsPairs.size() + 1;
  const uint64_t fileSize =
      sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr);
  const unsigned flags =
      config->mmapOutputFile ? 0 : (unsigned)FileOutputBuffer::F_no_mmap;
  unlinkAsync(config->cmseOutputLib);
  Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
      FileOutputBuffer::create(config->cmseOutputLib, fileSize, flags);
  if (!bufferOrErr) {
    error("failed to open " + config->cmseOutputLib + ": " +
          llvm::toString(bufferOrErr.takeError()));
    return;
  }

  // Write the ELF Header
  std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr;
  uint8_t *const buf = buffer->getBufferStart();
  memcpy(buf, "\177ELF", 4);
  auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf);
  eHdr->e_type = ET_REL;
  eHdr->e_entry = 0;
  eHdr->e_shoff = sectionHeaderOff;
  eHdr->e_ident[EI_CLASS] = ELFCLASS32;
  eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB;
  eHdr->e_ident[EI_VERSION] = EV_CURRENT;
  eHdr->e_ident[EI_OSABI] = config->osabi;
  eHdr->e_ident[EI_ABIVERSION] = 0;
  eHdr->e_machine = EM_ARM;
  eHdr->e_version = EV_CURRENT;
  eHdr->e_flags = config->eflags;
  eHdr->e_ehsize = sizeof(typename ELFT::Ehdr);
  eHdr->e_phnum = 0;
  eHdr->e_shentsize = sizeof(typename ELFT::Shdr);
  eHdr->e_phoff = 0;
  eHdr->e_phentsize = 0;
  eHdr->e_shnum = shnum;
  eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex;

  // Write the section header table. The pre-increment skips the slot of the
  // null section header at index 0 — presumably left zeroed by the freshly
  // created buffer; confirm FileOutputBuffer zero-initializes.
  auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff);
  for (auto &[osec, _] : osIsPairs)
    osec->template writeHeaderTo<ELFT>(++sHdrs);

  // Write section contents to a mmap'ed file.
  {
    parallel::TaskGroup tg;
    for (auto &[osec, _] : osIsPairs)
      osec->template writeTo<ELFT>(buf + osec->offset, tg);
  }

  if (auto e = buffer->commit())
    fatal("failed to write output '" + buffer->getPath() +
          "': " + toString(std::move(e)));
}
// Returns the singleton TargetInfo describing the Arm target.
TargetInfo *elf::getARMTargetInfo() {
  static ARM t;
  return &t;
}
// Explicit instantiations for each supported ELF layout.
template void elf::writeARMCmseImportLib<ELF32LE>();
template void elf::writeARMCmseImportLib<ELF32BE>();
template void elf::writeARMCmseImportLib<ELF64LE>();
template void elf::writeARMCmseImportLib<ELF64BE>();

template void ObjFile<ELF32LE>::importCmseSymbols();
template void ObjFile<ELF32BE>::importCmseSymbols();
template void ObjFile<ELF64LE>::importCmseSymbols();
template void ObjFile<ELF64BE>::importCmseSymbols();