Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/lld/ELF/Arch/LoongArch.cpp
34889 views
1
//===- LoongArch.cpp ------------------------------------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
9
#include "InputFiles.h"
10
#include "OutputSections.h"
11
#include "Symbols.h"
12
#include "SyntheticSections.h"
13
#include "Target.h"
14
#include "llvm/BinaryFormat/ELF.h"
15
#include "llvm/Support/LEB128.h"
16
17
using namespace llvm;
18
using namespace llvm::object;
19
using namespace llvm::support::endian;
20
using namespace llvm::ELF;
21
using namespace lld;
22
using namespace lld::elf;
23
24
namespace {
25
class LoongArch final : public TargetInfo {
26
public:
27
LoongArch();
28
uint32_t calcEFlags() const override;
29
int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
30
void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
31
void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
32
void writePltHeader(uint8_t *buf) const override;
33
void writePlt(uint8_t *buf, const Symbol &sym,
34
uint64_t pltEntryAddr) const override;
35
RelType getDynRel(RelType type) const override;
36
RelExpr getRelExpr(RelType type, const Symbol &s,
37
const uint8_t *loc) const override;
38
bool usesOnlyLowPageBits(RelType type) const override;
39
void relocate(uint8_t *loc, const Relocation &rel,
40
uint64_t val) const override;
41
bool relaxOnce(int pass) const override;
42
void finalizeRelax(int passes) const override;
43
};
44
} // end anonymous namespace
45
46
namespace {
47
// Instruction words with all register and immediate fields zero, for the
// instructions this file emits or recognizes. Operands are OR-ed in by insn().
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};
61
62
// Register numbers used as operands when this file encodes instructions.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
71
} // namespace
72
73
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
// "page") for the next instruction to add in the "page offset". (`pcalau12i`
// stands for something like "PC ALigned Add Upper that starts from the 12th
// bit, Immediate".)
//
// Here a "page" is in fact just another way to refer to the 12-bit range
// allowed by the immediate field of the addi/ld/st instructions, and not
// related to the system or the kernel's actual page size. The semantics happen
// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
//
// Returns `p` with bits [11:0] cleared.
static uint64_t getLoongArchPage(uint64_t p) {
  return p & ~static_cast<uint64_t>(0xfff);
}
87
88
// Returns bits [11:0] of `val`.
static uint32_t lo12(uint32_t val) {
  constexpr uint32_t low12Mask = (1u << 12) - 1;
  return val & low12Mask;
}
89
90
// Calculate the adjusted page delta between dest and PC.
91
uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
92
// Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
93
// + lu52i.d`, they must be adjacent so that we can infer the PC of
94
// `pcalau12i` when calculating the page delta for the other two instructions
95
// (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
96
// complicated. Just use psABI recommended algorithm.
97
uint64_t pcalau12i_pc;
98
switch (type) {
99
case R_LARCH_PCALA64_LO20:
100
case R_LARCH_GOT64_PC_LO20:
101
case R_LARCH_TLS_IE64_PC_LO20:
102
case R_LARCH_TLS_DESC64_PC_LO20:
103
pcalau12i_pc = pc - 8;
104
break;
105
case R_LARCH_PCALA64_HI12:
106
case R_LARCH_GOT64_PC_HI12:
107
case R_LARCH_TLS_IE64_PC_HI12:
108
case R_LARCH_TLS_DESC64_PC_HI12:
109
pcalau12i_pc = pc - 12;
110
break;
111
default:
112
pcalau12i_pc = pc;
113
break;
114
}
115
uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc);
116
if (dest & 0x800)
117
result += 0x1000 - 0x1'0000'0000;
118
if (result & 0x8000'0000)
119
result += 0x1'0000'0000;
120
return result;
121
}
122
123
// Returns the upper 20 bits of `val` after adding 0x800, i.e. rounded to the
// nearest multiple of 4096. The addition wraps modulo 2^32.
static uint32_t hi20(uint32_t val) {
  const uint32_t rounded = val + 0x800;
  return rounded >> 12;
}
124
125
// Builds an instruction word by OR-ing three operand fields into opcode `op`:
// `d` at bit 0, `j` at bit 5 and `k` at bit 10. The fields are not masked, so
// callers must pass values that already fit their slot.
static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  return op | d | (j << 5) | (k << 10);
}
128
129
// Extract bits v[begin:end], where range is inclusive and `begin` is the
// higher bit index. The result is truncated to 32 bits.
//
// begin == 63 is handled separately because `1ULL << 64` would be undefined.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}
133
134
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
135
uint32_t immLo = extractBits(imm, 15, 0);
136
uint32_t immHi = extractBits(imm, 20, 16);
137
return (insn & 0xfc0003e0) | (immLo << 10) | immHi;
138
}
139
140
static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
141
uint32_t immLo = extractBits(imm, 15, 0);
142
uint32_t immHi = extractBits(imm, 25, 16);
143
return (insn & 0xfc000000) | (immLo << 10) | immHi;
144
}
145
146
static uint32_t setJ20(uint32_t insn, uint32_t imm) {
147
return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
148
}
149
150
static uint32_t setK12(uint32_t insn, uint32_t imm) {
151
return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10);
152
}
153
154
static uint32_t setK16(uint32_t insn, uint32_t imm) {
155
return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10);
156
}
157
158
static bool isJirl(uint32_t insn) {
159
return (insn & 0xfc000000) == JIRL;
160
}
161
162
static void handleUleb128(uint8_t *loc, uint64_t val) {
163
const uint32_t maxcount = 1 + 64 / 7;
164
uint32_t count;
165
const char *error = nullptr;
166
uint64_t orig = decodeULEB128(loc, &count, nullptr, &error);
167
if (count > maxcount || (count == maxcount && error))
168
errorOrWarn(getErrorLocation(loc) + "extra space for uleb128");
169
uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL;
170
encodeULEB128((orig + val) & mask, loc, count);
171
}
172
173
// Sets the target-wide constants: page sizes, the trap instruction, the
// dynamic relocation types for the configured word size, and the PLT/GOT
// layout parameters.
LoongArch::LoongArch() {
  // The LoongArch ISA itself does not have a limit on page sizes. According to
  // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
  // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
  // "unlimited".
  // However, practically the maximum usable page size is constrained by the
  // kernel implementation, and 64KiB is the biggest non-huge page size
  // supported by Linux as of v6.4. The most widespread page size in use,
  // though, is 16KiB.
  defaultCommonPageSize = 16384;
  defaultMaxPageSize = 65536;
  write32le(trapInstr.data(), BREAK); // break 0

  copyRel = R_LARCH_COPY;
  pltRel = R_LARCH_JUMP_SLOT;
  relativeRel = R_LARCH_RELATIVE;
  iRelativeRel = R_LARCH_IRELATIVE;

  if (config->is64) {
    symbolicRel = R_LARCH_64;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
    tlsOffsetRel = R_LARCH_TLS_DTPREL64;
    tlsGotRel = R_LARCH_TLS_TPREL64;
    tlsDescRel = R_LARCH_TLS_DESC64;
  } else {
    symbolicRel = R_LARCH_32;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
    tlsOffsetRel = R_LARCH_TLS_DTPREL32;
    tlsGotRel = R_LARCH_TLS_TPREL32;
    tlsDescRel = R_LARCH_TLS_DESC32;
  }

  gotRel = symbolicRel;

  // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
  gotPltHeaderEntriesNum = 2;

  // Eight 4-byte instructions in the header, four in each entry; see
  // writePltHeader() and writePlt().
  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}
214
215
static uint32_t getEFlags(const InputFile *f) {
216
if (config->is64)
217
return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
218
return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
219
}
220
221
static bool inputFileHasCode(const InputFile *f) {
222
for (const auto *sec : f->getSections())
223
if (sec && sec->flags & SHF_EXECINSTR)
224
return true;
225
226
return false;
227
}
228
229
uint32_t LoongArch::calcEFlags() const {
230
// If there are only binary input files (from -b binary), use a
231
// value of 0 for the ELF header flags.
232
if (ctx.objectFiles.empty())
233
return 0;
234
235
uint32_t target = 0;
236
const InputFile *targetFile;
237
for (const InputFile *f : ctx.objectFiles) {
238
// Do not enforce ABI compatibility if the input file does not contain code.
239
// This is useful for allowing linkage with data-only object files produced
240
// with tools like objcopy, that have zero e_flags.
241
if (!inputFileHasCode(f))
242
continue;
243
244
// Take the first non-zero e_flags as the reference.
245
uint32_t flags = getEFlags(f);
246
if (target == 0 && flags != 0) {
247
target = flags;
248
targetFile = f;
249
}
250
251
if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
252
(target & EF_LOONGARCH_ABI_MODIFIER_MASK))
253
error(toString(f) +
254
": cannot link object files with different ABI from " +
255
toString(targetFile));
256
257
// We cannot process psABI v1.x / object ABI v0 files (containing stack
258
// relocations), unlike ld.bfd.
259
//
260
// Instead of blindly accepting every v0 object and only failing at
261
// relocation processing time, just disallow interlink altogether. We
262
// don't expect significant usage of object ABI v0 in the wild (the old
263
// world may continue using object ABI v0 for a while, but as it's not
264
// binary-compatible with the upstream i.e. new-world ecosystem, it's not
265
// being considered here).
266
//
267
// There are briefly some new-world systems with object ABI v0 binaries too.
268
// It is because these systems were built before the new ABI was finalized.
269
// These are not supported either due to the extremely small number of them,
270
// and the few impacted users are advised to simply rebuild world or
271
// reinstall a recent system.
272
if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
273
error(toString(f) + ": unsupported object file ABI version");
274
}
275
276
return target;
277
}
278
279
int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const {
280
switch (type) {
281
default:
282
internalLinkerError(getErrorLocation(buf),
283
"cannot read addend for relocation " + toString(type));
284
return 0;
285
case R_LARCH_32:
286
case R_LARCH_TLS_DTPMOD32:
287
case R_LARCH_TLS_DTPREL32:
288
case R_LARCH_TLS_TPREL32:
289
return SignExtend64<32>(read32le(buf));
290
case R_LARCH_64:
291
case R_LARCH_TLS_DTPMOD64:
292
case R_LARCH_TLS_DTPREL64:
293
case R_LARCH_TLS_TPREL64:
294
return read64le(buf);
295
case R_LARCH_RELATIVE:
296
case R_LARCH_IRELATIVE:
297
return config->is64 ? read64le(buf) : read32le(buf);
298
case R_LARCH_NONE:
299
case R_LARCH_JUMP_SLOT:
300
// These relocations are defined as not having an implicit addend.
301
return 0;
302
case R_LARCH_TLS_DESC32:
303
return read32le(buf + 4);
304
case R_LARCH_TLS_DESC64:
305
return read64le(buf + 8);
306
}
307
}
308
309
void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const {
310
if (config->is64)
311
write64le(buf, in.plt->getVA());
312
else
313
write32le(buf, in.plt->getVA());
314
}
315
316
void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
317
if (config->writeAddends) {
318
if (config->is64)
319
write64le(buf, s.getVA());
320
else
321
write32le(buf, s.getVA());
322
}
323
}
324
325
void LoongArch::writePltHeader(uint8_t *buf) const {
326
// The LoongArch PLT is currently structured just like that of RISCV.
327
// Annoyingly, this means the PLT is still using `pcaddu12i` to perform
328
// PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
329
// in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
330
// is used everywhere else involving PC-relative operations in the LoongArch
331
// ELF psABI v2.00.
332
//
333
// The `pcrel_{hi20,lo12}` operators are illustrative only and not really
334
// supported by LoongArch assemblers.
335
//
336
// pcaddu12i $t2, %pcrel_hi20(.got.plt)
337
// sub.[wd] $t1, $t1, $t3
338
// ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve
339
// addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]
340
// addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
341
// srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]
342
// ld.[wd] $t0, $t0, Wordsize ; t0 = link_map
343
// jr $t3
344
uint32_t offset = in.gotPlt->getVA() - in.plt->getVA();
345
uint32_t sub = config->is64 ? SUB_D : SUB_W;
346
uint32_t ld = config->is64 ? LD_D : LD_W;
347
uint32_t addi = config->is64 ? ADDI_D : ADDI_W;
348
uint32_t srli = config->is64 ? SRLI_D : SRLI_W;
349
write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0));
350
write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3));
351
write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset)));
352
write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12)));
353
write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset)));
354
write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2));
355
write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize));
356
write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0));
357
}
358
359
void LoongArch::writePlt(uint8_t *buf, const Symbol &sym,
360
uint64_t pltEntryAddr) const {
361
// See the comment in writePltHeader for reason why pcaddu12i is used instead
362
// of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
363
//
364
// pcaddu12i $t3, %pcrel_hi20([email protected])
365
// ld.[wd] $t3, $t3, %pcrel_lo12([email protected])
366
// jirl $t1, $t3, 0
367
// nop
368
uint32_t offset = sym.getGotPltVA() - pltEntryAddr;
369
write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0));
370
write32le(buf + 4,
371
insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset)));
372
write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0));
373
write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0));
374
}
375
376
// Returns `type` unchanged when it is the target's symbolic relocation
// (R_LARCH_32 or R_LARCH_64, as chosen in the constructor), and R_LARCH_NONE
// for every other type.
RelType LoongArch::getDynRel(RelType type) const {
  return type == target->symbolicRel ? type
                                     : static_cast<RelType>(R_LARCH_NONE);
}
380
381
// Maps relocation `type` to the RelExpr describing how its value is computed.
// `loc` points at the relocated bytes; it is read only for R_LARCH_PCALA_LO12,
// to tell a JIRL apart from other instructions. Unknown types are reported as
// an error against symbol `s` and mapped to R_NONE.
RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
                              const uint8_t *loc) const {
  switch (type) {
  case R_LARCH_NONE:
  case R_LARCH_MARK_LA:
  case R_LARCH_MARK_PCREL:
    return R_NONE;
  case R_LARCH_32:
  case R_LARCH_64:
  case R_LARCH_ABS_HI20:
  case R_LARCH_ABS_LO12:
  case R_LARCH_ABS64_LO20:
  case R_LARCH_ABS64_HI12:
    return R_ABS;
  case R_LARCH_PCALA_LO12:
    // We could just R_ABS, but the JIRL instruction reuses the relocation type
    // for a different purpose. The questionable usage is part of glibc 2.37
    // libc_nonshared.a [1], which is linked into user programs, so we have to
    // work around it for a while, even if a new relocation type may be
    // introduced in the future [2].
    //
    // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
    // [2]: https://github.com/loongson/la-abi-specs/pull/3
    return isJirl(read32le(loc)) ? R_PLT : R_ABS;
  case R_LARCH_TLS_DTPREL32:
  case R_LARCH_TLS_DTPREL64:
    return R_DTPREL;
  case R_LARCH_TLS_TPREL32:
  case R_LARCH_TLS_TPREL64:
  case R_LARCH_TLS_LE_HI20:
  case R_LARCH_TLS_LE_HI20_R:
  case R_LARCH_TLS_LE_LO12:
  case R_LARCH_TLS_LE_LO12_R:
  case R_LARCH_TLS_LE64_LO20:
  case R_LARCH_TLS_LE64_HI12:
    return R_TPREL;
  case R_LARCH_ADD6:
  case R_LARCH_ADD8:
  case R_LARCH_ADD16:
  case R_LARCH_ADD32:
  case R_LARCH_ADD64:
  case R_LARCH_ADD_ULEB128:
  case R_LARCH_SUB6:
  case R_LARCH_SUB8:
  case R_LARCH_SUB16:
  case R_LARCH_SUB32:
  case R_LARCH_SUB64:
  case R_LARCH_SUB_ULEB128:
    // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
    // the RelExpr to avoid code duplication.
    return R_RISCV_ADD;
  case R_LARCH_32_PCREL:
  case R_LARCH_64_PCREL:
  case R_LARCH_PCREL20_S2:
    return R_PC;
  case R_LARCH_B16:
  case R_LARCH_B21:
  case R_LARCH_B26:
  case R_LARCH_CALL36:
    return R_PLT_PC;
  case R_LARCH_GOT_PC_HI20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_TLS_IE_PC_HI20:
  case R_LARCH_TLS_IE64_PC_LO20:
  case R_LARCH_TLS_IE64_PC_HI12:
    return R_LOONGARCH_GOT_PAGE_PC;
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
    return R_LOONGARCH_GOT;
  case R_LARCH_TLS_LD_PC_HI20:
  case R_LARCH_TLS_GD_PC_HI20:
    return R_LOONGARCH_TLSGD_PAGE_PC;
  case R_LARCH_PCALA_HI20:
    // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT
    // anyway so why waste time checking only to get everything relaxed back to
    // it?
    //
    // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
    // both the HI20 and LO12 to potentially refer to the PLT. But in reality
    // the HI20 reloc appears earlier, and the relocs don't contain enough
    // information to let us properly resolve semantics per symbol.
    // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20
    // relocs, hence it is nearly impossible to 100% accurately determine each
    // HI20's "flavor" without taking big performance hits, in the presence of
    // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far
    // apart that relationship is not certain anymore), and programmer mistakes
    // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).
    //
    // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
    // every HI20 reloc referring to the same symbol differently; this is not
    // feasible with the current function signature of getRelExpr that doesn't
    // allow for such inter-pass state.
    //
    // So, unfortunately we have to again workaround this quirk the same way as
    // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only
    // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later
    // stage.
    return R_LOONGARCH_PLT_PAGE_PC;
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_PCALA64_HI12:
    return R_LOONGARCH_PAGE_PC;
  case R_LARCH_GOT_HI20:
  case R_LARCH_GOT_LO12:
  case R_LARCH_GOT64_LO20:
  case R_LARCH_GOT64_HI12:
  case R_LARCH_TLS_IE_HI20:
  case R_LARCH_TLS_IE_LO12:
  case R_LARCH_TLS_IE64_LO20:
  case R_LARCH_TLS_IE64_HI12:
    return R_GOT;
  case R_LARCH_TLS_LD_HI20:
    return R_TLSLD_GOT;
  case R_LARCH_TLS_GD_HI20:
    return R_TLSGD_GOT;
  case R_LARCH_TLS_LE_ADD_R:
  case R_LARCH_RELAX:
    return config->relax ? R_RELAX_HINT : R_NONE;
  case R_LARCH_ALIGN:
    return R_RELAX_HINT;
  case R_LARCH_TLS_DESC_PC_HI20:
  case R_LARCH_TLS_DESC64_PC_LO20:
  case R_LARCH_TLS_DESC64_PC_HI12:
    return R_LOONGARCH_TLSDESC_PAGE_PC;
  case R_LARCH_TLS_DESC_PC_LO12:
  case R_LARCH_TLS_DESC_LD:
  case R_LARCH_TLS_DESC_HI20:
  case R_LARCH_TLS_DESC_LO12:
  case R_LARCH_TLS_DESC64_LO20:
  case R_LARCH_TLS_DESC64_HI12:
    return R_TLSDESC;
  case R_LARCH_TLS_DESC_CALL:
    return R_TLSDESC_CALL;
  case R_LARCH_TLS_LD_PCREL20_S2:
    return R_TLSLD_PC;
  case R_LARCH_TLS_GD_PCREL20_S2:
    return R_TLSGD_PC;
  case R_LARCH_TLS_DESC_PCREL20_S2:
    return R_TLSDESC_PC;

  // Other known relocs that are explicitly unimplemented:
  //
  // - psABI v1 relocs that need a stateful stack machine to work, and not
  //   required when implementing psABI v2;
  // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
  //   two GNU vtable-related relocs).
  //
  // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}
535
536
// Returns true for the listed *_LO12 relocation types and false for every
// other type.
bool LoongArch::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_LARCH_PCALA_LO12:
  case R_LARCH_GOT_LO12:
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
  case R_LARCH_TLS_DESC_LO12:
  case R_LARCH_TLS_DESC_PC_LO12:
    return true;
  }
}
549
550
void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
551
uint64_t val) const {
552
switch (rel.type) {
553
case R_LARCH_32_PCREL:
554
checkInt(loc, val, 32, rel);
555
[[fallthrough]];
556
case R_LARCH_32:
557
case R_LARCH_TLS_DTPREL32:
558
write32le(loc, val);
559
return;
560
case R_LARCH_64:
561
case R_LARCH_TLS_DTPREL64:
562
case R_LARCH_64_PCREL:
563
write64le(loc, val);
564
return;
565
566
// Relocs intended for `pcaddi`.
567
case R_LARCH_PCREL20_S2:
568
case R_LARCH_TLS_LD_PCREL20_S2:
569
case R_LARCH_TLS_GD_PCREL20_S2:
570
case R_LARCH_TLS_DESC_PCREL20_S2:
571
checkInt(loc, val, 22, rel);
572
checkAlignment(loc, val, 4, rel);
573
write32le(loc, setJ20(read32le(loc), val >> 2));
574
return;
575
576
case R_LARCH_B16:
577
checkInt(loc, val, 18, rel);
578
checkAlignment(loc, val, 4, rel);
579
write32le(loc, setK16(read32le(loc), val >> 2));
580
return;
581
582
case R_LARCH_B21:
583
checkInt(loc, val, 23, rel);
584
checkAlignment(loc, val, 4, rel);
585
write32le(loc, setD5k16(read32le(loc), val >> 2));
586
return;
587
588
case R_LARCH_B26:
589
checkInt(loc, val, 28, rel);
590
checkAlignment(loc, val, 4, rel);
591
write32le(loc, setD10k16(read32le(loc), val >> 2));
592
return;
593
594
case R_LARCH_CALL36: {
595
// This relocation is designed for adjacent pcaddu18i+jirl pairs that
596
// are patched in one time. Because of sign extension of these insns'
597
// immediate fields, the relocation range is [-128G - 0x20000, +128G -
598
// 0x20000) (of course must be 4-byte aligned).
599
if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38))
600
reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000,
601
llvm::maxIntN(38) - 0x20000);
602
checkAlignment(loc, val, 4, rel);
603
// Since jirl performs sign extension on the offset immediate, adds (1<<17)
604
// to original val to get the correct hi20.
605
uint32_t hi20 = extractBits(val + (1 << 17), 37, 18);
606
// Despite the name, the lower part is actually 18 bits with 4-byte aligned.
607
uint32_t lo16 = extractBits(val, 17, 2);
608
write32le(loc, setJ20(read32le(loc), hi20));
609
write32le(loc + 4, setK16(read32le(loc + 4), lo16));
610
return;
611
}
612
613
// Relocs intended for `addi`, `ld` or `st`.
614
case R_LARCH_PCALA_LO12:
615
// We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
616
// on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
617
// removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
618
// its immediate slot width is different too (16, not 12).
619
// In this case, process like an R_LARCH_B16, but without overflow checking
620
// and only taking the value's lowest 12 bits.
621
if (isJirl(read32le(loc))) {
622
checkAlignment(loc, val, 4, rel);
623
val = SignExtend64<12>(val);
624
write32le(loc, setK16(read32le(loc), val >> 2));
625
return;
626
}
627
[[fallthrough]];
628
case R_LARCH_ABS_LO12:
629
case R_LARCH_GOT_PC_LO12:
630
case R_LARCH_GOT_LO12:
631
case R_LARCH_TLS_LE_LO12:
632
case R_LARCH_TLS_IE_PC_LO12:
633
case R_LARCH_TLS_IE_LO12:
634
case R_LARCH_TLS_LE_LO12_R:
635
case R_LARCH_TLS_DESC_PC_LO12:
636
case R_LARCH_TLS_DESC_LO12:
637
write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0)));
638
return;
639
640
// Relocs intended for `lu12i.w` or `pcalau12i`.
641
case R_LARCH_ABS_HI20:
642
case R_LARCH_PCALA_HI20:
643
case R_LARCH_GOT_PC_HI20:
644
case R_LARCH_GOT_HI20:
645
case R_LARCH_TLS_LE_HI20:
646
case R_LARCH_TLS_IE_PC_HI20:
647
case R_LARCH_TLS_IE_HI20:
648
case R_LARCH_TLS_LD_PC_HI20:
649
case R_LARCH_TLS_LD_HI20:
650
case R_LARCH_TLS_GD_PC_HI20:
651
case R_LARCH_TLS_GD_HI20:
652
case R_LARCH_TLS_DESC_PC_HI20:
653
case R_LARCH_TLS_DESC_HI20:
654
write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12)));
655
return;
656
case R_LARCH_TLS_LE_HI20_R:
657
write32le(loc, setJ20(read32le(loc), extractBits(val + 0x800, 31, 12)));
658
return;
659
660
// Relocs intended for `lu32i.d`.
661
case R_LARCH_ABS64_LO20:
662
case R_LARCH_PCALA64_LO20:
663
case R_LARCH_GOT64_PC_LO20:
664
case R_LARCH_GOT64_LO20:
665
case R_LARCH_TLS_LE64_LO20:
666
case R_LARCH_TLS_IE64_PC_LO20:
667
case R_LARCH_TLS_IE64_LO20:
668
case R_LARCH_TLS_DESC64_PC_LO20:
669
case R_LARCH_TLS_DESC64_LO20:
670
write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32)));
671
return;
672
673
// Relocs intended for `lu52i.d`.
674
case R_LARCH_ABS64_HI12:
675
case R_LARCH_PCALA64_HI12:
676
case R_LARCH_GOT64_PC_HI12:
677
case R_LARCH_GOT64_HI12:
678
case R_LARCH_TLS_LE64_HI12:
679
case R_LARCH_TLS_IE64_PC_HI12:
680
case R_LARCH_TLS_IE64_HI12:
681
case R_LARCH_TLS_DESC64_PC_HI12:
682
case R_LARCH_TLS_DESC64_HI12:
683
write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52)));
684
return;
685
686
case R_LARCH_ADD6:
687
*loc = (*loc & 0xc0) | ((*loc + val) & 0x3f);
688
return;
689
case R_LARCH_ADD8:
690
*loc += val;
691
return;
692
case R_LARCH_ADD16:
693
write16le(loc, read16le(loc) + val);
694
return;
695
case R_LARCH_ADD32:
696
write32le(loc, read32le(loc) + val);
697
return;
698
case R_LARCH_ADD64:
699
write64le(loc, read64le(loc) + val);
700
return;
701
case R_LARCH_ADD_ULEB128:
702
handleUleb128(loc, val);
703
return;
704
case R_LARCH_SUB6:
705
*loc = (*loc & 0xc0) | ((*loc - val) & 0x3f);
706
return;
707
case R_LARCH_SUB8:
708
*loc -= val;
709
return;
710
case R_LARCH_SUB16:
711
write16le(loc, read16le(loc) - val);
712
return;
713
case R_LARCH_SUB32:
714
write32le(loc, read32le(loc) - val);
715
return;
716
case R_LARCH_SUB64:
717
write64le(loc, read64le(loc) - val);
718
return;
719
case R_LARCH_SUB_ULEB128:
720
handleUleb128(loc, -val);
721
return;
722
723
case R_LARCH_MARK_LA:
724
case R_LARCH_MARK_PCREL:
725
// no-op
726
return;
727
728
case R_LARCH_TLS_LE_ADD_R:
729
case R_LARCH_RELAX:
730
return; // Ignored (for now)
731
732
case R_LARCH_TLS_DESC_LD:
733
return; // nothing to do.
734
case R_LARCH_TLS_DESC32:
735
write32le(loc + 4, val);
736
return;
737
case R_LARCH_TLS_DESC64:
738
write64le(loc + 8, val);
739
return;
740
741
default:
742
llvm_unreachable("unknown relocation");
743
}
744
}
745
746
// Runs one relaxation pass over `sec`.
//
// Walks the section's relocations in order, works out how many padding bytes
// each R_LARCH_ALIGN can drop at the section's current address, and records
// the running total in aux.relocDeltas. Symbol anchors are moved (and sizes
// shrunk) by the bytes removed before them, and sec.bytesDropped is set to the
// grand total. Section contents are not modified here.
//
// Returns true if any relocDeltas entry changed compared to its stored value.
static bool relax(InputSection &sec) {
  const uint64_t secAddr = sec.getVA();
  const MutableArrayRef<Relocation> relocs = sec.relocs();
  auto &aux = *sec.relaxAux;
  bool changed = false;
  ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
  uint64_t delta = 0;

  std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);
  aux.writes.clear();
  for (auto [i, r] : llvm::enumerate(relocs)) {
    // Address of this relocation after the bytes removed so far.
    const uint64_t loc = secAddr + r.offset - delta;
    uint32_t &cur = aux.relocDeltas[i], remove = 0;
    switch (r.type) {
    case R_LARCH_ALIGN: {
      // Low 8 bits of the addend: log2 of the alignment. Remaining bits: the
      // maximum number of padding bytes to emit (0 means no limit). With an
      // undefined symbol the raw addend is first converted via Log2_64 + 1.
      const uint64_t addend =
          r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
      const uint64_t allBytes = (1ULL << (addend & 0xff)) - 4;
      const uint64_t align = 1ULL << (addend & 0xff);
      const uint64_t maxBytes = addend >> 8;
      const uint64_t off = loc & (align - 1);
      const uint64_t curBytes = off == 0 ? 0 : align - off;
      // All bytes beyond the alignment boundary should be removed.
      // If emit bytes more than max bytes to emit, remove all.
      if (maxBytes != 0 && curBytes > maxBytes)
        remove = allBytes;
      else
        remove = allBytes - curBytes;
      // If we can't satisfy this alignment, we've found a bad input.
      if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
        // NOTE(review): `loc` is a virtual address, not a pointer into the
        // output buffer, yet it is passed where relocate() passes a buffer
        // pointer. Confirm getErrorLocation reports something useful here.
        errorOrWarn(getErrorLocation(reinterpret_cast<const uint8_t *>(loc)) +
                    "insufficient padding bytes for " + lld::toString(r.type) +
                    ": " + Twine(allBytes) + " bytes available for " +
                    "requested alignment of " + Twine(align) + " bytes");
        remove = 0;
      }
      break;
    }
    }

    // For all anchors whose offsets are <= r.offset, they are preceded by
    // the previous relocation whose `relocDeltas` value equals `delta`.
    // Decrease their st_value and update their st_size.
    for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
      if (sa[0].end)
        sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
      else
        sa[0].d->value = sa[0].offset - delta;
    }
    delta += remove;
    if (delta != cur) {
      cur = delta;
      changed = true;
    }
  }

  // Anchors past the last relocation are shifted by the final total.
  for (const SymbolAnchor &a : sa) {
    if (a.end)
      a.d->size = a.offset - delta - a.d->value;
    else
      a.d->value = a.offset - delta;
  }
  // Inform assignAddresses that the size has changed.
  if (!isUInt<32>(delta))
    fatal("section size decrease is too large: " + Twine(delta));
  sec.bytesDropped = delta;
  return changed;
}
814
815
// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
816
// the absence of a linker script. For call and load/store R_LARCH_RELAX, code
817
// shrinkage may reduce displacement and make more relocations eligible for
818
// relaxation. Code shrinkage may increase displacement to a call/load/store
819
// target at a higher fixed address, invalidating an earlier relaxation. Any
820
// change in section sizes can have cascading effect and require another
821
// relaxation pass.
822
bool LoongArch::relaxOnce(int pass) const {
823
if (config->relocatable)
824
return false;
825
826
if (pass == 0)
827
initSymbolAnchors();
828
829
SmallVector<InputSection *, 0> storage;
830
bool changed = false;
831
for (OutputSection *osec : outputSections) {
832
if (!(osec->flags & SHF_EXECINSTR))
833
continue;
834
for (InputSection *sec : getInputSections(*osec, storage))
835
changed |= relax(*sec);
836
}
837
return changed;
838
}
839
840
void LoongArch::finalizeRelax(int passes) const {
841
log("relaxation passes: " + Twine(passes));
842
SmallVector<InputSection *, 0> storage;
843
for (OutputSection *osec : outputSections) {
844
if (!(osec->flags & SHF_EXECINSTR))
845
continue;
846
for (InputSection *sec : getInputSections(*osec, storage)) {
847
RelaxAux &aux = *sec->relaxAux;
848
if (!aux.relocDeltas)
849
continue;
850
851
MutableArrayRef<Relocation> rels = sec->relocs();
852
ArrayRef<uint8_t> old = sec->content();
853
size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
854
uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
855
uint64_t offset = 0;
856
int64_t delta = 0;
857
sec->content_ = p;
858
sec->size = newSize;
859
sec->bytesDropped = 0;
860
861
// Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
862
// instructions for relaxed relocations.
863
for (size_t i = 0, e = rels.size(); i != e; ++i) {
864
uint32_t remove = aux.relocDeltas[i] - delta;
865
delta = aux.relocDeltas[i];
866
if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
867
continue;
868
869
// Copy from last location to the current relocated location.
870
const Relocation &r = rels[i];
871
uint64_t size = r.offset - offset;
872
memcpy(p, old.data() + offset, size);
873
p += size;
874
offset = r.offset + remove;
875
}
876
memcpy(p, old.data() + offset, old.size() - offset);
877
878
// Subtract the previous relocDeltas value from the relocation offset.
879
// For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
880
// their r_offset by the same delta.
881
delta = 0;
882
for (size_t i = 0, e = rels.size(); i != e;) {
883
uint64_t cur = rels[i].offset;
884
do {
885
rels[i].offset -= delta;
886
if (aux.relocTypes[i] != R_LARCH_NONE)
887
rels[i].type = aux.relocTypes[i];
888
} while (++i != e && rels[i].offset == cur);
889
delta = aux.relocDeltas[i - 1];
890
}
891
}
892
}
893
}
894
895
// Returns the LoongArch TargetInfo. The object is a function-local static, so
// it is constructed on the first call and every call returns the same pointer.
TargetInfo *elf::getLoongArchTargetInfo() {
  static LoongArch target;
  return &target;
}
899
900