// Path: blob/main/contrib/llvm-project/lld/ELF/Arch/LoongArch.cpp
// 34889 views
//===- LoongArch.cpp ------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "InputFiles.h"9#include "OutputSections.h"10#include "Symbols.h"11#include "SyntheticSections.h"12#include "Target.h"13#include "llvm/BinaryFormat/ELF.h"14#include "llvm/Support/LEB128.h"1516using namespace llvm;17using namespace llvm::object;18using namespace llvm::support::endian;19using namespace llvm::ELF;20using namespace lld;21using namespace lld::elf;2223namespace {24class LoongArch final : public TargetInfo {25public:26LoongArch();27uint32_t calcEFlags() const override;28int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;29void writeGotPlt(uint8_t *buf, const Symbol &s) const override;30void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;31void writePltHeader(uint8_t *buf) const override;32void writePlt(uint8_t *buf, const Symbol &sym,33uint64_t pltEntryAddr) const override;34RelType getDynRel(RelType type) const override;35RelExpr getRelExpr(RelType type, const Symbol &s,36const uint8_t *loc) const override;37bool usesOnlyLowPageBits(RelType type) const override;38void relocate(uint8_t *loc, const Relocation &rel,39uint64_t val) const override;40bool relaxOnce(int pass) const override;41void finalizeRelax(int passes) const override;42};43} // end anonymous namespace4445namespace {46enum Op {47SUB_W = 0x00110000,48SUB_D = 0x00118000,49BREAK = 0x002a0000,50SRLI_W = 0x00448000,51SRLI_D = 0x00450000,52ADDI_W = 0x02800000,53ADDI_D = 0x02c00000,54ANDI = 0x03400000,55PCADDU12I = 0x1c000000,56LD_W = 0x28800000,57LD_D = 0x28c00000,58JIRL = 0x4c000000,59};6061enum Reg {62R_ZERO = 0,63R_RA = 1,64R_TP = 2,65R_T0 = 12,66R_T1 = 13,67R_T2 = 14,68R_T3 = 15,69};70} 
// namespace7172// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences73// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`74// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the75// "page") for the next instruction to add in the "page offset". (`pcalau12i`76// stands for something like "PC ALigned Add Upper that starts from the 12th77// bit, Immediate".)78//79// Here a "page" is in fact just another way to refer to the 12-bit range80// allowed by the immediate field of the addi/ld/st instructions, and not81// related to the system or the kernel's actual page size. The semantics happen82// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.83static uint64_t getLoongArchPage(uint64_t p) {84return p & ~static_cast<uint64_t>(0xfff);85}8687static uint32_t lo12(uint32_t val) { return val & 0xfff; }8889// Calculate the adjusted page delta between dest and PC.90uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {91// Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d92// + lu52i.d`, they must be adjacent so that we can infer the PC of93// `pcalau12i` when calculating the page delta for the other two instructions94// (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit95// complicated. 
Just use psABI recommended algorithm.96uint64_t pcalau12i_pc;97switch (type) {98case R_LARCH_PCALA64_LO20:99case R_LARCH_GOT64_PC_LO20:100case R_LARCH_TLS_IE64_PC_LO20:101case R_LARCH_TLS_DESC64_PC_LO20:102pcalau12i_pc = pc - 8;103break;104case R_LARCH_PCALA64_HI12:105case R_LARCH_GOT64_PC_HI12:106case R_LARCH_TLS_IE64_PC_HI12:107case R_LARCH_TLS_DESC64_PC_HI12:108pcalau12i_pc = pc - 12;109break;110default:111pcalau12i_pc = pc;112break;113}114uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc);115if (dest & 0x800)116result += 0x1000 - 0x1'0000'0000;117if (result & 0x8000'0000)118result += 0x1'0000'0000;119return result;120}121122static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }123124static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {125return op | d | (j << 5) | (k << 10);126}127128// Extract bits v[begin:end], where range is inclusive.129static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {130return begin == 63 ? 
v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;131}132133static uint32_t setD5k16(uint32_t insn, uint32_t imm) {134uint32_t immLo = extractBits(imm, 15, 0);135uint32_t immHi = extractBits(imm, 20, 16);136return (insn & 0xfc0003e0) | (immLo << 10) | immHi;137}138139static uint32_t setD10k16(uint32_t insn, uint32_t imm) {140uint32_t immLo = extractBits(imm, 15, 0);141uint32_t immHi = extractBits(imm, 25, 16);142return (insn & 0xfc000000) | (immLo << 10) | immHi;143}144145static uint32_t setJ20(uint32_t insn, uint32_t imm) {146return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);147}148149static uint32_t setK12(uint32_t insn, uint32_t imm) {150return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10);151}152153static uint32_t setK16(uint32_t insn, uint32_t imm) {154return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10);155}156157static bool isJirl(uint32_t insn) {158return (insn & 0xfc000000) == JIRL;159}160161static void handleUleb128(uint8_t *loc, uint64_t val) {162const uint32_t maxcount = 1 + 64 / 7;163uint32_t count;164const char *error = nullptr;165uint64_t orig = decodeULEB128(loc, &count, nullptr, &error);166if (count > maxcount || (count == maxcount && error))167errorOrWarn(getErrorLocation(loc) + "extra space for uleb128");168uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL;169encodeULEB128((orig + val) & mask, loc, count);170}171172LoongArch::LoongArch() {173// The LoongArch ISA itself does not have a limit on page sizes. According to174// the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is175// 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to176// "unlimited".177// However, practically the maximum usable page size is constrained by the178// kernel implementation, and 64KiB is the biggest non-huge page size179// supported by Linux as of v6.4. 
The most widespread page size in use,180// though, is 16KiB.181defaultCommonPageSize = 16384;182defaultMaxPageSize = 65536;183write32le(trapInstr.data(), BREAK); // break 0184185copyRel = R_LARCH_COPY;186pltRel = R_LARCH_JUMP_SLOT;187relativeRel = R_LARCH_RELATIVE;188iRelativeRel = R_LARCH_IRELATIVE;189190if (config->is64) {191symbolicRel = R_LARCH_64;192tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;193tlsOffsetRel = R_LARCH_TLS_DTPREL64;194tlsGotRel = R_LARCH_TLS_TPREL64;195tlsDescRel = R_LARCH_TLS_DESC64;196} else {197symbolicRel = R_LARCH_32;198tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;199tlsOffsetRel = R_LARCH_TLS_DTPREL32;200tlsGotRel = R_LARCH_TLS_TPREL32;201tlsDescRel = R_LARCH_TLS_DESC32;202}203204gotRel = symbolicRel;205206// .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map207gotPltHeaderEntriesNum = 2;208209pltHeaderSize = 32;210pltEntrySize = 16;211ipltEntrySize = 16;212}213214static uint32_t getEFlags(const InputFile *f) {215if (config->is64)216return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;217return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;218}219220static bool inputFileHasCode(const InputFile *f) {221for (const auto *sec : f->getSections())222if (sec && sec->flags & SHF_EXECINSTR)223return true;224225return false;226}227228uint32_t LoongArch::calcEFlags() const {229// If there are only binary input files (from -b binary), use a230// value of 0 for the ELF header flags.231if (ctx.objectFiles.empty())232return 0;233234uint32_t target = 0;235const InputFile *targetFile;236for (const InputFile *f : ctx.objectFiles) {237// Do not enforce ABI compatibility if the input file does not contain code.238// This is useful for allowing linkage with data-only object files produced239// with tools like objcopy, that have zero e_flags.240if (!inputFileHasCode(f))241continue;242243// Take the first non-zero e_flags as the reference.244uint32_t flags = getEFlags(f);245if (target == 0 && flags != 0) {246target = flags;247targetFile = 
f;248}249250if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=251(target & EF_LOONGARCH_ABI_MODIFIER_MASK))252error(toString(f) +253": cannot link object files with different ABI from " +254toString(targetFile));255256// We cannot process psABI v1.x / object ABI v0 files (containing stack257// relocations), unlike ld.bfd.258//259// Instead of blindly accepting every v0 object and only failing at260// relocation processing time, just disallow interlink altogether. We261// don't expect significant usage of object ABI v0 in the wild (the old262// world may continue using object ABI v0 for a while, but as it's not263// binary-compatible with the upstream i.e. new-world ecosystem, it's not264// being considered here).265//266// There are briefly some new-world systems with object ABI v0 binaries too.267// It is because these systems were built before the new ABI was finalized.268// These are not supported either due to the extremely small number of them,269// and the few impacted users are advised to simply rebuild world or270// reinstall a recent system.271if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)272error(toString(f) + ": unsupported object file ABI version");273}274275return target;276}277278int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const {279switch (type) {280default:281internalLinkerError(getErrorLocation(buf),282"cannot read addend for relocation " + toString(type));283return 0;284case R_LARCH_32:285case R_LARCH_TLS_DTPMOD32:286case R_LARCH_TLS_DTPREL32:287case R_LARCH_TLS_TPREL32:288return SignExtend64<32>(read32le(buf));289case R_LARCH_64:290case R_LARCH_TLS_DTPMOD64:291case R_LARCH_TLS_DTPREL64:292case R_LARCH_TLS_TPREL64:293return read64le(buf);294case R_LARCH_RELATIVE:295case R_LARCH_IRELATIVE:296return config->is64 ? 
read64le(buf) : read32le(buf);297case R_LARCH_NONE:298case R_LARCH_JUMP_SLOT:299// These relocations are defined as not having an implicit addend.300return 0;301case R_LARCH_TLS_DESC32:302return read32le(buf + 4);303case R_LARCH_TLS_DESC64:304return read64le(buf + 8);305}306}307308void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const {309if (config->is64)310write64le(buf, in.plt->getVA());311else312write32le(buf, in.plt->getVA());313}314315void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const {316if (config->writeAddends) {317if (config->is64)318write64le(buf, s.getVA());319else320write32le(buf, s.getVA());321}322}323324void LoongArch::writePltHeader(uint8_t *buf) const {325// The LoongArch PLT is currently structured just like that of RISCV.326// Annoyingly, this means the PLT is still using `pcaddu12i` to perform327// PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),328// in contrast to the AArch64-like page-offset scheme with `pcalau12i` that329// is used everywhere else involving PC-relative operations in the LoongArch330// ELF psABI v2.00.331//332// The `pcrel_{hi20,lo12}` operators are illustrative only and not really333// supported by LoongArch assemblers.334//335// pcaddu12i $t2, %pcrel_hi20(.got.plt)336// sub.[wd] $t1, $t1, $t3337// ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve338// addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]339// addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)340// srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]341// ld.[wd] $t0, $t0, Wordsize ; t0 = link_map342// jr $t3343uint32_t offset = in.gotPlt->getVA() - in.plt->getVA();344uint32_t sub = config->is64 ? SUB_D : SUB_W;345uint32_t ld = config->is64 ? LD_D : LD_W;346uint32_t addi = config->is64 ? ADDI_D : ADDI_W;347uint32_t srli = config->is64 ? 
SRLI_D : SRLI_W;348write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0));349write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3));350write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset)));351write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12)));352write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset)));353write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2));354write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize));355write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0));356}357358void LoongArch::writePlt(uint8_t *buf, const Symbol &sym,359uint64_t pltEntryAddr) const {360// See the comment in writePltHeader for reason why pcaddu12i is used instead361// of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.362//363// pcaddu12i $t3, %pcrel_hi20([email protected])364// ld.[wd] $t3, $t3, %pcrel_lo12([email protected])365// jirl $t1, $t3, 0366// nop367uint32_t offset = sym.getGotPltVA() - pltEntryAddr;368write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0));369write32le(buf + 4,370insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset)));371write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0));372write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0));373}374375RelType LoongArch::getDynRel(RelType type) const {376return type == target->symbolicRel ? type377: static_cast<RelType>(R_LARCH_NONE);378}379380RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,381const uint8_t *loc) const {382switch (type) {383case R_LARCH_NONE:384case R_LARCH_MARK_LA:385case R_LARCH_MARK_PCREL:386return R_NONE;387case R_LARCH_32:388case R_LARCH_64:389case R_LARCH_ABS_HI20:390case R_LARCH_ABS_LO12:391case R_LARCH_ABS64_LO20:392case R_LARCH_ABS64_HI12:393return R_ABS;394case R_LARCH_PCALA_LO12:395// We could just R_ABS, but the JIRL instruction reuses the relocation type396// for a different purpose. 
The questionable usage is part of glibc 2.37397// libc_nonshared.a [1], which is linked into user programs, so we have to398// work around it for a while, even if a new relocation type may be399// introduced in the future [2].400//401// [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a402// [2]: https://github.com/loongson/la-abi-specs/pull/3403return isJirl(read32le(loc)) ? R_PLT : R_ABS;404case R_LARCH_TLS_DTPREL32:405case R_LARCH_TLS_DTPREL64:406return R_DTPREL;407case R_LARCH_TLS_TPREL32:408case R_LARCH_TLS_TPREL64:409case R_LARCH_TLS_LE_HI20:410case R_LARCH_TLS_LE_HI20_R:411case R_LARCH_TLS_LE_LO12:412case R_LARCH_TLS_LE_LO12_R:413case R_LARCH_TLS_LE64_LO20:414case R_LARCH_TLS_LE64_HI12:415return R_TPREL;416case R_LARCH_ADD6:417case R_LARCH_ADD8:418case R_LARCH_ADD16:419case R_LARCH_ADD32:420case R_LARCH_ADD64:421case R_LARCH_ADD_ULEB128:422case R_LARCH_SUB6:423case R_LARCH_SUB8:424case R_LARCH_SUB16:425case R_LARCH_SUB32:426case R_LARCH_SUB64:427case R_LARCH_SUB_ULEB128:428// The LoongArch add/sub relocs behave like the RISCV counterparts; reuse429// the RelExpr to avoid code duplication.430return R_RISCV_ADD;431case R_LARCH_32_PCREL:432case R_LARCH_64_PCREL:433case R_LARCH_PCREL20_S2:434return R_PC;435case R_LARCH_B16:436case R_LARCH_B21:437case R_LARCH_B26:438case R_LARCH_CALL36:439return R_PLT_PC;440case R_LARCH_GOT_PC_HI20:441case R_LARCH_GOT64_PC_LO20:442case R_LARCH_GOT64_PC_HI12:443case R_LARCH_TLS_IE_PC_HI20:444case R_LARCH_TLS_IE64_PC_LO20:445case R_LARCH_TLS_IE64_PC_HI12:446return R_LOONGARCH_GOT_PAGE_PC;447case R_LARCH_GOT_PC_LO12:448case R_LARCH_TLS_IE_PC_LO12:449return R_LOONGARCH_GOT;450case R_LARCH_TLS_LD_PC_HI20:451case R_LARCH_TLS_GD_PC_HI20:452return R_LOONGARCH_TLSGD_PAGE_PC;453case R_LARCH_PCALA_HI20:454// Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT455// anyway so why waste time checking only to get everything relaxed back to456// it?457//458// This is again 
due to the R_LARCH_PCALA_LO12 on JIRL case, where we want459// both the HI20 and LO12 to potentially refer to the PLT. But in reality460// the HI20 reloc appears earlier, and the relocs don't contain enough461// information to let us properly resolve semantics per symbol.462// Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20463// relocs, hence it is nearly impossible to 100% accurately determine each464// HI20's "flavor" without taking big performance hits, in the presence of465// edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far466// apart that relationship is not certain anymore), and programmer mistakes467// (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).468//469// Ideally we would scan in an extra pass for all LO12s on JIRL, then mark470// every HI20 reloc referring to the same symbol differently; this is not471// feasible with the current function signature of getRelExpr that doesn't472// allow for such inter-pass state.473//474// So, unfortunately we have to again workaround this quirk the same way as475// BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only476// relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later477// stage.478return R_LOONGARCH_PLT_PAGE_PC;479case R_LARCH_PCALA64_LO20:480case R_LARCH_PCALA64_HI12:481return R_LOONGARCH_PAGE_PC;482case R_LARCH_GOT_HI20:483case R_LARCH_GOT_LO12:484case R_LARCH_GOT64_LO20:485case R_LARCH_GOT64_HI12:486case R_LARCH_TLS_IE_HI20:487case R_LARCH_TLS_IE_LO12:488case R_LARCH_TLS_IE64_LO20:489case R_LARCH_TLS_IE64_HI12:490return R_GOT;491case R_LARCH_TLS_LD_HI20:492return R_TLSLD_GOT;493case R_LARCH_TLS_GD_HI20:494return R_TLSGD_GOT;495case R_LARCH_TLS_LE_ADD_R:496case R_LARCH_RELAX:497return config->relax ? 
R_RELAX_HINT : R_NONE;498case R_LARCH_ALIGN:499return R_RELAX_HINT;500case R_LARCH_TLS_DESC_PC_HI20:501case R_LARCH_TLS_DESC64_PC_LO20:502case R_LARCH_TLS_DESC64_PC_HI12:503return R_LOONGARCH_TLSDESC_PAGE_PC;504case R_LARCH_TLS_DESC_PC_LO12:505case R_LARCH_TLS_DESC_LD:506case R_LARCH_TLS_DESC_HI20:507case R_LARCH_TLS_DESC_LO12:508case R_LARCH_TLS_DESC64_LO20:509case R_LARCH_TLS_DESC64_HI12:510return R_TLSDESC;511case R_LARCH_TLS_DESC_CALL:512return R_TLSDESC_CALL;513case R_LARCH_TLS_LD_PCREL20_S2:514return R_TLSLD_PC;515case R_LARCH_TLS_GD_PCREL20_S2:516return R_TLSGD_PC;517case R_LARCH_TLS_DESC_PCREL20_S2:518return R_TLSDESC_PC;519520// Other known relocs that are explicitly unimplemented:521//522// - psABI v1 relocs that need a stateful stack machine to work, and not523// required when implementing psABI v2;524// - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the525// two GNU vtable-related relocs).526//527// [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51528default:529error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +530") against symbol " + toString(s));531return R_NONE;532}533}534535bool LoongArch::usesOnlyLowPageBits(RelType type) const {536switch (type) {537default:538return false;539case R_LARCH_PCALA_LO12:540case R_LARCH_GOT_LO12:541case R_LARCH_GOT_PC_LO12:542case R_LARCH_TLS_IE_PC_LO12:543case R_LARCH_TLS_DESC_LO12:544case R_LARCH_TLS_DESC_PC_LO12:545return true;546}547}548549void LoongArch::relocate(uint8_t *loc, const Relocation &rel,550uint64_t val) const {551switch (rel.type) {552case R_LARCH_32_PCREL:553checkInt(loc, val, 32, rel);554[[fallthrough]];555case R_LARCH_32:556case R_LARCH_TLS_DTPREL32:557write32le(loc, val);558return;559case R_LARCH_64:560case R_LARCH_TLS_DTPREL64:561case R_LARCH_64_PCREL:562write64le(loc, val);563return;564565// Relocs intended for `pcaddi`.566case R_LARCH_PCREL20_S2:567case R_LARCH_TLS_LD_PCREL20_S2:568case 
R_LARCH_TLS_GD_PCREL20_S2:569case R_LARCH_TLS_DESC_PCREL20_S2:570checkInt(loc, val, 22, rel);571checkAlignment(loc, val, 4, rel);572write32le(loc, setJ20(read32le(loc), val >> 2));573return;574575case R_LARCH_B16:576checkInt(loc, val, 18, rel);577checkAlignment(loc, val, 4, rel);578write32le(loc, setK16(read32le(loc), val >> 2));579return;580581case R_LARCH_B21:582checkInt(loc, val, 23, rel);583checkAlignment(loc, val, 4, rel);584write32le(loc, setD5k16(read32le(loc), val >> 2));585return;586587case R_LARCH_B26:588checkInt(loc, val, 28, rel);589checkAlignment(loc, val, 4, rel);590write32le(loc, setD10k16(read32le(loc), val >> 2));591return;592593case R_LARCH_CALL36: {594// This relocation is designed for adjacent pcaddu18i+jirl pairs that595// are patched in one time. Because of sign extension of these insns'596// immediate fields, the relocation range is [-128G - 0x20000, +128G -597// 0x20000) (of course must be 4-byte aligned).598if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38))599reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000,600llvm::maxIntN(38) - 0x20000);601checkAlignment(loc, val, 4, rel);602// Since jirl performs sign extension on the offset immediate, adds (1<<17)603// to original val to get the correct hi20.604uint32_t hi20 = extractBits(val + (1 << 17), 37, 18);605// Despite the name, the lower part is actually 18 bits with 4-byte aligned.606uint32_t lo16 = extractBits(val, 17, 2);607write32le(loc, setJ20(read32le(loc), hi20));608write32le(loc + 4, setK16(read32le(loc + 4), lo16));609return;610}611612// Relocs intended for `addi`, `ld` or `st`.613case R_LARCH_PCALA_LO12:614// We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12615// on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes616// removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly617// its immediate slot width is different too (16, not 12).618// In this case, process like an R_LARCH_B16, but without overflow 
checking619// and only taking the value's lowest 12 bits.620if (isJirl(read32le(loc))) {621checkAlignment(loc, val, 4, rel);622val = SignExtend64<12>(val);623write32le(loc, setK16(read32le(loc), val >> 2));624return;625}626[[fallthrough]];627case R_LARCH_ABS_LO12:628case R_LARCH_GOT_PC_LO12:629case R_LARCH_GOT_LO12:630case R_LARCH_TLS_LE_LO12:631case R_LARCH_TLS_IE_PC_LO12:632case R_LARCH_TLS_IE_LO12:633case R_LARCH_TLS_LE_LO12_R:634case R_LARCH_TLS_DESC_PC_LO12:635case R_LARCH_TLS_DESC_LO12:636write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0)));637return;638639// Relocs intended for `lu12i.w` or `pcalau12i`.640case R_LARCH_ABS_HI20:641case R_LARCH_PCALA_HI20:642case R_LARCH_GOT_PC_HI20:643case R_LARCH_GOT_HI20:644case R_LARCH_TLS_LE_HI20:645case R_LARCH_TLS_IE_PC_HI20:646case R_LARCH_TLS_IE_HI20:647case R_LARCH_TLS_LD_PC_HI20:648case R_LARCH_TLS_LD_HI20:649case R_LARCH_TLS_GD_PC_HI20:650case R_LARCH_TLS_GD_HI20:651case R_LARCH_TLS_DESC_PC_HI20:652case R_LARCH_TLS_DESC_HI20:653write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12)));654return;655case R_LARCH_TLS_LE_HI20_R:656write32le(loc, setJ20(read32le(loc), extractBits(val + 0x800, 31, 12)));657return;658659// Relocs intended for `lu32i.d`.660case R_LARCH_ABS64_LO20:661case R_LARCH_PCALA64_LO20:662case R_LARCH_GOT64_PC_LO20:663case R_LARCH_GOT64_LO20:664case R_LARCH_TLS_LE64_LO20:665case R_LARCH_TLS_IE64_PC_LO20:666case R_LARCH_TLS_IE64_LO20:667case R_LARCH_TLS_DESC64_PC_LO20:668case R_LARCH_TLS_DESC64_LO20:669write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32)));670return;671672// Relocs intended for `lu52i.d`.673case R_LARCH_ABS64_HI12:674case R_LARCH_PCALA64_HI12:675case R_LARCH_GOT64_PC_HI12:676case R_LARCH_GOT64_HI12:677case R_LARCH_TLS_LE64_HI12:678case R_LARCH_TLS_IE64_PC_HI12:679case R_LARCH_TLS_IE64_HI12:680case R_LARCH_TLS_DESC64_PC_HI12:681case R_LARCH_TLS_DESC64_HI12:682write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52)));683return;684685case 
R_LARCH_ADD6:686*loc = (*loc & 0xc0) | ((*loc + val) & 0x3f);687return;688case R_LARCH_ADD8:689*loc += val;690return;691case R_LARCH_ADD16:692write16le(loc, read16le(loc) + val);693return;694case R_LARCH_ADD32:695write32le(loc, read32le(loc) + val);696return;697case R_LARCH_ADD64:698write64le(loc, read64le(loc) + val);699return;700case R_LARCH_ADD_ULEB128:701handleUleb128(loc, val);702return;703case R_LARCH_SUB6:704*loc = (*loc & 0xc0) | ((*loc - val) & 0x3f);705return;706case R_LARCH_SUB8:707*loc -= val;708return;709case R_LARCH_SUB16:710write16le(loc, read16le(loc) - val);711return;712case R_LARCH_SUB32:713write32le(loc, read32le(loc) - val);714return;715case R_LARCH_SUB64:716write64le(loc, read64le(loc) - val);717return;718case R_LARCH_SUB_ULEB128:719handleUleb128(loc, -val);720return;721722case R_LARCH_MARK_LA:723case R_LARCH_MARK_PCREL:724// no-op725return;726727case R_LARCH_TLS_LE_ADD_R:728case R_LARCH_RELAX:729return; // Ignored (for now)730731case R_LARCH_TLS_DESC_LD:732return; // nothing to do.733case R_LARCH_TLS_DESC32:734write32le(loc + 4, val);735return;736case R_LARCH_TLS_DESC64:737write64le(loc + 8, val);738return;739740default:741llvm_unreachable("unknown relocation");742}743}744745static bool relax(InputSection &sec) {746const uint64_t secAddr = sec.getVA();747const MutableArrayRef<Relocation> relocs = sec.relocs();748auto &aux = *sec.relaxAux;749bool changed = false;750ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);751uint64_t delta = 0;752753std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);754aux.writes.clear();755for (auto [i, r] : llvm::enumerate(relocs)) {756const uint64_t loc = secAddr + r.offset - delta;757uint32_t &cur = aux.relocDeltas[i], remove = 0;758switch (r.type) {759case R_LARCH_ALIGN: {760const uint64_t addend =761r.sym->isUndefined() ? 
Log2_64(r.addend) + 1 : r.addend;762const uint64_t allBytes = (1ULL << (addend & 0xff)) - 4;763const uint64_t align = 1ULL << (addend & 0xff);764const uint64_t maxBytes = addend >> 8;765const uint64_t off = loc & (align - 1);766const uint64_t curBytes = off == 0 ? 0 : align - off;767// All bytes beyond the alignment boundary should be removed.768// If emit bytes more than max bytes to emit, remove all.769if (maxBytes != 0 && curBytes > maxBytes)770remove = allBytes;771else772remove = allBytes - curBytes;773// If we can't satisfy this alignment, we've found a bad input.774if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {775errorOrWarn(getErrorLocation((const uint8_t *)loc) +776"insufficient padding bytes for " + lld::toString(r.type) +777": " + Twine(allBytes) + " bytes available for " +778"requested alignment of " + Twine(align) + " bytes");779remove = 0;780}781break;782}783}784785// For all anchors whose offsets are <= r.offset, they are preceded by786// the previous relocation whose `relocDeltas` value equals `delta`.787// Decrease their st_value and update their st_size.788for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {789if (sa[0].end)790sa[0].d->size = sa[0].offset - delta - sa[0].d->value;791else792sa[0].d->value = sa[0].offset - delta;793}794delta += remove;795if (delta != cur) {796cur = delta;797changed = true;798}799}800801for (const SymbolAnchor &a : sa) {802if (a.end)803a.d->size = a.offset - delta - a.d->value;804else805a.d->value = a.offset - delta;806}807// Inform assignAddresses that the size has changed.808if (!isUInt<32>(delta))809fatal("section size decrease is too large: " + Twine(delta));810sec.bytesDropped = delta;811return changed;812}813814// When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in815// the absence of a linker script. For call and load/store R_LARCH_RELAX, code816// shrinkage may reduce displacement and make more relocations eligible for817// relaxation. 
Code shrinkage may increase displacement to a call/load/store818// target at a higher fixed address, invalidating an earlier relaxation. Any819// change in section sizes can have cascading effect and require another820// relaxation pass.821bool LoongArch::relaxOnce(int pass) const {822if (config->relocatable)823return false;824825if (pass == 0)826initSymbolAnchors();827828SmallVector<InputSection *, 0> storage;829bool changed = false;830for (OutputSection *osec : outputSections) {831if (!(osec->flags & SHF_EXECINSTR))832continue;833for (InputSection *sec : getInputSections(*osec, storage))834changed |= relax(*sec);835}836return changed;837}838839void LoongArch::finalizeRelax(int passes) const {840log("relaxation passes: " + Twine(passes));841SmallVector<InputSection *, 0> storage;842for (OutputSection *osec : outputSections) {843if (!(osec->flags & SHF_EXECINSTR))844continue;845for (InputSection *sec : getInputSections(*osec, storage)) {846RelaxAux &aux = *sec->relaxAux;847if (!aux.relocDeltas)848continue;849850MutableArrayRef<Relocation> rels = sec->relocs();851ArrayRef<uint8_t> old = sec->content();852size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];853uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);854uint64_t offset = 0;855int64_t delta = 0;856sec->content_ = p;857sec->size = newSize;858sec->bytesDropped = 0;859860// Update section content: remove NOPs for R_LARCH_ALIGN and rewrite861// instructions for relaxed relocations.862for (size_t i = 0, e = rels.size(); i != e; ++i) {863uint32_t remove = aux.relocDeltas[i] - delta;864delta = aux.relocDeltas[i];865if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)866continue;867868// Copy from last location to the current relocated location.869const Relocation &r = rels[i];870uint64_t size = r.offset - offset;871memcpy(p, old.data() + offset, size);872p += size;873offset = r.offset + remove;874}875memcpy(p, old.data() + offset, old.size() - offset);876877// Subtract the previous relocDeltas 
value from the relocation offset.878// For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease879// their r_offset by the same delta.880delta = 0;881for (size_t i = 0, e = rels.size(); i != e;) {882uint64_t cur = rels[i].offset;883do {884rels[i].offset -= delta;885if (aux.relocTypes[i] != R_LARCH_NONE)886rels[i].type = aux.relocTypes[i];887} while (++i != e && rels[i].offset == cur);888delta = aux.relocDeltas[i - 1];889}890}891}892}893894TargetInfo *elf::getLoongArchTargetInfo() {895static LoongArch target;896return ⌖897}898899900