Path: blob/main/contrib/llvm-project/lld/ELF/Arch/ARM.cpp
34878 views
//===- ARM.cpp ------------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "InputFiles.h"9#include "OutputSections.h"10#include "SymbolTable.h"11#include "Symbols.h"12#include "SyntheticSections.h"13#include "Target.h"14#include "lld/Common/ErrorHandler.h"15#include "lld/Common/Filesystem.h"16#include "llvm/BinaryFormat/ELF.h"17#include "llvm/Support/Endian.h"1819using namespace llvm;20using namespace llvm::support::endian;21using namespace llvm::support;22using namespace llvm::ELF;23using namespace lld;24using namespace lld::elf;25using namespace llvm::object;2627namespace {28class ARM final : public TargetInfo {29public:30ARM();31uint32_t calcEFlags() const override;32RelExpr getRelExpr(RelType type, const Symbol &s,33const uint8_t *loc) const override;34RelType getDynRel(RelType type) const override;35int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;36void writeGotPlt(uint8_t *buf, const Symbol &s) const override;37void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;38void writePltHeader(uint8_t *buf) const override;39void writePlt(uint8_t *buf, const Symbol &sym,40uint64_t pltEntryAddr) const override;41void addPltSymbols(InputSection &isec, uint64_t off) const override;42void addPltHeaderSymbols(InputSection &isd) const override;43bool needsThunk(RelExpr expr, RelType type, const InputFile *file,44uint64_t branchAddr, const Symbol &s,45int64_t a) const override;46uint32_t getThunkSectionSpacing() const override;47bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;48void relocate(uint8_t *loc, const Relocation &rel,49uint64_t val) const override;50};51enum class CodeState { Data = 0, Thumb = 2, Arm = 4 };52} // namespace5354static DenseMap<InputSection *, SmallVector<const Defined *, 0>> sectionMap{};5556ARM::ARM() {57copyRel = R_ARM_COPY;58relativeRel = R_ARM_RELATIVE;59iRelativeRel = R_ARM_IRELATIVE;60gotRel = R_ARM_GLOB_DAT;61pltRel = R_ARM_JUMP_SLOT;62symbolicRel = R_ARM_ABS32;63tlsGotRel = R_ARM_TLS_TPOFF32;64tlsModuleIndexRel = R_ARM_TLS_DTPMOD32;65tlsOffsetRel = R_ARM_TLS_DTPOFF32;66pltHeaderSize = 32;67pltEntrySize = 16;68ipltEntrySize = 16;69trapInstr = {0xd4, 0xd4, 0xd4, 0xd4};70needsThunks = true;71defaultMaxPageSize = 65536;72}7374uint32_t ARM::calcEFlags() const {75// The ABIFloatType is used by loaders to detect the floating point calling76// convention.77uint32_t abiFloatType = 0;7879// Set the EF_ARM_BE8 flag in the ELF header, if ELF file is big-endian80// with BE-8 code.81uint32_t armBE8 = 0;8283if (config->armVFPArgs == ARMVFPArgKind::Base ||84config->armVFPArgs == ARMVFPArgKind::Default)85abiFloatType = EF_ARM_ABI_FLOAT_SOFT;86else if (config->armVFPArgs == ARMVFPArgKind::VFP)87abiFloatType = EF_ARM_ABI_FLOAT_HARD;8889if (!config->isLE && config->armBe8)90armBE8 = EF_ARM_BE8;9192// We don't currently use any features incompatible with EF_ARM_EABI_VER5,93// but we don't have any firm guarantees of conformance. Linux AArch6494// kernels (as of 2016) require an EABI version to be set.95return EF_ARM_EABI_VER5 | abiFloatType | armBE8;96}9798RelExpr ARM::getRelExpr(RelType type, const Symbol &s,99const uint8_t *loc) const {100switch (type) {101case R_ARM_ABS32:102case R_ARM_MOVW_ABS_NC:103case R_ARM_MOVT_ABS:104case R_ARM_THM_MOVW_ABS_NC:105case R_ARM_THM_MOVT_ABS:106case R_ARM_THM_ALU_ABS_G0_NC:107case R_ARM_THM_ALU_ABS_G1_NC:108case R_ARM_THM_ALU_ABS_G2_NC:109case R_ARM_THM_ALU_ABS_G3:110return R_ABS;111case R_ARM_THM_JUMP8:112case R_ARM_THM_JUMP11:113return R_PC;114case R_ARM_CALL:115case R_ARM_JUMP24:116case R_ARM_PC24:117case R_ARM_PLT32:118case R_ARM_PREL31:119case R_ARM_THM_JUMP19:120case R_ARM_THM_JUMP24:121case R_ARM_THM_CALL:122return R_PLT_PC;123case R_ARM_GOTOFF32:124// (S + A) - GOT_ORG125return R_GOTREL;126case R_ARM_GOT_BREL:127// GOT(S) + A - GOT_ORG128return R_GOT_OFF;129case R_ARM_GOT_PREL:130case R_ARM_TLS_IE32:131// GOT(S) + A - P132return R_GOT_PC;133case R_ARM_SBREL32:134return R_ARM_SBREL;135case R_ARM_TARGET1:136return config->target1Rel ? R_PC : R_ABS;137case R_ARM_TARGET2:138if (config->target2 == Target2Policy::Rel)139return R_PC;140if (config->target2 == Target2Policy::Abs)141return R_ABS;142return R_GOT_PC;143case R_ARM_TLS_GD32:144return R_TLSGD_PC;145case R_ARM_TLS_LDM32:146return R_TLSLD_PC;147case R_ARM_TLS_LDO32:148return R_DTPREL;149case R_ARM_BASE_PREL:150// B(S) + A - P151// FIXME: currently B(S) assumed to be .got, this may not hold for all152// platforms.153return R_GOTONLY_PC;154case R_ARM_MOVW_PREL_NC:155case R_ARM_MOVT_PREL:156case R_ARM_REL32:157case R_ARM_THM_MOVW_PREL_NC:158case R_ARM_THM_MOVT_PREL:159return R_PC;160case R_ARM_ALU_PC_G0:161case R_ARM_ALU_PC_G0_NC:162case R_ARM_ALU_PC_G1:163case R_ARM_ALU_PC_G1_NC:164case R_ARM_ALU_PC_G2:165case R_ARM_LDR_PC_G0:166case R_ARM_LDR_PC_G1:167case R_ARM_LDR_PC_G2:168case R_ARM_LDRS_PC_G0:169case R_ARM_LDRS_PC_G1:170case R_ARM_LDRS_PC_G2:171case R_ARM_THM_ALU_PREL_11_0:172case R_ARM_THM_PC8:173case R_ARM_THM_PC12:174return R_ARM_PCA;175case R_ARM_MOVW_BREL_NC:176case R_ARM_MOVW_BREL:177case R_ARM_MOVT_BREL:178case R_ARM_THM_MOVW_BREL_NC:179case R_ARM_THM_MOVW_BREL:180case R_ARM_THM_MOVT_BREL:181return R_ARM_SBREL;182case R_ARM_NONE:183return R_NONE;184case R_ARM_TLS_LE32:185return R_TPREL;186case R_ARM_V4BX:187// V4BX is just a marker to indicate there's a "bx rN" instruction at the188// given address. It can be used to implement a special linker mode which189// rewrites ARMv4T inputs to ARMv4. Since we support only ARMv4 input and190// not ARMv4 output, we can just ignore it.191return R_NONE;192default:193error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +194") against symbol " + toString(s));195return R_NONE;196}197}198199RelType ARM::getDynRel(RelType type) const {200if ((type == R_ARM_ABS32) || (type == R_ARM_TARGET1 && !config->target1Rel))201return R_ARM_ABS32;202return R_ARM_NONE;203}204205void ARM::writeGotPlt(uint8_t *buf, const Symbol &) const {206write32(buf, in.plt->getVA());207}208209void ARM::writeIgotPlt(uint8_t *buf, const Symbol &s) const {210// An ARM entry is the address of the ifunc resolver function.211write32(buf, s.getVA());212}213214// Long form PLT Header that does not have any restrictions on the displacement215// of the .plt from the .got.plt.216static void writePltHeaderLong(uint8_t *buf) {217write32(buf + 0, 0xe52de004); // str lr, [sp,#-4]!218write32(buf + 4, 0xe59fe004); // ldr lr, L2219write32(buf + 8, 0xe08fe00e); // L1: add lr, pc, lr220write32(buf + 12, 0xe5bef008); // ldr pc, [lr, #8]221write32(buf + 16, 0x00000000); // L2: .word &(.got.plt) - L1 - 8222write32(buf + 20, 0xd4d4d4d4); // Pad to 32-byte boundary223write32(buf + 24, 0xd4d4d4d4); // Pad to 32-byte boundary224write32(buf + 28, 0xd4d4d4d4);225uint64_t gotPlt = in.gotPlt->getVA();226uint64_t l1 = in.plt->getVA() + 8;227write32(buf + 16, gotPlt - l1 - 8);228}229230// True if we should use Thumb PLTs, which currently require Thumb2, and are231// only used if the target does not have the ARM ISA.232static bool useThumbPLTs() {233return config->armHasThumb2ISA && !config->armHasArmISA;234}235236// The default PLT header requires the .got.plt to be within 128 Mb of the237// .plt in the positive direction.238void ARM::writePltHeader(uint8_t *buf) const {239if (useThumbPLTs()) {240// The instruction sequence for thumb:241//242// 0: b500 push {lr}243// 2: f8df e008 ldr.w lr, [pc, #0x8] @ 0xe <func+0xe>244// 6: 44fe add lr, pc245// 8: f85e ff08 ldr pc, [lr, #8]!246// e: .word .got.plt - .plt - 16247//248// At 0x8, we want to jump to .got.plt, the -16 accounts for 8 bytes from249// `pc` in the add instruction and 8 bytes for the `lr` adjustment.250//251uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 16;252assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");253write16(buf + 0, 0xb500);254// Split into two halves to support endianness correctly.255write16(buf + 2, 0xf8df);256write16(buf + 4, 0xe008);257write16(buf + 6, 0x44fe);258// Split into two halves to support endianness correctly.259write16(buf + 8, 0xf85e);260write16(buf + 10, 0xff08);261write32(buf + 12, offset);262263memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary264memcpy(buf + 20, trapInstr.data(), 4);265memcpy(buf + 24, trapInstr.data(), 4);266memcpy(buf + 28, trapInstr.data(), 4);267} else {268// Use a similar sequence to that in writePlt(), the difference is the269// calling conventions mean we use lr instead of ip. The PLT entry is270// responsible for saving lr on the stack, the dynamic loader is responsible271// for reloading it.272const uint32_t pltData[] = {2730xe52de004, // L1: str lr, [sp,#-4]!2740xe28fe600, // add lr, pc, #0x0NN00000 &(.got.plt - L1 - 4)2750xe28eea00, // add lr, lr, #0x000NN000 &(.got.plt - L1 - 4)2760xe5bef000, // ldr pc, [lr, #0x00000NNN] &(.got.plt -L1 - 4)277};278279uint64_t offset = in.gotPlt->getVA() - in.plt->getVA() - 4;280if (!llvm::isUInt<27>(offset)) {281// We cannot encode the Offset, use the long form.282writePltHeaderLong(buf);283return;284}285write32(buf + 0, pltData[0]);286write32(buf + 4, pltData[1] | ((offset >> 20) & 0xff));287write32(buf + 8, pltData[2] | ((offset >> 12) & 0xff));288write32(buf + 12, pltData[3] | (offset & 0xfff));289memcpy(buf + 16, trapInstr.data(), 4); // Pad to 32-byte boundary290memcpy(buf + 20, trapInstr.data(), 4);291memcpy(buf + 24, trapInstr.data(), 4);292memcpy(buf + 28, trapInstr.data(), 4);293}294}295296void ARM::addPltHeaderSymbols(InputSection &isec) const {297if (useThumbPLTs()) {298addSyntheticLocal("$t", STT_NOTYPE, 0, 0, isec);299addSyntheticLocal("$d", STT_NOTYPE, 12, 0, isec);300} else {301addSyntheticLocal("$a", STT_NOTYPE, 0, 0, isec);302addSyntheticLocal("$d", STT_NOTYPE, 16, 0, isec);303}304}305306// Long form PLT entries that do not have any restrictions on the displacement307// of the .plt from the .got.plt.308static void writePltLong(uint8_t *buf, uint64_t gotPltEntryAddr,309uint64_t pltEntryAddr) {310write32(buf + 0, 0xe59fc004); // ldr ip, L2311write32(buf + 4, 0xe08cc00f); // L1: add ip, ip, pc312write32(buf + 8, 0xe59cf000); // ldr pc, [ip]313write32(buf + 12, 0x00000000); // L2: .word Offset(&(.got.plt) - L1 - 8314uint64_t l1 = pltEntryAddr + 4;315write32(buf + 12, gotPltEntryAddr - l1 - 8);316}317318// The default PLT entries require the .got.plt to be within 128 Mb of the319// .plt in the positive direction.320void ARM::writePlt(uint8_t *buf, const Symbol &sym,321uint64_t pltEntryAddr) const {322323if (!useThumbPLTs()) {324uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 8;325326// The PLT entry is similar to the example given in Appendix A of ELF for327// the Arm Architecture. Instead of using the Group Relocations to find the328// optimal rotation for the 8-bit immediate used in the add instructions we329// hard code the most compact rotations for simplicity. This saves a load330// instruction over the long plt sequences.331const uint32_t pltData[] = {3320xe28fc600, // L1: add ip, pc, #0x0NN00000 Offset(&(.got.plt) - L1 - 83330xe28cca00, // add ip, ip, #0x000NN000 Offset(&(.got.plt) - L1 - 83340xe5bcf000, // ldr pc, [ip, #0x00000NNN] Offset(&(.got.plt) - L1 - 8335};336if (!llvm::isUInt<27>(offset)) {337// We cannot encode the Offset, use the long form.338writePltLong(buf, sym.getGotPltVA(), pltEntryAddr);339return;340}341write32(buf + 0, pltData[0] | ((offset >> 20) & 0xff));342write32(buf + 4, pltData[1] | ((offset >> 12) & 0xff));343write32(buf + 8, pltData[2] | (offset & 0xfff));344memcpy(buf + 12, trapInstr.data(), 4); // Pad to 16-byte boundary345} else {346uint64_t offset = sym.getGotPltVA() - pltEntryAddr - 12;347assert(llvm::isUInt<32>(offset) && "This should always fit into a 32-bit offset");348349// A PLT entry will be:350//351// movw ip, #<lower 16 bits>352// movt ip, #<upper 16 bits>353// add ip, pc354// L1: ldr.w pc, [ip]355// b L1356//357// where ip = r12 = 0xc358359// movw ip, #<lower 16 bits>360write16(buf + 2, 0x0c00); // use `ip`361relocateNoSym(buf, R_ARM_THM_MOVW_ABS_NC, offset);362363// movt ip, #<upper 16 bits>364write16(buf + 6, 0x0c00); // use `ip`365relocateNoSym(buf + 4, R_ARM_THM_MOVT_ABS, offset);366367write16(buf + 8, 0x44fc); // add ip, pc368write16(buf + 10, 0xf8dc); // ldr.w pc, [ip] (bottom half)369write16(buf + 12, 0xf000); // ldr.w pc, [ip] (upper half)370write16(buf + 14, 0xe7fc); // Branch to previous instruction371}372}373374void ARM::addPltSymbols(InputSection &isec, uint64_t off) const {375if (useThumbPLTs()) {376addSyntheticLocal("$t", STT_NOTYPE, off, 0, isec);377} else {378addSyntheticLocal("$a", STT_NOTYPE, off, 0, isec);379addSyntheticLocal("$d", STT_NOTYPE, off + 12, 0, isec);380}381}382383bool ARM::needsThunk(RelExpr expr, RelType type, const InputFile *file,384uint64_t branchAddr, const Symbol &s,385int64_t a) const {386// If s is an undefined weak symbol and does not have a PLT entry then it will387// be resolved as a branch to the next instruction. If it is hidden, its388// binding has been converted to local, so we just check isUndefined() here. A389// undefined non-weak symbol will have been errored.390if (s.isUndefined() && !s.isInPlt())391return false;392// A state change from ARM to Thumb and vice versa must go through an393// interworking thunk if the relocation type is not R_ARM_CALL or394// R_ARM_THM_CALL.395switch (type) {396case R_ARM_PC24:397case R_ARM_PLT32:398case R_ARM_JUMP24:399// Source is ARM, all PLT entries are ARM so no interworking required.400// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 set (Thumb).401assert(!useThumbPLTs() &&402"If the source is ARM, we should not need Thumb PLTs");403if (s.isFunc() && expr == R_PC && (s.getVA() & 1))404return true;405[[fallthrough]];406case R_ARM_CALL: {407uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();408return !inBranchRange(type, branchAddr, dst + a) ||409(!config->armHasBlx && (s.getVA() & 1));410}411case R_ARM_THM_JUMP19:412case R_ARM_THM_JUMP24:413// Source is Thumb, when all PLT entries are ARM interworking is required.414// Otherwise we need to interwork if STT_FUNC Symbol has bit 0 clear (ARM).415if ((expr == R_PLT_PC && !useThumbPLTs()) ||416(s.isFunc() && (s.getVA() & 1) == 0))417return true;418[[fallthrough]];419case R_ARM_THM_CALL: {420uint64_t dst = (expr == R_PLT_PC) ? s.getPltVA() : s.getVA();421return !inBranchRange(type, branchAddr, dst + a) ||422(!config->armHasBlx && (s.getVA() & 1) == 0);;423}424}425return false;426}427428uint32_t ARM::getThunkSectionSpacing() const {429// The placing of pre-created ThunkSections is controlled by the value430// thunkSectionSpacing returned by getThunkSectionSpacing(). The aim is to431// place the ThunkSection such that all branches from the InputSections432// prior to the ThunkSection can reach a Thunk placed at the end of the433// ThunkSection. Graphically:434// | up to thunkSectionSpacing .text input sections |435// | ThunkSection |436// | up to thunkSectionSpacing .text input sections |437// | ThunkSection |438439// Pre-created ThunkSections are spaced roughly 16MiB apart on ARMv7. This440// is to match the most common expected case of a Thumb 2 encoded BL, BLX or441// B.W:442// ARM B, BL, BLX range +/- 32MiB443// Thumb B.W, BL, BLX range +/- 16MiB444// Thumb B<cc>.W range +/- 1MiB445// If a branch cannot reach a pre-created ThunkSection a new one will be446// created so we can handle the rare cases of a Thumb 2 conditional branch.447// We intentionally use a lower size for thunkSectionSpacing than the maximum448// branch range so the end of the ThunkSection is more likely to be within449// range of the branch instruction that is furthest away. The value we shorten450// thunkSectionSpacing by is set conservatively to allow us to create 16,384451// 12 byte Thunks at any offset in a ThunkSection without risk of a branch to452// one of the Thunks going out of range.453454// On Arm the thunkSectionSpacing depends on the range of the Thumb Branch455// range. On earlier Architectures such as ARMv4, ARMv5 and ARMv6 (except456// ARMv6T2) the range is +/- 4MiB.457458return (config->armJ1J2BranchEncoding) ? 0x1000000 - 0x30000459: 0x400000 - 0x7500;460}461462bool ARM::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {463if ((dst & 0x1) == 0)464// Destination is ARM, if ARM caller then Src is already 4-byte aligned.465// If Thumb Caller (BLX) the Src address has bottom 2 bits cleared to ensure466// destination will be 4 byte aligned.467src &= ~0x3;468else469// Bit 0 == 1 denotes Thumb state, it is not part of the range.470dst &= ~0x1;471472int64_t offset = dst - src;473switch (type) {474case R_ARM_PC24:475case R_ARM_PLT32:476case R_ARM_JUMP24:477case R_ARM_CALL:478return llvm::isInt<26>(offset);479case R_ARM_THM_JUMP19:480return llvm::isInt<21>(offset);481case R_ARM_THM_JUMP24:482case R_ARM_THM_CALL:483return config->armJ1J2BranchEncoding ? llvm::isInt<25>(offset)484: llvm::isInt<23>(offset);485default:486return true;487}488}489490// Helper to produce message text when LLD detects that a CALL relocation to491// a non STT_FUNC symbol that may result in incorrect interworking between ARM492// or Thumb.493static void stateChangeWarning(uint8_t *loc, RelType relt, const Symbol &s) {494assert(!s.isFunc());495const ErrorPlace place = getErrorPlace(loc);496std::string hint;497if (!place.srcLoc.empty())498hint = "; " + place.srcLoc;499if (s.isSection()) {500// Section symbols must be defined and in a section. Users cannot change501// the type. Use the section name as getName() returns an empty string.502warn(place.loc + "branch and link relocation: " + toString(relt) +503" to STT_SECTION symbol " + cast<Defined>(s).section->name +504" ; interworking not performed" + hint);505} else {506// Warn with hint on how to alter the symbol type.507warn(getErrorLocation(loc) + "branch and link relocation: " +508toString(relt) + " to non STT_FUNC symbol: " + s.getName() +509" interworking not performed; consider using directive '.type " +510s.getName() +511", %function' to give symbol type STT_FUNC if interworking between "512"ARM and Thumb is required" +513hint);514}515}516517// Rotate a 32-bit unsigned value right by a specified amt of bits.518static uint32_t rotr32(uint32_t val, uint32_t amt) {519assert(amt < 32 && "Invalid rotate amount");520return (val >> amt) | (val << ((32 - amt) & 31));521}522523static std::pair<uint32_t, uint32_t> getRemAndLZForGroup(unsigned group,524uint32_t val) {525uint32_t rem, lz;526do {527lz = llvm::countl_zero(val) & ~1;528rem = val;529if (lz == 32) // implies rem == 0530break;531val &= 0xffffff >> lz;532} while (group--);533return {rem, lz};534}535536static void encodeAluGroup(uint8_t *loc, const Relocation &rel, uint64_t val,537int group, bool check) {538// ADD/SUB (immediate) add = bit23, sub = bit22539// immediate field carries is a 12-bit modified immediate, made up of a 4-bit540// even rotate right and an 8-bit immediate.541uint32_t opcode = 0x00800000;542if (val >> 63) {543opcode = 0x00400000;544val = -val;545}546uint32_t imm, lz;547std::tie(imm, lz) = getRemAndLZForGroup(group, val);548uint32_t rot = 0;549if (lz < 24) {550imm = rotr32(imm, 24 - lz);551rot = (lz + 8) << 7;552}553if (check && imm > 0xff)554error(getErrorLocation(loc) + "unencodeable immediate " + Twine(val).str() +555" for relocation " + toString(rel.type));556write32(loc, (read32(loc) & 0xff3ff000) | opcode | rot | (imm & 0xff));557}558559static void encodeLdrGroup(uint8_t *loc, const Relocation &rel, uint64_t val,560int group) {561// R_ARM_LDR_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a562// function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear563// bottom bit to recover S + A - P.564if (rel.sym->isFunc())565val &= ~0x1;566// LDR (literal) u = bit23567uint32_t opcode = 0x00800000;568if (val >> 63) {569opcode = 0x0;570val = -val;571}572uint32_t imm = getRemAndLZForGroup(group, val).first;573checkUInt(loc, imm, 12, rel);574write32(loc, (read32(loc) & 0xff7ff000) | opcode | imm);575}576577static void encodeLdrsGroup(uint8_t *loc, const Relocation &rel, uint64_t val,578int group) {579// R_ARM_LDRS_PC_Gn is S + A - P, we have ((S + A) | T) - P, if S is a580// function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear581// bottom bit to recover S + A - P.582if (rel.sym->isFunc())583val &= ~0x1;584// LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23585uint32_t opcode = 0x00800000;586if (val >> 63) {587opcode = 0x0;588val = -val;589}590uint32_t imm = getRemAndLZForGroup(group, val).first;591checkUInt(loc, imm, 8, rel);592write32(loc, (read32(loc) & 0xff7ff0f0) | opcode | ((imm & 0xf0) << 4) |593(imm & 0xf));594}595596void ARM::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {597switch (rel.type) {598case R_ARM_ABS32:599case R_ARM_BASE_PREL:600case R_ARM_GOTOFF32:601case R_ARM_GOT_BREL:602case R_ARM_GOT_PREL:603case R_ARM_REL32:604case R_ARM_RELATIVE:605case R_ARM_SBREL32:606case R_ARM_TARGET1:607case R_ARM_TARGET2:608case R_ARM_TLS_GD32:609case R_ARM_TLS_IE32:610case R_ARM_TLS_LDM32:611case R_ARM_TLS_LDO32:612case R_ARM_TLS_LE32:613case R_ARM_TLS_TPOFF32:614case R_ARM_TLS_DTPOFF32:615write32(loc, val);616break;617case R_ARM_PREL31:618checkInt(loc, val, 31, rel);619write32(loc, (read32(loc) & 0x80000000) | (val & ~0x80000000));620break;621case R_ARM_CALL: {622// R_ARM_CALL is used for BL and BLX instructions, for symbols of type623// STT_FUNC we choose whether to write a BL or BLX depending on the624// value of bit 0 of Val. With bit 0 == 1 denoting Thumb. If the symbol is625// not of type STT_FUNC then we must preserve the original instruction.626assert(rel.sym); // R_ARM_CALL is always reached via relocate().627bool bit0Thumb = val & 1;628bool isBlx = (read32(loc) & 0xfe000000) == 0xfa000000;629// lld 10.0 and before always used bit0Thumb when deciding to write a BLX630// even when type not STT_FUNC.631if (!rel.sym->isFunc() && isBlx != bit0Thumb)632stateChangeWarning(loc, rel.type, *rel.sym);633if (rel.sym->isFunc() ? bit0Thumb : isBlx) {634// The BLX encoding is 0xfa:H:imm24 where Val = imm24:H:'1'635checkInt(loc, val, 26, rel);636write32(loc, 0xfa000000 | // opcode637((val & 2) << 23) | // H638((val >> 2) & 0x00ffffff)); // imm24639break;640}641// BLX (always unconditional) instruction to an ARM Target, select an642// unconditional BL.643write32(loc, 0xeb000000 | (read32(loc) & 0x00ffffff));644// fall through as BL encoding is shared with B645}646[[fallthrough]];647case R_ARM_JUMP24:648case R_ARM_PC24:649case R_ARM_PLT32:650checkInt(loc, val, 26, rel);651write32(loc, (read32(loc) & ~0x00ffffff) | ((val >> 2) & 0x00ffffff));652break;653case R_ARM_THM_JUMP8:654// We do a 9 bit check because val is right-shifted by 1 bit.655checkInt(loc, val, 9, rel);656write16(loc, (read32(loc) & 0xff00) | ((val >> 1) & 0x00ff));657break;658case R_ARM_THM_JUMP11:659// We do a 12 bit check because val is right-shifted by 1 bit.660checkInt(loc, val, 12, rel);661write16(loc, (read32(loc) & 0xf800) | ((val >> 1) & 0x07ff));662break;663case R_ARM_THM_JUMP19:664// Encoding T3: Val = S:J2:J1:imm6:imm11:0665checkInt(loc, val, 21, rel);666write16(loc,667(read16(loc) & 0xfbc0) | // opcode cond668((val >> 10) & 0x0400) | // S669((val >> 12) & 0x003f)); // imm6670write16(loc + 2,6710x8000 | // opcode672((val >> 8) & 0x0800) | // J2673((val >> 5) & 0x2000) | // J1674((val >> 1) & 0x07ff)); // imm11675break;676case R_ARM_THM_CALL: {677// R_ARM_THM_CALL is used for BL and BLX instructions, for symbols of type678// STT_FUNC we choose whether to write a BL or BLX depending on the679// value of bit 0 of Val. With bit 0 == 0 denoting ARM, if the symbol is680// not of type STT_FUNC then we must preserve the original instruction.681// PLT entries are always ARM state so we know we need to interwork.682assert(rel.sym); // R_ARM_THM_CALL is always reached via relocate().683bool bit0Thumb = val & 1;684bool useThumb = bit0Thumb || useThumbPLTs();685bool isBlx = (read16(loc + 2) & 0x1000) == 0;686// lld 10.0 and before always used bit0Thumb when deciding to write a BLX687// even when type not STT_FUNC.688if (!rel.sym->isFunc() && !rel.sym->isInPlt() && isBlx == useThumb)689stateChangeWarning(loc, rel.type, *rel.sym);690if ((rel.sym->isFunc() || rel.sym->isInPlt()) ? !useThumb : isBlx) {691// We are writing a BLX. Ensure BLX destination is 4-byte aligned. As692// the BLX instruction may only be two byte aligned. This must be done693// before overflow check.694val = alignTo(val, 4);695write16(loc + 2, read16(loc + 2) & ~0x1000);696} else {697write16(loc + 2, (read16(loc + 2) & ~0x1000) | 1 << 12);698}699if (!config->armJ1J2BranchEncoding) {700// Older Arm architectures do not support R_ARM_THM_JUMP24 and have701// different encoding rules and range due to J1 and J2 always being 1.702checkInt(loc, val, 23, rel);703write16(loc,7040xf000 | // opcode705((val >> 12) & 0x07ff)); // imm11706write16(loc + 2,707(read16(loc + 2) & 0xd000) | // opcode7080x2800 | // J1 == J2 == 1709((val >> 1) & 0x07ff)); // imm11710break;711}712}713// Fall through as rest of encoding is the same as B.W714[[fallthrough]];715case R_ARM_THM_JUMP24:716// Encoding B T4, BL T1, BLX T2: Val = S:I1:I2:imm10:imm11:0717checkInt(loc, val, 25, rel);718write16(loc,7190xf000 | // opcode720((val >> 14) & 0x0400) | // S721((val >> 12) & 0x03ff)); // imm10722write16(loc + 2,723(read16(loc + 2) & 0xd000) | // opcode724(((~(val >> 10)) ^ (val >> 11)) & 0x2000) | // J1725(((~(val >> 11)) ^ (val >> 13)) & 0x0800) | // J2726((val >> 1) & 0x07ff)); // imm11727break;728case R_ARM_MOVW_ABS_NC:729case R_ARM_MOVW_PREL_NC:730case R_ARM_MOVW_BREL_NC:731write32(loc, (read32(loc) & ~0x000f0fff) | ((val & 0xf000) << 4) |732(val & 0x0fff));733break;734case R_ARM_MOVT_ABS:735case R_ARM_MOVT_PREL:736case R_ARM_MOVT_BREL:737write32(loc, (read32(loc) & ~0x000f0fff) |738(((val >> 16) & 0xf000) << 4) | ((val >> 16) & 0xfff));739break;740case R_ARM_THM_MOVT_ABS:741case R_ARM_THM_MOVT_PREL:742case R_ARM_THM_MOVT_BREL:743// Encoding T1: A = imm4:i:imm3:imm8744745write16(loc,7460xf2c0 | // opcode747((val >> 17) & 0x0400) | // i748((val >> 28) & 0x000f)); // imm4749750write16(loc + 2,751(read16(loc + 2) & 0x8f00) | // opcode752((val >> 12) & 0x7000) | // imm3753((val >> 16) & 0x00ff)); // imm8754break;755case R_ARM_THM_MOVW_ABS_NC:756case R_ARM_THM_MOVW_PREL_NC:757case R_ARM_THM_MOVW_BREL_NC:758// Encoding T3: A = imm4:i:imm3:imm8759write16(loc,7600xf240 | // opcode761((val >> 1) & 0x0400) | // i762((val >> 12) & 0x000f)); // imm4763write16(loc + 2,764(read16(loc + 2) & 0x8f00) | // opcode765((val << 4) & 0x7000) | // imm3766(val & 0x00ff)); // imm8767break;768case R_ARM_THM_ALU_ABS_G3:769write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 24) & 0x00ff));770break;771case R_ARM_THM_ALU_ABS_G2_NC:772write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 16) & 0x00ff));773break;774case R_ARM_THM_ALU_ABS_G1_NC:775write16(loc, (read16(loc) &~ 0x00ff) | ((val >> 8) & 0x00ff));776break;777case R_ARM_THM_ALU_ABS_G0_NC:778write16(loc, (read16(loc) &~ 0x00ff) | (val & 0x00ff));779break;780case R_ARM_ALU_PC_G0:781encodeAluGroup(loc, rel, val, 0, true);782break;783case R_ARM_ALU_PC_G0_NC:784encodeAluGroup(loc, rel, val, 0, false);785break;786case R_ARM_ALU_PC_G1:787encodeAluGroup(loc, rel, val, 1, true);788break;789case R_ARM_ALU_PC_G1_NC:790encodeAluGroup(loc, rel, val, 1, false);791break;792case R_ARM_ALU_PC_G2:793encodeAluGroup(loc, rel, val, 2, true);794break;795case R_ARM_LDR_PC_G0:796encodeLdrGroup(loc, rel, val, 0);797break;798case R_ARM_LDR_PC_G1:799encodeLdrGroup(loc, rel, val, 1);800break;801case R_ARM_LDR_PC_G2:802encodeLdrGroup(loc, rel, val, 2);803break;804case R_ARM_LDRS_PC_G0:805encodeLdrsGroup(loc, rel, val, 0);806break;807case R_ARM_LDRS_PC_G1:808encodeLdrsGroup(loc, rel, val, 1);809break;810case R_ARM_LDRS_PC_G2:811encodeLdrsGroup(loc, rel, val, 2);812break;813case R_ARM_THM_ALU_PREL_11_0: {814// ADR encoding T2 (sub), T3 (add) i:imm3:imm8815int64_t imm = val;816uint16_t sub = 0;817if (imm < 0) {818imm = -imm;819sub = 0x00a0;820}821checkUInt(loc, imm, 12, rel);822write16(loc, (read16(loc) & 0xfb0f) | sub | (imm & 0x800) >> 1);823write16(loc + 2,824(read16(loc + 2) & 0x8f00) | (imm & 0x700) << 4 | (imm & 0xff));825break;826}827case R_ARM_THM_PC8:828// ADR and LDR literal encoding T1 positive offset only imm8:00829// R_ARM_THM_PC8 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a830// function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear831// bottom bit to recover S + A - Pa.832if (rel.sym->isFunc())833val &= ~0x1;834checkUInt(loc, val, 10, rel);835checkAlignment(loc, val, 4, rel);836write16(loc, (read16(loc) & 0xff00) | (val & 0x3fc) >> 2);837break;838case R_ARM_THM_PC12: {839// LDR (literal) encoding T2, add = (U == '1') imm12840// imm12 is unsigned841// R_ARM_THM_PC12 is S + A - Pa, we have ((S + A) | T) - Pa, if S is a842// function then addr is 0 (modulo 2) and Pa is 0 (modulo 4) so we can clear843// bottom bit to recover S + A - Pa.844if (rel.sym->isFunc())845val &= ~0x1;846int64_t imm12 = val;847uint16_t u = 0x0080;848if (imm12 < 0) {849imm12 = -imm12;850u = 0;851}852checkUInt(loc, imm12, 12, rel);853write16(loc, read16(loc) | u);854write16(loc + 2, (read16(loc + 2) & 0xf000) | imm12);855break;856}857default:858llvm_unreachable("unknown relocation");859}860}861862int64_t ARM::getImplicitAddend(const uint8_t *buf, RelType type) const {863switch (type) {864default:865internalLinkerError(getErrorLocation(buf),866"cannot read addend for relocation " + toString(type));867return 0;868case R_ARM_ABS32:869case R_ARM_BASE_PREL:870case R_ARM_GLOB_DAT:871case R_ARM_GOTOFF32:872case R_ARM_GOT_BREL:873case R_ARM_GOT_PREL:874case R_ARM_IRELATIVE:875case R_ARM_REL32:876case R_ARM_RELATIVE:877case R_ARM_SBREL32:878case R_ARM_TARGET1:879case R_ARM_TARGET2:880case R_ARM_TLS_DTPMOD32:881case R_ARM_TLS_DTPOFF32:882case R_ARM_TLS_GD32:883case R_ARM_TLS_IE32:884case R_ARM_TLS_LDM32:885case R_ARM_TLS_LE32:886case R_ARM_TLS_LDO32:887case R_ARM_TLS_TPOFF32:888return SignExtend64<32>(read32(buf));889case R_ARM_PREL31:890return SignExtend64<31>(read32(buf));891case R_ARM_CALL:892case R_ARM_JUMP24:893case R_ARM_PC24:894case R_ARM_PLT32:895return SignExtend64<26>(read32(buf) << 2);896case R_ARM_THM_JUMP8:897return SignExtend64<9>(read16(buf) << 1);898case R_ARM_THM_JUMP11:899return SignExtend64<12>(read16(buf) << 1);900case R_ARM_THM_JUMP19: {901// Encoding T3: A = S:J2:J1:imm10:imm6:0902uint16_t hi = read16(buf);903uint16_t lo = read16(buf + 2);904return SignExtend64<20>(((hi & 0x0400) << 10) | // S905((lo & 0x0800) << 8) | // J2906((lo & 0x2000) << 5) | // J1907((hi & 0x003f) << 12) | // imm6908((lo & 0x07ff) << 1)); // imm11:0909}910case R_ARM_THM_CALL:911if (!config->armJ1J2BranchEncoding) {912// Older Arm architectures do not support R_ARM_THM_JUMP24 and have913// different encoding rules and range due to J1 and J2 always being 1.914uint16_t hi = read16(buf);915uint16_t lo = read16(buf + 2);916return SignExtend64<22>(((hi & 0x7ff) << 12) | // imm11917((lo & 0x7ff) << 1)); // imm11:0918break;919}920[[fallthrough]];921case R_ARM_THM_JUMP24: {922// Encoding B T4, BL T1, BLX T2: A = S:I1:I2:imm10:imm11:0923// I1 = NOT(J1 EOR S), I2 = NOT(J2 EOR S)924uint16_t hi = read16(buf);925uint16_t lo = read16(buf + 2);926return SignExtend64<24>(((hi & 0x0400) << 14) | // S927(~((lo ^ (hi << 3)) << 10) & 0x00800000) | // I1928(~((lo ^ (hi << 1)) << 11) & 0x00400000) | // I2929((hi & 0x003ff) << 12) | // imm0930((lo & 0x007ff) << 1)); // imm11:0931}932// ELF for the ARM Architecture 4.6.1.1 the implicit addend for MOVW and933// MOVT is in the range -32768 <= A < 32768934case R_ARM_MOVW_ABS_NC:935case R_ARM_MOVT_ABS:936case R_ARM_MOVW_PREL_NC:937case R_ARM_MOVT_PREL:938case R_ARM_MOVW_BREL_NC:939case R_ARM_MOVT_BREL: {940uint64_t val = read32(buf) & 0x000f0fff;941return SignExtend64<16>(((val & 0x000f0000) >> 4) | (val & 0x00fff));942}943case R_ARM_THM_MOVW_ABS_NC:944case R_ARM_THM_MOVT_ABS:945case R_ARM_THM_MOVW_PREL_NC:946case R_ARM_THM_MOVT_PREL:947case R_ARM_THM_MOVW_BREL_NC:948case R_ARM_THM_MOVT_BREL: {949// Encoding T3: A = imm4:i:imm3:imm8950uint16_t hi = read16(buf);951uint16_t lo = read16(buf + 2);952return SignExtend64<16>(((hi & 0x000f) << 12) | // imm4953((hi & 0x0400) << 1) | // i954((lo & 0x7000) >> 4) | // imm3955(lo & 0x00ff)); // imm8956}957case R_ARM_THM_ALU_ABS_G0_NC:958case R_ARM_THM_ALU_ABS_G1_NC:959case R_ARM_THM_ALU_ABS_G2_NC:960case R_ARM_THM_ALU_ABS_G3:961return read16(buf) & 0xff;962case R_ARM_ALU_PC_G0:963case R_ARM_ALU_PC_G0_NC:964case R_ARM_ALU_PC_G1:965case R_ARM_ALU_PC_G1_NC:966case R_ARM_ALU_PC_G2: {967// 12-bit immediate is a modified immediate made up of a 4-bit even968// right rotation and 8-bit constant. After the rotation the value969// is zero-extended. When bit 23 is set the instruction is an add, when970// bit 22 is set it is a sub.971uint32_t instr = read32(buf);972uint32_t val = rotr32(instr & 0xff, ((instr & 0xf00) >> 8) * 2);973return (instr & 0x00400000) ? -val : val;974}975case R_ARM_LDR_PC_G0:976case R_ARM_LDR_PC_G1:977case R_ARM_LDR_PC_G2: {978// ADR (literal) add = bit23, sub = bit22979// LDR (literal) u = bit23 unsigned imm12980bool u = read32(buf) & 0x00800000;981uint32_t imm12 = read32(buf) & 0xfff;982return u ? imm12 : -imm12;983}984case R_ARM_LDRS_PC_G0:985case R_ARM_LDRS_PC_G1:986case R_ARM_LDRS_PC_G2: {987// LDRD/LDRH/LDRSB/LDRSH (literal) u = bit23 unsigned imm8988uint32_t opcode = read32(buf);989bool u = opcode & 0x00800000;990uint32_t imm4l = opcode & 0xf;991uint32_t imm4h = (opcode & 0xf00) >> 4;992return u ? (imm4h | imm4l) : -(imm4h | imm4l);993}994case R_ARM_THM_ALU_PREL_11_0: {995// Thumb2 ADR, which is an alias for a sub or add instruction with an996// unsigned immediate.997// ADR encoding T2 (sub), T3 (add) i:imm3:imm8998uint16_t hi = read16(buf);999uint16_t lo = read16(buf + 2);1000uint64_t imm = (hi & 0x0400) << 1 | // i1001(lo & 0x7000) >> 4 | // imm31002(lo & 0x00ff); // imm81003// For sub, addend is negative, add is positive.1004return (hi & 0x00f0) ? -imm : imm;1005}1006case R_ARM_THM_PC8:1007// ADR and LDR (literal) encoding T11008// From ELF for the ARM Architecture the initial signed addend is formed1009// from an unsigned field using expression (((imm8:00 + 4) & 0x3ff) – 4)1010// this trick permits the PC bias of -4 to be encoded using imm8 = 0xff1011return ((((read16(buf) & 0xff) << 2) + 4) & 0x3ff) - 4;1012case R_ARM_THM_PC12: {1013// LDR (literal) encoding T2, add = (U == '1') imm121014bool u = read16(buf) & 0x0080;1015uint64_t imm12 = read16(buf + 2) & 0x0fff;1016return u ? imm12 : -imm12;1017}1018case R_ARM_NONE:1019case R_ARM_V4BX:1020case R_ARM_JUMP_SLOT:1021// These relocations are defined as not having an implicit addend.1022return 0;1023}1024}10251026static bool isArmMapSymbol(const Symbol *b) {1027return b->getName() == "$a" || b->getName().starts_with("$a.");1028}10291030static bool isThumbMapSymbol(const Symbol *s) {1031return s->getName() == "$t" || s->getName().starts_with("$t.");1032}10331034static bool isDataMapSymbol(const Symbol *b) {1035return b->getName() == "$d" || b->getName().starts_with("$d.");1036}10371038void elf::sortArmMappingSymbols() {1039// For each input section make sure the mapping symbols are sorted in1040// ascending order.1041for (auto &kv : sectionMap) {1042SmallVector<const Defined *, 0> &mapSyms = kv.second;1043llvm::stable_sort(mapSyms, [](const Defined *a, const Defined *b) {1044return a->value < b->value;1045});1046}1047}10481049void elf::addArmInputSectionMappingSymbols() {1050// Collect mapping symbols for every executable input sections.1051// The linker generated mapping symbols for all the synthetic1052// sections are adding into the sectionmap through the function1053// addArmSyntheitcSectionMappingSymbol.1054for (ELFFileBase *file : ctx.objectFiles) {1055for (Symbol *sym : file->getLocalSymbols()) {1056auto *def = dyn_cast<Defined>(sym);1057if (!def)1058continue;1059if (!isArmMapSymbol(def) && !isDataMapSymbol(def) &&1060!isThumbMapSymbol(def))1061continue;1062if (auto *sec = cast_if_present<InputSection>(def->section))1063if (sec->flags & SHF_EXECINSTR)1064sectionMap[sec].push_back(def);1065}1066}1067}10681069// Synthetic sections are not backed by an ELF file where we can access the1070// symbol table, instead mapping symbols added to synthetic sections are stored1071// in the synthetic symbol table. Due to the presence of strip (--strip-all),1072// we can not rely on the synthetic symbol table retaining the mapping symbols.1073// Instead we record the mapping symbols locally.1074void elf::addArmSyntheticSectionMappingSymbol(Defined *sym) {1075if (!isArmMapSymbol(sym) && !isDataMapSymbol(sym) && !isThumbMapSymbol(sym))1076return;1077if (auto *sec = cast_if_present<InputSection>(sym->section))1078if (sec->flags & SHF_EXECINSTR)1079sectionMap[sec].push_back(sym);1080}10811082static void toLittleEndianInstructions(uint8_t *buf, uint64_t start,1083uint64_t end, uint64_t width) {1084CodeState curState = static_cast<CodeState>(width);1085if (curState == CodeState::Arm)1086for (uint64_t i = start; i < end; i += width)1087write32le(buf + i, read32(buf + i));10881089if (curState == CodeState::Thumb)1090for (uint64_t i = start; i < end; i += width)1091write16le(buf + i, read16(buf + i));1092}10931094// Arm BE8 big endian format requires instructions to be little endian, with1095// the initial contents big-endian. Convert the big-endian instructions to1096// little endian leaving literal data untouched. We use mapping symbols to1097// identify half open intervals of Arm code [$a, non $a) and Thumb code1098// [$t, non $t) and convert these to little endian a word or half word at a1099// time respectively.1100void elf::convertArmInstructionstoBE8(InputSection *sec, uint8_t *buf) {1101if (!sectionMap.contains(sec))1102return;11031104SmallVector<const Defined *, 0> &mapSyms = sectionMap[sec];11051106if (mapSyms.empty())1107return;11081109CodeState curState = CodeState::Data;1110uint64_t start = 0, width = 0, size = sec->getSize();1111for (auto &msym : mapSyms) {1112CodeState newState = CodeState::Data;1113if (isThumbMapSymbol(msym))1114newState = CodeState::Thumb;1115else if (isArmMapSymbol(msym))1116newState = CodeState::Arm;11171118if (newState == curState)1119continue;11201121if (curState != CodeState::Data) {1122width = static_cast<uint64_t>(curState);1123toLittleEndianInstructions(buf, start, msym->value, width);1124}1125start = msym->value;1126curState = newState;1127}11281129// Passed last mapping symbol, may need to reverse1130// up to end of section.1131if (curState != CodeState::Data) {1132width = static_cast<uint64_t>(curState);1133toLittleEndianInstructions(buf, start, size, width);1134}1135}11361137// The Arm Cortex-M Security Extensions (CMSE) splits a system into two parts;1138// the non-secure and secure states with the secure state inaccessible from the1139// non-secure state, apart from an area of memory in secure state called the1140// secure gateway which is accessible from non-secure state. The secure gateway1141// contains one or more entry points which must start with a landing pad1142// instruction SG. Arm recommends that the secure gateway consists only of1143// secure gateway veneers, which are made up of a SG instruction followed by a1144// branch to the destination in secure state. Full details can be found in Arm1145// v8-M Security Extensions Requirements on Development Tools.1146//1147// The CMSE model of software development requires the non-secure and secure1148// states to be developed as two separate programs. The non-secure developer is1149// provided with an import library defining symbols describing the entry points1150// in the secure gateway. No additional linker support is required for the1151// non-secure state.1152//1153// Development of the secure state requires linker support to manage the secure1154// gateway veneers. The management consists of:1155// - Creation of new secure gateway veneers based on symbol conventions.1156// - Checking the address of existing secure gateway veneers.1157// - Warning when existing secure gateway veneers removed.1158//1159// The secure gateway veneers are created in an import library, which is just an1160// ELF object with a symbol table. The import library is controlled by two1161// command line options:1162// --in-implib (specify an input import library from a previous revision of the1163// program).1164// --out-implib (specify an output import library to be created by the linker).1165//1166// The input import library is used to manage consistency of the secure entry1167// points. The output import library is for new and updated secure entry points.1168//1169// The symbol convention that identifies secure entry functions is the prefix1170// __acle_se_ for a symbol called name the linker is expected to create a secure1171// gateway veneer if symbols __acle_se_name and name have the same address.1172// After creating a secure gateway veneer the symbol name labels the secure1173// gateway veneer and the __acle_se_name labels the function definition.1174//1175// The LLD implementation:1176// - Reads an existing import library with importCmseSymbols().1177// - Determines which new secure gateway veneers to create and redirects calls1178// within the secure state to the __acle_se_ prefixed symbol with1179// processArmCmseSymbols().1180// - Models the SG veneers as a synthetic section.11811182// Initialize symbols. symbols is a parallel array to the corresponding ELF1183// symbol table.1184template <class ELFT> void ObjFile<ELFT>::importCmseSymbols() {1185ArrayRef<Elf_Sym> eSyms = getELFSyms<ELFT>();1186// Error for local symbols. The symbol at index 0 is LOCAL. So skip it.1187for (size_t i = 1, end = firstGlobal; i != end; ++i) {1188errorOrWarn("CMSE symbol '" + CHECK(eSyms[i].getName(stringTable), this) +1189"' in import library '" + toString(this) + "' is not global");1190}11911192for (size_t i = firstGlobal, end = eSyms.size(); i != end; ++i) {1193const Elf_Sym &eSym = eSyms[i];1194Defined *sym = reinterpret_cast<Defined *>(make<SymbolUnion>());11951196// Initialize symbol fields.1197memset(sym, 0, sizeof(Symbol));1198sym->setName(CHECK(eSyms[i].getName(stringTable), this));1199sym->value = eSym.st_value;1200sym->size = eSym.st_size;1201sym->type = eSym.getType();1202sym->binding = eSym.getBinding();1203sym->stOther = eSym.st_other;12041205if (eSym.st_shndx != SHN_ABS) {1206error("CMSE symbol '" + sym->getName() + "' in import library '" +1207toString(this) + "' is not absolute");1208continue;1209}12101211if (!(eSym.st_value & 1) || (eSym.getType() != STT_FUNC)) {1212error("CMSE symbol '" + sym->getName() + "' in import library '" +1213toString(this) + "' is not a Thumb function definition");1214continue;1215}12161217if (symtab.cmseImportLib.count(sym->getName())) {1218error("CMSE symbol '" + sym->getName() +1219"' is multiply defined in import library '" + toString(this) + "'");1220continue;1221}12221223if (eSym.st_size != ACLESESYM_SIZE) {1224warn("CMSE symbol '" + sym->getName() + "' in import library '" +1225toString(this) + "' does not have correct size of " +1226Twine(ACLESESYM_SIZE) + " bytes");1227}12281229symtab.cmseImportLib[sym->getName()] = sym;1230}1231}12321233// Check symbol attributes of the acleSeSym, sym pair.1234// Both symbols should be global/weak Thumb code symbol definitions.1235static std::string checkCmseSymAttributes(Symbol *acleSeSym, Symbol *sym) {1236auto check = [](Symbol *s, StringRef type) -> std::optional<std::string> {1237auto d = dyn_cast_or_null<Defined>(s);1238if (!(d && d->isFunc() && (d->value & 1)))1239return (Twine(toString(s->file)) + ": cmse " + type + " symbol '" +1240s->getName() + "' is not a Thumb function definition")1241.str();1242if (!d->section)1243return (Twine(toString(s->file)) + ": cmse " + type + " symbol '" +1244s->getName() + "' cannot be an absolute symbol")1245.str();1246return std::nullopt;1247};1248for (auto [sym, type] :1249{std::make_pair(acleSeSym, "special"), std::make_pair(sym, "entry")})1250if (auto err = check(sym, type))1251return *err;1252return "";1253}12541255// Look for [__acle_se_<sym>, <sym>] pairs, as specified in the Cortex-M1256// Security Extensions specification.1257// 1) <sym> : A standard function name.1258// 2) __acle_se_<sym> : A special symbol that prefixes the standard function1259// name with __acle_se_.1260// Both these symbols are Thumb function symbols with external linkage.1261// <sym> may be redefined in .gnu.sgstubs.1262void elf::processArmCmseSymbols() {1263if (!config->cmseImplib)1264return;1265// Only symbols with external linkage end up in symtab, so no need to do1266// linkage checks. Only check symbol type.1267for (Symbol *acleSeSym : symtab.getSymbols()) {1268if (!acleSeSym->getName().starts_with(ACLESESYM_PREFIX))1269continue;1270// If input object build attributes do not support CMSE, error and disable1271// further scanning for <sym>, __acle_se_<sym> pairs.1272if (!config->armCMSESupport) {1273error("CMSE is only supported by ARMv8-M architecture or later");1274config->cmseImplib = false;1275break;1276}12771278// Try to find the associated symbol definition.1279// Symbol must have external linkage.1280StringRef name = acleSeSym->getName().substr(std::strlen(ACLESESYM_PREFIX));1281Symbol *sym = symtab.find(name);1282if (!sym) {1283error(toString(acleSeSym->file) + ": cmse special symbol '" +1284acleSeSym->getName() +1285"' detected, but no associated entry function definition '" + name +1286"' with external linkage found");1287continue;1288}12891290std::string errMsg = checkCmseSymAttributes(acleSeSym, sym);1291if (!errMsg.empty()) {1292error(errMsg);1293continue;1294}12951296// <sym> may be redefined later in the link in .gnu.sgstubs1297symtab.cmseSymMap[name] = {acleSeSym, sym};1298}12991300// If this is an Arm CMSE secure app, replace references to entry symbol <sym>1301// with its corresponding special symbol __acle_se_<sym>.1302parallelForEach(ctx.objectFiles, [&](InputFile *file) {1303MutableArrayRef<Symbol *> syms = file->getMutableSymbols();1304for (size_t i = 0, e = syms.size(); i != e; ++i) {1305StringRef symName = syms[i]->getName();1306if (symtab.cmseSymMap.count(symName))1307syms[i] = symtab.cmseSymMap[symName].acleSeSym;1308}1309});1310}13111312class elf::ArmCmseSGVeneer {1313public:1314ArmCmseSGVeneer(Symbol *sym, Symbol *acleSeSym,1315std::optional<uint64_t> addr = std::nullopt)1316: sym(sym), acleSeSym(acleSeSym), entAddr{addr} {}1317static const size_t size{ACLESESYM_SIZE};1318const std::optional<uint64_t> getAddr() const { return entAddr; };13191320Symbol *sym;1321Symbol *acleSeSym;1322uint64_t offset = 0;13231324private:1325const std::optional<uint64_t> entAddr;1326};13271328ArmCmseSGSection::ArmCmseSGSection()1329: SyntheticSection(llvm::ELF::SHF_ALLOC | llvm::ELF::SHF_EXECINSTR,1330llvm::ELF::SHT_PROGBITS,1331/*alignment=*/32, ".gnu.sgstubs") {1332entsize = ACLESESYM_SIZE;1333// The range of addresses used in the CMSE import library should be fixed.1334for (auto &[_, sym] : symtab.cmseImportLib) {1335if (impLibMaxAddr <= sym->value)1336impLibMaxAddr = sym->value + sym->size;1337}1338if (symtab.cmseSymMap.empty())1339return;1340addMappingSymbol();1341for (auto &[_, entryFunc] : symtab.cmseSymMap)1342addSGVeneer(cast<Defined>(entryFunc.acleSeSym),1343cast<Defined>(entryFunc.sym));1344for (auto &[_, sym] : symtab.cmseImportLib) {1345if (!symtab.inCMSEOutImpLib.count(sym->getName()))1346warn("entry function '" + sym->getName() +1347"' from CMSE import library is not present in secure application");1348}13491350if (!symtab.cmseImportLib.empty() && config->cmseOutputLib.empty()) {1351for (auto &[_, entryFunc] : symtab.cmseSymMap) {1352Symbol *sym = entryFunc.sym;1353if (!symtab.inCMSEOutImpLib.count(sym->getName()))1354warn("new entry function '" + sym->getName() +1355"' introduced but no output import library specified");1356}1357}1358}13591360void ArmCmseSGSection::addSGVeneer(Symbol *acleSeSym, Symbol *sym) {1361entries.emplace_back(acleSeSym, sym);1362if (symtab.cmseImportLib.count(sym->getName()))1363symtab.inCMSEOutImpLib[sym->getName()] = true;1364// Symbol addresses different, nothing to do.1365if (acleSeSym->file != sym->file ||1366cast<Defined>(*acleSeSym).value != cast<Defined>(*sym).value)1367return;1368// Only secure symbols with values equal to that of it's non-secure1369// counterpart needs to be in the .gnu.sgstubs section.1370ArmCmseSGVeneer *ss = nullptr;1371if (symtab.cmseImportLib.count(sym->getName())) {1372Defined *impSym = symtab.cmseImportLib[sym->getName()];1373ss = make<ArmCmseSGVeneer>(sym, acleSeSym, impSym->value);1374} else {1375ss = make<ArmCmseSGVeneer>(sym, acleSeSym);1376++newEntries;1377}1378sgVeneers.emplace_back(ss);1379}13801381void ArmCmseSGSection::writeTo(uint8_t *buf) {1382for (ArmCmseSGVeneer *s : sgVeneers) {1383uint8_t *p = buf + s->offset;1384write16(p + 0, 0xe97f); // SG1385write16(p + 2, 0xe97f);1386write16(p + 4, 0xf000); // B.W S1387write16(p + 6, 0xb000);1388target->relocateNoSym(p + 4, R_ARM_THM_JUMP24,1389s->acleSeSym->getVA() -1390(getVA() + s->offset + s->size));1391}1392}13931394void ArmCmseSGSection::addMappingSymbol() {1395addSyntheticLocal("$t", STT_NOTYPE, /*off=*/0, /*size=*/0, *this);1396}13971398size_t ArmCmseSGSection::getSize() const {1399if (sgVeneers.empty())1400return (impLibMaxAddr ? impLibMaxAddr - getVA() : 0) + newEntries * entsize;14011402return entries.size() * entsize;1403}14041405void ArmCmseSGSection::finalizeContents() {1406if (sgVeneers.empty())1407return;14081409auto it =1410std::stable_partition(sgVeneers.begin(), sgVeneers.end(),1411[](auto *i) { return i->getAddr().has_value(); });1412std::sort(sgVeneers.begin(), it, [](auto *a, auto *b) {1413return a->getAddr().value() < b->getAddr().value();1414});1415// This is the partition of the veneers with fixed addresses.1416uint64_t addr = (*sgVeneers.begin())->getAddr().has_value()1417? (*sgVeneers.begin())->getAddr().value()1418: getVA();1419// Check if the start address of '.gnu.sgstubs' correspond to the1420// linker-synthesized veneer with the lowest address.1421if ((getVA() & ~1) != (addr & ~1)) {1422error("start address of '.gnu.sgstubs' is different from previous link");1423return;1424}14251426for (size_t i = 0; i < sgVeneers.size(); ++i) {1427ArmCmseSGVeneer *s = sgVeneers[i];1428s->offset = i * s->size;1429Defined(file, StringRef(), s->sym->binding, s->sym->stOther, s->sym->type,1430s->offset | 1, s->size, this)1431.overwrite(*s->sym);1432}1433}14341435// Write the CMSE import library to disk.1436// The CMSE import library is a relocatable object with only a symbol table.1437// The symbols are copies of the (absolute) symbols of the secure gateways1438// in the executable output by this link.1439// See Arm® v8-M Security Extensions: Requirements on Development Tools1440// https://developer.arm.com/documentation/ecm0359818/latest1441template <typename ELFT> void elf::writeARMCmseImportLib() {1442StringTableSection *shstrtab =1443make<StringTableSection>(".shstrtab", /*dynamic=*/false);1444StringTableSection *strtab =1445make<StringTableSection>(".strtab", /*dynamic=*/false);1446SymbolTableBaseSection *impSymTab = make<SymbolTableSection<ELFT>>(*strtab);14471448SmallVector<std::pair<OutputSection *, SyntheticSection *>, 0> osIsPairs;1449osIsPairs.emplace_back(make<OutputSection>(strtab->name, 0, 0), strtab);1450osIsPairs.emplace_back(make<OutputSection>(impSymTab->name, 0, 0), impSymTab);1451osIsPairs.emplace_back(make<OutputSection>(shstrtab->name, 0, 0), shstrtab);14521453std::sort(symtab.cmseSymMap.begin(), symtab.cmseSymMap.end(),1454[](const auto &a, const auto &b) -> bool {1455return a.second.sym->getVA() < b.second.sym->getVA();1456});1457// Copy the secure gateway entry symbols to the import library symbol table.1458for (auto &p : symtab.cmseSymMap) {1459Defined *d = cast<Defined>(p.second.sym);1460impSymTab->addSymbol(makeDefined(1461ctx.internalFile, d->getName(), d->computeBinding(),1462/*stOther=*/0, STT_FUNC, d->getVA(), d->getSize(), nullptr));1463}14641465size_t idx = 0;1466uint64_t off = sizeof(typename ELFT::Ehdr);1467for (auto &[osec, isec] : osIsPairs) {1468osec->sectionIndex = ++idx;1469osec->recordSection(isec);1470osec->finalizeInputSections();1471osec->shName = shstrtab->addString(osec->name);1472osec->size = isec->getSize();1473isec->finalizeContents();1474osec->offset = alignToPowerOf2(off, osec->addralign);1475off = osec->offset + osec->size;1476}14771478const uint64_t sectionHeaderOff = alignToPowerOf2(off, config->wordsize);1479const auto shnum = osIsPairs.size() + 1;1480const uint64_t fileSize =1481sectionHeaderOff + shnum * sizeof(typename ELFT::Shdr);1482const unsigned flags =1483config->mmapOutputFile ? 0 : (unsigned)FileOutputBuffer::F_no_mmap;1484unlinkAsync(config->cmseOutputLib);1485Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =1486FileOutputBuffer::create(config->cmseOutputLib, fileSize, flags);1487if (!bufferOrErr) {1488error("failed to open " + config->cmseOutputLib + ": " +1489llvm::toString(bufferOrErr.takeError()));1490return;1491}14921493// Write the ELF Header1494std::unique_ptr<FileOutputBuffer> &buffer = *bufferOrErr;1495uint8_t *const buf = buffer->getBufferStart();1496memcpy(buf, "\177ELF", 4);1497auto *eHdr = reinterpret_cast<typename ELFT::Ehdr *>(buf);1498eHdr->e_type = ET_REL;1499eHdr->e_entry = 0;1500eHdr->e_shoff = sectionHeaderOff;1501eHdr->e_ident[EI_CLASS] = ELFCLASS32;1502eHdr->e_ident[EI_DATA] = config->isLE ? ELFDATA2LSB : ELFDATA2MSB;1503eHdr->e_ident[EI_VERSION] = EV_CURRENT;1504eHdr->e_ident[EI_OSABI] = config->osabi;1505eHdr->e_ident[EI_ABIVERSION] = 0;1506eHdr->e_machine = EM_ARM;1507eHdr->e_version = EV_CURRENT;1508eHdr->e_flags = config->eflags;1509eHdr->e_ehsize = sizeof(typename ELFT::Ehdr);1510eHdr->e_phnum = 0;1511eHdr->e_shentsize = sizeof(typename ELFT::Shdr);1512eHdr->e_phoff = 0;1513eHdr->e_phentsize = 0;1514eHdr->e_shnum = shnum;1515eHdr->e_shstrndx = shstrtab->getParent()->sectionIndex;15161517// Write the section header table.1518auto *sHdrs = reinterpret_cast<typename ELFT::Shdr *>(buf + eHdr->e_shoff);1519for (auto &[osec, _] : osIsPairs)1520osec->template writeHeaderTo<ELFT>(++sHdrs);15211522// Write section contents to a mmap'ed file.1523{1524parallel::TaskGroup tg;1525for (auto &[osec, _] : osIsPairs)1526osec->template writeTo<ELFT>(buf + osec->offset, tg);1527}15281529if (auto e = buffer->commit())1530fatal("failed to write output '" + buffer->getPath() +1531"': " + toString(std::move(e)));1532}15331534TargetInfo *elf::getARMTargetInfo() {1535static ARM target;1536return ⌖1537}15381539template void elf::writeARMCmseImportLib<ELF32LE>();1540template void elf::writeARMCmseImportLib<ELF32BE>();1541template void elf::writeARMCmseImportLib<ELF64LE>();1542template void elf::writeARMCmseImportLib<ELF64BE>();15431544template void ObjFile<ELF32LE>::importCmseSymbols();1545template void ObjFile<ELF32BE>::importCmseSymbols();1546template void ObjFile<ELF64LE>::importCmseSymbols();1547template void ObjFile<ELF64BE>::importCmseSymbols();154815491550