// Path: blob/main/contrib/llvm-project/lld/ELF/Arch/PPC64.cpp
// 34878 views
//===- PPC64.cpp ----------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "InputFiles.h"9#include "OutputSections.h"10#include "SymbolTable.h"11#include "Symbols.h"12#include "SyntheticSections.h"13#include "Target.h"14#include "Thunks.h"15#include "lld/Common/CommonLinkerContext.h"16#include "llvm/Support/Endian.h"1718using namespace llvm;19using namespace llvm::object;20using namespace llvm::support::endian;21using namespace llvm::ELF;22using namespace lld;23using namespace lld::elf;2425constexpr uint64_t ppc64TocOffset = 0x8000;26constexpr uint64_t dynamicThreadPointerOffset = 0x8000;2728namespace {29// The instruction encoding of bits 21-30 from the ISA for the Xform and Dform30// instructions that can be used as part of the initial exec TLS sequence.31enum XFormOpcd {32LBZX = 87,33LHZX = 279,34LWZX = 23,35LDX = 21,36STBX = 215,37STHX = 407,38STWX = 151,39STDX = 149,40LHAX = 343,41LWAX = 341,42LFSX = 535,43LFDX = 599,44STFSX = 663,45STFDX = 727,46ADD = 266,47};4849enum DFormOpcd {50LBZ = 34,51LBZU = 35,52LHZ = 40,53LHZU = 41,54LHAU = 43,55LWZ = 32,56LWZU = 33,57LFSU = 49,58LFDU = 51,59STB = 38,60STBU = 39,61STH = 44,62STHU = 45,63STW = 36,64STWU = 37,65STFSU = 53,66STFDU = 55,67LHA = 42,68LFS = 48,69LFD = 50,70STFS = 52,71STFD = 54,72ADDI = 1473};7475enum DSFormOpcd {76LD = 58,77LWA = 58,78STD = 6279};8081constexpr uint32_t NOP = 0x60000000;8283enum class PPCLegacyInsn : uint32_t {84NOINSN = 0,85// Loads.86LBZ = 0x88000000,87LHZ = 0xa0000000,88LWZ = 0x80000000,89LHA = 0xa8000000,90LWA = 0xe8000002,91LD = 0xe8000000,92LFS = 0xC0000000,93LXSSP = 0xe4000003,94LFD = 0xc8000000,95LXSD = 0xe4000002,96LXV = 0xf4000001,97LXVP = 0x18000000,9899// Stores.100STB = 
0x98000000,101STH = 0xb0000000,102STW = 0x90000000,103STD = 0xf8000000,104STFS = 0xd0000000,105STXSSP = 0xf4000003,106STFD = 0xd8000000,107STXSD = 0xf4000002,108STXV = 0xf4000005,109STXVP = 0x18000001110};111enum class PPCPrefixedInsn : uint64_t {112NOINSN = 0,113PREFIX_MLS = 0x0610000000000000,114PREFIX_8LS = 0x0410000000000000,115116// Loads.117PLBZ = PREFIX_MLS,118PLHZ = PREFIX_MLS,119PLWZ = PREFIX_MLS,120PLHA = PREFIX_MLS,121PLWA = PREFIX_8LS | 0xa4000000,122PLD = PREFIX_8LS | 0xe4000000,123PLFS = PREFIX_MLS,124PLXSSP = PREFIX_8LS | 0xac000000,125PLFD = PREFIX_MLS,126PLXSD = PREFIX_8LS | 0xa8000000,127PLXV = PREFIX_8LS | 0xc8000000,128PLXVP = PREFIX_8LS | 0xe8000000,129130// Stores.131PSTB = PREFIX_MLS,132PSTH = PREFIX_MLS,133PSTW = PREFIX_MLS,134PSTD = PREFIX_8LS | 0xf4000000,135PSTFS = PREFIX_MLS,136PSTXSSP = PREFIX_8LS | 0xbc000000,137PSTFD = PREFIX_MLS,138PSTXSD = PREFIX_8LS | 0xb8000000,139PSTXV = PREFIX_8LS | 0xd8000000,140PSTXVP = PREFIX_8LS | 0xf8000000141};142143static bool checkPPCLegacyInsn(uint32_t encoding) {144PPCLegacyInsn insn = static_cast<PPCLegacyInsn>(encoding);145if (insn == PPCLegacyInsn::NOINSN)146return false;147#define PCREL_OPT(Legacy, PCRel, InsnMask) \148if (insn == PPCLegacyInsn::Legacy) \149return true;150#include "PPCInsns.def"151#undef PCREL_OPT152return false;153}154155// Masks to apply to legacy instructions when converting them to prefixed,156// pc-relative versions. 
For the most part, the primary opcode is shared157// between the legacy instruction and the suffix of its prefixed version.158// However, there are some instances where that isn't the case (DS-Form and159// DQ-form instructions).160enum class LegacyToPrefixMask : uint64_t {161NOMASK = 0x0,162OPC_AND_RST = 0xffe00000, // Primary opc (0-5) and R[ST] (6-10).163ONLY_RST = 0x3e00000, // [RS]T (6-10).164ST_STX28_TO5 =1650x8000000003e00000, // S/T (6-10) - The [S/T]X bit moves from 28 to 5.166};167168class PPC64 final : public TargetInfo {169public:170PPC64();171int getTlsGdRelaxSkip(RelType type) const override;172uint32_t calcEFlags() const override;173RelExpr getRelExpr(RelType type, const Symbol &s,174const uint8_t *loc) const override;175RelType getDynRel(RelType type) const override;176int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;177void writePltHeader(uint8_t *buf) const override;178void writePlt(uint8_t *buf, const Symbol &sym,179uint64_t pltEntryAddr) const override;180void writeIplt(uint8_t *buf, const Symbol &sym,181uint64_t pltEntryAddr) const override;182void relocate(uint8_t *loc, const Relocation &rel,183uint64_t val) const override;184void writeGotHeader(uint8_t *buf) const override;185bool needsThunk(RelExpr expr, RelType type, const InputFile *file,186uint64_t branchAddr, const Symbol &s,187int64_t a) const override;188uint32_t getThunkSectionSpacing() const override;189bool inBranchRange(RelType type, uint64_t src, uint64_t dst) const override;190RelExpr adjustTlsExpr(RelType type, RelExpr expr) const override;191RelExpr adjustGotPcExpr(RelType type, int64_t addend,192const uint8_t *loc) const override;193void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const;194void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;195196bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,197uint8_t stOther) const override;198199private:200void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, 
uint64_t val) const;201void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;202void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;203void relaxTlsIeToLe(uint8_t *loc, const Relocation &rel, uint64_t val) const;204};205} // namespace206207uint64_t elf::getPPC64TocBase() {208// The TOC consists of sections .got, .toc, .tocbss, .plt in that order. The209// TOC starts where the first of these sections starts. We always create a210// .got when we see a relocation that uses it, so for us the start is always211// the .got.212uint64_t tocVA = in.got->getVA();213214// Per the ppc64-elf-linux ABI, The TOC base is TOC value plus 0x8000215// thus permitting a full 64 Kbytes segment. Note that the glibc startup216// code (crt1.o) assumes that you can get from the TOC base to the217// start of the .toc section with only a single (signed) 16-bit relocation.218return tocVA + ppc64TocOffset;219}220221unsigned elf::getPPC64GlobalEntryToLocalEntryOffset(uint8_t stOther) {222// The offset is encoded into the 3 most significant bits of the st_other223// field, with some special values described in section 3.4.1 of the ABI:224// 0 --> Zero offset between the GEP and LEP, and the function does NOT use225// the TOC pointer (r2). 
r2 will hold the same value on returning from226// the function as it did on entering the function.227// 1 --> Zero offset between the GEP and LEP, and r2 should be treated as a228// caller-saved register for all callers.229// 2-6 --> The binary logarithm of the offset eg:230// 2 --> 2^2 = 4 bytes --> 1 instruction.231// 6 --> 2^6 = 64 bytes --> 16 instructions.232// 7 --> Reserved.233uint8_t gepToLep = (stOther >> 5) & 7;234if (gepToLep < 2)235return 0;236237// The value encoded in the st_other bits is the238// log-base-2(offset).239if (gepToLep < 7)240return 1 << gepToLep;241242error("reserved value of 7 in the 3 most-significant-bits of st_other");243return 0;244}245246void elf::writePrefixedInstruction(uint8_t *loc, uint64_t insn) {247insn = config->isLE ? insn << 32 | insn >> 32 : insn;248write64(loc, insn);249}250251static bool addOptional(StringRef name, uint64_t value,252std::vector<Defined *> &defined) {253Symbol *sym = symtab.find(name);254if (!sym || sym->isDefined())255return false;256sym->resolve(Defined{ctx.internalFile, StringRef(), STB_GLOBAL, STV_HIDDEN,257STT_FUNC, value,258/*size=*/0, /*section=*/nullptr});259defined.push_back(cast<Defined>(sym));260return true;261}262263// If from is 14, write ${prefix}14: firstInsn; ${prefix}15:264// firstInsn+0x200008; ...; ${prefix}31: firstInsn+(31-14)*0x200008; $tail265// The labels are defined only if they exist in the symbol table.266static void writeSequence(MutableArrayRef<uint32_t> buf, const char *prefix,267int from, uint32_t firstInsn,268ArrayRef<uint32_t> tail) {269std::vector<Defined *> defined;270char name[16];271int first;272uint32_t *ptr = buf.data();273for (int r = from; r < 32; ++r) {274format("%s%d", prefix, r).snprint(name, sizeof(name));275if (addOptional(name, 4 * (r - from), defined) && defined.size() == 1)276first = r - from;277write32(ptr++, firstInsn + 0x200008 * (r - from));278}279for (uint32_t insn : tail)280write32(ptr++, insn);281assert(ptr == &*buf.end());282283if 
(defined.empty())284return;285// The full section content has the extent of [begin, end). We drop unused286// instructions and write [first,end).287auto *sec = make<InputSection>(288ctx.internalFile, SHF_ALLOC, SHT_PROGBITS, 4,289ArrayRef(reinterpret_cast<uint8_t *>(buf.data() + first),2904 * (buf.size() - first)),291".text");292ctx.inputSections.push_back(sec);293for (Defined *sym : defined) {294sym->section = sec;295sym->value -= 4 * first;296}297}298299// Implements some save and restore functions as described by ELF V2 ABI to be300// compatible with GCC. With GCC -Os, when the number of call-saved registers301// exceeds a certain threshold, GCC generates _savegpr0_* _restgpr0_* calls and302// expects the linker to define them. See303// https://sourceware.org/pipermail/binutils/2002-February/017444.html and304// https://sourceware.org/pipermail/binutils/2004-August/036765.html . This is305// weird because libgcc.a would be the natural place. The linker generation306// approach has the advantage that the linker can generate multiple copies to307// avoid long branch thunks. 
However, we don't consider the advantage308// significant enough to complicate our trunk implementation, so we take the309// simple approach and synthesize .text sections providing the implementation.310void elf::addPPC64SaveRestore() {311static uint32_t savegpr0[20], restgpr0[21], savegpr1[19], restgpr1[19];312constexpr uint32_t blr = 0x4e800020, mtlr_0 = 0x7c0803a6;313314// _restgpr0_14: ld 14, -144(1); _restgpr0_15: ld 15, -136(1); ...315// Tail: ld 0, 16(1); mtlr 0; blr316writeSequence(restgpr0, "_restgpr0_", 14, 0xe9c1ff70,317{0xe8010010, mtlr_0, blr});318// _restgpr1_14: ld 14, -144(12); _restgpr1_15: ld 15, -136(12); ...319// Tail: blr320writeSequence(restgpr1, "_restgpr1_", 14, 0xe9ccff70, {blr});321// _savegpr0_14: std 14, -144(1); _savegpr0_15: std 15, -136(1); ...322// Tail: std 0, 16(1); blr323writeSequence(savegpr0, "_savegpr0_", 14, 0xf9c1ff70, {0xf8010010, blr});324// _savegpr1_14: std 14, -144(12); _savegpr1_15: std 15, -136(12); ...325// Tail: blr326writeSequence(savegpr1, "_savegpr1_", 14, 0xf9ccff70, {blr});327}328329// Find the R_PPC64_ADDR64 in .rela.toc with matching offset.330template <typename ELFT>331static std::pair<Defined *, int64_t>332getRelaTocSymAndAddend(InputSectionBase *tocSec, uint64_t offset) {333// .rela.toc contains exclusively R_PPC64_ADDR64 relocations sorted by334// r_offset: 0, 8, 16, etc. For a given Offset, Offset / 8 gives us the335// relocation index in most cases.336//337// In rare cases a TOC entry may store a constant that doesn't need an338// R_PPC64_ADDR64, the corresponding r_offset is therefore missing. Offset / 8339// points to a relocation with larger r_offset. 
Do a linear probe then.340// Constants are extremely uncommon in .toc and the extra number of array341// accesses can be seen as a small constant.342ArrayRef<typename ELFT::Rela> relas =343tocSec->template relsOrRelas<ELFT>().relas;344if (relas.empty())345return {};346uint64_t index = std::min<uint64_t>(offset / 8, relas.size() - 1);347for (;;) {348if (relas[index].r_offset == offset) {349Symbol &sym = tocSec->file->getRelocTargetSym(relas[index]);350return {dyn_cast<Defined>(&sym), getAddend<ELFT>(relas[index])};351}352if (relas[index].r_offset < offset || index == 0)353break;354--index;355}356return {};357}358359// When accessing a symbol defined in another translation unit, compilers360// reserve a .toc entry, allocate a local label and generate toc-indirect361// instructions:362//363// addis 3, 2, .LC0@toc@ha # R_PPC64_TOC16_HA364// ld 3, .LC0@toc@l(3) # R_PPC64_TOC16_LO_DS, load the address from a .toc entry365// ld/lwa 3, 0(3) # load the value from the address366//367// .section .toc,"aw",@progbits368// .LC0: .tc var[TC],var369//370// If var is defined, non-preemptable and addressable with a 32-bit signed371// offset from the toc base, the address of var can be computed by adding an372// offset to the toc base, saving a load.373//374// addis 3,2,var@toc@ha # this may be relaxed to a nop,375// addi 3,3,var@toc@l # then this becomes addi 3,2,var@toc376// ld/lwa 3, 0(3) # load the value from the address377//378// Returns true if the relaxation is performed.379static bool tryRelaxPPC64TocIndirection(const Relocation &rel,380uint8_t *bufLoc) {381assert(config->tocOptimize);382if (rel.addend < 0)383return false;384385// If the symbol is not the .toc section, this isn't a toc-indirection.386Defined *defSym = dyn_cast<Defined>(rel.sym);387if (!defSym || !defSym->isSection() || defSym->section->name != ".toc")388return false;389390Defined *d;391int64_t addend;392auto *tocISB = cast<InputSectionBase>(defSym->section);393std::tie(d, addend) =394config->isLE ? 
getRelaTocSymAndAddend<ELF64LE>(tocISB, rel.addend)395: getRelaTocSymAndAddend<ELF64BE>(tocISB, rel.addend);396397// Only non-preemptable defined symbols can be relaxed.398if (!d || d->isPreemptible)399return false;400401// R_PPC64_ADDR64 should have created a canonical PLT for the non-preemptable402// ifunc and changed its type to STT_FUNC.403assert(!d->isGnuIFunc());404405// Two instructions can materialize a 32-bit signed offset from the toc base.406uint64_t tocRelative = d->getVA(addend) - getPPC64TocBase();407if (!isInt<32>(tocRelative))408return false;409410// Add PPC64TocOffset that will be subtracted by PPC64::relocate().411static_cast<const PPC64 &>(*target).relaxGot(bufLoc, rel,412tocRelative + ppc64TocOffset);413return true;414}415416// Relocation masks following the #lo(value), #hi(value), #ha(value),417// #higher(value), #highera(value), #highest(value), and #highesta(value)418// macros defined in section 4.5.1. Relocation Types of the PPC-elf64abi419// document.420static uint16_t lo(uint64_t v) { return v; }421static uint16_t hi(uint64_t v) { return v >> 16; }422static uint64_t ha(uint64_t v) { return (v + 0x8000) >> 16; }423static uint16_t higher(uint64_t v) { return v >> 32; }424static uint16_t highera(uint64_t v) { return (v + 0x8000) >> 32; }425static uint16_t highest(uint64_t v) { return v >> 48; }426static uint16_t highesta(uint64_t v) { return (v + 0x8000) >> 48; }427428// Extracts the 'PO' field of an instruction encoding.429static uint8_t getPrimaryOpCode(uint32_t encoding) { return (encoding >> 26); }430431static bool isDQFormInstruction(uint32_t encoding) {432switch (getPrimaryOpCode(encoding)) {433default:434return false;435case 6: // Power10 paired loads/stores (lxvp, stxvp).436case 56:437// The only instruction with a primary opcode of 56 is `lq`.438return true;439case 61:440// There are both DS and DQ instruction forms with this primary opcode.441// Namely `lxv` and `stxv` are the DQ-forms that use it.442// The DS 'XO' bits being set to 
01 is restricted to DQ form.443return (encoding & 3) == 0x1;444}445}446447static bool isDSFormInstruction(PPCLegacyInsn insn) {448switch (insn) {449default:450return false;451case PPCLegacyInsn::LWA:452case PPCLegacyInsn::LD:453case PPCLegacyInsn::LXSD:454case PPCLegacyInsn::LXSSP:455case PPCLegacyInsn::STD:456case PPCLegacyInsn::STXSD:457case PPCLegacyInsn::STXSSP:458return true;459}460}461462static PPCLegacyInsn getPPCLegacyInsn(uint32_t encoding) {463uint32_t opc = encoding & 0xfc000000;464465// If the primary opcode is shared between multiple instructions, we need to466// fix it up to match the actual instruction we are after.467if ((opc == 0xe4000000 || opc == 0xe8000000 || opc == 0xf4000000 ||468opc == 0xf8000000) &&469!isDQFormInstruction(encoding))470opc = encoding & 0xfc000003;471else if (opc == 0xf4000000)472opc = encoding & 0xfc000007;473else if (opc == 0x18000000)474opc = encoding & 0xfc00000f;475476// If the value is not one of the enumerators in PPCLegacyInsn, we want to477// return PPCLegacyInsn::NOINSN.478if (!checkPPCLegacyInsn(opc))479return PPCLegacyInsn::NOINSN;480return static_cast<PPCLegacyInsn>(opc);481}482483static PPCPrefixedInsn getPCRelativeForm(PPCLegacyInsn insn) {484switch (insn) {485#define PCREL_OPT(Legacy, PCRel, InsnMask) \486case PPCLegacyInsn::Legacy: \487return PPCPrefixedInsn::PCRel488#include "PPCInsns.def"489#undef PCREL_OPT490}491return PPCPrefixedInsn::NOINSN;492}493494static LegacyToPrefixMask getInsnMask(PPCLegacyInsn insn) {495switch (insn) {496#define PCREL_OPT(Legacy, PCRel, InsnMask) \497case PPCLegacyInsn::Legacy: \498return LegacyToPrefixMask::InsnMask499#include "PPCInsns.def"500#undef PCREL_OPT501}502return LegacyToPrefixMask::NOMASK;503}504static uint64_t getPCRelativeForm(uint32_t encoding) {505PPCLegacyInsn origInsn = getPPCLegacyInsn(encoding);506PPCPrefixedInsn pcrelInsn = getPCRelativeForm(origInsn);507if (pcrelInsn == PPCPrefixedInsn::NOINSN)508return UINT64_C(-1);509LegacyToPrefixMask origInsnMask = 
getInsnMask(origInsn);510uint64_t pcrelEncoding =511(uint64_t)pcrelInsn | (encoding & (uint64_t)origInsnMask);512513// If the mask requires moving bit 28 to bit 5, do that now.514if (origInsnMask == LegacyToPrefixMask::ST_STX28_TO5)515pcrelEncoding |= (encoding & 0x8) << 23;516return pcrelEncoding;517}518519static bool isInstructionUpdateForm(uint32_t encoding) {520switch (getPrimaryOpCode(encoding)) {521default:522return false;523case LBZU:524case LHAU:525case LHZU:526case LWZU:527case LFSU:528case LFDU:529case STBU:530case STHU:531case STWU:532case STFSU:533case STFDU:534return true;535// LWA has the same opcode as LD, and the DS bits is what differentiates536// between LD/LDU/LWA537case LD:538case STD:539return (encoding & 3) == 1;540}541}542543// Compute the total displacement between the prefixed instruction that gets544// to the start of the data and the load/store instruction that has the offset545// into the data structure.546// For example:547// paddi 3, 0, 1000, 1548// lwz 3, 20(3)549// Should add up to 1020 for total displacement.550static int64_t getTotalDisp(uint64_t prefixedInsn, uint32_t accessInsn) {551int64_t disp34 = llvm::SignExtend64(552((prefixedInsn & 0x3ffff00000000) >> 16) | (prefixedInsn & 0xffff), 34);553int32_t disp16 = llvm::SignExtend32(accessInsn & 0xffff, 16);554// For DS and DQ form instructions, we need to mask out the XO bits.555if (isDQFormInstruction(accessInsn))556disp16 &= ~0xf;557else if (isDSFormInstruction(getPPCLegacyInsn(accessInsn)))558disp16 &= ~0x3;559return disp34 + disp16;560}561562// There are a number of places when we either want to read or write an563// instruction when handling a half16 relocation type. On big-endian the buffer564// pointer is pointing into the middle of the word we want to extract, and on565// little-endian it is pointing to the start of the word. 
These 2 helpers are to566// simplify reading and writing in that context.567static void writeFromHalf16(uint8_t *loc, uint32_t insn) {568write32(config->isLE ? loc : loc - 2, insn);569}570571static uint32_t readFromHalf16(const uint8_t *loc) {572return read32(config->isLE ? loc : loc - 2);573}574575static uint64_t readPrefixedInstruction(const uint8_t *loc) {576uint64_t fullInstr = read64(loc);577return config->isLE ? (fullInstr << 32 | fullInstr >> 32) : fullInstr;578}579580PPC64::PPC64() {581copyRel = R_PPC64_COPY;582gotRel = R_PPC64_GLOB_DAT;583pltRel = R_PPC64_JMP_SLOT;584relativeRel = R_PPC64_RELATIVE;585iRelativeRel = R_PPC64_IRELATIVE;586symbolicRel = R_PPC64_ADDR64;587pltHeaderSize = 60;588pltEntrySize = 4;589ipltEntrySize = 16; // PPC64PltCallStub::size590gotHeaderEntriesNum = 1;591gotPltHeaderEntriesNum = 2;592needsThunks = true;593594tlsModuleIndexRel = R_PPC64_DTPMOD64;595tlsOffsetRel = R_PPC64_DTPREL64;596597tlsGotRel = R_PPC64_TPREL64;598599needsMoreStackNonSplit = false;600601// We need 64K pages (at least under glibc/Linux, the loader won't602// set different permissions on a finer granularity than that).603defaultMaxPageSize = 65536;604605// The PPC64 ELF ABI v1 spec, says:606//607// It is normally desirable to put segments with different characteristics608// in separate 256 Mbyte portions of the address space, to give the609// operating system full paging flexibility in the 64-bit address space.610//611// And because the lowest non-zero 256M boundary is 0x10000000, PPC64 linkers612// use 0x10000000 as the starting address.613defaultImageBase = 0x10000000;614615write32(trapInstr.data(), 0x7fe00008);616}617618int PPC64::getTlsGdRelaxSkip(RelType type) const {619// A __tls_get_addr call instruction is marked with 2 relocations:620//621// R_PPC64_TLSGD / R_PPC64_TLSLD: marker relocation622// R_PPC64_REL24: __tls_get_addr623//624// After the relaxation we no longer call __tls_get_addr and should skip both625// relocations to not create a false 
dependence on __tls_get_addr being626// defined.627if (type == R_PPC64_TLSGD || type == R_PPC64_TLSLD)628return 2;629return 1;630}631632static uint32_t getEFlags(InputFile *file) {633if (file->ekind == ELF64BEKind)634return cast<ObjFile<ELF64BE>>(file)->getObj().getHeader().e_flags;635return cast<ObjFile<ELF64LE>>(file)->getObj().getHeader().e_flags;636}637638// This file implements v2 ABI. This function makes sure that all639// object files have v2 or an unspecified version as an ABI version.640uint32_t PPC64::calcEFlags() const {641for (InputFile *f : ctx.objectFiles) {642uint32_t flag = getEFlags(f);643if (flag == 1)644error(toString(f) + ": ABI version 1 is not supported");645else if (flag > 2)646error(toString(f) + ": unrecognized e_flags: " + Twine(flag));647}648return 2;649}650651void PPC64::relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) const {652switch (rel.type) {653case R_PPC64_TOC16_HA:654// Convert "addis reg, 2, .LC0@toc@h" to "addis reg, 2, var@toc@h" or "nop".655relocate(loc, rel, val);656break;657case R_PPC64_TOC16_LO_DS: {658// Convert "ld reg, .LC0@toc@l(reg)" to "addi reg, reg, var@toc@l" or659// "addi reg, 2, var@toc".660uint32_t insn = readFromHalf16(loc);661if (getPrimaryOpCode(insn) != LD)662error("expected a 'ld' for got-indirect to toc-relative relaxing");663writeFromHalf16(loc, (insn & 0x03ffffff) | 0x38000000);664relocateNoSym(loc, R_PPC64_TOC16_LO, val);665break;666}667case R_PPC64_GOT_PCREL34: {668// Clear the first 8 bits of the prefix and the first 6 bits of the669// instruction (the primary opcode).670uint64_t insn = readPrefixedInstruction(loc);671if ((insn & 0xfc000000) != 0xe4000000)672error("expected a 'pld' for got-indirect to pc-relative relaxing");673insn &= ~0xff000000fc000000;674675// Replace the cleared bits with the values for PADDI (0x600000038000000);676insn |= 0x600000038000000;677writePrefixedInstruction(loc, insn);678relocate(loc, rel, val);679break;680}681case R_PPC64_PCREL_OPT: {682// We can only relax 
this if the R_PPC64_GOT_PCREL34 at this offset can683// be relaxed. The eligibility for the relaxation needs to be determined684// on that relocation since this one does not relocate a symbol.685uint64_t insn = readPrefixedInstruction(loc);686uint32_t accessInsn = read32(loc + rel.addend);687uint64_t pcRelInsn = getPCRelativeForm(accessInsn);688689// This error is not necessary for correctness but is emitted for now690// to ensure we don't miss these opportunities in real code. It can be691// removed at a later date.692if (pcRelInsn == UINT64_C(-1)) {693errorOrWarn(694"unrecognized instruction for R_PPC64_PCREL_OPT relaxation: 0x" +695Twine::utohexstr(accessInsn));696break;697}698699int64_t totalDisp = getTotalDisp(insn, accessInsn);700if (!isInt<34>(totalDisp))701break; // Displacement doesn't fit.702// Convert the PADDI to the prefixed version of accessInsn and convert703// accessInsn to a nop.704writePrefixedInstruction(loc, pcRelInsn |705((totalDisp & 0x3ffff0000) << 16) |706(totalDisp & 0xffff));707write32(loc + rel.addend, NOP); // nop accessInsn.708break;709}710default:711llvm_unreachable("unexpected relocation type");712}713}714715void PPC64::relaxTlsGdToLe(uint8_t *loc, const Relocation &rel,716uint64_t val) const {717// Reference: 3.7.4.2 of the 64-bit ELF V2 abi supplement.718// The general dynamic code sequence for a global `x` will look like:719// Instruction Relocation Symbol720// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x721// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x722// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x723// R_PPC64_REL24 __tls_get_addr724// nop None None725726// Relaxing to local exec entails converting:727// addis r3, r2, x@got@tlsgd@ha into nop728// addi r3, r3, x@got@tlsgd@l into addis r3, r13, x@tprel@ha729// bl __tls_get_addr(x@tlsgd) into nop730// nop into addi r3, r3, x@tprel@l731732switch (rel.type) {733case R_PPC64_GOT_TLSGD16_HA:734writeFromHalf16(loc, NOP);735break;736case R_PPC64_GOT_TLSGD16:737case 
R_PPC64_GOT_TLSGD16_LO:738writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13739relocateNoSym(loc, R_PPC64_TPREL16_HA, val);740break;741case R_PPC64_GOT_TLSGD_PCREL34:742// Relax from paddi r3, 0, x@got@tlsgd@pcrel, 1 to743// paddi r3, r13, x@tprel, 0744writePrefixedInstruction(loc, 0x06000000386d0000);745relocateNoSym(loc, R_PPC64_TPREL34, val);746break;747case R_PPC64_TLSGD: {748// PC Relative Relaxation:749// Relax from bl __tls_get_addr@notoc(x@tlsgd) to750// nop751// TOC Relaxation:752// Relax from bl __tls_get_addr(x@tlsgd)753// nop754// to755// nop756// addi r3, r3, x@tprel@l757const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);758if (locAsInt % 4 == 0) {759write32(loc, NOP); // nop760write32(loc + 4, 0x38630000); // addi r3, r3761// Since we are relocating a half16 type relocation and Loc + 4 points to762// the start of an instruction we need to advance the buffer by an extra763// 2 bytes on BE.764relocateNoSym(loc + 4 + (config->ekind == ELF64BEKind ? 2 : 0),765R_PPC64_TPREL16_LO, val);766} else if (locAsInt % 4 == 1) {767write32(loc - 1, NOP);768} else {769errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment");770}771break;772}773default:774llvm_unreachable("unsupported relocation for TLS GD to LE relaxation");775}776}777778void PPC64::relaxTlsLdToLe(uint8_t *loc, const Relocation &rel,779uint64_t val) const {780// Reference: 3.7.4.3 of the 64-bit ELF V2 abi supplement.781// The local dynamic code sequence for a global `x` will look like:782// Instruction Relocation Symbol783// addis r3, r2, x@got@tlsld@ha R_PPC64_GOT_TLSLD16_HA x784// addi r3, r3, x@got@tlsld@l R_PPC64_GOT_TLSLD16_LO x785// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSLD x786// R_PPC64_REL24 __tls_get_addr787// nop None None788789// Relaxing to local exec entails converting:790// addis r3, r2, x@got@tlsld@ha into nop791// addi r3, r3, x@got@tlsld@l into addis r3, r13, 0792// bl __tls_get_addr(x@tlsgd) into nop793// nop into addi r3, r3, 4096794795switch (rel.type) {796case 
R_PPC64_GOT_TLSLD16_HA:797writeFromHalf16(loc, NOP);798break;799case R_PPC64_GOT_TLSLD16_LO:800writeFromHalf16(loc, 0x3c6d0000); // addis r3, r13, 0801break;802case R_PPC64_GOT_TLSLD_PCREL34:803// Relax from paddi r3, 0, x1@got@tlsld@pcrel, 1 to804// paddi r3, r13, 0x1000, 0805writePrefixedInstruction(loc, 0x06000000386d1000);806break;807case R_PPC64_TLSLD: {808// PC Relative Relaxation:809// Relax from bl __tls_get_addr@notoc(x@tlsld)810// to811// nop812// TOC Relaxation:813// Relax from bl __tls_get_addr(x@tlsld)814// nop815// to816// nop817// addi r3, r3, 4096818const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);819if (locAsInt % 4 == 0) {820write32(loc, NOP);821write32(loc + 4, 0x38631000); // addi r3, r3, 4096822} else if (locAsInt % 4 == 1) {823write32(loc - 1, NOP);824} else {825errorOrWarn("R_PPC64_TLSLD has unexpected byte alignment");826}827break;828}829case R_PPC64_DTPREL16:830case R_PPC64_DTPREL16_HA:831case R_PPC64_DTPREL16_HI:832case R_PPC64_DTPREL16_DS:833case R_PPC64_DTPREL16_LO:834case R_PPC64_DTPREL16_LO_DS:835case R_PPC64_DTPREL34:836relocate(loc, rel, val);837break;838default:839llvm_unreachable("unsupported relocation for TLS LD to LE relaxation");840}841}842843// Map X-Form instructions to their DS-Form counterparts, if applicable.844// The full encoding is returned here to distinguish between the different845// DS-Form instructions.846unsigned elf::getPPCDSFormOp(unsigned secondaryOp) {847switch (secondaryOp) {848case LWAX:849return (LWA << 26) | 0x2;850case LDX:851return LD << 26;852case STDX:853return STD << 26;854default:855return 0;856}857}858859unsigned elf::getPPCDFormOp(unsigned secondaryOp) {860switch (secondaryOp) {861case LBZX:862return LBZ << 26;863case LHZX:864return LHZ << 26;865case LWZX:866return LWZ << 26;867case STBX:868return STB << 26;869case STHX:870return STH << 26;871case STWX:872return STW << 26;873case LHAX:874return LHA << 26;875case LFSX:876return LFS << 26;877case LFDX:878return LFD << 26;879case 
STFSX:880return STFS << 26;881case STFDX:882return STFD << 26;883case ADD:884return ADDI << 26;885default:886return 0;887}888}889890void PPC64::relaxTlsIeToLe(uint8_t *loc, const Relocation &rel,891uint64_t val) const {892// The initial exec code sequence for a global `x` will look like:893// Instruction Relocation Symbol894// addis r9, r2, x@got@tprel@ha R_PPC64_GOT_TPREL16_HA x895// ld r9, x@got@tprel@l(r9) R_PPC64_GOT_TPREL16_LO_DS x896// add r9, r9, x@tls R_PPC64_TLS x897898// Relaxing to local exec entails converting:899// addis r9, r2, x@got@tprel@ha into nop900// ld r9, x@got@tprel@l(r9) into addis r9, r13, x@tprel@ha901// add r9, r9, x@tls into addi r9, r9, x@tprel@l902903// x@tls R_PPC64_TLS is a relocation which does not compute anything,904// it is replaced with r13 (thread pointer).905906// The add instruction in the initial exec sequence has multiple variations907// that need to be handled. If we are building an address it will use an add908// instruction, if we are accessing memory it will use any of the X-form909// indexed load or store instructions.910911unsigned offset = (config->ekind == ELF64BEKind) ? 
2 : 0;912switch (rel.type) {913case R_PPC64_GOT_TPREL16_HA:914write32(loc - offset, NOP);915break;916case R_PPC64_GOT_TPREL16_LO_DS:917case R_PPC64_GOT_TPREL16_DS: {918uint32_t regNo = read32(loc - offset) & 0x03E00000; // bits 6-10919write32(loc - offset, 0x3C0D0000 | regNo); // addis RegNo, r13920relocateNoSym(loc, R_PPC64_TPREL16_HA, val);921break;922}923case R_PPC64_GOT_TPREL_PCREL34: {924const uint64_t pldRT = readPrefixedInstruction(loc) & 0x0000000003e00000;925// paddi RT(from pld), r13, symbol@tprel, 0926writePrefixedInstruction(loc, 0x06000000380d0000 | pldRT);927relocateNoSym(loc, R_PPC64_TPREL34, val);928break;929}930case R_PPC64_TLS: {931const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);932if (locAsInt % 4 == 0) {933uint32_t primaryOp = getPrimaryOpCode(read32(loc));934if (primaryOp != 31)935error("unrecognized instruction for IE to LE R_PPC64_TLS");936uint32_t secondaryOp = (read32(loc) & 0x000007FE) >> 1; // bits 21-30937uint32_t dFormOp = getPPCDFormOp(secondaryOp);938uint32_t finalReloc;939if (dFormOp == 0) { // Expecting a DS-Form instruction.940dFormOp = getPPCDSFormOp(secondaryOp);941if (dFormOp == 0)942error("unrecognized instruction for IE to LE R_PPC64_TLS");943finalReloc = R_PPC64_TPREL16_LO_DS;944} else945finalReloc = R_PPC64_TPREL16_LO;946write32(loc, dFormOp | (read32(loc) & 0x03ff0000));947relocateNoSym(loc + offset, finalReloc, val);948} else if (locAsInt % 4 == 1) {949// If the offset is not 4 byte aligned then we have a PCRel type reloc.950// This version of the relocation is offset by one byte from the951// instruction it references.952uint32_t tlsInstr = read32(loc - 1);953uint32_t primaryOp = getPrimaryOpCode(tlsInstr);954if (primaryOp != 31)955errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS");956uint32_t secondaryOp = (tlsInstr & 0x000007FE) >> 1; // bits 21-30957// The add is a special case and should be turned into a nop. 
The paddi958// that comes before it will already have computed the address of the959// symbol.960if (secondaryOp == 266) {961// Check if the add uses the same result register as the input register.962uint32_t rt = (tlsInstr & 0x03E00000) >> 21; // bits 6-10963uint32_t ra = (tlsInstr & 0x001F0000) >> 16; // bits 11-15964if (ra == rt) {965write32(loc - 1, NOP);966} else {967// mr rt, ra968write32(loc - 1, 0x7C000378 | (rt << 16) | (ra << 21) | (ra << 11));969}970} else {971uint32_t dFormOp = getPPCDFormOp(secondaryOp);972if (dFormOp == 0) { // Expecting a DS-Form instruction.973dFormOp = getPPCDSFormOp(secondaryOp);974if (dFormOp == 0)975errorOrWarn("unrecognized instruction for IE to LE R_PPC64_TLS");976}977write32(loc - 1, (dFormOp | (tlsInstr & 0x03ff0000)));978}979} else {980errorOrWarn("R_PPC64_TLS must be either 4 byte aligned or one byte "981"offset from 4 byte aligned");982}983break;984}985default:986llvm_unreachable("unknown relocation for IE to LE");987break;988}989}990991RelExpr PPC64::getRelExpr(RelType type, const Symbol &s,992const uint8_t *loc) const {993switch (type) {994case R_PPC64_NONE:995return R_NONE;996case R_PPC64_ADDR16:997case R_PPC64_ADDR16_DS:998case R_PPC64_ADDR16_HA:999case R_PPC64_ADDR16_HI:1000case R_PPC64_ADDR16_HIGH:1001case R_PPC64_ADDR16_HIGHER:1002case R_PPC64_ADDR16_HIGHERA:1003case R_PPC64_ADDR16_HIGHEST:1004case R_PPC64_ADDR16_HIGHESTA:1005case R_PPC64_ADDR16_LO:1006case R_PPC64_ADDR16_LO_DS:1007case R_PPC64_ADDR32:1008case R_PPC64_ADDR64:1009return R_ABS;1010case R_PPC64_GOT16:1011case R_PPC64_GOT16_DS:1012case R_PPC64_GOT16_HA:1013case R_PPC64_GOT16_HI:1014case R_PPC64_GOT16_LO:1015case R_PPC64_GOT16_LO_DS:1016return R_GOT_OFF;1017case R_PPC64_TOC16:1018case R_PPC64_TOC16_DS:1019case R_PPC64_TOC16_HI:1020case R_PPC64_TOC16_LO:1021return R_GOTREL;1022case R_PPC64_GOT_PCREL34:1023case R_PPC64_GOT_TPREL_PCREL34:1024case R_PPC64_PCREL_OPT:1025return R_GOT_PC;1026case R_PPC64_TOC16_HA:1027case R_PPC64_TOC16_LO_DS:1028return 
config->tocOptimize ? R_PPC64_RELAX_TOC : R_GOTREL;1029case R_PPC64_TOC:1030return R_PPC64_TOCBASE;1031case R_PPC64_REL14:1032case R_PPC64_REL24:1033return R_PPC64_CALL_PLT;1034case R_PPC64_REL24_NOTOC:1035return R_PLT_PC;1036case R_PPC64_REL16_LO:1037case R_PPC64_REL16_HA:1038case R_PPC64_REL16_HI:1039case R_PPC64_REL32:1040case R_PPC64_REL64:1041case R_PPC64_PCREL34:1042return R_PC;1043case R_PPC64_GOT_TLSGD16:1044case R_PPC64_GOT_TLSGD16_HA:1045case R_PPC64_GOT_TLSGD16_HI:1046case R_PPC64_GOT_TLSGD16_LO:1047return R_TLSGD_GOT;1048case R_PPC64_GOT_TLSGD_PCREL34:1049return R_TLSGD_PC;1050case R_PPC64_GOT_TLSLD16:1051case R_PPC64_GOT_TLSLD16_HA:1052case R_PPC64_GOT_TLSLD16_HI:1053case R_PPC64_GOT_TLSLD16_LO:1054return R_TLSLD_GOT;1055case R_PPC64_GOT_TLSLD_PCREL34:1056return R_TLSLD_PC;1057case R_PPC64_GOT_TPREL16_HA:1058case R_PPC64_GOT_TPREL16_LO_DS:1059case R_PPC64_GOT_TPREL16_DS:1060case R_PPC64_GOT_TPREL16_HI:1061return R_GOT_OFF;1062case R_PPC64_GOT_DTPREL16_HA:1063case R_PPC64_GOT_DTPREL16_LO_DS:1064case R_PPC64_GOT_DTPREL16_DS:1065case R_PPC64_GOT_DTPREL16_HI:1066return R_TLSLD_GOT_OFF;1067case R_PPC64_TPREL16:1068case R_PPC64_TPREL16_HA:1069case R_PPC64_TPREL16_LO:1070case R_PPC64_TPREL16_HI:1071case R_PPC64_TPREL16_DS:1072case R_PPC64_TPREL16_LO_DS:1073case R_PPC64_TPREL16_HIGHER:1074case R_PPC64_TPREL16_HIGHERA:1075case R_PPC64_TPREL16_HIGHEST:1076case R_PPC64_TPREL16_HIGHESTA:1077case R_PPC64_TPREL34:1078return R_TPREL;1079case R_PPC64_DTPREL16:1080case R_PPC64_DTPREL16_DS:1081case R_PPC64_DTPREL16_HA:1082case R_PPC64_DTPREL16_HI:1083case R_PPC64_DTPREL16_HIGHER:1084case R_PPC64_DTPREL16_HIGHERA:1085case R_PPC64_DTPREL16_HIGHEST:1086case R_PPC64_DTPREL16_HIGHESTA:1087case R_PPC64_DTPREL16_LO:1088case R_PPC64_DTPREL16_LO_DS:1089case R_PPC64_DTPREL64:1090case R_PPC64_DTPREL34:1091return R_DTPREL;1092case R_PPC64_TLSGD:1093return R_TLSDESC_CALL;1094case R_PPC64_TLSLD:1095return R_TLSLD_HINT;1096case R_PPC64_TLS:1097return 
R_TLSIE_HINT;1098default:1099error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +1100") against symbol " + toString(s));1101return R_NONE;1102}1103}11041105RelType PPC64::getDynRel(RelType type) const {1106if (type == R_PPC64_ADDR64 || type == R_PPC64_TOC)1107return R_PPC64_ADDR64;1108return R_PPC64_NONE;1109}11101111int64_t PPC64::getImplicitAddend(const uint8_t *buf, RelType type) const {1112switch (type) {1113case R_PPC64_NONE:1114case R_PPC64_GLOB_DAT:1115case R_PPC64_JMP_SLOT:1116return 0;1117case R_PPC64_REL32:1118return SignExtend64<32>(read32(buf));1119case R_PPC64_ADDR64:1120case R_PPC64_REL64:1121case R_PPC64_RELATIVE:1122case R_PPC64_IRELATIVE:1123case R_PPC64_DTPMOD64:1124case R_PPC64_DTPREL64:1125case R_PPC64_TPREL64:1126return read64(buf);1127default:1128internalLinkerError(getErrorLocation(buf),1129"cannot read addend for relocation " + toString(type));1130return 0;1131}1132}11331134void PPC64::writeGotHeader(uint8_t *buf) const {1135write64(buf, getPPC64TocBase());1136}11371138void PPC64::writePltHeader(uint8_t *buf) const {1139// The generic resolver stub goes first.1140write32(buf + 0, 0x7c0802a6); // mflr r01141write32(buf + 4, 0x429f0005); // bcl 20,4*cr7+so,8 <_glink+0x8>1142write32(buf + 8, 0x7d6802a6); // mflr r111143write32(buf + 12, 0x7c0803a6); // mtlr r01144write32(buf + 16, 0x7d8b6050); // subf r12, r11, r121145write32(buf + 20, 0x380cffcc); // subi r0,r12,521146write32(buf + 24, 0x7800f082); // srdi r0,r0,62,21147write32(buf + 28, 0xe98b002c); // ld r12,44(r11)1148write32(buf + 32, 0x7d6c5a14); // add r11,r12,r111149write32(buf + 36, 0xe98b0000); // ld r12,0(r11)1150write32(buf + 40, 0xe96b0008); // ld r11,8(r11)1151write32(buf + 44, 0x7d8903a6); // mtctr r121152write32(buf + 48, 0x4e800420); // bctr11531154// The 'bcl' instruction will set the link register to the address of the1155// following instruction ('mflr r11'). 
Here we store the offset from that1156// instruction to the first entry in the GotPlt section.1157int64_t gotPltOffset = in.gotPlt->getVA() - (in.plt->getVA() + 8);1158write64(buf + 52, gotPltOffset);1159}11601161void PPC64::writePlt(uint8_t *buf, const Symbol &sym,1162uint64_t /*pltEntryAddr*/) const {1163int32_t offset = pltHeaderSize + sym.getPltIdx() * pltEntrySize;1164// bl __glink_PLTresolve1165write32(buf, 0x48000000 | ((-offset) & 0x03FFFFFc));1166}11671168void PPC64::writeIplt(uint8_t *buf, const Symbol &sym,1169uint64_t /*pltEntryAddr*/) const {1170writePPC64LoadAndBranch(buf, sym.getGotPltVA() - getPPC64TocBase());1171}11721173static std::pair<RelType, uint64_t> toAddr16Rel(RelType type, uint64_t val) {1174// Relocations relative to the toc-base need to be adjusted by the Toc offset.1175uint64_t tocBiasedVal = val - ppc64TocOffset;1176// Relocations relative to dtv[dtpmod] need to be adjusted by the DTP offset.1177uint64_t dtpBiasedVal = val - dynamicThreadPointerOffset;11781179switch (type) {1180// TOC biased relocation.1181case R_PPC64_GOT16:1182case R_PPC64_GOT_TLSGD16:1183case R_PPC64_GOT_TLSLD16:1184case R_PPC64_TOC16:1185return {R_PPC64_ADDR16, tocBiasedVal};1186case R_PPC64_GOT16_DS:1187case R_PPC64_TOC16_DS:1188case R_PPC64_GOT_TPREL16_DS:1189case R_PPC64_GOT_DTPREL16_DS:1190return {R_PPC64_ADDR16_DS, tocBiasedVal};1191case R_PPC64_GOT16_HA:1192case R_PPC64_GOT_TLSGD16_HA:1193case R_PPC64_GOT_TLSLD16_HA:1194case R_PPC64_GOT_TPREL16_HA:1195case R_PPC64_GOT_DTPREL16_HA:1196case R_PPC64_TOC16_HA:1197return {R_PPC64_ADDR16_HA, tocBiasedVal};1198case R_PPC64_GOT16_HI:1199case R_PPC64_GOT_TLSGD16_HI:1200case R_PPC64_GOT_TLSLD16_HI:1201case R_PPC64_GOT_TPREL16_HI:1202case R_PPC64_GOT_DTPREL16_HI:1203case R_PPC64_TOC16_HI:1204return {R_PPC64_ADDR16_HI, tocBiasedVal};1205case R_PPC64_GOT16_LO:1206case R_PPC64_GOT_TLSGD16_LO:1207case R_PPC64_GOT_TLSLD16_LO:1208case R_PPC64_TOC16_LO:1209return {R_PPC64_ADDR16_LO, tocBiasedVal};1210case 
R_PPC64_GOT16_LO_DS:1211case R_PPC64_TOC16_LO_DS:1212case R_PPC64_GOT_TPREL16_LO_DS:1213case R_PPC64_GOT_DTPREL16_LO_DS:1214return {R_PPC64_ADDR16_LO_DS, tocBiasedVal};12151216// Dynamic Thread pointer biased relocation types.1217case R_PPC64_DTPREL16:1218return {R_PPC64_ADDR16, dtpBiasedVal};1219case R_PPC64_DTPREL16_DS:1220return {R_PPC64_ADDR16_DS, dtpBiasedVal};1221case R_PPC64_DTPREL16_HA:1222return {R_PPC64_ADDR16_HA, dtpBiasedVal};1223case R_PPC64_DTPREL16_HI:1224return {R_PPC64_ADDR16_HI, dtpBiasedVal};1225case R_PPC64_DTPREL16_HIGHER:1226return {R_PPC64_ADDR16_HIGHER, dtpBiasedVal};1227case R_PPC64_DTPREL16_HIGHERA:1228return {R_PPC64_ADDR16_HIGHERA, dtpBiasedVal};1229case R_PPC64_DTPREL16_HIGHEST:1230return {R_PPC64_ADDR16_HIGHEST, dtpBiasedVal};1231case R_PPC64_DTPREL16_HIGHESTA:1232return {R_PPC64_ADDR16_HIGHESTA, dtpBiasedVal};1233case R_PPC64_DTPREL16_LO:1234return {R_PPC64_ADDR16_LO, dtpBiasedVal};1235case R_PPC64_DTPREL16_LO_DS:1236return {R_PPC64_ADDR16_LO_DS, dtpBiasedVal};1237case R_PPC64_DTPREL64:1238return {R_PPC64_ADDR64, dtpBiasedVal};12391240default:1241return {type, val};1242}1243}12441245static bool isTocOptType(RelType type) {1246switch (type) {1247case R_PPC64_GOT16_HA:1248case R_PPC64_GOT16_LO_DS:1249case R_PPC64_TOC16_HA:1250case R_PPC64_TOC16_LO_DS:1251case R_PPC64_TOC16_LO:1252return true;1253default:1254return false;1255}1256}12571258void PPC64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {1259RelType type = rel.type;1260bool shouldTocOptimize = isTocOptType(type);1261// For dynamic thread pointer relative, toc-relative, and got-indirect1262// relocations, proceed in terms of the corresponding ADDR16 relocation type.1263std::tie(type, val) = toAddr16Rel(type, val);12641265switch (type) {1266case R_PPC64_ADDR14: {1267checkAlignment(loc, val, 4, rel);1268// Preserve the AA/LK bits in the branch instruction1269uint8_t aalk = loc[3];1270write16(loc + 2, (aalk & 3) | (val & 0xfffc));1271break;1272}1273case 
R_PPC64_ADDR16:1274checkIntUInt(loc, val, 16, rel);1275write16(loc, val);1276break;1277case R_PPC64_ADDR32:1278checkIntUInt(loc, val, 32, rel);1279write32(loc, val);1280break;1281case R_PPC64_ADDR16_DS:1282case R_PPC64_TPREL16_DS: {1283checkInt(loc, val, 16, rel);1284// DQ-form instructions use bits 28-31 as part of the instruction encoding1285// DS-form instructions only use bits 30-31.1286uint16_t mask = isDQFormInstruction(readFromHalf16(loc)) ? 0xf : 0x3;1287checkAlignment(loc, lo(val), mask + 1, rel);1288write16(loc, (read16(loc) & mask) | lo(val));1289} break;1290case R_PPC64_ADDR16_HA:1291case R_PPC64_REL16_HA:1292case R_PPC64_TPREL16_HA:1293if (config->tocOptimize && shouldTocOptimize && ha(val) == 0)1294writeFromHalf16(loc, NOP);1295else {1296checkInt(loc, val + 0x8000, 32, rel);1297write16(loc, ha(val));1298}1299break;1300case R_PPC64_ADDR16_HI:1301case R_PPC64_REL16_HI:1302case R_PPC64_TPREL16_HI:1303checkInt(loc, val, 32, rel);1304write16(loc, hi(val));1305break;1306case R_PPC64_ADDR16_HIGH:1307write16(loc, hi(val));1308break;1309case R_PPC64_ADDR16_HIGHER:1310case R_PPC64_TPREL16_HIGHER:1311write16(loc, higher(val));1312break;1313case R_PPC64_ADDR16_HIGHERA:1314case R_PPC64_TPREL16_HIGHERA:1315write16(loc, highera(val));1316break;1317case R_PPC64_ADDR16_HIGHEST:1318case R_PPC64_TPREL16_HIGHEST:1319write16(loc, highest(val));1320break;1321case R_PPC64_ADDR16_HIGHESTA:1322case R_PPC64_TPREL16_HIGHESTA:1323write16(loc, highesta(val));1324break;1325case R_PPC64_ADDR16_LO:1326case R_PPC64_REL16_LO:1327case R_PPC64_TPREL16_LO:1328// When the high-adjusted part of a toc relocation evaluates to 0, it is1329// changed into a nop. 
The lo part then needs to be updated to use the1330// toc-pointer register r2, as the base register.1331if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) {1332uint32_t insn = readFromHalf16(loc);1333if (isInstructionUpdateForm(insn))1334error(getErrorLocation(loc) +1335"can't toc-optimize an update instruction: 0x" +1336utohexstr(insn));1337writeFromHalf16(loc, (insn & 0xffe00000) | 0x00020000 | lo(val));1338} else {1339write16(loc, lo(val));1340}1341break;1342case R_PPC64_ADDR16_LO_DS:1343case R_PPC64_TPREL16_LO_DS: {1344// DQ-form instructions use bits 28-31 as part of the instruction encoding1345// DS-form instructions only use bits 30-31.1346uint32_t insn = readFromHalf16(loc);1347uint16_t mask = isDQFormInstruction(insn) ? 0xf : 0x3;1348checkAlignment(loc, lo(val), mask + 1, rel);1349if (config->tocOptimize && shouldTocOptimize && ha(val) == 0) {1350// When the high-adjusted part of a toc relocation evaluates to 0, it is1351// changed into a nop. The lo part then needs to be updated to use the toc1352// pointer register r2, as the base register.1353if (isInstructionUpdateForm(insn))1354error(getErrorLocation(loc) +1355"Can't toc-optimize an update instruction: 0x" +1356Twine::utohexstr(insn));1357insn &= 0xffe00000 | mask;1358writeFromHalf16(loc, insn | 0x00020000 | lo(val));1359} else {1360write16(loc, (read16(loc) & mask) | lo(val));1361}1362} break;1363case R_PPC64_TPREL16:1364checkInt(loc, val, 16, rel);1365write16(loc, val);1366break;1367case R_PPC64_REL32:1368checkInt(loc, val, 32, rel);1369write32(loc, val);1370break;1371case R_PPC64_ADDR64:1372case R_PPC64_REL64:1373case R_PPC64_TOC:1374write64(loc, val);1375break;1376case R_PPC64_REL14: {1377uint32_t mask = 0x0000FFFC;1378checkInt(loc, val, 16, rel);1379checkAlignment(loc, val, 4, rel);1380write32(loc, (read32(loc) & ~mask) | (val & mask));1381break;1382}1383case R_PPC64_REL24:1384case R_PPC64_REL24_NOTOC: {1385uint32_t mask = 0x03FFFFFC;1386checkInt(loc, val, 26, 
rel);1387checkAlignment(loc, val, 4, rel);1388write32(loc, (read32(loc) & ~mask) | (val & mask));1389break;1390}1391case R_PPC64_DTPREL64:1392write64(loc, val - dynamicThreadPointerOffset);1393break;1394case R_PPC64_DTPREL34:1395// The Dynamic Thread Vector actually points 0x8000 bytes past the start1396// of the TLS block. Therefore, in the case of R_PPC64_DTPREL34 we first1397// need to subtract that value then fallthrough to the general case.1398val -= dynamicThreadPointerOffset;1399[[fallthrough]];1400case R_PPC64_PCREL34:1401case R_PPC64_GOT_PCREL34:1402case R_PPC64_GOT_TLSGD_PCREL34:1403case R_PPC64_GOT_TLSLD_PCREL34:1404case R_PPC64_GOT_TPREL_PCREL34:1405case R_PPC64_TPREL34: {1406const uint64_t si0Mask = 0x00000003ffff0000;1407const uint64_t si1Mask = 0x000000000000ffff;1408const uint64_t fullMask = 0x0003ffff0000ffff;1409checkInt(loc, val, 34, rel);14101411uint64_t instr = readPrefixedInstruction(loc) & ~fullMask;1412writePrefixedInstruction(loc, instr | ((val & si0Mask) << 16) |1413(val & si1Mask));1414break;1415}1416// If we encounter a PCREL_OPT relocation that we won't optimize.1417case R_PPC64_PCREL_OPT:1418break;1419default:1420llvm_unreachable("unknown relocation");1421}1422}14231424bool PPC64::needsThunk(RelExpr expr, RelType type, const InputFile *file,1425uint64_t branchAddr, const Symbol &s, int64_t a) const {1426if (type != R_PPC64_REL14 && type != R_PPC64_REL24 &&1427type != R_PPC64_REL24_NOTOC)1428return false;14291430// If a function is in the Plt it needs to be called with a call-stub.1431if (s.isInPlt())1432return true;14331434// This check looks at the st_other bits of the callee with relocation1435// R_PPC64_REL14 or R_PPC64_REL24. 
If the value is 1, then the callee1436// clobbers the TOC and we need an R2 save stub.1437if (type != R_PPC64_REL24_NOTOC && (s.stOther >> 5) == 1)1438return true;14391440if (type == R_PPC64_REL24_NOTOC && (s.stOther >> 5) > 1)1441return true;14421443// An undefined weak symbol not in a PLT does not need a thunk. If it is1444// hidden, its binding has been converted to local, so we just check1445// isUndefined() here. A undefined non-weak symbol has been errored.1446if (s.isUndefined())1447return false;14481449// If the offset exceeds the range of the branch type then it will need1450// a range-extending thunk.1451// See the comment in getRelocTargetVA() about R_PPC64_CALL.1452return !inBranchRange(type, branchAddr,1453s.getVA(a) +1454getPPC64GlobalEntryToLocalEntryOffset(s.stOther));1455}14561457uint32_t PPC64::getThunkSectionSpacing() const {1458// See comment in Arch/ARM.cpp for a more detailed explanation of1459// getThunkSectionSpacing(). For PPC64 we pick the constant here based on1460// R_PPC64_REL24, which is used by unconditional branch instructions.1461// 0x2000000 = (1 << 24-1) * 41462return 0x2000000;1463}14641465bool PPC64::inBranchRange(RelType type, uint64_t src, uint64_t dst) const {1466int64_t offset = dst - src;1467if (type == R_PPC64_REL14)1468return isInt<16>(offset);1469if (type == R_PPC64_REL24 || type == R_PPC64_REL24_NOTOC)1470return isInt<26>(offset);1471llvm_unreachable("unsupported relocation type used in branch");1472}14731474RelExpr PPC64::adjustTlsExpr(RelType type, RelExpr expr) const {1475if (type != R_PPC64_GOT_TLSGD_PCREL34 && expr == R_RELAX_TLS_GD_TO_IE)1476return R_RELAX_TLS_GD_TO_IE_GOT_OFF;1477if (expr == R_RELAX_TLS_LD_TO_LE)1478return R_RELAX_TLS_LD_TO_LE_ABS;1479return expr;1480}14811482RelExpr PPC64::adjustGotPcExpr(RelType type, int64_t addend,1483const uint8_t *loc) const {1484if ((type == R_PPC64_GOT_PCREL34 || type == R_PPC64_PCREL_OPT) &&1485config->pcRelOptimize) {1486// It only makes sense to optimize pld since 
paddi means that the address1487// of the object in the GOT is required rather than the object itself.1488if ((readPrefixedInstruction(loc) & 0xfc000000) == 0xe4000000)1489return R_PPC64_RELAX_GOT_PC;1490}1491return R_GOT_PC;1492}14931494// Reference: 3.7.4.1 of the 64-bit ELF V2 abi supplement.1495// The general dynamic code sequence for a global `x` uses 4 instructions.1496// Instruction Relocation Symbol1497// addis r3, r2, x@got@tlsgd@ha R_PPC64_GOT_TLSGD16_HA x1498// addi r3, r3, x@got@tlsgd@l R_PPC64_GOT_TLSGD16_LO x1499// bl __tls_get_addr(x@tlsgd) R_PPC64_TLSGD x1500// R_PPC64_REL24 __tls_get_addr1501// nop None None1502//1503// Relaxing to initial-exec entails:1504// 1) Convert the addis/addi pair that builds the address of the tls_index1505// struct for 'x' to an addis/ld pair that loads an offset from a got-entry.1506// 2) Convert the call to __tls_get_addr to a nop.1507// 3) Convert the nop following the call to an add of the loaded offset to the1508// thread pointer.1509// Since the nop must directly follow the call, the R_PPC64_TLSGD relocation is1510// used as the relaxation hint for both steps 2 and 3.1511void PPC64::relaxTlsGdToIe(uint8_t *loc, const Relocation &rel,1512uint64_t val) const {1513switch (rel.type) {1514case R_PPC64_GOT_TLSGD16_HA:1515// This is relaxed from addis rT, r2, sym@got@tlsgd@ha to1516// addis rT, r2, sym@got@tprel@ha.1517relocateNoSym(loc, R_PPC64_GOT_TPREL16_HA, val);1518return;1519case R_PPC64_GOT_TLSGD16:1520case R_PPC64_GOT_TLSGD16_LO: {1521// Relax from addi r3, rA, sym@got@tlsgd@l to1522// ld r3, sym@got@tprel@l(rA)1523uint32_t ra = (readFromHalf16(loc) & (0x1f << 16));1524writeFromHalf16(loc, 0xe8600000 | ra);1525relocateNoSym(loc, R_PPC64_GOT_TPREL16_LO_DS, val);1526return;1527}1528case R_PPC64_GOT_TLSGD_PCREL34: {1529// Relax from paddi r3, 0, sym@got@tlsgd@pcrel, 1 to1530// pld r3, sym@got@tprel@pcrel1531writePrefixedInstruction(loc, 0x04100000e4600000);1532relocateNoSym(loc, R_PPC64_GOT_TPREL_PCREL34, 
val);1533return;1534}1535case R_PPC64_TLSGD: {1536// PC Relative Relaxation:1537// Relax from bl __tls_get_addr@notoc(x@tlsgd) to1538// nop1539// TOC Relaxation:1540// Relax from bl __tls_get_addr(x@tlsgd)1541// nop1542// to1543// nop1544// add r3, r3, r131545const uintptr_t locAsInt = reinterpret_cast<uintptr_t>(loc);1546if (locAsInt % 4 == 0) {1547write32(loc, NOP); // bl __tls_get_addr(sym@tlsgd) --> nop1548write32(loc + 4, 0x7c636A14); // nop --> add r3, r3, r131549} else if (locAsInt % 4 == 1) {1550// bl __tls_get_addr(sym@tlsgd) --> add r3, r3, r131551write32(loc - 1, 0x7c636a14);1552} else {1553errorOrWarn("R_PPC64_TLSGD has unexpected byte alignment");1554}1555return;1556}1557default:1558llvm_unreachable("unsupported relocation for TLS GD to IE relaxation");1559}1560}15611562void PPC64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {1563uint64_t secAddr = sec.getOutputSection()->addr;1564if (auto *s = dyn_cast<InputSection>(&sec))1565secAddr += s->outSecOff;1566else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))1567secAddr += ehIn->getParent()->outSecOff;1568uint64_t lastPPCRelaxedRelocOff = -1;1569for (const Relocation &rel : sec.relocs()) {1570uint8_t *loc = buf + rel.offset;1571const uint64_t val =1572sec.getRelocTargetVA(sec.file, rel.type, rel.addend,1573secAddr + rel.offset, *rel.sym, rel.expr);1574switch (rel.expr) {1575case R_PPC64_RELAX_GOT_PC: {1576// The R_PPC64_PCREL_OPT relocation must appear immediately after1577// R_PPC64_GOT_PCREL34 in the relocations table at the same offset.1578// We can only relax R_PPC64_PCREL_OPT if we have also relaxed1579// the associated R_PPC64_GOT_PCREL34 since only the latter has an1580// associated symbol. 
So save the offset when relaxing R_PPC64_GOT_PCREL341581// and only relax the other if the saved offset matches.1582if (rel.type == R_PPC64_GOT_PCREL34)1583lastPPCRelaxedRelocOff = rel.offset;1584if (rel.type == R_PPC64_PCREL_OPT && rel.offset != lastPPCRelaxedRelocOff)1585break;1586relaxGot(loc, rel, val);1587break;1588}1589case R_PPC64_RELAX_TOC:1590// rel.sym refers to the STT_SECTION symbol associated to the .toc input1591// section. If an R_PPC64_TOC16_LO (.toc + addend) references the TOC1592// entry, there may be R_PPC64_TOC16_HA not paired with1593// R_PPC64_TOC16_LO_DS. Don't relax. This loses some relaxation1594// opportunities but is safe.1595if (ppc64noTocRelax.count({rel.sym, rel.addend}) ||1596!tryRelaxPPC64TocIndirection(rel, loc))1597relocate(loc, rel, val);1598break;1599case R_PPC64_CALL:1600// If this is a call to __tls_get_addr, it may be part of a TLS1601// sequence that has been relaxed and turned into a nop. In this1602// case, we don't want to handle it as a call.1603if (read32(loc) == 0x60000000) // nop1604break;16051606// Patch a nop (0x60000000) to a ld.1607if (rel.sym->needsTocRestore()) {1608// gcc/gfortran 5.4, 6.3 and earlier versions do not add nop for1609// recursive calls even if the function is preemptible. This is not1610// wrong in the common case where the function is not preempted at1611// runtime. 
Just ignore.1612if ((rel.offset + 8 > sec.content().size() ||1613read32(loc + 4) != 0x60000000) &&1614rel.sym->file != sec.file) {1615// Use substr(6) to remove the "__plt_" prefix.1616errorOrWarn(getErrorLocation(loc) + "call to " +1617lld::toString(*rel.sym).substr(6) +1618" lacks nop, can't restore toc");1619break;1620}1621write32(loc + 4, 0xe8410018); // ld %r2, 24(%r1)1622}1623relocate(loc, rel, val);1624break;1625case R_RELAX_TLS_GD_TO_IE:1626case R_RELAX_TLS_GD_TO_IE_GOT_OFF:1627relaxTlsGdToIe(loc, rel, val);1628break;1629case R_RELAX_TLS_GD_TO_LE:1630relaxTlsGdToLe(loc, rel, val);1631break;1632case R_RELAX_TLS_LD_TO_LE_ABS:1633relaxTlsLdToLe(loc, rel, val);1634break;1635case R_RELAX_TLS_IE_TO_LE:1636relaxTlsIeToLe(loc, rel, val);1637break;1638default:1639relocate(loc, rel, val);1640break;1641}1642}1643}16441645// The prologue for a split-stack function is expected to look roughly1646// like this:1647// .Lglobal_entry_point:1648// # TOC pointer initialization.1649// ...1650// .Llocal_entry_point:1651// # load the __private_ss member of the threads tcbhead.1652// ld r0,-0x7000-64(r13)1653// # subtract the functions stack size from the stack pointer.1654// addis r12, r1, ha(-stack-frame size)1655// addi r12, r12, l(-stack-frame size)1656// # compare needed to actual and branch to allocate_more_stack if more1657// # space is needed, otherwise fallthrough to 'normal' function body.1658// cmpld cr7,r12,r01659// blt- cr7, .Lallocate_more_stack1660//1661// -) The allocate_more_stack block might be placed after the split-stack1662// prologue and the `blt-` replaced with a `bge+ .Lnormal_func_body`1663// instead.1664// -) If either the addis or addi is not needed due to the stack size being1665// smaller then 32K or a multiple of 64K they will be replaced with a nop,1666// but there will always be 2 instructions the linker can overwrite for the1667// adjusted stack size.1668//1669// The linkers job here is to increase the stack size used in the addis/addi1670// pair 
by split-stack-size-adjust.1671// addis r12, r1, ha(-stack-frame size - split-stack-adjust-size)1672// addi r12, r12, l(-stack-frame size - split-stack-adjust-size)1673bool PPC64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,1674uint8_t stOther) const {1675// If the caller has a global entry point adjust the buffer past it. The start1676// of the split-stack prologue will be at the local entry point.1677loc += getPPC64GlobalEntryToLocalEntryOffset(stOther);16781679// At the very least we expect to see a load of some split-stack data from the1680// tcb, and 2 instructions that calculate the ending stack address this1681// function will require. If there is not enough room for at least 31682// instructions it can't be a split-stack prologue.1683if (loc + 12 >= end)1684return false;16851686// First instruction must be `ld r0, -0x7000-64(r13)`1687if (read32(loc) != 0xe80d8fc0)1688return false;16891690int16_t hiImm = 0;1691int16_t loImm = 0;1692// First instruction can be either an addis if the frame size is larger then1693// 32K, or an addi if the size is less then 32K.1694int32_t firstInstr = read32(loc + 4);1695if (getPrimaryOpCode(firstInstr) == 15) {1696hiImm = firstInstr & 0xFFFF;1697} else if (getPrimaryOpCode(firstInstr) == 14) {1698loImm = firstInstr & 0xFFFF;1699} else {1700return false;1701}17021703// Second instruction is either an addi or a nop. If the first instruction was1704// an addi then LoImm is set and the second instruction must be a nop.1705uint32_t secondInstr = read32(loc + 8);1706if (!loImm && getPrimaryOpCode(secondInstr) == 14) {1707loImm = secondInstr & 0xFFFF;1708} else if (secondInstr != NOP) {1709return false;1710}17111712// The register operands of the first instruction should be the stack-pointer1713// (r1) as the input (RA) and r12 as the output (RT). 
If the second1714// instruction is not a nop, then it should use r12 as both input and output.1715auto checkRegOperands = [](uint32_t instr, uint8_t expectedRT,1716uint8_t expectedRA) {1717return ((instr & 0x3E00000) >> 21 == expectedRT) &&1718((instr & 0x1F0000) >> 16 == expectedRA);1719};1720if (!checkRegOperands(firstInstr, 12, 1))1721return false;1722if (secondInstr != NOP && !checkRegOperands(secondInstr, 12, 12))1723return false;17241725int32_t stackFrameSize = (hiImm * 65536) + loImm;1726// Check that the adjusted size doesn't overflow what we can represent with 21727// instructions.1728if (stackFrameSize < config->splitStackAdjustSize + INT32_MIN) {1729error(getErrorLocation(loc) + "split-stack prologue adjustment overflows");1730return false;1731}17321733int32_t adjustedStackFrameSize =1734stackFrameSize - config->splitStackAdjustSize;17351736loImm = adjustedStackFrameSize & 0xFFFF;1737hiImm = (adjustedStackFrameSize + 0x8000) >> 16;1738if (hiImm) {1739write32(loc + 4, 0x3D810000 | (uint16_t)hiImm);1740// If the low immediate is zero the second instruction will be a nop.1741secondInstr = loImm ? 0x398C0000 | (uint16_t)loImm : NOP;1742write32(loc + 8, secondInstr);1743} else {1744// addi r12, r1, imm1745write32(loc + 4, (0x39810000) | (uint16_t)loImm);1746write32(loc + 8, NOP);1747}17481749return true;1750}17511752TargetInfo *elf::getPPC64TargetInfo() {1753static PPC64 target;1754return ⌖1755}175617571758