Path: blob/main/contrib/llvm-project/lld/ELF/Arch/X86_64.cpp
34879 views
//===- X86_64.cpp ---------------------------------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//78#include "OutputSections.h"9#include "Relocations.h"10#include "Symbols.h"11#include "SyntheticSections.h"12#include "Target.h"13#include "lld/Common/ErrorHandler.h"14#include "llvm/BinaryFormat/ELF.h"15#include "llvm/Support/Endian.h"16#include "llvm/Support/MathExtras.h"1718using namespace llvm;19using namespace llvm::object;20using namespace llvm::support::endian;21using namespace llvm::ELF;22using namespace lld;23using namespace lld::elf;2425namespace {26class X86_64 : public TargetInfo {27public:28X86_64();29int getTlsGdRelaxSkip(RelType type) const override;30RelExpr getRelExpr(RelType type, const Symbol &s,31const uint8_t *loc) const override;32RelType getDynRel(RelType type) const override;33void writeGotPltHeader(uint8_t *buf) const override;34void writeGotPlt(uint8_t *buf, const Symbol &s) const override;35void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;36void writePltHeader(uint8_t *buf) const override;37void writePlt(uint8_t *buf, const Symbol &sym,38uint64_t pltEntryAddr) const override;39void relocate(uint8_t *loc, const Relocation &rel,40uint64_t val) const override;41int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;42void applyJumpInstrMod(uint8_t *loc, JumpModType type,43unsigned size) const override;44RelExpr adjustGotPcExpr(RelType type, int64_t addend,45const uint8_t *loc) const override;46void relocateAlloc(InputSectionBase &sec, uint8_t *buf) const override;47bool adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,48uint8_t stOther) const override;49bool deleteFallThruJmpInsn(InputSection &is, InputFile *file,50InputSection *nextIS) const override;51bool relaxOnce(int pass) const override;52};53} // namespace5455// This is vector of NOP instructions of sizes from 1 to 8 bytes. The56// appropriately sized instructions are used to fill the gaps between sections57// which are executed during fall through.58static const std::vector<std::vector<uint8_t>> nopInstructions = {59{0x90},60{0x66, 0x90},61{0x0f, 0x1f, 0x00},62{0x0f, 0x1f, 0x40, 0x00},63{0x0f, 0x1f, 0x44, 0x00, 0x00},64{0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},65{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},66{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},67{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}};6869X86_64::X86_64() {70copyRel = R_X86_64_COPY;71gotRel = R_X86_64_GLOB_DAT;72pltRel = R_X86_64_JUMP_SLOT;73relativeRel = R_X86_64_RELATIVE;74iRelativeRel = R_X86_64_IRELATIVE;75symbolicRel = R_X86_64_64;76tlsDescRel = R_X86_64_TLSDESC;77tlsGotRel = R_X86_64_TPOFF64;78tlsModuleIndexRel = R_X86_64_DTPMOD64;79tlsOffsetRel = R_X86_64_DTPOFF64;80gotBaseSymInGotPlt = true;81gotEntrySize = 8;82pltHeaderSize = 16;83pltEntrySize = 16;84ipltEntrySize = 16;85trapInstr = {0xcc, 0xcc, 0xcc, 0xcc}; // 0xcc = INT386nopInstrs = nopInstructions;8788// Align to the large page size (known as a superpage or huge page).89// FreeBSD automatically promotes large, superpage-aligned allocations.90defaultImageBase = 0x200000;91}9293int X86_64::getTlsGdRelaxSkip(RelType type) const {94// TLSDESC relocations are processed separately. See relaxTlsGdToLe below.95return type == R_X86_64_GOTPC32_TLSDESC || type == R_X86_64_TLSDESC_CALL ? 196: 2;97}9899// Opcodes for the different X86_64 jmp instructions.100enum JmpInsnOpcode : uint32_t {101J_JMP_32,102J_JNE_32,103J_JE_32,104J_JG_32,105J_JGE_32,106J_JB_32,107J_JBE_32,108J_JL_32,109J_JLE_32,110J_JA_32,111J_JAE_32,112J_UNKNOWN,113};114115// Given the first (optional) and second byte of the insn's opcode, this116// returns the corresponding enum value.117static JmpInsnOpcode getJmpInsnType(const uint8_t *first,118const uint8_t *second) {119if (*second == 0xe9)120return J_JMP_32;121122if (first == nullptr)123return J_UNKNOWN;124125if (*first == 0x0f) {126switch (*second) {127case 0x84:128return J_JE_32;129case 0x85:130return J_JNE_32;131case 0x8f:132return J_JG_32;133case 0x8d:134return J_JGE_32;135case 0x82:136return J_JB_32;137case 0x86:138return J_JBE_32;139case 0x8c:140return J_JL_32;141case 0x8e:142return J_JLE_32;143case 0x87:144return J_JA_32;145case 0x83:146return J_JAE_32;147}148}149return J_UNKNOWN;150}151152// Return the relocation index for input section IS with a specific Offset.153// Returns the maximum size of the vector if no such relocation is found.154static unsigned getRelocationWithOffset(const InputSection &is,155uint64_t offset) {156unsigned size = is.relocs().size();157for (unsigned i = size - 1; i + 1 > 0; --i) {158if (is.relocs()[i].offset == offset && is.relocs()[i].expr != R_NONE)159return i;160}161return size;162}163164// Returns true if R corresponds to a relocation used for a jump instruction.165// TODO: Once special relocations for relaxable jump instructions are available,166// this should be modified to use those relocations.167static bool isRelocationForJmpInsn(Relocation &R) {168return R.type == R_X86_64_PLT32 || R.type == R_X86_64_PC32 ||169R.type == R_X86_64_PC8;170}171172// Return true if Relocation R points to the first instruction in the173// next section.174// TODO: Delete this once psABI reserves a new relocation type for fall thru175// jumps.176static bool isFallThruRelocation(InputSection &is, InputFile *file,177InputSection *nextIS, Relocation &r) {178if (!isRelocationForJmpInsn(r))179return false;180181uint64_t addrLoc = is.getOutputSection()->addr + is.outSecOff + r.offset;182uint64_t targetOffset = InputSectionBase::getRelocTargetVA(183file, r.type, r.addend, addrLoc, *r.sym, r.expr);184185// If this jmp is a fall thru, the target offset is the beginning of the186// next section.187uint64_t nextSectionOffset =188nextIS->getOutputSection()->addr + nextIS->outSecOff;189return (addrLoc + 4 + targetOffset) == nextSectionOffset;190}191192// Return the jmp instruction opcode that is the inverse of the given193// opcode. For example, JE inverted is JNE.194static JmpInsnOpcode invertJmpOpcode(const JmpInsnOpcode opcode) {195switch (opcode) {196case J_JE_32:197return J_JNE_32;198case J_JNE_32:199return J_JE_32;200case J_JG_32:201return J_JLE_32;202case J_JGE_32:203return J_JL_32;204case J_JB_32:205return J_JAE_32;206case J_JBE_32:207return J_JA_32;208case J_JL_32:209return J_JGE_32;210case J_JLE_32:211return J_JG_32;212case J_JA_32:213return J_JBE_32;214case J_JAE_32:215return J_JB_32;216default:217return J_UNKNOWN;218}219}220221// Deletes direct jump instruction in input sections that jumps to the222// following section as it is not required. If there are two consecutive jump223// instructions, it checks if they can be flipped and one can be deleted.224// For example:225// .section .text226// a.BB.foo:227// ...228// 10: jne aa.BB.foo229// 16: jmp bar230// aa.BB.foo:231// ...232//233// can be converted to:234// a.BB.foo:235// ...236// 10: je bar #jne flipped to je and the jmp is deleted.237// aa.BB.foo:238// ...239bool X86_64::deleteFallThruJmpInsn(InputSection &is, InputFile *file,240InputSection *nextIS) const {241const unsigned sizeOfDirectJmpInsn = 5;242243if (nextIS == nullptr)244return false;245246if (is.getSize() < sizeOfDirectJmpInsn)247return false;248249// If this jmp insn can be removed, it is the last insn and the250// relocation is 4 bytes before the end.251unsigned rIndex = getRelocationWithOffset(is, is.getSize() - 4);252if (rIndex == is.relocs().size())253return false;254255Relocation &r = is.relocs()[rIndex];256257// Check if the relocation corresponds to a direct jmp.258const uint8_t *secContents = is.content().data();259// If it is not a direct jmp instruction, there is nothing to do here.260if (*(secContents + r.offset - 1) != 0xe9)261return false;262263if (isFallThruRelocation(is, file, nextIS, r)) {264// This is a fall thru and can be deleted.265r.expr = R_NONE;266r.offset = 0;267is.drop_back(sizeOfDirectJmpInsn);268is.nopFiller = true;269return true;270}271272// Now, check if flip and delete is possible.273const unsigned sizeOfJmpCCInsn = 6;274// To flip, there must be at least one JmpCC and one direct jmp.275if (is.getSize() < sizeOfDirectJmpInsn + sizeOfJmpCCInsn)276return false;277278unsigned rbIndex =279getRelocationWithOffset(is, (is.getSize() - sizeOfDirectJmpInsn - 4));280if (rbIndex == is.relocs().size())281return false;282283Relocation &rB = is.relocs()[rbIndex];284285const uint8_t *jmpInsnB = secContents + rB.offset - 1;286JmpInsnOpcode jmpOpcodeB = getJmpInsnType(jmpInsnB - 1, jmpInsnB);287if (jmpOpcodeB == J_UNKNOWN)288return false;289290if (!isFallThruRelocation(is, file, nextIS, rB))291return false;292293// jmpCC jumps to the fall thru block, the branch can be flipped and the294// jmp can be deleted.295JmpInsnOpcode jInvert = invertJmpOpcode(jmpOpcodeB);296if (jInvert == J_UNKNOWN)297return false;298is.jumpInstrMod = make<JumpInstrMod>();299*is.jumpInstrMod = {rB.offset - 1, jInvert, 4};300// Move R's values to rB except the offset.301rB = {r.expr, r.type, rB.offset, r.addend, r.sym};302// Cancel R303r.expr = R_NONE;304r.offset = 0;305is.drop_back(sizeOfDirectJmpInsn);306is.nopFiller = true;307return true;308}309310bool X86_64::relaxOnce(int pass) const {311uint64_t minVA = UINT64_MAX, maxVA = 0;312for (OutputSection *osec : outputSections) {313minVA = std::min(minVA, osec->addr);314maxVA = std::max(maxVA, osec->addr + osec->size);315}316// If the max VA is under 2^31, GOTPCRELX relocations cannot overfow. In317// -pie/-shared, the condition can be relaxed to test the max VA difference as318// there is no R_RELAX_GOT_PC_NOPIC.319if (isUInt<31>(maxVA) || (isUInt<31>(maxVA - minVA) && config->isPic))320return false;321322SmallVector<InputSection *, 0> storage;323bool changed = false;324for (OutputSection *osec : outputSections) {325if (!(osec->flags & SHF_EXECINSTR))326continue;327for (InputSection *sec : getInputSections(*osec, storage)) {328for (Relocation &rel : sec->relocs()) {329if (rel.expr != R_RELAX_GOT_PC && rel.expr != R_RELAX_GOT_PC_NOPIC)330continue;331assert(rel.addend == -4);332333uint64_t v = sec->getRelocTargetVA(334sec->file, rel.type, rel.expr == R_RELAX_GOT_PC_NOPIC ? 0 : -4,335sec->getOutputSection()->addr + sec->outSecOff + rel.offset,336*rel.sym, rel.expr);337if (isInt<32>(v))338continue;339if (rel.sym->auxIdx == 0) {340rel.sym->allocateAux();341addGotEntry(*rel.sym);342changed = true;343}344rel.expr = R_GOT_PC;345}346}347}348return changed;349}350351RelExpr X86_64::getRelExpr(RelType type, const Symbol &s,352const uint8_t *loc) const {353switch (type) {354case R_X86_64_8:355case R_X86_64_16:356case R_X86_64_32:357case R_X86_64_32S:358case R_X86_64_64:359return R_ABS;360case R_X86_64_DTPOFF32:361case R_X86_64_DTPOFF64:362return R_DTPREL;363case R_X86_64_TPOFF32:364case R_X86_64_TPOFF64:365return R_TPREL;366case R_X86_64_TLSDESC_CALL:367return R_TLSDESC_CALL;368case R_X86_64_TLSLD:369return R_TLSLD_PC;370case R_X86_64_TLSGD:371return R_TLSGD_PC;372case R_X86_64_SIZE32:373case R_X86_64_SIZE64:374return R_SIZE;375case R_X86_64_PLT32:376return R_PLT_PC;377case R_X86_64_PC8:378case R_X86_64_PC16:379case R_X86_64_PC32:380case R_X86_64_PC64:381return R_PC;382case R_X86_64_GOT32:383case R_X86_64_GOT64:384return R_GOTPLT;385case R_X86_64_GOTPC32_TLSDESC:386return R_TLSDESC_PC;387case R_X86_64_GOTPCREL:388case R_X86_64_GOTPCRELX:389case R_X86_64_REX_GOTPCRELX:390case R_X86_64_GOTTPOFF:391return R_GOT_PC;392case R_X86_64_GOTOFF64:393return R_GOTPLTREL;394case R_X86_64_PLTOFF64:395return R_PLT_GOTPLT;396case R_X86_64_GOTPC32:397case R_X86_64_GOTPC64:398return R_GOTPLTONLY_PC;399case R_X86_64_NONE:400return R_NONE;401default:402error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +403") against symbol " + toString(s));404return R_NONE;405}406}407408void X86_64::writeGotPltHeader(uint8_t *buf) const {409// The first entry holds the link-time address of _DYNAMIC. It is documented410// in the psABI and glibc before Aug 2021 used the entry to compute run-time411// load address of the shared object (note that this is relevant for linking412// ld.so, not any other program).413write64le(buf, mainPart->dynamic->getVA());414}415416void X86_64::writeGotPlt(uint8_t *buf, const Symbol &s) const {417// See comments in X86::writeGotPlt.418write64le(buf, s.getPltVA() + 6);419}420421void X86_64::writeIgotPlt(uint8_t *buf, const Symbol &s) const {422// An x86 entry is the address of the ifunc resolver function (for -z rel).423if (config->writeAddends)424write64le(buf, s.getVA());425}426427void X86_64::writePltHeader(uint8_t *buf) const {428const uint8_t pltData[] = {4290xff, 0x35, 0, 0, 0, 0, // pushq GOTPLT+8(%rip)4300xff, 0x25, 0, 0, 0, 0, // jmp *GOTPLT+16(%rip)4310x0f, 0x1f, 0x40, 0x00, // nop432};433memcpy(buf, pltData, sizeof(pltData));434uint64_t gotPlt = in.gotPlt->getVA();435uint64_t plt = in.ibtPlt ? in.ibtPlt->getVA() : in.plt->getVA();436write32le(buf + 2, gotPlt - plt + 2); // GOTPLT+8437write32le(buf + 8, gotPlt - plt + 4); // GOTPLT+16438}439440void X86_64::writePlt(uint8_t *buf, const Symbol &sym,441uint64_t pltEntryAddr) const {442const uint8_t inst[] = {4430xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)4440x68, 0, 0, 0, 0, // pushq <relocation index>4450xe9, 0, 0, 0, 0, // jmpq plt[0]446};447memcpy(buf, inst, sizeof(inst));448449write32le(buf + 2, sym.getGotPltVA() - pltEntryAddr - 6);450write32le(buf + 7, sym.getPltIdx());451write32le(buf + 12, in.plt->getVA() - pltEntryAddr - 16);452}453454RelType X86_64::getDynRel(RelType type) const {455if (type == R_X86_64_64 || type == R_X86_64_PC64 || type == R_X86_64_SIZE32 ||456type == R_X86_64_SIZE64)457return type;458return R_X86_64_NONE;459}460461static void relaxTlsGdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {462if (rel.type == R_X86_64_TLSGD) {463// Convert464// .byte 0x66465// leaq x@tlsgd(%rip), %rdi466// .word 0x6666467// rex64468// call __tls_get_addr@plt469// to the following two instructions.470const uint8_t inst[] = {4710x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,4720x00, 0x00, // mov %fs:0x0,%rax4730x48, 0x8d, 0x80, 0, 0, 0, 0, // lea x@tpoff,%rax474};475memcpy(loc - 4, inst, sizeof(inst));476477// The original code used a pc relative relocation and so we have to478// compensate for the -4 in had in the addend.479write32le(loc + 8, val + 4);480} else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {481// Convert leaq x@tlsdesc(%rip), %REG to movq $x@tpoff, %REG.482if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||483(loc[-1] & 0xc7) != 0x05) {484errorOrWarn(getErrorLocation(loc - 3) +485"R_X86_64_GOTPC32_TLSDESC must be used "486"in leaq x@tlsdesc(%rip), %REG");487return;488}489loc[-3] = 0x48 | ((loc[-3] >> 2) & 1);490loc[-2] = 0xc7;491loc[-1] = 0xc0 | ((loc[-1] >> 3) & 7);492write32le(loc, val + 4);493} else {494// Convert call *x@tlsdesc(%REG) to xchg ax, ax.495assert(rel.type == R_X86_64_TLSDESC_CALL);496loc[0] = 0x66;497loc[1] = 0x90;498}499}500501static void relaxTlsGdToIe(uint8_t *loc, const Relocation &rel, uint64_t val) {502if (rel.type == R_X86_64_TLSGD) {503// Convert504// .byte 0x66505// leaq x@tlsgd(%rip), %rdi506// .word 0x6666507// rex64508// call __tls_get_addr@plt509// to the following two instructions.510const uint8_t inst[] = {5110x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00,5120x00, 0x00, // mov %fs:0x0,%rax5130x48, 0x03, 0x05, 0, 0, 0, 0, // addq x@gottpoff(%rip),%rax514};515memcpy(loc - 4, inst, sizeof(inst));516517// Both code sequences are PC relatives, but since we are moving the518// constant forward by 8 bytes we have to subtract the value by 8.519write32le(loc + 8, val - 8);520} else if (rel.type == R_X86_64_GOTPC32_TLSDESC) {521// Convert leaq x@tlsdesc(%rip), %REG to movq x@gottpoff(%rip), %REG.522assert(rel.type == R_X86_64_GOTPC32_TLSDESC);523if ((loc[-3] & 0xfb) != 0x48 || loc[-2] != 0x8d ||524(loc[-1] & 0xc7) != 0x05) {525errorOrWarn(getErrorLocation(loc - 3) +526"R_X86_64_GOTPC32_TLSDESC must be used "527"in leaq x@tlsdesc(%rip), %REG");528return;529}530loc[-2] = 0x8b;531write32le(loc, val);532} else {533// Convert call *x@tlsdesc(%rax) to xchg ax, ax.534assert(rel.type == R_X86_64_TLSDESC_CALL);535loc[0] = 0x66;536loc[1] = 0x90;537}538}539540// In some conditions, R_X86_64_GOTTPOFF relocation can be optimized to541// R_X86_64_TPOFF32 so that it does not use GOT.542static void relaxTlsIeToLe(uint8_t *loc, const Relocation &, uint64_t val) {543uint8_t *inst = loc - 3;544uint8_t reg = loc[-1] >> 3;545uint8_t *regSlot = loc - 1;546547// Note that ADD with RSP or R12 is converted to ADD instead of LEA548// because LEA with these registers needs 4 bytes to encode and thus549// wouldn't fit the space.550551if (memcmp(inst, "\x48\x03\x25", 3) == 0) {552// "addq foo@gottpoff(%rip),%rsp" -> "addq $foo,%rsp"553memcpy(inst, "\x48\x81\xc4", 3);554} else if (memcmp(inst, "\x4c\x03\x25", 3) == 0) {555// "addq foo@gottpoff(%rip),%r12" -> "addq $foo,%r12"556memcpy(inst, "\x49\x81\xc4", 3);557} else if (memcmp(inst, "\x4c\x03", 2) == 0) {558// "addq foo@gottpoff(%rip),%r[8-15]" -> "leaq foo(%r[8-15]),%r[8-15]"559memcpy(inst, "\x4d\x8d", 2);560*regSlot = 0x80 | (reg << 3) | reg;561} else if (memcmp(inst, "\x48\x03", 2) == 0) {562// "addq foo@gottpoff(%rip),%reg -> "leaq foo(%reg),%reg"563memcpy(inst, "\x48\x8d", 2);564*regSlot = 0x80 | (reg << 3) | reg;565} else if (memcmp(inst, "\x4c\x8b", 2) == 0) {566// "movq foo@gottpoff(%rip),%r[8-15]" -> "movq $foo,%r[8-15]"567memcpy(inst, "\x49\xc7", 2);568*regSlot = 0xc0 | reg;569} else if (memcmp(inst, "\x48\x8b", 2) == 0) {570// "movq foo@gottpoff(%rip),%reg" -> "movq $foo,%reg"571memcpy(inst, "\x48\xc7", 2);572*regSlot = 0xc0 | reg;573} else {574error(getErrorLocation(loc - 3) +575"R_X86_64_GOTTPOFF must be used in MOVQ or ADDQ instructions only");576}577578// The original code used a PC relative relocation.579// Need to compensate for the -4 it had in the addend.580write32le(loc, val + 4);581}582583static void relaxTlsLdToLe(uint8_t *loc, const Relocation &rel, uint64_t val) {584const uint8_t inst[] = {5850x66, 0x66, // .word 0x66665860x66, // .byte 0x665870x64, 0x48, 0x8b, 0x04, 0x25, 0x00, 0x00, 0x00, 0x00, // mov %fs:0,%rax588};589590if (loc[4] == 0xe8) {591// Convert592// leaq bar@tlsld(%rip), %rdi # 48 8d 3d <Loc>593// callq __tls_get_addr@PLT # e8 <disp32>594// leaq bar@dtpoff(%rax), %rcx595// to596// .word 0x6666597// .byte 0x66598// mov %fs:0,%rax599// leaq bar@tpoff(%rax), %rcx600memcpy(loc - 3, inst, sizeof(inst));601return;602}603604if (loc[4] == 0xff && loc[5] == 0x15) {605// Convert606// leaq x@tlsld(%rip),%rdi # 48 8d 3d <Loc>607// call *__tls_get_addr@GOTPCREL(%rip) # ff 15 <disp32>608// to609// .long 0x66666666610// movq %fs:0,%rax611// See "Table 11.9: LD -> LE Code Transition (LP64)" in612// https://raw.githubusercontent.com/wiki/hjl-tools/x86-psABI/x86-64-psABI-1.0.pdf613loc[-3] = 0x66;614memcpy(loc - 2, inst, sizeof(inst));615return;616}617618error(getErrorLocation(loc - 3) +619"expected R_X86_64_PLT32 or R_X86_64_GOTPCRELX after R_X86_64_TLSLD");620}621622// A JumpInstrMod at a specific offset indicates that the jump instruction623// opcode at that offset must be modified. This is specifically used to relax624// jump instructions with basic block sections. This function looks at the625// JumpMod and effects the change.626void X86_64::applyJumpInstrMod(uint8_t *loc, JumpModType type,627unsigned size) const {628switch (type) {629case J_JMP_32:630if (size == 4)631*loc = 0xe9;632else633*loc = 0xeb;634break;635case J_JE_32:636if (size == 4) {637loc[-1] = 0x0f;638*loc = 0x84;639} else640*loc = 0x74;641break;642case J_JNE_32:643if (size == 4) {644loc[-1] = 0x0f;645*loc = 0x85;646} else647*loc = 0x75;648break;649case J_JG_32:650if (size == 4) {651loc[-1] = 0x0f;652*loc = 0x8f;653} else654*loc = 0x7f;655break;656case J_JGE_32:657if (size == 4) {658loc[-1] = 0x0f;659*loc = 0x8d;660} else661*loc = 0x7d;662break;663case J_JB_32:664if (size == 4) {665loc[-1] = 0x0f;666*loc = 0x82;667} else668*loc = 0x72;669break;670case J_JBE_32:671if (size == 4) {672loc[-1] = 0x0f;673*loc = 0x86;674} else675*loc = 0x76;676break;677case J_JL_32:678if (size == 4) {679loc[-1] = 0x0f;680*loc = 0x8c;681} else682*loc = 0x7c;683break;684case J_JLE_32:685if (size == 4) {686loc[-1] = 0x0f;687*loc = 0x8e;688} else689*loc = 0x7e;690break;691case J_JA_32:692if (size == 4) {693loc[-1] = 0x0f;694*loc = 0x87;695} else696*loc = 0x77;697break;698case J_JAE_32:699if (size == 4) {700loc[-1] = 0x0f;701*loc = 0x83;702} else703*loc = 0x73;704break;705case J_UNKNOWN:706llvm_unreachable("Unknown Jump Relocation");707}708}709710int64_t X86_64::getImplicitAddend(const uint8_t *buf, RelType type) const {711switch (type) {712case R_X86_64_8:713case R_X86_64_PC8:714return SignExtend64<8>(*buf);715case R_X86_64_16:716case R_X86_64_PC16:717return SignExtend64<16>(read16le(buf));718case R_X86_64_32:719case R_X86_64_32S:720case R_X86_64_TPOFF32:721case R_X86_64_GOT32:722case R_X86_64_GOTPC32:723case R_X86_64_GOTPC32_TLSDESC:724case R_X86_64_GOTPCREL:725case R_X86_64_GOTPCRELX:726case R_X86_64_REX_GOTPCRELX:727case R_X86_64_PC32:728case R_X86_64_GOTTPOFF:729case R_X86_64_PLT32:730case R_X86_64_TLSGD:731case R_X86_64_TLSLD:732case R_X86_64_DTPOFF32:733case R_X86_64_SIZE32:734return SignExtend64<32>(read32le(buf));735case R_X86_64_64:736case R_X86_64_TPOFF64:737case R_X86_64_DTPOFF64:738case R_X86_64_DTPMOD64:739case R_X86_64_PC64:740case R_X86_64_SIZE64:741case R_X86_64_GLOB_DAT:742case R_X86_64_GOT64:743case R_X86_64_GOTOFF64:744case R_X86_64_GOTPC64:745case R_X86_64_PLTOFF64:746case R_X86_64_IRELATIVE:747case R_X86_64_RELATIVE:748return read64le(buf);749case R_X86_64_TLSDESC:750return read64le(buf + 8);751case R_X86_64_JUMP_SLOT:752case R_X86_64_NONE:753// These relocations are defined as not having an implicit addend.754return 0;755default:756internalLinkerError(getErrorLocation(buf),757"cannot read addend for relocation " + toString(type));758return 0;759}760}761762static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val);763764void X86_64::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const {765switch (rel.type) {766case R_X86_64_8:767checkIntUInt(loc, val, 8, rel);768*loc = val;769break;770case R_X86_64_PC8:771checkInt(loc, val, 8, rel);772*loc = val;773break;774case R_X86_64_16:775checkIntUInt(loc, val, 16, rel);776write16le(loc, val);777break;778case R_X86_64_PC16:779checkInt(loc, val, 16, rel);780write16le(loc, val);781break;782case R_X86_64_32:783checkUInt(loc, val, 32, rel);784write32le(loc, val);785break;786case R_X86_64_32S:787case R_X86_64_GOT32:788case R_X86_64_GOTPC32:789case R_X86_64_GOTPCREL:790case R_X86_64_PC32:791case R_X86_64_PLT32:792case R_X86_64_DTPOFF32:793case R_X86_64_SIZE32:794checkInt(loc, val, 32, rel);795write32le(loc, val);796break;797case R_X86_64_64:798case R_X86_64_TPOFF64:799case R_X86_64_DTPOFF64:800case R_X86_64_PC64:801case R_X86_64_SIZE64:802case R_X86_64_GOT64:803case R_X86_64_GOTOFF64:804case R_X86_64_GOTPC64:805case R_X86_64_PLTOFF64:806write64le(loc, val);807break;808case R_X86_64_GOTPCRELX:809case R_X86_64_REX_GOTPCRELX:810if (rel.expr != R_GOT_PC) {811relaxGot(loc, rel, val);812} else {813checkInt(loc, val, 32, rel);814write32le(loc, val);815}816break;817case R_X86_64_GOTPC32_TLSDESC:818case R_X86_64_TLSDESC_CALL:819case R_X86_64_TLSGD:820if (rel.expr == R_RELAX_TLS_GD_TO_LE) {821relaxTlsGdToLe(loc, rel, val);822} else if (rel.expr == R_RELAX_TLS_GD_TO_IE) {823relaxTlsGdToIe(loc, rel, val);824} else {825checkInt(loc, val, 32, rel);826write32le(loc, val);827}828break;829case R_X86_64_TLSLD:830if (rel.expr == R_RELAX_TLS_LD_TO_LE) {831relaxTlsLdToLe(loc, rel, val);832} else {833checkInt(loc, val, 32, rel);834write32le(loc, val);835}836break;837case R_X86_64_GOTTPOFF:838if (rel.expr == R_RELAX_TLS_IE_TO_LE) {839relaxTlsIeToLe(loc, rel, val);840} else {841checkInt(loc, val, 32, rel);842write32le(loc, val);843}844break;845case R_X86_64_TPOFF32:846checkInt(loc, val, 32, rel);847write32le(loc, val);848break;849850case R_X86_64_TLSDESC:851// The addend is stored in the second 64-bit word.852write64le(loc + 8, val);853break;854default:855llvm_unreachable("unknown relocation");856}857}858859RelExpr X86_64::adjustGotPcExpr(RelType type, int64_t addend,860const uint8_t *loc) const {861// Only R_X86_64_[REX_]GOTPCRELX can be relaxed. GNU as may emit GOTPCRELX862// with addend != -4. Such an instruction does not load the full GOT entry, so863// we cannot relax the relocation. E.g. movl x@GOTPCREL+4(%rip), %rax864// (addend=0) loads the high 32 bits of the GOT entry.865if (!config->relax || addend != -4 ||866(type != R_X86_64_GOTPCRELX && type != R_X86_64_REX_GOTPCRELX))867return R_GOT_PC;868const uint8_t op = loc[-2];869const uint8_t modRm = loc[-1];870871// FIXME: When PIC is disabled and foo is defined locally in the872// lower 32 bit address space, memory operand in mov can be converted into873// immediate operand. Otherwise, mov must be changed to lea. We support only874// latter relaxation at this moment.875if (op == 0x8b)876return R_RELAX_GOT_PC;877878// Relax call and jmp.879if (op == 0xff && (modRm == 0x15 || modRm == 0x25))880return R_RELAX_GOT_PC;881882// We don't support test/binop instructions without a REX prefix.883if (type == R_X86_64_GOTPCRELX)884return R_GOT_PC;885886// Relaxation of test, adc, add, and, cmp, or, sbb, sub, xor.887// If PIC then no relaxation is available.888return config->isPic ? R_GOT_PC : R_RELAX_GOT_PC_NOPIC;889}890891// A subset of relaxations can only be applied for no-PIC. This method892// handles such relaxations. Instructions encoding information was taken from:893// "Intel 64 and IA-32 Architectures Software Developer's Manual V2"894// (http://www.intel.com/content/dam/www/public/us/en/documents/manuals/895// 64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf)896static void relaxGotNoPic(uint8_t *loc, uint64_t val, uint8_t op,897uint8_t modRm) {898const uint8_t rex = loc[-3];899// Convert "test %reg, foo@GOTPCREL(%rip)" to "test $foo, %reg".900if (op == 0x85) {901// See "TEST-Logical Compare" (4-428 Vol. 2B),902// TEST r/m64, r64 uses "full" ModR / M byte (no opcode extension).903904// ModR/M byte has form XX YYY ZZZ, where905// YYY is MODRM.reg(register 2), ZZZ is MODRM.rm(register 1).906// XX has different meanings:907// 00: The operand's memory address is in reg1.908// 01: The operand's memory address is reg1 + a byte-sized displacement.909// 10: The operand's memory address is reg1 + a word-sized displacement.910// 11: The operand is reg1 itself.911// If an instruction requires only one operand, the unused reg2 field912// holds extra opcode bits rather than a register code913// 0xC0 == 11 000 000 binary.914// 0x38 == 00 111 000 binary.915// We transfer reg2 to reg1 here as operand.916// See "2.1.3 ModR/M and SIB Bytes" (Vol. 2A 2-3).917loc[-1] = 0xc0 | (modRm & 0x38) >> 3; // ModR/M byte.918919// Change opcode from TEST r/m64, r64 to TEST r/m64, imm32920// See "TEST-Logical Compare" (4-428 Vol. 2B).921loc[-2] = 0xf7;922923// Move R bit to the B bit in REX byte.924// REX byte is encoded as 0100WRXB, where925// 0100 is 4bit fixed pattern.926// REX.W When 1, a 64-bit operand size is used. Otherwise, when 0, the927// default operand size is used (which is 32-bit for most but not all928// instructions).929// REX.R This 1-bit value is an extension to the MODRM.reg field.930// REX.X This 1-bit value is an extension to the SIB.index field.931// REX.B This 1-bit value is an extension to the MODRM.rm field or the932// SIB.base field.933// See "2.2.1.2 More on REX Prefix Fields " (2-8 Vol. 2A).934loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;935write32le(loc, val);936return;937}938939// If we are here then we need to relax the adc, add, and, cmp, or, sbb, sub940// or xor operations.941942// Convert "binop foo@GOTPCREL(%rip), %reg" to "binop $foo, %reg".943// Logic is close to one for test instruction above, but we also944// write opcode extension here, see below for details.945loc[-1] = 0xc0 | (modRm & 0x38) >> 3 | (op & 0x3c); // ModR/M byte.946947// Primary opcode is 0x81, opcode extension is one of:948// 000b = ADD, 001b is OR, 010b is ADC, 011b is SBB,949// 100b is AND, 101b is SUB, 110b is XOR, 111b is CMP.950// This value was wrote to MODRM.reg in a line above.951// See "3.2 INSTRUCTIONS (A-M)" (Vol. 2A 3-15),952// "INSTRUCTION SET REFERENCE, N-Z" (Vol. 2B 4-1) for953// descriptions about each operation.954loc[-2] = 0x81;955loc[-3] = (rex & ~0x4) | (rex & 0x4) >> 2;956write32le(loc, val);957}958959static void relaxGot(uint8_t *loc, const Relocation &rel, uint64_t val) {960assert(isInt<32>(val) &&961"GOTPCRELX should not have been relaxed if it overflows");962const uint8_t op = loc[-2];963const uint8_t modRm = loc[-1];964965// Convert "mov foo@GOTPCREL(%rip),%reg" to "lea foo(%rip),%reg".966if (op == 0x8b) {967loc[-2] = 0x8d;968write32le(loc, val);969return;970}971972if (op != 0xff) {973// We are relaxing a rip relative to an absolute, so compensate974// for the old -4 addend.975assert(!config->isPic);976relaxGotNoPic(loc, val + 4, op, modRm);977return;978}979980// Convert call/jmp instructions.981if (modRm == 0x15) {982// ABI says we can convert "call *foo@GOTPCREL(%rip)" to "nop; call foo".983// Instead we convert to "addr32 call foo" where addr32 is an instruction984// prefix. That makes result expression to be a single instruction.985loc[-2] = 0x67; // addr32 prefix986loc[-1] = 0xe8; // call987write32le(loc, val);988return;989}990991// Convert "jmp *foo@GOTPCREL(%rip)" to "jmp foo; nop".992// jmp doesn't return, so it is fine to use nop here, it is just a stub.993assert(modRm == 0x25);994loc[-2] = 0xe9; // jmp995loc[3] = 0x90; // nop996write32le(loc - 1, val + 1);997}998999// A split-stack prologue starts by checking the amount of stack remaining1000// in one of two ways:1001// A) Comparing of the stack pointer to a field in the tcb.1002// B) Or a load of a stack pointer offset with an lea to r10 or r11.1003bool X86_64::adjustPrologueForCrossSplitStack(uint8_t *loc, uint8_t *end,1004uint8_t stOther) const {1005if (!config->is64) {1006error("target doesn't support split stacks");1007return false;1008}10091010if (loc + 8 >= end)1011return false;10121013// Replace "cmp %fs:0x70,%rsp" and subsequent branch1014// with "stc, nopl 0x0(%rax,%rax,1)"1015if (memcmp(loc, "\x64\x48\x3b\x24\x25", 5) == 0) {1016memcpy(loc, "\xf9\x0f\x1f\x84\x00\x00\x00\x00", 8);1017return true;1018}10191020// Adjust "lea X(%rsp),%rYY" to lea "(X - 0x4000)(%rsp),%rYY" where rYY could1021// be r10 or r11. The lea instruction feeds a subsequent compare which checks1022// if there is X available stack space. Making X larger effectively reserves1023// that much additional space. The stack grows downward so subtract the value.1024if (memcmp(loc, "\x4c\x8d\x94\x24", 4) == 0 ||1025memcmp(loc, "\x4c\x8d\x9c\x24", 4) == 0) {1026// The offset bytes are encoded four bytes after the start of the1027// instruction.1028write32le(loc + 4, read32le(loc + 4) - 0x4000);1029return true;1030}1031return false;1032}10331034void X86_64::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {1035uint64_t secAddr = sec.getOutputSection()->addr;1036if (auto *s = dyn_cast<InputSection>(&sec))1037secAddr += s->outSecOff;1038else if (auto *ehIn = dyn_cast<EhInputSection>(&sec))1039secAddr += ehIn->getParent()->outSecOff;1040for (const Relocation &rel : sec.relocs()) {1041if (rel.expr == R_NONE) // See deleteFallThruJmpInsn1042continue;1043uint8_t *loc = buf + rel.offset;1044const uint64_t val =1045sec.getRelocTargetVA(sec.file, rel.type, rel.addend,1046secAddr + rel.offset, *rel.sym, rel.expr);1047relocate(loc, rel, val);1048}1049if (sec.jumpInstrMod) {1050applyJumpInstrMod(buf + sec.jumpInstrMod->offset,1051sec.jumpInstrMod->original, sec.jumpInstrMod->size);1052}1053}10541055// If Intel Indirect Branch Tracking is enabled, we have to emit special PLT1056// entries containing endbr64 instructions. A PLT entry will be split into two1057// parts, one in .plt.sec (writePlt), and the other in .plt (writeIBTPlt).1058namespace {1059class IntelIBT : public X86_64 {1060public:1061IntelIBT();1062void writeGotPlt(uint8_t *buf, const Symbol &s) const override;1063void writePlt(uint8_t *buf, const Symbol &sym,1064uint64_t pltEntryAddr) const override;1065void writeIBTPlt(uint8_t *buf, size_t numEntries) const override;10661067static const unsigned IBTPltHeaderSize = 16;1068};1069} // namespace10701071IntelIBT::IntelIBT() { pltHeaderSize = 0; }10721073void IntelIBT::writeGotPlt(uint8_t *buf, const Symbol &s) const {1074uint64_t va =1075in.ibtPlt->getVA() + IBTPltHeaderSize + s.getPltIdx() * pltEntrySize;1076write64le(buf, va);1077}10781079void IntelIBT::writePlt(uint8_t *buf, const Symbol &sym,1080uint64_t pltEntryAddr) const {1081const uint8_t Inst[] = {10820xf3, 0x0f, 0x1e, 0xfa, // endbr6410830xff, 0x25, 0, 0, 0, 0, // jmpq *got(%rip)10840x66, 0x0f, 0x1f, 0x44, 0, 0, // nop1085};1086memcpy(buf, Inst, sizeof(Inst));1087write32le(buf + 6, sym.getGotPltVA() - pltEntryAddr - 10);1088}10891090void IntelIBT::writeIBTPlt(uint8_t *buf, size_t numEntries) const {1091writePltHeader(buf);1092buf += IBTPltHeaderSize;10931094const uint8_t inst[] = {10950xf3, 0x0f, 0x1e, 0xfa, // endbr6410960x68, 0, 0, 0, 0, // pushq <relocation index>10970xe9, 0, 0, 0, 0, // jmpq plt[0]10980x66, 0x90, // nop1099};11001101for (size_t i = 0; i < numEntries; ++i) {1102memcpy(buf, inst, sizeof(inst));1103write32le(buf + 5, i);1104write32le(buf + 10, -pltHeaderSize - sizeof(inst) * i - 30);1105buf += sizeof(inst);1106}1107}11081109// These nonstandard PLT entries are to migtigate Spectre v2 security1110// vulnerability. In order to mitigate Spectre v2, we want to avoid indirect1111// branch instructions such as `jmp *GOTPLT(%rip)`. So, in the following PLT1112// entries, we use a CALL followed by MOV and RET to do the same thing as an1113// indirect jump. That instruction sequence is so-called "retpoline".1114//1115// We have two types of retpoline PLTs as a size optimization. If `-z now`1116// is specified, all dynamic symbols are resolved at load-time. Thus, when1117// that option is given, we can omit code for symbol lazy resolution.1118namespace {1119class Retpoline : public X86_64 {1120public:1121Retpoline();1122void writeGotPlt(uint8_t *buf, const Symbol &s) const override;1123void writePltHeader(uint8_t *buf) const override;1124void writePlt(uint8_t *buf, const Symbol &sym,1125uint64_t pltEntryAddr) const override;1126};11271128class RetpolineZNow : public X86_64 {1129public:1130RetpolineZNow();1131void writeGotPlt(uint8_t *buf, const Symbol &s) const override {}1132void writePltHeader(uint8_t *buf) const override;1133void writePlt(uint8_t *buf, const Symbol &sym,1134uint64_t pltEntryAddr) const override;1135};1136} // namespace11371138Retpoline::Retpoline() {1139pltHeaderSize = 48;1140pltEntrySize = 32;1141ipltEntrySize = 32;1142}11431144void Retpoline::writeGotPlt(uint8_t *buf, const Symbol &s) const {1145write64le(buf, s.getPltVA() + 17);1146}11471148void Retpoline::writePltHeader(uint8_t *buf) const {1149const uint8_t insn[] = {11500xff, 0x35, 0, 0, 0, 0, // 0: pushq GOTPLT+8(%rip)11510x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 6: mov GOTPLT+16(%rip), %r1111520xe8, 0x0e, 0x00, 0x00, 0x00, // d: callq next11530xf3, 0x90, // 12: loop: pause11540x0f, 0xae, 0xe8, // 14: lfence11550xeb, 0xf9, // 17: jmp loop11560xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 19: int3; .align 1611570x4c, 0x89, 0x1c, 0x24, // 20: next: mov %r11, (%rsp)11580xc3, // 24: ret11590xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 25: int3; padding11600xcc, 0xcc, 0xcc, 0xcc, // 2c: int3; padding1161};1162memcpy(buf, insn, sizeof(insn));11631164uint64_t gotPlt = in.gotPlt->getVA();1165uint64_t plt = in.plt->getVA();1166write32le(buf + 2, gotPlt - plt - 6 + 8);1167write32le(buf + 9, gotPlt - plt - 13 + 16);1168}11691170void Retpoline::writePlt(uint8_t *buf, const Symbol &sym,1171uint64_t pltEntryAddr) const {1172const uint8_t insn[] = {11730x4c, 0x8b, 0x1d, 0, 0, 0, 0, // 0: mov foo@GOTPLT(%rip), %r1111740xe8, 0, 0, 0, 0, // 7: callq plt+0x2011750xe9, 0, 0, 0, 0, // c: jmp plt+0x1211760x68, 0, 0, 0, 0, // 11: pushq <relocation index>11770xe9, 0, 0, 0, 0, // 16: jmp plt+011780xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1b: int3; padding1179};1180memcpy(buf, insn, sizeof(insn));11811182uint64_t off = pltEntryAddr - in.plt->getVA();11831184write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);1185write32le(buf + 8, -off - 12 + 32);1186write32le(buf + 13, -off - 17 + 18);1187write32le(buf + 18, sym.getPltIdx());1188write32le(buf + 23, -off - 27);1189}11901191RetpolineZNow::RetpolineZNow() {1192pltHeaderSize = 32;1193pltEntrySize = 16;1194ipltEntrySize = 16;1195}11961197void RetpolineZNow::writePltHeader(uint8_t *buf) const {1198const uint8_t insn[] = {11990xe8, 0x0b, 0x00, 0x00, 0x00, // 0: call next12000xf3, 0x90, // 5: loop: pause12010x0f, 0xae, 0xe8, // 7: lfence12020xeb, 0xf9, // a: jmp loop12030xcc, 0xcc, 0xcc, 0xcc, // c: int3; .align 1612040x4c, 0x89, 0x1c, 0x24, // 10: next: mov %r11, (%rsp)12050xc3, // 14: ret12060xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 15: int3; padding12070xcc, 0xcc, 0xcc, 0xcc, 0xcc, // 1a: int3; padding12080xcc, // 1f: int3; padding1209};1210memcpy(buf, insn, sizeof(insn));1211}12121213void RetpolineZNow::writePlt(uint8_t *buf, const Symbol &sym,1214uint64_t pltEntryAddr) const {1215const uint8_t insn[] = {12160x4c, 0x8b, 0x1d, 0, 0, 0, 0, // mov foo@GOTPLT(%rip), %r1112170xe9, 0, 0, 0, 0, // jmp plt+012180xcc, 0xcc, 0xcc, 0xcc, // int3; padding1219};1220memcpy(buf, insn, sizeof(insn));12211222write32le(buf + 3, sym.getGotPltVA() - pltEntryAddr - 7);1223write32le(buf + 8, in.plt->getVA() - pltEntryAddr - 12);1224}12251226static TargetInfo *getTargetInfo() {1227if (config->zRetpolineplt) {1228if (config->zNow) {1229static RetpolineZNow t;1230return &t;1231}1232static Retpoline t;1233return &t;1234}12351236if (config->andFeatures & GNU_PROPERTY_X86_FEATURE_1_IBT) {1237static IntelIBT t;1238return &t;1239}12401241static X86_64 t;1242return &t;1243}12441245TargetInfo *elf::getX86_64TargetInfo() { return getTargetInfo(); }124612471248