// Path: contrib/llvm-project/llvm/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp
//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "MCTargetDesc/X86BaseInfo.h"
#include "MCTargetDesc/X86EncodingOptimization.h"
#include "MCTargetDesc/X86FixupKinds.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/MC/MCAsmBackend.h"
#include "llvm/MC/MCAssembler.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCELFObjectWriter.h"
#include "llvm/MC/MCELFStreamer.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCFixupKindInfo.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCMachObjectWriter.h"
#include "llvm/MC/MCObjectStreamer.h"
#include "llvm/MC/MCObjectWriter.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

namespace {
/// A wrapper for holding a mask of the values from X86::AlignBranchBoundaryKind
class X86AlignBranchKind {
private:
  uint8_t AlignBranchKind = 0;

public:
  void operator=(const std::string &Val) {
    if (Val.empty())
      return;
    SmallVector<StringRef, 6> BranchTypes;
    StringRef(Val).split(BranchTypes, '+', -1, false);
    for (auto BranchType : BranchTypes) {
      if (BranchType == "fused")
        addKind(X86::AlignBranchFused);
      else if (BranchType == "jcc")
        addKind(X86::AlignBranchJcc);
      else if (BranchType == "jmp")
        addKind(X86::AlignBranchJmp);
      else if (BranchType == "call")
        addKind(X86::AlignBranchCall);
      else if (BranchType == "ret")
        addKind(X86::AlignBranchRet);
      else if (BranchType == "indirect")
        addKind(X86::AlignBranchIndirect);
      else {
        errs() << "invalid argument " << BranchType.str()
               << " to -x86-align-branch=; each element must be one of: fused, "
                  "jcc, jmp, call, ret, indirect.(plus separated)\n";
      }
    }
  }

  operator uint8_t() const { return AlignBranchKind; }
  void addKind(X86::AlignBranchBoundaryKind Value) { AlignBranchKind |= Value; }
};
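// Illustrative note (not in the upstream file): assigning the string
// "fused+jcc" to an X86AlignBranchKind via operator= above yields the mask
// X86::AlignBranchFused | X86::AlignBranchJcc.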
X86AlignBranchKind X86AlignBranchKindLoc;

cl::opt<unsigned> X86AlignBranchBoundary(
    "x86-align-branch-boundary", cl::init(0),
    cl::desc(
        "Control how the assembler should align branches with NOP. If the "
        "boundary's size is not 0, it should be a power of 2 and no less "
        "than 32. Branches will be aligned to prevent from being across or "
        "against the boundary of specified size. The default value 0 does not "
        "align branches."));

cl::opt<X86AlignBranchKind, true, cl::parser<std::string>> X86AlignBranch(
    "x86-align-branch",
    cl::desc(
        "Specify types of branches to align (plus separated list of types):"
        "\njcc      indicates conditional jumps"
        "\nfused    indicates fused conditional jumps"
        "\njmp      indicates direct unconditional jumps"
        "\ncall     indicates direct and indirect calls"
        "\nret      indicates rets"
        "\nindirect indicates indirect unconditional jumps"),
    cl::location(X86AlignBranchKindLoc));

cl::opt<bool> X86AlignBranchWithin32BBoundaries(
    "x86-branches-within-32B-boundaries", cl::init(false),
    cl::desc(
        "Align selected instructions to mitigate negative performance impact "
        "of Intel's micro code update for errata skx102. May break "
        "assumptions about labels corresponding to particular instructions, "
        "and should be used with caution."));

cl::opt<unsigned> X86PadMaxPrefixSize(
    "x86-pad-max-prefix-size", cl::init(0),
    cl::desc("Maximum number of prefixes to use for padding"));

cl::opt<bool> X86PadForAlign(
    "x86-pad-for-align", cl::init(false), cl::Hidden,
    cl::desc("Pad previous instructions to implement align directives"));

cl::opt<bool> X86PadForBranchAlign(
    "x86-pad-for-branch-align", cl::init(true), cl::Hidden,
    cl::desc("Pad previous instructions to implement branch alignment"));
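// Example invocations exercising the flags above (hypothetical command lines;
// the exact driver spelling may differ between LLVM versions):
//   llvm-mc -filetype=obj -triple=x86_64 \
//           -x86-align-branch-boundary=32 -x86-align-branch=fused+jcc foo.s
//   clang -c -mllvm -x86-branches-within-32B-boundaries foo.c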
class X86AsmBackend : public MCAsmBackend {
  const MCSubtargetInfo &STI;
  std::unique_ptr<const MCInstrInfo> MCII;
  X86AlignBranchKind AlignBranchType;
  Align AlignBoundary;
  unsigned TargetPrefixMax = 0;

  MCInst PrevInst;
  unsigned PrevInstOpcode = 0;
  MCBoundaryAlignFragment *PendingBA = nullptr;
  std::pair<MCFragment *, size_t> PrevInstPosition;
  bool IsRightAfterData = false;

  uint8_t determinePaddingPrefix(const MCInst &Inst) const;
  bool isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const;
  bool needAlign(const MCInst &Inst) const;
  bool canPadBranches(MCObjectStreamer &OS) const;
  bool canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const;

public:
  X86AsmBackend(const Target &T, const MCSubtargetInfo &STI)
      : MCAsmBackend(llvm::endianness::little), STI(STI),
        MCII(T.createMCInstrInfo()) {
    if (X86AlignBranchWithin32BBoundaries) {
      // At the moment, this defaults to aligning fused branches, unconditional
      // jumps, and (unfused) conditional jumps with nops. Both the
      // instructions aligned and the alignment method (nop vs prefix) may
      // change in the future.
      AlignBoundary = assumeAligned(32);
      AlignBranchType.addKind(X86::AlignBranchFused);
      AlignBranchType.addKind(X86::AlignBranchJcc);
      AlignBranchType.addKind(X86::AlignBranchJmp);
    }
    // Allow overriding defaults set by main flag
    if (X86AlignBranchBoundary.getNumOccurrences())
      AlignBoundary = assumeAligned(X86AlignBranchBoundary);
    if (X86AlignBranch.getNumOccurrences())
      AlignBranchType = X86AlignBranchKindLoc;
    if (X86PadMaxPrefixSize.getNumOccurrences())
      TargetPrefixMax = X86PadMaxPrefixSize;
  }

  bool allowAutoPadding() const override;
  bool allowEnhancedRelaxation() const override;
  void emitInstructionBegin(MCObjectStreamer &OS, const MCInst &Inst,
                            const MCSubtargetInfo &STI);
  void emitInstructionEnd(MCObjectStreamer &OS, const MCInst &Inst);

  unsigned getNumFixupKinds() const override {
    return X86::NumTargetFixupKinds;
  }

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override;

  const MCFixupKindInfo &getFixupKindInfo(MCFixupKind Kind) const override;

  bool shouldForceRelocation(const MCAssembler &Asm, const MCFixup &Fixup,
                             const MCValue &Target,
                             const MCSubtargetInfo *STI) override;

  void applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                  const MCValue &Target, MutableArrayRef<char> Data,
                  uint64_t Value, bool IsResolved,
                  const MCSubtargetInfo *STI) const override;

  bool mayNeedRelaxation(const MCInst &Inst,
                         const MCSubtargetInfo &STI) const override;

  bool fixupNeedsRelaxation(const MCFixup &Fixup,
                            uint64_t Value) const override;

  void relaxInstruction(MCInst &Inst,
                        const MCSubtargetInfo &STI) const override;

  bool padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                   MCCodeEmitter &Emitter,
                                   unsigned &RemainingSize) const;

  bool padInstructionViaPrefix(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                               unsigned &RemainingSize) const;

  bool padInstructionEncoding(MCRelaxableFragment &RF, MCCodeEmitter &Emitter,
                              unsigned &RemainingSize) const;

  void finishLayout(const MCAssembler &Asm) const override;

  unsigned getMaximumNopSize(const MCSubtargetInfo &STI) const override;

  bool writeNopData(raw_ostream &OS, uint64_t Count,
                    const MCSubtargetInfo *STI) const override;
};
} // end anonymous namespace

static bool isRelaxableBranch(unsigned Opcode) {
  return Opcode == X86::JCC_1 || Opcode == X86::JMP_1;
}
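// Relaxation sketch for the branches above: a short conditional branch with
// a rel8 operand (2 bytes) is widened to the rel32 form (6 bytes) when its
// target turns out to be out of range, i.e. X86::JCC_1 -> X86::JCC_4.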
static unsigned getRelaxedOpcodeBranch(unsigned Opcode,
                                       bool Is16BitMode = false) {
  switch (Opcode) {
  default:
    llvm_unreachable("invalid opcode for branch");
  case X86::JCC_1:
    return (Is16BitMode) ? X86::JCC_2 : X86::JCC_4;
  case X86::JMP_1:
    return (Is16BitMode) ? X86::JMP_2 : X86::JMP_4;
  }
}

static unsigned getRelaxedOpcode(const MCInst &MI, bool Is16BitMode) {
  unsigned Opcode = MI.getOpcode();
  return isRelaxableBranch(Opcode) ? getRelaxedOpcodeBranch(Opcode, Is16BitMode)
                                   : X86::getOpcodeForLongImmediateForm(Opcode);
}

static X86::CondCode getCondFromBranch(const MCInst &MI,
                                       const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  default:
    return X86::COND_INVALID;
  case X86::JCC_1: {
    const MCInstrDesc &Desc = MCII.get(Opcode);
    return static_cast<X86::CondCode>(
        MI.getOperand(Desc.getNumOperands() - 1).getImm());
  }
  }
}

static X86::SecondMacroFusionInstKind
classifySecondInstInMacroFusion(const MCInst &MI, const MCInstrInfo &MCII) {
  X86::CondCode CC = getCondFromBranch(MI, MCII);
  return classifySecondCondCodeInMacroFusion(CC);
}

/// Check if the instruction uses RIP relative addressing.
static bool isRIPRelative(const MCInst &MI, const MCInstrInfo &MCII) {
  unsigned Opcode = MI.getOpcode();
  const MCInstrDesc &Desc = MCII.get(Opcode);
  uint64_t TSFlags = Desc.TSFlags;
  unsigned CurOp = X86II::getOperandBias(Desc);
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand < 0)
    return false;
  unsigned BaseRegNum = MemoryOperand + CurOp + X86::AddrBaseReg;
  unsigned BaseReg = MI.getOperand(BaseRegNum).getReg();
  return (BaseReg == X86::RIP);
}

/// Check if the instruction is a prefix.
static bool isPrefix(unsigned Opcode, const MCInstrInfo &MCII) {
  return X86II::isPrefix(MCII.get(Opcode).TSFlags);
}

/// Check if the instruction is valid as the first instruction in macro fusion.
static bool isFirstMacroFusibleInst(const MCInst &Inst,
                                    const MCInstrInfo &MCII) {
  // An Intel instruction with RIP relative addressing is not macro fusible.
  if (isRIPRelative(Inst, MCII))
    return false;
  X86::FirstMacroFusionInstKind FIK =
      X86::classifyFirstOpcodeInMacroFusion(Inst.getOpcode());
  return FIK != X86::FirstMacroFusionInstKind::Invalid;
}
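// A classic fusible pair, for orientation: "cmp %rcx, %rax" directly followed
// by "je .L" can macro-fuse, whereas a cmp with a RIP-relative memory operand
// is rejected by isFirstMacroFusibleInst above.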
/// X86 can reduce the bytes of NOP by padding instructions with prefixes to
/// get a better performance in some cases. Here, we determine which prefix is
/// the most suitable.
///
/// If the instruction has a segment override prefix, use the existing one.
/// If the target is 64-bit, use the CS.
/// If the target is 32-bit,
///   - If the instruction has an ESP/EBP base register, use SS.
///   - Otherwise use DS.
uint8_t X86AsmBackend::determinePaddingPrefix(const MCInst &Inst) const {
  assert((STI.hasFeature(X86::Is32Bit) || STI.hasFeature(X86::Is64Bit)) &&
         "Prefixes can be added only in 32-bit or 64-bit mode.");
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  uint64_t TSFlags = Desc.TSFlags;

  // Determine where the memory operand starts, if present.
  int MemoryOperand = X86II::getMemoryOperandNo(TSFlags);
  if (MemoryOperand != -1)
    MemoryOperand += X86II::getOperandBias(Desc);

  unsigned SegmentReg = 0;
  if (MemoryOperand >= 0) {
    // Check for explicit segment override on memory operand.
    SegmentReg = Inst.getOperand(MemoryOperand + X86::AddrSegmentReg).getReg();
  }

  switch (TSFlags & X86II::FormMask) {
  default:
    break;
  case X86II::RawFrmDstSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(2).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(2).getReg();
    break;
  }
  case X86II::RawFrmSrc: {
    // Check segment override opcode prefix as needed (not for %ds).
    if (Inst.getOperand(1).getReg() != X86::DS)
      SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  case X86II::RawFrmMemOffs: {
    // Check segment override opcode prefix as needed.
    SegmentReg = Inst.getOperand(1).getReg();
    break;
  }
  }

  if (SegmentReg != 0)
    return X86::getSegmentOverridePrefixForReg(SegmentReg);

  if (STI.hasFeature(X86::Is64Bit))
    return X86::CS_Encoding;

  if (MemoryOperand >= 0) {
    unsigned BaseRegNum = MemoryOperand + X86::AddrBaseReg;
    unsigned BaseReg = Inst.getOperand(BaseRegNum).getReg();
    if (BaseReg == X86::ESP || BaseReg == X86::EBP)
      return X86::SS_Encoding;
  }
  return X86::DS_Encoding;
}
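// Concrete outcome (sketch, assuming the standard segment prefix encodings):
// in 64-bit mode an instruction with no explicit segment is padded with 0x2e
// (%cs); in 32-bit mode "movl %eax, (%ebp)" would be padded with 0x36 (%ss)
// and "movl %eax, (%esi)" with 0x3e (%ds).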
/// Check if the two instructions will be macro-fused on the target cpu.
bool X86AsmBackend::isMacroFused(const MCInst &Cmp, const MCInst &Jcc) const {
  const MCInstrDesc &InstDesc = MCII->get(Jcc.getOpcode());
  if (!InstDesc.isConditionalBranch())
    return false;
  if (!isFirstMacroFusibleInst(Cmp, *MCII))
    return false;
  const X86::FirstMacroFusionInstKind CmpKind =
      X86::classifyFirstOpcodeInMacroFusion(Cmp.getOpcode());
  const X86::SecondMacroFusionInstKind BranchKind =
      classifySecondInstInMacroFusion(Jcc, *MCII);
  return X86::isMacroFused(CmpKind, BranchKind);
}

/// Check if the instruction has a variant symbol operand.
static bool hasVariantSymbol(const MCInst &MI) {
  for (auto &Operand : MI) {
    if (!Operand.isExpr())
      continue;
    const MCExpr &Expr = *Operand.getExpr();
    if (Expr.getKind() == MCExpr::SymbolRef &&
        cast<MCSymbolRefExpr>(Expr).getKind() != MCSymbolRefExpr::VK_None)
      return true;
  }
  return false;
}

bool X86AsmBackend::allowAutoPadding() const {
  return (AlignBoundary != Align(1) && AlignBranchType != X86::AlignBranchNone);
}

bool X86AsmBackend::allowEnhancedRelaxation() const {
  return allowAutoPadding() && TargetPrefixMax != 0 && X86PadForBranchAlign;
}

/// X86 has certain instructions which enable interrupts exactly one
/// instruction *after* the instruction which stores to SS. Return true if the
/// given instruction may have such an interrupt delay slot.
static bool mayHaveInterruptDelaySlot(unsigned InstOpcode) {
  switch (InstOpcode) {
  case X86::POPSS16:
  case X86::POPSS32:
  case X86::STI:
    return true;

  case X86::MOV16sr:
  case X86::MOV32sr:
  case X86::MOV64sr:
  case X86::MOV16sm:
    // In fact, this is only the case if the first operand is SS. However, as
    // segment moves occur extremely rarely, this is just a minor pessimization.
    return true;
  }
  return false;
}

/// Check if the instruction to be emitted is right after any data.
static bool
isRightAfterData(MCFragment *CurrentFragment,
                 const std::pair<MCFragment *, size_t> &PrevInstPosition) {
  MCFragment *F = CurrentFragment;
  // Since data is always emitted into a DataFragment, our check strategy is
  // simple here.
  //   - If the fragment is a DataFragment
  //     - If it's empty (section start or data after align), return false.
  //     - If it's not the fragment where the previous instruction is,
  //       returns true.
  //     - If it's the fragment holding the previous instruction but its
  //       size changed since the previous instruction was emitted into
  //       it, returns true.
  //     - Otherwise returns false.
  //   - If the fragment is not a DataFragment, returns false.
  if (auto *DF = dyn_cast_or_null<MCDataFragment>(F))
    return DF->getContents().size() &&
           (DF != PrevInstPosition.first ||
            DF->getContents().size() != PrevInstPosition.second);

  return false;
}

/// \returns the fragment size if it has instructions, otherwise returns 0.
static size_t getSizeForInstFragment(const MCFragment *F) {
  if (!F || !F->hasInstructions())
    return 0;
  // MCEncodedFragmentWithContents being templated makes this tricky.
  switch (F->getKind()) {
  default:
    llvm_unreachable("Unknown fragment with instructions!");
  case MCFragment::FT_Data:
    return cast<MCDataFragment>(*F).getContents().size();
  case MCFragment::FT_Relaxable:
    return cast<MCRelaxableFragment>(*F).getContents().size();
  case MCFragment::FT_CompactEncodedInst:
    return cast<MCCompactEncodedInstFragment>(*F).getContents().size();
  }
}

/// Return true if we can insert NOP or prefixes automatically before the
/// instruction to be emitted.
bool X86AsmBackend::canPadInst(const MCInst &Inst, MCObjectStreamer &OS) const {
  if (hasVariantSymbol(Inst))
    // Linker may rewrite the instruction with a variant symbol operand (e.g.
    // TLSCALL).
    return false;

  if (mayHaveInterruptDelaySlot(PrevInstOpcode))
    // If this instruction follows an interrupt enabling instruction with a one
    // instruction delay, inserting a nop would change behavior.
    return false;

  if (isPrefix(PrevInstOpcode, *MCII))
    // If this instruction follows a prefix, inserting a nop/prefix would
    // change semantics.
    return false;

  if (isPrefix(Inst.getOpcode(), *MCII))
    // If this instruction is a prefix, inserting a prefix would change
    // semantics.
    return false;

  if (IsRightAfterData)
    // If this instruction follows any data, there is no clear instruction
    // boundary, and inserting a nop/prefix would change semantics.
    return false;

  return true;
}
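// Two sequences canPadInst refuses to pad, as a sanity check: "sti; hlt"
// (a nop after sti would occupy the interrupt delay slot) and "rep; movsb"
// (a nop between the standalone prefix and movsb would detach the rep).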
bool X86AsmBackend::canPadBranches(MCObjectStreamer &OS) const {
  if (!OS.getAllowAutoPadding())
    return false;
  assert(allowAutoPadding() && "incorrect initialization!");

  // We only pad in text section.
  if (!OS.getCurrentSectionOnly()->isText())
    return false;

  // TODO: we currently don't handle bundle cases.
  if (OS.getAssembler().isBundlingEnabled())
    return false;

  // Branches only need to be aligned in 32-bit or 64-bit mode.
  if (!(STI.hasFeature(X86::Is64Bit) || STI.hasFeature(X86::Is32Bit)))
    return false;

  return true;
}

/// Check if the instruction needs to be aligned.
bool X86AsmBackend::needAlign(const MCInst &Inst) const {
  const MCInstrDesc &Desc = MCII->get(Inst.getOpcode());
  return (Desc.isConditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJcc)) ||
         (Desc.isUnconditionalBranch() &&
          (AlignBranchType & X86::AlignBranchJmp)) ||
         (Desc.isCall() && (AlignBranchType & X86::AlignBranchCall)) ||
         (Desc.isReturn() && (AlignBranchType & X86::AlignBranchRet)) ||
         (Desc.isIndirectBranch() &&
          (AlignBranchType & X86::AlignBranchIndirect));
}

/// Insert BoundaryAlignFragment before instructions to align branches.
void X86AsmBackend::emitInstructionBegin(MCObjectStreamer &OS,
                                         const MCInst &Inst,
                                         const MCSubtargetInfo &STI) {
  // Used by canPadInst. Done here, because in emitInstructionEnd, the current
  // fragment will have changed.
  IsRightAfterData =
      isRightAfterData(OS.getCurrentFragment(), PrevInstPosition);

  if (!canPadBranches(OS))
    return;

  // NB: PrevInst only valid if canPadBranches is true.
  if (!isMacroFused(PrevInst, Inst))
    // Macro fusion doesn't actually happen, so clear the pending fragment.
    PendingBA = nullptr;

  // When branch padding is enabled (basically the skx102 erratum => unlikely),
  // we call canPadInst (not cheap) twice. However, in the common case, we can
  // avoid unnecessary calls to that, as this is otherwise only used for
  // relaxable fragments.
  if (!canPadInst(Inst, OS))
    return;

  if (PendingBA && PendingBA->getNext() == OS.getCurrentFragment()) {
    // Macro fusion actually happens and there is no other fragment inserted
    // after the previous instruction.
    //
    // Do nothing here since we already inserted a BoundaryAlign fragment when
    // we met the first instruction in the fused pair and we'll tie them
    // together in emitInstructionEnd.
    //
    // Note: When there is at least one fragment, such as MCAlignFragment,
    // inserted after the previous instruction, e.g.
    //
    // \code
    //   cmp %rax, %rcx
    //   .align 16
    //   je .Label0
    // \endcode
    //
    // we will treat the JCC as an unfused branch although it may be fused
    // with the CMP.
    return;
  }

  if (needAlign(Inst) || ((AlignBranchType & X86::AlignBranchFused) &&
                          isFirstMacroFusibleInst(Inst, *MCII))) {
    // If we meet an unfused branch or the first instruction in a fusible
    // pair, insert a BoundaryAlign fragment.
    PendingBA = OS.getContext().allocFragment<MCBoundaryAlignFragment>(
        AlignBoundary, STI);
    OS.insert(PendingBA);
  }
}
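// Layout sketch under "-x86-align-branch-boundary=32 -x86-align-branch=jcc":
// if a "je" would otherwise cross a 32-byte boundary, the
// MCBoundaryAlignFragment inserted above is sized during relaxation so that
// the branch no longer crosses (or ends against) the boundary.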
/// Set the last fragment to be aligned for the BoundaryAlignFragment.
void X86AsmBackend::emitInstructionEnd(MCObjectStreamer &OS,
                                       const MCInst &Inst) {
  MCFragment *CF = OS.getCurrentFragment();
  if (auto *F = dyn_cast_or_null<MCRelaxableFragment>(CF))
    F->setAllowAutoPadding(canPadInst(Inst, OS));

  // Update PrevInstOpcode here, since canPadInst() reads it.
  PrevInstOpcode = Inst.getOpcode();
  PrevInstPosition = std::make_pair(CF, getSizeForInstFragment(CF));

  if (!canPadBranches(OS))
    return;

  // PrevInst is only needed if canPadBranches. Copying an MCInst isn't cheap.
  PrevInst = Inst;

  if (!needAlign(Inst) || !PendingBA)
    return;

  // Tie the aligned instructions into a pending BoundaryAlign.
  PendingBA->setLastFragment(CF);
  PendingBA = nullptr;

  // We need to ensure that further data isn't added to the current
  // DataFragment, so that we can get the size of instructions later in
  // MCAssembler::relaxBoundaryAlign. The easiest way is to insert a new empty
  // DataFragment.
  if (isa_and_nonnull<MCDataFragment>(CF))
    OS.insert(OS.getContext().allocFragment<MCDataFragment>());

  // Update the maximum alignment on the current section if necessary.
  MCSection *Sec = OS.getCurrentSectionOnly();
  Sec->ensureMinAlignment(AlignBoundary);
}

std::optional<MCFixupKind> X86AsmBackend::getFixupKind(StringRef Name) const {
  if (STI.getTargetTriple().isOSBinFormatELF()) {
    unsigned Type;
    if (STI.getTargetTriple().getArch() == Triple::x86_64) {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/x86_64.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_X86_64_NONE)
                 .Case("BFD_RELOC_8", ELF::R_X86_64_8)
                 .Case("BFD_RELOC_16", ELF::R_X86_64_16)
                 .Case("BFD_RELOC_32", ELF::R_X86_64_32)
                 .Case("BFD_RELOC_64", ELF::R_X86_64_64)
                 .Default(-1u);
    } else {
      Type = llvm::StringSwitch<unsigned>(Name)
#define ELF_RELOC(X, Y) .Case(#X, Y)
#include "llvm/BinaryFormat/ELFRelocs/i386.def"
#undef ELF_RELOC
                 .Case("BFD_RELOC_NONE", ELF::R_386_NONE)
                 .Case("BFD_RELOC_8", ELF::R_386_8)
                 .Case("BFD_RELOC_16", ELF::R_386_16)
                 .Case("BFD_RELOC_32", ELF::R_386_32)
                 .Default(-1u);
    }
    if (Type == -1u)
      return std::nullopt;
    return static_cast<MCFixupKind>(FirstLiteralRelocationKind + Type);
  }
  return MCAsmBackend::getFixupKind(Name);
}
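// Example assembly resolved by getFixupKind above (hypothetical input):
//   .reloc ., R_X86_64_NONE, foo     # ELF relocation name from the .def file
//   .reloc ., BFD_RELOC_32, bar      # GNU-as style alias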
const MCFixupKindInfo &X86AsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
  const static MCFixupKindInfo Infos[X86::NumTargetFixupKinds] = {
      {"reloc_riprel_4byte", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_movq_load", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_riprel_4byte_relax_rex", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
      {"reloc_signed_4byte", 0, 32, 0},
      {"reloc_signed_4byte_relax", 0, 32, 0},
      {"reloc_global_offset_table", 0, 32, 0},
      {"reloc_global_offset_table8", 0, 64, 0},
      {"reloc_branch_4byte_pcrel", 0, 32, MCFixupKindInfo::FKF_IsPCRel},
  };

  // Fixup kinds from .reloc directive are like R_386_NONE/R_X86_64_NONE. They
  // do not require any extra processing.
  if (Kind >= FirstLiteralRelocationKind)
    return MCAsmBackend::getFixupKindInfo(FK_NONE);

  if (Kind < FirstTargetFixupKind)
    return MCAsmBackend::getFixupKindInfo(Kind);

  assert(unsigned(Kind - FirstTargetFixupKind) < getNumFixupKinds() &&
         "Invalid kind!");
  assert(Infos[Kind - FirstTargetFixupKind].Name && "Empty fixup name!");
  return Infos[Kind - FirstTargetFixupKind];
}

bool X86AsmBackend::shouldForceRelocation(const MCAssembler &,
                                          const MCFixup &Fixup, const MCValue &,
                                          const MCSubtargetInfo *STI) {
  return Fixup.getKind() >= FirstLiteralRelocationKind;
}

static unsigned getFixupKindSize(unsigned Kind) {
  switch (Kind) {
  default:
    llvm_unreachable("invalid fixup kind!");
  case FK_NONE:
    return 0;
  case FK_PCRel_1:
  case FK_SecRel_1:
  case FK_Data_1:
    return 1;
  case FK_PCRel_2:
  case FK_SecRel_2:
  case FK_Data_2:
    return 2;
  case FK_PCRel_4:
  case X86::reloc_riprel_4byte:
  case X86::reloc_riprel_4byte_relax:
  case X86::reloc_riprel_4byte_relax_rex:
  case X86::reloc_riprel_4byte_movq_load:
  case X86::reloc_signed_4byte:
  case X86::reloc_signed_4byte_relax:
  case X86::reloc_global_offset_table:
  case X86::reloc_branch_4byte_pcrel:
  case FK_SecRel_4:
  case FK_Data_4:
    return 4;
  case FK_PCRel_8:
  case FK_SecRel_8:
  case FK_Data_8:
  case X86::reloc_global_offset_table8:
    return 8;
  }
}

void X86AsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup,
                               const MCValue &Target,
                               MutableArrayRef<char> Data,
                               uint64_t Value, bool IsResolved,
                               const MCSubtargetInfo *STI) const {
  unsigned Kind = Fixup.getKind();
  if (Kind >= FirstLiteralRelocationKind)
    return;
  unsigned Size = getFixupKindSize(Kind);

  assert(Fixup.getOffset() + Size <= Data.size() && "Invalid fixup offset!");

  int64_t SignedValue = static_cast<int64_t>(Value);
  if ((Target.isAbsolute() || IsResolved) &&
      getFixupKindInfo(Fixup.getKind()).Flags &
          MCFixupKindInfo::FKF_IsPCRel) {
    // Check that the PC-relative fixup fits into the fixup size.
    if (Size > 0 && !isIntN(Size * 8, SignedValue))
      Asm.getContext().reportError(
          Fixup.getLoc(), "value of " + Twine(SignedValue) +
                              " is too large for field of " + Twine(Size) +
                              ((Size == 1) ? " byte." : " bytes."));
  } else {
    // Check that upper bits are either all zeros or all ones.
    // Specifically ignore overflow/underflow as long as the leakage is
    // limited to the lower bits. This is to remain compatible with
    // other assemblers.
    assert((Size == 0 || isIntN(Size * 8 + 1, SignedValue)) &&
           "Value does not fit in the Fixup field");
  }

  for (unsigned i = 0; i != Size; ++i)
    Data[Fixup.getOffset() + i] = uint8_t(Value >> (i * 8));
}
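// Worked example for the loop above: a 4-byte FK_Data_4 fixup at offset 3
// with Value 0x11223344 stores bytes 44 33 22 11 (little-endian) into
// Data[3..6].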
bool X86AsmBackend::mayNeedRelaxation(const MCInst &MI,
                                      const MCSubtargetInfo &STI) const {
  unsigned Opcode = MI.getOpcode();
  unsigned SkipOperands = X86::isCCMPCC(Opcode) ? 2 : 0;
  return isRelaxableBranch(Opcode) ||
         (X86::getOpcodeForLongImmediateForm(Opcode) != Opcode &&
          MI.getOperand(MI.getNumOperands() - 1 - SkipOperands).isExpr());
}

bool X86AsmBackend::fixupNeedsRelaxation(const MCFixup &Fixup,
                                         uint64_t Value) const {
  // Relax if the value is too big for a (signed) i8.
  return !isInt<8>(Value);
}

// FIXME: Can tblgen help at all here to verify there aren't other instructions
// we can relax?
void X86AsmBackend::relaxInstruction(MCInst &Inst,
                                     const MCSubtargetInfo &STI) const {
  // The only relaxation X86 does is from a 1-byte pcrel to a 4-byte pcrel.
  bool Is16BitMode = STI.hasFeature(X86::Is16Bit);
  unsigned RelaxedOp = getRelaxedOpcode(Inst, Is16BitMode);

  if (RelaxedOp == Inst.getOpcode()) {
    SmallString<256> Tmp;
    raw_svector_ostream OS(Tmp);
    Inst.dump_pretty(OS);
    OS << "\n";
    report_fatal_error("unexpected instruction to relax: " + OS.str());
  }

  Inst.setOpcode(RelaxedOp);
}
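// Prefix padding sketch for the routine below: in 64-bit mode a 2-byte
// "addl %esi, %edi" (01 f7) padded by two bytes becomes 2e 2e 01 f7 (two
// %cs overrides, which are ignored on a register-only instruction), growing
// the fragment without adding an instruction.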
bool X86AsmBackend::padInstructionViaPrefix(MCRelaxableFragment &RF,
                                            MCCodeEmitter &Emitter,
                                            unsigned &RemainingSize) const {
  if (!RF.getAllowAutoPadding())
    return false;
  // If the instruction isn't fully relaxed, shifting it around might require a
  // larger value for one of the fixups than can be encoded. The outer loop
  // will also catch this before moving to the next instruction, but we need to
  // prevent padding this single instruction as well.
  if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    return false;

  const unsigned OldSize = RF.getContents().size();
  if (OldSize == 15)
    return false;

  const unsigned MaxPossiblePad = std::min(15 - OldSize, RemainingSize);
  const unsigned RemainingPrefixSize = [&]() -> unsigned {
    SmallString<15> Code;
    X86_MC::emitPrefix(Emitter, RF.getInst(), Code, STI);
    assert(Code.size() < 15 && "The number of prefixes must be less than 15.");

    // TODO: It turns out we need a decent amount of plumbing for the target
    // specific bits to determine number of prefixes it's safe to add. Various
    // targets (older chips mostly, but also Atom family) encounter decoder
    // stalls with too many prefixes. For testing purposes, we set the value
    // externally for the moment.
    unsigned ExistingPrefixSize = Code.size();
    if (TargetPrefixMax <= ExistingPrefixSize)
      return 0;
    return TargetPrefixMax - ExistingPrefixSize;
  }();
  const unsigned PrefixBytesToAdd =
      std::min(MaxPossiblePad, RemainingPrefixSize);
  if (PrefixBytesToAdd == 0)
    return false;

  const uint8_t Prefix = determinePaddingPrefix(RF.getInst());

  SmallString<256> Code;
  Code.append(PrefixBytesToAdd, Prefix);
  Code.append(RF.getContents().begin(), RF.getContents().end());
  RF.getContents() = Code;

  // Adjust the fixups for the change in offsets
  for (auto &F : RF.getFixups()) {
    F.setOffset(F.getOffset() + PrefixBytesToAdd);
  }

  RemainingSize -= PrefixBytesToAdd;
  return true;
}

bool X86AsmBackend::padInstructionViaRelaxation(MCRelaxableFragment &RF,
                                                MCCodeEmitter &Emitter,
                                                unsigned &RemainingSize) const {
  if (!mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
    // TODO: There are lots of other tricks we could apply for increasing
    // encoding size without impacting performance.
    return false;

  MCInst Relaxed = RF.getInst();
  relaxInstruction(Relaxed, *RF.getSubtargetInfo());

  SmallVector<MCFixup, 4> Fixups;
  SmallString<15> Code;
  Emitter.encodeInstruction(Relaxed, Code, Fixups, *RF.getSubtargetInfo());
  const unsigned OldSize = RF.getContents().size();
  const unsigned NewSize = Code.size();
  assert(NewSize >= OldSize && "size decrease during relaxation?");
  unsigned Delta = NewSize - OldSize;
  if (Delta > RemainingSize)
    return false;
  RF.setInst(Relaxed);
  RF.getContents() = Code;
  RF.getFixups() = Fixups;
  RemainingSize -= Delta;
  return true;
}

bool X86AsmBackend::padInstructionEncoding(MCRelaxableFragment &RF,
                                           MCCodeEmitter &Emitter,
                                           unsigned &RemainingSize) const {
  bool Changed = false;
  if (RemainingSize != 0)
    Changed |= padInstructionViaRelaxation(RF, Emitter, RemainingSize);
  if (RemainingSize != 0)
    Changed |= padInstructionViaPrefix(RF, Emitter, RemainingSize);
  return Changed;
}
void X86AsmBackend::finishLayout(MCAssembler const &Asm) const {
  // See if we can further relax some instructions to cut down on the number of
  // nop bytes required for code alignment. The actual win is in reducing
  // instruction count, not number of bytes. Modern X86-64 can easily end up
  // decode limited. It is often better to reduce the number of instructions
  // (i.e. eliminate nops) even at the cost of increasing the size and
  // complexity of others.
  if (!X86PadForAlign && !X86PadForBranchAlign)
    return;

  // The processed regions are delimited by LabeledFragments. -g may have more
  // MCSymbols and therefore different relaxation results. X86PadForAlign is
  // disabled by default to eliminate the -g vs non -g difference.
  DenseSet<MCFragment *> LabeledFragments;
  for (const MCSymbol &S : Asm.symbols())
    LabeledFragments.insert(S.getFragment(false));

  for (MCSection &Sec : Asm) {
    if (!Sec.isText())
      continue;

    SmallVector<MCRelaxableFragment *, 4> Relaxable;
    for (MCSection::iterator I = Sec.begin(), IE = Sec.end(); I != IE; ++I) {
      MCFragment &F = *I;

      if (LabeledFragments.count(&F))
        Relaxable.clear();

      if (F.getKind() == MCFragment::FT_Data ||
          F.getKind() == MCFragment::FT_CompactEncodedInst)
        // Skip and ignore
        continue;

      if (F.getKind() == MCFragment::FT_Relaxable) {
        auto &RF = cast<MCRelaxableFragment>(*I);
        Relaxable.push_back(&RF);
        continue;
      }

      auto canHandle = [](MCFragment &F) -> bool {
        switch (F.getKind()) {
        default:
          return false;
        case MCFragment::FT_Align:
          return X86PadForAlign;
        case MCFragment::FT_BoundaryAlign:
          return X86PadForBranchAlign;
        }
      };
      // For any unhandled kind, assume we can't change layout.
      if (!canHandle(F)) {
        Relaxable.clear();
        continue;
      }

#ifndef NDEBUG
      const uint64_t OrigOffset = Asm.getFragmentOffset(F);
#endif
      const uint64_t OrigSize = Asm.computeFragmentSize(F);

      // To keep the effects local, prefer to relax instructions closest to
      // the align directive. This is purely about human understandability
      // of the resulting code. If we later find a reason to expand
      // particular instructions over others, we can adjust.
      unsigned RemainingSize = OrigSize;
      while (!Relaxable.empty() && RemainingSize != 0) {
        auto &RF = *Relaxable.pop_back_val();
        // Give the backend a chance to play any tricks it wishes to increase
        // the encoding size of the given instruction. Target independent code
        // will try further relaxation, but targets may play further tricks.
        if (padInstructionEncoding(RF, Asm.getEmitter(), RemainingSize))
          Sec.setHasLayout(false);

        // If we have an instruction which hasn't been fully relaxed, we can't
        // skip past it and insert bytes before it. Changing its starting
        // offset might require a larger negative offset than it can encode.
        // We don't need to worry about larger positive offsets as none of the
        // possible offsets between this and our align are visible, and the
        // ones afterwards aren't changing.
        if (mayNeedRelaxation(RF.getInst(), *RF.getSubtargetInfo()))
          break;
      }
      Relaxable.clear();

      // BoundaryAlign explicitly tracks its size (unlike align)
      if (F.getKind() == MCFragment::FT_BoundaryAlign)
        cast<MCBoundaryAlignFragment>(F).setSize(RemainingSize);

#ifndef NDEBUG
      const uint64_t FinalOffset = Asm.getFragmentOffset(F);
      const uint64_t FinalSize = Asm.computeFragmentSize(F);
      assert(OrigOffset + OrigSize == FinalOffset + FinalSize &&
             "can't move start of next fragment!");
      assert(FinalSize == RemainingSize && "inconsistent size computation?");
#endif

      // If we're looking at a boundary align, make sure we don't try to pad
      // its target instructions for some following directive. Doing so would
      // break the alignment of the current boundary align.
      if (auto *BF = dyn_cast<MCBoundaryAlignFragment>(&F)) {
        const MCFragment *LastFragment = BF->getLastFragment();
        if (!LastFragment)
          continue;
        while (&*I != LastFragment)
          ++I;
      }
    }
  }

  // The layout is done. Mark every fragment as valid.
  for (MCSection &Section : Asm) {
    Asm.getFragmentOffset(*Section.curFragList()->Tail);
    Asm.computeFragmentSize(*Section.curFragList()->Tail);
  }
}
unsigned X86AsmBackend::getMaximumNopSize(const MCSubtargetInfo &STI) const {
  if (STI.hasFeature(X86::Is16Bit))
    return 4;
  if (!STI.hasFeature(X86::FeatureNOPL) && !STI.hasFeature(X86::Is64Bit))
    return 1;
  if (STI.hasFeature(X86::TuningFast7ByteNOP))
    return 7;
  if (STI.hasFeature(X86::TuningFast15ByteNOP))
    return 15;
  if (STI.hasFeature(X86::TuningFast11ByteNOP))
    return 11;
  // FIXME: handle 32-bit mode
  // 15-bytes is the longest single NOP instruction, but 10-bytes is
  // commonly the longest that can be efficiently decoded.
  return 10;
}
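// writeNopData sketch (below): with MaxNopLength == 15, a request for 20
// bytes is emitted as one 15-byte nop (five 0x66 prefixes on the 10-byte
// form) followed by a 5-byte nop.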
/// Write a sequence of optimal nops to the output, covering \p Count
/// bytes.
/// \return - true on success, false on failure
bool X86AsmBackend::writeNopData(raw_ostream &OS, uint64_t Count,
                                 const MCSubtargetInfo *STI) const {
  static const char Nops32Bit[10][11] = {
      // nop
      "\x90",
      // xchg %ax,%ax
      "\x66\x90",
      // nopl (%[re]ax)
      "\x0f\x1f\x00",
      // nopl 0(%[re]ax)
      "\x0f\x1f\x40\x00",
      // nopl 0(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x44\x00\x00",
      // nopw 0(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x44\x00\x00",
      // nopl 0L(%[re]ax)
      "\x0f\x1f\x80\x00\x00\x00\x00",
      // nopl 0L(%[re]ax,%[re]ax,1)
      "\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw 0L(%[re]ax,%[re]ax,1)
      "\x66\x0f\x1f\x84\x00\x00\x00\x00\x00",
      // nopw %cs:0L(%[re]ax,%[re]ax,1)
      "\x66\x2e\x0f\x1f\x84\x00\x00\x00\x00\x00",
  };

  // 16-bit mode uses different nop patterns than 32-bit.
  static const char Nops16Bit[4][11] = {
      // nop
      "\x90",
      // xchg %eax,%eax
      "\x66\x90",
      // lea 0(%si),%si
      "\x8d\x74\x00",
      // lea 0w(%si),%si
      "\x8d\xb4\x00\x00",
  };

  const char (*Nops)[11] =
      STI->hasFeature(X86::Is16Bit) ? Nops16Bit : Nops32Bit;

  uint64_t MaxNopLength = (uint64_t)getMaximumNopSize(*STI);

  // Emit as many MaxNopLength NOPs as needed, then emit a NOP of the remaining
  // length.
  do {
    const uint8_t ThisNopLength = (uint8_t) std::min(Count, MaxNopLength);
    const uint8_t Prefixes = ThisNopLength <= 10 ? 0 : ThisNopLength - 10;
    for (uint8_t i = 0; i < Prefixes; i++)
      OS << '\x66';
    const uint8_t Rest = ThisNopLength - Prefixes;
    if (Rest != 0)
      OS.write(Nops[Rest - 1], Rest);
    Count -= ThisNopLength;
  } while (Count != 0);

  return true;
}

/* *** */

namespace {

class ELFX86AsmBackend : public X86AsmBackend {
public:
  uint8_t OSABI;
  ELFX86AsmBackend(const Target &T, uint8_t OSABI, const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), OSABI(OSABI) {}
};

class ELFX86_32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_32AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI, ELF::EM_386);
  }
};

class ELFX86_X32AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_X32AsmBackend(const Target &T, uint8_t OSABI,
                       const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_X86_64);
  }
};

class ELFX86_IAMCUAsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_IAMCUAsmBackend(const Target &T, uint8_t OSABI,
                         const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ false, OSABI,
                                    ELF::EM_IAMCU);
  }
};

class ELFX86_64AsmBackend : public ELFX86AsmBackend {
public:
  ELFX86_64AsmBackend(const Target &T, uint8_t OSABI,
                      const MCSubtargetInfo &STI)
      : ELFX86AsmBackend(T, OSABI, STI) {}

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86ELFObjectWriter(/*IsELF64*/ true, OSABI, ELF::EM_X86_64);
  }
};

class WindowsX86AsmBackend : public X86AsmBackend {
  bool Is64Bit;

public:
  WindowsX86AsmBackend(const Target &T, bool is64Bit,
                       const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), Is64Bit(is64Bit) {}

  std::optional<MCFixupKind> getFixupKind(StringRef Name) const override {
    return StringSwitch<std::optional<MCFixupKind>>(Name)
        .Case("dir32", FK_Data_4)
        .Case("secrel32", FK_SecRel_4)
        .Case("secidx", FK_SecRel_2)
        .Default(MCAsmBackend::getFixupKind(Name));
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    return createX86WinCOFFObjectWriter(Is64Bit);
  }
};
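// Example .reloc names resolved by the COFF getFixupKind override above
// (hypothetical input): ".reloc ., secrel32, foo" or ".reloc ., dir32, bar".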
namespace CU {

/// Compact unwind encoding values.
enum CompactUnwindEncodings {
  /// [RE]BP based frame where [RE]BP is pushed on the stack immediately after
  /// the return address, then [RE]SP is moved to [RE]BP.
  UNWIND_MODE_BP_FRAME = 0x01000000,

  /// A frameless function with a small constant stack size.
  UNWIND_MODE_STACK_IMMD = 0x02000000,

  /// A frameless function with a large constant stack size.
  UNWIND_MODE_STACK_IND = 0x03000000,

  /// No compact unwind encoding is available.
  UNWIND_MODE_DWARF = 0x04000000,

  /// Mask for encoding the frame registers.
  UNWIND_BP_FRAME_REGISTERS = 0x00007FFF,

  /// Mask for encoding the frameless registers.
  UNWIND_FRAMELESS_STACK_REG_PERMUTATION = 0x000003FF
};

} // namespace CU

class DarwinX86AsmBackend : public X86AsmBackend {
  const MCRegisterInfo &MRI;

  /// Number of registers that can be saved in a compact unwind encoding.
  enum { CU_NUM_SAVED_REGS = 6 };

  mutable unsigned SavedRegs[CU_NUM_SAVED_REGS];
  Triple TT;
  bool Is64Bit;

  unsigned OffsetSize;     ///< Size of a stack slot saved by a "push".
  unsigned MoveInstrSize;  ///< Size of a "move" instruction.
  unsigned StackDivide;    ///< Amount to adjust stack size by.
protected:
  /// Size of a "push" instruction for the given register.
  unsigned PushInstrSize(unsigned Reg) const {
    switch (Reg) {
    case X86::EBX:
    case X86::ECX:
    case X86::EDX:
    case X86::EDI:
    case X86::ESI:
    case X86::EBP:
    case X86::RBX:
    case X86::RBP:
      return 1;
    case X86::R12:
    case X86::R13:
    case X86::R14:
    case X86::R15:
      return 2;
    }
    return 1;
  }

private:
  /// Get the compact unwind number for a given register. The number
  /// corresponds to the enum lists in compact_unwind_encoding.h.
  int getCompactUnwindRegNum(unsigned Reg) const {
    static const MCPhysReg CU32BitRegs[7] = {
        X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0
    };
    static const MCPhysReg CU64BitRegs[] = {
        X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0
    };
    const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs;
    for (int Idx = 1; *CURegs; ++CURegs, ++Idx)
      if (*CURegs == Reg)
        return Idx;

    return -1;
  }

  /// Return the registers encoded for a compact encoding with a frame
  /// pointer.
  uint32_t encodeCompactUnwindRegistersWithFrame() const {
    // Encode the registers in the order they were saved --- 3-bits per
    // register. The list of saved registers is assumed to be in reverse
    // order. The registers are numbered from 1 to CU_NUM_SAVED_REGS.
    uint32_t RegEnc = 0;
    for (int i = 0, Idx = 0; i != CU_NUM_SAVED_REGS; ++i) {
      unsigned Reg = SavedRegs[i];
      if (Reg == 0) break;

      int CURegNum = getCompactUnwindRegNum(Reg);
      if (CURegNum == -1) return ~0U;

      // Encode the 3-bit register number in order, skipping over 3-bits for
      // each register.
      RegEnc |= (CURegNum & 0x7) << (Idx++ * 3);
    }

    assert((RegEnc & 0x3FFFF) == RegEnc &&
           "Invalid compact register encoding!");
    return RegEnc;
  }
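  // Illustrative packing for the loop above (assuming the 64-bit table): a
  // SavedRegs list beginning {RBX, R12, ...} has CU register numbers 1 and 2,
  // giving RegEnc == (1 << 0) | (2 << 3) == 0x11.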
  /// Create the permutation encoding used with frameless stacks. It is
  /// passed the number of registers to be saved and an array of the registers
  /// saved.
  uint32_t encodeCompactUnwindRegistersWithoutFrame(unsigned RegCount) const {
    // The saved registers are numbered from 1 to 6. In order to encode the
    // order in which they were saved, we re-number them according to their
    // place in the register order. The re-numbering is relative to the last
    // re-numbered register. E.g., if we have registers {6, 2, 4, 5} saved in
    // that order:
    //
    //    Orig  Re-Num
    //    ----  ------
    //     6       6
    //     2       2
    //     4       3
    //     5       3
    //
    for (unsigned i = 0; i < RegCount; ++i) {
      int CUReg = getCompactUnwindRegNum(SavedRegs[i]);
      if (CUReg == -1) return ~0U;
      SavedRegs[i] = CUReg;
    }

    // Reverse the list.
    std::reverse(&SavedRegs[0], &SavedRegs[CU_NUM_SAVED_REGS]);

    uint32_t RenumRegs[CU_NUM_SAVED_REGS];
    for (unsigned i = CU_NUM_SAVED_REGS - RegCount; i < CU_NUM_SAVED_REGS; ++i) {
      unsigned Countless = 0;
      for (unsigned j = CU_NUM_SAVED_REGS - RegCount; j < i; ++j)
        if (SavedRegs[j] < SavedRegs[i])
          ++Countless;

      RenumRegs[i] = SavedRegs[i] - Countless - 1;
    }

    // Take the renumbered values and encode them into a 10-bit number.
    uint32_t permutationEncoding = 0;
    switch (RegCount) {
    case 6:
      permutationEncoding |= 120 * RenumRegs[0] + 24 * RenumRegs[1]
                             + 6 * RenumRegs[2] + 2 * RenumRegs[3]
                             + RenumRegs[4];
      break;
    case 5:
      permutationEncoding |= 120 * RenumRegs[1] + 24 * RenumRegs[2]
                             + 6 * RenumRegs[3] + 2 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 4:
      permutationEncoding |= 60 * RenumRegs[2] + 12 * RenumRegs[3]
                             + 3 * RenumRegs[4] + RenumRegs[5];
      break;
    case 3:
      permutationEncoding |= 20 * RenumRegs[3] + 4 * RenumRegs[4]
                             + RenumRegs[5];
      break;
    case 2:
      permutationEncoding |= 5 * RenumRegs[4] + RenumRegs[5];
      break;
    case 1:
      permutationEncoding |= RenumRegs[5];
      break;
    }

    assert((permutationEncoding & 0x3FF) == permutationEncoding &&
           "Invalid compact register encoding!");
    return permutationEncoding;
  }

public:
  DarwinX86AsmBackend(const Target &T, const MCRegisterInfo &MRI,
                      const MCSubtargetInfo &STI)
      : X86AsmBackend(T, STI), MRI(MRI), TT(STI.getTargetTriple()),
        Is64Bit(TT.isArch64Bit()) {
    memset(SavedRegs, 0, sizeof(SavedRegs));
    OffsetSize = Is64Bit ? 8 : 4;
    MoveInstrSize = Is64Bit ? 3 : 2;
    StackDivide = Is64Bit ? 8 : 4;
  }

  std::unique_ptr<MCObjectTargetWriter>
  createObjectTargetWriter() const override {
    uint32_t CPUType = cantFail(MachO::getCPUType(TT));
    uint32_t CPUSubType = cantFail(MachO::getCPUSubType(TT));
    return createX86MachObjectWriter(Is64Bit, CPUType, CPUSubType);
  }
  /// Implementation of algorithm to generate the compact unwind encoding
  /// for the CFI instructions.
  uint64_t generateCompactUnwindEncoding(const MCDwarfFrameInfo *FI,
                                         const MCContext *Ctxt) const override {
    ArrayRef<MCCFIInstruction> Instrs = FI->Instructions;
    if (Instrs.empty()) return 0;
    if (!isDarwinCanonicalPersonality(FI->Personality) &&
        !Ctxt->emitCompactUnwindNonCanonical())
      return CU::UNWIND_MODE_DWARF;

    // Reset the saved registers.
    unsigned SavedRegIdx = 0;
    memset(SavedRegs, 0, sizeof(SavedRegs));

    bool HasFP = false;

    // Encode that we are using EBP/RBP as the frame pointer.
    uint64_t CompactUnwindEncoding = 0;

    unsigned SubtractInstrIdx = Is64Bit ? 3 : 2;
    unsigned InstrOffset = 0;
    unsigned StackAdjust = 0;
    uint64_t StackSize = 0;
    int64_t MinAbsOffset = std::numeric_limits<int64_t>::max();

    for (const MCCFIInstruction &Inst : Instrs) {
      switch (Inst.getOperation()) {
      default:
        // Any other CFI directives indicate a frame that we aren't prepared
        // to represent via compact unwind, so just bail out.
        return CU::UNWIND_MODE_DWARF;
      case MCCFIInstruction::OpDefCfaRegister: {
        // Defines a frame pointer. E.g.
        //
        //     movq %rsp, %rbp
        //  L0:
        //     .cfi_def_cfa_register %rbp
        //
        HasFP = true;

        // If the frame pointer is other than esp/rsp, we do not have a way to
        // generate a compact unwinding representation, so bail out.
        if (*MRI.getLLVMRegNum(Inst.getRegister(), true) !=
            (Is64Bit ? X86::RBP : X86::EBP))
          return CU::UNWIND_MODE_DWARF;

        // Reset the counts.
        memset(SavedRegs, 0, sizeof(SavedRegs));
        StackAdjust = 0;
        SavedRegIdx = 0;
        MinAbsOffset = std::numeric_limits<int64_t>::max();
        InstrOffset += MoveInstrSize;
        break;
      }
      case MCCFIInstruction::OpDefCfaOffset: {
        // Defines a new offset for the CFA. E.g.
        //
        //  With frame:
        //
        //     pushq %rbp
        //  L0:
        //     .cfi_def_cfa_offset 16
        //
        //  Without frame:
        //
        //     subq $72, %rsp
        //  L0:
        //     .cfi_def_cfa_offset 80
        //
        StackSize = Inst.getOffset() / StackDivide;
        break;
      }
      case MCCFIInstruction::OpOffset: {
        // Defines a "push" of a callee-saved register. E.g.
        //
        //     pushq %r15
        //     pushq %r14
        //     pushq %rbx
        //  L0:
        //     subq $120, %rsp
        //  L1:
        //     .cfi_offset %rbx, -40
        //     .cfi_offset %r14, -32
        //     .cfi_offset %r15, -24
        //
        if (SavedRegIdx == CU_NUM_SAVED_REGS)
          // If there are too many saved registers, we cannot use a compact
          // unwind encoding.
          return CU::UNWIND_MODE_DWARF;

        unsigned Reg = *MRI.getLLVMRegNum(Inst.getRegister(), true);
        SavedRegs[SavedRegIdx++] = Reg;
        StackAdjust += OffsetSize;
        MinAbsOffset = std::min(MinAbsOffset, std::abs(Inst.getOffset()));
        InstrOffset += PushInstrSize(Reg);
        break;
      }
      }
    }

    StackAdjust /= StackDivide;

    if (HasFP) {
      if ((StackAdjust & 0xFF) != StackAdjust)
        // Offset was too big for a compact unwind encoding.
        return CU::UNWIND_MODE_DWARF;

      // We don't attempt to track a real StackAdjust, so if the saved
      // registers aren't adjacent to rbp we can't cope.
      if (SavedRegIdx != 0 && MinAbsOffset != 3 * (int)OffsetSize)
        return CU::UNWIND_MODE_DWARF;

      // Get the encoding of the saved registers when we have a frame pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithFrame();
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      CompactUnwindEncoding |= CU::UNWIND_MODE_BP_FRAME;
      CompactUnwindEncoding |= (StackAdjust & 0xFF) << 16;
      CompactUnwindEncoding |= RegEnc & CU::UNWIND_BP_FRAME_REGISTERS;
    } else {
      SubtractInstrIdx += InstrOffset;
      ++StackAdjust;

      if ((StackSize & 0xFF) == StackSize) {
        // Frameless stack with a small stack size.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IMMD;

        // Encode the stack size.
        CompactUnwindEncoding |= (StackSize & 0xFF) << 16;
      } else {
        if ((StackAdjust & 0x7) != StackAdjust)
          // The extra stack adjustments are too big for us to handle.
          return CU::UNWIND_MODE_DWARF;

        // Frameless stack with an offset too large for us to encode compactly.
        CompactUnwindEncoding |= CU::UNWIND_MODE_STACK_IND;

        // Encode the offset to the nnnnnn value in the 'subl $nnnnnn, ESP'
        // instruction.
        CompactUnwindEncoding |= (SubtractInstrIdx & 0xFF) << 16;

        // Encode any extra stack adjustments (done via push instructions).
        CompactUnwindEncoding |= (StackAdjust & 0x7) << 13;
      }
      // Encode the number of registers saved. (Reverse the list first.)
      std::reverse(&SavedRegs[0], &SavedRegs[SavedRegIdx]);
      CompactUnwindEncoding |= (SavedRegIdx & 0x7) << 10;

      // Get the encoding of the saved registers when we don't have a frame
      // pointer.
      uint32_t RegEnc = encodeCompactUnwindRegistersWithoutFrame(SavedRegIdx);
      if (RegEnc == ~0U) return CU::UNWIND_MODE_DWARF;

      // Encode the register encoding.
      CompactUnwindEncoding |=
          RegEnc & CU::UNWIND_FRAMELESS_STACK_REG_PERMUTATION;
    }

    return CompactUnwindEncoding;
  }
};

} // end anonymous namespace

MCAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, false, STI);

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isOSIAMCU())
    return new ELFX86_IAMCUAsmBackend(T, OSABI, STI);

  return new ELFX86_32AsmBackend(T, OSABI, STI);
}

MCAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
                                           const MCSubtargetInfo &STI,
                                           const MCRegisterInfo &MRI,
                                           const MCTargetOptions &Options) {
  const Triple &TheTriple = STI.getTargetTriple();
  if (TheTriple.isOSBinFormatMachO())
    return new DarwinX86AsmBackend(T, MRI, STI);

  if (TheTriple.isOSWindows() && TheTriple.isOSBinFormatCOFF())
    return new WindowsX86AsmBackend(T, true, STI);

  if (TheTriple.isUEFI()) {
    assert(TheTriple.isOSBinFormatCOFF() &&
           "Only COFF format is supported in UEFI environment.");
    return new WindowsX86AsmBackend(T, true, STI);
  }

  uint8_t OSABI = MCELFObjectTargetWriter::getOSABI(TheTriple.getOS());

  if (TheTriple.isX32())
    return new ELFX86_X32AsmBackend(T, OSABI, STI);
  return new ELFX86_64AsmBackend(T, OSABI, STI);
}

namespace {
class X86ELFStreamer : public MCELFStreamer {
public:
  X86ELFStreamer(MCContext &Context, std::unique_ptr<MCAsmBackend> TAB,
                 std::unique_ptr<MCObjectWriter> OW,
                 std::unique_ptr<MCCodeEmitter> Emitter)
      : MCELFStreamer(Context, std::move(TAB), std::move(OW),
                      std::move(Emitter)) {}

  void emitInstruction(const MCInst &Inst, const MCSubtargetInfo &STI) override;
};
} // end anonymous namespace

void X86_MC::emitInstruction(MCObjectStreamer &S, const MCInst &Inst,
                             const MCSubtargetInfo &STI) {
  auto &Backend = static_cast<X86AsmBackend &>(S.getAssembler().getBackend());
  Backend.emitInstructionBegin(S, Inst, STI);
  S.MCObjectStreamer::emitInstruction(Inst, STI);
  Backend.emitInstructionEnd(S, Inst);
}

void X86ELFStreamer::emitInstruction(const MCInst &Inst,
                                     const MCSubtargetInfo &STI) {
  X86_MC::emitInstruction(*this, Inst, STI);
}

MCStreamer *llvm::createX86ELFStreamer(const Triple &T, MCContext &Context,
                                       std::unique_ptr<MCAsmBackend> &&MAB,
                                       std::unique_ptr<MCObjectWriter> &&MOW,
                                       std::unique_ptr<MCCodeEmitter> &&MCE) {
  return new X86ELFStreamer(Context, std::move(MAB), std::move(MOW),
                            std::move(MCE));
}
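// Orientation note (not part of this file): the factory functions above are
// hooked up elsewhere via TargetRegistry, roughly:
//   TargetRegistry::RegisterMCAsmBackend(getTheX86_64Target(),
//                                        createX86_64AsmBackend);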