Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86ExpandPseudo.cpp
35267 views
//===------- X86ExpandPseudo.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, other late
// optimizations, or simply the encoding of the instructions.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86FrameLowering.h"
#include "X86InstrBuilder.h"
#include "X86InstrInfo.h"
#include "X86MachineFunctionInfo.h"
#include "X86Subtarget.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/Passes.h" // For IDs of passes that are preserved.
#include "llvm/IR/EHPersonalities.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Target/TargetMachine.h"
using namespace llvm;

#define DEBUG_TYPE "x86-pseudo"
#define X86_EXPAND_PSEUDO_NAME "X86 pseudo instruction expansion pass"

namespace {
/// Late machine-function pass that rewrites X86 pseudo instructions
/// (TCRETURN*, EH_RETURN, RET/IRET with stack adjustment, AMX tile pseudos,
/// CALL*_RVMARKER, mask-pair spills, etc.) into real target instructions.
/// Runs after register allocation (requires NoVRegs) and preserves the CFG
/// except for the va_start XMM-save expansion handled up front.
class X86ExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  X86ExpandPseudo() : MachineFunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addPreservedID(MachineLoopInfoID);
    AU.addPreservedID(MachineDominatorsID);
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // Cached per-function target objects, set up in runOnMachineFunction.
  const X86Subtarget *STI = nullptr;
  const X86InstrInfo *TII = nullptr;
  const X86RegisterInfo *TRI = nullptr;
  const X86MachineFunctionInfo *X86FI = nullptr;
  const X86FrameLowering *X86FL = nullptr;

  bool runOnMachineFunction(MachineFunction &MF) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return "X86 pseudo instruction expansion pass";
  }

private:
  void expandICallBranchFunnel(MachineBasicBlock *MBB,
                               MachineBasicBlock::iterator MBBI);
  void expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandMBB(MachineBasicBlock &MBB);

  /// This function expands pseudos which affect control flow.
  /// It is done in a separate pass to simplify block navigation in the main
  /// pass (calling expandMBB).
  bool expandPseudosWhichAffectControlFlow(MachineFunction &MF);

  /// Expand X86::VASTART_SAVE_XMM_REGS into a set of xmm copying
  /// instructions, placed into a separate block guarded by a check for the
  /// %al register (for the SystemV ABI).
  void expandVastartSaveXmmRegs(
      MachineBasicBlock *EntryBlk,
      MachineBasicBlock::iterator VAStartPseudoInstr) const;
};
char X86ExpandPseudo::ID = 0;

} // End anonymous namespace.

INITIALIZE_PASS(X86ExpandPseudo, DEBUG_TYPE, X86_EXPAND_PSEUDO_NAME, false,
                false)

/// Expand the ICALL_BRANCH_FUNNEL pseudo into a binary search over the jump
/// table targets: each step LEAs the candidate target address into R11,
/// compares it against the selector, and branches; leaves are TAILJMPd64s.
/// New blocks are inserted after \p MBB and the pseudo is erased at the end.
void X86ExpandPseudo::expandICallBranchFunnel(
    MachineBasicBlock *MBB, MachineBasicBlock::iterator MBBI) {
  MachineBasicBlock *JTMBB = MBB;
  MachineInstr *JTInst = &*MBBI;
  MachineFunction *MF = MBB->getParent();
  const BasicBlock *BB = MBB->getBasicBlock();
  auto InsPt = MachineFunction::iterator(MBB);
  ++InsPt;

  std::vector<std::pair<MachineBasicBlock *, unsigned>> TargetMBBs;
  const DebugLoc &DL = JTInst->getDebugLoc();
  MachineOperand Selector = JTInst->getOperand(0);
  const GlobalValue *CombinedGlobal = JTInst->getOperand(1).getGlobal();

  // Emit "lea target(%rip), %r11; cmp %r11, selector" for target number
  // Target; operand 2+2*Target holds the target's offset into the combined
  // global.
  auto CmpTarget = [&](unsigned Target) {
    if (Selector.isReg())
      MBB->addLiveIn(Selector.getReg());
    BuildMI(*MBB, MBBI, DL, TII->get(X86::LEA64r), X86::R11)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addGlobalAddress(CombinedGlobal,
                          JTInst->getOperand(2 + 2 * Target).getImm())
        .addReg(0);
    BuildMI(*MBB, MBBI, DL, TII->get(X86::CMP64rr))
        .add(Selector)
        .addReg(X86::R11);
  };

  // Create a successor block; EFLAGS is live across the conditional jumps.
  auto CreateMBB = [&]() {
    auto *NewMBB = MF->CreateMachineBasicBlock(BB);
    MBB->addSuccessor(NewMBB);
    if (!MBB->isLiveIn(X86::EFLAGS))
      MBB->addLiveIn(X86::EFLAGS);
    return NewMBB;
  };

  // Emit a conditional jump to ThenMBB and continue emission in a fresh
  // fall-through block.
  auto EmitCondJump = [&](unsigned CC, MachineBasicBlock *ThenMBB) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::JCC_1)).addMBB(ThenMBB).addImm(CC);

    auto *ElseMBB = CreateMBB();
    MF->insert(InsPt, ElseMBB);
    MBB = ElseMBB;
    MBBI = MBB->end();
  };

  // Conditional jump to a block whose tail call is filled in later (see the
  // TargetMBBs loop at the bottom).
  auto EmitCondJumpTarget = [&](unsigned CC, unsigned Target) {
    auto *ThenMBB = CreateMBB();
    TargetMBBs.push_back({ThenMBB, Target});
    EmitCondJump(CC, ThenMBB);
  };

  // Operand 3+2*Target is the callee of target number Target.
  auto EmitTailCall = [&](unsigned Target) {
    BuildMI(*MBB, MBBI, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * Target));
  };

  // Recursively emit the compare/branch tree over [FirstTarget,
  // FirstTarget+NumTargets). Small ranges are handled linearly; larger
  // ranges split around the midpoint.
  std::function<void(unsigned, unsigned)> EmitBranchFunnel =
      [&](unsigned FirstTarget, unsigned NumTargets) {
        if (NumTargets == 1) {
          EmitTailCall(FirstTarget);
          return;
        }

        if (NumTargets == 2) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitTailCall(FirstTarget + 1);
          return;
        }

        if (NumTargets < 6) {
          CmpTarget(FirstTarget + 1);
          EmitCondJumpTarget(X86::COND_B, FirstTarget);
          EmitCondJumpTarget(X86::COND_E, FirstTarget + 1);
          EmitBranchFunnel(FirstTarget + 2, NumTargets - 2);
          return;
        }

        auto *ThenMBB = CreateMBB();
        CmpTarget(FirstTarget + (NumTargets / 2));
        EmitCondJump(X86::COND_B, ThenMBB);
        EmitCondJumpTarget(X86::COND_E, FirstTarget + (NumTargets / 2));
        EmitBranchFunnel(FirstTarget + (NumTargets / 2) + 1,
                         NumTargets - (NumTargets / 2) - 1);

        MF->insert(InsPt, ThenMBB);
        MBB = ThenMBB;
        MBBI = MBB->end();
        EmitBranchFunnel(FirstTarget, NumTargets / 2);
      };

  // Operands after the selector and combined global come in (offset, callee)
  // pairs, hence (NumOperands - 2) / 2 targets.
  EmitBranchFunnel(0, (JTInst->getNumOperands() - 2) / 2);
  for (auto P : TargetMBBs) {
    MF->insert(InsPt, P.first);
    BuildMI(P.first, DL, TII->get(X86::TAILJMPd64))
        .add(JTInst->getOperand(3 + 2 * P.second));
  }
  JTMBB->erase(JTInst);
}

void X86ExpandPseudo::expandCALL_RVMARKER(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to call instruction, followed by the special
  // "movq %rax, %rdi" marker.
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  assert((MI.getOperand(1).isGlobal() || MI.getOperand(1).isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = -1;
  if (MI.getOpcode() == X86::CALL64m_RVMARKER)
    Opc = X86::CALL64m;
  else if (MI.getOpcode() == X86::CALL64r_RVMARKER)
    Opc = X86::CALL64r;
  else if (MI.getOpcode() == X86::CALL64pcrel32_RVMARKER)
    Opc = X86::CALL64pcrel32;
  else
    llvm_unreachable("unexpected opcode");

  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  bool RAXImplicitDead = false;
  for (MachineOperand &Op : llvm::drop_begin(MI.operands())) {
    // RAX may be 'implicit dead', if there are no other users of the return
    // value. We introduce a new use, so change it to 'implicit def'.
    if (Op.isReg() && Op.isImplicit() && Op.isDead() &&
        TRI->regsOverlap(Op.getReg(), X86::RAX)) {
      Op.setIsDead(false);
      Op.setIsDef(true);
      RAXImplicitDead = true;
    }
    OriginalCall->addOperand(Op);
  }

  // Emit marker "movq %rax, %rdi". %rdi is not callee-saved, so it cannot be
  // live across the earlier call. The call to the ObjC runtime function returns
  // the first argument, so the value of %rax is unchanged after the ObjC
  // runtime call. On Windows targets, the runtime call follows the regular
  // x64 calling convention and expects the first argument in %rcx.
  auto TargetReg = STI->getTargetTriple().isOSWindows() ? X86::RCX : X86::RDI;
  auto *Marker = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::MOV64rr))
                     .addReg(TargetReg, RegState::Define)
                     .addReg(X86::RAX)
                     .getInstr();
  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Marker);

  // Emit call to ObjC runtime.
  const uint32_t *RegMask =
      TRI->getCallPreservedMask(*MBB.getParent(), CallingConv::C);
  MachineInstr *RtCall =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(X86::CALL64pcrel32))
          .addGlobalAddress(MI.getOperand(0).getGlobal(), 0, 0)
          .addRegMask(RegMask)
          .addReg(X86::RAX,
                  RegState::Implicit |
                      (RAXImplicitDead ? (RegState::Dead | RegState::Define)
                                       : RegState::Define))
          .getInstr();
  MI.eraseFromParent();

  auto &TM = MBB.getParent()->getTarget();
  // On Darwin platforms, wrap the expanded sequence in a bundle to prevent
  // later optimizations from breaking up the sequence.
  if (TM.getTargetTriple().isOSDarwin())
    finalizeBundle(MBB, OriginalCall->getIterator(),
                   std::next(RtCall->getIterator()));
}

/// If \p MBBI is a pseudo instruction, this method expands
/// it to the corresponding (sequence of) actual instruction(s).
/// \returns true if \p MBBI has been expanded.
bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  const DebugLoc &DL = MBBI->getDebugLoc();
// Select the EVEX-encoded form of an opcode when extended GPRs are available.
#define GET_EGPR_IF_ENABLED(OPC) (STI->hasEGPR() ? OPC##_EVEX : OPC)
  switch (Opcode) {
  default:
    return false;
  case X86::TCRETURNdi:
  case X86::TCRETURNdicc:
  case X86::TCRETURNri:
  case X86::TCRETURNmi:
  case X86::TCRETURNdi64:
  case X86::TCRETURNdi64cc:
  case X86::TCRETURNri64:
  case X86::TCRETURNmi64: {
    bool isMem = Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64;
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    // For memory forms the stack adjustment follows the address operands.
    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? X86::AddrNumOperands
                                                         : 1);
    assert(StackAdjust.isImm() && "Expecting immediate value.");

    // Adjust stack pointer.
    int StackAdj = StackAdjust.getImm();
    int MaxTCDelta = X86FI->getTCReturnAddrDelta();
    int Offset = 0;
    assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");

    // Incorporate the retaddr area.
    Offset = StackAdj - MaxTCDelta;
    assert(Offset >= 0 && "Offset should never be negative");

    if (Opcode == X86::TCRETURNdicc || Opcode == X86::TCRETURNdi64cc) {
      assert(Offset == 0 && "Conditional tail call cannot adjust the stack.");
    }

    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += X86FL->mergeSPUpdates(MBB, MBBI, true);
      X86FL->emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue=*/true);
    }

    // Jump to label or value in register.
    bool IsWin64 = STI->isTargetWin64();
    if (Opcode == X86::TCRETURNdi || Opcode == X86::TCRETURNdicc ||
        Opcode == X86::TCRETURNdi64 || Opcode == X86::TCRETURNdi64cc) {
      unsigned Op;
      switch (Opcode) {
      case X86::TCRETURNdi:
        Op = X86::TAILJMPd;
        break;
      case X86::TCRETURNdicc:
        Op = X86::TAILJMPd_CC;
        break;
      case X86::TCRETURNdi64cc:
        assert(!MBB.getParent()->hasWinCFI() &&
               "Conditional tail calls confuse "
               "the Win64 unwinder.");
        Op = X86::TAILJMPd64_CC;
        break;
      default:
        // Note: Win64 uses REX prefixes indirect jumps out of functions, but
        // not direct ones.
        Op = X86::TAILJMPd64;
        break;
      }
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      if (JumpTarget.isGlobal()) {
        MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                             JumpTarget.getTargetFlags());
      } else {
        assert(JumpTarget.isSymbol());
        MIB.addExternalSymbol(JumpTarget.getSymbolName(),
                              JumpTarget.getTargetFlags());
      }
      if (Op == X86::TAILJMPd_CC || Op == X86::TAILJMPd64_CC) {
        MIB.addImm(MBBI->getOperand(2).getImm());
      }

    } else if (Opcode == X86::TCRETURNmi || Opcode == X86::TCRETURNmi64) {
      unsigned Op = (Opcode == X86::TCRETURNmi)
                        ? X86::TAILJMPm
                        : (IsWin64 ? X86::TAILJMPm64_REX : X86::TAILJMPm64);
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(Op));
      for (unsigned i = 0; i != X86::AddrNumOperands; ++i)
        MIB.add(MBBI->getOperand(i));
    } else if (Opcode == X86::TCRETURNri64) {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL,
              TII->get(IsWin64 ? X86::TAILJMPr64_REX : X86::TAILJMPr64))
          .add(JumpTarget);
    } else {
      JumpTarget.setIsKill();
      BuildMI(MBB, MBBI, DL, TII->get(X86::TAILJMPr))
          .add(JumpTarget);
    }

    MachineInstr &NewMI = *std::prev(MBBI);
    NewMI.copyImplicitOps(*MBBI->getParent()->getParent(), *MBBI);
    NewMI.setCFIType(*MBB.getParent(), MI.getCFIType());

    // Update the call site info.
    if (MBBI->isCandidateForCallSiteEntry())
      MBB.getParent()->moveCallSiteInfo(&*MBBI, &NewMI);

    // Delete the pseudo instruction TCRETURN.
    MBB.erase(MBBI);

    return true;
  }
  case X86::EH_RETURN:
  case X86::EH_RETURN64: {
    MachineOperand &DestAddr = MBBI->getOperand(0);
    assert(DestAddr.isReg() && "Offset should be in register!");
    const bool Uses64BitFramePtr =
        STI->isTarget64BitLP64() || STI->isTargetNaCl64();
    Register StackPtr = TRI->getStackRegister();
    BuildMI(MBB, MBBI, DL,
            TII->get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), StackPtr)
        .addReg(DestAddr.getReg());
    // The EH_RETURN pseudo is really removed during the MC Lowering.
    return true;
  }
  case X86::IRET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, true);
    // Replace pseudo with machine iret
    unsigned RetOp = STI->is64Bit() ? X86::IRET64 : X86::IRET32;
    // Use UIRET if UINTR is present (except for building kernel)
    if (STI->is64Bit() && STI->hasUINTR() &&
        MBB.getParent()->getTarget().getCodeModel() != CodeModel::Kernel)
      RetOp = X86::UIRET;
    BuildMI(MBB, MBBI, DL, TII->get(RetOp));
    MBB.erase(MBBI);
    return true;
  }
  case X86::RET: {
    // Adjust stack to erase error code
    int64_t StackAdj = MBBI->getOperand(0).getImm();
    MachineInstrBuilder MIB;
    if (StackAdj == 0) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RET64 : X86::RET32));
    } else if (isUInt<16>(StackAdj)) {
      MIB = BuildMI(MBB, MBBI, DL,
                    TII->get(STI->is64Bit() ? X86::RETI64 : X86::RETI32))
                .addImm(StackAdj);
    } else {
      assert(!STI->is64Bit() &&
             "shouldn't need to do this for x86_64 targets!");
      // A ret can only handle immediates as big as 2**16-1. If we need to pop
      // off bytes before the return address, we must do it manually.
      BuildMI(MBB, MBBI, DL, TII->get(X86::POP32r))
          .addReg(X86::ECX, RegState::Define);
      X86FL->emitSPUpdate(MBB, MBBI, DL, StackAdj, /*InEpilogue=*/true);
      BuildMI(MBB, MBBI, DL, TII->get(X86::PUSH32r)).addReg(X86::ECX);
      MIB = BuildMI(MBB, MBBI, DL, TII->get(X86::RET32));
    }
    // Carry over any extra operands (e.g. implicit uses) from the pseudo.
    for (unsigned I = 1, E = MBBI->getNumOperands(); I != E; ++I)
      MIB.add(MBBI->getOperand(I));
    MBB.erase(MBBI);
    return true;
  }
  case X86::LCMPXCHG16B_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudocmpxchg Addr, <4 opds for the address>, InArg, SaveRbx
    // =>
    // RBX = InArg
    // actualcmpxchg Addr
    // RBX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(6);
    Register SaveRbx = MBBI->getOperand(7).getReg();

    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    // NOTE: We don't copy the kill flag since the input might be the same reg
    // as one of the other operands of LCMPXCHG16B.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, InArg.getReg(), false);
    // Create the actual instruction.
    MachineInstr *NewInstr = BuildMI(MBB, MBBI, DL, TII->get(X86::LCMPXCHG16B));
    // Copy the operands related to the address.
    for (unsigned Idx = 1; Idx < 6; ++Idx)
      NewInstr->addOperand(MBBI->getOperand(Idx));
    // Finally, restore the value of RBX.
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx,
                     /*SrcIsKill*/ true);

    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  // Loading/storing mask pairs requires two kmov operations. The second one of
  // these needs a 2 byte displacement relative to the specified address (with
  // 32 bit spill size). The pairs of 1bit masks up to 16 bit masks all use the
  // same spill size, they all are stored using MASKPAIR16STORE, loaded using
  // MASKPAIR16LOAD.
  //
  // The displacement value might wrap around in theory, thus the asserts in
  // both cases.
  case X86::MASKPAIR16LOAD: {
    int64_t Disp = MBBI->getOperand(1 + X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(0).getReg();
    bool DstIsDead = MBBI->getOperand(0).isDead();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg0, RegState::Define | getDeadRegState(DstIsDead));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWkm)))
            .addReg(Reg1, RegState::Define | getDeadRegState(DstIsDead));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(1 + i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(1 + i));
    }

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MASKPAIR16STORE: {
    int64_t Disp = MBBI->getOperand(X86::AddrDisp).getImm();
    assert(Disp >= 0 && Disp <= INT32_MAX - 2 && "Unexpected displacement");
    Register Reg = MBBI->getOperand(X86::AddrNumOperands).getReg();
    bool SrcIsKill = MBBI->getOperand(X86::AddrNumOperands).isKill();
    Register Reg0 = TRI->getSubReg(Reg, X86::sub_mask_0);
    Register Reg1 = TRI->getSubReg(Reg, X86::sub_mask_1);

    auto MIBLo =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));
    auto MIBHi =
        BuildMI(MBB, MBBI, DL, TII->get(GET_EGPR_IF_ENABLED(X86::KMOVWmk)));

    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      MIBLo.add(MBBI->getOperand(i));
      if (i == X86::AddrDisp)
        MIBHi.addImm(Disp + 2);
      else
        MIBHi.add(MBBI->getOperand(i));
    }
    MIBLo.addReg(Reg0, getKillRegState(SrcIsKill));
    MIBHi.addReg(Reg1, getKillRegState(SrcIsKill));

    // Split the memory operand, adjusting the offset and size for the halves.
    MachineMemOperand *OldMMO = MBBI->memoperands().front();
    MachineFunction *MF = MBB.getParent();
    MachineMemOperand *MMOLo = MF->getMachineMemOperand(OldMMO, 0, 2);
    MachineMemOperand *MMOHi = MF->getMachineMemOperand(OldMMO, 2, 2);

    MIBLo.setMemRefs(MMOLo);
    MIBHi.setMemRefs(MMOHi);

    // Delete the pseudo.
    MBB.erase(MBBI);
    return true;
  }
  case X86::MWAITX_SAVE_RBX: {
    // Perform the following transformation.
    // SaveRbx = pseudomwaitx InArg, SaveRbx
    // =>
    // [E|R]BX = InArg
    // actualmwaitx
    // [E|R]BX = SaveRbx
    const MachineOperand &InArg = MBBI->getOperand(1);
    // Copy the input argument of the pseudo into the argument of the
    // actual instruction.
    TII->copyPhysReg(MBB, MBBI, DL, X86::EBX, InArg.getReg(), InArg.isKill());
    // Create the actual instruction.
    BuildMI(MBB, MBBI, DL, TII->get(X86::MWAITXrrr));
    // Finally, restore the value of RBX.
    Register SaveRbx = MBBI->getOperand(2).getReg();
    TII->copyPhysReg(MBB, MBBI, DL, X86::RBX, SaveRbx, /*SrcIsKill*/ true);
    // Delete the pseudo.
    MBBI->eraseFromParent();
    return true;
  }
  case TargetOpcode::ICALL_BRANCH_FUNNEL:
    expandICallBranchFunnel(&MBB, MBBI);
    return true;
  case X86::PLDTILECFGV: {
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::LDTILECFG)));
    return true;
  }
  case X86::PTILELOADDV:
  case X86::PTILELOADDT1V: {
    // Drop the row/col operands (1 and 2) that only the pseudo carries.
    for (unsigned i = 2; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc = Opcode == X86::PTILELOADDV
                       ? GET_EGPR_IF_ENABLED(X86::TILELOADD)
                       : GET_EGPR_IF_ENABLED(X86::TILELOADDT1);
    MI.setDesc(TII->get(Opc));
    return true;
  }
  case X86::PTCMMIMFP16PSV:
  case X86::PTCMMRLFP16PSV:
  case X86::PTDPBSSDV:
  case X86::PTDPBSUDV:
  case X86::PTDPBUSDV:
  case X86::PTDPBUUDV:
  case X86::PTDPBF16PSV:
  case X86::PTDPFP16PSV: {
    // Untie the accumulator, drop the shape operands (1-3), then re-tie
    // dst/src for the real AMX dot-product instruction.
    MI.untieRegOperand(4);
    for (unsigned i = 3; i > 0; --i)
      MI.removeOperand(i);
    unsigned Opc;
    switch (Opcode) {
    case X86::PTCMMIMFP16PSV:  Opc = X86::TCMMIMFP16PS; break;
    case X86::PTCMMRLFP16PSV:  Opc = X86::TCMMRLFP16PS; break;
    case X86::PTDPBSSDV:   Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUDV:   Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSDV:   Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUDV:   Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PSV: Opc = X86::TDPBF16PS; break;
    case X86::PTDPFP16PSV: Opc = X86::TDPFP16PS; break;
    default: llvm_unreachable("Impossible Opcode!");
    }
    MI.setDesc(TII->get(Opc));
    MI.tieOperands(0, 1);
    return true;
  }
  case X86::PTILESTOREDV: {
    for (int i = 1; i >= 0; --i)
      MI.removeOperand(i);
    MI.setDesc(TII->get(GET_EGPR_IF_ENABLED(X86::TILESTORED)));
    return true;
  }
#undef GET_EGPR_IF_ENABLED
  case X86::PTILEZEROV: {
    for (int i = 2; i > 0; --i) // Remove row, col
      MI.removeOperand(i);
    MI.setDesc(TII->get(X86::TILEZERO));
    return true;
  }
  case X86::CALL64pcrel32_RVMARKER:
  case X86::CALL64r_RVMARKER:
  case X86::CALL64m_RVMARKER:
    expandCALL_RVMARKER(MBB, MBBI);
    return true;
  case X86::ADD32mi_ND:
  case X86::ADD64mi32_ND:
  case X86::SUB32mi_ND:
  case X86::SUB64mi32_ND:
  case X86::AND32mi_ND:
  case X86::AND64mi32_ND:
  case X86::OR32mi_ND:
  case X86::OR64mi32_ND:
  case X86::XOR32mi_ND:
  case X86::XOR64mi32_ND:
  case X86::ADC32mi_ND:
  case X86::ADC64mi32_ND:
  case X86::SBB32mi_ND:
  case X86::SBB64mi32_ND: {
    // It's possible for an EVEX-encoded legacy instruction to reach the 15-byte
    // instruction length limit: 4 bytes of EVEX prefix + 1 byte of opcode + 1
    // byte of ModRM + 1 byte of SIB + 4 bytes of displacement + 4 bytes of
    // immediate = 15 bytes in total, e.g.
    //
    //  subq $184, %fs:257(%rbx, %rcx), %rax
    //
    // In such a case, no additional (ADSIZE or segment override) prefix can be
    // used. To resolve the issue, we split the "long" instruction into 2
    // instructions:
    //
    //  movq %fs:257(%rbx, %rcx),%rax
    //  subq $184, %rax
    //
    // Therefore we consider the OPmi_ND to be a pseudo instruction to some
    // extent.
    const MachineOperand &ImmOp =
        MI.getOperand(MI.getNumExplicitOperands() - 1);
    // If the immediate is an expr, conservatively estimate 4 bytes.
    if (ImmOp.isImm() && isInt<8>(ImmOp.getImm()))
      return false;
    int MemOpNo = X86::getFirstAddrOperandIdx(MI);
    const MachineOperand &DispOp = MI.getOperand(MemOpNo + X86::AddrDisp);
    Register Base = MI.getOperand(MemOpNo + X86::AddrBaseReg).getReg();
    // If the displacement is an expr, conservatively estimate 4 bytes.
    if (Base && DispOp.isImm() && isInt<8>(DispOp.getImm()))
      return false;
    // There can only be one of three: SIB, segment override register, ADSIZE
    Register Index = MI.getOperand(MemOpNo + X86::AddrIndexReg).getReg();
    unsigned Count = !!MI.getOperand(MemOpNo + X86::AddrSegmentReg).getReg();
    if (X86II::needSIB(Base, Index, /*In64BitMode=*/true))
      ++Count;
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(Base) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(Index))
      ++Count;
    if (Count < 2)
      return false;
    unsigned Opc, LoadOpc;
    switch (Opcode) {
// Map an OPmi_ND pseudo to its register-immediate form plus the load that
// feeds it.
#define MI_TO_RI(OP)                                                           \
  case X86::OP##32mi_ND:                                                       \
    Opc = X86::OP##32ri;                                                       \
    LoadOpc = X86::MOV32rm;                                                    \
    break;                                                                     \
  case X86::OP##64mi32_ND:                                                     \
    Opc = X86::OP##64ri32;                                                     \
    LoadOpc = X86::MOV64rm;                                                    \
    break;

    default:
      llvm_unreachable("Unexpected Opcode");
      MI_TO_RI(ADD);
      MI_TO_RI(SUB);
      MI_TO_RI(AND);
      MI_TO_RI(OR);
      MI_TO_RI(XOR);
      MI_TO_RI(ADC);
      MI_TO_RI(SBB);
#undef MI_TO_RI
    }
    // Insert OPri.
    Register DestReg = MI.getOperand(0).getReg();
    BuildMI(MBB, std::next(MBBI), DL, TII->get(Opc), DestReg)
        .addReg(DestReg)
        .add(ImmOp);
    // Change OPmi_ND to MOVrm.
    for (unsigned I = MI.getNumImplicitOperands() + 1; I != 0; --I)
      MI.removeOperand(MI.getNumOperands() - 1);
    MI.setDesc(TII->get(LoadOpc));
    return true;
  }
  }
  llvm_unreachable("Previous switch has a fallthrough?");
}

// This function creates an additional block for storing varargs guarded
// registers. It adds a check for %al into the entry block, to skip
// GuardedRegsBlk if xmm registers should not be stored.
//
//     EntryBlk[VAStartPseudoInstr]     EntryBlk
//        |                              |     .
//        |                              |        .
//        |                              |   GuardedRegsBlk
//        |                      =>      |        .
//        |                              |     .
//        |                             TailBlk
//        |                              |
//        |                              |
//
void X86ExpandPseudo::expandVastartSaveXmmRegs(
    MachineBasicBlock *EntryBlk,
    MachineBasicBlock::iterator VAStartPseudoInstr) const {
  assert(VAStartPseudoInstr->getOpcode() == X86::VASTART_SAVE_XMM_REGS);

  MachineFunction *Func = EntryBlk->getParent();
  const TargetInstrInfo *TII = STI->getInstrInfo();
  const DebugLoc &DL = VAStartPseudoInstr->getDebugLoc();
  Register CountReg = VAStartPseudoInstr->getOperand(0).getReg();

  // Calculate liveins for newly created blocks.
  LivePhysRegs LiveRegs(*STI->getRegisterInfo());
  SmallVector<std::pair<MCPhysReg, const MachineOperand *>, 8> Clobbers;

  LiveRegs.addLiveIns(*EntryBlk);
  for (MachineInstr &MI : EntryBlk->instrs()) {
    if (MI.getOpcode() == VAStartPseudoInstr->getOpcode())
      break;

    LiveRegs.stepForward(MI, Clobbers);
  }

  // Create the new basic blocks. One block contains all the XMM stores,
  // and another block is the final destination regardless of whether any
  // stores were performed.
  const BasicBlock *LLVMBlk = EntryBlk->getBasicBlock();
  MachineFunction::iterator EntryBlkIter = ++EntryBlk->getIterator();
  MachineBasicBlock *GuardedRegsBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  MachineBasicBlock *TailBlk = Func->CreateMachineBasicBlock(LLVMBlk);
  Func->insert(EntryBlkIter, GuardedRegsBlk);
  Func->insert(EntryBlkIter, TailBlk);

  // Transfer the remainder of EntryBlk and its successor edges to TailBlk.
  TailBlk->splice(TailBlk->begin(), EntryBlk,
                  std::next(MachineBasicBlock::iterator(VAStartPseudoInstr)),
                  EntryBlk->end());
  TailBlk->transferSuccessorsAndUpdatePHIs(EntryBlk);

  uint64_t FrameOffset = VAStartPseudoInstr->getOperand(4).getImm();
  uint64_t VarArgsRegsOffset = VAStartPseudoInstr->getOperand(6).getImm();

  // TODO: add support for YMM and ZMM here.
  unsigned MOVOpc = STI->hasAVX() ? X86::VMOVAPSmr : X86::MOVAPSmr;

  // In the XMM save block, save all the XMM argument registers.
  for (int64_t OpndIdx = 7, RegIdx = 0;
       OpndIdx < VAStartPseudoInstr->getNumOperands() - 1;
       OpndIdx++, RegIdx++) {
    auto NewMI = BuildMI(GuardedRegsBlk, DL, TII->get(MOVOpc));
    for (int i = 0; i < X86::AddrNumOperands; ++i) {
      if (i == X86::AddrDisp)
        NewMI.addImm(FrameOffset + VarArgsRegsOffset + RegIdx * 16);
      else
        NewMI.add(VAStartPseudoInstr->getOperand(i + 1));
    }
    NewMI.addReg(VAStartPseudoInstr->getOperand(OpndIdx).getReg());
    assert(VAStartPseudoInstr->getOperand(OpndIdx).getReg().isPhysical());
  }

  // The original block will now fall through to the GuardedRegsBlk.
  EntryBlk->addSuccessor(GuardedRegsBlk);
  // The GuardedRegsBlk will fall through to the TailBlk.
  GuardedRegsBlk->addSuccessor(TailBlk);

  if (!STI->isCallingConvWin64(Func->getFunction().getCallingConv())) {
    // If %al is 0, branch around the XMM save block.
    BuildMI(EntryBlk, DL, TII->get(X86::TEST8rr))
        .addReg(CountReg)
        .addReg(CountReg);
    BuildMI(EntryBlk, DL, TII->get(X86::JCC_1))
        .addMBB(TailBlk)
        .addImm(X86::COND_E);
    EntryBlk->addSuccessor(TailBlk);
  }

  // Add liveins to the created block.
  addLiveIns(*GuardedRegsBlk, LiveRegs);
  addLiveIns(*TailBlk, LiveRegs);

  // Delete the pseudo.
  VAStartPseudoInstr->eraseFromParent();
}

/// Expand all pseudo instructions contained in \p MBB.
/// \returns true if any expansion occurred for \p MBB.
bool X86ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // MBBI may be invalidated by the expansion.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool X86ExpandPseudo::expandPseudosWhichAffectControlFlow(MachineFunction &MF) {
  // Currently the only pseudo which affects control flow is
  // X86::VASTART_SAVE_XMM_REGS which is located in the entry block.
  // So we do not need to evaluate other blocks.
  for (MachineInstr &Instr : MF.front().instrs()) {
    if (Instr.getOpcode() == X86::VASTART_SAVE_XMM_REGS) {
      expandVastartSaveXmmRegs(&(MF.front()), Instr);
      return true;
    }
  }

  return false;
}

bool X86ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<X86Subtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  X86FI = MF.getInfo<X86MachineFunctionInfo>();
  X86FL = STI->getFrameLowering();

  // Control-flow-affecting pseudos first, so expandMBB can walk blocks
  // without them being split underneath it.
  bool Modified = expandPseudosWhichAffectControlFlow(MF);

  for (MachineBasicBlock &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createX86ExpandPseudoPass() {
  return new X86ExpandPseudo();
}