Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FixupLEAs.cpp
35294 views
//===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines the pass that finds instructions that can be9// re-written as LEA instructions in order to reduce pipeline delays.10// It replaces LEAs with ADD/INC/DEC when that is better for size/speed.11//12//===----------------------------------------------------------------------===//1314#include "X86.h"15#include "X86InstrInfo.h"16#include "X86Subtarget.h"17#include "llvm/ADT/Statistic.h"18#include "llvm/Analysis/ProfileSummaryInfo.h"19#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"20#include "llvm/CodeGen/MachineFunctionPass.h"21#include "llvm/CodeGen/MachineInstrBuilder.h"22#include "llvm/CodeGen/MachineSizeOpts.h"23#include "llvm/CodeGen/Passes.h"24#include "llvm/CodeGen/TargetSchedule.h"25#include "llvm/Support/Debug.h"26#include "llvm/Support/raw_ostream.h"27using namespace llvm;2829#define FIXUPLEA_DESC "X86 LEA Fixup"30#define FIXUPLEA_NAME "x86-fixup-LEAs"3132#define DEBUG_TYPE FIXUPLEA_NAME3334STATISTIC(NumLEAs, "Number of LEA instructions created");3536namespace {37class FixupLEAPass : public MachineFunctionPass {38enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };3940/// Given a machine register, look for the instruction41/// which writes it in the current basic block. If found,42/// try to replace it with an equivalent LEA instruction.43/// If replacement succeeds, then also process the newly created44/// instruction.45void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,46MachineBasicBlock &MBB);4748/// Given a memory access or LEA instruction49/// whose address mode uses a base and/or index register, look for50/// an opportunity to replace the instruction which sets the base or index51/// register with an equivalent LEA instruction.52void processInstruction(MachineBasicBlock::iterator &I,53MachineBasicBlock &MBB);5455/// Given a LEA instruction which is unprofitable56/// on SlowLEA targets try to replace it with an equivalent ADD instruction.57void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,58MachineBasicBlock &MBB);5960/// Given a LEA instruction which is unprofitable61/// on SNB+ try to replace it with other instructions.62/// According to Intel's Optimization Reference Manual:63/// " For LEA instructions with three source operands and some specific64/// situations, instruction latency has increased to 3 cycles, and must65/// dispatch via port 1:66/// - LEA that has all three source operands: base, index, and offset67/// - LEA that uses base and index registers where the base is EBP, RBP,68/// or R1369/// - LEA that uses RIP relative addressing mode70/// - LEA that uses 16-bit addressing mode "71/// This function currently handles the first 2 cases only.72void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,73MachineBasicBlock &MBB, bool OptIncDec);7475/// Look for LEAs that are really two address LEAs that we might be able to76/// turn into regular ADD instructions.77bool optTwoAddrLEA(MachineBasicBlock::iterator &I,78MachineBasicBlock &MBB, bool OptIncDec,79bool UseLEAForSP) const;8081/// Look for and transform the sequence82/// lea (reg1, reg2), reg383/// sub reg3, reg484/// to85/// sub reg1, reg486/// sub reg2, reg487/// It can also optimize the sequence lea/add similarly.88bool optLEAALU(MachineBasicBlock::iterator &I, MachineBasicBlock &MBB) const;8990/// Step forwards in MBB, looking for an ADD/SUB instruction which uses91/// the dest register of LEA instruction I.92MachineBasicBlock::iterator searchALUInst(MachineBasicBlock::iterator &I,93MachineBasicBlock &MBB) const;9495/// Check instructions between LeaI and AluI (exclusively).96/// Set BaseIndexDef to true if base or index register from LeaI is defined.97/// Set AluDestRef to true if the dest register of AluI is used or defined.98/// *KilledBase is set to the killed base register usage.99/// *KilledIndex is set to the killed index register usage.100void checkRegUsage(MachineBasicBlock::iterator &LeaI,101MachineBasicBlock::iterator &AluI, bool &BaseIndexDef,102bool &AluDestRef, MachineOperand **KilledBase,103MachineOperand **KilledIndex) const;104105/// Determine if an instruction references a machine register106/// and, if so, whether it reads or writes the register.107RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);108109/// Step backwards through a basic block, looking110/// for an instruction which writes a register within111/// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.112MachineBasicBlock::iterator searchBackwards(MachineOperand &p,113MachineBasicBlock::iterator &I,114MachineBasicBlock &MBB);115116/// if an instruction can be converted to an117/// equivalent LEA, insert the new instruction into the basic block118/// and return a pointer to it. Otherwise, return zero.119MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,120MachineBasicBlock::iterator &MBBI) const;121122public:123static char ID;124125StringRef getPassName() const override { return FIXUPLEA_DESC; }126127FixupLEAPass() : MachineFunctionPass(ID) { }128129/// Loop over all of the basic blocks,130/// replacing instructions by equivalent LEA instructions131/// if needed and when possible.132bool runOnMachineFunction(MachineFunction &MF) override;133134// This pass runs after regalloc and doesn't support VReg operands.135MachineFunctionProperties getRequiredProperties() const override {136return MachineFunctionProperties().set(137MachineFunctionProperties::Property::NoVRegs);138}139140void getAnalysisUsage(AnalysisUsage &AU) const override {141AU.addRequired<ProfileSummaryInfoWrapperPass>();142AU.addRequired<LazyMachineBlockFrequencyInfoPass>();143MachineFunctionPass::getAnalysisUsage(AU);144}145146private:147TargetSchedModel TSM;148const X86InstrInfo *TII = nullptr;149const X86RegisterInfo *TRI = nullptr;150};151}152153char FixupLEAPass::ID = 0;154155INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)156157MachineInstr *158FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,159MachineBasicBlock::iterator &MBBI) const {160MachineInstr &MI = *MBBI;161switch (MI.getOpcode()) {162case X86::MOV32rr:163case X86::MOV64rr: {164const MachineOperand &Src = MI.getOperand(1);165const MachineOperand &Dest = MI.getOperand(0);166MachineInstr *NewMI =167BuildMI(MBB, MBBI, MI.getDebugLoc(),168TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r169: X86::LEA64r))170.add(Dest)171.add(Src)172.addImm(1)173.addReg(0)174.addImm(0)175.addReg(0);176return NewMI;177}178}179180if (!MI.isConvertibleTo3Addr())181return nullptr;182183switch (MI.getOpcode()) {184default:185// Only convert instructions that we've verified are safe.186return nullptr;187case X86::ADD64ri32:188case X86::ADD64ri32_DB:189case X86::ADD32ri:190case X86::ADD32ri_DB:191if (!MI.getOperand(2).isImm()) {192// convertToThreeAddress will call getImm()193// which requires isImm() to be true194return nullptr;195}196break;197case X86::SHL64ri:198case X86::SHL32ri:199case X86::INC64r:200case X86::INC32r:201case X86::DEC64r:202case X86::DEC32r:203case X86::ADD64rr:204case X86::ADD64rr_DB:205case X86::ADD32rr:206case X86::ADD32rr_DB:207// These instructions are all fine to convert.208break;209}210return TII->convertToThreeAddress(MI, nullptr, nullptr);211}212213FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }214215static bool isLEA(unsigned Opcode) {216return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||217Opcode == X86::LEA64_32r;218}219220bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {221if (skipFunction(MF.getFunction()))222return false;223224const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();225bool IsSlowLEA = ST.slowLEA();226bool IsSlow3OpsLEA = ST.slow3OpsLEA();227bool LEAUsesAG = ST.leaUsesAG();228229bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();230bool UseLEAForSP = ST.useLeaForSP();231232TSM.init(&ST);233TII = ST.getInstrInfo();234TRI = ST.getRegisterInfo();235auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();236auto *MBFI = (PSI && PSI->hasProfileSummary())237? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()238: nullptr;239240LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);241for (MachineBasicBlock &MBB : MF) {242// First pass. Try to remove or optimize existing LEAs.243bool OptIncDecPerBB =244OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);245for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {246if (!isLEA(I->getOpcode()))247continue;248249if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))250continue;251252if (IsSlowLEA)253processInstructionForSlowLEA(I, MBB);254else if (IsSlow3OpsLEA)255processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);256}257258// Second pass for creating LEAs. This may reverse some of the259// transformations above.260if (LEAUsesAG) {261for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)262processInstruction(I, MBB);263}264}265266LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);267268return true;269}270271FixupLEAPass::RegUsageState272FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {273RegUsageState RegUsage = RU_NotUsed;274MachineInstr &MI = *I;275276for (const MachineOperand &MO : MI.operands()) {277if (MO.isReg() && MO.getReg() == p.getReg()) {278if (MO.isDef())279return RU_Write;280RegUsage = RU_Read;281}282}283return RegUsage;284}285286/// getPreviousInstr - Given a reference to an instruction in a basic287/// block, return a reference to the previous instruction in the block,288/// wrapping around to the last instruction of the block if the block289/// branches to itself.290static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,291MachineBasicBlock &MBB) {292if (I == MBB.begin()) {293if (MBB.isPredecessor(&MBB)) {294I = --MBB.end();295return true;296} else297return false;298}299--I;300return true;301}302303MachineBasicBlock::iterator304FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,305MachineBasicBlock &MBB) {306int InstrDistance = 1;307MachineBasicBlock::iterator CurInst;308static const int INSTR_DISTANCE_THRESHOLD = 5;309310CurInst = I;311bool Found;312Found = getPreviousInstr(CurInst, MBB);313while (Found && I != CurInst) {314if (CurInst->isCall() || CurInst->isInlineAsm())315break;316if (InstrDistance > INSTR_DISTANCE_THRESHOLD)317break; // too far back to make a difference318if (usesRegister(p, CurInst) == RU_Write) {319return CurInst;320}321InstrDistance += TSM.computeInstrLatency(&*CurInst);322Found = getPreviousInstr(CurInst, MBB);323}324return MachineBasicBlock::iterator();325}326327static inline bool isInefficientLEAReg(unsigned Reg) {328return Reg == X86::EBP || Reg == X86::RBP ||329Reg == X86::R13D || Reg == X86::R13;330}331332/// Returns true if this LEA uses base and index registers, and the base333/// register is known to be inefficient for the subtarget.334// TODO: use a variant scheduling class to model the latency profile335// of LEA instructions, and implement this logic as a scheduling predicate.336static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,337const MachineOperand &Index) {338return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&339Index.getReg() != X86::NoRegister;340}341342static inline bool hasLEAOffset(const MachineOperand &Offset) {343return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal() ||344Offset.isBlockAddress();345}346347static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {348switch (LEAOpcode) {349default:350llvm_unreachable("Unexpected LEA instruction");351case X86::LEA32r:352case X86::LEA64_32r:353return X86::ADD32rr;354case X86::LEA64r:355return X86::ADD64rr;356}357}358359static inline unsigned getSUBrrFromLEA(unsigned LEAOpcode) {360switch (LEAOpcode) {361default:362llvm_unreachable("Unexpected LEA instruction");363case X86::LEA32r:364case X86::LEA64_32r:365return X86::SUB32rr;366case X86::LEA64r:367return X86::SUB64rr;368}369}370371static inline unsigned getADDriFromLEA(unsigned LEAOpcode,372const MachineOperand &Offset) {373switch (LEAOpcode) {374default:375llvm_unreachable("Unexpected LEA instruction");376case X86::LEA32r:377case X86::LEA64_32r:378return X86::ADD32ri;379case X86::LEA64r:380return X86::ADD64ri32;381}382}383384static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {385switch (LEAOpcode) {386default:387llvm_unreachable("Unexpected LEA instruction");388case X86::LEA32r:389case X86::LEA64_32r:390return IsINC ? X86::INC32r : X86::DEC32r;391case X86::LEA64r:392return IsINC ? X86::INC64r : X86::DEC64r;393}394}395396MachineBasicBlock::iterator397FixupLEAPass::searchALUInst(MachineBasicBlock::iterator &I,398MachineBasicBlock &MBB) const {399const int InstrDistanceThreshold = 5;400int InstrDistance = 1;401MachineBasicBlock::iterator CurInst = std::next(I);402403unsigned LEAOpcode = I->getOpcode();404unsigned AddOpcode = getADDrrFromLEA(LEAOpcode);405unsigned SubOpcode = getSUBrrFromLEA(LEAOpcode);406Register DestReg = I->getOperand(0).getReg();407408while (CurInst != MBB.end()) {409if (CurInst->isCall() || CurInst->isInlineAsm())410break;411if (InstrDistance > InstrDistanceThreshold)412break;413414// Check if the lea dest register is used in an add/sub instruction only.415for (unsigned I = 0, E = CurInst->getNumOperands(); I != E; ++I) {416MachineOperand &Opnd = CurInst->getOperand(I);417if (Opnd.isReg()) {418if (Opnd.getReg() == DestReg) {419if (Opnd.isDef() || !Opnd.isKill())420return MachineBasicBlock::iterator();421422unsigned AluOpcode = CurInst->getOpcode();423if (AluOpcode != AddOpcode && AluOpcode != SubOpcode)424return MachineBasicBlock::iterator();425426MachineOperand &Opnd2 = CurInst->getOperand(3 - I);427MachineOperand AluDest = CurInst->getOperand(0);428if (Opnd2.getReg() != AluDest.getReg())429return MachineBasicBlock::iterator();430431// X - (Y + Z) may generate different flags than (X - Y) - Z when432// there is overflow. So we can't change the alu instruction if the433// flags register is live.434if (!CurInst->registerDefIsDead(X86::EFLAGS, TRI))435return MachineBasicBlock::iterator();436437return CurInst;438}439if (TRI->regsOverlap(DestReg, Opnd.getReg()))440return MachineBasicBlock::iterator();441}442}443444InstrDistance++;445++CurInst;446}447return MachineBasicBlock::iterator();448}449450void FixupLEAPass::checkRegUsage(MachineBasicBlock::iterator &LeaI,451MachineBasicBlock::iterator &AluI,452bool &BaseIndexDef, bool &AluDestRef,453MachineOperand **KilledBase,454MachineOperand **KilledIndex) const {455BaseIndexDef = AluDestRef = false;456*KilledBase = *KilledIndex = nullptr;457Register BaseReg = LeaI->getOperand(1 + X86::AddrBaseReg).getReg();458Register IndexReg = LeaI->getOperand(1 + X86::AddrIndexReg).getReg();459Register AluDestReg = AluI->getOperand(0).getReg();460461for (MachineInstr &CurInst : llvm::make_range(std::next(LeaI), AluI)) {462for (MachineOperand &Opnd : CurInst.operands()) {463if (!Opnd.isReg())464continue;465Register Reg = Opnd.getReg();466if (TRI->regsOverlap(Reg, AluDestReg))467AluDestRef = true;468if (TRI->regsOverlap(Reg, BaseReg)) {469if (Opnd.isDef())470BaseIndexDef = true;471else if (Opnd.isKill())472*KilledBase = &Opnd;473}474if (TRI->regsOverlap(Reg, IndexReg)) {475if (Opnd.isDef())476BaseIndexDef = true;477else if (Opnd.isKill())478*KilledIndex = &Opnd;479}480}481}482}483484bool FixupLEAPass::optLEAALU(MachineBasicBlock::iterator &I,485MachineBasicBlock &MBB) const {486// Look for an add/sub instruction which uses the result of lea.487MachineBasicBlock::iterator AluI = searchALUInst(I, MBB);488if (AluI == MachineBasicBlock::iterator())489return false;490491// Check if there are any related register usage between lea and alu.492bool BaseIndexDef, AluDestRef;493MachineOperand *KilledBase, *KilledIndex;494checkRegUsage(I, AluI, BaseIndexDef, AluDestRef, &KilledBase, &KilledIndex);495496MachineBasicBlock::iterator InsertPos = AluI;497if (BaseIndexDef) {498if (AluDestRef)499return false;500InsertPos = I;501KilledBase = KilledIndex = nullptr;502}503504// Check if there are same registers.505Register AluDestReg = AluI->getOperand(0).getReg();506Register BaseReg = I->getOperand(1 + X86::AddrBaseReg).getReg();507Register IndexReg = I->getOperand(1 + X86::AddrIndexReg).getReg();508if (I->getOpcode() == X86::LEA64_32r) {509BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);510IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);511}512if (AluDestReg == IndexReg) {513if (BaseReg == IndexReg)514return false;515std::swap(BaseReg, IndexReg);516std::swap(KilledBase, KilledIndex);517}518if (BaseReg == IndexReg)519KilledBase = nullptr;520521// Now it's safe to change instructions.522MachineInstr *NewMI1, *NewMI2;523unsigned NewOpcode = AluI->getOpcode();524NewMI1 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),525AluDestReg)526.addReg(AluDestReg, RegState::Kill)527.addReg(BaseReg, KilledBase ? RegState::Kill : 0);528NewMI1->addRegisterDead(X86::EFLAGS, TRI);529NewMI2 = BuildMI(MBB, InsertPos, AluI->getDebugLoc(), TII->get(NewOpcode),530AluDestReg)531.addReg(AluDestReg, RegState::Kill)532.addReg(IndexReg, KilledIndex ? RegState::Kill : 0);533NewMI2->addRegisterDead(X86::EFLAGS, TRI);534535// Clear the old Kill flags.536if (KilledBase)537KilledBase->setIsKill(false);538if (KilledIndex)539KilledIndex->setIsKill(false);540541MBB.getParent()->substituteDebugValuesForInst(*AluI, *NewMI2, 1);542MBB.erase(I);543MBB.erase(AluI);544I = NewMI1;545return true;546}547548bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,549MachineBasicBlock &MBB, bool OptIncDec,550bool UseLEAForSP) const {551MachineInstr &MI = *I;552553const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);554const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);555const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);556const MachineOperand &Disp = MI.getOperand(1 + X86::AddrDisp);557const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);558559if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||560MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=561MachineBasicBlock::LQR_Dead)562return false;563564Register DestReg = MI.getOperand(0).getReg();565Register BaseReg = Base.getReg();566Register IndexReg = Index.getReg();567568// Don't change stack adjustment LEAs.569if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))570return false;571572// LEA64_32 has 64-bit operands but 32-bit result.573if (MI.getOpcode() == X86::LEA64_32r) {574if (BaseReg != 0)575BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);576if (IndexReg != 0)577IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);578}579580MachineInstr *NewMI = nullptr;581582// Case 1.583// Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1584// which can be turned into add %reg2, %reg1585if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&586(DestReg == BaseReg || DestReg == IndexReg)) {587unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());588if (DestReg != BaseReg)589std::swap(BaseReg, IndexReg);590591if (MI.getOpcode() == X86::LEA64_32r) {592// TODO: Do we need the super register implicit use?593NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)594.addReg(BaseReg).addReg(IndexReg)595.addReg(Base.getReg(), RegState::Implicit)596.addReg(Index.getReg(), RegState::Implicit);597} else {598NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)599.addReg(BaseReg).addReg(IndexReg);600}601} else if (DestReg == BaseReg && IndexReg == 0) {602// Case 2.603// This is an LEA with only a base register and a displacement,604// We can use ADDri or INC/DEC.605606// Does this LEA have one these forms:607// lea %reg, 1(%reg)608// lea %reg, -1(%reg)609if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {610bool IsINC = Disp.getImm() == 1;611unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);612613if (MI.getOpcode() == X86::LEA64_32r) {614// TODO: Do we need the super register implicit use?615NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)616.addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);617} else {618NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)619.addReg(BaseReg);620}621} else {622unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);623if (MI.getOpcode() == X86::LEA64_32r) {624// TODO: Do we need the super register implicit use?625NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)626.addReg(BaseReg).addImm(Disp.getImm())627.addReg(Base.getReg(), RegState::Implicit);628} else {629NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)630.addReg(BaseReg).addImm(Disp.getImm());631}632}633} else if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0) {634// Case 3.635// Look for and transform the sequence636// lea (reg1, reg2), reg3637// sub reg3, reg4638return optLEAALU(I, MBB);639} else640return false;641642MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);643MBB.erase(I);644I = NewMI;645return true;646}647648void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,649MachineBasicBlock &MBB) {650// Process a load, store, or LEA instruction.651MachineInstr &MI = *I;652const MCInstrDesc &Desc = MI.getDesc();653int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);654if (AddrOffset >= 0) {655AddrOffset += X86II::getOperandBias(Desc);656MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);657if (p.isReg() && p.getReg() != X86::ESP) {658seekLEAFixup(p, I, MBB);659}660MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);661if (q.isReg() && q.getReg() != X86::ESP) {662seekLEAFixup(q, I, MBB);663}664}665}666667void FixupLEAPass::seekLEAFixup(MachineOperand &p,668MachineBasicBlock::iterator &I,669MachineBasicBlock &MBB) {670MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);671if (MBI != MachineBasicBlock::iterator()) {672MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);673if (NewMI) {674++NumLEAs;675LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););676// now to replace with an equivalent LEA...677LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););678MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);679MBB.erase(MBI);680MachineBasicBlock::iterator J =681static_cast<MachineBasicBlock::iterator>(NewMI);682processInstruction(J, MBB);683}684}685}686687void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,688MachineBasicBlock &MBB) {689MachineInstr &MI = *I;690const unsigned Opcode = MI.getOpcode();691692const MachineOperand &Dst = MI.getOperand(0);693const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);694const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);695const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);696const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);697const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);698699if (Segment.getReg() != 0 || !Offset.isImm() ||700MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=701MachineBasicBlock::LQR_Dead)702return;703const Register DstR = Dst.getReg();704const Register SrcR1 = Base.getReg();705const Register SrcR2 = Index.getReg();706if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))707return;708if (Scale.getImm() > 1)709return;710LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););711LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);712MachineInstr *NewMI = nullptr;713// Make ADD instruction for two registers writing to LEA's destination714if (SrcR1 != 0 && SrcR2 != 0) {715const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));716const MachineOperand &Src = SrcR1 == DstR ? Index : Base;717NewMI =718BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);719LLVM_DEBUG(NewMI->dump(););720}721// Make ADD instruction for immediate722if (Offset.getImm() != 0) {723const MCInstrDesc &ADDri =724TII->get(getADDriFromLEA(Opcode, Offset));725const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;726NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)727.add(SrcR)728.addImm(Offset.getImm());729LLVM_DEBUG(NewMI->dump(););730}731if (NewMI) {732MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);733MBB.erase(I);734I = NewMI;735}736}737738void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,739MachineBasicBlock &MBB,740bool OptIncDec) {741MachineInstr &MI = *I;742const unsigned LEAOpcode = MI.getOpcode();743744const MachineOperand &Dest = MI.getOperand(0);745const MachineOperand &Base = MI.getOperand(1 + X86::AddrBaseReg);746const MachineOperand &Scale = MI.getOperand(1 + X86::AddrScaleAmt);747const MachineOperand &Index = MI.getOperand(1 + X86::AddrIndexReg);748const MachineOperand &Offset = MI.getOperand(1 + X86::AddrDisp);749const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);750751if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||752MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=753MachineBasicBlock::LQR_Dead ||754Segment.getReg() != X86::NoRegister)755return;756757Register DestReg = Dest.getReg();758Register BaseReg = Base.getReg();759Register IndexReg = Index.getReg();760761if (MI.getOpcode() == X86::LEA64_32r) {762if (BaseReg != 0)763BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);764if (IndexReg != 0)765IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);766}767768bool IsScale1 = Scale.getImm() == 1;769bool IsInefficientBase = isInefficientLEAReg(BaseReg);770bool IsInefficientIndex = isInefficientLEAReg(IndexReg);771772// Skip these cases since it takes more than 2 instructions773// to replace the LEA instruction.774if (IsInefficientBase && DestReg == BaseReg && !IsScale1)775return;776777LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););778LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);779780MachineInstr *NewMI = nullptr;781bool BaseOrIndexIsDst = DestReg == BaseReg || DestReg == IndexReg;782// First try and remove the base while sticking with LEA iff base == index and783// scale == 1. We can handle:784// 1. lea D(%base,%index,1) -> lea D(,%index,2)785// 2. lea D(%r13/%rbp,%index) -> lea D(,%index,2)786// Only do this if the LEA would otherwise be split into 2-instruction787// (either it has a an Offset or neither base nor index are dst)788if (IsScale1 && BaseReg == IndexReg &&789(hasLEAOffset(Offset) || (IsInefficientBase && !BaseOrIndexIsDst))) {790NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))791.add(Dest)792.addReg(0)793.addImm(2)794.add(Index)795.add(Offset)796.add(Segment);797LLVM_DEBUG(NewMI->dump(););798799MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);800MBB.erase(I);801I = NewMI;802return;803} else if (IsScale1 && BaseOrIndexIsDst) {804// Try to replace LEA with one or two (for the 3-op LEA case)805// add instructions:806// 1.lea (%base,%index,1), %base => add %index,%base807// 2.lea (%base,%index,1), %index => add %base,%index808809unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());810if (DestReg != BaseReg)811std::swap(BaseReg, IndexReg);812813if (MI.getOpcode() == X86::LEA64_32r) {814// TODO: Do we need the super register implicit use?815NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)816.addReg(BaseReg)817.addReg(IndexReg)818.addReg(Base.getReg(), RegState::Implicit)819.addReg(Index.getReg(), RegState::Implicit);820} else {821NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)822.addReg(BaseReg)823.addReg(IndexReg);824}825} else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {826// If the base is inefficient try switching the index and base operands,827// otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:828// lea offset(%base,%index,scale),%dst =>829// lea (%base,%index,scale); add offset,%dst830NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))831.add(Dest)832.add(IsInefficientBase ? Index : Base)833.add(Scale)834.add(IsInefficientBase ? Base : Index)835.addImm(0)836.add(Segment);837LLVM_DEBUG(NewMI->dump(););838}839840// If either replacement succeeded above, add the offset if needed, then841// replace the instruction.842if (NewMI) {843// Create ADD instruction for the Offset in case of 3-Ops LEA.844if (hasLEAOffset(Offset)) {845if (OptIncDec && Offset.isImm() &&846(Offset.getImm() == 1 || Offset.getImm() == -1)) {847unsigned NewOpc =848getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);849NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)850.addReg(DestReg);851LLVM_DEBUG(NewMI->dump(););852} else {853unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);854NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)855.addReg(DestReg)856.add(Offset);857LLVM_DEBUG(NewMI->dump(););858}859}860861MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);862MBB.erase(I);863I = NewMI;864return;865}866867// Handle the rest of the cases with inefficient base register:868assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");869assert(IsInefficientBase && "efficient base should be handled already!");870871// FIXME: Handle LEA64_32r.872if (LEAOpcode == X86::LEA64_32r)873return;874875// lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst876if (IsScale1 && !hasLEAOffset(Offset)) {877bool BIK = Base.isKill() && BaseReg != IndexReg;878TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);879LLVM_DEBUG(MI.getPrevNode()->dump(););880881unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());882NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)883.addReg(DestReg)884.add(Index);885LLVM_DEBUG(NewMI->dump(););886887MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);888MBB.erase(I);889I = NewMI;890return;891}892893// lea offset(%base,%index,scale), %dst =>894// lea offset( ,%index,scale), %dst; add %base,%dst895NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))896.add(Dest)897.addReg(0)898.add(Scale)899.add(Index)900.add(Offset)901.add(Segment);902LLVM_DEBUG(NewMI->dump(););903904unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());905NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)906.addReg(DestReg)907.add(Base);908LLVM_DEBUG(NewMI->dump(););909910MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);911MBB.erase(I);912I = NewMI;913}914915916