Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
35269 views
//===-- PPCInstrInfo.cpp - PowerPC Instruction Information ----------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file contains the PowerPC implementation of the TargetInstrInfo class.9//10//===----------------------------------------------------------------------===//1112#include "PPCInstrInfo.h"13#include "MCTargetDesc/PPCPredicates.h"14#include "PPC.h"15#include "PPCHazardRecognizers.h"16#include "PPCInstrBuilder.h"17#include "PPCMachineFunctionInfo.h"18#include "PPCTargetMachine.h"19#include "llvm/ADT/STLExtras.h"20#include "llvm/ADT/Statistic.h"21#include "llvm/Analysis/AliasAnalysis.h"22#include "llvm/CodeGen/LiveIntervals.h"23#include "llvm/CodeGen/LivePhysRegs.h"24#include "llvm/CodeGen/MachineCombinerPattern.h"25#include "llvm/CodeGen/MachineConstantPool.h"26#include "llvm/CodeGen/MachineFrameInfo.h"27#include "llvm/CodeGen/MachineFunctionPass.h"28#include "llvm/CodeGen/MachineInstrBuilder.h"29#include "llvm/CodeGen/MachineMemOperand.h"30#include "llvm/CodeGen/MachineRegisterInfo.h"31#include "llvm/CodeGen/PseudoSourceValue.h"32#include "llvm/CodeGen/RegisterClassInfo.h"33#include "llvm/CodeGen/RegisterPressure.h"34#include "llvm/CodeGen/ScheduleDAG.h"35#include "llvm/CodeGen/SlotIndexes.h"36#include "llvm/CodeGen/StackMaps.h"37#include "llvm/MC/MCAsmInfo.h"38#include "llvm/MC/MCInst.h"39#include "llvm/MC/TargetRegistry.h"40#include "llvm/Support/CommandLine.h"41#include "llvm/Support/Debug.h"42#include "llvm/Support/ErrorHandling.h"43#include "llvm/Support/raw_ostream.h"4445using namespace llvm;4647#define DEBUG_TYPE "ppc-instr-info"4849#define GET_INSTRMAP_INFO50#define GET_INSTRINFO_CTOR_DTOR51#include "PPCGenInstrInfo.inc"5253STATISTIC(NumStoreSPILLVSRRCAsVec,54"Number of spillvsrrc spilled to stack as vec");55STATISTIC(NumStoreSPILLVSRRCAsGpr,56"Number of spillvsrrc spilled to stack as gpr");57STATISTIC(NumGPRtoVSRSpill, "Number of gpr spills to spillvsrrc");58STATISTIC(CmpIselsConverted,59"Number of ISELs that depend on comparison of constants converted");60STATISTIC(MissedConvertibleImmediateInstrs,61"Number of compare-immediate instructions fed by constants");62STATISTIC(NumRcRotatesConvertedToRcAnd,63"Number of record-form rotates converted to record-form andi");6465static cl::66opt<bool> DisableCTRLoopAnal("disable-ppc-ctrloop-analysis", cl::Hidden,67cl::desc("Disable analysis for CTR loops"));6869static cl::opt<bool> DisableCmpOpt("disable-ppc-cmp-opt",70cl::desc("Disable compare instruction optimization"), cl::Hidden);7172static cl::opt<bool> VSXSelfCopyCrash("crash-on-ppc-vsx-self-copy",73cl::desc("Causes the backend to crash instead of generating a nop VSX copy"),74cl::Hidden);7576static cl::opt<bool>77UseOldLatencyCalc("ppc-old-latency-calc", cl::Hidden,78cl::desc("Use the old (incorrect) instruction latency calculation"));7980static cl::opt<float>81FMARPFactor("ppc-fma-rp-factor", cl::Hidden, cl::init(1.5),82cl::desc("register pressure factor for the transformations."));8384static cl::opt<bool> EnableFMARegPressureReduction(85"ppc-fma-rp-reduction", cl::Hidden, cl::init(true),86cl::desc("enable register pressure reduce in machine combiner pass."));8788// Pin the vtable to this file.89void PPCInstrInfo::anchor() {}9091PPCInstrInfo::PPCInstrInfo(PPCSubtarget &STI)92: PPCGenInstrInfo(PPC::ADJCALLSTACKDOWN, 
PPC::ADJCALLSTACKUP,93/* CatchRetOpcode */ -1,94STI.isPPC64() ? PPC::BLR8 : PPC::BLR),95Subtarget(STI), RI(STI.getTargetMachine()) {}9697/// CreateTargetHazardRecognizer - Return the hazard recognizer to use for98/// this target when scheduling the DAG.99ScheduleHazardRecognizer *100PPCInstrInfo::CreateTargetHazardRecognizer(const TargetSubtargetInfo *STI,101const ScheduleDAG *DAG) const {102unsigned Directive =103static_cast<const PPCSubtarget *>(STI)->getCPUDirective();104if (Directive == PPC::DIR_440 || Directive == PPC::DIR_A2 ||105Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) {106const InstrItineraryData *II =107static_cast<const PPCSubtarget *>(STI)->getInstrItineraryData();108return new ScoreboardHazardRecognizer(II, DAG);109}110111return TargetInstrInfo::CreateTargetHazardRecognizer(STI, DAG);112}113114/// CreateTargetPostRAHazardRecognizer - Return the postRA hazard recognizer115/// to use for this target when scheduling the DAG.116ScheduleHazardRecognizer *117PPCInstrInfo::CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,118const ScheduleDAG *DAG) const {119unsigned Directive =120DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();121122// FIXME: Leaving this as-is until we have POWER9 scheduling info123if (Directive == PPC::DIR_PWR7 || Directive == PPC::DIR_PWR8)124return new PPCDispatchGroupSBHazardRecognizer(II, DAG);125126// Most subtargets use a PPC970 recognizer.127if (Directive != PPC::DIR_440 && Directive != PPC::DIR_A2 &&128Directive != PPC::DIR_E500mc && Directive != PPC::DIR_E5500) {129assert(DAG->TII && "No InstrInfo?");130131return new PPCHazardRecognizer970(*DAG);132}133134return new ScoreboardHazardRecognizer(II, DAG);135}136137unsigned PPCInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,138const MachineInstr &MI,139unsigned *PredCost) const {140if (!ItinData || UseOldLatencyCalc)141return PPCGenInstrInfo::getInstrLatency(ItinData, MI, PredCost);142143// The default implementation of getInstrLatency calls getStageLatency, but144// getStageLatency does not do the right thing for us. While we have145// itinerary, most cores are fully pipelined, and so the itineraries only146// express the first part of the pipeline, not every stage. 
Instead, we need147// to use the listed output operand cycle number (using operand 0 here, which148// is an output).149150unsigned Latency = 1;151unsigned DefClass = MI.getDesc().getSchedClass();152for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {153const MachineOperand &MO = MI.getOperand(i);154if (!MO.isReg() || !MO.isDef() || MO.isImplicit())155continue;156157std::optional<unsigned> Cycle = ItinData->getOperandCycle(DefClass, i);158if (!Cycle)159continue;160161Latency = std::max(Latency, *Cycle);162}163164return Latency;165}166167std::optional<unsigned> PPCInstrInfo::getOperandLatency(168const InstrItineraryData *ItinData, const MachineInstr &DefMI,169unsigned DefIdx, const MachineInstr &UseMI, unsigned UseIdx) const {170std::optional<unsigned> Latency = PPCGenInstrInfo::getOperandLatency(171ItinData, DefMI, DefIdx, UseMI, UseIdx);172173if (!DefMI.getParent())174return Latency;175176const MachineOperand &DefMO = DefMI.getOperand(DefIdx);177Register Reg = DefMO.getReg();178179bool IsRegCR;180if (Reg.isVirtual()) {181const MachineRegisterInfo *MRI =182&DefMI.getParent()->getParent()->getRegInfo();183IsRegCR = MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRRCRegClass) ||184MRI->getRegClass(Reg)->hasSuperClassEq(&PPC::CRBITRCRegClass);185} else {186IsRegCR = PPC::CRRCRegClass.contains(Reg) ||187PPC::CRBITRCRegClass.contains(Reg);188}189190if (UseMI.isBranch() && IsRegCR) {191if (!Latency)192Latency = getInstrLatency(ItinData, DefMI);193194// On some cores, there is an additional delay between writing to a condition195// register, and using it from a branch.196unsigned Directive = Subtarget.getCPUDirective();197switch (Directive) {198default: break;199case PPC::DIR_7400:200case PPC::DIR_750:201case PPC::DIR_970:202case PPC::DIR_E5500:203case PPC::DIR_PWR4:204case PPC::DIR_PWR5:205case PPC::DIR_PWR5X:206case PPC::DIR_PWR6:207case PPC::DIR_PWR6X:208case PPC::DIR_PWR7:209case PPC::DIR_PWR8:210// FIXME: Is this needed for POWER9?211Latency = *Latency + 2;212break;213}214}215216return Latency;217}218219void PPCInstrInfo::setSpecialOperandAttr(MachineInstr &MI,220uint32_t Flags) const {221MI.setFlags(Flags);222MI.clearFlag(MachineInstr::MIFlag::NoSWrap);223MI.clearFlag(MachineInstr::MIFlag::NoUWrap);224MI.clearFlag(MachineInstr::MIFlag::IsExact);225}226227// This function does not list all associative and commutative operations, but228// only those worth feeding through the machine combiner in an attempt to229// reduce the critical path. 
Mostly, this means floating-point operations,230// because they have high latencies(>=5) (compared to other operations, such as231// and/or, which are also associative and commutative, but have low latencies).232bool PPCInstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,233bool Invert) const {234if (Invert)235return false;236switch (Inst.getOpcode()) {237// Floating point:238// FP Add:239case PPC::FADD:240case PPC::FADDS:241// FP Multiply:242case PPC::FMUL:243case PPC::FMULS:244// Altivec Add:245case PPC::VADDFP:246// VSX Add:247case PPC::XSADDDP:248case PPC::XVADDDP:249case PPC::XVADDSP:250case PPC::XSADDSP:251// VSX Multiply:252case PPC::XSMULDP:253case PPC::XVMULDP:254case PPC::XVMULSP:255case PPC::XSMULSP:256return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&257Inst.getFlag(MachineInstr::MIFlag::FmNsz);258// Fixed point:259// Multiply:260case PPC::MULHD:261case PPC::MULLD:262case PPC::MULHW:263case PPC::MULLW:264return true;265default:266return false;267}268}269270#define InfoArrayIdxFMAInst 0271#define InfoArrayIdxFAddInst 1272#define InfoArrayIdxFMULInst 2273#define InfoArrayIdxAddOpIdx 3274#define InfoArrayIdxMULOpIdx 4275#define InfoArrayIdxFSubInst 5276// Array keeps info for FMA instructions:277// Index 0(InfoArrayIdxFMAInst): FMA instruction;278// Index 1(InfoArrayIdxFAddInst): ADD instruction associated with FMA;279// Index 2(InfoArrayIdxFMULInst): MUL instruction associated with FMA;280// Index 3(InfoArrayIdxAddOpIdx): ADD operand index in FMA operands;281// Index 4(InfoArrayIdxMULOpIdx): first MUL operand index in FMA operands;282// second MUL operand index is plus 1;283// Index 5(InfoArrayIdxFSubInst): SUB instruction associated with FMA.284static const uint16_t FMAOpIdxInfo[][6] = {285// FIXME: Add more FMA instructions like XSNMADDADP and so on.286{PPC::XSMADDADP, PPC::XSADDDP, PPC::XSMULDP, 1, 2, PPC::XSSUBDP},287{PPC::XSMADDASP, PPC::XSADDSP, PPC::XSMULSP, 1, 2, PPC::XSSUBSP},288{PPC::XVMADDADP, PPC::XVADDDP, PPC::XVMULDP, 1, 2, PPC::XVSUBDP},289{PPC::XVMADDASP, PPC::XVADDSP, PPC::XVMULSP, 1, 2, PPC::XVSUBSP},290{PPC::FMADD, PPC::FADD, PPC::FMUL, 3, 1, PPC::FSUB},291{PPC::FMADDS, PPC::FADDS, PPC::FMULS, 3, 1, PPC::FSUBS}};292293// Check if an opcode is a FMA instruction. If it is, return the index in array294// FMAOpIdxInfo. 
Otherwise, return -1.295int16_t PPCInstrInfo::getFMAOpIdxInfo(unsigned Opcode) const {296for (unsigned I = 0; I < std::size(FMAOpIdxInfo); I++)297if (FMAOpIdxInfo[I][InfoArrayIdxFMAInst] == Opcode)298return I;299return -1;300}301302// On PowerPC target, we have two kinds of patterns related to FMA:303// 1: Improve ILP.304// Try to reassociate FMA chains like below:305//306// Pattern 1:307// A = FADD X, Y (Leaf)308// B = FMA A, M21, M22 (Prev)309// C = FMA B, M31, M32 (Root)310// -->311// A = FMA X, M21, M22312// B = FMA Y, M31, M32313// C = FADD A, B314//315// Pattern 2:316// A = FMA X, M11, M12 (Leaf)317// B = FMA A, M21, M22 (Prev)318// C = FMA B, M31, M32 (Root)319// -->320// A = FMUL M11, M12321// B = FMA X, M21, M22322// D = FMA A, M31, M32323// C = FADD B, D324//325// breaking the dependency between A and B, allowing FMA to be executed in326// parallel (or back-to-back in a pipeline) instead of depending on each other.327//328// 2: Reduce register pressure.329// Try to reassociate FMA with FSUB and a constant like below:330// C is a floating point const.331//332// Pattern 1:333// A = FSUB X, Y (Leaf)334// D = FMA B, C, A (Root)335// -->336// A = FMA B, Y, -C337// D = FMA A, X, C338//339// Pattern 2:340// A = FSUB X, Y (Leaf)341// D = FMA B, A, C (Root)342// -->343// A = FMA B, Y, -C344// D = FMA A, X, C345//346// Before the transformation, A must be assigned with different hardware347// register with D. After the transformation, A and D must be assigned with348// same hardware register due to TIE attribute of FMA instructions.349//350bool PPCInstrInfo::getFMAPatterns(MachineInstr &Root,351SmallVectorImpl<unsigned> &Patterns,352bool DoRegPressureReduce) const {353MachineBasicBlock *MBB = Root.getParent();354const MachineRegisterInfo *MRI = &MBB->getParent()->getRegInfo();355const TargetRegisterInfo *TRI = &getRegisterInfo();356357auto IsAllOpsVirtualReg = [](const MachineInstr &Instr) {358for (const auto &MO : Instr.explicit_operands())359if (!(MO.isReg() && MO.getReg().isVirtual()))360return false;361return true;362};363364auto IsReassociableAddOrSub = [&](const MachineInstr &Instr,365unsigned OpType) {366if (Instr.getOpcode() !=367FMAOpIdxInfo[getFMAOpIdxInfo(Root.getOpcode())][OpType])368return false;369370// Instruction can be reassociated.371// fast math flags may prohibit reassociation.372if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&373Instr.getFlag(MachineInstr::MIFlag::FmNsz)))374return false;375376// Instruction operands are virtual registers for reassociation.377if (!IsAllOpsVirtualReg(Instr))378return false;379380// For register pressure reassociation, the FSub must have only one use as381// we want to delete the sub to save its def.382if (OpType == InfoArrayIdxFSubInst &&383!MRI->hasOneNonDBGUse(Instr.getOperand(0).getReg()))384return false;385386return true;387};388389auto IsReassociableFMA = [&](const MachineInstr &Instr, int16_t &AddOpIdx,390int16_t &MulOpIdx, bool IsLeaf) {391int16_t Idx = getFMAOpIdxInfo(Instr.getOpcode());392if (Idx < 0)393return false;394395// Instruction can be reassociated.396// fast math flags may prohibit reassociation.397if (!(Instr.getFlag(MachineInstr::MIFlag::FmReassoc) &&398Instr.getFlag(MachineInstr::MIFlag::FmNsz)))399return false;400401// Instruction operands are virtual registers for reassociation.402if (!IsAllOpsVirtualReg(Instr))403return false;404405MulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];406if (IsLeaf)407return true;408409AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];410411const MachineOperand &OpAdd = 
Instr.getOperand(AddOpIdx);412MachineInstr *MIAdd = MRI->getUniqueVRegDef(OpAdd.getReg());413// If 'add' operand's def is not in current block, don't do ILP related opt.414if (!MIAdd || MIAdd->getParent() != MBB)415return false;416417// If this is not Leaf FMA Instr, its 'add' operand should only have one use418// as this fma will be changed later.419return IsLeaf ? true : MRI->hasOneNonDBGUse(OpAdd.getReg());420};421422int16_t AddOpIdx = -1;423int16_t MulOpIdx = -1;424425bool IsUsedOnceL = false;426bool IsUsedOnceR = false;427MachineInstr *MULInstrL = nullptr;428MachineInstr *MULInstrR = nullptr;429430auto IsRPReductionCandidate = [&]() {431// Currently, we only support float and double.432// FIXME: add support for other types.433unsigned Opcode = Root.getOpcode();434if (Opcode != PPC::XSMADDASP && Opcode != PPC::XSMADDADP)435return false;436437// Root must be a valid FMA like instruction.438// Treat it as leaf as we don't care its add operand.439if (IsReassociableFMA(Root, AddOpIdx, MulOpIdx, true)) {440assert((MulOpIdx >= 0) && "mul operand index not right!");441Register MULRegL = TRI->lookThruSingleUseCopyChain(442Root.getOperand(MulOpIdx).getReg(), MRI);443Register MULRegR = TRI->lookThruSingleUseCopyChain(444Root.getOperand(MulOpIdx + 1).getReg(), MRI);445if (!MULRegL && !MULRegR)446return false;447448if (MULRegL && !MULRegR) {449MULRegR =450TRI->lookThruCopyLike(Root.getOperand(MulOpIdx + 1).getReg(), MRI);451IsUsedOnceL = true;452} else if (!MULRegL && MULRegR) {453MULRegL =454TRI->lookThruCopyLike(Root.getOperand(MulOpIdx).getReg(), MRI);455IsUsedOnceR = true;456} else {457IsUsedOnceL = true;458IsUsedOnceR = true;459}460461if (!MULRegL.isVirtual() || !MULRegR.isVirtual())462return false;463464MULInstrL = MRI->getVRegDef(MULRegL);465MULInstrR = MRI->getVRegDef(MULRegR);466return true;467}468return false;469};470471// Register pressure fma reassociation patterns.472if (DoRegPressureReduce && IsRPReductionCandidate()) {473assert((MULInstrL && MULInstrR) && "wrong register preduction candidate!");474// Register pressure pattern 1475if (isLoadFromConstantPool(MULInstrL) && IsUsedOnceR &&476IsReassociableAddOrSub(*MULInstrR, InfoArrayIdxFSubInst)) {477LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BCA\n");478Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BCA);479return true;480}481482// Register pressure pattern 2483if ((isLoadFromConstantPool(MULInstrR) && IsUsedOnceL &&484IsReassociableAddOrSub(*MULInstrL, InfoArrayIdxFSubInst))) {485LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_BAC\n");486Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_BAC);487return true;488}489}490491// ILP fma reassociation patterns.492// Root must be a valid FMA like instruction.493AddOpIdx = -1;494if (!IsReassociableFMA(Root, AddOpIdx, MulOpIdx, false))495return false;496497assert((AddOpIdx >= 0) && "add operand index not right!");498499Register RegB = Root.getOperand(AddOpIdx).getReg();500MachineInstr *Prev = MRI->getUniqueVRegDef(RegB);501502// Prev must be a valid FMA like instruction.503AddOpIdx = -1;504if (!IsReassociableFMA(*Prev, AddOpIdx, MulOpIdx, false))505return false;506507assert((AddOpIdx >= 0) && "add operand index not right!");508509Register RegA = Prev->getOperand(AddOpIdx).getReg();510MachineInstr *Leaf = MRI->getUniqueVRegDef(RegA);511AddOpIdx = -1;512if (IsReassociableFMA(*Leaf, AddOpIdx, MulOpIdx, true)) {513Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);514LLVM_DEBUG(dbgs() << "add pattern REASSOC_XMM_AMM_BMM\n");515return true;516}517if 
(IsReassociableAddOrSub(*Leaf, InfoArrayIdxFAddInst)) {518Patterns.push_back(PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM);519LLVM_DEBUG(dbgs() << "add pattern REASSOC_XY_AMM_BMM\n");520return true;521}522return false;523}524525void PPCInstrInfo::finalizeInsInstrs(526MachineInstr &Root, unsigned &Pattern,527SmallVectorImpl<MachineInstr *> &InsInstrs) const {528assert(!InsInstrs.empty() && "Instructions set to be inserted is empty!");529530MachineFunction *MF = Root.getMF();531MachineRegisterInfo *MRI = &MF->getRegInfo();532const TargetRegisterInfo *TRI = &getRegisterInfo();533MachineConstantPool *MCP = MF->getConstantPool();534535int16_t Idx = getFMAOpIdxInfo(Root.getOpcode());536if (Idx < 0)537return;538539uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];540541// For now we only need to fix up placeholder for register pressure reduce542// patterns.543Register ConstReg = 0;544switch (Pattern) {545case PPCMachineCombinerPattern::REASSOC_XY_BCA:546ConstReg =547TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), MRI);548break;549case PPCMachineCombinerPattern::REASSOC_XY_BAC:550ConstReg =551TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx + 1).getReg(), MRI);552break;553default:554// Not register pressure reduce patterns.555return;556}557558MachineInstr *ConstDefInstr = MRI->getVRegDef(ConstReg);559// Get const value from const pool.560const Constant *C = getConstantFromConstantPool(ConstDefInstr);561assert(isa<llvm::ConstantFP>(C) && "not a valid constant!");562563// Get negative fp const.564APFloat F1((dyn_cast<ConstantFP>(C))->getValueAPF());565F1.changeSign();566Constant *NegC = ConstantFP::get(dyn_cast<ConstantFP>(C)->getContext(), F1);567Align Alignment = MF->getDataLayout().getPrefTypeAlign(C->getType());568569// Put negative fp const into constant pool.570unsigned ConstPoolIdx = MCP->getConstantPoolIndex(NegC, Alignment);571572MachineOperand *Placeholder = nullptr;573// Record the placeholder PPC::ZERO8 we add in reassociateFMA.574for (auto *Inst : InsInstrs) {575for (MachineOperand &Operand : Inst->explicit_operands()) {576assert(Operand.isReg() && "Invalid instruction in InsInstrs!");577if (Operand.getReg() == PPC::ZERO8) {578Placeholder = &Operand;579break;580}581}582}583584assert(Placeholder && "Placeholder does not exist!");585586// Generate instructions to load the const fp from constant pool.587// We only support PPC64 and medium code model.588Register LoadNewConst =589generateLoadForNewConst(ConstPoolIdx, &Root, C->getType(), InsInstrs);590591// Fill the placeholder with the new load from constant pool.592Placeholder->setReg(LoadNewConst);593}594595bool PPCInstrInfo::shouldReduceRegisterPressure(596const MachineBasicBlock *MBB, const RegisterClassInfo *RegClassInfo) const {597598if (!EnableFMARegPressureReduction)599return false;600601// Currently, we only enable register pressure reducing in machine combiner602// for: 1: PPC64; 2: Code Model is Medium; 3: Power9 which also has vector603// support.604//605// So we need following instructions to access a TOC entry:606//607// %6:g8rc_and_g8rc_nox0 = ADDIStocHA8 $x2, %const.0608// %7:vssrc = DFLOADf32 target-flags(ppc-toc-lo) %const.0,609// killed %6:g8rc_and_g8rc_nox0, implicit $x2 :: (load 4 from constant-pool)610//611// FIXME: add more supported targets, like Small and Large code model, PPC32,612// AIX.613if (!(Subtarget.isPPC64() && Subtarget.hasP9Vector() &&614Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium))615return false;616617const TargetRegisterInfo *TRI = 
&getRegisterInfo();618const MachineFunction *MF = MBB->getParent();619const MachineRegisterInfo *MRI = &MF->getRegInfo();620621auto GetMBBPressure =622[&](const MachineBasicBlock *MBB) -> std::vector<unsigned> {623RegionPressure Pressure;624RegPressureTracker RPTracker(Pressure);625626// Initialize the register pressure tracker.627RPTracker.init(MBB->getParent(), RegClassInfo, nullptr, MBB, MBB->end(),628/*TrackLaneMasks*/ false, /*TrackUntiedDefs=*/true);629630for (const auto &MI : reverse(*MBB)) {631if (MI.isDebugValue() || MI.isDebugLabel())632continue;633RegisterOperands RegOpers;634RegOpers.collect(MI, *TRI, *MRI, false, false);635RPTracker.recedeSkipDebugValues();636assert(&*RPTracker.getPos() == &MI && "RPTracker sync error!");637RPTracker.recede(RegOpers);638}639640// Close the RPTracker to finalize live ins.641RPTracker.closeRegion();642643return RPTracker.getPressure().MaxSetPressure;644};645646// For now we only care about float and double type fma.647unsigned VSSRCLimit = TRI->getRegPressureSetLimit(648*MBB->getParent(), PPC::RegisterPressureSets::VSSRC);649650// Only reduce register pressure when pressure is high.651return GetMBBPressure(MBB)[PPC::RegisterPressureSets::VSSRC] >652(float)VSSRCLimit * FMARPFactor;653}654655bool PPCInstrInfo::isLoadFromConstantPool(MachineInstr *I) const {656// I has only one memory operand which is load from constant pool.657if (!I->hasOneMemOperand())658return false;659660MachineMemOperand *Op = I->memoperands()[0];661return Op->isLoad() && Op->getPseudoValue() &&662Op->getPseudoValue()->kind() == PseudoSourceValue::ConstantPool;663}664665Register PPCInstrInfo::generateLoadForNewConst(666unsigned Idx, MachineInstr *MI, Type *Ty,667SmallVectorImpl<MachineInstr *> &InsInstrs) const {668// Now we only support PPC64, Medium code model and P9 with vector.669// We have immutable pattern to access const pool. 
See function670// shouldReduceRegisterPressure.671assert((Subtarget.isPPC64() && Subtarget.hasP9Vector() &&672Subtarget.getTargetMachine().getCodeModel() == CodeModel::Medium) &&673"Target not supported!\n");674675MachineFunction *MF = MI->getMF();676MachineRegisterInfo *MRI = &MF->getRegInfo();677678// Generate ADDIStocHA8679Register VReg1 = MRI->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);680MachineInstrBuilder TOCOffset =681BuildMI(*MF, MI->getDebugLoc(), get(PPC::ADDIStocHA8), VReg1)682.addReg(PPC::X2)683.addConstantPoolIndex(Idx);684685assert((Ty->isFloatTy() || Ty->isDoubleTy()) &&686"Only float and double are supported!");687688unsigned LoadOpcode;689// Should be float type or double type.690if (Ty->isFloatTy())691LoadOpcode = PPC::DFLOADf32;692else693LoadOpcode = PPC::DFLOADf64;694695const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());696Register VReg2 = MRI->createVirtualRegister(RC);697MachineMemOperand *MMO = MF->getMachineMemOperand(698MachinePointerInfo::getConstantPool(*MF), MachineMemOperand::MOLoad,699Ty->getScalarSizeInBits() / 8, MF->getDataLayout().getPrefTypeAlign(Ty));700701// Generate Load from constant pool.702MachineInstrBuilder Load =703BuildMI(*MF, MI->getDebugLoc(), get(LoadOpcode), VReg2)704.addConstantPoolIndex(Idx)705.addReg(VReg1, getKillRegState(true))706.addMemOperand(MMO);707708Load->getOperand(1).setTargetFlags(PPCII::MO_TOC_LO);709710// Insert the toc load instructions into InsInstrs.711InsInstrs.insert(InsInstrs.begin(), Load);712InsInstrs.insert(InsInstrs.begin(), TOCOffset);713return VReg2;714}715716// This function returns the const value in constant pool if the \p I is a load717// from constant pool.718const Constant *719PPCInstrInfo::getConstantFromConstantPool(MachineInstr *I) const {720MachineFunction *MF = I->getMF();721MachineRegisterInfo *MRI = &MF->getRegInfo();722MachineConstantPool *MCP = MF->getConstantPool();723assert(I->mayLoad() && "Should be a load instruction.\n");724for (auto MO : I->uses()) {725if (!MO.isReg())726continue;727Register Reg = MO.getReg();728if (Reg == 0 || !Reg.isVirtual())729continue;730// Find the toc address.731MachineInstr *DefMI = MRI->getVRegDef(Reg);732for (auto MO2 : DefMI->uses())733if (MO2.isCPI())734return (MCP->getConstants())[MO2.getIndex()].Val.ConstVal;735}736return nullptr;737}738739CombinerObjective PPCInstrInfo::getCombinerObjective(unsigned Pattern) const {740switch (Pattern) {741case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:742case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:743return CombinerObjective::MustReduceDepth;744case PPCMachineCombinerPattern::REASSOC_XY_BCA:745case PPCMachineCombinerPattern::REASSOC_XY_BAC:746return CombinerObjective::MustReduceRegisterPressure;747default:748return TargetInstrInfo::getCombinerObjective(Pattern);749}750}751752bool PPCInstrInfo::getMachineCombinerPatterns(753MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,754bool DoRegPressureReduce) const {755// Using the machine combiner in this way is potentially expensive, so756// restrict to when aggressive optimizations are desired.757if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOptLevel::Aggressive)758return false;759760if (getFMAPatterns(Root, Patterns, DoRegPressureReduce))761return true;762763return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,764DoRegPressureReduce);765}766767void PPCInstrInfo::genAlternativeCodeSequence(768MachineInstr &Root, unsigned Pattern,769SmallVectorImpl<MachineInstr *> 
&InsInstrs,770SmallVectorImpl<MachineInstr *> &DelInstrs,771DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {772switch (Pattern) {773case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:774case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:775case PPCMachineCombinerPattern::REASSOC_XY_BCA:776case PPCMachineCombinerPattern::REASSOC_XY_BAC:777reassociateFMA(Root, Pattern, InsInstrs, DelInstrs, InstrIdxForVirtReg);778break;779default:780// Reassociate default patterns.781TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,782DelInstrs, InstrIdxForVirtReg);783break;784}785}786787void PPCInstrInfo::reassociateFMA(788MachineInstr &Root, unsigned Pattern,789SmallVectorImpl<MachineInstr *> &InsInstrs,790SmallVectorImpl<MachineInstr *> &DelInstrs,791DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {792MachineFunction *MF = Root.getMF();793MachineRegisterInfo &MRI = MF->getRegInfo();794const TargetRegisterInfo *TRI = &getRegisterInfo();795MachineOperand &OpC = Root.getOperand(0);796Register RegC = OpC.getReg();797const TargetRegisterClass *RC = MRI.getRegClass(RegC);798MRI.constrainRegClass(RegC, RC);799800unsigned FmaOp = Root.getOpcode();801int16_t Idx = getFMAOpIdxInfo(FmaOp);802assert(Idx >= 0 && "Root must be a FMA instruction");803804bool IsILPReassociate =805(Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) ||806(Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM);807808uint16_t AddOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxAddOpIdx];809uint16_t FirstMulOpIdx = FMAOpIdxInfo[Idx][InfoArrayIdxMULOpIdx];810811MachineInstr *Prev = nullptr;812MachineInstr *Leaf = nullptr;813switch (Pattern) {814default:815llvm_unreachable("not recognized pattern!");816case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM:817case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM:818Prev = MRI.getUniqueVRegDef(Root.getOperand(AddOpIdx).getReg());819Leaf = MRI.getUniqueVRegDef(Prev->getOperand(AddOpIdx).getReg());820break;821case PPCMachineCombinerPattern::REASSOC_XY_BAC: {822Register MULReg =823TRI->lookThruCopyLike(Root.getOperand(FirstMulOpIdx).getReg(), &MRI);824Leaf = MRI.getVRegDef(MULReg);825break;826}827case PPCMachineCombinerPattern::REASSOC_XY_BCA: {828Register MULReg = TRI->lookThruCopyLike(829Root.getOperand(FirstMulOpIdx + 1).getReg(), &MRI);830Leaf = MRI.getVRegDef(MULReg);831break;832}833}834835uint32_t IntersectedFlags = 0;836if (IsILPReassociate)837IntersectedFlags = Root.getFlags() & Prev->getFlags() & Leaf->getFlags();838else839IntersectedFlags = Root.getFlags() & Leaf->getFlags();840841auto GetOperandInfo = [&](const MachineOperand &Operand, Register &Reg,842bool &KillFlag) {843Reg = Operand.getReg();844MRI.constrainRegClass(Reg, RC);845KillFlag = Operand.isKill();846};847848auto GetFMAInstrInfo = [&](const MachineInstr &Instr, Register &MulOp1,849Register &MulOp2, Register &AddOp,850bool &MulOp1KillFlag, bool &MulOp2KillFlag,851bool &AddOpKillFlag) {852GetOperandInfo(Instr.getOperand(FirstMulOpIdx), MulOp1, MulOp1KillFlag);853GetOperandInfo(Instr.getOperand(FirstMulOpIdx + 1), MulOp2, MulOp2KillFlag);854GetOperandInfo(Instr.getOperand(AddOpIdx), AddOp, AddOpKillFlag);855};856857Register RegM11, RegM12, RegX, RegY, RegM21, RegM22, RegM31, RegM32, RegA11,858RegA21, RegB;859bool KillX = false, KillY = false, KillM11 = false, KillM12 = false,860KillM21 = false, KillM22 = false, KillM31 = false, KillM32 = false,861KillA11 = false, KillA21 = false, KillB = false;862863GetFMAInstrInfo(Root, RegM31, RegM32, RegB, KillM31, KillM32, KillB);864865if 
(IsILPReassociate)866GetFMAInstrInfo(*Prev, RegM21, RegM22, RegA21, KillM21, KillM22, KillA21);867868if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {869GetFMAInstrInfo(*Leaf, RegM11, RegM12, RegA11, KillM11, KillM12, KillA11);870GetOperandInfo(Leaf->getOperand(AddOpIdx), RegX, KillX);871} else if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM) {872GetOperandInfo(Leaf->getOperand(1), RegX, KillX);873GetOperandInfo(Leaf->getOperand(2), RegY, KillY);874} else {875// Get FSUB instruction info.876GetOperandInfo(Leaf->getOperand(1), RegX, KillX);877GetOperandInfo(Leaf->getOperand(2), RegY, KillY);878}879880// Create new virtual registers for the new results instead of881// recycling legacy ones because the MachineCombiner's computation of the882// critical path requires a new register definition rather than an existing883// one.884// For register pressure reassociation, we only need create one virtual885// register for the new fma.886Register NewVRA = MRI.createVirtualRegister(RC);887InstrIdxForVirtReg.insert(std::make_pair(NewVRA, 0));888889Register NewVRB = 0;890if (IsILPReassociate) {891NewVRB = MRI.createVirtualRegister(RC);892InstrIdxForVirtReg.insert(std::make_pair(NewVRB, 1));893}894895Register NewVRD = 0;896if (Pattern == PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM) {897NewVRD = MRI.createVirtualRegister(RC);898InstrIdxForVirtReg.insert(std::make_pair(NewVRD, 2));899}900901auto AdjustOperandOrder = [&](MachineInstr *MI, Register RegAdd, bool KillAdd,902Register RegMul1, bool KillRegMul1,903Register RegMul2, bool KillRegMul2) {904MI->getOperand(AddOpIdx).setReg(RegAdd);905MI->getOperand(AddOpIdx).setIsKill(KillAdd);906MI->getOperand(FirstMulOpIdx).setReg(RegMul1);907MI->getOperand(FirstMulOpIdx).setIsKill(KillRegMul1);908MI->getOperand(FirstMulOpIdx + 1).setReg(RegMul2);909MI->getOperand(FirstMulOpIdx + 1).setIsKill(KillRegMul2);910};911912MachineInstrBuilder NewARegPressure, NewCRegPressure;913switch (Pattern) {914default:915llvm_unreachable("not recognized pattern!");916case PPCMachineCombinerPattern::REASSOC_XY_AMM_BMM: {917// Create new instructions for insertion.918MachineInstrBuilder MINewB =919BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)920.addReg(RegX, getKillRegState(KillX))921.addReg(RegM21, getKillRegState(KillM21))922.addReg(RegM22, getKillRegState(KillM22));923MachineInstrBuilder MINewA =924BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)925.addReg(RegY, getKillRegState(KillY))926.addReg(RegM31, getKillRegState(KillM31))927.addReg(RegM32, getKillRegState(KillM32));928// If AddOpIdx is not 1, adjust the order.929if (AddOpIdx != 1) {930AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);931AdjustOperandOrder(MINewA, RegY, KillY, RegM31, KillM31, RegM32, KillM32);932}933934MachineInstrBuilder MINewC =935BuildMI(*MF, Root.getDebugLoc(),936get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)937.addReg(NewVRB, getKillRegState(true))938.addReg(NewVRA, getKillRegState(true));939940// Update flags for newly created instructions.941setSpecialOperandAttr(*MINewA, IntersectedFlags);942setSpecialOperandAttr(*MINewB, IntersectedFlags);943setSpecialOperandAttr(*MINewC, IntersectedFlags);944945// Record new instructions for insertion.946InsInstrs.push_back(MINewA);947InsInstrs.push_back(MINewB);948InsInstrs.push_back(MINewC);949break;950}951case PPCMachineCombinerPattern::REASSOC_XMM_AMM_BMM: {952assert(NewVRD && "new FMA register not created!");953// Create new instructions for insertion.954MachineInstrBuilder MINewA 
=955BuildMI(*MF, Leaf->getDebugLoc(),956get(FMAOpIdxInfo[Idx][InfoArrayIdxFMULInst]), NewVRA)957.addReg(RegM11, getKillRegState(KillM11))958.addReg(RegM12, getKillRegState(KillM12));959MachineInstrBuilder MINewB =960BuildMI(*MF, Prev->getDebugLoc(), get(FmaOp), NewVRB)961.addReg(RegX, getKillRegState(KillX))962.addReg(RegM21, getKillRegState(KillM21))963.addReg(RegM22, getKillRegState(KillM22));964MachineInstrBuilder MINewD =965BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRD)966.addReg(NewVRA, getKillRegState(true))967.addReg(RegM31, getKillRegState(KillM31))968.addReg(RegM32, getKillRegState(KillM32));969// If AddOpIdx is not 1, adjust the order.970if (AddOpIdx != 1) {971AdjustOperandOrder(MINewB, RegX, KillX, RegM21, KillM21, RegM22, KillM22);972AdjustOperandOrder(MINewD, NewVRA, true, RegM31, KillM31, RegM32,973KillM32);974}975976MachineInstrBuilder MINewC =977BuildMI(*MF, Root.getDebugLoc(),978get(FMAOpIdxInfo[Idx][InfoArrayIdxFAddInst]), RegC)979.addReg(NewVRB, getKillRegState(true))980.addReg(NewVRD, getKillRegState(true));981982// Update flags for newly created instructions.983setSpecialOperandAttr(*MINewA, IntersectedFlags);984setSpecialOperandAttr(*MINewB, IntersectedFlags);985setSpecialOperandAttr(*MINewD, IntersectedFlags);986setSpecialOperandAttr(*MINewC, IntersectedFlags);987988// Record new instructions for insertion.989InsInstrs.push_back(MINewA);990InsInstrs.push_back(MINewB);991InsInstrs.push_back(MINewD);992InsInstrs.push_back(MINewC);993break;994}995case PPCMachineCombinerPattern::REASSOC_XY_BAC:996case PPCMachineCombinerPattern::REASSOC_XY_BCA: {997Register VarReg;998bool KillVarReg = false;999if (Pattern == PPCMachineCombinerPattern::REASSOC_XY_BCA) {1000VarReg = RegM31;1001KillVarReg = KillM31;1002} else {1003VarReg = RegM32;1004KillVarReg = KillM32;1005}1006// We don't want to get negative const from memory pool too early, as the1007// created entry will not be deleted even if it has no users. Since all1008// operand of Leaf and Root are virtual register, we use zero register1009// here as a placeholder. 
When the InsInstrs is selected in1010// MachineCombiner, we call finalizeInsInstrs to replace the zero register1011// with a virtual register which is a load from constant pool.1012NewARegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), NewVRA)1013.addReg(RegB, getKillRegState(RegB))1014.addReg(RegY, getKillRegState(KillY))1015.addReg(PPC::ZERO8);1016NewCRegPressure = BuildMI(*MF, Root.getDebugLoc(), get(FmaOp), RegC)1017.addReg(NewVRA, getKillRegState(true))1018.addReg(RegX, getKillRegState(KillX))1019.addReg(VarReg, getKillRegState(KillVarReg));1020// For now, we only support xsmaddadp/xsmaddasp, their add operand are1021// both at index 1, no need to adjust.1022// FIXME: when add more fma instructions support, like fma/fmas, adjust1023// the operand index here.1024break;1025}1026}10271028if (!IsILPReassociate) {1029setSpecialOperandAttr(*NewARegPressure, IntersectedFlags);1030setSpecialOperandAttr(*NewCRegPressure, IntersectedFlags);10311032InsInstrs.push_back(NewARegPressure);1033InsInstrs.push_back(NewCRegPressure);1034}10351036assert(!InsInstrs.empty() &&1037"Insertion instructions set should not be empty!");10381039// Record old instructions for deletion.1040DelInstrs.push_back(Leaf);1041if (IsILPReassociate)1042DelInstrs.push_back(Prev);1043DelInstrs.push_back(&Root);1044}10451046// Detect 32 -> 64-bit extensions where we may reuse the low sub-register.1047bool PPCInstrInfo::isCoalescableExtInstr(const MachineInstr &MI,1048Register &SrcReg, Register &DstReg,1049unsigned &SubIdx) const {1050switch (MI.getOpcode()) {1051default: return false;1052case PPC::EXTSW:1053case PPC::EXTSW_32:1054case PPC::EXTSW_32_64:1055SrcReg = MI.getOperand(1).getReg();1056DstReg = MI.getOperand(0).getReg();1057SubIdx = PPC::sub_32;1058return true;1059}1060}10611062Register PPCInstrInfo::isLoadFromStackSlot(const MachineInstr &MI,1063int &FrameIndex) const {1064if (llvm::is_contained(getLoadOpcodesForSpillArray(), MI.getOpcode())) {1065// Check for the operands added by addFrameReference (the immediate is the1066// offset which defaults to 0).1067if (MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&1068MI.getOperand(2).isFI()) {1069FrameIndex = MI.getOperand(2).getIndex();1070return MI.getOperand(0).getReg();1071}1072}1073return 0;1074}10751076// For opcodes with the ReMaterializable flag set, this function is called to1077// verify the instruction is really rematable.1078bool PPCInstrInfo::isReallyTriviallyReMaterializable(1079const MachineInstr &MI) const {1080switch (MI.getOpcode()) {1081default:1082// Let base implementaion decide.1083break;1084case PPC::LI:1085case PPC::LI8:1086case PPC::PLI:1087case PPC::PLI8:1088case PPC::LIS:1089case PPC::LIS8:1090case PPC::ADDIStocHA:1091case PPC::ADDIStocHA8:1092case PPC::ADDItocL:1093case PPC::ADDItocL8:1094case PPC::LOAD_STACK_GUARD:1095case PPC::PPCLdFixedAddr:1096case PPC::XXLXORz:1097case PPC::XXLXORspz:1098case PPC::XXLXORdpz:1099case PPC::XXLEQVOnes:1100case PPC::XXSPLTI32DX:1101case PPC::XXSPLTIW:1102case PPC::XXSPLTIDP:1103case PPC::V_SET0B:1104case PPC::V_SET0H:1105case PPC::V_SET0:1106case PPC::V_SETALLONESB:1107case PPC::V_SETALLONESH:1108case PPC::V_SETALLONES:1109case PPC::CRSET:1110case PPC::CRUNSET:1111case PPC::XXSETACCZ:1112case PPC::XXSETACCZW:1113return true;1114}1115return TargetInstrInfo::isReallyTriviallyReMaterializable(MI);1116}11171118Register PPCInstrInfo::isStoreToStackSlot(const MachineInstr &MI,1119int &FrameIndex) const {1120if (llvm::is_contained(getStoreOpcodesForSpillArray(), MI.getOpcode())) {1121if 
(MI.getOperand(1).isImm() && !MI.getOperand(1).getImm() &&1122MI.getOperand(2).isFI()) {1123FrameIndex = MI.getOperand(2).getIndex();1124return MI.getOperand(0).getReg();1125}1126}1127return 0;1128}11291130MachineInstr *PPCInstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,1131unsigned OpIdx1,1132unsigned OpIdx2) const {1133MachineFunction &MF = *MI.getParent()->getParent();11341135// Normal instructions can be commuted the obvious way.1136if (MI.getOpcode() != PPC::RLWIMI && MI.getOpcode() != PPC::RLWIMI_rec)1137return TargetInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2);1138// Note that RLWIMI can be commuted as a 32-bit instruction, but not as a1139// 64-bit instruction (so we don't handle PPC::RLWIMI8 here), because1140// changing the relative order of the mask operands might change what happens1141// to the high-bits of the mask (and, thus, the result).11421143// Cannot commute if it has a non-zero rotate count.1144if (MI.getOperand(3).getImm() != 0)1145return nullptr;11461147// If we have a zero rotate count, we have:1148// M = mask(MB,ME)1149// Op0 = (Op1 & ~M) | (Op2 & M)1150// Change this to:1151// M = mask((ME+1)&31, (MB-1)&31)1152// Op0 = (Op2 & ~M) | (Op1 & M)11531154// Swap op1/op21155assert(((OpIdx1 == 1 && OpIdx2 == 2) || (OpIdx1 == 2 && OpIdx2 == 1)) &&1156"Only the operands 1 and 2 can be swapped in RLSIMI/RLWIMI_rec.");1157Register Reg0 = MI.getOperand(0).getReg();1158Register Reg1 = MI.getOperand(1).getReg();1159Register Reg2 = MI.getOperand(2).getReg();1160unsigned SubReg1 = MI.getOperand(1).getSubReg();1161unsigned SubReg2 = MI.getOperand(2).getSubReg();1162bool Reg1IsKill = MI.getOperand(1).isKill();1163bool Reg2IsKill = MI.getOperand(2).isKill();1164bool ChangeReg0 = false;1165// If machine instrs are no longer in two-address forms, update1166// destination register as well.1167if (Reg0 == Reg1) {1168// Must be two address instruction (i.e. op1 is tied to op0).1169assert(MI.getDesc().getOperandConstraint(1, MCOI::TIED_TO) == 0 &&1170"Expecting a two-address instruction!");1171assert(MI.getOperand(0).getSubReg() == SubReg1 && "Tied subreg mismatch");1172Reg2IsKill = false;1173ChangeReg0 = true;1174}11751176// Masks.1177unsigned MB = MI.getOperand(4).getImm();1178unsigned ME = MI.getOperand(5).getImm();11791180// We can't commute a trivial mask (there is no way to represent an all-zero1181// mask).1182if (MB == 0 && ME == 31)1183return nullptr;11841185if (NewMI) {1186// Create a new instruction.1187Register Reg0 = ChangeReg0 ? 
Reg2 : MI.getOperand(0).getReg();1188bool Reg0IsDead = MI.getOperand(0).isDead();1189return BuildMI(MF, MI.getDebugLoc(), MI.getDesc())1190.addReg(Reg0, RegState::Define | getDeadRegState(Reg0IsDead))1191.addReg(Reg2, getKillRegState(Reg2IsKill))1192.addReg(Reg1, getKillRegState(Reg1IsKill))1193.addImm((ME + 1) & 31)1194.addImm((MB - 1) & 31);1195}11961197if (ChangeReg0) {1198MI.getOperand(0).setReg(Reg2);1199MI.getOperand(0).setSubReg(SubReg2);1200}1201MI.getOperand(2).setReg(Reg1);1202MI.getOperand(1).setReg(Reg2);1203MI.getOperand(2).setSubReg(SubReg1);1204MI.getOperand(1).setSubReg(SubReg2);1205MI.getOperand(2).setIsKill(Reg1IsKill);1206MI.getOperand(1).setIsKill(Reg2IsKill);12071208// Swap the mask around.1209MI.getOperand(4).setImm((ME + 1) & 31);1210MI.getOperand(5).setImm((MB - 1) & 31);1211return &MI;1212}12131214bool PPCInstrInfo::findCommutedOpIndices(const MachineInstr &MI,1215unsigned &SrcOpIdx1,1216unsigned &SrcOpIdx2) const {1217// For VSX A-Type FMA instructions, it is the first two operands that can be1218// commuted, however, because the non-encoded tied input operand is listed1219// first, the operands to swap are actually the second and third.12201221int AltOpc = PPC::getAltVSXFMAOpcode(MI.getOpcode());1222if (AltOpc == -1)1223return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);12241225// The commutable operand indices are 2 and 3. Return them in SrcOpIdx11226// and SrcOpIdx2.1227return fixCommutedOpIndices(SrcOpIdx1, SrcOpIdx2, 2, 3);1228}12291230void PPCInstrInfo::insertNoop(MachineBasicBlock &MBB,1231MachineBasicBlock::iterator MI) const {1232// This function is used for scheduling, and the nop wanted here is the type1233// that terminates dispatch groups on the POWER cores.1234unsigned Directive = Subtarget.getCPUDirective();1235unsigned Opcode;1236switch (Directive) {1237default: Opcode = PPC::NOP; break;1238case PPC::DIR_PWR6: Opcode = PPC::NOP_GT_PWR6; break;1239case PPC::DIR_PWR7: Opcode = PPC::NOP_GT_PWR7; break;1240case PPC::DIR_PWR8: Opcode = PPC::NOP_GT_PWR7; break; /* FIXME: Update when P8 InstrScheduling model is ready */1241// FIXME: Update when POWER9 scheduling model is ready.1242case PPC::DIR_PWR9: Opcode = PPC::NOP_GT_PWR7; break;1243}12441245DebugLoc DL;1246BuildMI(MBB, MI, DL, get(Opcode));1247}12481249/// Return the noop instruction to use for a noop.1250MCInst PPCInstrInfo::getNop() const {1251MCInst Nop;1252Nop.setOpcode(PPC::NOP);1253return Nop;1254}12551256// Branch analysis.1257// Note: If the condition register is set to CTR or CTR8 then this is a1258// BDNZ (imm == 1) or BDZ (imm == 0) branch.1259bool PPCInstrInfo::analyzeBranch(MachineBasicBlock &MBB,1260MachineBasicBlock *&TBB,1261MachineBasicBlock *&FBB,1262SmallVectorImpl<MachineOperand> &Cond,1263bool AllowModify) const {1264bool isPPC64 = Subtarget.isPPC64();12651266// If the block has no terminators, it just falls into the block after it.1267MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();1268if (I == MBB.end())1269return false;12701271if (!isUnpredicatedTerminator(*I))1272return false;12731274if (AllowModify) {1275// If the BB ends with an unconditional branch to the fallthrough BB,1276// we eliminate the branch instruction.1277if (I->getOpcode() == PPC::B &&1278MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {1279I->eraseFromParent();12801281// We update iterator after deleting the last branch.1282I = MBB.getLastNonDebugInstr();1283if (I == MBB.end() || !isUnpredicatedTerminator(*I))1284return false;1285}1286}12871288// Get the last instruction in 
the block.1289MachineInstr &LastInst = *I;12901291// If there is only one terminator instruction, process it.1292if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {1293if (LastInst.getOpcode() == PPC::B) {1294if (!LastInst.getOperand(0).isMBB())1295return true;1296TBB = LastInst.getOperand(0).getMBB();1297return false;1298} else if (LastInst.getOpcode() == PPC::BCC) {1299if (!LastInst.getOperand(2).isMBB())1300return true;1301// Block ends with fall-through condbranch.1302TBB = LastInst.getOperand(2).getMBB();1303Cond.push_back(LastInst.getOperand(0));1304Cond.push_back(LastInst.getOperand(1));1305return false;1306} else if (LastInst.getOpcode() == PPC::BC) {1307if (!LastInst.getOperand(1).isMBB())1308return true;1309// Block ends with fall-through condbranch.1310TBB = LastInst.getOperand(1).getMBB();1311Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));1312Cond.push_back(LastInst.getOperand(0));1313return false;1314} else if (LastInst.getOpcode() == PPC::BCn) {1315if (!LastInst.getOperand(1).isMBB())1316return true;1317// Block ends with fall-through condbranch.1318TBB = LastInst.getOperand(1).getMBB();1319Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));1320Cond.push_back(LastInst.getOperand(0));1321return false;1322} else if (LastInst.getOpcode() == PPC::BDNZ8 ||1323LastInst.getOpcode() == PPC::BDNZ) {1324if (!LastInst.getOperand(0).isMBB())1325return true;1326if (DisableCTRLoopAnal)1327return true;1328TBB = LastInst.getOperand(0).getMBB();1329Cond.push_back(MachineOperand::CreateImm(1));1330Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,1331true));1332return false;1333} else if (LastInst.getOpcode() == PPC::BDZ8 ||1334LastInst.getOpcode() == PPC::BDZ) {1335if (!LastInst.getOperand(0).isMBB())1336return true;1337if (DisableCTRLoopAnal)1338return true;1339TBB = LastInst.getOperand(0).getMBB();1340Cond.push_back(MachineOperand::CreateImm(0));1341Cond.push_back(MachineOperand::CreateReg(isPPC64 ? 
PPC::CTR8 : PPC::CTR,1342true));1343return false;1344}13451346// Otherwise, don't know what this is.1347return true;1348}13491350// Get the instruction before it if it's a terminator.1351MachineInstr &SecondLastInst = *I;13521353// If there are three terminators, we don't know what sort of block this is.1354if (I != MBB.begin() && isUnpredicatedTerminator(*--I))1355return true;13561357// If the block ends with PPC::B and PPC:BCC, handle it.1358if (SecondLastInst.getOpcode() == PPC::BCC &&1359LastInst.getOpcode() == PPC::B) {1360if (!SecondLastInst.getOperand(2).isMBB() ||1361!LastInst.getOperand(0).isMBB())1362return true;1363TBB = SecondLastInst.getOperand(2).getMBB();1364Cond.push_back(SecondLastInst.getOperand(0));1365Cond.push_back(SecondLastInst.getOperand(1));1366FBB = LastInst.getOperand(0).getMBB();1367return false;1368} else if (SecondLastInst.getOpcode() == PPC::BC &&1369LastInst.getOpcode() == PPC::B) {1370if (!SecondLastInst.getOperand(1).isMBB() ||1371!LastInst.getOperand(0).isMBB())1372return true;1373TBB = SecondLastInst.getOperand(1).getMBB();1374Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_SET));1375Cond.push_back(SecondLastInst.getOperand(0));1376FBB = LastInst.getOperand(0).getMBB();1377return false;1378} else if (SecondLastInst.getOpcode() == PPC::BCn &&1379LastInst.getOpcode() == PPC::B) {1380if (!SecondLastInst.getOperand(1).isMBB() ||1381!LastInst.getOperand(0).isMBB())1382return true;1383TBB = SecondLastInst.getOperand(1).getMBB();1384Cond.push_back(MachineOperand::CreateImm(PPC::PRED_BIT_UNSET));1385Cond.push_back(SecondLastInst.getOperand(0));1386FBB = LastInst.getOperand(0).getMBB();1387return false;1388} else if ((SecondLastInst.getOpcode() == PPC::BDNZ8 ||1389SecondLastInst.getOpcode() == PPC::BDNZ) &&1390LastInst.getOpcode() == PPC::B) {1391if (!SecondLastInst.getOperand(0).isMBB() ||1392!LastInst.getOperand(0).isMBB())1393return true;1394if (DisableCTRLoopAnal)1395return true;1396TBB = SecondLastInst.getOperand(0).getMBB();1397Cond.push_back(MachineOperand::CreateImm(1));1398Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,1399true));1400FBB = LastInst.getOperand(0).getMBB();1401return false;1402} else if ((SecondLastInst.getOpcode() == PPC::BDZ8 ||1403SecondLastInst.getOpcode() == PPC::BDZ) &&1404LastInst.getOpcode() == PPC::B) {1405if (!SecondLastInst.getOperand(0).isMBB() ||1406!LastInst.getOperand(0).isMBB())1407return true;1408if (DisableCTRLoopAnal)1409return true;1410TBB = SecondLastInst.getOperand(0).getMBB();1411Cond.push_back(MachineOperand::CreateImm(0));1412Cond.push_back(MachineOperand::CreateReg(isPPC64 ? PPC::CTR8 : PPC::CTR,1413true));1414FBB = LastInst.getOperand(0).getMBB();1415return false;1416}14171418// If the block ends with two PPC:Bs, handle it. 
The second one is not1419// executed, so remove it.1420if (SecondLastInst.getOpcode() == PPC::B && LastInst.getOpcode() == PPC::B) {1421if (!SecondLastInst.getOperand(0).isMBB())1422return true;1423TBB = SecondLastInst.getOperand(0).getMBB();1424I = LastInst;1425if (AllowModify)1426I->eraseFromParent();1427return false;1428}14291430// Otherwise, can't handle this.1431return true;1432}14331434unsigned PPCInstrInfo::removeBranch(MachineBasicBlock &MBB,1435int *BytesRemoved) const {1436assert(!BytesRemoved && "code size not handled");14371438MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();1439if (I == MBB.end())1440return 0;14411442if (I->getOpcode() != PPC::B && I->getOpcode() != PPC::BCC &&1443I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&1444I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&1445I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)1446return 0;14471448// Remove the branch.1449I->eraseFromParent();14501451I = MBB.end();14521453if (I == MBB.begin()) return 1;1454--I;1455if (I->getOpcode() != PPC::BCC &&1456I->getOpcode() != PPC::BC && I->getOpcode() != PPC::BCn &&1457I->getOpcode() != PPC::BDNZ8 && I->getOpcode() != PPC::BDNZ &&1458I->getOpcode() != PPC::BDZ8 && I->getOpcode() != PPC::BDZ)1459return 1;14601461// Remove the branch.1462I->eraseFromParent();1463return 2;1464}14651466unsigned PPCInstrInfo::insertBranch(MachineBasicBlock &MBB,1467MachineBasicBlock *TBB,1468MachineBasicBlock *FBB,1469ArrayRef<MachineOperand> Cond,1470const DebugLoc &DL,1471int *BytesAdded) const {1472// Shouldn't be a fall through.1473assert(TBB && "insertBranch must not be told to insert a fallthrough");1474assert((Cond.size() == 2 || Cond.size() == 0) &&1475"PPC branch conditions have two components!");1476assert(!BytesAdded && "code size not handled");14771478bool isPPC64 = Subtarget.isPPC64();14791480// One-way branch.1481if (!FBB) {1482if (Cond.empty()) // Unconditional branch1483BuildMI(&MBB, DL, get(PPC::B)).addMBB(TBB);1484else if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)1485BuildMI(&MBB, DL, get(Cond[0].getImm() ?1486(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :1487(isPPC64 ? PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);1488else if (Cond[0].getImm() == PPC::PRED_BIT_SET)1489BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);1490else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)1491BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);1492else // Conditional branch1493BuildMI(&MBB, DL, get(PPC::BCC))1494.addImm(Cond[0].getImm())1495.add(Cond[1])1496.addMBB(TBB);1497return 1;1498}14991500// Two-way Conditional Branch.1501if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)1502BuildMI(&MBB, DL, get(Cond[0].getImm() ?1503(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ) :1504(isPPC64 ? 
PPC::BDZ8 : PPC::BDZ))).addMBB(TBB);1505else if (Cond[0].getImm() == PPC::PRED_BIT_SET)1506BuildMI(&MBB, DL, get(PPC::BC)).add(Cond[1]).addMBB(TBB);1507else if (Cond[0].getImm() == PPC::PRED_BIT_UNSET)1508BuildMI(&MBB, DL, get(PPC::BCn)).add(Cond[1]).addMBB(TBB);1509else1510BuildMI(&MBB, DL, get(PPC::BCC))1511.addImm(Cond[0].getImm())1512.add(Cond[1])1513.addMBB(TBB);1514BuildMI(&MBB, DL, get(PPC::B)).addMBB(FBB);1515return 2;1516}15171518// Select analysis.1519bool PPCInstrInfo::canInsertSelect(const MachineBasicBlock &MBB,1520ArrayRef<MachineOperand> Cond,1521Register DstReg, Register TrueReg,1522Register FalseReg, int &CondCycles,1523int &TrueCycles, int &FalseCycles) const {1524if (!Subtarget.hasISEL())1525return false;15261527if (Cond.size() != 2)1528return false;15291530// If this is really a bdnz-like condition, then it cannot be turned into a1531// select.1532if (Cond[1].getReg() == PPC::CTR || Cond[1].getReg() == PPC::CTR8)1533return false;15341535// If the conditional branch uses a physical register, then it cannot be1536// turned into a select.1537if (Cond[1].getReg().isPhysical())1538return false;15391540// Check register classes.1541const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();1542const TargetRegisterClass *RC =1543RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));1544if (!RC)1545return false;15461547// isel is for regular integer GPRs only.1548if (!PPC::GPRCRegClass.hasSubClassEq(RC) &&1549!PPC::GPRC_NOR0RegClass.hasSubClassEq(RC) &&1550!PPC::G8RCRegClass.hasSubClassEq(RC) &&1551!PPC::G8RC_NOX0RegClass.hasSubClassEq(RC))1552return false;15531554// FIXME: These numbers are for the A2, how well they work for other cores is1555// an open question. On the A2, the isel instruction has a 2-cycle latency1556// but single-cycle throughput. These numbers are used in combination with1557// the MispredictPenalty setting from the active SchedMachineModel.1558CondCycles = 1;1559TrueCycles = 1;1560FalseCycles = 1;15611562return true;1563}15641565void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,1566MachineBasicBlock::iterator MI,1567const DebugLoc &dl, Register DestReg,1568ArrayRef<MachineOperand> Cond, Register TrueReg,1569Register FalseReg) const {1570assert(Cond.size() == 2 &&1571"PPC branch conditions have two components!");15721573// Get the register classes.1574MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();1575const TargetRegisterClass *RC =1576RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));1577assert(RC && "TrueReg and FalseReg must have overlapping register classes");15781579bool Is64Bit = PPC::G8RCRegClass.hasSubClassEq(RC) ||1580PPC::G8RC_NOX0RegClass.hasSubClassEq(RC);1581assert((Is64Bit ||1582PPC::GPRCRegClass.hasSubClassEq(RC) ||1583PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) &&1584"isel is for regular integer GPRs only");15851586unsigned OpCode = Is64Bit ? 
PPC::ISEL8 : PPC::ISEL;1587auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());15881589unsigned SubIdx = 0;1590bool SwapOps = false;1591switch (SelectPred) {1592case PPC::PRED_EQ:1593case PPC::PRED_EQ_MINUS:1594case PPC::PRED_EQ_PLUS:1595SubIdx = PPC::sub_eq; SwapOps = false; break;1596case PPC::PRED_NE:1597case PPC::PRED_NE_MINUS:1598case PPC::PRED_NE_PLUS:1599SubIdx = PPC::sub_eq; SwapOps = true; break;1600case PPC::PRED_LT:1601case PPC::PRED_LT_MINUS:1602case PPC::PRED_LT_PLUS:1603SubIdx = PPC::sub_lt; SwapOps = false; break;1604case PPC::PRED_GE:1605case PPC::PRED_GE_MINUS:1606case PPC::PRED_GE_PLUS:1607SubIdx = PPC::sub_lt; SwapOps = true; break;1608case PPC::PRED_GT:1609case PPC::PRED_GT_MINUS:1610case PPC::PRED_GT_PLUS:1611SubIdx = PPC::sub_gt; SwapOps = false; break;1612case PPC::PRED_LE:1613case PPC::PRED_LE_MINUS:1614case PPC::PRED_LE_PLUS:1615SubIdx = PPC::sub_gt; SwapOps = true; break;1616case PPC::PRED_UN:1617case PPC::PRED_UN_MINUS:1618case PPC::PRED_UN_PLUS:1619SubIdx = PPC::sub_un; SwapOps = false; break;1620case PPC::PRED_NU:1621case PPC::PRED_NU_MINUS:1622case PPC::PRED_NU_PLUS:1623SubIdx = PPC::sub_un; SwapOps = true; break;1624case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;1625case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;1626}16271628Register FirstReg = SwapOps ? FalseReg : TrueReg,1629SecondReg = SwapOps ? TrueReg : FalseReg;16301631// The first input register of isel cannot be r0. If it is a member1632// of a register class that can be r0, then copy it first (the1633// register allocator should eliminate the copy).1634if (MRI.getRegClass(FirstReg)->contains(PPC::R0) ||1635MRI.getRegClass(FirstReg)->contains(PPC::X0)) {1636const TargetRegisterClass *FirstRC =1637MRI.getRegClass(FirstReg)->contains(PPC::X0) ?1638&PPC::G8RC_NOX0RegClass : &PPC::GPRC_NOR0RegClass;1639Register OldFirstReg = FirstReg;1640FirstReg = MRI.createVirtualRegister(FirstRC);1641BuildMI(MBB, MI, dl, get(TargetOpcode::COPY), FirstReg)1642.addReg(OldFirstReg);1643}16441645BuildMI(MBB, MI, dl, get(OpCode), DestReg)1646.addReg(FirstReg).addReg(SecondReg)1647.addReg(Cond[1].getReg(), 0, SubIdx);1648}16491650static unsigned getCRBitValue(unsigned CRBit) {1651unsigned Ret = 4;1652if (CRBit == PPC::CR0LT || CRBit == PPC::CR1LT ||1653CRBit == PPC::CR2LT || CRBit == PPC::CR3LT ||1654CRBit == PPC::CR4LT || CRBit == PPC::CR5LT ||1655CRBit == PPC::CR6LT || CRBit == PPC::CR7LT)1656Ret = 3;1657if (CRBit == PPC::CR0GT || CRBit == PPC::CR1GT ||1658CRBit == PPC::CR2GT || CRBit == PPC::CR3GT ||1659CRBit == PPC::CR4GT || CRBit == PPC::CR5GT ||1660CRBit == PPC::CR6GT || CRBit == PPC::CR7GT)1661Ret = 2;1662if (CRBit == PPC::CR0EQ || CRBit == PPC::CR1EQ ||1663CRBit == PPC::CR2EQ || CRBit == PPC::CR3EQ ||1664CRBit == PPC::CR4EQ || CRBit == PPC::CR5EQ ||1665CRBit == PPC::CR6EQ || CRBit == PPC::CR7EQ)1666Ret = 1;1667if (CRBit == PPC::CR0UN || CRBit == PPC::CR1UN ||1668CRBit == PPC::CR2UN || CRBit == PPC::CR3UN ||1669CRBit == PPC::CR4UN || CRBit == PPC::CR5UN ||1670CRBit == PPC::CR6UN || CRBit == PPC::CR7UN)1671Ret = 0;16721673assert(Ret != 4 && "Invalid CR bit register");1674return Ret;1675}16761677void PPCInstrInfo::copyPhysReg(MachineBasicBlock &MBB,1678MachineBasicBlock::iterator I,1679const DebugLoc &DL, MCRegister DestReg,1680MCRegister SrcReg, bool KillSrc) const {1681// We can end up with self copies and similar things as a result of VSX copy1682// legalization. 
Promote them here.1683const TargetRegisterInfo *TRI = &getRegisterInfo();1684if (PPC::F8RCRegClass.contains(DestReg) &&1685PPC::VSRCRegClass.contains(SrcReg)) {1686MCRegister SuperReg =1687TRI->getMatchingSuperReg(DestReg, PPC::sub_64, &PPC::VSRCRegClass);16881689if (VSXSelfCopyCrash && SrcReg == SuperReg)1690llvm_unreachable("nop VSX copy");16911692DestReg = SuperReg;1693} else if (PPC::F8RCRegClass.contains(SrcReg) &&1694PPC::VSRCRegClass.contains(DestReg)) {1695MCRegister SuperReg =1696TRI->getMatchingSuperReg(SrcReg, PPC::sub_64, &PPC::VSRCRegClass);16971698if (VSXSelfCopyCrash && DestReg == SuperReg)1699llvm_unreachable("nop VSX copy");17001701SrcReg = SuperReg;1702}17031704// Different class register copy1705if (PPC::CRBITRCRegClass.contains(SrcReg) &&1706PPC::GPRCRegClass.contains(DestReg)) {1707MCRegister CRReg = getCRFromCRBit(SrcReg);1708BuildMI(MBB, I, DL, get(PPC::MFOCRF), DestReg).addReg(CRReg);1709getKillRegState(KillSrc);1710// Rotate the CR bit in the CR fields to be the least significant bit and1711// then mask with 0x1 (MB = ME = 31).1712BuildMI(MBB, I, DL, get(PPC::RLWINM), DestReg)1713.addReg(DestReg, RegState::Kill)1714.addImm(TRI->getEncodingValue(CRReg) * 4 + (4 - getCRBitValue(SrcReg)))1715.addImm(31)1716.addImm(31);1717return;1718} else if (PPC::CRRCRegClass.contains(SrcReg) &&1719(PPC::G8RCRegClass.contains(DestReg) ||1720PPC::GPRCRegClass.contains(DestReg))) {1721bool Is64Bit = PPC::G8RCRegClass.contains(DestReg);1722unsigned MvCode = Is64Bit ? PPC::MFOCRF8 : PPC::MFOCRF;1723unsigned ShCode = Is64Bit ? PPC::RLWINM8 : PPC::RLWINM;1724unsigned CRNum = TRI->getEncodingValue(SrcReg);1725BuildMI(MBB, I, DL, get(MvCode), DestReg).addReg(SrcReg);1726getKillRegState(KillSrc);1727if (CRNum == 7)1728return;1729// Shift the CR bits to make the CR field in the lowest 4 bits of GRC.1730BuildMI(MBB, I, DL, get(ShCode), DestReg)1731.addReg(DestReg, RegState::Kill)1732.addImm(CRNum * 4 + 4)1733.addImm(28)1734.addImm(31);1735return;1736} else if (PPC::G8RCRegClass.contains(SrcReg) &&1737PPC::VSFRCRegClass.contains(DestReg)) {1738assert(Subtarget.hasDirectMove() &&1739"Subtarget doesn't support directmove, don't know how to copy.");1740BuildMI(MBB, I, DL, get(PPC::MTVSRD), DestReg).addReg(SrcReg);1741NumGPRtoVSRSpill++;1742getKillRegState(KillSrc);1743return;1744} else if (PPC::VSFRCRegClass.contains(SrcReg) &&1745PPC::G8RCRegClass.contains(DestReg)) {1746assert(Subtarget.hasDirectMove() &&1747"Subtarget doesn't support directmove, don't know how to copy.");1748BuildMI(MBB, I, DL, get(PPC::MFVSRD), DestReg).addReg(SrcReg);1749getKillRegState(KillSrc);1750return;1751} else if (PPC::SPERCRegClass.contains(SrcReg) &&1752PPC::GPRCRegClass.contains(DestReg)) {1753BuildMI(MBB, I, DL, get(PPC::EFSCFD), DestReg).addReg(SrcReg);1754getKillRegState(KillSrc);1755return;1756} else if (PPC::GPRCRegClass.contains(SrcReg) &&1757PPC::SPERCRegClass.contains(DestReg)) {1758BuildMI(MBB, I, DL, get(PPC::EFDCFS), DestReg).addReg(SrcReg);1759getKillRegState(KillSrc);1760return;1761}17621763unsigned Opc;1764if (PPC::GPRCRegClass.contains(DestReg, SrcReg))1765Opc = PPC::OR;1766else if (PPC::G8RCRegClass.contains(DestReg, SrcReg))1767Opc = PPC::OR8;1768else if (PPC::F4RCRegClass.contains(DestReg, SrcReg))1769Opc = PPC::FMR;1770else if (PPC::CRRCRegClass.contains(DestReg, SrcReg))1771Opc = PPC::MCRF;1772else if (PPC::VRRCRegClass.contains(DestReg, SrcReg))1773Opc = PPC::VOR;1774else if (PPC::VSRCRegClass.contains(DestReg, SrcReg))1775// There are two different ways this can be done:1776// 1. 
xxlor : This has lower latency (on the P7), 2 cycles, but can only1777// issue in VSU pipeline 0.1778// 2. xmovdp/xmovsp: This has higher latency (on the P7), 6 cycles, but1779// can go to either pipeline.1780// We'll always use xxlor here, because in practically all cases where1781// copies are generated, they are close enough to some use that the1782// lower-latency form is preferable.1783Opc = PPC::XXLOR;1784else if (PPC::VSFRCRegClass.contains(DestReg, SrcReg) ||1785PPC::VSSRCRegClass.contains(DestReg, SrcReg))1786Opc = (Subtarget.hasP9Vector()) ? PPC::XSCPSGNDP : PPC::XXLORf;1787else if (Subtarget.pairedVectorMemops() &&1788PPC::VSRpRCRegClass.contains(DestReg, SrcReg)) {1789if (SrcReg > PPC::VSRp15)1790SrcReg = PPC::V0 + (SrcReg - PPC::VSRp16) * 2;1791else1792SrcReg = PPC::VSL0 + (SrcReg - PPC::VSRp0) * 2;1793if (DestReg > PPC::VSRp15)1794DestReg = PPC::V0 + (DestReg - PPC::VSRp16) * 2;1795else1796DestReg = PPC::VSL0 + (DestReg - PPC::VSRp0) * 2;1797BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg).1798addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));1799BuildMI(MBB, I, DL, get(PPC::XXLOR), DestReg + 1).1800addReg(SrcReg + 1).addReg(SrcReg + 1, getKillRegState(KillSrc));1801return;1802}1803else if (PPC::CRBITRCRegClass.contains(DestReg, SrcReg))1804Opc = PPC::CROR;1805else if (PPC::SPERCRegClass.contains(DestReg, SrcReg))1806Opc = PPC::EVOR;1807else if ((PPC::ACCRCRegClass.contains(DestReg) ||1808PPC::UACCRCRegClass.contains(DestReg)) &&1809(PPC::ACCRCRegClass.contains(SrcReg) ||1810PPC::UACCRCRegClass.contains(SrcReg))) {1811// If primed, de-prime the source register, copy the individual registers1812// and prime the destination if needed. The vector subregisters are1813// vs[(u)acc * 4] - vs[(u)acc * 4 + 3]. If the copy is not a kill and the1814// source is primed, we need to re-prime it after the copy as well.1815PPCRegisterInfo::emitAccCopyInfo(MBB, DestReg, SrcReg);1816bool DestPrimed = PPC::ACCRCRegClass.contains(DestReg);1817bool SrcPrimed = PPC::ACCRCRegClass.contains(SrcReg);1818MCRegister VSLSrcReg =1819PPC::VSL0 + (SrcReg - (SrcPrimed ? PPC::ACC0 : PPC::UACC0)) * 4;1820MCRegister VSLDestReg =1821PPC::VSL0 + (DestReg - (DestPrimed ? 
PPC::ACC0 : PPC::UACC0)) * 4;1822if (SrcPrimed)1823BuildMI(MBB, I, DL, get(PPC::XXMFACC), SrcReg).addReg(SrcReg);1824for (unsigned Idx = 0; Idx < 4; Idx++)1825BuildMI(MBB, I, DL, get(PPC::XXLOR), VSLDestReg + Idx)1826.addReg(VSLSrcReg + Idx)1827.addReg(VSLSrcReg + Idx, getKillRegState(KillSrc));1828if (DestPrimed)1829BuildMI(MBB, I, DL, get(PPC::XXMTACC), DestReg).addReg(DestReg);1830if (SrcPrimed && !KillSrc)1831BuildMI(MBB, I, DL, get(PPC::XXMTACC), SrcReg).addReg(SrcReg);1832return;1833} else if (PPC::G8pRCRegClass.contains(DestReg) &&1834PPC::G8pRCRegClass.contains(SrcReg)) {1835// TODO: Handle G8RC to G8pRC (and vice versa) copy.1836unsigned DestRegIdx = DestReg - PPC::G8p0;1837MCRegister DestRegSub0 = PPC::X0 + 2 * DestRegIdx;1838MCRegister DestRegSub1 = PPC::X0 + 2 * DestRegIdx + 1;1839unsigned SrcRegIdx = SrcReg - PPC::G8p0;1840MCRegister SrcRegSub0 = PPC::X0 + 2 * SrcRegIdx;1841MCRegister SrcRegSub1 = PPC::X0 + 2 * SrcRegIdx + 1;1842BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub0)1843.addReg(SrcRegSub0)1844.addReg(SrcRegSub0, getKillRegState(KillSrc));1845BuildMI(MBB, I, DL, get(PPC::OR8), DestRegSub1)1846.addReg(SrcRegSub1)1847.addReg(SrcRegSub1, getKillRegState(KillSrc));1848return;1849} else1850llvm_unreachable("Impossible reg-to-reg copy");18511852const MCInstrDesc &MCID = get(Opc);1853if (MCID.getNumOperands() == 3)1854BuildMI(MBB, I, DL, MCID, DestReg)1855.addReg(SrcReg).addReg(SrcReg, getKillRegState(KillSrc));1856else1857BuildMI(MBB, I, DL, MCID, DestReg).addReg(SrcReg, getKillRegState(KillSrc));1858}18591860unsigned PPCInstrInfo::getSpillIndex(const TargetRegisterClass *RC) const {1861int OpcodeIndex = 0;18621863if (PPC::GPRCRegClass.hasSubClassEq(RC) ||1864PPC::GPRC_NOR0RegClass.hasSubClassEq(RC)) {1865OpcodeIndex = SOK_Int4Spill;1866} else if (PPC::G8RCRegClass.hasSubClassEq(RC) ||1867PPC::G8RC_NOX0RegClass.hasSubClassEq(RC)) {1868OpcodeIndex = SOK_Int8Spill;1869} else if (PPC::F8RCRegClass.hasSubClassEq(RC)) {1870OpcodeIndex = SOK_Float8Spill;1871} else if (PPC::F4RCRegClass.hasSubClassEq(RC)) {1872OpcodeIndex = SOK_Float4Spill;1873} else if (PPC::SPERCRegClass.hasSubClassEq(RC)) {1874OpcodeIndex = SOK_SPESpill;1875} else if (PPC::CRRCRegClass.hasSubClassEq(RC)) {1876OpcodeIndex = SOK_CRSpill;1877} else if (PPC::CRBITRCRegClass.hasSubClassEq(RC)) {1878OpcodeIndex = SOK_CRBitSpill;1879} else if (PPC::VRRCRegClass.hasSubClassEq(RC)) {1880OpcodeIndex = SOK_VRVectorSpill;1881} else if (PPC::VSRCRegClass.hasSubClassEq(RC)) {1882OpcodeIndex = SOK_VSXVectorSpill;1883} else if (PPC::VSFRCRegClass.hasSubClassEq(RC)) {1884OpcodeIndex = SOK_VectorFloat8Spill;1885} else if (PPC::VSSRCRegClass.hasSubClassEq(RC)) {1886OpcodeIndex = SOK_VectorFloat4Spill;1887} else if (PPC::SPILLTOVSRRCRegClass.hasSubClassEq(RC)) {1888OpcodeIndex = SOK_SpillToVSR;1889} else if (PPC::ACCRCRegClass.hasSubClassEq(RC)) {1890assert(Subtarget.pairedVectorMemops() &&1891"Register unexpected when paired memops are disabled.");1892OpcodeIndex = SOK_AccumulatorSpill;1893} else if (PPC::UACCRCRegClass.hasSubClassEq(RC)) {1894assert(Subtarget.pairedVectorMemops() &&1895"Register unexpected when paired memops are disabled.");1896OpcodeIndex = SOK_UAccumulatorSpill;1897} else if (PPC::WACCRCRegClass.hasSubClassEq(RC)) {1898assert(Subtarget.pairedVectorMemops() &&1899"Register unexpected when paired memops are disabled.");1900OpcodeIndex = SOK_WAccumulatorSpill;1901} else if (PPC::VSRpRCRegClass.hasSubClassEq(RC)) {1902assert(Subtarget.pairedVectorMemops() &&1903"Register unexpected when paired memops are 
disabled.");1904OpcodeIndex = SOK_PairedVecSpill;1905} else if (PPC::G8pRCRegClass.hasSubClassEq(RC)) {1906OpcodeIndex = SOK_PairedG8Spill;1907} else {1908llvm_unreachable("Unknown regclass!");1909}1910return OpcodeIndex;1911}19121913unsigned1914PPCInstrInfo::getStoreOpcodeForSpill(const TargetRegisterClass *RC) const {1915ArrayRef<unsigned> OpcodesForSpill = getStoreOpcodesForSpillArray();1916return OpcodesForSpill[getSpillIndex(RC)];1917}19181919unsigned1920PPCInstrInfo::getLoadOpcodeForSpill(const TargetRegisterClass *RC) const {1921ArrayRef<unsigned> OpcodesForSpill = getLoadOpcodesForSpillArray();1922return OpcodesForSpill[getSpillIndex(RC)];1923}19241925void PPCInstrInfo::StoreRegToStackSlot(1926MachineFunction &MF, unsigned SrcReg, bool isKill, int FrameIdx,1927const TargetRegisterClass *RC,1928SmallVectorImpl<MachineInstr *> &NewMIs) const {1929unsigned Opcode = getStoreOpcodeForSpill(RC);1930DebugLoc DL;19311932PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();1933FuncInfo->setHasSpills();19341935NewMIs.push_back(addFrameReference(1936BuildMI(MF, DL, get(Opcode)).addReg(SrcReg, getKillRegState(isKill)),1937FrameIdx));19381939if (PPC::CRRCRegClass.hasSubClassEq(RC) ||1940PPC::CRBITRCRegClass.hasSubClassEq(RC))1941FuncInfo->setSpillsCR();19421943if (isXFormMemOp(Opcode))1944FuncInfo->setHasNonRISpills();1945}19461947void PPCInstrInfo::storeRegToStackSlotNoUpd(1948MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg,1949bool isKill, int FrameIdx, const TargetRegisterClass *RC,1950const TargetRegisterInfo *TRI) const {1951MachineFunction &MF = *MBB.getParent();1952SmallVector<MachineInstr *, 4> NewMIs;19531954StoreRegToStackSlot(MF, SrcReg, isKill, FrameIdx, RC, NewMIs);19551956for (MachineInstr *NewMI : NewMIs)1957MBB.insert(MI, NewMI);19581959const MachineFrameInfo &MFI = MF.getFrameInfo();1960MachineMemOperand *MMO = MF.getMachineMemOperand(1961MachinePointerInfo::getFixedStack(MF, FrameIdx),1962MachineMemOperand::MOStore, MFI.getObjectSize(FrameIdx),1963MFI.getObjectAlign(FrameIdx));1964NewMIs.back()->addMemOperand(MF, MMO);1965}19661967void PPCInstrInfo::storeRegToStackSlot(1968MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,1969bool isKill, int FrameIdx, const TargetRegisterClass *RC,1970const TargetRegisterInfo *TRI, Register VReg) const {1971// We need to avoid a situation in which the value from a VRRC register is1972// spilled using an Altivec instruction and reloaded into a VSRC register1973// using a VSX instruction. The issue with this is that the VSX1974// load/store instructions swap the doublewords in the vector and the Altivec1975// ones don't. 
The register classes on the spill/reload may be different if1976// the register is defined using an Altivec instruction and is then used by a1977// VSX instruction.1978RC = updatedRC(RC);1979storeRegToStackSlotNoUpd(MBB, MI, SrcReg, isKill, FrameIdx, RC, TRI);1980}19811982void PPCInstrInfo::LoadRegFromStackSlot(MachineFunction &MF, const DebugLoc &DL,1983unsigned DestReg, int FrameIdx,1984const TargetRegisterClass *RC,1985SmallVectorImpl<MachineInstr *> &NewMIs)1986const {1987unsigned Opcode = getLoadOpcodeForSpill(RC);1988NewMIs.push_back(addFrameReference(BuildMI(MF, DL, get(Opcode), DestReg),1989FrameIdx));1990}19911992void PPCInstrInfo::loadRegFromStackSlotNoUpd(1993MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg,1994int FrameIdx, const TargetRegisterClass *RC,1995const TargetRegisterInfo *TRI) const {1996MachineFunction &MF = *MBB.getParent();1997SmallVector<MachineInstr*, 4> NewMIs;1998DebugLoc DL;1999if (MI != MBB.end()) DL = MI->getDebugLoc();20002001LoadRegFromStackSlot(MF, DL, DestReg, FrameIdx, RC, NewMIs);20022003for (MachineInstr *NewMI : NewMIs)2004MBB.insert(MI, NewMI);20052006const MachineFrameInfo &MFI = MF.getFrameInfo();2007MachineMemOperand *MMO = MF.getMachineMemOperand(2008MachinePointerInfo::getFixedStack(MF, FrameIdx),2009MachineMemOperand::MOLoad, MFI.getObjectSize(FrameIdx),2010MFI.getObjectAlign(FrameIdx));2011NewMIs.back()->addMemOperand(MF, MMO);2012}20132014void PPCInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,2015MachineBasicBlock::iterator MI,2016Register DestReg, int FrameIdx,2017const TargetRegisterClass *RC,2018const TargetRegisterInfo *TRI,2019Register VReg) const {2020// We need to avoid a situation in which the value from a VRRC register is2021// spilled using an Altivec instruction and reloaded into a VSRC register2022// using a VSX instruction. The issue with this is that the VSX2023// load/store instructions swap the doublewords in the vector and the Altivec2024// ones don't. The register classes on the spill/reload may be different if2025// the register is defined using an Altivec instruction and is then used by a2026// VSX instruction.2027RC = updatedRC(RC);20282029loadRegFromStackSlotNoUpd(MBB, MI, DestReg, FrameIdx, RC, TRI);2030}20312032bool PPCInstrInfo::2033reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {2034assert(Cond.size() == 2 && "Invalid PPC branch opcode!");2035if (Cond[1].getReg() == PPC::CTR8 || Cond[1].getReg() == PPC::CTR)2036Cond[0].setImm(Cond[0].getImm() == 0 ? 1 : 0);2037else2038// Leave the CR# the same, but invert the condition.2039Cond[0].setImm(PPC::InvertPredicate((PPC::Predicate)Cond[0].getImm()));2040return false;2041}20422043// For some instructions, it is legal to fold ZERO into the RA register field.2044// This function performs that fold by replacing the operand with PPC::ZERO,2045// it does not consider whether the load immediate zero is no longer in use.2046bool PPCInstrInfo::onlyFoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,2047Register Reg) const {2048// A zero immediate should always be loaded with a single li.2049unsigned DefOpc = DefMI.getOpcode();2050if (DefOpc != PPC::LI && DefOpc != PPC::LI8)2051return false;2052if (!DefMI.getOperand(1).isImm())2053return false;2054if (DefMI.getOperand(1).getImm() != 0)2055return false;20562057// Note that we cannot here invert the arguments of an isel in order to fold2058// a ZERO into what is presented as the second argument. 
All we have here2059// is the condition bit, and that might come from a CR-logical bit operation.20602061const MCInstrDesc &UseMCID = UseMI.getDesc();20622063// Only fold into real machine instructions.2064if (UseMCID.isPseudo())2065return false;20662067// We need to find which of the User's operands is to be folded, that will be2068// the operand that matches the given register ID.2069unsigned UseIdx;2070for (UseIdx = 0; UseIdx < UseMI.getNumOperands(); ++UseIdx)2071if (UseMI.getOperand(UseIdx).isReg() &&2072UseMI.getOperand(UseIdx).getReg() == Reg)2073break;20742075assert(UseIdx < UseMI.getNumOperands() && "Cannot find Reg in UseMI");2076assert(UseIdx < UseMCID.getNumOperands() && "No operand description for Reg");20772078const MCOperandInfo *UseInfo = &UseMCID.operands()[UseIdx];20792080// We can fold the zero if this register requires a GPRC_NOR0/G8RC_NOX02081// register (which might also be specified as a pointer class kind).2082if (UseInfo->isLookupPtrRegClass()) {2083if (UseInfo->RegClass /* Kind */ != 1)2084return false;2085} else {2086if (UseInfo->RegClass != PPC::GPRC_NOR0RegClassID &&2087UseInfo->RegClass != PPC::G8RC_NOX0RegClassID)2088return false;2089}20902091// Make sure this is not tied to an output register (or otherwise2092// constrained). This is true for ST?UX registers, for example, which2093// are tied to their output registers.2094if (UseInfo->Constraints != 0)2095return false;20962097MCRegister ZeroReg;2098if (UseInfo->isLookupPtrRegClass()) {2099bool isPPC64 = Subtarget.isPPC64();2100ZeroReg = isPPC64 ? PPC::ZERO8 : PPC::ZERO;2101} else {2102ZeroReg = UseInfo->RegClass == PPC::G8RC_NOX0RegClassID ?2103PPC::ZERO8 : PPC::ZERO;2104}21052106LLVM_DEBUG(dbgs() << "Folded immediate zero for: ");2107LLVM_DEBUG(UseMI.dump());2108UseMI.getOperand(UseIdx).setReg(ZeroReg);2109LLVM_DEBUG(dbgs() << "Into: ");2110LLVM_DEBUG(UseMI.dump());2111return true;2112}21132114// Folds zero into instructions which have a load immediate zero as an operand2115// but also recognize zero as immediate zero. If the definition of the load2116// has no more users it is deleted.2117bool PPCInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,2118Register Reg, MachineRegisterInfo *MRI) const {2119bool Changed = onlyFoldImmediate(UseMI, DefMI, Reg);2120if (MRI->use_nodbg_empty(Reg))2121DefMI.eraseFromParent();2122return Changed;2123}21242125static bool MBBDefinesCTR(MachineBasicBlock &MBB) {2126for (MachineInstr &MI : MBB)2127if (MI.definesRegister(PPC::CTR, /*TRI=*/nullptr) ||2128MI.definesRegister(PPC::CTR8, /*TRI=*/nullptr))2129return true;2130return false;2131}21322133// We should make sure that, if we're going to predicate both sides of a2134// condition (a diamond), that both sides don't define the counter register. We2135// can predicate counter-decrement-based branches, but while that predicates2136// the branching, it does not predicate the counter decrement. If we tried to2137// merge the triangle into one predicated block, we'd decrement the counter2138// twice.2139bool PPCInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TMBB,2140unsigned NumT, unsigned ExtraT,2141MachineBasicBlock &FMBB,2142unsigned NumF, unsigned ExtraF,2143BranchProbability Probability) const {2144return !(MBBDefinesCTR(TMBB) && MBBDefinesCTR(FMBB));2145}214621472148bool PPCInstrInfo::isPredicated(const MachineInstr &MI) const {2149// The predicated branches are identified by their type, not really by the2150// explicit presence of a predicate. 
Furthermore, some of them can be2151// predicated more than once. Because if conversion won't try to predicate2152// any instruction which already claims to be predicated (by returning true2153// here), always return false. In doing so, we let isPredicable() be the2154// final word on whether not the instruction can be (further) predicated.21552156return false;2157}21582159bool PPCInstrInfo::isSchedulingBoundary(const MachineInstr &MI,2160const MachineBasicBlock *MBB,2161const MachineFunction &MF) const {2162switch (MI.getOpcode()) {2163default:2164break;2165// Set MFFS and MTFSF as scheduling boundary to avoid unexpected code motion2166// across them, since some FP operations may change content of FPSCR.2167// TODO: Model FPSCR in PPC instruction definitions and remove the workaround2168case PPC::MFFS:2169case PPC::MTFSF:2170case PPC::FENCE:2171return true;2172}2173return TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF);2174}21752176bool PPCInstrInfo::PredicateInstruction(MachineInstr &MI,2177ArrayRef<MachineOperand> Pred) const {2178unsigned OpC = MI.getOpcode();2179if (OpC == PPC::BLR || OpC == PPC::BLR8) {2180if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {2181bool isPPC64 = Subtarget.isPPC64();2182MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZLR8 : PPC::BDNZLR)2183: (isPPC64 ? PPC::BDZLR8 : PPC::BDZLR)));2184// Need add Def and Use for CTR implicit operand.2185MachineInstrBuilder(*MI.getParent()->getParent(), MI)2186.addReg(Pred[1].getReg(), RegState::Implicit)2187.addReg(Pred[1].getReg(), RegState::ImplicitDefine);2188} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {2189MI.setDesc(get(PPC::BCLR));2190MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);2191} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {2192MI.setDesc(get(PPC::BCLRn));2193MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);2194} else {2195MI.setDesc(get(PPC::BCCLR));2196MachineInstrBuilder(*MI.getParent()->getParent(), MI)2197.addImm(Pred[0].getImm())2198.add(Pred[1]);2199}22002201return true;2202} else if (OpC == PPC::B) {2203if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR) {2204bool isPPC64 = Subtarget.isPPC64();2205MI.setDesc(get(Pred[0].getImm() ? (isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)2206: (isPPC64 ? 
PPC::BDZ8 : PPC::BDZ)));2207// Need add Def and Use for CTR implicit operand.2208MachineInstrBuilder(*MI.getParent()->getParent(), MI)2209.addReg(Pred[1].getReg(), RegState::Implicit)2210.addReg(Pred[1].getReg(), RegState::ImplicitDefine);2211} else if (Pred[0].getImm() == PPC::PRED_BIT_SET) {2212MachineBasicBlock *MBB = MI.getOperand(0).getMBB();2213MI.removeOperand(0);22142215MI.setDesc(get(PPC::BC));2216MachineInstrBuilder(*MI.getParent()->getParent(), MI)2217.add(Pred[1])2218.addMBB(MBB);2219} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {2220MachineBasicBlock *MBB = MI.getOperand(0).getMBB();2221MI.removeOperand(0);22222223MI.setDesc(get(PPC::BCn));2224MachineInstrBuilder(*MI.getParent()->getParent(), MI)2225.add(Pred[1])2226.addMBB(MBB);2227} else {2228MachineBasicBlock *MBB = MI.getOperand(0).getMBB();2229MI.removeOperand(0);22302231MI.setDesc(get(PPC::BCC));2232MachineInstrBuilder(*MI.getParent()->getParent(), MI)2233.addImm(Pred[0].getImm())2234.add(Pred[1])2235.addMBB(MBB);2236}22372238return true;2239} else if (OpC == PPC::BCTR || OpC == PPC::BCTR8 || OpC == PPC::BCTRL ||2240OpC == PPC::BCTRL8 || OpC == PPC::BCTRL_RM ||2241OpC == PPC::BCTRL8_RM) {2242if (Pred[1].getReg() == PPC::CTR8 || Pred[1].getReg() == PPC::CTR)2243llvm_unreachable("Cannot predicate bctr[l] on the ctr register");22442245bool setLR = OpC == PPC::BCTRL || OpC == PPC::BCTRL8 ||2246OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM;2247bool isPPC64 = Subtarget.isPPC64();22482249if (Pred[0].getImm() == PPC::PRED_BIT_SET) {2250MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8 : PPC::BCCTR8)2251: (setLR ? PPC::BCCTRL : PPC::BCCTR)));2252MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);2253} else if (Pred[0].getImm() == PPC::PRED_BIT_UNSET) {2254MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCTRL8n : PPC::BCCTR8n)2255: (setLR ? PPC::BCCTRLn : PPC::BCCTRn)));2256MachineInstrBuilder(*MI.getParent()->getParent(), MI).add(Pred[1]);2257} else {2258MI.setDesc(get(isPPC64 ? (setLR ? PPC::BCCCTRL8 : PPC::BCCCTR8)2259: (setLR ? PPC::BCCCTRL : PPC::BCCCTR)));2260MachineInstrBuilder(*MI.getParent()->getParent(), MI)2261.addImm(Pred[0].getImm())2262.add(Pred[1]);2263}22642265// Need add Def and Use for LR implicit operand.2266if (setLR)2267MachineInstrBuilder(*MI.getParent()->getParent(), MI)2268.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::Implicit)2269.addReg(isPPC64 ? PPC::LR8 : PPC::LR, RegState::ImplicitDefine);2270if (OpC == PPC::BCTRL_RM || OpC == PPC::BCTRL8_RM)2271MachineInstrBuilder(*MI.getParent()->getParent(), MI)2272.addReg(PPC::RM, RegState::ImplicitDefine);22732274return true;2275}22762277return false;2278}22792280bool PPCInstrInfo::SubsumesPredicate(ArrayRef<MachineOperand> Pred1,2281ArrayRef<MachineOperand> Pred2) const {2282assert(Pred1.size() == 2 && "Invalid PPC first predicate");2283assert(Pred2.size() == 2 && "Invalid PPC second predicate");22842285if (Pred1[1].getReg() == PPC::CTR8 || Pred1[1].getReg() == PPC::CTR)2286return false;2287if (Pred2[1].getReg() == PPC::CTR8 || Pred2[1].getReg() == PPC::CTR)2288return false;22892290// P1 can only subsume P2 if they test the same condition register.2291if (Pred1[1].getReg() != Pred2[1].getReg())2292return false;22932294PPC::Predicate P1 = (PPC::Predicate) Pred1[0].getImm();2295PPC::Predicate P2 = (PPC::Predicate) Pred2[0].getImm();22962297if (P1 == P2)2298return true;22992300// Does P1 subsume P2, e.g. 
GE subsumes GT.2301if (P1 == PPC::PRED_LE &&2302(P2 == PPC::PRED_LT || P2 == PPC::PRED_EQ))2303return true;2304if (P1 == PPC::PRED_GE &&2305(P2 == PPC::PRED_GT || P2 == PPC::PRED_EQ))2306return true;23072308return false;2309}23102311bool PPCInstrInfo::ClobbersPredicate(MachineInstr &MI,2312std::vector<MachineOperand> &Pred,2313bool SkipDead) const {2314// Note: At the present time, the contents of Pred from this function is2315// unused by IfConversion. This implementation follows ARM by pushing the2316// CR-defining operand. Because the 'DZ' and 'DNZ' count as types of2317// predicate, instructions defining CTR or CTR8 are also included as2318// predicate-defining instructions.23192320const TargetRegisterClass *RCs[] =2321{ &PPC::CRRCRegClass, &PPC::CRBITRCRegClass,2322&PPC::CTRRCRegClass, &PPC::CTRRC8RegClass };23232324bool Found = false;2325for (const MachineOperand &MO : MI.operands()) {2326for (unsigned c = 0; c < std::size(RCs) && !Found; ++c) {2327const TargetRegisterClass *RC = RCs[c];2328if (MO.isReg()) {2329if (MO.isDef() && RC->contains(MO.getReg())) {2330Pred.push_back(MO);2331Found = true;2332}2333} else if (MO.isRegMask()) {2334for (MCPhysReg R : *RC)2335if (MO.clobbersPhysReg(R)) {2336Pred.push_back(MO);2337Found = true;2338}2339}2340}2341}23422343return Found;2344}23452346bool PPCInstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,2347Register &SrcReg2, int64_t &Mask,2348int64_t &Value) const {2349unsigned Opc = MI.getOpcode();23502351switch (Opc) {2352default: return false;2353case PPC::CMPWI:2354case PPC::CMPLWI:2355case PPC::CMPDI:2356case PPC::CMPLDI:2357SrcReg = MI.getOperand(1).getReg();2358SrcReg2 = 0;2359Value = MI.getOperand(2).getImm();2360Mask = 0xFFFF;2361return true;2362case PPC::CMPW:2363case PPC::CMPLW:2364case PPC::CMPD:2365case PPC::CMPLD:2366case PPC::FCMPUS:2367case PPC::FCMPUD:2368SrcReg = MI.getOperand(1).getReg();2369SrcReg2 = MI.getOperand(2).getReg();2370Value = 0;2371Mask = 0;2372return true;2373}2374}23752376bool PPCInstrInfo::optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg,2377Register SrcReg2, int64_t Mask,2378int64_t Value,2379const MachineRegisterInfo *MRI) const {2380if (DisableCmpOpt)2381return false;23822383int OpC = CmpInstr.getOpcode();2384Register CRReg = CmpInstr.getOperand(0).getReg();23852386// FP record forms set CR1 based on the exception status bits, not a2387// comparison with zero.2388if (OpC == PPC::FCMPUS || OpC == PPC::FCMPUD)2389return false;23902391const TargetRegisterInfo *TRI = &getRegisterInfo();2392// The record forms set the condition register based on a signed comparison2393// with zero (so says the ISA manual). This is not as straightforward as it2394// seems, however, because this is always a 64-bit comparison on PPC64, even2395// for instructions that are 32-bit in nature (like slw for example).2396// So, on PPC32, for unsigned comparisons, we can use the record forms only2397// for equality checks (as those don't depend on the sign). 
On PPC64,2398// we are restricted to equality for unsigned 64-bit comparisons and for2399// signed 32-bit comparisons the applicability is more restricted.2400bool isPPC64 = Subtarget.isPPC64();2401bool is32BitSignedCompare = OpC == PPC::CMPWI || OpC == PPC::CMPW;2402bool is32BitUnsignedCompare = OpC == PPC::CMPLWI || OpC == PPC::CMPLW;2403bool is64BitUnsignedCompare = OpC == PPC::CMPLDI || OpC == PPC::CMPLD;24042405// Look through copies unless that gets us to a physical register.2406Register ActualSrc = TRI->lookThruCopyLike(SrcReg, MRI);2407if (ActualSrc.isVirtual())2408SrcReg = ActualSrc;24092410// Get the unique definition of SrcReg.2411MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);2412if (!MI) return false;24132414bool equalityOnly = false;2415bool noSub = false;2416if (isPPC64) {2417if (is32BitSignedCompare) {2418// We can perform this optimization only if SrcReg is sign-extending.2419if (isSignExtended(SrcReg, MRI))2420noSub = true;2421else2422return false;2423} else if (is32BitUnsignedCompare) {2424// We can perform this optimization, equality only, if SrcReg is2425// zero-extending.2426if (isZeroExtended(SrcReg, MRI)) {2427noSub = true;2428equalityOnly = true;2429} else2430return false;2431} else2432equalityOnly = is64BitUnsignedCompare;2433} else2434equalityOnly = is32BitUnsignedCompare;24352436if (equalityOnly) {2437// We need to check the uses of the condition register in order to reject2438// non-equality comparisons.2439for (MachineRegisterInfo::use_instr_iterator2440I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();2441I != IE; ++I) {2442MachineInstr *UseMI = &*I;2443if (UseMI->getOpcode() == PPC::BCC) {2444PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();2445unsigned PredCond = PPC::getPredicateCondition(Pred);2446// We ignore hint bits when checking for non-equality comparisons.2447if (PredCond != PPC::PRED_EQ && PredCond != PPC::PRED_NE)2448return false;2449} else if (UseMI->getOpcode() == PPC::ISEL ||2450UseMI->getOpcode() == PPC::ISEL8) {2451unsigned SubIdx = UseMI->getOperand(3).getSubReg();2452if (SubIdx != PPC::sub_eq)2453return false;2454} else2455return false;2456}2457}24582459MachineBasicBlock::iterator I = CmpInstr;24602461// Scan forward to find the first use of the compare.2462for (MachineBasicBlock::iterator EL = CmpInstr.getParent()->end(); I != EL;2463++I) {2464bool FoundUse = false;2465for (MachineRegisterInfo::use_instr_iterator2466J = MRI->use_instr_begin(CRReg), JE = MRI->use_instr_end();2467J != JE; ++J)2468if (&*J == &*I) {2469FoundUse = true;2470break;2471}24722473if (FoundUse)2474break;2475}24762477SmallVector<std::pair<MachineOperand*, PPC::Predicate>, 4> PredsToUpdate;2478SmallVector<std::pair<MachineOperand*, unsigned>, 4> SubRegsToUpdate;24792480// There are two possible candidates which can be changed to set CR[01].2481// One is MI, the other is a SUB instruction.2482// For CMPrr(r1,r2), we are looking for SUB(r1,r2) or SUB(r2,r1).2483MachineInstr *Sub = nullptr;2484if (SrcReg2 != 0)2485// MI is not a candidate for CMPrr.2486MI = nullptr;2487// FIXME: Conservatively refuse to convert an instruction which isn't in the2488// same BB as the comparison. This is to allow the check below to avoid calls2489// (and other explicit clobbers); instead we should really check for these2490// more explicitly (in at least a few predecessors).2491else if (MI->getParent() != CmpInstr.getParent())2492return false;2493else if (Value != 0) {2494// The record-form instructions set CR bit based on signed comparison2495// against 0. 
We try to convert a compare against 1 or -1 into a compare2496// against 0 to exploit record-form instructions. For example, we change2497// the condition "greater than -1" into "greater than or equal to 0"2498// and "less than 1" into "less than or equal to 0".24992500// Since we optimize comparison based on a specific branch condition,2501// we don't optimize if condition code is used by more than once.2502if (equalityOnly || !MRI->hasOneUse(CRReg))2503return false;25042505MachineInstr *UseMI = &*MRI->use_instr_begin(CRReg);2506if (UseMI->getOpcode() != PPC::BCC)2507return false;25082509PPC::Predicate Pred = (PPC::Predicate)UseMI->getOperand(0).getImm();2510unsigned PredCond = PPC::getPredicateCondition(Pred);2511unsigned PredHint = PPC::getPredicateHint(Pred);2512int16_t Immed = (int16_t)Value;25132514// When modifying the condition in the predicate, we propagate hint bits2515// from the original predicate to the new one.2516if (Immed == -1 && PredCond == PPC::PRED_GT)2517// We convert "greater than -1" into "greater than or equal to 0",2518// since we are assuming signed comparison by !equalityOnly2519Pred = PPC::getPredicate(PPC::PRED_GE, PredHint);2520else if (Immed == -1 && PredCond == PPC::PRED_LE)2521// We convert "less than or equal to -1" into "less than 0".2522Pred = PPC::getPredicate(PPC::PRED_LT, PredHint);2523else if (Immed == 1 && PredCond == PPC::PRED_LT)2524// We convert "less than 1" into "less than or equal to 0".2525Pred = PPC::getPredicate(PPC::PRED_LE, PredHint);2526else if (Immed == 1 && PredCond == PPC::PRED_GE)2527// We convert "greater than or equal to 1" into "greater than 0".2528Pred = PPC::getPredicate(PPC::PRED_GT, PredHint);2529else2530return false;25312532// Convert the comparison and its user to a compare against zero with the2533// appropriate predicate on the branch. Zero comparison might provide2534// optimization opportunities post-RA (see optimization in2535// PPCPreEmitPeephole.cpp).2536UseMI->getOperand(0).setImm(Pred);2537CmpInstr.getOperand(2).setImm(0);2538}25392540// Search for Sub.2541--I;25422543// Get ready to iterate backward from CmpInstr.2544MachineBasicBlock::iterator E = MI, B = CmpInstr.getParent()->begin();25452546for (; I != E && !noSub; --I) {2547const MachineInstr &Instr = *I;2548unsigned IOpC = Instr.getOpcode();25492550if (&*I != &CmpInstr && (Instr.modifiesRegister(PPC::CR0, TRI) ||2551Instr.readsRegister(PPC::CR0, TRI)))2552// This instruction modifies or uses the record condition register after2553// the one we want to change. While we could do this transformation, it2554// would likely not be profitable. 
This transformation removes one2555// instruction, and so even forcing RA to generate one move probably2556// makes it unprofitable.2557return false;25582559// Check whether CmpInstr can be made redundant by the current instruction.2560if ((OpC == PPC::CMPW || OpC == PPC::CMPLW ||2561OpC == PPC::CMPD || OpC == PPC::CMPLD) &&2562(IOpC == PPC::SUBF || IOpC == PPC::SUBF8) &&2563((Instr.getOperand(1).getReg() == SrcReg &&2564Instr.getOperand(2).getReg() == SrcReg2) ||2565(Instr.getOperand(1).getReg() == SrcReg2 &&2566Instr.getOperand(2).getReg() == SrcReg))) {2567Sub = &*I;2568break;2569}25702571if (I == B)2572// The 'and' is below the comparison instruction.2573return false;2574}25752576// Return false if no candidates exist.2577if (!MI && !Sub)2578return false;25792580// The single candidate is called MI.2581if (!MI) MI = Sub;25822583int NewOpC = -1;2584int MIOpC = MI->getOpcode();2585if (MIOpC == PPC::ANDI_rec || MIOpC == PPC::ANDI8_rec ||2586MIOpC == PPC::ANDIS_rec || MIOpC == PPC::ANDIS8_rec)2587NewOpC = MIOpC;2588else {2589NewOpC = PPC::getRecordFormOpcode(MIOpC);2590if (NewOpC == -1 && PPC::getNonRecordFormOpcode(MIOpC) != -1)2591NewOpC = MIOpC;2592}25932594// FIXME: On the non-embedded POWER architectures, only some of the record2595// forms are fast, and we should use only the fast ones.25962597// The defining instruction has a record form (or is already a record2598// form). It is possible, however, that we'll need to reverse the condition2599// code of the users.2600if (NewOpC == -1)2601return false;26022603// This transformation should not be performed if `nsw` is missing and is not2604// `equalityOnly` comparison. Since if there is overflow, sub_lt, sub_gt in2605// CRReg do not reflect correct order. If `equalityOnly` is true, sub_eq in2606// CRReg can reflect if compared values are equal, this optz is still valid.2607if (!equalityOnly && (NewOpC == PPC::SUBF_rec || NewOpC == PPC::SUBF8_rec) &&2608Sub && !Sub->getFlag(MachineInstr::NoSWrap))2609return false;26102611// If we have SUB(r1, r2) and CMP(r2, r1), the condition code based on CMP2612// needs to be updated to be based on SUB. Push the condition code2613// operands to OperandsToUpdate. 
  // If it is safe to remove CmpInstr, the condition code of these operands
  // will be modified.
  // Here, Value == 0 means we haven't converted comparison against 1 or -1 to
  // comparison against 0, which may modify predicate.
  bool ShouldSwap = false;
  if (Sub && Value == 0) {
    ShouldSwap = SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 &&
      Sub->getOperand(2).getReg() == SrcReg;

    // The operands to subf are the opposite of sub, so only in the fixed-point
    // case, invert the order.
    ShouldSwap = !ShouldSwap;
  }

  if (ShouldSwap)
    for (MachineRegisterInfo::use_instr_iterator
         I = MRI->use_instr_begin(CRReg), IE = MRI->use_instr_end();
         I != IE; ++I) {
      MachineInstr *UseMI = &*I;
      if (UseMI->getOpcode() == PPC::BCC) {
        PPC::Predicate Pred = (PPC::Predicate) UseMI->getOperand(0).getImm();
        unsigned PredCond = PPC::getPredicateCondition(Pred);
        assert((!equalityOnly ||
                PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE) &&
               "Invalid predicate for equality-only optimization");
        (void)PredCond; // To suppress warning in release build.
        PredsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(0)),
                                PPC::getSwappedPredicate(Pred)));
      } else if (UseMI->getOpcode() == PPC::ISEL ||
                 UseMI->getOpcode() == PPC::ISEL8) {
        unsigned NewSubReg = UseMI->getOperand(3).getSubReg();
        assert((!equalityOnly || NewSubReg == PPC::sub_eq) &&
               "Invalid CR bit for equality-only optimization");

        if (NewSubReg == PPC::sub_lt)
          NewSubReg = PPC::sub_gt;
        else if (NewSubReg == PPC::sub_gt)
          NewSubReg = PPC::sub_lt;

        SubRegsToUpdate.push_back(std::make_pair(&(UseMI->getOperand(3)),
                                  NewSubReg));
      } else // We need to abort on a user we don't understand.
        return false;
    }
  assert(!(Value != 0 && ShouldSwap) &&
         "Non-zero immediate support and ShouldSwap"
         "may conflict in updating predicate");

  // Create a new virtual register to hold the value of the CR set by the
  // record-form instruction. If the instruction was not previously in
  // record form, then set the kill flag on the CR.
  CmpInstr.eraseFromParent();

  MachineBasicBlock::iterator MII = MI;
  BuildMI(*MI->getParent(), std::next(MII), MI->getDebugLoc(),
          get(TargetOpcode::COPY), CRReg)
      .addReg(PPC::CR0, MIOpC != NewOpC ? RegState::Kill : 0);

  // Even if CR0 register were dead before, it is alive now since the
  // instruction we just built uses it.
  MI->clearRegisterDeads(PPC::CR0);

  if (MIOpC != NewOpC) {
    // We need to be careful here: we're replacing one instruction with
    // another, and we need to make sure that we get all of the right
    // implicit uses and defs. On the other hand, the caller may be holding
    // an iterator to this instruction, and so we can't delete it (this is
    // specifically the case if this is the instruction directly after the
    // compare).

    // Rotates are expensive instructions. If we're emitting a record-form
    // rotate that can just be an andi/andis, we should just emit that.
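    // For example, a record-form RLWINM with SH = 0, MB = 24, ME = 31 keeps
    // only the low byte, so it is equivalent to ANDI. with an immediate of
    // 0xFF; masks that live entirely in the high halfword become ANDIS.
    // instead.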
    if (MIOpC == PPC::RLWINM || MIOpC == PPC::RLWINM8) {
      Register GPRRes = MI->getOperand(0).getReg();
      int64_t SH = MI->getOperand(2).getImm();
      int64_t MB = MI->getOperand(3).getImm();
      int64_t ME = MI->getOperand(4).getImm();
      // We can only do this if both the start and end of the mask are in the
      // same halfword.
      bool MBInLoHWord = MB >= 16;
      bool MEInLoHWord = ME >= 16;
      uint64_t Mask = ~0LLU;

      if (MB <= ME && MBInLoHWord == MEInLoHWord && SH == 0) {
        Mask = ((1LLU << (32 - MB)) - 1) & ~((1LLU << (31 - ME)) - 1);
        // The mask value needs to shift right 16 if we're emitting andis.
        Mask >>= MBInLoHWord ? 0 : 16;
        NewOpC = MIOpC == PPC::RLWINM
          ? (MBInLoHWord ? PPC::ANDI_rec : PPC::ANDIS_rec)
          : (MBInLoHWord ? PPC::ANDI8_rec : PPC::ANDIS8_rec);
      } else if (MRI->use_empty(GPRRes) && (ME == 31) &&
                 (ME - MB + 1 == SH) && (MB >= 16)) {
        // If we are rotating by the exact number of bits as are in the mask
        // and the mask is in the least significant bits of the register,
        // that's just an andis. (as long as the GPR result has no uses).
        Mask = ((1LLU << 32) - 1) & ~((1LLU << (32 - SH)) - 1);
        Mask >>= 16;
        NewOpC = MIOpC == PPC::RLWINM ? PPC::ANDIS_rec : PPC::ANDIS8_rec;
      }
      // If we've set the mask, we can transform.
      if (Mask != ~0LLU) {
        MI->removeOperand(4);
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    } else if (MIOpC == PPC::RLDICL && MI->getOperand(2).getImm() == 0) {
      int64_t MB = MI->getOperand(3).getImm();
      if (MB >= 48) {
        uint64_t Mask = (1LLU << (63 - MB + 1)) - 1;
        NewOpC = PPC::ANDI8_rec;
        MI->removeOperand(3);
        MI->getOperand(2).setImm(Mask);
        NumRcRotatesConvertedToRcAnd++;
      }
    }

    const MCInstrDesc &NewDesc = get(NewOpC);
    MI->setDesc(NewDesc);

    for (MCPhysReg ImpDef : NewDesc.implicit_defs()) {
      if (!MI->definesRegister(ImpDef, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpDef, true, true));
      }
    }
    for (MCPhysReg ImpUse : NewDesc.implicit_uses()) {
      if (!MI->readsRegister(ImpUse, /*TRI=*/nullptr)) {
        MI->addOperand(*MI->getParent()->getParent(),
                       MachineOperand::CreateReg(ImpUse, false, true));
      }
    }
  }
  assert(MI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&
         "Record-form instruction does not define cr0?");

  // Modify the condition code of operands in OperandsToUpdate.
  // Since we have SUB(r1, r2) and CMP(r2, r1), the condition code needs to
  // be changed from r2 > r1 to r1 < r2, from r2 < r1 to r1 > r2, etc.
  for (unsigned i = 0, e = PredsToUpdate.size(); i < e; i++)
    PredsToUpdate[i].first->setImm(PredsToUpdate[i].second);

  for (unsigned i = 0, e = SubRegsToUpdate.size(); i < e; i++)
    SubRegsToUpdate[i].first->setSubReg(SubRegsToUpdate[i].second);

  return true;
}

bool PPCInstrInfo::optimizeCmpPostRA(MachineInstr &CmpMI) const {
  MachineRegisterInfo *MRI = &CmpMI.getParent()->getParent()->getRegInfo();
  if (MRI->isSSA())
    return false;

  Register SrcReg, SrcReg2;
  int64_t CmpMask, CmpValue;
  if (!analyzeCompare(CmpMI, SrcReg, SrcReg2, CmpMask, CmpValue))
    return false;

  // Try to optimize the comparison against 0.
  if (CmpValue || !CmpMask || SrcReg2)
    return false;

  // The record forms set the condition register based on a signed comparison
  // with zero (see
comments in optimizeCompareInstr). Since we can't do the2777// equality checks in post-RA, we are more restricted on a unsigned2778// comparison.2779unsigned Opc = CmpMI.getOpcode();2780if (Opc == PPC::CMPLWI || Opc == PPC::CMPLDI)2781return false;27822783// The record forms are always based on a 64-bit comparison on PPC642784// (similary, a 32-bit comparison on PPC32), while the CMPWI is a 32-bit2785// comparison. Since we can't do the equality checks in post-RA, we bail out2786// the case.2787if (Subtarget.isPPC64() && Opc == PPC::CMPWI)2788return false;27892790// CmpMI can't be deleted if it has implicit def.2791if (CmpMI.hasImplicitDef())2792return false;27932794bool SrcRegHasOtherUse = false;2795MachineInstr *SrcMI = getDefMIPostRA(SrcReg, CmpMI, SrcRegHasOtherUse);2796if (!SrcMI || !SrcMI->definesRegister(SrcReg, /*TRI=*/nullptr))2797return false;27982799MachineOperand RegMO = CmpMI.getOperand(0);2800Register CRReg = RegMO.getReg();2801if (CRReg != PPC::CR0)2802return false;28032804// Make sure there is no def/use of CRReg between SrcMI and CmpMI.2805bool SeenUseOfCRReg = false;2806bool IsCRRegKilled = false;2807if (!isRegElgibleForForwarding(RegMO, *SrcMI, CmpMI, false, IsCRRegKilled,2808SeenUseOfCRReg) ||2809SrcMI->definesRegister(CRReg, /*TRI=*/nullptr) || SeenUseOfCRReg)2810return false;28112812int SrcMIOpc = SrcMI->getOpcode();2813int NewOpC = PPC::getRecordFormOpcode(SrcMIOpc);2814if (NewOpC == -1)2815return false;28162817LLVM_DEBUG(dbgs() << "Replace Instr: ");2818LLVM_DEBUG(SrcMI->dump());28192820const MCInstrDesc &NewDesc = get(NewOpC);2821SrcMI->setDesc(NewDesc);2822MachineInstrBuilder(*SrcMI->getParent()->getParent(), SrcMI)2823.addReg(CRReg, RegState::ImplicitDefine);2824SrcMI->clearRegisterDeads(CRReg);28252826assert(SrcMI->definesRegister(PPC::CR0, /*TRI=*/nullptr) &&2827"Record-form instruction does not define cr0?");28282829LLVM_DEBUG(dbgs() << "with: ");2830LLVM_DEBUG(SrcMI->dump());2831LLVM_DEBUG(dbgs() << "Delete dead instruction: ");2832LLVM_DEBUG(CmpMI.dump());2833return true;2834}28352836bool PPCInstrInfo::getMemOperandsWithOffsetWidth(2837const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,2838int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,2839const TargetRegisterInfo *TRI) const {2840const MachineOperand *BaseOp;2841OffsetIsScalable = false;2842if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, Width, TRI))2843return false;2844BaseOps.push_back(BaseOp);2845return true;2846}28472848static bool isLdStSafeToCluster(const MachineInstr &LdSt,2849const TargetRegisterInfo *TRI) {2850// If this is a volatile load/store, don't mess with it.2851if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3)2852return false;28532854if (LdSt.getOperand(2).isFI())2855return true;28562857assert(LdSt.getOperand(2).isReg() && "Expected a reg operand.");2858// Can't cluster if the instruction modifies the base register2859// or it is update form. e.g. 
  // ld r2,3(r2)
  if (LdSt.modifiesRegister(LdSt.getOperand(2).getReg(), TRI))
    return false;

  return true;
}

// Only cluster instruction pair that have the same opcode, and they are
// clusterable according to PowerPC specification.
static bool isClusterableLdStOpcPair(unsigned FirstOpc, unsigned SecondOpc,
                                     const PPCSubtarget &Subtarget) {
  switch (FirstOpc) {
  default:
    return false;
  case PPC::STD:
  case PPC::STFD:
  case PPC::STXSD:
  case PPC::DFSTOREf64:
    return FirstOpc == SecondOpc;
  // PowerPC backend has opcode STW/STW8 for instruction "stw" to deal with
  // 32bit and 64bit instruction selection. They are clusterable pair though
  // they are different opcode.
  case PPC::STW:
  case PPC::STW8:
    return SecondOpc == PPC::STW || SecondOpc == PPC::STW8;
  }
}

bool PPCInstrInfo::shouldClusterMemOps(
    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
    bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
    int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
    unsigned NumBytes) const {

  assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
  const MachineOperand &BaseOp1 = *BaseOps1.front();
  const MachineOperand &BaseOp2 = *BaseOps2.front();
  assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
         "Only base registers and frame indices are supported.");

  // ClusterSize means the number of memory operations that will have been
  // clustered if this hook returns true.
  // Don't cluster memory op if there are already two ops clustered at least.
  if (ClusterSize > 2)
    return false;

  // Cluster the load/store only when they have the same base
  // register or FI.
  if ((BaseOp1.isReg() != BaseOp2.isReg()) ||
      (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg()) ||
      (BaseOp1.isFI() && BaseOp1.getIndex() != BaseOp2.getIndex()))
    return false;

  // Check if the load/store are clusterable according to the PowerPC
  // specification.
  const MachineInstr &FirstLdSt = *BaseOp1.getParent();
  const MachineInstr &SecondLdSt = *BaseOp2.getParent();
  unsigned FirstOpc = FirstLdSt.getOpcode();
  unsigned SecondOpc = SecondLdSt.getOpcode();
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  // Cluster the load/store only when they have the same opcode, and they are
  // clusterable opcode according to PowerPC specification.
  if (!isClusterableLdStOpcPair(FirstOpc, SecondOpc, Subtarget))
    return false;

  // Can't cluster load/store that have ordered or volatile memory reference.
  if (!isLdStSafeToCluster(FirstLdSt, TRI) ||
      !isLdStSafeToCluster(SecondLdSt, TRI))
    return false;

  int64_t Offset1 = 0, Offset2 = 0;
  LocationSize Width1 = 0, Width2 = 0;
  const MachineOperand *Base1 = nullptr, *Base2 = nullptr;
  if (!getMemOperandWithOffsetWidth(FirstLdSt, Base1, Offset1, Width1, TRI) ||
      !getMemOperandWithOffsetWidth(SecondLdSt, Base2, Offset2, Width2, TRI) ||
      Width1 != Width2)
    return false;

  assert(Base1 == &BaseOp1 && Base2 == &BaseOp2 &&
         "getMemOperandWithOffsetWidth return incorrect base op");
  // The caller should already have ordered FirstMemOp/SecondMemOp by offset.
  assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
  return Offset1 + (int64_t)Width1.getValue() == Offset2;
}

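// As an illustration, two STD stores off the same base register with offsets
// 0 and 8 (both 8 bytes wide) satisfy the checks above and are reported as
// clusterable, while an STD followed by an STFD is rejected because the
// opcodes differ.
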
/// GetInstSize - Return the number of bytes of code the specified
/// instruction may be. This returns the maximum number of bytes.
///
unsigned PPCInstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();

  if (Opcode == PPC::INLINEASM || Opcode == PPC::INLINEASM_BR) {
    const MachineFunction *MF = MI.getParent()->getParent();
    const char *AsmStr = MI.getOperand(0).getSymbolName();
    return getInlineAsmLength(AsmStr, *MF->getTarget().getMCAsmInfo());
  } else if (Opcode == TargetOpcode::STACKMAP) {
    StackMapOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else if (Opcode == TargetOpcode::PATCHPOINT) {
    PatchPointOpers Opers(&MI);
    return Opers.getNumPatchBytes();
  } else {
    return get(Opcode).getSize();
  }
}

std::pair<unsigned, unsigned>
PPCInstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
  // PPC always uses a direct mask.
  return std::make_pair(TF, 0u);
}

ArrayRef<std::pair<unsigned, const char *>>
PPCInstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
  using namespace PPCII;
  static const std::pair<unsigned, const char *> TargetFlags[] = {
      {MO_PLT, "ppc-plt"},
      {MO_PIC_FLAG, "ppc-pic"},
      {MO_PCREL_FLAG, "ppc-pcrel"},
      {MO_GOT_FLAG, "ppc-got"},
      {MO_PCREL_OPT_FLAG, "ppc-opt-pcrel"},
      {MO_TLSGD_FLAG, "ppc-tlsgd"},
      {MO_TPREL_FLAG, "ppc-tprel"},
      {MO_TLSLDM_FLAG, "ppc-tlsldm"},
      {MO_TLSLD_FLAG, "ppc-tlsld"},
      {MO_TLSGDM_FLAG, "ppc-tlsgdm"},
      {MO_GOT_TLSGD_PCREL_FLAG, "ppc-got-tlsgd-pcrel"},
      {MO_GOT_TLSLD_PCREL_FLAG, "ppc-got-tlsld-pcrel"},
      {MO_GOT_TPREL_PCREL_FLAG, "ppc-got-tprel-pcrel"},
      {MO_LO, "ppc-lo"},
      {MO_HA, "ppc-ha"},
      {MO_TPREL_LO, "ppc-tprel-lo"},
      {MO_TPREL_HA, "ppc-tprel-ha"},
      {MO_DTPREL_LO, "ppc-dtprel-lo"},
      {MO_TLSLD_LO, "ppc-tlsld-lo"},
      {MO_TOC_LO, "ppc-toc-lo"},
      {MO_TLS, "ppc-tls"},
      {MO_PIC_HA_FLAG, "ppc-ha-pic"},
      {MO_PIC_LO_FLAG, "ppc-lo-pic"},
      {MO_TPREL_PCREL_FLAG, "ppc-tprel-pcrel"},
      {MO_TLS_PCREL_FLAG, "ppc-tls-pcrel"},
      {MO_GOT_PCREL_FLAG, "ppc-got-pcrel"},
  };
  return ArrayRef(TargetFlags);
}

// Expand VSX Memory Pseudo instruction to either a VSX or a FP instruction.
// The VSX versions have the advantage of a full 64-register target whereas
// the FP ones have the advantage of lower latency and higher throughput.
So3008// what we are after is using the faster instructions in low register pressure3009// situations and using the larger register file in high register pressure3010// situations.3011bool PPCInstrInfo::expandVSXMemPseudo(MachineInstr &MI) const {3012unsigned UpperOpcode, LowerOpcode;3013switch (MI.getOpcode()) {3014case PPC::DFLOADf32:3015UpperOpcode = PPC::LXSSP;3016LowerOpcode = PPC::LFS;3017break;3018case PPC::DFLOADf64:3019UpperOpcode = PPC::LXSD;3020LowerOpcode = PPC::LFD;3021break;3022case PPC::DFSTOREf32:3023UpperOpcode = PPC::STXSSP;3024LowerOpcode = PPC::STFS;3025break;3026case PPC::DFSTOREf64:3027UpperOpcode = PPC::STXSD;3028LowerOpcode = PPC::STFD;3029break;3030case PPC::XFLOADf32:3031UpperOpcode = PPC::LXSSPX;3032LowerOpcode = PPC::LFSX;3033break;3034case PPC::XFLOADf64:3035UpperOpcode = PPC::LXSDX;3036LowerOpcode = PPC::LFDX;3037break;3038case PPC::XFSTOREf32:3039UpperOpcode = PPC::STXSSPX;3040LowerOpcode = PPC::STFSX;3041break;3042case PPC::XFSTOREf64:3043UpperOpcode = PPC::STXSDX;3044LowerOpcode = PPC::STFDX;3045break;3046case PPC::LIWAX:3047UpperOpcode = PPC::LXSIWAX;3048LowerOpcode = PPC::LFIWAX;3049break;3050case PPC::LIWZX:3051UpperOpcode = PPC::LXSIWZX;3052LowerOpcode = PPC::LFIWZX;3053break;3054case PPC::STIWX:3055UpperOpcode = PPC::STXSIWX;3056LowerOpcode = PPC::STFIWX;3057break;3058default:3059llvm_unreachable("Unknown Operation!");3060}30613062Register TargetReg = MI.getOperand(0).getReg();3063unsigned Opcode;3064if ((TargetReg >= PPC::F0 && TargetReg <= PPC::F31) ||3065(TargetReg >= PPC::VSL0 && TargetReg <= PPC::VSL31))3066Opcode = LowerOpcode;3067else3068Opcode = UpperOpcode;3069MI.setDesc(get(Opcode));3070return true;3071}30723073static bool isAnImmediateOperand(const MachineOperand &MO) {3074return MO.isCPI() || MO.isGlobal() || MO.isImm();3075}30763077bool PPCInstrInfo::expandPostRAPseudo(MachineInstr &MI) const {3078auto &MBB = *MI.getParent();3079auto DL = MI.getDebugLoc();30803081switch (MI.getOpcode()) {3082case PPC::BUILD_UACC: {3083MCRegister ACC = MI.getOperand(0).getReg();3084MCRegister UACC = MI.getOperand(1).getReg();3085if (ACC - PPC::ACC0 != UACC - PPC::UACC0) {3086MCRegister SrcVSR = PPC::VSL0 + (UACC - PPC::UACC0) * 4;3087MCRegister DstVSR = PPC::VSL0 + (ACC - PPC::ACC0) * 4;3088// FIXME: This can easily be improved to look up to the top of the MBB3089// to see if the inputs are XXLOR's. If they are and SrcReg is killed,3090// we can just re-target any such XXLOR's to DstVSR + offset.3091for (int VecNo = 0; VecNo < 4; VecNo++)3092BuildMI(MBB, MI, DL, get(PPC::XXLOR), DstVSR + VecNo)3093.addReg(SrcVSR + VecNo)3094.addReg(SrcVSR + VecNo);3095}3096// BUILD_UACC is expanded to 4 copies of the underlying vsx registers.3097// So after building the 4 copies, we can replace the BUILD_UACC instruction3098// with a NOP.3099[[fallthrough]];3100}3101case PPC::KILL_PAIR: {3102MI.setDesc(get(PPC::UNENCODED_NOP));3103MI.removeOperand(1);3104MI.removeOperand(0);3105return true;3106}3107case TargetOpcode::LOAD_STACK_GUARD: {3108assert(Subtarget.isTargetLinux() &&3109"Only Linux target is expected to contain LOAD_STACK_GUARD");3110const int64_t Offset = Subtarget.isPPC64() ? -0x7010 : -0x7008;3111const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;3112MI.setDesc(get(Subtarget.isPPC64() ? 
PPC::LD : PPC::LWZ));3113MachineInstrBuilder(*MI.getParent()->getParent(), MI)3114.addImm(Offset)3115.addReg(Reg);3116return true;3117}3118case PPC::PPCLdFixedAddr: {3119assert(Subtarget.getTargetTriple().isOSGlibc() &&3120"Only targets with Glibc expected to contain PPCLdFixedAddr");3121int64_t Offset = 0;3122const unsigned Reg = Subtarget.isPPC64() ? PPC::X13 : PPC::R2;3123MI.setDesc(get(PPC::LWZ));3124uint64_t FAType = MI.getOperand(1).getImm();3125#undef PPC_LNX_FEATURE3126#undef PPC_CPU3127#define PPC_LNX_DEFINE_OFFSETS3128#include "llvm/TargetParser/PPCTargetParser.def"3129bool IsLE = Subtarget.isLittleEndian();3130bool Is64 = Subtarget.isPPC64();3131if (FAType == PPC_FAWORD_HWCAP) {3132if (IsLE)3133Offset = Is64 ? PPC_HWCAP_OFFSET_LE64 : PPC_HWCAP_OFFSET_LE32;3134else3135Offset = Is64 ? PPC_HWCAP_OFFSET_BE64 : PPC_HWCAP_OFFSET_BE32;3136} else if (FAType == PPC_FAWORD_HWCAP2) {3137if (IsLE)3138Offset = Is64 ? PPC_HWCAP2_OFFSET_LE64 : PPC_HWCAP2_OFFSET_LE32;3139else3140Offset = Is64 ? PPC_HWCAP2_OFFSET_BE64 : PPC_HWCAP2_OFFSET_BE32;3141} else if (FAType == PPC_FAWORD_CPUID) {3142if (IsLE)3143Offset = Is64 ? PPC_CPUID_OFFSET_LE64 : PPC_CPUID_OFFSET_LE32;3144else3145Offset = Is64 ? PPC_CPUID_OFFSET_BE64 : PPC_CPUID_OFFSET_BE32;3146}3147assert(Offset && "Do not know the offset for this fixed addr load");3148MI.removeOperand(1);3149Subtarget.getTargetMachine().setGlibcHWCAPAccess();3150MachineInstrBuilder(*MI.getParent()->getParent(), MI)3151.addImm(Offset)3152.addReg(Reg);3153return true;3154#define PPC_TGT_PARSER_UNDEF_MACROS3155#include "llvm/TargetParser/PPCTargetParser.def"3156#undef PPC_TGT_PARSER_UNDEF_MACROS3157}3158case PPC::DFLOADf32:3159case PPC::DFLOADf64:3160case PPC::DFSTOREf32:3161case PPC::DFSTOREf64: {3162assert(Subtarget.hasP9Vector() &&3163"Invalid D-Form Pseudo-ops on Pre-P9 target.");3164assert(MI.getOperand(2).isReg() &&3165isAnImmediateOperand(MI.getOperand(1)) &&3166"D-form op must have register and immediate operands");3167return expandVSXMemPseudo(MI);3168}3169case PPC::XFLOADf32:3170case PPC::XFSTOREf32:3171case PPC::LIWAX:3172case PPC::LIWZX:3173case PPC::STIWX: {3174assert(Subtarget.hasP8Vector() &&3175"Invalid X-Form Pseudo-ops on Pre-P8 target.");3176assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&3177"X-form op must have register and register operands");3178return expandVSXMemPseudo(MI);3179}3180case PPC::XFLOADf64:3181case PPC::XFSTOREf64: {3182assert(Subtarget.hasVSX() &&3183"Invalid X-Form Pseudo-ops on target that has no VSX.");3184assert(MI.getOperand(2).isReg() && MI.getOperand(1).isReg() &&3185"X-form op must have register and register operands");3186return expandVSXMemPseudo(MI);3187}3188case PPC::SPILLTOVSR_LD: {3189Register TargetReg = MI.getOperand(0).getReg();3190if (PPC::VSFRCRegClass.contains(TargetReg)) {3191MI.setDesc(get(PPC::DFLOADf64));3192return expandPostRAPseudo(MI);3193}3194else3195MI.setDesc(get(PPC::LD));3196return true;3197}3198case PPC::SPILLTOVSR_ST: {3199Register SrcReg = MI.getOperand(0).getReg();3200if (PPC::VSFRCRegClass.contains(SrcReg)) {3201NumStoreSPILLVSRRCAsVec++;3202MI.setDesc(get(PPC::DFSTOREf64));3203return expandPostRAPseudo(MI);3204} else {3205NumStoreSPILLVSRRCAsGpr++;3206MI.setDesc(get(PPC::STD));3207}3208return true;3209}3210case PPC::SPILLTOVSR_LDX: {3211Register TargetReg = MI.getOperand(0).getReg();3212if (PPC::VSFRCRegClass.contains(TargetReg))3213MI.setDesc(get(PPC::LXSDX));3214else3215MI.setDesc(get(PPC::LDX));3216return true;3217}3218case PPC::SPILLTOVSR_STX: {3219Register SrcReg = 
MI.getOperand(0).getReg();3220if (PPC::VSFRCRegClass.contains(SrcReg)) {3221NumStoreSPILLVSRRCAsVec++;3222MI.setDesc(get(PPC::STXSDX));3223} else {3224NumStoreSPILLVSRRCAsGpr++;3225MI.setDesc(get(PPC::STDX));3226}3227return true;3228}32293230// FIXME: Maybe we can expand it in 'PowerPC Expand Atomic' pass.3231case PPC::CFENCE:3232case PPC::CFENCE8: {3233auto Val = MI.getOperand(0).getReg();3234unsigned CmpOp = Subtarget.isPPC64() ? PPC::CMPD : PPC::CMPW;3235BuildMI(MBB, MI, DL, get(CmpOp), PPC::CR7).addReg(Val).addReg(Val);3236BuildMI(MBB, MI, DL, get(PPC::CTRL_DEP))3237.addImm(PPC::PRED_NE_MINUS)3238.addReg(PPC::CR7)3239.addImm(1);3240MI.setDesc(get(PPC::ISYNC));3241MI.removeOperand(0);3242return true;3243}3244}3245return false;3246}32473248// Essentially a compile-time implementation of a compare->isel sequence.3249// It takes two constants to compare, along with the true/false registers3250// and the comparison type (as a subreg to a CR field) and returns one3251// of the true/false registers, depending on the comparison results.3252static unsigned selectReg(int64_t Imm1, int64_t Imm2, unsigned CompareOpc,3253unsigned TrueReg, unsigned FalseReg,3254unsigned CRSubReg) {3255// Signed comparisons. The immediates are assumed to be sign-extended.3256if (CompareOpc == PPC::CMPWI || CompareOpc == PPC::CMPDI) {3257switch (CRSubReg) {3258default: llvm_unreachable("Unknown integer comparison type.");3259case PPC::sub_lt:3260return Imm1 < Imm2 ? TrueReg : FalseReg;3261case PPC::sub_gt:3262return Imm1 > Imm2 ? TrueReg : FalseReg;3263case PPC::sub_eq:3264return Imm1 == Imm2 ? TrueReg : FalseReg;3265}3266}3267// Unsigned comparisons.3268else if (CompareOpc == PPC::CMPLWI || CompareOpc == PPC::CMPLDI) {3269switch (CRSubReg) {3270default: llvm_unreachable("Unknown integer comparison type.");3271case PPC::sub_lt:3272return (uint64_t)Imm1 < (uint64_t)Imm2 ? TrueReg : FalseReg;3273case PPC::sub_gt:3274return (uint64_t)Imm1 > (uint64_t)Imm2 ? TrueReg : FalseReg;3275case PPC::sub_eq:3276return Imm1 == Imm2 ? TrueReg : FalseReg;3277}3278}3279return PPC::NoRegister;3280}32813282void PPCInstrInfo::replaceInstrOperandWithImm(MachineInstr &MI,3283unsigned OpNo,3284int64_t Imm) const {3285assert(MI.getOperand(OpNo).isReg() && "Operand must be a REG");3286// Replace the REG with the Immediate.3287Register InUseReg = MI.getOperand(OpNo).getReg();3288MI.getOperand(OpNo).ChangeToImmediate(Imm);32893290// We need to make sure that the MI didn't have any implicit use3291// of this REG any more. 
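  // Hypothetical illustration (the opcode and operand layout are made up,
  // not taken from the source): if MI were
  //   $x3 = FOO8 killed $x5, implicit killed $x5
  // and operand 1 is turned into an immediate, the trailing implicit use of
  // $x5 would be stale, so it is searched for and removed below.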
We don't call MI.implicit_operands().empty() to3292// return early, since MI's MCID might be changed in calling context, as a3293// result its number of explicit operands may be changed, thus the begin of3294// implicit operand is changed.3295const TargetRegisterInfo *TRI = &getRegisterInfo();3296int UseOpIdx = MI.findRegisterUseOperandIdx(InUseReg, TRI, false);3297if (UseOpIdx >= 0) {3298MachineOperand &MO = MI.getOperand(UseOpIdx);3299if (MO.isImplicit())3300// The operands must always be in the following order:3301// - explicit reg defs,3302// - other explicit operands (reg uses, immediates, etc.),3303// - implicit reg defs3304// - implicit reg uses3305// Therefore, removing the implicit operand won't change the explicit3306// operands layout.3307MI.removeOperand(UseOpIdx);3308}3309}33103311// Replace an instruction with one that materializes a constant (and sets3312// CR0 if the original instruction was a record-form instruction).3313void PPCInstrInfo::replaceInstrWithLI(MachineInstr &MI,3314const LoadImmediateInfo &LII) const {3315// Remove existing operands.3316int OperandToKeep = LII.SetCR ? 1 : 0;3317for (int i = MI.getNumOperands() - 1; i > OperandToKeep; i--)3318MI.removeOperand(i);33193320// Replace the instruction.3321if (LII.SetCR) {3322MI.setDesc(get(LII.Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));3323// Set the immediate.3324MachineInstrBuilder(*MI.getParent()->getParent(), MI)3325.addImm(LII.Imm).addReg(PPC::CR0, RegState::ImplicitDefine);3326return;3327}3328else3329MI.setDesc(get(LII.Is64Bit ? PPC::LI8 : PPC::LI));33303331// Set the immediate.3332MachineInstrBuilder(*MI.getParent()->getParent(), MI)3333.addImm(LII.Imm);3334}33353336MachineInstr *PPCInstrInfo::getDefMIPostRA(unsigned Reg, MachineInstr &MI,3337bool &SeenIntermediateUse) const {3338assert(!MI.getParent()->getParent()->getRegInfo().isSSA() &&3339"Should be called after register allocation.");3340const TargetRegisterInfo *TRI = &getRegisterInfo();3341MachineBasicBlock::reverse_iterator E = MI.getParent()->rend(), It = MI;3342It++;3343SeenIntermediateUse = false;3344for (; It != E; ++It) {3345if (It->modifiesRegister(Reg, TRI))3346return &*It;3347if (It->readsRegister(Reg, TRI))3348SeenIntermediateUse = true;3349}3350return nullptr;3351}33523353void PPCInstrInfo::materializeImmPostRA(MachineBasicBlock &MBB,3354MachineBasicBlock::iterator MBBI,3355const DebugLoc &DL, Register Reg,3356int64_t Imm) const {3357assert(!MBB.getParent()->getRegInfo().isSSA() &&3358"Register should be in non-SSA form after RA");3359bool isPPC64 = Subtarget.isPPC64();3360// FIXME: Materialization here is not optimal.3361// For some special bit patterns we can use less instructions.3362// See `selectI64ImmDirect` in PPCISelDAGToDAG.cpp.3363if (isInt<16>(Imm)) {3364BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LI8 : PPC::LI), Reg).addImm(Imm);3365} else if (isInt<32>(Imm)) {3366BuildMI(MBB, MBBI, DL, get(isPPC64 ? PPC::LIS8 : PPC::LIS), Reg)3367.addImm(Imm >> 16);3368if (Imm & 0xFFFF)3369BuildMI(MBB, MBBI, DL, get(isPPC64 ? 
PPC::ORI8 : PPC::ORI), Reg)3370.addReg(Reg, RegState::Kill)3371.addImm(Imm & 0xFFFF);3372} else {3373assert(isPPC64 && "Materializing 64-bit immediate to single register is "3374"only supported in PPC64");3375BuildMI(MBB, MBBI, DL, get(PPC::LIS8), Reg).addImm(Imm >> 48);3376if ((Imm >> 32) & 0xFFFF)3377BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)3378.addReg(Reg, RegState::Kill)3379.addImm((Imm >> 32) & 0xFFFF);3380BuildMI(MBB, MBBI, DL, get(PPC::RLDICR), Reg)3381.addReg(Reg, RegState::Kill)3382.addImm(32)3383.addImm(31);3384BuildMI(MBB, MBBI, DL, get(PPC::ORIS8), Reg)3385.addReg(Reg, RegState::Kill)3386.addImm((Imm >> 16) & 0xFFFF);3387if (Imm & 0xFFFF)3388BuildMI(MBB, MBBI, DL, get(PPC::ORI8), Reg)3389.addReg(Reg, RegState::Kill)3390.addImm(Imm & 0xFFFF);3391}3392}33933394MachineInstr *PPCInstrInfo::getForwardingDefMI(3395MachineInstr &MI,3396unsigned &OpNoForForwarding,3397bool &SeenIntermediateUse) const {3398OpNoForForwarding = ~0U;3399MachineInstr *DefMI = nullptr;3400MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();3401const TargetRegisterInfo *TRI = &getRegisterInfo();3402// If we're in SSA, get the defs through the MRI. Otherwise, only look3403// within the basic block to see if the register is defined using an3404// LI/LI8/ADDI/ADDI8.3405if (MRI->isSSA()) {3406for (int i = 1, e = MI.getNumOperands(); i < e; i++) {3407if (!MI.getOperand(i).isReg())3408continue;3409Register Reg = MI.getOperand(i).getReg();3410if (!Reg.isVirtual())3411continue;3412Register TrueReg = TRI->lookThruCopyLike(Reg, MRI);3413if (TrueReg.isVirtual()) {3414MachineInstr *DefMIForTrueReg = MRI->getVRegDef(TrueReg);3415if (DefMIForTrueReg->getOpcode() == PPC::LI ||3416DefMIForTrueReg->getOpcode() == PPC::LI8 ||3417DefMIForTrueReg->getOpcode() == PPC::ADDI ||3418DefMIForTrueReg->getOpcode() == PPC::ADDI8) {3419OpNoForForwarding = i;3420DefMI = DefMIForTrueReg;3421// The ADDI and LI operand maybe exist in one instruction at same3422// time. we prefer to fold LI operand as LI only has one Imm operand3423// and is more possible to be converted. So if current DefMI is3424// ADDI/ADDI8, we continue to find possible LI/LI8.3425if (DefMI->getOpcode() == PPC::LI || DefMI->getOpcode() == PPC::LI8)3426break;3427}3428}3429}3430} else {3431// Looking back through the definition for each operand could be expensive,3432// so exit early if this isn't an instruction that either has an immediate3433// form or is already an immediate form that we can handle.3434ImmInstrInfo III;3435unsigned Opc = MI.getOpcode();3436bool ConvertibleImmForm =3437Opc == PPC::CMPWI || Opc == PPC::CMPLWI || Opc == PPC::CMPDI ||3438Opc == PPC::CMPLDI || Opc == PPC::ADDI || Opc == PPC::ADDI8 ||3439Opc == PPC::ORI || Opc == PPC::ORI8 || Opc == PPC::XORI ||3440Opc == PPC::XORI8 || Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec ||3441Opc == PPC::RLDICL_32 || Opc == PPC::RLDICL_32_64 ||3442Opc == PPC::RLWINM || Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8 ||3443Opc == PPC::RLWINM8_rec;3444bool IsVFReg = (MI.getNumOperands() && MI.getOperand(0).isReg())3445? 
PPC::isVFRegister(MI.getOperand(0).getReg())3446: false;3447if (!ConvertibleImmForm && !instrHasImmForm(Opc, IsVFReg, III, true))3448return nullptr;34493450// Don't convert or %X, %Y, %Y since that's just a register move.3451if ((Opc == PPC::OR || Opc == PPC::OR8) &&3452MI.getOperand(1).getReg() == MI.getOperand(2).getReg())3453return nullptr;3454for (int i = 1, e = MI.getNumOperands(); i < e; i++) {3455MachineOperand &MO = MI.getOperand(i);3456SeenIntermediateUse = false;3457if (MO.isReg() && MO.isUse() && !MO.isImplicit()) {3458Register Reg = MI.getOperand(i).getReg();3459// If we see another use of this reg between the def and the MI,3460// we want to flag it so the def isn't deleted.3461MachineInstr *DefMI = getDefMIPostRA(Reg, MI, SeenIntermediateUse);3462if (DefMI) {3463// Is this register defined by some form of add-immediate (including3464// load-immediate) within this basic block?3465switch (DefMI->getOpcode()) {3466default:3467break;3468case PPC::LI:3469case PPC::LI8:3470case PPC::ADDItocL8:3471case PPC::ADDI:3472case PPC::ADDI8:3473OpNoForForwarding = i;3474return DefMI;3475}3476}3477}3478}3479}3480return OpNoForForwarding == ~0U ? nullptr : DefMI;3481}34823483unsigned PPCInstrInfo::getSpillTarget() const {3484// With P10, we may need to spill paired vector registers or accumulator3485// registers. MMA implies paired vectors, so we can just check that.3486bool IsP10Variant = Subtarget.isISA3_1() || Subtarget.pairedVectorMemops();3487// P11 uses the P10 target.3488return Subtarget.isISAFuture() ? 3 : IsP10Variant ?34892 : Subtarget.hasP9Vector() ?34901 : 0;3491}34923493ArrayRef<unsigned> PPCInstrInfo::getStoreOpcodesForSpillArray() const {3494return {StoreSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};3495}34963497ArrayRef<unsigned> PPCInstrInfo::getLoadOpcodesForSpillArray() const {3498return {LoadSpillOpcodesArray[getSpillTarget()], SOK_LastOpcodeSpill};3499}35003501// This opt tries to convert the following imm form to an index form to save an3502// add for stack variables.3503// Return false if no such pattern found.3504//3505// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi3506// ADD instr: ToBeDeletedReg = ADD ToBeChangedReg(killed), ScaleReg3507// Imm instr: Reg = op OffsetImm, ToBeDeletedReg(killed)3508//3509// can be converted to:3510//3511// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, (OffsetAddi + OffsetImm)3512// Index instr: Reg = opx ScaleReg, ToBeChangedReg(killed)3513//3514// In order to eliminate ADD instr, make sure that:3515// 1: (OffsetAddi + OffsetImm) must be int16 since this offset will be used in3516// new ADDI instr and ADDI can only take int16 Imm.3517// 2: ToBeChangedReg must be killed in ADD instr and there is no other use3518// between ADDI and ADD instr since its original def in ADDI will be changed3519// in new ADDI instr. And also there should be no new def for it between3520// ADD and Imm instr as ToBeChangedReg will be used in Index instr.3521// 3: ToBeDeletedReg must be killed in Imm instr and there is no other use3522// between ADD and Imm instr since ADD instr will be eliminated.3523// 4: ScaleReg must not be redefined between ADD and Imm instr since it will be3524// moved to Index instr.3525bool PPCInstrInfo::foldFrameOffset(MachineInstr &MI) const {3526MachineFunction *MF = MI.getParent()->getParent();3527MachineRegisterInfo *MRI = &MF->getRegInfo();3528bool PostRA = !MRI->isSSA();3529// Do this opt after PEI which is after RA. 
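  // A concrete sketch of the pattern (registers and offsets are illustrative,
  // not taken from a test case):
  //   ADDI instr: $x3 = ADDI8 $x1, 16          ; OffsetAddi = 16
  //   ADD  instr: $x4 = ADD8 killed $x3, $x5   ; ScaleReg   = $x5
  //   Imm  instr: $x6 = LD 8, killed $x4       ; OffsetImm  = 8
  // is rewritten to
  //   new ADDI   : $x3 = ADDI8 $x1, 24
  //   Index instr: $x6 = LDX $x5, killed $x3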
  // The reason is that stack slot expansion in PEI may expose such
  // opportunities: stack slot offsets relative to the frame base (OffsetAddi)
  // are only determined in PEI.
  if (!PostRA)
    return false;
  unsigned ToBeDeletedReg = 0;
  int64_t OffsetImm = 0;
  unsigned XFormOpcode = 0;
  ImmInstrInfo III;

  // Check if the Imm instr meets the requirements.
  if (!isImmInstrEligibleForFolding(MI, ToBeDeletedReg, XFormOpcode, OffsetImm,
                                    III))
    return false;

  bool OtherIntermediateUse = false;
  MachineInstr *ADDMI = getDefMIPostRA(ToBeDeletedReg, MI, OtherIntermediateUse);

  // Exit if there is another use between the ADD and Imm instr, or no def was
  // found.
  if (OtherIntermediateUse || !ADDMI)
    return false;

  // Check if the ADD instr meets the requirements.
  if (!isADDInstrEligibleForFolding(*ADDMI))
    return false;

  unsigned ScaleRegIdx = 0;
  int64_t OffsetAddi = 0;
  MachineInstr *ADDIMI = nullptr;

  // Check if there is a valid ToBeChangedReg in ADDMI.
  // 1: It must be killed.
  // 2: Its definition must be a valid ADDIMI.
  // 3: It must satisfy the int16 offset requirement.
  if (isValidToBeChangedReg(ADDMI, 1, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 2;
  else if (isValidToBeChangedReg(ADDMI, 2, ADDIMI, OffsetAddi, OffsetImm))
    ScaleRegIdx = 1;
  else
    return false;

  assert(ADDIMI && "There should be ADDIMI for valid ToBeChangedReg.");
  Register ToBeChangedReg = ADDIMI->getOperand(0).getReg();
  Register ScaleReg = ADDMI->getOperand(ScaleRegIdx).getReg();
  auto NewDefFor = [&](unsigned Reg, MachineBasicBlock::iterator Start,
                       MachineBasicBlock::iterator End) {
    for (auto It = ++Start; It != End; It++)
      if (It->modifiesRegister(Reg, &getRegisterInfo()))
        return true;
    return false;
  };

  // We are trying to replace the ImmOpNo with ScaleReg.
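  // If ScaleReg is R0/X0, the hardware does not read that slot as a register:
  // for X-form memory ops an RA field of 0 means a literal zero base (e.g.,
  // an illustrative "lwzx r3, 0, r4" loads from the address in r4 alone), so
  // forwarding R0/X0 into that slot would change the semantics.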
Give up if it is3582// treated as special zero when ScaleReg is R0/X0 register.3583if (III.ZeroIsSpecialOrig == III.ImmOpNo &&3584(ScaleReg == PPC::R0 || ScaleReg == PPC::X0))3585return false;35863587// Make sure no other def for ToBeChangedReg and ScaleReg between ADD Instr3588// and Imm Instr.3589if (NewDefFor(ToBeChangedReg, *ADDMI, MI) || NewDefFor(ScaleReg, *ADDMI, MI))3590return false;35913592// Now start to do the transformation.3593LLVM_DEBUG(dbgs() << "Replace instruction: "3594<< "\n");3595LLVM_DEBUG(ADDIMI->dump());3596LLVM_DEBUG(ADDMI->dump());3597LLVM_DEBUG(MI.dump());3598LLVM_DEBUG(dbgs() << "with: "3599<< "\n");36003601// Update ADDI instr.3602ADDIMI->getOperand(2).setImm(OffsetAddi + OffsetImm);36033604// Update Imm instr.3605MI.setDesc(get(XFormOpcode));3606MI.getOperand(III.ImmOpNo)3607.ChangeToRegister(ScaleReg, false, false,3608ADDMI->getOperand(ScaleRegIdx).isKill());36093610MI.getOperand(III.OpNoForForwarding)3611.ChangeToRegister(ToBeChangedReg, false, false, true);36123613// Eliminate ADD instr.3614ADDMI->eraseFromParent();36153616LLVM_DEBUG(ADDIMI->dump());3617LLVM_DEBUG(MI.dump());36183619return true;3620}36213622bool PPCInstrInfo::isADDIInstrEligibleForFolding(MachineInstr &ADDIMI,3623int64_t &Imm) const {3624unsigned Opc = ADDIMI.getOpcode();36253626// Exit if the instruction is not ADDI.3627if (Opc != PPC::ADDI && Opc != PPC::ADDI8)3628return false;36293630// The operand may not necessarily be an immediate - it could be a relocation.3631if (!ADDIMI.getOperand(2).isImm())3632return false;36333634Imm = ADDIMI.getOperand(2).getImm();36353636return true;3637}36383639bool PPCInstrInfo::isADDInstrEligibleForFolding(MachineInstr &ADDMI) const {3640unsigned Opc = ADDMI.getOpcode();36413642// Exit if the instruction is not ADD.3643return Opc == PPC::ADD4 || Opc == PPC::ADD8;3644}36453646bool PPCInstrInfo::isImmInstrEligibleForFolding(MachineInstr &MI,3647unsigned &ToBeDeletedReg,3648unsigned &XFormOpcode,3649int64_t &OffsetImm,3650ImmInstrInfo &III) const {3651// Only handle load/store.3652if (!MI.mayLoadOrStore())3653return false;36543655unsigned Opc = MI.getOpcode();36563657XFormOpcode = RI.getMappedIdxOpcForImmOpc(Opc);36583659// Exit if instruction has no index form.3660if (XFormOpcode == PPC::INSTRUCTION_LIST_END)3661return false;36623663// TODO: sync the logic between instrHasImmForm() and ImmToIdxMap.3664if (!instrHasImmForm(XFormOpcode,3665PPC::isVFRegister(MI.getOperand(0).getReg()), III, true))3666return false;36673668if (!III.IsSummingOperands)3669return false;36703671MachineOperand ImmOperand = MI.getOperand(III.ImmOpNo);3672MachineOperand RegOperand = MI.getOperand(III.OpNoForForwarding);3673// Only support imm operands, not relocation slots or others.3674if (!ImmOperand.isImm())3675return false;36763677assert(RegOperand.isReg() && "Instruction format is not right");36783679// There are other use for ToBeDeletedReg after Imm instr, can not delete it.3680if (!RegOperand.isKill())3681return false;36823683ToBeDeletedReg = RegOperand.getReg();3684OffsetImm = ImmOperand.getImm();36853686return true;3687}36883689bool PPCInstrInfo::isValidToBeChangedReg(MachineInstr *ADDMI, unsigned Index,3690MachineInstr *&ADDIMI,3691int64_t &OffsetAddi,3692int64_t OffsetImm) const {3693assert((Index == 1 || Index == 2) && "Invalid operand index for add.");3694MachineOperand &MO = ADDMI->getOperand(Index);36953696if (!MO.isKill())3697return false;36983699bool OtherIntermediateUse = false;37003701ADDIMI = getDefMIPostRA(MO.getReg(), *ADDMI, OtherIntermediateUse);3702// Currently 
handle only one "add + Imminstr" pair case, exit if other3703// intermediate use for ToBeChangedReg found.3704// TODO: handle the cases where there are other "add + Imminstr" pairs3705// with same offset in Imminstr which is like:3706//3707// ADDI instr: ToBeChangedReg = ADDI FrameBaseReg, OffsetAddi3708// ADD instr1: ToBeDeletedReg1 = ADD ToBeChangedReg, ScaleReg13709// Imm instr1: Reg1 = op1 OffsetImm, ToBeDeletedReg1(killed)3710// ADD instr2: ToBeDeletedReg2 = ADD ToBeChangedReg(killed), ScaleReg23711// Imm instr2: Reg2 = op2 OffsetImm, ToBeDeletedReg2(killed)3712//3713// can be converted to:3714//3715// new ADDI instr: ToBeChangedReg = ADDI FrameBaseReg,3716// (OffsetAddi + OffsetImm)3717// Index instr1: Reg1 = opx1 ScaleReg1, ToBeChangedReg3718// Index instr2: Reg2 = opx2 ScaleReg2, ToBeChangedReg(killed)37193720if (OtherIntermediateUse || !ADDIMI)3721return false;3722// Check if ADDI instr meets requirement.3723if (!isADDIInstrEligibleForFolding(*ADDIMI, OffsetAddi))3724return false;37253726if (isInt<16>(OffsetAddi + OffsetImm))3727return true;3728return false;3729}37303731// If this instruction has an immediate form and one of its operands is a3732// result of a load-immediate or an add-immediate, convert it to3733// the immediate form if the constant is in range.3734bool PPCInstrInfo::convertToImmediateForm(MachineInstr &MI,3735SmallSet<Register, 4> &RegsToUpdate,3736MachineInstr **KilledDef) const {3737MachineFunction *MF = MI.getParent()->getParent();3738MachineRegisterInfo *MRI = &MF->getRegInfo();3739bool PostRA = !MRI->isSSA();3740bool SeenIntermediateUse = true;3741unsigned ForwardingOperand = ~0U;3742MachineInstr *DefMI = getForwardingDefMI(MI, ForwardingOperand,3743SeenIntermediateUse);3744if (!DefMI)3745return false;3746assert(ForwardingOperand < MI.getNumOperands() &&3747"The forwarding operand needs to be valid at this point");3748bool IsForwardingOperandKilled = MI.getOperand(ForwardingOperand).isKill();3749bool KillFwdDefMI = !SeenIntermediateUse && IsForwardingOperandKilled;3750if (KilledDef && KillFwdDefMI)3751*KilledDef = DefMI;37523753// Conservatively add defs from DefMI and defs/uses from MI to the set of3754// registers that need their kill flags updated.3755for (const MachineOperand &MO : DefMI->operands())3756if (MO.isReg() && MO.isDef())3757RegsToUpdate.insert(MO.getReg());3758for (const MachineOperand &MO : MI.operands())3759if (MO.isReg())3760RegsToUpdate.insert(MO.getReg());37613762// If this is a imm instruction and its register operands is produced by ADDI,3763// put the imm into imm inst directly.3764if (RI.getMappedIdxOpcForImmOpc(MI.getOpcode()) !=3765PPC::INSTRUCTION_LIST_END &&3766transformToNewImmFormFedByAdd(MI, *DefMI, ForwardingOperand))3767return true;37683769ImmInstrInfo III;3770bool IsVFReg = MI.getOperand(0).isReg()3771? 
PPC::isVFRegister(MI.getOperand(0).getReg())3772: false;3773bool HasImmForm = instrHasImmForm(MI.getOpcode(), IsVFReg, III, PostRA);3774// If this is a reg+reg instruction that has a reg+imm form,3775// and one of the operands is produced by an add-immediate,3776// try to convert it.3777if (HasImmForm &&3778transformToImmFormFedByAdd(MI, III, ForwardingOperand, *DefMI,3779KillFwdDefMI))3780return true;37813782// If this is a reg+reg instruction that has a reg+imm form,3783// and one of the operands is produced by LI, convert it now.3784if (HasImmForm &&3785transformToImmFormFedByLI(MI, III, ForwardingOperand, *DefMI))3786return true;37873788// If this is not a reg+reg, but the DefMI is LI/LI8, check if its user MI3789// can be simpified to LI.3790if (!HasImmForm && simplifyToLI(MI, *DefMI, ForwardingOperand, KilledDef))3791return true;37923793return false;3794}37953796bool PPCInstrInfo::combineRLWINM(MachineInstr &MI,3797MachineInstr **ToErase) const {3798MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();3799Register FoldingReg = MI.getOperand(1).getReg();3800if (!FoldingReg.isVirtual())3801return false;3802MachineInstr *SrcMI = MRI->getVRegDef(FoldingReg);3803if (SrcMI->getOpcode() != PPC::RLWINM &&3804SrcMI->getOpcode() != PPC::RLWINM_rec &&3805SrcMI->getOpcode() != PPC::RLWINM8 &&3806SrcMI->getOpcode() != PPC::RLWINM8_rec)3807return false;3808assert((MI.getOperand(2).isImm() && MI.getOperand(3).isImm() &&3809MI.getOperand(4).isImm() && SrcMI->getOperand(2).isImm() &&3810SrcMI->getOperand(3).isImm() && SrcMI->getOperand(4).isImm()) &&3811"Invalid PPC::RLWINM Instruction!");3812uint64_t SHSrc = SrcMI->getOperand(2).getImm();3813uint64_t SHMI = MI.getOperand(2).getImm();3814uint64_t MBSrc = SrcMI->getOperand(3).getImm();3815uint64_t MBMI = MI.getOperand(3).getImm();3816uint64_t MESrc = SrcMI->getOperand(4).getImm();3817uint64_t MEMI = MI.getOperand(4).getImm();38183819assert((MEMI < 32 && MESrc < 32 && MBMI < 32 && MBSrc < 32) &&3820"Invalid PPC::RLWINM Instruction!");3821// If MBMI is bigger than MEMI, we always can not get run of ones.3822// RotatedSrcMask non-wrap:3823// 0........31|32........633824// RotatedSrcMask: B---E B---E3825// MaskMI: -----------|--E B------3826// Result: ----- --- (Bad candidate)3827//3828// RotatedSrcMask wrap:3829// 0........31|32........633830// RotatedSrcMask: --E B----|--E B----3831// MaskMI: -----------|--E B------3832// Result: --- -----|--- ----- (Bad candidate)3833//3834// One special case is RotatedSrcMask is a full set mask.3835// RotatedSrcMask full:3836// 0........31|32........633837// RotatedSrcMask: ------EB---|-------EB---3838// MaskMI: -----------|--E B------3839// Result: -----------|--- ------- (Good candidate)38403841// Mark special case.3842bool SrcMaskFull = (MBSrc - MESrc == 1) || (MBSrc == 0 && MESrc == 31);38433844// For other MBMI > MEMI cases, just return.3845if ((MBMI > MEMI) && !SrcMaskFull)3846return false;38473848// Handle MBMI <= MEMI cases.3849APInt MaskMI = APInt::getBitsSetWithWrap(32, 32 - MEMI - 1, 32 - MBMI);3850// In MI, we only need low 32 bits of SrcMI, just consider about low 323851// bit of SrcMI mask. 
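  // Worked example (immediates chosen for illustration):
  //   SrcMI: $r4 = RLWINM $r3, 27, 5, 31   ; srwi 5, mask = 0x07FFFFFF
  //   MI   : $r5 = RLWINM $r4, 0, 24, 31   ; mask = 0x000000FF
  // FinalMask = 0x000000FF is a run of ones, so MI can be rewritten as
  //   $r5 = RLWINM $r3, 27, 24, 31
  // and SrcMI may become dead.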
Note that in APInt, lowerest bit is at index 0,3852// while in PowerPC ISA, lowerest bit is at index 63.3853APInt MaskSrc = APInt::getBitsSetWithWrap(32, 32 - MESrc - 1, 32 - MBSrc);38543855APInt RotatedSrcMask = MaskSrc.rotl(SHMI);3856APInt FinalMask = RotatedSrcMask & MaskMI;3857uint32_t NewMB, NewME;3858bool Simplified = false;38593860// If final mask is 0, MI result should be 0 too.3861if (FinalMask.isZero()) {3862bool Is64Bit =3863(MI.getOpcode() == PPC::RLWINM8 || MI.getOpcode() == PPC::RLWINM8_rec);3864Simplified = true;3865LLVM_DEBUG(dbgs() << "Replace Instr: ");3866LLVM_DEBUG(MI.dump());38673868if (MI.getOpcode() == PPC::RLWINM || MI.getOpcode() == PPC::RLWINM8) {3869// Replace MI with "LI 0"3870MI.removeOperand(4);3871MI.removeOperand(3);3872MI.removeOperand(2);3873MI.getOperand(1).ChangeToImmediate(0);3874MI.setDesc(get(Is64Bit ? PPC::LI8 : PPC::LI));3875} else {3876// Replace MI with "ANDI_rec reg, 0"3877MI.removeOperand(4);3878MI.removeOperand(3);3879MI.getOperand(2).setImm(0);3880MI.setDesc(get(Is64Bit ? PPC::ANDI8_rec : PPC::ANDI_rec));3881MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());3882if (SrcMI->getOperand(1).isKill()) {3883MI.getOperand(1).setIsKill(true);3884SrcMI->getOperand(1).setIsKill(false);3885} else3886// About to replace MI.getOperand(1), clear its kill flag.3887MI.getOperand(1).setIsKill(false);3888}38893890LLVM_DEBUG(dbgs() << "With: ");3891LLVM_DEBUG(MI.dump());38923893} else if ((isRunOfOnes((unsigned)(FinalMask.getZExtValue()), NewMB, NewME) &&3894NewMB <= NewME) ||3895SrcMaskFull) {3896// Here we only handle MBMI <= MEMI case, so NewMB must be no bigger3897// than NewME. Otherwise we get a 64 bit value after folding, but MI3898// return a 32 bit value.3899Simplified = true;3900LLVM_DEBUG(dbgs() << "Converting Instr: ");3901LLVM_DEBUG(MI.dump());39023903uint16_t NewSH = (SHSrc + SHMI) % 32;3904MI.getOperand(2).setImm(NewSH);3905// If SrcMI mask is full, no need to update MBMI and MEMI.3906if (!SrcMaskFull) {3907MI.getOperand(3).setImm(NewMB);3908MI.getOperand(4).setImm(NewME);3909}3910MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());3911if (SrcMI->getOperand(1).isKill()) {3912MI.getOperand(1).setIsKill(true);3913SrcMI->getOperand(1).setIsKill(false);3914} else3915// About to replace MI.getOperand(1), clear its kill flag.3916MI.getOperand(1).setIsKill(false);39173918LLVM_DEBUG(dbgs() << "To: ");3919LLVM_DEBUG(MI.dump());3920}3921if (Simplified & MRI->use_nodbg_empty(FoldingReg) &&3922!SrcMI->hasImplicitDef()) {3923// If FoldingReg has no non-debug use and it has no implicit def (it3924// is not RLWINMO or RLWINM8o), it's safe to delete its def SrcMI.3925// Otherwise keep it.3926*ToErase = SrcMI;3927LLVM_DEBUG(dbgs() << "Delete dead instruction: ");3928LLVM_DEBUG(SrcMI->dump());3929}3930return Simplified;3931}39323933bool PPCInstrInfo::instrHasImmForm(unsigned Opc, bool IsVFReg,3934ImmInstrInfo &III, bool PostRA) const {3935// The vast majority of the instructions would need their operand 2 replaced3936// with an immediate when switching to the reg+imm form. 
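  // For example (illustrative): if the second source operand is defined by a
  // load-immediate of 10,
  //   x = add8 y, z   -->  x = addi8 y, 10
  //   x = lwzx y, z   -->  x = lwz 10(y)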
A marked exception3937// are the update form loads/stores for which a constant operand 2 would need3938// to turn into a displacement and move operand 1 to the operand 2 position.3939III.ImmOpNo = 2;3940III.OpNoForForwarding = 2;3941III.ImmWidth = 16;3942III.ImmMustBeMultipleOf = 1;3943III.TruncateImmTo = 0;3944III.IsSummingOperands = false;3945switch (Opc) {3946default: return false;3947case PPC::ADD4:3948case PPC::ADD8:3949III.SignedImm = true;3950III.ZeroIsSpecialOrig = 0;3951III.ZeroIsSpecialNew = 1;3952III.IsCommutative = true;3953III.IsSummingOperands = true;3954III.ImmOpcode = Opc == PPC::ADD4 ? PPC::ADDI : PPC::ADDI8;3955break;3956case PPC::ADDC:3957case PPC::ADDC8:3958III.SignedImm = true;3959III.ZeroIsSpecialOrig = 0;3960III.ZeroIsSpecialNew = 0;3961III.IsCommutative = true;3962III.IsSummingOperands = true;3963III.ImmOpcode = Opc == PPC::ADDC ? PPC::ADDIC : PPC::ADDIC8;3964break;3965case PPC::ADDC_rec:3966III.SignedImm = true;3967III.ZeroIsSpecialOrig = 0;3968III.ZeroIsSpecialNew = 0;3969III.IsCommutative = true;3970III.IsSummingOperands = true;3971III.ImmOpcode = PPC::ADDIC_rec;3972break;3973case PPC::SUBFC:3974case PPC::SUBFC8:3975III.SignedImm = true;3976III.ZeroIsSpecialOrig = 0;3977III.ZeroIsSpecialNew = 0;3978III.IsCommutative = false;3979III.ImmOpcode = Opc == PPC::SUBFC ? PPC::SUBFIC : PPC::SUBFIC8;3980break;3981case PPC::CMPW:3982case PPC::CMPD:3983III.SignedImm = true;3984III.ZeroIsSpecialOrig = 0;3985III.ZeroIsSpecialNew = 0;3986III.IsCommutative = false;3987III.ImmOpcode = Opc == PPC::CMPW ? PPC::CMPWI : PPC::CMPDI;3988break;3989case PPC::CMPLW:3990case PPC::CMPLD:3991III.SignedImm = false;3992III.ZeroIsSpecialOrig = 0;3993III.ZeroIsSpecialNew = 0;3994III.IsCommutative = false;3995III.ImmOpcode = Opc == PPC::CMPLW ? PPC::CMPLWI : PPC::CMPLDI;3996break;3997case PPC::AND_rec:3998case PPC::AND8_rec:3999case PPC::OR:4000case PPC::OR8:4001case PPC::XOR:4002case PPC::XOR8:4003III.SignedImm = false;4004III.ZeroIsSpecialOrig = 0;4005III.ZeroIsSpecialNew = 0;4006III.IsCommutative = true;4007switch(Opc) {4008default: llvm_unreachable("Unknown opcode");4009case PPC::AND_rec:4010III.ImmOpcode = PPC::ANDI_rec;4011break;4012case PPC::AND8_rec:4013III.ImmOpcode = PPC::ANDI8_rec;4014break;4015case PPC::OR: III.ImmOpcode = PPC::ORI; break;4016case PPC::OR8: III.ImmOpcode = PPC::ORI8; break;4017case PPC::XOR: III.ImmOpcode = PPC::XORI; break;4018case PPC::XOR8: III.ImmOpcode = PPC::XORI8; break;4019}4020break;4021case PPC::RLWNM:4022case PPC::RLWNM8:4023case PPC::RLWNM_rec:4024case PPC::RLWNM8_rec:4025case PPC::SLW:4026case PPC::SLW8:4027case PPC::SLW_rec:4028case PPC::SLW8_rec:4029case PPC::SRW:4030case PPC::SRW8:4031case PPC::SRW_rec:4032case PPC::SRW8_rec:4033case PPC::SRAW:4034case PPC::SRAW_rec:4035III.SignedImm = false;4036III.ZeroIsSpecialOrig = 0;4037III.ZeroIsSpecialNew = 0;4038III.IsCommutative = false;4039// This isn't actually true, but the instructions ignore any of the4040// upper bits, so any immediate loaded with an LI is acceptable.4041// This does not apply to shift right algebraic because a value4042// out of range will produce a -1/0.4043III.ImmWidth = 16;4044if (Opc == PPC::RLWNM || Opc == PPC::RLWNM8 || Opc == PPC::RLWNM_rec ||4045Opc == PPC::RLWNM8_rec)4046III.TruncateImmTo = 5;4047else4048III.TruncateImmTo = 6;4049switch(Opc) {4050default: llvm_unreachable("Unknown opcode");4051case PPC::RLWNM: III.ImmOpcode = PPC::RLWINM; break;4052case PPC::RLWNM8: III.ImmOpcode = PPC::RLWINM8; break;4053case PPC::RLWNM_rec:4054III.ImmOpcode = 
PPC::RLWINM_rec;4055break;4056case PPC::RLWNM8_rec:4057III.ImmOpcode = PPC::RLWINM8_rec;4058break;4059case PPC::SLW: III.ImmOpcode = PPC::RLWINM; break;4060case PPC::SLW8: III.ImmOpcode = PPC::RLWINM8; break;4061case PPC::SLW_rec:4062III.ImmOpcode = PPC::RLWINM_rec;4063break;4064case PPC::SLW8_rec:4065III.ImmOpcode = PPC::RLWINM8_rec;4066break;4067case PPC::SRW: III.ImmOpcode = PPC::RLWINM; break;4068case PPC::SRW8: III.ImmOpcode = PPC::RLWINM8; break;4069case PPC::SRW_rec:4070III.ImmOpcode = PPC::RLWINM_rec;4071break;4072case PPC::SRW8_rec:4073III.ImmOpcode = PPC::RLWINM8_rec;4074break;4075case PPC::SRAW:4076III.ImmWidth = 5;4077III.TruncateImmTo = 0;4078III.ImmOpcode = PPC::SRAWI;4079break;4080case PPC::SRAW_rec:4081III.ImmWidth = 5;4082III.TruncateImmTo = 0;4083III.ImmOpcode = PPC::SRAWI_rec;4084break;4085}4086break;4087case PPC::RLDCL:4088case PPC::RLDCL_rec:4089case PPC::RLDCR:4090case PPC::RLDCR_rec:4091case PPC::SLD:4092case PPC::SLD_rec:4093case PPC::SRD:4094case PPC::SRD_rec:4095case PPC::SRAD:4096case PPC::SRAD_rec:4097III.SignedImm = false;4098III.ZeroIsSpecialOrig = 0;4099III.ZeroIsSpecialNew = 0;4100III.IsCommutative = false;4101// This isn't actually true, but the instructions ignore any of the4102// upper bits, so any immediate loaded with an LI is acceptable.4103// This does not apply to shift right algebraic because a value4104// out of range will produce a -1/0.4105III.ImmWidth = 16;4106if (Opc == PPC::RLDCL || Opc == PPC::RLDCL_rec || Opc == PPC::RLDCR ||4107Opc == PPC::RLDCR_rec)4108III.TruncateImmTo = 6;4109else4110III.TruncateImmTo = 7;4111switch(Opc) {4112default: llvm_unreachable("Unknown opcode");4113case PPC::RLDCL: III.ImmOpcode = PPC::RLDICL; break;4114case PPC::RLDCL_rec:4115III.ImmOpcode = PPC::RLDICL_rec;4116break;4117case PPC::RLDCR: III.ImmOpcode = PPC::RLDICR; break;4118case PPC::RLDCR_rec:4119III.ImmOpcode = PPC::RLDICR_rec;4120break;4121case PPC::SLD: III.ImmOpcode = PPC::RLDICR; break;4122case PPC::SLD_rec:4123III.ImmOpcode = PPC::RLDICR_rec;4124break;4125case PPC::SRD: III.ImmOpcode = PPC::RLDICL; break;4126case PPC::SRD_rec:4127III.ImmOpcode = PPC::RLDICL_rec;4128break;4129case PPC::SRAD:4130III.ImmWidth = 6;4131III.TruncateImmTo = 0;4132III.ImmOpcode = PPC::SRADI;4133break;4134case PPC::SRAD_rec:4135III.ImmWidth = 6;4136III.TruncateImmTo = 0;4137III.ImmOpcode = PPC::SRADI_rec;4138break;4139}4140break;4141// Loads and stores:4142case PPC::LBZX:4143case PPC::LBZX8:4144case PPC::LHZX:4145case PPC::LHZX8:4146case PPC::LHAX:4147case PPC::LHAX8:4148case PPC::LWZX:4149case PPC::LWZX8:4150case PPC::LWAX:4151case PPC::LDX:4152case PPC::LFSX:4153case PPC::LFDX:4154case PPC::STBX:4155case PPC::STBX8:4156case PPC::STHX:4157case PPC::STHX8:4158case PPC::STWX:4159case PPC::STWX8:4160case PPC::STDX:4161case PPC::STFSX:4162case PPC::STFDX:4163III.SignedImm = true;4164III.ZeroIsSpecialOrig = 1;4165III.ZeroIsSpecialNew = 2;4166III.IsCommutative = true;4167III.IsSummingOperands = true;4168III.ImmOpNo = 1;4169III.OpNoForForwarding = 2;4170switch(Opc) {4171default: llvm_unreachable("Unknown opcode");4172case PPC::LBZX: III.ImmOpcode = PPC::LBZ; break;4173case PPC::LBZX8: III.ImmOpcode = PPC::LBZ8; break;4174case PPC::LHZX: III.ImmOpcode = PPC::LHZ; break;4175case PPC::LHZX8: III.ImmOpcode = PPC::LHZ8; break;4176case PPC::LHAX: III.ImmOpcode = PPC::LHA; break;4177case PPC::LHAX8: III.ImmOpcode = PPC::LHA8; break;4178case PPC::LWZX: III.ImmOpcode = PPC::LWZ; break;4179case PPC::LWZX8: III.ImmOpcode = PPC::LWZ8; break;4180case PPC::LWAX:4181III.ImmOpcode = 
PPC::LWA;4182III.ImmMustBeMultipleOf = 4;4183break;4184case PPC::LDX: III.ImmOpcode = PPC::LD; III.ImmMustBeMultipleOf = 4; break;4185case PPC::LFSX: III.ImmOpcode = PPC::LFS; break;4186case PPC::LFDX: III.ImmOpcode = PPC::LFD; break;4187case PPC::STBX: III.ImmOpcode = PPC::STB; break;4188case PPC::STBX8: III.ImmOpcode = PPC::STB8; break;4189case PPC::STHX: III.ImmOpcode = PPC::STH; break;4190case PPC::STHX8: III.ImmOpcode = PPC::STH8; break;4191case PPC::STWX: III.ImmOpcode = PPC::STW; break;4192case PPC::STWX8: III.ImmOpcode = PPC::STW8; break;4193case PPC::STDX:4194III.ImmOpcode = PPC::STD;4195III.ImmMustBeMultipleOf = 4;4196break;4197case PPC::STFSX: III.ImmOpcode = PPC::STFS; break;4198case PPC::STFDX: III.ImmOpcode = PPC::STFD; break;4199}4200break;4201case PPC::LBZUX:4202case PPC::LBZUX8:4203case PPC::LHZUX:4204case PPC::LHZUX8:4205case PPC::LHAUX:4206case PPC::LHAUX8:4207case PPC::LWZUX:4208case PPC::LWZUX8:4209case PPC::LDUX:4210case PPC::LFSUX:4211case PPC::LFDUX:4212case PPC::STBUX:4213case PPC::STBUX8:4214case PPC::STHUX:4215case PPC::STHUX8:4216case PPC::STWUX:4217case PPC::STWUX8:4218case PPC::STDUX:4219case PPC::STFSUX:4220case PPC::STFDUX:4221III.SignedImm = true;4222III.ZeroIsSpecialOrig = 2;4223III.ZeroIsSpecialNew = 3;4224III.IsCommutative = false;4225III.IsSummingOperands = true;4226III.ImmOpNo = 2;4227III.OpNoForForwarding = 3;4228switch(Opc) {4229default: llvm_unreachable("Unknown opcode");4230case PPC::LBZUX: III.ImmOpcode = PPC::LBZU; break;4231case PPC::LBZUX8: III.ImmOpcode = PPC::LBZU8; break;4232case PPC::LHZUX: III.ImmOpcode = PPC::LHZU; break;4233case PPC::LHZUX8: III.ImmOpcode = PPC::LHZU8; break;4234case PPC::LHAUX: III.ImmOpcode = PPC::LHAU; break;4235case PPC::LHAUX8: III.ImmOpcode = PPC::LHAU8; break;4236case PPC::LWZUX: III.ImmOpcode = PPC::LWZU; break;4237case PPC::LWZUX8: III.ImmOpcode = PPC::LWZU8; break;4238case PPC::LDUX:4239III.ImmOpcode = PPC::LDU;4240III.ImmMustBeMultipleOf = 4;4241break;4242case PPC::LFSUX: III.ImmOpcode = PPC::LFSU; break;4243case PPC::LFDUX: III.ImmOpcode = PPC::LFDU; break;4244case PPC::STBUX: III.ImmOpcode = PPC::STBU; break;4245case PPC::STBUX8: III.ImmOpcode = PPC::STBU8; break;4246case PPC::STHUX: III.ImmOpcode = PPC::STHU; break;4247case PPC::STHUX8: III.ImmOpcode = PPC::STHU8; break;4248case PPC::STWUX: III.ImmOpcode = PPC::STWU; break;4249case PPC::STWUX8: III.ImmOpcode = PPC::STWU8; break;4250case PPC::STDUX:4251III.ImmOpcode = PPC::STDU;4252III.ImmMustBeMultipleOf = 4;4253break;4254case PPC::STFSUX: III.ImmOpcode = PPC::STFSU; break;4255case PPC::STFDUX: III.ImmOpcode = PPC::STFDU; break;4256}4257break;4258// Power9 and up only. For some of these, the X-Form version has access to all4259// 64 VSR's whereas the D-Form only has access to the VR's. 
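  // e.g. (illustrative): an "LXSDX vs34, ..." fed by an ADDI can use the
  // D-Form "LXSD v2, disp(reg)" since vs34 overlaps v2, whereas a result in
  // vs0-vs31 overlaps the FPRs and has to use LFD instead.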
We replace those4260// with pseudo-ops pre-ra and for post-ra, we check that the register loaded4261// into or stored from is one of the VR registers.4262case PPC::LXVX:4263case PPC::LXSSPX:4264case PPC::LXSDX:4265case PPC::STXVX:4266case PPC::STXSSPX:4267case PPC::STXSDX:4268case PPC::XFLOADf32:4269case PPC::XFLOADf64:4270case PPC::XFSTOREf32:4271case PPC::XFSTOREf64:4272if (!Subtarget.hasP9Vector())4273return false;4274III.SignedImm = true;4275III.ZeroIsSpecialOrig = 1;4276III.ZeroIsSpecialNew = 2;4277III.IsCommutative = true;4278III.IsSummingOperands = true;4279III.ImmOpNo = 1;4280III.OpNoForForwarding = 2;4281III.ImmMustBeMultipleOf = 4;4282switch(Opc) {4283default: llvm_unreachable("Unknown opcode");4284case PPC::LXVX:4285III.ImmOpcode = PPC::LXV;4286III.ImmMustBeMultipleOf = 16;4287break;4288case PPC::LXSSPX:4289if (PostRA) {4290if (IsVFReg)4291III.ImmOpcode = PPC::LXSSP;4292else {4293III.ImmOpcode = PPC::LFS;4294III.ImmMustBeMultipleOf = 1;4295}4296break;4297}4298[[fallthrough]];4299case PPC::XFLOADf32:4300III.ImmOpcode = PPC::DFLOADf32;4301break;4302case PPC::LXSDX:4303if (PostRA) {4304if (IsVFReg)4305III.ImmOpcode = PPC::LXSD;4306else {4307III.ImmOpcode = PPC::LFD;4308III.ImmMustBeMultipleOf = 1;4309}4310break;4311}4312[[fallthrough]];4313case PPC::XFLOADf64:4314III.ImmOpcode = PPC::DFLOADf64;4315break;4316case PPC::STXVX:4317III.ImmOpcode = PPC::STXV;4318III.ImmMustBeMultipleOf = 16;4319break;4320case PPC::STXSSPX:4321if (PostRA) {4322if (IsVFReg)4323III.ImmOpcode = PPC::STXSSP;4324else {4325III.ImmOpcode = PPC::STFS;4326III.ImmMustBeMultipleOf = 1;4327}4328break;4329}4330[[fallthrough]];4331case PPC::XFSTOREf32:4332III.ImmOpcode = PPC::DFSTOREf32;4333break;4334case PPC::STXSDX:4335if (PostRA) {4336if (IsVFReg)4337III.ImmOpcode = PPC::STXSD;4338else {4339III.ImmOpcode = PPC::STFD;4340III.ImmMustBeMultipleOf = 1;4341}4342break;4343}4344[[fallthrough]];4345case PPC::XFSTOREf64:4346III.ImmOpcode = PPC::DFSTOREf64;4347break;4348}4349break;4350}4351return true;4352}43534354// Utility function for swaping two arbitrary operands of an instruction.4355static void swapMIOperands(MachineInstr &MI, unsigned Op1, unsigned Op2) {4356assert(Op1 != Op2 && "Cannot swap operand with itself.");43574358unsigned MaxOp = std::max(Op1, Op2);4359unsigned MinOp = std::min(Op1, Op2);4360MachineOperand MOp1 = MI.getOperand(MinOp);4361MachineOperand MOp2 = MI.getOperand(MaxOp);4362MI.removeOperand(std::max(Op1, Op2));4363MI.removeOperand(std::min(Op1, Op2));43644365// If the operands we are swapping are the two at the end (the common case)4366// we can just remove both and add them in the opposite order.4367if (MaxOp - MinOp == 1 && MI.getNumOperands() == MinOp) {4368MI.addOperand(MOp2);4369MI.addOperand(MOp1);4370} else {4371// Store all operands in a temporary vector, remove them and re-add in the4372// right order.4373SmallVector<MachineOperand, 2> MOps;4374unsigned TotalOps = MI.getNumOperands() + 2; // We've already removed 2 ops.4375for (unsigned i = MI.getNumOperands() - 1; i >= MinOp; i--) {4376MOps.push_back(MI.getOperand(i));4377MI.removeOperand(i);4378}4379// MOp2 needs to be added next.4380MI.addOperand(MOp2);4381// Now add the rest.4382for (unsigned i = MI.getNumOperands(); i < TotalOps; i++) {4383if (i == MaxOp)4384MI.addOperand(MOp1);4385else {4386MI.addOperand(MOps.back());4387MOps.pop_back();4388}4389}4390}4391}43924393// Check if the 'MI' that has the index OpNoForForwarding4394// meets the requirement described in the ImmInstrInfo.4395bool 
PPCInstrInfo::isUseMIElgibleForForwarding(MachineInstr &MI,4396const ImmInstrInfo &III,4397unsigned OpNoForForwarding4398) const {4399// As the algorithm of checking for PPC::ZERO/PPC::ZERO84400// would not work pre-RA, we can only do the check post RA.4401MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();4402if (MRI.isSSA())4403return false;44044405// Cannot do the transform if MI isn't summing the operands.4406if (!III.IsSummingOperands)4407return false;44084409// The instruction we are trying to replace must have the ZeroIsSpecialOrig set.4410if (!III.ZeroIsSpecialOrig)4411return false;44124413// We cannot do the transform if the operand we are trying to replace4414// isn't the same as the operand the instruction allows.4415if (OpNoForForwarding != III.OpNoForForwarding)4416return false;44174418// Check if the instruction we are trying to transform really has4419// the special zero register as its operand.4420if (MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO &&4421MI.getOperand(III.ZeroIsSpecialOrig).getReg() != PPC::ZERO8)4422return false;44234424// This machine instruction is convertible if it is,4425// 1. summing the operands.4426// 2. one of the operands is special zero register.4427// 3. the operand we are trying to replace is allowed by the MI.4428return true;4429}44304431// Check if the DefMI is the add inst and set the ImmMO and RegMO4432// accordingly.4433bool PPCInstrInfo::isDefMIElgibleForForwarding(MachineInstr &DefMI,4434const ImmInstrInfo &III,4435MachineOperand *&ImmMO,4436MachineOperand *&RegMO) const {4437unsigned Opc = DefMI.getOpcode();4438if (Opc != PPC::ADDItocL8 && Opc != PPC::ADDI && Opc != PPC::ADDI8)4439return false;44404441// Skip the optimization of transformTo[NewImm|Imm]FormFedByAdd for ADDItocL84442// on AIX which is used for toc-data access. TODO: Follow up to see if it can4443// apply for AIX toc-data as well.4444if (Opc == PPC::ADDItocL8 && Subtarget.isAIX())4445return false;44464447assert(DefMI.getNumOperands() >= 3 &&4448"Add inst must have at least three operands");4449RegMO = &DefMI.getOperand(1);4450ImmMO = &DefMI.getOperand(2);44514452// Before RA, ADDI first operand could be a frame index.4453if (!RegMO->isReg())4454return false;44554456// This DefMI is elgible for forwarding if it is:4457// 1. add inst4458// 2. 
  //    one of the operands is Imm/CPI/Global.
  return isAnImmediateOperand(*ImmMO);
}

bool PPCInstrInfo::isRegElgibleForForwarding(
    const MachineOperand &RegMO, const MachineInstr &DefMI,
    const MachineInstr &MI, bool KillDefMI,
    bool &IsFwdFeederRegKilled, bool &SeenIntermediateUse) const {
  // x = addi y, imm
  // ...
  // z = lfdx 0, x -> z = lfd imm(y)
  // The Reg "y" can be forwarded to the MI(z) only when there is no DEF
  // of "y" between the DEF of "x" and "z".
  // The query is only valid post RA.
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  if (MRI.isSSA())
    return false;

  Register Reg = RegMO.getReg();

  // Walk the instructions in reverse (MI --> DefMI) to get the last DEF of
  // the Reg.
  MachineBasicBlock::const_reverse_iterator It = MI;
  MachineBasicBlock::const_reverse_iterator E = MI.getParent()->rend();
  It++;
  for (; It != E; ++It) {
    if (It->modifiesRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      return false;
    else if (It->killsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      IsFwdFeederRegKilled = true;
    if (It->readsRegister(Reg, &getRegisterInfo()) && (&*It) != &DefMI)
      SeenIntermediateUse = true;
    // Made it to DefMI without encountering a clobber.
    if ((&*It) == &DefMI)
      break;
  }
  assert((&*It) == &DefMI && "DefMI is missing");

  // If DefMI also defines the register to be forwarded, we can only forward it
  // if DefMI is being erased.
  if (DefMI.modifiesRegister(Reg, &getRegisterInfo()))
    return KillDefMI;

  return true;
}

bool PPCInstrInfo::isImmElgibleForForwarding(const MachineOperand &ImmMO,
                                             const MachineInstr &DefMI,
                                             const ImmInstrInfo &III,
                                             int64_t &Imm,
                                             int64_t BaseImm) const {
  assert(isAnImmediateOperand(ImmMO) && "ImmMO is NOT an immediate");
  if (DefMI.getOpcode() == PPC::ADDItocL8) {
    // The operand of ADDItocL8 is a CPI, which is not an immediate at compile
    // time. However, we know that it is 16 bits wide and has an alignment of
    // 4. Check if the instruction meets that requirement.
    if (III.ImmMustBeMultipleOf > 4 ||
        III.TruncateImmTo || III.ImmWidth != 16)
      return false;

    // Going from XForm to DForm loads means that the displacement needs to be
    // not just an immediate but also a multiple of 4, or 16 depending on the
    // load.
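    // e.g. (illustrative): LD/LXSD use DS-form encodings that do not encode
    // the two low bits of the displacement, and LXV uses a DQ-form that drops
    // four, so an odd or misaligned offset is only representable in the
    // X-form (LDX/LXSDX/LXVX) variants.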
A DForm load cannot be represented if it is a multiple of say 2.4520// XForm loads do not have this restriction.4521if (ImmMO.isGlobal()) {4522const DataLayout &DL = ImmMO.getGlobal()->getDataLayout();4523if (ImmMO.getGlobal()->getPointerAlignment(DL) < III.ImmMustBeMultipleOf)4524return false;4525}45264527return true;4528}45294530if (ImmMO.isImm()) {4531// It is Imm, we need to check if the Imm fit the range.4532// Sign-extend to 64-bits.4533// DefMI may be folded with another imm form instruction, the result Imm is4534// the sum of Imm of DefMI and BaseImm which is from imm form instruction.4535APInt ActualValue(64, ImmMO.getImm() + BaseImm, true);4536if (III.SignedImm && !ActualValue.isSignedIntN(III.ImmWidth))4537return false;4538if (!III.SignedImm && !ActualValue.isIntN(III.ImmWidth))4539return false;4540Imm = SignExtend64<16>(ImmMO.getImm() + BaseImm);45414542if (Imm % III.ImmMustBeMultipleOf)4543return false;4544if (III.TruncateImmTo)4545Imm &= ((1 << III.TruncateImmTo) - 1);4546}4547else4548return false;45494550// This ImmMO is forwarded if it meets the requriement describle4551// in ImmInstrInfo4552return true;4553}45544555bool PPCInstrInfo::simplifyToLI(MachineInstr &MI, MachineInstr &DefMI,4556unsigned OpNoForForwarding,4557MachineInstr **KilledDef) const {4558if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||4559!DefMI.getOperand(1).isImm())4560return false;45614562MachineFunction *MF = MI.getParent()->getParent();4563MachineRegisterInfo *MRI = &MF->getRegInfo();4564bool PostRA = !MRI->isSSA();45654566int64_t Immediate = DefMI.getOperand(1).getImm();4567// Sign-extend to 64-bits.4568int64_t SExtImm = SignExtend64<16>(Immediate);45694570bool ReplaceWithLI = false;4571bool Is64BitLI = false;4572int64_t NewImm = 0;4573bool SetCR = false;4574unsigned Opc = MI.getOpcode();4575switch (Opc) {4576default:4577return false;45784579// FIXME: Any branches conditional on such a comparison can be made4580// unconditional. At this time, this happens too infrequently to be worth4581// the implementation effort, but if that ever changes, we could convert4582// such a pattern here.4583case PPC::CMPWI:4584case PPC::CMPLWI:4585case PPC::CMPDI:4586case PPC::CMPLDI: {4587// Doing this post-RA would require dataflow analysis to reliably find uses4588// of the CR register set by the compare.4589// No need to fixup killed/dead flag since this transformation is only valid4590// before RA.4591if (PostRA)4592return false;4593// If a compare-immediate is fed by an immediate and is itself an input of4594// an ISEL (the most common case) into a COPY of the correct register.4595bool Changed = false;4596Register DefReg = MI.getOperand(0).getReg();4597int64_t Comparand = MI.getOperand(2).getImm();4598int64_t SExtComparand = ((uint64_t)Comparand & ~0x7FFFuLL) != 04599? (Comparand | 0xFFFFFFFFFFFF0000)4600: Comparand;46014602for (auto &CompareUseMI : MRI->use_instructions(DefReg)) {4603unsigned UseOpc = CompareUseMI.getOpcode();4604if (UseOpc != PPC::ISEL && UseOpc != PPC::ISEL8)4605continue;4606unsigned CRSubReg = CompareUseMI.getOperand(3).getSubReg();4607Register TrueReg = CompareUseMI.getOperand(1).getReg();4608Register FalseReg = CompareUseMI.getOperand(2).getReg();4609unsigned RegToCopy =4610selectReg(SExtImm, SExtComparand, Opc, TrueReg, FalseReg, CRSubReg);4611if (RegToCopy == PPC::NoRegister)4612continue;4613// Can't use PPC::COPY to copy PPC::ZERO[8]. Convert it to LI[8] 0.4614if (RegToCopy == PPC::ZERO || RegToCopy == PPC::ZERO8) {4615CompareUseMI.setDesc(get(UseOpc == PPC::ISEL8 ? 
PPC::LI8 : PPC::LI));4616replaceInstrOperandWithImm(CompareUseMI, 1, 0);4617CompareUseMI.removeOperand(3);4618CompareUseMI.removeOperand(2);4619continue;4620}4621LLVM_DEBUG(4622dbgs() << "Found LI -> CMPI -> ISEL, replacing with a copy.\n");4623LLVM_DEBUG(DefMI.dump(); MI.dump(); CompareUseMI.dump());4624LLVM_DEBUG(dbgs() << "Is converted to:\n");4625// Convert to copy and remove unneeded operands.4626CompareUseMI.setDesc(get(PPC::COPY));4627CompareUseMI.removeOperand(3);4628CompareUseMI.removeOperand(RegToCopy == TrueReg ? 2 : 1);4629CmpIselsConverted++;4630Changed = true;4631LLVM_DEBUG(CompareUseMI.dump());4632}4633if (Changed)4634return true;4635// This may end up incremented multiple times since this function is called4636// during a fixed-point transformation, but it is only meant to indicate the4637// presence of this opportunity.4638MissedConvertibleImmediateInstrs++;4639return false;4640}46414642// Immediate forms - may simply be convertable to an LI.4643case PPC::ADDI:4644case PPC::ADDI8: {4645// Does the sum fit in a 16-bit signed field?4646int64_t Addend = MI.getOperand(2).getImm();4647if (isInt<16>(Addend + SExtImm)) {4648ReplaceWithLI = true;4649Is64BitLI = Opc == PPC::ADDI8;4650NewImm = Addend + SExtImm;4651break;4652}4653return false;4654}4655case PPC::SUBFIC:4656case PPC::SUBFIC8: {4657// Only transform this if the CARRY implicit operand is dead.4658if (MI.getNumOperands() > 3 && !MI.getOperand(3).isDead())4659return false;4660int64_t Minuend = MI.getOperand(2).getImm();4661if (isInt<16>(Minuend - SExtImm)) {4662ReplaceWithLI = true;4663Is64BitLI = Opc == PPC::SUBFIC8;4664NewImm = Minuend - SExtImm;4665break;4666}4667return false;4668}4669case PPC::RLDICL:4670case PPC::RLDICL_rec:4671case PPC::RLDICL_32:4672case PPC::RLDICL_32_64: {4673// Use APInt's rotate function.4674int64_t SH = MI.getOperand(2).getImm();4675int64_t MB = MI.getOperand(3).getImm();4676APInt InVal((Opc == PPC::RLDICL || Opc == PPC::RLDICL_rec) ? 64 : 32,4677SExtImm, true);4678InVal = InVal.rotl(SH);4679uint64_t Mask = MB == 0 ? -1LLU : (1LLU << (63 - MB + 1)) - 1;4680InVal &= Mask;4681// Can't replace negative values with an LI as that will sign-extend4682// and not clear the left bits. If we're setting the CR bit, we will use4683// ANDI_rec which won't sign extend, so that's safe.4684if (isUInt<15>(InVal.getSExtValue()) ||4685(Opc == PPC::RLDICL_rec && isUInt<16>(InVal.getSExtValue()))) {4686ReplaceWithLI = true;4687Is64BitLI = Opc != PPC::RLDICL_32;4688NewImm = InVal.getSExtValue();4689SetCR = Opc == PPC::RLDICL_rec;4690break;4691}4692return false;4693}4694case PPC::RLWINM:4695case PPC::RLWINM8:4696case PPC::RLWINM_rec:4697case PPC::RLWINM8_rec: {4698int64_t SH = MI.getOperand(2).getImm();4699int64_t MB = MI.getOperand(3).getImm();4700int64_t ME = MI.getOperand(4).getImm();4701APInt InVal(32, SExtImm, true);4702InVal = InVal.rotl(SH);4703APInt Mask = APInt::getBitsSetWithWrap(32, 32 - ME - 1, 32 - MB);4704InVal &= Mask;4705// Can't replace negative values with an LI as that will sign-extend4706// and not clear the left bits. 
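    // For instance (illustrative): a folded value of 0x8000 would have to be
    // materialized as "LI -32768", which sign-extends to 0x...FFFF8000 rather
    // than producing 0x8000, so it is rejected by the isUInt<15> check below.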
If we're setting the CR bit, we will use4707// ANDI_rec which won't sign extend, so that's safe.4708bool ValueFits = isUInt<15>(InVal.getSExtValue());4709ValueFits |= ((Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec) &&4710isUInt<16>(InVal.getSExtValue()));4711if (ValueFits) {4712ReplaceWithLI = true;4713Is64BitLI = Opc == PPC::RLWINM8 || Opc == PPC::RLWINM8_rec;4714NewImm = InVal.getSExtValue();4715SetCR = Opc == PPC::RLWINM_rec || Opc == PPC::RLWINM8_rec;4716break;4717}4718return false;4719}4720case PPC::ORI:4721case PPC::ORI8:4722case PPC::XORI:4723case PPC::XORI8: {4724int64_t LogicalImm = MI.getOperand(2).getImm();4725int64_t Result = 0;4726if (Opc == PPC::ORI || Opc == PPC::ORI8)4727Result = LogicalImm | SExtImm;4728else4729Result = LogicalImm ^ SExtImm;4730if (isInt<16>(Result)) {4731ReplaceWithLI = true;4732Is64BitLI = Opc == PPC::ORI8 || Opc == PPC::XORI8;4733NewImm = Result;4734break;4735}4736return false;4737}4738}47394740if (ReplaceWithLI) {4741// We need to be careful with CR-setting instructions we're replacing.4742if (SetCR) {4743// We don't know anything about uses when we're out of SSA, so only4744// replace if the new immediate will be reproduced.4745bool ImmChanged = (SExtImm & NewImm) != NewImm;4746if (PostRA && ImmChanged)4747return false;47484749if (!PostRA) {4750// If the defining load-immediate has no other uses, we can just replace4751// the immediate with the new immediate.4752if (MRI->hasOneUse(DefMI.getOperand(0).getReg()))4753DefMI.getOperand(1).setImm(NewImm);47544755// If we're not using the GPR result of the CR-setting instruction, we4756// just need to and with zero/non-zero depending on the new immediate.4757else if (MRI->use_empty(MI.getOperand(0).getReg())) {4758if (NewImm) {4759assert(Immediate && "Transformation converted zero to non-zero?");4760NewImm = Immediate;4761}4762} else if (ImmChanged)4763return false;4764}4765}47664767LLVM_DEBUG(dbgs() << "Replacing constant instruction:\n");4768LLVM_DEBUG(MI.dump());4769LLVM_DEBUG(dbgs() << "Fed by:\n");4770LLVM_DEBUG(DefMI.dump());4771LoadImmediateInfo LII;4772LII.Imm = NewImm;4773LII.Is64Bit = Is64BitLI;4774LII.SetCR = SetCR;4775// If we're setting the CR, the original load-immediate must be kept (as an4776// operand to ANDI_rec/ANDI8_rec).4777if (KilledDef && SetCR)4778*KilledDef = nullptr;4779replaceInstrWithLI(MI, LII);47804781if (PostRA)4782recomputeLivenessFlags(*MI.getParent());47834784LLVM_DEBUG(dbgs() << "With:\n");4785LLVM_DEBUG(MI.dump());4786return true;4787}4788return false;4789}47904791bool PPCInstrInfo::transformToNewImmFormFedByAdd(4792MachineInstr &MI, MachineInstr &DefMI, unsigned OpNoForForwarding) const {4793MachineRegisterInfo *MRI = &MI.getParent()->getParent()->getRegInfo();4794bool PostRA = !MRI->isSSA();4795// FIXME: extend this to post-ra. Need to do some change in getForwardingDefMI4796// for post-ra.4797if (PostRA)4798return false;47994800// Only handle load/store.4801if (!MI.mayLoadOrStore())4802return false;48034804unsigned XFormOpcode = RI.getMappedIdxOpcForImmOpc(MI.getOpcode());48054806assert((XFormOpcode != PPC::INSTRUCTION_LIST_END) &&4807"MI must have x-form opcode");48084809// get Imm Form info.4810ImmInstrInfo III;4811bool IsVFReg = MI.getOperand(0).isReg()4812? 
PPC::isVFRegister(MI.getOperand(0).getReg())4813: false;48144815if (!instrHasImmForm(XFormOpcode, IsVFReg, III, PostRA))4816return false;48174818if (!III.IsSummingOperands)4819return false;48204821if (OpNoForForwarding != III.OpNoForForwarding)4822return false;48234824MachineOperand ImmOperandMI = MI.getOperand(III.ImmOpNo);4825if (!ImmOperandMI.isImm())4826return false;48274828// Check DefMI.4829MachineOperand *ImmMO = nullptr;4830MachineOperand *RegMO = nullptr;4831if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))4832return false;4833assert(ImmMO && RegMO && "Imm and Reg operand must have been set");48344835// Check Imm.4836// Set ImmBase from imm instruction as base and get new Imm inside4837// isImmElgibleForForwarding.4838int64_t ImmBase = ImmOperandMI.getImm();4839int64_t Imm = 0;4840if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm, ImmBase))4841return false;48424843// Do the transform4844LLVM_DEBUG(dbgs() << "Replacing existing reg+imm instruction:\n");4845LLVM_DEBUG(MI.dump());4846LLVM_DEBUG(dbgs() << "Fed by:\n");4847LLVM_DEBUG(DefMI.dump());48484849MI.getOperand(III.OpNoForForwarding).setReg(RegMO->getReg());4850MI.getOperand(III.ImmOpNo).setImm(Imm);48514852LLVM_DEBUG(dbgs() << "With:\n");4853LLVM_DEBUG(MI.dump());4854return true;4855}48564857// If an X-Form instruction is fed by an add-immediate and one of its operands4858// is the literal zero, attempt to forward the source of the add-immediate to4859// the corresponding D-Form instruction with the displacement coming from4860// the immediate being added.4861bool PPCInstrInfo::transformToImmFormFedByAdd(4862MachineInstr &MI, const ImmInstrInfo &III, unsigned OpNoForForwarding,4863MachineInstr &DefMI, bool KillDefMI) const {4864// RegMO ImmMO4865// | |4866// x = addi reg, imm <----- DefMI4867// y = op 0 , x <----- MI4868// |4869// OpNoForForwarding4870// Check if the MI meet the requirement described in the III.4871if (!isUseMIElgibleForForwarding(MI, III, OpNoForForwarding))4872return false;48734874// Check if the DefMI meet the requirement4875// described in the III. If yes, set the ImmMO and RegMO accordingly.4876MachineOperand *ImmMO = nullptr;4877MachineOperand *RegMO = nullptr;4878if (!isDefMIElgibleForForwarding(DefMI, III, ImmMO, RegMO))4879return false;4880assert(ImmMO && RegMO && "Imm and Reg operand must have been set");48814882// As we get the Imm operand now, we need to check if the ImmMO meet4883// the requirement described in the III. 
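  // A minimal sketch of the end result (registers and offsets illustrative):
  //   x = addi r3, 24
  //   y = lwzx 0, x      -->  y = lwz 24(r3)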
If yes set the Imm.4884int64_t Imm = 0;4885if (!isImmElgibleForForwarding(*ImmMO, DefMI, III, Imm))4886return false;48874888bool IsFwdFeederRegKilled = false;4889bool SeenIntermediateUse = false;4890// Check if the RegMO can be forwarded to MI.4891if (!isRegElgibleForForwarding(*RegMO, DefMI, MI, KillDefMI,4892IsFwdFeederRegKilled, SeenIntermediateUse))4893return false;48944895MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();4896bool PostRA = !MRI.isSSA();48974898// We know that, the MI and DefMI both meet the pattern, and4899// the Imm also meet the requirement with the new Imm-form.4900// It is safe to do the transformation now.4901LLVM_DEBUG(dbgs() << "Replacing indexed instruction:\n");4902LLVM_DEBUG(MI.dump());4903LLVM_DEBUG(dbgs() << "Fed by:\n");4904LLVM_DEBUG(DefMI.dump());49054906// Update the base reg first.4907MI.getOperand(III.OpNoForForwarding).ChangeToRegister(RegMO->getReg(),4908false, false,4909RegMO->isKill());49104911// Then, update the imm.4912if (ImmMO->isImm()) {4913// If the ImmMO is Imm, change the operand that has ZERO to that Imm4914// directly.4915replaceInstrOperandWithImm(MI, III.ZeroIsSpecialOrig, Imm);4916}4917else {4918// Otherwise, it is Constant Pool Index(CPI) or Global,4919// which is relocation in fact. We need to replace the special zero4920// register with ImmMO.4921// Before that, we need to fixup the target flags for imm.4922// For some reason, we miss to set the flag for the ImmMO if it is CPI.4923if (DefMI.getOpcode() == PPC::ADDItocL8)4924ImmMO->setTargetFlags(PPCII::MO_TOC_LO);49254926// MI didn't have the interface such as MI.setOperand(i) though4927// it has MI.getOperand(i). To repalce the ZERO MachineOperand with4928// ImmMO, we need to remove ZERO operand and all the operands behind it,4929// and, add the ImmMO, then, move back all the operands behind ZERO.4930SmallVector<MachineOperand, 2> MOps;4931for (unsigned i = MI.getNumOperands() - 1; i >= III.ZeroIsSpecialOrig; i--) {4932MOps.push_back(MI.getOperand(i));4933MI.removeOperand(i);4934}49354936// Remove the last MO in the list, which is ZERO operand in fact.4937MOps.pop_back();4938// Add the imm operand.4939MI.addOperand(*ImmMO);4940// Now add the rest back.4941for (auto &MO : MOps)4942MI.addOperand(MO);4943}49444945// Update the opcode.4946MI.setDesc(get(III.ImmOpcode));49474948if (PostRA)4949recomputeLivenessFlags(*MI.getParent());4950LLVM_DEBUG(dbgs() << "With:\n");4951LLVM_DEBUG(MI.dump());49524953return true;4954}49554956bool PPCInstrInfo::transformToImmFormFedByLI(MachineInstr &MI,4957const ImmInstrInfo &III,4958unsigned ConstantOpNo,4959MachineInstr &DefMI) const {4960// DefMI must be LI or LI8.4961if ((DefMI.getOpcode() != PPC::LI && DefMI.getOpcode() != PPC::LI8) ||4962!DefMI.getOperand(1).isImm())4963return false;49644965// Get Imm operand and Sign-extend to 64-bits.4966int64_t Imm = SignExtend64<16>(DefMI.getOperand(1).getImm());49674968MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();4969bool PostRA = !MRI.isSSA();4970// Exit early if we can't convert this.4971if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)4972return false;4973if (Imm % III.ImmMustBeMultipleOf)4974return false;4975if (III.TruncateImmTo)4976Imm &= ((1 << III.TruncateImmTo) - 1);4977if (III.SignedImm) {4978APInt ActualValue(64, Imm, true);4979if (!ActualValue.isSignedIntN(III.ImmWidth))4980return false;4981} else {4982uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;4983if ((uint64_t)Imm > UnsignedMax)4984return false;4985}49864987// If we're post-RA, the 
  if ((ConstantOpNo != III.OpNoForForwarding) && !III.IsCommutative)
    return false;
  if (Imm % III.ImmMustBeMultipleOf)
    return false;
  if (III.TruncateImmTo)
    Imm &= ((1 << III.TruncateImmTo) - 1);
  if (III.SignedImm) {
    APInt ActualValue(64, Imm, true);
    if (!ActualValue.isSignedIntN(III.ImmWidth))
      return false;
  } else {
    uint64_t UnsignedMax = (1 << III.ImmWidth) - 1;
    if ((uint64_t)Imm > UnsignedMax)
      return false;
  }

  // If we're post-RA and the instructions don't agree on whether register zero
  // is special, we can transform this as long as the register operand that
  // will end up in the location where zero is special isn't R0.
  if (PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    unsigned PosForOrigZero = III.ZeroIsSpecialOrig ? III.ZeroIsSpecialOrig :
                                                      III.ZeroIsSpecialNew + 1;
    Register OrigZeroReg = MI.getOperand(PosForOrigZero).getReg();
    Register NewZeroReg = MI.getOperand(III.ZeroIsSpecialNew).getReg();
    // If R0 is in the operand where zero is special for the new instruction,
    // it is unsafe to transform if the constant operand isn't that operand.
    if ((NewZeroReg == PPC::R0 || NewZeroReg == PPC::X0) &&
        ConstantOpNo != III.ZeroIsSpecialNew)
      return false;
    if ((OrigZeroReg == PPC::R0 || OrigZeroReg == PPC::X0) &&
        ConstantOpNo != PosForOrigZero)
      return false;
  }

  unsigned Opc = MI.getOpcode();
  bool SpecialShift32 = Opc == PPC::SLW || Opc == PPC::SLW_rec ||
                        Opc == PPC::SRW || Opc == PPC::SRW_rec ||
                        Opc == PPC::SLW8 || Opc == PPC::SLW8_rec ||
                        Opc == PPC::SRW8 || Opc == PPC::SRW8_rec;
  bool SpecialShift64 = Opc == PPC::SLD || Opc == PPC::SLD_rec ||
                        Opc == PPC::SRD || Opc == PPC::SRD_rec;
  bool SetCR = Opc == PPC::SLW_rec || Opc == PPC::SRW_rec ||
               Opc == PPC::SLD_rec || Opc == PPC::SRD_rec;
  bool RightShift = Opc == PPC::SRW || Opc == PPC::SRW_rec || Opc == PPC::SRD ||
                    Opc == PPC::SRD_rec;

  LLVM_DEBUG(dbgs() << "Replacing reg+reg instruction: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "Fed by load-immediate: ");
  LLVM_DEBUG(DefMI.dump());
  MI.setDesc(get(III.ImmOpcode));
  if (ConstantOpNo == III.OpNoForForwarding) {
    // Converting shifts to immediate form is a bit tricky since they may do
    // one of three things:
    // 1. If the shift amount is between OpSize and 2*OpSize, the result is zero.
    // 2. If the shift amount is zero, the result is unchanged (save for maybe
    //    setting CR0).
    // 3. If the shift amount is in [1, OpSize), it's just a shift.
    if (SpecialShift32 || SpecialShift64) {
      LoadImmediateInfo LII;
      LII.Imm = 0;
      LII.SetCR = SetCR;
      LII.Is64Bit = SpecialShift64;
      uint64_t ShAmt = Imm & (SpecialShift32 ? 0x1F : 0x3F);
      if (Imm & (SpecialShift32 ? 0x20 : 0x40))
        replaceInstrWithLI(MI, LII);
      // Shifts by zero don't change the value. If we don't need to set CR0,
      // just convert this to a COPY. Can't do this post-RA since we've already
      // cleaned up the copies.
      else if (!SetCR && ShAmt == 0 && !PostRA) {
        MI.removeOperand(2);
        MI.setDesc(get(PPC::COPY));
      } else {
        // The 32-bit and 64-bit instructions are quite different.
        if (SpecialShift32) {
          // Left shifts use (N, 0, 31-N).
          // Right shifts use (32-N, N, 31) if 0 < N < 32.
          // Use (0, 0, 31) if N == 0.
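          // For example, a left shift by 5 becomes (5, 0, 26) and a right
          // shift by 5 becomes (27, 5, 31).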
          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 32 - ShAmt : ShAmt;
          uint64_t MB = RightShift ? ShAmt : 0;
          uint64_t ME = RightShift ? 31 : 31 - ShAmt;
          replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(MB)
              .addImm(ME);
        } else {
          // Left shifts use (N, 63-N).
          // Right shifts use (64-N, N) if 0 < N < 64.
          // Use (0, 0) if N == 0.
          uint64_t SH = ShAmt == 0 ? 0 : RightShift ? 64 - ShAmt : ShAmt;
          uint64_t ME = RightShift ? ShAmt : 63 - ShAmt;
          replaceInstrOperandWithImm(MI, III.OpNoForForwarding, SH);
          MachineInstrBuilder(*MI.getParent()->getParent(), MI).addImm(ME);
        }
      }
    } else
      replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
  }
  // Convert commutative instructions (switch the operands and convert the
  // desired one to an immediate).
  else if (III.IsCommutative) {
    replaceInstrOperandWithImm(MI, ConstantOpNo, Imm);
    swapMIOperands(MI, ConstantOpNo, III.OpNoForForwarding);
  } else
    llvm_unreachable("Should have exited early!");

  // For instructions for which the constant register replaces a different
  // operand than where the immediate goes, we need to swap them.
  if (III.OpNoForForwarding != III.ImmOpNo)
    swapMIOperands(MI, III.OpNoForForwarding, III.ImmOpNo);

  // If the special R0/X0 register indices differ between the original
  // instruction and the new instruction, we need to fix up the register class
  // in the new instruction.
  if (!PostRA && III.ZeroIsSpecialOrig != III.ZeroIsSpecialNew) {
    if (III.ZeroIsSpecialNew) {
      // If the operand at III.ZeroIsSpecialNew is a physical reg (e.g.
      // ZERO/ZERO8), there is no need to fix up the register class.
      Register RegToModify = MI.getOperand(III.ZeroIsSpecialNew).getReg();
      if (RegToModify.isVirtual()) {
        const TargetRegisterClass *NewRC =
          MRI.getRegClass(RegToModify)->hasSuperClassEq(&PPC::GPRCRegClass) ?
          &PPC::GPRC_and_GPRC_NOR0RegClass : &PPC::G8RC_and_G8RC_NOX0RegClass;
        MRI.setRegClass(RegToModify, NewRC);
      }
    }
  }

  if (PostRA)
    recomputeLivenessFlags(*MI.getParent());

  LLVM_DEBUG(dbgs() << "With: ");
  LLVM_DEBUG(MI.dump());
  LLVM_DEBUG(dbgs() << "\n");
  return true;
}

const TargetRegisterClass *
PPCInstrInfo::updatedRC(const TargetRegisterClass *RC) const {
  if (Subtarget.hasVSX() && RC == &PPC::VRRCRegClass)
    return &PPC::VSRCRegClass;
  return RC;
}

int PPCInstrInfo::getRecordFormOpcode(unsigned Opcode) {
  return PPC::getRecordFormOpcode(Opcode);
}

static bool isOpZeroOfSubwordPreincLoad(int Opcode) {
  return (Opcode == PPC::LBZU || Opcode == PPC::LBZUX || Opcode == PPC::LBZU8 ||
          Opcode == PPC::LBZUX8 || Opcode == PPC::LHZU ||
          Opcode == PPC::LHZUX || Opcode == PPC::LHZU8 ||
          Opcode == PPC::LHZUX8);
}

// This function checks for sign extension from 32 bits to 64 bits.
static bool definedBySignExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isSExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU is sign-extended.
  if (isOpZeroOfSubwordPreincLoad(Opcode) && MI->getOperand(0).getReg() == Reg)
    return true;

  // RLDICL generates sign-extended output if it clears at least
  // 33 bits from the left (MSB).
  if (Opcode == PPC::RLDICL && MI->getOperand(3).getImm() >= 33)
    return true;

  // If at least one bit from the left of the lower word is masked out,
  // bits 0 through 32 of the output are all cleared.
  // Hence the output is already sign-extended.
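  // For example, RLWINM with MB = 1 and ME = 31 masks out the MSB of the
  // lower word, so bits 0 through 32 of the 64-bit result are zero and the
  // value is sign-extended (and, in fact, also zero-extended).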
  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec) &&
      MI->getOperand(3).getImm() > 0 &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  // If the most significant bit of the immediate in ANDIS is zero,
  // bits 0 through 32 are all cleared.
  if (Opcode == PPC::ANDIS_rec || Opcode == PPC::ANDIS8_rec) {
    uint16_t Imm = MI->getOperand(2).getImm();
    if ((Imm & 0x8000) == 0)
      return true;
  }

  return false;
}

// This function checks the machine instruction that defines the input register
// Reg. If that machine instruction always outputs a value that has only zeros
// in the higher 32 bits then this function will return true.
static bool definedByZeroExtendingOp(const unsigned Reg,
                                     const MachineRegisterInfo *MRI) {
  if (!Register::isVirtualRegister(Reg))
    return false;

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return false;

  int Opcode = MI->getOpcode();
  const PPCInstrInfo *TII =
      MI->getMF()->getSubtarget<PPCSubtarget>().getInstrInfo();
  if (TII->isZExt32To64(Opcode))
    return true;

  // The first def of LBZU/LHZU/LWZU is zero-extended.
  if ((isOpZeroOfSubwordPreincLoad(Opcode) || Opcode == PPC::LWZU ||
       Opcode == PPC::LWZUX || Opcode == PPC::LWZU8 || Opcode == PPC::LWZUX8) &&
      MI->getOperand(0).getReg() == Reg)
    return true;

  // The 16-bit immediate is sign-extended in li/lis.
  // If the most significant bit is zero, all higher bits are zero.
  if (Opcode == PPC::LI || Opcode == PPC::LI8 ||
      Opcode == PPC::LIS || Opcode == PPC::LIS8) {
    int64_t Imm = MI->getOperand(1).getImm();
    if (((uint64_t)Imm & ~0x7FFFuLL) == 0)
      return true;
  }

  // We have some variations of rotate-and-mask instructions
  // that clear the higher 32 bits.
  if ((Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
       Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec ||
       Opcode == PPC::RLDICL_32_64) &&
      MI->getOperand(3).getImm() >= 32)
    return true;

  if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
      MI->getOperand(3).getImm() >= 32 &&
      MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    return true;

  if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
       Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
       Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
      MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    return true;

  return false;
}

// This function returns true if the input MachineInstr is a TOC save
// instruction.
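// For example, on 64-bit ELFv2 the TOC save slot is at offset 24 from the
// stack pointer, so a store such as `STD $x2, 24, $x1` is recognized; the
// exact offset is whatever getTOCSaveOffset() reports for the target ABI.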
bool PPCInstrInfo::isTOCSaveMI(const MachineInstr &MI) const {
  if (!MI.getOperand(1).isImm() || !MI.getOperand(2).isReg())
    return false;
  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
  unsigned StackOffset = MI.getOperand(1).getImm();
  Register StackReg = MI.getOperand(2).getReg();
  Register SPReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
  if (StackReg == SPReg && StackOffset == TOCSaveOffset)
    return true;

  return false;
}

// We limit the max depth to track incoming values of PHIs or binary ops
// (e.g. AND) to avoid excessive cost.
const unsigned MAX_BINOP_DEPTH = 1;

// The isSignOrZeroExtended function is recursive. The parameter BinOpDepth
// does not count all of the recursions. The parameter BinOpDepth is
// incremented only when isSignOrZeroExtended calls itself more than once.
// This is done to prevent exponential recursion. There is no parameter to
// track linear recursion.
std::pair<bool, bool>
PPCInstrInfo::isSignOrZeroExtended(const unsigned Reg,
                                   const unsigned BinOpDepth,
                                   const MachineRegisterInfo *MRI) const {
  if (!Register::isVirtualRegister(Reg))
    return std::pair<bool, bool>(false, false);

  MachineInstr *MI = MRI->getVRegDef(Reg);
  if (!MI)
    return std::pair<bool, bool>(false, false);

  bool IsSExt = definedBySignExtendingOp(Reg, MRI);
  bool IsZExt = definedByZeroExtendingOp(Reg, MRI);

  // If we know the instruction always returns a sign- and zero-extended
  // result, return here.
  if (IsSExt && IsZExt)
    return std::pair<bool, bool>(IsSExt, IsZExt);

  switch (MI->getOpcode()) {
  case PPC::COPY: {
    Register SrcReg = MI->getOperand(1).getReg();

    // In both the ELFv1 and ELFv2 ABIs, method parameters and the return value
    // are sign- or zero-extended.
    const MachineFunction *MF = MI->getMF();

    if (!MF->getSubtarget<PPCSubtarget>().isSVR4ABI()) {
      // If this is a copy from another register, we recursively check the
      // source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // From here on, everything is SVR4ABI.
    const PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
    // We check the ZExt/SExt flags for a method parameter.
    if (MI->getParent()->getBasicBlock() ==
        &MF->getFunction().getEntryBlock()) {
      Register VReg = MI->getOperand(0).getReg();
      if (MF->getRegInfo().isLiveIn(VReg)) {
        IsSExt |= FuncInfo->isLiveInSExt(VReg);
        IsZExt |= FuncInfo->isLiveInZExt(VReg);
        return std::pair<bool, bool>(IsSExt, IsZExt);
      }
    }

    if (SrcReg != PPC::X3) {
      // If this is a copy from another register, we recursively check the
      // source.
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
    }

    // For a method return value, we check the ZExt/SExt flags of the return
    // attribute. We assume the following code sequence for a method call:
    //   ADJCALLSTACKDOWN 32, implicit dead %r1, implicit %r1
    //   BL8_NOP @func,...
    //   ADJCALLSTACKUP 32, 0, implicit dead %r1, implicit %r1
    //   %5 = COPY %x3; G8RC:%5
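    // If @func is declared at the IR level with a signext/zeroext return
    // attribute (e.g. `declare signext i32 @func()`), the attribute check
    // below lets this COPY from %x3 be treated as sign-/zero-extended.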
    const MachineBasicBlock *MBB = MI->getParent();
    std::pair<bool, bool> IsExtendPair = std::pair<bool, bool>(IsSExt, IsZExt);
    MachineBasicBlock::const_instr_iterator II =
        MachineBasicBlock::const_instr_iterator(MI);
    if (II == MBB->instr_begin() || (--II)->getOpcode() != PPC::ADJCALLSTACKUP)
      return IsExtendPair;

    const MachineInstr &CallMI = *(--II);
    if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
      return IsExtendPair;

    const Function *CalleeFn =
        dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
    if (!CalleeFn)
      return IsExtendPair;
    const IntegerType *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
    if (IntTy && IntTy->getBitWidth() <= 32) {
      const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
      IsSExt |= Attrs.hasAttribute(Attribute::SExt);
      IsZExt |= Attrs.hasAttribute(Attribute::ZExt);
      return std::pair<bool, bool>(IsSExt, IsZExt);
    }

    return IsExtendPair;
  }

  // OR, XOR with 16-bit immediate does not change the upper 48 bits.
  // So, we track the operand register as we do for register copy.
  case PPC::ORI:
  case PPC::XORI:
  case PPC::ORI8:
  case PPC::XORI8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                 SrcExt.second || IsZExt);
  }

  // OR, XOR with shifted 16-bit immediate does not change the upper
  // 32 bits. So, we track the operand register for zero extension.
  // For sign extension when the MSB of the immediate is zero, we also
  // track the operand register since the upper 33 bits are unchanged.
  case PPC::ORIS:
  case PPC::XORIS:
  case PPC::ORIS8:
  case PPC::XORIS8: {
    Register SrcReg = MI->getOperand(1).getReg();
    auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth, MRI);
    uint16_t Imm = MI->getOperand(2).getImm();
    if (Imm & 0x8000)
      return std::pair<bool, bool>(false, SrcExt.second || IsZExt);
    else
      return std::pair<bool, bool>(SrcExt.first || IsSExt,
                                   SrcExt.second || IsZExt);
  }

  // If all incoming values are sign-/zero-extended,
  // the output of OR, ISEL or PHI is also sign-/zero-extended.
  case PPC::OR:
  case PPC::OR8:
  case PPC::ISEL:
  case PPC::PHI: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    // The input registers for PHI are operand 1, 3, ...
    // The input registers for others are operand 1 and 2.
    unsigned OperandEnd = 3, OperandStride = 1;
    if (MI->getOpcode() == PPC::PHI) {
      OperandEnd = MI->getNumOperands();
      OperandStride = 2;
    }

    IsSExt = true;
    IsZExt = true;
    for (unsigned I = 1; I != OperandEnd; I += OperandStride) {
      if (!MI->getOperand(I).isReg())
        return std::pair<bool, bool>(false, false);

      Register SrcReg = MI->getOperand(I).getReg();
      auto SrcExt = isSignOrZeroExtended(SrcReg, BinOpDepth + 1, MRI);
      IsSExt &= SrcExt.first;
      IsZExt &= SrcExt.second;
    }
    return std::pair<bool, bool>(IsSExt, IsZExt);
  }

  // If at least one of the incoming values of an AND is zero extended
  // then the output is also zero-extended. If both of the incoming values
  // are sign-extended then the output is also sign extended.
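  // For example, AND with one zero-extended input always has zero upper
  // 32 bits, whereas sign extension is guaranteed only when both inputs are
  // sign-extended, hence the &&/|| combination below.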
  case PPC::AND:
  case PPC::AND8: {
    if (BinOpDepth >= MAX_BINOP_DEPTH)
      return std::pair<bool, bool>(false, false);

    Register SrcReg1 = MI->getOperand(1).getReg();
    Register SrcReg2 = MI->getOperand(2).getReg();
    auto Src1Ext = isSignOrZeroExtended(SrcReg1, BinOpDepth + 1, MRI);
    auto Src2Ext = isSignOrZeroExtended(SrcReg2, BinOpDepth + 1, MRI);
    return std::pair<bool, bool>(Src1Ext.first && Src2Ext.first,
                                 Src1Ext.second || Src2Ext.second);
  }

  default:
    break;
  }
  return std::pair<bool, bool>(IsSExt, IsZExt);
}

bool PPCInstrInfo::isBDNZ(unsigned Opcode) const {
  return (Opcode == (Subtarget.isPPC64() ? PPC::BDNZ8 : PPC::BDNZ));
}

namespace {
class PPCPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
  MachineInstr *Loop, *EndLoop, *LoopCount;
  MachineFunction *MF;
  const TargetInstrInfo *TII;
  int64_t TripCount;

public:
  PPCPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
                       MachineInstr *LoopCount)
      : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
        MF(Loop->getParent()->getParent()),
        TII(MF->getSubtarget().getInstrInfo()) {
    // Inspect the Loop instruction up-front, as it may be deleted when we call
    // createTripCountGreaterCondition.
    if (LoopCount->getOpcode() == PPC::LI8 || LoopCount->getOpcode() == PPC::LI)
      TripCount = LoopCount->getOperand(1).getImm();
    else
      TripCount = -1;
  }

  bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
    // Only ignore the terminator.
    return MI == EndLoop;
  }

  std::optional<bool> createTripCountGreaterCondition(
      int TC, MachineBasicBlock &MBB,
      SmallVectorImpl<MachineOperand> &Cond) override {
    if (TripCount == -1) {
      // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by
      // 1, we don't need to generate anything here.
      Cond.push_back(MachineOperand::CreateImm(0));
      Cond.push_back(MachineOperand::CreateReg(
          MF->getSubtarget<PPCSubtarget>().isPPC64() ? PPC::CTR8 : PPC::CTR,
          true));
      return {};
    }

    return TripCount > TC;
  }

  void setPreheader(MachineBasicBlock *NewPreheader) override {
    // Do nothing. We want the LOOP setup instruction to stay in the *old*
    // preheader, so we can use BDZ in the prologs to adapt the loop trip count.
  }

  void adjustTripCount(int TripCountAdjust) override {
    // If the loop trip count is a compile-time value, then just change the
    // value.
    if (LoopCount->getOpcode() == PPC::LI8 ||
        LoopCount->getOpcode() == PPC::LI) {
      int64_t TripCount = LoopCount->getOperand(1).getImm() + TripCountAdjust;
      LoopCount->getOperand(1).setImm(TripCount);
      return;
    }

    // Since the BDZ/BDZ8 that we will insert will also decrease the CTR by 1,
    // we don't need to generate anything here.
  }

  void disposed() override {
    Loop->eraseFromParent();
    // Ensure the loop setup instruction is deleted too.
    LoopCount->eraseFromParent();
  }
};
} // namespace
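// Illustrative shape of the hardware loop that the analysis below expects:
// the preheader contains the CTR setup (e.g. an LI8 of the trip count feeding
// MTCTR8loop) and the loop block ends with a BDNZ8/BDNZ back-edge. When the
// trip count is not a compile-time LI/LI8, TripCount stays -1 and the
// trip-count condition is expressed on CTR/CTR8 instead.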
std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
PPCInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
  // We really "analyze" only hardware loops right now.
  MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
  MachineBasicBlock *Preheader = *LoopBB->pred_begin();
  if (Preheader == LoopBB)
    Preheader = *std::next(LoopBB->pred_begin());
  MachineFunction *MF = Preheader->getParent();

  if (I != LoopBB->end() && isBDNZ(I->getOpcode())) {
    SmallPtrSet<MachineBasicBlock *, 8> Visited;
    if (MachineInstr *LoopInst = findLoopInstr(*Preheader, Visited)) {
      Register LoopCountReg = LoopInst->getOperand(0).getReg();
      MachineRegisterInfo &MRI = MF->getRegInfo();
      MachineInstr *LoopCount = MRI.getUniqueVRegDef(LoopCountReg);
      return std::make_unique<PPCPipelinerLoopInfo>(LoopInst, &*I, LoopCount);
    }
  }
  return nullptr;
}

MachineInstr *PPCInstrInfo::findLoopInstr(
    MachineBasicBlock &PreHeader,
    SmallPtrSet<MachineBasicBlock *, 8> &Visited) const {

  unsigned LOOPi = (Subtarget.isPPC64() ? PPC::MTCTR8loop : PPC::MTCTRloop);

  // The loop set-up instruction should be in the preheader.
  for (auto &I : PreHeader.instrs())
    if (I.getOpcode() == LOOPi)
      return &I;
  return nullptr;
}

// Return true if we can get the base operand, byte offset of an instruction
// and the memory width. Width is the size of memory that is being
// loaded/stored.
bool PPCInstrInfo::getMemOperandWithOffsetWidth(
    const MachineInstr &LdSt, const MachineOperand *&BaseReg, int64_t &Offset,
    LocationSize &Width, const TargetRegisterInfo *TRI) const {
  if (!LdSt.mayLoadOrStore() || LdSt.getNumExplicitOperands() != 3)
    return false;

  // Handle only loads/stores with a base register followed by an immediate
  // offset.
  if (!LdSt.getOperand(1).isImm() ||
      (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()))
    return false;

  if (!LdSt.hasOneMemOperand())
    return false;

  Width = (*LdSt.memoperands_begin())->getSize();
  Offset = LdSt.getOperand(1).getImm();
  BaseReg = &LdSt.getOperand(2);
  return true;
}

bool PPCInstrInfo::areMemAccessesTriviallyDisjoint(
    const MachineInstr &MIa, const MachineInstr &MIb) const {
  assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
  assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");

  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
    return false;

  // Retrieve the base register, offset from the base register and width. Width
  // is the size of memory that is being loaded/stored (e.g. 1, 2, 4). If the
  // base registers are identical, and the offset of the lower memory access
  // plus its width does not overlap the offset of the higher memory access,
  // then the memory accesses are disjoint.
  const TargetRegisterInfo *TRI = &getRegisterInfo();
  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
  int64_t OffsetA = 0, OffsetB = 0;
  LocationSize WidthA = 0, WidthB = 0;
  if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, WidthA, TRI) &&
      getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, WidthB, TRI)) {
    if (BaseOpA->isIdenticalTo(*BaseOpB)) {
      int LowOffset = std::min(OffsetA, OffsetB);
      int HighOffset = std::max(OffsetA, OffsetB);
      LocationSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
      if (LowWidth.hasValue() &&
          LowOffset + (int)LowWidth.getValue() <= HighOffset)
        return true;
    }
  }
  return false;
}
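// Worked example for areMemAccessesTriviallyDisjoint above: two 4-byte
// accesses off the same base register at offsets 0 and 8 are reported as
// disjoint because 0 + 4 <= 8; with offsets 0 and 2 the ranges would overlap,
// so the function conservatively returns false.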