Path: blob/main/contrib/llvm-project/llvm/lib/Target/BPF/BPFMIPeephole.cpp
35266 views
//===-------------- BPFMIPeephole.cpp - MI Peephole Cleanups -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass performs peephole optimizations to cleanup ugly code sequences at9// MachineInstruction layer.10//11// Currently, there are two optimizations implemented:12// - One pre-RA MachineSSA pass to eliminate type promotion sequences, those13// zero extend 32-bit subregisters to 64-bit registers, if the compiler14// could prove the subregisters is defined by 32-bit operations in which15// case the upper half of the underlying 64-bit registers were zeroed16// implicitly.17//18// - One post-RA PreEmit pass to do final cleanup on some redundant19// instructions generated due to bad RA on subregister.20//===----------------------------------------------------------------------===//2122#include "BPF.h"23#include "BPFInstrInfo.h"24#include "BPFTargetMachine.h"25#include "llvm/ADT/Statistic.h"26#include "llvm/CodeGen/MachineFunctionPass.h"27#include "llvm/CodeGen/MachineInstrBuilder.h"28#include "llvm/CodeGen/MachineRegisterInfo.h"29#include "llvm/Support/Debug.h"30#include <set>3132using namespace llvm;3334#define DEBUG_TYPE "bpf-mi-zext-elim"3536static cl::opt<int> GotolAbsLowBound("gotol-abs-low-bound", cl::Hidden,37cl::init(INT16_MAX >> 1), cl::desc("Specify gotol lower bound"));3839STATISTIC(ZExtElemNum, "Number of zero extension shifts eliminated");4041namespace {4243struct BPFMIPeephole : public MachineFunctionPass {4445static char ID;46const BPFInstrInfo *TII;47MachineFunction *MF;48MachineRegisterInfo *MRI;4950BPFMIPeephole() : MachineFunctionPass(ID) {51initializeBPFMIPeepholePass(*PassRegistry::getPassRegistry());52}5354private:55// Initialize class variables.56void initialize(MachineFunction &MFParm);5758bool isCopyFrom32Def(MachineInstr *CopyMI);59bool isInsnFrom32Def(MachineInstr *DefInsn);60bool isPhiFrom32Def(MachineInstr *MovMI);61bool isMovFrom32Def(MachineInstr *MovMI);62bool eliminateZExtSeq();63bool eliminateZExt();6465std::set<MachineInstr *> PhiInsns;6667public:6869// Main entry point for this pass.70bool runOnMachineFunction(MachineFunction &MF) override {71if (skipFunction(MF.getFunction()))72return false;7374initialize(MF);7576// First try to eliminate (zext, lshift, rshift) and then77// try to eliminate zext.78bool ZExtSeqExist, ZExtExist;79ZExtSeqExist = eliminateZExtSeq();80ZExtExist = eliminateZExt();81return ZExtSeqExist || ZExtExist;82}83};8485// Initialize class variables.86void BPFMIPeephole::initialize(MachineFunction &MFParm) {87MF = &MFParm;88MRI = &MF->getRegInfo();89TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();90LLVM_DEBUG(dbgs() << "*** BPF MachineSSA ZEXT Elim peephole pass ***\n\n");91}9293bool BPFMIPeephole::isCopyFrom32Def(MachineInstr *CopyMI)94{95MachineOperand &opnd = CopyMI->getOperand(1);9697if (!opnd.isReg())98return false;99100// Return false if getting value from a 32bit physical register.101// Most likely, this physical register is aliased to102// function call return value or current function parameters.103Register Reg = opnd.getReg();104if (!Reg.isVirtual())105return false;106107if (MRI->getRegClass(Reg) == &BPF::GPRRegClass)108return false;109110MachineInstr *DefInsn = MRI->getVRegDef(Reg);111if (!isInsnFrom32Def(DefInsn))112return false;113114return true;115}116117bool BPFMIPeephole::isPhiFrom32Def(MachineInstr *PhiMI)118{119for (unsigned i = 1, e = PhiMI->getNumOperands(); i < e; i += 2) {120MachineOperand &opnd = PhiMI->getOperand(i);121122if (!opnd.isReg())123return false;124125MachineInstr *PhiDef = MRI->getVRegDef(opnd.getReg());126if (!PhiDef)127return false;128if (PhiDef->isPHI()) {129if (!PhiInsns.insert(PhiDef).second)130return false;131if (!isPhiFrom32Def(PhiDef))132return false;133}134if (PhiDef->getOpcode() == BPF::COPY && !isCopyFrom32Def(PhiDef))135return false;136}137138return true;139}140141// The \p DefInsn instruction defines a virtual register.142bool BPFMIPeephole::isInsnFrom32Def(MachineInstr *DefInsn)143{144if (!DefInsn)145return false;146147if (DefInsn->isPHI()) {148if (!PhiInsns.insert(DefInsn).second)149return false;150if (!isPhiFrom32Def(DefInsn))151return false;152} else if (DefInsn->getOpcode() == BPF::COPY) {153if (!isCopyFrom32Def(DefInsn))154return false;155}156157return true;158}159160bool BPFMIPeephole::isMovFrom32Def(MachineInstr *MovMI)161{162MachineInstr *DefInsn = MRI->getVRegDef(MovMI->getOperand(1).getReg());163164LLVM_DEBUG(dbgs() << " Def of Mov Src:");165LLVM_DEBUG(DefInsn->dump());166167PhiInsns.clear();168if (!isInsnFrom32Def(DefInsn))169return false;170171LLVM_DEBUG(dbgs() << " One ZExt elim sequence identified.\n");172173return true;174}175176bool BPFMIPeephole::eliminateZExtSeq() {177MachineInstr* ToErase = nullptr;178bool Eliminated = false;179180for (MachineBasicBlock &MBB : *MF) {181for (MachineInstr &MI : MBB) {182// If the previous instruction was marked for elimination, remove it now.183if (ToErase) {184ToErase->eraseFromParent();185ToErase = nullptr;186}187188// Eliminate the 32-bit to 64-bit zero extension sequence when possible.189//190// MOV_32_64 rB, wA191// SLL_ri rB, rB, 32192// SRL_ri rB, rB, 32193if (MI.getOpcode() == BPF::SRL_ri &&194MI.getOperand(2).getImm() == 32) {195Register DstReg = MI.getOperand(0).getReg();196Register ShfReg = MI.getOperand(1).getReg();197MachineInstr *SllMI = MRI->getVRegDef(ShfReg);198199LLVM_DEBUG(dbgs() << "Starting SRL found:");200LLVM_DEBUG(MI.dump());201202if (!SllMI ||203SllMI->isPHI() ||204SllMI->getOpcode() != BPF::SLL_ri ||205SllMI->getOperand(2).getImm() != 32)206continue;207208LLVM_DEBUG(dbgs() << " SLL found:");209LLVM_DEBUG(SllMI->dump());210211MachineInstr *MovMI = MRI->getVRegDef(SllMI->getOperand(1).getReg());212if (!MovMI ||213MovMI->isPHI() ||214MovMI->getOpcode() != BPF::MOV_32_64)215continue;216217LLVM_DEBUG(dbgs() << " Type cast Mov found:");218LLVM_DEBUG(MovMI->dump());219220Register SubReg = MovMI->getOperand(1).getReg();221if (!isMovFrom32Def(MovMI)) {222LLVM_DEBUG(dbgs()223<< " One ZExt elim sequence failed qualifying elim.\n");224continue;225}226227BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), DstReg)228.addImm(0).addReg(SubReg).addImm(BPF::sub_32);229230SllMI->eraseFromParent();231MovMI->eraseFromParent();232// MI is the right shift, we can't erase it in it's own iteration.233// Mark it to ToErase, and erase in the next iteration.234ToErase = &MI;235ZExtElemNum++;236Eliminated = true;237}238}239}240241return Eliminated;242}243244bool BPFMIPeephole::eliminateZExt() {245MachineInstr* ToErase = nullptr;246bool Eliminated = false;247248for (MachineBasicBlock &MBB : *MF) {249for (MachineInstr &MI : MBB) {250// If the previous instruction was marked for elimination, remove it now.251if (ToErase) {252ToErase->eraseFromParent();253ToErase = nullptr;254}255256if (MI.getOpcode() != BPF::MOV_32_64)257continue;258259// Eliminate MOV_32_64 if possible.260// MOV_32_64 rA, wB261//262// If wB has been zero extended, replace it with a SUBREG_TO_REG.263// This is to workaround BPF programs where pkt->{data, data_end}264// is encoded as u32, but actually the verifier populates them265// as 64bit pointer. The MOV_32_64 will zero out the top 32 bits.266LLVM_DEBUG(dbgs() << "Candidate MOV_32_64 instruction:");267LLVM_DEBUG(MI.dump());268269if (!isMovFrom32Def(&MI))270continue;271272LLVM_DEBUG(dbgs() << "Removing the MOV_32_64 instruction\n");273274Register dst = MI.getOperand(0).getReg();275Register src = MI.getOperand(1).getReg();276277// Build a SUBREG_TO_REG instruction.278BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(BPF::SUBREG_TO_REG), dst)279.addImm(0).addReg(src).addImm(BPF::sub_32);280281ToErase = &MI;282Eliminated = true;283}284}285286return Eliminated;287}288289} // end default namespace290291INITIALIZE_PASS(BPFMIPeephole, DEBUG_TYPE,292"BPF MachineSSA Peephole Optimization For ZEXT Eliminate",293false, false)294295char BPFMIPeephole::ID = 0;296FunctionPass* llvm::createBPFMIPeepholePass() { return new BPFMIPeephole(); }297298STATISTIC(RedundantMovElemNum, "Number of redundant moves eliminated");299300namespace {301302struct BPFMIPreEmitPeephole : public MachineFunctionPass {303304static char ID;305MachineFunction *MF;306const TargetRegisterInfo *TRI;307const BPFInstrInfo *TII;308bool SupportGotol;309310BPFMIPreEmitPeephole() : MachineFunctionPass(ID) {311initializeBPFMIPreEmitPeepholePass(*PassRegistry::getPassRegistry());312}313314private:315// Initialize class variables.316void initialize(MachineFunction &MFParm);317318bool in16BitRange(int Num);319bool eliminateRedundantMov();320bool adjustBranch();321322public:323324// Main entry point for this pass.325bool runOnMachineFunction(MachineFunction &MF) override {326if (skipFunction(MF.getFunction()))327return false;328329initialize(MF);330331bool Changed;332Changed = eliminateRedundantMov();333if (SupportGotol)334Changed = adjustBranch() || Changed;335return Changed;336}337};338339// Initialize class variables.340void BPFMIPreEmitPeephole::initialize(MachineFunction &MFParm) {341MF = &MFParm;342TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();343TRI = MF->getSubtarget<BPFSubtarget>().getRegisterInfo();344SupportGotol = MF->getSubtarget<BPFSubtarget>().hasGotol();345LLVM_DEBUG(dbgs() << "*** BPF PreEmit peephole pass ***\n\n");346}347348bool BPFMIPreEmitPeephole::eliminateRedundantMov() {349MachineInstr* ToErase = nullptr;350bool Eliminated = false;351352for (MachineBasicBlock &MBB : *MF) {353for (MachineInstr &MI : MBB) {354// If the previous instruction was marked for elimination, remove it now.355if (ToErase) {356LLVM_DEBUG(dbgs() << " Redundant Mov Eliminated:");357LLVM_DEBUG(ToErase->dump());358ToErase->eraseFromParent();359ToErase = nullptr;360}361362// Eliminate identical move:363//364// MOV rA, rA365//366// Note that we cannot remove367// MOV_32_64 rA, wA368// MOV_rr_32 wA, wA369// as these two instructions having side effects, zeroing out370// top 32 bits of rA.371unsigned Opcode = MI.getOpcode();372if (Opcode == BPF::MOV_rr) {373Register dst = MI.getOperand(0).getReg();374Register src = MI.getOperand(1).getReg();375376if (dst != src)377continue;378379ToErase = &MI;380RedundantMovElemNum++;381Eliminated = true;382}383}384}385386return Eliminated;387}388389bool BPFMIPreEmitPeephole::in16BitRange(int Num) {390// Well, the cut-off is not precisely at 16bit range since391// new codes are added during the transformation. So let us392// a little bit conservative.393return Num >= -GotolAbsLowBound && Num <= GotolAbsLowBound;394}395396// Before cpu=v4, only 16bit branch target offset (-0x8000 to 0x7fff)397// is supported for both unconditional (JMP) and condition (JEQ, JSGT,398// etc.) branches. In certain cases, e.g., full unrolling, the branch399// target offset might exceed 16bit range. If this happens, the llvm400// will generate incorrect code as the offset is truncated to 16bit.401//402// To fix this rare case, a new insn JMPL is introduced. This new403// insn supports supports 32bit branch target offset. The compiler404// does not use this insn during insn selection. Rather, BPF backend405// will estimate the branch target offset and do JMP -> JMPL and406// JEQ -> JEQ + JMPL conversion if the estimated branch target offset407// is beyond 16bit.408bool BPFMIPreEmitPeephole::adjustBranch() {409bool Changed = false;410int CurrNumInsns = 0;411DenseMap<MachineBasicBlock *, int> SoFarNumInsns;412DenseMap<MachineBasicBlock *, MachineBasicBlock *> FollowThroughBB;413std::vector<MachineBasicBlock *> MBBs;414415MachineBasicBlock *PrevBB = nullptr;416for (MachineBasicBlock &MBB : *MF) {417// MBB.size() is the number of insns in this basic block, including some418// debug info, e.g., DEBUG_VALUE, so we may over-count a little bit.419// Typically we have way more normal insns than DEBUG_VALUE insns.420// Also, if we indeed need to convert conditional branch like JEQ to421// JEQ + JMPL, we actually introduced some new insns like below.422CurrNumInsns += (int)MBB.size();423SoFarNumInsns[&MBB] = CurrNumInsns;424if (PrevBB != nullptr)425FollowThroughBB[PrevBB] = &MBB;426PrevBB = &MBB;427// A list of original BBs to make later traveral easier.428MBBs.push_back(&MBB);429}430FollowThroughBB[PrevBB] = nullptr;431432for (unsigned i = 0; i < MBBs.size(); i++) {433// We have four cases here:434// (1). no terminator, simple follow through.435// (2). jmp to another bb.436// (3). conditional jmp to another bb or follow through.437// (4). conditional jmp followed by an unconditional jmp.438MachineInstr *CondJmp = nullptr, *UncondJmp = nullptr;439440MachineBasicBlock *MBB = MBBs[i];441for (MachineInstr &Term : MBB->terminators()) {442if (Term.isConditionalBranch()) {443assert(CondJmp == nullptr);444CondJmp = &Term;445} else if (Term.isUnconditionalBranch()) {446assert(UncondJmp == nullptr);447UncondJmp = &Term;448}449}450451// (1). no terminator, simple follow through.452if (!CondJmp && !UncondJmp)453continue;454455MachineBasicBlock *CondTargetBB, *JmpBB;456CurrNumInsns = SoFarNumInsns[MBB];457458// (2). jmp to another bb.459if (!CondJmp && UncondJmp) {460JmpBB = UncondJmp->getOperand(0).getMBB();461if (in16BitRange(SoFarNumInsns[JmpBB] - JmpBB->size() - CurrNumInsns))462continue;463464// replace this insn as a JMPL.465BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);466UncondJmp->eraseFromParent();467Changed = true;468continue;469}470471const BasicBlock *TermBB = MBB->getBasicBlock();472int Dist;473474// (3). conditional jmp to another bb or follow through.475if (!UncondJmp) {476CondTargetBB = CondJmp->getOperand(2).getMBB();477MachineBasicBlock *FollowBB = FollowThroughBB[MBB];478Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;479if (in16BitRange(Dist))480continue;481482// We have483// B2: ...484// if (cond) goto B5485// B3: ...486// where B2 -> B5 is beyond 16bit range.487//488// We do not have 32bit cond jmp insn. So we try to do489// the following.490// B2: ...491// if (cond) goto New_B1492// New_B0 goto B3493// New_B1: gotol B5494// B3: ...495// Basically two new basic blocks are created.496MachineBasicBlock *New_B0 = MF->CreateMachineBasicBlock(TermBB);497MachineBasicBlock *New_B1 = MF->CreateMachineBasicBlock(TermBB);498499// Insert New_B0 and New_B1 into function block list.500MachineFunction::iterator MBB_I = ++MBB->getIterator();501MF->insert(MBB_I, New_B0);502MF->insert(MBB_I, New_B1);503504// replace B2 cond jump505if (CondJmp->getOperand(1).isReg())506BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))507.addReg(CondJmp->getOperand(0).getReg())508.addReg(CondJmp->getOperand(1).getReg())509.addMBB(New_B1);510else511BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))512.addReg(CondJmp->getOperand(0).getReg())513.addImm(CondJmp->getOperand(1).getImm())514.addMBB(New_B1);515516// it is possible that CondTargetBB and FollowBB are the same. But the517// above Dist checking should already filtered this case.518MBB->removeSuccessor(CondTargetBB);519MBB->removeSuccessor(FollowBB);520MBB->addSuccessor(New_B0);521MBB->addSuccessor(New_B1);522523// Populate insns in New_B0 and New_B1.524BuildMI(New_B0, CondJmp->getDebugLoc(), TII->get(BPF::JMP)).addMBB(FollowBB);525BuildMI(New_B1, CondJmp->getDebugLoc(), TII->get(BPF::JMPL))526.addMBB(CondTargetBB);527528New_B0->addSuccessor(FollowBB);529New_B1->addSuccessor(CondTargetBB);530CondJmp->eraseFromParent();531Changed = true;532continue;533}534535// (4). conditional jmp followed by an unconditional jmp.536CondTargetBB = CondJmp->getOperand(2).getMBB();537JmpBB = UncondJmp->getOperand(0).getMBB();538539// We have540// B2: ...541// if (cond) goto B5542// JMP B7543// B3: ...544//545// If only B2->B5 is out of 16bit range, we can do546// B2: ...547// if (cond) goto new_B548// JMP B7549// New_B: gotol B5550// B3: ...551//552// If only 'JMP B7' is out of 16bit range, we can replace553// 'JMP B7' with 'JMPL B7'.554//555// If both B2->B5 and 'JMP B7' is out of range, just do556// both the above transformations.557Dist = SoFarNumInsns[CondTargetBB] - CondTargetBB->size() - CurrNumInsns;558if (!in16BitRange(Dist)) {559MachineBasicBlock *New_B = MF->CreateMachineBasicBlock(TermBB);560561// Insert New_B0 into function block list.562MF->insert(++MBB->getIterator(), New_B);563564// replace B2 cond jump565if (CondJmp->getOperand(1).isReg())566BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))567.addReg(CondJmp->getOperand(0).getReg())568.addReg(CondJmp->getOperand(1).getReg())569.addMBB(New_B);570else571BuildMI(*MBB, MachineBasicBlock::iterator(*CondJmp), CondJmp->getDebugLoc(), TII->get(CondJmp->getOpcode()))572.addReg(CondJmp->getOperand(0).getReg())573.addImm(CondJmp->getOperand(1).getImm())574.addMBB(New_B);575576if (CondTargetBB != JmpBB)577MBB->removeSuccessor(CondTargetBB);578MBB->addSuccessor(New_B);579580// Populate insn in New_B.581BuildMI(New_B, CondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(CondTargetBB);582583New_B->addSuccessor(CondTargetBB);584CondJmp->eraseFromParent();585Changed = true;586}587588if (!in16BitRange(SoFarNumInsns[JmpBB] - CurrNumInsns)) {589BuildMI(MBB, UncondJmp->getDebugLoc(), TII->get(BPF::JMPL)).addMBB(JmpBB);590UncondJmp->eraseFromParent();591Changed = true;592}593}594595return Changed;596}597598} // end default namespace599600INITIALIZE_PASS(BPFMIPreEmitPeephole, "bpf-mi-pemit-peephole",601"BPF PreEmit Peephole Optimization", false, false)602603char BPFMIPreEmitPeephole::ID = 0;604FunctionPass* llvm::createBPFMIPreEmitPeepholePass()605{606return new BPFMIPreEmitPeephole();607}608609610