Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
35266 views
//===-- PPCCTRLoops.cpp - Generate CTR loops ------------------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This pass generates machine instructions for the CTR loops related pseudos:9// 1: MTCTRloop/DecreaseCTRloop10// 2: MTCTR8loop/DecreaseCTR8loop11//12// If a CTR loop can be generated:13// 1: MTCTRloop/MTCTR8loop will be converted to "mtctr"14// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "bdnz/bdz" and15// its user branch instruction can be deleted.16//17// If a CTR loop can not be generated due to clobber of CTR:18// 1: MTCTRloop/MTCTR8loop can be deleted.19// 2: DecreaseCTRloop/DecreaseCTR8loop will be converted to "addi -1" and20// a "cmplwi/cmpldi".21//22// This pass runs just before register allocation, because we don't want23// register allocator to allocate register for DecreaseCTRloop if a CTR can be24// generated or if a CTR loop can not be generated, we don't have any condition25// register for the new added "cmplwi/cmpldi".26//27//===----------------------------------------------------------------------===//2829#include "PPC.h"30#include "PPCInstrInfo.h"31#include "PPCSubtarget.h"32#include "llvm/ADT/Statistic.h"33#include "llvm/CodeGen/MachineBasicBlock.h"34#include "llvm/CodeGen/MachineFunction.h"35#include "llvm/CodeGen/MachineFunctionPass.h"36#include "llvm/CodeGen/MachineInstr.h"37#include "llvm/CodeGen/MachineLoopInfo.h"38#include "llvm/CodeGen/MachineOperand.h"39#include "llvm/CodeGen/MachineRegisterInfo.h"40#include "llvm/CodeGen/Register.h"41#include "llvm/InitializePasses.h"42#include "llvm/Pass.h"43#include "llvm/PassRegistry.h"44#include "llvm/Support/CodeGen.h"45#include "llvm/Support/Debug.h"46#include "llvm/Support/ErrorHandling.h"47#include <cassert>4849using namespace llvm;5051#define DEBUG_TYPE "ppc-ctrloops"5253STATISTIC(NumCTRLoops, "Number of CTR loops generated");54STATISTIC(NumNormalLoops, "Number of normal compare + branch loops generated");5556namespace {57class PPCCTRLoops : public MachineFunctionPass {58public:59static char ID;6061PPCCTRLoops() : MachineFunctionPass(ID) {62initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());63}6465void getAnalysisUsage(AnalysisUsage &AU) const override {66AU.addRequired<MachineLoopInfoWrapperPass>();67MachineFunctionPass::getAnalysisUsage(AU);68}6970bool runOnMachineFunction(MachineFunction &MF) override;7172private:73const PPCInstrInfo *TII = nullptr;74MachineRegisterInfo *MRI = nullptr;7576bool processLoop(MachineLoop *ML);77bool isCTRClobber(MachineInstr *MI, bool CheckReads) const;78void expandNormalLoops(MachineLoop *ML, MachineInstr *Start,79MachineInstr *Dec);80void expandCTRLoops(MachineLoop *ML, MachineInstr *Start, MachineInstr *Dec);81};82} // namespace8384char PPCCTRLoops::ID = 0;8586INITIALIZE_PASS_BEGIN(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",87false, false)88INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)89INITIALIZE_PASS_END(PPCCTRLoops, DEBUG_TYPE, "PowerPC CTR loops generation",90false, false)9192FunctionPass *llvm::createPPCCTRLoopsPass() { return new PPCCTRLoops(); }9394bool PPCCTRLoops::runOnMachineFunction(MachineFunction &MF) {95bool Changed = false;9697auto &MLI = getAnalysis<MachineLoopInfoWrapperPass>().getLI();98TII = static_cast<const PPCInstrInfo *>(MF.getSubtarget().getInstrInfo());99MRI = &MF.getRegInfo();100101for (auto *ML : MLI) {102if (ML->isOutermost())103Changed |= processLoop(ML);104}105106#ifndef NDEBUG107for (const MachineBasicBlock &BB : MF) {108for (const MachineInstr &I : BB)109assert((I.getOpcode() != PPC::DecreaseCTRloop &&110I.getOpcode() != PPC::DecreaseCTR8loop) &&111"CTR loop pseudo is not expanded!");112}113#endif114115return Changed;116}117118bool PPCCTRLoops::isCTRClobber(MachineInstr *MI, bool CheckReads) const {119if (!CheckReads) {120// If we are only checking for defs, that is we are going to find121// definitions before MTCTRloop, for this case:122// CTR defination inside the callee of a call instruction will not impact123// the defination of MTCTRloop, so we can use definesRegister() for the124// check, no need to check the regmask.125return MI->definesRegister(PPC::CTR, /*TRI=*/nullptr) ||126MI->definesRegister(PPC::CTR8, /*TRI=*/nullptr);127}128129if (MI->modifiesRegister(PPC::CTR, /*TRI=*/nullptr) ||130MI->modifiesRegister(PPC::CTR8, /*TRI=*/nullptr))131return true;132133if (MI->getDesc().isCall())134return true;135136// We define the CTR in the loop preheader, so if there is any CTR reader in137// the loop, we also can not use CTR loop form.138if (MI->readsRegister(PPC::CTR, /*TRI=*/nullptr) ||139MI->readsRegister(PPC::CTR8, /*TRI=*/nullptr))140return true;141142return false;143}144145bool PPCCTRLoops::processLoop(MachineLoop *ML) {146bool Changed = false;147148// Align with HardwareLoop pass, process inner loops first.149for (MachineLoop *I : *ML)150Changed |= processLoop(I);151152// If any inner loop is changed, outter loop must be without hardware loop153// intrinsics.154if (Changed)155return true;156157auto IsLoopStart = [](MachineInstr &MI) {158return MI.getOpcode() == PPC::MTCTRloop ||159MI.getOpcode() == PPC::MTCTR8loop;160};161162auto SearchForStart =163[&IsLoopStart](MachineBasicBlock *MBB) -> MachineInstr * {164for (auto &MI : *MBB) {165if (IsLoopStart(MI))166return &MI;167}168return nullptr;169};170171MachineInstr *Start = nullptr;172MachineInstr *Dec = nullptr;173bool InvalidCTRLoop = false;174175MachineBasicBlock *Preheader = ML->getLoopPreheader();176// If there is no preheader for this loop, there must be no MTCTRloop177// either.178if (!Preheader)179return false;180181Start = SearchForStart(Preheader);182// This is not a CTR loop candidate.183if (!Start)184return false;185186// If CTR is live to the preheader, we can not redefine the CTR register.187if (Preheader->isLiveIn(PPC::CTR) || Preheader->isLiveIn(PPC::CTR8))188InvalidCTRLoop = true;189190// Make sure there is also no CTR clobber in the block preheader between the191// begin and MTCTR.192for (MachineBasicBlock::reverse_instr_iterator I =193std::next(Start->getReverseIterator());194I != Preheader->instr_rend(); ++I)195// Only check the definitions of CTR. If there is non-dead definition for196// the CTR, we conservatively don't generate a CTR loop.197if (isCTRClobber(&*I, /* CheckReads */ false)) {198InvalidCTRLoop = true;199break;200}201202// Make sure there is also no CTR clobber/user in the block preheader between203// MTCTR and the end.204for (MachineBasicBlock::instr_iterator I = std::next(Start->getIterator());205I != Preheader->instr_end(); ++I)206if (isCTRClobber(&*I, /* CheckReads */ true)) {207InvalidCTRLoop = true;208break;209}210211// Find the CTR loop components and decide whether or not to fall back to a212// normal loop.213for (auto *MBB : reverse(ML->getBlocks())) {214for (auto &MI : *MBB) {215if (MI.getOpcode() == PPC::DecreaseCTRloop ||216MI.getOpcode() == PPC::DecreaseCTR8loop)217Dec = &MI;218else if (!InvalidCTRLoop)219// If any instruction clobber CTR, then we can not generate a CTR loop.220InvalidCTRLoop |= isCTRClobber(&MI, /* CheckReads */ true);221}222if (Dec && InvalidCTRLoop)223break;224}225226assert(Dec && "CTR loop is not complete!");227228if (InvalidCTRLoop) {229expandNormalLoops(ML, Start, Dec);230++NumNormalLoops;231}232else {233expandCTRLoops(ML, Start, Dec);234++NumCTRLoops;235}236return true;237}238239void PPCCTRLoops::expandNormalLoops(MachineLoop *ML, MachineInstr *Start,240MachineInstr *Dec) {241bool Is64Bit =242Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();243244MachineBasicBlock *Preheader = Start->getParent();245MachineBasicBlock *Exiting = Dec->getParent();246assert((Preheader && Exiting) &&247"Preheader and exiting should exist for CTR loop!");248249assert(Dec->getOperand(1).getImm() == 1 &&250"Loop decrement stride must be 1");251252unsigned ADDIOpcode = Is64Bit ? PPC::ADDI8 : PPC::ADDI;253unsigned CMPOpcode = Is64Bit ? PPC::CMPLDI : PPC::CMPLWI;254255Register PHIDef =256MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass257: &PPC::GPRC_and_GPRC_NOR0RegClass);258259Start->getParent()->getParent()->getProperties().reset(260MachineFunctionProperties::Property::NoPHIs);261262// Generate "PHI" in the header block.263auto PHIMIB = BuildMI(*ML->getHeader(), ML->getHeader()->getFirstNonPHI(),264DebugLoc(), TII->get(TargetOpcode::PHI), PHIDef);265PHIMIB.addReg(Start->getOperand(0).getReg()).addMBB(Preheader);266267Register ADDIDef =268MRI->createVirtualRegister(Is64Bit ? &PPC::G8RC_and_G8RC_NOX0RegClass269: &PPC::GPRC_and_GPRC_NOR0RegClass);270// Generate "addi -1" in the exiting block.271BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(ADDIOpcode), ADDIDef)272.addReg(PHIDef)273.addImm(-1);274275// Add other inputs for the PHI node.276if (ML->isLoopLatch(Exiting)) {277// There must be only two predecessors for the loop header, one is the278// Preheader and the other one is loop latch Exiting. In hardware loop279// insertion pass, the block containing DecreaseCTRloop must dominate all280// loop latches. So there must be only one latch.281assert(ML->getHeader()->pred_size() == 2 &&282"Loop header predecessor is not right!");283PHIMIB.addReg(ADDIDef).addMBB(Exiting);284} else {285// If the block containing DecreaseCTRloop is not a loop latch, we can use286// ADDIDef as the value for all other blocks for the PHI. In hardware loop287// insertion pass, the block containing DecreaseCTRloop must dominate all288// loop latches.289for (MachineBasicBlock *P : ML->getHeader()->predecessors()) {290if (ML->contains(P)) {291assert(ML->isLoopLatch(P) &&292"Loop's header in-loop predecessor is not loop latch!");293PHIMIB.addReg(ADDIDef).addMBB(P);294} else295assert(P == Preheader &&296"CTR loop should not be generated for irreducible loop!");297}298}299300// Generate the compare in the exiting block.301Register CMPDef = MRI->createVirtualRegister(&PPC::CRRCRegClass);302auto CMPMIB =303BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(CMPOpcode), CMPDef)304.addReg(ADDIDef)305.addImm(0);306307BuildMI(*Exiting, Dec, Dec->getDebugLoc(), TII->get(TargetOpcode::COPY),308Dec->getOperand(0).getReg())309.addReg(CMPMIB->getOperand(0).getReg(), 0, PPC::sub_gt);310311// Remove the pseudo instructions.312Start->eraseFromParent();313Dec->eraseFromParent();314}315316void PPCCTRLoops::expandCTRLoops(MachineLoop *ML, MachineInstr *Start,317MachineInstr *Dec) {318bool Is64Bit =319Start->getParent()->getParent()->getSubtarget<PPCSubtarget>().isPPC64();320321MachineBasicBlock *Preheader = Start->getParent();322MachineBasicBlock *Exiting = Dec->getParent();323324(void)Preheader;325assert((Preheader && Exiting) &&326"Preheader and exiting should exist for CTR loop!");327328assert(Dec->getOperand(1).getImm() == 1 && "Loop decrement must be 1!");329330unsigned BDNZOpcode = Is64Bit ? PPC::BDNZ8 : PPC::BDNZ;331unsigned BDZOpcode = Is64Bit ? PPC::BDZ8 : PPC::BDZ;332auto BrInstr = MRI->use_instr_begin(Dec->getOperand(0).getReg());333assert(MRI->hasOneUse(Dec->getOperand(0).getReg()) &&334"There should be only one user for loop decrement pseudo!");335336unsigned Opcode = 0;337switch (BrInstr->getOpcode()) {338case PPC::BC:339Opcode = BDNZOpcode;340(void) ML;341assert(ML->contains(BrInstr->getOperand(1).getMBB()) &&342"Invalid ctr loop!");343break;344case PPC::BCn:345Opcode = BDZOpcode;346assert(!ML->contains(BrInstr->getOperand(1).getMBB()) &&347"Invalid ctr loop!");348break;349default:350llvm_unreachable("Unhandled branch user for DecreaseCTRloop.");351}352353// Generate "bdnz/bdz" in the exiting block just before the terminator.354BuildMI(*Exiting, &*BrInstr, BrInstr->getDebugLoc(), TII->get(Opcode))355.addMBB(BrInstr->getOperand(1).getMBB());356357// Remove the pseudo instructions.358BrInstr->eraseFromParent();359Dec->eraseFromParent();360}361362363