Path: blob/main/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
35271 views
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimiztions -------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning9// of a MachineFunction.10//11// mov %SPL, %depot12// cvta.local %SP, %SPL13//14// Because Frame Index is a generic address and alloca can only return generic15// pointer, without this pass the instructions producing alloca'ed address will16// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on17// this address with their .local versions, but this may introduce a lot of18// cvta.to.local instructions. Performance can be improved if we avoid casting19// address back and forth and directly calculate local address based on %SPL.20// This peephole pass optimizes these cases, for example21//22// It will transform the following pattern23// %0 = LEA_ADDRi64 %VRFrame64, 424// %1 = cvta_to_local_64 %025//26// into27// %1 = LEA_ADDRi64 %VRFrameLocal64, 428//29// %VRFrameLocal64 is the virtual register name of %SPL30//31//===----------------------------------------------------------------------===//3233#include "NVPTX.h"34#include "NVPTXRegisterInfo.h"35#include "NVPTXSubtarget.h"36#include "llvm/CodeGen/MachineFunctionPass.h"37#include "llvm/CodeGen/MachineInstrBuilder.h"38#include "llvm/CodeGen/MachineRegisterInfo.h"39#include "llvm/CodeGen/TargetInstrInfo.h"40#include "llvm/CodeGen/TargetRegisterInfo.h"4142using namespace llvm;4344#define DEBUG_TYPE "nvptx-peephole"4546namespace llvm {47void initializeNVPTXPeepholePass(PassRegistry &);48}4950namespace {51struct NVPTXPeephole : public MachineFunctionPass {52public:53static char ID;54NVPTXPeephole() : MachineFunctionPass(ID) {55initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());56}5758bool runOnMachineFunction(MachineFunction &MF) override;5960StringRef getPassName() const override {61return "NVPTX optimize redundant cvta.to.local instruction";62}6364void getAnalysisUsage(AnalysisUsage &AU) const override {65MachineFunctionPass::getAnalysisUsage(AU);66}67};68}6970char NVPTXPeephole::ID = 0;7172INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)7374static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {75auto &MBB = *Root.getParent();76auto &MF = *MBB.getParent();77// Check current instruction is cvta.to.local78if (Root.getOpcode() != NVPTX::cvta_to_local_64 &&79Root.getOpcode() != NVPTX::cvta_to_local)80return false;8182auto &Op = Root.getOperand(1);83const auto &MRI = MF.getRegInfo();84MachineInstr *GenericAddrDef = nullptr;85if (Op.isReg() && Op.getReg().isVirtual()) {86GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());87}8889// Check the register operand is uniquely defined by LEA_ADDRi instruction90if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||91(GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&92GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {93return false;94}9596const NVPTXRegisterInfo *NRI =97MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();9899// Check the LEA_ADDRi operand is Frame index100auto &BaseAddrOp = GenericAddrDef->getOperand(1);101if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NRI->getFrameRegister(MF)) {102return true;103}104105return false;106}107108static void CombineCVTAToLocal(MachineInstr &Root) {109auto &MBB = *Root.getParent();110auto &MF = *MBB.getParent();111const auto &MRI = MF.getRegInfo();112const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();113auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());114115const NVPTXRegisterInfo *NRI =116MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();117118MachineInstrBuilder MIB =119BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),120Root.getOperand(0).getReg())121.addReg(NRI->getFrameLocalRegister(MF))122.add(Prev.getOperand(2));123124MBB.insert((MachineBasicBlock::iterator)&Root, MIB);125126// Check if MRI has only one non dbg use, which is Root127if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {128Prev.eraseFromParent();129}130Root.eraseFromParent();131}132133bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {134if (skipFunction(MF.getFunction()))135return false;136137bool Changed = false;138// Loop over all of the basic blocks.139for (auto &MBB : MF) {140// Traverse the basic block.141auto BlockIter = MBB.begin();142143while (BlockIter != MBB.end()) {144auto &MI = *BlockIter++;145if (isCVTAToLocalCombinationCandidate(MI)) {146CombineCVTAToLocal(MI);147Changed = true;148}149} // Instruction150} // Basic Block151152const NVPTXRegisterInfo *NRI =153MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();154155// Remove unnecessary %VRFrame = cvta.local %VRFrameLocal156const auto &MRI = MF.getRegInfo();157if (MRI.use_empty(NRI->getFrameRegister(MF))) {158if (auto MI = MRI.getUniqueVRegDef(NRI->getFrameRegister(MF))) {159MI->eraseFromParent();160}161}162163return Changed;164}165166MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }167168169