Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/NVPTX/NVPTXPeephole.cpp
35271 views
1
//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// In NVPTX, NVPTXFrameLowering will emit following instruction at the beginning
10
// of a MachineFunction.
11
//
12
// mov %SPL, %depot
13
// cvta.local %SP, %SPL
14
//
15
// Because Frame Index is a generic address and alloca can only return generic
16
// pointer, without this pass the instructions producing alloca'ed address will
17
// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
18
// this address with their .local versions, but this may introduce a lot of
19
// cvta.to.local instructions. Performance can be improved if we avoid casting
20
// address back and forth and directly calculate local address based on %SPL.
21
// This peephole pass optimizes these cases, for example
22
//
23
// It will transform the following pattern
24
// %0 = LEA_ADDRi64 %VRFrame64, 4
25
// %1 = cvta_to_local_64 %0
26
//
27
// into
28
// %1 = LEA_ADDRi64 %VRFrameLocal64, 4
29
//
30
// %VRFrameLocal64 is the virtual register name of %SPL
31
//
32
//===----------------------------------------------------------------------===//
33
34
#include "NVPTX.h"
35
#include "NVPTXRegisterInfo.h"
36
#include "NVPTXSubtarget.h"
37
#include "llvm/CodeGen/MachineFunctionPass.h"
38
#include "llvm/CodeGen/MachineInstrBuilder.h"
39
#include "llvm/CodeGen/MachineRegisterInfo.h"
40
#include "llvm/CodeGen/TargetInstrInfo.h"
41
#include "llvm/CodeGen/TargetRegisterInfo.h"
42
43
using namespace llvm;
44
45
#define DEBUG_TYPE "nvptx-peephole"
46
47
namespace llvm {
48
void initializeNVPTXPeepholePass(PassRegistry &);
49
}
50
51
namespace {
52
struct NVPTXPeephole : public MachineFunctionPass {
53
public:
54
static char ID;
55
NVPTXPeephole() : MachineFunctionPass(ID) {
56
initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
57
}
58
59
bool runOnMachineFunction(MachineFunction &MF) override;
60
61
StringRef getPassName() const override {
62
return "NVPTX optimize redundant cvta.to.local instruction";
63
}
64
65
void getAnalysisUsage(AnalysisUsage &AU) const override {
66
MachineFunctionPass::getAnalysisUsage(AU);
67
}
68
};
69
}
70
71
char NVPTXPeephole::ID = 0;
72
73
INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
74
75
/// Returns true when \p Root matches the pattern this pass rewrites:
///   - \p Root is a cvta_to_local / cvta_to_local_64,
///   - its operand 1 is a virtual register with a unique definition,
///   - that definition is a LEA_ADDRi / LEA_ADDRi64 in the same basic block,
///   - and the LEA's operand 1 is the function's frame register.
static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
  auto &ParentBB = *Root.getParent();
  auto &Fn = *ParentBB.getParent();

  // Only a cvta.to.local (either width) can be the root of the pattern.
  const unsigned RootOpc = Root.getOpcode();
  if (RootOpc != NVPTX::cvta_to_local_64 && RootOpc != NVPTX::cvta_to_local)
    return false;

  // The address being converted must live in a virtual register.
  auto &SrcOp = Root.getOperand(1);
  const auto &RegInfo = Fn.getRegInfo();
  if (!SrcOp.isReg() || !SrcOp.getReg().isVirtual())
    return false;

  // ... which has exactly one definition, located in the same block as Root.
  MachineInstr *AddrDef = RegInfo.getUniqueVRegDef(SrcOp.getReg());
  if (!AddrDef)
    return false;
  if (AddrDef->getParent() != &ParentBB)
    return false;

  // ... and that definition must be a LEA_ADDRi of either width.
  const unsigned DefOpc = AddrDef->getOpcode();
  if (DefOpc != NVPTX::LEA_ADDRi64 && DefOpc != NVPTX::LEA_ADDRi)
    return false;

  const NVPTXRegisterInfo *NRI =
      Fn.getSubtarget<NVPTXSubtarget>().getRegisterInfo();

  // Finally, the LEA must be based on the frame register.
  auto &FrameBaseOp = AddrDef->getOperand(1);
  return FrameBaseOp.isReg() &&
         FrameBaseOp.getReg() == NRI->getFrameRegister(Fn);
}
108
109
/// Rewrites the pattern rooted at \p Root (a cvta.to.local fed by a
/// frame-based LEA_ADDRi) into one LEA_ADDRi that defines Root's result
/// directly from the frame-local register.
///
/// The caller is expected to have checked \p Root with
/// isCVTAToLocalCombinationCandidate; the unique definition of operand 1 is
/// dereferenced here without a null check.
static void CombineCVTAToLocal(MachineInstr &Root) {
  auto &ParentBB = *Root.getParent();
  auto &Fn = *ParentBB.getParent();
  const auto &RegInfo = Fn.getRegInfo();
  const TargetInstrInfo *InstrInfo = Fn.getSubtarget().getInstrInfo();

  // The LEA_ADDRi that produced the address Root converts.
  auto &AddrDef = *RegInfo.getUniqueVRegDef(Root.getOperand(1).getReg());

  const NVPTXRegisterInfo *NRI =
      Fn.getSubtarget<NVPTXSubtarget>().getRegisterInfo();

  // Same opcode as the original LEA and same second operand, but based on the
  // frame-local register and defining Root's destination register.
  MachineInstrBuilder NewLEA =
      BuildMI(Fn, Root.getDebugLoc(), InstrInfo->get(AddrDef.getOpcode()),
              Root.getOperand(0).getReg());
  NewLEA.addReg(NRI->getFrameLocalRegister(Fn));
  NewLEA.add(AddrDef.getOperand(2));

  // Place the replacement immediately before Root.
  ParentBB.insert((MachineBasicBlock::iterator)&Root, NewLEA);

  // If Root is the only non-debug user of the old LEA's result, the old LEA
  // has no remaining purpose and is removed as well.
  const bool OnlyUsedByRoot =
      RegInfo.hasOneNonDBGUse(AddrDef.getOperand(0).getReg());
  if (OnlyUsedByRoot)
    AddrDef.eraseFromParent();

  Root.eraseFromParent();
}
133
134
/// Runs the peephole over every instruction of \p MF.
///
/// Each cvta.to.local that matches isCVTAToLocalCombinationCandidate is folded
/// by CombineCVTAToLocal. Afterwards, if the frame register has no uses left,
/// its unique defining instruction is erased too.
///
/// \returns true if any instruction was inserted or erased.
bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  bool Changed = false;
  // Loop over all of the basic blocks.
  for (auto &MBB : MF) {
    // Traverse the basic block. The iterator is advanced before the current
    // instruction is processed because CombineCVTAToLocal erases it.
    auto BlockIter = MBB.begin();

    while (BlockIter != MBB.end()) {
      auto &MI = *BlockIter++;
      if (isCVTAToLocalCombinationCandidate(MI)) {
        CombineCVTAToLocal(MI);
        Changed = true;
      }
    } // Instruction
  }   // Basic Block

  const NVPTXRegisterInfo *NRI =
      MF.getSubtarget<NVPTXSubtarget>().getRegisterInfo();

  // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
  const auto &MRI = MF.getRegInfo();
  const auto FrameReg = NRI->getFrameRegister(MF);
  if (MRI.use_empty(FrameReg)) {
    if (auto *FrameDef = MRI.getUniqueVRegDef(FrameReg)) {
      FrameDef->eraseFromParent();
      // Erasing the definition modifies the function even when no
      // cvta.to.local was combined above, so it must be reported.
      Changed = true;
    }
  }

  return Changed;
}
166
167
/// Factory: allocates a new NVPTXPeephole pass instance and returns it as a
/// MachineFunctionPass pointer.
MachineFunctionPass *llvm::createNVPTXPeephole() {
  return new NVPTXPeephole();
}
168
169