// Source: GitHub repository freebsd/freebsd-src
// Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp
1
//===-- AMDGPUGlobalISelDivergenceLowering.cpp ----------------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
/// \file
10
/// GlobalISel pass that selects divergent i1 phis as lane mask phis.
11
/// Lane mask merging uses the same algorithm as SDAG in SILowerI1Copies.
12
/// Handles all cases of temporal divergence.
13
/// For divergent non-phi i1 and uniform i1 uses outside of the cycle this pass
14
/// currently depends on LCSSA to insert phis with one incoming value.
15
//
16
//===----------------------------------------------------------------------===//
17
18
#include "AMDGPU.h"
19
#include "SILowerI1Copies.h"
20
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
21
#include "llvm/CodeGen/MachineFunctionPass.h"
22
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
23
#include "llvm/InitializePasses.h"
24
25
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
26
27
using namespace llvm;
28
29
namespace {
30
31
class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
32
public:
33
static char ID;
34
35
public:
36
AMDGPUGlobalISelDivergenceLowering() : MachineFunctionPass(ID) {
37
initializeAMDGPUGlobalISelDivergenceLoweringPass(
38
*PassRegistry::getPassRegistry());
39
}
40
41
bool runOnMachineFunction(MachineFunction &MF) override;
42
43
StringRef getPassName() const override {
44
return "AMDGPU GlobalISel divergence lowering";
45
}
46
47
void getAnalysisUsage(AnalysisUsage &AU) const override {
48
AU.setPreservesCFG();
49
AU.addRequired<MachineDominatorTreeWrapperPass>();
50
AU.addRequired<MachinePostDominatorTreeWrapperPass>();
51
AU.addRequired<MachineUniformityAnalysisPass>();
52
MachineFunctionPass::getAnalysisUsage(AU);
53
}
54
};
55
56
class DivergenceLoweringHelper : public PhiLoweringHelper {
57
public:
58
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
59
MachinePostDominatorTree *PDT,
60
MachineUniformityInfo *MUI);
61
62
private:
63
MachineUniformityInfo *MUI = nullptr;
64
MachineIRBuilder B;
65
Register buildRegCopyToLaneMask(Register Reg);
66
67
public:
68
void markAsLaneMask(Register DstReg) const override;
69
void getCandidatesForLowering(
70
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
71
void collectIncomingValuesFromPhi(
72
const MachineInstr *MI,
73
SmallVectorImpl<Incoming> &Incomings) const override;
74
void replaceDstReg(Register NewReg, Register OldReg,
75
MachineBasicBlock *MBB) override;
76
void buildMergeLaneMasks(MachineBasicBlock &MBB,
77
MachineBasicBlock::iterator I, const DebugLoc &DL,
78
Register DstReg, Register PrevReg,
79
Register CurReg) override;
80
void constrainAsLaneMask(Incoming &In) override;
81
};
82
83
/// Forwards the function and dominance information to PhiLoweringHelper,
/// remembers the uniformity info and binds the instruction builder to \p MF.
DivergenceLoweringHelper::DivergenceLoweringHelper(
    MachineFunction *MF, MachineDominatorTree *DT,
    MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
    : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
87
88
// _(s1) -> SReg_32/64(s1)
89
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
90
assert(MRI->getType(DstReg) == LLT::scalar(1));
91
92
if (MRI->getRegClassOrNull(DstReg)) {
93
if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
94
return;
95
llvm_unreachable("Failed to constrain register class");
96
}
97
98
MRI->setRegClass(DstReg, ST->getBoolRC());
99
}
100
101
void DivergenceLoweringHelper::getCandidatesForLowering(
102
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
103
LLT S1 = LLT::scalar(1);
104
105
// Add divergent i1 phis to the list
106
for (MachineBasicBlock &MBB : *MF) {
107
for (MachineInstr &MI : MBB.phis()) {
108
Register Dst = MI.getOperand(0).getReg();
109
if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
110
Vreg1Phis.push_back(&MI);
111
}
112
}
113
}
114
115
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
116
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
117
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
118
Incomings.emplace_back(MI->getOperand(i).getReg(),
119
MI->getOperand(i + 1).getMBB(), Register());
120
}
121
}
122
123
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
124
MachineBasicBlock *MBB) {
125
BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
126
.addReg(NewReg);
127
}
128
129
// Copy Reg to new lane mask register, insert a copy after instruction that
130
// defines Reg while skipping phis if needed.
131
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
132
Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
133
MachineInstr *Instr = MRI->getVRegDef(Reg);
134
MachineBasicBlock *MBB = Instr->getParent();
135
B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
136
B.buildCopy(LaneMask, Reg);
137
return LaneMask;
138
}
139
140
// bb.previous
141
// %PrevReg = ...
142
//
143
// bb.current
144
// %CurReg = ...
145
//
146
// %DstReg - not defined
147
//
148
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
149
//
150
// bb.previous
151
// %PrevReg = ...
152
// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
153
//
154
// bb.current
155
// %CurReg = ...
156
// %CurRegCopy:sreg_32(s1) = COPY %CurReg
157
// ...
158
// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
159
// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
160
// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
161
//
162
// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
163
void DivergenceLoweringHelper::buildMergeLaneMasks(
164
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
165
Register DstReg, Register PrevReg, Register CurReg) {
166
// DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
167
// TODO: check if inputs are constants or results of a compare.
168
169
Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
170
Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
171
Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
172
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
173
174
B.setInsertPt(MBB, I);
175
B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176
B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177
B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
178
}
179
180
// GlobalISel has to constrain S1 incoming taken as-is with lane mask register
181
// class. Insert a copy of Incoming.Reg to new lane mask inside Incoming.Block,
182
// Incoming.Reg becomes that new lane mask.
183
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) {
184
B.setInsertPt(*In.Block, In.Block->getFirstTerminator());
185
186
auto Copy = B.buildCopy(LLT::scalar(1), In.Reg);
187
MRI->setRegClass(Copy.getReg(0), ST->getBoolRC());
188
In.Reg = Copy.getReg(0);
189
}
190
191
} // End anonymous namespace.
192
193
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
194
"AMDGPU GlobalISel divergence lowering", false, false)
195
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
196
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
197
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
198
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
199
"AMDGPU GlobalISel divergence lowering", false, false)
200
201
char AMDGPUGlobalISelDivergenceLowering::ID = 0;
202
203
char &llvm::AMDGPUGlobalISelDivergenceLoweringID =
204
AMDGPUGlobalISelDivergenceLowering::ID;
205
206
/// Factory: returns a newly allocated AMDGPUGlobalISelDivergenceLowering pass.
/// NOTE(review): the caller is presumably expected to take ownership of the
/// returned object (e.g. by handing it to a pass manager) - confirm at the
/// call sites.
FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
  return new AMDGPUGlobalISelDivergenceLowering();
}
209
210
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
211
MachineFunction &MF) {
212
MachineDominatorTree &DT =
213
getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
214
MachinePostDominatorTree &PDT =
215
getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
216
MachineUniformityInfo &MUI =
217
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
218
219
DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
220
221
return Helper.lowerPhis();
222
}
223
224