GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU
/// instructions that produce single-use VGPR values. If the value is forwarded
/// to the consumer instruction prior to VGPR writeback, the hardware can
/// then skip (kill) the VGPR write.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUGenSearchableTables.inc"
#include "GCNSubtarget.h"
#include "SIInstrInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/Register.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCRegister.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include <array>

using namespace llvm;

#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"

namespace {
class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {
private:
  const SIInstrInfo *SII;
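  // Helper that packs a run of single-use producers into the immediate of a
  // single S_SINGLEUSE_VDST instruction (descriptive summary of the logic
  // below). Producers are grouped into "single use regions"; gaps between
  // regions are bridged by "skip regions" whose lengths are encoded alongside
  // the producer counts in encodeImm().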
  class SingleUseInstruction {
  private:
    static const unsigned MaxSkipRange = 0b111;
    static const unsigned MaxNumberOfSkipRegions = 2;

    unsigned LastEncodedPositionEnd;
    MachineInstr *ProducerInstr;

    std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;
    SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;

    // Adds a skip region into the instruction.
    void skip(const unsigned ProducerPosition) {
      while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {
        SkipRegions.push_back(MaxSkipRange);
        LastEncodedPositionEnd += MaxSkipRange;
      }
      SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);
      LastEncodedPositionEnd = ProducerPosition;
    }

    bool currentRegionHasSpace() {
      const auto Region = SkipRegions.size();
      // The first region has an extra bit of encoding space.
      return SingleUseRegions[Region] <
             ((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);
    }

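    // Sketch of the packing performed by encodeImm, derived from the shift
    // amounts used below (a summary of this code, not an authoritative
    // encoding reference). With both skip regions in use:
    //   bits [3:0]   producer count of the most recently started region
    //   bits [6:4]   length of the skip region preceding it
    //   bits [9:7]   producer count of the middle region
    //   bits [12:10] length of the first skip region
    //   bits [15:13] producer count of the first region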
    unsigned encodeImm() {
      // Handle the first Single Use Region separately as it has an extra bit
      // of encoding space.
      unsigned Imm = SingleUseRegions[SkipRegions.size()];
      unsigned ShiftAmount = 4;
      for (unsigned i = SkipRegions.size(); i > 0; i--) {
        Imm |= SkipRegions[i - 1] << ShiftAmount;
        ShiftAmount += 3;
        Imm |= SingleUseRegions[i - 1] << ShiftAmount;
        ShiftAmount += 3;
      }
      return Imm;
    }

  public:
    SingleUseInstruction(const unsigned ProducerPosition,
                         MachineInstr *Producer)
        : LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),
          SingleUseRegions({1, 0, 0}) {}

    // Returns false if adding a new single use producer failed. This happens
    // when it could not be encoded, either because there is no room to encode
    // another single use producer region or because this single use producer
    // is too far away to encode the number of instructions to skip.
    bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {
      // The producer is too far away to encode into this instruction: even
      // using all remaining skip regions at their maximum range cannot reach
      // it, so a new instruction is needed.
      if (LastEncodedPositionEnd +
              (MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <
          ProducerPosition)
        return false;

      // If a skip region is needed.
      if (LastEncodedPositionEnd != ProducerPosition ||
          !currentRegionHasSpace()) {
        // The current region is out of space, so a new skip region would be
        // needed, but there is no room for another one.
        if (SkipRegions.size() == MaxNumberOfSkipRegions)
          return false;
        skip(ProducerPosition);
      }

      SingleUseRegions[SkipRegions.size()]++;
      LastEncodedPositionEnd = ProducerPosition + 1;
      ProducerInstr = MI;
      return true;
    }

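    // Builds the s_singleuse_vdst immediately before the most recently
    // recorded producer, with the accumulated region counts and skip lengths
    // packed into its immediate operand.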
    auto emit(const SIInstrInfo *SII) {
      return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),
                     SII->get(AMDGPU::S_SINGLEUSE_VDST))
          .addImm(encodeImm());
    }
  };

public:
  static char ID;

  AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}

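  // Greedily packs the gathered producer positions into as few
  // s_singleuse_vdst instructions as possible: each position is offered to
  // the most recently created instruction first, and a new instruction is
  // only started once the encoding runs out of range or space.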
  void insertSingleUseInstructions(
      ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {
    SmallVector<SingleUseInstruction> Instructions;

    for (auto &[Position, MI] : SingleUseProducers) {
      // Encode this position into the last single use instruction if possible.
      if (Instructions.empty() ||
          !Instructions.back().tryAddProducer(Position, MI)) {
        // If not, add a new instruction.
        Instructions.push_back(SingleUseInstruction(Position, MI));
      }
    }

    for (auto &Instruction : Instructions)
      Instruction.emit(SII);
  }

  bool runOnMachineFunction(MachineFunction &MF) override {
    const auto &ST = MF.getSubtarget<GCNSubtarget>();
    if (!ST.hasVGPRSingleUseHintInsts())
      return false;

    SII = ST.getInstrInfo();
    const auto *TRI = &SII->getRegisterInfo();
    bool InstructionEmitted = false;

    for (MachineBasicBlock &MBB : MF) {
      DenseMap<MCRegUnit, unsigned> RegisterUseCount;

      // Handle registers that are live out of the basic block separately to
      // avoid false positives: if a register is still live at the end of the
      // block, assume it has more uses later on.
      for (const auto &Liveout : MBB.liveouts()) {
        for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();
             ++Units) {
          const auto [Unit, Mask] = *Units;
          if ((Mask & Liveout.LaneMask).any())
            RegisterUseCount[Unit] = 2;
        }
      }

      SmallVector<std::pair<unsigned, MachineInstr *>>
          SingleUseProducerPositions;

      unsigned VALUInstrCount = 0;
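      // Walk the block bottom-up so that, by the time a def is reached, every
      // use of that register within the block has already been counted; a def
      // whose register units were each read at most once is then a single-use
      // producer.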
      for (MachineInstr &MI : reverse(MBB.instrs())) {
        // All registers in all operands need to be single use for an
        // instruction to be marked as a single use producer.
        bool AllProducerOperandsAreSingleUse = true;

        // Gather a list of Registers used before updating use counts to avoid
        // double counting registers that appear multiple times in a single
        // MachineInstr.
        SmallVector<MCRegUnit> RegistersUsed;

        for (const auto &Operand : MI.all_defs()) {
          const auto Reg = Operand.getReg();

          const auto RegUnits = TRI->regunits(Reg);
          if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {
                return RegisterUseCount[Unit] > 1;
              }))
            AllProducerOperandsAreSingleUse = false;

          // Reset uses count when a register is no longer live.
          for (const MCRegUnit Unit : RegUnits)
            RegisterUseCount.erase(Unit);
        }

        for (const auto &Operand : MI.all_uses()) {
          const auto Reg = Operand.getReg();

          // Count the number of times each register is read.
          for (const MCRegUnit Unit : TRI->regunits(Reg)) {
            if (!is_contained(RegistersUsed, Unit))
              RegistersUsed.push_back(Unit);
          }
        }
        for (const MCRegUnit Unit : RegistersUsed)
          RegisterUseCount[Unit]++;

        // Do not attempt to optimise across exec mask changes.
        if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||
            AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {
          for (auto &UsedReg : RegisterUseCount)
            UsedReg.second = 2;
        }

        if (!SIInstrInfo::isVALU(MI) ||
            AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))
          continue;
        if (AllProducerOperandsAreSingleUse) {
          SingleUseProducerPositions.push_back({VALUInstrCount, &MI});
          InstructionEmitted = true;
        }
        VALUInstrCount++;
      }
      insertSingleUseInstructions(SingleUseProducerPositions);
    }
    return InstructionEmitted;
  }
};
} // namespace

char AMDGPUInsertSingleUseVDST::ID = 0;

char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;

INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,
                "AMDGPU Insert SingleUseVDST", false, false)