Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
35269 views
1
//=-- SystemZHazardRecognizer.cpp - SystemZ Hazard Recognizer ---*- C++ -*-===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This file defines a hazard recognizer for the SystemZ scheduler.
10
//
11
// This class is used by the SystemZ scheduling strategy to maintain
12
// the state during scheduling, and provide cost functions for
13
// scheduling candidates. This includes:
14
//
15
// * Decoder grouping. A decoder group can maximally hold 3 uops, and
16
// instructions that always begin a new group should be scheduled when
17
// the current decoder group is empty.
18
// * Processor resources usage. It is beneficial to balance the use of
19
// resources.
20
//
21
// A goal is to consider all instructions, also those outside of any
22
// scheduling region. Such instructions are "advanced" past and include
23
// single instructions before a scheduling region, branches etc.
24
//
25
// A block that has only one predecessor continues scheduling with the state
26
// of it (which may be updated by emitting branches).
27
//
28
// ===---------------------------------------------------------------------===//
29
30
#include "SystemZHazardRecognizer.h"
31
#include "llvm/ADT/Statistic.h"
32
33
using namespace llvm;
34
35
#define DEBUG_TYPE "machine-scheduler"
36
37
// Hidden command-line option "procres-cost-lim" (default 8). A resource
// becomes the critical resource in EmitInstruction() once its counter
// exceeds this value, and nextGroup() clears the critical resource again
// once its counter is back at or below this value.
//
// This is the limit of processor resource usage at which the
38
// scheduler should try to look for other instructions (not using the
39
// critical resource).
40
static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,
41
cl::desc("The OOO window for processor "
42
"resources during scheduling."),
43
cl::init(8));
44
45
unsigned SystemZHazardRecognizer::
46
getNumDecoderSlots(SUnit *SU) const {
47
const MCSchedClassDesc *SC = getSchedClass(SU);
48
if (!SC->isValid())
49
return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.
50
51
assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&
52
"Only cracked instruction can have 2 uops.");
53
assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&
54
"Expanded instructions always group alone.");
55
assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&
56
"Expanded instructions fill the group(s).");
57
58
return SC->NumMicroOps;
59
}
60
61
// Return the current cycle index: the size of the current decoder group,
// offset by 3 when the group count is odd. If SU is given and does not fit
// into the current group, the index is moved to the start of the following
// group instead (1/2 -> 3 and 4/5 -> 0).
unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {
  const unsigned Idx = CurrGroupSize + ((GrpCount % 2) ? 3 : 0);

  // Without an SU, or with one that fits, the plain index is the answer.
  if (SU == nullptr || fitsIntoCurrentGroup(SU))
    return Idx;

  // SU will start the next group.
  switch (Idx) {
  case 1:
  case 2:
    return 3;
  case 4:
  case 5:
    return 0;
  default:
    return Idx;
  }
}
75
76
// Report a hazard exactly when SU does not fit into the current decoder
// group. The Stalls argument is not used.
ScheduleHazardRecognizer::HazardType
SystemZHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
  if (fitsIntoCurrentGroup(SU))
    return NoHazard;
  return Hazard;
}
80
81
void SystemZHazardRecognizer::Reset() {
82
CurrGroupSize = 0;
83
CurrGroupHas4RegOps = false;
84
clearProcResCounters();
85
GrpCount = 0;
86
LastFPdOpCycleIdx = UINT_MAX;
87
LastEmittedMI = nullptr;
88
LLVM_DEBUG(CurGroupDbg = "";);
89
}
90
91
bool
92
SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {
93
const MCSchedClassDesc *SC = getSchedClass(SU);
94
if (!SC->isValid())
95
return true;
96
97
// A cracked instruction only fits into schedule if the current
98
// group is empty.
99
if (SC->BeginGroup)
100
return (CurrGroupSize == 0);
101
102
// An instruction with 4 register operands will not fit in last slot.
103
assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
104
"Current decoder group is already full!");
105
if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
106
return false;
107
108
// Since a full group is handled immediately in EmitInstruction(),
109
// SU should fit into current group. NumSlots should be 1 or 0,
110
// since it is not a cracked or expanded instruction.
111
assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&
112
"Expected normal instruction to fit in non-full group!");
113
114
return true;
115
}
116
117
bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
118
const MachineFunction &MF = *MI->getParent()->getParent();
119
const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
120
const MCInstrDesc &MID = MI->getDesc();
121
unsigned Count = 0;
122
for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
123
const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
124
if (RC == nullptr)
125
continue;
126
if (OpIdx >= MID.getNumDefs() &&
127
MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
128
continue;
129
Count++;
130
}
131
return Count >= 4;
132
}
133
134
void SystemZHazardRecognizer::nextGroup() {
135
if (CurrGroupSize == 0)
136
return;
137
138
LLVM_DEBUG(dumpCurrGroup("Completed decode group"));
139
LLVM_DEBUG(CurGroupDbg = "";);
140
141
int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);
142
assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&
143
"Current decoder group bad.");
144
145
// Reset counter for next group.
146
CurrGroupSize = 0;
147
CurrGroupHas4RegOps = false;
148
149
GrpCount += ((unsigned) NumGroups);
150
151
// Decrease counters for execution units.
152
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
153
ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)
154
? (ProcResourceCounters[i] - NumGroups)
155
: 0);
156
157
// Clear CriticalResourceIdx if it is now below the threshold.
158
if (CriticalResourceIdx != UINT_MAX &&
159
(ProcResourceCounters[CriticalResourceIdx] <=
160
ProcResCostLim))
161
CriticalResourceIdx = UINT_MAX;
162
163
LLVM_DEBUG(dumpState(););
164
}
165
166
#ifndef NDEBUG // Debug output
167
// Print a one-line description of SU to OS: node number and opcode name,
// followed (for a valid scheduling class) by the shortened names of the
// processor resources it uses and its grouping properties.
void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {
  OS << "SU(" << SU->NodeNum << "):"
     << TII->getName(SU->getInstr()->getOpcode());

  const MCSchedClassDesc *SchedClass = getSchedClass(SU);
  if (!SchedClass->isValid())
    return;

  const TargetSchedModel::ProcResIter ResEnd =
      SchedModel->getWriteProcResEnd(SchedClass);
  for (TargetSchedModel::ProcResIter Res =
           SchedModel->getWriteProcResBegin(SchedClass);
       Res != ResEnd; ++Res) {
    const MCProcResourceDesc &ResDesc =
        *SchedModel->getProcResource(Res->ProcResourceIdx);

    // trim e.g. Z13_FXaUnit -> FXa
    std::string UnitName(ResDesc.Name);
    UnitName.erase(0, UnitName.find('_') + 1);
    const size_t SuffixPos = UnitName.find("Unit");
    if (SuffixPos != std::string::npos)
      UnitName.erase(SuffixPos);
    if (UnitName == "LS") // LSUnit -> LSU
      UnitName = "LSU";
    OS << "/" << UnitName;

    if (Res->ReleaseAtCycle > 1)
      OS << "(" << Res->ReleaseAtCycle << "cyc)";
  }

  if (SchedClass->NumMicroOps > 1)
    OS << "/" << SchedClass->NumMicroOps << "uops";

  // Grouping behavior of the instruction.
  if (SchedClass->BeginGroup)
    OS << (SchedClass->EndGroup ? "/GroupsAlone" : "/BeginsGroup");
  else if (SchedClass->EndGroup)
    OS << "/EndsGroup";

  if (SU->isUnbuffered)
    OS << "/Unbuffered";
  if (has4RegOps(SU->getInstr()))
    OS << "/4RegOps";
}
207
208
void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
209
dbgs() << "++ " << Msg;
210
dbgs() << ": ";
211
212
if (CurGroupDbg.empty())
213
dbgs() << " <empty>\n";
214
else {
215
dbgs() << "{ " << CurGroupDbg << " }";
216
dbgs() << " (" << CurrGroupSize << " decoder slot"
217
<< (CurrGroupSize > 1 ? "s":"")
218
<< (CurrGroupHas4RegOps ? ", 4RegOps" : "")
219
<< ")\n";
220
}
221
}
222
223
void SystemZHazardRecognizer::dumpProcResourceCounters() const {
224
bool any = false;
225
226
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
227
if (ProcResourceCounters[i] > 0) {
228
any = true;
229
break;
230
}
231
232
if (!any)
233
return;
234
235
dbgs() << "++ | Resource counters: ";
236
for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
237
if (ProcResourceCounters[i] > 0)
238
dbgs() << SchedModel->getProcResource(i)->Name
239
<< ":" << ProcResourceCounters[i] << " ";
240
dbgs() << "\n";
241
242
if (CriticalResourceIdx != UINT_MAX)
243
dbgs() << "++ | Critical resource: "
244
<< SchedModel->getProcResource(CriticalResourceIdx)->Name
245
<< "\n";
246
}
247
248
void SystemZHazardRecognizer::dumpState() const {
249
dumpCurrGroup("| Current decoder group");
250
dbgs() << "++ | Current cycle index: "
251
<< getCurrCycleIdx() << "\n";
252
dumpProcResourceCounters();
253
if (LastFPdOpCycleIdx != UINT_MAX)
254
dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";
255
}
256
257
#endif //NDEBUG
258
259
void SystemZHazardRecognizer::clearProcResCounters() {
260
ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);
261
CriticalResourceIdx = UINT_MAX;
262
}
263
264
// Return true if MI is a branch, a return or a conditional trap
// (SystemZ::CondTrap).
static inline bool isBranchRetTrap(MachineInstr *MI) {
  if (MI->isBranch())
    return true;
  if (MI->isReturn())
    return true;
  return MI->getOpcode() == SystemZ::CondTrap;
}
268
269
// Update state with SU as the next scheduled unit.
270
void SystemZHazardRecognizer::
271
EmitInstruction(SUnit *SU) {
272
const MCSchedClassDesc *SC = getSchedClass(SU);
273
LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());
274
dbgs() << "\n";);
275
LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););
276
277
// If scheduling an SU that must begin a new decoder group, move on
278
// to next group.
279
if (!fitsIntoCurrentGroup(SU))
280
nextGroup();
281
282
LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);
283
if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););
284
285
LastEmittedMI = SU->getInstr();
286
287
// After returning from a call, we don't know much about the state.
288
if (SU->isCall) {
289
LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);
290
Reset();
291
LastEmittedMI = SU->getInstr();
292
return;
293
}
294
295
// Increase counter for execution unit(s).
296
for (TargetSchedModel::ProcResIter
297
PI = SchedModel->getWriteProcResBegin(SC),
298
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
299
// Don't handle FPd together with the other resources.
300
if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)
301
continue;
302
int &CurrCounter =
303
ProcResourceCounters[PI->ProcResourceIdx];
304
CurrCounter += PI->ReleaseAtCycle;
305
// Check if this is now the new critical resource.
306
if ((CurrCounter > ProcResCostLim) &&
307
(CriticalResourceIdx == UINT_MAX ||
308
(PI->ProcResourceIdx != CriticalResourceIdx &&
309
CurrCounter >
310
ProcResourceCounters[CriticalResourceIdx]))) {
311
LLVM_DEBUG(
312
dbgs() << "++ New critical resource: "
313
<< SchedModel->getProcResource(PI->ProcResourceIdx)->Name
314
<< "\n";);
315
CriticalResourceIdx = PI->ProcResourceIdx;
316
}
317
}
318
319
// Make note of an instruction that uses a blocking resource (FPd).
320
if (SU->isUnbuffered) {
321
LastFPdOpCycleIdx = getCurrCycleIdx(SU);
322
LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx
323
<< "\n";);
324
}
325
326
// Insert SU into current group by increasing number of slots used
327
// in current group.
328
CurrGroupSize += getNumDecoderSlots(SU);
329
CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());
330
unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);
331
assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))
332
&& "SU does not fit into decoder group!");
333
334
// Check if current group is now full/ended. If so, move on to next
335
// group to be ready to evaluate more candidates.
336
if (CurrGroupSize >= GroupLim || SC->EndGroup)
337
nextGroup();
338
}
339
340
// Return the decoder grouping cost of scheduling SU next. A negative value
// means SU fits well at this point, a positive value is the number of
// decoder slots that would be left unused, and 0 means no preference.
int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {
  const MCSchedClassDesc *SchedClass = getSchedClass(SU);
  if (!SchedClass->isValid())
    return 0;

  // If SU begins new group, it can either break a current group early
  // or fit naturally if current group is empty (negative cost).
  if (SchedClass->BeginGroup) {
    if (CurrGroupSize == 0)
      return -1;
    return 3 - CurrGroupSize;
  }

  // Similarly, a group-ending SU may either fit well (last in group), or
  // end the group prematurely.
  if (SchedClass->EndGroup) {
    const unsigned SizeAfterSU = CurrGroupSize + getNumDecoderSlots(SU);
    if (SizeAfterSU >= 3)
      return -1;
    return 3 - SizeAfterSU;
  }

  // An instruction with 4 register operands will not fit in last slot.
  const bool OnlyLastSlotLeft = (CurrGroupSize == 2);
  if (OnlyLastSlotLeft && has4RegOps(SU->getInstr()))
    return 1;

  // Most instructions can be placed in any decoder slot.
  return 0;
}
370
371
// Return true if the unbuffered (FPd) op SU would be well placed at the
// current point: either no FPd op has been recorded yet, or the cycle index
// SU would get is exactly 3 away from that of the last FPd op.
bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {
  assert(SU->isUnbuffered);

  // If this is the first FPd op, it should be scheduled high.
  if (LastFPdOpCycleIdx == UINT_MAX)
    return true;

  // If this is not the first FPd op, it should go into the other side
  // of the processor to use the other FPd unit there. This should
  // generally happen if two FPd ops are placed with 2 other
  // instructions between them (modulo 6).
  const unsigned CycleIdx = getCurrCycleIdx(SU);
  const unsigned Distance = (LastFPdOpCycleIdx > CycleIdx)
                                ? (LastFPdOpCycleIdx - CycleIdx)
                                : (CycleIdx - LastFPdOpCycleIdx);
  return Distance == 3;
}
385
386
int SystemZHazardRecognizer::
387
resourcesCost(SUnit *SU) {
388
int Cost = 0;
389
390
const MCSchedClassDesc *SC = getSchedClass(SU);
391
if (!SC->isValid())
392
return 0;
393
394
// For a FPd op, either return min or max value as indicated by the
395
// distance to any prior FPd op.
396
if (SU->isUnbuffered)
397
Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);
398
// For other instructions, give a cost to the use of the critical resource.
399
else if (CriticalResourceIdx != UINT_MAX) {
400
for (TargetSchedModel::ProcResIter
401
PI = SchedModel->getWriteProcResBegin(SC),
402
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)
403
if (PI->ProcResourceIdx == CriticalResourceIdx)
404
Cost = PI->ReleaseAtCycle;
405
}
406
407
return Cost;
408
}
409
410
void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,
411
bool TakenBranch) {
412
// Make a temporary SUnit.
413
SUnit SU(MI, 0);
414
415
// Set interesting flags.
416
SU.isCall = MI->isCall();
417
418
const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);
419
for (const MCWriteProcResEntry &PRE :
420
make_range(SchedModel->getWriteProcResBegin(SC),
421
SchedModel->getWriteProcResEnd(SC))) {
422
switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {
423
case 0:
424
SU.hasReservedResource = true;
425
break;
426
case 1:
427
SU.isUnbuffered = true;
428
break;
429
default:
430
break;
431
}
432
}
433
434
unsigned GroupSizeBeforeEmit = CurrGroupSize;
435
EmitInstruction(&SU);
436
437
if (!TakenBranch && isBranchRetTrap(MI)) {
438
// NT Branch on second slot ends group.
439
if (GroupSizeBeforeEmit == 1)
440
nextGroup();
441
}
442
443
if (TakenBranch && CurrGroupSize > 0)
444
nextGroup();
445
446
assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&
447
"Scheduler: unhandled terminator!");
448
}
449
450
void SystemZHazardRecognizer::
451
copyState(SystemZHazardRecognizer *Incoming) {
452
// Current decoder group
453
CurrGroupSize = Incoming->CurrGroupSize;
454
LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);
455
456
// Processor resources
457
ProcResourceCounters = Incoming->ProcResourceCounters;
458
CriticalResourceIdx = Incoming->CriticalResourceIdx;
459
460
// FPd
461
LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;
462
GrpCount = Incoming->GrpCount;
463
}
464
465