GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file Pass to preconfigure the shape of physical tile registers.
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks the instructions of each basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
//
//===----------------------------------------------------------------------===//

#include "X86.h"
#include "X86InstrBuilder.h"
#include "X86MachineFunctionInfo.h"
#include "X86RegisterInfo.h"
#include "X86Subtarget.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "fastpretileconfig"

STATISTIC(NumStores, "Number of stores added");
STATISTIC(NumLoads, "Number of loads added");

namespace {

class X86FastPreTileConfig : public MachineFunctionPass {
  MachineFunction *MF = nullptr;
  const X86Subtarget *ST = nullptr;
  const TargetInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  X86MachineFunctionInfo *X86FI = nullptr;
  MachineFrameInfo *MFI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
  MachineBasicBlock *MBB = nullptr;
  int CfgSS = -1;
  struct PHIInfo {
    Register Row;
    Register Col;
    Register StackAddr;
  };
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;

  /// Maps virtual regs to the frame index where these values are spilled.
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;

  /// Has a bit set for each tile virtual register that was determined to be
  /// live across blocks.
  BitVector MayLiveAcrossBlocks;

  int getStackSpaceFor(Register VirtReg);
  void InitializeTileConfigStackSpace();
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
              MachineOperand *RowMO, MachineOperand *ColMO);
  void canonicalizePHIs(MachineBasicBlock &MBB);
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
  void convertPHIs(MachineBasicBlock &MBB);
  bool configBasicBlock(MachineBasicBlock &MBB);

public:
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}

  /// Return the pass name.
  StringRef getPassName() const override {
    return "Fast Tile Register Preconfigure";
  }

  /// Perform tile register configuration.
  bool runOnMachineFunction(MachineFunction &MFunc) override;

  static char ID;
};

} // end anonymous namespace

char X86FastPreTileConfig::ID = 0;

INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
                      "Fast Tile Register Preconfigure", false, false)
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
                    "Fast Tile Register Preconfigure", false, false)

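// Returns true if A comes before B (or B is the block end) within MBB.
// The order is determined by a linear scan from the start of the block.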
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  auto MBBEnd = MBB.end();
  if (B == MBBEnd)
    return true;

  MachineBasicBlock::const_iterator I = MBB.begin();
  for (; &*I != A && &*I != B; ++I)
    ;

  return &*I == A;
}

/// This allocates space for the specified virtual register to be held on the
/// stack.
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
  // Find the location Reg would belong...
  int SS = StackSlotForVirtReg[VirtReg];
  // Already has space allocated?
  if (SS != -1)
    return SS;

  // Allocate a new stack object for this spill location...
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  unsigned Size = TRI->getSpillSize(RC);
  Align Alignment = TRI->getSpillAlign(RC);
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);

  // Assign the slot.
  StackSlotForVirtReg[VirtReg] = FrameIdx;
  return FrameIdx;
}

/// Returns false if \p VirtReg is known to not live out of the current config.
/// If \p VirtReg lives out of the current MBB, it must live out of the current
/// config.
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
    return true;

  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
    if (UseInst.getParent() != MBB) {
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
      return true;
    }

    // The use and def are in the same MBB. If the tile register is
    // reconfigured, it is clobbered and we need to spill and reload the
    // tile register.
    if (CfgMI) {
      if (dominates(*MBB, *CfgMI, UseInst)) {
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
        return true;
      }
    }
  }

  return false;
}

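// Zero-initialize the tile configuration stack object (64 bytes) with the
// widest vector stores available, then set the palette byte at offset 0 to 1.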
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
  MachineBasicBlock &MBB = MF->front();
  MachineInstr *MI = &*MBB.getFirstNonPHI();
  DebugLoc DL;
  if (ST->hasAVX512()) {
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
        .addReg(Zmm);
  } else if (ST->hasAVX2()) {
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
        .addReg(Ymm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
                      32)
        .addReg(Ymm);
  } else {
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
        .addReg(Xmm);
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
        .addReg(Xmm);
  }
  // Fill in the palette first.
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
      .addImm(1);
}

/// Insert spill instruction for \p VirtReg before \p Before.
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
                                 Register VirtReg, bool Kill) {
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
  int FI = getStackSpaceFor(VirtReg);
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');

  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
  // Don't need shape information for tile store, because it is adjacent to
  // the tile def instruction.
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,
                           Register());
  ++NumStores;

  // TODO: update DBG_VALUEs
}

/// Insert reload instruction for \p OrigReg before \p UseMI.
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
                                  Register OrigReg, MachineOperand *RowMO,
                                  MachineOperand *ColMO) {
  int FI = getStackSpaceFor(OrigReg);
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
  Register TileReg;
  // Fold copy to tileload
  // BB1:
  //   spill src to s
  //
  // BB2:
  //   t = copy src
  // -->
  //   t = tileload (s)
  if (UseMI->isCopy())
    TileReg = UseMI->getOperand(0).getReg();
  else
    TileReg = MRI->createVirtualRegister(&RC);
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
  // information for reload.
  // tileloadd (%sp, %idx), %tmm
  unsigned Opc = X86::PTILELOADDV;
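  // The row stride of the tile spill slot is 64 bytes (the maximum tile row
  // size); materialize it into a register for the PTILELOADDV below.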
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  // FIXME: MBB is not the parent of UseMI.
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
                                TII->get(X86::MOV64ri), StrideReg)
                            .addImm(64);
  NewMI = addFrameReference(
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
          .addReg(RowMO->getReg())
          .addReg(ColMO->getReg()),
      FI);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  RowMO->setIsKill(false);
  ColMO->setIsKill(false);
  // Erase copy instruction after it is folded.
  if (UseMI->isCopy()) {
    UseMI->eraseFromParent();
  } else {
    // Replace the register in the user MI.
    for (auto &MO : UseMI->operands()) {
      if (MO.isReg() && MO.getReg() == OrigReg)
        MO.setReg(TileReg);
    }
  }

  ++NumLoads;
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
                    << printReg(TileReg, TRI) << '\n');
}

static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  // The instruction must have 3 operands: tile def, row, col.
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;
  MachineOperand &MO = MI.getOperand(0);

  if (MO.isReg()) {
    Register Reg = MO.getReg();
    // FIXME: it may be used after Greedy RA and the physical
    // register is not rewritten yet.
    if (Reg.isVirtual() &&
        MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
      return true;
    if (Reg >= X86::TMM0 && Reg <= X86::TMM7)
      return true;
  }

  return false;
}

static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  } else if (MI->isCopy()) {
    TileReg = MI->getOperand(1).getReg();
    return getShape(MRI, TileReg);
  }

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order.
  assert(MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}

// BB0:
//   spill t0 to s0
// BB1:
//   spill t1 to s1
//
// BB2:
//   t = phi [t0, bb0] [t1, bb1]
// -->
//   row = phi [r0, bb0] [r1, bb1]
//   col = phi [c0, bb0] [c1, bb1]
//   s = phi [s0, bb0] [s1, bb1]
//   t = tileload row, col, s
// The new instructions are inserted at the end of the phi nodes. The order
// of the original phi nodes is not preserved.
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
                                      MachineInstr &PHI) {
  // 1. Create an instruction to get the stack slot address of each incoming
  //    block.
  // 2. Create a PHI node for the stack address.
  // 3. Create a PHI node for the shape. If one of the incoming shapes is an
  //    immediate, use the immediate and delete the PHI node.
  // 4. Create a tileload instruction from the stack address.
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                        TII->get(X86::PHI), StackAddrReg);
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), RowReg);
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
                                       TII->get(X86::PHI), ColReg);
  // Record the mapping of the phi node and its row/column information.
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};

  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
    // Get the 2 incoming values of tile register and MBB.
    Register InTileReg = PHI.getOperand(I).getReg();
    // Mark it as live-out, so that it will be spilled when we visit the
    // incoming MBB. Otherwise, since the phi will be deleted, the spill
    // would be missed when visiting the incoming MBB.
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();

    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
    MachineBasicBlock::iterator InsertPos;
    if (TileDefMI->isPHI()) {
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
        //        def t1
        //       /      \
        //  def t2      t3 = phi(t1, t4) <--
        //       \      /                  |
        //        t4 = phi(t2, t3)----------
        //
        // For each (row, column and stack address) append phi incoming value.
        // Create r3 = phi(r1, r4)
        // Create r4 = phi(r2, r3)
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
        Register InColReg = VisitedPHIs[TileDefMI].Col;
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
        continue;
      } else {
        // Recursively convert the PHI to a tileload.
        convertPHI(TileDefMI->getParent(), *TileDefMI);
        // The PHI node is converted to a tileload instruction. Get the stack
        // address from the tileload operands.
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
        Register InRowReg = TileLoad->getOperand(1).getReg();
        Register InColReg = TileLoad->getOperand(2).getReg();
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
        RowPHI.addReg(InRowReg).addMBB(InMBB);
        ColPHI.addReg(InColReg).addMBB(InMBB);
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
      }
    } else {
      InsertPos = TileDefMI->getIterator();

      // Fill the incoming operands of the row/column phi instructions.
      ShapeT Shape = getShape(MRI, InTileReg);
      Shape.getRow()->setIsKill(false);
      Shape.getCol()->setIsKill(false);
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);

      // The incoming tile register lives out of its def BB, so it will be
      // spilled. Create an MI to get the spill stack slot address for the
      // tile register.
      int FI = getStackSpaceFor(InTileReg);
      Register InStackAddrReg =
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
                        TII->get(X86::LEA64r), InStackAddrReg)
                    .addFrameIndex(FI),
                0);
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
    }
  }

  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
      .addImm(64);
  Register TileReg = PHI.getOperand(0).getReg();
  MachineInstr *NewMI = addDirectMem(
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
          .addReg(RowReg)
          .addReg(ColReg),
      StackAddrReg);
  MachineOperand &MO = NewMI->getOperand(5);
  MO.setReg(StrideReg);
  MO.setIsKill(true);
  PHI.eraseFromParent();
  VisitedPHIs.erase(&PHI);
}

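// Returns true if MI defines a virtual register of the tile register class.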
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  MachineOperand &MO = MI.getOperand(0);
  if (MO.isReg() && MO.getReg().isVirtual() &&
      MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)
    return true;
  return false;
}

void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;

  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  // Canonicalize the phi nodes first. One tile phi may depend on a previous
  // phi node. For the case below, we need to convert %t4.
  //
  // BB0:
  //   %t3 = phi (t1 BB1, t2 BB0)
  //   %t4 = phi (t5 BB1, t3 BB0)
  // -->
  //   %t3 = phi (t1 BB1, t2 BB0)
  //   %t4 = phi (t5 BB1, t2 BB0)
  //
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and whose def
    // is also a phi node.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If we can't find such an operand, do nothing.
    if (!InMO)
      continue;

    // The current phi node depends on a previous phi node. Break the
    // dependency.
    Register DefTileReg;
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      if (InMBB != &MBB)
        continue;
      DefTileReg = DefMI->getOperand(I).getReg();
      InMO->setReg(DefTileReg);
      break;
    }
  }
}

void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }
  while (!PHIs.empty()) {
    MachineInstr *MI = PHIs.pop_back_val();
    VisitedPHIs.clear();
    convertPHI(&MBB, *MI);
  }
}

// PreTileConfig should configure the tile registers on a per-basic-block
// basis.
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
  this->MBB = &MBB;
  bool Change = false;
  MachineInstr *LastShapeMI = nullptr;
  MachineInstr *LastTileCfg = nullptr;
  bool HasUnconfigTile = false;

  auto Config = [&](MachineInstr &Before) {
    if (CfgSS == -1)
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
                                     ST->getTileConfigAlignment(), false);
    LastTileCfg = addFrameReference(
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
    LastShapeMI = nullptr;
    Change = true;
  };
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg.isVirtual() &&
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
        return true;
    }
    return false;
  };
  for (MachineInstr &MI : reverse(MBB)) {
    // We have transformed the phi nodes before configuring the BB.
    if (MI.isPHI())
      break;
    // Don't collect the shape of a used tile; the tile should be defined
    // before the tile use. Spill and reload would happen if there is only a
    // tile use after ldtilecfg, so the shape can be collected from the
    // reload. Take the code below for example: %t would be reloaded before
    // the tilestore.
    //   call
    //   ....
    //   tilestore %r, %c, %t
    // -->
    //   call
    //   ldtilecfg
    //   %t = tileload %r, %c
    //   tilestore %r, %c, %t
    if (HasTileOperand(MRI, MI))
      HasUnconfigTile = true;
    // According to the AMX ABI, all the tile registers including the config
    // register are volatile. The caller needs to save/restore the config
    // register.
    if (MI.isCall() && HasUnconfigTile) {
      MachineBasicBlock::iterator I;
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
        I = ++LastShapeMI->getIterator();
      else
        I = ++MI.getIterator();
      Config(*I);
      HasUnconfigTile = false;
      continue;
    }
    if (!isTileDef(MRI, MI))
      continue;
    //
    //---------------------------------------------------------------------
    // Don't handle COPY instructions. If the src and dst of the COPY can be
    // in the same config, as in the case below, we just check the shape of
    // t0.
    //   def row0
    //   def col0
    //   ldtilecfg
    //   t0 = tilezero(row0, col0)
    //   t1 = copy t0
    //   ...
    // If the src and dst of the COPY can NOT be in the same config, as in
    // the case below, a reload would be generated before the copy
    // instruction.
    //   def row0
    //   def col0
    //   t0 = tilezero(row0, col0)
    //   spill t0
    //   ...
    //   def row1
    //   def col1
    //   ldtilecfg
    //   t1 = tilezero(row1, col1)
    //   reload t0
    //   t1 = copy t0
    //---------------------------------------------------------------------
    //
    // If MI dominates the last shape def instruction, we need to insert
    // ldtilecfg after LastShapeMI now. The config doesn't include
    // current MI.
    //   def row0
    //   def col0
    //   tilezero(row0, col0)  <- MI
    //   def row1
    //   def col1
    //   ldtilecfg             <- insert
    //   tilezero(row1, col1)
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
      Config(*(++LastShapeMI->getIterator()));
    MachineOperand *RowMO = &MI.getOperand(1);
    MachineOperand *ColMO = &MI.getOperand(2);
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
    // If the shape is defined in the current MBB, check the domination.
    // FIXME: how about loops?
    if (RowMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = RowMI;
      else if (dominates(MBB, LastShapeMI, RowMI))
        LastShapeMI = RowMI;
    }
    if (ColMI->getParent() == &MBB) {
      if (!LastShapeMI)
        LastShapeMI = ColMI;
      else if (dominates(MBB, LastShapeMI, ColMI))
        LastShapeMI = ColMI;
    }
    // If there is a user that lives out of the tilecfg, spill the tile
    // register and reload it before the user.
    Register TileReg = MI.getOperand(0).getReg();
    if (mayLiveOut(TileReg, LastTileCfg))
      spill(++MI.getIterator(), TileReg, false);
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
      if (UseMI.getParent() == &MBB) {
        // The user should not cross ldtilecfg.
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
          continue;
        // Reload before UseMI.
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      } else {
        // Don't reload for a phi instruction; we handle phi reloads
        // separately.
        // TODO: merge the reloads for the same user MBB.
        if (!UseMI.isPHI())
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
      }
    }
  }

  // Configure tile registers at the head of the MBB.
  if (HasUnconfigTile) {
    MachineInstr *Before;
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
      Before = &*MBB.getFirstNonPHI();
    else
      Before = &*(++LastShapeMI->getIterator());

    Config(*Before);
  }

  return Change;
}

bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  // Early exit in the common case of non-AMX code.
  if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)
    return false;

  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  unsigned NumVirtRegs = MRI->getNumVirtRegs();

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create registers during config. The "* 3" is to make sure the
  // virtual register number doesn't exceed the size of the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  bool Change = false;
  assert(MRI->isSSA());

  // Canonicalize the phi nodes first.
  for (MachineBasicBlock &MBB : MFunc)
    canonicalizePHIs(MBB);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node conversion.
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *MBB : RPOT) {
    convertPHIs(*MBB);
    Change |= configBasicBlock(*MBB);
  }

  if (Change)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Change;
}

FunctionPass *llvm::createX86FastPreTileConfigPass() {
  return new X86FastPreTileConfig();
}