// Path: contrib/llvm-project/llvm/lib/Target/X86/X86FastPreTileConfig.cpp
//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file Pass to preconfig the shape of physical tile registers9/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm10/// walk each instruction of basic block in reverse order. All the tile11/// registers that live out the basic block would be spilled and reloaded12/// before its user. It also check the depenedency of the shape to ensure13/// the shape is defined before ldtilecfg.14//15//===----------------------------------------------------------------------===//1617#include "X86.h"18#include "X86InstrBuilder.h"19#include "X86MachineFunctionInfo.h"20#include "X86RegisterInfo.h"21#include "X86Subtarget.h"22#include "llvm/ADT/PostOrderIterator.h"23#include "llvm/ADT/Statistic.h"24#include "llvm/CodeGen/MachineFrameInfo.h"25#include "llvm/CodeGen/MachineFunctionPass.h"26#include "llvm/CodeGen/MachineInstr.h"27#include "llvm/CodeGen/MachineRegisterInfo.h"28#include "llvm/CodeGen/Passes.h"29#include "llvm/CodeGen/TargetInstrInfo.h"30#include "llvm/CodeGen/TargetRegisterInfo.h"31#include "llvm/InitializePasses.h"32#include "llvm/Support/Debug.h"3334using namespace llvm;3536#define DEBUG_TYPE "fastpretileconfig"3738STATISTIC(NumStores, "Number of stores added");39STATISTIC(NumLoads, "Number of loads added");4041namespace {4243class X86FastPreTileConfig : public MachineFunctionPass {44MachineFunction *MF = nullptr;45const X86Subtarget *ST = nullptr;46const TargetInstrInfo *TII = nullptr;47MachineRegisterInfo *MRI = nullptr;48X86MachineFunctionInfo *X86FI = nullptr;49MachineFrameInfo *MFI = nullptr;50const TargetRegisterInfo *TRI = nullptr;51MachineBasicBlock *MBB = nullptr;52int 
CfgSS = -1;53struct PHIInfo {54Register Row;55Register Col;56Register StackAddr;57};58DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;5960/// Maps virtual regs to the frame index where these values are spilled.61IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;6263/// Has a bit set for tile virtual register for which it was determined64/// that it is alive across blocks.65BitVector MayLiveAcrossBlocks;6667int getStackSpaceFor(Register VirtReg);68void InitializeTileConfigStackSpace();69bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);70void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);71void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,72MachineOperand *RowMO, MachineOperand *ColMO);73void canonicalizePHIs(MachineBasicBlock &MBB);74void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);75void convertPHIs(MachineBasicBlock &MBB);76bool configBasicBlock(MachineBasicBlock &MBB);7778public:79X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}8081/// Return the pass name.82StringRef getPassName() const override {83return "Fast Tile Register Preconfigure";84}8586/// Perform tile register configure.87bool runOnMachineFunction(MachineFunction &MFunc) override;8889static char ID;90};9192} // end anonymous namespace9394char X86FastPreTileConfig::ID = 0;9596INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,97"Fast Tile Register Preconfigure", false, false)98INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,99"Fast Tile Register Preconfigure", false, false)100101static bool dominates(MachineBasicBlock &MBB,102MachineBasicBlock::const_iterator A,103MachineBasicBlock::const_iterator B) {104auto MBBEnd = MBB.end();105if (B == MBBEnd)106return true;107108MachineBasicBlock::const_iterator I = MBB.begin();109for (; &*I != A && &*I != B; ++I)110;111112return &*I == A;113}114115/// This allocates space for the specified virtual register to be held on the116/// stack.117int 
X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {118// Find the location Reg would belong...119int SS = StackSlotForVirtReg[VirtReg];120// Already has space allocated?121if (SS != -1)122return SS;123124// Allocate a new stack object for this spill location...125const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);126unsigned Size = TRI->getSpillSize(RC);127Align Alignment = TRI->getSpillAlign(RC);128int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);129130// Assign the slot.131StackSlotForVirtReg[VirtReg] = FrameIdx;132return FrameIdx;133}134135/// Returns false if \p VirtReg is known to not live out of the current config.136/// If \p VirtReg live out of the current MBB, it must live out of the current137/// config138bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {139if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))140return true;141142for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {143if (UseInst.getParent() != MBB) {144MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));145return true;146}147148// The use and def are in the same MBB. 
If the tile register is149// reconfigured, it is crobbered and we need to spill and reload150// tile register.151if (CfgMI) {152if (dominates(*MBB, *CfgMI, UseInst)) {153MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));154return true;155}156}157}158159return false;160}161162void X86FastPreTileConfig::InitializeTileConfigStackSpace() {163MachineBasicBlock &MBB = MF->front();164MachineInstr *MI = &*MBB.getFirstNonPHI();165DebugLoc DL;166if (ST->hasAVX512()) {167Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);168BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);169addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)170.addReg(Zmm);171} else if (ST->hasAVX2()) {172Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);173BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);174addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)175.addReg(Ymm);176addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,17732)178.addReg(Ymm);179} else {180assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");181unsigned StoreOpc = ST->hasAVX() ? 
X86::VMOVUPSmr : X86::MOVUPSmr;182Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);183BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);184addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)185.addReg(Xmm);186addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)187.addReg(Xmm);188addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)189.addReg(Xmm);190addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)191.addReg(Xmm);192}193// Fill in the palette first.194addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)195.addImm(1);196}197198/// Insert spill instruction for \p AssignedReg before \p Before.199/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.200void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,201Register VirtReg, bool Kill) {202LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");203int FI = getStackSpaceFor(VirtReg);204LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');205206const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);207// Don't need shape information for tile store, becasue it is adjacent to208// the tile def instruction.209TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI,210Register());211++NumStores;212213// TODO: update DBG_VALUEs214}215216/// Insert reload instruction for \p PhysReg before \p Before.217void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,218Register OrigReg, MachineOperand *RowMO,219MachineOperand *ColMO) {220int FI = getStackSpaceFor(OrigReg);221const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);222Register TileReg;223// Fold copy to tileload224// BB1:225// spill src to s226//227// BB2:228// t = copy src229// -->230// t = tileload (s)231if (UseMI->isCopy())232TileReg = UseMI->getOperand(0).getReg();233else234TileReg = MRI->createVirtualRegister(&RC);235// Can't use TII->loadRegFromStackSlot(), because we need the shape236// 
information for reload.237// tileloadd (%sp, %idx), %tmm238unsigned Opc = X86::PTILELOADDV;239Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);240// FIXME: MBB is not the parent of UseMI.241MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),242TII->get(X86::MOV64ri), StrideReg)243.addImm(64);244NewMI = addFrameReference(245BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)246.addReg(RowMO->getReg())247.addReg(ColMO->getReg()),248FI);249MachineOperand &MO = NewMI->getOperand(5);250MO.setReg(StrideReg);251MO.setIsKill(true);252RowMO->setIsKill(false);253ColMO->setIsKill(false);254// Erase copy instruction after it is folded.255if (UseMI->isCopy()) {256UseMI->eraseFromParent();257} else {258// Replace the register in the user MI.259for (auto &MO : UseMI->operands()) {260if (MO.isReg() && MO.getReg() == OrigReg)261MO.setReg(TileReg);262}263}264265++NumLoads;266LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "267<< printReg(TileReg, TRI) << '\n');268}269270static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {271// The instruction must have 3 operands: tile def, row, col.272if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())273return false;274MachineOperand &MO = MI.getOperand(0);275276if (MO.isReg()) {277Register Reg = MO.getReg();278// FIXME it may be used after Greedy RA and the physical279// register is not rewritten yet.280if (Reg.isVirtual() &&281MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)282return true;283if (Reg >= X86::TMM0 && Reg <= X86::TMM7)284return true;285}286287return false;288}289290static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {291MachineInstr *MI = MRI->getVRegDef(TileReg);292if (isTileDef(MRI, *MI)) {293MachineOperand *RowMO = &MI->getOperand(1);294MachineOperand *ColMO = &MI->getOperand(2);295return ShapeT(RowMO, ColMO, MRI);296} else if (MI->isCopy()) {297TileReg = MI->getOperand(1).getReg();298return 
getShape(MRI, TileReg);299}300301// The def should not be PHI node, because we walk the MBB in reverse post302// order.303assert(MI->isPHI() && "Unexpected PHI when get shape.");304llvm_unreachable("Unexpected MI when get shape.");305}306307// BB0:308// spill t0 to s0309// BB1:310// spill t1 to s1311//312// BB2:313// t = phi [t0, bb0] [t1, bb1]314// -->315// row = phi [r0, bb0] [r1, bb1]316// col = phi [c0, bb0] [c1, bb1]317// s = phi [s0, bb0] [s1, bb1]318// t = tileload row, col, s319// The new instruction is inserted at the end of the phi node. The order320// of the original phi node is not ensured.321void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,322MachineInstr &PHI) {323// 1. Create instruction to get stack slot address of each incoming block.324// 2. Create PHI node for the stack address.325// 3. Create PHI node for shape. If one of the incoming shape is immediate326// use the immediate and delete the PHI node.327// 4. Create tileload instruction from the stack address.328Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);329MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),330TII->get(X86::PHI), StackAddrReg);331Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);332MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),333TII->get(X86::PHI), RowReg);334Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);335MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),336TII->get(X86::PHI), ColReg);337// Record the mapping of phi node and its row/column information.338VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};339340for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {341// Get the 2 incoming value of tile register and MBB.342Register InTileReg = PHI.getOperand(I).getReg();343// Mark it as liveout, so that it will be spilled when visit344// the incoming MBB. 
Otherwise since phi will be deleted, it345// would miss spill when visit incoming MBB.346MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));347MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();348349MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);350MachineBasicBlock::iterator InsertPos;351if (TileDefMI->isPHI()) {352InsertPos = TileDefMI->getParent()->getFirstNonPHI();353if (VisitedPHIs.count(TileDefMI)) { // circular phi reference354// def t1355// / \356// def t2 t3 = phi(t1, t4) <--357// \ / |358// t4 = phi(t2, t3)-------------359//360// For each (row, column and stack address) append phi incoming value.361// Create r3 = phi(r1, r4)362// Create r4 = phi(r2, r3)363Register InRowReg = VisitedPHIs[TileDefMI].Row;364Register InColReg = VisitedPHIs[TileDefMI].Col;365Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;366RowPHI.addReg(InRowReg).addMBB(InMBB);367ColPHI.addReg(InColReg).addMBB(InMBB);368AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);369continue;370} else {371// Recursively convert PHI to tileload372convertPHI(TileDefMI->getParent(), *TileDefMI);373// The PHI node is coverted to tileload instruction. 
Get the stack374// address from tileload operands.375MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);376assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);377Register InRowReg = TileLoad->getOperand(1).getReg();378Register InColReg = TileLoad->getOperand(2).getReg();379Register InStackAddrReg = TileLoad->getOperand(3).getReg();380RowPHI.addReg(InRowReg).addMBB(InMBB);381ColPHI.addReg(InColReg).addMBB(InMBB);382AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);383}384} else {385InsertPos = TileDefMI->getIterator();386387// Fill the incoming operand of row/column phi instruction.388ShapeT Shape = getShape(MRI, InTileReg);389Shape.getRow()->setIsKill(false);390Shape.getCol()->setIsKill(false);391RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);392ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);393394// The incoming tile register live out of its def BB, it would be spilled.395// Create MI to get the spill stack slot address for the tile register396int FI = getStackSpaceFor(InTileReg);397Register InStackAddrReg =398MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);399addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),400TII->get(X86::LEA64r), InStackAddrReg)401.addFrameIndex(FI),4020);403AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);404}405}406407MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();408Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);409BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)410.addImm(64);411Register TileReg = PHI.getOperand(0).getReg();412MachineInstr *NewMI = addDirectMem(413BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)414.addReg(RowReg)415.addReg(ColReg),416StackAddrReg);417MachineOperand &MO = NewMI->getOperand(5);418MO.setReg(StrideReg);419MO.setIsKill(true);420PHI.eraseFromParent();421VisitedPHIs.erase(&PHI);422}423424static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {425MachineOperand &MO = 
MI.getOperand(0);426if (MO.isReg() && MO.getReg().isVirtual() &&427MRI->getRegClass(MO.getReg())->getID() == X86::TILERegClassID)428return true;429return false;430}431432void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {433SmallVector<MachineInstr *, 8> PHIs;434435for (MachineInstr &MI : MBB) {436if (!MI.isPHI())437break;438if (!isTileRegDef(MRI, MI))439continue;440PHIs.push_back(&MI);441}442// Canonicalize the phi node first. One tile phi may depeneds previous443// phi node. For below case, we need convert %t4.444//445// BB0:446// %t3 = phi (t1 BB1, t2 BB0)447// %t4 = phi (t5 BB1, t3 BB0)448// -->449// %t3 = phi (t1 BB1, t2 BB0)450// %t4 = phi (t5 BB1, t2 BB0)451//452while (!PHIs.empty()) {453MachineInstr *PHI = PHIs.pop_back_val();454455// Find the operand that is incoming from the same MBB and the def456// is also phi node.457MachineOperand *InMO = nullptr;458MachineInstr *DefMI = nullptr;459for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {460Register InTileReg = PHI->getOperand(I).getReg();461MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();462DefMI = MRI->getVRegDef(InTileReg);463if (InMBB != &MBB || !DefMI->isPHI())464continue;465466InMO = &PHI->getOperand(I);467break;468}469// If can't find such operand, do nothing.470if (!InMO)471continue;472473// Current phi node depends on previous phi node. 
Break the474// dependency.475Register DefTileReg;476for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {477MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();478if (InMBB != &MBB)479continue;480DefTileReg = DefMI->getOperand(I).getReg();481InMO->setReg(DefTileReg);482break;483}484}485}486487void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {488SmallVector<MachineInstr *, 8> PHIs;489for (MachineInstr &MI : MBB) {490if (!MI.isPHI())491break;492if (!isTileRegDef(MRI, MI))493continue;494PHIs.push_back(&MI);495}496while (!PHIs.empty()) {497MachineInstr *MI = PHIs.pop_back_val();498VisitedPHIs.clear();499convertPHI(&MBB, *MI);500}501}502503// PreTileConfig should configure the tile registers based on basic504// block.505bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {506this->MBB = &MBB;507bool Change = false;508MachineInstr *LastShapeMI = nullptr;509MachineInstr *LastTileCfg = nullptr;510bool HasUnconfigTile = false;511512auto Config = [&](MachineInstr &Before) {513if (CfgSS == -1)514CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),515ST->getTileConfigAlignment(), false);516LastTileCfg = addFrameReference(517BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);518LastShapeMI = nullptr;519Change = true;520};521auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {522for (const MachineOperand &MO : MI.operands()) {523if (!MO.isReg())524continue;525Register Reg = MO.getReg();526if (Reg.isVirtual() &&527MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)528return true;529}530return false;531};532for (MachineInstr &MI : reverse(MBB)) {533// We have transformed phi node before configuring BB.534if (MI.isPHI())535break;536// Don't collect the shape of used tile, the tile should be defined537// before the tile use. Spill and reload would happen if there is only538// tile use after ldtilecfg, so the shape can be collected from reload.539// Take below code for example. 
%t would be reloaded before tilestore540// call541// ....542// tilestore %r, %c, %t543// -->544// call545// ldtilecfg546// %t = tileload %r, %c547// tilestore %r, %c, %t548if (HasTileOperand(MRI, MI))549HasUnconfigTile = true;550// According to AMX ABI, all the tile registers including config register551// are volatile. Caller need to save/restore config register.552if (MI.isCall() && HasUnconfigTile) {553MachineBasicBlock::iterator I;554if (LastShapeMI && dominates(MBB, MI, LastShapeMI))555I = ++LastShapeMI->getIterator();556else557I = ++MI.getIterator();558Config(*I);559HasUnconfigTile = false;560continue;561}562if (!isTileDef(MRI, MI))563continue;564//565//---------------------------------------------------------------------566// Don't handle COPY instruction. If the src and dst of the COPY can be567// in the same config in below case, we just check the shape of t0.568// def row0569// def col0570// ldtilecfg571// t0 = tielzero(row0, col0)572// t1 = copy t0573// ...574// If the src and dst of the COPY can NOT be in the same config in below575// case. Reload would be generated befor the copy instruction.576// def row0577// def col0578// t0 = tielzero(row0, col0)579// spill t0580// ...581// def row1582// def col1583// ldtilecfg584// t1 = tilezero(row1, col1)585// reload t0586// t1 = copy t0587//---------------------------------------------------------------------588//589// If MI dominate the last shape def instruction, we need insert590// ldtilecfg after LastShapeMI now. 
The config doesn't include591// current MI.592// def row0593// def col0594// tilezero(row0, col0) <- MI595// def row1596// def col1597// ldtilecfg <- insert598// tilezero(row1, col1)599if (LastShapeMI && dominates(MBB, MI, LastShapeMI))600Config(*(++LastShapeMI->getIterator()));601MachineOperand *RowMO = &MI.getOperand(1);602MachineOperand *ColMO = &MI.getOperand(2);603MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());604MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());605// If the shape is defined in current MBB, check the domination.606// FIXME how about loop?607if (RowMI->getParent() == &MBB) {608if (!LastShapeMI)609LastShapeMI = RowMI;610else if (dominates(MBB, LastShapeMI, RowMI))611LastShapeMI = RowMI;612}613if (ColMI->getParent() == &MBB) {614if (!LastShapeMI)615LastShapeMI = ColMI;616else if (dominates(MBB, LastShapeMI, ColMI))617LastShapeMI = ColMI;618}619// If there is user live out of the tilecfg, spill it and reload in620// before the user.621Register TileReg = MI.getOperand(0).getReg();622if (mayLiveOut(TileReg, LastTileCfg))623spill(++MI.getIterator(), TileReg, false);624for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {625if (UseMI.getParent() == &MBB) {626// check user should not across ldtilecfg627if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))628continue;629// reload befor UseMI630reload(UseMI.getIterator(), TileReg, RowMO, ColMO);631} else {632// Don't reload for phi instruction, we handle phi reload separately.633// TODO: merge the reload for the same user MBB.634if (!UseMI.isPHI())635reload(UseMI.getIterator(), TileReg, RowMO, ColMO);636}637}638}639640// Configure tile registers at the head of the MBB641if (HasUnconfigTile) {642MachineInstr *Before;643if (LastShapeMI == nullptr || LastShapeMI->isPHI())644Before = &*MBB.getFirstNonPHI();645else646Before = &*(++LastShapeMI->getIterator());647648Config(*Before);649}650651return Change;652}653654bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) 
{655X86FI = MFunc.getInfo<X86MachineFunctionInfo>();656// Early exit in the common case of non-AMX code.657if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)658return false;659660MF = &MFunc;661MRI = &MFunc.getRegInfo();662ST = &MFunc.getSubtarget<X86Subtarget>();663TII = ST->getInstrInfo();664MFI = &MFunc.getFrameInfo();665TRI = ST->getRegisterInfo();666CfgSS = -1;667668unsigned NumVirtRegs = MRI->getNumVirtRegs();669670StackSlotForVirtReg.resize(NumVirtRegs);671MayLiveAcrossBlocks.clear();672// We will create register during config. *3 is to make sure673// the virtual register number doesn't exceed the size of674// the bit vector.675MayLiveAcrossBlocks.resize(NumVirtRegs * 3);676bool Change = false;677assert(MRI->isSSA());678679// Canonicalize the phi node first.680for (MachineBasicBlock &MBB : MFunc)681canonicalizePHIs(MBB);682683// Loop over all of the basic blocks in reverse post order and insert684// ldtilecfg for tile registers. The reserse post order is to facilitate685// PHI node convert.686ReversePostOrderTraversal<MachineFunction *> RPOT(MF);687for (MachineBasicBlock *MBB : RPOT) {688convertPHIs(*MBB);689Change |= configBasicBlock(*MBB);690}691692if (Change)693InitializeTileConfigStackSpace();694695StackSlotForVirtReg.clear();696return Change;697}698699FunctionPass *llvm::createX86FastPreTileConfigPass() {700return new X86FastPreTileConfig();701}702703704