Path: blob/main/contrib/llvm-project/llvm/lib/Target/X86/X86FastTileConfig.cpp
35266 views
//===-- X86FastTileConfig.cpp - Fast Tile Register Configure---------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file Pass to config the shape of AMX physical registers9/// AMX register need to be configured before use. Before FastRegAllocation pass10/// the ldtilecfg instruction is inserted, however at that time we don't11/// know the shape of each physical tile registers, because the register12/// allocation is not done yet. This pass runs after register allocation13/// pass. It collects the shape information of each physical tile register14/// and store the shape in the stack slot that is allocated for load config15/// to tile config register.16//17//===----------------------------------------------------------------------===//1819#include "X86.h"20#include "X86InstrBuilder.h"21#include "X86MachineFunctionInfo.h"22#include "X86RegisterInfo.h"23#include "X86Subtarget.h"24#include "llvm/CodeGen/MachineFrameInfo.h"25#include "llvm/CodeGen/MachineFunctionPass.h"26#include "llvm/CodeGen/MachineInstr.h"27#include "llvm/CodeGen/MachineRegisterInfo.h"28#include "llvm/CodeGen/Passes.h"29#include "llvm/CodeGen/TargetInstrInfo.h"30#include "llvm/CodeGen/TargetRegisterInfo.h"31#include "llvm/InitializePasses.h"3233using namespace llvm;3435#define DEBUG_TYPE "fasttileconfig"3637namespace {3839class X86FastTileConfig : public MachineFunctionPass {40// context41MachineFunction *MF = nullptr;42const TargetInstrInfo *TII = nullptr;43MachineRegisterInfo *MRI = nullptr;44const TargetRegisterInfo *TRI = nullptr;45X86MachineFunctionInfo *X86FI = nullptr;4647bool configBasicBlock(MachineBasicBlock &MBB);4849public:50X86FastTileConfig() : MachineFunctionPass(ID) {}5152/// Return the pass name.53StringRef getPassName() const override {54return "Fast Tile Register Configure";55}5657void getAnalysisUsage(AnalysisUsage &AU) const override {58AU.setPreservesAll();59MachineFunctionPass::getAnalysisUsage(AU);60}6162/// Perform register allocation.63bool runOnMachineFunction(MachineFunction &MFunc) override;6465MachineFunctionProperties getRequiredProperties() const override {66return MachineFunctionProperties().set(67MachineFunctionProperties::Property::NoPHIs);68}6970static char ID;71};7273} // end anonymous namespace7475char X86FastTileConfig::ID = 0;7677INITIALIZE_PASS_BEGIN(X86FastTileConfig, DEBUG_TYPE,78"Fast Tile Register Configure", false, false)79INITIALIZE_PASS_END(X86FastTileConfig, DEBUG_TYPE,80"Fast Tile Register Configure", false, false)8182static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {83// There is no phi instruction after register allocation.84assert(MI.isPHI() == false);85// The instruction must have 3 operands: tile def, row, col.86// It should be AMX pseudo instruction that have shape operand.87if (MI.isDebugInstr() || MI.isCopy() || MI.getNumOperands() < 3 ||88!MI.isPseudo())89return false;90MachineOperand &MO = MI.getOperand(0);9192if (MO.isReg()) {93Register Reg = MO.getReg();94// FIXME it may be used after Greedy RA and the physical95// register is not rewritten yet.96if (Reg.isVirtual() &&97MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)98return true;99if (Reg >= X86::TMM0 && Reg <= X86::TMM7)100return true;101}102103return false;104}105106// PreTileConfig should configure the tile registers based on basic107// block.108bool X86FastTileConfig::configBasicBlock(MachineBasicBlock &MBB) {109bool Change = false;110SmallVector<std::pair<unsigned, ShapeT>, 6> ShapeInfos;111for (MachineInstr &MI : reverse(MBB)) {112if (!isTileDef(MRI, MI) && MI.getOpcode() != X86::PLDTILECFGV)113continue;114// AMX instructions that define tile register.115if (MI.getOpcode() != X86::PLDTILECFGV) {116MachineOperand &Row = MI.getOperand(1);117MachineOperand &Col = MI.getOperand(2);118unsigned TMMIdx = MI.getOperand(0).getReg() - X86::TMM0;119ShapeInfos.push_back({TMMIdx, ShapeT(&Row, &Col)});120} else { // PLDTILECFGV121// Rewrite the shape information to memory. Stack slot should have122// been initialized to zero in pre config.123int SS = MI.getOperand(0).getIndex(); // tile config stack slot.124for (auto &ShapeInfo : ShapeInfos) {125DebugLoc DL;126unsigned TMMIdx = ShapeInfo.first;127Register RowReg = ShapeInfo.second.getRow()->getReg();128Register ColReg = ShapeInfo.second.getCol()->getReg();129// Here is the data format for the tile config.130// 0 palette131// 1 start_row132// 2-15 reserved, must be zero133// 16-17 tile0.colsb Tile 0 bytes per row.134// 18-19 tile1.colsb Tile 1 bytes per row.135// 20-21 tile2.colsb Tile 2 bytes per row.136// ... (sequence continues)137// 30-31 tile7.colsb Tile 7 bytes per row.138// 32-47 reserved, must be zero139// 48 tile0.rows Tile 0 rows.140// 49 tile1.rows Tile 1 rows.141// 50 tile2.rows Tile 2 rows.142// ... (sequence continues)143// 55 tile7.rows Tile 7 rows.144// 56-63 reserved, must be zero145int RowOffset = 48 + TMMIdx;146int ColOffset = 16 + TMMIdx * 2;147148Register SubRowReg = TRI->getSubReg(RowReg, X86::sub_8bit);149BuildMI(MBB, MI, DL, TII->get(X86::IMPLICIT_DEF), SubRowReg);150MachineInstrBuilder StoreRow =151BuildMI(MBB, MI, DL, TII->get(X86::MOV8mr));152addFrameReference(StoreRow, SS, RowOffset).addReg(SubRowReg);153154MachineInstrBuilder StoreCol =155BuildMI(MBB, MI, DL, TII->get(X86::MOV16mr));156addFrameReference(StoreCol, SS, ColOffset).addReg(ColReg);157}158ShapeInfos.clear();159Change = true;160}161}162163return Change;164}165166bool X86FastTileConfig::runOnMachineFunction(MachineFunction &MFunc) {167X86FI = MFunc.getInfo<X86MachineFunctionInfo>();168// Early exit in the common case of non-AMX code.169if (X86FI->getAMXProgModel() != AMXProgModelEnum::ManagedRA)170return false;171172MF = &MFunc;173MRI = &MFunc.getRegInfo();174const TargetSubtargetInfo *ST = &MFunc.getSubtarget<X86Subtarget>();175TRI = ST->getRegisterInfo();176TII = MFunc.getSubtarget().getInstrInfo();177bool Change = false;178179// Loop over all of the basic blocks, eliminating virtual register references180for (MachineBasicBlock &MBB : MFunc)181Change |= configBasicBlock(MBB);182183return Change;184}185186FunctionPass *llvm::createX86FastTileConfigPass() {187return new X86FastTileConfig();188}189190191