Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUInsertSingleUseVDST.cpp
35294 views
//===- AMDGPUInsertSingleUseVDST.cpp - Insert s_singleuse_vdst instructions ==//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file9/// Insert s_singleuse_vdst instructions on GFX11.5+ to mark regions of VALU10/// instructions that produce single-use VGPR values. If the value is forwarded11/// to the consumer instruction prior to VGPR writeback, the hardware can12/// then skip (kill) the VGPR write.13//14//===----------------------------------------------------------------------===//1516#include "AMDGPU.h"17#include "AMDGPUGenSearchableTables.inc"18#include "GCNSubtarget.h"19#include "SIInstrInfo.h"20#include "SIRegisterInfo.h"21#include "llvm/ADT/DenseMap.h"22#include "llvm/ADT/STLExtras.h"23#include "llvm/ADT/SmallVector.h"24#include "llvm/ADT/StringRef.h"25#include "llvm/CodeGen/MachineBasicBlock.h"26#include "llvm/CodeGen/MachineFunction.h"27#include "llvm/CodeGen/MachineFunctionPass.h"28#include "llvm/CodeGen/MachineInstr.h"29#include "llvm/CodeGen/MachineInstrBuilder.h"30#include "llvm/CodeGen/MachineOperand.h"31#include "llvm/CodeGen/Register.h"32#include "llvm/IR/DebugLoc.h"33#include "llvm/MC/MCRegister.h"34#include "llvm/MC/MCRegisterInfo.h"35#include "llvm/Pass.h"36#include <array>3738using namespace llvm;3940#define DEBUG_TYPE "amdgpu-insert-single-use-vdst"4142namespace {43class AMDGPUInsertSingleUseVDST : public MachineFunctionPass {44private:45const SIInstrInfo *SII;46class SingleUseInstruction {47private:48static const unsigned MaxSkipRange = 0b111;49static const unsigned MaxNumberOfSkipRegions = 2;5051unsigned LastEncodedPositionEnd;52MachineInstr *ProducerInstr;5354std::array<unsigned, MaxNumberOfSkipRegions + 1> SingleUseRegions;55SmallVector<unsigned, MaxNumberOfSkipRegions> SkipRegions;5657// Adds a skip region into the instruction.58void skip(const unsigned ProducerPosition) {59while (LastEncodedPositionEnd + MaxSkipRange < ProducerPosition) {60SkipRegions.push_back(MaxSkipRange);61LastEncodedPositionEnd += MaxSkipRange;62}63SkipRegions.push_back(ProducerPosition - LastEncodedPositionEnd);64LastEncodedPositionEnd = ProducerPosition;65}6667bool currentRegionHasSpace() {68const auto Region = SkipRegions.size();69// The first region has an extra bit of encoding space.70return SingleUseRegions[Region] <71((Region == MaxNumberOfSkipRegions) ? 0b1111U : 0b111U);72}7374unsigned encodeImm() {75// Handle the first Single Use Region separately as it has an extra bit76// of encoding space.77unsigned Imm = SingleUseRegions[SkipRegions.size()];78unsigned ShiftAmount = 4;79for (unsigned i = SkipRegions.size(); i > 0; i--) {80Imm |= SkipRegions[i - 1] << ShiftAmount;81ShiftAmount += 3;82Imm |= SingleUseRegions[i - 1] << ShiftAmount;83ShiftAmount += 3;84}85return Imm;86}8788public:89SingleUseInstruction(const unsigned ProducerPosition,90MachineInstr *Producer)91: LastEncodedPositionEnd(ProducerPosition + 1), ProducerInstr(Producer),92SingleUseRegions({1, 0, 0}) {}9394// Returns false if adding a new single use producer failed. This happens95// because it could not be encoded, either because there is no room to96// encode another single use producer region or that this single use97// producer is too far away to encode the amount of instructions to skip.98bool tryAddProducer(const unsigned ProducerPosition, MachineInstr *MI) {99// Producer is too far away to encode into this instruction or another100// skip region is needed and SkipRegions.size() = 2 so there's no room for101// another skip region, therefore a new instruction is needed.102if (LastEncodedPositionEnd +103(MaxSkipRange * (MaxNumberOfSkipRegions - SkipRegions.size())) <104ProducerPosition)105return false;106107// If a skip region is needed.108if (LastEncodedPositionEnd != ProducerPosition ||109!currentRegionHasSpace()) {110// If the current region is out of space therefore a skip region would111// be needed, but there is no room for another skip region.112if (SkipRegions.size() == MaxNumberOfSkipRegions)113return false;114skip(ProducerPosition);115}116117SingleUseRegions[SkipRegions.size()]++;118LastEncodedPositionEnd = ProducerPosition + 1;119ProducerInstr = MI;120return true;121}122123auto emit(const SIInstrInfo *SII) {124return BuildMI(*ProducerInstr->getParent(), ProducerInstr, DebugLoc(),125SII->get(AMDGPU::S_SINGLEUSE_VDST))126.addImm(encodeImm());127}128};129130public:131static char ID;132133AMDGPUInsertSingleUseVDST() : MachineFunctionPass(ID) {}134135void insertSingleUseInstructions(136ArrayRef<std::pair<unsigned, MachineInstr *>> SingleUseProducers) const {137SmallVector<SingleUseInstruction> Instructions;138139for (auto &[Position, MI] : SingleUseProducers) {140// Encode this position into the last single use instruction if possible.141if (Instructions.empty() ||142!Instructions.back().tryAddProducer(Position, MI)) {143// If not, add a new instruction.144Instructions.push_back(SingleUseInstruction(Position, MI));145}146}147148for (auto &Instruction : Instructions)149Instruction.emit(SII);150}151152bool runOnMachineFunction(MachineFunction &MF) override {153const auto &ST = MF.getSubtarget<GCNSubtarget>();154if (!ST.hasVGPRSingleUseHintInsts())155return false;156157SII = ST.getInstrInfo();158const auto *TRI = &SII->getRegisterInfo();159bool InstructionEmitted = false;160161for (MachineBasicBlock &MBB : MF) {162DenseMap<MCRegUnit, unsigned> RegisterUseCount;163164// Handle boundaries at the end of basic block separately to avoid165// false positives. If they are live at the end of a basic block then166// assume it has more uses later on.167for (const auto &Liveout : MBB.liveouts()) {168for (MCRegUnitMaskIterator Units(Liveout.PhysReg, TRI); Units.isValid();169++Units) {170const auto [Unit, Mask] = *Units;171if ((Mask & Liveout.LaneMask).any())172RegisterUseCount[Unit] = 2;173}174}175176SmallVector<std::pair<unsigned, MachineInstr *>>177SingleUseProducerPositions;178179unsigned VALUInstrCount = 0;180for (MachineInstr &MI : reverse(MBB.instrs())) {181// All registers in all operands need to be single use for an182// instruction to be marked as a single use producer.183bool AllProducerOperandsAreSingleUse = true;184185// Gather a list of Registers used before updating use counts to avoid186// double counting registers that appear multiple times in a single187// MachineInstr.188SmallVector<MCRegUnit> RegistersUsed;189190for (const auto &Operand : MI.all_defs()) {191const auto Reg = Operand.getReg();192193const auto RegUnits = TRI->regunits(Reg);194if (any_of(RegUnits, [&RegisterUseCount](const MCRegUnit Unit) {195return RegisterUseCount[Unit] > 1;196}))197AllProducerOperandsAreSingleUse = false;198199// Reset uses count when a register is no longer live.200for (const MCRegUnit Unit : RegUnits)201RegisterUseCount.erase(Unit);202}203204for (const auto &Operand : MI.all_uses()) {205const auto Reg = Operand.getReg();206207// Count the number of times each register is read.208for (const MCRegUnit Unit : TRI->regunits(Reg)) {209if (!is_contained(RegistersUsed, Unit))210RegistersUsed.push_back(Unit);211}212}213for (const MCRegUnit Unit : RegistersUsed)214RegisterUseCount[Unit]++;215216// Do not attempt to optimise across exec mask changes.217if (MI.modifiesRegister(AMDGPU::EXEC, TRI) ||218AMDGPU::isInvalidSingleUseConsumerInst(MI.getOpcode())) {219for (auto &UsedReg : RegisterUseCount)220UsedReg.second = 2;221}222223if (!SIInstrInfo::isVALU(MI) ||224AMDGPU::isInvalidSingleUseProducerInst(MI.getOpcode()))225continue;226if (AllProducerOperandsAreSingleUse) {227SingleUseProducerPositions.push_back({VALUInstrCount, &MI});228InstructionEmitted = true;229}230VALUInstrCount++;231}232insertSingleUseInstructions(SingleUseProducerPositions);233}234return InstructionEmitted;235}236};237} // namespace238239char AMDGPUInsertSingleUseVDST::ID = 0;240241char &llvm::AMDGPUInsertSingleUseVDSTID = AMDGPUInsertSingleUseVDST::ID;242243INITIALIZE_PASS(AMDGPUInsertSingleUseVDST, DEBUG_TYPE,244"AMDGPU Insert SingleUseVDST", false, false)245246247