Path: blob/main/contrib/llvm-project/llvm/lib/Target/AMDGPU/AMDGPUExportClustering.cpp
35269 views
//===--- AMDGPUExportClusting.cpp - AMDGPU Export Clustering -------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8/// \file This file contains a DAG scheduling mutation to cluster shader9/// exports.10//11//===----------------------------------------------------------------------===//1213#include "AMDGPUExportClustering.h"14#include "MCTargetDesc/AMDGPUMCTargetDesc.h"15#include "SIInstrInfo.h"16#include "llvm/CodeGen/ScheduleDAGInstrs.h"1718using namespace llvm;1920namespace {2122class ExportClustering : public ScheduleDAGMutation {23public:24ExportClustering() = default;25void apply(ScheduleDAGInstrs *DAG) override;26};2728static bool isExport(const SUnit &SU) {29return SIInstrInfo::isEXP(*SU.getInstr());30}3132static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {33const MachineInstr *MI = SU->getInstr();34unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();35return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;36}3738static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,39unsigned PosCount) {40if (!PosCount || PosCount == Chain.size())41return;4243// Position exports should occur as soon as possible in the shader44// for optimal performance. This moves position exports before45// other exports while preserving the order within different export46// types (pos or other).47SmallVector<SUnit *, 8> Copy(Chain);48unsigned PosIdx = 0;49unsigned OtherIdx = PosCount;50for (SUnit *SU : Copy) {51if (isPositionExport(TII, SU))52Chain[PosIdx++] = SU;53else54Chain[OtherIdx++] = SU;55}56}5758static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {59SUnit *ChainHead = Exports.front();6061// Now construct cluster from chain by adding new edges.62for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {63SUnit *SUa = Exports[Idx];64SUnit *SUb = Exports[Idx + 1];6566// Copy all dependencies to the head of the chain to avoid any67// computation being inserted into the chain.68for (const SDep &Pred : SUb->Preds) {69SUnit *PredSU = Pred.getSUnit();70if (!isExport(*PredSU) && !Pred.isWeak())71DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));72}7374// New barrier edge ordering exports75DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));76// Also add cluster edge77DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));78}79}8081static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {82SmallVector<SDep, 2> ToAdd, ToRemove;8384for (const SDep &Pred : SU.Preds) {85SUnit *PredSU = Pred.getSUnit();86if (Pred.isBarrier() && isExport(*PredSU)) {87ToRemove.push_back(Pred);88if (isExport(SU))89continue;9091// If we remove a barrier we need to copy dependencies92// from the predecessor to maintain order.93for (const SDep &ExportPred : PredSU->Preds) {94SUnit *ExportPredSU = ExportPred.getSUnit();95if (ExportPred.isBarrier() && !isExport(*ExportPredSU))96ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));97}98}99}100101for (SDep Pred : ToRemove)102SU.removePred(Pred);103for (SDep Pred : ToAdd)104DAG->addEdge(&SU, Pred);105}106107void ExportClustering::apply(ScheduleDAGInstrs *DAG) {108const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);109110SmallVector<SUnit *, 8> Chain;111112// Pass through DAG gathering a list of exports and removing barrier edges113// creating dependencies on exports. Freeing exports of successor edges114// allows more scheduling freedom, and nothing should be order dependent115// on exports. Edges will be added later to order the exports.116unsigned PosCount = 0;117for (SUnit &SU : DAG->SUnits) {118if (!isExport(SU))119continue;120121Chain.push_back(&SU);122if (isPositionExport(TII, &SU))123PosCount++;124125removeExportDependencies(DAG, SU);126127SmallVector<SDep, 4> Succs(SU.Succs);128for (SDep Succ : Succs)129removeExportDependencies(DAG, *Succ.getSUnit());130}131132// Apply clustering if there are multiple exports133if (Chain.size() > 1) {134sortChain(TII, Chain, PosCount);135buildCluster(Chain, DAG);136}137}138139} // end namespace140141namespace llvm {142143std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {144return std::make_unique<ExportClustering>();145}146147} // end namespace llvm148149150