Path: blob/main/contrib/llvm-project/llvm/lib/Target/SystemZ/SystemZHazardRecognizer.cpp
35269 views
//=-- SystemZHazardRecognizer.h - SystemZ Hazard Recognizer -----*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file defines a hazard recognizer for the SystemZ scheduler.9//10// This class is used by the SystemZ scheduling strategy to maintain11// the state during scheduling, and provide cost functions for12// scheduling candidates. This includes:13//14// * Decoder grouping. A decoder group can maximally hold 3 uops, and15// instructions that always begin a new group should be scheduled when16// the current decoder group is empty.17// * Processor resources usage. It is beneficial to balance the use of18// resources.19//20// A goal is to consider all instructions, also those outside of any21// scheduling region. Such instructions are "advanced" past and include22// single instructions before a scheduling region, branches etc.23//24// A block that has only one predecessor continues scheduling with the state25// of it (which may be updated by emitting branches).26//27// ===---------------------------------------------------------------------===//2829#include "SystemZHazardRecognizer.h"30#include "llvm/ADT/Statistic.h"3132using namespace llvm;3334#define DEBUG_TYPE "machine-scheduler"3536// This is the limit of processor resource usage at which the37// scheduler should try to look for other instructions (not using the38// critical resource).39static cl::opt<int> ProcResCostLim("procres-cost-lim", cl::Hidden,40cl::desc("The OOO window for processor "41"resources during scheduling."),42cl::init(8));4344unsigned SystemZHazardRecognizer::45getNumDecoderSlots(SUnit *SU) const {46const MCSchedClassDesc *SC = getSchedClass(SU);47if (!SC->isValid())48return 0; // IMPLICIT_DEF / KILL -- will not make impact in output.4950assert((SC->NumMicroOps != 2 || (SC->BeginGroup && !SC->EndGroup)) &&51"Only cracked instruction can have 2 uops.");52assert((SC->NumMicroOps < 3 || (SC->BeginGroup && SC->EndGroup)) &&53"Expanded instructions always group alone.");54assert((SC->NumMicroOps < 3 || (SC->NumMicroOps % 3 == 0)) &&55"Expanded instructions fill the group(s).");5657return SC->NumMicroOps;58}5960unsigned SystemZHazardRecognizer::getCurrCycleIdx(SUnit *SU) const {61unsigned Idx = CurrGroupSize;62if (GrpCount % 2)63Idx += 3;6465if (SU != nullptr && !fitsIntoCurrentGroup(SU)) {66if (Idx == 1 || Idx == 2)67Idx = 3;68else if (Idx == 4 || Idx == 5)69Idx = 0;70}7172return Idx;73}7475ScheduleHazardRecognizer::HazardType SystemZHazardRecognizer::76getHazardType(SUnit *SU, int Stalls) {77return (fitsIntoCurrentGroup(SU) ? NoHazard : Hazard);78}7980void SystemZHazardRecognizer::Reset() {81CurrGroupSize = 0;82CurrGroupHas4RegOps = false;83clearProcResCounters();84GrpCount = 0;85LastFPdOpCycleIdx = UINT_MAX;86LastEmittedMI = nullptr;87LLVM_DEBUG(CurGroupDbg = "";);88}8990bool91SystemZHazardRecognizer::fitsIntoCurrentGroup(SUnit *SU) const {92const MCSchedClassDesc *SC = getSchedClass(SU);93if (!SC->isValid())94return true;9596// A cracked instruction only fits into schedule if the current97// group is empty.98if (SC->BeginGroup)99return (CurrGroupSize == 0);100101// An instruction with 4 register operands will not fit in last slot.102assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&103"Current decoder group is already full!");104if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))105return false;106107// Since a full group is handled immediately in EmitInstruction(),108// SU should fit into current group. NumSlots should be 1 or 0,109// since it is not a cracked or expanded instruction.110assert ((getNumDecoderSlots(SU) <= 1) && (CurrGroupSize < 3) &&111"Expected normal instruction to fit in non-full group!");112113return true;114}115116bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {117const MachineFunction &MF = *MI->getParent()->getParent();118const TargetRegisterInfo *TRI = &TII->getRegisterInfo();119const MCInstrDesc &MID = MI->getDesc();120unsigned Count = 0;121for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {122const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);123if (RC == nullptr)124continue;125if (OpIdx >= MID.getNumDefs() &&126MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)127continue;128Count++;129}130return Count >= 4;131}132133void SystemZHazardRecognizer::nextGroup() {134if (CurrGroupSize == 0)135return;136137LLVM_DEBUG(dumpCurrGroup("Completed decode group"));138LLVM_DEBUG(CurGroupDbg = "";);139140int NumGroups = ((CurrGroupSize > 3) ? (CurrGroupSize / 3) : 1);141assert((CurrGroupSize <= 3 || CurrGroupSize % 3 == 0) &&142"Current decoder group bad.");143144// Reset counter for next group.145CurrGroupSize = 0;146CurrGroupHas4RegOps = false;147148GrpCount += ((unsigned) NumGroups);149150// Decrease counters for execution units.151for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)152ProcResourceCounters[i] = ((ProcResourceCounters[i] > NumGroups)153? (ProcResourceCounters[i] - NumGroups)154: 0);155156// Clear CriticalResourceIdx if it is now below the threshold.157if (CriticalResourceIdx != UINT_MAX &&158(ProcResourceCounters[CriticalResourceIdx] <=159ProcResCostLim))160CriticalResourceIdx = UINT_MAX;161162LLVM_DEBUG(dumpState(););163}164165#ifndef NDEBUG // Debug output166void SystemZHazardRecognizer::dumpSU(SUnit *SU, raw_ostream &OS) const {167OS << "SU(" << SU->NodeNum << "):";168OS << TII->getName(SU->getInstr()->getOpcode());169170const MCSchedClassDesc *SC = getSchedClass(SU);171if (!SC->isValid())172return;173174for (TargetSchedModel::ProcResIter175PI = SchedModel->getWriteProcResBegin(SC),176PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {177const MCProcResourceDesc &PRD =178*SchedModel->getProcResource(PI->ProcResourceIdx);179std::string FU(PRD.Name);180// trim e.g. Z13_FXaUnit -> FXa181FU = FU.substr(FU.find('_') + 1);182size_t Pos = FU.find("Unit");183if (Pos != std::string::npos)184FU.resize(Pos);185if (FU == "LS") // LSUnit -> LSU186FU = "LSU";187OS << "/" << FU;188189if (PI->ReleaseAtCycle> 1)190OS << "(" << PI->ReleaseAtCycle << "cyc)";191}192193if (SC->NumMicroOps > 1)194OS << "/" << SC->NumMicroOps << "uops";195if (SC->BeginGroup && SC->EndGroup)196OS << "/GroupsAlone";197else if (SC->BeginGroup)198OS << "/BeginsGroup";199else if (SC->EndGroup)200OS << "/EndsGroup";201if (SU->isUnbuffered)202OS << "/Unbuffered";203if (has4RegOps(SU->getInstr()))204OS << "/4RegOps";205}206207void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {208dbgs() << "++ " << Msg;209dbgs() << ": ";210211if (CurGroupDbg.empty())212dbgs() << " <empty>\n";213else {214dbgs() << "{ " << CurGroupDbg << " }";215dbgs() << " (" << CurrGroupSize << " decoder slot"216<< (CurrGroupSize > 1 ? "s":"")217<< (CurrGroupHas4RegOps ? ", 4RegOps" : "")218<< ")\n";219}220}221222void SystemZHazardRecognizer::dumpProcResourceCounters() const {223bool any = false;224225for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)226if (ProcResourceCounters[i] > 0) {227any = true;228break;229}230231if (!any)232return;233234dbgs() << "++ | Resource counters: ";235for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)236if (ProcResourceCounters[i] > 0)237dbgs() << SchedModel->getProcResource(i)->Name238<< ":" << ProcResourceCounters[i] << " ";239dbgs() << "\n";240241if (CriticalResourceIdx != UINT_MAX)242dbgs() << "++ | Critical resource: "243<< SchedModel->getProcResource(CriticalResourceIdx)->Name244<< "\n";245}246247void SystemZHazardRecognizer::dumpState() const {248dumpCurrGroup("| Current decoder group");249dbgs() << "++ | Current cycle index: "250<< getCurrCycleIdx() << "\n";251dumpProcResourceCounters();252if (LastFPdOpCycleIdx != UINT_MAX)253dbgs() << "++ | Last FPd cycle index: " << LastFPdOpCycleIdx << "\n";254}255256#endif //NDEBUG257258void SystemZHazardRecognizer::clearProcResCounters() {259ProcResourceCounters.assign(SchedModel->getNumProcResourceKinds(), 0);260CriticalResourceIdx = UINT_MAX;261}262263static inline bool isBranchRetTrap(MachineInstr *MI) {264return (MI->isBranch() || MI->isReturn() ||265MI->getOpcode() == SystemZ::CondTrap);266}267268// Update state with SU as the next scheduled unit.269void SystemZHazardRecognizer::270EmitInstruction(SUnit *SU) {271const MCSchedClassDesc *SC = getSchedClass(SU);272LLVM_DEBUG(dbgs() << "++ HazardRecognizer emitting "; dumpSU(SU, dbgs());273dbgs() << "\n";);274LLVM_DEBUG(dumpCurrGroup("Decode group before emission"););275276// If scheduling an SU that must begin a new decoder group, move on277// to next group.278if (!fitsIntoCurrentGroup(SU))279nextGroup();280281LLVM_DEBUG(raw_string_ostream cgd(CurGroupDbg);282if (CurGroupDbg.length()) cgd << ", "; dumpSU(SU, cgd););283284LastEmittedMI = SU->getInstr();285286// After returning from a call, we don't know much about the state.287if (SU->isCall) {288LLVM_DEBUG(dbgs() << "++ Clearing state after call.\n";);289Reset();290LastEmittedMI = SU->getInstr();291return;292}293294// Increase counter for execution unit(s).295for (TargetSchedModel::ProcResIter296PI = SchedModel->getWriteProcResBegin(SC),297PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {298// Don't handle FPd together with the other resources.299if (SchedModel->getProcResource(PI->ProcResourceIdx)->BufferSize == 1)300continue;301int &CurrCounter =302ProcResourceCounters[PI->ProcResourceIdx];303CurrCounter += PI->ReleaseAtCycle;304// Check if this is now the new critical resource.305if ((CurrCounter > ProcResCostLim) &&306(CriticalResourceIdx == UINT_MAX ||307(PI->ProcResourceIdx != CriticalResourceIdx &&308CurrCounter >309ProcResourceCounters[CriticalResourceIdx]))) {310LLVM_DEBUG(311dbgs() << "++ New critical resource: "312<< SchedModel->getProcResource(PI->ProcResourceIdx)->Name313<< "\n";);314CriticalResourceIdx = PI->ProcResourceIdx;315}316}317318// Make note of an instruction that uses a blocking resource (FPd).319if (SU->isUnbuffered) {320LastFPdOpCycleIdx = getCurrCycleIdx(SU);321LLVM_DEBUG(dbgs() << "++ Last FPd cycle index: " << LastFPdOpCycleIdx322<< "\n";);323}324325// Insert SU into current group by increasing number of slots used326// in current group.327CurrGroupSize += getNumDecoderSlots(SU);328CurrGroupHas4RegOps |= has4RegOps(SU->getInstr());329unsigned GroupLim = (CurrGroupHas4RegOps ? 2 : 3);330assert((CurrGroupSize <= GroupLim || CurrGroupSize == getNumDecoderSlots(SU))331&& "SU does not fit into decoder group!");332333// Check if current group is now full/ended. If so, move on to next334// group to be ready to evaluate more candidates.335if (CurrGroupSize >= GroupLim || SC->EndGroup)336nextGroup();337}338339int SystemZHazardRecognizer::groupingCost(SUnit *SU) const {340const MCSchedClassDesc *SC = getSchedClass(SU);341if (!SC->isValid())342return 0;343344// If SU begins new group, it can either break a current group early345// or fit naturally if current group is empty (negative cost).346if (SC->BeginGroup) {347if (CurrGroupSize)348return 3 - CurrGroupSize;349return -1;350}351352// Similarly, a group-ending SU may either fit well (last in group), or353// end the group prematurely.354if (SC->EndGroup) {355unsigned resultingGroupSize =356(CurrGroupSize + getNumDecoderSlots(SU));357if (resultingGroupSize < 3)358return (3 - resultingGroupSize);359return -1;360}361362// An instruction with 4 register operands will not fit in last slot.363if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))364return 1;365366// Most instructions can be placed in any decoder slot.367return 0;368}369370bool SystemZHazardRecognizer::isFPdOpPreferred_distance(SUnit *SU) const {371assert (SU->isUnbuffered);372// If this is the first FPd op, it should be scheduled high.373if (LastFPdOpCycleIdx == UINT_MAX)374return true;375// If this is not the first PFd op, it should go into the other side376// of the processor to use the other FPd unit there. This should377// generally happen if two FPd ops are placed with 2 other378// instructions between them (modulo 6).379unsigned SUCycleIdx = getCurrCycleIdx(SU);380if (LastFPdOpCycleIdx > SUCycleIdx)381return ((LastFPdOpCycleIdx - SUCycleIdx) == 3);382return ((SUCycleIdx - LastFPdOpCycleIdx) == 3);383}384385int SystemZHazardRecognizer::386resourcesCost(SUnit *SU) {387int Cost = 0;388389const MCSchedClassDesc *SC = getSchedClass(SU);390if (!SC->isValid())391return 0;392393// For a FPd op, either return min or max value as indicated by the394// distance to any prior FPd op.395if (SU->isUnbuffered)396Cost = (isFPdOpPreferred_distance(SU) ? INT_MIN : INT_MAX);397// For other instructions, give a cost to the use of the critical resource.398else if (CriticalResourceIdx != UINT_MAX) {399for (TargetSchedModel::ProcResIter400PI = SchedModel->getWriteProcResBegin(SC),401PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI)402if (PI->ProcResourceIdx == CriticalResourceIdx)403Cost = PI->ReleaseAtCycle;404}405406return Cost;407}408409void SystemZHazardRecognizer::emitInstruction(MachineInstr *MI,410bool TakenBranch) {411// Make a temporary SUnit.412SUnit SU(MI, 0);413414// Set interesting flags.415SU.isCall = MI->isCall();416417const MCSchedClassDesc *SC = SchedModel->resolveSchedClass(MI);418for (const MCWriteProcResEntry &PRE :419make_range(SchedModel->getWriteProcResBegin(SC),420SchedModel->getWriteProcResEnd(SC))) {421switch (SchedModel->getProcResource(PRE.ProcResourceIdx)->BufferSize) {422case 0:423SU.hasReservedResource = true;424break;425case 1:426SU.isUnbuffered = true;427break;428default:429break;430}431}432433unsigned GroupSizeBeforeEmit = CurrGroupSize;434EmitInstruction(&SU);435436if (!TakenBranch && isBranchRetTrap(MI)) {437// NT Branch on second slot ends group.438if (GroupSizeBeforeEmit == 1)439nextGroup();440}441442if (TakenBranch && CurrGroupSize > 0)443nextGroup();444445assert ((!MI->isTerminator() || isBranchRetTrap(MI)) &&446"Scheduler: unhandled terminator!");447}448449void SystemZHazardRecognizer::450copyState(SystemZHazardRecognizer *Incoming) {451// Current decoder group452CurrGroupSize = Incoming->CurrGroupSize;453LLVM_DEBUG(CurGroupDbg = Incoming->CurGroupDbg;);454455// Processor resources456ProcResourceCounters = Incoming->ProcResourceCounters;457CriticalResourceIdx = Incoming->CriticalResourceIdx;458459// FPd460LastFPdOpCycleIdx = Incoming->LastFPdOpCycleIdx;461GrpCount = Incoming->GrpCount;462}463464465