Path: blob/main/contrib/llvm-project/llvm/lib/Target/PowerPC/PPCHazardRecognizers.cpp
35294 views
//===-- PPCHazardRecognizers.cpp - PowerPC Hazard Recognizer Impls --------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This file implements hazard recognizers for scheduling on PowerPC processors.9//10//===----------------------------------------------------------------------===//1112#include "PPCHazardRecognizers.h"13#include "PPCInstrInfo.h"14#include "PPCSubtarget.h"15#include "llvm/CodeGen/ScheduleDAG.h"16#include "llvm/Support/Debug.h"17#include "llvm/Support/ErrorHandling.h"18#include "llvm/Support/raw_ostream.h"19using namespace llvm;2021#define DEBUG_TYPE "pre-RA-sched"2223bool PPCDispatchGroupSBHazardRecognizer::isLoadAfterStore(SUnit *SU) {24// FIXME: Move this.25if (isBCTRAfterSet(SU))26return true;2728const MCInstrDesc *MCID = DAG->getInstrDesc(SU);29if (!MCID)30return false;3132if (!MCID->mayLoad())33return false;3435// SU is a load; for any predecessors in this dispatch group, that are stores,36// and with which we have an ordering dependency, return true.37for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {38const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());39if (!PredMCID || !PredMCID->mayStore())40continue;4142if (!SU->Preds[i].isNormalMemory() && !SU->Preds[i].isBarrier())43continue;4445for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)46if (SU->Preds[i].getSUnit() == CurGroup[j])47return true;48}4950return false;51}5253bool PPCDispatchGroupSBHazardRecognizer::isBCTRAfterSet(SUnit *SU) {54const MCInstrDesc *MCID = DAG->getInstrDesc(SU);55if (!MCID)56return false;5758if (!MCID->isBranch())59return false;6061// SU is a branch; for any predecessors in this dispatch group, with which we62// have a data dependence and set the counter register, return true.63for (unsigned i = 0, ie = (unsigned) SU->Preds.size(); i != ie; ++i) {64const MCInstrDesc *PredMCID = DAG->getInstrDesc(SU->Preds[i].getSUnit());65if (!PredMCID || PredMCID->getSchedClass() != PPC::Sched::IIC_SprMTSPR)66continue;6768if (SU->Preds[i].isCtrl())69continue;7071for (unsigned j = 0, je = CurGroup.size(); j != je; ++j)72if (SU->Preds[i].getSUnit() == CurGroup[j])73return true;74}7576return false;77}7879// FIXME: Remove this when we don't need this:80namespace llvm { namespace PPC { extern int getNonRecordFormOpcode(uint16_t); } }8182// FIXME: A lot of code in PPCDispatchGroupSBHazardRecognizer is P7 specific.8384bool PPCDispatchGroupSBHazardRecognizer::mustComeFirst(const MCInstrDesc *MCID,85unsigned &NSlots) {86// FIXME: Indirectly, this information is contained in the itinerary, and87// we should derive it from there instead of separately specifying it88// here.89unsigned IIC = MCID->getSchedClass();90switch (IIC) {91default:92NSlots = 1;93break;94case PPC::Sched::IIC_IntDivW:95case PPC::Sched::IIC_IntDivD:96case PPC::Sched::IIC_LdStLoadUpd:97case PPC::Sched::IIC_LdStLDU:98case PPC::Sched::IIC_LdStLFDU:99case PPC::Sched::IIC_LdStLFDUX:100case PPC::Sched::IIC_LdStLHA:101case PPC::Sched::IIC_LdStLHAU:102case PPC::Sched::IIC_LdStLWA:103case PPC::Sched::IIC_LdStSTU:104case PPC::Sched::IIC_LdStSTFDU:105NSlots = 2;106break;107case PPC::Sched::IIC_LdStLoadUpdX:108case PPC::Sched::IIC_LdStLDUX:109case PPC::Sched::IIC_LdStLHAUX:110case PPC::Sched::IIC_LdStLWARX:111case PPC::Sched::IIC_LdStLDARX:112case PPC::Sched::IIC_LdStSTUX:113case PPC::Sched::IIC_LdStSTDCX:114case PPC::Sched::IIC_LdStSTWCX:115case PPC::Sched::IIC_BrMCRX: // mtcr116// FIXME: Add sync/isync (here and in the itinerary).117NSlots = 4;118break;119}120121// FIXME: record-form instructions need a different itinerary class.122if (NSlots == 1 && PPC::getNonRecordFormOpcode(MCID->getOpcode()) != -1)123NSlots = 2;124125switch (IIC) {126default:127// All multi-slot instructions must come first.128return NSlots > 1;129case PPC::Sched::IIC_BrCR: // cr logicals130case PPC::Sched::IIC_SprMFCR:131case PPC::Sched::IIC_SprMFCRF:132case PPC::Sched::IIC_SprMTSPR:133return true;134}135}136137ScheduleHazardRecognizer::HazardType138PPCDispatchGroupSBHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {139if (Stalls == 0 && isLoadAfterStore(SU))140return NoopHazard;141142return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);143}144145bool PPCDispatchGroupSBHazardRecognizer::ShouldPreferAnother(SUnit *SU) {146const MCInstrDesc *MCID = DAG->getInstrDesc(SU);147unsigned NSlots;148if (MCID && mustComeFirst(MCID, NSlots) && CurSlots)149return true;150151return ScoreboardHazardRecognizer::ShouldPreferAnother(SU);152}153154unsigned PPCDispatchGroupSBHazardRecognizer::PreEmitNoops(SUnit *SU) {155// We only need to fill out a maximum of 5 slots here: The 6th slot could156// only be a second branch, and otherwise the next instruction will start a157// new group.158if (isLoadAfterStore(SU) && CurSlots < 6) {159unsigned Directive =160DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();161// If we're using a special group-terminating nop, then we need only one.162// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready163if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||164Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9)165return 1;166167return 5 - CurSlots;168}169170return ScoreboardHazardRecognizer::PreEmitNoops(SU);171}172173void PPCDispatchGroupSBHazardRecognizer::EmitInstruction(SUnit *SU) {174const MCInstrDesc *MCID = DAG->getInstrDesc(SU);175if (MCID) {176if (CurSlots == 5 || (MCID->isBranch() && CurBranches == 1)) {177CurGroup.clear();178CurSlots = CurBranches = 0;179} else {180LLVM_DEBUG(dbgs() << "**** Adding to dispatch group: ");181LLVM_DEBUG(DAG->dumpNode(*SU));182183unsigned NSlots;184bool MustBeFirst = mustComeFirst(MCID, NSlots);185186// If this instruction must come first, but does not, then it starts a187// new group.188if (MustBeFirst && CurSlots) {189CurSlots = CurBranches = 0;190CurGroup.clear();191}192193CurSlots += NSlots;194CurGroup.push_back(SU);195196if (MCID->isBranch())197++CurBranches;198}199}200201return ScoreboardHazardRecognizer::EmitInstruction(SU);202}203204void PPCDispatchGroupSBHazardRecognizer::AdvanceCycle() {205return ScoreboardHazardRecognizer::AdvanceCycle();206}207208void PPCDispatchGroupSBHazardRecognizer::RecedeCycle() {209llvm_unreachable("Bottom-up scheduling not supported");210}211212void PPCDispatchGroupSBHazardRecognizer::Reset() {213CurGroup.clear();214CurSlots = CurBranches = 0;215return ScoreboardHazardRecognizer::Reset();216}217218void PPCDispatchGroupSBHazardRecognizer::EmitNoop() {219unsigned Directive =220DAG->MF.getSubtarget<PPCSubtarget>().getCPUDirective();221// If the group has now filled all of its slots, or if we're using a special222// group-terminating nop, the group is complete.223// FIXME: the same for P9 as previous gen until POWER9 scheduling is ready224if (Directive == PPC::DIR_PWR6 || Directive == PPC::DIR_PWR7 ||225Directive == PPC::DIR_PWR8 || Directive == PPC::DIR_PWR9 ||226CurSlots == 6) {227CurGroup.clear();228CurSlots = CurBranches = 0;229} else {230CurGroup.push_back(nullptr);231++CurSlots;232}233}234235//===----------------------------------------------------------------------===//236// PowerPC 970 Hazard Recognizer237//238// This models the dispatch group formation of the PPC970 processor. Dispatch239// groups are bundles of up to five instructions that can contain various mixes240// of instructions. The PPC970 can dispatch a peak of 4 non-branch and one241// branch instruction per-cycle.242//243// There are a number of restrictions to dispatch group formation: some244// instructions can only be issued in the first slot of a dispatch group, & some245// instructions fill an entire dispatch group. Additionally, only branches can246// issue in the 5th (last) slot.247//248// Finally, there are a number of "structural" hazards on the PPC970. These249// conditions cause large performance penalties due to misprediction, recovery,250// and replay logic that has to happen. These cases include setting a CTR and251// branching through it in the same dispatch group, and storing to an address,252// then loading from the same address within a dispatch group. To avoid these253// conditions, we insert no-op instructions when appropriate.254//255// FIXME: This is missing some significant cases:256// 1. Modeling of microcoded instructions.257// 2. Handling of serialized operations.258// 3. Handling of the esoteric cases in "Resource-based Instruction Grouping".259//260261PPCHazardRecognizer970::PPCHazardRecognizer970(const ScheduleDAG &DAG)262: DAG(DAG) {263EndDispatchGroup();264}265266void PPCHazardRecognizer970::EndDispatchGroup() {267LLVM_DEBUG(errs() << "=== Start of dispatch group\n");268NumIssued = 0;269270// Structural hazard info.271HasCTRSet = false;272NumStores = 0;273}274275276PPCII::PPC970_Unit277PPCHazardRecognizer970::GetInstrType(unsigned Opcode,278bool &isFirst, bool &isSingle,279bool &isCracked,280bool &isLoad, bool &isStore) {281const MCInstrDesc &MCID = DAG.TII->get(Opcode);282283isLoad = MCID.mayLoad();284isStore = MCID.mayStore();285286uint64_t TSFlags = MCID.TSFlags;287288isFirst = TSFlags & PPCII::PPC970_First;289isSingle = TSFlags & PPCII::PPC970_Single;290isCracked = TSFlags & PPCII::PPC970_Cracked;291return (PPCII::PPC970_Unit)(TSFlags & PPCII::PPC970_Mask);292}293294/// isLoadOfStoredAddress - If we have a load from the previously stored pointer295/// as indicated by StorePtr1/StorePtr2/StoreSize, return true.296bool PPCHazardRecognizer970::297isLoadOfStoredAddress(uint64_t LoadSize, int64_t LoadOffset,298const Value *LoadValue) const {299for (unsigned i = 0, e = NumStores; i != e; ++i) {300// Handle exact and commuted addresses.301if (LoadValue == StoreValue[i] && LoadOffset == StoreOffset[i])302return true;303304// Okay, we don't have an exact match, if this is an indexed offset, see if305// we have overlap (which happens during fp->int conversion for example).306if (StoreValue[i] == LoadValue) {307// Okay the base pointers match, so we have [c1+r] vs [c2+r]. Check308// to see if the load and store actually overlap.309if (StoreOffset[i] < LoadOffset) {310if (int64_t(StoreOffset[i]+StoreSize[i]) > LoadOffset) return true;311} else {312if (int64_t(LoadOffset+LoadSize) > StoreOffset[i]) return true;313}314}315}316return false;317}318319/// getHazardType - We return hazard for any non-branch instruction that would320/// terminate the dispatch group. We turn NoopHazard for any321/// instructions that wouldn't terminate the dispatch group that would cause a322/// pipeline flush.323ScheduleHazardRecognizer::HazardType PPCHazardRecognizer970::324getHazardType(SUnit *SU, int Stalls) {325assert(Stalls == 0 && "PPC hazards don't support scoreboard lookahead");326327MachineInstr *MI = SU->getInstr();328329if (MI->isDebugInstr())330return NoHazard;331332unsigned Opcode = MI->getOpcode();333bool isFirst, isSingle, isCracked, isLoad, isStore;334PPCII::PPC970_Unit InstrType =335GetInstrType(Opcode, isFirst, isSingle, isCracked,336isLoad, isStore);337if (InstrType == PPCII::PPC970_Pseudo) return NoHazard;338339// We can only issue a PPC970_First/PPC970_Single instruction (such as340// crand/mtspr/etc) if this is the first cycle of the dispatch group.341if (NumIssued != 0 && (isFirst || isSingle))342return Hazard;343344// If this instruction is cracked into two ops by the decoder, we know that345// it is not a branch and that it cannot issue if 3 other instructions are346// already in the dispatch group.347if (isCracked && NumIssued > 2)348return Hazard;349350switch (InstrType) {351default: llvm_unreachable("Unknown instruction type!");352case PPCII::PPC970_FXU:353case PPCII::PPC970_LSU:354case PPCII::PPC970_FPU:355case PPCII::PPC970_VALU:356case PPCII::PPC970_VPERM:357// We can only issue a branch as the last instruction in a group.358if (NumIssued == 4) return Hazard;359break;360case PPCII::PPC970_CRU:361// We can only issue a CR instruction in the first two slots.362if (NumIssued >= 2) return Hazard;363break;364case PPCII::PPC970_BRU:365break;366}367368// Do not allow MTCTR and BCTRL to be in the same dispatch group.369if (HasCTRSet && Opcode == PPC::BCTRL)370return NoopHazard;371372// If this is a load following a store, make sure it's not to the same or373// overlapping address.374if (isLoad && NumStores && !MI->memoperands_empty()) {375MachineMemOperand *MO = *MI->memoperands_begin();376if (MO->getSize().hasValue() &&377isLoadOfStoredAddress(MO->getSize().getValue(), MO->getOffset(),378MO->getValue()))379return NoopHazard;380}381382return NoHazard;383}384385void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {386MachineInstr *MI = SU->getInstr();387388if (MI->isDebugInstr())389return;390391unsigned Opcode = MI->getOpcode();392bool isFirst, isSingle, isCracked, isLoad, isStore;393PPCII::PPC970_Unit InstrType =394GetInstrType(Opcode, isFirst, isSingle, isCracked,395isLoad, isStore);396if (InstrType == PPCII::PPC970_Pseudo) return;397398// Update structural hazard information.399if (Opcode == PPC::MTCTR || Opcode == PPC::MTCTR8) HasCTRSet = true;400401// Track the address stored to.402if (isStore && NumStores < 4 && !MI->memoperands_empty() &&403(*MI->memoperands_begin())->getSize().hasValue()) {404MachineMemOperand *MO = *MI->memoperands_begin();405StoreSize[NumStores] = MO->getSize().getValue();406StoreOffset[NumStores] = MO->getOffset();407StoreValue[NumStores] = MO->getValue();408++NumStores;409}410411if (InstrType == PPCII::PPC970_BRU || isSingle)412NumIssued = 4; // Terminate a d-group.413++NumIssued;414415// If this instruction is cracked into two ops by the decoder, remember that416// we issued two pieces.417if (isCracked)418++NumIssued;419420if (NumIssued == 5)421EndDispatchGroup();422}423424void PPCHazardRecognizer970::AdvanceCycle() {425assert(NumIssued < 5 && "Illegal dispatch group!");426++NumIssued;427if (NumIssued == 5)428EndDispatchGroup();429}430431void PPCHazardRecognizer970::Reset() {432EndDispatchGroup();433}434435436437