Path: blob/main/contrib/llvm-project/llvm/lib/MCA/InstrBuilder.cpp
35260 views
//===--------------------- InstrBuilder.cpp ---------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7/// \file8///9/// This file implements the InstrBuilder interface.10///11//===----------------------------------------------------------------------===//1213#include "llvm/MCA/InstrBuilder.h"14#include "llvm/ADT/APInt.h"15#include "llvm/ADT/DenseMap.h"16#include "llvm/ADT/Hashing.h"17#include "llvm/ADT/Statistic.h"18#include "llvm/MC/MCInst.h"19#include "llvm/Support/Debug.h"20#include "llvm/Support/WithColor.h"21#include "llvm/Support/raw_ostream.h"2223#define DEBUG_TYPE "llvm-mca-instrbuilder"2425namespace llvm {26namespace mca {2728char RecycledInstErr::ID = 0;2930InstrBuilder::InstrBuilder(const llvm::MCSubtargetInfo &sti,31const llvm::MCInstrInfo &mcii,32const llvm::MCRegisterInfo &mri,33const llvm::MCInstrAnalysis *mcia,34const mca::InstrumentManager &im, unsigned cl)35: STI(sti), MCII(mcii), MRI(mri), MCIA(mcia), IM(im), FirstCallInst(true),36FirstReturnInst(true), CallLatency(cl) {37const MCSchedModel &SM = STI.getSchedModel();38ProcResourceMasks.resize(SM.getNumProcResourceKinds());39computeProcResourceMasks(STI.getSchedModel(), ProcResourceMasks);40}4142static void initializeUsedResources(InstrDesc &ID,43const MCSchedClassDesc &SCDesc,44const MCSubtargetInfo &STI,45ArrayRef<uint64_t> ProcResourceMasks) {46const MCSchedModel &SM = STI.getSchedModel();4748// Populate resources consumed.49using ResourcePlusCycles = std::pair<uint64_t, ResourceUsage>;50SmallVector<ResourcePlusCycles, 4> Worklist;5152// Track cycles contributed by resources that are in a "Super" relationship.53// This is required if we want to correctly match the behavior of method54// SubtargetEmitter::ExpandProcResource() in Tablegen. When computing the set55// of "consumed" processor resources and resource cycles, the logic in56// ExpandProcResource() doesn't update the number of resource cycles57// contributed by a "Super" resource to a group.58// We need to take this into account when we find that a processor resource is59// part of a group, and it is also used as the "Super" of other resources.60// This map stores the number of cycles contributed by sub-resources that are61// part of a "Super" resource. The key value is the "Super" resource mask ID.62DenseMap<uint64_t, unsigned> SuperResources;6364unsigned NumProcResources = SM.getNumProcResourceKinds();65APInt Buffers(NumProcResources, 0);6667bool AllInOrderResources = true;68bool AnyDispatchHazards = false;69for (unsigned I = 0, E = SCDesc.NumWriteProcResEntries; I < E; ++I) {70const MCWriteProcResEntry *PRE = STI.getWriteProcResBegin(&SCDesc) + I;71const MCProcResourceDesc &PR = *SM.getProcResource(PRE->ProcResourceIdx);72if (!PRE->ReleaseAtCycle) {73#ifndef NDEBUG74WithColor::warning()75<< "Ignoring invalid write of zero cycles on processor resource "76<< PR.Name << "\n";77WithColor::note() << "found in scheduling class " << SCDesc.Name78<< " (write index #" << I << ")\n";79#endif80continue;81}8283uint64_t Mask = ProcResourceMasks[PRE->ProcResourceIdx];84if (PR.BufferSize < 0) {85AllInOrderResources = false;86} else {87Buffers.setBit(getResourceStateIndex(Mask));88AnyDispatchHazards |= (PR.BufferSize == 0);89AllInOrderResources &= (PR.BufferSize <= 1);90}9192CycleSegment RCy(0, PRE->ReleaseAtCycle, false);93Worklist.emplace_back(ResourcePlusCycles(Mask, ResourceUsage(RCy)));94if (PR.SuperIdx) {95uint64_t Super = ProcResourceMasks[PR.SuperIdx];96SuperResources[Super] += PRE->ReleaseAtCycle;97}98}99100ID.MustIssueImmediately = AllInOrderResources && AnyDispatchHazards;101102// Sort elements by mask popcount, so that we prioritize resource units over103// resource groups, and smaller groups over larger groups.104sort(Worklist, [](const ResourcePlusCycles &A, const ResourcePlusCycles &B) {105unsigned popcntA = llvm::popcount(A.first);106unsigned popcntB = llvm::popcount(B.first);107if (popcntA < popcntB)108return true;109if (popcntA > popcntB)110return false;111return A.first < B.first;112});113114uint64_t UsedResourceUnits = 0;115uint64_t UsedResourceGroups = 0;116uint64_t UnitsFromResourceGroups = 0;117118// Remove cycles contributed by smaller resources, and check if there119// are partially overlapping resource groups.120ID.HasPartiallyOverlappingGroups = false;121122for (unsigned I = 0, E = Worklist.size(); I < E; ++I) {123ResourcePlusCycles &A = Worklist[I];124if (!A.second.size()) {125assert(llvm::popcount(A.first) > 1 && "Expected a group!");126UsedResourceGroups |= llvm::bit_floor(A.first);127continue;128}129130ID.Resources.emplace_back(A);131uint64_t NormalizedMask = A.first;132133if (llvm::popcount(A.first) == 1) {134UsedResourceUnits |= A.first;135} else {136// Remove the leading 1 from the resource group mask.137NormalizedMask ^= llvm::bit_floor(NormalizedMask);138if (UnitsFromResourceGroups & NormalizedMask)139ID.HasPartiallyOverlappingGroups = true;140141UnitsFromResourceGroups |= NormalizedMask;142UsedResourceGroups |= (A.first ^ NormalizedMask);143}144145for (unsigned J = I + 1; J < E; ++J) {146ResourcePlusCycles &B = Worklist[J];147if ((NormalizedMask & B.first) == NormalizedMask) {148B.second.CS.subtract(A.second.size() - SuperResources[A.first]);149if (llvm::popcount(B.first) > 1)150B.second.NumUnits++;151}152}153}154155// A SchedWrite may specify a number of cycles in which a resource group156// is reserved. For example (on target x86; cpu Haswell):157//158// SchedWriteRes<[HWPort0, HWPort1, HWPort01]> {159// let ReleaseAtCycles = [2, 2, 3];160// }161//162// This means:163// Resource units HWPort0 and HWPort1 are both used for 2cy.164// Resource group HWPort01 is the union of HWPort0 and HWPort1.165// Since this write touches both HWPort0 and HWPort1 for 2cy, HWPort01166// will not be usable for 2 entire cycles from instruction issue.167//168// On top of those 2cy, SchedWriteRes explicitly specifies an extra latency169// of 3 cycles for HWPort01. This tool assumes that the 3cy latency is an170// extra delay on top of the 2 cycles latency.171// During those extra cycles, HWPort01 is not usable by other instructions.172for (ResourcePlusCycles &RPC : ID.Resources) {173if (llvm::popcount(RPC.first) > 1 && !RPC.second.isReserved()) {174// Remove the leading 1 from the resource group mask.175uint64_t Mask = RPC.first ^ llvm::bit_floor(RPC.first);176uint64_t MaxResourceUnits = llvm::popcount(Mask);177if (RPC.second.NumUnits > (unsigned)llvm::popcount(Mask)) {178RPC.second.setReserved();179RPC.second.NumUnits = MaxResourceUnits;180}181}182}183184// Identify extra buffers that are consumed through super resources.185for (const std::pair<uint64_t, unsigned> &SR : SuperResources) {186for (unsigned I = 1, E = NumProcResources; I < E; ++I) {187const MCProcResourceDesc &PR = *SM.getProcResource(I);188if (PR.BufferSize == -1)189continue;190191uint64_t Mask = ProcResourceMasks[I];192if (Mask != SR.first && ((Mask & SR.first) == SR.first))193Buffers.setBit(getResourceStateIndex(Mask));194}195}196197ID.UsedBuffers = Buffers.getZExtValue();198ID.UsedProcResUnits = UsedResourceUnits;199ID.UsedProcResGroups = UsedResourceGroups;200201LLVM_DEBUG({202for (const std::pair<uint64_t, ResourceUsage> &R : ID.Resources)203dbgs() << "\t\tResource Mask=" << format_hex(R.first, 16) << ", "204<< "Reserved=" << R.second.isReserved() << ", "205<< "#Units=" << R.second.NumUnits << ", "206<< "cy=" << R.second.size() << '\n';207uint64_t BufferIDs = ID.UsedBuffers;208while (BufferIDs) {209uint64_t Current = BufferIDs & (-BufferIDs);210dbgs() << "\t\tBuffer Mask=" << format_hex(Current, 16) << '\n';211BufferIDs ^= Current;212}213dbgs() << "\t\t Used Units=" << format_hex(ID.UsedProcResUnits, 16) << '\n';214dbgs() << "\t\tUsed Groups=" << format_hex(ID.UsedProcResGroups, 16)215<< '\n';216dbgs() << "\t\tHasPartiallyOverlappingGroups="217<< ID.HasPartiallyOverlappingGroups << '\n';218});219}220221static void computeMaxLatency(InstrDesc &ID, const MCInstrDesc &MCDesc,222const MCSchedClassDesc &SCDesc,223const MCSubtargetInfo &STI,224unsigned CallLatency) {225if (MCDesc.isCall()) {226// We cannot estimate how long this call will take.227// Artificially set an arbitrarily high latency.228ID.MaxLatency = CallLatency;229return;230}231232int Latency = MCSchedModel::computeInstrLatency(STI, SCDesc);233// If latency is unknown, then conservatively assume the MaxLatency set for234// calls.235ID.MaxLatency = Latency < 0 ? CallLatency : static_cast<unsigned>(Latency);236}237238static Error verifyOperands(const MCInstrDesc &MCDesc, const MCInst &MCI) {239// Count register definitions, and skip non register operands in the process.240unsigned I, E;241unsigned NumExplicitDefs = MCDesc.getNumDefs();242for (I = 0, E = MCI.getNumOperands(); NumExplicitDefs && I < E; ++I) {243const MCOperand &Op = MCI.getOperand(I);244if (Op.isReg())245--NumExplicitDefs;246}247248if (NumExplicitDefs) {249return make_error<InstructionError<MCInst>>(250"Expected more register operand definitions.", MCI);251}252253if (MCDesc.hasOptionalDef()) {254// Always assume that the optional definition is the last operand.255const MCOperand &Op = MCI.getOperand(MCDesc.getNumOperands() - 1);256if (I == MCI.getNumOperands() || !Op.isReg()) {257std::string Message =258"expected a register operand for an optional definition. Instruction "259"has not been correctly analyzed.";260return make_error<InstructionError<MCInst>>(Message, MCI);261}262}263264return ErrorSuccess();265}266267void InstrBuilder::populateWrites(InstrDesc &ID, const MCInst &MCI,268unsigned SchedClassID) {269const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());270const MCSchedModel &SM = STI.getSchedModel();271const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);272273// Assumptions made by this algorithm:274// 1. The number of explicit and implicit register definitions in a MCInst275// matches the number of explicit and implicit definitions according to276// the opcode descriptor (MCInstrDesc).277// 2. Uses start at index #(MCDesc.getNumDefs()).278// 3. There can only be a single optional register definition, an it is279// either the last operand of the sequence (excluding extra operands280// contributed by variadic opcodes) or one of the explicit register281// definitions. The latter occurs for some Thumb1 instructions.282//283// These assumptions work quite well for most out-of-order in-tree targets284// like x86. This is mainly because the vast majority of instructions is285// expanded to MCInst using a straightforward lowering logic that preserves286// the ordering of the operands.287//288// About assumption 1.289// The algorithm allows non-register operands between register operand290// definitions. This helps to handle some special ARM instructions with291// implicit operand increment (-mtriple=armv7):292//293// vld1.32 {d18, d19}, [r1]! @ <MCInst #1463 VLD1q32wb_fixed294// @ <MCOperand Reg:59>295// @ <MCOperand Imm:0> (!!)296// @ <MCOperand Reg:67>297// @ <MCOperand Imm:0>298// @ <MCOperand Imm:14>299// @ <MCOperand Reg:0>>300//301// MCDesc reports:302// 6 explicit operands.303// 1 optional definition304// 2 explicit definitions (!!)305//306// The presence of an 'Imm' operand between the two register definitions307// breaks the assumption that "register definitions are always at the308// beginning of the operand sequence".309//310// To workaround this issue, this algorithm ignores (i.e. skips) any311// non-register operands between register definitions. The optional312// definition is still at index #(NumOperands-1).313//314// According to assumption 2. register reads start at #(NumExplicitDefs-1).315// That means, register R1 from the example is both read and written.316unsigned NumExplicitDefs = MCDesc.getNumDefs();317unsigned NumImplicitDefs = MCDesc.implicit_defs().size();318unsigned NumWriteLatencyEntries = SCDesc.NumWriteLatencyEntries;319unsigned TotalDefs = NumExplicitDefs + NumImplicitDefs;320if (MCDesc.hasOptionalDef())321TotalDefs++;322323unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();324ID.Writes.resize(TotalDefs + NumVariadicOps);325// Iterate over the operands list, and skip non-register or constant register326// operands. The first NumExplicitDefs register operands are expected to be327// register definitions.328unsigned CurrentDef = 0;329unsigned OptionalDefIdx = MCDesc.getNumOperands() - 1;330unsigned i = 0;331for (; i < MCI.getNumOperands() && CurrentDef < NumExplicitDefs; ++i) {332const MCOperand &Op = MCI.getOperand(i);333if (!Op.isReg())334continue;335336if (MCDesc.operands()[CurrentDef].isOptionalDef()) {337OptionalDefIdx = CurrentDef++;338continue;339}340if (MRI.isConstant(Op.getReg())) {341CurrentDef++;342continue;343}344345WriteDescriptor &Write = ID.Writes[CurrentDef];346Write.OpIndex = i;347if (CurrentDef < NumWriteLatencyEntries) {348const MCWriteLatencyEntry &WLE =349*STI.getWriteLatencyEntry(&SCDesc, CurrentDef);350// Conservatively default to MaxLatency.351Write.Latency =352WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);353Write.SClassOrWriteResourceID = WLE.WriteResourceID;354} else {355// Assign a default latency for this write.356Write.Latency = ID.MaxLatency;357Write.SClassOrWriteResourceID = 0;358}359Write.IsOptionalDef = false;360LLVM_DEBUG({361dbgs() << "\t\t[Def] OpIdx=" << Write.OpIndex362<< ", Latency=" << Write.Latency363<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';364});365CurrentDef++;366}367368assert(CurrentDef == NumExplicitDefs &&369"Expected more register operand definitions.");370for (CurrentDef = 0; CurrentDef < NumImplicitDefs; ++CurrentDef) {371unsigned Index = NumExplicitDefs + CurrentDef;372WriteDescriptor &Write = ID.Writes[Index];373Write.OpIndex = ~CurrentDef;374Write.RegisterID = MCDesc.implicit_defs()[CurrentDef];375if (Index < NumWriteLatencyEntries) {376const MCWriteLatencyEntry &WLE =377*STI.getWriteLatencyEntry(&SCDesc, Index);378// Conservatively default to MaxLatency.379Write.Latency =380WLE.Cycles < 0 ? ID.MaxLatency : static_cast<unsigned>(WLE.Cycles);381Write.SClassOrWriteResourceID = WLE.WriteResourceID;382} else {383// Assign a default latency for this write.384Write.Latency = ID.MaxLatency;385Write.SClassOrWriteResourceID = 0;386}387388Write.IsOptionalDef = false;389assert(Write.RegisterID != 0 && "Expected a valid phys register!");390LLVM_DEBUG({391dbgs() << "\t\t[Def][I] OpIdx=" << ~Write.OpIndex392<< ", PhysReg=" << MRI.getName(Write.RegisterID)393<< ", Latency=" << Write.Latency394<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';395});396}397398if (MCDesc.hasOptionalDef()) {399WriteDescriptor &Write = ID.Writes[NumExplicitDefs + NumImplicitDefs];400Write.OpIndex = OptionalDefIdx;401// Assign a default latency for this write.402Write.Latency = ID.MaxLatency;403Write.SClassOrWriteResourceID = 0;404Write.IsOptionalDef = true;405LLVM_DEBUG({406dbgs() << "\t\t[Def][O] OpIdx=" << Write.OpIndex407<< ", Latency=" << Write.Latency408<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';409});410}411412if (!NumVariadicOps)413return;414415bool AssumeUsesOnly = !MCDesc.variadicOpsAreDefs();416CurrentDef = NumExplicitDefs + NumImplicitDefs + MCDesc.hasOptionalDef();417for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();418I < NumVariadicOps && !AssumeUsesOnly; ++I, ++OpIndex) {419const MCOperand &Op = MCI.getOperand(OpIndex);420if (!Op.isReg())421continue;422if (MRI.isConstant(Op.getReg()))423continue;424425WriteDescriptor &Write = ID.Writes[CurrentDef];426Write.OpIndex = OpIndex;427// Assign a default latency for this write.428Write.Latency = ID.MaxLatency;429Write.SClassOrWriteResourceID = 0;430Write.IsOptionalDef = false;431++CurrentDef;432LLVM_DEBUG({433dbgs() << "\t\t[Def][V] OpIdx=" << Write.OpIndex434<< ", Latency=" << Write.Latency435<< ", WriteResourceID=" << Write.SClassOrWriteResourceID << '\n';436});437}438439ID.Writes.resize(CurrentDef);440}441442void InstrBuilder::populateReads(InstrDesc &ID, const MCInst &MCI,443unsigned SchedClassID) {444const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());445unsigned NumExplicitUses = MCDesc.getNumOperands() - MCDesc.getNumDefs();446unsigned NumImplicitUses = MCDesc.implicit_uses().size();447// Remove the optional definition.448if (MCDesc.hasOptionalDef())449--NumExplicitUses;450unsigned NumVariadicOps = MCI.getNumOperands() - MCDesc.getNumOperands();451unsigned TotalUses = NumExplicitUses + NumImplicitUses + NumVariadicOps;452ID.Reads.resize(TotalUses);453unsigned CurrentUse = 0;454for (unsigned I = 0, OpIndex = MCDesc.getNumDefs(); I < NumExplicitUses;455++I, ++OpIndex) {456const MCOperand &Op = MCI.getOperand(OpIndex);457if (!Op.isReg())458continue;459if (MRI.isConstant(Op.getReg()))460continue;461462ReadDescriptor &Read = ID.Reads[CurrentUse];463Read.OpIndex = OpIndex;464Read.UseIndex = I;465Read.SchedClassID = SchedClassID;466++CurrentUse;467LLVM_DEBUG(dbgs() << "\t\t[Use] OpIdx=" << Read.OpIndex468<< ", UseIndex=" << Read.UseIndex << '\n');469}470471// For the purpose of ReadAdvance, implicit uses come directly after explicit472// uses. The "UseIndex" must be updated according to that implicit layout.473for (unsigned I = 0; I < NumImplicitUses; ++I) {474ReadDescriptor &Read = ID.Reads[CurrentUse + I];475Read.OpIndex = ~I;476Read.UseIndex = NumExplicitUses + I;477Read.RegisterID = MCDesc.implicit_uses()[I];478if (MRI.isConstant(Read.RegisterID))479continue;480Read.SchedClassID = SchedClassID;481LLVM_DEBUG(dbgs() << "\t\t[Use][I] OpIdx=" << ~Read.OpIndex482<< ", UseIndex=" << Read.UseIndex << ", RegisterID="483<< MRI.getName(Read.RegisterID) << '\n');484}485486CurrentUse += NumImplicitUses;487488bool AssumeDefsOnly = MCDesc.variadicOpsAreDefs();489for (unsigned I = 0, OpIndex = MCDesc.getNumOperands();490I < NumVariadicOps && !AssumeDefsOnly; ++I, ++OpIndex) {491const MCOperand &Op = MCI.getOperand(OpIndex);492if (!Op.isReg())493continue;494495ReadDescriptor &Read = ID.Reads[CurrentUse];496Read.OpIndex = OpIndex;497Read.UseIndex = NumExplicitUses + NumImplicitUses + I;498Read.SchedClassID = SchedClassID;499++CurrentUse;500LLVM_DEBUG(dbgs() << "\t\t[Use][V] OpIdx=" << Read.OpIndex501<< ", UseIndex=" << Read.UseIndex << '\n');502}503504ID.Reads.resize(CurrentUse);505}506507hash_code hashMCOperand(const MCOperand &MCO) {508hash_code TypeHash = hash_combine(MCO.isReg(), MCO.isImm(), MCO.isSFPImm(),509MCO.isDFPImm(), MCO.isExpr(), MCO.isInst());510if (MCO.isReg())511return hash_combine(TypeHash, MCO.getReg());512513return TypeHash;514}515516hash_code hashMCInst(const MCInst &MCI) {517hash_code InstructionHash = hash_combine(MCI.getOpcode(), MCI.getFlags());518for (unsigned I = 0; I < MCI.getNumOperands(); ++I) {519InstructionHash =520hash_combine(InstructionHash, hashMCOperand(MCI.getOperand(I)));521}522return InstructionHash;523}524525Error InstrBuilder::verifyInstrDesc(const InstrDesc &ID,526const MCInst &MCI) const {527if (ID.NumMicroOps != 0)528return ErrorSuccess();529530bool UsesBuffers = ID.UsedBuffers;531bool UsesResources = !ID.Resources.empty();532if (!UsesBuffers && !UsesResources)533return ErrorSuccess();534535// FIXME: see PR44797. We should revisit these checks and possibly move them536// in CodeGenSchedule.cpp.537StringRef Message = "found an inconsistent instruction that decodes to zero "538"opcodes and that consumes scheduler resources.";539return make_error<InstructionError<MCInst>>(std::string(Message), MCI);540}541542Expected<unsigned> InstrBuilder::getVariantSchedClassID(const MCInst &MCI,543unsigned SchedClassID) {544const MCSchedModel &SM = STI.getSchedModel();545unsigned CPUID = SM.getProcessorID();546while (SchedClassID && SM.getSchedClassDesc(SchedClassID)->isVariant())547SchedClassID =548STI.resolveVariantSchedClass(SchedClassID, &MCI, &MCII, CPUID);549550if (!SchedClassID) {551return make_error<InstructionError<MCInst>>(552"unable to resolve scheduling class for write variant.", MCI);553}554555return SchedClassID;556}557558Expected<const InstrDesc &>559InstrBuilder::createInstrDescImpl(const MCInst &MCI,560const SmallVector<Instrument *> &IVec) {561assert(STI.getSchedModel().hasInstrSchedModel() &&562"Itineraries are not yet supported!");563564// Obtain the instruction descriptor from the opcode.565unsigned short Opcode = MCI.getOpcode();566const MCInstrDesc &MCDesc = MCII.get(Opcode);567const MCSchedModel &SM = STI.getSchedModel();568569// Then obtain the scheduling class information from the instruction.570// Allow InstrumentManager to override and use a different SchedClassID571unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);572bool IsVariant = SM.getSchedClassDesc(SchedClassID)->isVariant();573574// Try to solve variant scheduling classes.575if (IsVariant) {576Expected<unsigned> VariantSchedClassIDOrErr =577getVariantSchedClassID(MCI, SchedClassID);578if (!VariantSchedClassIDOrErr) {579return VariantSchedClassIDOrErr.takeError();580}581582SchedClassID = *VariantSchedClassIDOrErr;583}584585// Check if this instruction is supported. Otherwise, report an error.586const MCSchedClassDesc &SCDesc = *SM.getSchedClassDesc(SchedClassID);587if (SCDesc.NumMicroOps == MCSchedClassDesc::InvalidNumMicroOps) {588return make_error<InstructionError<MCInst>>(589"found an unsupported instruction in the input assembly sequence", MCI);590}591592LLVM_DEBUG(dbgs() << "\n\t\tOpcode Name= " << MCII.getName(Opcode) << '\n');593LLVM_DEBUG(dbgs() << "\t\tSchedClassID=" << SchedClassID << '\n');594LLVM_DEBUG(dbgs() << "\t\tOpcode=" << Opcode << '\n');595596// Create a new empty descriptor.597std::unique_ptr<InstrDesc> ID = std::make_unique<InstrDesc>();598ID->NumMicroOps = SCDesc.NumMicroOps;599ID->SchedClassID = SchedClassID;600601if (MCDesc.isCall() && FirstCallInst) {602// We don't correctly model calls.603WithColor::warning() << "found a call in the input assembly sequence.\n";604WithColor::note() << "call instructions are not correctly modeled. "605<< "Assume a latency of " << CallLatency << "cy.\n";606FirstCallInst = false;607}608609if (MCDesc.isReturn() && FirstReturnInst) {610WithColor::warning() << "found a return instruction in the input"611<< " assembly sequence.\n";612WithColor::note() << "program counter updates are ignored.\n";613FirstReturnInst = false;614}615616initializeUsedResources(*ID, SCDesc, STI, ProcResourceMasks);617computeMaxLatency(*ID, MCDesc, SCDesc, STI, CallLatency);618619if (Error Err = verifyOperands(MCDesc, MCI))620return std::move(Err);621622populateWrites(*ID, MCI, SchedClassID);623populateReads(*ID, MCI, SchedClassID);624625LLVM_DEBUG(dbgs() << "\t\tMaxLatency=" << ID->MaxLatency << '\n');626LLVM_DEBUG(dbgs() << "\t\tNumMicroOps=" << ID->NumMicroOps << '\n');627628// Validation check on the instruction descriptor.629if (Error Err = verifyInstrDesc(*ID, MCI))630return std::move(Err);631632// Now add the new descriptor.633bool IsVariadic = MCDesc.isVariadic();634if ((ID->IsRecyclable = !IsVariadic && !IsVariant)) {635auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);636Descriptors[DKey] = std::move(ID);637return *Descriptors[DKey];638}639640auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);641assert(642!VariantDescriptors.contains(VDKey) &&643"Expected VariantDescriptors to not already have a value for this key.");644VariantDescriptors[VDKey] = std::move(ID);645return *VariantDescriptors[VDKey];646}647648Expected<const InstrDesc &>649InstrBuilder::getOrCreateInstrDesc(const MCInst &MCI,650const SmallVector<Instrument *> &IVec) {651// Cache lookup using SchedClassID from Instrumentation652unsigned SchedClassID = IM.getSchedClassID(MCII, MCI, IVec);653654auto DKey = std::make_pair(MCI.getOpcode(), SchedClassID);655if (Descriptors.find_as(DKey) != Descriptors.end())656return *Descriptors[DKey];657658Expected<unsigned> VariantSchedClassIDOrErr =659getVariantSchedClassID(MCI, SchedClassID);660if (!VariantSchedClassIDOrErr) {661return VariantSchedClassIDOrErr.takeError();662}663664SchedClassID = *VariantSchedClassIDOrErr;665666auto VDKey = std::make_pair(hashMCInst(MCI), SchedClassID);667if (VariantDescriptors.contains(VDKey))668return *VariantDescriptors[VDKey];669670return createInstrDescImpl(MCI, IVec);671}672673STATISTIC(NumVariantInst, "Number of MCInsts that doesn't have static Desc");674675Expected<std::unique_ptr<Instruction>>676InstrBuilder::createInstruction(const MCInst &MCI,677const SmallVector<Instrument *> &IVec) {678Expected<const InstrDesc &> DescOrErr = getOrCreateInstrDesc(MCI, IVec);679if (!DescOrErr)680return DescOrErr.takeError();681const InstrDesc &D = *DescOrErr;682Instruction *NewIS = nullptr;683std::unique_ptr<Instruction> CreatedIS;684bool IsInstRecycled = false;685686if (!D.IsRecyclable)687++NumVariantInst;688689if (D.IsRecyclable && InstRecycleCB) {690if (auto *I = InstRecycleCB(D)) {691NewIS = I;692NewIS->reset();693IsInstRecycled = true;694}695}696if (!IsInstRecycled) {697CreatedIS = std::make_unique<Instruction>(D, MCI.getOpcode());698NewIS = CreatedIS.get();699}700701const MCInstrDesc &MCDesc = MCII.get(MCI.getOpcode());702const MCSchedClassDesc &SCDesc =703*STI.getSchedModel().getSchedClassDesc(D.SchedClassID);704705NewIS->setMayLoad(MCDesc.mayLoad());706NewIS->setMayStore(MCDesc.mayStore());707NewIS->setHasSideEffects(MCDesc.hasUnmodeledSideEffects());708NewIS->setBeginGroup(SCDesc.BeginGroup);709NewIS->setEndGroup(SCDesc.EndGroup);710NewIS->setRetireOOO(SCDesc.RetireOOO);711712// Check if this is a dependency breaking instruction.713APInt Mask;714715bool IsZeroIdiom = false;716bool IsDepBreaking = false;717if (MCIA) {718unsigned ProcID = STI.getSchedModel().getProcessorID();719IsZeroIdiom = MCIA->isZeroIdiom(MCI, Mask, ProcID);720IsDepBreaking =721IsZeroIdiom || MCIA->isDependencyBreaking(MCI, Mask, ProcID);722if (MCIA->isOptimizableRegisterMove(MCI, ProcID))723NewIS->setOptimizableMove();724}725726// Initialize Reads first.727MCPhysReg RegID = 0;728size_t Idx = 0U;729for (const ReadDescriptor &RD : D.Reads) {730if (!RD.isImplicitRead()) {731// explicit read.732const MCOperand &Op = MCI.getOperand(RD.OpIndex);733// Skip non-register operands.734if (!Op.isReg())735continue;736RegID = Op.getReg();737} else {738// Implicit read.739RegID = RD.RegisterID;740}741742// Skip invalid register operands.743if (!RegID)744continue;745746// Okay, this is a register operand. Create a ReadState for it.747ReadState *RS = nullptr;748if (IsInstRecycled && Idx < NewIS->getUses().size()) {749NewIS->getUses()[Idx] = ReadState(RD, RegID);750RS = &NewIS->getUses()[Idx++];751} else {752NewIS->getUses().emplace_back(RD, RegID);753RS = &NewIS->getUses().back();754++Idx;755}756757if (IsDepBreaking) {758// A mask of all zeroes means: explicit input operands are not759// independent.760if (Mask.isZero()) {761if (!RD.isImplicitRead())762RS->setIndependentFromDef();763} else {764// Check if this register operand is independent according to `Mask`.765// Note that Mask may not have enough bits to describe all explicit and766// implicit input operands. If this register operand doesn't have a767// corresponding bit in Mask, then conservatively assume that it is768// dependent.769if (Mask.getBitWidth() > RD.UseIndex) {770// Okay. This map describe register use `RD.UseIndex`.771if (Mask[RD.UseIndex])772RS->setIndependentFromDef();773}774}775}776}777if (IsInstRecycled && Idx < NewIS->getUses().size())778NewIS->getUses().pop_back_n(NewIS->getUses().size() - Idx);779780// Early exit if there are no writes.781if (D.Writes.empty()) {782if (IsInstRecycled)783return llvm::make_error<RecycledInstErr>(NewIS);784else785return std::move(CreatedIS);786}787788// Track register writes that implicitly clear the upper portion of the789// underlying super-registers using an APInt.790APInt WriteMask(D.Writes.size(), 0);791792// Now query the MCInstrAnalysis object to obtain information about which793// register writes implicitly clear the upper portion of a super-register.794if (MCIA)795MCIA->clearsSuperRegisters(MRI, MCI, WriteMask);796797// Initialize writes.798unsigned WriteIndex = 0;799Idx = 0U;800for (const WriteDescriptor &WD : D.Writes) {801RegID = WD.isImplicitWrite() ? WD.RegisterID802: MCI.getOperand(WD.OpIndex).getReg();803// Check if this is a optional definition that references NoReg or a write804// to a constant register.805if ((WD.IsOptionalDef && !RegID) || MRI.isConstant(RegID)) {806++WriteIndex;807continue;808}809810assert(RegID && "Expected a valid register ID!");811if (IsInstRecycled && Idx < NewIS->getDefs().size()) {812NewIS->getDefs()[Idx++] =813WriteState(WD, RegID,814/* ClearsSuperRegs */ WriteMask[WriteIndex],815/* WritesZero */ IsZeroIdiom);816} else {817NewIS->getDefs().emplace_back(WD, RegID,818/* ClearsSuperRegs */ WriteMask[WriteIndex],819/* WritesZero */ IsZeroIdiom);820++Idx;821}822++WriteIndex;823}824if (IsInstRecycled && Idx < NewIS->getDefs().size())825NewIS->getDefs().pop_back_n(NewIS->getDefs().size() - Idx);826827if (IsInstRecycled)828return llvm::make_error<RecycledInstErr>(NewIS);829else830return std::move(CreatedIS);831}832} // namespace mca833} // namespace llvm834835836