Path: blob/main/contrib/llvm-project/llvm/tools/llvm-mca/Views/TimelineView.cpp
35290 views
//===--------------------- TimelineView.cpp ---------------------*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7/// \brief8///9/// This file implements the TimelineView interface.10///11//===----------------------------------------------------------------------===//1213#include "Views/TimelineView.h"14#include <numeric>1516namespace llvm {17namespace mca {1819TimelineView::TimelineView(const MCSubtargetInfo &sti, MCInstPrinter &Printer,20llvm::ArrayRef<llvm::MCInst> S, unsigned Iterations,21unsigned Cycles)22: InstructionView(sti, Printer, S), CurrentCycle(0),23MaxCycle(Cycles == 0 ? std::numeric_limits<unsigned>::max() : Cycles),24LastCycle(0), WaitTime(S.size()), UsedBuffer(S.size()) {25unsigned NumInstructions = getSource().size();26assert(Iterations && "Invalid number of iterations specified!");27NumInstructions *= Iterations;28Timeline.resize(NumInstructions);29TimelineViewEntry InvalidTVEntry = {-1, 0, 0, 0, 0};30std::fill(Timeline.begin(), Timeline.end(), InvalidTVEntry);3132WaitTimeEntry NullWTEntry = {0, 0, 0};33std::fill(WaitTime.begin(), WaitTime.end(), NullWTEntry);3435std::pair<unsigned, int> NullUsedBufferEntry = {/* Invalid resource ID*/ 0,36/* unknown buffer size */ -1};37std::fill(UsedBuffer.begin(), UsedBuffer.end(), NullUsedBufferEntry);38}3940void TimelineView::onReservedBuffers(const InstRef &IR,41ArrayRef<unsigned> Buffers) {42if (IR.getSourceIndex() >= getSource().size())43return;4445const MCSchedModel &SM = getSubTargetInfo().getSchedModel();46std::pair<unsigned, int> BufferInfo = {0, -1};47for (const unsigned Buffer : Buffers) {48const MCProcResourceDesc &MCDesc = *SM.getProcResource(Buffer);49if (!BufferInfo.first || BufferInfo.second > MCDesc.BufferSize) {50BufferInfo.first = Buffer;51BufferInfo.second = MCDesc.BufferSize;52}53}5455UsedBuffer[IR.getSourceIndex()] = BufferInfo;56}5758void TimelineView::onEvent(const HWInstructionEvent &Event) {59const unsigned Index = Event.IR.getSourceIndex();60if (Index >= Timeline.size())61return;6263switch (Event.Type) {64case HWInstructionEvent::Retired: {65TimelineViewEntry &TVEntry = Timeline[Index];66if (CurrentCycle < MaxCycle)67TVEntry.CycleRetired = CurrentCycle;6869// Update the WaitTime entry which corresponds to this Index.70assert(TVEntry.CycleDispatched >= 0 && "Invalid TVEntry found!");71unsigned CycleDispatched = static_cast<unsigned>(TVEntry.CycleDispatched);72WaitTimeEntry &WTEntry = WaitTime[Index % getSource().size()];73WTEntry.CyclesSpentInSchedulerQueue +=74TVEntry.CycleIssued - CycleDispatched;75assert(CycleDispatched <= TVEntry.CycleReady &&76"Instruction cannot be ready if it hasn't been dispatched yet!");77WTEntry.CyclesSpentInSQWhileReady +=78TVEntry.CycleIssued - TVEntry.CycleReady;79if (CurrentCycle > TVEntry.CycleExecuted) {80WTEntry.CyclesSpentAfterWBAndBeforeRetire +=81(CurrentCycle - 1) - TVEntry.CycleExecuted;82}83break;84}85case HWInstructionEvent::Ready:86Timeline[Index].CycleReady = CurrentCycle;87break;88case HWInstructionEvent::Issued:89Timeline[Index].CycleIssued = CurrentCycle;90break;91case HWInstructionEvent::Executed:92Timeline[Index].CycleExecuted = CurrentCycle;93break;94case HWInstructionEvent::Dispatched:95// There may be multiple dispatch events. Microcoded instructions that are96// expanded into multiple uOps may require multiple dispatch cycles. Here,97// we want to capture the first dispatch cycle.98if (Timeline[Index].CycleDispatched == -1)99Timeline[Index].CycleDispatched = static_cast<int>(CurrentCycle);100break;101default:102return;103}104if (CurrentCycle < MaxCycle)105LastCycle = std::max(LastCycle, CurrentCycle);106}107108static raw_ostream::Colors chooseColor(unsigned CumulativeCycles,109unsigned Executions, int BufferSize) {110if (CumulativeCycles && BufferSize < 0)111return raw_ostream::MAGENTA;112unsigned Size = static_cast<unsigned>(BufferSize);113if (CumulativeCycles >= Size * Executions)114return raw_ostream::RED;115if ((CumulativeCycles * 2) >= Size * Executions)116return raw_ostream::YELLOW;117return raw_ostream::SAVEDCOLOR;118}119120static void tryChangeColor(raw_ostream &OS, unsigned Cycles,121unsigned Executions, int BufferSize) {122if (!OS.has_colors())123return;124125raw_ostream::Colors Color = chooseColor(Cycles, Executions, BufferSize);126if (Color == raw_ostream::SAVEDCOLOR) {127OS.resetColor();128return;129}130OS.changeColor(Color, /* bold */ true, /* BG */ false);131}132133void TimelineView::printWaitTimeEntry(formatted_raw_ostream &OS,134const WaitTimeEntry &Entry,135unsigned SourceIndex,136unsigned Executions) const {137bool PrintingTotals = SourceIndex == getSource().size();138unsigned CumulativeExecutions = PrintingTotals ? Timeline.size() : Executions;139140if (!PrintingTotals)141OS << SourceIndex << '.';142143OS.PadToColumn(7);144145double AverageTime1, AverageTime2, AverageTime3;146AverageTime1 =147(double)(Entry.CyclesSpentInSchedulerQueue * 10) / CumulativeExecutions;148AverageTime2 =149(double)(Entry.CyclesSpentInSQWhileReady * 10) / CumulativeExecutions;150AverageTime3 = (double)(Entry.CyclesSpentAfterWBAndBeforeRetire * 10) /151CumulativeExecutions;152153OS << Executions;154OS.PadToColumn(13);155156int BufferSize = PrintingTotals ? 0 : UsedBuffer[SourceIndex].second;157if (!PrintingTotals)158tryChangeColor(OS, Entry.CyclesSpentInSchedulerQueue, CumulativeExecutions,159BufferSize);160OS << format("%.1f", floor(AverageTime1 + 0.5) / 10);161OS.PadToColumn(20);162if (!PrintingTotals)163tryChangeColor(OS, Entry.CyclesSpentInSQWhileReady, CumulativeExecutions,164BufferSize);165OS << format("%.1f", floor(AverageTime2 + 0.5) / 10);166OS.PadToColumn(27);167if (!PrintingTotals)168tryChangeColor(OS, Entry.CyclesSpentAfterWBAndBeforeRetire,169CumulativeExecutions,170getSubTargetInfo().getSchedModel().MicroOpBufferSize);171OS << format("%.1f", floor(AverageTime3 + 0.5) / 10);172173if (OS.has_colors())174OS.resetColor();175OS.PadToColumn(34);176}177178void TimelineView::printAverageWaitTimes(raw_ostream &OS) const {179std::string Header =180"\n\nAverage Wait times (based on the timeline view):\n"181"[0]: Executions\n"182"[1]: Average time spent waiting in a scheduler's queue\n"183"[2]: Average time spent waiting in a scheduler's queue while ready\n"184"[3]: Average time elapsed from WB until retire stage\n\n"185" [0] [1] [2] [3]\n";186OS << Header;187formatted_raw_ostream FOS(OS);188unsigned Executions = Timeline.size() / getSource().size();189unsigned IID = 0;190for (const MCInst &Inst : getSource()) {191printWaitTimeEntry(FOS, WaitTime[IID], IID, Executions);192FOS << " " << printInstructionString(Inst) << '\n';193FOS.flush();194++IID;195}196197// If the timeline contains more than one instruction,198// let's also print global averages.199if (getSource().size() != 1) {200WaitTimeEntry TotalWaitTime = std::accumulate(201WaitTime.begin(), WaitTime.end(), WaitTimeEntry{0, 0, 0},202[](const WaitTimeEntry &A, const WaitTimeEntry &B) {203return WaitTimeEntry{204A.CyclesSpentInSchedulerQueue + B.CyclesSpentInSchedulerQueue,205A.CyclesSpentInSQWhileReady + B.CyclesSpentInSQWhileReady,206A.CyclesSpentAfterWBAndBeforeRetire +207B.CyclesSpentAfterWBAndBeforeRetire};208});209printWaitTimeEntry(FOS, TotalWaitTime, IID, Executions);210FOS << " "211<< "<total>" << '\n';212FOS.flush();213}214}215216void TimelineView::printTimelineViewEntry(formatted_raw_ostream &OS,217const TimelineViewEntry &Entry,218unsigned Iteration,219unsigned SourceIndex) const {220if (Iteration == 0 && SourceIndex == 0)221OS << '\n';222OS << '[' << Iteration << ',' << SourceIndex << ']';223OS.PadToColumn(10);224assert(Entry.CycleDispatched >= 0 && "Invalid TimelineViewEntry!");225unsigned CycleDispatched = static_cast<unsigned>(Entry.CycleDispatched);226for (unsigned I = 0, E = CycleDispatched; I < E; ++I)227OS << ((I % 5 == 0) ? '.' : ' ');228OS << TimelineView::DisplayChar::Dispatched;229if (CycleDispatched != Entry.CycleExecuted) {230// Zero latency instructions have the same value for CycleDispatched,231// CycleIssued and CycleExecuted.232for (unsigned I = CycleDispatched + 1, E = Entry.CycleIssued; I < E; ++I)233OS << TimelineView::DisplayChar::Waiting;234if (Entry.CycleIssued == Entry.CycleExecuted)235OS << TimelineView::DisplayChar::DisplayChar::Executed;236else {237if (CycleDispatched != Entry.CycleIssued)238OS << TimelineView::DisplayChar::Executing;239for (unsigned I = Entry.CycleIssued + 1, E = Entry.CycleExecuted; I < E;240++I)241OS << TimelineView::DisplayChar::Executing;242OS << TimelineView::DisplayChar::Executed;243}244}245246for (unsigned I = Entry.CycleExecuted + 1, E = Entry.CycleRetired; I < E; ++I)247OS << TimelineView::DisplayChar::RetireLag;248if (Entry.CycleExecuted < Entry.CycleRetired)249OS << TimelineView::DisplayChar::Retired;250251// Skip other columns.252for (unsigned I = Entry.CycleRetired + 1, E = LastCycle; I <= E; ++I)253OS << ((I % 5 == 0 || I == LastCycle) ? '.' : ' ');254}255256static void printTimelineHeader(formatted_raw_ostream &OS, unsigned Cycles) {257OS << "\n\nTimeline view:\n";258if (Cycles >= 10) {259OS.PadToColumn(10);260for (unsigned I = 0; I <= Cycles; ++I) {261if (((I / 10) & 1) == 0)262OS << ' ';263else264OS << I % 10;265}266OS << '\n';267}268269OS << "Index";270OS.PadToColumn(10);271for (unsigned I = 0; I <= Cycles; ++I) {272if (((I / 10) & 1) == 0)273OS << I % 10;274else275OS << ' ';276}277OS << '\n';278}279280void TimelineView::printTimeline(raw_ostream &OS) const {281formatted_raw_ostream FOS(OS);282printTimelineHeader(FOS, LastCycle);283FOS.flush();284285unsigned IID = 0;286ArrayRef<llvm::MCInst> Source = getSource();287const unsigned Iterations = Timeline.size() / Source.size();288for (unsigned Iteration = 0; Iteration < Iterations; ++Iteration) {289for (const MCInst &Inst : Source) {290const TimelineViewEntry &Entry = Timeline[IID];291// When an instruction is retired after timeline-max-cycles,292// its CycleRetired is left at 0. However, it's possible for293// a 0 latency instruction to be retired during cycle 0 and we294// don't want to early exit in that case. The CycleExecuted295// attribute is set correctly whether or not it is greater296// than timeline-max-cycles so we can use that to ensure297// we don't early exit because of a 0 latency instruction.298if (Entry.CycleRetired == 0 && Entry.CycleExecuted != 0) {299FOS << "Truncated display due to cycle limit\n";300return;301}302303unsigned SourceIndex = IID % Source.size();304printTimelineViewEntry(FOS, Entry, Iteration, SourceIndex);305FOS << " " << printInstructionString(Inst) << '\n';306FOS.flush();307308++IID;309}310}311}312313json::Value TimelineView::toJSON() const {314json::Array TimelineInfo;315316for (const TimelineViewEntry &TLE : Timeline) {317// Check if the timeline-max-cycles has been reached.318if (!TLE.CycleRetired && TLE.CycleExecuted)319break;320321TimelineInfo.push_back(322json::Object({{"CycleDispatched", TLE.CycleDispatched},323{"CycleReady", TLE.CycleReady},324{"CycleIssued", TLE.CycleIssued},325{"CycleExecuted", TLE.CycleExecuted},326{"CycleRetired", TLE.CycleRetired}}));327}328return json::Object({{"TimelineInfo", std::move(TimelineInfo)}});329}330} // namespace mca331} // namespace llvm332333334