Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/Common/VarLenCodeEmitterGen.cpp
35290 views
//===- VarLenCodeEmitterGen.cpp - CEG for variable-length insts -----------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// The CodeEmitterGen component for variable-length instructions.9//10// The basic CodeEmitterGen is almost exclusively designed for fixed-11// length instructions. A good analogy for its encoding scheme is how printf12// works: The (immutable) formatting string represent the fixed values in the13// encoded instruction. Placeholders (i.e. %something), on the other hand,14// represent encoding for instruction operands.15// ```16// printf("1101 %src 1001 %dst", <encoded value for operand `src`>,17// <encoded value for operand `dst`>);18// ```19// VarLenCodeEmitterGen in this file provides an alternative encoding scheme20// that works more like a C++ stream operator:21// ```22// OS << 0b1101;23// if (Cond)24// OS << OperandEncoding0;25// OS << 0b1001 << OperandEncoding1;26// ```27// You are free to concatenate arbitrary types (and sizes) of encoding28// fragments on any bit position, bringing more flexibilities on defining29// encoding for variable-length instructions.30//31// In a more specific way, instruction encoding is represented by a DAG type32// `Inst` field. Here is an example:33// ```34// dag Inst = (descend 0b1101, (operand "$src", 4), 0b1001,35// (operand "$dst", 4));36// ```37// It represents the following instruction encoding:38// ```39// MSB LSB40// 1101<encoding for operand src>1001<encoding for operand dst>41// ```42// For more details about DAG operators in the above snippet, please43// refer to \file include/llvm/Target/Target.td.44//45// VarLenCodeEmitter will convert the above DAG into the same helper function46// generated by CodeEmitter, `MCCodeEmitter::getBinaryCodeForInstr` (except47// for few details).48//49//===----------------------------------------------------------------------===//5051#include "VarLenCodeEmitterGen.h"52#include "CodeGenHwModes.h"53#include "CodeGenInstruction.h"54#include "CodeGenTarget.h"55#include "InfoByHwMode.h"56#include "llvm/ADT/ArrayRef.h"57#include "llvm/ADT/DenseMap.h"58#include "llvm/Support/raw_ostream.h"59#include "llvm/TableGen/Error.h"60#include "llvm/TableGen/Record.h"6162#include <algorithm>6364using namespace llvm;6566namespace {6768class VarLenCodeEmitterGen {69RecordKeeper &Records;7071// Representaton of alternative encodings used for HwModes.72using AltEncodingTy = int;73// Mode identifier when only one encoding is defined.74const AltEncodingTy Universal = -1;75// The set of alternative instruction encodings with a descriptive76// name suffix to improve readability of the generated code.77std::map<AltEncodingTy, std::string> Modes;7879DenseMap<Record *, DenseMap<AltEncodingTy, VarLenInst>> VarLenInsts;8081// Emit based values (i.e. fixed bits in the encoded instructions)82void emitInstructionBaseValues(83raw_ostream &OS,84ArrayRef<const CodeGenInstruction *> NumberedInstructions,85CodeGenTarget &Target, AltEncodingTy Mode);8687std::string getInstructionCases(Record *R, CodeGenTarget &Target);88std::string getInstructionCaseForEncoding(Record *R, AltEncodingTy Mode,89const VarLenInst &VLI,90CodeGenTarget &Target, int I);9192public:93explicit VarLenCodeEmitterGen(RecordKeeper &R) : Records(R) {}9495void run(raw_ostream &OS);96};97} // end anonymous namespace9899// Get the name of custom encoder or decoder, if there is any.100// Returns `{encoder name, decoder name}`.101static std::pair<StringRef, StringRef> getCustomCoders(ArrayRef<Init *> Args) {102std::pair<StringRef, StringRef> Result;103for (const auto *Arg : Args) {104const auto *DI = dyn_cast<DagInit>(Arg);105if (!DI)106continue;107const Init *Op = DI->getOperator();108if (!isa<DefInit>(Op))109continue;110// syntax: `(<encoder | decoder> "function name")`111StringRef OpName = cast<DefInit>(Op)->getDef()->getName();112if (OpName != "encoder" && OpName != "decoder")113continue;114if (!DI->getNumArgs() || !isa<StringInit>(DI->getArg(0)))115PrintFatalError("expected '" + OpName +116"' directive to be followed by a custom function name.");117StringRef FuncName = cast<StringInit>(DI->getArg(0))->getValue();118if (OpName == "encoder")119Result.first = FuncName;120else121Result.second = FuncName;122}123return Result;124}125126VarLenInst::VarLenInst(const DagInit *DI, const RecordVal *TheDef)127: TheDef(TheDef), NumBits(0U), HasDynamicSegment(false) {128buildRec(DI);129for (const auto &S : Segments)130NumBits += S.BitWidth;131}132133void VarLenInst::buildRec(const DagInit *DI) {134assert(TheDef && "The def record is nullptr ?");135136std::string Op = DI->getOperator()->getAsString();137138if (Op == "ascend" || Op == "descend") {139bool Reverse = Op == "descend";140int i = Reverse ? DI->getNumArgs() - 1 : 0;141int e = Reverse ? -1 : DI->getNumArgs();142int s = Reverse ? -1 : 1;143for (; i != e; i += s) {144const Init *Arg = DI->getArg(i);145if (const auto *BI = dyn_cast<BitsInit>(Arg)) {146if (!BI->isComplete())147PrintFatalError(TheDef->getLoc(),148"Expecting complete bits init in `" + Op + "`");149Segments.push_back({BI->getNumBits(), BI});150} else if (const auto *BI = dyn_cast<BitInit>(Arg)) {151if (!BI->isConcrete())152PrintFatalError(TheDef->getLoc(),153"Expecting concrete bit init in `" + Op + "`");154Segments.push_back({1, BI});155} else if (const auto *SubDI = dyn_cast<DagInit>(Arg)) {156buildRec(SubDI);157} else {158PrintFatalError(TheDef->getLoc(), "Unrecognized type of argument in `" +159Op + "`: " + Arg->getAsString());160}161}162} else if (Op == "operand") {163// (operand <operand name>, <# of bits>,164// [(encoder <custom encoder>)][, (decoder <custom decoder>)])165if (DI->getNumArgs() < 2)166PrintFatalError(TheDef->getLoc(),167"Expecting at least 2 arguments for `operand`");168HasDynamicSegment = true;169const Init *OperandName = DI->getArg(0), *NumBits = DI->getArg(1);170if (!isa<StringInit>(OperandName) || !isa<IntInit>(NumBits))171PrintFatalError(TheDef->getLoc(), "Invalid argument types for `operand`");172173auto NumBitsVal = cast<IntInit>(NumBits)->getValue();174if (NumBitsVal <= 0)175PrintFatalError(TheDef->getLoc(), "Invalid number of bits for `operand`");176177auto [CustomEncoder, CustomDecoder] =178getCustomCoders(DI->getArgs().slice(2));179Segments.push_back({static_cast<unsigned>(NumBitsVal), OperandName,180CustomEncoder, CustomDecoder});181} else if (Op == "slice") {182// (slice <operand name>, <high / low bit>, <low / high bit>,183// [(encoder <custom encoder>)][, (decoder <custom decoder>)])184if (DI->getNumArgs() < 3)185PrintFatalError(TheDef->getLoc(),186"Expecting at least 3 arguments for `slice`");187HasDynamicSegment = true;188Init *OperandName = DI->getArg(0), *HiBit = DI->getArg(1),189*LoBit = DI->getArg(2);190if (!isa<StringInit>(OperandName) || !isa<IntInit>(HiBit) ||191!isa<IntInit>(LoBit))192PrintFatalError(TheDef->getLoc(), "Invalid argument types for `slice`");193194auto HiBitVal = cast<IntInit>(HiBit)->getValue(),195LoBitVal = cast<IntInit>(LoBit)->getValue();196if (HiBitVal < 0 || LoBitVal < 0)197PrintFatalError(TheDef->getLoc(), "Invalid bit range for `slice`");198bool NeedSwap = false;199unsigned NumBits = 0U;200if (HiBitVal < LoBitVal) {201NeedSwap = true;202NumBits = static_cast<unsigned>(LoBitVal - HiBitVal + 1);203} else {204NumBits = static_cast<unsigned>(HiBitVal - LoBitVal + 1);205}206207auto [CustomEncoder, CustomDecoder] =208getCustomCoders(DI->getArgs().slice(3));209210if (NeedSwap) {211// Normalization: Hi bit should always be the second argument.212Init *const NewArgs[] = {OperandName, LoBit, HiBit};213Segments.push_back({NumBits,214DagInit::get(DI->getOperator(), nullptr, NewArgs, {}),215CustomEncoder, CustomDecoder});216} else {217Segments.push_back({NumBits, DI, CustomEncoder, CustomDecoder});218}219}220}221222void VarLenCodeEmitterGen::run(raw_ostream &OS) {223CodeGenTarget Target(Records);224auto Insts = Records.getAllDerivedDefinitions("Instruction");225226auto NumberedInstructions = Target.getInstructionsByEnumValue();227228for (const CodeGenInstruction *CGI : NumberedInstructions) {229Record *R = CGI->TheDef;230// Create the corresponding VarLenInst instance.231if (R->getValueAsString("Namespace") == "TargetOpcode" ||232R->getValueAsBit("isPseudo"))233continue;234235// Setup alternative encodings according to HwModes236if (const RecordVal *RV = R->getValue("EncodingInfos")) {237if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {238const CodeGenHwModes &HWM = Target.getHwModes();239EncodingInfoByHwMode EBM(DI->getDef(), HWM);240for (auto &KV : EBM) {241AltEncodingTy Mode = KV.first;242Modes.insert({Mode, "_" + HWM.getMode(Mode).Name.str()});243Record *EncodingDef = KV.second;244RecordVal *RV = EncodingDef->getValue("Inst");245DagInit *DI = cast<DagInit>(RV->getValue());246VarLenInsts[R].insert({Mode, VarLenInst(DI, RV)});247}248continue;249}250}251RecordVal *RV = R->getValue("Inst");252DagInit *DI = cast<DagInit>(RV->getValue());253VarLenInsts[R].insert({Universal, VarLenInst(DI, RV)});254}255256if (Modes.empty())257Modes.insert({Universal, ""}); // Base case, skip suffix.258259// Emit function declaration260OS << "void " << Target.getName()261<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"262<< " SmallVectorImpl<MCFixup> &Fixups,\n"263<< " APInt &Inst,\n"264<< " APInt &Scratch,\n"265<< " const MCSubtargetInfo &STI) const {\n";266267// Emit instruction base values268for (const auto &Mode : Modes)269emitInstructionBaseValues(OS, NumberedInstructions, Target, Mode.first);270271if (Modes.size() > 1) {272OS << " unsigned Mode = STI.getHwMode();\n";273}274275for (const auto &Mode : Modes) {276// Emit helper function to retrieve base values.277OS << " auto getInstBits" << Mode.second278<< " = [&](unsigned Opcode) -> APInt {\n"279<< " unsigned NumBits = Index" << Mode.second << "[Opcode][0];\n"280<< " if (!NumBits)\n"281<< " return APInt::getZeroWidth();\n"282<< " unsigned Idx = Index" << Mode.second << "[Opcode][1];\n"283<< " ArrayRef<uint64_t> Data(&InstBits" << Mode.second << "[Idx], "284<< "APInt::getNumWords(NumBits));\n"285<< " return APInt(NumBits, Data);\n"286<< " };\n";287}288289// Map to accumulate all the cases.290std::map<std::string, std::vector<std::string>> CaseMap;291292// Construct all cases statement for each opcode293for (Record *R : Insts) {294if (R->getValueAsString("Namespace") == "TargetOpcode" ||295R->getValueAsBit("isPseudo"))296continue;297std::string InstName =298(R->getValueAsString("Namespace") + "::" + R->getName()).str();299std::string Case = getInstructionCases(R, Target);300301CaseMap[Case].push_back(std::move(InstName));302}303304// Emit initial function code305OS << " const unsigned opcode = MI.getOpcode();\n"306<< " switch (opcode) {\n";307308// Emit each case statement309for (const auto &C : CaseMap) {310const std::string &Case = C.first;311const auto &InstList = C.second;312313ListSeparator LS("\n");314for (const auto &InstName : InstList)315OS << LS << " case " << InstName << ":";316317OS << " {\n";318OS << Case;319OS << " break;\n"320<< " }\n";321}322// Default case: unhandled opcode323OS << " default:\n"324<< " std::string msg;\n"325<< " raw_string_ostream Msg(msg);\n"326<< " Msg << \"Not supported instr: \" << MI;\n"327<< " report_fatal_error(Msg.str().c_str());\n"328<< " }\n";329OS << "}\n\n";330}331332static void emitInstBits(raw_ostream &IS, raw_ostream &SS, const APInt &Bits,333unsigned &Index) {334if (!Bits.getNumWords()) {335IS.indent(4) << "{/*NumBits*/0, /*Index*/0},";336return;337}338339IS.indent(4) << "{/*NumBits*/" << Bits.getBitWidth() << ", " << "/*Index*/"340<< Index << "},";341342SS.indent(4);343for (unsigned I = 0; I < Bits.getNumWords(); ++I, ++Index)344SS << "UINT64_C(" << utostr(Bits.getRawData()[I]) << "),";345}346347void VarLenCodeEmitterGen::emitInstructionBaseValues(348raw_ostream &OS, ArrayRef<const CodeGenInstruction *> NumberedInstructions,349CodeGenTarget &Target, AltEncodingTy Mode) {350std::string IndexArray, StorageArray;351raw_string_ostream IS(IndexArray), SS(StorageArray);352353IS << " static const unsigned Index" << Modes[Mode] << "[][2] = {\n";354SS << " static const uint64_t InstBits" << Modes[Mode] << "[] = {\n";355356unsigned NumFixedValueWords = 0U;357for (const CodeGenInstruction *CGI : NumberedInstructions) {358Record *R = CGI->TheDef;359360if (R->getValueAsString("Namespace") == "TargetOpcode" ||361R->getValueAsBit("isPseudo")) {362IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\n";363continue;364}365366const auto InstIt = VarLenInsts.find(R);367if (InstIt == VarLenInsts.end())368PrintFatalError(R, "VarLenInst not found for this record");369auto ModeIt = InstIt->second.find(Mode);370if (ModeIt == InstIt->second.end())371ModeIt = InstIt->second.find(Universal);372if (ModeIt == InstIt->second.end()) {373IS.indent(4) << "{/*NumBits*/0, /*Index*/0},\t" << "// " << R->getName()374<< " no encoding\n";375continue;376}377const VarLenInst &VLI = ModeIt->second;378unsigned i = 0U, BitWidth = VLI.size();379380// Start by filling in fixed values.381APInt Value(BitWidth, 0);382auto SI = VLI.begin(), SE = VLI.end();383// Scan through all the segments that have fixed-bits values.384while (i < BitWidth && SI != SE) {385unsigned SegmentNumBits = SI->BitWidth;386if (const auto *BI = dyn_cast<BitsInit>(SI->Value)) {387for (unsigned Idx = 0U; Idx != SegmentNumBits; ++Idx) {388auto *B = cast<BitInit>(BI->getBit(Idx));389Value.setBitVal(i + Idx, B->getValue());390}391}392if (const auto *BI = dyn_cast<BitInit>(SI->Value))393Value.setBitVal(i, BI->getValue());394395i += SegmentNumBits;396++SI;397}398399emitInstBits(IS, SS, Value, NumFixedValueWords);400IS << '\t' << "// " << R->getName() << "\n";401if (Value.getNumWords())402SS << '\t' << "// " << R->getName() << "\n";403}404IS.indent(4) << "{/*NumBits*/0, /*Index*/0}\n };\n";405SS.indent(4) << "UINT64_C(0)\n };\n";406407OS << IndexArray << StorageArray;408}409410std::string VarLenCodeEmitterGen::getInstructionCases(Record *R,411CodeGenTarget &Target) {412auto It = VarLenInsts.find(R);413if (It == VarLenInsts.end())414PrintFatalError(R, "Parsed encoding record not found");415const auto &Map = It->second;416417// Is this instructions encoding universal (same for all modes)?418// Allways true if there is only one mode.419if (Map.size() == 1 && Map.begin()->first == Universal) {420// Universal, just pick the first mode.421AltEncodingTy Mode = Modes.begin()->first;422const auto &Encoding = Map.begin()->second;423return getInstructionCaseForEncoding(R, Mode, Encoding, Target, 6);424}425426std::string Case;427Case += " switch (Mode) {\n";428Case += " default: llvm_unreachable(\"Unhandled Mode\");\n";429for (const auto &Mode : Modes) {430Case += " case " + itostr(Mode.first) + ": {\n";431const auto &It = Map.find(Mode.first);432if (It == Map.end()) {433Case +=434" llvm_unreachable(\"Undefined encoding in this mode\");\n";435} else {436Case +=437getInstructionCaseForEncoding(R, It->first, It->second, Target, 8);438}439Case += " break;\n";440Case += " }\n";441}442Case += " }\n";443return Case;444}445446std::string VarLenCodeEmitterGen::getInstructionCaseForEncoding(447Record *R, AltEncodingTy Mode, const VarLenInst &VLI, CodeGenTarget &Target,448int I) {449450CodeGenInstruction &CGI = Target.getInstruction(R);451452std::string Case;453raw_string_ostream SS(Case);454// Populate based value.455SS.indent(I) << "Inst = getInstBits" << Modes[Mode] << "(opcode);\n";456457// Process each segment in VLI.458size_t Offset = 0U;459unsigned HighScratchAccess = 0U;460for (const auto &ES : VLI) {461unsigned NumBits = ES.BitWidth;462const Init *Val = ES.Value;463// If it's a StringInit or DagInit, it's a reference to an operand464// or part of an operand.465if (isa<StringInit>(Val) || isa<DagInit>(Val)) {466StringRef OperandName;467unsigned LoBit = 0U;468if (const auto *SV = dyn_cast<StringInit>(Val)) {469OperandName = SV->getValue();470} else {471// Normalized: (slice <operand name>, <high bit>, <low bit>)472const auto *DV = cast<DagInit>(Val);473OperandName = cast<StringInit>(DV->getArg(0))->getValue();474LoBit = static_cast<unsigned>(cast<IntInit>(DV->getArg(2))->getValue());475}476477auto OpIdx = CGI.Operands.ParseOperandName(OperandName);478unsigned FlatOpIdx = CGI.Operands.getFlattenedOperandNumber(OpIdx);479StringRef CustomEncoder =480CGI.Operands[OpIdx.first].EncoderMethodNames[OpIdx.second];481if (ES.CustomEncoder.size())482CustomEncoder = ES.CustomEncoder;483484SS.indent(I) << "Scratch.clearAllBits();\n";485SS.indent(I) << "// op: " << OperandName.drop_front(1) << "\n";486if (CustomEncoder.empty())487SS.indent(I) << "getMachineOpValue(MI, MI.getOperand("488<< utostr(FlatOpIdx) << ")";489else490SS.indent(I) << CustomEncoder << "(MI, /*OpIdx=*/" << utostr(FlatOpIdx);491492SS << ", /*Pos=*/" << utostr(Offset) << ", Scratch, Fixups, STI);\n";493494SS.indent(I) << "Inst.insertBits(" << "Scratch.extractBits("495<< utostr(NumBits) << ", " << utostr(LoBit) << ")" << ", "496<< Offset << ");\n";497498HighScratchAccess = std::max(HighScratchAccess, NumBits + LoBit);499}500Offset += NumBits;501}502503StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");504if (!PostEmitter.empty())505SS.indent(I) << "Inst = " << PostEmitter << "(MI, Inst, STI);\n";506507// Resize the scratch buffer if it's to small.508std::string ScratchResizeStr;509if (VLI.size() && !VLI.isFixedValueOnly()) {510raw_string_ostream RS(ScratchResizeStr);511RS.indent(I) << "if (Scratch.getBitWidth() < " << HighScratchAccess512<< ") { Scratch = Scratch.zext(" << HighScratchAccess513<< "); }\n";514}515516return ScratchResizeStr + Case;517}518519namespace llvm {520521void emitVarLenCodeEmitter(RecordKeeper &R, raw_ostream &OS) {522VarLenCodeEmitterGen(R).run(OS);523}524525} // end namespace llvm526527528