Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/CodeEmitterGen.cpp
35258 views
//===- CodeEmitterGen.cpp - Code Emitter Generator ------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// CodeEmitterGen uses the descriptions of instructions and their fields to
// construct an automated code emitter: a function called
// getBinaryCodeForInstr() that, given an MCInst, returns the value of the
// instruction - either as a uint64_t or as an APInt, depending on the
// maximum bit width of all Inst definitions.
//
// In addition, it generates another function called getOperandBitOffset()
// that, given an MCInst and an operand index, returns the minimum of indices of
// all bits that carry some portion of the respective operand. When the target's
// encodeInstruction() stores the instruction in a little-endian byte order, the
// returned value is the offset of the start of the operand in the encoded
// instruction.
Other targets might need to adjust the returned value according20// to their encodeInstruction() implementation.21//22//===----------------------------------------------------------------------===//2324#include "Common/CodeGenHwModes.h"25#include "Common/CodeGenInstruction.h"26#include "Common/CodeGenTarget.h"27#include "Common/InfoByHwMode.h"28#include "Common/VarLenCodeEmitterGen.h"29#include "llvm/ADT/APInt.h"30#include "llvm/ADT/ArrayRef.h"31#include "llvm/ADT/StringExtras.h"32#include "llvm/Support/Casting.h"33#include "llvm/Support/raw_ostream.h"34#include "llvm/TableGen/Error.h"35#include "llvm/TableGen/Record.h"36#include "llvm/TableGen/TableGenBackend.h"37#include <cstdint>38#include <map>39#include <set>40#include <string>41#include <utility>42#include <vector>4344using namespace llvm;4546namespace {4748class CodeEmitterGen {49RecordKeeper &Records;5051public:52CodeEmitterGen(RecordKeeper &R) : Records(R) {}5354void run(raw_ostream &o);5556private:57int getVariableBit(const std::string &VarName, BitsInit *BI, int bit);58std::pair<std::string, std::string>59getInstructionCases(Record *R, CodeGenTarget &Target);60void addInstructionCasesForEncoding(Record *R, Record *EncodingDef,61CodeGenTarget &Target, std::string &Case,62std::string &BitOffsetCase);63bool addCodeToMergeInOperand(Record *R, BitsInit *BI,64const std::string &VarName, std::string &Case,65std::string &BitOffsetCase,66CodeGenTarget &Target);6768void emitInstructionBaseValues(69raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,70CodeGenTarget &Target, unsigned HwMode = DefaultMode);71void72emitCaseMap(raw_ostream &o,73const std::map<std::string, std::vector<std::string>> &CaseMap);74unsigned BitWidth = 0u;75bool UseAPInt = false;76};7778// If the VarBitInit at position 'bit' matches the specified variable then79// return the variable bit position. 
Otherwise return -1.80int CodeEmitterGen::getVariableBit(const std::string &VarName, BitsInit *BI,81int bit) {82if (VarBitInit *VBI = dyn_cast<VarBitInit>(BI->getBit(bit))) {83if (VarInit *VI = dyn_cast<VarInit>(VBI->getBitVar()))84if (VI->getName() == VarName)85return VBI->getBitNum();86} else if (VarInit *VI = dyn_cast<VarInit>(BI->getBit(bit))) {87if (VI->getName() == VarName)88return 0;89}9091return -1;92}9394// Returns true if it succeeds, false if an error.95bool CodeEmitterGen::addCodeToMergeInOperand(Record *R, BitsInit *BI,96const std::string &VarName,97std::string &Case,98std::string &BitOffsetCase,99CodeGenTarget &Target) {100CodeGenInstruction &CGI = Target.getInstruction(R);101102// Determine if VarName actually contributes to the Inst encoding.103int bit = BI->getNumBits() - 1;104105// Scan for a bit that this contributed to.106for (; bit >= 0;) {107if (getVariableBit(VarName, BI, bit) != -1)108break;109110--bit;111}112113// If we found no bits, ignore this value, otherwise emit the call to get the114// operand encoding.115if (bit < 0)116return true;117118// If the operand matches by name, reference according to that119// operand number. 
Non-matching operands are assumed to be in120// order.121unsigned OpIdx;122std::pair<unsigned, unsigned> SubOp;123if (CGI.Operands.hasSubOperandAlias(VarName, SubOp)) {124OpIdx = CGI.Operands[SubOp.first].MIOperandNo + SubOp.second;125} else if (CGI.Operands.hasOperandNamed(VarName, OpIdx)) {126// Get the machine operand number for the indicated operand.127OpIdx = CGI.Operands[OpIdx].MIOperandNo;128} else {129PrintError(R, Twine("No operand named ") + VarName + " in record " +130R->getName());131return false;132}133134if (CGI.Operands.isFlatOperandNotEmitted(OpIdx)) {135PrintError(R,136"Operand " + VarName + " used but also marked as not emitted!");137return false;138}139140std::pair<unsigned, unsigned> SO = CGI.Operands.getSubOperandNumber(OpIdx);141std::string &EncoderMethodName =142CGI.Operands[SO.first].EncoderMethodNames[SO.second];143144if (UseAPInt)145Case += " op.clearAllBits();\n";146147Case += " // op: " + VarName + "\n";148149// If the source operand has a custom encoder, use it.150if (!EncoderMethodName.empty()) {151if (UseAPInt) {152Case += " " + EncoderMethodName + "(MI, " + utostr(OpIdx);153Case += ", op";154} else {155Case += " op = " + EncoderMethodName + "(MI, " + utostr(OpIdx);156}157Case += ", Fixups, STI);\n";158} else {159if (UseAPInt) {160Case +=161" getMachineOpValue(MI, MI.getOperand(" + utostr(OpIdx) + ")";162Case += ", op, Fixups, STI";163} else {164Case += " op = getMachineOpValue(MI, MI.getOperand(" +165utostr(OpIdx) + ")";166Case += ", Fixups, STI";167}168Case += ");\n";169}170171// Precalculate the number of lits this variable contributes to in the172// operand. 
If there is a single lit (consecutive range of bits) we can use a173// destructive sequence on APInt that reduces memory allocations.174int numOperandLits = 0;175for (int tmpBit = bit; tmpBit >= 0;) {176int varBit = getVariableBit(VarName, BI, tmpBit);177178// If this bit isn't from a variable, skip it.179if (varBit == -1) {180--tmpBit;181continue;182}183184// Figure out the consecutive range of bits covered by this operand, in185// order to generate better encoding code.186int beginVarBit = varBit;187int N = 1;188for (--tmpBit; tmpBit >= 0;) {189varBit = getVariableBit(VarName, BI, tmpBit);190if (varBit == -1 || varBit != (beginVarBit - N))191break;192++N;193--tmpBit;194}195++numOperandLits;196}197198unsigned BitOffset = -1;199for (; bit >= 0;) {200int varBit = getVariableBit(VarName, BI, bit);201202// If this bit isn't from a variable, skip it.203if (varBit == -1) {204--bit;205continue;206}207208// Figure out the consecutive range of bits covered by this operand, in209// order to generate better encoding code.210int beginInstBit = bit;211int beginVarBit = varBit;212int N = 1;213for (--bit; bit >= 0;) {214varBit = getVariableBit(VarName, BI, bit);215if (varBit == -1 || varBit != (beginVarBit - N))216break;217++N;218--bit;219}220221std::string maskStr;222int opShift;223224unsigned loBit = beginVarBit - N + 1;225unsigned hiBit = loBit + N;226unsigned loInstBit = beginInstBit - N + 1;227BitOffset = loInstBit;228if (UseAPInt) {229std::string extractStr;230if (N >= 64) {231extractStr = "op.extractBits(" + itostr(hiBit - loBit) + ", " +232itostr(loBit) + ")";233Case += " Value.insertBits(" + extractStr + ", " +234itostr(loInstBit) + ");\n";235} else {236extractStr = "op.extractBitsAsZExtValue(" + itostr(hiBit - loBit) +237", " + itostr(loBit) + ")";238Case += " Value.insertBits(" + extractStr + ", " +239itostr(loInstBit) + ", " + itostr(hiBit - loBit) + ");\n";240}241} else {242uint64_t opMask = ~(uint64_t)0 >> (64 - N);243opShift = beginVarBit - N + 1;244opMask <<= 
opShift;245maskStr = "UINT64_C(" + utostr(opMask) + ")";246opShift = beginInstBit - beginVarBit;247248if (numOperandLits == 1) {249Case += " op &= " + maskStr + ";\n";250if (opShift > 0) {251Case += " op <<= " + itostr(opShift) + ";\n";252} else if (opShift < 0) {253Case += " op >>= " + itostr(-opShift) + ";\n";254}255Case += " Value |= op;\n";256} else {257if (opShift > 0) {258Case += " Value |= (op & " + maskStr + ") << " +259itostr(opShift) + ";\n";260} else if (opShift < 0) {261Case += " Value |= (op & " + maskStr + ") >> " +262itostr(-opShift) + ";\n";263} else {264Case += " Value |= (op & " + maskStr + ");\n";265}266}267}268}269270if (BitOffset != (unsigned)-1) {271BitOffsetCase += " case " + utostr(OpIdx) + ":\n";272BitOffsetCase += " // op: " + VarName + "\n";273BitOffsetCase += " return " + utostr(BitOffset) + ";\n";274}275276return true;277}278279std::pair<std::string, std::string>280CodeEmitterGen::getInstructionCases(Record *R, CodeGenTarget &Target) {281std::string Case, BitOffsetCase;282283auto append = [&](const std::string &S) {284Case += S;285BitOffsetCase += S;286};287288if (const RecordVal *RV = R->getValue("EncodingInfos")) {289if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {290const CodeGenHwModes &HWM = Target.getHwModes();291EncodingInfoByHwMode EBM(DI->getDef(), HWM);292293// Invoke the interface to obtain the HwMode ID controlling the294// EncodingInfo for the current subtarget. 
This interface will295// mask off irrelevant HwMode IDs.296append(" unsigned HwMode = "297"STI.getHwMode(MCSubtargetInfo::HwMode_EncodingInfo);\n");298Case += " switch (HwMode) {\n";299Case += " default: llvm_unreachable(\"Unknown hardware mode!\"); "300"break;\n";301for (auto &[ModeId, Encoding] : EBM) {302if (ModeId == DefaultMode) {303Case +=304" case " + itostr(DefaultMode) + ": InstBitsByHw = InstBits";305} else {306Case += " case " + itostr(ModeId) +307": InstBitsByHw = InstBits_" +308std::string(HWM.getMode(ModeId).Name);309}310Case += "; break;\n";311}312Case += " };\n";313314// We need to remodify the 'Inst' value from the table we found above.315if (UseAPInt) {316int NumWords = APInt::getNumWords(BitWidth);317Case += " Inst = APInt(" + itostr(BitWidth);318Case += ", ArrayRef(InstBitsByHw + opcode * " + itostr(NumWords) +319", " + itostr(NumWords);320Case += "));\n";321Case += " Value = Inst;\n";322} else {323Case += " Value = InstBitsByHw[opcode];\n";324}325326append(" switch (HwMode) {\n");327append(" default: llvm_unreachable(\"Unhandled HwMode\");\n");328for (auto &[ModeId, Encoding] : EBM) {329append(" case " + itostr(ModeId) + ": {\n");330addInstructionCasesForEncoding(R, Encoding, Target, Case,331BitOffsetCase);332append(" break;\n");333append(" }\n");334}335append(" }\n");336return std::pair(std::move(Case), std::move(BitOffsetCase));337}338}339addInstructionCasesForEncoding(R, R, Target, Case, BitOffsetCase);340return std::pair(std::move(Case), std::move(BitOffsetCase));341}342343void CodeEmitterGen::addInstructionCasesForEncoding(344Record *R, Record *EncodingDef, CodeGenTarget &Target, std::string &Case,345std::string &BitOffsetCase) {346BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");347348// Loop over all of the fields in the instruction, determining which are the349// operands to the instruction.350bool Success = true;351size_t OrigBitOffsetCaseSize = BitOffsetCase.size();352BitOffsetCase += " switch (OpNum) {\n";353size_t 
BitOffsetCaseSizeBeforeLoop = BitOffsetCase.size();354for (const RecordVal &RV : EncodingDef->getValues()) {355// Ignore fixed fields in the record, we're looking for values like:356// bits<5> RST = { ?, ?, ?, ?, ? };357if (RV.isNonconcreteOK() || RV.getValue()->isComplete())358continue;359360Success &= addCodeToMergeInOperand(R, BI, std::string(RV.getName()), Case,361BitOffsetCase, Target);362}363// Avoid empty switches.364if (BitOffsetCase.size() == BitOffsetCaseSizeBeforeLoop)365BitOffsetCase.resize(OrigBitOffsetCaseSize);366else367BitOffsetCase += " }\n";368369if (!Success) {370// Dump the record, so we can see what's going on...371std::string E;372raw_string_ostream S(E);373S << "Dumping record for previous error:\n";374S << *R;375PrintNote(E);376}377378StringRef PostEmitter = R->getValueAsString("PostEncoderMethod");379if (!PostEmitter.empty()) {380Case += " Value = ";381Case += PostEmitter;382Case += "(MI, Value";383Case += ", STI";384Case += ");\n";385}386}387388static void emitInstBits(raw_ostream &OS, const APInt &Bits) {389for (unsigned I = 0; I < Bits.getNumWords(); ++I)390OS << ((I > 0) ? 
", " : "") << "UINT64_C(" << utostr(Bits.getRawData()[I])391<< ")";392}393394void CodeEmitterGen::emitInstructionBaseValues(395raw_ostream &o, ArrayRef<const CodeGenInstruction *> NumberedInstructions,396CodeGenTarget &Target, unsigned HwMode) {397const CodeGenHwModes &HWM = Target.getHwModes();398if (HwMode == DefaultMode)399o << " static const uint64_t InstBits[] = {\n";400else401o << " static const uint64_t InstBits_"402<< HWM.getModeName(HwMode, /*IncludeDefault=*/true) << "[] = {\n";403404for (const CodeGenInstruction *CGI : NumberedInstructions) {405Record *R = CGI->TheDef;406407if (R->getValueAsString("Namespace") == "TargetOpcode" ||408R->getValueAsBit("isPseudo")) {409o << " ";410emitInstBits(o, APInt(BitWidth, 0));411o << ",\n";412continue;413}414415Record *EncodingDef = R;416if (const RecordVal *RV = R->getValue("EncodingInfos")) {417if (auto *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {418EncodingInfoByHwMode EBM(DI->getDef(), HWM);419if (EBM.hasMode(HwMode)) {420EncodingDef = EBM.get(HwMode);421} else {422// If the HwMode does not match, then Encoding '0'423// should be generated.424APInt Value(BitWidth, 0);425o << " ";426emitInstBits(o, Value);427o << "," << '\t' << "// " << R->getName() << "\n";428continue;429}430}431}432BitsInit *BI = EncodingDef->getValueAsBitsInit("Inst");433434// Start by filling in fixed values.435APInt Value(BitWidth, 0);436for (unsigned i = 0, e = BI->getNumBits(); i != e; ++i) {437if (auto *B = dyn_cast<BitInit>(BI->getBit(i)); B && B->getValue())438Value.setBit(i);439}440o << " ";441emitInstBits(o, Value);442o << "," << '\t' << "// " << R->getName() << "\n";443}444o << " UINT64_C(0)\n };\n";445}446447void CodeEmitterGen::emitCaseMap(448raw_ostream &o,449const std::map<std::string, std::vector<std::string>> &CaseMap) {450std::map<std::string, std::vector<std::string>>::const_iterator IE, EE;451for (IE = CaseMap.begin(), EE = CaseMap.end(); IE != EE; ++IE) {452const std::string &Case = IE->first;453const 
std::vector<std::string> &InstList = IE->second;454455for (int i = 0, N = InstList.size(); i < N; i++) {456if (i)457o << "\n";458o << " case " << InstList[i] << ":";459}460o << " {\n";461o << Case;462o << " break;\n"463<< " }\n";464}465}466467void CodeEmitterGen::run(raw_ostream &o) {468emitSourceFileHeader("Machine Code Emitter", o);469470CodeGenTarget Target(Records);471std::vector<Record *> Insts = Records.getAllDerivedDefinitions("Instruction");472473// For little-endian instruction bit encodings, reverse the bit order474Target.reverseBitsForLittleEndianEncoding();475476ArrayRef<const CodeGenInstruction *> NumberedInstructions =477Target.getInstructionsByEnumValue();478479if (Target.hasVariableLengthEncodings()) {480emitVarLenCodeEmitter(Records, o);481} else {482const CodeGenHwModes &HWM = Target.getHwModes();483// The set of HwModes used by instruction encodings.484std::set<unsigned> HwModes;485BitWidth = 0;486for (const CodeGenInstruction *CGI : NumberedInstructions) {487Record *R = CGI->TheDef;488if (R->getValueAsString("Namespace") == "TargetOpcode" ||489R->getValueAsBit("isPseudo"))490continue;491492if (const RecordVal *RV = R->getValue("EncodingInfos")) {493if (DefInit *DI = dyn_cast_or_null<DefInit>(RV->getValue())) {494EncodingInfoByHwMode EBM(DI->getDef(), HWM);495for (auto &KV : EBM) {496BitsInit *BI = KV.second->getValueAsBitsInit("Inst");497BitWidth = std::max(BitWidth, BI->getNumBits());498HwModes.insert(KV.first);499}500continue;501}502}503BitsInit *BI = R->getValueAsBitsInit("Inst");504BitWidth = std::max(BitWidth, BI->getNumBits());505}506UseAPInt = BitWidth > 64;507508// Emit function declaration509if (UseAPInt) {510o << "void " << Target.getName()511<< "MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"512<< " SmallVectorImpl<MCFixup> &Fixups,\n"513<< " APInt &Inst,\n"514<< " APInt &Scratch,\n"515<< " const MCSubtargetInfo &STI) const {\n";516} else {517o << "uint64_t " << Target.getName();518o << 
"MCCodeEmitter::getBinaryCodeForInstr(const MCInst &MI,\n"519<< " SmallVectorImpl<MCFixup> &Fixups,\n"520<< " const MCSubtargetInfo &STI) const {\n";521}522523// Emit instruction base values524emitInstructionBaseValues(o, NumberedInstructions, Target, DefaultMode);525if (!HwModes.empty()) {526// Emit table for instrs whose encodings are controlled by HwModes.527for (unsigned HwMode : HwModes) {528if (HwMode == DefaultMode)529continue;530emitInstructionBaseValues(o, NumberedInstructions, Target, HwMode);531}532533// This pointer will be assigned to the HwMode table later.534o << " const uint64_t *InstBitsByHw;\n";535}536537// Map to accumulate all the cases.538std::map<std::string, std::vector<std::string>> CaseMap;539std::map<std::string, std::vector<std::string>> BitOffsetCaseMap;540541// Construct all cases statement for each opcode542for (Record *R : Insts) {543if (R->getValueAsString("Namespace") == "TargetOpcode" ||544R->getValueAsBit("isPseudo"))545continue;546std::string InstName =547(R->getValueAsString("Namespace") + "::" + R->getName()).str();548std::string Case, BitOffsetCase;549std::tie(Case, BitOffsetCase) = getInstructionCases(R, Target);550551CaseMap[Case].push_back(InstName);552BitOffsetCaseMap[BitOffsetCase].push_back(std::move(InstName));553}554555// Emit initial function code556if (UseAPInt) {557int NumWords = APInt::getNumWords(BitWidth);558o << " const unsigned opcode = MI.getOpcode();\n"559<< " if (Scratch.getBitWidth() != " << BitWidth << ")\n"560<< " Scratch = Scratch.zext(" << BitWidth << ");\n"561<< " Inst = APInt(" << BitWidth << ", ArrayRef(InstBits + opcode * "562<< NumWords << ", " << NumWords << "));\n"563<< " APInt &Value = Inst;\n"564<< " APInt &op = Scratch;\n"565<< " switch (opcode) {\n";566} else {567o << " const unsigned opcode = MI.getOpcode();\n"568<< " uint64_t Value = InstBits[opcode];\n"569<< " uint64_t op = 0;\n"570<< " (void)op; // suppress warning\n"571<< " switch (opcode) {\n";572}573574// Emit each case 
statement575emitCaseMap(o, CaseMap);576577// Default case: unhandled opcode578o << " default:\n"579<< " std::string msg;\n"580<< " raw_string_ostream Msg(msg);\n"581<< " Msg << \"Not supported instr: \" << MI;\n"582<< " report_fatal_error(Msg.str().c_str());\n"583<< " }\n";584if (UseAPInt)585o << " Inst = Value;\n";586else587o << " return Value;\n";588o << "}\n\n";589590o << "#ifdef GET_OPERAND_BIT_OFFSET\n"591<< "#undef GET_OPERAND_BIT_OFFSET\n\n"592<< "uint32_t " << Target.getName()593<< "MCCodeEmitter::getOperandBitOffset(const MCInst &MI,\n"594<< " unsigned OpNum,\n"595<< " const MCSubtargetInfo &STI) const {\n"596<< " switch (MI.getOpcode()) {\n";597emitCaseMap(o, BitOffsetCaseMap);598o << " }\n"599<< " std::string msg;\n"600<< " raw_string_ostream Msg(msg);\n"601<< " Msg << \"Not supported instr[opcode]: \" << MI << \"[\" << OpNum "602"<< \"]\";\n"603<< " report_fatal_error(Msg.str().c_str());\n"604<< "}\n\n"605<< "#endif // GET_OPERAND_BIT_OFFSET\n\n";606}607}608609} // end anonymous namespace610611static TableGen::Emitter::OptClass<CodeEmitterGen>612X("gen-emitter", "Generate machine code emitter");613614615