Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/X86FoldTablesEmitter.cpp
35258 views
//===- utils/TableGen/X86FoldTablesEmitter.cpp - X86 backend-*- C++ -*-===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This tablegen backend is responsible for emitting the memory fold tables of9// the X86 backend instructions.10//11//===----------------------------------------------------------------------===//1213#include "Common/CodeGenInstruction.h"14#include "Common/CodeGenTarget.h"15#include "X86RecognizableInstr.h"16#include "llvm/ADT/StringSwitch.h"17#include "llvm/Support/X86FoldTablesUtils.h"18#include "llvm/TableGen/Record.h"19#include "llvm/TableGen/TableGenBackend.h"20#include <set>2122using namespace llvm;23using namespace X86Disassembler;2425namespace {26// Represents an entry in the manual mapped instructions set.27struct ManualMapEntry {28const char *RegInstStr;29const char *MemInstStr;30uint16_t Strategy;31};3233// List of instructions requiring explicitly aligned memory.34const char *ExplicitAlign[] = {"MOVDQA", "MOVAPS", "MOVAPD", "MOVNTPS",35"MOVNTPD", "MOVNTDQ", "MOVNTDQA"};3637// List of instructions NOT requiring explicit memory alignment.38const char *ExplicitUnalign[] = {"MOVDQU", "MOVUPS", "MOVUPD",39"PCMPESTRM", "PCMPESTRI", "PCMPISTRM",40"PCMPISTRI"};4142const ManualMapEntry ManualMapSet[] = {43#define ENTRY(REG, MEM, FLAGS) {#REG, #MEM, FLAGS},44#include "X86ManualFoldTables.def"45};4647const std::set<StringRef> NoFoldSet = {48#define NOFOLD(INSN) #INSN,49#include "X86ManualFoldTables.def"50};5152static bool isExplicitAlign(const CodeGenInstruction *Inst) {53return any_of(ExplicitAlign, [Inst](const char *InstStr) {54return Inst->TheDef->getName().contains(InstStr);55});56}5758static bool isExplicitUnalign(const CodeGenInstruction *Inst) {59return any_of(ExplicitUnalign, [Inst](const char *InstStr) {60return Inst->TheDef->getName().contains(InstStr);61});62}6364class X86FoldTablesEmitter {65RecordKeeper &Records;66CodeGenTarget Target;6768// Represents an entry in the folding table69class X86FoldTableEntry {70const CodeGenInstruction *RegInst;71const CodeGenInstruction *MemInst;7273public:74bool NoReverse = false;75bool NoForward = false;76bool FoldLoad = false;77bool FoldStore = false;78enum BcastType {79BCAST_NONE,80BCAST_W,81BCAST_D,82BCAST_Q,83BCAST_SS,84BCAST_SD,85BCAST_SH,86};87BcastType BroadcastKind = BCAST_NONE;8889Align Alignment;9091X86FoldTableEntry() = default;92X86FoldTableEntry(const CodeGenInstruction *RegInst,93const CodeGenInstruction *MemInst)94: RegInst(RegInst), MemInst(MemInst) {}9596void print(raw_ostream &OS) const {97OS.indent(2);98OS << "{X86::" << RegInst->TheDef->getName() << ", ";99OS << "X86::" << MemInst->TheDef->getName() << ", ";100101std::string Attrs;102if (FoldLoad)103Attrs += "TB_FOLDED_LOAD|";104if (FoldStore)105Attrs += "TB_FOLDED_STORE|";106if (NoReverse)107Attrs += "TB_NO_REVERSE|";108if (NoForward)109Attrs += "TB_NO_FORWARD|";110if (Alignment != Align(1))111Attrs += "TB_ALIGN_" + std::to_string(Alignment.value()) + "|";112switch (BroadcastKind) {113case BCAST_NONE:114break;115case BCAST_W:116Attrs += "TB_BCAST_W|";117break;118case BCAST_D:119Attrs += "TB_BCAST_D|";120break;121case BCAST_Q:122Attrs += "TB_BCAST_Q|";123break;124case BCAST_SS:125Attrs += "TB_BCAST_SS|";126break;127case BCAST_SD:128Attrs += "TB_BCAST_SD|";129break;130case BCAST_SH:131Attrs += "TB_BCAST_SH|";132break;133}134135StringRef SimplifiedAttrs = StringRef(Attrs).rtrim("|");136if (SimplifiedAttrs.empty())137SimplifiedAttrs = "0";138139OS << SimplifiedAttrs << "},\n";140}141142#ifndef NDEBUG143// Check that Uses and Defs are same after memory fold.144void checkCorrectness() const {145auto &RegInstRec = *RegInst->TheDef;146auto &MemInstRec = *MemInst->TheDef;147auto ListOfUsesReg = RegInstRec.getValueAsListOfDefs("Uses");148auto ListOfUsesMem = MemInstRec.getValueAsListOfDefs("Uses");149auto ListOfDefsReg = RegInstRec.getValueAsListOfDefs("Defs");150auto ListOfDefsMem = MemInstRec.getValueAsListOfDefs("Defs");151if (ListOfUsesReg != ListOfUsesMem || ListOfDefsReg != ListOfDefsMem)152report_fatal_error("Uses/Defs couldn't be changed after folding " +153RegInstRec.getName() + " to " +154MemInstRec.getName());155}156#endif157};158159// NOTE: We check the fold tables are sorted in X86InstrFoldTables.cpp by the160// enum of the instruction, which is computed in161// CodeGenTarget::ComputeInstrsByEnum. So we should use the same comparator162// here.163// FIXME: Could we share the code with CodeGenTarget::ComputeInstrsByEnum?164struct CompareInstrsByEnum {165bool operator()(const CodeGenInstruction *LHS,166const CodeGenInstruction *RHS) const {167assert(LHS && RHS && "LHS and RHS shouldn't be nullptr");168const auto &D1 = *LHS->TheDef;169const auto &D2 = *RHS->TheDef;170return std::tuple(!D1.getValueAsBit("isPseudo"), D1.getName()) <171std::tuple(!D2.getValueAsBit("isPseudo"), D2.getName());172}173};174175typedef std::map<const CodeGenInstruction *, X86FoldTableEntry,176CompareInstrsByEnum>177FoldTable;178// Table2Addr - Holds instructions which their memory form performs179// load+store.180//181// Table#i - Holds instructions which the their memory form182// performs a load OR a store, and their #i'th operand is folded.183//184// BroadcastTable#i - Holds instructions which the their memory form performs185// a broadcast load and their #i'th operand is folded.186FoldTable Table2Addr;187FoldTable Table0;188FoldTable Table1;189FoldTable Table2;190FoldTable Table3;191FoldTable Table4;192FoldTable BroadcastTable1;193FoldTable BroadcastTable2;194FoldTable BroadcastTable3;195FoldTable BroadcastTable4;196197public:198X86FoldTablesEmitter(RecordKeeper &R) : Records(R), Target(R) {}199200// run - Generate the 6 X86 memory fold tables.201void run(raw_ostream &OS);202203private:204// Decides to which table to add the entry with the given instructions.205// S sets the strategy of adding the TB_NO_REVERSE flag.206void updateTables(const CodeGenInstruction *RegInst,207const CodeGenInstruction *MemInst, uint16_t S = 0,208bool IsManual = false, bool IsBroadcast = false);209210// Generates X86FoldTableEntry with the given instructions and fill it with211// the appropriate flags, then adds it to a memory fold table.212void addEntryWithFlags(FoldTable &Table, const CodeGenInstruction *RegInst,213const CodeGenInstruction *MemInst, uint16_t S,214unsigned FoldedIdx, bool IsManual);215// Generates X86FoldTableEntry with the given instructions and adds it to a216// broadcast table.217void addBroadcastEntry(FoldTable &Table, const CodeGenInstruction *RegInst,218const CodeGenInstruction *MemInst);219220// Print the given table as a static const C++ array of type221// X86FoldTableEntry.222void printTable(const FoldTable &Table, StringRef TableName,223raw_ostream &OS) {224OS << "static const X86FoldTableEntry " << TableName << "[] = {\n";225226for (auto &E : Table)227E.second.print(OS);228229OS << "};\n\n";230}231};232233// Return true if one of the instruction's operands is a RST register class234static bool hasRSTRegClass(const CodeGenInstruction *Inst) {235return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {236return OpIn.Rec->getName() == "RST" || OpIn.Rec->getName() == "RSTi";237});238}239240// Return true if one of the instruction's operands is a ptr_rc_tailcall241static bool hasPtrTailcallRegClass(const CodeGenInstruction *Inst) {242return any_of(Inst->Operands, [](const CGIOperandList::OperandInfo &OpIn) {243return OpIn.Rec->getName() == "ptr_rc_tailcall";244});245}246247static uint8_t byteFromBitsInit(const BitsInit *B) {248unsigned N = B->getNumBits();249assert(N <= 8 && "Field is too large for uint8_t!");250251uint8_t Value = 0;252for (unsigned I = 0; I != N; ++I) {253BitInit *Bit = cast<BitInit>(B->getBit(I));254Value |= Bit->getValue() << I;255}256return Value;257}258259static bool mayFoldFromForm(uint8_t Form) {260switch (Form) {261default:262return Form >= X86Local::MRM0r && Form <= X86Local::MRM7r;263case X86Local::MRMXr:264case X86Local::MRMXrCC:265case X86Local::MRMDestReg:266case X86Local::MRMSrcReg:267case X86Local::MRMSrcReg4VOp3:268case X86Local::MRMSrcRegOp4:269case X86Local::MRMSrcRegCC:270return true;271}272}273274static bool mayFoldToForm(uint8_t Form) {275switch (Form) {276default:277return Form >= X86Local::MRM0m && Form <= X86Local::MRM7m;278case X86Local::MRMXm:279case X86Local::MRMXmCC:280case X86Local::MRMDestMem:281case X86Local::MRMSrcMem:282case X86Local::MRMSrcMem4VOp3:283case X86Local::MRMSrcMemOp4:284case X86Local::MRMSrcMemCC:285return true;286}287}288289static bool mayFoldFromLeftToRight(uint8_t LHS, uint8_t RHS) {290switch (LHS) {291default:292llvm_unreachable("Unexpected Form!");293case X86Local::MRM0r:294return RHS == X86Local::MRM0m;295case X86Local::MRM1r:296return RHS == X86Local::MRM1m;297case X86Local::MRM2r:298return RHS == X86Local::MRM2m;299case X86Local::MRM3r:300return RHS == X86Local::MRM3m;301case X86Local::MRM4r:302return RHS == X86Local::MRM4m;303case X86Local::MRM5r:304return RHS == X86Local::MRM5m;305case X86Local::MRM6r:306return RHS == X86Local::MRM6m;307case X86Local::MRM7r:308return RHS == X86Local::MRM7m;309case X86Local::MRMXr:310return RHS == X86Local::MRMXm;311case X86Local::MRMXrCC:312return RHS == X86Local::MRMXmCC;313case X86Local::MRMDestReg:314return RHS == X86Local::MRMDestMem;315case X86Local::MRMSrcReg:316return RHS == X86Local::MRMSrcMem;317case X86Local::MRMSrcReg4VOp3:318return RHS == X86Local::MRMSrcMem4VOp3;319case X86Local::MRMSrcRegOp4:320return RHS == X86Local::MRMSrcMemOp4;321case X86Local::MRMSrcRegCC:322return RHS == X86Local::MRMSrcMemCC;323}324}325326static bool isNOREXRegClass(const Record *Op) {327return Op->getName().contains("_NOREX");328}329330// Function object - Operator() returns true if the given Reg instruction331// matches the Mem instruction of this object.332class IsMatch {333const CodeGenInstruction *MemInst;334const X86Disassembler::RecognizableInstrBase MemRI;335bool IsBroadcast;336const unsigned Variant;337338public:339IsMatch(const CodeGenInstruction *Inst, bool IsBroadcast, unsigned V)340: MemInst(Inst), MemRI(*MemInst), IsBroadcast(IsBroadcast), Variant(V) {}341342bool operator()(const CodeGenInstruction *RegInst) {343X86Disassembler::RecognizableInstrBase RegRI(*RegInst);344const Record *RegRec = RegInst->TheDef;345const Record *MemRec = MemInst->TheDef;346347// EVEX_B means different things for memory and register forms.348// register form: rounding control or SAE349// memory form: broadcast350if (IsBroadcast && (RegRI.HasEVEX_B || !MemRI.HasEVEX_B))351return false;352// EVEX_B indicates NDD for MAP4 instructions353if (!IsBroadcast && (RegRI.HasEVEX_B || MemRI.HasEVEX_B) &&354RegRI.OpMap != X86Local::T_MAP4)355return false;356357if (!mayFoldFromLeftToRight(RegRI.Form, MemRI.Form))358return false;359360// X86 encoding is crazy, e.g361//362// f3 0f c7 30 vmxon (%rax)363// f3 0f c7 f0 senduipi %rax364//365// This two instruction have similiar encoding fields but are unrelated366if (X86Disassembler::getMnemonic(MemInst, Variant) !=367X86Disassembler::getMnemonic(RegInst, Variant))368return false;369370// Return false if any of the following fields of does not match.371if (std::tuple(RegRI.Encoding, RegRI.Opcode, RegRI.OpPrefix, RegRI.OpMap,372RegRI.OpSize, RegRI.AdSize, RegRI.HasREX_W, RegRI.HasVEX_4V,373RegRI.HasVEX_L, RegRI.IgnoresVEX_L, RegRI.IgnoresW,374RegRI.HasEVEX_K, RegRI.HasEVEX_KZ, RegRI.HasEVEX_L2,375RegRI.HasEVEX_NF, RegRec->getValueAsBit("hasEVEX_RC"),376RegRec->getValueAsBit("hasLockPrefix"),377RegRec->getValueAsBit("hasNoTrackPrefix")) !=378std::tuple(MemRI.Encoding, MemRI.Opcode, MemRI.OpPrefix, MemRI.OpMap,379MemRI.OpSize, MemRI.AdSize, MemRI.HasREX_W, MemRI.HasVEX_4V,380MemRI.HasVEX_L, MemRI.IgnoresVEX_L, MemRI.IgnoresW,381MemRI.HasEVEX_K, MemRI.HasEVEX_KZ, MemRI.HasEVEX_L2,382MemRI.HasEVEX_NF, MemRec->getValueAsBit("hasEVEX_RC"),383MemRec->getValueAsBit("hasLockPrefix"),384MemRec->getValueAsBit("hasNoTrackPrefix")))385return false;386387// Make sure the sizes of the operands of both instructions suit each other.388// This is needed for instructions with intrinsic version (_Int).389// Where the only difference is the size of the operands.390// For example: VUCOMISDZrm and VUCOMISDrm_Int391// Also for instructions that their EVEX version was upgraded to work with392// k-registers. For example VPCMPEQBrm (xmm output register) and393// VPCMPEQBZ128rm (k register output register).394unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();395unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();396unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();397unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();398399// Instructions with one output in their memory form use the memory folded400// operand as source and destination (Read-Modify-Write).401unsigned RegStartIdx =402(MemOutSize + 1 == RegOutSize) && (MemInSize == RegInSize) ? 1 : 0;403404bool FoundFoldedOp = false;405for (unsigned I = 0, E = MemInst->Operands.size(); I != E; I++) {406Record *MemOpRec = MemInst->Operands[I].Rec;407Record *RegOpRec = RegInst->Operands[I + RegStartIdx].Rec;408409if (MemOpRec == RegOpRec)410continue;411412if (isRegisterOperand(MemOpRec) && isRegisterOperand(RegOpRec) &&413((getRegOperandSize(MemOpRec) != getRegOperandSize(RegOpRec)) ||414(isNOREXRegClass(MemOpRec) != isNOREXRegClass(RegOpRec))))415return false;416417if (isMemoryOperand(MemOpRec) && isMemoryOperand(RegOpRec) &&418(getMemOperandSize(MemOpRec) != getMemOperandSize(RegOpRec)))419return false;420421if (isImmediateOperand(MemOpRec) && isImmediateOperand(RegOpRec) &&422(MemOpRec->getValueAsDef("Type") != RegOpRec->getValueAsDef("Type")))423return false;424425// Only one operand can be folded.426if (FoundFoldedOp)427return false;428429assert(isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec));430FoundFoldedOp = true;431}432433return FoundFoldedOp;434}435};436437} // end anonymous namespace438439void X86FoldTablesEmitter::addEntryWithFlags(FoldTable &Table,440const CodeGenInstruction *RegInst,441const CodeGenInstruction *MemInst,442uint16_t S, unsigned FoldedIdx,443bool IsManual) {444445assert((IsManual || Table.find(RegInst) == Table.end()) &&446"Override entry unexpectedly");447X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);448Record *RegRec = RegInst->TheDef;449Result.NoReverse = S & TB_NO_REVERSE;450Result.NoForward = S & TB_NO_FORWARD;451Result.FoldLoad = S & TB_FOLDED_LOAD;452Result.FoldStore = S & TB_FOLDED_STORE;453Result.Alignment = Align(1ULL << ((S & TB_ALIGN_MASK) >> TB_ALIGN_SHIFT));454if (IsManual) {455Table[RegInst] = Result;456return;457}458459Record *RegOpRec = RegInst->Operands[FoldedIdx].Rec;460Record *MemOpRec = MemInst->Operands[FoldedIdx].Rec;461462// Unfolding code generates a load/store instruction according to the size of463// the register in the register form instruction.464// If the register's size is greater than the memory's operand size, do not465// allow unfolding.466467// the unfolded load size will be based on the register size. If that’s bigger468// than the memory operand size, the unfolded load will load more memory and469// potentially cause a memory fault.470if (getRegOperandSize(RegOpRec) > getMemOperandSize(MemOpRec))471Result.NoReverse = true;472473// Check no-kz version's isMoveReg474StringRef RegInstName = RegRec->getName();475unsigned DropLen =476RegInstName.ends_with("rkz") ? 2 : (RegInstName.ends_with("rk") ? 1 : 0);477Record *BaseDef =478DropLen ? Records.getDef(RegInstName.drop_back(DropLen)) : nullptr;479bool IsMoveReg =480BaseDef ? Target.getInstruction(BaseDef).isMoveReg : RegInst->isMoveReg;481// A masked load can not be unfolded to a full load, otherwise it would access482// unexpected memory. A simple store can not be unfolded.483if (IsMoveReg && (BaseDef || Result.FoldStore))484Result.NoReverse = true;485486uint8_t Enc = byteFromBitsInit(RegRec->getValueAsBitsInit("OpEncBits"));487if (isExplicitAlign(RegInst)) {488// The instruction require explicitly aligned memory.489BitsInit *VectSize = RegRec->getValueAsBitsInit("VectSize");490Result.Alignment = Align(byteFromBitsInit(VectSize));491} else if (!Enc && !isExplicitUnalign(RegInst) &&492getMemOperandSize(MemOpRec) > 64) {493// Instructions with XOP/VEX/EVEX encoding do not require alignment while494// SSE packed vector instructions require a 16 byte alignment.495Result.Alignment = Align(16);496}497// Expand is only ever created as a masked instruction. It is not safe to498// unfold a masked expand because we don't know if it came from an expand load499// intrinsic or folding a plain load. If it is from a expand load intrinsic,500// Unfolding to plain load would read more elements and could trigger a fault.501if (RegRec->getName().contains("EXPAND"))502Result.NoReverse = true;503504Table[RegInst] = Result;505}506507void X86FoldTablesEmitter::addBroadcastEntry(508FoldTable &Table, const CodeGenInstruction *RegInst,509const CodeGenInstruction *MemInst) {510511assert(Table.find(RegInst) == Table.end() && "Override entry unexpectedly");512X86FoldTableEntry Result = X86FoldTableEntry(RegInst, MemInst);513514DagInit *In = MemInst->TheDef->getValueAsDag("InOperandList");515for (unsigned I = 0, E = In->getNumArgs(); I != E; ++I) {516Result.BroadcastKind =517StringSwitch<X86FoldTableEntry::BcastType>(In->getArg(I)->getAsString())518.Case("i16mem", X86FoldTableEntry::BCAST_W)519.Case("i32mem", X86FoldTableEntry::BCAST_D)520.Case("i64mem", X86FoldTableEntry::BCAST_Q)521.Case("f16mem", X86FoldTableEntry::BCAST_SH)522.Case("f32mem", X86FoldTableEntry::BCAST_SS)523.Case("f64mem", X86FoldTableEntry::BCAST_SD)524.Default(X86FoldTableEntry::BCAST_NONE);525if (Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE)526break;527}528assert(Result.BroadcastKind != X86FoldTableEntry::BCAST_NONE &&529"Unknown memory operand for broadcast");530531Table[RegInst] = Result;532}533534void X86FoldTablesEmitter::updateTables(const CodeGenInstruction *RegInst,535const CodeGenInstruction *MemInst,536uint16_t S, bool IsManual,537bool IsBroadcast) {538539Record *RegRec = RegInst->TheDef;540Record *MemRec = MemInst->TheDef;541unsigned MemOutSize = MemRec->getValueAsDag("OutOperandList")->getNumArgs();542unsigned RegOutSize = RegRec->getValueAsDag("OutOperandList")->getNumArgs();543unsigned MemInSize = MemRec->getValueAsDag("InOperandList")->getNumArgs();544unsigned RegInSize = RegRec->getValueAsDag("InOperandList")->getNumArgs();545546// Instructions which Read-Modify-Write should be added to Table2Addr.547if (!MemOutSize && RegOutSize == 1 && MemInSize == RegInSize) {548assert(!IsBroadcast && "Read-Modify-Write can not be broadcast");549// X86 would not unfold Read-Modify-Write instructions so add TB_NO_REVERSE.550addEntryWithFlags(Table2Addr, RegInst, MemInst, S | TB_NO_REVERSE, 0,551IsManual);552return;553}554555// Only table0 entries should explicitly specify a load or store flag.556// If the instruction writes to the folded operand, it will appear as557// an output in the register form instruction and as an input in the558// memory form instruction. If the instruction reads from the folded559// operand, it will appear as in input in both forms.560if (MemInSize == RegInSize && MemOutSize == RegOutSize) {561// Load-Folding cases.562// If the i'th register form operand is a register and the i'th memory form563// operand is a memory operand, add instructions to Table#i.564for (unsigned I = RegOutSize, E = RegInst->Operands.size(); I < E; I++) {565Record *RegOpRec = RegInst->Operands[I].Rec;566Record *MemOpRec = MemInst->Operands[I].Rec;567// PointerLikeRegClass: For instructions like TAILJMPr, TAILJMPr64,568// TAILJMPr64_REX569if ((isRegisterOperand(RegOpRec) ||570RegOpRec->isSubClassOf("PointerLikeRegClass")) &&571isMemoryOperand(MemOpRec)) {572switch (I) {573case 0:574assert(!IsBroadcast && "BroadcastTable0 needs to be added");575addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_LOAD, 0,576IsManual);577return;578case 1:579IsBroadcast580? addBroadcastEntry(BroadcastTable1, RegInst, MemInst)581: addEntryWithFlags(Table1, RegInst, MemInst, S, 1, IsManual);582return;583case 2:584IsBroadcast585? addBroadcastEntry(BroadcastTable2, RegInst, MemInst)586: addEntryWithFlags(Table2, RegInst, MemInst, S, 2, IsManual);587return;588case 3:589IsBroadcast590? addBroadcastEntry(BroadcastTable3, RegInst, MemInst)591: addEntryWithFlags(Table3, RegInst, MemInst, S, 3, IsManual);592return;593case 4:594IsBroadcast595? addBroadcastEntry(BroadcastTable4, RegInst, MemInst)596: addEntryWithFlags(Table4, RegInst, MemInst, S, 4, IsManual);597return;598}599}600}601} else if (MemInSize == RegInSize + 1 && MemOutSize + 1 == RegOutSize) {602// Store-Folding cases.603// If the memory form instruction performs a store, the *output*604// register of the register form instructions disappear and instead a605// memory *input* operand appears in the memory form instruction.606// For example:607// MOVAPSrr => (outs VR128:$dst), (ins VR128:$src)608// MOVAPSmr => (outs), (ins f128mem:$dst, VR128:$src)609Record *RegOpRec = RegInst->Operands[RegOutSize - 1].Rec;610Record *MemOpRec = MemInst->Operands[RegOutSize - 1].Rec;611if (isRegisterOperand(RegOpRec) && isMemoryOperand(MemOpRec) &&612getRegOperandSize(RegOpRec) == getMemOperandSize(MemOpRec)) {613assert(!IsBroadcast && "Store can not be broadcast");614addEntryWithFlags(Table0, RegInst, MemInst, S | TB_FOLDED_STORE, 0,615IsManual);616}617}618}619620void X86FoldTablesEmitter::run(raw_ostream &OS) {621// Holds all memory instructions622std::vector<const CodeGenInstruction *> MemInsts;623// Holds all register instructions - divided according to opcode.624std::map<uint8_t, std::vector<const CodeGenInstruction *>> RegInsts;625626ArrayRef<const CodeGenInstruction *> NumberedInstructions =627Target.getInstructionsByEnumValue();628629for (const CodeGenInstruction *Inst : NumberedInstructions) {630const Record *Rec = Inst->TheDef;631if (!Rec->isSubClassOf("X86Inst") || Rec->getValueAsBit("isAsmParserOnly"))632continue;633634if (NoFoldSet.find(Rec->getName()) != NoFoldSet.end())635continue;636637// Promoted legacy instruction is in EVEX space, and has REX2-encoding638// alternative. It's added due to HW design and never emitted by compiler.639if (byteFromBitsInit(Rec->getValueAsBitsInit("OpMapBits")) ==640X86Local::T_MAP4 &&641byteFromBitsInit(Rec->getValueAsBitsInit("explicitOpPrefixBits")) ==642X86Local::ExplicitEVEX)643continue;644645// - Instructions including RST register class operands are not relevant646// for memory folding (for further details check the explanation in647// lib/Target/X86/X86InstrFPStack.td file).648// - Some instructions (listed in the manual map above) use the register649// class ptr_rc_tailcall, which can be of a size 32 or 64, to ensure650// safe mapping of these instruction we manually map them and exclude651// them from the automation.652if (hasRSTRegClass(Inst) || hasPtrTailcallRegClass(Inst))653continue;654655// Add all the memory form instructions to MemInsts, and all the register656// form instructions to RegInsts[Opc], where Opc is the opcode of each657// instructions. this helps reducing the runtime of the backend.658const BitsInit *FormBits = Rec->getValueAsBitsInit("FormBits");659uint8_t Form = byteFromBitsInit(FormBits);660if (mayFoldToForm(Form))661MemInsts.push_back(Inst);662else if (mayFoldFromForm(Form)) {663uint8_t Opc = byteFromBitsInit(Rec->getValueAsBitsInit("Opcode"));664RegInsts[Opc].push_back(Inst);665}666}667668// Create a copy b/c the register instruction will removed when a new entry is669// added into memory fold tables.670auto RegInstsForBroadcast = RegInsts;671672Record *AsmWriter = Target.getAsmWriter();673unsigned Variant = AsmWriter->getValueAsInt("Variant");674auto FixUp = [&](const CodeGenInstruction *RegInst) {675StringRef RegInstName = RegInst->TheDef->getName();676if (RegInstName.ends_with("_REV") || RegInstName.ends_with("_alt"))677if (auto *RegAltRec = Records.getDef(RegInstName.drop_back(4)))678RegInst = &Target.getInstruction(RegAltRec);679return RegInst;680};681// For each memory form instruction, try to find its register form682// instruction.683for (const CodeGenInstruction *MemInst : MemInsts) {684uint8_t Opc =685byteFromBitsInit(MemInst->TheDef->getValueAsBitsInit("Opcode"));686687auto RegInstsIt = RegInsts.find(Opc);688if (RegInstsIt == RegInsts.end())689continue;690691// Two forms (memory & register) of the same instruction must have the same692// opcode.693std::vector<const CodeGenInstruction *> &OpcRegInsts = RegInstsIt->second;694695// Memory fold tables696auto Match =697find_if(OpcRegInsts, IsMatch(MemInst, /*IsBroadcast=*/false, Variant));698if (Match != OpcRegInsts.end()) {699updateTables(FixUp(*Match), MemInst);700OpcRegInsts.erase(Match);701}702703// Broadcast tables704StringRef MemInstName = MemInst->TheDef->getName();705if (!MemInstName.contains("mb") && !MemInstName.contains("mib"))706continue;707RegInstsIt = RegInstsForBroadcast.find(Opc);708assert(RegInstsIt != RegInstsForBroadcast.end() &&709"Unexpected control flow");710std::vector<const CodeGenInstruction *> &OpcRegInstsForBroadcast =711RegInstsIt->second;712Match = find_if(OpcRegInstsForBroadcast,713IsMatch(MemInst, /*IsBroadcast=*/true, Variant));714if (Match != OpcRegInstsForBroadcast.end()) {715updateTables(FixUp(*Match), MemInst, 0, /*IsManual=*/false,716/*IsBroadcast=*/true);717OpcRegInstsForBroadcast.erase(Match);718}719}720721// Add the manually mapped instructions listed above.722for (const ManualMapEntry &Entry : ManualMapSet) {723Record *RegInstIter = Records.getDef(Entry.RegInstStr);724Record *MemInstIter = Records.getDef(Entry.MemInstStr);725726updateTables(&(Target.getInstruction(RegInstIter)),727&(Target.getInstruction(MemInstIter)), Entry.Strategy, true);728}729730#ifndef NDEBUG731auto CheckMemFoldTable = [](const FoldTable &Table) -> void {732for (const auto &Record : Table) {733auto &FoldEntry = Record.second;734FoldEntry.checkCorrectness();735}736};737CheckMemFoldTable(Table2Addr);738CheckMemFoldTable(Table0);739CheckMemFoldTable(Table1);740CheckMemFoldTable(Table2);741CheckMemFoldTable(Table3);742CheckMemFoldTable(Table4);743CheckMemFoldTable(BroadcastTable1);744CheckMemFoldTable(BroadcastTable2);745CheckMemFoldTable(BroadcastTable3);746CheckMemFoldTable(BroadcastTable4);747#endif748#define PRINT_TABLE(TABLE) printTable(TABLE, #TABLE, OS);749// Print all tables.750PRINT_TABLE(Table2Addr)751PRINT_TABLE(Table0)752PRINT_TABLE(Table1)753PRINT_TABLE(Table2)754PRINT_TABLE(Table3)755PRINT_TABLE(Table4)756PRINT_TABLE(BroadcastTable1)757PRINT_TABLE(BroadcastTable2)758PRINT_TABLE(BroadcastTable3)759PRINT_TABLE(BroadcastTable4)760}761762static TableGen::Emitter::OptClass<X86FoldTablesEmitter>763X("gen-x86-fold-tables", "Generate X86 fold tables");764765766