Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp
35258 views
//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//1//2// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.3// See https://llvm.org/LICENSE.txt for license information.4// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception5//6//===----------------------------------------------------------------------===//7//8// This class can produce a generic deterministic finite state automaton (DFA),9// given a set of possible states and transitions.10//11// The input transitions can be nondeterministic - this class will produce the12// deterministic equivalent state machine.13//14// The generated code can run the DFA and produce an accepted / not accepted15// state and also produce, given a sequence of transitions that results in an16// accepted state, the sequence of intermediate states. This is useful if the17// initial automaton was nondeterministic - it allows mapping back from the DFA18// to the NFA.19//20//===----------------------------------------------------------------------===//2122#include "DFAEmitter.h"23#include "Basic/SequenceToOffsetTable.h"24#include "llvm/ADT/SmallVector.h"25#include "llvm/ADT/StringExtras.h"26#include "llvm/ADT/UniqueVector.h"27#include "llvm/Support/Debug.h"28#include "llvm/Support/raw_ostream.h"29#include "llvm/TableGen/Record.h"30#include "llvm/TableGen/TableGenBackend.h"31#include <cassert>32#include <cstdint>33#include <deque>34#include <map>35#include <set>36#include <string>37#include <variant>38#include <vector>3940#define DEBUG_TYPE "dfa-emitter"4142using namespace llvm;4344//===----------------------------------------------------------------------===//45// DfaEmitter implementation. This is independent of the GenAutomaton backend.46//===----------------------------------------------------------------------===//4748void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {49Actions.insert(A);50NfaStates.insert(From);51NfaStates.insert(To);52NfaTransitions[{From, A}].push_back(To);53++NumNfaTransitions;54}5556void DfaEmitter::visitDfaState(const DfaState &DS) {57// For every possible action...58auto FromId = DfaStates.idFor(DS);59for (action_type A : Actions) {60DfaState NewStates;61DfaTransitionInfo TI;62// For every represented state, word pair in the original NFA...63for (state_type FromState : DS) {64// If this action is possible from this state add the transitioned-to65// states to NewStates.66auto I = NfaTransitions.find({FromState, A});67if (I == NfaTransitions.end())68continue;69for (state_type &ToState : I->second) {70NewStates.push_back(ToState);71TI.emplace_back(FromState, ToState);72}73}74if (NewStates.empty())75continue;76// Sort and unique.77sort(NewStates);78NewStates.erase(llvm::unique(NewStates), NewStates.end());79sort(TI);80TI.erase(llvm::unique(TI), TI.end());81unsigned ToId = DfaStates.insert(NewStates);82DfaTransitions.emplace(std::pair(FromId, A), std::pair(ToId, TI));83}84}8586void DfaEmitter::constructDfa() {87DfaState Initial(1, /*NFA initial state=*/0);88DfaStates.insert(Initial);8990// Note that UniqueVector starts indices at 1, not zero.91unsigned DfaStateId = 1;92while (DfaStateId <= DfaStates.size()) {93DfaState S = DfaStates[DfaStateId];94visitDfaState(S);95DfaStateId++;96}97}9899void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {100constructDfa();101102OS << "// Input NFA has " << NfaStates.size() << " states with "103<< NumNfaTransitions << " transitions.\n";104OS << "// Generated DFA has " << DfaStates.size() << " states with "105<< DfaTransitions.size() << " transitions.\n\n";106107// Implementation note: We don't bake a simple std::pair<> here as it requires108// significantly more effort to parse. A simple test with a large array of109// struct-pairs (N=100000) took clang-10 6s to parse. The same array of110// std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to111// define the pair type.112//113// FIXME: It may make sense to emit these as ULEB sequences instead of114// pairs of uint64_t.115OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";116OS << "// transition implies a set of NFA transitions. These are referred\n";117OS << "// to by index in " << Name << "Transitions[].\n";118119SequenceToOffsetTable<DfaTransitionInfo> Table;120std::map<DfaTransitionInfo, unsigned> EmittedIndices;121for (auto &T : DfaTransitions)122Table.add(T.second.second);123Table.layout();124OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name125<< "TransitionInfo = {{\n";126Table.emit(127OS,128[](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {129OS << "{" << P.first << ", " << P.second << "}";130},131"{0ULL, 0ULL}");132133OS << "}};\n\n";134135OS << "// A transition in the generated " << Name << " DFA.\n";136OS << "struct " << Name << "Transition {\n";137OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n";138OS << " ";139printActionType(OS);140OS << " Action; // The input symbol that causes this transition.\n";141OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n";142OS << " unsigned InfoIdx; // Start index into " << Name143<< "TransitionInfo.\n";144OS << "};\n\n";145146OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";147OS << "// The initial state is 1, not zero.\n";148OS << "const std::array<" << Name << "Transition, " << DfaTransitions.size()149<< "> " << Name << "Transitions = {{\n";150for (auto &KV : DfaTransitions) {151dfa_state_type From = KV.first.first;152dfa_state_type To = KV.second.first;153action_type A = KV.first.second;154unsigned InfoIdx = Table.get(KV.second.second);155OS << " {" << From << ", ";156printActionValue(A, OS);157OS << ", " << To << ", " << InfoIdx << "},\n";158}159OS << "\n}};\n\n";160}161162void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }163164void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }165166//===----------------------------------------------------------------------===//167// AutomatonEmitter implementation168//===----------------------------------------------------------------------===//169170namespace {171172using Action = std::variant<Record *, unsigned, std::string>;173using ActionTuple = std::vector<Action>;174class Automaton;175176class Transition {177uint64_t NewState;178// The tuple of actions that causes this transition.179ActionTuple Actions;180// The types of the actions; this is the same across all transitions.181SmallVector<std::string, 4> Types;182183public:184Transition(Record *R, Automaton *Parent);185const ActionTuple &getActions() { return Actions; }186SmallVector<std::string, 4> getTypes() { return Types; }187188bool canTransitionFrom(uint64_t State);189uint64_t transitionFrom(uint64_t State);190};191192class Automaton {193RecordKeeper &Records;194Record *R;195std::vector<Transition> Transitions;196/// All possible action tuples, uniqued.197UniqueVector<ActionTuple> Actions;198/// The fields within each Transition object to find the action symbols.199std::vector<StringRef> ActionSymbolFields;200201public:202Automaton(RecordKeeper &Records, Record *R);203void emit(raw_ostream &OS);204205ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }206/// If the type of action A has been overridden (there exists a field207/// "TypeOf_A") return that, otherwise return the empty string.208StringRef getActionSymbolType(StringRef A);209};210211class AutomatonEmitter {212RecordKeeper &Records;213214public:215AutomatonEmitter(RecordKeeper &R) : Records(R) {}216void run(raw_ostream &OS);217};218219/// A DfaEmitter implementation that can print our variant action type.220class CustomDfaEmitter : public DfaEmitter {221const UniqueVector<ActionTuple> &Actions;222std::string TypeName;223224public:225CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)226: Actions(Actions), TypeName(TypeName) {}227228void printActionType(raw_ostream &OS) override;229void printActionValue(action_type A, raw_ostream &OS) override;230};231} // namespace232233void AutomatonEmitter::run(raw_ostream &OS) {234for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {235Automaton A(Records, R);236OS << "#ifdef GET_" << R->getName() << "_DECL\n";237A.emit(OS);238OS << "#endif // GET_" << R->getName() << "_DECL\n";239}240}241242Automaton::Automaton(RecordKeeper &Records, Record *R)243: Records(Records), R(R) {244LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");245ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");246}247248void Automaton::emit(raw_ostream &OS) {249StringRef TransitionClass = R->getValueAsString("TransitionClass");250for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {251assert(T->isSubClassOf("Transition"));252Transitions.emplace_back(T, this);253Actions.insert(Transitions.back().getActions());254}255256LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size()257<< "\n");258LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size()259<< " potential transitions.\n");260261StringRef Name = R->getName();262263CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");264// Starting from the initial state, build up a list of possible states and265// transitions.266std::deque<uint64_t> Worklist(1, 0);267std::set<uint64_t> SeenStates;268unsigned NumTransitions = 0;269SeenStates.insert(Worklist.front());270while (!Worklist.empty()) {271uint64_t State = Worklist.front();272Worklist.pop_front();273for (Transition &T : Transitions) {274if (!T.canTransitionFrom(State))275continue;276uint64_t NewState = T.transitionFrom(State);277if (SeenStates.emplace(NewState).second)278Worklist.emplace_back(NewState);279++NumTransitions;280Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));281}282}283LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()284<< " states with " << NumTransitions << " transitions.\n");285(void)NumTransitions;286287const auto &ActionTypes = Transitions.back().getTypes();288OS << "// The type of an action in the " << Name << " automaton.\n";289if (ActionTypes.size() == 1) {290OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";291} else {292OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")293<< ">;\n";294}295OS << "\n";296297Emitter.emit(Name, OS);298}299300StringRef Automaton::getActionSymbolType(StringRef A) {301Twine Ty = "TypeOf_" + A;302if (!R->getValue(Ty.str()))303return "";304return R->getValueAsString(Ty.str());305}306307Transition::Transition(Record *R, Automaton *Parent) {308BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");309NewState = 0;310assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&311"State cannot be represented in 64 bits!");312for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {313if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {314if (Bit->getValue())315NewState |= 1ULL << I;316}317}318319for (StringRef A : Parent->getActionSymbolFields()) {320RecordVal *SymbolV = R->getValue(A);321if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {322Actions.emplace_back(R->getValueAsDef(A));323Types.emplace_back(Ty->getAsString());324} else if (isa<IntRecTy>(SymbolV->getType())) {325Actions.emplace_back(static_cast<unsigned>(R->getValueAsInt(A)));326Types.emplace_back("unsigned");327} else if (isa<StringRecTy>(SymbolV->getType())) {328Actions.emplace_back(std::string(R->getValueAsString(A)));329Types.emplace_back("std::string");330} else {331report_fatal_error("Unhandled symbol type!");332}333334StringRef TypeOverride = Parent->getActionSymbolType(A);335if (!TypeOverride.empty())336Types.back() = std::string(TypeOverride);337}338}339340bool Transition::canTransitionFrom(uint64_t State) {341if ((State & NewState) == 0)342// The bits we want to set are not set;343return true;344return false;345}346347uint64_t Transition::transitionFrom(uint64_t State) { return State | NewState; }348349void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }350351void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {352const ActionTuple &AT = Actions[A];353if (AT.size() > 1)354OS << "std::tuple(";355ListSeparator LS;356for (const auto &SingleAction : AT) {357OS << LS;358if (const auto *R = std::get_if<Record *>(&SingleAction))359OS << (*R)->getName();360else if (const auto *S = std::get_if<std::string>(&SingleAction))361OS << '"' << *S << '"';362else363OS << std::get<unsigned>(SingleAction);364}365if (AT.size() > 1)366OS << ")";367}368369static TableGen::Emitter::OptClass<AutomatonEmitter>370X("gen-automata", "Generate generic automata");371372373