Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
freebsd
GitHub Repository: freebsd/freebsd-src
Path: blob/main/contrib/llvm-project/llvm/utils/TableGen/DFAEmitter.cpp
35258 views
1
//===- DFAEmitter.cpp - Finite state automaton emitter --------------------===//
2
//
3
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4
// See https://llvm.org/LICENSE.txt for license information.
5
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6
//
7
//===----------------------------------------------------------------------===//
8
//
9
// This class can produce a generic deterministic finite state automaton (DFA),
10
// given a set of possible states and transitions.
11
//
12
// The input transitions can be nondeterministic - this class will produce the
13
// deterministic equivalent state machine.
14
//
15
// The generated code can run the DFA and produce an accepted / not accepted
16
// state and also produce, given a sequence of transitions that results in an
17
// accepted state, the sequence of intermediate states. This is useful if the
18
// initial automaton was nondeterministic - it allows mapping back from the DFA
19
// to the NFA.
20
//
21
//===----------------------------------------------------------------------===//
22
23
#include "DFAEmitter.h"
24
#include "Basic/SequenceToOffsetTable.h"
25
#include "llvm/ADT/SmallVector.h"
26
#include "llvm/ADT/StringExtras.h"
27
#include "llvm/ADT/UniqueVector.h"
28
#include "llvm/Support/Debug.h"
29
#include "llvm/Support/raw_ostream.h"
30
#include "llvm/TableGen/Record.h"
31
#include "llvm/TableGen/TableGenBackend.h"
32
#include <cassert>
33
#include <cstdint>
34
#include <deque>
35
#include <map>
36
#include <set>
37
#include <string>
38
#include <variant>
39
#include <vector>
40
41
#define DEBUG_TYPE "dfa-emitter"
42
43
using namespace llvm;
44
45
//===----------------------------------------------------------------------===//
46
// DfaEmitter implementation. This is independent of the GenAutomaton backend.
47
//===----------------------------------------------------------------------===//
48
49
void DfaEmitter::addTransition(state_type From, state_type To, action_type A) {
50
Actions.insert(A);
51
NfaStates.insert(From);
52
NfaStates.insert(To);
53
NfaTransitions[{From, A}].push_back(To);
54
++NumNfaTransitions;
55
}
56
57
void DfaEmitter::visitDfaState(const DfaState &DS) {
58
// For every possible action...
59
auto FromId = DfaStates.idFor(DS);
60
for (action_type A : Actions) {
61
DfaState NewStates;
62
DfaTransitionInfo TI;
63
// For every represented state, word pair in the original NFA...
64
for (state_type FromState : DS) {
65
// If this action is possible from this state add the transitioned-to
66
// states to NewStates.
67
auto I = NfaTransitions.find({FromState, A});
68
if (I == NfaTransitions.end())
69
continue;
70
for (state_type &ToState : I->second) {
71
NewStates.push_back(ToState);
72
TI.emplace_back(FromState, ToState);
73
}
74
}
75
if (NewStates.empty())
76
continue;
77
// Sort and unique.
78
sort(NewStates);
79
NewStates.erase(llvm::unique(NewStates), NewStates.end());
80
sort(TI);
81
TI.erase(llvm::unique(TI), TI.end());
82
unsigned ToId = DfaStates.insert(NewStates);
83
DfaTransitions.emplace(std::pair(FromId, A), std::pair(ToId, TI));
84
}
85
}
86
87
void DfaEmitter::constructDfa() {
88
DfaState Initial(1, /*NFA initial state=*/0);
89
DfaStates.insert(Initial);
90
91
// Note that UniqueVector starts indices at 1, not zero.
92
unsigned DfaStateId = 1;
93
while (DfaStateId <= DfaStates.size()) {
94
DfaState S = DfaStates[DfaStateId];
95
visitDfaState(S);
96
DfaStateId++;
97
}
98
}
99
100
void DfaEmitter::emit(StringRef Name, raw_ostream &OS) {
101
constructDfa();
102
103
OS << "// Input NFA has " << NfaStates.size() << " states with "
104
<< NumNfaTransitions << " transitions.\n";
105
OS << "// Generated DFA has " << DfaStates.size() << " states with "
106
<< DfaTransitions.size() << " transitions.\n\n";
107
108
// Implementation note: We don't bake a simple std::pair<> here as it requires
109
// significantly more effort to parse. A simple test with a large array of
110
// struct-pairs (N=100000) took clang-10 6s to parse. The same array of
111
// std::pair<uint64_t, uint64_t> took 242s. Instead we allow the user to
112
// define the pair type.
113
//
114
// FIXME: It may make sense to emit these as ULEB sequences instead of
115
// pairs of uint64_t.
116
OS << "// A zero-terminated sequence of NFA state transitions. Every DFA\n";
117
OS << "// transition implies a set of NFA transitions. These are referred\n";
118
OS << "// to by index in " << Name << "Transitions[].\n";
119
120
SequenceToOffsetTable<DfaTransitionInfo> Table;
121
std::map<DfaTransitionInfo, unsigned> EmittedIndices;
122
for (auto &T : DfaTransitions)
123
Table.add(T.second.second);
124
Table.layout();
125
OS << "const std::array<NfaStatePair, " << Table.size() << "> " << Name
126
<< "TransitionInfo = {{\n";
127
Table.emit(
128
OS,
129
[](raw_ostream &OS, std::pair<uint64_t, uint64_t> P) {
130
OS << "{" << P.first << ", " << P.second << "}";
131
},
132
"{0ULL, 0ULL}");
133
134
OS << "}};\n\n";
135
136
OS << "// A transition in the generated " << Name << " DFA.\n";
137
OS << "struct " << Name << "Transition {\n";
138
OS << " unsigned FromDfaState; // The transitioned-from DFA state.\n";
139
OS << " ";
140
printActionType(OS);
141
OS << " Action; // The input symbol that causes this transition.\n";
142
OS << " unsigned ToDfaState; // The transitioned-to DFA state.\n";
143
OS << " unsigned InfoIdx; // Start index into " << Name
144
<< "TransitionInfo.\n";
145
OS << "};\n\n";
146
147
OS << "// A table of DFA transitions, ordered by {FromDfaState, Action}.\n";
148
OS << "// The initial state is 1, not zero.\n";
149
OS << "const std::array<" << Name << "Transition, " << DfaTransitions.size()
150
<< "> " << Name << "Transitions = {{\n";
151
for (auto &KV : DfaTransitions) {
152
dfa_state_type From = KV.first.first;
153
dfa_state_type To = KV.second.first;
154
action_type A = KV.first.second;
155
unsigned InfoIdx = Table.get(KV.second.second);
156
OS << " {" << From << ", ";
157
printActionValue(A, OS);
158
OS << ", " << To << ", " << InfoIdx << "},\n";
159
}
160
OS << "\n}};\n\n";
161
}
162
163
void DfaEmitter::printActionType(raw_ostream &OS) { OS << "uint64_t"; }
164
165
void DfaEmitter::printActionValue(action_type A, raw_ostream &OS) { OS << A; }
166
167
//===----------------------------------------------------------------------===//
168
// AutomatonEmitter implementation
169
//===----------------------------------------------------------------------===//
170
171
namespace {
172
173
using Action = std::variant<Record *, unsigned, std::string>;
174
using ActionTuple = std::vector<Action>;
175
class Automaton;
176
177
class Transition {
178
uint64_t NewState;
179
// The tuple of actions that causes this transition.
180
ActionTuple Actions;
181
// The types of the actions; this is the same across all transitions.
182
SmallVector<std::string, 4> Types;
183
184
public:
185
Transition(Record *R, Automaton *Parent);
186
const ActionTuple &getActions() { return Actions; }
187
SmallVector<std::string, 4> getTypes() { return Types; }
188
189
bool canTransitionFrom(uint64_t State);
190
uint64_t transitionFrom(uint64_t State);
191
};
192
193
class Automaton {
194
RecordKeeper &Records;
195
Record *R;
196
std::vector<Transition> Transitions;
197
/// All possible action tuples, uniqued.
198
UniqueVector<ActionTuple> Actions;
199
/// The fields within each Transition object to find the action symbols.
200
std::vector<StringRef> ActionSymbolFields;
201
202
public:
203
Automaton(RecordKeeper &Records, Record *R);
204
void emit(raw_ostream &OS);
205
206
ArrayRef<StringRef> getActionSymbolFields() { return ActionSymbolFields; }
207
/// If the type of action A has been overridden (there exists a field
208
/// "TypeOf_A") return that, otherwise return the empty string.
209
StringRef getActionSymbolType(StringRef A);
210
};
211
212
class AutomatonEmitter {
213
RecordKeeper &Records;
214
215
public:
216
AutomatonEmitter(RecordKeeper &R) : Records(R) {}
217
void run(raw_ostream &OS);
218
};
219
220
/// A DfaEmitter implementation that can print our variant action type.
221
class CustomDfaEmitter : public DfaEmitter {
222
const UniqueVector<ActionTuple> &Actions;
223
std::string TypeName;
224
225
public:
226
CustomDfaEmitter(const UniqueVector<ActionTuple> &Actions, StringRef TypeName)
227
: Actions(Actions), TypeName(TypeName) {}
228
229
void printActionType(raw_ostream &OS) override;
230
void printActionValue(action_type A, raw_ostream &OS) override;
231
};
232
} // namespace
233
234
void AutomatonEmitter::run(raw_ostream &OS) {
235
for (Record *R : Records.getAllDerivedDefinitions("GenericAutomaton")) {
236
Automaton A(Records, R);
237
OS << "#ifdef GET_" << R->getName() << "_DECL\n";
238
A.emit(OS);
239
OS << "#endif // GET_" << R->getName() << "_DECL\n";
240
}
241
}
242
243
Automaton::Automaton(RecordKeeper &Records, Record *R)
244
: Records(Records), R(R) {
245
LLVM_DEBUG(dbgs() << "Emitting automaton for " << R->getName() << "\n");
246
ActionSymbolFields = R->getValueAsListOfStrings("SymbolFields");
247
}
248
249
void Automaton::emit(raw_ostream &OS) {
250
StringRef TransitionClass = R->getValueAsString("TransitionClass");
251
for (Record *T : Records.getAllDerivedDefinitions(TransitionClass)) {
252
assert(T->isSubClassOf("Transition"));
253
Transitions.emplace_back(T, this);
254
Actions.insert(Transitions.back().getActions());
255
}
256
257
LLVM_DEBUG(dbgs() << " Action alphabet cardinality: " << Actions.size()
258
<< "\n");
259
LLVM_DEBUG(dbgs() << " Each state has " << Transitions.size()
260
<< " potential transitions.\n");
261
262
StringRef Name = R->getName();
263
264
CustomDfaEmitter Emitter(Actions, std::string(Name) + "Action");
265
// Starting from the initial state, build up a list of possible states and
266
// transitions.
267
std::deque<uint64_t> Worklist(1, 0);
268
std::set<uint64_t> SeenStates;
269
unsigned NumTransitions = 0;
270
SeenStates.insert(Worklist.front());
271
while (!Worklist.empty()) {
272
uint64_t State = Worklist.front();
273
Worklist.pop_front();
274
for (Transition &T : Transitions) {
275
if (!T.canTransitionFrom(State))
276
continue;
277
uint64_t NewState = T.transitionFrom(State);
278
if (SeenStates.emplace(NewState).second)
279
Worklist.emplace_back(NewState);
280
++NumTransitions;
281
Emitter.addTransition(State, NewState, Actions.idFor(T.getActions()));
282
}
283
}
284
LLVM_DEBUG(dbgs() << " NFA automaton has " << SeenStates.size()
285
<< " states with " << NumTransitions << " transitions.\n");
286
(void)NumTransitions;
287
288
const auto &ActionTypes = Transitions.back().getTypes();
289
OS << "// The type of an action in the " << Name << " automaton.\n";
290
if (ActionTypes.size() == 1) {
291
OS << "using " << Name << "Action = " << ActionTypes[0] << ";\n";
292
} else {
293
OS << "using " << Name << "Action = std::tuple<" << join(ActionTypes, ", ")
294
<< ">;\n";
295
}
296
OS << "\n";
297
298
Emitter.emit(Name, OS);
299
}
300
301
StringRef Automaton::getActionSymbolType(StringRef A) {
302
Twine Ty = "TypeOf_" + A;
303
if (!R->getValue(Ty.str()))
304
return "";
305
return R->getValueAsString(Ty.str());
306
}
307
308
Transition::Transition(Record *R, Automaton *Parent) {
309
BitsInit *NewStateInit = R->getValueAsBitsInit("NewState");
310
NewState = 0;
311
assert(NewStateInit->getNumBits() <= sizeof(uint64_t) * 8 &&
312
"State cannot be represented in 64 bits!");
313
for (unsigned I = 0; I < NewStateInit->getNumBits(); ++I) {
314
if (auto *Bit = dyn_cast<BitInit>(NewStateInit->getBit(I))) {
315
if (Bit->getValue())
316
NewState |= 1ULL << I;
317
}
318
}
319
320
for (StringRef A : Parent->getActionSymbolFields()) {
321
RecordVal *SymbolV = R->getValue(A);
322
if (auto *Ty = dyn_cast<RecordRecTy>(SymbolV->getType())) {
323
Actions.emplace_back(R->getValueAsDef(A));
324
Types.emplace_back(Ty->getAsString());
325
} else if (isa<IntRecTy>(SymbolV->getType())) {
326
Actions.emplace_back(static_cast<unsigned>(R->getValueAsInt(A)));
327
Types.emplace_back("unsigned");
328
} else if (isa<StringRecTy>(SymbolV->getType())) {
329
Actions.emplace_back(std::string(R->getValueAsString(A)));
330
Types.emplace_back("std::string");
331
} else {
332
report_fatal_error("Unhandled symbol type!");
333
}
334
335
StringRef TypeOverride = Parent->getActionSymbolType(A);
336
if (!TypeOverride.empty())
337
Types.back() = std::string(TypeOverride);
338
}
339
}
340
341
bool Transition::canTransitionFrom(uint64_t State) {
342
if ((State & NewState) == 0)
343
// The bits we want to set are not set;
344
return true;
345
return false;
346
}
347
348
uint64_t Transition::transitionFrom(uint64_t State) { return State | NewState; }
349
350
void CustomDfaEmitter::printActionType(raw_ostream &OS) { OS << TypeName; }
351
352
void CustomDfaEmitter::printActionValue(action_type A, raw_ostream &OS) {
353
const ActionTuple &AT = Actions[A];
354
if (AT.size() > 1)
355
OS << "std::tuple(";
356
ListSeparator LS;
357
for (const auto &SingleAction : AT) {
358
OS << LS;
359
if (const auto *R = std::get_if<Record *>(&SingleAction))
360
OS << (*R)->getName();
361
else if (const auto *S = std::get_if<std::string>(&SingleAction))
362
OS << '"' << *S << '"';
363
else
364
OS << std::get<unsigned>(SingleAction);
365
}
366
if (AT.size() > 1)
367
OS << ")";
368
}
369
370
static TableGen::Emitter::OptClass<AutomatonEmitter>
371
X("gen-automata", "Generate generic automata");
372
373