Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/peg_generator/pegen/parser_generator.py
12 views
1
import ast
2
import contextlib
3
import re
4
from abc import abstractmethod
5
from typing import (
6
IO,
7
AbstractSet,
8
Any,
9
Dict,
10
Iterable,
11
Iterator,
12
List,
13
Optional,
14
Set,
15
Text,
16
Tuple,
17
Union,
18
)
19
20
from pegen import sccutils
21
from pegen.grammar import (
22
Alt,
23
Cut,
24
Forced,
25
Gather,
26
Grammar,
27
GrammarError,
28
GrammarVisitor,
29
Group,
30
Lookahead,
31
NamedItem,
32
NameLeaf,
33
Opt,
34
Plain,
35
Repeat0,
36
Repeat1,
37
Rhs,
38
Rule,
39
StringLeaf,
40
)
41
42
43
class RuleCollectorVisitor(GrammarVisitor):
44
"""Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""
45
46
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
47
self.rulses = rules
48
self.callmaker = callmakervisitor
49
50
def visit_Rule(self, rule: Rule) -> None:
51
self.visit(rule.flatten())
52
53
def visit_NamedItem(self, item: NamedItem) -> None:
54
self.callmaker.visit(item)
55
56
57
class KeywordCollectorVisitor(GrammarVisitor):
58
"""Visitor that collects all the keywods and soft keywords in the Grammar"""
59
60
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
61
self.generator = gen
62
self.keywords = keywords
63
self.soft_keywords = soft_keywords
64
65
def visit_StringLeaf(self, node: StringLeaf) -> None:
66
val = ast.literal_eval(node.value)
67
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
68
if node.value.endswith("'") and node.value not in self.keywords:
69
self.keywords[val] = self.generator.keyword_type()
70
else:
71
return self.soft_keywords.add(node.value.replace('"', ""))
72
73
74
class RuleCheckingVisitor(GrammarVisitor):
75
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
76
self.rules = rules
77
self.tokens = tokens
78
79
def visit_NameLeaf(self, node: NameLeaf) -> None:
80
if node.value not in self.rules and node.value not in self.tokens:
81
raise GrammarError(f"Dangling reference to rule {node.value!r}")
82
83
def visit_NamedItem(self, node: NamedItem) -> None:
84
if node.name and node.name.startswith("_"):
85
raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")
86
self.visit(node.item)
87
88
89
class ParserGenerator:
90
callmakervisitor: GrammarVisitor
91
92
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
93
self.grammar = grammar
94
self.tokens = tokens
95
self.keywords: Dict[str, int] = {}
96
self.soft_keywords: Set[str] = set()
97
self.rules = grammar.rules
98
self.validate_rule_names()
99
if "trailer" not in grammar.metas and "start" not in self.rules:
100
raise GrammarError("Grammar without a trailer must have a 'start' rule")
101
checker = RuleCheckingVisitor(self.rules, self.tokens)
102
for rule in self.rules.values():
103
checker.visit(rule)
104
self.file = file
105
self.level = 0
106
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
107
self.counter = 0 # For name_rule()/name_loop()
108
self.keyword_counter = 499 # For keyword_type()
109
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
110
self._local_variable_stack: List[List[str]] = []
111
112
def validate_rule_names(self) -> None:
113
for rule in self.rules:
114
if rule.startswith("_"):
115
raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")
116
117
@contextlib.contextmanager
118
def local_variable_context(self) -> Iterator[None]:
119
self._local_variable_stack.append([])
120
yield
121
self._local_variable_stack.pop()
122
123
@property
124
def local_variable_names(self) -> List[str]:
125
return self._local_variable_stack[-1]
126
127
@abstractmethod
128
def generate(self, filename: str) -> None:
129
raise NotImplementedError
130
131
@contextlib.contextmanager
132
def indent(self) -> Iterator[None]:
133
self.level += 1
134
try:
135
yield
136
finally:
137
self.level -= 1
138
139
def print(self, *args: object) -> None:
140
if not args:
141
print(file=self.file)
142
else:
143
print(" " * self.level, end="", file=self.file)
144
print(*args, file=self.file)
145
146
def printblock(self, lines: str) -> None:
147
for line in lines.splitlines():
148
self.print(line)
149
150
def collect_rules(self) -> None:
151
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
152
for rule in self.all_rules.values():
153
keyword_collector.visit(rule)
154
155
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
156
done: Set[str] = set()
157
while True:
158
computed_rules = list(self.all_rules)
159
todo = [i for i in computed_rules if i not in done]
160
if not todo:
161
break
162
done = set(self.all_rules)
163
for rulename in todo:
164
rule_collector.visit(self.all_rules[rulename])
165
166
def keyword_type(self) -> int:
167
self.keyword_counter += 1
168
return self.keyword_counter
169
170
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
171
self.counter += 1
172
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
173
self.all_rules[name] = Rule(name, None, rhs)
174
return name
175
176
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
177
self.counter += 1
178
if is_repeat1:
179
prefix = "_loop1_"
180
else:
181
prefix = "_loop0_"
182
name = f"{prefix}{self.counter}"
183
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
184
return name
185
186
def artifical_rule_from_gather(self, node: Gather) -> str:
187
self.counter += 1
188
name = f"_gather_{self.counter}"
189
self.counter += 1
190
extra_function_name = f"_loop0_{self.counter}"
191
extra_function_alt = Alt(
192
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
193
action="elem",
194
)
195
self.all_rules[extra_function_name] = Rule(
196
extra_function_name,
197
None,
198
Rhs([extra_function_alt]),
199
)
200
alt = Alt(
201
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
202
)
203
self.all_rules[name] = Rule(
204
name,
205
None,
206
Rhs([alt]),
207
)
208
return name
209
210
def dedupe(self, name: str) -> str:
211
origname = name
212
counter = 0
213
while name in self.local_variable_names:
214
counter += 1
215
name = f"{origname}_{counter}"
216
self.local_variable_names.append(name)
217
return name
218
219
220
class NullableVisitor(GrammarVisitor):
221
def __init__(self, rules: Dict[str, Rule]) -> None:
222
self.rules = rules
223
self.visited: Set[Any] = set()
224
self.nullables: Set[Union[Rule, NamedItem]] = set()
225
226
def visit_Rule(self, rule: Rule) -> bool:
227
if rule in self.visited:
228
return False
229
self.visited.add(rule)
230
if self.visit(rule.rhs):
231
self.nullables.add(rule)
232
return rule in self.nullables
233
234
def visit_Rhs(self, rhs: Rhs) -> bool:
235
for alt in rhs.alts:
236
if self.visit(alt):
237
return True
238
return False
239
240
def visit_Alt(self, alt: Alt) -> bool:
241
for item in alt.items:
242
if not self.visit(item):
243
return False
244
return True
245
246
def visit_Forced(self, force: Forced) -> bool:
247
return True
248
249
def visit_LookAhead(self, lookahead: Lookahead) -> bool:
250
return True
251
252
def visit_Opt(self, opt: Opt) -> bool:
253
return True
254
255
def visit_Repeat0(self, repeat: Repeat0) -> bool:
256
return True
257
258
def visit_Repeat1(self, repeat: Repeat1) -> bool:
259
return False
260
261
def visit_Gather(self, gather: Gather) -> bool:
262
return False
263
264
def visit_Cut(self, cut: Cut) -> bool:
265
return False
266
267
def visit_Group(self, group: Group) -> bool:
268
return self.visit(group.rhs)
269
270
def visit_NamedItem(self, item: NamedItem) -> bool:
271
if self.visit(item.item):
272
self.nullables.add(item)
273
return item in self.nullables
274
275
def visit_NameLeaf(self, node: NameLeaf) -> bool:
276
if node.value in self.rules:
277
return self.visit(self.rules[node.value])
278
# Token or unknown; never empty.
279
return False
280
281
def visit_StringLeaf(self, node: StringLeaf) -> bool:
282
# The string token '' is considered empty.
283
return not node.value
284
285
286
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
287
"""Compute which rules in a grammar are nullable.
288
289
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
290
"""
291
nullable_visitor = NullableVisitor(rules)
292
for rule in rules.values():
293
nullable_visitor.visit(rule)
294
return nullable_visitor.nullables
295
296
297
class InitialNamesVisitor(GrammarVisitor):
298
def __init__(self, rules: Dict[str, Rule]) -> None:
299
self.rules = rules
300
self.nullables = compute_nullables(rules)
301
302
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
303
names: Set[str] = set()
304
for value in node:
305
if isinstance(value, list):
306
for item in value:
307
names |= self.visit(item, *args, **kwargs)
308
else:
309
names |= self.visit(value, *args, **kwargs)
310
return names
311
312
def visit_Alt(self, alt: Alt) -> Set[Any]:
313
names: Set[str] = set()
314
for item in alt.items:
315
names |= self.visit(item)
316
if item not in self.nullables:
317
break
318
return names
319
320
def visit_Forced(self, force: Forced) -> Set[Any]:
321
return set()
322
323
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
324
return set()
325
326
def visit_Cut(self, cut: Cut) -> Set[Any]:
327
return set()
328
329
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
330
return {node.value}
331
332
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
333
return set()
334
335
336
def compute_left_recursives(
337
rules: Dict[str, Rule]
338
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
339
graph = make_first_graph(rules)
340
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
341
for scc in sccs:
342
if len(scc) > 1:
343
for name in scc:
344
rules[name].left_recursive = True
345
# Try to find a leader such that all cycles go through it.
346
leaders = set(scc)
347
for start in scc:
348
for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
349
# print("Cycle:", " -> ".join(cycle))
350
leaders -= scc - set(cycle)
351
if not leaders:
352
raise ValueError(
353
f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
354
)
355
# print("Leaders:", leaders)
356
leader = min(leaders) # Pick an arbitrary leader from the candidates.
357
rules[leader].leader = True
358
else:
359
name = min(scc) # The only element.
360
if name in graph[name]:
361
rules[name].left_recursive = True
362
rules[name].leader = True
363
return graph, sccs
364
365
366
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
367
"""Compute the graph of left-invocations.
368
369
There's an edge from A to B if A may invoke B at its initial
370
position.
371
372
Note that this requires the nullable flags to have been computed.
373
"""
374
initial_name_visitor = InitialNamesVisitor(rules)
375
graph = {}
376
vertices: Set[str] = set()
377
for rulename, rhs in rules.items():
378
graph[rulename] = names = initial_name_visitor.visit(rhs)
379
vertices |= names
380
for vertex in vertices:
381
graph.setdefault(vertex, set())
382
return graph
383
384