Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/peg_generator/pegen/python_generator.py
12 views
1
import os.path
2
import token
3
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
4
5
from pegen import grammar
6
from pegen.grammar import (
7
Alt,
8
Cut,
9
Forced,
10
Gather,
11
GrammarVisitor,
12
Group,
13
Lookahead,
14
NamedItem,
15
NameLeaf,
16
NegativeLookahead,
17
Opt,
18
PositiveLookahead,
19
Repeat0,
20
Repeat1,
21
Rhs,
22
Rule,
23
StringLeaf,
24
)
25
from pegen.parser_generator import ParserGenerator
26
27
MODULE_PREFIX = """\
28
#!/usr/bin/env python3.8
29
# @generated by pegen from {filename}
30
31
import ast
32
import sys
33
import tokenize
34
35
from typing import Any, Optional
36
37
from pegen.parser import memoize, memoize_left_rec, logger, Parser
38
39
"""
40
MODULE_SUFFIX = """
41
42
if __name__ == '__main__':
43
from pegen.parser import simple_parser_main
44
simple_parser_main({class_name})
45
"""
46
47
48
class InvalidNodeVisitor(GrammarVisitor):
49
def visit_NameLeaf(self, node: NameLeaf) -> bool:
50
name = node.value
51
return name.startswith("invalid")
52
53
def visit_StringLeaf(self, node: StringLeaf) -> bool:
54
return False
55
56
def visit_NamedItem(self, node: NamedItem) -> bool:
57
return self.visit(node.item)
58
59
def visit_Rhs(self, node: Rhs) -> bool:
60
return any(self.visit(alt) for alt in node.alts)
61
62
def visit_Alt(self, node: Alt) -> bool:
63
return any(self.visit(item) for item in node.items)
64
65
def lookahead_call_helper(self, node: Lookahead) -> bool:
66
return self.visit(node.node)
67
68
def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
69
return self.lookahead_call_helper(node)
70
71
def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
72
return self.lookahead_call_helper(node)
73
74
def visit_Opt(self, node: Opt) -> bool:
75
return self.visit(node.node)
76
77
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
78
return self.visit(node.node)
79
80
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
81
return self.visit(node.node)
82
83
def visit_Group(self, node: Group) -> bool:
84
return self.visit(node.rhs)
85
86
def visit_Cut(self, node: Cut) -> bool:
87
return False
88
89
def visit_Forced(self, node: Forced) -> bool:
90
return self.visit(node.node)
91
92
93
class PythonCallMakerVisitor(GrammarVisitor):
94
def __init__(self, parser_generator: ParserGenerator):
95
self.gen = parser_generator
96
self.cache: Dict[Any, Any] = {}
97
98
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
99
name = node.value
100
if name == "SOFT_KEYWORD":
101
return "soft_keyword", "self.soft_keyword()"
102
if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
103
name = name.lower()
104
return name, f"self.{name}()"
105
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
106
# Avoid using names that can be Python keywords
107
return "_" + name.lower(), f"self.expect({name!r})"
108
return name, f"self.{name}()"
109
110
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
111
return "literal", f"self.expect({node.value})"
112
113
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
114
if node in self.cache:
115
return self.cache[node]
116
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
117
self.cache[node] = self.visit(node.alts[0].items[0])
118
else:
119
name = self.gen.artifical_rule_from_rhs(node)
120
self.cache[node] = name, f"self.{name}()"
121
return self.cache[node]
122
123
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
124
name, call = self.visit(node.item)
125
if node.name:
126
name = node.name
127
return name, call
128
129
def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
130
name, call = self.visit(node.node)
131
head, tail = call.split("(", 1)
132
assert tail[-1] == ")"
133
tail = tail[:-1]
134
return head, tail
135
136
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
137
head, tail = self.lookahead_call_helper(node)
138
return None, f"self.positive_lookahead({head}, {tail})"
139
140
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
141
head, tail = self.lookahead_call_helper(node)
142
return None, f"self.negative_lookahead({head}, {tail})"
143
144
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
145
name, call = self.visit(node.node)
146
# Note trailing comma (the call may already have one comma
147
# at the end, for example when rules have both repeat0 and optional
148
# markers, e.g: [rule*])
149
if call.endswith(","):
150
return "opt", call
151
else:
152
return "opt", f"{call},"
153
154
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
155
if node in self.cache:
156
return self.cache[node]
157
name = self.gen.artificial_rule_from_repeat(node.node, False)
158
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
159
return self.cache[node]
160
161
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
162
if node in self.cache:
163
return self.cache[node]
164
name = self.gen.artificial_rule_from_repeat(node.node, True)
165
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
166
return self.cache[node]
167
168
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
169
if node in self.cache:
170
return self.cache[node]
171
name = self.gen.artifical_rule_from_gather(node)
172
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
173
return self.cache[node]
174
175
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
176
return self.visit(node.rhs)
177
178
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
179
return "cut", "True"
180
181
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
182
if isinstance(node.node, Group):
183
_, val = self.visit(node.node.rhs)
184
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
185
else:
186
return (
187
"forced",
188
f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
189
)
190
191
192
class PythonParserGenerator(ParserGenerator, GrammarVisitor):
193
def __init__(
194
self,
195
grammar: grammar.Grammar,
196
file: Optional[IO[Text]],
197
tokens: Set[str] = set(token.tok_name.values()),
198
location_formatting: Optional[str] = None,
199
unreachable_formatting: Optional[str] = None,
200
):
201
tokens.add("SOFT_KEYWORD")
202
super().__init__(grammar, tokens, file)
203
self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
204
self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
205
self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
206
self.location_formatting = (
207
location_formatting
208
or "lineno=start_lineno, col_offset=start_col_offset, "
209
"end_lineno=end_lineno, end_col_offset=end_col_offset"
210
)
211
212
def generate(self, filename: str) -> None:
213
self.collect_rules()
214
header = self.grammar.metas.get("header", MODULE_PREFIX)
215
if header is not None:
216
basename = os.path.basename(filename)
217
self.print(header.rstrip("\n").format(filename=basename))
218
subheader = self.grammar.metas.get("subheader", "")
219
if subheader:
220
self.print(subheader)
221
cls_name = self.grammar.metas.get("class", "GeneratedParser")
222
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
223
self.print(f"class {cls_name}(Parser):")
224
for rule in self.all_rules.values():
225
self.print()
226
with self.indent():
227
self.visit(rule)
228
229
self.print()
230
with self.indent():
231
self.print(f"KEYWORDS = {tuple(self.keywords)}")
232
self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")
233
234
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
235
if trailer is not None:
236
self.print(trailer.rstrip("\n"))
237
238
def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
239
for alt in alts:
240
if alt.action and "LOCATIONS" in alt.action:
241
return True
242
for n in alt.items:
243
if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
244
return True
245
return False
246
247
def visit_Rule(self, node: Rule) -> None:
248
is_loop = node.is_loop()
249
is_gather = node.is_gather()
250
rhs = node.flatten()
251
if node.left_recursive:
252
if node.leader:
253
self.print("@memoize_left_rec")
254
else:
255
# Non-leader rules in a cycle are not memoized,
256
# but they must still be logged.
257
self.print("@logger")
258
else:
259
self.print("@memoize")
260
node_type = node.type or "Any"
261
self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
262
with self.indent():
263
self.print(f"# {node.name}: {rhs}")
264
self.print("mark = self._mark()")
265
if self.alts_uses_locations(node.rhs.alts):
266
self.print("tok = self._tokenizer.peek()")
267
self.print("start_lineno, start_col_offset = tok.start")
268
if is_loop:
269
self.print("children = []")
270
self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
271
if is_loop:
272
self.print("return children")
273
else:
274
self.print("return None")
275
276
def visit_NamedItem(self, node: NamedItem) -> None:
277
name, call = self.callmakervisitor.visit(node.item)
278
if node.name:
279
name = node.name
280
if not name:
281
self.print(call)
282
else:
283
if name != "cut":
284
name = self.dedupe(name)
285
self.print(f"({name} := {call})")
286
287
def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
288
if is_loop:
289
assert len(node.alts) == 1
290
for alt in node.alts:
291
self.visit(alt, is_loop=is_loop, is_gather=is_gather)
292
293
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
294
has_cut = any(isinstance(item.item, Cut) for item in node.items)
295
with self.local_variable_context():
296
if has_cut:
297
self.print("cut = False")
298
if is_loop:
299
self.print("while (")
300
else:
301
self.print("if (")
302
with self.indent():
303
first = True
304
for item in node.items:
305
if first:
306
first = False
307
else:
308
self.print("and")
309
self.visit(item)
310
if is_gather:
311
self.print("is not None")
312
313
self.print("):")
314
with self.indent():
315
action = node.action
316
if not action:
317
if is_gather:
318
assert len(self.local_variable_names) == 2
319
action = (
320
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
321
)
322
else:
323
if self.invalidvisitor.visit(node):
324
action = "UNREACHABLE"
325
elif len(self.local_variable_names) == 1:
326
action = f"{self.local_variable_names[0]}"
327
else:
328
action = f"[{', '.join(self.local_variable_names)}]"
329
elif "LOCATIONS" in action:
330
self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
331
self.print("end_lineno, end_col_offset = tok.end")
332
action = action.replace("LOCATIONS", self.location_formatting)
333
334
if is_loop:
335
self.print(f"children.append({action})")
336
self.print(f"mark = self._mark()")
337
else:
338
if "UNREACHABLE" in action:
339
action = action.replace("UNREACHABLE", self.unreachable_formatting)
340
self.print(f"return {action}")
341
342
self.print("self._reset(mark)")
343
# Skip remaining alternatives if a cut was reached.
344
if has_cut:
345
self.print("if cut: return None")
346
347