Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/peg_generator/pegen/first_sets.py
12 views
1
#!/usr/bin/env python3.8
2
3
import argparse
4
import pprint
5
import sys
6
from typing import Dict, Set
7
8
from pegen.build import build_parser
9
from pegen.grammar import (
10
Alt,
11
Cut,
12
Gather,
13
GrammarVisitor,
14
Group,
15
Lookahead,
16
NamedItem,
17
NameLeaf,
18
NegativeLookahead,
19
Opt,
20
Repeat0,
21
Repeat1,
22
Rhs,
23
Rule,
24
StringLeaf,
25
)
26
from pegen.parser_generator import compute_nullables
27
28
argparser = argparse.ArgumentParser(
29
prog="calculate_first_sets",
30
description="Calculate the first sets of a grammar",
31
)
32
argparser.add_argument("grammar_file", help="The grammar file")
33
34
35
class FirstSetCalculator(GrammarVisitor):
36
def __init__(self, rules: Dict[str, Rule]) -> None:
37
self.rules = rules
38
self.nullables = compute_nullables(rules)
39
self.first_sets: Dict[str, Set[str]] = dict()
40
self.in_process: Set[str] = set()
41
42
def calculate(self) -> Dict[str, Set[str]]:
43
for name, rule in self.rules.items():
44
self.visit(rule)
45
return self.first_sets
46
47
def visit_Alt(self, item: Alt) -> Set[str]:
48
result: Set[str] = set()
49
to_remove: Set[str] = set()
50
for other in item.items:
51
new_terminals = self.visit(other)
52
if isinstance(other.item, NegativeLookahead):
53
to_remove |= new_terminals
54
result |= new_terminals
55
if to_remove:
56
result -= to_remove
57
58
# If the set of new terminals can start with the empty string,
59
# it means that the item is completely nullable and we should
60
# also considering at least the next item in case the current
61
# one fails to parse.
62
63
if "" in new_terminals:
64
continue
65
66
if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
67
break
68
69
# Do not allow the empty string to propagate.
70
result.discard("")
71
72
return result
73
74
def visit_Cut(self, item: Cut) -> Set[str]:
75
return set()
76
77
def visit_Group(self, item: Group) -> Set[str]:
78
return self.visit(item.rhs)
79
80
def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
81
return self.visit(item.node)
82
83
def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
84
return self.visit(item.node)
85
86
def visit_NamedItem(self, item: NamedItem) -> Set[str]:
87
return self.visit(item.item)
88
89
def visit_Opt(self, item: Opt) -> Set[str]:
90
return self.visit(item.node)
91
92
def visit_Gather(self, item: Gather) -> Set[str]:
93
return self.visit(item.node)
94
95
def visit_Repeat0(self, item: Repeat0) -> Set[str]:
96
return self.visit(item.node)
97
98
def visit_Repeat1(self, item: Repeat1) -> Set[str]:
99
return self.visit(item.node)
100
101
def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
102
if item.value not in self.rules:
103
return {item.value}
104
105
if item.value not in self.first_sets:
106
self.first_sets[item.value] = self.visit(self.rules[item.value])
107
return self.first_sets[item.value]
108
elif item.value in self.in_process:
109
return set()
110
111
return self.first_sets[item.value]
112
113
def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
114
return {item.value}
115
116
def visit_Rhs(self, item: Rhs) -> Set[str]:
117
result: Set[str] = set()
118
for alt in item.alts:
119
result |= self.visit(alt)
120
return result
121
122
def visit_Rule(self, item: Rule) -> Set[str]:
123
if item.name in self.in_process:
124
return set()
125
elif item.name not in self.first_sets:
126
self.in_process.add(item.name)
127
terminals = self.visit(item.rhs)
128
if item in self.nullables:
129
terminals.add("")
130
self.first_sets[item.name] = terminals
131
self.in_process.remove(item.name)
132
return self.first_sets[item.name]
133
134
135
def main() -> None:
136
args = argparser.parse_args()
137
138
try:
139
grammar, parser, tokenizer = build_parser(args.grammar_file)
140
except Exception as err:
141
print("ERROR: Failed to parse grammar file", file=sys.stderr)
142
sys.exit(1)
143
144
firs_sets = FirstSetCalculator(grammar.rules).calculate()
145
pprint.pprint(firs_sets)
146
147
148
if __name__ == "__main__":
149
main()
150
151