Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/peg_generator/pegen/__main__.py
12 views
1
#!/usr/bin/env python3.8
2
3
"""pegen -- PEG Generator.
4
5
Search the web for PEG Parsers for reference.
6
"""
7
8
import argparse
9
import sys
10
import time
11
import token
12
import traceback
13
from typing import Tuple
14
15
from pegen.build import Grammar, Parser, ParserGenerator, Tokenizer
16
from pegen.validator import validate_grammar
17
18
19
def generate_c_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the C parser build for the parsed command-line *args*.

    Forwards the grammar, tokens, output and extension options to
    ``pegen.build.build_c_parser_and_generator`` and returns the
    ``(grammar, parser, tokenizer, generator)`` values it produces.

    If the build raises an ``Exception``: with ``-v`` the exception is
    re-raised unchanged; without it the exception is printed without its
    stack frames, a hint is written to stderr, and the process exits with
    status 1.
    """
    from pegen.build import build_c_parser_and_generator

    verbosity = args.verbose
    # Verbosity 2 sets only the parser flag, 3 sets only the tokenizer flag,
    # and 4 or more sets both.
    parser_flag = verbosity == 2 or verbosity >= 4
    tokenizer_flag = verbosity >= 3

    try:
        grammar, parser, tokenizer, gen = build_c_parser_and_generator(
            args.grammar_filename,
            args.tokens_filename,
            args.output,
            args.compile_extension,
            tokenizer_flag,
            parser_flag,
            verbosity,
            # Asserts are kept in the extension unless --optimized was given.
            keep_asserts_in_extension=not args.optimized,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if not args.verbose:
            # A traceback of None suppresses the stack frames of `err`.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    else:
        return grammar, parser, tokenizer, gen
46
47
48
def generate_python_code(
    args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Run the Python parser build for the parsed command-line *args*.

    Forwards the grammar and output options to
    ``pegen.build.build_python_parser_and_generator`` and returns the
    ``(grammar, parser, tokenizer, generator)`` values it produces.

    If the build raises an ``Exception``: with ``-v`` the exception is
    re-raised unchanged; without it the exception is printed without its
    stack frames, a hint is written to stderr, and the process exits with
    status 1.
    """
    from pegen.build import build_python_parser_and_generator

    verbosity = args.verbose
    # Verbosity 2 sets only the parser flag, 3 sets only the tokenizer flag,
    # and 4 or more sets both.
    parser_flag = verbosity == 2 or verbosity >= 4
    tokenizer_flag = verbosity >= 3

    try:
        grammar, parser, tokenizer, gen = build_python_parser_and_generator(
            args.grammar_filename,
            args.output,
            tokenizer_flag,
            parser_flag,
            skip_actions=args.skip_actions,
        )
    except Exception as err:
        if not args.verbose:
            # A traceback of None suppresses the stack frames of `err`.
            traceback.print_exception(err.__class__, err, None)
            sys.stderr.write("For full traceback, use -v\n")
            sys.exit(1)
        raise  # Show traceback
    else:
        return grammar, parser, tokenizer, gen
71
72
73
# Command-line interface: global options first, then one sub-command per
# target language.  Each sub-command stores its generator function in
# ``func`` so the caller can dispatch on the parsed namespace.
argparser = argparse.ArgumentParser(
    prog="pegen",
    description="Experimental PEG-like parser generator",
)
argparser.add_argument(
    "-q",
    "--quiet",
    action="store_true",
    help="Don't print the parsed grammar",
)
argparser.add_argument(
    "-v",
    "--verbose",
    action="count",  # each repetition of -v raises the level by one
    default=0,
    help="Print timing stats; repeat for more debug output",
)
subparsers = argparser.add_subparsers(help="target language for the generated code")

# "c" sub-command.
c_parser = subparsers.add_parser(
    "c",
    help="Generate C code for inclusion into CPython",
)
c_parser.set_defaults(func=generate_c_code)
c_parser.add_argument("grammar_filename", help="Grammar description")
c_parser.add_argument("tokens_filename", help="Tokens description")
c_parser.add_argument(
    "-o",
    "--output",
    metavar="OUT",
    default="parse.c",
    help="Where to write the generated parser",
)
c_parser.add_argument(
    "--compile-extension",
    action="store_true",
    help="Compile generated C code into an extension module",
)
c_parser.add_argument(
    "--optimized",
    action="store_true",
    help="Compile the extension in optimized mode",
)
c_parser.add_argument(
    "--skip-actions",
    action="store_true",
    help="Suppress code emission for rule actions",
)

# "python" sub-command.
python_parser = subparsers.add_parser("python", help="Generate Python code")
python_parser.set_defaults(func=generate_python_code)
python_parser.add_argument("grammar_filename", help="Grammar description")
python_parser.add_argument(
    "-o", "--output", metavar="OUT", default="parse.py", help="Where to write the generated parser"
)
python_parser.add_argument(
    "--skip-actions", action="store_true", help="Suppress code emission for rule actions"
)
122
123
124
def _print_grammar(grammar: "Grammar", verbose: int) -> None:
    """Print the grammar's ``str`` form, preceded by its ``repr`` when *verbose*."""
    if verbose:
        print("Raw Grammar:")
        for line in repr(grammar).splitlines():
            print(" ", line)

    print("Clean Grammar:")
    for line in str(grammar).splitlines():
        print(" ", line)


def _print_first_graph(grammar: "Grammar", gen: "ParserGenerator") -> None:
    """Print ``gen.first_graph`` and ``gen.first_sccs``, annotating left recursion."""
    print("First Graph:")
    for src, dsts in gen.first_graph.items():
        print(f" {src} -> {', '.join(dsts)}")
    print("First SCCS:")
    for scc in gen.first_sccs:
        print(" ", scc, end="")
        if len(scc) > 1:
            # Several rules in one SCC: list those marked as leaders.
            print(
                " # Indirectly left-recursive; leaders:",
                {name for name in scc if grammar.rules[name].leader},
            )
        else:
            # A single rule is flagged only if it has an edge to itself.
            name = next(iter(scc))
            if name in gen.first_graph[name]:
                print(" # Left-recursive")
            else:
                print()


def _print_timing_stats(elapsed: float, parser: "Parser", tokenizer: "Tokenizer") -> None:
    """Print elapsed time, line count and throughput, then the cache sizes."""
    diag = tokenizer.diagnose()
    nlines = diag.end[0]
    if diag.type == token.ENDMARKER:
        # The ENDMARKER token's line is not counted.
        nlines -= 1
    print(f"Total time: {elapsed:.3f} sec; {nlines} lines", end="")
    if elapsed > 0:
        print(f"; {nlines / elapsed:.0f} lines/sec")
    else:
        # Avoid dividing by zero when the clock reports no elapsed time.
        print()
    print("Caches sizes:")
    print(f" token array : {len(tokenizer._tokens):10}")
    print(f" cache : {len(parser._cache):10}")


def main() -> None:
    """Command-line entry point: generate a parser and report on the run.

    Parses ``sys.argv`` with the module-level ``argparser``, runs the
    generator function selected by the sub-command, validates the resulting
    grammar, and then prints the grammar (unless ``-q``) and, with ``-v``,
    the first graph, timing statistics and memory statistics.
    """
    from pegen.testutil import print_memstats

    args = argparser.parse_args()
    if "func" not in args:
        # No sub-command was given, so no generator function was stored.
        argparser.error("Must specify the target language mode ('c' or 'python')")

    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # or made negative by system clock adjustments, unlike time.time().
    start = time.perf_counter()
    grammar, parser, tokenizer, gen = args.func(args)
    elapsed = time.perf_counter() - start

    validate_grammar(grammar)

    if not args.quiet:
        _print_grammar(grammar, args.verbose)

    if args.verbose:
        _print_first_graph(grammar, gen)
        _print_timing_stats(elapsed, parser, tokenizer)
        # A falsy result from print_memstats() is reported as psutil missing.
        if not print_memstats():
            print("(Can't find psutil; install it for memory stats.)")
182
183
184
if __name__ == "__main__":
    # Run only on Python 3.8 or newer; on anything older, report the problem
    # on stderr and exit with status 1 without calling main().
    if sys.version_info >= (3, 8):
        main()
    else:
        print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
        sys.exit(1)
189
190