Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/peg_generator/pegen/build.py
12 views
1
import itertools
2
import os
3
import pathlib
4
import sys
5
import sysconfig
6
import tempfile
7
import tokenize
8
from typing import IO, Dict, List, Optional, Set, Tuple
9
10
from pegen.c_generator import CParserGenerator
11
from pegen.grammar import Grammar
12
from pegen.grammar_parser import GeneratedParser as GrammarParser
13
from pegen.parser import Parser
14
from pegen.parser_generator import ParserGenerator
15
from pegen.python_generator import PythonParserGenerator
16
from pegen.tokenizer import Tokenizer
17
18
MOD_DIR = pathlib.Path(__file__).resolve().parent
19
20
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
21
22
23
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
    """Return the combined flag words from two sysconfig variables.

    Looks up *compiler_flags* and *compiler_py_flags_nodist* in the build-time
    configuration and splits their concatenation on whitespace.  If either
    variable is absent from the configuration, an empty list is returned.
    """
    values = [
        sysconfig.get_config_var(name)
        for name in (compiler_flags, compiler_py_flags_nodist)
    ]
    if any(value is None for value in values):
        return []
    return " ".join(map(str, values)).split()

def fixup_build_ext(cmd):
    """Adjust a build_ext command so compiled test extensions link correctly.

    On Unix builds configured with --enable-shared, "-L." alone cannot locate
    libpython<blah>.so, because regrtest runs from a temporary directory rather
    than the source directory where the .so lives, so the library directories
    must be filled in explicitly.  On Windows debug builds, the command's
    debug attribute is not set automatically for some reason and must be
    toggled here.

    This function handles both of these things. Example use:

        cmd = build_ext(dist)
        support.fixup_build_ext(cmd)
        cmd.ensure_finalized()

    Unlike most other Unix platforms, Mac OS X embeds absolute paths to shared
    libraries into executables, so the fixup is not needed there.

    Taken from distutils (was part of the CPython stdlib until Python 3.11)
    """
    if os.name == "nt":
        # Debug builds of Python on Windows run as "python_d.exe"; mirror that
        # in the command's debug flag.
        cmd.debug = sys.executable.endswith("_d.exe")
        return
    if not sysconfig.get_config_var("Py_ENABLE_SHARED"):
        return
    # To further add to the shared builds fun on Unix, we can't just add
    # library_dirs to the Extension() instance because that doesn't get
    # plumbed through to the final compiler command.
    runshared = sysconfig.get_config_var("RUNSHARED")
    if runshared is None:
        cmd.library_dirs = ["."]
    elif sys.platform == "darwin":
        cmd.library_dirs = []
    else:
        # RUNSHARED looks like "LD_LIBRARY_PATH=/dir1:/dir2"; keep the
        # non-empty directory entries after the "=".
        _, _, dirs = runshared.partition("=")
        cmd.library_dirs = [entry for entry in dirs.split(os.pathsep) if entry]

def compile_c_extension(
    generated_source_path: str,
    build_dir: Optional[str] = None,
    verbose: bool = False,
    keep_asserts: bool = True,
    disable_optimization: bool = False,
    library_dir: Optional[str] = None,
) -> pathlib.Path:
    """Compile the generated source for a parser generator into an extension module.

    The extension module will be generated in the same directory as the provided path
    for the generated source, with the same basename (in addition to extension module
    metadata). For example, for the source mydir/parser.c the generated extension
    in a darwin system with python 3.8 will be mydir/parser.cpython-38-darwin.so.

    If *build_dir* is provided, that path will be used as the temporary build directory
    of distutils (this is useful in case you want to use a temporary directory).

    If *library_dir* is provided, that path will be used as the directory for a
    static library of the common parser sources (this is useful in case you are
    creating multiple extensions).

    Returns the path of the built extension module.
    """
    # Imported lazily so that merely importing this module does not require
    # setuptools (and its private _distutils internals) to be installed.
    import setuptools.logging

    from setuptools import Extension, Distribution
    from setuptools._distutils.dep_util import newer_group
    from setuptools._distutils.ccompiler import new_compiler
    from setuptools._distutils.sysconfig import customize_compiler

    if verbose:
        setuptools.logging.set_threshold(setuptools.logging.logging.DEBUG)

    source_file_path = pathlib.Path(generated_source_path)
    # The extension module is named after the source file's basename.
    extension_name = source_file_path.stem
    # Mirror the flags CPython itself was built with, plus core-module defines.
    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
    extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
    # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
    extra_compile_args.append("-D_Py_TEST_PEGEN")
    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
    if keep_asserts:
        # -UNDEBUG re-enables assert() even when the base CFLAGS define NDEBUG.
        extra_compile_args.append("-UNDEBUG")
    if disable_optimization:
        if sys.platform == "win32":
            extra_compile_args.append("/Od")
            extra_link_args.append("/LTCG:OFF")
        else:
            extra_compile_args.append("-O0")
            if sysconfig.get_config_var("GNULD") == "yes":
                # Link-time optimization would undo -O0; disable it for GNU ld.
                extra_link_args.append("-fno-lto")

    # Runtime support sources shared by every generated parser extension,
    # taken from the CPython source tree (MOD_DIR.parent.parent.parent).
    common_sources = [
        str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
        str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
        str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
        str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
    ]
    include_dirs = [
        str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
        str(MOD_DIR.parent.parent.parent / "Parser"),
    ]
    extension = Extension(
        extension_name,
        sources=[generated_source_path],
        extra_compile_args=extra_compile_args,
        extra_link_args=extra_link_args,
    )
    # A throwaway Distribution/build_ext pair is used only to compute paths,
    # library lists and export symbols; the actual compile/link is done below
    # with an explicit compiler object.
    dist = Distribution({"name": extension_name, "ext_modules": [extension]})
    cmd = dist.get_command_obj("build_ext")
    fixup_build_ext(cmd)
    cmd.build_lib = str(source_file_path.parent)
    cmd.include_dirs = include_dirs
    if build_dir:
        cmd.build_temp = build_dir
    cmd.ensure_finalized()

    compiler = new_compiler()
    customize_compiler(compiler)
    compiler.set_include_dirs(cmd.include_dirs)
    compiler.set_library_dirs(cmd.library_dirs)
    # build static lib
    if library_dir:
        library_filename = compiler.library_filename(extension_name, output_dir=library_dir)
        # Only rebuild the static library when a common source is newer.
        if newer_group(common_sources, library_filename, "newer"):
            if sys.platform == "win32":
                # Give MSVC an explicit .pdb path next to the static library.
                pdb = compiler.static_lib_format % (extension_name, ".pdb")
                compile_opts = [f"/Fd{library_dir}\\{pdb}"]
                compile_opts.extend(extra_compile_args)
            else:
                compile_opts = extra_compile_args
            objects = compiler.compile(
                common_sources,
                output_dir=library_dir,
                debug=cmd.debug,
                extra_postargs=compile_opts,
            )
            compiler.create_static_lib(
                objects, extension_name, output_dir=library_dir, debug=cmd.debug
            )
        if sys.platform == "win32":
            compiler.add_library_dir(library_dir)
            extension.libraries = [extension_name]
        elif sys.platform == "darwin":
            # Force every object in the static library into the extension.
            compiler.set_link_objects(
                [
                    "-Wl,-force_load",
                    library_filename,
                ]
            )
        else:
            # GNU ld equivalent of -force_load: pull in the whole archive.
            compiler.set_link_objects(
                [
                    "-Wl,--whole-archive",
                    library_filename,
                    "-Wl,--no-whole-archive",
                ]
            )
    else:
        # No shared static library requested: compile the common sources
        # directly into this extension (prepended before the generated source).
        extension.sources[0:0] = common_sources

    # Compile the source code to object files.
    ext_path = cmd.get_ext_fullpath(extension_name)
    if newer_group(extension.sources, ext_path, "newer"):
        objects = compiler.compile(
            extension.sources,
            output_dir=cmd.build_temp,
            debug=cmd.debug,
            extra_postargs=extra_compile_args,
        )
    else:
        # Everything is up to date; reuse the existing object files.
        objects = compiler.object_filenames(extension.sources, output_dir=cmd.build_temp)
    # Now link the object files together into a "shared object"
    compiler.link_shared_object(
        objects,
        ext_path,
        libraries=cmd.get_libraries(extension),
        extra_postargs=extra_link_args,
        export_symbols=cmd.get_export_symbols(extension),
        debug=cmd.debug,
        build_temp=cmd.build_temp,
    )

    return pathlib.Path(ext_path)

def build_parser(
    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
    """Parse *grammar_file* and return (grammar, parser, tokenizer).

    Raises a syntax error produced by the grammar parser when the file does
    not contain a valid grammar.
    """
    with open(grammar_file) as stream:
        grammar_tokenizer = Tokenizer(
            tokenize.generate_tokens(stream.readline), verbose=verbose_tokenizer
        )
        grammar_parser = GrammarParser(grammar_tokenizer, verbose=verbose_parser)
        grammar = grammar_parser.start()
        if not grammar:
            raise grammar_parser.make_syntax_error(grammar_file)

    return grammar, grammar_parser, grammar_tokenizer

def generate_token_definitions(tokens: IO[str]) -> TokenDefinitions:
    """Parse a Tokens file into lookup tables.

    Returns a triple ``(all_tokens, exact_tokens, non_exact_tokens)`` where
    *all_tokens* maps token index to token name, *exact_tokens* maps a
    literal operator string to its token index, and *non_exact_tokens* is
    the set of token names without a fixed spelling.  Blank lines and lines
    starting with ``#`` are skipped; any other malformed line raises
    ``ValueError``.
    """
    all_tokens: Dict[int, str] = {}
    exact_tokens: Dict[str, int] = {}
    non_exact_tokens: Set[str] = set()
    counter = itertools.count(0)

    for raw_line in tokens:
        stripped = raw_line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        fields = stripped.split()
        # Indices are assigned only to lines that actually define a token.
        index = next(counter)

        if len(fields) == 1:
            name = fields[0]
            non_exact_tokens.add(name)
            all_tokens[index] = name
        elif len(fields) == 2:
            name, literal = fields
            exact_tokens[literal.strip("'")] = index
            all_tokens[index] = name
        else:
            raise ValueError(f"Unexpected line found in Tokens file: {stripped}")

    return all_tokens, exact_tokens, non_exact_tokens

def build_c_generator(
    grammar: Grammar,
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write a C parser for *grammar* to *output_file* and return the generator.

    Token definitions are read from *tokens_file*.  When *compile_extension*
    is true, the emitted C source is additionally compiled into an extension
    module inside a throwaway build directory.
    """
    with open(tokens_file, "r") as token_stream:
        all_tokens, exact_tok, non_exact_tok = generate_token_definitions(token_stream)
    with open(output_file, "w") as output_stream:
        gen: ParserGenerator = CParserGenerator(
            grammar, all_tokens, exact_tok, non_exact_tok, output_stream, skip_actions=skip_actions
        )
        gen.generate(grammar_file)

    if compile_extension:
        with tempfile.TemporaryDirectory() as temp_build_dir:
            compile_c_extension(
                output_file,
                build_dir=temp_build_dir,
                verbose=verbose_c_extension,
                keep_asserts=keep_asserts_in_extension,
            )
    return gen

def build_python_generator(
    grammar: Grammar,
    grammar_file: str,
    output_file: str,
    skip_actions: bool = False,
) -> ParserGenerator:
    """Write a Python parser for *grammar* to *output_file* and return the generator."""
    with open(output_file, "w") as output_stream:
        gen: ParserGenerator = PythonParserGenerator(grammar, output_stream)  # TODO: skip_actions
        gen.generate(grammar_file)
    return gen

def build_c_parser_and_generator(
    grammar_file: str,
    tokens_file: str,
    output_file: str,
    compile_extension: bool = False,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    verbose_c_extension: bool = False,
    keep_asserts_in_extension: bool = True,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, C parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        tokens_file (string): Path for the tokens file
        output_file (string): Path for the output file
        compile_extension (bool, optional): Whether to compile the C extension.
          Defaults to False.
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        verbose_c_extension (bool, optional): Whether to display additional
          output when compiling the C extension. Defaults to False.
        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
          when compiling the extension module. Defaults to True.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file, verbose_tokenizer, verbose_parser
    )
    gen = build_c_generator(
        grammar,
        grammar_file,
        tokens_file,
        output_file,
        compile_extension=compile_extension,
        verbose_c_extension=verbose_c_extension,
        keep_asserts_in_extension=keep_asserts_in_extension,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen

def build_python_parser_and_generator(
    grammar_file: str,
    output_file: str,
    verbose_tokenizer: bool = False,
    verbose_parser: bool = False,
    skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
    """Generate rules, python parser, tokenizer, parser generator for a given grammar

    Args:
        grammar_file (string): Path for the grammar file
        output_file (string): Path for the output file
        verbose_tokenizer (bool, optional): Whether to display additional output
          when generating the tokenizer. Defaults to False.
        verbose_parser (bool, optional): Whether to display additional output
          when generating the parser. Defaults to False.
        skip_actions (bool, optional): Whether to pretend no rule has any actions.
    """
    grammar, parser, tokenizer = build_parser(
        grammar_file, verbose_tokenizer, verbose_parser
    )
    gen = build_python_generator(
        grammar,
        grammar_file,
        output_file,
        skip_actions=skip_actions,
    )
    return grammar, parser, tokenizer, gen
