Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/build/deepfreeze.py
12 views
1
"""Deep freeze
2
3
The script may be executed by _bootstrap_python interpreter.
4
Shared library extension modules are not available in that case.
5
On Windows, and in cross-compilation cases, it is executed
6
by Python 3.10, and 3.11 features are not available.
7
"""
8
import argparse
9
import ast
10
import builtins
11
import collections
12
import contextlib
13
import os
14
import re
15
import time
16
import types
17
from typing import Dict, FrozenSet, TextIO, Tuple
18
19
import umarshal
20
from generate_global_objects import get_identifiers_and_strings
21
22
# Diagnostics switch; main() sets it from the -v/--verbose option.
verbose = False

# Lookup tables consulted by Printer.generate_unicode(): a string found in
# either one is emitted as a _Py_STR()/_Py_ID() reference instead of a new
# static object.
identifiers, strings = get_identifiers_and_strings()

# This must be kept in sync with opcode.py
RESUME = 151
27
28
def isprintable(b: bytes) -> bool:
    """Return True when every byte of *b* lies in the range 0x20..0x7e.

    An empty bytes object counts as printable.
    """
    for byte in b:
        if byte < 0x20 or byte >= 0x7f:
            return False
    return True
30
31
32
def make_string_literal(b: bytes) -> str:
    """Render *b* as a double-quoted C string literal.

    If every byte is in 0x20..0x7e the text is written as-is, with
    backslashes and double quotes backslash-escaped.  Otherwise every
    byte (printable or not) is written as a ``\\xNN`` escape.
    """
    if all(0x20 <= byte < 0x7f for byte in b):
        text = b.decode("ascii")
        body = text.replace("\\", "\\\\").replace("\"", "\\\"")
    else:
        body = "".join(f"\\x{byte:02x}" for byte in b)
    return '"' + body + '"'
41
42
43
# Bit flags describing each "localsplus" name; get_localsplus() ORs them
# together per name and get_localsplus_counts() tests them.
CO_FAST_LOCAL = 0x20
CO_FAST_CELL = 0x40
CO_FAST_FREE = 0x80

# Value written to .co_version of the next generated code object;
# Printer.generate_code() increments it after each use.
next_code_version = 1
48
49
def get_localsplus(code: types.CodeType):
    """Build the combined name/kind tables for a code object's variables.

    Names are taken from ``co_varnames``, then ``co_cellvars``, then
    ``co_freevars``.  A name that appears in more than one of them gets a
    single entry whose kind byte has all the matching CO_FAST_* bits set.

    Returns ``(names, kinds)``: a tuple of names in first-seen order and
    a bytes object holding the kind byte of each name at the same index.
    """
    kind_by_name = collections.defaultdict(int)
    sources = (
        (code.co_varnames, CO_FAST_LOCAL),
        (code.co_cellvars, CO_FAST_CELL),
        (code.co_freevars, CO_FAST_FREE),
    )
    for names, flag in sources:
        for name in names:
            kind_by_name[name] |= flag
    return tuple(kind_by_name), bytes(kind_by_name.values())
58
59
60
def get_localsplus_counts(code: types.CodeType,
                          names: Tuple[str, ...],
                          kinds: bytes) -> Tuple[int, int, int]:
    """Count locals, cell variables and free variables from the kind bytes.

    *names* and *kinds* are the parallel sequences produced by
    get_localsplus().  An entry flagged CO_FAST_LOCAL counts as a local
    and, if it is also flagged CO_FAST_CELL, as a cell variable too.
    Otherwise it counts as a cell variable or, failing that, as a free
    variable.

    Returns ``(nlocals, ncellvars, nfreevars)``.  The counts are
    cross-checked against *code* with assertions.
    """
    assert len(names) == len(kinds)
    nlocals = 0
    ncellvars = 0
    nfreevars = 0
    # Only the kind byte is needed; zip() is kept so iteration still stops
    # at the shorter sequence if the assertion above is compiled out.
    for _name, kind in zip(names, kinds):
        if kind & CO_FAST_LOCAL:
            nlocals += 1
            if kind & CO_FAST_CELL:
                ncellvars += 1
        elif kind & CO_FAST_CELL:
            ncellvars += 1
        elif kind & CO_FAST_FREE:
            nfreevars += 1
    assert nlocals == len(code.co_varnames) == code.co_nlocals, \
        (nlocals, len(code.co_varnames), code.co_nlocals)
    assert ncellvars == len(code.co_cellvars)
    assert nfreevars == len(code.co_freevars)
    return nlocals, ncellvars, nfreevars
81
82
83
# Storage widths returned by analyze_character_width(); the chosen value is
# written to the ``.kind`` field of generated non-ASCII string objects.
PyUnicode_1BYTE_KIND = 1
PyUnicode_2BYTE_KIND = 2
PyUnicode_4BYTE_KIND = 4
86
87
88
def analyze_character_width(s: str) -> Tuple[int, bool]:
    """Classify *s* by its largest code point.

    Returns ``(kind, ascii)`` where *kind* is one of the
    PyUnicode_*BYTE_KIND constants and *ascii* is True only when every
    character is at most U+007F.  The empty string is 1-byte and ASCII.
    """
    # Seeding with a space gives the empty string a well-defined maximum.
    widest = max([' ', *s])
    if widest > '\uFFFF':
        return PyUnicode_4BYTE_KIND, False
    if widest > '\xFF':
        return PyUnicode_2BYTE_KIND, False
    return PyUnicode_1BYTE_KIND, widest <= '\x7F'
101
102
103
def removesuffix(base: str, suffix: str) -> str:
    """Return *base* without a trailing *suffix*, or unchanged if absent."""
    if not base.endswith(suffix):
        return base
    keep = len(base) - len(suffix)
    return base[:keep]
107
108
class Printer:
    """Writes C definitions of statically allocated Python objects.

    Each ``generate_*`` method writes whatever C code the object needs to
    ``file`` and returns, as a string, the C expression that other
    generated definitions use to refer to that object.
    """

    def __init__(self, file: TextIO) -> None:
        """Initialise the writer state and emit the ``#include`` preamble."""
        self.level = 0
        self.file = file
        # (type, value, repr) -> C expression already generated for it.
        self.cache: Dict[tuple[type, object, str], str] = {}
        self.hits, self.misses = 0, 0
        # One entry per generated code object; see generate_code().
        self.finis: list[str] = []
        self.inits: list[str] = []
        for header in (
            "Python.h",
            "internal/pycore_gc.h",
            "internal/pycore_code.h",
            "internal/pycore_frame.h",
            "internal/pycore_long.h",
        ):
            self.write(f'#include "{header}"')
        self.write("")

    @contextlib.contextmanager
    def indent(self) -> None:
        """Context manager: write one level deeper inside the ``with`` body.

        The previous level is restored even if the body raises.
        """
        previous = self.level
        try:
            self.level = previous + 1
            yield
        finally:
            self.level = previous

    def write(self, arg: str) -> None:
        """Write *arg* as one line, prefixed by the current indentation."""
        padding = " " * self.level
        self.file.writelines((padding, arg, "\n"))

    @contextlib.contextmanager
    def block(self, prefix: str, suffix: str = "") -> None:
        """Context manager wrapping the body's output in ``prefix { ... }suffix``."""
        self.write(f"{prefix} {{")
        with self.indent():
            yield
        self.write(f"}}{suffix}")

    def object_head(self, typename: str) -> None:
        """Write the ``.ob_base`` initializer for an object of C type *typename*."""
        with self.block(".ob_base =", ","):
            self.write(".ob_refcnt = _Py_IMMORTAL_REFCNT,")
            self.write(f".ob_type = &{typename},")

    def object_var_head(self, typename: str, size: int) -> None:
        """Write the ``.ob_base`` initializer for a variable-size object."""
        with self.block(".ob_base =", ","):
            self.object_head(typename)
            self.write(f".ob_size = {size},")

    def field(self, obj: object, name: str) -> None:
        """Write ``.name = value,`` using the attribute *name* of *obj*."""
        value = getattr(obj, name)
        self.write(f".{name} = {value},")

    def generate_bytes(self, name: str, b: bytes) -> str:
        """Emit a bytes object; empty and 1-byte values reuse singletons."""
        size = len(b)
        if size == 0:
            return "(PyObject *)&_Py_SINGLETON(bytes_empty)"
        if size == 1:
            return f"(PyObject *)&_Py_SINGLETON(bytes_characters[{b[0]}])"
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject_VAR_HEAD")
                self.write("Py_hash_t ob_shash;")
                # One extra char beyond the payload.
                self.write(f"char ob_sval[{size + 1}];")
        with self.block(f"{name} =", ";"):
            self.object_var_head("PyBytes_Type", size)
            self.write(".ob_shash = -1,")
            self.write(f".ob_sval = {make_string_literal(b)},")
        return f"& {name}.ob_base.ob_base"

    def generate_unicode(self, name: str, s: str) -> str:
        """Emit a str object, or reference an existing global/singleton string."""
        if s in strings:
            return f"&_Py_STR({strings[s]})"
        if s in identifiers:
            return f"&_Py_ID({s})"
        if len(s) == 1:
            codepoint = ord(s)
            if codepoint < 128:
                return f"(PyObject *)&_Py_SINGLETON(strings).ascii[{codepoint}]"
            if codepoint < 256:
                return f"(PyObject *)&_Py_SINGLETON(strings).latin1[{codepoint - 128}]"
        # Identifier-like strings get a name derived from their content.
        if re.match(r'\A[A-Za-z0-9_]+\Z', s):
            name = f"const_str_{s}"
        kind, ascii = analyze_character_width(s)
        datatype = {
            PyUnicode_1BYTE_KIND: "uint8_t",
            PyUnicode_2BYTE_KIND: "uint16_t",
        }.get(kind, "uint32_t")
        length = len(s)
        header_member = (
            "PyASCIIObject _ascii;" if ascii
            else "PyCompactUnicodeObject _compact;"
        )
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write(header_member)
                self.write(f"{datatype} _data[{length + 1}];")
        with self.block(f"{name} =", ";"):
            if ascii:
                with self.block("._ascii =", ","):
                    self.object_head("PyUnicode_Type")
                    self.write(f".length = {length},")
                    self.write(".hash = -1,")
                    with self.block(".state =", ","):
                        self.write(".kind = 1,")
                        self.write(".compact = 1,")
                        self.write(".ascii = 1,")
                self.write(f"._data = {make_string_literal(s.encode('ascii'))},")
                return f"& {name}._ascii.ob_base"
            with self.block("._compact =", ","):
                with self.block("._base =", ","):
                    self.object_head("PyUnicode_Type")
                    self.write(f".length = {length},")
                    self.write(".hash = -1,")
                    with self.block(".state =", ","):
                        self.write(f".kind = {kind},")
                        self.write(".compact = 1,")
                        self.write(".ascii = 0,")
                utf8 = s.encode('utf-8')
                self.write(f'.utf8 = {make_string_literal(utf8)},')
                self.write(f'.utf8_length = {len(utf8)},')
            with self.block("._data =", ","):
                # Code points as decimal numbers, 16 per output line.
                for start in range(0, length, 16):
                    chunk = s[start:start + 16]
                    self.write(", ".join(str(ord(ch)) for ch in chunk) + ",")
            return f"& {name}._compact._base.ob_base"

    def generate_code(self, name: str, code: types.CodeType) -> str:
        """Emit a code object together with every object it refers to.

        Also records the _PyStaticCode_Init/_PyStaticCode_Fini calls for
        this object in ``inits``/``finis`` and consumes one value of the
        module-level ``next_code_version``.
        """
        global next_code_version
        # The ordering here matches PyCode_NewWithPosOnlyArgs()
        # (but see below).
        co_consts = self.generate(f"{name}_consts", code.co_consts)
        co_names = self.generate(f"{name}_names", code.co_names)
        co_filename = self.generate(f"{name}_filename", code.co_filename)
        co_name = self.generate(f"{name}_name", code.co_name)
        co_qualname = self.generate(f"{name}_qualname", code.co_qualname)
        co_linetable = self.generate(f"{name}_linetable", code.co_linetable)
        co_exceptiontable = self.generate(f"{name}_exceptiontable", code.co_exceptiontable)
        # These fields are not directly accessible
        localsplusnames, localspluskinds = get_localsplus(code)
        co_localsplusnames = self.generate(f"{name}_localsplusnames", localsplusnames)
        co_localspluskinds = self.generate(f"{name}_localspluskinds", localspluskinds)
        # Derived values (the local count is only cross-checked, not emitted)
        _nlocals, ncellvars, nfreevars = \
            get_localsplus_counts(code, localsplusnames, localspluskinds)
        bytecode = code.co_code
        co_code_adaptive = make_string_literal(bytecode)
        nlocalsplus = len(localsplusnames)
        self.write("static")
        with self.indent():
            self.write(f"struct _PyCode_DEF({len(bytecode)})")
        with self.block(f"{name} =", ";"):
            self.object_var_head("PyCode_Type", len(bytecode) // 2)
            # But the ordering here must match that in cpython/code.h
            # (which is a pain because we tend to reorder those for perf)
            # otherwise MSVC doesn't like it.
            self.write(f".co_consts = {co_consts},")
            self.write(f".co_names = {co_names},")
            self.write(f".co_exceptiontable = {co_exceptiontable},")
            for attr in ("co_flags", "co_argcount",
                         "co_posonlyargcount", "co_kwonlyargcount"):
                self.field(code, attr)
            # The following should remain in sync with _PyFrame_NumSlotsForCodeObject
            self.write(f".co_framesize = {code.co_stacksize + nlocalsplus} + FRAME_SPECIALS_SIZE,")
            self.field(code, "co_stacksize")
            self.field(code, "co_firstlineno")
            self.write(f".co_nlocalsplus = {nlocalsplus},")
            self.field(code, "co_nlocals")
            self.write(f".co_ncellvars = {ncellvars},")
            self.write(f".co_nfreevars = {nfreevars},")
            self.write(f".co_version = {next_code_version},")
            next_code_version += 1
            self.write(f".co_localsplusnames = {co_localsplusnames},")
            self.write(f".co_localspluskinds = {co_localspluskinds},")
            self.write(f".co_filename = {co_filename},")
            self.write(f".co_name = {co_name},")
            self.write(f".co_qualname = {co_qualname},")
            self.write(f".co_linetable = {co_linetable},")
            self.write("._co_cached = NULL,")
            self.write(f".co_code_adaptive = {co_code_adaptive},")
            # Index of the first RESUME among the even-offset bytes, if any.
            for index, opcode in enumerate(bytecode[::2]):
                if opcode == RESUME:
                    self.write(f"._co_firsttraceable = {index},")
                    break
        name_as_code = f"(PyCodeObject *)&{name}"
        self.finis.append(f"_PyStaticCode_Fini({name_as_code});")
        self.inits.append(f"_PyStaticCode_Init({name_as_code})")
        return f"& {name}.ob_base.ob_base"

    def generate_tuple(self, name: str, t: Tuple[object, ...]) -> str:
        """Emit a tuple and its items; the empty tuple reuses a singleton."""
        if not t:
            return "(PyObject *)& _Py_SINGLETON(tuple_empty)"
        # Items are generated first so their definitions precede the tuple's.
        items = [self.generate(f"{name}_{index}", element)
                 for index, element in enumerate(t)]
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyGC_Head _gc_head;")
                with self.block("struct", "_object;"):
                    self.write("PyObject_VAR_HEAD")
                    self.write(f"PyObject *ob_item[{len(t)}];")
        with self.block(f"{name} =", ";"):
            with self.block("._object =", ","):
                self.object_var_head("PyTuple_Type", len(t))
                with self.block(".ob_item =", ","):
                    for item in items:
                        self.write(item + ",")
        return f"& {name}._object.ob_base.ob_base"

    def _generate_int_for_bits(self, name: str, i: int, digit: int) -> None:
        """Emit an int object whose digits are *i* split in base *digit*."""
        sign = (i > 0) - (i < 0)
        remaining = abs(i)
        digits: list[int] = []
        # Least significant digit first.
        while remaining:
            remaining, low = divmod(remaining, digit)
            digits.append(low)
        ndigits = len(digits)
        self.write("static")
        with self.indent():
            with self.block("struct"):
                self.write("PyObject ob_base;")
                self.write("uintptr_t lv_tag;")
                self.write(f"digit ob_digit[{max(1, ndigits)}];")
        with self.block(f"{name} =", ";"):
            self.object_head("PyLong_Type")
            self.write(f".lv_tag = TAG_FROM_SIGN_AND_SIZE({sign}, {ndigits}),")
            if digits:
                ds = ", ".join(str(d) for d in digits)
                self.write(f".ob_digit = {{ {ds} }},")

    def generate_int(self, name: str, i: int) -> str:
        """Emit an int object; values in -5..256 reuse the small-int table."""
        if -5 <= i <= 256:
            return f"(PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS + {i}]"
        name = f"const_int_{i}" if i >= 0 else f"const_int_negative_{abs(i)}"
        if abs(i) < 2**15:
            self._generate_int_for_bits(name, i, 2**15)
        else:
            # Emit one variant per digit size under preprocessor guards.
            for directive, bits_in_digit in (("if", 15), ("elif", 30)):
                self.write(f"#{directive} PYLONG_BITS_IN_DIGIT == {bits_in_digit}")
                self._generate_int_for_bits(name, i, 2**bits_in_digit)
            self.write("#else")
            self.write('#error "PYLONG_BITS_IN_DIGIT should be 15 or 30"')
            self.write("#endif")
            # If neither clause applies, it won't compile
        return f"& {name}.ob_base"

    def generate_float(self, name: str, x: float) -> str:
        """Emit a float object initialised from the Python formatting of *x*."""
        with self.block(f"static PyFloatObject {name} =", ";"):
            self.object_head("PyFloat_Type")
            self.write(f".ob_fval = {x},")
        return f"&{name}.ob_base"

    def generate_complex(self, name: str, z: complex) -> str:
        """Emit a complex object from the real and imaginary parts of *z*."""
        with self.block(f"static PyComplexObject {name} =", ";"):
            self.object_head("PyComplex_Type")
            self.write(f".cval = {{ {z.real}, {z.imag} }},")
        return f"&{name}.ob_base"

    def generate_frozenset(self, name: str, fs: FrozenSet[object]) -> str:
        """Emit *fs* as a tuple of its elements in sorted order."""
        try:
            ordered = sorted(fs)
        except TypeError:
            # frozen set with incompatible types, fallback to repr()
            ordered = sorted(fs, key=repr)
        ref = self.generate_tuple(name, tuple(ordered))
        self.write("// TODO: The above tuple should be a frozenset")
        return ref

    def generate_file(self, module: str, code: object)-> None:
        """Emit *code* as ``<module>_toplevel`` followed by its accessor."""
        module = module.replace(".", "_")
        self.generate(f"{module}_toplevel", code)
        self.write(EPILOGUE.format(name=module))

    def generate(self, name: str, obj: object) -> str:
        """Emit *obj*, dispatching on its type, and memoize the result.

        True, False, Ellipsis and None map to fixed C names and are not
        stored in the cache.  Raises TypeError for unsupported types.
        """
        # Use repr() in the key to distinguish -0.0 from +0.0
        key = (type(obj), obj, repr(obj))
        if key in self.cache:
            self.hits += 1
            return self.cache[key]
        self.misses += 1
        # Singletons come first so True/False never reach the int handler.
        if obj is True:
            return "Py_True"
        if obj is False:
            return "Py_False"
        if obj is builtins.Ellipsis:
            return "Py_Ellipsis"
        if obj is None:
            return "Py_None"
        handlers = (
            ((types.CodeType, umarshal.Code), self.generate_code),
            (tuple, self.generate_tuple),
            (str, self.generate_unicode),
            (bytes, self.generate_bytes),
            (int, self.generate_int),
            (float, self.generate_float),
            (complex, self.generate_complex),
            (frozenset, self.generate_frozenset),
        )
        for accepted, handler in handlers:
            if isinstance(obj, accepted):
                val = handler(name, obj)
                break
        else:
            raise TypeError(
                f"Cannot generate code for {type(obj).__name__} object")
        self.cache[key] = val
        return val
421
422
423
# C accessor written after each module's objects by Printer.generate_file();
# ``{name}`` is the module name with dots replaced by underscores.
EPILOGUE = """
PyObject *
_Py_get_{name}_toplevel(void)
{{
return Py_NewRef((PyObject *) &{name}_toplevel);
}}
"""

# First-line markers recognised by is_frozen_header().
FROZEN_COMMENT_C = "/* Auto-generated by Programs/_freeze_module.c */"
FROZEN_COMMENT_PY = "/* Auto-generated by Programs/_freeze_module.py */"

# Matches the start of a line made of comma-terminated decimal numbers.
FROZEN_DATA_LINE = r"\s*(\d+,\s*)+\s*"
435
436
437
def is_frozen_header(source: str) -> bool:
    """Return True if *source* begins with one of the FROZEN_COMMENT_* markers."""
    markers = (FROZEN_COMMENT_C, FROZEN_COMMENT_PY)
    return source.startswith(markers)
439
440
441
def decode_frozen_data(source: str) -> types.CodeType:
    """Recover the code object embedded in a frozen-module header.

    Lines before the first and after the last line matching
    FROZEN_DATA_LINE are discarded.  What remains is joined, evaluated as
    a Python literal (a sequence of integers), converted to bytes and
    passed to ``umarshal.loads``.
    """
    lines = source.splitlines()

    def is_data(line: str) -> bool:
        return re.match(FROZEN_DATA_LINE, line) is not None

    # Narrow [first, last) to the span bounded by data lines.
    first = 0
    while first < len(lines) and not is_data(lines[first]):
        first += 1
    last = len(lines)
    while last > first and not is_data(lines[last - 1]):
        last -= 1
    literal = "".join(lines[first:last]).strip()
    values: Tuple[int, ...] = ast.literal_eval(literal)
    return umarshal.loads(bytes(values))
450
451
452
def generate(args: list[str], output: TextIO) -> None:
    """Write the deep-frozen C source for every module in *args* to *output*.

    Each entry of *args* has the form ``file:modname`` and is split on the
    last colon.  A file starting with a frozen-module marker is decoded
    from its embedded data; any other file is compiled as Python source.
    After all modules, the ``_Py_Deepfreeze_Fini`` and
    ``_Py_Deepfreeze_Init`` functions and the ``_Py_next_func_version``
    definition are written.
    """
    printer = Printer(output)
    for arg in args:
        file, modname = arg.rsplit(':', 1)
        with open(file, "r", encoding="utf8") as fd:
            source = fd.read()
        if is_frozen_header(source):
            code = decode_frozen_data(source)
        else:
            # Compile the text already read.  Calling fd.read() a second
            # time returns "" (the file is at EOF), which silently froze
            # an empty module.
            code = compile(source, f"<frozen {modname}>", "exec")
        printer.generate_file(modname, code)
    with printer.block("void\n_Py_Deepfreeze_Fini(void)"):
        for p in printer.finis:
            printer.write(p)
    with printer.block("int\n_Py_Deepfreeze_Init(void)"):
        for p in printer.inits:
            with printer.block(f"if ({p} < 0)"):
                printer.write("return -1;")
        printer.write("return 0;")
    printer.write(f"\nuint32_t _Py_next_func_version = {next_code_version};\n")
    if verbose:
        print(f"Cache hits: {printer.hits}, misses: {printer.misses}")
474
475
476
# Command-line interface; parsed by main().
parser = argparse.ArgumentParser()
parser.add_argument(
    "-o", "--output",
    help="Defaults to deepfreeze.c",
    default="deepfreeze.c",
)
parser.add_argument(
    "-v", "--verbose",
    action="store_true",
    help="Print diagnostics",
)
parser.add_argument(
    'args',
    nargs="+",
    help="Input file and module name (required) in file:modname format",
)
480
481
@contextlib.contextmanager
def report_time(label: str):
    """Context manager that times its body.

    When the module-level ``verbose`` flag is set, the elapsed time is
    printed as ``"<label>: <seconds> sec"`` on exit, including when the
    body raises.
    """
    started = time.time()
    try:
        yield
    finally:
        finished = time.time()
        if verbose:
            print(f"{label}: {finished-started:.3f} sec")
490
491
492
def main() -> None:
    """Parse the command line and write the generated C file.

    Sets the module-level ``verbose`` flag from ``-v``.
    """
    global verbose
    options = parser.parse_args()
    verbose = options.verbose
    destination = options.output
    with open(destination, "w", encoding="utf-8") as file, \
            report_time("generate"):
        generate(options.args, file)
    # Report the size only after the file has been closed.
    if verbose:
        print(f"Wrote {os.path.getsize(destination)} bytes to {destination}")


if __name__ == "__main__":
    main()
506
507