Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Tools/build/generate_global_objects.py
12 views
1
import contextlib
2
import io
3
import os.path
4
import re
5
6
# Repository-relative path of this script, quoted in the marker comment
# that opens each generated section.
SCRIPT_NAME = 'Tools/build/generate_global_objects.py'

# Make the script location absolute so the paths derived from it are too.
__file__ = os.path.abspath(__file__)

# Three dirname() steps: <ROOT>/Tools/build/<this script>.
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))

# Directory containing the headers this script reads and rewrites.
INTERNAL = os.path.join(ROOT, 'Include', 'internal')
10
11
12
# Names that the _Py_ID() scan finds but that must not be recorded as
# identifiers (see get_identifiers_and_strings()).  The trailing comments
# name the C files in which each one occurs.
IGNORED = {
    'ACTION',   # Python/_warnings.c
    'ATTR',     # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',   # Objects/typeobject.c
    'NAME',     # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
}
20
# Identifiers that are always generated, in addition to those found by
# scanning the sources.  Each group is labelled with the macro and file
# it comes from.
IDENTIFIERS = [
    # from ADD() in Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
    '__buffer__',
    '__release_buffer__',
]
127
128
# C expressions for the objects checked by the generated
# _PyStaticObjects_CheckRefcnt() in addition to the generated ones.
# The generated ones come from generate_runtime_init().
NON_GENERATED_IMMORTAL_OBJECTS = [
    '(PyObject *)&_Py_SINGLETON(bytes_empty)',
    '(PyObject *)&_Py_SINGLETON(tuple_empty)',
    '(PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)',
    '(PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)',
    '(PyObject *)&_Py_SINGLETON(context_token_missing)',
]
136
137
138
#######################################
139
# helpers
140
141
def iter_files():
    """Yield the path of every .c and .h file under the scanned directories.

    The directories are walked recursively, in the order listed, relative
    to ROOT.
    """
    source_suffixes = ('.c', '.h')
    for topdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        for dirpath, _subdirs, basenames in os.walk(os.path.join(ROOT, topdir)):
            for basename in basenames:
                if basename.endswith(source_suffixes):
                    yield os.path.join(dirpath, basename)
149
150
151
def iter_global_strings():
    """Scan the files from iter_files() for global string declarations.

    Yields 5-tuples ``(name, string, filename, lineno, line)``:

    * for each ``_Py_ID(name)`` match, ``string`` is None;
    * for each ``_Py_DECLARE_STR(name, "text")`` match, ``string`` is
      the text between the double quotes.

    Within one line all _Py_ID() matches are yielded before any
    _Py_DECLARE_STR() match.  Line numbers start at 1.
    """
    id_pattern = re.compile(r'\b_Py_ID\((\w+)\)')
    str_pattern = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for path in iter_files():
        try:
            source = open(path, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with source:
            for lineno, text in enumerate(source, start=1):
                for match in id_pattern.finditer(text):
                    yield match.group(1), None, path, lineno, text
                for match in str_pattern.finditer(text):
                    yield match.group(1), match.group(2), path, lineno, text
168
169
170
def iter_to_marker(lines, marker):
    """Yield items from *lines* until one equals *marker*.

    Trailing whitespace is ignored in the comparison.  The marker line is
    consumed from *lines* but not yielded, so when *lines* is an iterator
    a later pass over it resumes right after the marker.
    """
    for text in lines:
        if text.rstrip() != marker:
            yield text
        else:
            return
175
176
177
class Printer:
    """Write indented lines of generated C code to a text stream.

    Attributes:
        level: current indentation depth, in indent units.
        file: the writable text stream that receives the output.
        continuation: stack of flags.  While the top flag is true, every
            line written ends with a backslash before the newline.
    """

    def __init__(self, file):
        self.level = 0
        self.file = file
        # The bottom entry is the default: no line continuation.
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Indent one level deeper for the duration of the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation level."""
        eol = '\n'
        if self.continuation[-1]:
            # A space separates the backslash from non-empty content.
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): the indent unit is a single space as visible here;
        # this copy of the file looks whitespace-collapsed, so confirm the
        # intended width against the canonical source.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {``, run the body indented, then write ``}suffix``.

        Args:
            prefix: text placed before the opening brace.
            suffix: text placed right after the closing brace.
            continuation: whether lines of the block (including the
                opening line) end with a backslash.  None inherits the
                setting currently in effect.

        The continuation setting is restored even if the body raises; in
        that case the closing brace is not written.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)
        try:
            self.write(prefix + " {")
            with self.indent():
                yield
        finally:
            # Mirror indent(): never leave a stale entry on the stack.
            self.continuation.pop()
        self.write("}" + suffix)
210
211
212
@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed.

    Yields an in-memory text buffer.  When the ``with`` body finishes, the
    buffered text is compared with *orig*: if they differ, *filename* is
    overwritten (UTF-8) with the new text; otherwise a "not changed" note
    is printed and the file is left alone.
    """
    buffer = io.StringIO()
    yield buffer
    generated = buffer.getvalue()
    if generated == orig:
        print(f'# not changed: {filename}')
        return
    with open(filename, 'w', encoding='utf-8') as target:
        target.write(generated)
223
224
225
#######################################
226
# the global objects
227
228
# Marker lines that delimit the generated section of each target header.
# Everything between them is replaced on each run; text outside them is
# carried over unchanged.
START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'
230
231
232
def generate_global_strings(identifiers, strings):
    """Regenerate the generated section of pycore_global_strings.h.

    Args:
        identifiers: iterable of identifier names; each must satisfy
            str.isidentifier().  Emitted sorted, as STRUCT_FOR_ID().
        strings: mapping of literal text to its C variable name.
            Emitted sorted by variable name, as STRUCT_FOR_STR().

    The file is only rewritten when its content changes.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.  The file is written back
    # as UTF-8, so it is read as UTF-8 too instead of the locale default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)
266
267
268
def generate_runtime_init(identifiers, strings):
    """Regenerate the generated section of pycore_runtime_init_generated.h.

    Emits the ``_Py_small_ints_INIT``, ``_Py_bytes_characters_INIT``,
    ``_Py_str_literals_INIT``, ``_Py_str_identifiers_INIT``,
    ``_Py_str_ascii_INIT`` and ``_Py_str_latin1_INIT`` macros.

    Args:
        identifiers: iterable of identifier names; each must satisfy
            str.isidentifier().
        strings: mapping of literal text to its C variable name.

    Returns:
        A list of C expressions (str), one per object initialized by the
        emitted macros, in emission order.

    Raises:
        NotImplementedError: if pycore_global_objects.h has no
            ``#define _PY_NSMALLNEGINTS`` line.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                # Scanning stops here, so the positive count must have
                # been seen already (checked by the assert below).
                break
        else:
            raise NotImplementedError(
                f'no "#define _PY_NSMALLNEGINTS" line in {declarations}')
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.  The file is written back
    # as UTF-8, so it is read as UTF-8 too instead of the locale default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    immortal_objects = []
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_small_ints_INIT', continuation=True):
            for i in range(-nsmallnegints, nsmallposints):
                printer.write(f'_PyLong_DIGIT_INIT({i}),')
                immortal_objects.append(
                    f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
        printer.write('')
        with printer.block('#define _Py_bytes_characters_INIT', continuation=True):
            for i in range(256):
                printer.write(f'_PyBytes_CHAR_INIT({i}),')
                immortal_objects.append(
                    f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_literals_INIT', continuation=True):
            for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                printer.write(f'INIT_STR({name}, "{literal}"),')
                immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
        printer.write('')
        with printer.block('#define _Py_str_identifiers_INIT', continuation=True):
            for name in sorted(identifiers):
                assert name.isidentifier(), name
                printer.write(f'INIT_ID({name}),')
                immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
        printer.write('')
        with printer.block('#define _Py_str_ascii_INIT', continuation=True):
            for i in range(128):
                printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                immortal_objects.append(
                    f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_latin1_INIT', continuation=True):
            for i in range(128, 256):
                # C string literal spelling out the UTF-8 bytes of chr(i).
                escaped = ''.join(f'\\x{c:02x}' for c in chr(i).encode('utf-8'))
                utf8 = f'"{escaped}"'
                printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {utf8}),')
                immortal_objects.append(
                    f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
        printer.write(END)
        printer.write(after)
    return immortal_objects
338
339
340
def generate_static_strings_initializer(identifiers, strings):
    """Regenerate the generated section of pycore_unicodeobject_generated.h.

    Emits ``_PyUnicode_InitStaticStrings()``, which checks and interns
    every identifier in sorted order.

    Args:
        identifiers: iterable of identifier names.
        strings: accepted for symmetry with the other generators; not
            used here (see the XXX note below).
    """
    # Target the static strings initializer.
    filename = os.path.join(INTERNAL, 'pycore_unicodeobject_generated.h')

    # Read the non-generated part of the file.  The file is written back
    # as UTF-8, so it is read as UTF-8 too instead of the locale default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
            printer.write('PyObject *string;')
            for i in sorted(identifiers):
                # This use of _Py_ID() is not picked up by
                # iter_global_strings(): iter_files() only walks Modules,
                # Objects, Parser, PC, Programs and Python, while this
                # header is written under Include/internal.
                printer.write(f'string = &_Py_ID({i});')
                printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
                printer.write('_PyUnicode_InternInPlace(interp, &string);')
        # XXX What about "strings"?
        printer.write(END)
        printer.write(after)
370
371
372
def generate_global_object_finalizers(generated_immortal_objects):
    """Regenerate the generated section of
    pycore_global_objects_fini_generated.h.

    Emits ``_PyStaticObjects_CheckRefcnt()`` inside ``#ifdef Py_DEBUG``.
    It calls ``_PyStaticObject_CheckRefcnt()`` once per object: first the
    generated ones, then NON_GENERATED_IMMORTAL_OBJECTS.

    Args:
        generated_immortal_objects: iterable of C expressions (str), as
            returned by generate_runtime_init().
    """
    # Target the global objects finalizer.
    filename = os.path.join(INTERNAL, 'pycore_global_objects_fini_generated.h')

    # Read the non-generated part of the file.  The file is written back
    # as UTF-8, so it is read as UTF-8 too instead of the locale default.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block(
                "_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp)"):
            printer.write('/* generated runtime-global */')
            printer.write('// (see pycore_runtime_init_generated.h)')
            for ref in generated_immortal_objects:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
            printer.write('/* non-generated */')
            for ref in NON_GENERATED_IMMORTAL_OBJECTS:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
        printer.write('#endif // Py_DEBUG')
        printer.write(END)
        printer.write(after)
404
405
406
def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and string literals to generate.

    Returns:
        ``(identifiers, strings)`` where *identifiers* is IDENTIFIERS
        plus every ``_Py_ID()`` name found by iter_global_strings() that
        is not in IGNORED, and *strings* maps each ``_Py_DECLARE_STR()``
        literal text to its variable name.

    Raises:
        ValueError: if the same literal text is declared under two
            different variable names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
            continue
        # `strings` is keyed by the literal text, so the earlier
        # declaration must be looked up by `string`, not by `name`.
        previous = strings.setdefault(string, name)
        if previous != name:
            raise ValueError(
                f'name mismatch for string {string!r} '
                f'({name!r} != {previous!r})')
    return identifiers, strings
419
420
421
#######################################
422
# the script
423
424
def main() -> None:
    """Collect the global strings and regenerate every target header."""
    identifiers, strings = get_identifiers_and_strings()

    generate_global_strings(identifiers, strings)
    # The runtime initializer reports which objects it emitted; the
    # finalizer generator needs that list.
    immortal = generate_runtime_init(identifiers, strings)
    generate_static_strings_initializer(identifiers, strings)
    generate_global_object_finalizers(immortal)


if __name__ == '__main__':
    main()
435
436