Path: blob/main/Tools/build/generate_global_objects.py
12 views
"""Regenerate the auto-generated sections of several internal headers.

The script scans the C sources under ROOT for ``_Py_ID(...)`` and
``_Py_DECLARE_STR(...)`` uses, then rewrites the region between the START
and END markers of each generated header in INTERNAL.
"""
import contextlib
import io
import os.path
import re

SCRIPT_NAME = 'Tools/build/generate_global_objects.py'
__file__ = os.path.abspath(__file__)
ROOT = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
INTERNAL = os.path.join(ROOT, 'Include', 'internal')


# Names that show up inside _Py_ID() only as macro parameters; they are
# not real identifiers and must not be emitted.
IGNORED = {
    'ACTION',  # Python/_warnings.c
    'ATTR',  # Python/_warnings.c and Objects/funcobject.c
    'DUNDER',  # Objects/typeobject.c
    'RDUNDER',  # Objects/typeobject.c
    'SPECIAL',  # Objects/weakrefobject.c
    'NAME',  # Objects/typeobject.c
}
# Identifiers that are always emitted, in addition to those found by
# iter_global_strings().
IDENTIFIERS = [
    # from ADD() Python/_warnings.c
    'default',
    'ignore',

    # from GET_WARNINGS_ATTR() in Python/_warnings.c
    'WarningMessage',
    '_showwarnmsg',
    '_warn_unawaited_coroutine',
    'defaultaction',
    'filters',
    'onceregistry',

    # from WRAP_METHOD() in Objects/weakrefobject.c
    '__bytes__',
    '__reversed__',

    # from COPY_ATTR() in Objects/funcobject.c
    '__module__',
    '__name__',
    '__qualname__',
    '__doc__',
    '__annotations__',

    # from SLOT* in Objects/typeobject.c
    '__abs__',
    '__add__',
    '__aiter__',
    '__and__',
    '__anext__',
    '__await__',
    '__bool__',
    '__call__',
    '__contains__',
    '__del__',
    '__delattr__',
    '__delete__',
    '__delitem__',
    '__eq__',
    '__float__',
    '__floordiv__',
    '__ge__',
    '__get__',
    '__getattr__',
    '__getattribute__',
    '__getitem__',
    '__gt__',
    '__hash__',
    '__iadd__',
    '__iand__',
    '__ifloordiv__',
    '__ilshift__',
    '__imatmul__',
    '__imod__',
    '__imul__',
    '__index__',
    '__init__',
    '__int__',
    '__invert__',
    '__ior__',
    '__ipow__',
    '__irshift__',
    '__isub__',
    '__iter__',
    '__itruediv__',
    '__ixor__',
    '__le__',
    '__len__',
    '__lshift__',
    '__lt__',
    '__matmul__',
    '__mod__',
    '__mul__',
    '__ne__',
    '__neg__',
    '__new__',
    '__next__',
    '__or__',
    '__pos__',
    '__pow__',
    '__radd__',
    '__rand__',
    '__repr__',
    '__rfloordiv__',
    '__rlshift__',
    '__rmatmul__',
    '__rmod__',
    '__rmul__',
    '__ror__',
    '__rpow__',
    '__rrshift__',
    '__rshift__',
    '__rsub__',
    '__rtruediv__',
    '__rxor__',
    '__set__',
    '__setattr__',
    '__setitem__',
    '__str__',
    '__sub__',
    '__truediv__',
    '__xor__',
    '__divmod__',
    '__rdivmod__',
    '__buffer__',
    '__release_buffer__',
]

NON_GENERATED_IMMORTAL_OBJECTS = [
    # The generated ones come from generate_runtime_init().
    '(PyObject *)&_Py_SINGLETON(bytes_empty)',
    '(PyObject *)&_Py_SINGLETON(tuple_empty)',
    '(PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)',
    '(PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)',
    '(PyObject *)&_Py_SINGLETON(context_token_missing)',
]


#######################################
# helpers

def iter_files():
    """Yield the path of every ``.c``/``.h`` file under the scanned dirs.

    Only the listed top-level directories of ROOT are walked.
    """
    for topdir in ('Modules', 'Objects', 'Parser', 'PC', 'Programs', 'Python'):
        root = os.path.join(ROOT, topdir)
        for dirname, _, files in os.walk(root):
            for basename in files:
                if not basename.endswith(('.c', '.h')):
                    continue
                yield os.path.join(dirname, basename)


def iter_global_strings():
    """Yield ``(name, string, filename, lno, line)`` for each use found.

    For a ``_Py_ID(name)`` match *string* is None; for a
    ``_Py_DECLARE_STR(name, "literal")`` match it is the literal text.
    """
    id_regex = re.compile(r'\b_Py_ID\((\w+)\)')
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((\w+), "(.*?)"\)')
    for filename in iter_files():
        try:
            infile = open(filename, encoding='utf-8')
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile:
            for lno, line in enumerate(infile, 1):
                for m in id_regex.finditer(line):
                    identifier, = m.groups()
                    yield identifier, None, filename, lno, line
                for m in str_regex.finditer(line):
                    varname, string = m.groups()
                    yield varname, string, filename, lno, line


def iter_to_marker(lines, marker):
    """Yield items of *lines* until one equals *marker* (ignoring trailing
    whitespace).

    The marker line itself is consumed from *lines* but not yielded, so a
    shared iterator can be resumed right after it.
    """
    for line in lines:
        if line.rstrip() == marker:
            break
        yield line


class Printer:
    """Write indented lines to a file, optionally as C macro continuations."""

    def __init__(self, file):
        self.level = 0
        self.file = file
        # Stack of "are we inside a backslash-continued macro" flags, one
        # entry per open block() plus the initial top-level entry.
        self.continuation = [False]

    @contextlib.contextmanager
    def indent(self):
        """Indent by one level for the duration of the ``with`` body."""
        save_level = self.level
        try:
            self.level += 1
            yield
        finally:
            self.level = save_level

    def write(self, arg):
        """Write *arg* as one line at the current indentation level."""
        eol = '\n'
        if self.continuation[-1]:
            # Inside a macro every line ends with a backslash; an empty
            # line gets no separating space in front of it.
            eol = f' \\{eol}' if arg else f'\\{eol}'
        # NOTE(review): the indent unit is a single space as seen in the
        # reviewed text -- confirm it was not collapsed from a wider one.
        self.file.writelines((" "*self.level, arg, eol))

    @contextlib.contextmanager
    def block(self, prefix, suffix="", *, continuation=None):
        """Write ``prefix {`` ... ``}suffix`` around the ``with`` body.

        *continuation* selects backslash-continued lines for the opening
        line and the body; None inherits the enclosing block's setting.
        The closing line uses the enclosing block's setting.
        """
        if continuation is None:
            continuation = self.continuation[-1]
        self.continuation.append(continuation)

        self.write(prefix + " {")
        with self.indent():
            yield
        self.continuation.pop()
        self.write("}" + suffix)


@contextlib.contextmanager
def open_for_changes(filename, orig):
    """Like open() but only write to the file if it changed."""
    buffer = io.StringIO()
    yield buffer
    text = buffer.getvalue()
    if text != orig:
        with open(filename, 'w', encoding='utf-8') as outfile:
            outfile.write(text)
    else:
        print(f'# not changed: {filename}')


#######################################
# the global objects

START = f'/* The following is auto-generated by {SCRIPT_NAME}. */'
END = '/* End auto-generated code */'


def _read_non_generated(filename):
    """Return ``(orig, before, after)`` for the header *filename*.

    *orig* is the full current text, *before* is the text preceding the
    START marker and *after* is the text following the END marker.
    """
    # Read as UTF-8 to match the encoding open_for_changes() writes with.
    with open(filename, encoding='utf-8') as infile:
        orig = infile.read()
    lines = iter(orig.rstrip().splitlines())
    before = '\n'.join(iter_to_marker(lines, START))
    # Skip the previously generated section.
    for _ in iter_to_marker(lines, END):
        pass
    after = '\n'.join(lines)
    return orig, before, after


def generate_global_strings(identifiers, strings):
    """Regenerate the generated section of pycore_global_strings.h.

    *identifiers* is an iterable of identifier names; *strings* maps each
    string literal to its C variable name.
    """
    filename = os.path.join(INTERNAL, 'pycore_global_strings.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('struct _Py_global_strings', ';'):
            with printer.block('struct', ' literals;'):
                for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                    printer.write(f'STRUCT_FOR_STR({name}, "{literal}")')
            outfile.write('\n')
            with printer.block('struct', ' identifiers;'):
                for name in sorted(identifiers):
                    assert name.isidentifier(), name
                    printer.write(f'STRUCT_FOR_ID({name})')
            with printer.block('struct', ' ascii[128];'):
                printer.write("PyASCIIObject _ascii;")
                printer.write("uint8_t _data[2];")
            with printer.block('struct', ' latin1[128];'):
                printer.write("PyCompactUnicodeObject _latin1;")
                printer.write("uint8_t _data[2];")
        printer.write(END)
        printer.write(after)


def generate_runtime_init(identifiers, strings):
    """Regenerate the generated section of pycore_runtime_init_generated.h.

    Returns the list of C expressions for every object that was emitted,
    in emission order.
    """
    # First get some info from the declarations.
    nsmallposints = None
    nsmallnegints = None
    declarations = os.path.join(INTERNAL, 'pycore_global_objects.h')
    with open(declarations, encoding='utf-8') as infile:
        for line in infile:
            if line.startswith('#define _PY_NSMALLPOSINTS'):
                nsmallposints = int(line.split()[-1])
            elif line.startswith('#define _PY_NSMALLNEGINTS'):
                nsmallnegints = int(line.split()[-1])
                break
        else:
            # The _PY_NSMALLNEGINTS definition was never found.
            raise NotImplementedError
    assert nsmallposints and nsmallnegints

    # Then target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_runtime_init_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    immortal_objects = []
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        with printer.block('#define _Py_small_ints_INIT', continuation=True):
            for i in range(-nsmallnegints, nsmallposints):
                printer.write(f'_PyLong_DIGIT_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + {i}]')
        printer.write('')
        with printer.block('#define _Py_bytes_characters_INIT', continuation=True):
            for i in range(256):
                printer.write(f'_PyBytes_CHAR_INIT({i}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(bytes_characters)[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_literals_INIT', continuation=True):
            for literal, name in sorted(strings.items(), key=lambda x: x[1]):
                printer.write(f'INIT_STR({name}, "{literal}"),')
                immortal_objects.append(f'(PyObject *)&_Py_STR({name})')
        printer.write('')
        with printer.block('#define _Py_str_identifiers_INIT', continuation=True):
            for name in sorted(identifiers):
                assert name.isidentifier(), name
                printer.write(f'INIT_ID({name}),')
                immortal_objects.append(f'(PyObject *)&_Py_ID({name})')
        printer.write('')
        with printer.block('#define _Py_str_ascii_INIT', continuation=True):
            for i in range(128):
                printer.write(f'_PyASCIIObject_INIT("\\x{i:02x}"),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).ascii[{i}]')
        printer.write('')
        with printer.block('#define _Py_str_latin1_INIT', continuation=True):
            for i in range(128, 256):
                # C string literal holding the UTF-8 bytes of chr(i).
                escaped = ''.join(f"\\x{c:02x}" for c in chr(i).encode('utf-8'))
                utf8 = f'"{escaped}"'
                printer.write(f'_PyUnicode_LATIN1_INIT("\\x{i:02x}", {utf8}),')
                immortal_objects.append(f'(PyObject *)&_Py_SINGLETON(strings).latin1[{i} - 128]')
        printer.write(END)
        printer.write(after)
    return immortal_objects


def generate_static_strings_initializer(identifiers, strings):
    """Regenerate the generated section of pycore_unicodeobject_generated.h.

    Only *identifiers* is used; *strings* is accepted but not emitted.
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_unicodeobject_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write("static inline void")
        with printer.block("_PyUnicode_InitStaticStrings(PyInterpreterState *interp)"):
            printer.write('PyObject *string;')
            for i in sorted(identifiers):
                # This use of _Py_ID() is not seen by iter_global_strings()
                # since iter_files() does not walk the Include directory.
                printer.write(f'string = &_Py_ID({i});')
                printer.write('assert(_PyUnicode_CheckConsistency(string, 1));')
                printer.write('_PyUnicode_InternInPlace(interp, &string);')
        # XXX What about "strings"?
        printer.write(END)
        printer.write(after)


def generate_global_object_finalizers(generated_immortal_objects):
    """Regenerate the generated section of
    pycore_global_objects_fini_generated.h.

    One check is emitted per entry of *generated_immortal_objects* and per
    entry of NON_GENERATED_IMMORTAL_OBJECTS.
    """
    # Target the runtime initializer.
    filename = os.path.join(INTERNAL, 'pycore_global_objects_fini_generated.h')

    # Read the non-generated part of the file.
    orig, before, after = _read_non_generated(filename)

    # Generate the file.
    with open_for_changes(filename, orig) as outfile:
        printer = Printer(outfile)
        printer.write(before)
        printer.write(START)
        printer.write('#ifdef Py_DEBUG')
        printer.write("static inline void")
        with printer.block(
                "_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp)"):
            printer.write('/* generated runtime-global */')
            printer.write('// (see pycore_runtime_init_generated.h)')
            for ref in generated_immortal_objects:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
            printer.write('/* non-generated */')
            for ref in NON_GENERATED_IMMORTAL_OBJECTS:
                printer.write(f'_PyStaticObject_CheckRefcnt({ref});')
        printer.write('#endif // Py_DEBUG')
        printer.write(END)
        printer.write(after)


def get_identifiers_and_strings() -> 'tuple[set[str], dict[str, str]]':
    """Collect the identifiers and string literals to generate.

    Returns ``(identifiers, strings)`` where *identifiers* is IDENTIFIERS
    plus every non-ignored ``_Py_ID()`` name found, and *strings* maps each
    declared literal to its C variable name.

    Raises ValueError if one literal is declared under two different names.
    """
    identifiers = set(IDENTIFIERS)
    strings = {}
    for name, string, *_ in iter_global_strings():
        if string is None:
            if name not in IGNORED:
                identifiers.add(name)
        else:
            if string not in strings:
                strings[string] = name
            elif name != strings[string]:
                # "strings" is keyed by the literal, not by the name.
                raise ValueError(
                    f'name mismatch for string {string!r} '
                    f'({name!r} != {strings[string]!r})')
    return identifiers, strings


#######################################
# the script

def main() -> None:
    """Regenerate all four generated headers."""
    identifiers, strings = get_identifiers_and_strings()

    generate_global_strings(identifiers, strings)
    generated_immortal_objects = generate_runtime_init(identifiers, strings)
    generate_static_strings_initializer(identifiers, strings)
    generate_global_object_finalizers(generated_immortal_objects)


if __name__ == '__main__':
    main()