#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/internal/pycore_token.h
#   Parser/token.c
#   Lib/token.py
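#
# Usage (a sketch inferred from main() at the bottom of this file):
#
#   python3 Tools/build/generate_token.py {h,c,rst,py} [INFILE] [OUTFILE]
#
# INFILE defaults to Grammar/Tokens; each make_* function supplies its own
# default OUTFILE, listed above.
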
SCRIPT_NAME = 'Tools/build/generate_token.py'
AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
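
# Illustrative only: Grammar/Tokens holds one token name per line, optionally
# followed by a quoted literal, e.g.
#
#     NAME
#     LPAR '('
#
# load_tokens() numbers the names in file order, remembers ERRORTOKEN's
# value, and maps each literal (here '(') to its token value.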


def update_file(file, content):
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                # Content is unchanged: skip the write so the file's mtime
                # stays put and build tools don't see a spurious update.
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_h_template += """\

/* Token types */
#ifndef Py_INTERNAL_TOKEN_H
#define Py_INTERNAL_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
#define ISSTRINGLIT(x)          ((x) == STRING           || \\
                                 (x) == FSTRING_MIDDLE)


// Symbols exported for test_peg_generator
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) _PyToken_OneChar(int);
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif  // !Py_INTERNAL_TOKEN_H
"""

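# The three %-placeholders above are filled in by make_h() below: the block
# of #define lines, len(tok_names) for N_TOKENS, and NT_OFFSET.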
def make_h(infile, outfile='Include/internal/pycore_token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    # Only tokens up to and including ERRORTOKEN get a #define; the names
    # after it exist solely for the Python-level tokenize module.
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_c_template += """\

#include "Python.h"
#include "pycore_token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
_PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
_PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
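
# Illustrative only: for a mapping like {'!': {'=': 'NOTEQUAL'}} this emits
# nested C switch statements along the lines of
#
#     switch (c1) {
#     case '!':
#         switch (c2) {
#         case '=': return NOTEQUAL;
#         }
#         break;
#     }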

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is not in Grammar/Tokens; make the C tokenizer treat it like '!='.
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
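
# Illustrative only: each entry in the generated token-list.inc reads like
#
#     .. data:: LPAR
#
#        Token value for ``"("``.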
222
223
224
token_py_template = f'''\
225
"""Token constants."""
226
# {AUTO_GENERATED_BY_SCRIPT}
227
'''
228
token_py_template += '''
229
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
230
231
%s
232
N_TOKENS = %d
233
# Special definitions for cooperation with parser
234
NT_OFFSET = %d
235
236
tok_name = {value: name
237
for name, value in globals().items()
238
if isinstance(value, int) and not name.startswith('_')}
239
__all__.extend(tok_name.values())
240
241
EXACT_TOKEN_TYPES = {
242
%s
243
}
244
245
def ISTERMINAL(x):
246
return x < NT_OFFSET
247
248
def ISNONTERMINAL(x):
249
return x >= NT_OFFSET
250
251
def ISEOF(x):
252
return x == ENDMARKER
253
'''
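
# Illustrative only: the generated Lib/token.py then begins along the lines of
#
#     ENDMARKER = 0
#     NAME = 1
#     ...
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         ...
#     }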

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
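
# Illustrative only: typical invocations from a CPython source checkout,
# similar to what the build system's regen targets run:
#
#     python3 Tools/build/generate_token.py h
#     python3 Tools/build/generate_token.py py Grammar/Tokens Lib/token.py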