#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/internal/pycore_token.h
#   Parser/token.c
#   Lib/token.py
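#
# Usage (a sketch inferred from main() at the bottom of this file):
#
#   python3 Tools/build/generate_token.py {h,c,rst,py} [INFILE] [OUTFILE]
#
# INFILE defaults to Grammar/Tokens; each make_* function supplies its own
# default OUTFILE, listed above.
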
SCRIPT_NAME = 'Tools/build/generate_token.py'
AUTO_GENERATED_BY_SCRIPT = f'Auto-generated by {SCRIPT_NAME}'
NT_OFFSET = 256

def load_tokens(path):
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
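
# Illustrative only: Grammar/Tokens holds one token name per line, optionally
# followed by a quoted literal, e.g.
#
#     NAME
#     LPAR '('
#
# load_tokens() numbers the names in file order, remembers ERRORTOKEN's
# value, and maps each literal (here '(') to its token value.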


def update_file(file, content):
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                # Content is unchanged: skip the write so the file's mtime
                # stays put and build tools don't see a spurious update.
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_h_template += """\

/* Token types */
#ifndef Py_INTERNAL_TOKEN_H
#define Py_INTERNAL_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#ifndef Py_BUILD_CORE
#  error "this header requires Py_BUILD_CORE define"
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)
#define ISSTRINGLIT(x)          ((x) == STRING           || \\
                                 (x) == FSTRING_MIDDLE)


// Symbols exported for test_peg_generator
PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) _PyToken_OneChar(int);
PyAPI_FUNC(int) _PyToken_TwoChars(int, int);
PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif  // !Py_INTERNAL_TOKEN_H
"""

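# The three %-placeholders above are filled in by make_h() below: the block
# of #define lines, len(tok_names) for N_TOKENS, and NT_OFFSET.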
def make_h(infile, outfile='Include/internal/pycore_token.h'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    # Only tokens up to and including ERRORTOKEN get a #define; the names
    # after it exist solely for the Python-level tokenize module.
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = f"""\
/* {AUTO_GENERATED_BY_SCRIPT} */
"""
token_c_template += """\

#include "Python.h"
#include "pycore_token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
_PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
_PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
_PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
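
# Illustrative only: for a mapping like {'!': {'=': 'NOTEQUAL'}} this emits
# nested C switch statements along the lines of
#
#     switch (c1) {
#     case '!':
#         switch (c2) {
#         case '=': return NOTEQUAL;
#         }
#         break;
#     }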

def make_c(infile, outfile='Parser/token.c'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' is not in Grammar/Tokens; make the C tokenizer treat it like '!='.
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = f"""\
.. {AUTO_GENERATED_BY_SCRIPT}
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
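
# Illustrative only: each entry in the generated token-list.inc reads like
#
#     .. data:: LPAR
#
#        Token value for ``"("``.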
222
223
224
token_py_template = f'''\
225
"""Token constants."""
226
# {AUTO_GENERATED_BY_SCRIPT}
227
'''
228
token_py_template += '''
229
__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']
230
231
%s
232
N_TOKENS = %d
233
# Special definitions for cooperation with parser
234
NT_OFFSET = %d
235
236
tok_name = {value: name
237
for name, value in globals().items()
238
if isinstance(value, int) and not name.startswith('_')}
239
__all__.extend(tok_name.values())
240
241
EXACT_TOKEN_TYPES = {
242
%s
243
}
244
245
def ISTERMINAL(x):
246
return x < NT_OFFSET
247
248
def ISNONTERMINAL(x):
249
return x >= NT_OFFSET
250
251
def ISEOF(x):
252
return x == ENDMARKER
253
'''
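
# Illustrative only: the generated Lib/token.py then begins along the lines of
#
#     ENDMARKER = 0
#     NAME = 1
#     ...
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         ...
#     }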

def make_py(infile, outfile='Lib/token.py'):
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])
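
# Illustrative only: typical invocations from a CPython source checkout,
# similar to what the build system's regen targets run:
#
#     python3 Tools/build/generate_token.py h
#     python3 Tools/build/generate_token.py py Grammar/Tokens Lib/token.py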