/* Source: GitHub repository allendowney/cpython, path Parser/pegen.h (branch main). */
/* Shared declarations for the PEG parser: parser/token state, error-raising
   helpers and the _PyPegen_* action helpers. */
#ifndef PEGEN_H
#define PEGEN_H

#include <Python.h>
#include <pycore_ast.h>
#include <pycore_token.h>
/* PyPARSE_* flag values.  Entries wrapped in "#if 0" are compiled out and
   kept only as a record of their values. */
#if 0
#define PyPARSE_YIELD_IS_KEYWORD 0x0001
#endif

#define PyPARSE_DONT_IMPLY_DEDENT 0x0002

#if 0
#define PyPARSE_WITH_IS_KEYWORD 0x0003
#define PyPARSE_PRINT_IS_FUNCTION 0x0004
#define PyPARSE_UNICODE_LITERALS 0x0008
#endif

#define PyPARSE_IGNORE_COOKIE 0x0010
#define PyPARSE_BARRY_AS_BDFL 0x0020
#define PyPARSE_TYPE_COMMENTS 0x0040
#define PyPARSE_ASYNC_HACKS 0x0080
#define PyPARSE_ALLOW_INCOMPLETE_INPUT 0x0100

/* Sentinel position value: RAISE_ERROR_KNOWN_LOCATION forwards a column
   offset equal to CURRENT_POS unchanged instead of adding 1 to it. */
#define CURRENT_POS (-5)
/* One memo entry; entries chain through `next` to form a singly linked list.
   The type/node/mark fields mirror the arguments taken by
   _PyPegen_insert_memo() and _PyPegen_update_memo(). */
typedef struct _memo {
    int type;
    void *node;
    int mark;
    struct _memo *next;  // following entry in the list
} Memo;
35
typedef struct {
36
int type;
37
PyObject *bytes;
38
int level;
39
int lineno, col_offset, end_lineno, end_col_offset;
40
Memo *memo;
41
PyObject *metadata;
42
} Token;
43
/* Pairs a keyword's text with an integer token type. */
typedef struct {
    const char *str;
    int type;
} KeywordToken;
/* Array of (line number, comment) records for "# type: ignore" comments.
   NOTE(review): `size` looks like the allocated capacity and `num_items` the
   number of entries in use - confirm against the code that grows the array. */
typedef struct {
    struct {
        int lineno;
        char *comment;  // The " <tag>" in "# type: ignore <tag>"
    } *items;
    size_t size;
    size_t num_items;
} growable_comment_array;
59
typedef struct {
60
struct tok_state *tok;
61
Token **tokens;
62
int mark;
63
int fill, size;
64
PyArena *arena;
65
KeywordToken **keywords;
66
char **soft_keywords;
67
int n_keyword_lists;
68
int start_rule;
69
int *errcode;
70
int parsing_started;
71
PyObject* normalize;
72
int starting_lineno;
73
int starting_col_offset;
74
int error_indicator;
75
int flags;
76
int feature_version;
77
growable_comment_array type_ignore_comments;
78
Token *known_err_token;
79
int level;
80
int call_invalid_rules;
81
int debug;
82
} Parser;
83
84
typedef struct {
85
cmpop_ty cmpop;
86
expr_ty expr;
87
} CmpopExprPair;
88
89
typedef struct {
90
expr_ty key;
91
expr_ty value;
92
} KeyValuePair;
93
94
typedef struct {
95
expr_ty key;
96
pattern_ty pattern;
97
} KeyPatternPair;
98
99
typedef struct {
100
arg_ty arg;
101
expr_ty value;
102
} NameDefaultPair;
103
104
typedef struct {
105
asdl_arg_seq *plain_names;
106
asdl_seq *names_with_defaults; // asdl_seq* of NameDefaultsPair's
107
} SlashWithDefault;
108
109
typedef struct {
110
arg_ty vararg;
111
asdl_seq *kwonlyargs; // asdl_seq* of NameDefaultsPair's
112
arg_ty kwarg;
113
} StarEtc;
114
115
typedef struct { operator_ty kind; } AugOperator;
/* An untyped element plus a flag.
   NOTE(review): presumably is_keyword is non-zero when `element` is a keyword
   argument rather than a starred expression - confirm with the users. */
typedef struct {
    void *element;
    int is_keyword;
} KeywordOrStarred;
121
typedef struct {
122
void *result;
123
PyObject *metadata;
124
} ResultTokenWithMetadata;
125
// Internal parser functions

// Memo statistics helpers; declared only when Py_DEBUG is defined.
#if defined(Py_DEBUG)
void _PyPegen_clear_memo_statistics(void);
PyObject *_PyPegen_get_memo_statistics(void);
#endif
132
int _PyPegen_insert_memo(Parser *p, int mark, int type, void *node);
133
int _PyPegen_update_memo(Parser *p, int mark, int type, void *node);
134
int _PyPegen_is_memoized(Parser *p, int type, void *pres);
135
136
int _PyPegen_lookahead_with_name(int, expr_ty (func)(Parser *), Parser *);
137
int _PyPegen_lookahead_with_int(int, Token *(func)(Parser *, int), Parser *, int);
138
int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*), Parser *, const char*);
139
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
140
141
Token *_PyPegen_expect_token(Parser *p, int type);
142
void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
143
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
144
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
145
expr_ty _PyPegen_soft_keyword_token(Parser *p);
146
expr_ty _PyPegen_fstring_middle_token(Parser* p);
147
Token *_PyPegen_get_last_nonnwhitespace_token(Parser *);
148
int _PyPegen_fill_token(Parser *p);
149
expr_ty _PyPegen_name_token(Parser *p);
150
expr_ty _PyPegen_number_token(Parser *p);
151
void *_PyPegen_string_token(Parser *p);
152
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
153
// Error handling functions and APIs

/* Kind of target list being checked.  _RAISE_SYNTAX_ERROR_INVALID_TARGET
   reports "cannot assign to ..." for STAR_TARGETS and FOR_TARGETS and
   "cannot delete ..." otherwise. */
typedef enum {
    STAR_TARGETS,
    DEL_TARGETS,
    FOR_TARGETS
} TARGETS_TYPE;
161
int _Pypegen_raise_decode_error(Parser *p);
162
void _PyPegen_raise_tokenizer_init_error(PyObject *filename);
163
int _Pypegen_tokenizer_error(Parser *p);
164
void *_PyPegen_raise_error(Parser *p, PyObject *errtype, int use_mark, const char *errmsg, ...);
165
void *_PyPegen_raise_error_known_location(Parser *p, PyObject *errtype,
166
Py_ssize_t lineno, Py_ssize_t col_offset,
167
Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
168
const char *errmsg, va_list va);
169
void _Pypegen_set_syntax_error(Parser* p, Token* last_token);
170
Py_LOCAL_INLINE(void *)
171
RAISE_ERROR_KNOWN_LOCATION(Parser *p, PyObject *errtype,
172
Py_ssize_t lineno, Py_ssize_t col_offset,
173
Py_ssize_t end_lineno, Py_ssize_t end_col_offset,
174
const char *errmsg, ...)
175
{
176
va_list va;
177
va_start(va, errmsg);
178
Py_ssize_t _col_offset = (col_offset == CURRENT_POS ? CURRENT_POS : col_offset + 1);
179
Py_ssize_t _end_col_offset = (end_col_offset == CURRENT_POS ? CURRENT_POS : end_col_offset + 1);
180
_PyPegen_raise_error_known_location(p, errtype, lineno, _col_offset, end_lineno, _end_col_offset, errmsg, va);
181
va_end(va);
182
return NULL;
183
}
/* Convenience macros for raising errors.  All of them expect a `Parser *p`
   to be in scope at the expansion site.  The third argument passed to
   _PyPegen_raise_error is its `use_mark` parameter. */
#define RAISE_SYNTAX_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 0, msg, ##__VA_ARGS__)
#define RAISE_INDENTATION_ERROR(msg, ...) _PyPegen_raise_error(p, PyExc_IndentationError, 0, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_ON_NEXT_TOKEN(msg, ...) _PyPegen_raise_error(p, PyExc_SyntaxError, 1, msg, ##__VA_ARGS__)
/* Span from the start of `a` to the end of `b`. */
#define RAISE_SYNTAX_ERROR_KNOWN_RANGE(a, b, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (b)->end_lineno, (b)->end_col_offset, msg, ##__VA_ARGS__)
/* Span covering `a` itself. */
#define RAISE_SYNTAX_ERROR_KNOWN_LOCATION(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, (a)->end_lineno, (a)->end_col_offset, msg, ##__VA_ARGS__)
/* Span starting at `a`, with CURRENT_POS passed for both end coordinates. */
#define RAISE_SYNTAX_ERROR_STARTING_FROM(a, msg, ...) \
    RAISE_ERROR_KNOWN_LOCATION(p, PyExc_SyntaxError, (a)->lineno, (a)->col_offset, CURRENT_POS, CURRENT_POS, msg, ##__VA_ARGS__)
#define RAISE_SYNTAX_ERROR_INVALID_TARGET(type, e) _RAISE_SYNTAX_ERROR_INVALID_TARGET(p, type, e)
195
Py_LOCAL_INLINE(void *)
196
CHECK_CALL(Parser *p, void *result)
197
{
198
if (result == NULL) {
199
assert(PyErr_Occurred());
200
p->error_indicator = 1;
201
}
202
return result;
203
}
204
205
/* This is needed for helper functions that are allowed to
206
return NULL without an error. Example: _PyPegen_seq_extract_starred_exprs */
207
Py_LOCAL_INLINE(void *)
208
CHECK_CALL_NULL_ALLOWED(Parser *p, void *result)
209
{
210
if (result == NULL && PyErr_Occurred()) {
211
p->error_indicator = 1;
212
}
213
return result;
214
}
215
/* Run `result` through the matching checker and cast it to `type`.
   Both macros expect a `Parser *p` in scope. */
#define CHECK(type, result) ((type) CHECK_CALL(p, result))
#define CHECK_NULL_ALLOWED(type, result) ((type) CHECK_CALL_NULL_ALLOWED(p, result))
219
expr_ty _PyPegen_get_invalid_target(expr_ty e, TARGETS_TYPE targets_type);
220
const char *_PyPegen_get_expr_name(expr_ty);
221
Py_LOCAL_INLINE(void *)
222
_RAISE_SYNTAX_ERROR_INVALID_TARGET(Parser *p, TARGETS_TYPE type, void *e)
223
{
224
expr_ty invalid_target = CHECK_NULL_ALLOWED(expr_ty, _PyPegen_get_invalid_target(e, type));
225
if (invalid_target != NULL) {
226
const char *msg;
227
if (type == STAR_TARGETS || type == FOR_TARGETS) {
228
msg = "cannot assign to %s";
229
}
230
else {
231
msg = "cannot delete %s";
232
}
233
return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(
234
invalid_target,
235
msg,
236
_PyPegen_get_expr_name(invalid_target)
237
);
238
return RAISE_SYNTAX_ERROR_KNOWN_LOCATION(invalid_target, "invalid syntax");
239
}
240
return NULL;
241
}
242
243
// Action utility functions
244
245
void *_PyPegen_dummy_name(Parser *p, ...);
246
void * _PyPegen_seq_last_item(asdl_seq *seq);
247
#define PyPegen_last_item(seq, type) ((type)_PyPegen_seq_last_item((asdl_seq*)seq))
248
void * _PyPegen_seq_first_item(asdl_seq *seq);
249
#define PyPegen_first_item(seq, type) ((type)_PyPegen_seq_first_item((asdl_seq*)seq))
250
#define UNUSED(expr) do { (void)(expr); } while (0)
251
#define EXTRA_EXPR(head, tail) head->lineno, (head)->col_offset, (tail)->end_lineno, (tail)->end_col_offset, p->arena
252
#define EXTRA _start_lineno, _start_col_offset, _end_lineno, _end_col_offset, p->arena
253
PyObject *_PyPegen_new_type_comment(Parser *, const char *);
254
255
Py_LOCAL_INLINE(PyObject *)
256
NEW_TYPE_COMMENT(Parser *p, Token *tc)
257
{
258
if (tc == NULL) {
259
return NULL;
260
}
261
const char *bytes = PyBytes_AsString(tc->bytes);
262
if (bytes == NULL) {
263
goto error;
264
}
265
PyObject *tco = _PyPegen_new_type_comment(p, bytes);
266
if (tco == NULL) {
267
goto error;
268
}
269
return tco;
270
error:
271
p->error_indicator = 1; // Inline CHECK_CALL
272
return NULL;
273
}
274
275
Py_LOCAL_INLINE(void *)
276
INVALID_VERSION_CHECK(Parser *p, int version, char *msg, void *node)
277
{
278
if (node == NULL) {
279
p->error_indicator = 1; // Inline CHECK_CALL
280
return NULL;
281
}
282
if (p->feature_version < version) {
283
p->error_indicator = 1;
284
return RAISE_SYNTAX_ERROR("%s only supported in Python 3.%i and greater",
285
msg, version);
286
}
287
return node;
288
}
289
290
#define CHECK_VERSION(type, version, msg, node) ((type) INVALID_VERSION_CHECK(p, version, msg, node))
291
292
arg_ty _PyPegen_add_type_comment_to_arg(Parser *, arg_ty, Token *);
293
PyObject *_PyPegen_new_identifier(Parser *, const char *);
294
asdl_seq *_PyPegen_singleton_seq(Parser *, void *);
295
asdl_seq *_PyPegen_seq_insert_in_front(Parser *, void *, asdl_seq *);
296
asdl_seq *_PyPegen_seq_append_to_end(Parser *, asdl_seq *, void *);
297
asdl_seq *_PyPegen_seq_flatten(Parser *, asdl_seq *);
298
expr_ty _PyPegen_join_names_with_dot(Parser *, expr_ty, expr_ty);
299
int _PyPegen_seq_count_dots(asdl_seq *);
300
alias_ty _PyPegen_alias_for_star(Parser *, int, int, int, int, PyArena *);
301
asdl_identifier_seq *_PyPegen_map_names_to_ids(Parser *, asdl_expr_seq *);
302
CmpopExprPair *_PyPegen_cmpop_expr_pair(Parser *, cmpop_ty, expr_ty);
303
asdl_int_seq *_PyPegen_get_cmpops(Parser *p, asdl_seq *);
304
asdl_expr_seq *_PyPegen_get_exprs(Parser *, asdl_seq *);
305
expr_ty _PyPegen_set_expr_context(Parser *, expr_ty, expr_context_ty);
306
KeyValuePair *_PyPegen_key_value_pair(Parser *, expr_ty, expr_ty);
307
asdl_expr_seq *_PyPegen_get_keys(Parser *, asdl_seq *);
308
asdl_expr_seq *_PyPegen_get_values(Parser *, asdl_seq *);
309
KeyPatternPair *_PyPegen_key_pattern_pair(Parser *, expr_ty, pattern_ty);
310
asdl_expr_seq *_PyPegen_get_pattern_keys(Parser *, asdl_seq *);
311
asdl_pattern_seq *_PyPegen_get_patterns(Parser *, asdl_seq *);
312
NameDefaultPair *_PyPegen_name_default_pair(Parser *, arg_ty, expr_ty, Token *);
313
SlashWithDefault *_PyPegen_slash_with_default(Parser *, asdl_arg_seq *, asdl_seq *);
314
StarEtc *_PyPegen_star_etc(Parser *, arg_ty, asdl_seq *, arg_ty);
315
arguments_ty _PyPegen_make_arguments(Parser *, asdl_arg_seq *, SlashWithDefault *,
316
asdl_arg_seq *, asdl_seq *, StarEtc *);
317
arguments_ty _PyPegen_empty_arguments(Parser *);
318
expr_ty _PyPegen_formatted_value(Parser *, expr_ty, Token *, ResultTokenWithMetadata *, ResultTokenWithMetadata *, Token *,
319
int, int, int, int, PyArena *);
320
AugOperator *_PyPegen_augoperator(Parser*, operator_ty type);
321
stmt_ty _PyPegen_function_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
322
stmt_ty _PyPegen_class_def_decorators(Parser *, asdl_expr_seq *, stmt_ty);
323
KeywordOrStarred *_PyPegen_keyword_or_starred(Parser *, void *, int);
324
asdl_expr_seq *_PyPegen_seq_extract_starred_exprs(Parser *, asdl_seq *);
325
asdl_keyword_seq *_PyPegen_seq_delete_starred_exprs(Parser *, asdl_seq *);
326
expr_ty _PyPegen_collect_call_seqs(Parser *, asdl_expr_seq *, asdl_seq *,
327
int lineno, int col_offset, int end_lineno,
328
int end_col_offset, PyArena *arena);
329
expr_ty _PyPegen_constant_from_token(Parser* p, Token* tok);
330
expr_ty _PyPegen_decoded_constant_from_token(Parser* p, Token* tok);
331
expr_ty _PyPegen_constant_from_string(Parser* p, Token* tok);
332
expr_ty _PyPegen_concatenate_strings(Parser *p, asdl_expr_seq *, int, int, int, int, PyArena *);
333
expr_ty _PyPegen_FetchRawForm(Parser *p, int, int, int, int);
334
expr_ty _PyPegen_ensure_imaginary(Parser *p, expr_ty);
335
expr_ty _PyPegen_ensure_real(Parser *p, expr_ty);
336
asdl_seq *_PyPegen_join_sequences(Parser *, asdl_seq *, asdl_seq *);
337
int _PyPegen_check_barry_as_flufl(Parser *, Token *);
338
int _PyPegen_check_legacy_stmt(Parser *p, expr_ty t);
339
ResultTokenWithMetadata *_PyPegen_check_fstring_conversion(Parser *p, Token *, expr_ty t);
340
ResultTokenWithMetadata *_PyPegen_setup_full_format_spec(Parser *, Token *, asdl_expr_seq *, int, int,
341
int, int, PyArena *);
342
mod_ty _PyPegen_make_module(Parser *, asdl_stmt_seq *);
343
void *_PyPegen_arguments_parsing_error(Parser *, expr_ty);
344
expr_ty _PyPegen_get_last_comprehension_item(comprehension_ty comprehension);
345
void *_PyPegen_nonparen_genexp_in_call(Parser *p, expr_ty args, asdl_comprehension_seq *comprehensions);
346
347
// Parser API
348
349
Parser *_PyPegen_Parser_New(struct tok_state *, int, int, int, int *, PyArena *);
350
void _PyPegen_Parser_Free(Parser *);
351
mod_ty _PyPegen_run_parser_from_file_pointer(FILE *, int, PyObject *, const char *,
352
const char *, const char *, PyCompilerFlags *, int *, PyArena *);
353
void *_PyPegen_run_parser(Parser *);
354
mod_ty _PyPegen_run_parser_from_string(const char *, int, PyObject *, PyCompilerFlags *, PyArena *);
355
asdl_stmt_seq *_PyPegen_interactive_exit(Parser *);
356
357
// TODO: move to the correct place in this file
358
expr_ty _PyPegen_joined_str(Parser *p, Token* a, asdl_expr_seq* expr, Token*b);
359
360
// Generated function in parse.c - function definition in python.gram
361
void *_PyPegen_parse(Parser *);
362
363
#endif
364
365