Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Python/ast.c
12 views
1
/*
 * This file exposes the _PyAST_Validate interface to check the integrity
 * of the given abstract syntax tree (potentially constructed manually).
 */
5
#include "Python.h"
6
#include "pycore_ast.h" // asdl_stmt_seq
7
#include "pycore_pystate.h" // _PyThreadState_GET()
8
9
#include <assert.h>
10
#include <stdbool.h>
11
12
/* Bookkeeping threaded through every validate_* routine during the walk. */
struct validator {
    int recursion_depth;  /* depth reached so far by the current walk */
    int recursion_limit;  /* depth beyond which RecursionError is raised */
};
16
17
static int validate_stmts(struct validator *, asdl_stmt_seq *);
18
static int validate_exprs(struct validator *, asdl_expr_seq *, expr_context_ty, int);
19
static int validate_patterns(struct validator *, asdl_pattern_seq *, int);
20
static int validate_type_params(struct validator *, asdl_type_param_seq *);
21
static int _validate_nonempty_seq(asdl_seq *, const char *, const char *);
22
static int validate_stmt(struct validator *, stmt_ty);
23
static int validate_expr(struct validator *, expr_ty, expr_context_ty);
24
static int validate_pattern(struct validator *, pattern_ty, int);
25
static int validate_typeparam(struct validator *, type_param_ty);
26
27
/*
 * Sanity-check the position fields (lineno, end_lineno, col_offset,
 * end_col_offset) of `node`, which must be a pointer to a struct carrying
 * those four members.
 *
 * If the positions are inconsistent, a ValueError is set and the
 * *enclosing function* returns 0; otherwise execution falls through.
 *
 * The body is wrapped in do { ... } while (0) so the macro expands to a
 * single statement (safe inside un-braced if/else), and `node` is
 * parenthesized so expression arguments expand correctly.  Note that
 * `node` is evaluated several times, so it must be free of side effects.
 */
#define VALIDATE_POSITIONS(node) \
    do { \
        if ((node)->lineno > (node)->end_lineno) { \
            PyErr_Format(PyExc_ValueError, \
                         "AST node line range (%d, %d) is not valid", \
                         (node)->lineno, (node)->end_lineno); \
            return 0; \
        } \
        if (((node)->lineno < 0 && (node)->end_lineno != (node)->lineno) || \
            ((node)->col_offset < 0 && \
             (node)->col_offset != (node)->end_col_offset)) { \
            PyErr_Format(PyExc_ValueError, \
                         "AST node column range (%d, %d) for line range (%d, %d) is not valid", \
                         (node)->col_offset, (node)->end_col_offset, \
                         (node)->lineno, (node)->end_lineno); \
            return 0; \
        } \
        if ((node)->lineno == (node)->end_lineno && \
            (node)->col_offset > (node)->end_col_offset) { \
            PyErr_Format(PyExc_ValueError, \
                         "line %d, column %d-%d is not a valid range", \
                         (node)->lineno, (node)->col_offset, \
                         (node)->end_col_offset); \
            return 0; \
        } \
    } while (0)
47
48
static int
49
validate_name(PyObject *name)
50
{
51
assert(!PyErr_Occurred());
52
assert(PyUnicode_Check(name));
53
static const char * const forbidden[] = {
54
"None",
55
"True",
56
"False",
57
NULL
58
};
59
for (int i = 0; forbidden[i] != NULL; i++) {
60
if (_PyUnicode_EqualToASCIIString(name, forbidden[i])) {
61
PyErr_Format(PyExc_ValueError, "identifier field can't represent '%s' constant", forbidden[i]);
62
return 0;
63
}
64
}
65
return 1;
66
}
67
68
static int
69
validate_comprehension(struct validator *state, asdl_comprehension_seq *gens)
70
{
71
assert(!PyErr_Occurred());
72
if (!asdl_seq_LEN(gens)) {
73
PyErr_SetString(PyExc_ValueError, "comprehension with no generators");
74
return 0;
75
}
76
for (Py_ssize_t i = 0; i < asdl_seq_LEN(gens); i++) {
77
comprehension_ty comp = asdl_seq_GET(gens, i);
78
if (!validate_expr(state, comp->target, Store) ||
79
!validate_expr(state, comp->iter, Load) ||
80
!validate_exprs(state, comp->ifs, Load, 0))
81
return 0;
82
}
83
return 1;
84
}
85
86
static int
87
validate_keywords(struct validator *state, asdl_keyword_seq *keywords)
88
{
89
assert(!PyErr_Occurred());
90
for (Py_ssize_t i = 0; i < asdl_seq_LEN(keywords); i++)
91
if (!validate_expr(state, (asdl_seq_GET(keywords, i))->value, Load))
92
return 0;
93
return 1;
94
}
95
96
static int
97
validate_args(struct validator *state, asdl_arg_seq *args)
98
{
99
assert(!PyErr_Occurred());
100
for (Py_ssize_t i = 0; i < asdl_seq_LEN(args); i++) {
101
arg_ty arg = asdl_seq_GET(args, i);
102
VALIDATE_POSITIONS(arg);
103
if (arg->annotation && !validate_expr(state, arg->annotation, Load))
104
return 0;
105
}
106
return 1;
107
}
108
109
static const char *
110
expr_context_name(expr_context_ty ctx)
111
{
112
switch (ctx) {
113
case Load:
114
return "Load";
115
case Store:
116
return "Store";
117
case Del:
118
return "Del";
119
// No default case so compiler emits warning for unhandled cases
120
}
121
Py_UNREACHABLE();
122
}
123
124
static int
125
validate_arguments(struct validator *state, arguments_ty args)
126
{
127
assert(!PyErr_Occurred());
128
if (!validate_args(state, args->posonlyargs) || !validate_args(state, args->args)) {
129
return 0;
130
}
131
if (args->vararg && args->vararg->annotation
132
&& !validate_expr(state, args->vararg->annotation, Load)) {
133
return 0;
134
}
135
if (!validate_args(state, args->kwonlyargs))
136
return 0;
137
if (args->kwarg && args->kwarg->annotation
138
&& !validate_expr(state, args->kwarg->annotation, Load)) {
139
return 0;
140
}
141
if (asdl_seq_LEN(args->defaults) > asdl_seq_LEN(args->posonlyargs) + asdl_seq_LEN(args->args)) {
142
PyErr_SetString(PyExc_ValueError, "more positional defaults than args on arguments");
143
return 0;
144
}
145
if (asdl_seq_LEN(args->kw_defaults) != asdl_seq_LEN(args->kwonlyargs)) {
146
PyErr_SetString(PyExc_ValueError, "length of kwonlyargs is not the same as "
147
"kw_defaults on arguments");
148
return 0;
149
}
150
return validate_exprs(state, args->defaults, Load, 0) && validate_exprs(state, args->kw_defaults, Load, 1);
151
}
152
153
static int
154
validate_constant(struct validator *state, PyObject *value)
155
{
156
assert(!PyErr_Occurred());
157
if (value == Py_None || value == Py_Ellipsis)
158
return 1;
159
160
if (PyLong_CheckExact(value)
161
|| PyFloat_CheckExact(value)
162
|| PyComplex_CheckExact(value)
163
|| PyBool_Check(value)
164
|| PyUnicode_CheckExact(value)
165
|| PyBytes_CheckExact(value))
166
return 1;
167
168
if (PyTuple_CheckExact(value) || PyFrozenSet_CheckExact(value)) {
169
if (++state->recursion_depth > state->recursion_limit) {
170
PyErr_SetString(PyExc_RecursionError,
171
"maximum recursion depth exceeded during compilation");
172
return 0;
173
}
174
175
PyObject *it = PyObject_GetIter(value);
176
if (it == NULL)
177
return 0;
178
179
while (1) {
180
PyObject *item = PyIter_Next(it);
181
if (item == NULL) {
182
if (PyErr_Occurred()) {
183
Py_DECREF(it);
184
return 0;
185
}
186
break;
187
}
188
189
if (!validate_constant(state, item)) {
190
Py_DECREF(it);
191
Py_DECREF(item);
192
return 0;
193
}
194
Py_DECREF(item);
195
}
196
197
Py_DECREF(it);
198
--state->recursion_depth;
199
return 1;
200
}
201
202
if (!PyErr_Occurred()) {
203
PyErr_Format(PyExc_TypeError,
204
"got an invalid type in Constant: %s",
205
_PyType_Name(Py_TYPE(value)));
206
}
207
return 0;
208
}
209
210
static int
211
validate_expr(struct validator *state, expr_ty exp, expr_context_ty ctx)
212
{
213
assert(!PyErr_Occurred());
214
VALIDATE_POSITIONS(exp);
215
int ret = -1;
216
if (++state->recursion_depth > state->recursion_limit) {
217
PyErr_SetString(PyExc_RecursionError,
218
"maximum recursion depth exceeded during compilation");
219
return 0;
220
}
221
int check_ctx = 1;
222
expr_context_ty actual_ctx;
223
224
/* First check expression context. */
225
switch (exp->kind) {
226
case Attribute_kind:
227
actual_ctx = exp->v.Attribute.ctx;
228
break;
229
case Subscript_kind:
230
actual_ctx = exp->v.Subscript.ctx;
231
break;
232
case Starred_kind:
233
actual_ctx = exp->v.Starred.ctx;
234
break;
235
case Name_kind:
236
if (!validate_name(exp->v.Name.id)) {
237
return 0;
238
}
239
actual_ctx = exp->v.Name.ctx;
240
break;
241
case List_kind:
242
actual_ctx = exp->v.List.ctx;
243
break;
244
case Tuple_kind:
245
actual_ctx = exp->v.Tuple.ctx;
246
break;
247
default:
248
if (ctx != Load) {
249
PyErr_Format(PyExc_ValueError, "expression which can't be "
250
"assigned to in %s context", expr_context_name(ctx));
251
return 0;
252
}
253
check_ctx = 0;
254
/* set actual_ctx to prevent gcc warning */
255
actual_ctx = 0;
256
}
257
if (check_ctx && actual_ctx != ctx) {
258
PyErr_Format(PyExc_ValueError, "expression must have %s context but has %s instead",
259
expr_context_name(ctx), expr_context_name(actual_ctx));
260
return 0;
261
}
262
263
/* Now validate expression. */
264
switch (exp->kind) {
265
case BoolOp_kind:
266
if (asdl_seq_LEN(exp->v.BoolOp.values) < 2) {
267
PyErr_SetString(PyExc_ValueError, "BoolOp with less than 2 values");
268
return 0;
269
}
270
ret = validate_exprs(state, exp->v.BoolOp.values, Load, 0);
271
break;
272
case BinOp_kind:
273
ret = validate_expr(state, exp->v.BinOp.left, Load) &&
274
validate_expr(state, exp->v.BinOp.right, Load);
275
break;
276
case UnaryOp_kind:
277
ret = validate_expr(state, exp->v.UnaryOp.operand, Load);
278
break;
279
case Lambda_kind:
280
ret = validate_arguments(state, exp->v.Lambda.args) &&
281
validate_expr(state, exp->v.Lambda.body, Load);
282
break;
283
case IfExp_kind:
284
ret = validate_expr(state, exp->v.IfExp.test, Load) &&
285
validate_expr(state, exp->v.IfExp.body, Load) &&
286
validate_expr(state, exp->v.IfExp.orelse, Load);
287
break;
288
case Dict_kind:
289
if (asdl_seq_LEN(exp->v.Dict.keys) != asdl_seq_LEN(exp->v.Dict.values)) {
290
PyErr_SetString(PyExc_ValueError,
291
"Dict doesn't have the same number of keys as values");
292
return 0;
293
}
294
/* null_ok=1 for keys expressions to allow dict unpacking to work in
295
dict literals, i.e. ``{**{a:b}}`` */
296
ret = validate_exprs(state, exp->v.Dict.keys, Load, /*null_ok=*/ 1) &&
297
validate_exprs(state, exp->v.Dict.values, Load, /*null_ok=*/ 0);
298
break;
299
case Set_kind:
300
ret = validate_exprs(state, exp->v.Set.elts, Load, 0);
301
break;
302
#define COMP(NAME) \
303
case NAME ## _kind: \
304
ret = validate_comprehension(state, exp->v.NAME.generators) && \
305
validate_expr(state, exp->v.NAME.elt, Load); \
306
break;
307
COMP(ListComp)
308
COMP(SetComp)
309
COMP(GeneratorExp)
310
#undef COMP
311
case DictComp_kind:
312
ret = validate_comprehension(state, exp->v.DictComp.generators) &&
313
validate_expr(state, exp->v.DictComp.key, Load) &&
314
validate_expr(state, exp->v.DictComp.value, Load);
315
break;
316
case Yield_kind:
317
ret = !exp->v.Yield.value || validate_expr(state, exp->v.Yield.value, Load);
318
break;
319
case YieldFrom_kind:
320
ret = validate_expr(state, exp->v.YieldFrom.value, Load);
321
break;
322
case Await_kind:
323
ret = validate_expr(state, exp->v.Await.value, Load);
324
break;
325
case Compare_kind:
326
if (!asdl_seq_LEN(exp->v.Compare.comparators)) {
327
PyErr_SetString(PyExc_ValueError, "Compare with no comparators");
328
return 0;
329
}
330
if (asdl_seq_LEN(exp->v.Compare.comparators) !=
331
asdl_seq_LEN(exp->v.Compare.ops)) {
332
PyErr_SetString(PyExc_ValueError, "Compare has a different number "
333
"of comparators and operands");
334
return 0;
335
}
336
ret = validate_exprs(state, exp->v.Compare.comparators, Load, 0) &&
337
validate_expr(state, exp->v.Compare.left, Load);
338
break;
339
case Call_kind:
340
ret = validate_expr(state, exp->v.Call.func, Load) &&
341
validate_exprs(state, exp->v.Call.args, Load, 0) &&
342
validate_keywords(state, exp->v.Call.keywords);
343
break;
344
case Constant_kind:
345
if (!validate_constant(state, exp->v.Constant.value)) {
346
return 0;
347
}
348
ret = 1;
349
break;
350
case JoinedStr_kind:
351
ret = validate_exprs(state, exp->v.JoinedStr.values, Load, 0);
352
break;
353
case FormattedValue_kind:
354
if (validate_expr(state, exp->v.FormattedValue.value, Load) == 0)
355
return 0;
356
if (exp->v.FormattedValue.format_spec) {
357
ret = validate_expr(state, exp->v.FormattedValue.format_spec, Load);
358
break;
359
}
360
ret = 1;
361
break;
362
case Attribute_kind:
363
ret = validate_expr(state, exp->v.Attribute.value, Load);
364
break;
365
case Subscript_kind:
366
ret = validate_expr(state, exp->v.Subscript.slice, Load) &&
367
validate_expr(state, exp->v.Subscript.value, Load);
368
break;
369
case Starred_kind:
370
ret = validate_expr(state, exp->v.Starred.value, ctx);
371
break;
372
case Slice_kind:
373
ret = (!exp->v.Slice.lower || validate_expr(state, exp->v.Slice.lower, Load)) &&
374
(!exp->v.Slice.upper || validate_expr(state, exp->v.Slice.upper, Load)) &&
375
(!exp->v.Slice.step || validate_expr(state, exp->v.Slice.step, Load));
376
break;
377
case List_kind:
378
ret = validate_exprs(state, exp->v.List.elts, ctx, 0);
379
break;
380
case Tuple_kind:
381
ret = validate_exprs(state, exp->v.Tuple.elts, ctx, 0);
382
break;
383
case NamedExpr_kind:
384
ret = validate_expr(state, exp->v.NamedExpr.value, Load);
385
break;
386
/* This last case doesn't have any checking. */
387
case Name_kind:
388
ret = 1;
389
break;
390
// No default case so compiler emits warning for unhandled cases
391
}
392
if (ret < 0) {
393
PyErr_SetString(PyExc_SystemError, "unexpected expression");
394
ret = 0;
395
}
396
state->recursion_depth--;
397
return ret;
398
}
399
400
401
// Note: the ensure_literal_* functions are only used to validate a restricted
402
// set of non-recursive literals that have already been checked with
403
// validate_expr, so they don't accept the validator state
404
static int
405
ensure_literal_number(expr_ty exp, bool allow_real, bool allow_imaginary)
406
{
407
assert(exp->kind == Constant_kind);
408
PyObject *value = exp->v.Constant.value;
409
return (allow_real && PyFloat_CheckExact(value)) ||
410
(allow_real && PyLong_CheckExact(value)) ||
411
(allow_imaginary && PyComplex_CheckExact(value));
412
}
413
414
static int
415
ensure_literal_negative(expr_ty exp, bool allow_real, bool allow_imaginary)
416
{
417
assert(exp->kind == UnaryOp_kind);
418
// Must be negation ...
419
if (exp->v.UnaryOp.op != USub) {
420
return 0;
421
}
422
// ... of a constant ...
423
expr_ty operand = exp->v.UnaryOp.operand;
424
if (operand->kind != Constant_kind) {
425
return 0;
426
}
427
// ... number
428
return ensure_literal_number(operand, allow_real, allow_imaginary);
429
}
430
431
static int
432
ensure_literal_complex(expr_ty exp)
433
{
434
assert(exp->kind == BinOp_kind);
435
expr_ty left = exp->v.BinOp.left;
436
expr_ty right = exp->v.BinOp.right;
437
// Ensure op is addition or subtraction
438
if (exp->v.BinOp.op != Add && exp->v.BinOp.op != Sub) {
439
return 0;
440
}
441
// Check LHS is a real number (potentially signed)
442
switch (left->kind)
443
{
444
case Constant_kind:
445
if (!ensure_literal_number(left, /*real=*/true, /*imaginary=*/false)) {
446
return 0;
447
}
448
break;
449
case UnaryOp_kind:
450
if (!ensure_literal_negative(left, /*real=*/true, /*imaginary=*/false)) {
451
return 0;
452
}
453
break;
454
default:
455
return 0;
456
}
457
// Check RHS is an imaginary number (no separate sign allowed)
458
switch (right->kind)
459
{
460
case Constant_kind:
461
if (!ensure_literal_number(right, /*real=*/false, /*imaginary=*/true)) {
462
return 0;
463
}
464
break;
465
default:
466
return 0;
467
}
468
return 1;
469
}
470
471
static int
472
validate_pattern_match_value(struct validator *state, expr_ty exp)
473
{
474
assert(!PyErr_Occurred());
475
if (!validate_expr(state, exp, Load)) {
476
return 0;
477
}
478
479
switch (exp->kind)
480
{
481
case Constant_kind:
482
/* Ellipsis and immutable sequences are not allowed.
483
For True, False and None, MatchSingleton() should
484
be used */
485
if (!validate_expr(state, exp, Load)) {
486
return 0;
487
}
488
PyObject *literal = exp->v.Constant.value;
489
if (PyLong_CheckExact(literal) || PyFloat_CheckExact(literal) ||
490
PyBytes_CheckExact(literal) || PyComplex_CheckExact(literal) ||
491
PyUnicode_CheckExact(literal)) {
492
return 1;
493
}
494
PyErr_SetString(PyExc_ValueError,
495
"unexpected constant inside of a literal pattern");
496
return 0;
497
case Attribute_kind:
498
// Constants and attribute lookups are always permitted
499
return 1;
500
case UnaryOp_kind:
501
// Negated numbers are permitted (whether real or imaginary)
502
// Compiler will complain if AST folding doesn't create a constant
503
if (ensure_literal_negative(exp, /*real=*/true, /*imaginary=*/true)) {
504
return 1;
505
}
506
break;
507
case BinOp_kind:
508
// Complex literals are permitted
509
// Compiler will complain if AST folding doesn't create a constant
510
if (ensure_literal_complex(exp)) {
511
return 1;
512
}
513
break;
514
case JoinedStr_kind:
515
// Handled in the later stages
516
return 1;
517
default:
518
break;
519
}
520
PyErr_SetString(PyExc_ValueError,
521
"patterns may only match literals and attribute lookups");
522
return 0;
523
}
524
525
static int
526
validate_capture(PyObject *name)
527
{
528
assert(!PyErr_Occurred());
529
if (_PyUnicode_EqualToASCIIString(name, "_")) {
530
PyErr_Format(PyExc_ValueError, "can't capture name '_' in patterns");
531
return 0;
532
}
533
return validate_name(name);
534
}
535
536
static int
537
validate_pattern(struct validator *state, pattern_ty p, int star_ok)
538
{
539
assert(!PyErr_Occurred());
540
VALIDATE_POSITIONS(p);
541
int ret = -1;
542
if (++state->recursion_depth > state->recursion_limit) {
543
PyErr_SetString(PyExc_RecursionError,
544
"maximum recursion depth exceeded during compilation");
545
return 0;
546
}
547
switch (p->kind) {
548
case MatchValue_kind:
549
ret = validate_pattern_match_value(state, p->v.MatchValue.value);
550
break;
551
case MatchSingleton_kind:
552
ret = p->v.MatchSingleton.value == Py_None || PyBool_Check(p->v.MatchSingleton.value);
553
if (!ret) {
554
PyErr_SetString(PyExc_ValueError,
555
"MatchSingleton can only contain True, False and None");
556
}
557
break;
558
case MatchSequence_kind:
559
ret = validate_patterns(state, p->v.MatchSequence.patterns, /*star_ok=*/1);
560
break;
561
case MatchMapping_kind:
562
if (asdl_seq_LEN(p->v.MatchMapping.keys) != asdl_seq_LEN(p->v.MatchMapping.patterns)) {
563
PyErr_SetString(PyExc_ValueError,
564
"MatchMapping doesn't have the same number of keys as patterns");
565
ret = 0;
566
break;
567
}
568
569
if (p->v.MatchMapping.rest && !validate_capture(p->v.MatchMapping.rest)) {
570
ret = 0;
571
break;
572
}
573
574
asdl_expr_seq *keys = p->v.MatchMapping.keys;
575
for (Py_ssize_t i = 0; i < asdl_seq_LEN(keys); i++) {
576
expr_ty key = asdl_seq_GET(keys, i);
577
if (key->kind == Constant_kind) {
578
PyObject *literal = key->v.Constant.value;
579
if (literal == Py_None || PyBool_Check(literal)) {
580
/* validate_pattern_match_value will ensure the key
581
doesn't contain True, False and None but it is
582
syntactically valid, so we will pass those on in
583
a special case. */
584
continue;
585
}
586
}
587
if (!validate_pattern_match_value(state, key)) {
588
ret = 0;
589
break;
590
}
591
}
592
if (ret == 0) {
593
break;
594
}
595
ret = validate_patterns(state, p->v.MatchMapping.patterns, /*star_ok=*/0);
596
break;
597
case MatchClass_kind:
598
if (asdl_seq_LEN(p->v.MatchClass.kwd_attrs) != asdl_seq_LEN(p->v.MatchClass.kwd_patterns)) {
599
PyErr_SetString(PyExc_ValueError,
600
"MatchClass doesn't have the same number of keyword attributes as patterns");
601
ret = 0;
602
break;
603
}
604
if (!validate_expr(state, p->v.MatchClass.cls, Load)) {
605
ret = 0;
606
break;
607
}
608
609
expr_ty cls = p->v.MatchClass.cls;
610
while (1) {
611
if (cls->kind == Name_kind) {
612
break;
613
}
614
else if (cls->kind == Attribute_kind) {
615
cls = cls->v.Attribute.value;
616
continue;
617
}
618
else {
619
PyErr_SetString(PyExc_ValueError,
620
"MatchClass cls field can only contain Name or Attribute nodes.");
621
ret = 0;
622
break;
623
}
624
}
625
if (ret == 0) {
626
break;
627
}
628
629
for (Py_ssize_t i = 0; i < asdl_seq_LEN(p->v.MatchClass.kwd_attrs); i++) {
630
PyObject *identifier = asdl_seq_GET(p->v.MatchClass.kwd_attrs, i);
631
if (!validate_name(identifier)) {
632
ret = 0;
633
break;
634
}
635
}
636
if (ret == 0) {
637
break;
638
}
639
640
if (!validate_patterns(state, p->v.MatchClass.patterns, /*star_ok=*/0)) {
641
ret = 0;
642
break;
643
}
644
645
ret = validate_patterns(state, p->v.MatchClass.kwd_patterns, /*star_ok=*/0);
646
break;
647
case MatchStar_kind:
648
if (!star_ok) {
649
PyErr_SetString(PyExc_ValueError, "can't use MatchStar here");
650
ret = 0;
651
break;
652
}
653
ret = p->v.MatchStar.name == NULL || validate_capture(p->v.MatchStar.name);
654
break;
655
case MatchAs_kind:
656
if (p->v.MatchAs.name && !validate_capture(p->v.MatchAs.name)) {
657
ret = 0;
658
break;
659
}
660
if (p->v.MatchAs.pattern == NULL) {
661
ret = 1;
662
}
663
else if (p->v.MatchAs.name == NULL) {
664
PyErr_SetString(PyExc_ValueError,
665
"MatchAs must specify a target name if a pattern is given");
666
ret = 0;
667
}
668
else {
669
ret = validate_pattern(state, p->v.MatchAs.pattern, /*star_ok=*/0);
670
}
671
break;
672
case MatchOr_kind:
673
if (asdl_seq_LEN(p->v.MatchOr.patterns) < 2) {
674
PyErr_SetString(PyExc_ValueError,
675
"MatchOr requires at least 2 patterns");
676
ret = 0;
677
break;
678
}
679
ret = validate_patterns(state, p->v.MatchOr.patterns, /*star_ok=*/0);
680
break;
681
// No default case, so the compiler will emit a warning if new pattern
682
// kinds are added without being handled here
683
}
684
if (ret < 0) {
685
PyErr_SetString(PyExc_SystemError, "unexpected pattern");
686
ret = 0;
687
}
688
state->recursion_depth--;
689
return ret;
690
}
691
692
static int
693
_validate_nonempty_seq(asdl_seq *seq, const char *what, const char *owner)
694
{
695
if (asdl_seq_LEN(seq))
696
return 1;
697
PyErr_Format(PyExc_ValueError, "empty %s on %s", what, owner);
698
return 0;
699
}
700
#define validate_nonempty_seq(seq, what, owner) _validate_nonempty_seq((asdl_seq*)seq, what, owner)
701
702
static int
703
validate_assignlist(struct validator *state, asdl_expr_seq *targets, expr_context_ty ctx)
704
{
705
assert(!PyErr_Occurred());
706
return validate_nonempty_seq(targets, "targets", ctx == Del ? "Delete" : "Assign") &&
707
validate_exprs(state, targets, ctx, 0);
708
}
709
710
static int
711
validate_body(struct validator *state, asdl_stmt_seq *body, const char *owner)
712
{
713
assert(!PyErr_Occurred());
714
return validate_nonempty_seq(body, "body", owner) && validate_stmts(state, body);
715
}
716
717
static int
718
validate_stmt(struct validator *state, stmt_ty stmt)
719
{
720
assert(!PyErr_Occurred());
721
VALIDATE_POSITIONS(stmt);
722
int ret = -1;
723
if (++state->recursion_depth > state->recursion_limit) {
724
PyErr_SetString(PyExc_RecursionError,
725
"maximum recursion depth exceeded during compilation");
726
return 0;
727
}
728
switch (stmt->kind) {
729
case FunctionDef_kind:
730
ret = validate_body(state, stmt->v.FunctionDef.body, "FunctionDef") &&
731
validate_type_params(state, stmt->v.FunctionDef.type_params) &&
732
validate_arguments(state, stmt->v.FunctionDef.args) &&
733
validate_exprs(state, stmt->v.FunctionDef.decorator_list, Load, 0) &&
734
(!stmt->v.FunctionDef.returns ||
735
validate_expr(state, stmt->v.FunctionDef.returns, Load));
736
break;
737
case ClassDef_kind:
738
ret = validate_body(state, stmt->v.ClassDef.body, "ClassDef") &&
739
validate_type_params(state, stmt->v.ClassDef.type_params) &&
740
validate_exprs(state, stmt->v.ClassDef.bases, Load, 0) &&
741
validate_keywords(state, stmt->v.ClassDef.keywords) &&
742
validate_exprs(state, stmt->v.ClassDef.decorator_list, Load, 0);
743
break;
744
case Return_kind:
745
ret = !stmt->v.Return.value || validate_expr(state, stmt->v.Return.value, Load);
746
break;
747
case Delete_kind:
748
ret = validate_assignlist(state, stmt->v.Delete.targets, Del);
749
break;
750
case Assign_kind:
751
ret = validate_assignlist(state, stmt->v.Assign.targets, Store) &&
752
validate_expr(state, stmt->v.Assign.value, Load);
753
break;
754
case AugAssign_kind:
755
ret = validate_expr(state, stmt->v.AugAssign.target, Store) &&
756
validate_expr(state, stmt->v.AugAssign.value, Load);
757
break;
758
case AnnAssign_kind:
759
if (stmt->v.AnnAssign.target->kind != Name_kind &&
760
stmt->v.AnnAssign.simple) {
761
PyErr_SetString(PyExc_TypeError,
762
"AnnAssign with simple non-Name target");
763
return 0;
764
}
765
ret = validate_expr(state, stmt->v.AnnAssign.target, Store) &&
766
(!stmt->v.AnnAssign.value ||
767
validate_expr(state, stmt->v.AnnAssign.value, Load)) &&
768
validate_expr(state, stmt->v.AnnAssign.annotation, Load);
769
break;
770
case TypeAlias_kind:
771
ret = validate_expr(state, stmt->v.TypeAlias.name, Store) &&
772
validate_type_params(state, stmt->v.TypeAlias.type_params) &&
773
validate_expr(state, stmt->v.TypeAlias.value, Load);
774
break;
775
case For_kind:
776
ret = validate_expr(state, stmt->v.For.target, Store) &&
777
validate_expr(state, stmt->v.For.iter, Load) &&
778
validate_body(state, stmt->v.For.body, "For") &&
779
validate_stmts(state, stmt->v.For.orelse);
780
break;
781
case AsyncFor_kind:
782
ret = validate_expr(state, stmt->v.AsyncFor.target, Store) &&
783
validate_expr(state, stmt->v.AsyncFor.iter, Load) &&
784
validate_body(state, stmt->v.AsyncFor.body, "AsyncFor") &&
785
validate_stmts(state, stmt->v.AsyncFor.orelse);
786
break;
787
case While_kind:
788
ret = validate_expr(state, stmt->v.While.test, Load) &&
789
validate_body(state, stmt->v.While.body, "While") &&
790
validate_stmts(state, stmt->v.While.orelse);
791
break;
792
case If_kind:
793
ret = validate_expr(state, stmt->v.If.test, Load) &&
794
validate_body(state, stmt->v.If.body, "If") &&
795
validate_stmts(state, stmt->v.If.orelse);
796
break;
797
case With_kind:
798
if (!validate_nonempty_seq(stmt->v.With.items, "items", "With"))
799
return 0;
800
for (Py_ssize_t i = 0; i < asdl_seq_LEN(stmt->v.With.items); i++) {
801
withitem_ty item = asdl_seq_GET(stmt->v.With.items, i);
802
if (!validate_expr(state, item->context_expr, Load) ||
803
(item->optional_vars && !validate_expr(state, item->optional_vars, Store)))
804
return 0;
805
}
806
ret = validate_body(state, stmt->v.With.body, "With");
807
break;
808
case AsyncWith_kind:
809
if (!validate_nonempty_seq(stmt->v.AsyncWith.items, "items", "AsyncWith"))
810
return 0;
811
for (Py_ssize_t i = 0; i < asdl_seq_LEN(stmt->v.AsyncWith.items); i++) {
812
withitem_ty item = asdl_seq_GET(stmt->v.AsyncWith.items, i);
813
if (!validate_expr(state, item->context_expr, Load) ||
814
(item->optional_vars && !validate_expr(state, item->optional_vars, Store)))
815
return 0;
816
}
817
ret = validate_body(state, stmt->v.AsyncWith.body, "AsyncWith");
818
break;
819
case Match_kind:
820
if (!validate_expr(state, stmt->v.Match.subject, Load)
821
|| !validate_nonempty_seq(stmt->v.Match.cases, "cases", "Match")) {
822
return 0;
823
}
824
for (Py_ssize_t i = 0; i < asdl_seq_LEN(stmt->v.Match.cases); i++) {
825
match_case_ty m = asdl_seq_GET(stmt->v.Match.cases, i);
826
if (!validate_pattern(state, m->pattern, /*star_ok=*/0)
827
|| (m->guard && !validate_expr(state, m->guard, Load))
828
|| !validate_body(state, m->body, "match_case")) {
829
return 0;
830
}
831
}
832
ret = 1;
833
break;
834
case Raise_kind:
835
if (stmt->v.Raise.exc) {
836
ret = validate_expr(state, stmt->v.Raise.exc, Load) &&
837
(!stmt->v.Raise.cause || validate_expr(state, stmt->v.Raise.cause, Load));
838
break;
839
}
840
if (stmt->v.Raise.cause) {
841
PyErr_SetString(PyExc_ValueError, "Raise with cause but no exception");
842
return 0;
843
}
844
ret = 1;
845
break;
846
case Try_kind:
847
if (!validate_body(state, stmt->v.Try.body, "Try"))
848
return 0;
849
if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
850
!asdl_seq_LEN(stmt->v.Try.finalbody)) {
851
PyErr_SetString(PyExc_ValueError, "Try has neither except handlers nor finalbody");
852
return 0;
853
}
854
if (!asdl_seq_LEN(stmt->v.Try.handlers) &&
855
asdl_seq_LEN(stmt->v.Try.orelse)) {
856
PyErr_SetString(PyExc_ValueError, "Try has orelse but no except handlers");
857
return 0;
858
}
859
for (Py_ssize_t i = 0; i < asdl_seq_LEN(stmt->v.Try.handlers); i++) {
860
excepthandler_ty handler = asdl_seq_GET(stmt->v.Try.handlers, i);
861
VALIDATE_POSITIONS(handler);
862
if ((handler->v.ExceptHandler.type &&
863
!validate_expr(state, handler->v.ExceptHandler.type, Load)) ||
864
!validate_body(state, handler->v.ExceptHandler.body, "ExceptHandler"))
865
return 0;
866
}
867
ret = (!asdl_seq_LEN(stmt->v.Try.finalbody) ||
868
validate_stmts(state, stmt->v.Try.finalbody)) &&
869
(!asdl_seq_LEN(stmt->v.Try.orelse) ||
870
validate_stmts(state, stmt->v.Try.orelse));
871
break;
872
case TryStar_kind:
873
if (!validate_body(state, stmt->v.TryStar.body, "TryStar"))
874
return 0;
875
if (!asdl_seq_LEN(stmt->v.TryStar.handlers) &&
876
!asdl_seq_LEN(stmt->v.TryStar.finalbody)) {
877
PyErr_SetString(PyExc_ValueError, "TryStar has neither except handlers nor finalbody");
878
return 0;
879
}
880
if (!asdl_seq_LEN(stmt->v.TryStar.handlers) &&
881
asdl_seq_LEN(stmt->v.TryStar.orelse)) {
882
PyErr_SetString(PyExc_ValueError, "TryStar has orelse but no except handlers");
883
return 0;
884
}
885
for (Py_ssize_t i = 0; i < asdl_seq_LEN(stmt->v.TryStar.handlers); i++) {
886
excepthandler_ty handler = asdl_seq_GET(stmt->v.TryStar.handlers, i);
887
if ((handler->v.ExceptHandler.type &&
888
!validate_expr(state, handler->v.ExceptHandler.type, Load)) ||
889
!validate_body(state, handler->v.ExceptHandler.body, "ExceptHandler"))
890
return 0;
891
}
892
ret = (!asdl_seq_LEN(stmt->v.TryStar.finalbody) ||
893
validate_stmts(state, stmt->v.TryStar.finalbody)) &&
894
(!asdl_seq_LEN(stmt->v.TryStar.orelse) ||
895
validate_stmts(state, stmt->v.TryStar.orelse));
896
break;
897
case Assert_kind:
898
ret = validate_expr(state, stmt->v.Assert.test, Load) &&
899
(!stmt->v.Assert.msg || validate_expr(state, stmt->v.Assert.msg, Load));
900
break;
901
case Import_kind:
902
ret = validate_nonempty_seq(stmt->v.Import.names, "names", "Import");
903
break;
904
case ImportFrom_kind:
905
if (stmt->v.ImportFrom.level < 0) {
906
PyErr_SetString(PyExc_ValueError, "Negative ImportFrom level");
907
return 0;
908
}
909
ret = validate_nonempty_seq(stmt->v.ImportFrom.names, "names", "ImportFrom");
910
break;
911
case Global_kind:
912
ret = validate_nonempty_seq(stmt->v.Global.names, "names", "Global");
913
break;
914
case Nonlocal_kind:
915
ret = validate_nonempty_seq(stmt->v.Nonlocal.names, "names", "Nonlocal");
916
break;
917
case Expr_kind:
918
ret = validate_expr(state, stmt->v.Expr.value, Load);
919
break;
920
case AsyncFunctionDef_kind:
921
ret = validate_body(state, stmt->v.AsyncFunctionDef.body, "AsyncFunctionDef") &&
922
validate_type_params(state, stmt->v.AsyncFunctionDef.type_params) &&
923
validate_arguments(state, stmt->v.AsyncFunctionDef.args) &&
924
validate_exprs(state, stmt->v.AsyncFunctionDef.decorator_list, Load, 0) &&
925
(!stmt->v.AsyncFunctionDef.returns ||
926
validate_expr(state, stmt->v.AsyncFunctionDef.returns, Load));
927
break;
928
case Pass_kind:
929
case Break_kind:
930
case Continue_kind:
931
ret = 1;
932
break;
933
// No default case so compiler emits warning for unhandled cases
934
}
935
if (ret < 0) {
936
PyErr_SetString(PyExc_SystemError, "unexpected statement");
937
ret = 0;
938
}
939
state->recursion_depth--;
940
return ret;
941
}
942
943
static int
944
validate_stmts(struct validator *state, asdl_stmt_seq *seq)
945
{
946
assert(!PyErr_Occurred());
947
for (Py_ssize_t i = 0; i < asdl_seq_LEN(seq); i++) {
948
stmt_ty stmt = asdl_seq_GET(seq, i);
949
if (stmt) {
950
if (!validate_stmt(state, stmt))
951
return 0;
952
}
953
else {
954
PyErr_SetString(PyExc_ValueError,
955
"None disallowed in statement list");
956
return 0;
957
}
958
}
959
return 1;
960
}
961
962
static int
963
validate_exprs(struct validator *state, asdl_expr_seq *exprs, expr_context_ty ctx, int null_ok)
964
{
965
assert(!PyErr_Occurred());
966
for (Py_ssize_t i = 0; i < asdl_seq_LEN(exprs); i++) {
967
expr_ty expr = asdl_seq_GET(exprs, i);
968
if (expr) {
969
if (!validate_expr(state, expr, ctx))
970
return 0;
971
}
972
else if (!null_ok) {
973
PyErr_SetString(PyExc_ValueError,
974
"None disallowed in expression list");
975
return 0;
976
}
977
978
}
979
return 1;
980
}
981
982
static int
983
validate_patterns(struct validator *state, asdl_pattern_seq *patterns, int star_ok)
984
{
985
assert(!PyErr_Occurred());
986
for (Py_ssize_t i = 0; i < asdl_seq_LEN(patterns); i++) {
987
pattern_ty pattern = asdl_seq_GET(patterns, i);
988
if (!validate_pattern(state, pattern, star_ok)) {
989
return 0;
990
}
991
}
992
return 1;
993
}
994
995
static int
996
validate_typeparam(struct validator *state, type_param_ty tp)
997
{
998
VALIDATE_POSITIONS(tp);
999
int ret = -1;
1000
switch (tp->kind) {
1001
case TypeVar_kind:
1002
ret = validate_name(tp->v.TypeVar.name) &&
1003
(!tp->v.TypeVar.bound ||
1004
validate_expr(state, tp->v.TypeVar.bound, Load));
1005
break;
1006
case ParamSpec_kind:
1007
ret = validate_name(tp->v.ParamSpec.name);
1008
break;
1009
case TypeVarTuple_kind:
1010
ret = validate_name(tp->v.TypeVarTuple.name);
1011
break;
1012
}
1013
return ret;
1014
}
1015
1016
static int
1017
validate_type_params(struct validator *state, asdl_type_param_seq *tps)
1018
{
1019
Py_ssize_t i;
1020
for (i = 0; i < asdl_seq_LEN(tps); i++) {
1021
type_param_ty tp = asdl_seq_GET(tps, i);
1022
if (tp) {
1023
if (!validate_typeparam(state, tp))
1024
return 0;
1025
}
1026
}
1027
return 1;
1028
}
1029
1030
1031
/* See comments in symtable.c. */
1032
#define COMPILER_STACK_FRAME_SCALE 3
1033
1034
int
1035
_PyAST_Validate(mod_ty mod)
1036
{
1037
assert(!PyErr_Occurred());
1038
int res = -1;
1039
struct validator state;
1040
PyThreadState *tstate;
1041
int starting_recursion_depth;
1042
1043
/* Setup recursion depth check counters */
1044
tstate = _PyThreadState_GET();
1045
if (!tstate) {
1046
return 0;
1047
}
1048
/* Be careful here to prevent overflow. */
1049
int recursion_depth = C_RECURSION_LIMIT - tstate->c_recursion_remaining;
1050
starting_recursion_depth = recursion_depth * COMPILER_STACK_FRAME_SCALE;
1051
state.recursion_depth = starting_recursion_depth;
1052
state.recursion_limit = C_RECURSION_LIMIT * COMPILER_STACK_FRAME_SCALE;
1053
1054
switch (mod->kind) {
1055
case Module_kind:
1056
res = validate_stmts(&state, mod->v.Module.body);
1057
break;
1058
case Interactive_kind:
1059
res = validate_stmts(&state, mod->v.Interactive.body);
1060
break;
1061
case Expression_kind:
1062
res = validate_expr(&state, mod->v.Expression.body, Load);
1063
break;
1064
case FunctionType_kind:
1065
res = validate_exprs(&state, mod->v.FunctionType.argtypes, Load, /*null_ok=*/0) &&
1066
validate_expr(&state, mod->v.FunctionType.returns, Load);
1067
break;
1068
// No default case so compiler emits warning for unhandled cases
1069
}
1070
1071
if (res < 0) {
1072
PyErr_SetString(PyExc_SystemError, "impossible module node");
1073
return 0;
1074
}
1075
1076
/* Check that the recursion depth counting balanced correctly */
1077
if (res && state.recursion_depth != starting_recursion_depth) {
1078
PyErr_Format(PyExc_SystemError,
1079
"AST validator recursion depth mismatch (before=%d, after=%d)",
1080
starting_recursion_depth, state.recursion_depth);
1081
return 0;
1082
}
1083
return res;
1084
}
1085
1086
PyObject *
1087
_PyAST_GetDocString(asdl_stmt_seq *body)
1088
{
1089
if (!asdl_seq_LEN(body)) {
1090
return NULL;
1091
}
1092
stmt_ty st = asdl_seq_GET(body, 0);
1093
if (st->kind != Expr_kind) {
1094
return NULL;
1095
}
1096
expr_ty e = st->v.Expr.value;
1097
if (e->kind == Constant_kind && PyUnicode_CheckExact(e->v.Constant.value)) {
1098
return e->v.Constant.value;
1099
}
1100
return NULL;
1101
}
1102
1103