# C parser generator for pegen (Tools/peg_generator/pegen/c_generator.py).
# Generates the C parser (parser.c) from the Python PEG grammar.
import ast
import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Leaf,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator
EXTENSION_PREFIX = """\
32
#include "pegen.h"
33
34
#if defined(Py_DEBUG) && defined(Py_BUILD_CORE)
35
# define D(x) if (p->debug) { x; }
36
#else
37
# define D(x)
38
#endif
39
40
#ifdef __wasi__
41
# define MAXSTACK 4000
42
#else
43
# define MAXSTACK 6000
44
#endif
45
46
"""
47
48
49
EXTENSION_SUFFIX = """
50
void *
51
_PyPegen_parse(Parser *p)
52
{
53
// Initialize keywords
54
p->keywords = reserved_keywords;
55
p->n_keyword_lists = n_keyword_lists;
56
p->soft_keywords = soft_keywords;
57
58
return start_rule(p);
59
}
60
"""
61
62
63
class NodeTypes(Enum):
    """Classifies the C helper a FunctionCall invokes.

    lookahead_call_helper() switches on this to pick the right
    _PyPegen_lookahead_* wrapper for the helper's argument types.
    """

    NAME_TOKEN = 0
    NUMBER_TOKEN = 1
    STRING_TOKEN = 2
    GENERIC_TOKEN = 3
    KEYWORD = 4
    SOFT_KEYWORD = 5
    CUT_OPERATOR = 6
    F_STRING_CHUNK = 7
# Non-exact token names that have a dedicated _PyPegen_<name>_token() helper
# (see CCallMakerVisitor.visit_NameLeaf); all other non-exact tokens go
# through the generic _PyPegen_expect_token().
BASE_NODETYPES = {
    "NAME": NodeTypes.NAME_TOKEN,
    "NUMBER": NodeTypes.NUMBER_TOKEN,
    "STRING": NodeTypes.STRING_TOKEN,
    "SOFT_KEYWORD": NodeTypes.SOFT_KEYWORD,
}
@dataclass
class FunctionCall:
    """Description of one C helper-function invocation in the generated parser.

    str() renders it as the C expression that is emitted into an alternative's
    condition, e.g. ``(_keyword = _PyPegen_expect_token(p, 601))  // token='if'``.
    """

    function: str
    arguments: List[Any] = field(default_factory=list)
    assigned_variable: Optional[str] = None
    assigned_variable_type: Optional[str] = None
    return_type: Optional[str] = None
    nodetype: Optional[NodeTypes] = None
    force_true: bool = False
    comment: Optional[str] = None

    def __str__(self) -> str:
        """Render the call as a C expression.

        The call may be wrapped in an assignment (with an optional C cast),
        forced to always succeed via ``, !p->error_indicator`` (used for
        optional items), and suffixed with a ``//`` comment.
        """
        text = self.function
        if self.arguments:
            rendered_args = ", ".join(map(str, self.arguments))
            text += f"({rendered_args})"
        if self.force_true:
            # Comma operator: evaluate the call, but make the whole condition
            # succeed unless the parser is already in an error state.
            text += ", !p->error_indicator"
        if self.assigned_variable:
            if self.assigned_variable_type:
                text = f"({self.assigned_variable} = ({self.assigned_variable_type}){text})"
            else:
                text = f"({self.assigned_variable} = {text})"
        if self.comment:
            text += f" // {self.comment}"
        return text
class CCallMakerVisitor(GrammarVisitor):
    """Translates grammar nodes into FunctionCall descriptions.

    Each visit_* method returns the FunctionCall for the C helper that parses
    that kind of node.  Rhs/Repeat0/Repeat1/Gather results are cached so a
    given sub-expression produces (and references) only one artificial rule.
    """

    def __init__(
        self,
        parser_generator: ParserGenerator,
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
    ):
        self.gen = parser_generator
        self.exact_tokens = exact_tokens
        self.non_exact_tokens = non_exact_tokens
        # Grammar node -> FunctionCall already built for it (identity/equality
        # per the grammar classes' own semantics).
        self.cache: Dict[Any, FunctionCall] = {}
        self.cleanup_statements: List[str] = []

    def keyword_helper(self, keyword: str) -> FunctionCall:
        """Build the call matching a hard keyword by its token number."""
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_token",
            arguments=["p", self.gen.keywords[keyword]],
            return_type="Token *",
            nodetype=NodeTypes.KEYWORD,
            comment=f"token='{keyword}'",
        )

    def soft_keyword_helper(self, value: str) -> FunctionCall:
        """Build the call matching a soft keyword by its string value."""
        return FunctionCall(
            assigned_variable="_keyword",
            function="_PyPegen_expect_soft_keyword",
            arguments=["p", value],
            return_type="expr_ty",
            nodetype=NodeTypes.SOFT_KEYWORD,
            comment=f"soft_keyword='{value}'",
        )

    def visit_NameLeaf(self, node: NameLeaf) -> FunctionCall:
        """A bare name: either a token (NAME, NUMBER, NEWLINE, ...) or a rule reference."""
        name = node.value
        if name in self.non_exact_tokens:
            if name in BASE_NODETYPES:
                # Token with a dedicated helper, e.g. _PyPegen_name_token(p).
                return FunctionCall(
                    assigned_variable=f"{name.lower()}_var",
                    function=f"_PyPegen_{name.lower()}_token",
                    arguments=["p"],
                    nodetype=BASE_NODETYPES[name],
                    return_type="expr_ty",
                    comment=name,
                )
            return FunctionCall(
                assigned_variable=f"{name.lower()}_var",
                function=f"_PyPegen_expect_token",
                arguments=["p", name],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{name}'",
            )

        # Otherwise this is a reference to another rule: call <name>_rule(p).
        type = None
        rule = self.gen.all_rules.get(name.lower())
        if rule is not None:
            type = "asdl_seq *" if rule.is_loop() or rule.is_gather() else rule.type

        return FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type=type,
            comment=f"{node}",
        )

    def visit_StringLeaf(self, node: StringLeaf) -> FunctionCall:
        """A quoted literal: a keyword, a soft keyword, or an exact token."""
        val = ast.literal_eval(node.value)
        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
            # Single-quoted literals are hard keywords; double-quoted ones
            # are soft keywords (pegen grammar convention).
            if node.value.endswith("'"):
                return self.keyword_helper(val)
            else:
                return self.soft_keyword_helper(node.value)
        else:
            assert val in self.exact_tokens, f"{node.value} is not a known literal"
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_token",
                arguments=["p", type],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"token='{val}'",
            )

    def visit_Rhs(self, node: Rhs) -> FunctionCall:
        """A group of alternatives: inline it if trivial, else emit an artificial rule."""
        if node in self.cache:
            return self.cache[node]
        if node.can_be_inlined:
            self.cache[node] = self.generate_call(node.alts[0].items[0])
        else:
            # NOTE(review): "artifical" (sic) matches the helper's spelling in
            # ParserGenerator — do not "fix" one side only.
            name = self.gen.artifical_rule_from_rhs(node)
            self.cache[node] = FunctionCall(
                assigned_variable=f"{name}_var",
                function=f"{name}_rule",
                arguments=["p"],
                comment=f"{node}",
            )
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> FunctionCall:
        """An item with an optional name/type: rebind the call's target variable."""
        call = self.generate_call(node.item)
        if node.name:
            call.assigned_variable = node.name
        if node.type:
            call.assigned_variable_type = node.type
        return call

    def lookahead_call_helper(self, node: Lookahead, positive: int) -> FunctionCall:
        """Wrap the inner call in the _PyPegen_lookahead_* variant matching its nodetype."""
        call = self.generate_call(node.node)
        if call.nodetype == NodeTypes.NAME_TOKEN:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_name",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype == NodeTypes.SOFT_KEYWORD:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_string",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )
        elif call.nodetype in {NodeTypes.GENERIC_TOKEN, NodeTypes.KEYWORD}:
            return FunctionCall(
                function=f"_PyPegen_lookahead_with_int",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
                comment=f"token={node.node}",
            )
        else:
            return FunctionCall(
                function=f"_PyPegen_lookahead",
                arguments=[positive, call.function, *call.arguments],
                return_type="int",
            )

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> FunctionCall:
        """&atom — match without consuming."""
        return self.lookahead_call_helper(node, 1)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> FunctionCall:
        """!atom — succeed only if atom does not match."""
        return self.lookahead_call_helper(node, 0)

    def visit_Forced(self, node: Forced) -> FunctionCall:
        """&&atom — atom must match here, otherwise a syntax error is raised."""
        call = self.generate_call(node.node)
        if isinstance(node.node, Leaf):
            assert isinstance(node.node, Leaf)
            val = ast.literal_eval(node.node.value)
            assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
            type = self.exact_tokens[val]
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_forced_token",
                arguments=["p", type, f'"{val}"'],
                nodetype=NodeTypes.GENERIC_TOKEN,
                return_type="Token *",
                comment=f"forced_token='{val}'",
            )
        if isinstance(node.node, Group):
            # Render the group's call inline (stripped of assignment/comment)
            # as the C argument of _PyPegen_expect_forced_result.
            call = self.visit(node.node.rhs)
            call.assigned_variable = None
            call.comment = None
            return FunctionCall(
                assigned_variable="_literal",
                function=f"_PyPegen_expect_forced_result",
                arguments=["p", str(call), f'"{node.node.rhs!s}"'],
                return_type="void *",
                comment=f"forced_token=({node.node.rhs!s})",
            )
        else:
            raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")

    def visit_Opt(self, node: Opt) -> FunctionCall:
        """atom? — force_true makes the condition succeed whether or not atom matched."""
        call = self.generate_call(node.node)
        return FunctionCall(
            assigned_variable="_opt_var",
            function=call.function,
            arguments=call.arguments,
            force_true=True,
            comment=f"{node}",
        )

    def visit_Repeat0(self, node: Repeat0) -> FunctionCall:
        """atom* — delegate to an artificial _loop0 rule returning an asdl_seq."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, False)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> FunctionCall:
        """atom+ — delegate to an artificial _loop1 rule returning an asdl_seq."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, True)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> FunctionCall:
        """sep.atom+ — delegate to an artificial _gather rule returning an asdl_seq."""
        if node in self.cache:
            return self.cache[node]
        # NOTE(review): "artifical" (sic) matches ParserGenerator's spelling.
        name = self.gen.artifical_rule_from_gather(node)
        self.cache[node] = FunctionCall(
            assigned_variable=f"{name}_var",
            function=f"{name}_rule",
            arguments=["p"],
            return_type="asdl_seq *",
            comment=f"{node}",
        )
        return self.cache[node]

    def visit_Group(self, node: Group) -> FunctionCall:
        """(alts) — a group is just its right-hand side."""
        return self.generate_call(node.rhs)

    def visit_Cut(self, node: Cut) -> FunctionCall:
        """~ — commit to this alternative; emitted as the constant 1."""
        return FunctionCall(
            assigned_variable="_cut_var",
            return_type="int",
            function="1",
            nodetype=NodeTypes.CUT_OPERATOR,
        )

    def generate_call(self, node: Any) -> FunctionCall:
        """Dispatch to the visit_* method for this node type."""
        return super().visit(node)
class CParserGenerator(ParserGenerator, GrammarVisitor):
    """Emits the C source of a PEG parser from a pegen Grammar.

    generate() drives the whole process: prelude, keyword tables, rule type
    defines, forward declarations, one C function per rule, and the trailer.
    The visit_* methods print C code through ParserGenerator's print/indent
    machinery, using CCallMakerVisitor to render individual parsing calls.
    """

    def __init__(
        self,
        grammar: grammar.Grammar,
        tokens: Dict[int, str],
        exact_tokens: Dict[str, int],
        non_exact_tokens: Set[str],
        file: Optional[IO[Text]],
        debug: bool = False,
        skip_actions: bool = False,
    ):
        super().__init__(grammar, set(tokens.values()), file)
        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
            self, exact_tokens, non_exact_tokens
        )
        # Counter for unique_varname(); never reset, so names are unique
        # across the whole generated file.
        self._varname_counter = 0
        self.debug = debug
        self.skip_actions = skip_actions
        # C statements that every add_return() must emit before returning
        # (used for rules that save/restore parser state, see visit_Rule).
        self.cleanup_statements: List[str] = []

    def add_level(self) -> None:
        """Emit the stack-depth guard at the top of every rule function."""
        self.print("if (p->level++ == MAXSTACK) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
        self.print("}")

    def remove_level(self) -> None:
        """Emit the matching depth decrement before a return."""
        self.print("p->level--;")

    def add_return(self, ret_val: str) -> None:
        """Emit pending cleanup statements, the level decrement, and a return."""
        for stmt in self.cleanup_statements:
            self.print(stmt)
        self.remove_level()
        self.print(f"return {ret_val};")

    def unique_varname(self, name: str = "tmpvar") -> str:
        """Return a fresh C variable name, unique across the generated file."""
        new_var = name + "_" + str(self._varname_counter)
        self._varname_counter += 1
        return new_var

    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
        """Emit `int tmp = call; if (tmp) return returnval;`."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.add_return(returnval)
        self.print("}")

    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
        """Emit `int tmp = call; if (tmp) goto target;`."""
        error_var = self.unique_varname()
        self.print(f"int {error_var} = {call_text};")
        self.print(f"if ({error_var}) {{")
        with self.indent():
            self.print(f"goto {goto_target};")
        self.print(f"}}")

    def out_of_memory_return(
        self,
        expr: str,
        cleanup_code: Optional[str] = None,
    ) -> None:
        """Emit `if (expr) { cleanup; set error; PyErr_NoMemory(); return NULL; }`."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            if cleanup_code is not None:
                self.print(cleanup_code)
            self.print("p->error_indicator = 1;")
            self.print("PyErr_NoMemory();")
            self.add_return("NULL")
        self.print(f"}}")

    def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
        """Emit `if (expr) { PyErr_NoMemory(); goto target; }`."""
        self.print(f"if ({expr}) {{")
        with self.indent():
            self.print("PyErr_NoMemory();")
            self.print(f"goto {goto_target};")
        self.print(f"}}")

    def generate(self, filename: str) -> None:
        """Emit the complete C parser file for the grammar.

        Layout: @generated marker, header (@header meta or EXTENSION_PREFIX),
        optional @subheader, keyword tables, per-rule `<name>_type` defines
        (numbered from 1000), forward declarations, rule bodies, and the
        trailer (@trailer meta or EXTENSION_SUFFIX) formatted with the mode
        and module name.
        """
        self.collect_rules()
        basename = os.path.basename(filename)
        self.print(f"// @generated by pegen from {basename}")
        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
        if header:
            self.print(header.rstrip("\n"))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        self._setup_keywords()
        self._setup_soft_keywords()
        for i, (rulename, rule) in enumerate(self.all_rules.items(), 1000):
            comment = "  // Left-recursive" if rule.left_recursive else ""
            self.print(f"#define {rulename}_type {i}{comment}")
        self.print()
        for rulename, rule in self.all_rules.items():
            if rule.is_loop() or rule.is_gather():
                type = "asdl_seq *"
            elif rule.type:
                type = rule.type + " "
            else:
                type = "void *"
            self.print(f"static {type}{rulename}_rule(Parser *p);")
        self.print()
        for rulename, rule in list(self.all_rules.items()):
            self.print()
            if rule.left_recursive:
                self.print("// Left-recursive")
            self.visit(rule)
        # mode: 0 = no actions, 1 = expression start rule, 2 = module start
        # rule plus bytecode generation (per the @bytecode meta).
        if self.skip_actions:
            mode = 0
        else:
            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
        if mode == 1 and self.grammar.metas.get("bytecode"):
            mode += 1
        modulename = self.grammar.metas.get("modulename", "parse")
        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
        if trailer:
            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))

    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
        """Group (keyword, token-type) pairs by keyword length."""
        groups: Dict[int, List[Tuple[str, int]]] = {}
        for keyword_str, keyword_type in self.keywords.items():
            length = len(keyword_str)
            if length in groups:
                groups[length].append((keyword_str, keyword_type))
            else:
                groups[length] = [(keyword_str, keyword_type)]
        return groups

    def _setup_keywords(self) -> None:
        """Emit the reserved_keywords table, indexed by keyword length.

        Each slot is a NULL-terminated KeywordToken array; lengths with no
        keywords get a single {NULL, -1} sentinel entry.
        """
        n_keyword_lists = (
            len(max(self.keywords.keys(), key=len)) + 1 if len(self.keywords) > 0 else 0
        )
        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
        groups = self._group_keywords_by_length()
        self.print("static KeywordToken *reserved_keywords[] = {")
        with self.indent():
            num_groups = max(groups) + 1 if groups else 1
            for keywords_length in range(num_groups):
                if keywords_length not in groups.keys():
                    self.print("(KeywordToken[]) {{NULL, -1}},")
                else:
                    self.print("(KeywordToken[]) {")
                    with self.indent():
                        for keyword_str, keyword_type in groups[keywords_length]:
                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
                        self.print("{NULL, -1},")
                    self.print("},")
        self.print("};")

    def _setup_soft_keywords(self) -> None:
        """Emit the NULL-terminated soft_keywords string array (sorted)."""
        soft_keywords = sorted(self.soft_keywords)
        self.print("static char *soft_keywords[] = {")
        with self.indent():
            for keyword in soft_keywords:
                self.print(f'"{keyword}",')
            self.print("NULL,")
        self.print("};")

    def _set_up_token_start_metadata_extraction(self) -> None:
        """Emit capture of the start line/column (for actions using EXTRA)."""
        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            self.add_return("NULL")
        self.print("}")
        self.print("int _start_lineno = p->tokens[_mark]->lineno;")
        self.print("UNUSED(_start_lineno); // Only used by EXTRA macro")
        self.print("int _start_col_offset = p->tokens[_mark]->col_offset;")
        self.print("UNUSED(_start_col_offset); // Only used by EXTRA macro")

    def _set_up_token_end_metadata_extraction(self) -> None:
        """Emit capture of the end line/column (for actions using EXTRA)."""
        self.print("Token *_token = _PyPegen_get_last_nonnwhitespace_token(p);")
        self.print("if (_token == NULL) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")
        self.print("int _end_lineno = _token->end_lineno;")
        self.print("UNUSED(_end_lineno); // Only used by EXTRA macro")
        self.print("int _end_col_offset = _token->end_col_offset;")
        self.print("UNUSED(_end_col_offset); // Only used by EXTRA macro")

    def _check_for_errors(self) -> None:
        """Emit an early NULL return if the parser is already in error state."""
        self.print("if (p->error_indicator) {")
        with self.indent():
            self.add_return("NULL")
        self.print("}")

    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
        """Emit the memoization driver for a left-recursive leader rule.

        The public <rule>_rule() repeatedly calls <rule>_raw(), updating the
        memo each iteration, until the parse stops advancing (the standard
        grow-the-seed technique for left recursion).  Ends by opening the
        signature of <rule>_raw, whose body visit_Rule emits next.
        """
        self.print("{")
        with self.indent():
            self.add_level()
            self.print(f"{result_type} _res = NULL;")
            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
            with self.indent():
                self.add_return("_res")
            self.print("}")
            self.print("int _mark = p->mark;")
            self.print("int _resmark = p->mark;")
            self.print("while (1) {")
            with self.indent():
                self.call_with_errorcheck_return(
                    f"_PyPegen_update_memo(p, _mark, {node.name}_type, _res)", "_res"
                )
                self.print("p->mark = _mark;")
                self.print(f"void *_raw = {node.name}_raw(p);")
                self.print("if (p->error_indicator) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
                # Stop once an attempt fails or no longer consumes more input.
                self.print("if (_raw == NULL || p->mark <= _resmark)")
                with self.indent():
                    self.print("break;")
                self.print(f"_resmark = p->mark;")
                self.print("_res = _raw;")
            self.print("}")
            self.print(f"p->mark = _resmark;")
            self.add_return("_res")
        self.print("}")
        self.print(f"static {result_type}")
        self.print(f"{node.name}_raw(Parser *p)")

    def _should_memoize(self, node: Rule) -> bool:
        """Memoize unless disabled or the rule is left-recursive (handled separately)."""
        return node.memo and not node.left_recursive

    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
        """Emit the body of a non-loop rule: memo check, alternatives, done label."""
        memoize = self._should_memoize(node)

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print(f"{result_type} _res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=False,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if self.debug:
                self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
            self.print("_res = NULL;")
        # Successful alternatives `goto done;` (see handle_alt_normal).
        self.print("  done:")
        with self.indent():
            if memoize:
                self.print(f"_PyPegen_insert_memo(p, _mark, {node.name}_type, _res);")
            self.add_return("_res")

    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
        """Emit the body of a _loop0/_loop1/_gather rule.

        Collects matches into a malloc'd, doubling `_children` buffer, then
        copies them into an arena-allocated asdl_seq.  _loop1 rules fail if
        nothing was matched.
        """
        memoize = self._should_memoize(node)
        is_repeat1 = node.name.startswith("_loop1")

        with self.indent():
            self.add_level()
            self._check_for_errors()
            self.print("void *_res = NULL;")
            if memoize:
                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &_res)) {{")
                with self.indent():
                    self.add_return("_res")
                self.print("}")
            self.print("int _mark = p->mark;")
            if memoize:
                self.print("int _start_mark = p->mark;")
            self.print("void **_children = PyMem_Malloc(sizeof(void *));")
            self.out_of_memory_return(f"!_children")
            self.print("Py_ssize_t _children_capacity = 1;")
            self.print("Py_ssize_t _n = 0;")
            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
                self._set_up_token_start_metadata_extraction()
            self.visit(
                rhs,
                is_loop=True,
                is_gather=node.is_gather(),
                rulename=node.name,
            )
            if is_repeat1:
                self.print("if (_n == 0 || p->error_indicator) {")
                with self.indent():
                    self.print("PyMem_Free(_children);")
                    self.add_return("NULL")
                self.print("}")
            self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
            self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
            self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
            self.print("PyMem_Free(_children);")
            if memoize and node.name:
                self.print(f"_PyPegen_insert_memo(p, _start_mark, {node.name}_type, _seq);")
            self.add_return("_seq")

    def visit_Rule(self, node: Rule) -> None:
        """Emit the complete C function for one grammar rule."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if is_loop or is_gather:
            result_type = "asdl_seq *"
        elif node.type:
            result_type = node.type
        else:
            result_type = "void *"

        # Reproduce the grammar rule itself as a comment above the function.
        for line in str(node).splitlines():
            self.print(f"// {line}")
        if node.left_recursive and node.leader:
            self.print(f"static {result_type} {node.name}_raw(Parser *);")

        self.print(f"static {result_type}")
        self.print(f"{node.name}_rule(Parser *p)")

        if node.left_recursive and node.leader:
            # Emits the memoization wrapper and opens the _raw signature.
            self._set_up_rule_memoization(node, result_type)

        self.print("{")

        if node.name.endswith("without_invalid"):
            # Temporarily disable invalid_ rules inside this rule; restored
            # on every exit path via cleanup_statements.
            with self.indent():
                self.print("int _prev_call_invalid = p->call_invalid_rules;")
                self.print("p->call_invalid_rules = 0;")
                self.cleanup_statements.append("p->call_invalid_rules = _prev_call_invalid;")

        if is_loop:
            self._handle_loop_rule_body(node, rhs)
        else:
            self._handle_default_rule_body(node, rhs, result_type)

        if node.name.endswith("without_invalid"):
            self.cleanup_statements.pop()

        self.print("}")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Emit one item's call, deduplicating its target variable name."""
        call = self.callmakervisitor.generate_call(node)
        if call.assigned_variable:
            call.assigned_variable = self.dedupe(call.assigned_variable)
        self.print(call)

    def visit_Rhs(
        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit each alternative; loop rules must have exactly one."""
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)

    def join_conditions(self, keyword: str, node: Any) -> None:
        """Emit `keyword ( item && item && ... )` for the alternative's items."""
        self.print(f"{keyword} (")
        with self.indent():
            first = True
            for item in node.items:
                if first:
                    first = False
                else:
                    self.print("&&")
                self.visit(item)
        self.print(")")

    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
        """Emit the alternative's C action and its error propagation check."""
        self.print(f"_res = {node.action};")

        self.print("if (_res == NULL && PyErr_Occurred()) {")
        with self.indent():
            self.print("p->error_indicator = 1;")
            if cleanup_code:
                self.print(cleanup_code)
            self.add_return("NULL")
        self.print("}")

        if self.debug:
            self.print(
                f'D(fprintf(stderr, "Hit with action [%d-%d]: %s\\n", _mark, p->mark, "{node}"));'
            )

    def emit_default_action(self, is_gather: bool, node: Alt) -> None:
        """Emit the implicit result when the alternative has no action.

        Gather: prepend the element to the gathered sequence.  Multiple
        variables: wrap them in a dummy name node.  One variable: use it.
        """
        if len(self.local_variable_names) > 1:
            if is_gather:
                assert len(self.local_variable_names) == 2
                self.print(
                    f"_res = _PyPegen_seq_insert_in_front(p, "
                    f"{self.local_variable_names[0]}, {self.local_variable_names[1]});"
                )
            else:
                if self.debug:
                    self.print(
                        f'D(fprintf(stderr, "Hit without action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                    )
                self.print(
                    f"_res = _PyPegen_dummy_name(p, {', '.join(self.local_variable_names)});"
                )
        else:
            if self.debug:
                self.print(
                    f'D(fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", _mark, p->mark, "{node}"));'
                )
            self.print(f"_res = {self.local_variable_names[0]};")

    def emit_dummy_action(self) -> None:
        """Emit a placeholder result (used when actions are skipped)."""
        self.print("_res = _PyPegen_dummy_name(p);")

    def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the success branch of a non-loop alternative."""
        self.join_conditions(keyword="if", node=node)
        self.print("{")
        # We have parsed successfully all the conditions for the option.
        with self.indent():
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c+ {rulename}[%d-%d]: %s succeeded!\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node)
            else:
                self.emit_default_action(is_gather, node)

            # As the current option has parsed correctly, do not continue with the rest.
            self.print(f"goto done;")
        self.print("}")

    def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
        """Emit the repeat branch of a loop alternative, appending each match."""
        # Condition of the main body of the alternative
        self.join_conditions(keyword="while", node=node)
        self.print("{")
        # We have parsed successfully one item!
        with self.indent():
            # Prepare to emit the rule action and do so
            if node.action and "EXTRA" in node.action:
                self._set_up_token_end_metadata_extraction()
            if self.skip_actions:
                self.emit_dummy_action()
            elif node.action:
                self.emit_action(node, cleanup_code="PyMem_Free(_children);")
            else:
                self.emit_default_action(is_gather, node)

            # Add the result of rule to the temporary buffer of children. This buffer
            # will populate later an asdl_seq with all elements to return.
            self.print("if (_n == _children_capacity) {")
            with self.indent():
                self.print("_children_capacity *= 2;")
                self.print(
                    "void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
                )
                self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
                self.print("_children = _new_children;")
            self.print("}")
            self.print("_children[_n++] = _res;")
            self.print("_mark = p->mark;")
        self.print("}")

    def visit_Alt(
        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
    ) -> None:
        """Emit one alternative: declarations, body, backtracking, cut handling."""
        if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
            # invalid_ alternatives only run during the second (error) pass.
            self.print(f"if (p->call_invalid_rules) {{ // {node}")
        else:
            self.print(f"{{ // {node}")
        with self.indent():
            self._check_for_errors()
            node_str = str(node).replace('"', '\\"')
            self.print(
                f'D(fprintf(stderr, "%*c> {rulename}[%d-%d]: %s\\n", p->level, \' \', _mark, p->mark, "{node_str}"));'
            )
            # Prepare variable declarations for the alternative
            vars = self.collect_vars(node)
            for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
                if not var_type:
                    var_type = "void *"
                else:
                    var_type += " "
                if v == "_cut_var":
                    v += " = 0"  # cut_var must be initialized
                self.print(f"{var_type}{v};")
                if v and v.startswith("_opt_var"):
                    self.print(f"UNUSED({v}); // Silence compiler warnings")

            with self.local_variable_context():
                if is_loop:
                    self.handle_alt_loop(node, is_gather, rulename)
                else:
                    self.handle_alt_normal(node, is_gather, rulename)

            # The alternative failed: backtrack, then honor any cut.
            self.print("p->mark = _mark;")
            node_str = str(node).replace('"', '\\"')
            self.print(
                f"D(fprintf(stderr, \"%*c%s {rulename}[%d-%d]: %s failed!\\n\", p->level, ' ',\n"
                f'              p->error_indicator ? "ERROR!" : "-", _mark, p->mark, "{node_str}"));'
            )
            if "_cut_var" in vars:
                self.print("if (_cut_var) {")
                with self.indent():
                    self.add_return("NULL")
                self.print("}")
        self.print("}")

    def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
        """Map each variable the alternative needs to its C type (or None)."""
        types = {}
        with self.local_variable_context():
            for item in node.items:
                name, type = self.add_var(item)
                types[name] = type
        return types

    def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
        """Return the (deduplicated) variable name and type for one item."""
        call = self.callmakervisitor.generate_call(node.item)
        name = node.name if node.name else call.assigned_variable
        if name is not None:
            name = self.dedupe(name)
        return_type = call.return_type if node.type is None else node.type
        return name, return_type