Path: blob/main/Tools/peg_generator/pegen/python_generator.py
12 views
"""Python backend of pegen: emit the source of a Python parser class from a grammar."""

import os.path
import token
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple

from pegen import grammar
from pegen.grammar import (
    Alt,
    Cut,
    Forced,
    Gather,
    GrammarVisitor,
    Group,
    Lookahead,
    NamedItem,
    NameLeaf,
    NegativeLookahead,
    Opt,
    PositiveLookahead,
    Repeat0,
    Repeat1,
    Rhs,
    Rule,
    StringLeaf,
)
from pegen.parser_generator import ParserGenerator

# Default text printed at the top of the generated module; ``{filename}`` is
# filled in with the basename passed to ``PythonParserGenerator.generate``.
MODULE_PREFIX = """\
#!/usr/bin/env python3.8
# @generated by pegen from {filename}

import ast
import sys
import tokenize

from typing import Any, Optional

from pegen.parser import memoize, memoize_left_rec, logger, Parser

"""
# Default text printed at the bottom of the generated module; ``{class_name}``
# is filled in with the name of the generated parser class.
MODULE_SUFFIX = """

if __name__ == '__main__':
    from pegen.parser import simple_parser_main
    simple_parser_main({class_name})
"""


class InvalidNodeVisitor(GrammarVisitor):
    """Report whether a grammar node refers to a rule whose name starts with "invalid".

    ``PythonParserGenerator.visit_Alt`` uses the result to choose the
    ``UNREACHABLE`` action for alternatives that have no explicit action.
    """

    def visit_NameLeaf(self, node: NameLeaf) -> bool:
        """Return True when the referenced name starts with ``"invalid"``."""
        name = node.value
        return name.startswith("invalid")

    def visit_StringLeaf(self, node: StringLeaf) -> bool:
        """A string literal never refers to an invalid rule."""
        return False

    def visit_NamedItem(self, node: NamedItem) -> bool:
        """Delegate to the wrapped item."""
        return self.visit(node.item)

    def visit_Rhs(self, node: Rhs) -> bool:
        """Return True when any alternative is flagged."""
        return any(self.visit(alt) for alt in node.alts)

    def visit_Alt(self, node: Alt) -> bool:
        """Return True when any item of the alternative is flagged."""
        return any(self.visit(item) for item in node.items)

    def lookahead_call_helper(self, node: Lookahead) -> bool:
        """Shared implementation for both lookahead kinds: inspect the inner node."""
        return self.visit(node.node)

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
        """Delegate to the node under the positive lookahead."""
        return self.lookahead_call_helper(node)

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
        """Delegate to the node under the negative lookahead."""
        return self.lookahead_call_helper(node)

    def visit_Opt(self, node: Opt) -> bool:
        """Delegate to the optional node."""
        return self.visit(node.node)

    # NOTE(review): no node class named ``Repeat`` is imported in this module
    # (only ``Repeat0`` and ``Repeat1``). If ``GrammarVisitor.visit`` dispatches
    # on the node's class name, this method is never selected for repetitions
    # -- confirm against ``pegen.grammar`` before relying on it.
    def visit_Repeat(self, node: Repeat0) -> bool:
        """Delegate to the repeated node."""
        return self.visit(node.node)

    def visit_Gather(self, node: Gather) -> bool:
        """Delegate to the gathered node."""
        return self.visit(node.node)

    def visit_Group(self, node: Group) -> bool:
        """Delegate to the right-hand side enclosed by the group."""
        return self.visit(node.rhs)

    def visit_Cut(self, node: Cut) -> bool:
        """A cut never refers to an invalid rule."""
        return False

    def visit_Forced(self, node: Forced) -> bool:
        """Delegate to the forced node."""
        return self.visit(node.node)


class PythonCallMakerVisitor(GrammarVisitor):
    """Map grammar nodes to ``(variable name, call expression)`` pairs.

    The call expression is the text of the Python expression emitted into the
    generated parser; the name (possibly ``None``) is the variable the result
    is bound to. Nodes that need a helper rule ask the parser generator to
    create one and the result is cached per node.
    """

    def __init__(self, parser_generator: ParserGenerator) -> None:
        self.gen = parser_generator
        # Maps an already visited node to the (name, call) pair produced for it,
        # so that artificial rules are requested only once per node.
        self.cache: Dict[Any, Any] = {}

    def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
        """Return the call for a rule or token reference."""
        name = node.value
        if name == "SOFT_KEYWORD":
            return "soft_keyword", "self.soft_keyword()"
        if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
            name = name.lower()
            return name, f"self.{name}()"
        if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
            # Avoid using names that can be Python keywords
            return "_" + name.lower(), f"self.expect({name!r})"
        return name, f"self.{name}()"

    def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
        """Return an ``expect`` call; ``node.value`` is inserted into the call verbatim."""
        return "literal", f"self.expect({node.value})"

    def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
        """Return the call for a right-hand side.

        A right-hand side made of exactly one alternative with exactly one item
        is replaced by that item's call; anything else gets an artificial rule.
        """
        if node in self.cache:
            return self.cache[node]
        if len(node.alts) == 1 and len(node.alts[0].items) == 1:
            self.cache[node] = self.visit(node.alts[0].items[0])
        else:
            name = self.gen.artifical_rule_from_rhs(node)
            self.cache[node] = name, f"self.{name}()"
        return self.cache[node]

    def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
        """Return the inner item's call, using the explicit item name when one is given."""
        name, call = self.visit(node.item)
        if node.name:
            name = node.name
        return name, call

    def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
        """Split the inner call ``head(tail)`` into its callable and argument text."""
        _name, call = self.visit(node.node)
        head, tail = call.split("(", 1)
        assert tail[-1] == ")"
        tail = tail[:-1]
        return head, tail

    def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
        """Wrap the inner call in ``self.positive_lookahead``; no variable is bound."""
        head, tail = self.lookahead_call_helper(node)
        return None, f"self.positive_lookahead({head}, {tail})"

    def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
        """Wrap the inner call in ``self.negative_lookahead``; no variable is bound."""
        head, tail = self.lookahead_call_helper(node)
        return None, f"self.negative_lookahead({head}, {tail})"

    def visit_Opt(self, node: Opt) -> Tuple[str, str]:
        """Return the inner call followed by exactly one trailing comma."""
        name, call = self.visit(node.node)
        # Note trailing comma (the call may already have one comma
        # at the end, for example when rules have both repeat0 and optional
        # markers, e.g: [rule*])
        if call.endswith(","):
            return "opt", call
        else:
            return "opt", f"{call},"

    def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
        """Return a call to the artificial loop rule created for ``node*``."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, False)
        self.cache[node] = name, f"self.{name}(),"  # Also a trailing comma!
        return self.cache[node]

    def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
        """Return a call to the artificial loop rule created for ``node+``."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artificial_rule_from_repeat(node.node, True)
        self.cache[node] = name, f"self.{name}()"  # But no trailing comma here!
        return self.cache[node]

    def visit_Gather(self, node: Gather) -> Tuple[str, str]:
        """Return a call to the artificial rule created for a gather node."""
        if node in self.cache:
            return self.cache[node]
        name = self.gen.artifical_rule_from_gather(node)
        self.cache[node] = name, f"self.{name}()"  # No trailing comma here either!
        return self.cache[node]

    def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
        """Delegate to the right-hand side enclosed by the group."""
        return self.visit(node.rhs)

    def visit_Cut(self, node: Cut) -> Tuple[str, str]:
        """A cut is emitted as the assignment of ``True`` to the ``cut`` variable."""
        return "cut", "True"

    def visit_Forced(self, node: Forced) -> Tuple[str, str]:
        """Return an ``expect_forced`` call for a forced group or a forced leaf."""
        if isinstance(node.node, Group):
            _, val = self.visit(node.node.rhs)
            return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
        else:
            return (
                "forced",
                f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
            )


class PythonParserGenerator(ParserGenerator, GrammarVisitor):
    """Print the Python source of a parser class for a grammar."""

    def __init__(
        self,
        grammar: grammar.Grammar,
        file: Optional[IO[Text]],
        tokens: Set[str] = set(token.tok_name.values()),
        location_formatting: Optional[str] = None,
        unreachable_formatting: Optional[str] = None,
    ):
        """Set up the generator.

        Args:
            grammar: The grammar to generate a parser for.
            file: Output stream, forwarded to ``ParserGenerator.__init__``.
            tokens: Known token names. ``"SOFT_KEYWORD"`` is always added to
                the set handed to the base class; the argument itself is left
                untouched.
            location_formatting: Text substituted for ``LOCATIONS`` in rule
                actions. Defaults to the ``lineno=...`` / ``end_col_offset=...``
                keyword-argument list.
            unreachable_formatting: Text substituted for ``UNREACHABLE`` in
                rule actions. Defaults to ``"None  # pragma: no cover"``.
        """
        # Work on a copy: the default set is created once at definition time and
        # a caller-supplied set belongs to the caller, so neither may be mutated.
        tokens = set(tokens)
        tokens.add("SOFT_KEYWORD")
        super().__init__(grammar, tokens, file)
        self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
        self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
        self.unreachable_formatting = unreachable_formatting or "None  # pragma: no cover"
        self.location_formatting = (
            location_formatting
            or "lineno=start_lineno, col_offset=start_col_offset, "
            "end_lineno=end_lineno, end_col_offset=end_col_offset"
        )

    def generate(self, filename: str) -> None:
        """Print the whole parser module.

        The ``header``, ``subheader``, ``class`` and ``trailer`` grammar metas
        override the defaults; a ``header`` or ``trailer`` of ``None`` suppresses
        that section.

        Args:
            filename: Grammar file name; only its basename is put in the header.
        """
        self.collect_rules()
        header = self.grammar.metas.get("header", MODULE_PREFIX)
        if header is not None:
            basename = os.path.basename(filename)
            self.print(header.rstrip("\n").format(filename=basename))
        subheader = self.grammar.metas.get("subheader", "")
        if subheader:
            self.print(subheader)
        cls_name = self.grammar.metas.get("class", "GeneratedParser")
        self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
        self.print(f"class {cls_name}(Parser):")
        for rule in self.all_rules.values():
            self.print()
            with self.indent():
                self.visit(rule)

        self.print()
        with self.indent():
            self.print(f"KEYWORDS = {tuple(self.keywords)}")
            self.print(f"SOFT_KEYWORDS = {tuple(self.soft_keywords)}")

        trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
        if trailer is not None:
            self.print(trailer.rstrip("\n"))

    def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
        """Return True if any action in ``alts`` (or in nested groups) mentions ``LOCATIONS``."""
        for alt in alts:
            if alt.action and "LOCATIONS" in alt.action:
                return True
            for n in alt.items:
                if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
                    return True
        return False

    def visit_Rule(self, node: Rule) -> None:
        """Print the decorated parser method for one rule."""
        is_loop = node.is_loop()
        is_gather = node.is_gather()
        rhs = node.flatten()
        if node.left_recursive:
            if node.leader:
                self.print("@memoize_left_rec")
            else:
                # Non-leader rules in a cycle are not memoized,
                # but they must still be logged.
                self.print("@logger")
        else:
            self.print("@memoize")
        node_type = node.type or "Any"
        self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
        with self.indent():
            self.print(f"# {node.name}: {rhs}")
            self.print("mark = self._mark()")
            if self.alts_uses_locations(node.rhs.alts):
                # Actions using LOCATIONS need the start position of the rule.
                self.print("tok = self._tokenizer.peek()")
                self.print("start_lineno, start_col_offset = tok.start")
            if is_loop:
                self.print("children = []")
            self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
            if is_loop:
                self.print("return children")
            else:
                self.print("return None")

    def visit_NamedItem(self, node: NamedItem) -> None:
        """Print one item of an alternative, bound with ``:=`` when it has a name."""
        name, call = self.callmakervisitor.visit(node.item)
        if node.name:
            name = node.name
        if not name:
            self.print(call)
        else:
            # The ``cut`` flag is assigned under its own name, never deduplicated.
            if name != "cut":
                name = self.dedupe(name)
            self.print(f"({name} := {call})")

    def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
        """Print every alternative of a right-hand side; loops must have exactly one."""
        if is_loop:
            assert len(node.alts) == 1
        for alt in node.alts:
            self.visit(alt, is_loop=is_loop, is_gather=is_gather)

    def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
        """Print the ``if``/``while`` statement that tries one alternative.

        Args:
            node: The alternative to emit.
            is_loop: Emit a ``while`` that appends each result to ``children``
                instead of an ``if`` that returns the result.
            is_gather: Follow every item with ``is not None`` and, when there is
                no explicit action, combine the two bound variables into a list.
        """
        has_cut = any(isinstance(item.item, Cut) for item in node.items)
        with self.local_variable_context():
            if has_cut:
                self.print("cut = False")
            if is_loop:
                self.print("while (")
            else:
                self.print("if (")
            with self.indent():
                first = True
                for item in node.items:
                    if first:
                        first = False
                    else:
                        self.print("and")
                    self.visit(item)
                    if is_gather:
                        self.print("is not None")

            self.print("):")
            with self.indent():
                action = node.action
                if not action:
                    # No explicit action: derive one from the bound variables.
                    if is_gather:
                        assert len(self.local_variable_names) == 2
                        action = (
                            f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
                        )
                    else:
                        if self.invalidvisitor.visit(node):
                            action = "UNREACHABLE"
                        elif len(self.local_variable_names) == 1:
                            action = f"{self.local_variable_names[0]}"
                        else:
                            action = f"[{', '.join(self.local_variable_names)}]"
                elif "LOCATIONS" in action:
                    self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
                    self.print("end_lineno, end_col_offset = tok.end")
                    action = action.replace("LOCATIONS", self.location_formatting)

                if is_loop:
                    self.print(f"children.append({action})")
                    self.print("mark = self._mark()")
                else:
                    if "UNREACHABLE" in action:
                        action = action.replace("UNREACHABLE", self.unreachable_formatting)
                    self.print(f"return {action}")

            self.print("self._reset(mark)")
            # Skip remaining alternatives if a cut was reached.
            if has_cut:
                self.print("if cut: return None")