Path: blob/main/Tools/peg_generator/pegen/parser_generator.py
12 views
import ast1import contextlib2import re3from abc import abstractmethod4from typing import (5IO,6AbstractSet,7Any,8Dict,9Iterable,10Iterator,11List,12Optional,13Set,14Text,15Tuple,16Union,17)1819from pegen import sccutils20from pegen.grammar import (21Alt,22Cut,23Forced,24Gather,25Grammar,26GrammarError,27GrammarVisitor,28Group,29Lookahead,30NamedItem,31NameLeaf,32Opt,33Plain,34Repeat0,35Repeat1,36Rhs,37Rule,38StringLeaf,39)404142class RuleCollectorVisitor(GrammarVisitor):43"""Visitor that invokes a provieded callmaker visitor with just the NamedItem nodes"""4445def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:46self.rulses = rules47self.callmaker = callmakervisitor4849def visit_Rule(self, rule: Rule) -> None:50self.visit(rule.flatten())5152def visit_NamedItem(self, item: NamedItem) -> None:53self.callmaker.visit(item)545556class KeywordCollectorVisitor(GrammarVisitor):57"""Visitor that collects all the keywods and soft keywords in the Grammar"""5859def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):60self.generator = gen61self.keywords = keywords62self.soft_keywords = soft_keywords6364def visit_StringLeaf(self, node: StringLeaf) -> None:65val = ast.literal_eval(node.value)66if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword67if node.value.endswith("'") and node.value not in self.keywords:68self.keywords[val] = self.generator.keyword_type()69else:70return self.soft_keywords.add(node.value.replace('"', ""))717273class RuleCheckingVisitor(GrammarVisitor):74def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):75self.rules = rules76self.tokens = tokens7778def visit_NameLeaf(self, node: NameLeaf) -> None:79if node.value not in self.rules and node.value not in self.tokens:80raise GrammarError(f"Dangling reference to rule {node.value!r}")8182def visit_NamedItem(self, node: NamedItem) -> None:83if node.name and node.name.startswith("_"):84raise GrammarError(f"Variable names cannot start with underscore: '{node.name}'")85self.visit(node.item)868788class ParserGenerator:89callmakervisitor: GrammarVisitor9091def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):92self.grammar = grammar93self.tokens = tokens94self.keywords: Dict[str, int] = {}95self.soft_keywords: Set[str] = set()96self.rules = grammar.rules97self.validate_rule_names()98if "trailer" not in grammar.metas and "start" not in self.rules:99raise GrammarError("Grammar without a trailer must have a 'start' rule")100checker = RuleCheckingVisitor(self.rules, self.tokens)101for rule in self.rules.values():102checker.visit(rule)103self.file = file104self.level = 0105self.first_graph, self.first_sccs = compute_left_recursives(self.rules)106self.counter = 0 # For name_rule()/name_loop()107self.keyword_counter = 499 # For keyword_type()108self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules109self._local_variable_stack: List[List[str]] = []110111def validate_rule_names(self) -> None:112for rule in self.rules:113if rule.startswith("_"):114raise GrammarError(f"Rule names cannot start with underscore: '{rule}'")115116@contextlib.contextmanager117def local_variable_context(self) -> Iterator[None]:118self._local_variable_stack.append([])119yield120self._local_variable_stack.pop()121122@property123def local_variable_names(self) -> List[str]:124return self._local_variable_stack[-1]125126@abstractmethod127def generate(self, filename: str) -> None:128raise NotImplementedError129130@contextlib.contextmanager131def indent(self) -> Iterator[None]:132self.level += 1133try:134yield135finally:136self.level -= 1137138def print(self, *args: object) -> None:139if not args:140print(file=self.file)141else:142print(" " * self.level, end="", file=self.file)143print(*args, file=self.file)144145def printblock(self, lines: str) -> None:146for line in lines.splitlines():147self.print(line)148149def collect_rules(self) -> None:150keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)151for rule in self.all_rules.values():152keyword_collector.visit(rule)153154rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)155done: Set[str] = set()156while True:157computed_rules = list(self.all_rules)158todo = [i for i in computed_rules if i not in done]159if not todo:160break161done = set(self.all_rules)162for rulename in todo:163rule_collector.visit(self.all_rules[rulename])164165def keyword_type(self) -> int:166self.keyword_counter += 1167return self.keyword_counter168169def artifical_rule_from_rhs(self, rhs: Rhs) -> str:170self.counter += 1171name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.172self.all_rules[name] = Rule(name, None, rhs)173return name174175def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:176self.counter += 1177if is_repeat1:178prefix = "_loop1_"179else:180prefix = "_loop0_"181name = f"{prefix}{self.counter}"182self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))183return name184185def artifical_rule_from_gather(self, node: Gather) -> str:186self.counter += 1187name = f"_gather_{self.counter}"188self.counter += 1189extra_function_name = f"_loop0_{self.counter}"190extra_function_alt = Alt(191[NamedItem(None, node.separator), NamedItem("elem", node.node)],192action="elem",193)194self.all_rules[extra_function_name] = Rule(195extra_function_name,196None,197Rhs([extra_function_alt]),198)199alt = Alt(200[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],201)202self.all_rules[name] = Rule(203name,204None,205Rhs([alt]),206)207return name208209def dedupe(self, name: str) -> str:210origname = name211counter = 0212while name in self.local_variable_names:213counter += 1214name = f"{origname}_{counter}"215self.local_variable_names.append(name)216return name217218219class NullableVisitor(GrammarVisitor):220def __init__(self, rules: Dict[str, Rule]) -> None:221self.rules = rules222self.visited: Set[Any] = set()223self.nullables: Set[Union[Rule, NamedItem]] = set()224225def visit_Rule(self, rule: Rule) -> bool:226if rule in self.visited:227return False228self.visited.add(rule)229if self.visit(rule.rhs):230self.nullables.add(rule)231return rule in self.nullables232233def visit_Rhs(self, rhs: Rhs) -> bool:234for alt in rhs.alts:235if self.visit(alt):236return True237return False238239def visit_Alt(self, alt: Alt) -> bool:240for item in alt.items:241if not self.visit(item):242return False243return True244245def visit_Forced(self, force: Forced) -> bool:246return True247248def visit_LookAhead(self, lookahead: Lookahead) -> bool:249return True250251def visit_Opt(self, opt: Opt) -> bool:252return True253254def visit_Repeat0(self, repeat: Repeat0) -> bool:255return True256257def visit_Repeat1(self, repeat: Repeat1) -> bool:258return False259260def visit_Gather(self, gather: Gather) -> bool:261return False262263def visit_Cut(self, cut: Cut) -> bool:264return False265266def visit_Group(self, group: Group) -> bool:267return self.visit(group.rhs)268269def visit_NamedItem(self, item: NamedItem) -> bool:270if self.visit(item.item):271self.nullables.add(item)272return item in self.nullables273274def visit_NameLeaf(self, node: NameLeaf) -> bool:275if node.value in self.rules:276return self.visit(self.rules[node.value])277# Token or unknown; never empty.278return False279280def visit_StringLeaf(self, node: StringLeaf) -> bool:281# The string token '' is considered empty.282return not node.value283284285def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:286"""Compute which rules in a grammar are nullable.287288Thanks to TatSu (tatsu/leftrec.py) for inspiration.289"""290nullable_visitor = NullableVisitor(rules)291for rule in rules.values():292nullable_visitor.visit(rule)293return nullable_visitor.nullables294295296class InitialNamesVisitor(GrammarVisitor):297def __init__(self, rules: Dict[str, Rule]) -> None:298self.rules = rules299self.nullables = compute_nullables(rules)300301def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:302names: Set[str] = set()303for value in node:304if isinstance(value, list):305for item in value:306names |= self.visit(item, *args, **kwargs)307else:308names |= self.visit(value, *args, **kwargs)309return names310311def visit_Alt(self, alt: Alt) -> Set[Any]:312names: Set[str] = set()313for item in alt.items:314names |= self.visit(item)315if item not in self.nullables:316break317return names318319def visit_Forced(self, force: Forced) -> Set[Any]:320return set()321322def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:323return set()324325def visit_Cut(self, cut: Cut) -> Set[Any]:326return set()327328def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:329return {node.value}330331def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:332return set()333334335def compute_left_recursives(336rules: Dict[str, Rule]337) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:338graph = make_first_graph(rules)339sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))340for scc in sccs:341if len(scc) > 1:342for name in scc:343rules[name].left_recursive = True344# Try to find a leader such that all cycles go through it.345leaders = set(scc)346for start in scc:347for cycle in sccutils.find_cycles_in_scc(graph, scc, start):348# print("Cycle:", " -> ".join(cycle))349leaders -= scc - set(cycle)350if not leaders:351raise ValueError(352f"SCC {scc} has no leadership candidate (no element is included in all cycles)"353)354# print("Leaders:", leaders)355leader = min(leaders) # Pick an arbitrary leader from the candidates.356rules[leader].leader = True357else:358name = min(scc) # The only element.359if name in graph[name]:360rules[name].left_recursive = True361rules[name].leader = True362return graph, sccs363364365def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:366"""Compute the graph of left-invocations.367368There's an edge from A to B if A may invoke B at its initial369position.370371Note that this requires the nullable flags to have been computed.372"""373initial_name_visitor = InitialNamesVisitor(rules)374graph = {}375vertices: Set[str] = set()376for rulename, rhs in rules.items():377graph[rulename] = names = initial_name_visitor.visit(rhs)378vertices |= names379for vertex in vertices:380graph.setdefault(vertex, set())381return graph382383384