Path: blob/main/Tools/peg_generator/pegen/first_sets.py
12 views
#!/usr/bin/env python3.812import argparse3import pprint4import sys5from typing import Dict, Set67from pegen.build import build_parser8from pegen.grammar import (9Alt,10Cut,11Gather,12GrammarVisitor,13Group,14Lookahead,15NamedItem,16NameLeaf,17NegativeLookahead,18Opt,19Repeat0,20Repeat1,21Rhs,22Rule,23StringLeaf,24)25from pegen.parser_generator import compute_nullables2627argparser = argparse.ArgumentParser(28prog="calculate_first_sets",29description="Calculate the first sets of a grammar",30)31argparser.add_argument("grammar_file", help="The grammar file")323334class FirstSetCalculator(GrammarVisitor):35def __init__(self, rules: Dict[str, Rule]) -> None:36self.rules = rules37self.nullables = compute_nullables(rules)38self.first_sets: Dict[str, Set[str]] = dict()39self.in_process: Set[str] = set()4041def calculate(self) -> Dict[str, Set[str]]:42for name, rule in self.rules.items():43self.visit(rule)44return self.first_sets4546def visit_Alt(self, item: Alt) -> Set[str]:47result: Set[str] = set()48to_remove: Set[str] = set()49for other in item.items:50new_terminals = self.visit(other)51if isinstance(other.item, NegativeLookahead):52to_remove |= new_terminals53result |= new_terminals54if to_remove:55result -= to_remove5657# If the set of new terminals can start with the empty string,58# it means that the item is completely nullable and we should59# also considering at least the next item in case the current60# one fails to parse.6162if "" in new_terminals:63continue6465if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):66break6768# Do not allow the empty string to propagate.69result.discard("")7071return result7273def visit_Cut(self, item: Cut) -> Set[str]:74return set()7576def visit_Group(self, item: Group) -> Set[str]:77return self.visit(item.rhs)7879def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:80return self.visit(item.node)8182def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:83return self.visit(item.node)8485def visit_NamedItem(self, item: NamedItem) -> Set[str]:86return self.visit(item.item)8788def visit_Opt(self, item: Opt) -> Set[str]:89return self.visit(item.node)9091def visit_Gather(self, item: Gather) -> Set[str]:92return self.visit(item.node)9394def visit_Repeat0(self, item: Repeat0) -> Set[str]:95return self.visit(item.node)9697def visit_Repeat1(self, item: Repeat1) -> Set[str]:98return self.visit(item.node)99100def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:101if item.value not in self.rules:102return {item.value}103104if item.value not in self.first_sets:105self.first_sets[item.value] = self.visit(self.rules[item.value])106return self.first_sets[item.value]107elif item.value in self.in_process:108return set()109110return self.first_sets[item.value]111112def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:113return {item.value}114115def visit_Rhs(self, item: Rhs) -> Set[str]:116result: Set[str] = set()117for alt in item.alts:118result |= self.visit(alt)119return result120121def visit_Rule(self, item: Rule) -> Set[str]:122if item.name in self.in_process:123return set()124elif item.name not in self.first_sets:125self.in_process.add(item.name)126terminals = self.visit(item.rhs)127if item in self.nullables:128terminals.add("")129self.first_sets[item.name] = terminals130self.in_process.remove(item.name)131return self.first_sets[item.name]132133134def main() -> None:135args = argparser.parse_args()136137try:138grammar, parser, tokenizer = build_parser(args.grammar_file)139except Exception as err:140print("ERROR: Failed to parse grammar file", file=sys.stderr)141sys.exit(1)142143firs_sets = FirstSetCalculator(grammar.rules).calculate()144pprint.pprint(firs_sets)145146147if __name__ == "__main__":148main()149150151