Path: blob/main/Tools/peg_generator/pegen/tokenizer.py
12 views
import token
import tokenize
from typing import Dict, Iterator, List

# A Mark is simply an index into Tokenizer._tokens; a plain int keeps
# mark()/reset() cheap.
Mark = int  # NewType('Mark', int)

exact_token_types = token.EXACT_TOKEN_TYPES


def shorttok(tok: tokenize.TokenInfo) -> str:
    """Format a token as 'row.col: TYPE:string', padded/truncated to 25 chars."""
    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"


class Tokenizer:
    """Caching wrapper for the tokenize module.

    This is pretty tied to Python's syntax.
    """

    # Tokens consumed from the generator so far; never shrinks, so a Mark
    # taken earlier always remains a valid index.
    _tokens: List[tokenize.TokenInfo]

    def __init__(
        self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
    ):
        self._tokengen = tokengen
        self._tokens = []
        self._index = 0  # current position within the cache
        self._verbose = verbose
        # line number -> source line text; only filled when no path is given,
        # since then the source can't be re-read from disk later.
        self._lines: Dict[int, str] = {}
        self._path = path
        if verbose:
            self.report(False, False)

    def getnext(self) -> tokenize.TokenInfo:
        """Return the next token and update the index."""
        # "cached" means peek() will not need to pull from the generator.
        cached = self._index != len(self._tokens)
        tok = self.peek()
        self._index += 1
        if self._verbose:
            self.report(cached, False)
        return tok

    def peek(self) -> tokenize.TokenInfo:
        """Return the next token *without* updating the index."""
        while self._index == len(self._tokens):
            tok = next(self._tokengen)
            # Skip tokens the peg parser never needs to see.
            if tok.type in (tokenize.NL, tokenize.COMMENT):
                continue
            if tok.type == token.ERRORTOKEN and tok.string.isspace():
                continue
            # Collapse consecutive NEWLINE tokens into one.
            if (
                tok.type == token.NEWLINE
                and self._tokens
                and self._tokens[-1].type == token.NEWLINE
            ):
                continue
            self._tokens.append(tok)
            if not self._path:
                self._lines[tok.start[0]] = tok.line
        return self._tokens[self._index]

    def diagnose(self) -> tokenize.TokenInfo:
        """Return the last token cached, pulling at least one if none yet."""
        if not self._tokens:
            self.getnext()
        return self._tokens[-1]

    def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
        """Return the most recently consumed token that isn't whitespace-like.

        NOTE(review): if no token has been consumed yet, ``tok`` is never
        bound and this raises UnboundLocalError; callers are expected to
        have consumed at least one token first.
        """
        for tok in reversed(self._tokens[: self._index]):
            # The token types NEWLINE..DEDENT are the whitespace-ish ones;
            # ENDMARKER is skipped too so diagnostics point at real source.
            if tok.type != tokenize.ENDMARKER and (
                tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
            ):
                break
        return tok

    def get_lines(self, line_numbers: List[int]) -> List[str]:
        """Retrieve source lines corresponding to line numbers."""
        if self._lines:
            # Source was cached token-by-token (no path was given).
            lines = self._lines
        else:
            wanted = len(line_numbers)
            lines = {}
            lineno = 0
            seen = 0
            with open(self._path) as f:
                for line in f:
                    lineno += 1
                    if lineno in line_numbers:
                        seen += 1
                        lines[lineno] = line
                        if seen == wanted:
                            # All requested lines found; stop reading.
                            break

        return [lines[num] for num in line_numbers]

    def mark(self) -> Mark:
        """Return the current position so reset() can come back to it."""
        return self._index

    def reset(self, index: Mark) -> None:
        """Rewind (or fast-forward) to a position previously returned by mark()."""
        if index == self._index:
            return
        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
        old_index = self._index
        self._index = index
        if self._verbose:
            self.report(True, index < old_index)

    def report(self, cached: bool, back: bool) -> None:
        """Print a one-line trace of the current position (verbose mode only)."""
        if back:
            fill = "-" * self._index + "-"
        elif cached:
            fill = "-" * self._index + ">"
        else:
            fill = "-" * self._index + "*"
        if self._index == 0:
            print(f"{fill} (Bof)")
        else:
            tok = self._tokens[self._index - 1]
            print(f"{fill} {shorttok(tok)}")