Path: blob/master/elisp/emacs-for-python/rope-dist/rope/base/codeanalyze.py
1415 views
import bisect
import re
import token
import tokenize


class ChangeCollector(object):
    """Collect replacements over a text and apply them in one pass."""

    def __init__(self, text):
        self.text = text
        self.changes = []

    def add_change(self, start, end, new_text=None):
        """Record that ``text[start:end]`` should become `new_text`.

        When `new_text` is None the original slice is recorded, which
        makes the change a no-op.
        """
        if new_text is None:
            new_text = self.text[start:end]
        self.changes.append((start, end, new_text))

    def get_changed(self):
        """Return the text with all recorded changes applied.

        Returns None when no change was recorded or when applying the
        changes yields the original text.
        """
        if not self.changes:
            return None
        # Key-based sort on (start, end): same stable ordering as the
        # former ``cmp`` comparator, but also valid on Python 3.
        self.changes.sort(key=lambda change: change[:2])
        pieces = []
        last_changed = 0
        for start, end, text in self.changes:
            pieces.append(self.text[last_changed:start] + text)
            last_changed = end
        if last_changed < len(self.text):
            pieces.append(self.text[last_changed:])
        result = ''.join(pieces)
        if result == self.text:
            return None
        return result


class SourceLinesAdapter(object):
    """Adapts source to Lines interface

    Note: The creation of this class is expensive.
    """

    def __init__(self, source_code):
        self.code = source_code
        self.starts = None
        self._initialize_line_starts()

    def _initialize_line_starts(self):
        """Fill `starts` with the offset at which each line begins.

        A final sentinel of ``len(code) + 1`` is appended so that
        ``starts[lineno] - 1`` is the end offset of every line,
        including the last one.
        """
        starts = [0]
        find = self.code.find
        position = find('\n')
        while position != -1:
            starts.append(position + 1)
            position = find('\n', position + 1)
        starts.append(len(self.code) + 1)
        self.starts = starts

    def get_line(self, lineno):
        """Return the 1-based line `lineno` without its newline."""
        return self.code[self.starts[lineno - 1]:self.starts[lineno] - 1]

    def length(self):
        """Return the number of lines."""
        return len(self.starts) - 1

    def get_line_number(self, offset):
        """Return the 1-based number of the line containing `offset`."""
        return bisect.bisect(self.starts, offset)

    def get_line_start(self, lineno):
        """Return the offset of the first character of line `lineno`."""
        return self.starts[lineno - 1]

    def get_line_end(self, lineno):
        """Return the offset just past the last character of `lineno`."""
        return self.starts[lineno] - 1


class ArrayLinesAdapter(object):
    """Expose a sequence of lines through the Lines interface."""

    def __init__(self, lines):
        self.lines = lines

    def get_line(self, line_number):
        """Return the 1-based line `line_number`."""
        return self.lines[line_number - 1]

    def length(self):
        """Return the number of lines."""
        return len(self.lines)


class LinesToReadline(object):
    """Turn a Lines object into a ``readline``-style callable."""

    def __init__(self, lines, start):
        self.lines = lines
        self.current = start

    def readline(self):
        """Return the next line with a trailing newline, or '' at the end."""
        if self.current > self.lines.length():
            return ''
        line = self.lines.get_line(self.current)
        self.current += 1
        return line + '\n'

    def __call__(self):
        return self.readline()


class _CustomGenerator(object):
    """Find logical line regions by scanning characters, without tokenize."""

    def __init__(self, lines):
        self.lines = lines
        # The delimiter of the string currently open, or '' when outside
        self.in_string = ''
        # Number of brackets opened and not yet closed
        self.open_count = 0
        # Whether the last analyzed line ended with a backslash
        self.continuation = False

    def __call__(self):
        """Return a list of ``(start, end)`` line numbers of logical lines."""
        size = self.lines.length()
        result = []
        i = 1
        while i <= size:
            # Skip blank lines between logical lines
            while i <= size and not self.lines.get_line(i).strip():
                i += 1
            if i <= size:
                start = i
                while True:
                    line = self.lines.get_line(i)
                    self._analyze_line(line)
                    # The logical line ends when nothing is left open,
                    # or when the input is exhausted
                    if not (self.continuation or self.open_count or
                            self.in_string) or i == size:
                        break
                    i += 1
                result.append((start, i))
                i += 1
        return result

    _main_chars = re.compile(r'[\'|"|#|\\|\[|\]|\{|\}|\(|\)]')

    def _analyze_line(self, line):
        """Update string, bracket and continuation state from `line`."""
        char = None
        for match in self._main_chars.finditer(line):
            char = match.group()
            i = match.start()
            if char in '\'"':
                if not self.in_string:
                    self.in_string = char
                    if char * 3 == line[i:i + 3]:
                        self.in_string = char * 3
                elif self.in_string == line[i:i + len(self.in_string)] and \
                        not (i > 0 and line[i - 1] == '\\' and
                             not (i > 1 and line[i - 2] == '\\')):
                    # A closing delimiter that is not escaped by a
                    # single backslash
                    self.in_string = ''
            if self.in_string:
                continue
            if char == '#':
                # The rest of the line is a comment
                break
            if char in '([{':
                self.open_count += 1
            elif char in ')]}':
                self.open_count -= 1
        if line and char != '#' and line.endswith('\\'):
            self.continuation = True
        else:
            self.continuation = False


def custom_generator(lines):
    """Return the logical line regions of `lines` using `_CustomGenerator`."""
    return _CustomGenerator(lines)()


class LogicalLineFinder(object):
    """Find logical lines using the `tokenize` module."""

    def __init__(self, lines):
        self.lines = lines

    def logical_line_in(self, line_number):
        """Return ``(start, end)`` of the logical line holding `line_number`.

        Tokenizing starts from an approximate block start.  On an
        IndentationError the search is retried with the indentation of
        the offending line; the error is re-raised on the fifth failure.
        """
        indents = count_line_indents(self.lines.get_line(line_number))
        tries = 0
        while True:
            block_start = get_block_start(self.lines, line_number, indents)
            try:
                return self._block_logical_line(block_start, line_number)
            except IndentationError as e:
                tries += 1
                if tries == 5:
                    raise
                # `e.lineno` is relative to where tokenizing started
                lineno = e.lineno + block_start - 1
                indents = count_line_indents(self.lines.get_line(lineno))

    def generate_starts(self, start_line=1, end_line=None):
        """Yield the start line of each region from `generate_regions`."""
        for start, end in self.generate_regions(start_line, end_line):
            yield start

    def generate_regions(self, start_line=1, end_line=None):
        """Yield ``(start, end)`` for logical lines from `start_line` on.

        Generation stops at the first logical line starting at or after
        `end_line`, when that is given.
        """
        # XXX: `block_start` should be at a better position!
        block_start = 1
        readline = LinesToReadline(self.lines, block_start)
        try:
            for start, end in self._logical_lines(readline):
                real_start = start + block_start - 1
                real_start = self._first_non_blank(real_start)
                if end_line is not None and real_start >= end_line:
                    break
                real_end = end + block_start - 1
                if real_start >= start_line:
                    yield (real_start, real_end)
        except tokenize.TokenError:
            # Best effort: stop generating at the first tokenize error
            pass

    def _block_logical_line(self, block_start, line_number):
        """Find the logical line of `line_number`, tokenizing from
        `block_start`."""
        readline = LinesToReadline(self.lines, block_start)
        shifted = line_number - block_start + 1
        region = self._calculate_logical(readline, shifted)
        start = self._first_non_blank(region[0] + block_start - 1)
        if region[1] is None:
            end = self.lines.length()
        else:
            end = region[1] + block_start - 1
        return start, end

    def _calculate_logical(self, readline, line_number):
        """Return the region containing `line_number`, relative to the
        first line served by `readline`.

        The end of the region is None when the tokens ran out before
        `line_number` was reached.
        """
        last_end = 1
        try:
            for start, end in self._logical_lines(readline):
                if line_number <= end:
                    return (start, end)
                last_end = end + 1
        except tokenize.TokenError as e:
            # NOTE(review): presumably the line at which tokenize gave
            # up -- confirm against the tokenize documentation
            current = e.args[1][0]
            return (last_end, max(last_end, current - 1))
        return (last_end, None)

    def _logical_lines(self, readline):
        """Yield ``(start, end)`` for every NEWLINE token from `readline`."""
        last_end = 1
        for current_token in tokenize.generate_tokens(readline):
            current = current_token[2][0]
            if current_token[0] == token.NEWLINE:
                yield (last_end, current)
                last_end = current + 1

    def _first_non_blank(self, line_number):
        """Return the first line at or after `line_number` that is neither
        blank nor a comment; the last line is never inspected."""
        current = line_number
        while current < self.lines.length():
            line = self.lines.get_line(current).strip()
            if line and not line.startswith('#'):
                return current
            current += 1
        return current


def tokenizer_generator(lines):
    """Return a generator of logical line regions based on `tokenize`."""
    return LogicalLineFinder(lines).generate_regions()


class CachingLogicalLineFinder(object):
    """Find logical lines from regions that are computed once and cached."""

    def __init__(self, lines, generate=custom_generator):
        self.lines = lines
        self._generate = generate

    _starts = None

    @property
    def starts(self):
        """List indexed by line number; True where a logical line starts."""
        if self._starts is None:
            self._init_logicals()
        return self._starts

    _ends = None

    @property
    def ends(self):
        """List indexed by line number; True where a logical line ends."""
        if self._ends is None:
            self._init_logicals()
        return self._ends

    def _init_logicals(self):
        """Should initialize _starts and _ends attributes"""
        # Index 0 is unused so that line numbers index the lists directly
        size = self.lines.length() + 1
        self._starts = [None] * size
        self._ends = [None] * size
        for start, end in self._generate(self.lines):
            self._starts[start] = True
            self._ends[end] = True

    def logical_line_in(self, line_number):
        """Return ``(start, end)`` of the logical line holding `line_number`.

        When no logical line starts at or before `line_number` the next
        one is used; when there is none either, the line itself is
        returned as the region.
        """
        start = line_number
        while start > 0 and not self.starts[start]:
            start -= 1
        if start == 0:
            try:
                start = self.starts.index(True, line_number)
            except ValueError:
                return (line_number, line_number)
        return (start, self.ends.index(True, start))

    def generate_starts(self, start_line=1, end_line=None):
        """Yield logical line starts in ``[start_line, end_line)``.

        Without `end_line` every line through the last one is
        considered.
        """
        if end_line is None:
            # One past the last line, so that a logical line starting on
            # the last line is reported as well
            end_line = self.lines.length() + 1
        for index in range(start_line, end_line):
            if self.starts[index]:
                yield index


def get_block_start(lines, lineno, maximum_indents=80):
    """Approximate block start"""
    pattern = get_block_start_patterns()
    for i in range(lineno, 0, -1):
        match = pattern.search(lines.get_line(i))
        if match is not None and \
                count_line_indents(lines.get_line(i)) <= maximum_indents:
            striped = match.string.lstrip()
            # Maybe we're in a list comprehension or generator expression
            if i > 1 and striped.startswith(('if', 'for')):
                bracs = 0
                for j in range(i, min(i + 5, lines.length() + 1)):
                    for c in lines.get_line(j):
                        if c == '#':
                            break
                        if c in '[(':
                            bracs += 1
                        if c in ')]':
                            bracs -= 1
                            if bracs < 0:
                                break
                    if bracs < 0:
                        break
                if bracs < 0:
                    # A bracket closes here that was opened on an
                    # earlier line; keep searching upwards
                    continue
            return i
    return 1


_block_start_pattern = None


def get_block_start_patterns():
    """Return the compiled block start pattern, compiling it on first use."""
    global _block_start_pattern
    if not _block_start_pattern:
        pattern = '^\\s*(((def|class|if|elif|except|for|while|with)\\s)|'\
                  '((try|else|finally|except)\\s*:))'
        _block_start_pattern = re.compile(pattern, re.M)
    return _block_start_pattern


def count_line_indents(line):
    """Return the width of the leading whitespace of `line`.

    A space counts as one and a tab as eight.  Lines made up only of
    spaces and tabs, and empty lines, count as zero.
    """
    indents = 0
    for char in line:
        if char == ' ':
            indents += 1
        elif char == '\t':
            indents += 8
        else:
            return indents
    return 0


def get_string_pattern():
    """Return a regular expression source that matches string literals."""
    start = r'(\b[uU]?[rR]?)?'
    longstr = r'%s"""(\\.|"(?!"")|\\\n|[^"\\])*"""' % start
    shortstr = r'%s"(\\.|[^"\\\n])*"' % start
    return '|'.join([longstr, longstr.replace('"', "'"),
                     shortstr, shortstr.replace('"', "'")])


def get_comment_pattern():
    """Return a regular expression source that matches a comment."""
    return r'#[^\n]*'