Path: blob/master/src/smc_sagews/smc_sagews/sage_parsing.py
Views: 286
"""
sage_parser.py

Code for parsing Sage code blocks sensibly.
"""

#########################################################################################
#       Copyright (C) 2016, Sagemath Inc.                                               #
#                                                                                       #
#  Distributed under the terms of the GNU General Public License (GPL), version 2+      #
#                                                                                       #
#                  http://www.gnu.org/licenses/                                         #
#########################################################################################

from __future__ import absolute_import
import string
import traceback
import __future__ as future
import ast

# for the "input()" call
import six


def get_future_features(code, mode):
    """
    Collect the ``from __future__ import ...`` features requested at the top of ``code``.

    INPUT:

    - code -- a string of source code

    - mode -- the ``mode`` argument passed on to ``ast.parse``

    OUTPUT:

    A dict mapping each imported feature name to the corresponding
    ``__future__._Feature`` object.  Only the leading run of
    ``from __future__ import ...`` statements is inspected; the first
    statement of any other kind ends the scan.

    Raises SyntaxError if a name is not listed in ``__future__.all_feature_names``
    (``ast.parse`` may also raise SyntaxError for unparsable code).
    """
    # Cheap textual pre-check that avoids parsing most inputs.
    if '__future__' not in code:
        return {}
    features = {}
    node = ast.parse(code, mode=mode)
    # Make it work for all outer-container node types (module, interactive, expression)
    body = getattr(node, 'body', ())
    if isinstance(body, ast.AST):
        body = [body]
    for stmt in body:
        # Future statements must be "from __future__ import ..."; anything
        # else means we are done processing future statements.
        if not isinstance(stmt, ast.ImportFrom):
            break
        if getattr(stmt, 'module', None) != '__future__':
            break
        for alias in stmt.names:
            assert isinstance(alias, ast.alias)
            name = alias.name
            if name not in future.all_feature_names:
                raise SyntaxError(
                    "future feature %.50r is not defined: %.150r" %
                    (name, code))
            attr = getattr(future, name, None)
            if attr is not None and isinstance(attr, future._Feature):
                features[name] = attr
    return features


def get_input(prompt):
    """
    Read one logical input from the user.

    A first line is read with ``prompt``.  If that line ends with ``:``
    (ignoring trailing whitespace), continuation lines are read with the
    prompt ``'... '`` until an empty line or end-of-file, and appended to
    the result separated by a newline and a space.

    Returns the collected text, or None if end-of-file is hit while
    reading the first line.
    """
    # six exposes the portable reader as ``six.moves.input`` (raw_input on
    # Python 2, input on Python 3).
    read_line = six.moves.input
    try:
        r = read_line(prompt)
    except EOFError:
        return None
    if r.rstrip().endswith(':'):
        while True:
            try:
                z = read_line('... ')
            except EOFError:
                # End of input simply terminates the continuation block.
                break
            if z == '':
                break
            r += '\n ' + z
    return r


#def strip_leading_prompts(code, prompts=['sage:', '....:', '...:', '>>>', '...']):
#    code, literals, state = strip_string_literals(code)
#    code2 = []
#    for line in code.splitlines():
#        line2 = line.lstrip()
#        for p in prompts:
#            if line2.startswith(p):
#                line2 = line2[len(p):]
#                if p[0] != '.':
#                    line2 = line2.lstrip()
#                break
#        code2.append(line2)
#    code = ('\n'.join(code2))%literals
#    return code


def preparse_code(code):
    """
    Return ``code`` run through ``sage.all_cmdline.preparse`` with
    ``ignore_prompts=True``.  Sage is imported lazily, on first use.
    """
    import sage.all_cmdline
    return sage.all_cmdline.preparse(code, ignore_prompts=True)


def strip_string_literals(code, state=None):
    """
    Replace every string literal and comment in ``code`` by a template label.

    INPUT:

    - code -- a string of source code

    - state -- None, or the ``(in_quote, raw)`` pair returned by a previous
      call, to resume scanning inside a literal that was still open

    OUTPUT:

    A triple ``(new_code, literals, state)``:

    - new_code -- ``code`` with each literal/comment replaced by ``%(L<n>)s``
      and every other ``%`` doubled, so that ``new_code % literals``
      gives back the input

    - literals -- dict mapping the labels ``L1``, ``L2``, ... to the removed text

    - state -- ``(in_quote, raw)`` at the end of the scan; ``in_quote`` is
      False, or the quote delimiter of a literal that is still open
    """
    new_code = []
    literals = {}
    counter = 0
    start = q = 0
    if state is None:
        in_quote = False
        raw = False
    else:
        in_quote, raw = state
    while True:
        sig_q = code.find("'", q)
        dbl_q = code.find('"', q)
        hash_q = code.find('#', q)
        # q = position of the nearest quote character of either kind, or -1.
        q = min(sig_q, dbl_q)
        if q == -1:
            q = max(sig_q, dbl_q)
        if not in_quote and hash_q != -1 and (q == -1 or hash_q < q):
            # it's a comment: it runs to the end of the line
            newline = code.find('\n', hash_q)
            if newline == -1:
                newline = len(code)
            counter += 1
            label = "L%s" % counter
            literals[label] = code[hash_q:newline]
            new_code.append(code[start:hash_q].replace('%', '%%'))
            new_code.append("%%(%s)s" % label)
            start = q = newline
        elif q == -1:
            # No more quotes: flush the tail and stop.
            if in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:]
                new_code.append("%%(%s)s" % label)
            else:
                new_code.append(code[start:].replace('%', '%%'))
            break
        elif in_quote:
            # Count the backslashes immediately before the quote; an odd run
            # means the quote is escaped.  The ``q > 0`` / ``q >= k`` guards
            # keep the look-behind from wrapping around to the end of the
            # string through negative indices.
            if q > 0 and code[q - 1] == '\\':
                k = 2
                while q >= k and code[q - k] == '\\':
                    k += 1
                if k % 2 == 0:
                    q += 1
            if code[q:q + len(in_quote)] == in_quote:
                # Closing delimiter: record the whole literal.
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:q + len(in_quote)]
                new_code.append("%%(%s)s" % label)
                q += len(in_quote)
                start = q
                in_quote = False
            else:
                q += 1
        else:
            # Opening delimiter: single or triple quote.
            raw = q > 0 and code[q - 1] in 'rR'
            if len(code) >= q + 3 and (code[q + 1] == code[q] == code[q + 2]):
                in_quote = code[q] * 3
            else:
                in_quote = code[q]
            new_code.append(code[start:q].replace('%', '%%'))
            start = q
            q += len(in_quote)

    return "".join(new_code), literals, (in_quote, raw)


def end_of_expr(s):
    """
    The input string s is a code expression that contains no strings (they have been stripped).
    Find the end of the expression that starts at the beginning of s by finding the first whitespace
    at which the parenthesis and brackets are matched.

    The returned index is the position *after* the expression; it is
    ``len(s)`` when no such space or tab exists.
    """
    parens = 0
    brackets = 0
    for i, c in enumerate(s):
        if c == '(':
            parens += 1
        elif c == '[':
            brackets += 1
        elif c == ')':
            parens -= 1
        elif c == ']':
            brackets -= 1
        elif parens == 0 and brackets == 0 and (c == ' ' or c == '\t'):
            return i
    return len(s)


# NOTE/TODO: The dec_args dict will leak memory over time.  However, it only
# contains code that was entered, so it should never get big.
# dec_args is never pruned: it seems impossible to know for sure whether a
# bit of code will be eventually needed later, so this leakiness seems necessary.
dec_counter = 0
dec_args = {}


# Divide the input code (a string) into blocks of code.
def divide_into_blocks(code):
    """
    Split ``code`` into a list of blocks ``[start, stop, text]``.

    ``start``/``stop`` are indices into the list of lines that remain after
    decorator rewriting and after comment-only and blank lines are dropped;
    ``text`` is the stripped source of the block with string literals and
    comments substituted back in.

    Lines starting with ``%`` (or ``!``, rewritten to ``%sh``) are replaced
    by a ``salvus.execute_with_code_decorators(...)`` call; the decorator
    and the code it applies to are stored in the module-level ``dec_args``
    under the current ``dec_counter``, which is then incremented.
    """
    global dec_counter

    # strip string literals from the input, so that we can parse it without having to worry about strings
    code, literals, state = strip_string_literals(code)

    # divide the code up into lines.
    code = code.splitlines()

    # Compute the line-level code decorators.
    c = list(code)
    try:
        v = []
        for line in code:
            done = False

            # Transform shell escape into sh decorator.
            if line.lstrip().startswith('!'):
                line = line.replace('!', "%%sh ", 1)

            # Check for cell decorator
            # NOTE: strip_string_literals maps % to %%, because %foo is used for python string templating.
            if line.lstrip().startswith('%%'):
                i = line.find("%")
                # +1 for the space or tab delimiter
                j = end_of_expr(line[i + 2:]) + i + 2 + 1
                expr = line[j:] % literals
                new_line = '%ssalvus.execute_with_code_decorators(*_salvus_parsing.dec_args[%s])' % (
                    line[:i], dec_counter)
                # Special case -- if % starts line *and* expr is empty (or a comment),
                # then code decorators impacts the rest of the code.
                # Otherwise the code decorator only impacts this line.
                sexpr = expr.strip()
                if i == 0 and (len(sexpr) == 0 or sexpr.startswith('#')):
                    # len(v) is the index of the current line in ``code``.
                    expr = ('\n'.join(code[len(v) + 1:])) % literals
                    done = True

                dec_args[dec_counter] = ([line[i + 2:j] % literals], expr)
                dec_counter += 1
            else:
                new_line = line
            v.append(new_line)
            if done:
                break
        code = v
    except Exception:
        # Deliberate best effort: if decorator rewriting fails for any
        # reason, fall back to the unmodified lines.
        code = c

    ## Tested this: Completely disable block parsing:
    ## but it requires the caller to do "exec compile(block+'\n', '', 'exec') in namespace, locals", which means no display hook,
    ## so "2+2" breaks.
    ## return [[0,len(code)-1,('\n'.join(code))%literals]]

    # Remove comment lines -- otherwise could get empty blocks that can't be exec'd.
    # For example, exec compile('#', '', 'single') is a syntax error.
    # Also, comments will confuse the code to break into blocks before.
    comment_lines = {}
    for label, v in literals.items():
        if v.startswith('#'):
            comment_lines["%%(%s)s" % label] = True
    code = [x for x in code if not comment_lines.get(x.strip(), False)]

    # take only non-whitespace lines now for Python code (string literals have already been removed).
    code = [x for x in code if x.strip()]

    # Compute the blocks, scanning from the last line backwards: a block
    # extends upwards while the line is indented or while a closing
    # paren/bracket/brace is still waiting for its opener.
    i = len(code) - 1
    blocks = []
    while i >= 0:
        stop = i
        paren_depth = code[i].count('(') - code[i].count(')')
        brack_depth = code[i].count('[') - code[i].count(']')
        curly_depth = code[i].count('{') - code[i].count('}')
        while i >= 0 and (
            (len(code[i]) > 0 and (code[i][0] in string.whitespace))
                or paren_depth < 0 or brack_depth < 0 or curly_depth < 0):
            i -= 1
            if i >= 0:
                paren_depth += code[i].count('(') - code[i].count(')')
                brack_depth += code[i].count('[') - code[i].count(']')
                curly_depth += code[i].count('{') - code[i].count('}')
        block = ('\n'.join(code[i:])) % literals
        bs = block.strip()
        if bs:  # has to not be only whitespace
            blocks.insert(0, [i, stop, bs])
        code = code[:i]
        i = len(code) - 1

    # merge try/except/finally/decorator/else/elif blocks
    i = 1

    def merge():
        "Merge block i-1 with block i."
        blocks[i - 1][-1] += '\n' + blocks[i][-1]
        blocks[i - 1][1] = blocks[i][1]
        del blocks[i]

    while i < len(blocks):
        s = blocks[i][-1].lstrip()
        prev = blocks[i - 1][-1]
        prev_head = prev.lstrip()

        # finally/except lines after a try
        if s.startswith(('finally', 'except')) and prev_head.startswith('try'):
            merge()

        # decorated definitions: the previous block ends with a decorator
        # line, which cannot be executed on its own (this covers decorated
        # classes and async functions as well as plain functions)
        elif s.startswith(('def', '@', 'class', 'async def')) and \
                prev.splitlines()[-1].lstrip().startswith('@'):
            merge()

        # lines starting with else conditions (if *and* for *and* while!)
        elif s.startswith('else') and prev_head.startswith(
                ('if', 'while', 'for', 'try', 'elif')):
            merge()

        # lines starting with elif
        elif s.startswith('elif') and prev_head.startswith('if'):
            merge()

        # do not merge blocks -- move on to next one
        else:
            i += 1

    return blocks


############################################

# Characters allowed in an identifier, and in a dotted name.
CHARS0 = string.ascii_letters + string.digits + '_'
CHARS = CHARS0 + '.'


def guess_last_expression(obj):
    """
    Return the longest suffix of ``obj`` made only of identifier characters and dots.

    TODO: bad guess -- need to use a parser to go any further.
    """
    i = len(obj) - 1
    while i >= 0 and obj[i] in CHARS:
        i -= 1
    return obj[i + 1:]


def is_valid_identifier(target):
    """
    Return True if ``target`` is non-empty, consists only of ASCII letters,
    digits and underscores, and does not start with a digit.
    """
    if len(target) == 0:
        return False
    if target[0] not in string.ascii_letters + '_':
        return False
    return all(x in CHARS0 for x in target)


# ``__builtins__`` is a CPython implementation detail: it is the dict of the
# builtins module inside an imported module, but the module object itself in
# ``__main__``.  Accept both forms.
_builtin_names = (__builtins__
                  if isinstance(__builtins__, dict) else vars(__builtins__))

# Keywords from http://docs.python.org/release/2.7.2/reference/lexical_analysis.html
_builtin_completions = list(_builtin_names) + [
    'and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', 'or', 'with',
    'assert', 'else', 'if', 'pass', 'yield', 'break', 'except', 'import',
    'print', 'class', 'exec', 'in', 'raise', 'continue', 'finally', 'is',
    'return', 'def', 'for', 'lambda', 'try'
]


def introspect(code, namespace, preparse=True):
    """
    INPUT:

    - code -- a string containing Sage (if preparse=True) or Python code.

    - namespace -- a dictionary to complete in (we also complete using
      builtins such as 'def', 'for', etc.

    - preparse -- a boolean

    OUTPUT:

    An object: {'result':, 'target':, 'expr':, 'status':, 'get_help':, 'get_completions':, 'get_source':}

    NOTE: when ``code`` is not a bare identifier, parts of it are passed to
    ``exec``/``eval`` in ``namespace`` (under a 1 second SIGALRM timeout),
    so this must only be used on code the user is entitled to run.
    """
    import re
    # result: the docstring, source code, or list of completions (at
    # return, it might thus be either a list or a string)
    result = []

    # expr: the part of code that is used to do the completion, e.g.,
    # for 'a = n.m.foo', expr would be 'n.m.foo'.  It can be more complicated,
    # e.g., for '(2+3).foo.bar' it would be '(2+3).foo'.
    expr = ''

    # target: for completions, target is the part of the code that we
    # complete on in the namespace defined by the object right before
    # it, e.g., for n.m.foo, the target is "foo".  target is the empty
    # string for source code and docstrings.
    target = ''

    # When returning, exactly one of the following will be true:
    get_help = False  # getting docstring of something
    get_source = False  # getting source code of a function
    get_completions = True  # getting completions of an identifier in some namespace

    try:
        # Strip all strings from the code, replacing them by template
        # symbols; this makes parsing much easier.
        # we strip, since trailing space could cause confusion below
        code0, literals, state = strip_string_literals(code.strip())

        # Move i so that it points to the start of the last expression in the code.
        # (TODO: this should probably be replaced by using ast on preparsed version.  Not easy.)
        i = max([code0.rfind(t) for t in '\n;=']) + 1
        while i < len(code0) and code0[i] in string.whitespace:
            i += 1

        # Break the line in two pieces:  before_expr | expr; we may
        # need before_expr in order to evaluate and make sense of
        # expr.  We also put the string literals back in, so that
        # evaluation works.
        expr = code0[i:] % literals
        before_expr = code0[:i] % literals

        chrs = set('.()[]? ')
        if not any(c in expr for c in chrs):
            # Easy case: this is just completion on a simple identifier in the namespace.
            get_help = False
            get_completions = True
            get_source = False
            target = expr
        else:
            # Now for all of the other harder cases.
            i = max([expr.rfind(s) for s in '?('])
            # expr ends in two ?? -- source code
            if i >= 1 and i == len(expr) - 1 and expr[i - 1] == '?':
                get_source = True
                get_completions = False
                get_help = False
                target = ""
                obj = expr[:i - 1]
            # ends in ( or ? (but not ??) -- docstring
            elif i == len(expr) - 1:
                get_help = True
                get_completions = False
                get_source = False
                target = ""
                obj = expr[:i]
            # completions (not docstrings or source)
            else:
                get_help = False
                get_completions = True
                get_source = False
                i = expr.rfind('.')
                target = expr[i + 1:]
                if target == '' or is_valid_identifier(
                        target) or '*' in expr and '* ' not in expr:
                    # this case includes list.*end[tab]
                    obj = expr[:i]
                else:
                    # this case includes aaa=...;3 * aa[tab]
                    expr = guess_last_expression(target)
                    i = expr.rfind('.')
                    if i != -1:
                        target = expr[i + 1:]
                        obj = expr[:i]
                    else:
                        target = expr

        if get_completions and target == expr:
            # Completion on a name in the namespace itself (no object to evaluate).
            names = list(namespace.keys()) + _builtin_completions
            j = len(expr)
            # Start from an empty list so that a malformed pattern below
            # yields no completions instead of an unbound name.
            v = []
            if '*' in expr:
                # this case includes *_factors<TAB> and abc =...;3 * ab[tab]
                try:
                    pattern = expr.replace("*", ".*").replace("?", ".")
                    reg = re.compile(pattern + "$")
                    v = list(filter(reg.match, names))
                    # for 2*sq[tab]
                    if len(v) == 0:
                        gle = guess_last_expression(expr)
                        j = len(gle)
                        if j > 0:
                            target = gle
                            v = [x[j:] for x in names if x.startswith(gle)]
                except Exception:
                    # Best effort: an invalid glob simply gives no matches.
                    pass
            else:
                v = [x[j:] for x in names if x.startswith(expr)]
                # for 2+sqr[tab]
                if len(v) == 0:
                    gle = guess_last_expression(expr)
                    j = len(gle)
                    if j > 0 and j < len(expr):
                        target = gle
                        v = [x[j:] for x in names if x.startswith(gle)]
        else:

            # We will try to evaluate
            # obj.  This is dangerous and a priori could take
            # forever, so we spend at most 1 second doing this --
            # if it takes longer a signal kills the evaluation.
            # Obviously, this could in fact lock if
            # non-interruptable code is called, which should be rare.

            O = None
            try:
                import signal

                def mysig(*args):
                    raise KeyboardInterrupt

                signal.signal(signal.SIGALRM, mysig)
                signal.alarm(1)
                import sage.all_cmdline
                if before_expr.strip():
                    try:
                        exec((before_expr if not preparse else
                              preparse_code(before_expr)), namespace)
                    except Exception:
                        pass
                        # uncomment for debugging only
                        # traceback.print_exc()
                # We first try to evaluate the part of the expression before the name
                try:
                    O = eval(obj if not preparse else preparse_code(obj),
                             namespace)
                except (SyntaxError, TypeError, AttributeError):
                    # If that fails, we try on a subexpression.
                    # TODO: This will not be needed when
                    # this code is re-written to parse using an
                    # AST, instead of using this lame hack.
                    obj = guess_last_expression(obj)
                    try:
                        O = eval(obj if not preparse else preparse_code(obj),
                                 namespace)
                    except BaseException:
                        # Deliberately broad: this also swallows the
                        # KeyboardInterrupt raised by the timeout handler.
                        pass
            finally:
                # Cancel the pending alarm so it cannot fire later, then
                # ignore SIGALRM as before.
                signal.alarm(0)
                signal.signal(signal.SIGALRM, signal.SIG_IGN)

            def get_file():
                try:
                    import sage.misc.sageinspect
                    eval_getdoc = eval('getdoc(O)', {
                        'getdoc': sage.misc.sageinspect.sage_getfile,
                        'O': O
                    })
                    return " File: " + eval_getdoc + "\n"
                except Exception as err:
                    return "Unable to read source filename (%s)" % err

            if get_help:
                import sage.misc.sageinspect
                result = get_file()
                try:

                    def our_getdoc(s):
                        # Signature line, built from the argspec; omitted
                        # when the argspec cannot be determined.
                        try:
                            x = sage.misc.sageinspect.sage_getargspec(s)
                            defaults = list(x.defaults) if x.defaults else []
                            args = list(x.args) if x.args else []
                            v = []
                            if x.keywords:
                                v.insert(0, '**kwds')
                            if x.varargs:
                                v.insert(0, '*args')
                            # Pair defaults with the trailing arguments.
                            while defaults:
                                d = defaults.pop()
                                k = args.pop()
                                v.insert(0, '%s=%r' % (k, d))
                            v = args + v
                            t = " Signature : %s(%s)\n" % (obj, ', '.join(v))
                        except Exception:
                            t = ""
                        try:
                            ds_raw = sage.misc.sageinspect.sage_getdoc(s)
                            # Decode only when the docstring itself is a byte
                            # string (the inspected object's type is irrelevant).
                            if isinstance(ds_raw, bytes):
                                ds = ds_raw.decode('utf-8')
                            else:
                                ds = ds_raw
                            ds = ds.strip()
                            t += " Docstring :\n%s" % ds
                        except Exception as ex:
                            t += " Problem retrieving Docstring :\n%s" % ex
                            # print ex  # issue 1780: 'ascii' codec can't decode byte 0xc3 in position 3719: ordinal not in range(128)
                        return t

                    result += eval('getdoc(O)', {'getdoc': our_getdoc, 'O': O})
                except Exception as err:
                    result += "Unable to read docstring (%s)" % err
                # Get rid of the leading indent in front of everything.
                # NOTE(review): the original comment said "3 spaces" while the
                # literals seen here carry a single space -- whitespace may
                # have been collapsed in transit; confirm against upstream.
                result = result.lstrip().replace('\n ', '\n')

            elif get_source:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    result += " Source:\n " + eval(
                        'getsource(O)', {
                            'getsource': sage.misc.sageinspect.sage_getsource,
                            'O': O
                        })
                except Exception as err:
                    result += "Unable to read source code (%s)" % err

            elif get_completions:
                if O is not None:
                    v = dir(O)
                    if hasattr(O, 'trait_names'):
                        v += O.trait_names()
                    # Hide private names unless the user started typing one.
                    if not target.startswith('_'):
                        v = [x for x in v if x and not x.startswith('_')]
                    # this case excludes abc = ...;for a in ab[tab]
                    if '*' in expr and '* ' not in expr:
                        try:
                            pattern = target.replace("*", ".*")
                            pattern = pattern.replace("?", ".")
                            reg = re.compile(pattern + "$")
                            v = list(filter(reg.match, v))
                        except Exception:
                            # Best effort: keep the unfiltered list.
                            pass
                    else:
                        j = len(target)
                        v = [x[j:] for x in v if x.startswith(target)]
                else:
                    v = []

        if get_completions:
            result = list(sorted(set(v), key=lambda x: x.lower()))

    except Exception:
        traceback.print_exc()
        result = []
        # The status is reported as 'ok' even on failure (existing contract).
        status = 'ok'
    else:
        status = 'ok'
    return {
        'result': result,
        'target': target,
        'expr': expr,
        'status': status,
        'get_help': get_help,
        'get_completions': get_completions,
        'get_source': get_source
    }