Contact
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
sagemathinc
GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/smc_sagews/smc_sagews/sage_parsing.py
Views: 286
1
"""
2
sage_parsing.py
3
4
Code for parsing Sage code blocks sensibly.
5
"""
6
7
#########################################################################################
8
# Copyright (C) 2016, Sagemath Inc.
9
# #
10
# Distributed under the terms of the GNU General Public License (GPL), version 2+ #
11
# #
12
# http://www.gnu.org/licenses/ #
13
#########################################################################################
14
15
from __future__ import absolute_import
16
import string
17
import traceback
18
import __future__ as future
19
import ast
20
21
# for the "input()" call
22
import six
23
24
25
def get_future_features(code, mode):
    """
    Return the ``__future__`` features switched on at the top of ``code``.

    INPUT:

    - code -- a string of Python source code.

    - mode -- the mode passed on to ``ast.parse`` ('exec', 'single' or
      'eval').

    OUTPUT:

    A dict mapping each imported feature name (e.g. 'division') to the
    matching ``__future__._Feature`` object.  The dict is empty when the
    code has no leading future statements.

    Raises SyntaxError when ``ast.parse`` rejects the code, or when a name
    imported from ``__future__`` is not a known feature.
    """
    # Cheap textual pre-check: most code never mentions __future__, so skip
    # the parse entirely in that case.
    if '__future__' not in code:
        return {}
    features = {}
    node = ast.parse(code, mode=mode)
    # Make it work for all outer-container node types (module, interactive,
    # expression): an Expression node has a single node as its body.
    body = getattr(node, 'body', ())
    if isinstance(body, ast.AST):
        body = [body]
    # The first non-future statement ends processing for future statements.
    for position, stmt in enumerate(body):
        # A docstring is allowed to precede future statements, so a string
        # literal in first position must not end the scan.
        if position == 0 and isinstance(stmt, ast.Expr):
            value = stmt.value
            # ast.Constant keeps the literal in .value; the legacy ast.Str
            # node kept it in .s.
            if hasattr(value, 'value'):
                literal = value.value
            else:
                literal = getattr(value, 's', None)
            if isinstance(literal, (str, type(u''))):
                continue
        # Future statements must be "from __future__ import ..."
        if not isinstance(stmt, ast.ImportFrom):
            break
        if getattr(stmt, 'module', None) != '__future__':
            break
        for alias in stmt.names:
            assert isinstance(alias, ast.alias)
            name = alias.name
            if name not in future.all_feature_names:
                raise SyntaxError(
                    "future feature %.50r is not defined: %.150r" %
                    (name, code))
            attr = getattr(future, name, None)
            if (attr is not None) and isinstance(attr, future._Feature):
                features[name] = attr
    return features
57
58
59
def get_input(prompt):
    """
    Read one chunk of input from the user, following compound statements.

    INPUT:

    - prompt -- the string shown for the first line.

    OUTPUT:

    The text that was entered, or None if end-of-file is hit while reading
    the first line.  When the first line ends with ':' (ignoring trailing
    whitespace), further lines are read with the prompt '... ' and appended,
    each preceded by a newline and a space, until an empty line or
    end-of-file is seen.
    """
    # six exposes the portable line reader as six.moves.input (raw_input on
    # Python 2, input on Python 3); there is no top-level six.input.
    read_line = six.moves.input
    try:
        r = read_line(prompt)
    except EOFError:
        return None
    if r.rstrip().endswith(':'):
        while True:
            try:
                z = read_line('... ')
            except EOFError:
                # EOF inside a continuation just ends the block.
                break
            if z == '':
                break
            r += '\n ' + z
    return r
77
78
79
#def strip_leading_prompts(code, prompts=['sage:', '....:', '...:', '>>>', '...']):
80
# code, literals, state = strip_string_literals(code)
81
# code2 = []
82
# for line in code.splitlines():
83
# line2 = line.lstrip()
84
# for p in prompts:
85
# if line2.startswith(p):
86
# line2 = line2[len(p):]
87
# if p[0] != '.':
88
# line2 = line2.lstrip()
89
# break
90
# code2.append(line2)
91
# code = ('\n'.join(code2))%literals
92
# return code
93
94
95
def preparse_code(code):
    """
    Return ``code`` run through the Sage preparser with prompts ignored.

    Sage is imported here, at call time, rather than at module import.
    """
    from sage.all_cmdline import preparse
    return preparse(code, ignore_prompts=True)
98
99
100
def strip_string_literals(code, state=None):
    """
    Replace every string literal and comment in ``code`` by a placeholder.

    INPUT:

    - code -- a string of source code.

    - state -- None, or the ``(in_quote, raw)`` pair returned by a previous
      call, used to continue inside a string that was still open.

    OUTPUT:

    A triple ``(new_code, literals, state)``:

    - new_code -- the code with each literal or comment replaced by
      ``%(Ln)s`` and every other '%' doubled, so that
      ``new_code % literals`` gives back the original text.

    - literals -- dict mapping the labels 'L1', 'L2', ... to the removed
      text.  A comment runs from '#' up to, but not including, the newline.

    - state -- ``(in_quote, raw)``.  ``in_quote`` is False, or the delimiter
      of a string left unterminated at the end of ``code``; ``raw`` records
      whether the most recently opened string was preceded by 'r' or 'R'.
    """
    new_code = []
    literals = {}
    counter = 0
    start = q = 0
    if state is None:
        in_quote = False
        raw = False
    else:
        in_quote, raw = state
    while True:
        sig_q = code.find("'", q)
        dbl_q = code.find('"', q)
        hash_q = code.find('#', q)
        # q becomes the position of the nearest quote of either kind. If one
        # kind is missing (-1), fall back to the other; -1 if both are.
        q = min(sig_q, dbl_q)
        if q == -1:
            q = max(sig_q, dbl_q)
        if not in_quote and hash_q != -1 and (q == -1 or hash_q < q):
            # it's a comment
            newline = code.find('\n', hash_q)
            if newline == -1:
                newline = len(code)
            counter += 1
            label = "L%s" % counter
            literals[label] = code[hash_q:newline]
            new_code.append(code[start:hash_q].replace('%', '%%'))
            new_code.append("%%(%s)s" % label)
            start = q = newline
        elif q == -1:
            # No more quotes: flush what is left and stop.
            if in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:]
                new_code.append("%%(%s)s" % label)
            else:
                new_code.append(code[start:].replace('%', '%%'))
            break
        elif in_quote:
            # Count the backslashes directly before the quote; an odd number
            # means the quote is escaped.  The scan is bounded at index 0 so
            # that negative indices never wrap to the end of the string,
            # which can otherwise happen when continuing from ``state``.
            if q > 0 and code[q - 1] == '\\':
                k = 2
                while q - k >= 0 and code[q - k] == '\\':
                    k += 1
                if k % 2 == 0:
                    q += 1
            if code[q:q + len(in_quote)] == in_quote:
                # Closing delimiter found: record the whole literal.
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:q + len(in_quote)]
                new_code.append("%%(%s)s" % label)
                q += len(in_quote)
                start = q
                in_quote = False
            else:
                q += 1
        else:
            # Opening quote: decide between a single and a triple delimiter.
            raw = q > 0 and code[q - 1] in 'rR'
            if len(code) >= q + 3 and (code[q + 1] == code[q] == code[q + 2]):
                in_quote = code[q] * 3
            else:
                in_quote = code[q]
            new_code.append(code[start:q].replace('%', '%%'))
            start = q
            q += len(in_quote)

    return "".join(new_code), literals, (in_quote, raw)
163
164
165
def end_of_expr(s):
    """
    Locate the end of the expression that begins at the start of s.

    s is a code expression from which all strings have already been
    stripped.  The expression ends at the first space or tab seen while the
    counts of round and square brackets are both balanced.

    The returned index is the position *after* the expression; it is
    len(s) when no such whitespace exists.
    """
    depth = {'(': 0, '[': 0}
    closes = {')': '(', ']': '['}
    for pos, ch in enumerate(s):
        if ch in depth:
            depth[ch] += 1
        elif ch in closes:
            depth[closes[ch]] -= 1
        elif (ch == ' ' or ch == '\t') and depth['('] == 0 and depth['['] == 0:
            return pos
    return len(s)
190
191
192
# NOTE/TODO: The dec_args dict will leak memory over time. However, it only
193
# contains code that was entered, so it should never get big. It
194
# seems impossible to know for sure whether a bit of code will be
195
# eventually needed later, so this leakiness seems necessary.
196
dec_counter = 0
197
dec_args = {}
198
199
200
# Divide the input code (a string) into blocks of code.
201
def divide_into_blocks(code):
    """
    Split the source of a cell into separately executable blocks.

    INPUT:

    - code -- a string containing the code to divide.

    OUTPUT:

    A list of ``[start, stop, text]`` lists in source order.  ``text`` is
    the stripped source of one block with string literals and comments put
    back in.  ``start`` and ``stop`` are indices into the list of lines that
    is left after comment-only and blank lines have been removed; they are
    not line numbers of the original input.

    Side effects: every ``%decorator`` line (a leading ``!`` is first
    rewritten to ``%sh``) stores ``([decorator], code)`` in the module-level
    ``dec_args`` under the current ``dec_counter``, which is then
    incremented.  The line itself is replaced by a call to
    ``salvus.execute_with_code_decorators`` that looks that entry up.
    """
    global dec_counter

    # strip string literals from the input, so that we can parse it without having to worry about strings
    code, literals, state = strip_string_literals(code)

    # divide the code up into lines.
    code = code.splitlines()

    # Compute the line-level code decorators.
    c = list(code)  # pristine copy, used if decorator processing fails
    try:
        v = []
        for line in code:
            done = False

            # Transform shell escape into sh decorator.
            if line.lstrip().startswith('!'):
                line = line.replace('!', "%%sh ", 1)

            # Check for cell decorator
            # NOTE: strip_string_literals maps % to %%, because %foo is used for python string templating.
            if line.lstrip().startswith('%%'):
                i = line.find("%")
                j = end_of_expr(
                    line[i + 2:]) + i + 2 + 1  # +1 for the space or tab delimiter
                expr = line[j:] % literals
                # The replacement line is the same whether the decorator
                # applies to this line only or to the rest of the cell.
                new_line = '%ssalvus.execute_with_code_decorators(*_salvus_parsing.dec_args[%s])' % (
                    line[:i], dec_counter)
                # Special case -- if % starts line *and* expr is empty (or a comment),
                # then code decorators impacts the rest of the code.
                sexpr = expr.strip()
                if i == 0 and (len(sexpr) == 0 or sexpr.startswith('#')):
                    expr = ('\n'.join(code[len(v) + 1:])) % literals
                    done = True
                # Otherwise expr is nonempty -- code decorator only impacts this line

                dec_args[dec_counter] = ([line[i + 2:j] % literals], expr)
                dec_counter += 1
            else:
                new_line = line
            v.append(new_line)
            if done:
                break
        code = v
    except Exception:
        # Best effort: on any failure fall back to the undecorated lines.
        code = c

    ## Tested this: Completely disable block parsing:
    ## but it requires the caller to do "exec compile(block+'\n', '', 'exec') in namespace, locals", which means no display hook,
    ## so "2+2" breaks.
    ## return [[0,len(code)-1,('\n'.join(code))%literals]]

    # Remove comment lines -- otherwise could get empty blocks that can't be exec'd.
    # For example, exec compile('#', '', 'single') is a syntax error.
    # Also, comments will confuse the code to break into blocks before.
    comment_lines = {}
    for label, v in literals.items():
        if v.startswith('#'):
            comment_lines["%%(%s)s" % label] = True
    code = [x for x in code if not comment_lines.get(x.strip(), False)]

    # take only non-whitespace lines now for Python code (string literals have already been removed).
    code = [x for x in code if x.strip()]

    # Compute the blocks, working backwards from the last line.
    i = len(code) - 1
    blocks = []
    while i >= 0:
        stop = i
        paren_depth = code[i].count('(') - code[i].count(')')
        brack_depth = code[i].count('[') - code[i].count(']')
        curly_depth = code[i].count('{') - code[i].count('}')
        # Walk upwards while the line is indented (so it belongs to an
        # earlier header line) or a bracket opened further up is unclosed.
        while i >= 0 and (
            (len(code[i]) > 0 and (code[i][0] in string.whitespace))
                or paren_depth < 0 or brack_depth < 0 or curly_depth < 0):
            i -= 1
            if i >= 0:
                paren_depth += code[i].count('(') - code[i].count(')')
                brack_depth += code[i].count('[') - code[i].count(']')
                curly_depth += code[i].count('{') - code[i].count('}')
        # NOTE(review): if the walk runs off the top, i is -1 and the slices
        # below select just the last remaining line; indented leading lines
        # therefore come out one block per line.  Kept as is.
        block = ('\n'.join(code[i:])) % literals
        bs = block.strip()
        if bs:  # has to not be only whitespace
            blocks.insert(0, [i, stop, bs])
        code = code[:i]
        i = len(code) - 1

    # merge try/except/finally/decorator/else/elif blocks
    i = 1

    def merge():
        "Merge block i-1 with block i."
        blocks[i - 1][-1] += '\n' + blocks[i][-1]
        blocks[i - 1][1] = blocks[i][1]
        del blocks[i]

    while i < len(blocks):
        s = blocks[i][-1].lstrip()

        # finally/except lines after a try
        if (s.startswith('finally') or s.startswith('except')
            ) and blocks[i - 1][-1].lstrip().startswith('try'):
            merge()

        # decorated definitions: a decorator line may be followed by a
        # function, a class, an async function, or another decorator
        elif s.startswith(('def', 'class', 'async def', '@')) and blocks[
                i - 1][-1].splitlines()[-1].lstrip().startswith('@'):
            merge()

        # lines starting with else conditions (if *and* for *and* while!)
        elif s.startswith('else') and (
                blocks[i - 1][-1].lstrip().startswith('if')
                or blocks[i - 1][-1].lstrip().startswith('while')
                or blocks[i - 1][-1].lstrip().startswith('for')
                or blocks[i - 1][-1].lstrip().startswith('try')
                or blocks[i - 1][-1].lstrip().startswith('elif')):
            merge()

        # lines starting with elif
        elif s.startswith('elif') and blocks[i -
                                             1][-1].lstrip().startswith('if'):
            merge()

        # do not merge blocks -- move on to next one
        else:
            i += 1

    return blocks
334
335
336
############################################
337
338
CHARS0 = string.ascii_letters + string.digits + '_'
339
CHARS = CHARS0 + '.'
340
341
342
def guess_last_expression(obj):
    """
    Return the longest suffix of the string obj made only of CHARS.

    TODO: bad guess -- need to use a parser to go any further.
    """
    # Dropping every trailing character that is in CHARS leaves the prefix
    # that is not part of the final dotted name.
    prefix_len = len(obj.rstrip(CHARS))
    return obj[prefix_len:]
348
349
350
def is_valid_identifier(target):
    """
    Return True if target is non-empty, uses only characters from CHARS0,
    and starts with an ASCII letter or an underscore; False otherwise.
    """
    if len(target) == 0 or any(ch not in CHARS0 for ch in target):
        return False
    leading = string.ascii_letters + '_'
    return target[0] in leading
358
359
360
# Names always offered for completion, in addition to the caller's namespace:
# every builtin, plus the keywords from
# http://docs.python.org/release/2.7.2/reference/lexical_analysis.html
#
# __builtins__ is a dict inside an imported module but is the builtins module
# itself when this file runs as __main__, so it is normalised with vars()
# before its keys are read.
_builtin_completions = list(
    (__builtins__
     if isinstance(__builtins__, dict) else vars(__builtins__)).keys()) + [
         'and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', 'or',
         'with', 'assert', 'else', 'if', 'pass', 'yield', 'break', 'except',
         'import', 'print', 'class', 'exec', 'in', 'raise', 'continue',
         'finally', 'is', 'return', 'def', 'for', 'lambda', 'try'
     ]
367
368
369
def introspect(code, namespace, preparse=True):
    """
    Compute tab completions, a docstring, or source code for the last
    expression in ``code``.

    INPUT:

    - code -- a string containing Sage (if preparse=True) or Python code.

    - namespace -- a dictionary to complete in (we also complete using
      builtins such as 'def', 'for', etc.).  Code before the expression may
      be exec'd in this namespace.

    - preparse -- a boolean; if True, code is passed through the Sage
      preparser before it is evaluated.

    OUTPUT:

    An object: {'result':, 'target':, 'expr':, 'status':, 'get_help':, 'get_completions':, 'get_source':}

    'result' is a sorted list of completions, or a string holding the
    docstring or source.  'status' is always 'ok'; when an exception is
    caught its traceback is printed and 'result' is an empty list.
    """
    import re
    # result: the docstring, source code, or list of completions (at
    # return, it might thus be either a list or a string)
    result = []

    # expr: the part of code that is used to do the completion, e.g.,
    # for 'a = n.m.foo', expr would be 'n.m.foo'.  It can be more complicated,
    # e.g., for '(2+3).foo.bar' it would be '(2+3).foo'.
    expr = ''

    # target: for completions, target is the part of the code that we
    # complete on in the namespace defined by the object right before
    # it, e.g., for n.m.foo, the target is "foo".  target is the empty
    # string for source code and docstrings.
    target = ''

    # When returning, exactly one of the following will be true:
    get_help = False  # getting docstring of something
    get_source = False  # getting source code of a function
    get_completions = True  # getting completions of an identifier in some namespace

    try:
        # Strip all strings from the code, replacing them by template
        # symbols; this makes parsing much easier.
        # we strip, since trailing space could cause confusion below
        code0, literals, state = strip_string_literals(code.strip())

        # Move i so that it points to the start of the last expression in the code.
        # (TODO: this should probably be replaced by using ast on preparsed version.  Not easy.)
        i = max([code0.rfind(t) for t in '\n;=']) + 1
        while i < len(code0) and code0[i] in string.whitespace:
            i += 1

        # Break the line in two pieces: before_expr | expr; we may
        # need before_expr in order to evaluate and make sense of
        # expr.  We also put the string literals back in, so that
        # evaluation works.
        expr = code0[i:] % literals
        before_expr = code0[:i] % literals

        chrs = set('.()[]? ')
        if not any(c in expr for c in chrs):
            # Easy case: this is just completion on a simple identifier in the namespace.
            get_help = False
            get_completions = True
            get_source = False
            target = expr
        else:
            # Now for all of the other harder cases.
            i = max([expr.rfind(s) for s in '?('])
            # expr ends in two ?? -- source code
            if i >= 1 and i == len(expr) - 1 and expr[i - 1] == '?':
                get_source = True
                get_completions = False
                get_help = False
                target = ""
                obj = expr[:i - 1]
            # ends in ( or ? (but not ??) -- docstring
            elif i == len(expr) - 1:
                get_help = True
                get_completions = False
                get_source = False
                target = ""
                obj = expr[:i]
            # completions (not docstrings or source)
            else:
                get_help = False
                get_completions = True
                get_source = False
                i = expr.rfind('.')
                target = expr[i + 1:]
                if (target == '' or is_valid_identifier(target)
                        or ('*' in expr and '* ' not in expr)):
                    # this case includes list.*end[tab]
                    obj = expr[:i]
                else:
                    # this case includes aaa=...;3 * aa[tab]
                    expr = guess_last_expression(target)
                    i = expr.rfind('.')
                    if i != -1:
                        target = expr[i + 1:]
                        obj = expr[:i]
                    else:
                        target = expr

        # Start from an empty candidate list so that a failure in the
        # wildcard branch below cannot leave v unbound.
        v = []

        if get_completions and target == expr:
            j = len(expr)
            if '*' in expr:
                # this case includes *_factors<TAB> and abc =...;3 * ab[tab]
                try:
                    pattern = expr.replace("*", ".*").replace("?", ".")
                    reg = re.compile(pattern + "$")
                    v = list(
                        filter(reg.match,
                               list(namespace.keys()) + _builtin_completions))
                    # for 2*sq[tab]
                    if len(v) == 0:
                        gle = guess_last_expression(expr)
                        j = len(gle)
                        if j > 0:
                            target = gle
                            v = [
                                x[j:] for x in (list(namespace.keys()) +
                                                _builtin_completions)
                                if x.startswith(gle)
                            ]
                except Exception:
                    # Best effort: e.g. the pattern is not a valid regex.
                    pass
            else:
                v = [
                    x[j:]
                    for x in (list(namespace.keys()) + _builtin_completions)
                    if x.startswith(expr)
                ]
                # for 2+sqr[tab]
                if len(v) == 0:
                    gle = guess_last_expression(expr)
                    j = len(gle)
                    if j > 0 and j < len(expr):
                        target = gle
                        v = [
                            x[j:] for x in (list(namespace.keys()) +
                                            _builtin_completions)
                            if x.startswith(gle)
                        ]
        else:

            # We will try to evaluate
            # obj.  This is dangerous and a priori could take
            # forever, so we spend at most 1 second doing this --
            # if it takes longer a signal kills the evaluation.
            # Obviously, this could in fact lock if
            # non-interruptable code is called, which should be rare.
            # NOTE(review): the code being evaluated comes from the user's
            # own input, so exec/eval are deliberate here.

            O = None
            try:
                import signal

                def mysig(*args):
                    raise KeyboardInterrupt

                signal.signal(signal.SIGALRM, mysig)
                signal.alarm(1)
                import sage.all_cmdline
                if before_expr.strip():
                    try:
                        exec((before_expr if not preparse else
                              preparse_code(before_expr)), namespace)
                    except Exception:
                        pass
                        # uncomment for debugging only
                        # traceback.print_exc()
                # We first try to evaluate the part of the expression before the name
                try:
                    O = eval(obj if not preparse else preparse_code(obj),
                             namespace)
                except (SyntaxError, TypeError, AttributeError):
                    # If that fails, we try on a subexpression.
                    # TODO: This will not be needed when
                    # this code is re-written to parse using an
                    # AST, instead of using this lame hack.
                    obj = guess_last_expression(obj)
                    try:
                        O = eval(obj if not preparse else preparse_code(obj),
                                 namespace)
                    except BaseException:
                        # Deliberately broad: this also absorbs the
                        # KeyboardInterrupt raised by the alarm handler.
                        pass
            finally:
                # Cancel the alarm if it has not fired yet, then stop
                # reacting to SIGALRM.
                signal.alarm(0)
                signal.signal(signal.SIGALRM, signal.SIG_IGN)

            def get_file():
                try:
                    import sage.misc.sageinspect
                    eval_getdoc = eval('getdoc(O)', {
                        'getdoc': sage.misc.sageinspect.sage_getfile,
                        'O': O
                    })
                    return " File: " + eval_getdoc + "\n"
                except Exception as err:
                    return "Unable to read source filename (%s)" % err

            if get_help:
                import sage.misc.sageinspect
                result = get_file()
                try:

                    def our_getdoc(s):
                        # Signature line; left out if the argspec cannot be
                        # computed for any reason.
                        try:
                            x = sage.misc.sageinspect.sage_getargspec(s)
                            defaults = list(x.defaults) if x.defaults else []
                            args = list(x.args) if x.args else []
                            v = []
                            if x.keywords:
                                v.insert(0, '**kwds')
                            if x.varargs:
                                v.insert(0, '*args')
                            # Defaults belong to the last arguments, so pair
                            # them up from the right.
                            while defaults:
                                d = defaults.pop()
                                k = args.pop()
                                v.insert(0, '%s=%r' % (k, d))
                            v = args + v
                            t = " Signature : %s(%s)\n" % (obj, ', '.join(v))
                        except Exception:
                            t = ""
                        try:
                            ds_raw = sage.misc.sageinspect.sage_getdoc(s)
                            # Decode only when the docstring itself is bytes;
                            # the type of the documented object is irrelevant.
                            if isinstance(ds_raw, bytes):
                                ds = ds_raw.decode('utf-8')
                            else:
                                ds = ds_raw
                            ds = ds.strip()
                            t += " Docstring :\n%s" % ds
                        except Exception as ex:
                            t += " Problem retrieving Docstring :\n%s" % ex
                            # print ex  # issue 1780: 'ascii' codec can't decode byte 0xc3 in position 3719: ordinal not in range(128)
                        return t

                    result += eval('getdoc(O)', {'getdoc': our_getdoc, 'O': O})
                except Exception as err:
                    result += "Unable to read docstring (%s)" % err
                # Get rid of the leading indentation in front of everything.
                result = result.lstrip().replace('\n ', '\n')

            elif get_source:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    result += " Source:\n " + eval(
                        'getsource(O)', {
                            'getsource': sage.misc.sageinspect.sage_getsource,
                            'O': O
                        })
                except Exception as err:
                    result += "Unable to read source code (%s)" % err

            elif get_completions:
                if O is not None:
                    v = dir(O)
                    if hasattr(O, 'trait_names'):
                        v += O.trait_names()
                    # Hide private names unless the user asked for them.
                    if not target.startswith('_'):
                        v = [x for x in v if x and not x.startswith('_')]
                    # this case excludes abc = ...;for a in ab[tab]
                    if '*' in expr and '* ' not in expr:
                        try:
                            pattern = target.replace("*", ".*")
                            pattern = pattern.replace("?", ".")
                            reg = re.compile(pattern + "$")
                            v = list(filter(reg.match, v))
                        except Exception:
                            # Keep the unfiltered list if the pattern is bad.
                            pass
                    else:
                        j = len(target)
                        v = [x[j:] for x in v if x.startswith(target)]
                else:
                    v = []

        if get_completions:
            result = list(sorted(set(v), key=lambda x: x.lower()))

    except Exception:
        traceback.print_exc()
        result = []
        status = 'ok'
    else:
        status = 'ok'
    return {
        'result': result,
        'target': target,
        'expr': expr,
        'status': status,
        'get_help': get_help,
        'get_completions': get_completions,
        'get_source': get_source
    }
660
661