CoCalc -- sage_parsing.py

GitHub Repository: sagemathinc/cocalc
Path: blob/master/src/smc_sagews/smc_sagews/sage_parsing.py
Views: ²⁸⁶
1
"""
sage_parsing.py

Code for parsing Sage code blocks sensibly.
"""
6

7
#########################################################################################
#       Copyright (C) 2016, Sagemath Inc.
#                                                                                       #
#  Distributed under the terms of the GNU General Public License (GPL), version 2+      #
#                                                                                       #
#                  http://www.gnu.org/licenses/                                         #
#########################################################################################
14

15
from __future__ import absolute_import
16
import string
17
import traceback
18
import __future__ as future
19
import ast
20

21
# for the "input()" call
22
import six
23

24

25
def _is_docstring_stmt(stmt):
    """Return True if ``stmt`` is a bare string-literal expression statement."""
    if not isinstance(stmt, ast.Expr):
        return False
    value = stmt.value
    # Newer Pythons parse string literals to ast.Constant; older ones
    # produce a node class named Str.
    if isinstance(value, getattr(ast, 'Constant', ())):
        return isinstance(value.value, str)
    return type(value).__name__ == 'Str'


def get_future_features(code, mode):
    """
    Return the ``__future__`` features enabled at the top of ``code``.

    INPUT:

    - code -- a string containing Python source code.

    - mode -- the mode passed to ``ast.parse`` ('exec', 'single' or 'eval').

    OUTPUT:

    A dict mapping each feature name imported via
    ``from __future__ import ...`` to the corresponding feature object of
    the ``__future__`` module.  The dict is empty when ``code`` does not
    mention ``__future__`` at all.

    Only the leading future statements are considered: a leading docstring
    is skipped, and the first statement that is not a future import ends
    the scan.

    Raises ``SyntaxError`` if an imported name is not a known future
    feature (or if ``code`` itself does not parse).
    """
    if '__future__' not in code:
        return {}
    features = {}
    node = ast.parse(code, mode=mode)
    # Make it work for all outer-container node types (module, interactive,
    # expression): an Expression node has a single node as its body.
    body = getattr(node, 'body', ())
    if isinstance(body, ast.AST):
        body = [body]
    for index, stmt in enumerate(body):
        # A docstring may legally precede the future statements.
        if index == 0 and _is_docstring_stmt(stmt):
            continue
        # Future statements must be "from __future__ import ..."; the first
        # statement of any other kind ends processing.
        if not isinstance(stmt, ast.ImportFrom):
            break
        if getattr(stmt, 'module', None) != '__future__':
            break
        for alias in stmt.names:
            name = alias.name
            if name not in future.all_feature_names:
                raise SyntaxError(
                    "future feature %.50r is not defined: %.150r" %
                    (name, code))
            attr = getattr(future, name, None)
            if isinstance(attr, future._Feature):
                features[name] = attr
    return features
57

58

59
def get_input(prompt):
    """
    Read one (possibly multi-line) input from the user.

    The first line is read with ``prompt``.  If it ends with a colon
    (ignoring trailing whitespace), further lines are read with a
    continuation prompt until an empty line or end-of-file; each such line
    is appended to the result on a new line, indented by four spaces.

    Returns the collected text, or ``None`` if end-of-file is hit while
    reading the first line.
    """
    # six exposes the Python 2/3 compatible input function as
    # six.moves.input (raw_input on Python 2, input on Python 3).
    read_line = six.moves.input
    try:
        r = read_line(prompt)
    except EOFError:
        return None
    if r.rstrip().endswith(':'):
        while True:
            try:
                z = read_line('...       ')
            except EOFError:
                # End of input in the middle of a block: return what we have.
                break
            if z == '':
                break
            r += '\n    ' + z
    return r
77

78

79
#def strip_leading_prompts(code, prompts=['sage:', '....:', '...:', '>>>', '...']):
#    code, literals, state = strip_string_literals(code)
#    code2 = []
#    for line in code.splitlines():
#        line2 = line.lstrip()
#        for p in prompts:
#            if line2.startswith(p):
#                line2 = line2[len(p):]
#                if p[0] != '.':
#                    line2 = line2.lstrip()
#                break
#        code2.append(line2)
#    code = ('\n'.join(code2))%literals
#    return code
93

94

95
def preparse_code(code):
    """
    Return ``code`` run through the Sage preparser with
    ``ignore_prompts=True``.
    """
    # Imported inside the function rather than at module level.
    import sage.all_cmdline
    return sage.all_cmdline.preparse(code, ignore_prompts=True)
98

99

100
def strip_string_literals(code, state=None):
    """
    Replace the string literals and ``#`` comments in ``code`` by
    ``%(Ln)s`` template placeholders.

    INPUT:

    - code -- a string of source code.

    - state -- ``None``, or the ``(in_quote, raw)`` pair returned by a
      previous call, used to continue inside a string literal that was
      still open at the end of the previous chunk.

    OUTPUT:

    A triple ``(new_code, literals, state)``:

    - new_code -- ``code`` with every literal/comment replaced by a
      placeholder and every remaining ``%`` doubled, so that
      ``new_code % literals`` reproduces the input.

    - literals -- dict mapping the labels ``'L1'``, ``'L2'``, ... to the
      text that was removed.

    - state -- ``(in_quote, raw)``; ``in_quote`` is ``False`` or the quote
      delimiter still open at the end of ``code``.  ``raw`` records whether
      the last opened literal had an ``r``/``R`` prefix (it is not
      otherwise used here).
    """
    new_code = []
    literals = {}
    counter = 0
    start = q = 0
    if state is None:
        in_quote = False
        raw = False
    else:
        in_quote, raw = state
    while True:
        sig_q = code.find("'", q)
        dbl_q = code.find('"', q)
        hash_q = code.find('#', q)
        # q = position of the nearest quote of either kind, or -1 if none.
        q = min(sig_q, dbl_q)
        if q == -1:
            q = max(sig_q, dbl_q)
        if not in_quote and hash_q != -1 and (q == -1 or hash_q < q):
            # it's a comment: it runs to the end of the line
            newline = code.find('\n', hash_q)
            if newline == -1:
                newline = len(code)
            counter += 1
            label = "L%s" % counter
            literals[label] = code[hash_q:newline]
            new_code.append(code[start:hash_q].replace('%', '%%'))
            new_code.append("%%(%s)s" % label)
            start = q = newline
        elif q == -1:
            # No more quotes: the remainder is either the tail of an
            # unterminated literal or plain code.
            if in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:]
                new_code.append("%%(%s)s" % label)
            else:
                new_code.append(code[start:].replace('%', '%%'))
            break
        elif in_quote:
            # Count the backslashes immediately before the quote; an odd
            # number means the quote is escaped.  The index checks keep the
            # scan from wrapping around to the end of the string.
            if q > 0 and code[q - 1] == '\\':
                k = 2
                while q - k >= 0 and code[q - k] == '\\':
                    k += 1
                if k % 2 == 0:
                    q += 1
            if code[q:q + len(in_quote)] == in_quote:
                counter += 1
                label = "L%s" % counter
                literals[label] = code[start:q + len(in_quote)]
                new_code.append("%%(%s)s" % label)
                q += len(in_quote)
                start = q
                in_quote = False
            else:
                q += 1
        else:
            # Opening quote: single or triple.
            raw = q > 0 and code[q - 1] in 'rR'
            if len(code) >= q + 3 and (code[q + 1] == code[q] == code[q + 2]):
                in_quote = code[q] * 3
            else:
                in_quote = code[q]
            new_code.append(code[start:q].replace('%', '%%'))
            start = q
            q += len(in_quote)

    return "".join(new_code), literals, (in_quote, raw)
163

164

165
def end_of_expr(s):
    """
    The input string s is a code expression that contains no strings (they have been stripped).
    Find the end of the expression that starts at the beginning of s by finding the first
    space or tab at which the parentheses and square brackets are matched.

    The returned index is the position *after* the expression; it is len(s) if
    no such space or tab exists.
    """
    parens = 0
    brackets = 0
    for i, c in enumerate(s):
        if c == '(':
            parens += 1
        elif c == '[':
            brackets += 1
        elif c == ')':
            parens -= 1
        elif c == ']':
            brackets -= 1
        elif parens == 0 and brackets == 0 and (c == ' ' or c == '\t'):
            return i
    return len(s)
190

191

192
# NOTE/TODO: The dec_args dict will leak memory over time.  However, it only
# contains code that was entered, so it should never get big.  It
# seems impossible to know for sure whether a bit of code will be
# eventually needed later, so this leakiness seems necessary.
dec_counter = 0  # key to use for the next entry of dec_args
dec_args = {}  # dec_counter value -> ([decorator expression], code string)
198

199

200
# Divide the input code (a string) into blocks of code.
201
def divide_into_blocks(code):
    """
    Divide the input code (a string) into blocks of code.

    Processing steps:

    1. String literals and comments are replaced by placeholders (see
       ``strip_string_literals``) so the text can be scanned safely.
    2. Lines starting with ``!`` are turned into ``%sh`` decorator lines,
       and ``%decorator`` lines are rewritten into calls to
       ``salvus.execute_with_code_decorators``; the decorator expression
       and the code it applies to are stored in the module-level
       ``dec_args`` dict under the current ``dec_counter``.
    3. Comment-only and blank lines are dropped.
    4. The remaining lines are grouped, scanning backwards, into blocks
       that start at an unindented line with balanced (), [] and {}.
    5. Adjacent blocks that form one compound statement (try/except/
       finally, decorators followed by def/class, if/elif/else, and the
       else clause of loops) are merged.

    OUTPUT:

    A list of ``[start, stop, text]`` lists, one per block, where ``text``
    is the block's source with the literals substituted back in, and
    ``start``/``stop`` are the line indices computed while cutting the
    block out of the filtered line list.
    """
    global dec_counter

    def starts_with_keyword(text, keywords):
        # True if text starts with one of the keywords as a whole word, so
        # that e.g. "elsewhere = 1" is not mistaken for an "else" clause.
        for keyword in keywords:
            if text.startswith(keyword):
                following = text[len(keyword):len(keyword) + 1]
                if not (following.isalnum() or following == '_'):
                    return True
        return False

    # strip string literals from the input, so that we can parse it without having to worry about strings
    code, literals, _ = strip_string_literals(code)

    # divide the code up into lines.
    code = code.splitlines()

    # Compute the line-level code decorators.
    original_lines = list(code)
    try:
        v = []
        for line in code:
            done = False

            # Transform shell escape into sh decorator.
            if line.lstrip().startswith('!'):
                line = line.replace('!', "%%sh ", 1)

            # Check for cell decorator
            # NOTE: strip_string_literals maps % to %%, because %foo is used for python string templating.
            if line.lstrip().startswith('%%'):
                i = line.find("%")
                # +1 for the space or tab delimiter
                j = end_of_expr(line[i + 2:]) + i + 2 + 1
                expr = line[j:] % literals
                new_line = '%ssalvus.execute_with_code_decorators(*_salvus_parsing.dec_args[%s])' % (
                    line[:i], dec_counter)
                # Special case -- if % starts line *and* expr is empty (or a comment),
                # then code decorators impacts the rest of the code.
                # Otherwise the decorator only impacts this line.
                sexpr = expr.strip()
                if i == 0 and (len(sexpr) == 0 or sexpr.startswith('#')):
                    expr = ('\n'.join(code[len(v) + 1:])) % literals
                    done = True

                dec_args[dec_counter] = ([line[i + 2:j] % literals], expr)
                dec_counter += 1
            else:
                new_line = line
            v.append(new_line)
            if done:
                break
        code = v
    except Exception:
        # Best effort: if decorator processing fails for any reason, fall
        # back to the lines as they were before this step.
        code = original_lines

    ## Tested this: Completely disable block parsing:
    ## but it requires the caller to do "exec compile(block+'\n', '', 'exec') in namespace, locals", which means no display hook,
    ## so "2+2" breaks.
    ## return [[0,len(code)-1,('\n'.join(code))%literals]]

    # Remove comment lines -- otherwise could get empty blocks that can't be exec'd.
    # For example, exec compile('#', '', 'single') is a syntax error.
    # Also, comments will confuse the code to break into blocks before.
    comment_placeholders = set(
        "%%(%s)s" % label for label, text in literals.items()
        if text.startswith('#'))
    code = [x for x in code if x.strip() not in comment_placeholders]

    # take only non-whitespace lines now for Python code (string literals have already been removed).
    code = [x for x in code if x.strip()]

    # Compute the blocks, working backwards from the last line.
    i = len(code) - 1
    blocks = []
    while i >= 0:
        stop = i
        paren_depth = code[i].count('(') - code[i].count(')')
        brack_depth = code[i].count('[') - code[i].count(']')
        curly_depth = code[i].count('{') - code[i].count('}')
        # Keep moving up while the current line is indented or some
        # bracket closed below has not yet been opened.
        while i >= 0 and (
            (len(code[i]) > 0 and (code[i][0] in string.whitespace))
                or paren_depth < 0 or brack_depth < 0 or curly_depth < 0):
            i -= 1
            if i >= 0:
                paren_depth += code[i].count('(') - code[i].count(')')
                brack_depth += code[i].count('[') - code[i].count(']')
                curly_depth += code[i].count('{') - code[i].count('}')
        # NOTE: i may be -1 here, in which case the slices below take (and
        # then remove) only the last remaining line.
        block = ('\n'.join(code[i:])) % literals
        bs = block.strip()
        if bs:  # has to not be only whitespace
            blocks.insert(0, [i, stop, bs])
        code = code[:i]
        i = len(code) - 1

    def merge(index):
        "Merge block index-1 with block index."
        blocks[index - 1][-1] += '\n' + blocks[index][-1]
        blocks[index - 1][1] = blocks[index][1]
        del blocks[index]

    # merge try/except/finally/decorator/else/elif blocks
    i = 1
    while i < len(blocks):
        s = blocks[i][-1].lstrip()
        prev = blocks[i - 1][-1].lstrip()

        # finally/except lines after a try
        if starts_with_keyword(s, ('finally', 'except')) and \
                starts_with_keyword(prev, ('try', )):
            merge(i)

        # decorated definitions (functions, coroutines and classes), and
        # stacked decorators
        elif (starts_with_keyword(s, ('def', 'class', 'async'))
              or s.startswith('@')) and blocks[
                  i - 1][-1].splitlines()[-1].lstrip().startswith('@'):
            merge(i)

        # lines starting with else conditions (if *and* for *and* while!)
        elif starts_with_keyword(s, ('else', )) and starts_with_keyword(
                prev, ('if', 'while', 'for', 'try', 'elif')):
            merge(i)

        # lines starting with elif
        elif starts_with_keyword(s, ('elif', )) and starts_with_keyword(
                prev, ('if', )):
            merge(i)

        # do not merge blocks -- move on to next one
        else:
            i += 1

    return blocks
334

335

336
############################################
337

338
# Characters that may appear in an (ASCII) identifier.
CHARS0 = string.ascii_letters + string.digits + '_'
# Identifier characters plus '.', i.e. characters of a dotted name.
CHARS = CHARS0 + '.'
340

341

342
def guess_last_expression(obj):
    """
    Return the longest suffix of the string ``obj`` that consists only of
    characters in ``CHARS`` (letters, digits, underscore and dot).

    TODO: bad guess -- need to use a parser to go any further.
    """
    i = len(obj)
    while i > 0 and obj[i - 1] in CHARS:
        i -= 1
    return obj[i:]
348

349

350
def is_valid_identifier(target):
    """
    Return True if ``target`` is a nonempty string made only of characters
    in ``CHARS0`` whose first character is an ASCII letter or underscore.
    """
    if len(target) == 0:
        return False
    if target[0] not in string.ascii_letters + '_':
        return False
    return all(x in CHARS0 for x in target)
358

359

360
# Keywords from http://docs.python.org/release/2.7.2/reference/lexical_analysis.html
# NOTE: __builtins__ is a dict in imported modules but the builtins module
# itself in __main__ (a CPython implementation detail), so handle both.
_builtin_completions = list(
    __builtins__ if isinstance(__builtins__, dict) else vars(__builtins__)
) + [
    'and', 'del', 'from', 'not', 'while', 'as', 'elif', 'global', 'or', 'with',
    'assert', 'else', 'if', 'pass', 'yield', 'break', 'except', 'import',
    'print', 'class', 'exec', 'in', 'raise', 'continue', 'finally', 'is',
    'return', 'def', 'for', 'lambda', 'try'
]
367

368

369
def introspect(code, namespace, preparse=True):
    """
    Compute tab completions, a docstring, or source code for the last
    expression in ``code``.

    INPUT:

    - code -- a string containing Sage (if preparse=True) or Python code.

    - namespace -- a dictionary to complete in (we also complete using
      builtins such as 'def', 'for', etc.)

    - preparse -- a boolean

    OUTPUT:

    An object: {'result':, 'target':, 'expr':, 'status':, 'get_help':, 'get_completions':, 'get_source':}

    ``result`` is a sorted list of completions, or a string holding the
    docstring or source code.  ``status`` is always 'ok': when an
    exception occurs the traceback is printed and ``result`` is the empty
    list.

    NOTE: for anything but completion of a plain name, the code before
    the expression is exec'd and the expression itself is eval'd in
    ``namespace``, with a one second SIGALRM timeout whose handler raises
    KeyboardInterrupt.
    """
    import re
    # result: the docstring, source code, or list of completions (at
    # return, it might thus be either a list or a string)
    result = []

    # expr: the part of code that is used to do the completion, e.g.,
    # for 'a = n.m.foo', expr would be 'n.m.foo'.  It can be more complicated,
    # e.g., for '(2+3).foo.bar' it would be '(2+3).foo'.
    expr = ''

    # target: for completions, target is the part of the code that we
    # complete on in the namespace defined by the object right before
    # it, e.g., for n.m.foo, the target is "foo".  target is the empty
    # string for source code and docstrings.
    target = ''

    # When returning, exactly one of the following will be true:
    get_help = False  # getting docstring of something
    get_source = False  # getting source code of a function
    get_completions = True  # getting completions of an identifier in some namespace

    # Completion candidates.  Initialised here so that a swallowed error
    # below (e.g. an invalid wildcard pattern) yields no completions
    # instead of an unbound name.
    v = []

    try:
        # Strip all strings from the code, replacing them by template
        # symbols; this makes parsing much easier.
        # we strip, since trailing space could cause confusion below
        code0, literals, state = strip_string_literals(code.strip())

        # Move i so that it points to the start of the last expression in the code.
        # (TODO: this should probably be replaced by using ast on preparsed version.  Not easy.)
        i = max([code0.rfind(t) for t in '\n;=']) + 1
        while i < len(code0) and code0[i] in string.whitespace:
            i += 1

        # Break the line in two pieces: before_expr | expr; we may
        # need before_expr in order to evaluate and make sense of
        # expr.  We also put the string literals back in, so that
        # evaluation works.
        expr = code0[i:] % literals
        before_expr = code0[:i] % literals

        chrs = set('.()[]? ')
        if not any(c in expr for c in chrs):
            # Easy case: this is just completion on a simple identifier in the namespace.
            get_help = False
            get_completions = True
            get_source = False
            target = expr
        else:
            # Now for all of the other harder cases.
            i = max([expr.rfind(s) for s in '?('])
            # expr ends in two ?? -- source code
            if i >= 1 and i == len(expr) - 1 and expr[i - 1] == '?':
                get_source = True
                get_completions = False
                get_help = False
                target = ""
                obj = expr[:i - 1]
            # ends in ( or ? (but not ??) -- docstring
            elif i == len(expr) - 1:
                get_help = True
                get_completions = False
                get_source = False
                target = ""
                obj = expr[:i]
            # completions (not docstrings or source)
            else:
                get_help = False
                get_completions = True
                get_source = False
                i = expr.rfind('.')
                target = expr[i + 1:]
                if target == '' or is_valid_identifier(
                        target) or '*' in expr and '* ' not in expr:
                    # this case includes list.*end[tab]
                    obj = expr[:i]
                else:
                    # this case includes aaa=...;3 * aa[tab]
                    expr = guess_last_expression(target)
                    i = expr.rfind('.')
                    if i != -1:
                        target = expr[i + 1:]
                        obj = expr[:i]
                    else:
                        target = expr

        if get_completions and target == expr:
            # Completion of a plain name against the namespace and builtins.
            j = len(expr)
            candidates = list(namespace.keys()) + _builtin_completions
            if '*' in expr:
                # this case includes *_factors<TAB> and abc =...;3 * ab[tab]
                try:
                    pattern = expr.replace("*", ".*").replace("?", ".")
                    reg = re.compile(pattern + "$")
                    v = list(filter(reg.match, candidates))
                    # for 2*sq[tab]
                    if len(v) == 0:
                        gle = guess_last_expression(expr)
                        j = len(gle)
                        if j > 0:
                            target = gle
                            v = [x[j:] for x in candidates if x.startswith(gle)]
                except Exception:
                    # e.g. the wildcard expression is not a valid regular
                    # expression; keep whatever candidates we have so far.
                    pass
            else:
                v = [x[j:] for x in candidates if x.startswith(expr)]
                # for 2+sqr[tab]
                if len(v) == 0:
                    gle = guess_last_expression(expr)
                    j = len(gle)
                    if j > 0 and j < len(expr):
                        target = gle
                        v = [x[j:] for x in candidates if x.startswith(gle)]
        else:

            # We will try to evaluate
            # obj.  This is dangerous and a priori could take
            # forever, so we spend at most 1 second doing this --
            # if it takes longer a signal kills the evaluation.
            # Obviously, this could in fact lock if
            # non-interruptable code is called, which should be rare.

            O = None
            import signal

            def mysig(*args):
                raise KeyboardInterrupt

            try:
                signal.signal(signal.SIGALRM, mysig)
                signal.alarm(1)
                import sage.all_cmdline
                if before_expr.strip():
                    try:
                        exec((before_expr if not preparse else
                              preparse_code(before_expr)), namespace)
                    except Exception as msg:
                        pass
                        # uncomment for debugging only
                        # traceback.print_exc()
                # We first try to evaluate the part of the expression before the name
                try:
                    O = eval(obj if not preparse else preparse_code(obj),
                             namespace)
                except (SyntaxError, TypeError, AttributeError):
                    # If that fails, we try on a subexpression.
                    # TODO: This will not be needed when
                    # this code is re-written to parse using an
                    # AST, instead of using this lame hack.
                    obj = guess_last_expression(obj)
                    try:
                        O = eval(obj if not preparse else preparse_code(obj),
                                 namespace)
                    except BaseException:
                        # Deliberately broad: this also swallows the
                        # KeyboardInterrupt raised by the alarm handler.
                        pass
            finally:
                # Cancel any still-pending alarm, then ignore SIGALRM.
                signal.alarm(0)
                signal.signal(signal.SIGALRM, signal.SIG_IGN)

            def get_file():
                try:
                    import sage.misc.sageinspect
                    eval_getdoc = eval('getdoc(O)', {
                        'getdoc': sage.misc.sageinspect.sage_getfile,
                        'O': O
                    })
                    return "   File: " + eval_getdoc + "\n"
                except Exception as err:
                    return "Unable to read source filename (%s)" % err

            if get_help:
                import sage.misc.sageinspect
                result = get_file()
                try:

                    def our_getdoc(s):
                        # Build "Signature" and "Docstring" sections for s.
                        try:
                            x = sage.misc.sageinspect.sage_getargspec(s)
                            defaults = list(x.defaults) if x.defaults else []
                            args = list(x.args) if x.args else []
                            parts = []
                            # The **kwargs name is called "keywords" on an
                            # ArgSpec and "varkw" on a FullArgSpec.
                            if getattr(x, 'keywords', None) or getattr(
                                    x, 'varkw', None):
                                parts.insert(0, '**kwds')
                            if x.varargs:
                                parts.insert(0, '*args')
                            # Defaults belong to the last len(defaults) args.
                            while defaults:
                                d = defaults.pop()
                                k = args.pop()
                                parts.insert(0, '%s=%r' % (k, d))
                            parts = args + parts
                            t = "   Signature : %s(%s)\n" % (obj,
                                                             ', '.join(parts))
                        except Exception:
                            t = ""
                        try:
                            ds_raw = sage.misc.sageinspect.sage_getdoc(s)
                            # Decode only if the docstring itself is bytes.
                            if isinstance(ds_raw, bytes):
                                ds = ds_raw.decode('utf-8')
                            else:
                                ds = ds_raw
                            ds = ds.strip()
                            t += "   Docstring :\n%s" % ds
                        except Exception as ex:
                            t += "   Problem retrieving Docstring :\n%s" % ex
                            # print ex  # issue 1780: 'ascii' codec can't decode byte 0xc3 in position 3719: ordinal not in range(128)
                        return t

                    result += eval('getdoc(O)', {'getdoc': our_getdoc, 'O': O})
                except Exception as err:
                    result += "Unable to read docstring (%s)" % err
                # Get rid of the 3 spaces in front of everything.
                result = result.lstrip().replace('\n   ', '\n')

            elif get_source:
                import sage.misc.sageinspect
                result = get_file()
                try:
                    result += "   Source:\n   " + eval(
                        'getsource(O)', {
                            'getsource': sage.misc.sageinspect.sage_getsource,
                            'O': O
                        })
                except Exception as err:
                    result += "Unable to read source code (%s)" % err

            elif get_completions:
                if O is not None:
                    v = dir(O)
                    if hasattr(O, 'trait_names'):
                        v += O.trait_names()
                    if not target.startswith('_'):
                        v = [x for x in v if x and not x.startswith('_')]
                    # this case excludes abc = ...;for a in ab[tab]
                    if '*' in expr and '* ' not in expr:
                        try:
                            pattern = target.replace("*", ".*")
                            pattern = pattern.replace("?", ".")
                            reg = re.compile(pattern + "$")
                            v = list(filter(reg.match, v))
                        except Exception:
                            # Invalid wildcard pattern: leave v unfiltered.
                            pass
                    else:
                        j = len(target)
                        v = [x[j:] for x in v if x.startswith(target)]
                else:
                    v = []

        if get_completions:
            result = list(sorted(set(v), key=lambda x: x.lower()))

    except Exception as msg:
        traceback.print_exc()
        result = []
        # Errors are still reported with status 'ok' (and an empty result).
        status = 'ok'
    else:
        status = 'ok'
    return {
        'result': result,
        'target': target,
        'expr': expr,
        'status': status,
        'get_help': get_help,
        'get_completions': get_completions,
        'get_source': get_source
    }
660

661
Product

Resources

Company