#!/usr/bin/env python3
"""
Preparse .sage files and save the result to .sage.py files.

AUTHOR:
    -- William Stein (2005): first version
    -- William Stein (2008): fix trac #2391 and document the code.
    -- Dan Drake (2009): fix trac #5052
    -- Dan Drake (2010-12-08): fix trac #10440
    -- Johan S. R. Nielsen (2015-11-06): fix trac #17019
"""

import os
import re
import sys

from sage.misc.temporary_file import atomic_write
from sage.repl.preparse import preparse_file

# The spkg/bin/sage script passes the files to be preparsed as
# command-line arguments (sys.argv[0], the script name, is dropped).
files = sys.argv[1:]

# There must be at least 1 file or we display an error/usage message
# and exit
if not files:
    print("""Usage: {} <file1.sage> <file2.sage>...
Creates files file1.sage.py, file2.sage.py ... that are the Sage
preparsed versions of file1.sage, file2.sage ...
If a non-autogenerated .sage.py file with the same name exists, you will
receive an error and the file will not be overwritten.""".format(sys.argv[0]))
    sys.exit(1)

# This module-scope variable holds a list of all files we have seen
# while preparsing a given file.  The point is to avoid preparsing a
# file we have already seen, since mutually recursive includes would
# otherwise lead to an infinite loop.
files_so_far = []

# This message is inserted in autogenerated files so that the reader
# knows where they came from, and so we know it is safe to overwrite them.
AUTOGEN_MSG = "# This file was *autogenerated* from the file "

# We use this regexp to parse lines with load or attach statements.
# Here's what it looks for:
#
# A (possibly empty) sequence of whitespace at the beginning of the
# line, saved as a group named 'lws';
#
#   followed by
#
# the word "load" or "attach";
#
#   followed by
#
# a nonempty sequence of whitespace;
#
#   followed by
#
# whatever else is on the line, saved as a group named 'files'.
#
# We want to save the leading white space so that we can maintain
# correct indentation in the preparsed file.
load_or_attach = re.compile(r"^(?P<lws>\s*)(load|attach)\s+(?P<files>.*)$")
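# For example, the line "    load foo.sage bar.py" matches with
# lws == "    " and files == "foo.sage bar.py"; do_load_and_attach()
# below splits the 'files' group on whitespace.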

def do_preparse(f, files_before=[]):
    """
    Preparse the file f and write the result out to a filename
    with extension .sage.py.

    INPUT:

    - ``f`` -- string: the name of a file
    - ``files_before`` -- list of strings of previous filenames loaded (to avoid circular loops)

    OUTPUT: none (writes a file with extension .sage.py to disk)
    """
    if f in files_so_far:
        return
    files_so_far.append(f)
    if not os.path.exists(f):
        print("{}: File '{}' is missing".format(sys.argv[0], f))
        return
    if f.endswith('.py'):
        return
    if not f.endswith('.sage'):
        print("{}: Unknown file type {}".format(sys.argv[0], f))
        sys.exit(1)

    fname = f + ".py"
    if os.path.exists(fname):
        with open(fname) as fin:
            if AUTOGEN_MSG not in fin.read():
                print("Refusing to overwrite existing non-autogenerated file {!r}."
                      .format(os.path.abspath(fname)))
                print("Please delete or move this file manually.")
                sys.exit(1)

    # TODO:
    # I am commenting this "intelligence" out, since, e.g., if I change
    # the preparser between versions this can cause problems.  This
    # is an optimization that definitely isn't needed at present, since
    # preparsing is so fast.
    # Idea: I could introduce version numbers, though....
    #if os.path.exists(fname) and os.path.getmtime(fname) >= os.path.getmtime(f):
    #    return

    # Finally open the file
    with open(f) as fin:
        F = fin.read()

    # Check to see if a coding is specified in the .sage file. If it is,
    # then we want to copy it over to the new file and not include it in
    # the preprocessing. If both the first and second line have an
    # encoding declaration, the second line's encoding will get used.
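    # A typical declaration looks like "# -*- coding: utf-8 -*-" (PEP 263),
    # which the regex below picks up via its "coding[:=]" pattern.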

    lines = F.splitlines()
    coding = ''
    for num, line in enumerate(lines[:2]):
        if re.search(r"coding[:=]\s*([-\w.]+)", line):
            coding = line + '\n'
            F = '\n'.join(lines[:num] + lines[(num+1):])

    # It is ** critical ** that all the preparser-generated code we put
    # into the file goes after the module docstring, since otherwise
    # the docstring will not be understood by Python.
    i = find_position_right_after_module_docstring(F)
    header, body = F[:i] , F[i:]
    # Preparse the body
    body = preparse_file(body)

    # Check for "from __future__ import ..." statements. Those
    # statements need to come at the top of the file (after the
    # module-level docstring is okay), so we separate them from the
    # body.
    #
    # Note: this will convert invalid Python to valid, because it will
    # move all "from __future__ import ..." to the top of the file,
    # even if they were not at the top originally.
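    # For instance, a line "from __future__ import annotations" sitting in
    # the middle of the body is pulled out into future_imports here and
    # written near the top of the output file below.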
    future_imports, body = find_future_imports(body)

    # Check for load/attach commands.
    body = do_load_and_attach(body, f, files_before)

    # The Sage library import line along with an autogen message
    sage_incl = '%s%s\nfrom sage.all_cmdline import *   # import sage library\n' % (AUTOGEN_MSG, f)
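    # For example, for f == 'foo.sage' this consists of the two lines:
    #   # This file was *autogenerated* from the file foo.sage
    #   from sage.all_cmdline import *   # import sage library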

    # Finally, write out the result.  We use atomic_write to avoid
    # race conditions (for example, the file will never be half written).
    with atomic_write(fname) as fout:
        fout.write(coding)
        fout.write(header)
        fout.write('\n')
        fout.write(future_imports)
        fout.write('\n')
        fout.write(sage_incl)
        fout.write('\n')
        fout.write(body)
        fout.write('\n')
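    # The resulting .sage.py file thus contains, in order: the coding
    # line (if any), the original module docstring/header, any
    # "from __future__" imports, the autogen message plus the Sage
    # library import, and finally the preparsed body.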

def find_position_right_after_module_docstring(G):
    """
    Return the first position right after the module docstring of G, if
    it has one.  Otherwise return 0.

    INPUT:
        G -- a string
    OUTPUT:
        an integer -- an index i into G such that G[i:] starts right
                      after the module docstring of G, if G has one.
    """
    # The basic idea below is that we look at each line first ignoring
    # all empty lines and commented out lines.  Then we check to see
    # if the next line is a docstring.  If so, we find where that
    # docstring ends.
    v = G.splitlines()
    i = 0
    while i < len(v):
        s = v[i].strip()
        if s and s[0] != '#':
            break
        i += 1
    if i >= len(v):
        # No module docstring --- the entire file is empty or commented out
        return 0

    # Now v[i] contains the first line of the first statement in the file.
    # Is it a docstring?
    n = v[i].lstrip()
    if not (n[0] in ['"',"'"] or n[0:2] in ['r"',"r'"]):
        # not a docstring
        return 0

    # First line is the module docstring.  Where does it end?
    def pos_after_line(k):
        return sum(len(v[j])+1 for j in range(k+1))

    n = n.lstrip('r')  # strip leading r if there is one
    if n[:3] in ["'''", '"""']:
        quotes = n[:3]
        # possibly multiline
        if quotes in n[3:]:
            return pos_after_line(i)
        j = i+1
        while j < len(v) and quotes not in v[j]:
            j += 1
        return pos_after_line(j)
    else:
        # it must be a single line; so add up the lengths of all lines
        # including this one and return that
        return pos_after_line(i)


def find_future_imports(G):
    """
    Parse a file G as a string, looking for "from __future__ import ...".

    Return a tuple: (the import statements, the file G with those
    statements removed)

    INPUT:
        G -- a string; the contents of a file

    This can only handle "from __future__ import ..." statements which
    are completely on a single line: nothing of the form ::

        from __future__ import \
            print_function

    or ::

        from __future__ import (print_function,
                                division)

    This function will raise an error if it detects lines of these forms.
    """
    import_statements = []
    # "from __future__ import ..." should not be indented.
    m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)
    while m:
        statement = m.group(0)
        # If the import statement ends in a line continuation marker
        # or if it contains a left parenthesis but not a right one,
        # then the statement is not complete, so raise an error. (This
        # is not a perfect check and some bad cases may slip through,
        # like two left parentheses and only one right parenthesis,
        # but they should be rare.)
        if (statement[-1] == '\\' or
            (statement.find('(') > -1 and statement.find(')') == -1)):
            raise NotImplementedError('the Sage preparser can only preparse "from __future__ import ..." statements which are on a single line')
        import_statements.append(statement)
        G = G[:m.start()] + G[m.end():]
        m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)
    return ('\n'.join(import_statements), G)


def do_load_and_attach(G, file, files_before):
    """
    Parse a file G and replace load and attach statements with the
    corresponding exec statements.

    INPUT:
        G -- a string; a file loaded in from disk
        file -- the name of the file that contains the non-preparsed
                version of G.
        files_before -- list of files seen so far (to avoid recursing
                into an infinite loop)

    OUTPUT:
        string -- result of parsing load/attach statements in G, i.e.
                  modified version of G with execfiles.
    """
    s = ''
    for t in G.split('\n'):
        z = load_or_attach.match(t)
        if z:
            files = z.group('files').split()
            lws = z.group('lws')
            for w in files:
                name = w.replace(',', '').replace('"', '').replace("'", "")
                if name in files_before:
                    print("WARNING: not loading {} (in {}) again since that would cause a circular loop"
                          .format(name, file))
                    continue
                if name.endswith('.sage'):
                    do_preparse(name, files_before + [file])
                    s += lws + ("exec(compile(open({0!r}).read(), {0!r}, "
                                "'exec'))\n").format(name + '.py')
                elif name.endswith('.py'):
                    s += lws + ("exec(compile(open({0!r}).read(), {0!r}, "
                                "'exec'))\n").format(name)
        else:
            s += t + '\n'
    return s


# Here we do the actual work.  We iterate over every
# file in the input arguments and create the corresponding
# output file.
for f in files:
    do_preparse(f)