#!/usr/bin/env python3
"""
Preparse .sage files and save the result to .sage.py files.
AUTHORS:
-- William Stein (2005): first version
-- William Stein (2008): fix trac #2391 and document the code.
-- Dan Drake (2009): fix trac #5052
-- Dan Drake (2010-12-08): fix trac #10440
-- Johan S. R. Nielsen (2015-11-06): fix trac #17019
"""
import os
import re
import sys
from sage.misc.temporary_file import atomic_write
from sage.repl.preparse import preparse_file
# The spkg/bin/sage script passes the files to be preparsed as
# command-line arguments; we drop sys.argv[0] (the script name) here.
files = sys.argv[1:]
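# For example, if this script is invoked as "sage-preparse foo.sage bar.sage"
# (hypothetical script name and filenames), then files == ['foo.sage', 'bar.sage'].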
# There must be at least 1 file or we display an error/usage message
# and exit
if not files:
print("""Usage: {} <file1.sage> <file2.sage>...
Creates files file1.sage.py, file2.sage.py ... that are the Sage
preparsed versions of file1.sage, file2.sage ...
If a non-autogenerated .sage.py file with the same name exists, you will
receive an error and the file will not be overwritten.""".format(sys.argv[0]))
sys.exit(1)
# The module-scope variable ``files_so_far`` contains a list of all
# files we have seen while preparsing a given file. The point of this
# is that we want to avoid preparsing a file we have already seen,
# since mutually recursive includes would otherwise result in infinite
# loops.
files_so_far = []
# This message is inserted into autogenerated files so that the reader
# knows they were autogenerated, and so that this script knows it is
# safe to overwrite them.
AUTOGEN_MSG = "# This file was *autogenerated* from the file "
# We use this regexp to parse lines with load or attach statements.
# Here's what it looks for:
#
# A (possibly empty) sequence of whitespace at the beginning of the
# line, saved as a group named 'lws';
#
# followed by
#
# the word "load" or "attach";
#
# followed by
#
# a nonempty sequence of whitespace;
#
# followed by
#
# whatever else is on the line, saved as a group named 'files'.
#
# We want to save the leading white space so that we can maintain
# correct indentation in the preparsed file.
load_or_attach = re.compile(r"^(?P<lws>\s*)(load|attach)\s+(?P<files>.*)$")
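# For example, a line like "    load foo.sage bar.py" matches, with the
# group 'lws' equal to "    " and the group 'files' equal to
# "foo.sage bar.py"; a line like "loaded = True" does not match, since
# "load" must be followed by whitespace.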
def do_preparse(f, files_before=[]):
"""
Preparse the file f and write the result out to a filename
with extension .sage.py.
INPUT:
    - ``f`` -- string; the name of the ``.sage`` file to preparse
    - ``files_before`` -- list of strings; the names of files already
      loaded (used to avoid infinite recursion on circular includes)
OUTPUT: none (writes a file with extension .sage.py to disk)
"""
if f in files_so_far:
return
files_so_far.append(f)
if not os.path.exists(f):
print("{}: File '{}' is missing".format(sys.argv[0], f))
return
if f.endswith('.py'):
return
if not f.endswith('.sage'):
print("{}: Unknown file type {}".format(sys.argv[0], f))
sys.exit(1)
fname = f + ".py"
if os.path.exists(fname):
with open(fname) as fin:
if AUTOGEN_MSG not in fin.read():
print("Refusing to overwrite existing non-autogenerated file {!r}."
.format(os.path.abspath(fname)))
print("Please delete or move this file manually.")
sys.exit(1)
# TODO:
# I am commenting this "intelligence" out, since, e.g., if I change
# the preparser between versions this can cause problems. This
# is an optimization that definitely isn't needed at present, since
# preparsing is so fast.
# Idea: I could introduce version numbers, though....
#if os.path.exists(fname) and os.path.getmtime(fname) >= os.path.getmtime(f):
# return
# Finally open the file
with open(f) as fin:
F = fin.read()
    # Check to see if a coding is specified in the .sage file. If it is,
    # then we want to copy it over to the new file and not run it
    # through the preparser. If both the first and second line have an
    # encoding declaration, the second line's encoding will be used.
lines = F.splitlines()
coding = ''
for num, line in enumerate(lines[:2]):
if re.search(r"coding[:=]\s*([-\w.]+)", line):
coding = line + '\n'
F = '\n'.join(lines[:num] + lines[(num+1):])
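    # For example, a line "# -*- coding: utf-8 -*-" at the top of the
    # .sage file is copied verbatim to the top of the output file and
    # stripped from F before preparsing.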
    # It is **critical** that everything the preparser adds to the file
    # goes after the module docstring, since otherwise Python would no
    # longer recognize the docstring as the module docstring.
    i = find_position_right_after_module_docstring(F)
    header, body = F[:i], F[i:]
# Preparse the body
body = preparse_file(body)
    # Check for "from __future__ import ..." statements. Those
    # statements need to come at the top of the file (placing them
    # right after the module-level docstring is okay), so we separate
    # them from the body.
#
# Note: this will convert invalid Python to valid, because it will
# move all "from __future__ import ..." to the top of the file,
# even if they were not at the top originally.
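    # For example, an unindented line "from __future__ import division"
    # anywhere in the body is pulled out here and re-emitted near the
    # top of the output file, right after the module docstring.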
future_imports, body = find_future_imports(body)
# Check for load/attach commands.
body = do_load_and_attach(body, f, files_before)
    # The Sage library import line, together with the autogen message.
    sage_incl = '%s%s\nfrom sage.all_cmdline import * # import sage library\n' % (AUTOGEN_MSG, f)
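    # For a hypothetical source file "foo.sage", sage_incl reads:
    #   # This file was *autogenerated* from the file foo.sage
    #   from sage.all_cmdline import * # import sage library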
# Finally, write out the result. We use atomic_write to avoid
# race conditions (for example, the file will never be half written).
    with atomic_write(fname) as fout:
        fout.write(coding)
        fout.write(header)
        fout.write('\n')
        fout.write(future_imports)
        fout.write('\n')
        fout.write(sage_incl)
        fout.write('\n')
        fout.write(body)
        fout.write('\n')
def find_position_right_after_module_docstring(G):
"""
    Return the first position right after the module docstring of G, if
    it has one. Otherwise return 0.
    INPUT:
    G -- a string; the contents of a file
    OUTPUT:
    an integer -- the index i into G such that G[:i] ends right after
    the module docstring of G (0 if G has no module docstring).
"""
# The basic idea below is that we look at each line first ignoring
# all empty lines and commented out lines. Then we check to see
# if the next line is a docstring. If so, we find where that
# docstring ends.
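    # Illustrative example: for G = '"""doc"""\nx = 1\n' this function
    # returns 10, so that G[:10] is the docstring line (with its
    # newline) and G[10:] starts with 'x = 1'.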
v = G.splitlines()
i = 0
while i < len(v):
s = v[i].strip()
if s and s[0] != '#':
break
i += 1
if i >= len(v):
        # No module docstring: the whole file is empty, blank, or
        # commented out
return 0
# Now v[i] contains the first line of the first statement in the file.
# Is it a docstring?
n = v[i].lstrip()
    if not (n[0] in ['"', "'"] or n[0:2] in ['r"', "r'"]):
# not a docstring
return 0
# First line is the module docstring. Where does it end?
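    # Helper: index into G just past the end of line k; each line in v
    # contributes len(line) + 1 characters, counting its newline.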
def pos_after_line(k):
return sum(len(v[j])+1 for j in range(k+1))
n = n.lstrip('r') # strip leading r if there is one
if n[:3] in ["'''", '"""']:
quotes = n[:3]
# possibly multiline
if quotes in n[3:]:
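            # The closing triple quotes appear on the same line as the
            # opening ones, so the docstring ends on line i.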
return pos_after_line(i)
j = i+1
while j < len(v) and quotes not in v[j]:
j += 1
return pos_after_line(j)
else:
# it must be a single line; so add up the lengths of all lines
# including this one and return that
return pos_after_line(i)
def find_future_imports(G):
"""
Parse a file G as a string, looking for "from __future__ import ...".
Return a tuple: (the import statements, the file G with those
statements removed)
INPUT:
G -- a string; the contents of a file
This can only handle "from __future__ import ..." statements which
are completely on a single line: nothing of the form ::
from __future__ import \
print_function
or ::
from __future__ import (print_function,
division)
This function will raise an error if it detects lines of these forms.
"""
import_statements = []
# "from __future__ import ..." should not be indented.
m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)
while m:
statement = m.group(0)
# If the import statement ends in a line continuation marker
# or if it contains a left parenthesis but not a right one,
# then the statement is not complete, so raise an error. (This
# is not a perfect check and some bad cases may slip through,
# like two left parentheses and only one right parenthesis,
# but they should be rare.)
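        # For example (illustrative), "from __future__ import (division,"
        # triggers the error below, while "from __future__ import
        # print_function, division" on a single line is accepted.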
if (statement[-1] == '\\' or
(statement.find('(') > -1 and statement.find(')') == -1)):
raise NotImplementedError('the Sage preparser can only preparse "from __future__ import ..." statements which are on a single line')
import_statements.append(statement)
G = G[:m.start()] + G[m.end():]
m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)
return ('\n'.join(import_statements), G)
def do_load_and_attach(G, file, files_before):
"""
    Parse a file G and replace load and attach statements with the
    corresponding exec statements.
INPUT:
G -- a string; a file loaded in from disk
file -- the name of the file that contains the non-preparsed
version of G.
    files_before -- list of files seen so far (used to avoid infinite
        recursion on circular includes)
OUTPUT:
    string -- result of parsing load/attach statements in G, i.e. a
        modified version of G with exec statements in their place.
"""
s = ''
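    # Illustrative example: an input line "    load util.sage" becomes
    #     exec(compile(open('util.sage.py').read(), 'util.sage.py', 'exec'))
    # in the output (with the original leading whitespace preserved),
    # and util.sage itself is preparsed to util.sage.py first.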
for t in G.split('\n'):
z = load_or_attach.match(t)
if z:
files = z.group('files').split()
lws = z.group('lws')
for w in files:
name = w.replace(',', '').replace('"', '').replace("'", "")
if name in files_before:
                    print("WARNING: not loading {} (in {}) again since doing so would cause a circular load"
                          .format(name, file))
continue
                if name.endswith('.sage'):
                    do_preparse(name, files_before + [file])
                    s += lws + "exec(compile(open('{0}.py').read(), '{0}.py', 'exec'))\n".format(name)
                elif name.endswith('.py'):
                    s += lws + "exec(compile(open('{0}').read(), '{0}', 'exec'))\n".format(name)
else:
s += t + '\n'
return s
# Here we do the actual work. We iterate over every
# file in the input args and create the corresponding
# output file.
for f in files:
do_preparse(f)