#!/usr/bin/env python3
"""
Preparse .sage files and save the result to .sage.py files.

AUTHOR:
    -- William Stein (2005): first version
    -- William Stein (2008): fix trac #2391 and document the code.
    -- Dan Drake (2009): fix trac #5052
    -- Dan Drake (2010-12-08): fix trac #10440
    -- Johan S. R. Nielsen (2015-11-06): fix trac #17019
"""

import os
import re
import sys

from sage.misc.temporary_file import atomic_write
from sage.repl.preparse import preparse_file

# The spkg/bin/sage script passes the files to be preparsed as
# arguments (sys.argv[0] itself is dropped).
files = sys.argv[1:]

# There must be at least 1 file, or we display an error/usage message
# and exit.
if not files:
    print("""Usage: {} <file1.sage> <file2.sage>...

Creates files file1.sage.py, file2.sage.py ... that are the Sage
preparsed versions of file1.sage, file2.sage ...

If a non-autogenerated .sage.py file with the same name exists, you
will receive an error and the file will not be overwritten.""".format(sys.argv[0]))
    sys.exit(1)

# The module-scope variable files_so_far contains a list of all files
# we have seen while preparsing a given file.  The point of this is
# that we want to avoid preparsing a file we have already seen, since
# then infinite loops would result from mutually recursive includes.
files_so_far = []

# This message is inserted in autogenerated files so that the reader
# will know, and so we know it is safe to overwrite them.
AUTOGEN_MSG = "# This file was *autogenerated* from the file "

# We use this regexp to parse lines with load or attach statements.
# Here's what it looks for:
#
# A (possibly empty) sequence of whitespace at the beginning of the
# line, saved as a group named 'lws';
#
# followed by
#
# the word "load" or "attach";
#
# followed by
#
# a nonempty sequence of whitespace;
#
# followed by
#
# whatever else is on the line, saved as a group named 'files'.
#
# We want to save the leading white space so that we can maintain
# correct indentation in the preparsed file.
load_or_attach = re.compile(r"^(?P<lws>\s*)(load|attach)\s+(?P<files>.*)$")


def do_preparse(f, files_before=[]):
    """
    Preparse the file f and write the result out to a filename with
    extension .sage.py.

    INPUT:

    - ``f`` -- string: the name of a file

    - ``files_before`` -- list of strings of previous filenames loaded
      (to avoid circular loops)

    OUTPUT: none (writes a file with extension .sage.py to disk)
    """
    if f in files_so_far:
        return
    files_so_far.append(f)
    if not os.path.exists(f):
        print("{}: File '{}' is missing".format(sys.argv[0], f))
        return
    if f.endswith('.py'):
        return
    if not f.endswith('.sage'):
        print("{}: Unknown file type {}".format(sys.argv[0], f))
        sys.exit(1)

    fname = f + ".py"
    if os.path.exists(fname):
        with open(fname) as fin:
            if AUTOGEN_MSG not in fin.read():
                print("Refusing to overwrite existing non-autogenerated file {!r}."
                      .format(os.path.abspath(fname)))
                print("Please delete or move this file manually.")
                sys.exit(1)

    # TODO:
    # I am commenting this "intelligence" out, since, e.g., if I change
    # the preparser between versions this can cause problems.  This is
    # an optimization that definitely isn't needed at present, since
    # preparsing is so fast.
    # Idea: I could introduce version numbers, though....
    #if os.path.exists(fname) and os.path.getmtime(fname) >= os.path.getmtime(f):
    #    return

    # Finally open the file.
    with open(f) as fin:
        F = fin.read()

    # Check to see if a coding is specified in the .sage file.  If it
    # is, then we want to copy it over to the new file and not include
    # it in the preprocessing.  If both the first and second line have
    # an encoding declaration, the second line's encoding will get
    # used.
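    # For example (an illustrative, hypothetical input), a first line
    # of the form
    #     # -*- coding: utf-8 -*-
    # is detected by the loop below, saved in ``coding`` and removed
    # from F, so that it can be written back as the very first line of
    # the generated .sage.py file.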
    lines = F.splitlines()
    coding = ''
    for num, line in enumerate(lines[:2]):
        if re.search(r"coding[:=]\s*([-\w.]+)", line):
            coding = line + '\n'
            F = '\n'.join(lines[:num] + lines[(num+1):])

    # It is ** critical ** that all the preparser stuff we put into
    # the file is put after the module docstring, since otherwise the
    # docstring will not be understood by Python.
    i = find_position_right_after_module_docstring(F)
    header, body = F[:i], F[i:]

    # Preparse the body.
    body = preparse_file(body)

    # Check for "from __future__ import ..." statements.  Those
    # statements need to come at the top of the file (after the
    # module-level docstring is okay), so we separate them from the
    # body.
    #
    # Note: this will convert invalid Python to valid, because it will
    # move all "from __future__ import ..." statements to the top of
    # the file, even if they were not at the top originally.
    future_imports, body = find_future_imports(body)

    # Check for load/attach commands.
    body = do_load_and_attach(body, f, files_before)

    # The Sage library import line along with an autogen message.
    sage_incl = '%s%s\nfrom sage.all_cmdline import * # import sage library\n' % (AUTOGEN_MSG, f)

    # Finally, write out the result.  We use atomic_write to avoid
    # race conditions (for example, the file will never be half
    # written).
    with atomic_write(fname) as f:
        f.write(coding)
        f.write(header)
        f.write('\n')
        f.write(future_imports)
        f.write('\n')
        f.write(sage_incl)
        f.write('\n')
        f.write(body)
        f.write('\n')


def find_position_right_after_module_docstring(G):
    """
    Return the first position right after the module docstring of G,
    if it has one.  Otherwise return 0.

    INPUT:

    G -- a string

    OUTPUT:

    an integer -- the index i into G so that G[i] is right after the
    module docstring of G, if G has one.
    """
    # The basic idea below is that we look at each line, first
    # ignoring all empty lines and commented-out lines.  Then we check
    # to see if the next line is a docstring.  If so, we find where
    # that docstring ends.
    v = G.splitlines()
    i = 0
    while i < len(v):
        s = v[i].strip()
        if s and s[0] != '#':
            break
        i += 1
    if i >= len(v):
        # No module docstring --- the entire file is blank or commented out.
        return 0

    # Now v[i] contains the first line of the first statement in the
    # file.  Is it a docstring?
    n = v[i].lstrip()
    if not (n[0] in ['"', "'"] or n[0:2] in ['r"', "r'"]):
        # not a docstring
        return 0

    # The first line is the module docstring.  Where does it end?
    def pos_after_line(k):
        return sum(len(v[j]) + 1 for j in range(k + 1))

    n = n.lstrip('r')  # strip the leading r if there is one
    if n[:3] in ["'''", '"""']:
        quotes = n[:3]  # possibly multiline
        if quotes in n[3:]:
            return pos_after_line(i)
        j = i + 1
        while j < len(v) and quotes not in v[j]:
            j += 1
        return pos_after_line(j)
    else:
        # It must be a single line, so add up the lengths of all lines
        # including this one and return that.
        return pos_after_line(i)


def find_future_imports(G):
    """
    Parse the file G as a string, looking for
    "from __future__ import ..." statements.  Return a tuple: (the
    import statements, the file G with those statements removed).

    INPUT:

    G -- a string; the contents of a file

    This can only handle "from __future__ import ..." statements which
    are completely on a single line: nothing of the form ::

        from __future__ import \
            print_function

    or ::

        from __future__ import (print_function,
                                division)

    This function will raise an error if it detects lines of these
    forms.
    """
    import_statements = []
    # "from __future__ import ..." should not be indented.
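    # For example (an illustrative, hypothetical body), if G contains
    # the line
    #     from __future__ import print_function
    # the loop below moves it into import_statements and deletes it
    # from G, so that it can later be emitted near the top of the
    # generated file.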
    m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)
    while m:
        statement = m.group(0)
        # If the import statement ends in a line continuation marker,
        # or if it contains a left parenthesis but not a right one,
        # then the statement is not complete, so raise an error.
        # (This is not a perfect check and some bad cases may slip
        # through, like two left parentheses and only one right
        # parenthesis, but they should be rare.)
        if (statement[-1] == '\\'
                or (statement.find('(') > -1 and statement.find(')') == -1)):
            raise NotImplementedError('the Sage preparser can only preparse '
                                      '"from __future__ import ..." statements '
                                      'which are on a single line')
        import_statements.append(statement)
        G = G[:m.start()] + G[m.end():]
        m = re.search("^(from __future__ import .*)$", G, re.MULTILINE)

    return ('\n'.join(import_statements), G)


def do_load_and_attach(G, file, files_before):
    """
    Parse the file G and replace load and attach statements with the
    corresponding exec statements.

    INPUT:

    G -- a string; a file loaded in from disk

    file -- the name of the file that contains the non-preparsed
    version of G

    files_before -- list of files seen so far (so we don't recurse
    into an infinite loop)

    OUTPUT:

    string -- the result of parsing the load/attach statements in G,
    i.e. a modified version of G with the corresponding exec
    statements.
    """
    s = ''
    for t in G.split('\n'):
        z = load_or_attach.match(t)
        if z:
            files = z.group('files').split()
            lws = z.group('lws')
            for w in files:
                name = w.replace(',', '').replace('"', '').replace("'", "")
                if name in files_before:
                    print("WARNING: not loading {} (in {}) again since that would cause a circular loop"
                          .format(name, file))
                    continue
                if name.endswith('.sage'):
                    do_preparse(name, files_before + [file])
                    s += lws + "exec(compile(open('{0}.py').read(), '{0}.py', 'exec'))\n".format(name)
                elif name.endswith('.py'):
                    s += lws + "exec(compile(open('{0}').read(), '{0}', 'exec'))\n".format(name)
        else:
            s += t + '\n'
    return s


# Here we do the actual work.  We iterate over every file in the
# input args and create the corresponding output file.
for f in files:
    do_preparse(f)
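
# Example invocation (hypothetical file names), mirroring the usage
# message above:
#
#     <this script> foo.sage bar.sage
#
# This writes the preparsed output to foo.sage.py and bar.sage.py,
# refusing to overwrite any existing .sage.py file that lacks the
# AUTOGEN_MSG marker.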