#!/usr/bin/env python
# Copyright (c) 2008, William Stein (with permission)
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of the Sage Project nor the
# names of its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY WILLIAM STEIN ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL William Stein BE LIABLE FOR ANY
# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# uses system command 'file' to print out
# types of all files in (Sage) source tar
#
# prints "DONE" when finished
#
# takes about 15 CPU minutes to go through Sage
#
# version 20080118
#
# The following file types are considered understood,
# so nothing is reported about them
#
# TXT type
# __init__.py, nodoctest.py files
# .hg directories
# *.html files (XML type)
# *.pdf (PDF type)
# symbolic links
# *.png (PNG image data)
# *.gif (GIF image data)
# *.tiff (TIFF image data)
# *.jpg (JPEG image data)
# *.s (Assembler source)
#
# Bad files (understood so NOT considered weird)
#
# MS-DOS executables
# ._* (AppleDouble encoded Macintosh files)
# *.class (JAVA)
# *.so (MACH-O)
# *.o (MACH-O)
# *.dylib (MACH-O)
# ELF
#
# Verbosity of the report; exactly one of the assignments below is active.
#   0 - print everything: each file and directory visited with its type,
#       plus every unpack / repack / rename step
#   1 - print only files whose type is considered weird
#   2 - print only the paths of "bad" files (see the list in the header)
# Files whose type is not recognised at all are reported at every level.
#runlevel = 0 # print everything
#runlevel = 1 # only print weird file types
runlevel = 2 # only print "bad" stuff
import os
import subprocess
import sys

import sage.misc.misc
# Exactly one command-line argument is required: the tar file to inspect.
if len(sys.argv) != 2:
    print("tar file required")
    # A usage error is a failure; report it through the exit status too.
    sys.exit(1)
print(os.path.abspath('.'))
name = sys.argv[1]
basename = os.path.split(name)[1]
# All further work happens on a copy inside a scratch directory.
# NOTE(review): sage.misc.misc.tmp_dir() presumably creates and returns a
# fresh temporary directory -- confirm against the Sage sources.
tempdir = sage.misc.misc.tmp_dir()
# Pass argument lists instead of a shell string so that paths containing
# spaces or shell metacharacters are handed to cp/tar unchanged.
subprocess.call(["cp", name, tempdir])
os.chdir(tempdir)
cmd = "tar xvf " + basename
print(cmd)
subprocess.call(["tar", "xvf", basename])
def cleanup():
    """Delete the temporary working directory together with its contents."""
    # The module-level ``tempdir`` is only read here, so no ``global``
    # declaration is required.
    remove_cmd = 'rm -rf "%s"' % tempdir
    os.system(remove_cmd)
# From here on ``name`` refers to the copy of the tar file inside the
# temporary directory, which is now the current directory.
name = basename
# Work stack for the traversal: file names (strings) still to be examined,
# interleaved with negative integers that the main loop interprets as
# pending actions (go up a directory, re-tar, re-compress, rename back).
stack = [name]
print(os.getcwd() + "/" + name)
print("%s %s" % ("runlevel= ", runlevel))
if not (os.path.isfile(name) or os.path.isdir(name)):
    print("does not exist!!!")
    cleanup()
    # Nothing to scan: signal the failure through the exit status as well.
    sys.exit(1)
# ---------------------------------------------------------------------------
# Main traversal.
#
# ``stack`` holds the names of files still to be examined (relative to the
# current directory) mixed with the integer opcodes below.  Containers
# (directories, tar files, compressed files, .spkg packages) are opened up
# in place, their contents are pushed for inspection, and an opcode is left
# underneath so that the container is restored once its contents are done.
# ---------------------------------------------------------------------------
OP_CHDIR_UP = -1  # leave the directory most recently entered
OP_RETAR = -2     # operands: unpacked name, tar file name -> rebuild the tar
OP_GZIP = -3      # operand: file to compress again with gzip
OP_BZIP2 = -4     # operand: file to compress again with bzip2
OP_RENAME = -5    # operands: temporary name, original name -> rename back

# How a recognised file type is reported, depending on ``runlevel``.
UNDERSTOOD = "understood"  # printed at runlevel 0 only
WEIRD = "weird"            # printed at runlevel 0 and 1
BAD = "bad"                # labelled at runlevel 0, bare path at runlevel 2


def _leading(pattern, label=None, status=WEIRD, when=None):
    """Rule matching a `file` description that starts with *pattern*."""
    return (True, pattern, label or pattern, status, when)


def _anywhere(pattern, label=None, status=WEIRD, when=None):
    """Rule matching a `file` description that contains *pattern*."""
    return (False, pattern, label or pattern, status, when)


def _has_suffix(*endings):
    """Return a file-name test that is true for any of the given endings."""
    return lambda fname: fname.endswith(endings)


def _is_named(*names):
    """Return a file-name test that is true for exactly the given names."""
    return lambda fname: fname in names


def _is_appledouble_name(fname):
    """True for the ``._*`` names used by AppleDouble resource files."""
    return fname.startswith("._")


# Ordered rules; the first one that matches decides.  Each rule is
# (anchored, pattern, label, status, when).  ``status`` applies when the
# file-name test ``when`` is absent or true; a recognised type sitting
# under an unexpected name is always reported as WEIRD.
FILE_TYPE_RULES = [
    _anywhere('text', 'TXT', UNDERSTOOD),
    _anywhere('TeX DVI'),
    _leading('XML', status=UNDERSTOOD, when=_has_suffix('.html')),
    _leading('Zip archive data', 'ZIP'),
    _anywhere('Java', 'JAVA', BAD, _has_suffix('.class')),
    _leading('data', 'DATA'),
    _leading('very short file (no magic)', status=UNDERSTOOD,
             when=_is_named('__init__.py')),
    _leading('DCL command file'),
    _leading('CLIPPER instruction trace'),
    _leading('Palm OS dynamic library data'),
    _leading('80386 COFF'),
    _leading('ACB archive data'),
    _leading('MS Windows HtmlHelp Data'),
    _leading('AppleDouble encoded Macintosh file', status=BAD,
             when=_is_appledouble_name),
    _leading('Macromedia Flash data'),
    _leading('Microsoft Installer'),
    _leading('PNG image data', status=UNDERSTOOD, when=_has_suffix('.png')),
    _leading('empty', status=UNDERSTOOD,
             when=_is_named('__init__.py', 'nodoctest.py')),
    _anywhere('8086 relocatable'),
    _leading('PC bitmap data'),
    _leading('GIF image data', status=UNDERSTOOD, when=_has_suffix('.gif')),
    _leading('Apple binary property list'),
    _leading('LaTeX table of contents'),
    _leading('Makeindex log file'),
    _leading('LaTeX raw index file'),
    _anywhere('LaTeX auxiliary file'),
    _anywhere('TIFF image data', status=UNDERSTOOD,
              when=_has_suffix('.tiff')),
    _anywhere('DOS EPS Binary File'),
    _anywhere('MPEG sequence'),
    _anywhere('JPEG image data', status=UNDERSTOOD,
              when=_has_suffix('.jpg')),
    _leading('Apple Old Partition'),
    _leading('current ar archive'),
    _leading('python 2.3 byte-compiled'),
    _leading('python 2.4 byte-compiled'),
    _leading('python 2.5 byte-compiled'),
    _anywhere('Assembler source', status=UNDERSTOOD, when=_has_suffix('.s')),
    _leading('PDF', status=UNDERSTOOD, when=_has_suffix('.pdf')),
    _leading('MS-DOS executable', status=BAD),
    _leading('DOS executable'),
    _leading('Matlab v5 mat-file'),
    _leading('Extreme Tracker AMS Module v1.3'),
    _leading('JVT NAL sequence'),
    _leading('NeXT/Apple typedstream data'),
    _leading('Rich Text Format data'),
    _leading('AppleSingle encoded Macintosh file'),
    _leading('Adobe Photoshop Image'),
    _leading('Macintosh Application (data)'),
    _leading('X11 SNF font data'),
    _leading('Sun/NeXT audio data'),
    _leading('Berkeley DB'),
    _leading('multipart/mixed'),
    _leading('message/rfc822'),
    _leading('LaTeX sorted index'),
    _leading('Xara graphics file'),
    _leading('PalmOS application'),
    _leading('Par archive data'),
    _leading('PGP key public ring'),
    _leading('GPG key public ring'),
    _leading('PGP key security ring'),
    _leading('FITS image data'),
    _leading('Bio-Rad .PIC Image File'),
    _leading('Arhangel archive data'),
    _leading('RISC OS Draw file data'),
    _leading('CLIPPER instruction profile'),
    _leading('character Computer Graphics Metafile'),
    _leading('Windows INF file'),
    _leading('Emacs v18 byte-compiled Lisp data'),
    _leading('Netpbm PPM'),
    _leading('TrueType font data'),
    _leading('ASCII font metrics'),
    _leading('Maple worksheet'),
    _leading('GNU message catalog'),
    _leading('ELF 32-bit LSB executable, Intel 80386',
             'ELF 32-bit LSB executable', BAD),
    _leading('symbolic link', status=UNDERSTOOD),
    _anywhere('SysEx File'),
    _leading('libtool library file'),
    _leading('SQLite 3.x database'),
    _leading('YAC archive data'),
    _leading('PGP armored data'),
    _leading('Apple QuickTime movie'),
    _leading('Quake I or II world'),
    _leading('Mach-O', status=BAD,
             when=_has_suffix('.so', '.o', '.dylib')),
    _leading('ELF 64-bit', status=BAD),
    _leading('ELF 32-bit', status=BAD),
    _leading('SPEC'),
    _leading('Octave binary data'),
    _leading('DBase 3 data file'),
    _leading('OpenOffice.org'),
    _leading('fifo (named pipe)'),
    _leading('broken symbolic link'),
    _leading('timezone data'),
    _leading('Macintosh Library (data)'),
    _leading('SPSS System File'),
    _leading('mc68020 pure executable not stripped'),
    _leading('MPEG ADTS'),
    _leading('XPack DiskImage archive data'),
    _leading('VMS Alpha executable'),
]


def _run(*argv):
    """Run an external command without a shell and return its exit status.

    Passing an argument list means file names containing spaces, quotes or
    other shell metacharacters reach the command unchanged.
    """
    return subprocess.call(list(argv))


def _capture(*argv):
    """Run an external command without a shell and return its stdout text."""
    proc = subprocess.Popen(list(argv), stdout=subprocess.PIPE,
                            universal_newlines=True)
    return proc.communicate()[0]


def _describe(fname):
    """Return ``(raw_output, description)`` as reported by `file` for *fname*.

    The description is the output with the leading ``"<name>:"`` removed,
    so that type patterns are never matched against the file name itself.
    """
    output = _capture("file", fname)
    prefix = fname + ":"
    if output.startswith(prefix):
        desc = output[len(prefix):]
    else:
        # The name was not echoed back verbatim; fall back to the text
        # after the first colon.
        desc = output.partition(":")[2]
    return output, desc.strip()


def _tar_top_level(listing):
    """Return the first top-level path component named in a `tar tf` listing.

    Empty and "." components are skipped, so "./" entries and "./dir/x"
    style members are handled.  Returns None when the listing names nothing.
    """
    for line in listing.splitlines():
        parts = [part for part in line.split("/") if part not in ("", ".")]
        if parts:
            return parts[0]
    return None


def _classify(fname, desc):
    """Return ``(label, status)`` from the first matching rule, else None."""
    for anchored, pattern, label, status, when in FILE_TYPE_RULES:
        matched = desc.startswith(pattern) if anchored else pattern in desc
        if not matched:
            continue
        if when is not None and not when(fname):
            status = WEIRD
        return label, status
    return None


def _report(fullname, label, status):
    """Print a classified file as dictated by *status* and ``runlevel``."""
    tagged = fullname + ": " + label
    if status == BAD:
        if runlevel == 0:
            print(tagged)
        elif runlevel == 2:
            print(fullname)
    elif status == UNDERSTOOD:
        if runlevel == 0:
            print(tagged)
    elif runlevel < 2:
        print(tagged)


def _trace(message):
    """Print a progress message; these are shown at runlevel 0 only."""
    if runlevel == 0:
        print(message)


while stack:
    name = stack.pop()

    # ---- deferred actions -------------------------------------------------
    if name == OP_CHDIR_UP:
        os.chdir("..")
        continue
    if name == OP_RETAR:
        basename = stack.pop()
        tarname = stack.pop()
        _run("tar", "cf", tarname, basename)
        _trace(basename + " -> " + tarname)
        # remove the unpacked tree now that it is back inside the tar file
        _run("rm", "-rf", basename)
        continue
    if name == OP_GZIP:
        name = stack.pop()
        _trace(name + " -> " + name + ".gz")
        _run("gzip", name)
        continue
    if name == OP_BZIP2:
        name = stack.pop()
        _trace(name + " -> " + name + ".bz2")
        _run("bzip2", name)
        continue
    if name == OP_RENAME:
        newname = stack.pop()
        name = stack.pop()
        os.rename(newname, name)
        _trace(newname + " -> " + name)
        continue

    # ---- a file name: find out what it is ----------------------------------
    fullname = os.getcwd() + "/" + name
    file_output, desc = _describe(name)

    if desc.startswith('directory'):
        # .hg repositories are skipped, as is the g95 tree found under
        # spkg/standard/fortran-20071120.p3/src/
        if name in (".hg", "g95"):
            _trace(fullname + ": IGNORING")
        else:
            _trace(fullname + ": DIR")
            stack.append(OP_CHDIR_UP)
            sys.stdout.flush()
            stack.extend(os.listdir(name))
            os.chdir(name)
        continue

    if name.endswith('.spkg'):
        # An .spkg is a (possibly bzip2-compressed) tar file: give it the
        # matching conventional suffix, examine it under that name and
        # rename it back afterwards.
        basename = name[:-len('.spkg')]
        if 'bzip2' in desc:
            newname = basename + '.tar.bz2'
        elif 'tar' in desc:
            newname = basename + '.tar'
        else:
            # Neither bzip2 nor tar: cannot be opened here, so report it
            # like any other unrecognised file.
            print(fullname + ": UNKNOWN")
            print(file_output)
            continue
        os.rename(name, newname)
        _trace(fullname + " -> " + newname)
        stack.extend([name, newname, OP_RENAME, newname])
        continue

    if desc.startswith('bzip2'):
        if not name.endswith('.bz2'):
            # The decompressed name cannot be derived by dropping ".bz2",
            # so the content is left unexplored; always say so.
            print(fullname + ": BZIP2 with unusual suffix")
            continue
        _run("bunzip2", name)
        basename = name[:-len('.bz2')]
        _trace(fullname + " -> " + basename)
        stack.extend([basename, OP_BZIP2, basename])
        continue

    if desc.startswith('gzip '):
        if name.endswith(".gz"):
            # special case - BAD!!!
            # 'spkg/standard/tachyon-0.98beta.p3/src/docs/tachyon.html.tar.gz'
            # overwrites dir 'tachyon'
            if name == "tachyon.html.tar.gz":
                if runlevel <= 1:
                    print(fullname + ": IGNORING")
            else:
                _run("gunzip", name)
                basename = name[:-len('.gz')]
                stack.extend([basename, OP_GZIP, basename])
                _trace(fullname + " -> " + basename)
        elif name.endswith(".tgz"):
            newname = name[:-len('.tgz')] + ".tar.gz"
            os.rename(name, newname)
            stack.extend([name, newname, OP_RENAME, newname])
            _trace(fullname + " -> " + newname)
        elif name.endswith(".dia"):  # See http://live.gnome.org/Dia
            newname = name[:-len('.dia')] + ".gz"
            os.rename(name, newname)
            stack.extend([name, newname, OP_RENAME, newname])
            _trace(fullname + " -> " + newname)
        elif name.endswith(".rda"):
            if runlevel <= 1:
                print(fullname + ": R data format? (find error)")
        else:
            print(fullname + ": GZIP with unusual suffix")
            print("abnormal exit")
            cleanup()
            # The scan was abandoned, so exit with a failure status.
            sys.exit(1)
        continue

    if desc.startswith('POSIX tar archive'):
        # special cases BAD!!!
        # 'spkg/standard/python-2.5.1.p10/src/Lib/test/testtar.tar'
        #     untars with error
        # 'spkg/standard/jmol-11.5.2/jmol/jars/vecmath1.2-1.14.tar'
        #     the first line of its listing does not give the directory
        #     it unpacks to
        if name in ("testtar.tar", "vecmath1.2-1.14.tar"):
            if runlevel <= 1:
                print(fullname + ": IGNORING")
            continue
        # Name that the tar file unpacks to.  Only the first top-level
        # entry is followed.
        newname = _tar_top_level(_capture("tar", "tf", name))
        if not newname:
            # Empty or unreadable listing: nothing to descend into.
            if runlevel <= 1:
                print(fullname + ": IGNORING")
            continue
        _run("tar", "xf", name)
        # remove the tar file; OP_RETAR rebuilds it afterwards
        os.unlink(os.getcwd() + '/' + name)
        _trace(fullname + " -> " + newname)
        stack.extend([name, newname, OP_RETAR, newname])
        continue

    # ---- plain file: report according to the rule table --------------------
    verdict = _classify(name, desc)
    if verdict is None:
        print(fullname + ": UNKNOWN")
        print(file_output)
    else:
        _report(fullname, *verdict)

cleanup()
print("DONE!!!")