Path: blob/develop/build/sage_bootstrap/uncompress/tar_file.py
4055 views
"""1Tar file support2"""34#*****************************************************************************5# Copyright (C) 2016 Volker Braun <[email protected]>6#7# This program is free software: you can redistribute it and/or modify8# it under the terms of the GNU General Public License as published by9# the Free Software Foundation, either version 2 of the License, or10# (at your option) any later version.11# http://www.gnu.org/licenses/12#*****************************************************************************1314from __future__ import print_function1516import os17import copy18import tarfile19import stat20import subprocess21import time2223from io import BytesIO2425from sage_bootstrap.uncompress.filter_os_files import filter_os_files262728class SageBaseTarFile(tarfile.TarFile):29"""30Same as tarfile.TarFile, but applies a reasonable umask (0022) to the31permissions of all extracted files and directories, and fixes32the encoding of file names in the tarball to be 'utf-8' instead of33depending on locale settings.3435Previously this applied the user's current umask per the default behavior36of the ``tar`` utility, but this did not provide sufficiently reliable37behavior in all cases, such as when the user's umask is not strict enough.3839This also sets the modified timestamps on all extracted files to the same40time (the current time), not the timestamps stored in the tarball. This41is meant to work around https://bugs.python.org/issue327734243See https://github.com/sagemath/sage/issues/20218#comment:16 and44https://github.com/sagemath/sage/issues/24567 for more background.45"""4647umask = 0o0224849def __init__(self, *args, **kwargs):5051kwargs['encoding'] = 'utf-8'5253# Unfortunately the only way to get the current umask is to set it54# and then restore it55super(SageBaseTarFile, self).__init__(*args, **kwargs)5657# Extracted files will have this timestamp58self._extracted_mtime = time.time()5960@property61def names(self):62"""63List of filenames in the archive.6465Filters out names of OS-related files that shouldn't be in the66archive (.DS_Store, etc.)67"""6869return filter_os_files(self.getnames())7071def chmod(self, tarinfo, targetpath):72"""Apply ``self.umask`` instead of the permissions in the TarInfo."""73tarinfo = copy.copy(tarinfo)74tarinfo.mode &= ~self.umask75tarinfo.mode |= stat.S_IWUSR76tarinfo.mode &= ~(stat.S_ISUID | stat.S_ISGID)77return super(SageBaseTarFile, self).chmod(tarinfo, targetpath)7879def utime(self, tarinfo, targetpath):80"""Override to keep the extraction time as the file's timestamp."""81tarinfo.mtime = self._extracted_mtime82return super(SageBaseTarFile, self).utime(tarinfo, targetpath)8384def extractall(self, path='.', members=None, **kwargs):85"""86Same as tarfile.TarFile.extractall but allows filenames for87the members argument (like zipfile.ZipFile).8889.. note::90The additional ``**kwargs`` are for Python 2/3 compatibility, since91different versions of this method accept additional arguments.92"""93if members:94name_to_member = dict([member.name, member] for member in self.getmembers())95members = [m if isinstance(m, tarfile.TarInfo)96else name_to_member[m]97for m in members]98return super(SageBaseTarFile, self).extractall(path=path,99members=members,100**kwargs)101102def extractbytes(self, member):103"""104Return the contents of the specified archive member as bytes.105106If the member does not exist, returns None.107"""108109if member in self.getnames():110reader = self.extractfile(member)111return reader.read()112113def _extract_member(self, tarinfo, targetpath, **kwargs):114"""115Override to ensure that our custom umask is applied over the entire116directory tree, even for directories that are not explicitly listed in117the tarball.118119.. note::120The additional ``**kwargs`` are for Python 2/3 compatibility, since121different versions of this method accept additional arguments.122"""123old_umask = os.umask(self.umask)124try:125super(SageBaseTarFile, self)._extract_member(tarinfo, targetpath,126**kwargs)127finally:128os.umask(old_umask)129130131class SageTarFile(SageBaseTarFile):132"""133A wrapper around SageBaseTarFile such that SageTarFile(filename) is134essentially equivalent to TarFile.open(filename) which is more135flexible than the basic TarFile.__init__136"""137def __new__(cls, filename):138return SageBaseTarFile.open(filename)139140@staticmethod141def can_read(filename):142"""143Given an archive filename, returns True if this class can read and144process the archive format of that file.145"""146return tarfile.is_tarfile(filename)147148149class SageTarXZFile(SageBaseTarFile):150"""151A ``.tar.xz`` file which is uncompressed in memory.152"""153def __new__(cls, filename):154# Read uncompressed data through a pipe155proc = subprocess.Popen(["xz", "-d", "-c", filename], stdout=subprocess.PIPE)156data, _ = proc.communicate()157return SageBaseTarFile(mode="r", fileobj=BytesIO(data))158159@staticmethod160def can_read(filename):161"""162Given an archive filename, returns True if this class can read and163process the archive format of that file.164"""165devnull = open(os.devnull, 'w')166try:167subprocess.check_call(["xz", "-l", filename], stdout=devnull, stderr=devnull)168except Exception:169return False170return True171172173