# Path: blob/develop/build/sage_bootstrap/uncompress/tar_file.py
# 7434 views
"""1Tar file support2"""34#*****************************************************************************5# Copyright (C) 2016 Volker Braun <[email protected]>6#7# This program is free software: you can redistribute it and/or modify8# it under the terms of the GNU General Public License as published by9# the Free Software Foundation, either version 2 of the License, or10# (at your option) any later version.11# http://www.gnu.org/licenses/12#*****************************************************************************1314from __future__ import print_function1516import os17import copy18import tarfile19import stat20import subprocess21import time22import inspect2324from io import BytesIO2526from sage_bootstrap.uncompress.filter_os_files import filter_os_files272829class SageBaseTarFile(tarfile.TarFile):30"""31Same as tarfile.TarFile, but applies a reasonable umask (0022) to the32permissions of all extracted files and directories, and fixes33the encoding of file names in the tarball to be 'utf-8' instead of34depending on locale settings.3536Previously this applied the user's current umask per the default behavior37of the ``tar`` utility, but this did not provide sufficiently reliable38behavior in all cases, such as when the user's umask is not strict enough.3940This also sets the modified timestamps on all extracted files to the same41time (the current time), not the timestamps stored in the tarball. 
This42is meant to work around https://bugs.python.org/issue327734344See https://github.com/sagemath/sage/issues/20218#comment:16 and45https://github.com/sagemath/sage/issues/24567 for more background.46"""4748umask = 0o0224950def __init__(self, *args, **kwargs):5152kwargs['encoding'] = 'utf-8'5354# Unfortunately the only way to get the current umask is to set it55# and then restore it56super(SageBaseTarFile, self).__init__(*args, **kwargs)5758# Extracted files will have this timestamp59self._extracted_mtime = time.time()6061@property62def names(self):63"""64List of filenames in the archive.6566Filters out names of OS-related files that shouldn't be in the67archive (.DS_Store, etc.)68"""6970return filter_os_files(self.getnames())7172def chmod(self, tarinfo, targetpath):73"""Apply ``self.umask`` instead of the permissions in the TarInfo."""74tarinfo = copy.copy(tarinfo)75tarinfo.mode &= ~self.umask76tarinfo.mode |= stat.S_IWUSR77tarinfo.mode &= ~(stat.S_ISUID | stat.S_ISGID)78return super(SageBaseTarFile, self).chmod(tarinfo, targetpath)7980def utime(self, tarinfo, targetpath):81"""Override to keep the extraction time as the file's timestamp."""82tarinfo.mtime = self._extracted_mtime83return super(SageBaseTarFile, self).utime(tarinfo, targetpath)8485def extractall(self, path='.', members=None, **kwargs):86"""87Same as tarfile.TarFile.extractall but allows filenames for88the members argument (like zipfile.ZipFile).8990.. 
note::91The additional ``**kwargs`` are for Python 2/3 compatibility, since92different versions of this method accept additional arguments.93"""94if members:95name_to_member = dict([member.name, member] for member in self.getmembers())96members = [m if isinstance(m, tarfile.TarInfo)97else name_to_member[m]98for m in members]99tfile = super(SageBaseTarFile, self)100if 'filter' in inspect.signature(tfile.extractall).parameters:101kwargs['filter'] = 'fully_trusted'102return tfile.extractall(path=path, members=members, **kwargs)103104def extractbytes(self, member):105"""106Return the contents of the specified archive member as bytes.107108If the member does not exist, returns None.109"""110111if member in self.getnames():112reader = self.extractfile(member)113return reader.read()114115def _extract_member(self, tarinfo, targetpath, **kwargs):116"""117Override to ensure that our custom umask is applied over the entire118directory tree, even for directories that are not explicitly listed in119the tarball.120121.. 
note::122The additional ``**kwargs`` are for Python 2/3 compatibility, since123different versions of this method accept additional arguments.124"""125old_umask = os.umask(self.umask)126try:127super(SageBaseTarFile, self)._extract_member(tarinfo, targetpath,128**kwargs)129finally:130os.umask(old_umask)131132133class SageTarFile(SageBaseTarFile):134"""135A wrapper around SageBaseTarFile such that SageTarFile(filename) is136essentially equivalent to TarFile.open(filename) which is more137flexible than the basic TarFile.__init__138"""139def __new__(cls, filename):140return SageBaseTarFile.open(filename)141142@staticmethod143def can_read(filename):144"""145Given an archive filename, returns True if this class can read and146process the archive format of that file.147"""148return tarfile.is_tarfile(filename)149150151class SageTarXZFile(SageBaseTarFile):152"""153A ``.tar.xz`` file which is uncompressed in memory.154"""155def __new__(cls, filename):156# Read uncompressed data through a pipe157proc = subprocess.Popen(["xz", "-d", "-c", filename], stdout=subprocess.PIPE)158data, _ = proc.communicate()159return SageBaseTarFile(mode="r", fileobj=BytesIO(data))160161@staticmethod162def can_read(filename):163"""164Given an archive filename, returns True if this class can read and165process the archive format of that file.166"""167devnull = open(os.devnull, 'w')168try:169subprocess.check_call(["xz", "-l", filename], stdout=devnull, stderr=devnull)170except Exception:171return False172return True173174175