Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sage
Path: blob/develop/build/sage_bootstrap/uncompress/tar_file.py
7434 views
1
"""
2
Tar file support
3
"""
4
5
#*****************************************************************************
6
# Copyright (C) 2016 Volker Braun <[email protected]>
7
#
8
# This program is free software: you can redistribute it and/or modify
9
# it under the terms of the GNU General Public License as published by
10
# the Free Software Foundation, either version 2 of the License, or
11
# (at your option) any later version.
12
# http://www.gnu.org/licenses/
13
#*****************************************************************************
14
15
from __future__ import print_function
16
17
import os
18
import copy
19
import tarfile
20
import stat
21
import subprocess
22
import time
23
import inspect
24
25
from io import BytesIO
26
27
from sage_bootstrap.uncompress.filter_os_files import filter_os_files
28
29
30
class SageBaseTarFile(tarfile.TarFile):
    """
    Same as tarfile.TarFile, but applies a reasonable umask (0022) to the
    permissions of all extracted files and directories, and fixes
    the encoding of file names in the tarball to be 'utf-8' instead of
    depending on locale settings.

    Previously this applied the user's current umask per the default behavior
    of the ``tar`` utility, but this did not provide sufficiently reliable
    behavior in all cases, such as when the user's umask is not strict enough.

    This also sets the modified timestamps on all extracted files to the same
    time (the current time), not the timestamps stored in the tarball.  This
    is meant to work around https://bugs.python.org/issue32773

    See https://github.com/sagemath/sage/issues/20218#comment:16 and
    https://github.com/sagemath/sage/issues/24567 for more background.
    """

    # Permission bits that are always cleared on extracted files/directories
    umask = 0o022

    def __init__(self, *args, **kwargs):
        """
        Open the archive, forcing the file name encoding to 'utf-8'.

        All arguments are forwarded to ``tarfile.TarFile.__init__``; an
        ``encoding`` keyword passed by the caller is overridden.
        """
        # File names are always decoded as UTF-8, independent of the locale
        kwargs['encoding'] = 'utf-8'

        super(SageBaseTarFile, self).__init__(*args, **kwargs)

        # Extracted files will have this timestamp
        self._extracted_mtime = time.time()

    @property
    def names(self):
        """
        List of filenames in the archive.

        Filters out names of OS-related files that shouldn't be in the
        archive (.DS_Store, etc.)
        """
        return filter_os_files(self.getnames())

    def chmod(self, tarinfo, targetpath):
        """Apply ``self.umask`` instead of the permissions in the TarInfo."""
        # Work on a copy so the archive's own member metadata is untouched
        tarinfo = copy.copy(tarinfo)
        # An extraction filter may have set the mode to None ("leave the
        # permissions alone"); there is nothing to mask in that case.
        if tarinfo.mode is not None:
            tarinfo.mode &= ~self.umask
            # The owner must always be able to write (e.g. to clean up later)
            tarinfo.mode |= stat.S_IWUSR
            # Never create setuid/setgid files
            tarinfo.mode &= ~(stat.S_ISUID | stat.S_ISGID)
        return super(SageBaseTarFile, self).chmod(tarinfo, targetpath)

    def utime(self, tarinfo, targetpath):
        """Override to keep the extraction time as the file's timestamp."""
        # Work on a copy (as in ``chmod``) so that the mtime recorded in the
        # archive's member list is not overwritten as a side effect.
        tarinfo = copy.copy(tarinfo)
        tarinfo.mtime = self._extracted_mtime
        return super(SageBaseTarFile, self).utime(tarinfo, targetpath)

    def extractall(self, path='.', members=None, **kwargs):
        """
        Same as tarfile.TarFile.extractall but allows filenames for
        the members argument (like zipfile.ZipFile).

        Each entry of ``members`` may be either a ``tarfile.TarInfo`` or the
        name of a member; an unknown name raises ``KeyError``.

        .. note::
            The additional ``**kwargs`` are for Python 2/3 compatibility, since
            different versions of this method accept additional arguments.
        """
        if members:
            name_to_member = {member.name: member
                              for member in self.getmembers()}
            members = [m if isinstance(m, tarfile.TarInfo)
                       else name_to_member[m]
                       for m in members]

        # Python versions with extraction filters warn (or switch to a
        # restrictive default) when no filter is given.  Keep the historical
        # unfiltered behaviour unless the caller explicitly chose a filter.
        # Checking for the attribute is the detection method recommended by
        # the tarfile documentation.
        if hasattr(tarfile, 'fully_trusted_filter'):
            kwargs.setdefault('filter', 'fully_trusted')

        return super(SageBaseTarFile, self).extractall(
            path=path, members=members, **kwargs)

    def extractbytes(self, member):
        """
        Return the contents of the specified archive member as bytes.

        If the member does not exist, or has no data stream (for example
        because it is a directory), returns None.
        """
        if member not in self.getnames():
            return None

        reader = self.extractfile(member)
        if reader is None:
            # ``extractfile`` returns None for members that are neither
            # regular files nor links, e.g. directories
            return None
        try:
            return reader.read()
        finally:
            reader.close()

    def _extract_member(self, tarinfo, targetpath, **kwargs):
        """
        Override to ensure that our custom umask is applied over the entire
        directory tree, even for directories that are not explicitly listed in
        the tarball.

        .. note::
            The additional ``**kwargs`` are for Python 2/3 compatibility, since
            different versions of this method accept additional arguments.
        """
        # os.umask both sets the new mask and returns the previous one, which
        # is restored even if the extraction fails.
        old_umask = os.umask(self.umask)
        try:
            super(SageBaseTarFile, self)._extract_member(tarinfo, targetpath,
                                                         **kwargs)
        finally:
            os.umask(old_umask)
132
133
134
class SageTarFile(SageBaseTarFile):
    """
    Convenience front end for SageBaseTarFile.

    Calling ``SageTarFile(filename)`` goes through ``SageBaseTarFile.open``
    rather than the plain ``TarFile.__init__``, because ``open`` is the more
    flexible of the two entry points.
    """
    def __new__(cls, filename):
        # Delegate construction to ``open``; the object handed back to the
        # caller is whatever that classmethod returns.
        archive = SageBaseTarFile.open(filename)
        return archive

    @staticmethod
    def can_read(filename):
        """
        Tell whether ``filename`` is an archive this class can process.

        Returns the result of ``tarfile.is_tarfile`` for that file.
        """
        readable = tarfile.is_tarfile(filename)
        return readable
150
151
152
class SageTarXZFile(SageBaseTarFile):
    """
    A ``.tar.xz`` file which is uncompressed in memory.
    """
    def __new__(cls, filename):
        """
        Decompress ``filename`` with the external ``xz`` program and open the
        resulting tar stream from memory.

        Raises ``tarfile.ReadError`` if ``xz`` reports an error, so that a
        partially decompressed archive is never handed to the tar reader.
        """
        # Read uncompressed data through a pipe
        proc = subprocess.Popen(["xz", "-d", "-c", filename],
                                stdout=subprocess.PIPE)
        data, _ = proc.communicate()
        # xz exits with 0 on success and 2 when only a warning occurred; any
        # other status means the output may be missing or truncated.
        if proc.returncode not in (0, 2):
            raise tarfile.ReadError(
                "xz failed to decompress %r (exit status %s)"
                % (filename, proc.returncode))
        return SageBaseTarFile(mode="r", fileobj=BytesIO(data))

    @staticmethod
    def can_read(filename):
        """
        Given an archive filename, returns True if this class can read and
        process the archive format of that file.
        """
        # The context manager closes the null device again; only the exit
        # status of ``xz -l`` matters, so its output is discarded.
        with open(os.devnull, 'w') as devnull:
            try:
                subprocess.check_call(["xz", "-l", filename],
                                      stdout=devnull, stderr=devnull)
            except Exception:
                # Best effort: a missing ``xz`` binary or an unreadable file
                # both simply mean "cannot read"
                return False
        return True
174
175