Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
sagemath
GitHub Repository: sagemath/sage
Path: blob/develop/build/sage_bootstrap/uncompress/tar_file.py
4055 views
1
"""
2
Tar file support
3
"""
4
5
#*****************************************************************************
6
# Copyright (C) 2016 Volker Braun <[email protected]>
7
#
8
# This program is free software: you can redistribute it and/or modify
9
# it under the terms of the GNU General Public License as published by
10
# the Free Software Foundation, either version 2 of the License, or
11
# (at your option) any later version.
12
# http://www.gnu.org/licenses/
13
#*****************************************************************************
14
15
from __future__ import print_function
16
17
import os
18
import copy
19
import tarfile
20
import stat
21
import subprocess
22
import time
23
24
from io import BytesIO
25
26
from sage_bootstrap.uncompress.filter_os_files import filter_os_files
27
28
29
class SageBaseTarFile(tarfile.TarFile):
    """
    Same as tarfile.TarFile, but applies a reasonable umask (0022) to the
    permissions of all extracted files and directories, and fixes
    the encoding of file names in the tarball to be 'utf-8' instead of
    depending on locale settings.

    Previously this applied the user's current umask per the default behavior
    of the ``tar`` utility, but this did not provide sufficiently reliable
    behavior in all cases, such as when the user's umask is not strict enough.

    This also sets the modified timestamps on all extracted files to the same
    time (the time at which the archive object was created), not the
    timestamps stored in the tarball.  This is meant to work around
    https://bugs.python.org/issue32773

    See https://github.com/sagemath/sage/issues/20218#comment:16 and
    https://github.com/sagemath/sage/issues/24567 for more background.
    """

    # Permission bits that are always cleared on extracted files/directories.
    umask = 0o022

    def __init__(self, *args, **kwargs):
        """
        Open the archive exactly like ``tarfile.TarFile``, except that the
        ``encoding`` keyword is always forced to ``'utf-8'``.
        """
        # Member names must not depend on the locale of the running process.
        kwargs['encoding'] = 'utf-8'

        super(SageBaseTarFile, self).__init__(*args, **kwargs)

        # Extracted files will have this timestamp
        self._extracted_mtime = time.time()

    @property
    def names(self):
        """
        List of filenames in the archive.

        Filters out names of OS-related files that shouldn't be in the
        archive (.DS_Store, etc.)
        """
        return filter_os_files(self.getnames())

    def chmod(self, tarinfo, targetpath):
        """Apply ``self.umask`` instead of the permissions in the TarInfo."""
        # Work on a copy so the archive's own metadata is left untouched.
        tarinfo = copy.copy(tarinfo)
        # Clear the bits masked out by our fixed umask ...
        tarinfo.mode &= ~self.umask
        # ... make sure the owner can always write to what was extracted ...
        tarinfo.mode |= stat.S_IWUSR
        # ... and never propagate setuid/setgid bits from the tarball.
        tarinfo.mode &= ~(stat.S_ISUID | stat.S_ISGID)
        return super(SageBaseTarFile, self).chmod(tarinfo, targetpath)

    def utime(self, tarinfo, targetpath):
        """Override to keep the extraction time as the file's timestamp."""
        # Like ``chmod``, operate on a copy rather than overwriting the
        # mtime recorded on the caller's TarInfo object.
        tarinfo = copy.copy(tarinfo)
        tarinfo.mtime = self._extracted_mtime
        return super(SageBaseTarFile, self).utime(tarinfo, targetpath)

    def extractall(self, path='.', members=None, **kwargs):
        """
        Same as tarfile.TarFile.extractall but allows filenames for
        the members argument (like zipfile.ZipFile).

        Entries of ``members`` that are already ``TarInfo`` objects are
        passed through unchanged; any other entry is looked up by name, and
        a ``KeyError`` is raised if the archive has no member of that name.

        .. note::
            The additional ``**kwargs`` are for Python 2/3 compatibility, since
            different versions of this method accept additional arguments.
        """
        if members:
            name_to_member = dict((member.name, member)
                                  for member in self.getmembers())
            members = [m if isinstance(m, tarfile.TarInfo)
                       else name_to_member[m]
                       for m in members]
        return super(SageBaseTarFile, self).extractall(path=path,
                                                       members=members,
                                                       **kwargs)

    def extractbytes(self, member):
        """
        Return the contents of the specified archive member as bytes.

        If the member does not exist, or is not something whose contents
        can be read (``extractfile`` yields no file object for it, e.g. a
        directory), returns None.
        """
        if member not in self.getnames():
            return None
        reader = self.extractfile(member)
        if reader is None:
            # Directories and other non-file members have no readable body.
            return None
        try:
            return reader.read()
        finally:
            reader.close()

    def _extract_member(self, tarinfo, targetpath, **kwargs):
        """
        Override to ensure that our custom umask is applied over the entire
        directory tree, even for directories that are not explicitly listed in
        the tarball.

        .. note::
            The additional ``**kwargs`` are for Python 2/3 compatibility, since
            different versions of this method accept additional arguments.
        """
        # ``os.umask`` both sets the new mask and returns the previous one;
        # the previous value is restored even if extraction fails.
        old_umask = os.umask(self.umask)
        try:
            super(SageBaseTarFile, self)._extract_member(tarinfo, targetpath,
                                                         **kwargs)
        finally:
            os.umask(old_umask)
130
131
132
class SageTarFile(SageBaseTarFile):
    """
    Convenience front end for SageBaseTarFile.

    Calling ``SageTarFile(filename)`` goes through ``SageBaseTarFile.open``
    rather than the plain constructor, so the object handed back is whatever
    ``open`` produces for that file.
    """
    def __new__(cls, filename):
        # Delegate construction entirely to the ``open`` factory.
        archive = SageBaseTarFile.open(filename)
        return archive

    @staticmethod
    def can_read(filename):
        """
        Report whether ``filename`` is an archive this class can handle.

        The decision is made by ``tarfile.is_tarfile``.
        """
        readable = tarfile.is_tarfile(filename)
        return readable
148
149
150
class SageTarXZFile(SageBaseTarFile):
    """
    A ``.tar.xz`` file which is uncompressed in memory.

    Decompression is delegated to the external ``xz`` command; the whole
    decompressed stream is held in a ``BytesIO`` buffer and then read as a
    plain tar archive by ``SageBaseTarFile``.
    """
    def __new__(cls, filename):
        # Read uncompressed data through a pipe
        proc = subprocess.Popen(["xz", "-d", "-c", filename], stdout=subprocess.PIPE)
        # NOTE(review): the exit status of ``xz`` is not inspected here, so a
        # failed decompression only surfaces when the buffer is parsed as tar.
        data, _ = proc.communicate()
        return SageBaseTarFile(mode="r", fileobj=BytesIO(data))

    @staticmethod
    def can_read(filename):
        """
        Given an archive filename, returns True if this class can read and
        process the archive format of that file.

        The check runs ``xz -l`` on the file with its output discarded; any
        failure (non-zero exit status, or ``xz`` not being runnable at all)
        results in False.
        """
        # Use a context manager so the handle on the null device is closed
        # on every path instead of being leaked on each call.
        with open(os.devnull, 'w') as devnull:
            try:
                subprocess.check_call(["xz", "-l", filename],
                                      stdout=devnull, stderr=devnull)
            except Exception:
                return False
        return True
172
173