# CoCalc -- bz2.py
#
# GitHub Repository: allendowney/cpython
# Path: blob/main/Lib/bz2.py
# ¹² views
"""Interface to the libbzip2 compression library.

This module provides a file interface, classes for incremental
(de)compression, and functions for one-shot (de)compression.
"""

# Public names exported by ``from bz2 import *``.
__all__ = ["BZ2File", "BZ2Compressor", "BZ2Decompressor",
           "open", "compress", "decompress"]

# NOTE(review): the address inside the angle brackets looks like it was
# masked by the hosting page's e-mail protection -- confirm against the
# upstream file before relying on it.
__author__ = "Nadeem Vawda <[email protected]>"
11

12
from builtins import open as _builtin_open
13
import io
14
import os
15
import _compression
16

17
from _bz2 import BZ2Compressor, BZ2Decompressor
18

19

20
# State codes stored in BZ2File._mode.
_MODE_CLOSED   = 0  # no underlying file attached (initial and post-close state)
_MODE_READ     = 1  # opened for decompression
# Value 2 no longer used
_MODE_WRITE    = 3  # opened for compression ('w', 'x' and 'a' modes)
24

25

26
class BZ2File(_compression.BaseStream):

    """A file object providing transparent bzip2 (de)compression.

    A BZ2File can act as a wrapper for an existing file object, or refer
    directly to a named file on disk.

    Note that BZ2File provides a *binary* file interface - data read is
    returned as bytes, and data to be written should be given as bytes.
    """

    def __init__(self, filename, mode="r", *, compresslevel=9):
        """Open a bzip2-compressed file.

        If filename is a str, bytes, or PathLike object, it gives the
        name of the file to be opened. Otherwise, it should be a file
        object, which will be used to read or write the compressed data.

        mode can be 'r' for reading (default), 'w' for (over)writing,
        'x' for creating exclusively, or 'a' for appending. These can
        equivalently be given as 'rb', 'wb', 'xb', and 'ab'.

        If mode is 'w', 'x' or 'a', compresslevel can be a number between 1
        and 9 specifying the level of compression: 1 produces the least
        compression, and 9 (default) produces the most compression.

        If mode is 'r', the input file may be the concatenation of
        multiple compressed streams.

        Raises ValueError for an out-of-range compresslevel or an
        unrecognised mode, and TypeError if filename is neither a path
        nor an object with a read() or write() method.
        """
        # Establish the "closed" state first: close() returns immediately
        # while _mode is _MODE_CLOSED, so a constructor that raises below
        # leaves an object that is still safe to close.
        self._fp = None
        self._closefp = False
        self._mode = _MODE_CLOSED

        if not (1 <= compresslevel <= 9):
            raise ValueError("compresslevel must be between 1 and 9")

        if mode in ("", "r", "rb"):
            mode = "rb"
            mode_code = _MODE_READ
        elif mode in ("w", "wb", "x", "xb", "a", "ab"):
            # All three writing modes differ only in how the underlying
            # file is opened; normalise to the explicit binary spelling.
            mode = mode[0] + "b"
            mode_code = _MODE_WRITE
            self._compressor = BZ2Compressor(compresslevel)
        else:
            raise ValueError("Invalid mode: %r" % (mode,))

        if isinstance(filename, (str, bytes, os.PathLike)):
            self._fp = _builtin_open(filename, mode)
            # We opened the file ourselves, so close() must close it too.
            self._closefp = True
            self._mode = mode_code
        elif hasattr(filename, "read") or hasattr(filename, "write"):
            # Caller-supplied file object: use it, but leave closing it
            # to the caller (_closefp stays False).
            self._fp = filename
            self._mode = mode_code
        else:
            raise TypeError("filename must be a str, bytes, file or PathLike object")

        if self._mode == _MODE_READ:
            raw = _compression.DecompressReader(self._fp,
                BZ2Decompressor, trailing_error=OSError)
            self._buffer = io.BufferedReader(raw)
        else:
            # Count of uncompressed bytes written so far; reported by tell().
            self._pos = 0

    def close(self):
        """Flush and close the file.

        May be called more than once without error. Once the file is
        closed, any other operation on it will raise a ValueError.
        """
        if self._mode == _MODE_CLOSED:
            return
        try:
            if self._mode == _MODE_READ:
                self._buffer.close()
            elif self._mode == _MODE_WRITE:
                # Write out whatever the compressor is still holding.
                self._fp.write(self._compressor.flush())
                self._compressor = None
        finally:
            # Reset our state even if flushing or closing raised.
            try:
                if self._closefp:
                    self._fp.close()
            finally:
                self._fp = None
                self._closefp = False
                self._mode = _MODE_CLOSED
                self._buffer = None

    @property
    def closed(self):
        """True if this file is closed."""
        return self._mode == _MODE_CLOSED

    def fileno(self):
        """Return the file descriptor for the underlying file."""
        self._check_not_closed()
        return self._fp.fileno()

    def seekable(self):
        """Return whether the file supports seeking."""
        return self.readable() and self._buffer.seekable()

    def readable(self):
        """Return whether the file was opened for reading."""
        self._check_not_closed()
        return self._mode == _MODE_READ

    def writable(self):
        """Return whether the file was opened for writing."""
        self._check_not_closed()
        return self._mode == _MODE_WRITE

    def peek(self, n=0):
        """Return buffered data without advancing the file position.

        Always returns at least one byte of data, unless at EOF.
        The exact number of bytes returned is unspecified.
        """
        self._check_can_read()
        # Relies on the undocumented fact that BufferedReader.peek()
        # always returns at least one byte (except at EOF), independent
        # of the value of n
        return self._buffer.peek(n)

    def read(self, size=-1):
        """Read up to size uncompressed bytes from the file.

        If size is negative or omitted, read until EOF is reached.
        Returns b'' if the file is already at EOF.
        """
        self._check_can_read()
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Read up to size uncompressed bytes, while trying to avoid
        making multiple reads from the underlying stream. Reads up to a
        buffer's worth of data if size is negative.

        Returns b'' if the file is at EOF.
        """
        self._check_can_read()
        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def readinto(self, b):
        """Read bytes into b.

        Returns the number of bytes read (0 for EOF).
        """
        self._check_can_read()
        return self._buffer.readinto(b)

    def readline(self, size=-1):
        """Read a line of uncompressed bytes from the file.

        The terminating newline (if present) is retained. If size is
        non-negative, no more than size bytes will be read (in which
        case the line may be incomplete). Returns b'' if already at EOF.

        Raises TypeError if size is not an int and has no __index__.
        """
        if not isinstance(size, int):
            # Accept any integer-like object that implements __index__.
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        self._check_can_read()
        return self._buffer.readline(size)

    def readlines(self, size=-1):
        """Read a list of lines of uncompressed bytes from the file.

        size can be specified to control the number of lines read: no
        further lines will be read once the total size of the lines read
        so far equals or exceeds size.

        Raises TypeError if size is not an int and has no __index__.
        """
        if not isinstance(size, int):
            # Accept any integer-like object that implements __index__.
            if not hasattr(size, "__index__"):
                raise TypeError("Integer argument expected")
            size = size.__index__()
        self._check_can_read()
        return self._buffer.readlines(size)

    def write(self, data):
        """Write a byte string to the file.

        Returns the number of uncompressed bytes written, which is
        always the length of data in bytes. Note that due to buffering,
        the file on disk may not reflect the data written until close()
        is called.
        """
        self._check_can_write()
        if isinstance(data, (bytes, bytearray)):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            # nbytes rather than len(): items may be wider than one byte.
            length = data.nbytes

        compressed = self._compressor.compress(data)
        self._fp.write(compressed)
        self._pos += length
        return length

    def writelines(self, seq):
        """Write a sequence of byte strings to the file.

        Returns the number of uncompressed bytes written.
        seq can be any iterable yielding byte strings.

        Line separators are not added between the written byte strings.
        """
        return _compression.BaseStream.writelines(self, seq)

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the file position.

        The new position is specified by offset, relative to the
        position indicated by whence. Values for whence are:

            0: start of stream (default); offset must not be negative
            1: current stream position
            2: end of stream; offset must not be positive

        Returns the new file position.

        Note that seeking is emulated, so depending on the parameters,
        this operation may be extremely slow.
        """
        self._check_can_seek()
        return self._buffer.seek(offset, whence)

    def tell(self):
        """Return the current file position."""
        self._check_not_closed()
        if self._mode == _MODE_READ:
            return self._buffer.tell()
        # Write mode: position is the running total kept by write().
        return self._pos
269

270

271
def open(filename, mode="rb", compresslevel=9,
         encoding=None, errors=None, newline=None):
    """Open a bzip2-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str, bytes, or
    PathLike object), or an existing file object to read from or write
    to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or
    "ab" for binary mode, or "rt", "wt", "xt" or "at" for text mode.
    The default mode is "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the BZ2File
    constructor: BZ2File(filename, mode, compresslevel). In this case,
    the encoding, errors and newline arguments must not be provided.

    For text mode, a BZ2File object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error
    handling behavior, and line ending(s).

    Raises ValueError if mode contains both "t" and "b", or if
    encoding, errors or newline is given in binary mode.
    """
    text_mode = "t" in mode
    if text_mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        # The text-layer options are meaningless without a TextIOWrapper.
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # BZ2File only understands binary mode strings, so drop the "t".
    bz_mode = mode.replace("t", "")
    binary_file = BZ2File(filename, bz_mode, compresslevel=compresslevel)

    if text_mode:
        encoding = io.text_encoding(encoding)
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    return binary_file
311

312

313
def compress(data, compresslevel=9):
    """Compress a block of data.

    compresslevel, if given, must be a number between 1 and 9.

    Returns the complete compressed stream as bytes.

    For incremental compression, use a BZ2Compressor object instead.
    """
    comp = BZ2Compressor(compresslevel)
    # flush() finishes the stream; without it the output would be incomplete.
    return comp.compress(data) + comp.flush()
322

323

324
def decompress(data):
    """Decompress a block of data.

    data may hold several concatenated bzip2 streams; their contents are
    joined in order. Trailing bytes that do not form a valid stream are
    ignored once at least one stream has been decoded.

    Raises OSError if the very first stream is invalid, and ValueError
    if the data ends before a stream's end-of-stream marker.

    For incremental decompression, use a BZ2Decompressor object instead.
    """
    results = []
    while data:
        # A decompressor handles exactly one stream; use a fresh one each time.
        decomp = BZ2Decompressor()
        try:
            res = decomp.decompress(data)
        except OSError:
            if results:
                break  # Leftover data is not a valid bzip2 stream; ignore it.
            else:
                raise  # Error on the first iteration; bail out.
        results.append(res)
        if not decomp.eof:
            raise ValueError("Compressed data ended before the "
                             "end-of-stream marker was reached")
        # Whatever follows the finished stream is the next candidate stream.
        data = decomp.unused_data
    return b"".join(results)
345

346
# Product
#
# Resources
#
# Company