CoCalc -- _pyio.py

GitHub Repository: allendowney/cpython
Path: blob/main/Lib/_pyio.py
¹² views
1
"""
2
Python implementation of the io module.
3
"""
4

5
import os
6
import abc
7
import codecs
8
import errno
9
import stat
10
import sys
11
# Import _thread instead of threading to reduce startup cost
12
from _thread import allocate_lock as Lock
13
if sys.platform in {'win32', 'cygwin'}:
14
    from msvcrt import setmode as _setmode
15
else:
16
    _setmode = None
17

18
import io
19
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20

21
# whence values accepted by seek(): 0, 1 and 2 are hardwired; the
# SEEK_HOLE / SEEK_DATA values are added when the os module provides them.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    for _flag_name in ('SEEK_HOLE', 'SEEK_DATA'):
        valid_seek_flags.add(getattr(os, _flag_name))
    del _flag_name

# Fallback buffer size, in bytes; open() prefers st_blksize whenever it
# can obtain one.
DEFAULT_BUFFER_SIZE = 8 * 1024

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind the builtin under this module's namespace, for compatibility.
BlockingIOError = BlockingIOError

# Does open() check its 'errors' argument?  True when the interpreter
# exposes sys.gettotalrefcount, otherwise whatever sys.flags.dev_mode says.
_CHECK_ERRORS = (True if hasattr(sys, "gettotalrefcount")
                 else sys.flags.dev_mode)
38

39

40
def text_encoding(encoding, stacklevel=2):
41
    """
42
    A helper function to choose the text encoding.
43

44
    When encoding is not None, this function returns it.
45
    Otherwise, this function returns the default text encoding
46
    (i.e. "locale" or "utf-8" depends on UTF-8 mode).
47

48
    This function emits an EncodingWarning if *encoding* is None and
49
    sys.flags.warn_default_encoding is true.
50

51
    This can be used in APIs with an encoding=None parameter
52
    that pass it to TextIOWrapper or open.
53
    However, please consider using encoding="utf-8" for new APIs.
54
    """
55
    if encoding is None:
56
        if sys.flags.utf8_mode:
57
            encoding = "utf-8"
58
        else:
59
            encoding = "locale"
60
        if sys.flags.warn_default_encoding:
61
            import warnings
62
            warnings.warn("'encoding' argument not specified.",
63
                          EncodingWarning, stacklevel + 1)
64
    return encoding
65

66

67
# Wrapper for builtins.open
68
#
69
# Trick so that open() won't become a bound method when stored
70
# as a class variable (as dbm.dumb does).
71
#
72
# See init_set_builtins_open() in Python/pylifecycle.c.
73
@staticmethod
74
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
75
         newline=None, closefd=True, opener=None):
76

77
    r"""Open file and return a stream.  Raise OSError upon failure.
78

79
    file is either a text or byte string giving the name (and the path
80
    if the file isn't in the current working directory) of the file to
81
    be opened or an integer file descriptor of the file to be
82
    wrapped. (If a file descriptor is given, it is closed when the
83
    returned I/O object is closed, unless closefd is set to False.)
84

85
    mode is an optional string that specifies the mode in which the file is
86
    opened. It defaults to 'r' which means open for reading in text mode. Other
87
    common values are 'w' for writing (truncating the file if it already
88
    exists), 'x' for exclusive creation of a new file, and 'a' for appending
89
    (which on some Unix systems, means that all writes append to the end of the
90
    file regardless of the current seek position). In text mode, if encoding is
91
    not specified the encoding used is platform dependent. (For reading and
92
    writing raw bytes use binary mode and leave encoding unspecified.) The
93
    available modes are:
94

95
    ========= ===============================================================
96
    Character Meaning
97
    --------- ---------------------------------------------------------------
98
    'r'       open for reading (default)
99
    'w'       open for writing, truncating the file first
100
    'x'       create a new file and open it for writing
101
    'a'       open for writing, appending to the end of the file if it exists
102
    'b'       binary mode
103
    't'       text mode (default)
104
    '+'       open a disk file for updating (reading and writing)
105
    ========= ===============================================================
106

107
    The default mode is 'rt' (open for reading text). For binary random
108
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
109
    'r+b' opens the file without truncation. The 'x' mode implies 'w' and
110
    raises an `FileExistsError` if the file already exists.
111

112
    Python distinguishes between files opened in binary and text modes,
113
    even when the underlying operating system doesn't. Files opened in
114
    binary mode (appending 'b' to the mode argument) return contents as
115
    bytes objects without any decoding. In text mode (the default, or when
116
    't' is appended to the mode argument), the contents of the file are
117
    returned as strings, the bytes having been first decoded using a
118
    platform-dependent encoding or using the specified encoding if given.
119

120
    buffering is an optional integer used to set the buffering policy.
121
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
122
    line buffering (only usable in text mode), and an integer > 1 to indicate
123
    the size of a fixed-size chunk buffer.  When no buffering argument is
124
    given, the default buffering policy works as follows:
125

126
    * Binary files are buffered in fixed-size chunks; the size of the buffer
127
      is chosen using a heuristic trying to determine the underlying device's
128
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
129
      On many systems, the buffer will typically be 4096 or 8192 bytes long.
130

131
    * "Interactive" text files (files for which isatty() returns True)
132
      use line buffering.  Other text files use the policy described above
133
      for binary files.
134

135
    encoding is the str name of the encoding used to decode or encode the
136
    file. This should only be used in text mode. The default encoding is
137
    platform dependent, but any encoding supported by Python can be
138
    passed.  See the codecs module for the list of supported encodings.
139

140
    errors is an optional string that specifies how encoding errors are to
141
    be handled---this argument should not be used in binary mode. Pass
142
    'strict' to raise a ValueError exception if there is an encoding error
143
    (the default of None has the same effect), or pass 'ignore' to ignore
144
    errors. (Note that ignoring encoding errors can lead to data loss.)
145
    See the documentation for codecs.register for a list of the permitted
146
    encoding error strings.
147

148
    newline is a string controlling how universal newlines works (it only
149
    applies to text mode). It can be None, '', '\n', '\r', and '\r\n'.  It works
150
    as follows:
151

152
    * On input, if newline is None, universal newlines mode is
153
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
154
      these are translated into '\n' before being returned to the
155
      caller. If it is '', universal newline mode is enabled, but line
156
      endings are returned to the caller untranslated. If it has any of
157
      the other legal values, input lines are only terminated by the given
158
      string, and the line ending is returned to the caller untranslated.
159

160
    * On output, if newline is None, any '\n' characters written are
161
      translated to the system default line separator, os.linesep. If
162
      newline is '', no translation takes place. If newline is any of the
163
      other legal values, any '\n' characters written are translated to
164
      the given string.
165

166
    closedfd is a bool. If closefd is False, the underlying file descriptor will
167
    be kept open when the file is closed. This does not work when a file name is
168
    given and must be True in that case.
169

170
    The newly created file is non-inheritable.
171

172
    A custom opener can be used by passing a callable as *opener*. The
173
    underlying file descriptor for the file object is then obtained by calling
174
    *opener* with (*file*, *flags*). *opener* must return an open file
175
    descriptor (passing os.open as *opener* results in functionality similar to
176
    passing None).
177

178
    open() returns a file object whose type depends on the mode, and
179
    through which the standard file operations such as reading and writing
180
    are performed. When open() is used to open a file in a text mode ('w',
181
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
182
    a file in a binary mode, the returned class varies: in read binary
183
    mode, it returns a BufferedReader; in write binary and append binary
184
    modes, it returns a BufferedWriter, and in read/write mode, it returns
185
    a BufferedRandom.
186

187
    It is also possible to use a string or bytearray as a file for both
188
    reading and writing. For strings StringIO can be used like a file
189
    opened in a text mode, and for bytes a BytesIO can be used like a file
190
    opened in a binary mode.
191
    """
192
    if not isinstance(file, int):
193
        file = os.fspath(file)
194
    if not isinstance(file, (str, bytes, int)):
195
        raise TypeError("invalid file: %r" % file)
196
    if not isinstance(mode, str):
197
        raise TypeError("invalid mode: %r" % mode)
198
    if not isinstance(buffering, int):
199
        raise TypeError("invalid buffering: %r" % buffering)
200
    if encoding is not None and not isinstance(encoding, str):
201
        raise TypeError("invalid encoding: %r" % encoding)
202
    if errors is not None and not isinstance(errors, str):
203
        raise TypeError("invalid errors: %r" % errors)
204
    modes = set(mode)
205
    if modes - set("axrwb+t") or len(mode) > len(modes):
206
        raise ValueError("invalid mode: %r" % mode)
207
    creating = "x" in modes
208
    reading = "r" in modes
209
    writing = "w" in modes
210
    appending = "a" in modes
211
    updating = "+" in modes
212
    text = "t" in modes
213
    binary = "b" in modes
214
    if text and binary:
215
        raise ValueError("can't have text and binary mode at once")
216
    if creating + reading + writing + appending > 1:
217
        raise ValueError("can't have read/write/append mode at once")
218
    if not (creating or reading or writing or appending):
219
        raise ValueError("must have exactly one of read/write/append mode")
220
    if binary and encoding is not None:
221
        raise ValueError("binary mode doesn't take an encoding argument")
222
    if binary and errors is not None:
223
        raise ValueError("binary mode doesn't take an errors argument")
224
    if binary and newline is not None:
225
        raise ValueError("binary mode doesn't take a newline argument")
226
    if binary and buffering == 1:
227
        import warnings
228
        warnings.warn("line buffering (buffering=1) isn't supported in binary "
229
                      "mode, the default buffer size will be used",
230
                      RuntimeWarning, 2)
231
    raw = FileIO(file,
232
                 (creating and "x" or "") +
233
                 (reading and "r" or "") +
234
                 (writing and "w" or "") +
235
                 (appending and "a" or "") +
236
                 (updating and "+" or ""),
237
                 closefd, opener=opener)
238
    result = raw
239
    try:
240
        line_buffering = False
241
        if buffering == 1 or buffering < 0 and raw.isatty():
242
            buffering = -1
243
            line_buffering = True
244
        if buffering < 0:
245
            buffering = DEFAULT_BUFFER_SIZE
246
            try:
247
                bs = os.fstat(raw.fileno()).st_blksize
248
            except (OSError, AttributeError):
249
                pass
250
            else:
251
                if bs > 1:
252
                    buffering = bs
253
        if buffering < 0:
254
            raise ValueError("invalid buffering size")
255
        if buffering == 0:
256
            if binary:
257
                return result
258
            raise ValueError("can't have unbuffered text I/O")
259
        if updating:
260
            buffer = BufferedRandom(raw, buffering)
261
        elif creating or writing or appending:
262
            buffer = BufferedWriter(raw, buffering)
263
        elif reading:
264
            buffer = BufferedReader(raw, buffering)
265
        else:
266
            raise ValueError("unknown mode: %r" % mode)
267
        result = buffer
268
        if binary:
269
            return result
270
        encoding = text_encoding(encoding)
271
        text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
272
        result = text
273
        text.mode = mode
274
        return result
275
    except:
276
        result.close()
277
        raise
278

279
# Define a default pure-Python implementation for open_code()
280
# that does not allow hooks. Warn on first use. Defined for tests.
281
def _open_code_with_warning(path):
    """Open *path* in mode ``'rb'`` for content meant to be executed.

    Use this function when the file's contents are going to be treated
    as executable code.  ``path`` should be an absolute path.

    Runtimes that support it let embedders hook this function to gain
    more control over code files.  This pure-Python fallback offers no
    such hooks, so a RuntimeWarning is issued on every call before the
    file is opened.
    """
    from warnings import warn
    warn("_pyio.open_code() may not be using hooks", RuntimeWarning, 2)
    return open(path, "rb")
296

297
# Prefer the io module's open_code(); fall back to the warning wrapper
# only when io does not provide one.
if hasattr(io, "open_code"):
    open_code = io.open_code
else:
    open_code = _open_code_with_warning


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.  A local stand-in is defined only if io lacks the name.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(OSError, ValueError):
        pass
310

311

312
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise ValueError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for the method *name*."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length seek relative to the current position.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        Raises ValueError if the stream is closed.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed so subclasses cannot clash with it.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark as closed even when flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        # If close() fails, the caller logs the exception with
        # sys.unraisablehook. close() must be called at the end at __del__().
        self.close()

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable

        *msg*, when given, replaces the default error message.
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable

        *msg*, when given, replaces the default error message.
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable

        *msg*, when given, replaces the default error message.
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed

        *msg*, when given, replaces the default error message.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int (or None, meaning no limit).

        The line terminator is always b'\n' for binary files; for text
        files, the newline argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if size is neither None nor usable as an index.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                # Decide how much can be read in one go without running
                # past the end of the line or past the size limit.
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Cap by the bytes still missing, not by the total
                    # limit; otherwise a line gathered over several reads
                    # could grow beyond size.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() the only safe step is one byte at a time.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        """Return self as a line iterator; raises ValueError if closed."""
        self._checkClosed()
        return self

    def __next__(self):
        """Return the next line, or raise StopIteration on an empty read."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.  A hint of None, zero or a negative
        value reads every remaining line.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
591

592
io.IOBase.register(IOBase)
593

594

595
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants a
    # working read() only has to supply readinto() as its primitive.
    # In general, readinto() can be more efficient than read().

    # (Providing the reverse as well -- readinto() written in terms of
    # read() -- is tempting when read() is the more natural primitive,
    # but it would lead to nasty recursion in case a subclass doesn't
    # implement either.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        A size of None or a negative size reads everything via readall().

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None or size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part that readinto() actually filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() call."""
        collected = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            collected += data
        if collected:
            return bytes(collected)
        # Nothing was gathered: hand back the last read result, b'' or None.
        return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")
652

653
io.RawIOBase.register(RawIOBase)
654
from _io import FileIO
655
RawIOBase.register(FileIO)
656

657

658
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        """Shared body of readinto() and readinto1().

        Reads through read1() when *read1* is true, else through read(),
        copies the data into the start of *b* and returns its length.
        """
        # View the target as a flat sequence of bytes, whatever its format.
        target = b if isinstance(b, memoryview) else memoryview(b)
        target = target.cast('B')

        reader = self.read1 if read1 else self.read
        data = reader(len(target))
        count = len(data)

        target[:count] = data
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.

        This base implementation always raises UnsupportedOperation.
        """
        self._unsupported("detach")
760

761
io.BufferedIOBase.register(BufferedIOBase)
762

763

764
class _BufferedIOMixin(BufferedIOBase):
765

766
    """A mixin implementation of BufferedIOBase with an underlying raw stream.
767

768
    This passes most requests on to the underlying raw stream.  It
769
    does *not* provide implementations of read(), readinto() or
770
    write().
771
    """
772

773
    def __init__(self, raw):
774
        self._raw = raw
775

776
    ### Positioning ###
777

778
    def seek(self, pos, whence=0):
779
        new_position = self.raw.seek(pos, whence)
780
        if new_position < 0:
781
            raise OSError("seek() returned an invalid position")
782
        return new_position
783

784
    def tell(self):
785
        pos = self.raw.tell()
786
        if pos < 0:
787
            raise OSError("tell() returned an invalid position")
788
        return pos
789

790
    def truncate(self, pos=None):
791
        self._checkClosed()
792
        self._checkWritable()
793

794
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
795
        # and a flush may be necessary to synch both views of the current
796
        # file state.
797
        self.flush()
798

799
        if pos is None:
800
            pos = self.tell()
801
        # XXX: Should seek() be used, instead of passing the position
802
        # XXX  directly to truncate?
803
        return self.raw.truncate(pos)
804

805
    ### Flush and close ###
806

807
    def flush(self):
808
        if self.closed:
809
            raise ValueError("flush on closed file")
810
        self.raw.flush()
811

812
    def close(self):
813
        if self.raw is not None and not self.closed:
814
            try:
815
                # may raise BlockingIOError or BrokenPipeError etc
816
                self.flush()
817
            finally:
818
                self.raw.close()
819

820
    def detach(self):
821
        if self.raw is None:
822
            raise ValueError("raw stream already detached")
823
        self.flush()
824
        raw = self._raw
825
        self._raw = None
826
        return raw
827

828
    ### Inquiries ###
829

830
    def seekable(self):
831
        return self.raw.seekable()
832

833
    @property
834
    def raw(self):
835
        return self._raw
836

837
    @property
838
    def closed(self):
839
        return self.raw.closed
840

841
    @property
842
    def name(self):
843
        return self.raw.name
844

845
    @property
846
    def mode(self):
847
        return self.raw.mode
848

849
    def __getstate__(self):
850
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")
851

852
    def __repr__(self):
853
        modname = self.__class__.__module__
854
        clsname = self.__class__.__qualname__
855
        try:
856
            name = self.name
857
        except AttributeError:
858
            return "<{}.{}>".format(modname, clsname)
859
        else:
860
            return "<{}.{} name={!r}>".format(modname, clsname, name)
861

862
    ### Lower-level APIs ###
863

864
    def fileno(self):
865
        return self.raw.fileno()
866

867
    def isatty(self):
868
        return self.raw.isatty()
869

870

871
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    # Class-level default: __del__() calls close(), which looks at _buffer,
    # so the attribute must exist as early as possible.
    _buffer = None

    def __init__(self, initial_bytes=None):
        """Create the stream, optionally pre-filled with *initial_bytes*."""
        storage = bytearray()
        if initial_bytes is not None:
            storage += initial_bytes
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return dict(self.__dict__)

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def getbuffer(self):
        """Return a readable and writable view of the buffer.
        """
        if self.closed:
            raise ValueError("getbuffer on closed file")
        return memoryview(self._buffer)

    def close(self):
        """Empty the buffer (if one exists) and close the stream."""
        if self._buffer is not None:
            self._buffer.clear()
        super().close()

    def read(self, size=-1):
        """Return up to *size* bytes from the current position.

        A *size* of None or a negative value reads everything that is left.
        Raises TypeError if *size* has no __index__ and ValueError if the
        stream is closed.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                to_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            size = to_index()
        data = self._buffer
        start = self._pos
        if size < 0:
            size = len(data)
        if start >= len(data):
            # Positioned at or beyond the end: nothing to return.
            return b""
        stop = min(len(data), start + size)
        self._pos = stop
        return bytes(data[start:stop])

    def read1(self, size=-1):
        """This is the same as read.
        """
        return self.read(size)

    def write(self, b):
        """Store bytes-like *b* at the current position; return its size.

        Raises TypeError for str input and ValueError on a closed stream.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with memoryview(b) as view:
            nbytes = view.nbytes  # Size of any bytes-like object
        if not nbytes:
            return 0
        start = self._pos
        gap = start - len(self._buffer)
        if gap > 0:
            # The position lies past the end of the data: fill the hole
            # between the old end and the write position with null bytes.
            self._buffer += bytes(gap)
        self._buffer[start:start + nbytes] = b
        self._pos += nbytes
        return nbytes

    def seek(self, pos, whence=0):
        """Move the position and return it.

        *whence* 0 is absolute (negative *pos* raises ValueError), 1 is
        relative to the current position and 2 is relative to the end of the
        buffer; for 1 and 2 the result is clamped at 0.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            to_index = pos.__index__
        except AttributeError:
            raise TypeError(f"{pos!r} is not an integer")
        pos = to_index()
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            target = pos
        elif whence == 1:
            target = max(0, self._pos + pos)
        elif whence == 2:
            target = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("unsupported whence value")
        self._pos = target
        return target

    def tell(self):
        """Return the current position; ValueError if closed."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything from *pos* (default: current position) onwards.

        Returns the truncation position.  The stream position itself is
        left unchanged.
        """
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                to_index = pos.__index__
            except AttributeError:
                raise TypeError(f"{pos!r} is not an integer")
            pos = to_index()
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """Return True; ValueError if the stream is closed."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
1012

1013

1014
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        """Report whether the raw stream is readable."""
        return self.raw.readable()

    def _reset_read_buf(self):
        """Discard all buffered data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold _read_lock.

        Returns b"" or None when nothing at all could be read, mirroring the
        empty value (b"" or None) that the raw stream produced.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        self._checkClosed("peek of closed file")
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold _read_lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            # Top the buffer up to buffer_size with a single raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        self._checkClosed("read of closed file")
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        When *read1* is true the method returns as soon as some data has
        been stored; otherwise it keeps going until *buf* is full or no
        more data can be obtained.
        """

        self._checkClosed("readinto of closed file")

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The amount is
                # capped by the space still free in the caller's buffer:
                # after a partial copy followed by a refill, the internal
                # buffer can hold more than that, and a memoryview slice
                # assignment requires both sides to have the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical position: raw position minus unread buffer."""
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read buffer, return the position.

        Raises ValueError for a *whence* outside valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self._checkClosed("seek of closed file")
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread buffered data; compensate for that.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1212

1213
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap the writable raw stream *raw*.

        Raises OSError if *raw* is not writable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        """Report whether the raw stream is writable."""
        return self.raw.writable()

    def write(self, b):
        """Append *b* to the write buffer and return the bytes accepted.

        The buffer is flushed to the raw stream whenever it grows beyond
        buffer_size.  If such a flush would block while the buffer is still
        over buffer_size, the buffer is cut back to buffer_size and
        BlockingIOError is raised with the number of bytes accepted.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            size_before = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - size_before
            if len(self._write_buf) <= self.buffer_size:
                return accepted
            try:
                self._flush_unlocked()
            except BlockingIOError as exc:
                excess = len(self._write_buf) - self.buffer_size
                if excess > 0:
                    # Still beyond buffer_size: settle for a partial write
                    # and trim the buffer back to its limit.
                    accepted -= excess
                    self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(exc.errno, exc.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at *pos*.

        When *pos* is None the raw stream's position after the flush is used.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold _write_lock."""
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                count = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if count is None:
                # The raw stream signalled that it would block.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if count < 0 or count > len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            # Drop what the raw stream consumed and go round again.
            del self._write_buf[:count]

    def tell(self):
        """Return the logical position: raw position plus pending bytes."""
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError for a *whence* outside valid_seek_flags.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream unless detached or already closed."""
        with self._write_lock:
            nothing_to_do = self.raw is None or self.closed
        if nothing_to_do:
            return
        # The lock is released before calling self.flush() (which will
        # probably just take it again) because flush may have been
        # overridden in a subclass, or self.flush may have been set to
        # something else by the user. The C implementation behaves the
        # same way.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1316

1317

1318
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  OSError is raised if
        *reader* is not readable or *writer* is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')
        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        # Each direction gets its own buffered wrapper.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- Reading: forwarded to the buffered reader ---

    def read(self, size=-1):
        """Read from the reader side; None is treated as -1."""
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        """Forward to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Forward to the writer's write()."""
        return self.writer.write(b)

    def peek(self, size=0):
        """Forward to the reader's peek()."""
        return self.reader.peek(size)

    def read1(self, size=-1):
        """Forward to the reader's read1()."""
        return self.reader.read1(size)

    def readinto1(self, b):
        """Forward to the reader's readinto1()."""
        return self.reader.readinto1(b)

    def readable(self):
        """Report the reader's readable() result."""
        return self.reader.readable()

    def writable(self):
        """Report the writer's writable() result."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader even if the writer failed."""
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True when either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed flag of the writer side."""
        return self.writer.closed
1389

1390

1391
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Check that *raw* is seekable, then set up both buffer halves."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, undo read-ahead and seek the raw stream.

        Raises ValueError for a *whence* outside valid_seek_flags and
        OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so a failing raw seek does not lose buffered data
        # forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position.

        The writer's view is used while writes are pending, the reader's
        view otherwise.
        """
        side = BufferedWriter if self._write_buf else BufferedReader
        return side.tell(self)

    def truncate(self, pos=None):
        """Truncate at *pos* (default: the current logical position)."""
        target = self.tell() if pos is None else pos
        # The writer implementation flushes pending writes before
        # truncating the raw stream.
        return BufferedWriter.truncate(self, target)

    # Every read-style operation flushes pending writes first and then
    # defers to the BufferedReader implementation.

    def read(self, size=None):
        """Flush pending writes, then read; None is treated as -1."""
        if size is None:
            size = -1
        self.flush()
        return BufferedReader.read(self, size)

    def readinto(self, b):
        """Flush pending writes, then readinto()."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then peek()."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then read1()."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then readinto1()."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Undo any read-ahead, then write through the writer half."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                rewind = self._read_pos - len(self._read_buf)
                self.raw.seek(rewind, 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1463

1464

1465
class FileIO(RawIOBase):
    """Raw binary stream on top of an OS-level file descriptor.

    read(), write(), seek(), tell() and truncate() each map onto a single
    os-level call on the descriptor; readall() loops until EOF.
    """

    # Class-level defaults: __del__, __repr__ and close() read these, so
    # they must exist even when __init__ raised before assigning them.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).

        *file* may also be an already open, non-negative integer file
        descriptor; only in that case may *closefd* be False.

        Raises TypeError for a float *file* or a non-str *mode*, ValueError
        for a negative descriptor, an invalid mode, or closefd=False with a
        file name, and IsADirectoryError when the descriptor refers to a
        directory.
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        # Forget the access mode of any previous open.  Without this a
        # second __init__ call would OR the old flags into the new ones
        # (e.g. 'w' followed by 'r' would reopen the file read/write).
        self._created = False
        self._readable = False
        self._writable = False
        self._appending = False
        self._seekable = None

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Only a descriptor opened here is closed again on failure; one
        # handed in by the caller stays the caller's responsibility.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except BaseException:
            # Cleanup only; the exception is always re-raised.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling: always raises TypeError."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened readable.

        *msg* is accepted for signature parity with _checkWritable() and is
        ignored; the error message is fixed.
        """
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened writable.

        *msg* is ignored; the error message is fixed.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested.
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.

        A size of None or a negative size reads until EOF via readall().
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                # One byte more than the remaining size, so the read that
                # hits EOF does not force the buffer to grow first.
                bufsize = end - pos + 1
        except OSError:
            # Position or size unavailable: keep the default buffer size.
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                # The size estimate was too small: grow the target size.
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk: # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Reads up to len(b) bytes into the writable buffer *b* and returns
        the number of bytes stored, or None when read() returns None
        (non-blocking descriptor with no data available).
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() would have blocked; there is nothing to copy.
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte offset.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes and return size.

        Size defaults to the current file position, as returned by tell().
        The file position itself is not moved by this call.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                # _fd is -1 when a (re-)initialisation failed; there is
                # no descriptor to release in that case.
                if self._closefd and self._fd >= 0:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1806

1807

1808
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Defines the character- and line-oriented stream interface.  Every
    I/O method here reports the operation as unsupported through
    self._unsupported(); the informational properties return None.
    """

    def read(self, size=-1):
        """Read and return at most *size* characters as a str.

        Data is taken from the underlying buffer until *size* characters
        are available or EOF is reached.  A negative or omitted *size*
        means read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to *pos*, where *pos* is an int."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or to EOF.

        An empty string is returned if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        Once the buffer has been detached the text stream is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the stream's encoding; None here, subclasses override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated while reading are considered.
        None here; subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None

# Register as a virtual subclass of the official ABC from the io module.
io.TextIOBase.register(TextIOBase)
1873

1874

1875
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder used for universal-newlines reading.

    Wraps another incremental decoder (or none, when the input is already
    text) and, if *translate* is true, turns \r\n and \r into \n.  The
    kinds of newline seen are recorded in the ``seennl`` bit mask.  Even
    with translate=False a trailing \r is held back until the next call
    so that a \r\n pair is always returned in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode *input*, track newline kinds, and optionally translate."""
        # Run the wrapped decoder, if there is one.
        if self.decoder is None:
            text = input
        else:
            text = self.decoder.decode(input, final=final)

        # Put back the \r that was withheld by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Withhold a trailing \r (even when not translating) so that a
        # following \n can be joined to it on the next pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Count each newline flavour; \r\n must not be counted as \r or \n.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flag); bit 0 of flag stores the pending-\\r state."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = flag << 1
        if self.pendingcr:
            flag = flag | 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and pending \\r; reset the wrapped decoder."""
        self.pendingcr = False
        self.seennl = 0
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline kind(s) seen so far: None, a str, or a tuple of str."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1958

1959

1960
class TextIOWrapper(TextIOBase):
1961

1962
    r"""Character and line based layer over a BufferedIOBase object, buffer.
1963

1964
    encoding gives the name of the encoding that the stream will be
1965
    decoded or encoded with. It defaults to locale.getencoding().
1966

1967
    errors determines the strictness of encoding and decoding (see the
1968
    codecs.register) and defaults to "strict".
1969

1970
    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
1971
    handling of line endings. If it is None, universal newlines is
1972
    enabled.  With this enabled, on input, the line endings '\n', '\r',
1973
    or '\r\n' are translated to '\n' before being returned to the
1974
    caller. Conversely, on output, '\n' is translated to the system
1975
    default line separator, os.linesep. If newline is any other of its
1976
    legal values, that newline becomes the newline when the file is read
1977
    and it is returned untranslated. On output, '\n' is converted to the
1978
    newline.
1979

1980
    If line_buffering is True, a call to flush is implied when a call to
1981
    write contains a newline character.
1982
    """
1983

1984
    _CHUNK_SIZE = 2048
1985

1986
    # Initialize _buffer as soon as possible since it's used by __del__()
1987
    # which calls close()
1988
    _buffer = None
1989

1990
    # The write_through argument has no effect here since this
1991
    # implementation always writes through.  The argument is present only
1992
    # so that the signature can match the signature of the C version.
1993
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
1994
                 line_buffering=False, write_through=False):
1995
        self._check_newline(newline)
1996
        encoding = text_encoding(encoding)
1997

1998
        if encoding == "locale":
1999
            encoding = self._get_locale_encoding()
2000

2001
        if not isinstance(encoding, str):
2002
            raise ValueError("invalid encoding: %r" % encoding)
2003

2004
        if not codecs.lookup(encoding)._is_text_encoding:
2005
            msg = ("%r is not a text encoding; "
2006
                   "use codecs.open() to handle arbitrary codecs")
2007
            raise LookupError(msg % encoding)
2008

2009
        if errors is None:
2010
            errors = "strict"
2011
        else:
2012
            if not isinstance(errors, str):
2013
                raise ValueError("invalid errors: %r" % errors)
2014
            if _CHECK_ERRORS:
2015
                codecs.lookup_error(errors)
2016

2017
        self._buffer = buffer
2018
        self._decoded_chars = ''  # buffer for text returned from decoder
2019
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
2020
        self._snapshot = None  # info for reconstructing decoder state
2021
        self._seekable = self._telling = self.buffer.seekable()
2022
        self._has_read1 = hasattr(self.buffer, 'read1')
2023
        self._configure(encoding, errors, newline,
2024
                        line_buffering, write_through)
2025

2026
    def _check_newline(self, newline):
2027
        if newline is not None and not isinstance(newline, str):
2028
            raise TypeError("illegal newline type: %r" % (type(newline),))
2029
        if newline not in (None, "", "\n", "\r", "\r\n"):
2030
            raise ValueError("illegal newline value: %r" % (newline,))
2031

2032
    def _configure(self, encoding=None, errors=None, newline=None,
2033
                   line_buffering=False, write_through=False):
2034
        self._encoding = encoding
2035
        self._errors = errors
2036
        self._encoder = None
2037
        self._decoder = None
2038
        self._b2cratio = 0.0
2039

2040
        self._readuniversal = not newline
2041
        self._readtranslate = newline is None
2042
        self._readnl = newline
2043
        self._writetranslate = newline != ''
2044
        self._writenl = newline or os.linesep
2045

2046
        self._line_buffering = line_buffering
2047
        self._write_through = write_through
2048

2049
        # don't write a BOM in the middle of a file
2050
        if self._seekable and self.writable():
2051
            position = self.buffer.tell()
2052
            if position != 0:
2053
                try:
2054
                    self._get_encoder().setstate(0)
2055
                except LookupError:
2056
                    # Sometimes the encoder doesn't exist
2057
                    pass
2058

2059
    # self._snapshot is either None, or a tuple (dec_flags, next_input)
2060
    # where dec_flags is the second (integer) item of the decoder state
2061
    # and next_input is the chunk of input bytes that comes next after the
2062
    # snapshot point.  We use this to reconstruct decoder states in tell().
2063

2064
    # Naming convention:
2065
    #   - "bytes_..." for integer variables that count input bytes
2066
    #   - "chars_..." for integer variables that count decoded characters
2067

2068
    def __repr__(self):
2069
        result = "<{}.{}".format(self.__class__.__module__,
2070
                                 self.__class__.__qualname__)
2071
        try:
2072
            name = self.name
2073
        except AttributeError:
2074
            pass
2075
        else:
2076
            result += " name={0!r}".format(name)
2077
        try:
2078
            mode = self.mode
2079
        except AttributeError:
2080
            pass
2081
        else:
2082
            result += " mode={0!r}".format(mode)
2083
        return result + " encoding={0!r}>".format(self.encoding)
2084

2085
    @property
2086
    def encoding(self):
2087
        return self._encoding
2088

2089
    @property
2090
    def errors(self):
2091
        return self._errors
2092

2093
    @property
2094
    def line_buffering(self):
2095
        return self._line_buffering
2096

2097
    @property
2098
    def write_through(self):
2099
        return self._write_through
2100

2101
    @property
2102
    def buffer(self):
2103
        return self._buffer
2104

2105
    def reconfigure(self, *,
2106
                    encoding=None, errors=None, newline=Ellipsis,
2107
                    line_buffering=None, write_through=None):
2108
        """Reconfigure the text stream with new parameters.
2109

2110
        This also flushes the stream.
2111
        """
2112
        if (self._decoder is not None
2113
                and (encoding is not None or errors is not None
2114
                     or newline is not Ellipsis)):
2115
            raise UnsupportedOperation(
2116
                "It is not possible to set the encoding or newline of stream "
2117
                "after the first read")
2118

2119
        if errors is None:
2120
            if encoding is None:
2121
                errors = self._errors
2122
            else:
2123
                errors = 'strict'
2124
        elif not isinstance(errors, str):
2125
            raise TypeError("invalid errors: %r" % errors)
2126

2127
        if encoding is None:
2128
            encoding = self._encoding
2129
        else:
2130
            if not isinstance(encoding, str):
2131
                raise TypeError("invalid encoding: %r" % encoding)
2132
            if encoding == "locale":
2133
                encoding = self._get_locale_encoding()
2134

2135
        if newline is Ellipsis:
2136
            newline = self._readnl
2137
        self._check_newline(newline)
2138

2139
        if line_buffering is None:
2140
            line_buffering = self.line_buffering
2141
        if write_through is None:
2142
            write_through = self.write_through
2143

2144
        self.flush()
2145
        self._configure(encoding, errors, newline,
2146
                        line_buffering, write_through)
2147

2148
    def seekable(self):
2149
        if self.closed:
2150
            raise ValueError("I/O operation on closed file.")
2151
        return self._seekable
2152

2153
    def readable(self):
2154
        return self.buffer.readable()
2155

2156
    def writable(self):
2157
        return self.buffer.writable()
2158

2159
    def flush(self):
2160
        self.buffer.flush()
2161
        self._telling = self._seekable
2162

2163
    def close(self):
2164
        if self.buffer is not None and not self.closed:
2165
            try:
2166
                self.flush()
2167
            finally:
2168
                self.buffer.close()
2169

2170
    @property
2171
    def closed(self):
2172
        return self.buffer.closed
2173

2174
    @property
2175
    def name(self):
2176
        return self.buffer.name
2177

2178
    def fileno(self):
2179
        return self.buffer.fileno()
2180

2181
    def isatty(self):
2182
        return self.buffer.isatty()
2183

2184
    def write(self, s):
2185
        'Write data, where s is a str'
2186
        if self.closed:
2187
            raise ValueError("write to closed file")
2188
        if not isinstance(s, str):
2189
            raise TypeError("can't write %s to text stream" %
2190
                            s.__class__.__name__)
2191
        length = len(s)
2192
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
2193
        if haslf and self._writetranslate and self._writenl != "\n":
2194
            s = s.replace("\n", self._writenl)
2195
        encoder = self._encoder or self._get_encoder()
2196
        # XXX What if we were just reading?
2197
        b = encoder.encode(s)
2198
        self.buffer.write(b)
2199
        if self._line_buffering and (haslf or "\r" in s):
2200
            self.flush()
2201
        self._set_decoded_chars('')
2202
        self._snapshot = None
2203
        if self._decoder:
2204
            self._decoder.reset()
2205
        return length
2206

2207
    def _get_encoder(self):
2208
        make_encoder = codecs.getincrementalencoder(self._encoding)
2209
        self._encoder = make_encoder(self._errors)
2210
        return self._encoder
2211

2212
    def _get_decoder(self):
2213
        make_decoder = codecs.getincrementaldecoder(self._encoding)
2214
        decoder = make_decoder(self._errors)
2215
        if self._readuniversal:
2216
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2217
        self._decoder = decoder
2218
        return decoder
2219

2220
    # The following three methods implement an ADT for _decoded_chars.
2221
    # Text returned from the decoder is buffered here until the client
2222
    # requests it by calling our read() or readline() method.
2223
    def _set_decoded_chars(self, chars):
2224
        """Set the _decoded_chars buffer."""
2225
        self._decoded_chars = chars
2226
        self._decoded_chars_used = 0
2227

2228
    def _get_decoded_chars(self, n=None):
2229
        """Advance into the _decoded_chars buffer."""
2230
        offset = self._decoded_chars_used
2231
        if n is None:
2232
            chars = self._decoded_chars[offset:]
2233
        else:
2234
            chars = self._decoded_chars[offset:offset + n]
2235
        self._decoded_chars_used += len(chars)
2236
        return chars
2237

2238
    def _get_locale_encoding(self):
2239
        try:
2240
            import locale
2241
        except ImportError:
2242
            # Importing locale may fail if Python is being built
2243
            return "utf-8"
2244
        else:
2245
            return locale.getencoding()
2246

2247
    def _rewind_decoded_chars(self, n):
2248
        """Rewind the _decoded_chars buffer."""
2249
        if self._decoded_chars_used < n:
2250
            raise AssertionError("rewind decoded_chars out of bounds")
2251
        self._decoded_chars_used -= n
2252

2253
    def _read_chunk(self):
2254
        """
2255
        Read and decode the next chunk of data from the BufferedReader.
2256
        """
2257

2258
        # The return value is True unless EOF was reached.  The decoded
2259
        # string is placed in self._decoded_chars (replacing its previous
2260
        # value).  The entire input chunk is sent to the decoder, though
2261
        # some of it may remain buffered in the decoder, yet to be
2262
        # converted.
2263

2264
        if self._decoder is None:
2265
            raise ValueError("no decoder")
2266

2267
        if self._telling:
2268
            # To prepare for tell(), we need to snapshot a point in the
2269
            # file where the decoder's input buffer is empty.
2270

2271
            dec_buffer, dec_flags = self._decoder.getstate()
2272
            # Given this, we know there was a valid snapshot point
2273
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2274

2275
        # Read a chunk, decode it, and put the result in self._decoded_chars.
2276
        if self._has_read1:
2277
            input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2278
        else:
2279
            input_chunk = self.buffer.read(self._CHUNK_SIZE)
2280
        eof = not input_chunk
2281
        decoded_chars = self._decoder.decode(input_chunk, eof)
2282
        self._set_decoded_chars(decoded_chars)
2283
        if decoded_chars:
2284
            self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2285
        else:
2286
            self._b2cratio = 0.0
2287

2288
        if self._telling:
2289
            # At the snapshot point, len(dec_buffer) bytes before the read,
2290
            # the next input to be decoded is dec_buffer + input_chunk.
2291
            self._snapshot = (dec_flags, dec_buffer + input_chunk)
2292

2293
        return not eof
2294

2295
    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=False, chars_to_skip=0):
        """Combine the components of a tell() cookie into a single integer.

        A cookie means: seek to position, set the decoder flags to
        dec_flags, read bytes_to_feed bytes and feed them to the decoder
        with need_eof as the EOF flag, then skip chars_to_skip characters
        of the decoded result.  Each field is placed 64 bits above the
        previous one, so when every field but position is zero the cookie
        is simply the byte offset.
        """
        cookie = position
        cookie = cookie | (dec_flags << 64)
        cookie = cookie | (bytes_to_feed << 128)
        cookie = cookie | (chars_to_skip << 192)
        cookie = cookie | (bool(need_eof) << 256)
        return cookie
2304

2305
    def _unpack_cookie(self, bigint):
        """Split a tell() cookie back into its components.

        Returns the tuple
        (position, dec_flags, bytes_to_feed, need_eof, chars_to_skip),
        where need_eof is a bool and the other fields are the successive
        64-bit slices of *bigint*, lowest first.
        """
        mask = (1 << 64) - 1
        position = bigint & mask
        bigint >>= 64
        dec_flags = bigint & mask
        bigint >>= 64
        bytes_to_feed = bigint & mask
        bigint >>= 64
        chars_to_skip = bigint & mask
        # Whatever remains above the fourth slice is the EOF flag.
        need_eof = bool(bigint >> 64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
2311

2312
    def tell(self):
        """Return a cookie describing the current stream position.

        The cookie is an integer built by _pack_cookie().  When there is
        no decoder or no snapshot it is just the byte position reported
        by the underlying buffer.

        Raises UnsupportedOperation if the stream is not seekable and
        OSError if telling is disabled or the position cannot be
        reconstructed.
        """
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if not self._telling:
            raise OSError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # Decoded text without a snapshot should be impossible.
                raise AssertionError("pending decoded text")
            return position

        # Rewind to the snapshot point recorded by _read_chunk().
        snap_flags, pending = self._snapshot
        position -= len(pending)

        # Number of decoded characters consumed since the snapshot.
        remaining = self._decoded_chars_used
        if remaining == 0:
            # Still sitting exactly on the snapshot point.
            return self._pack_cookie(position, snap_flags)

        # Replay the decoder from the snapshot until it has produced
        # enough characters; its real state is restored on exit.
        saved_state = decoder.getstate()
        try:
            # Fast path: guess a byte offset near the target from the
            # bytes-per-char ratio.  decoder.decode() has a large fixed
            # overhead, so the aim is O(1) calls for common decoders and
            # sensible input (exactly one call for fixed-width codecs).
            guess = int(self._b2cratio * remaining)
            backoff = 1
            assert guess <= len(pending)
            while guess > 0:
                decoder.setstate((b'', snap_flags))
                # Decode up to the tentative start point.
                produced = len(decoder.decode(pending[:guess]))
                if produced <= remaining:
                    leftover, flags_here = decoder.getstate()
                    if not leftover:
                        # At or before the target with nothing buffered
                        # in the decoder: a usable start point.
                        snap_flags = flags_here
                        remaining -= produced
                        break
                    # Step back over the buffered bytes and restart the
                    # back-off heuristic.
                    guess -= len(leftover)
                    backoff = 1
                else:
                    # Overshot the target; back off exponentially.
                    guess -= backoff
                    backoff = backoff * 2
            else:
                guess = 0
                decoder.setstate((b'', snap_flags))

            # Initial safe start point.
            safe_pos = position + guess
            safe_flags = snap_flags
            if remaining == 0:
                # The start point is the target itself.
                return self._pack_cookie(safe_pos, safe_flags)

            # Slow path: feed one byte at a time, remembering the latest
            # point where the decoder has nothing buffered (seek() can
            # restart from there and advance to the target).
            fed = 0
            need_eof = False
            # Characters decoded since safe_pos.
            decoded = 0
            for i in range(guess, len(pending)):
                fed += 1
                decoded += len(decoder.decode(pending[i:i+1]))
                buffered, cur_flags = decoder.getstate()
                if not buffered and decoded <= remaining:
                    # Empty decoder buffer: promote this to the new
                    # safe start point.
                    safe_pos += fed
                    remaining -= decoded
                    safe_flags, fed, decoded = cur_flags, 0, 0
                if decoded >= remaining:
                    break
            else:
                # Ran out of input; signal EOF to flush the decoder.
                decoded += len(decoder.decode(b'', final=True))
                need_eof = True
                if decoded < remaining:
                    raise OSError("can't reconstruct logical file position")

            # The cookie refers to the last safe start point found.
            return self._pack_cookie(
                safe_pos, safe_flags, fed, need_eof, remaining)
        finally:
            decoder.setstate(saved_state)
2410

2411
    def truncate(self, pos=None):
        """Truncate the underlying buffer at *pos* and return its result.

        Pending output is flushed first.  When *pos* is None the current
        position, as reported by tell(), is used.
        """
        self.flush()
        target = self.tell() if pos is None else pos
        return self.buffer.truncate(target)
2416

2417
    def detach(self):
        """Flush, then disconnect and return the underlying buffer.

        Afterwards self._buffer is None.  Raises ValueError if the buffer
        has already been detached.
        """
        if self.buffer is None:
            raise ValueError("buffer is already detached")
        self.flush()
        detached, self._buffer = self._buffer, None
        return detached
2424

2425
    def seek(self, cookie, whence=0):
        """Reposition the stream and return the new position.

        With whence == 0 (the default), *cookie* must be a non-negative
        value as produced by tell(); the stream goes back to the safe
        start point encoded in it and replays the decoding needed to
        reach the exact character position.  With whence == SEEK_CUR or
        SEEK_END only a zero offset is supported.

        Raises ValueError if the file is closed, *whence* is not
        supported or *cookie* is negative; UnsupportedOperation if the
        stream is not seekable or a nonzero relative seek is requested;
        OSError if the position encoded in *cookie* cannot be restored.
        """
        def _reset_encoder(position):
            """Reset the encoder (merely useful for proper BOM handling)"""
            try:
                encoder = self._encoder or self._get_encoder()
            except LookupError:
                # Sometimes the encoder doesn't exist
                pass
            else:
                if position != 0:
                    encoder.setstate(0)
                else:
                    encoder.reset()

        if self.closed:
            # The message names this operation (it previously said "tell").
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise UnsupportedOperation("underlying stream is not seekable")
        if whence == SEEK_CUR:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        elif whence == SEEK_END:
            if cookie != 0:
                raise UnsupportedOperation("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, whence)
            # Drop decoded text and decoder state: none of it applies at
            # the new position.
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            _reset_encoder(position)
            return position
        if whence != 0:
            raise ValueError("unsupported whence (%r)" % (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if cookie == 0 and self._decoder:
            self._decoder.reset()
        elif self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise OSError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        _reset_encoder(cookie)
        return cookie
2499

2500
    def read(self, size=None):
        """Read and return at most *size* characters as a str.

        If *size* is None or negative, everything up to EOF is read and
        returned.  Otherwise chunks are read and decoded until *size*
        characters are available or EOF is reached.

        Raises TypeError if *size* is neither None nor an integer-like
        object (one providing __index__), and BlockingIOError if a
        read-everything request gets None from the underlying buffer
        (i.e. no data is currently available).
        """
        self._checkReadable()
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        decoder = self._decoder or self._get_decoder()
        if size < 0:
            # Read everything.
            chunk = self.buffer.read()
            if chunk is None:
                # Report the missing data explicitly rather than letting
                # the decoder fail on None.  This is checked before the
                # already-decoded text is consumed, so nothing is lost.
                raise BlockingIOError("Read returned None.")
            result = (self._get_decoded_chars() +
                      decoder.decode(chunk, final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have size characters to return.
            eof = False
            result = self._get_decoded_chars(size)
            while len(result) < size and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(size - len(result))
            return result
2527

2528
    def __next__(self):
        """Return the next line, raising StopIteration at end of file.

        tell() is disabled (self._telling is False) while iteration is in
        progress; once the input is exhausted the snapshot is cleared and
        telling is re-enabled if the stream is seekable.
        """
        self._telling = False
        line = self.readline()
        if line:
            return line
        # Exhausted: restore the ability to tell() before stopping.
        self._snapshot = None
        self._telling = self._seekable
        raise StopIteration
2536

2537
    def readline(self, size=None):
        """Read and return one line, including its line ending if present.

        If *size* is given and non-negative, at most *size* characters
        are returned.  At end of file the remaining text (possibly the
        empty string) is returned.

        Raises ValueError if the file is closed and TypeError if *size*
        is neither None nor an integer-like object.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()

        # Take every decoded character available; the surplus is handed
        # back via _rewind_decoded_chars() at the end.
        line = self._get_decoded_chars()

        # Create the decoder on first use.
        if not self._decoder:
            self._get_decoder()

        search_from = 0
        endpos = None
        while True:
            if self._readtranslate:
                # Line endings were already translated to '\n'.
                hit = line.find('\n', search_from)
                if hit >= 0:
                    endpos = hit + 1
                    break
                search_from = len(line)

            elif self._readuniversal:
                # Universal newlines: accept '\r', '\r\n' or '\n'.  The
                # decoder guarantees that '\r\n' is never split in two.
                nlpos = line.find("\n", search_from)
                crpos = line.find("\r", search_from)
                if crpos == -1 and nlpos == -1:
                    # Nothing found yet.
                    search_from = len(line)
                else:
                    if crpos == -1 or (nlpos != -1 and nlpos < crpos):
                        # '\n' comes first (or is the only one found).
                        endpos = nlpos + 1
                    elif nlpos == crpos + 1:
                        # '\r\n' pair.
                        endpos = crpos + 2
                    else:
                        # Lone '\r'.
                        endpos = crpos + 1
                    break
            else:
                # A specific line terminator was requested.
                hit = line.find(self._readnl)
                if hit >= 0:
                    endpos = hit + len(self._readnl)
                    break

            if 0 <= size <= len(line):
                # Enough characters collected to satisfy the size limit.
                endpos = size
                break

            # No line ending seen yet: read until some text is decoded
            # or the input is exhausted.
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if not self._decoded_chars:
                # End of file.
                self._set_decoded_chars('')
                self._snapshot = None
                return line
            line += self._get_decoded_chars()

        if 0 <= size < endpos:
            # Never return more than size characters.
            endpos = size

        # Give back everything after the end of the returned line.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]
2629

2630
    @property
    def newlines(self):
        """The decoder's ``newlines`` attribute, or None without a decoder."""
        decoder = self._decoder
        if not decoder:
            return None
        return decoder.newlines
2633

2634

2635
class StringIO(TextIOWrapper):
    """In-memory text stream.

    *initial_value* provides the initial contents of the object and
    *newline* has the same meaning as in TextIOWrapper's constructor.
    The text is kept UTF-8 encoded in a BytesIO object.
    """

    def __init__(self, initial_value="", newline="\n"):
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: universal newlines must behave as in the C
        # implementation, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire contents of the buffer as a str."""
        self.flush()
        decoder = self._decoder or self._get_decoder()
        # Decode the whole buffer from a clean decoder state, then put
        # the decoder back the way it was.
        saved = decoder.getstate()
        decoder.reset()
        try:
            text = decoder.decode(self.buffer.getvalue(), final=True)
        finally:
            decoder.setstate(saved)
        return text

    def __repr__(self):
        # TextIOWrapper's repr shows the encoding, which is only an
        # implementation detail here, so use the plain object repr.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None for StringIO."""
        return None

    @property
    def encoding(self):
        """Always None for StringIO."""
        return None

    def detach(self):
        # Detaching the buffer makes no sense for StringIO.
        self._unsupported("detach")
2684

2685
Product

Resources

Company