Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Lib/_pyio.py
12 views
1
"""
2
Python implementation of the io module.
3
"""
4
5
import os
6
import abc
7
import codecs
8
import errno
9
import stat
10
import sys
11
# Import _thread instead of threading to reduce startup cost
12
from _thread import allocate_lock as Lock
13
if sys.platform in {'win32', 'cygwin'}:
14
from msvcrt import setmode as _setmode
15
else:
16
_setmode = None
17
18
import io
19
from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
20
21
# The three portable whence values (SEEK_SET, SEEK_CUR, SEEK_END) are
# hardwired; platform-specific ones are added below when available.
valid_seek_flags = {0, 1, 2}
if hasattr(os, 'SEEK_HOLE'):
    valid_seek_flags |= {os.SEEK_HOLE, os.SEEK_DATA}

# Fallback buffer size in bytes; open() prefers st_blksize whenever it can.
DEFAULT_BUFFER_SIZE = 8192

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't want
# to inherit the C implementations.

# Rebind the builtin under this module's namespace for compatibility.
BlockingIOError = BlockingIOError

# Does open() check its 'errors' argument?  (Debug builds and dev mode only.)
_CHECK_ERRORS = hasattr(sys, "gettotalrefcount") or sys.flags.dev_mode
38
39
40
def text_encoding(encoding, stacklevel=2):
41
"""
42
A helper function to choose the text encoding.
43
44
When encoding is not None, this function returns it.
45
Otherwise, this function returns the default text encoding
46
(i.e. "locale" or "utf-8" depends on UTF-8 mode).
47
48
This function emits an EncodingWarning if *encoding* is None and
49
sys.flags.warn_default_encoding is true.
50
51
This can be used in APIs with an encoding=None parameter
52
that pass it to TextIOWrapper or open.
53
However, please consider using encoding="utf-8" for new APIs.
54
"""
55
if encoding is None:
56
if sys.flags.utf8_mode:
57
encoding = "utf-8"
58
else:
59
encoding = "locale"
60
if sys.flags.warn_default_encoding:
61
import warnings
62
warnings.warn("'encoding' argument not specified.",
63
EncodingWarning, stacklevel + 1)
64
return encoding
65
66
67
# Wrapper for builtins.open
68
#
69
# Trick so that open() won't become a bound method when stored
70
# as a class variable (as dbm.dumb does).
71
#
72
# See init_set_builtins_open() in Python/pylifecycle.c.
73
@staticmethod
74
def open(file, mode="r", buffering=-1, encoding=None, errors=None,
75
newline=None, closefd=True, opener=None):
76
77
r"""Open file and return a stream. Raise OSError upon failure.
78
79
file is either a text or byte string giving the name (and the path
80
if the file isn't in the current working directory) of the file to
81
be opened or an integer file descriptor of the file to be
82
wrapped. (If a file descriptor is given, it is closed when the
83
returned I/O object is closed, unless closefd is set to False.)
84
85
mode is an optional string that specifies the mode in which the file is
86
opened. It defaults to 'r' which means open for reading in text mode. Other
87
common values are 'w' for writing (truncating the file if it already
88
exists), 'x' for exclusive creation of a new file, and 'a' for appending
89
(which on some Unix systems, means that all writes append to the end of the
90
file regardless of the current seek position). In text mode, if encoding is
91
not specified the encoding used is platform dependent. (For reading and
92
writing raw bytes use binary mode and leave encoding unspecified.) The
93
available modes are:
94
95
========= ===============================================================
96
Character Meaning
97
--------- ---------------------------------------------------------------
98
'r' open for reading (default)
99
'w' open for writing, truncating the file first
100
'x' create a new file and open it for writing
101
'a' open for writing, appending to the end of the file if it exists
102
'b' binary mode
103
't' text mode (default)
104
'+' open a disk file for updating (reading and writing)
105
========= ===============================================================
106
107
The default mode is 'rt' (open for reading text). For binary random
108
access, the mode 'w+b' opens and truncates the file to 0 bytes, while
109
'r+b' opens the file without truncation. The 'x' mode implies 'w' and
110
raises an `FileExistsError` if the file already exists.
111
112
Python distinguishes between files opened in binary and text modes,
113
even when the underlying operating system doesn't. Files opened in
114
binary mode (appending 'b' to the mode argument) return contents as
115
bytes objects without any decoding. In text mode (the default, or when
116
't' is appended to the mode argument), the contents of the file are
117
returned as strings, the bytes having been first decoded using a
118
platform-dependent encoding or using the specified encoding if given.
119
120
buffering is an optional integer used to set the buffering policy.
121
Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
122
line buffering (only usable in text mode), and an integer > 1 to indicate
123
the size of a fixed-size chunk buffer. When no buffering argument is
124
given, the default buffering policy works as follows:
125
126
* Binary files are buffered in fixed-size chunks; the size of the buffer
127
is chosen using a heuristic trying to determine the underlying device's
128
"block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
129
On many systems, the buffer will typically be 4096 or 8192 bytes long.
130
131
* "Interactive" text files (files for which isatty() returns True)
132
use line buffering. Other text files use the policy described above
133
for binary files.
134
135
encoding is the str name of the encoding used to decode or encode the
136
file. This should only be used in text mode. The default encoding is
137
platform dependent, but any encoding supported by Python can be
138
passed. See the codecs module for the list of supported encodings.
139
140
errors is an optional string that specifies how encoding errors are to
141
be handled---this argument should not be used in binary mode. Pass
142
'strict' to raise a ValueError exception if there is an encoding error
143
(the default of None has the same effect), or pass 'ignore' to ignore
144
errors. (Note that ignoring encoding errors can lead to data loss.)
145
See the documentation for codecs.register for a list of the permitted
146
encoding error strings.
147
148
newline is a string controlling how universal newlines works (it only
149
applies to text mode). It can be None, '', '\n', '\r', and '\r\n'. It works
150
as follows:
151
152
* On input, if newline is None, universal newlines mode is
153
enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
154
these are translated into '\n' before being returned to the
155
caller. If it is '', universal newline mode is enabled, but line
156
endings are returned to the caller untranslated. If it has any of
157
the other legal values, input lines are only terminated by the given
158
string, and the line ending is returned to the caller untranslated.
159
160
* On output, if newline is None, any '\n' characters written are
161
translated to the system default line separator, os.linesep. If
162
newline is '', no translation takes place. If newline is any of the
163
other legal values, any '\n' characters written are translated to
164
the given string.
165
166
closedfd is a bool. If closefd is False, the underlying file descriptor will
167
be kept open when the file is closed. This does not work when a file name is
168
given and must be True in that case.
169
170
The newly created file is non-inheritable.
171
172
A custom opener can be used by passing a callable as *opener*. The
173
underlying file descriptor for the file object is then obtained by calling
174
*opener* with (*file*, *flags*). *opener* must return an open file
175
descriptor (passing os.open as *opener* results in functionality similar to
176
passing None).
177
178
open() returns a file object whose type depends on the mode, and
179
through which the standard file operations such as reading and writing
180
are performed. When open() is used to open a file in a text mode ('w',
181
'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
182
a file in a binary mode, the returned class varies: in read binary
183
mode, it returns a BufferedReader; in write binary and append binary
184
modes, it returns a BufferedWriter, and in read/write mode, it returns
185
a BufferedRandom.
186
187
It is also possible to use a string or bytearray as a file for both
188
reading and writing. For strings StringIO can be used like a file
189
opened in a text mode, and for bytes a BytesIO can be used like a file
190
opened in a binary mode.
191
"""
192
if not isinstance(file, int):
193
file = os.fspath(file)
194
if not isinstance(file, (str, bytes, int)):
195
raise TypeError("invalid file: %r" % file)
196
if not isinstance(mode, str):
197
raise TypeError("invalid mode: %r" % mode)
198
if not isinstance(buffering, int):
199
raise TypeError("invalid buffering: %r" % buffering)
200
if encoding is not None and not isinstance(encoding, str):
201
raise TypeError("invalid encoding: %r" % encoding)
202
if errors is not None and not isinstance(errors, str):
203
raise TypeError("invalid errors: %r" % errors)
204
modes = set(mode)
205
if modes - set("axrwb+t") or len(mode) > len(modes):
206
raise ValueError("invalid mode: %r" % mode)
207
creating = "x" in modes
208
reading = "r" in modes
209
writing = "w" in modes
210
appending = "a" in modes
211
updating = "+" in modes
212
text = "t" in modes
213
binary = "b" in modes
214
if text and binary:
215
raise ValueError("can't have text and binary mode at once")
216
if creating + reading + writing + appending > 1:
217
raise ValueError("can't have read/write/append mode at once")
218
if not (creating or reading or writing or appending):
219
raise ValueError("must have exactly one of read/write/append mode")
220
if binary and encoding is not None:
221
raise ValueError("binary mode doesn't take an encoding argument")
222
if binary and errors is not None:
223
raise ValueError("binary mode doesn't take an errors argument")
224
if binary and newline is not None:
225
raise ValueError("binary mode doesn't take a newline argument")
226
if binary and buffering == 1:
227
import warnings
228
warnings.warn("line buffering (buffering=1) isn't supported in binary "
229
"mode, the default buffer size will be used",
230
RuntimeWarning, 2)
231
raw = FileIO(file,
232
(creating and "x" or "") +
233
(reading and "r" or "") +
234
(writing and "w" or "") +
235
(appending and "a" or "") +
236
(updating and "+" or ""),
237
closefd, opener=opener)
238
result = raw
239
try:
240
line_buffering = False
241
if buffering == 1 or buffering < 0 and raw.isatty():
242
buffering = -1
243
line_buffering = True
244
if buffering < 0:
245
buffering = DEFAULT_BUFFER_SIZE
246
try:
247
bs = os.fstat(raw.fileno()).st_blksize
248
except (OSError, AttributeError):
249
pass
250
else:
251
if bs > 1:
252
buffering = bs
253
if buffering < 0:
254
raise ValueError("invalid buffering size")
255
if buffering == 0:
256
if binary:
257
return result
258
raise ValueError("can't have unbuffered text I/O")
259
if updating:
260
buffer = BufferedRandom(raw, buffering)
261
elif creating or writing or appending:
262
buffer = BufferedWriter(raw, buffering)
263
elif reading:
264
buffer = BufferedReader(raw, buffering)
265
else:
266
raise ValueError("unknown mode: %r" % mode)
267
result = buffer
268
if binary:
269
return result
270
encoding = text_encoding(encoding)
271
text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering)
272
result = text
273
text.mode = mode
274
return result
275
except:
276
result.close()
277
raise
278
279
# Define a default pure-Python implementation for open_code()
280
# that does not allow hooks. Warn on first use. Defined for tests.
281
def _open_code_with_warning(path):
    """Open *path* in ``'rb'`` mode for content that will be executed.

    ``path`` should be an absolute path.

    Runtimes that support it let embedders hook this call to gain
    more control over code files.  The current runtime does not
    support such hooks, so a RuntimeWarning is issued and the file is
    opened directly.
    """
    from warnings import warn

    warn("_pyio.open_code() may not be using hooks",
         RuntimeWarning, stacklevel=2)
    return open(path, "rb")
296
297
# Use the hookable implementation from io when it exists; otherwise fall
# back to the pure-Python version that warns on use.
if hasattr(io, "open_code"):
    open_code = io.open_code
else:
    open_code = _open_code_with_warning


# In normal operation, both `UnsupportedOperation`s should be bound to the
# same object.
if hasattr(io, "UnsupportedOperation"):
    UnsupportedOperation = io.UnsupportedOperation
else:
    class UnsupportedOperation(OSError, ValueError):
        pass
310
311
312
class IOBase(metaclass=abc.ABCMeta):

    """The abstract base class for all I/O classes.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise UnsupportedOperation when operations they do not support are
    called.

    The basic type used for binary data read from or written to a file is
    bytes. Other bytes-like objects are accepted as method arguments too.
    Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise OSError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise UnsupportedOperation for an unsupported method.

        *name* is the method name used in the error message.
        """
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are ints:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative
        Some operating systems / file systems could provide additional values.

        Return an int indicating the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return an int indicating the current stream position."""
        # A zero-length relative seek reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled so that subclasses cannot clobber it by accident.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark closed even when flush() fails.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        try:
            closed = self.closed
        except AttributeError:
            # If getting closed fails, then the object is probably
            # in an unusable state, so ignore.
            return

        if closed:
            return

        # If close() fails, the caller logs the exception with
        # sys.unraisablehook. close() must be called at the end at __del__().
        self.close()

    ### Inquiries ###

    def seekable(self):
        """Return a bool indicating whether object supports random access.

        If False, seek(), tell() and truncate() will raise OSError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not seekable
        """
        if not self.seekable():
            raise UnsupportedOperation("File or stream is not seekable."
                                       if msg is None else msg)

    def readable(self):
        """Return a bool indicating whether object was opened for reading.

        If False, read() will raise OSError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not readable
        """
        if not self.readable():
            raise UnsupportedOperation("File or stream is not readable."
                                       if msg is None else msg)

    def writable(self):
        """Return a bool indicating whether object was opened for writing.

        If False, write() and truncate() will raise OSError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise UnsupportedOperation if file is not writable
        """
        if not self.writable():
            raise UnsupportedOperation("File or stream is not writable."
                                       if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):  # That's a forward reference
        """Context management protocol.  Returns self (an instance of IOBase)."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor (an int) if one exists.

        An OSError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return a bool indicating whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, size=-1):
        r"""Read and return a line of bytes from the stream.

        If size is specified, at most size bytes will be read.
        Size should be an int.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                # Read through the first newline, or everything peeked.
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if size >= 0:
                    # Clamp to what is still allowed, not to the full
                    # limit: earlier iterations may already have used
                    # part of it, and "at most size bytes" must hold.
                    n = min(n, size - len(res))
                return n
        else:
            def nreadahead():
                # Without peek() we cannot look ahead: go byte by byte.
                return 1
        if size is None:
            size = -1
        else:
            try:
                size_index = size.__index__
            except AttributeError:
                raise TypeError(f"{size!r} is not an integer")
            else:
                size = size_index()
        res = bytearray()
        while size < 0 or len(res) < size:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def __next__(self):
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write a list of lines to the stream.

        Line separators are not added, so it is usual for each of the lines
        provided to have a line separator at the end.
        """
        self._checkClosed()
        for line in lines:
            self.write(line)
591
592
io.IOBase.register(IOBase)
593
594
595
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # read() is built on top of readinto(), so a subclass that wants a
    # working read() only has to supply readinto() as its primitive.
    # readinto() can in general be the more efficient of the two.

    # (Providing the reverse as well -- readinto() in terms of read() --
    # is tempting when read() is the more natural primitive, but a
    # subclass implementing neither would then recurse endlessly.)

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if size is None:
            size = -1
        if size < 0:
            return self.readall()
        scratch = bytearray(size.__index__())
        count = self.readinto(scratch)
        if count is None:
            return None
        # Keep only the part of the scratch buffer that was filled.
        return bytes(scratch[:count])

    def readall(self):
        """Read until EOF, using multiple read() calls."""
        collected = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            collected += data
        if collected:
            return bytes(collected)
        # Nothing was read: pass through the final b'' or None.
        return data

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Returns an int representing the number of bytes read (0 for EOF), or
        None if the object is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than the
        length of b in bytes.
        """
        self._unsupported("write")
652
653
io.RawIOBase.register(RawIOBase)
654
from _io import FileIO
655
RawIOBase.register(FileIO)
656
657
658
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    Compared with RawIOBase, read() here may be called without a size
    argument and has no default implementation deferring to readinto().

    Furthermore read(), readinto() and write() may raise
    BlockingIOError when the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, size=-1):
        """Read and return up to size bytes, where size is an int.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, size=-1):
        """Read up to size bytes with at most one read() system call,
        where size is an int.
        """
        self._unsupported("read1")

    def readinto(self, b):
        """Read bytes into a pre-allocated bytes-like object b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=False)

    def readinto1(self, b):
        """Read bytes into buffer *b*, using at most one system call

        Returns an int representing the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        return self._readinto(b, read1=True)

    def _readinto(self, b, read1):
        # Shared body of readinto()/readinto1(): view the target as a flat
        # run of unsigned bytes, read that many, and copy the data in.
        view = b if isinstance(b, memoryview) else memoryview(b)
        view = view.cast('B')

        reader = self.read1 if read1 else self.read
        data = reader(len(view))
        count = len(data)

        view[:count] = data
        return count

    def write(self, b):
        """Write the given bytes buffer to the IO stream.

        Return the number of bytes written, which is always the length of b
        in bytes.

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
760
761
io.BufferedIOBase.register(BufferedIOBase)
762
763
764
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    Most requests are simply forwarded to the wrapped raw stream.  The
    mixin does *not* supply read(), readinto() or write().
    """

    def __init__(self, raw):
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        result = self.raw.seek(pos, whence)
        if result < 0:
            raise OSError("seek() returned an invalid position")
        return result

    def tell(self):
        result = self.raw.tell()
        if result < 0:
            raise OSError("tell() returned an invalid position")
        return result

    def truncate(self, pos=None):
        self._checkClosed()
        self._checkWritable()

        # Buffered and lower-level I/O are being mixed here, so flush
        # first to bring both views of the file state back in sync.
        self.flush()

        # XXX: Should seek() be used, instead of passing the position
        # XXX  directly to truncate?
        target = self.tell() if pos is None else pos
        return self.raw.truncate(target)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush on closed file")
        self.raw.flush()

    def close(self):
        if self.raw is None or self.closed:
            return
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            # The raw stream is closed even when the flush fails.
            self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        self.flush()
        detached, self._raw = self._raw, None
        return detached

    ### Inquiries ###

    def seekable(self):
        return self.raw.seekable()

    @property
    def raw(self):
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __getstate__(self):
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        cls = self.__class__
        prefix = f"{cls.__module__}.{cls.__qualname__}"
        try:
            name = self.name
        except AttributeError:
            # The raw stream has no name; show the type alone.
            return f"<{prefix}>"
        return f"<{prefix} name={name!r}>"

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
869
870
871
class BytesIO(BufferedIOBase):
872
873
"""Buffered I/O implementation using an in-memory bytes buffer."""
874
875
# Initialize _buffer as soon as possible since it's used by __del__()
876
# which calls close()
877
_buffer = None
878
879
def __init__(self, initial_bytes=None):
880
buf = bytearray()
881
if initial_bytes is not None:
882
buf += initial_bytes
883
self._buffer = buf
884
self._pos = 0
885
886
def __getstate__(self):
887
if self.closed:
888
raise ValueError("__getstate__ on closed file")
889
return self.__dict__.copy()
890
891
def getvalue(self):
892
"""Return the bytes value (contents) of the buffer
893
"""
894
if self.closed:
895
raise ValueError("getvalue on closed file")
896
return bytes(self._buffer)
897
898
def getbuffer(self):
899
"""Return a readable and writable view of the buffer.
900
"""
901
if self.closed:
902
raise ValueError("getbuffer on closed file")
903
return memoryview(self._buffer)
904
905
def close(self):
906
if self._buffer is not None:
907
self._buffer.clear()
908
super().close()
909
910
def read(self, size=-1):
911
if self.closed:
912
raise ValueError("read from closed file")
913
if size is None:
914
size = -1
915
else:
916
try:
917
size_index = size.__index__
918
except AttributeError:
919
raise TypeError(f"{size!r} is not an integer")
920
else:
921
size = size_index()
922
if size < 0:
923
size = len(self._buffer)
924
if len(self._buffer) <= self._pos:
925
return b""
926
newpos = min(len(self._buffer), self._pos + size)
927
b = self._buffer[self._pos : newpos]
928
self._pos = newpos
929
return bytes(b)
930
931
def read1(self, size=-1):
932
"""This is the same as read.
933
"""
934
return self.read(size)
935
936
def write(self, b):
937
if self.closed:
938
raise ValueError("write to closed file")
939
if isinstance(b, str):
940
raise TypeError("can't write str to binary stream")
941
with memoryview(b) as view:
942
n = view.nbytes # Size of any bytes-like object
943
if n == 0:
944
return 0
945
pos = self._pos
946
if pos > len(self._buffer):
947
# Inserts null bytes between the current end of the file
948
# and the new write position.
949
padding = b'\x00' * (pos - len(self._buffer))
950
self._buffer += padding
951
self._buffer[pos:pos + n] = b
952
self._pos += n
953
return n
954
955
def seek(self, pos, whence=0):
956
if self.closed:
957
raise ValueError("seek on closed file")
958
try:
959
pos_index = pos.__index__
960
except AttributeError:
961
raise TypeError(f"{pos!r} is not an integer")
962
else:
963
pos = pos_index()
964
if whence == 0:
965
if pos < 0:
966
raise ValueError("negative seek position %r" % (pos,))
967
self._pos = pos
968
elif whence == 1:
969
self._pos = max(0, self._pos + pos)
970
elif whence == 2:
971
self._pos = max(0, len(self._buffer) + pos)
972
else:
973
raise ValueError("unsupported whence value")
974
return self._pos
975
976
def tell(self):
977
if self.closed:
978
raise ValueError("tell on closed file")
979
return self._pos
980
981
def truncate(self, pos=None):
982
if self.closed:
983
raise ValueError("truncate on closed file")
984
if pos is None:
985
pos = self._pos
986
else:
987
try:
988
pos_index = pos.__index__
989
except AttributeError:
990
raise TypeError(f"{pos!r} is not an integer")
991
else:
992
pos = pos_index()
993
if pos < 0:
994
raise ValueError("negative truncate position %r" % (pos,))
995
del self._buffer[pos:]
996
return pos
997
998
def readable(self):
999
if self.closed:
1000
raise ValueError("I/O operation on closed file.")
1001
return True
1002
1003
def writable(self):
    """Return True: an open in-memory stream can always be written.

    Raises ValueError if the file is closed.
    """
    if not self.closed:
        return True
    raise ValueError("I/O operation on closed file.")
1007
1008
def seekable(self):
    """Return True: an open in-memory stream supports random access.

    Raises ValueError if the file is closed.
    """
    if not self.closed:
        return True
    raise ValueError("I/O operation on closed file.")
1012
1013
1014
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.

        Raises OSError if *raw* is not readable and ValueError if
        *buffer_size* is not positive.
        """
        if not raw.readable():
            raise OSError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        self._read_lock = Lock()

    def readable(self):
        """Return whether the underlying raw stream is readable."""
        return self.raw.readable()

    def _reset_read_buf(self):
        """Discard all read-ahead data."""
        self._read_buf = b""
        self._read_pos = 0

    def read(self, size=None):
        """Read size bytes.

        Returns exactly size bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If size is negative, read until EOF or until read() would
        block.
        """
        if size is not None and size < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(size)

    def _read_unlocked(self, n=None):
        """Implementation of read(); the caller must hold the read lock.

        Returns b"" at EOF, or None when the raw stream reported that it
        would block and no data at all is available.
        """
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            if hasattr(self.raw, 'readall'):
                chunk = self.raw.readall()
                if chunk is None:
                    return buf[pos:] or None
                else:
                    return buf[pos:] + chunk
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, size=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        self._checkClosed("peek of closed file")
        with self._read_lock:
            return self._peek_unlocked(size)

    def _peek_unlocked(self, n=0):
        """Implementation of peek(); the caller must hold the read lock."""
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, size=-1):
        """Reads up to size bytes, with at most one read() system call."""
        # Returns up to size bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        self._checkClosed("read of closed file")
        if size < 0:
            size = self.buffer_size
        if size == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(size, len(self._read_buf) - self._read_pos))

    # Implementing readinto() and readinto1() is not strictly necessary (we
    # could rely on the base class that provides an implementation in terms of
    # read() and read1()). We do it anyway to keep the _pyio implementation
    # similar to the io implementation (which implements the methods for
    # performance reasons).
    def _readinto(self, buf, read1):
        """Read data into *buf* and return the number of bytes stored.

        When *read1* is true the method returns as soon as some data has
        been stored, instead of trying to fill *buf* completely.
        """

        self._checkClosed("readinto of closed file")

        # Need to create a memoryview object of type 'b', otherwise
        # we may not be able to assign bytes to it, and slicing it
        # would create a new object.
        if not isinstance(buf, memoryview):
            buf = memoryview(buf)
        if buf.nbytes == 0:
            return 0
        buf = buf.cast('B')

        written = 0
        with self._read_lock:
            while written < len(buf):

                # First try to read from internal buffer.  The copy is
                # limited to the space still free in the caller's buffer:
                # after a refill on a later pass the internal buffer may
                # hold more than that, and a memoryview slice assignment
                # requires both sides to have the same length.
                avail = min(len(self._read_buf) - self._read_pos,
                            len(buf) - written)
                if avail:
                    buf[written:written+avail] = \
                        self._read_buf[self._read_pos:self._read_pos+avail]
                    self._read_pos += avail
                    written += avail
                    if written == len(buf):
                        break

                # If remaining space in callers buffer is larger than
                # internal buffer, read directly into callers buffer
                if len(buf) - written > self.buffer_size:
                    n = self.raw.readinto(buf[written:])
                    if not n:
                        break  # eof
                    written += n

                # Otherwise refill internal buffer - unless we're
                # in read1 mode and already got some data
                elif not (read1 and written):
                    if not self._peek_unlocked(1):
                        break  # eof

                # In readinto1 mode, return as soon as we have some data
                if read1 and written:
                    break

        return written

    def tell(self):
        """Return the logical stream position (raw position minus read-ahead).

        The result is clamped to zero: a raw stream whose tell() is smaller
        than the amount of buffered read-ahead must not produce a negative
        offset.
        """
        return max(
            _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos,
            0)

    def seek(self, pos, whence=0):
        """Seek the raw stream, discard the read-ahead, return the new position.

        Raises ValueError for an unknown *whence* or a closed file.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self._checkClosed("seek of closed file")
        with self._read_lock:
            if whence == 1:
                # A relative seek is relative to the logical position, which
                # lags the raw position by the unread buffered bytes.
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1212
1213
class BufferedWriter(_BufferedIOMixin):

    """Write buffer layered on top of a writable, sequential raw stream.

    BufferedWriter(raw[, buffer_size]) wraps the writable raw stream *raw*.
    When buffer_size is omitted, DEFAULT_BUFFER_SIZE is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Wrap *raw*; raise OSError if it is not writable and ValueError
        if *buffer_size* is not positive.
        """
        if not raw.writable():
            raise OSError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        self._write_lock = Lock()

    def writable(self):
        """Return whether the underlying raw stream is writable."""
        return self.raw.writable()

    def write(self, b):
        """Append *b* to the write buffer and return the number of bytes taken.

        The buffer is flushed to the raw stream once it grows beyond
        buffer_size.  If that flush would block, only as many bytes as fit
        in the buffer are kept and BlockingIOError is raised with
        characters_written set to the number of bytes accepted.
        """
        if isinstance(b, str):
            raise TypeError("can't write str to binary stream")
        with self._write_lock:
            if self.closed:
                raise ValueError("write to closed file")
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # Already over the limit: flush before accepting more.
                # (This may raise BlockingIOError with
                # characters_written == 0.)
                self._flush_unlocked()
            old_len = len(self._write_buf)
            self._write_buf.extend(b)
            accepted = len(self._write_buf) - old_len
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    excess = len(self._write_buf) - self.buffer_size
                    if excess > 0:
                        # Still over buffer_size: report a partial write
                        # and drop the bytes that do not fit.
                        accepted -= excess
                        self._write_buf = self._write_buf[:self.buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, accepted)
            return accepted

    def truncate(self, pos=None):
        """Flush, then truncate the raw stream at *pos* (default: the raw
        stream's current position).  Returns what raw.truncate() returns.
        """
        with self._write_lock:
            self._flush_unlocked()
            target = self.raw.tell() if pos is None else pos
            return self.raw.truncate(target)

    def flush(self):
        """Write all buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Implementation of flush(); the caller must hold the write lock."""
        if self.closed:
            raise ValueError("flush on closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            if n is None:
                # The raw stream could not take anything without blocking.
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if not 0 <= n <= len(self._write_buf):
                raise OSError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        """Return the logical position: raw position plus unflushed bytes."""
        unflushed = len(self._write_buf)
        return _BufferedIOMixin.tell(self) + unflushed

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream.

        Raises ValueError for an unknown *whence*.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        with self._write_lock:
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)

    def close(self):
        """Flush and close the raw stream; a no-op when already closed."""
        with self._write_lock:
            nothing_to_do = self.raw is None or self.closed
        if nothing_to_do:
            return
        # The lock is released before calling self.flush() (which will
        # probably just re-take it) in case flush has been overridden in
        # a subclass or the user set self.flush to something.  This is the
        # same behavior as the C implementation.
        try:
            # may raise BlockingIOError or BrokenPipeError etc
            self.flush()
        finally:
            with self._write_lock:
                self.raw.close()
1316
1317
1318
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and a buffered writer combined into one object.

    The two halves are independent: reads go to a BufferedReader wrapped
    around *reader*, writes go to a BufferedWriter wrapped around *writer*.
    This is typically used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively.  If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer, buffer_size=DEFAULT_BUFFER_SIZE):
        """Constructor.

        The arguments are two RawIO instances.  Raises OSError if *reader*
        is not readable or *writer* is not writable.
        """
        if not reader.readable():
            raise OSError('"reader" argument must be readable.')

        if not writer.writable():
            raise OSError('"writer" argument must be writable.')

        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    # --- read side: forwarded to self.reader ---

    def read(self, size=-1):
        """Read from the reader half; None means "read until EOF"."""
        return self.reader.read(-1 if size is None else size)

    def readinto(self, b):
        """Forward readinto() to the reader half."""
        return self.reader.readinto(b)

    def peek(self, size=0):
        """Forward peek() to the reader half."""
        return self.reader.peek(size)

    def read1(self, size=-1):
        """Forward read1() to the reader half."""
        return self.reader.read1(size)

    def readinto1(self, b):
        """Forward readinto1() to the reader half."""
        return self.reader.readinto1(b)

    def readable(self):
        """Return whether the reader half is readable."""
        return self.reader.readable()

    # --- write side: forwarded to self.writer ---

    def write(self, b):
        """Forward write() to the writer half."""
        return self.writer.write(b)

    def writable(self):
        """Return whether the writer half is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer half."""
        return self.writer.flush()

    # --- both sides ---

    def close(self):
        """Close the writer, then the reader (even if the writer failed)."""
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return a true value if either half is connected to a TTY."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """Whether the writer half is closed."""
        return self.writer.closed
1389
1390
1391
class BufferedRandom(BufferedWriter, BufferedReader):

    """Buffered reading and writing on a single seekable raw stream.

    BufferedRandom(raw[, buffer_size]) combines a reader and a writer for
    the seekable stream *raw*.  If the buffer_size is omitted it defaults
    to DEFAULT_BUFFER_SIZE.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Set up both buffers; *raw* must be seekable."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, drop the read-ahead and seek the raw stream.

        Returns the new absolute position.  Raises ValueError for an unknown
        *whence* and OSError if the raw stream reports a negative position.
        """
        if whence not in valid_seek_flags:
            raise ValueError("invalid whence value")
        self.flush()
        if self._read_buf:
            # Undo read ahead: move the raw stream back to the logical
            # position before performing the requested seek.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # The raw seek comes first and the read buffer is emptied only
        # afterwards, so that a failing raw seek does not lose buffered
        # data forever.
        new_pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if new_pos < 0:
            raise OSError("seek() returned invalid position")
        return new_pos

    def tell(self):
        """Return the logical position, accounting for whichever buffer
        is currently in use.
        """
        if self._write_buf:
            return BufferedWriter.tell(self)
        return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at *pos*, defaulting to the current logical position."""
        where = self.tell() if pos is None else pos
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, where)

    # Every read-style operation flushes pending writes first so that the
    # raw stream reflects them before data is fetched.

    def read(self, size=None):
        """Flush pending writes, then read; None means "read until EOF"."""
        wanted = -1 if size is None else size
        self.flush()
        return BufferedReader.read(self, wanted)

    def readinto(self, b):
        """Flush pending writes, then read into *b*."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, size=0):
        """Flush pending writes, then peek at buffered data."""
        self.flush()
        return BufferedReader.peek(self, size)

    def read1(self, size=-1):
        """Flush pending writes, then do a single-shot read."""
        self.flush()
        return BufferedReader.read1(self, size)

    def readinto1(self, b):
        """Flush pending writes, then do a single-shot read into *b*."""
        self.flush()
        return BufferedReader.readinto1(self, b)

    def write(self, b):
        """Discard any read-ahead (rewinding the raw stream), then write."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1463
1464
1465
class FileIO(RawIOBase):
    """Raw, unbuffered binary I/O on an OS-level file descriptor."""

    # Class-level defaults; __init__ overrides them per instance.  They also
    # keep __del__/close() safe when __init__ fails before setting them.
    _fd = -1
    _created = False
    _readable = False
    _writable = False
    _appending = False
    _seekable = None  # computed lazily by seekable()
    _closefd = True

    def __init__(self, file, mode='r', closefd=True, opener=None):
        """Open a file.  The mode can be 'r' (default), 'w', 'x' or 'a' for reading,
        writing, exclusive creation or appending.  The file will be created if it
        doesn't exist when opened for writing or appending; it will be truncated
        when opened for writing.  A FileExistsError will be raised if it already
        exists when opened for creating. Opening a file for creating implies
        writing so this mode behaves in a similar way to 'w'. Add a '+' to the mode
        to allow simultaneous reading and writing. A custom opener can be used by
        passing a callable as *opener*. The underlying file descriptor for the file
        object is then obtained by calling opener with (*name*, *flags*).
        *opener* must return an open file descriptor (passing os.open as *opener*
        results in functionality similar to passing None).
        """
        if self._fd >= 0:
            # Have to close the existing file first.
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                self._fd = -1

        if isinstance(file, float):
            raise TypeError('integer argument expected, got float')
        if isinstance(file, int):
            fd = file
            if fd < 0:
                raise ValueError('negative file descriptor')
        else:
            fd = -1

        if not isinstance(mode, str):
            raise TypeError('invalid mode: %s' % (mode,))
        if not set(mode) <= set('xrwab+'):
            raise ValueError('invalid mode: %s' % (mode,))
        if sum(c in 'rwax' for c in mode) != 1 or mode.count('+') > 1:
            raise ValueError('Must have exactly one of create/read/write/append '
                             'mode and at most one plus')

        if 'x' in mode:
            self._created = True
            self._writable = True
            flags = os.O_EXCL | os.O_CREAT
        elif 'r' in mode:
            self._readable = True
            flags = 0
        elif 'w' in mode:
            self._writable = True
            flags = os.O_CREAT | os.O_TRUNC
        elif 'a' in mode:
            self._writable = True
            self._appending = True
            flags = os.O_APPEND | os.O_CREAT

        if '+' in mode:
            self._readable = True
            self._writable = True

        if self._readable and self._writable:
            flags |= os.O_RDWR
        elif self._readable:
            flags |= os.O_RDONLY
        else:
            flags |= os.O_WRONLY

        flags |= getattr(os, 'O_BINARY', 0)

        noinherit_flag = (getattr(os, 'O_NOINHERIT', 0) or
                          getattr(os, 'O_CLOEXEC', 0))
        flags |= noinherit_flag

        # Set only when this call opened the descriptor itself, so that the
        # error path below never closes a descriptor passed in by the caller.
        owned_fd = None
        try:
            if fd < 0:
                if not closefd:
                    raise ValueError('Cannot use closefd=False with file name')
                if opener is None:
                    fd = os.open(file, flags, 0o666)
                else:
                    fd = opener(file, flags)
                    if not isinstance(fd, int):
                        raise TypeError('expected integer from opener')
                    if fd < 0:
                        raise OSError('Negative file descriptor')
                owned_fd = fd
                if not noinherit_flag:
                    os.set_inheritable(fd, False)

            self._closefd = closefd
            fdfstat = os.fstat(fd)
            try:
                if stat.S_ISDIR(fdfstat.st_mode):
                    raise IsADirectoryError(errno.EISDIR,
                                            os.strerror(errno.EISDIR), file)
            except AttributeError:
                # Ignore the AttributeError if stat.S_ISDIR or errno.EISDIR
                # don't exist.
                pass
            self._blksize = getattr(fdfstat, 'st_blksize', 0)
            if self._blksize <= 1:
                self._blksize = DEFAULT_BUFFER_SIZE

            if _setmode:
                # don't translate newlines (\r\n <=> \n)
                _setmode(fd, os.O_BINARY)

            self.name = file
            if self._appending:
                # For consistent behaviour, we explicitly seek to the
                # end of file (otherwise, it might be done only on the
                # first write()).
                try:
                    os.lseek(fd, 0, SEEK_END)
                except OSError as e:
                    if e.errno != errno.ESPIPE:
                        raise
        except:
            # Deliberately catches everything: the descriptor opened above
            # must not leak, and the exception is always re-raised.
            if owned_fd is not None:
                os.close(owned_fd)
            raise
        self._fd = fd

    def __del__(self):
        """Warn about, then close, a file that was never closed explicitly."""
        if self._fd >= 0 and self._closefd and not self.closed:
            import warnings
            warnings.warn('unclosed file %r' % (self,), ResourceWarning,
                          stacklevel=2, source=self)
            self.close()

    def __getstate__(self):
        """Refuse pickling: a file descriptor cannot be serialized."""
        raise TypeError(f"cannot pickle {self.__class__.__name__!r} object")

    def __repr__(self):
        class_name = '%s.%s' % (self.__class__.__module__,
                                self.__class__.__qualname__)
        if self.closed:
            return '<%s [closed]>' % class_name
        try:
            name = self.name
        except AttributeError:
            return ('<%s fd=%d mode=%r closefd=%r>' %
                    (class_name, self._fd, self.mode, self._closefd))
        else:
            return ('<%s name=%r mode=%r closefd=%r>' %
                    (class_name, name, self.mode, self._closefd))

    def _checkReadable(self):
        """Raise UnsupportedOperation unless the file was opened for reading."""
        if not self._readable:
            raise UnsupportedOperation('File not open for reading')

    def _checkWritable(self, msg=None):
        """Raise UnsupportedOperation unless the file was opened for writing.

        *msg* is accepted for signature compatibility and is not used.
        """
        if not self._writable:
            raise UnsupportedOperation('File not open for writing')

    def read(self, size=None):
        """Read at most size bytes, returned as bytes.

        Only makes one system call, so less data may be returned than requested
        In non-blocking mode, returns None if no data is available.
        Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        if size is None or size < 0:
            return self.readall()
        try:
            return os.read(self._fd, size)
        except BlockingIOError:
            return None

    def readall(self):
        """Read all data from the file, returned as bytes.

        In non-blocking mode, returns as much as is immediately available,
        or None if no data is available.  Return an empty bytes object at EOF.
        """
        self._checkClosed()
        self._checkReadable()
        bufsize = DEFAULT_BUFFER_SIZE
        try:
            # For a regular file, size the first read to the remaining data
            # (+1 so that the read that detects EOF happens in the same pass).
            pos = os.lseek(self._fd, 0, SEEK_CUR)
            end = os.fstat(self._fd).st_size
            if end >= pos:
                bufsize = end - pos + 1
        except OSError:
            pass

        result = bytearray()
        while True:
            if len(result) >= bufsize:
                bufsize = len(result)
                bufsize += max(bufsize, DEFAULT_BUFFER_SIZE)
            n = bufsize - len(result)
            try:
                chunk = os.read(self._fd, n)
            except BlockingIOError:
                if result:
                    break
                return None
            if not chunk:  # reached the end of the file
                break
            result += chunk

        return bytes(result)

    def readinto(self, b):
        """Same as RawIOBase.readinto().

        Returns the number of bytes stored in *b*, 0 at EOF, or None when
        the descriptor is non-blocking and no data is available.
        """
        m = memoryview(b).cast('B')
        data = self.read(len(m))
        if data is None:
            # read() signals "would block" with None; pass that on instead
            # of failing on len(None).
            return None
        n = len(data)
        m[:n] = data
        return n

    def write(self, b):
        """Write bytes b to file, return number written.

        Only makes one system call, so not all of the data may be written.
        The number of bytes actually written is returned.  In non-blocking mode,
        returns None if the write would block.
        """
        self._checkClosed()
        self._checkWritable()
        try:
            return os.write(self._fd, b)
        except BlockingIOError:
            return None

    def seek(self, pos, whence=SEEK_SET):
        """Move to new file position.

        Argument pos is a byte count.  Optional argument whence defaults to
        SEEK_SET or 0 (offset from start of file, offset should be >= 0); other values
        are SEEK_CUR or 1 (move relative to current position, positive or negative),
        and SEEK_END or 2 (move relative to end of file, usually negative, although
        many platforms allow seeking beyond the end of a file).

        Note that not all file objects are seekable.
        """
        if isinstance(pos, float):
            raise TypeError('an integer is required')
        self._checkClosed()
        return os.lseek(self._fd, pos, whence)

    def tell(self):
        """tell() -> int.  Current file position.

        Can raise OSError for non seekable files."""
        self._checkClosed()
        return os.lseek(self._fd, 0, SEEK_CUR)

    def truncate(self, size=None):
        """Truncate the file to at most size bytes.

        Size defaults to the current file position, as returned by tell().
        Returns the size used.
        """
        self._checkClosed()
        self._checkWritable()
        if size is None:
            size = self.tell()
        os.ftruncate(self._fd, size)
        return size

    def close(self):
        """Close the file.

        A closed file cannot be used for further I/O operations.  close() may be
        called more than once without error.
        """
        if not self.closed:
            try:
                if self._closefd:
                    os.close(self._fd)
            finally:
                super().close()

    def seekable(self):
        """True if file supports random-access."""
        self._checkClosed()
        if self._seekable is None:
            # Probe once with tell() and cache the answer.
            try:
                self.tell()
            except OSError:
                self._seekable = False
            else:
                self._seekable = True
        return self._seekable

    def readable(self):
        """True if file was opened in a read mode."""
        self._checkClosed()
        return self._readable

    def writable(self):
        """True if file was opened in a write mode."""
        self._checkClosed()
        return self._writable

    def fileno(self):
        """Return the underlying file descriptor (an integer)."""
        self._checkClosed()
        return self._fd

    def isatty(self):
        """True if the file is connected to a TTY device."""
        self._checkClosed()
        return os.isatty(self._fd)

    @property
    def closefd(self):
        """True if the file descriptor will be closed by close()."""
        return self._closefd

    @property
    def mode(self):
        """String giving the file mode"""
        if self._created:
            if self._readable:
                return 'xb+'
            else:
                return 'xb'
        elif self._appending:
            if self._readable:
                return 'ab+'
            else:
                return 'ab'
        elif self._readable:
            if self._writable:
                return 'rb+'
            else:
                return 'rb'
        else:
            return 'wb'
1806
1807
1808
class TextIOBase(IOBase):

    """Abstract base for text streams.

    Defines the character and line based interface; every I/O method here
    reports "unsupported" until a concrete subclass overrides it.
    """

    def read(self, size=-1):
        """Read and return at most *size* characters as a string.

        Data is taken from the underlying buffer until *size* characters
        are available or EOF is hit.  A negative or omitted *size* means
        read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string *s* to the stream and return an int."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream size to the int *pos*."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including the next newline, or until EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Disconnect the underlying buffer from this object and return it.

        Once detached, the text stream is in an unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """Name of the text encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far while reading.

        Only endings translated during reading are considered.  None here;
        subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        None here; subclasses should override."""
        return None


io.TextIOBase.register(TextIOBase)
1873
1874
1875
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Incremental decoder for universal-newlines reading.

    Wraps another incremental decoder (or None for already-decoded text)
    and, when *translate* is true, turns \r\n and \r into \n.  The kinds
    of newline encountered are recorded in ``seennl`` and exposed through
    ``newlines``.  With translate=False the data is passed through, but a
    trailing \r is still held back so that a \r\n pair is always returned
    in one piece.
    """

    # Bit flags accumulated in self.seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0
        self.pendingcr = False

    def decode(self, input, final=False):
        """Decode *input*, apply newline handling and return the text."""
        # decode input (with the eventual \r from a previous pass)
        inner = self.decoder
        if inner is None:
            text = input
        else:
            text = inner.decode(input, final=final)
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r even when not translating data, so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which newlines are read
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffered_input, flags).

        The wrapped decoder's flags are shifted left by one bit; bit 0
        records whether a \\r is being held back.
        """
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag = (flag << 1) | (1 if self.pendingcr else 0)
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        inner = self.decoder
        if inner is not None:
            inner.setstate((buf, flag >> 1))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        inner = self.decoder
        if inner is not None:
            inner.reset()

    @property
    def newlines(self):
        """Newline kinds seen so far: None, a single string, or a tuple."""
        # Indexed by the seennl bit mask (_LF | _CR | _CRLF).
        by_mask = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_mask[self.seennl]
1958
1959
1960
class TextIOWrapper(TextIOBase):
1961
1962
r"""Character and line based layer over a BufferedIOBase object, buffer.
1963
1964
encoding gives the name of the encoding that the stream will be
1965
decoded or encoded with. It defaults to locale.getencoding().
1966
1967
errors determines the strictness of encoding and decoding (see the
1968
codecs.register) and defaults to "strict".
1969
1970
newline can be None, '', '\n', '\r', or '\r\n'. It controls the
1971
handling of line endings. If it is None, universal newlines is
1972
enabled. With this enabled, on input, the lines endings '\n', '\r',
1973
or '\r\n' are translated to '\n' before being returned to the
1974
caller. Conversely, on output, '\n' is translated to the system
1975
default line separator, os.linesep. If newline is any other of its
1976
legal values, that newline becomes the newline when the file is read
1977
and it is returned untranslated. On output, '\n' is converted to the
1978
newline.
1979
1980
If line_buffering is True, a call to flush is implied when a call to
1981
write contains a newline character.
1982
"""
1983
1984
_CHUNK_SIZE = 2048
1985
1986
# Initialize _buffer as soon as possible since it's used by __del__()
1987
# which calls close()
1988
_buffer = None
1989
1990
# The write_through argument has no effect here since this
1991
# implementation always writes through. The argument is present only
1992
# so that the signature can match the signature of the C version.
1993
def __init__(self, buffer, encoding=None, errors=None, newline=None,
1994
line_buffering=False, write_through=False):
1995
self._check_newline(newline)
1996
encoding = text_encoding(encoding)
1997
1998
if encoding == "locale":
1999
encoding = self._get_locale_encoding()
2000
2001
if not isinstance(encoding, str):
2002
raise ValueError("invalid encoding: %r" % encoding)
2003
2004
if not codecs.lookup(encoding)._is_text_encoding:
2005
msg = ("%r is not a text encoding; "
2006
"use codecs.open() to handle arbitrary codecs")
2007
raise LookupError(msg % encoding)
2008
2009
if errors is None:
2010
errors = "strict"
2011
else:
2012
if not isinstance(errors, str):
2013
raise ValueError("invalid errors: %r" % errors)
2014
if _CHECK_ERRORS:
2015
codecs.lookup_error(errors)
2016
2017
self._buffer = buffer
2018
self._decoded_chars = '' # buffer for text returned from decoder
2019
self._decoded_chars_used = 0 # offset into _decoded_chars for read()
2020
self._snapshot = None # info for reconstructing decoder state
2021
self._seekable = self._telling = self.buffer.seekable()
2022
self._has_read1 = hasattr(self.buffer, 'read1')
2023
self._configure(encoding, errors, newline,
2024
line_buffering, write_through)
2025
2026
def _check_newline(self, newline):
2027
if newline is not None and not isinstance(newline, str):
2028
raise TypeError("illegal newline type: %r" % (type(newline),))
2029
if newline not in (None, "", "\n", "\r", "\r\n"):
2030
raise ValueError("illegal newline value: %r" % (newline,))
2031
2032
def _configure(self, encoding=None, errors=None, newline=None,
2033
line_buffering=False, write_through=False):
2034
self._encoding = encoding
2035
self._errors = errors
2036
self._encoder = None
2037
self._decoder = None
2038
self._b2cratio = 0.0
2039
2040
self._readuniversal = not newline
2041
self._readtranslate = newline is None
2042
self._readnl = newline
2043
self._writetranslate = newline != ''
2044
self._writenl = newline or os.linesep
2045
2046
self._line_buffering = line_buffering
2047
self._write_through = write_through
2048
2049
# don't write a BOM in the middle of a file
2050
if self._seekable and self.writable():
2051
position = self.buffer.tell()
2052
if position != 0:
2053
try:
2054
self._get_encoder().setstate(0)
2055
except LookupError:
2056
# Sometimes the encoder doesn't exist
2057
pass
2058
2059
# self._snapshot is either None, or a tuple (dec_flags, next_input)
2060
# where dec_flags is the second (integer) item of the decoder state
2061
# and next_input is the chunk of input bytes that comes next after the
2062
# snapshot point. We use this to reconstruct decoder states in tell().
2063
2064
# Naming convention:
2065
# - "bytes_..." for integer variables that count input bytes
2066
# - "chars_..." for integer variables that count decoded characters
2067
2068
def __repr__(self):
2069
result = "<{}.{}".format(self.__class__.__module__,
2070
self.__class__.__qualname__)
2071
try:
2072
name = self.name
2073
except AttributeError:
2074
pass
2075
else:
2076
result += " name={0!r}".format(name)
2077
try:
2078
mode = self.mode
2079
except AttributeError:
2080
pass
2081
else:
2082
result += " mode={0!r}".format(mode)
2083
return result + " encoding={0!r}>".format(self.encoding)
2084
2085
@property
def encoding(self):
    """The encoding stored by the last _configure() call."""
    return self._encoding
2088
2089
@property
def errors(self):
    """The error-handler name stored by the last _configure() call."""
    return self._errors
2092
2093
@property
def line_buffering(self):
    """The line_buffering setting; when true, write() flushes on newlines."""
    return self._line_buffering
2096
2097
@property
def write_through(self):
    """The write_through value stored by the last _configure() call."""
    return self._write_through
2100
2101
@property
def buffer(self):
    """The wrapped binary stream, or None once detach() has been called."""
    return self._buffer
2104
2105
def reconfigure(self, *,
                encoding=None, errors=None, newline=Ellipsis,
                line_buffering=None, write_through=None):
    """Reconfigure the text stream with new parameters.

    This also flushes the stream.

    Parameters left at their default keep the current setting, except
    that *errors* falls back to 'strict' when a new *encoding* is given
    without it.  An *encoding* of "locale" is replaced by the result of
    _get_locale_encoding().

    Raises UnsupportedOperation when the encoding, errors or newline is
    changed after a decoder has been created, TypeError for a non-str
    *encoding* or *errors*, and whatever _check_newline() raises for an
    illegal *newline*.
    """
    if (self._decoder is not None
            and (encoding is not None or errors is not None
                 or newline is not Ellipsis)):
        raise UnsupportedOperation(
            "It is not possible to set the encoding or newline of stream "
            "after the first read")

    if errors is None:
        if encoding is None:
            errors = self._errors
        else:
            errors = 'strict'
    elif not isinstance(errors, str):
        # Format through a 1-tuple so that a tuple argument is shown as a
        # whole instead of being unpacked by the % operator.
        raise TypeError("invalid errors: %r" % (errors,))

    if encoding is None:
        encoding = self._encoding
    else:
        if not isinstance(encoding, str):
            raise TypeError("invalid encoding: %r" % (encoding,))
        if encoding == "locale":
            encoding = self._get_locale_encoding()

    if newline is Ellipsis:
        newline = self._readnl
    self._check_newline(newline)

    if line_buffering is None:
        line_buffering = self.line_buffering
    if write_through is None:
        write_through = self.write_through

    self.flush()
    self._configure(encoding, errors, newline,
                    line_buffering, write_through)
2147
2148
def seekable(self):
    """Return the seekability recorded in __init__().

    Raises ValueError if the stream is closed.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    return self._seekable
2152
2153
def readable(self):
    """Return the underlying buffer's readable() result."""
    return self.buffer.readable()
2155
2156
def writable(self):
    """Return the underlying buffer's writable() result."""
    return self.buffer.writable()
2158
2159
def flush(self):
    """Flush the underlying buffer and restore the tell() capability."""
    self.buffer.flush()
    # __next__() clears _telling; a flush re-enables tell() on seekable
    # streams.
    self._telling = self._seekable
2162
2163
def close(self):
    """Flush and close the underlying buffer.

    Does nothing when the buffer is None or already closed.  The buffer
    is closed even if the flush raises.
    """
    if self.buffer is None or self.closed:
        return
    try:
        self.flush()
    finally:
        self.buffer.close()
2169
2170
@property
def closed(self):
    """The `closed` attribute of the underlying buffer."""
    return self.buffer.closed
2173
2174
@property
def name(self):
    """The `name` attribute of the underlying buffer."""
    return self.buffer.name
2177
2178
def fileno(self):
    """Return the underlying buffer's fileno() result."""
    return self.buffer.fileno()
2180
2181
def isatty(self):
    """Return the underlying buffer's isatty() result."""
    return self.buffer.isatty()
2183
2184
def write(self, s):
    """Encode the str *s* and write it to the underlying buffer.

    '\\n' is replaced by the configured line separator when write
    translation is enabled.  With line buffering on, the stream is
    flushed when the text contains a newline or '\\r'.  Any pending
    read-side state is discarded.  Returns the length of the original
    *s*.

    Raises ValueError on a closed stream and TypeError when *s* is not
    a str.
    """
    if self.closed:
        raise ValueError("write to closed file")
    if not isinstance(s, str):
        raise TypeError("can't write %s to text stream" %
                        s.__class__.__name__)
    char_count = len(s)
    saw_newline = ((self._writetranslate or self._line_buffering)
                   and "\n" in s)
    if saw_newline and self._writetranslate and self._writenl != "\n":
        s = s.replace("\n", self._writenl)
    encoder = self._encoder or self._get_encoder()
    # XXX What if we were just reading?
    self.buffer.write(encoder.encode(s))
    if self._line_buffering and (saw_newline or "\r" in s):
        self.flush()
    # Drop buffered decoded text and decoder state left from reading.
    self._set_decoded_chars('')
    self._snapshot = None
    decoder = self._decoder
    if decoder:
        decoder.reset()
    return char_count
2206
2207
def _get_encoder(self):
2208
make_encoder = codecs.getincrementalencoder(self._encoding)
2209
self._encoder = make_encoder(self._errors)
2210
return self._encoder
2211
2212
def _get_decoder(self):
2213
make_decoder = codecs.getincrementaldecoder(self._encoding)
2214
decoder = make_decoder(self._errors)
2215
if self._readuniversal:
2216
decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
2217
self._decoder = decoder
2218
return decoder
2219
2220
# The following three methods implement an ADT for _decoded_chars.
2221
# Text returned from the decoder is buffered here until the client
2222
# requests it by calling our read() or readline() method.
2223
def _set_decoded_chars(self, chars):
    """Set the _decoded_chars buffer."""
    self._decoded_chars = chars
    # Nothing from the new buffer has been handed out yet.
    self._decoded_chars_used = 0
2227
2228
def _get_decoded_chars(self, n=None):
2229
"""Advance into the _decoded_chars buffer."""
2230
offset = self._decoded_chars_used
2231
if n is None:
2232
chars = self._decoded_chars[offset:]
2233
else:
2234
chars = self._decoded_chars[offset:offset + n]
2235
self._decoded_chars_used += len(chars)
2236
return chars
2237
2238
def _get_locale_encoding(self):
2239
try:
2240
import locale
2241
except ImportError:
2242
# Importing locale may fail if Python is being built
2243
return "utf-8"
2244
else:
2245
return locale.getencoding()
2246
2247
def _rewind_decoded_chars(self, n):
2248
"""Rewind the _decoded_chars buffer."""
2249
if self._decoded_chars_used < n:
2250
raise AssertionError("rewind decoded_chars out of bounds")
2251
self._decoded_chars_used -= n
2252
2253
def _read_chunk(self):
2254
"""
2255
Read and decode the next chunk of data from the BufferedReader.
2256
"""
2257
2258
# The return value is True unless EOF was reached. The decoded
2259
# string is placed in self._decoded_chars (replacing its previous
2260
# value). The entire input chunk is sent to the decoder, though
2261
# some of it may remain buffered in the decoder, yet to be
2262
# converted.
2263
2264
if self._decoder is None:
2265
raise ValueError("no decoder")
2266
2267
if self._telling:
2268
# To prepare for tell(), we need to snapshot a point in the
2269
# file where the decoder's input buffer is empty.
2270
2271
dec_buffer, dec_flags = self._decoder.getstate()
2272
# Given this, we know there was a valid snapshot point
2273
# len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
2274
2275
# Read a chunk, decode it, and put the result in self._decoded_chars.
2276
if self._has_read1:
2277
input_chunk = self.buffer.read1(self._CHUNK_SIZE)
2278
else:
2279
input_chunk = self.buffer.read(self._CHUNK_SIZE)
2280
eof = not input_chunk
2281
decoded_chars = self._decoder.decode(input_chunk, eof)
2282
self._set_decoded_chars(decoded_chars)
2283
if decoded_chars:
2284
self._b2cratio = len(input_chunk) / len(self._decoded_chars)
2285
else:
2286
self._b2cratio = 0.0
2287
2288
if self._telling:
2289
# At the snapshot point, len(dec_buffer) bytes before the read,
2290
# the next input to be decoded is dec_buffer + input_chunk.
2291
self._snapshot = (dec_flags, dec_buffer + input_chunk)
2292
2293
return not eof
2294
2295
def _pack_cookie(self, position, dec_flags=0,
2296
bytes_to_feed=0, need_eof=False, chars_to_skip=0):
2297
# The meaning of a tell() cookie is: seek to position, set the
2298
# decoder flags to dec_flags, read bytes_to_feed bytes, feed them
2299
# into the decoder with need_eof as the EOF flag, then skip
2300
# chars_to_skip characters of the decoded result. For most simple
2301
# decoders, tell() will often just give a byte offset in the file.
2302
return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
2303
(chars_to_skip<<192) | bool(need_eof)<<256)
2304
2305
def _unpack_cookie(self, bigint):
2306
rest, position = divmod(bigint, 1<<64)
2307
rest, dec_flags = divmod(rest, 1<<64)
2308
rest, bytes_to_feed = divmod(rest, 1<<64)
2309
need_eof, chars_to_skip = divmod(rest, 1<<64)
2310
return position, dec_flags, bytes_to_feed, bool(need_eof), chars_to_skip
2311
2312
def tell(self):
    """Return a cookie describing the current text position.

    When no decoder or snapshot exists the cookie is simply the byte
    position of the underlying buffer.  Otherwise the decoder is replayed
    from the last snapshot to find the nearest point where it has nothing
    buffered, and the cookie built by _pack_cookie() records how to get
    from that point to the current position.  The decoder's state is
    restored before returning.

    Raises UnsupportedOperation when the stream is not seekable and
    OSError when telling is disabled by iteration or the position cannot
    be reconstructed.
    """
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if not self._telling:
        raise OSError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Fast search for an acceptable start point, close to our
        # current pos.
        # Rationale: calling decoder.decode() has a large overhead
        # regardless of chunk size; we want the number of such calls to
        # be O(1) in most situations (common decoders, sensible input).
        # Actually, it will be exactly 1 for fixed-size codecs (all
        # 8-bit codecs, also UTF-16 and UTF-32).
        skip_bytes = int(self._b2cratio * chars_to_skip)
        skip_back = 1
        assert skip_bytes <= len(next_input)
        while skip_bytes > 0:
            decoder.setstate((b'', dec_flags))
            # Decode up to tentative start point
            n = len(decoder.decode(next_input[:skip_bytes]))
            if n <= chars_to_skip:
                b, d = decoder.getstate()
                if not b:
                    # Before pos and no bytes buffered in decoder => OK
                    dec_flags = d
                    chars_to_skip -= n
                    break
                # Skip back by buffered amount and reset heuristic
                skip_bytes -= len(b)
                skip_back = 1
            else:
                # We're too far ahead, skip back a bit
                skip_bytes -= skip_back
                skip_back = skip_back * 2
        else:
            # The fast search found no usable start point: fall back to
            # the snapshot point itself.
            skip_bytes = 0
            decoder.setstate((b'', dec_flags))

        # Note our initial start point.
        start_pos = position + skip_bytes
        start_flags = dec_flags
        if chars_to_skip == 0:
            # We haven't moved from the start point.
            return self._pack_cookie(start_pos, start_flags)

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        bytes_fed = 0
        need_eof = False
        # Chars decoded since `start_pos`
        chars_decoded = 0
        for i in range(skip_bytes, len(next_input)):
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_input[i:i+1]))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = True
            if chars_decoded < chars_to_skip:
                raise OSError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # The replay above mutated the decoder; put it back as it was.
        decoder.setstate(saved_state)
2410
2411
def truncate(self, pos=None):
    """Flush, then truncate the underlying buffer at *pos*.

    When *pos* is None the value of tell() is used.  Returns whatever
    the buffer's truncate() returns.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
2416
2417
def detach(self):
    """Flush, then separate and return the underlying buffer.

    Afterwards self._buffer is None.  Raises ValueError when the buffer
    was already detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    detached, self._buffer = self._buffer, None
    return detached
2424
2425
def seek(self, cookie, whence=0):
    """Change the stream position and return the new position.

    With whence == 0 (the default) *cookie* must be a non-negative
    value as returned by tell().  With SEEK_CUR or SEEK_END only a
    *cookie* of 0 is supported: SEEK_CUR re-seeks to the current tell()
    position, SEEK_END moves to the end of the underlying buffer.

    Raises ValueError on a closed stream, a negative *cookie* or an
    unsupported *whence*; UnsupportedOperation when the stream is not
    seekable or for a nonzero relative seek; OSError when the position
    described by *cookie* cannot be restored.
    """
    def _reset_encoder(position):
        """Reset the encoder (merely useful for proper BOM handling)"""
        try:
            encoder = self._encoder or self._get_encoder()
        except LookupError:
            # Sometimes the encoder doesn't exist
            pass
        else:
            if position != 0:
                encoder.setstate(0)
            else:
                encoder.reset()

    if self.closed:
        # The message names the operation actually attempted (it used to
        # say "tell").
        raise ValueError("seek on closed file")
    if not self._seekable:
        raise UnsupportedOperation("underlying stream is not seekable")
    if whence == SEEK_CUR:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    elif whence == SEEK_END:
        if cookie != 0:
            raise UnsupportedOperation("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, whence)
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        _reset_encoder(position)
        return position
    if whence != 0:
        raise ValueError("unsupported whence (%r)" % (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise OSError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    _reset_encoder(cookie)
    return cookie
2499
2500
def read(self, size=None):
    """Read and return at most *size* characters as a str.

    With *size* None or negative, everything up to EOF is read and the
    decoder is told the input is final.  Otherwise chunks are read until
    *size* characters are available or EOF is hit.

    Raises TypeError when *size* is neither None nor an object with an
    __index__ method.
    """
    self._checkReadable()
    if size is None:
        size = -1
    else:
        try:
            to_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = to_index()
    decoder = self._decoder or self._get_decoder()

    if size < 0:
        # Read everything.
        pending = self._get_decoded_chars()
        remainder = decoder.decode(self.buffer.read(), final=True)
        self._set_decoded_chars('')
        self._snapshot = None
        return pending + remainder

    # Keep reading chunks until we have size characters to return.
    text = self._get_decoded_chars(size)
    more_data = True
    while more_data and len(text) < size:
        more_data = self._read_chunk()
        text += self._get_decoded_chars(size - len(text))
    return text
2527
2528
def __next__(self):
2529
self._telling = False
2530
line = self.readline()
2531
if not line:
2532
self._snapshot = None
2533
self._telling = self._seekable
2534
raise StopIteration
2535
return line
2536
2537
def readline(self, size=None):
    """Read and return one line, limited to *size* characters if given.

    The line ending searched for depends on the newline configuration
    (translated, universal, or a fixed string).  Text decoded beyond the
    returned line is pushed back into the decoded-chars buffer.  At end
    of file the remaining text, possibly empty, is returned.

    Raises ValueError on a closed stream and TypeError when *size* is
    neither None nor an object with an __index__ method.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if size is None:
        size = -1
    else:
        try:
            size_index = size.__index__
        except AttributeError:
            raise TypeError(f"{size!r} is not an integer")
        else:
            size = size_index()

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    # Index in `line` from which the next newline search resumes.
    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal
            # The search always restarts at index 0 here; `start` is not
            # used in this branch.
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if size >= 0 and len(line) >= size:
            endpos = size  # reached length size
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if size >= 0 and endpos > size:
        endpos = size  # don't exceed size

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
2629
2630
@property
def newlines(self):
    """The decoder's `newlines` attribute, or None when no decoder exists."""
    decoder = self._decoder
    if decoder:
        return decoder.newlines
    return None
2633
2634
2635
class StringIO(TextIOWrapper):
    """Text I/O implementation using an in-memory buffer.

    The initial_value argument sets the value of object.  The newline
    argument is like the one of TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", newline="\n"):
        """Create the stream over a fresh BytesIO and preload it.

        The text is stored as UTF-8 with the "surrogatepass" error
        handler.  Raises TypeError when *initial_value* is neither a str
        nor None.
        """
        super().__init__(BytesIO(),
                         encoding="utf-8",
                         errors="surrogatepass",
                         newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value is None:
            return
        if not isinstance(initial_value, str):
            raise TypeError("initial_value must be str or None, not {0}"
                            .format(type(initial_value).__name__))
        # Preload the content, then rewind so reads start at the beginning.
        self.write(initial_value)
        self.seek(0)

    def getvalue(self):
        """Return the entire buffer contents decoded as a str.

        The decoder's state is saved and restored around the call.
        """
        self.flush()
        decoder = self._decoder or self._get_decoder()
        saved_state = decoder.getstate()
        decoder.reset()
        try:
            raw = self.buffer.getvalue()
            return decoder.decode(raw, final=True)
        finally:
            decoder.setstate(saved_state)

    def __repr__(self):
        # TextIOWrapper tells the encoding in its repr. In StringIO,
        # that's an implementation detail.
        return object.__repr__(self)

    @property
    def errors(self):
        """Always None: the error handler is an implementation detail."""
        return None

    @property
    def encoding(self):
        """Always None: the encoding is an implementation detail."""
        return None

    def detach(self):
        # This doesn't make sense on StringIO.
        self._unsupported("detach")
2684
2685