Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_io/textio.c
12 views
1
/*
2
An implementation of Text I/O as defined by PEP 3116 - "New I/O"
3
4
Classes defined here: TextIOBase, IncrementalNewlineDecoder, TextIOWrapper.
5
6
Written by Amaury Forgeot d'Arc and Antoine Pitrou
7
*/
8
9
#include "Python.h"
10
#include "pycore_call.h" // _PyObject_CallMethod()
11
#include "pycore_codecs.h" // _PyCodecInfo_GetIncrementalDecoder()
12
#include "pycore_interp.h" // PyInterpreterState.fs_codec
13
#include "pycore_long.h" // _PyLong_GetZero()
14
#include "pycore_fileutils.h" // _Py_GetLocaleEncoding()
15
#include "pycore_object.h" // _PyObject_GC_UNTRACK()
16
#include "pycore_pystate.h" // _PyInterpreterState_GET()
17
#include "structmember.h" // PyMemberDef
18
#include "_iomodule.h"
19
20
/*[clinic input]
21
module _io
22
class _io.IncrementalNewlineDecoder "nldecoder_object *" "clinic_state()->PyIncrementalNewlineDecoder_Type"
23
class _io.TextIOWrapper "textio *" "clinic_state()->TextIOWrapper_Type"
24
class _io._TextIOBase "PyObject *" "&PyTextIOBase_Type"
25
[clinic start generated code]*/
26
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=8b7f24fa13bfdd7f]*/
27
28
typedef struct nldecoder_object nldecoder_object;
29
typedef struct textio textio;
30
31
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
32
#include "clinic/textio.c.h"
33
#undef clinic_state
34
35
/* TextIOBase */
36
37
PyDoc_STRVAR(textiobase_doc,
38
"Base class for text I/O.\n"
39
"\n"
40
"This class provides a character and line based interface to stream\n"
41
"I/O. There is no readinto method because Python's character strings\n"
42
"are immutable.\n"
43
);
44
45
static PyObject *
46
_unsupported(_PyIO_State *state, const char *message)
47
{
48
PyErr_SetString(state->unsupported_operation, message);
49
return NULL;
50
}
51
52
/*[clinic input]
53
_io._TextIOBase.detach
54
cls: defining_class
55
/
56
57
Separate the underlying buffer from the TextIOBase and return it.
58
59
After the underlying buffer has been detached, the TextIO is in an unusable state.
60
[clinic start generated code]*/
61
62
static PyObject *
63
_io__TextIOBase_detach_impl(PyObject *self, PyTypeObject *cls)
64
/*[clinic end generated code: output=50915f40c609eaa4 input=987ca3640d0a3776]*/
65
{
66
_PyIO_State *state = get_io_state_by_cls(cls);
67
return _unsupported(state, "detach");
68
}
69
70
/*[clinic input]
71
_io._TextIOBase.read
72
cls: defining_class
73
size: int(unused=True) = -1
74
/
75
76
Read at most size characters from stream.
77
78
Read from underlying buffer until we have size characters or we hit EOF.
79
If size is negative or omitted, read until EOF.
80
[clinic start generated code]*/
81
82
static PyObject *
83
_io__TextIOBase_read_impl(PyObject *self, PyTypeObject *cls,
84
int Py_UNUSED(size))
85
/*[clinic end generated code: output=51a5178a309ce647 input=f5e37720f9fc563f]*/
86
{
87
_PyIO_State *state = get_io_state_by_cls(cls);
88
return _unsupported(state, "read");
89
}
90
91
/*[clinic input]
92
_io._TextIOBase.readline
93
cls: defining_class
94
size: int(unused=True) = -1
95
/
96
97
Read until newline or EOF.
98
99
Return an empty string if EOF is hit immediately.
100
If size is specified, at most size characters will be read.
101
[clinic start generated code]*/
102
103
static PyObject *
104
_io__TextIOBase_readline_impl(PyObject *self, PyTypeObject *cls,
105
int Py_UNUSED(size))
106
/*[clinic end generated code: output=3f47d7966d6d074e input=42eafec94107fa27]*/
107
{
108
_PyIO_State *state = get_io_state_by_cls(cls);
109
return _unsupported(state, "readline");
110
}
111
112
/*[clinic input]
113
_io._TextIOBase.write
114
cls: defining_class
115
s: str(unused=True)
116
/
117
118
Write string s to stream.
119
120
Return the number of characters written
121
(which is always equal to the length of the string).
122
[clinic start generated code]*/
123
124
static PyObject *
125
_io__TextIOBase_write_impl(PyObject *self, PyTypeObject *cls,
126
const char *Py_UNUSED(s))
127
/*[clinic end generated code: output=18b28231460275de input=e9cabaa5f6732b07]*/
128
{
129
_PyIO_State *state = get_io_state_by_cls(cls);
130
return _unsupported(state, "write");
131
}
132
133
PyDoc_STRVAR(textiobase_encoding_doc,
134
"Encoding of the text stream.\n"
135
"\n"
136
"Subclasses should override.\n"
137
);
138
139
static PyObject *
140
textiobase_encoding_get(PyObject *self, void *context)
141
{
142
Py_RETURN_NONE;
143
}
144
145
PyDoc_STRVAR(textiobase_newlines_doc,
146
"Line endings translated so far.\n"
147
"\n"
148
"Only line endings translated during reading are considered.\n"
149
"\n"
150
"Subclasses should override.\n"
151
);
152
153
static PyObject *
154
textiobase_newlines_get(PyObject *self, void *context)
155
{
156
Py_RETURN_NONE;
157
}
158
159
PyDoc_STRVAR(textiobase_errors_doc,
160
"The error setting of the decoder or encoder.\n"
161
"\n"
162
"Subclasses should override.\n"
163
);
164
165
static PyObject *
166
textiobase_errors_get(PyObject *self, void *context)
167
{
168
Py_RETURN_NONE;
169
}
170
171
172
static PyMethodDef textiobase_methods[] = {
173
_IO__TEXTIOBASE_DETACH_METHODDEF
174
_IO__TEXTIOBASE_READ_METHODDEF
175
_IO__TEXTIOBASE_READLINE_METHODDEF
176
_IO__TEXTIOBASE_WRITE_METHODDEF
177
{NULL, NULL}
178
};
179
180
static PyGetSetDef textiobase_getset[] = {
181
{"encoding", (getter)textiobase_encoding_get, NULL, textiobase_encoding_doc},
182
{"newlines", (getter)textiobase_newlines_get, NULL, textiobase_newlines_doc},
183
{"errors", (getter)textiobase_errors_get, NULL, textiobase_errors_doc},
184
{NULL}
185
};
186
187
static PyType_Slot textiobase_slots[] = {
188
{Py_tp_doc, (void *)textiobase_doc},
189
{Py_tp_methods, textiobase_methods},
190
{Py_tp_getset, textiobase_getset},
191
{0, NULL},
192
};
193
194
/* Do not set Py_TPFLAGS_HAVE_GC so that tp_traverse and tp_clear are inherited */
195
PyType_Spec textiobase_spec = {
196
.name = "_io._TextIOBase",
197
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
198
Py_TPFLAGS_IMMUTABLETYPE),
199
.slots = textiobase_slots,
200
};
201
202
/* IncrementalNewlineDecoder */
203
204
struct nldecoder_object {
205
PyObject_HEAD
206
PyObject *decoder;
207
PyObject *errors;
208
unsigned int pendingcr: 1;
209
unsigned int translate: 1;
210
unsigned int seennl: 3;
211
};
212
213
/*[clinic input]
214
_io.IncrementalNewlineDecoder.__init__
215
decoder: object
216
translate: bool
217
errors: object(c_default="NULL") = "strict"
218
219
Codec used when reading a file in universal newlines mode.
220
221
It wraps another incremental decoder, translating \r\n and \r into \n.
222
It also records the types of newlines encountered. When used with
223
translate=False, it ensures that the newline sequence is returned in
224
one piece. When used with decoder=None, it expects unicode strings as
225
decode input and translates newlines without first invoking an external
226
decoder.
227
[clinic start generated code]*/
228
229
static int
230
_io_IncrementalNewlineDecoder___init___impl(nldecoder_object *self,
231
PyObject *decoder, int translate,
232
PyObject *errors)
233
/*[clinic end generated code: output=fbd04d443e764ec2 input=ed547aa257616b0e]*/
234
{
235
236
if (errors == NULL) {
237
errors = Py_NewRef(&_Py_ID(strict));
238
}
239
else {
240
errors = Py_NewRef(errors);
241
}
242
243
Py_XSETREF(self->errors, errors);
244
Py_XSETREF(self->decoder, Py_NewRef(decoder));
245
self->translate = translate ? 1 : 0;
246
self->seennl = 0;
247
self->pendingcr = 0;
248
249
return 0;
250
}
251
252
static int
253
incrementalnewlinedecoder_traverse(nldecoder_object *self, visitproc visit,
254
void *arg)
255
{
256
Py_VISIT(Py_TYPE(self));
257
Py_VISIT(self->decoder);
258
Py_VISIT(self->errors);
259
return 0;
260
}
261
262
static int
263
incrementalnewlinedecoder_clear(nldecoder_object *self)
264
{
265
Py_CLEAR(self->decoder);
266
Py_CLEAR(self->errors);
267
return 0;
268
}
269
270
static void
271
incrementalnewlinedecoder_dealloc(nldecoder_object *self)
272
{
273
PyTypeObject *tp = Py_TYPE(self);
274
_PyObject_GC_UNTRACK(self);
275
(void)incrementalnewlinedecoder_clear(self);
276
tp->tp_free((PyObject *)self);
277
Py_DECREF(tp);
278
}
279
280
static int
281
check_decoded(PyObject *decoded)
282
{
283
if (decoded == NULL)
284
return -1;
285
if (!PyUnicode_Check(decoded)) {
286
PyErr_Format(PyExc_TypeError,
287
"decoder should return a string result, not '%.200s'",
288
Py_TYPE(decoded)->tp_name);
289
Py_DECREF(decoded);
290
return -1;
291
}
292
return 0;
293
}
294
295
/* Guard for IncrementalNewlineDecoder entry points: when `errors` is still
   NULL (i.e. __init__ never stored it), set ValueError and make the
   *enclosing* function return NULL.

   Wrapped in do { } while (0) so that `CHECK_INITIALIZED_DECODER(x);` is a
   single statement and stays correct inside an unbraced if/else; the
   argument is parenthesized so expressions such as `&obj` expand safely. */
#define CHECK_INITIALIZED_DECODER(self) \
    do { \
        if ((self)->errors == NULL) { \
            PyErr_SetString(PyExc_ValueError, \
                   "IncrementalNewlineDecoder.__init__() not called"); \
            return NULL; \
        } \
    } while (0)
301
302
/* Bit flags stored in nldecoder_object.seennl (a 3-bit field). */
#define SEEN_CR   (1 << 0)
#define SEEN_LF   (1 << 1)
#define SEEN_CRLF (1 << 2)
#define SEEN_ALL  (SEEN_CR | SEEN_LF | SEEN_CRLF)
306
307
/* Decode `input` through the wrapped decoder and post-process newlines.

   myself: an IncrementalNewlineDecoder instance (cast to nldecoder_object).
   input:  forwarded to the wrapped decoder's decode(); when the wrapped
           decoder is None it is used directly and must already be a str.
   final:  non-zero on the last call; a trailing '\r' is then no longer
           held back.

   Returns a new reference to the resulting str, or NULL with an exception
   set.  Updates self->pendingcr and self->seennl as a side effect. */
PyObject *
_PyIncrementalNewlineDecoder_decode(PyObject *myself,
                                    PyObject *input, int final)
{
    nldecoder_object *self = (nldecoder_object *) myself;
    PyObject *output;
    Py_ssize_t output_len;
    const void *text;
    Py_ssize_t len;
    int kind;
    int seennl;
    int only_lf;

    CHECK_INITIALIZED_DECODER(self);

    /* Step 1: decode input (with the eventual \r from a previous pass) */
    if (self->decoder == Py_None) {
        output = Py_NewRef(input);
    }
    else {
        PyObject *final_flag = final ? Py_True : Py_False;
        output = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
                                            input, final_flag, NULL);
    }
    /* check_decoded() already released `output` if it was not a str. */
    if (check_decoded(output) < 0) {
        return NULL;
    }

    /* Step 2: put back the '\r' that a previous call held back. */
    output_len = PyUnicode_GET_LENGTH(output);
    if (self->pendingcr && (final || output_len > 0)) {
        PyObject *prefixed;
        char *dest;
        int width;

        prefixed = PyUnicode_New(output_len + 1,
                                 PyUnicode_MAX_CHAR_VALUE(output));
        if (prefixed == NULL) {
            goto error;
        }
        width = PyUnicode_KIND(prefixed);
        dest = PyUnicode_DATA(prefixed);
        PyUnicode_WRITE(width, dest, 0, '\r');
        /* Copy the old characters right after the one-character prefix. */
        memcpy(dest + width, PyUnicode_DATA(output), width * output_len);
        Py_SETREF(output, prefixed);
        self->pendingcr = 0;
        output_len++;
    }

    /* Step 3: retain last \r even when not translating data:
     * then readline() is sure to get \r\n in one pass
     */
    if (!final && output_len > 0
        && PyUnicode_READ_CHAR(output, output_len - 1) == '\r')
    {
        PyObject *trimmed = PyUnicode_Substring(output, 0, output_len - 1);
        if (trimmed == NULL) {
            goto error;
        }
        Py_SETREF(output, trimmed);
        self->pendingcr = 1;
    }

    /* Step 4: record which newlines are read and do newline translation if
       desired, all in one pass. */
    text = PyUnicode_DATA(output);
    len = PyUnicode_GET_LENGTH(output);
    kind = PyUnicode_KIND(output);
    if (len == 0) {
        return output;
    }
    seennl = self->seennl;

    /* If, up to now, newlines are consistently \n, do a quick check
       for the \r *byte* with the libc's optimized memchr. */
    only_lf = 0;
    if (seennl == 0 || seennl == SEEN_LF) {
        only_lf = (memchr(text, '\r', kind * len) == NULL);
    }

    if (only_lf) {
        /* If not already seen, quick scan for a possible "\n" character.
           (there's nothing else to be done, even when in translation mode) */
        if (seennl == 0 && memchr(text, '\n', kind * len) != NULL) {
            if (kind == PyUnicode_1BYTE_KIND) {
                /* One byte per character: the byte hit is a real '\n'. */
                seennl |= SEEN_LF;
            }
            else {
                /* Wider characters: the matching byte may be part of
                   another code point, so look at whole characters. */
                Py_ssize_t i = 0;
                do {
                    /* Fast loop for non-control characters.  There is no
                       bounds check; presumably the string's terminating
                       NUL stops it. */
                    while (PyUnicode_READ(kind, text, i) > '\n') {
                        i++;
                    }
                    if (PyUnicode_READ(kind, text, i++) == '\n') {
                        seennl |= SEEN_LF;
                        break;
                    }
                } while (i < len);
            }
        }
        /* Finished: we have scanned for newlines, and none of them
           need translating */
    }
    else if (!self->translate) {
        /* Only bookkeeping is needed, and only if some newline type has
           not been seen yet. */
        if (seennl != SEEN_ALL) {
            Py_ssize_t i = 0;
            do {
                Py_UCS4 c;
                /* Fast loop for non-control characters */
                while (PyUnicode_READ(kind, text, i) > '\r') {
                    i++;
                }
                c = PyUnicode_READ(kind, text, i++);
                if (c == '\n') {
                    seennl |= SEEN_LF;
                }
                else if (c == '\r') {
                    if (PyUnicode_READ(kind, text, i) == '\n') {
                        seennl |= SEEN_CRLF;
                        i++;
                    }
                    else {
                        seennl |= SEEN_CR;
                    }
                }
            } while (i < len && seennl != SEEN_ALL);
        }
    }
    else {
        /* Translation mode: build a copy with "\r" and "\r\n" replaced by
           "\n".  The copy can only be shorter than or equal to the
           original, so kind * len bytes are enough. */
        Py_ssize_t src = 0;
        Py_ssize_t dst = 0;
        void *translated = PyMem_Malloc(kind * len);

        if (translated == NULL) {
            PyErr_NoMemory();
            goto error;
        }
        for (;;) {
            Py_UCS4 c;
            /* Fast loop for non-control characters */
            while ((c = PyUnicode_READ(kind, text, src++)) > '\r') {
                PyUnicode_WRITE(kind, translated, dst++, c);
            }
            if (c == '\n') {
                PyUnicode_WRITE(kind, translated, dst++, c);
                seennl |= SEEN_LF;
            }
            else if (c == '\r') {
                if (PyUnicode_READ(kind, text, src) == '\n') {
                    src++;
                    seennl |= SEEN_CRLF;
                }
                else {
                    seennl |= SEEN_CR;
                }
                PyUnicode_WRITE(kind, translated, dst++, '\n');
            }
            else {
                /* Another character <= '\r': stop once the read position
                   has moved past the end, otherwise copy it through. */
                if (src > len) {
                    break;
                }
                PyUnicode_WRITE(kind, translated, dst++, c);
            }
        }
        Py_DECREF(output);
        output = PyUnicode_FromKindAndData(kind, translated, dst);
        PyMem_Free(translated);
        if (output == NULL) {
            return NULL;
        }
    }

    self->seennl |= seennl;
    return output;

error:
    Py_DECREF(output);
    return NULL;
}
500
501
/*[clinic input]
502
_io.IncrementalNewlineDecoder.decode
503
input: object
504
final: bool = False
505
[clinic start generated code]*/
506
507
static PyObject *
508
_io_IncrementalNewlineDecoder_decode_impl(nldecoder_object *self,
509
PyObject *input, int final)
510
/*[clinic end generated code: output=0d486755bb37a66e input=90e223c70322c5cd]*/
511
{
512
return _PyIncrementalNewlineDecoder_decode((PyObject *) self, input, final);
513
}
514
515
/*[clinic input]
516
_io.IncrementalNewlineDecoder.getstate
517
[clinic start generated code]*/
518
519
static PyObject *
520
_io_IncrementalNewlineDecoder_getstate_impl(nldecoder_object *self)
521
/*[clinic end generated code: output=f0d2c9c136f4e0d0 input=f8ff101825e32e7f]*/
522
{
523
PyObject *buffer;
524
unsigned long long flag;
525
526
CHECK_INITIALIZED_DECODER(self);
527
528
if (self->decoder != Py_None) {
529
PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
530
&_Py_ID(getstate));
531
if (state == NULL)
532
return NULL;
533
if (!PyTuple_Check(state)) {
534
PyErr_SetString(PyExc_TypeError,
535
"illegal decoder state");
536
Py_DECREF(state);
537
return NULL;
538
}
539
if (!PyArg_ParseTuple(state, "OK;illegal decoder state",
540
&buffer, &flag))
541
{
542
Py_DECREF(state);
543
return NULL;
544
}
545
Py_INCREF(buffer);
546
Py_DECREF(state);
547
}
548
else {
549
buffer = PyBytes_FromString("");
550
flag = 0;
551
}
552
flag <<= 1;
553
if (self->pendingcr)
554
flag |= 1;
555
return Py_BuildValue("NK", buffer, flag);
556
}
557
558
/*[clinic input]
559
_io.IncrementalNewlineDecoder.setstate
560
state: object
561
/
562
[clinic start generated code]*/
563
564
static PyObject *
565
_io_IncrementalNewlineDecoder_setstate(nldecoder_object *self,
566
PyObject *state)
567
/*[clinic end generated code: output=c10c622508b576cb input=c53fb505a76dbbe2]*/
568
{
569
PyObject *buffer;
570
unsigned long long flag;
571
572
CHECK_INITIALIZED_DECODER(self);
573
574
if (!PyTuple_Check(state)) {
575
PyErr_SetString(PyExc_TypeError, "state argument must be a tuple");
576
return NULL;
577
}
578
if (!PyArg_ParseTuple(state, "OK;setstate(): illegal state argument",
579
&buffer, &flag))
580
{
581
return NULL;
582
}
583
584
self->pendingcr = (int) (flag & 1);
585
flag >>= 1;
586
587
if (self->decoder != Py_None) {
588
return _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
589
"((OK))", buffer, flag);
590
}
591
else {
592
Py_RETURN_NONE;
593
}
594
}
595
596
/*[clinic input]
597
_io.IncrementalNewlineDecoder.reset
598
[clinic start generated code]*/
599
600
static PyObject *
601
_io_IncrementalNewlineDecoder_reset_impl(nldecoder_object *self)
602
/*[clinic end generated code: output=32fa40c7462aa8ff input=728678ddaea776df]*/
603
{
604
CHECK_INITIALIZED_DECODER(self);
605
606
self->seennl = 0;
607
self->pendingcr = 0;
608
if (self->decoder != Py_None)
609
return PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
610
else
611
Py_RETURN_NONE;
612
}
613
614
static PyObject *
615
incrementalnewlinedecoder_newlines_get(nldecoder_object *self, void *context)
616
{
617
CHECK_INITIALIZED_DECODER(self);
618
619
switch (self->seennl) {
620
case SEEN_CR:
621
return PyUnicode_FromString("\r");
622
case SEEN_LF:
623
return PyUnicode_FromString("\n");
624
case SEEN_CRLF:
625
return PyUnicode_FromString("\r\n");
626
case SEEN_CR | SEEN_LF:
627
return Py_BuildValue("ss", "\r", "\n");
628
case SEEN_CR | SEEN_CRLF:
629
return Py_BuildValue("ss", "\r", "\r\n");
630
case SEEN_LF | SEEN_CRLF:
631
return Py_BuildValue("ss", "\n", "\r\n");
632
case SEEN_CR | SEEN_LF | SEEN_CRLF:
633
return Py_BuildValue("sss", "\r", "\n", "\r\n");
634
default:
635
Py_RETURN_NONE;
636
}
637
638
}
639
640
/* TextIOWrapper */
641
642
/* Signature shared by the specialized encoders stored in
   textio.encodefunc. */
typedef PyObject *(*encodefunc_t)(PyObject *, PyObject *);
644
645
struct textio
646
{
647
PyObject_HEAD
648
int ok; /* initialized? */
649
int detached;
650
Py_ssize_t chunk_size;
651
PyObject *buffer;
652
PyObject *encoding;
653
PyObject *encoder;
654
PyObject *decoder;
655
PyObject *readnl;
656
PyObject *errors;
657
const char *writenl; /* ASCII-encoded; NULL stands for \n */
658
char line_buffering;
659
char write_through;
660
char readuniversal;
661
char readtranslate;
662
char writetranslate;
663
char seekable;
664
char has_read1;
665
char telling;
666
char finalizing;
667
/* Specialized encoding func (see below) */
668
encodefunc_t encodefunc;
669
/* Whether or not it's the start of the stream */
670
char encoding_start_of_stream;
671
672
/* Reads and writes are internally buffered in order to speed things up.
673
However, any read will first flush the write buffer if itsn't empty.
674
675
Please also note that text to be written is first encoded before being
676
buffered. This is necessary so that encoding errors are immediately
677
reported to the caller, but it unfortunately means that the
678
IncrementalEncoder (whose encode() method is always written in Python)
679
becomes a bottleneck for small writes.
680
*/
681
PyObject *decoded_chars; /* buffer for text returned from decoder */
682
Py_ssize_t decoded_chars_used; /* offset into _decoded_chars for read() */
683
PyObject *pending_bytes; // data waiting to be written.
684
// ascii unicode, bytes, or list of them.
685
Py_ssize_t pending_bytes_count;
686
687
/* snapshot is either NULL, or a tuple (dec_flags, next_input) where
688
* dec_flags is the second (integer) item of the decoder state and
689
* next_input is the chunk of input bytes that comes next after the
690
* snapshot point. We use this to reconstruct decoder states in tell().
691
*/
692
PyObject *snapshot;
693
/* Bytes-to-characters ratio for the current chunk. Serves as input for
694
the heuristic in tell(). */
695
double b2cratio;
696
697
/* Cache raw object if it's a FileIO object */
698
PyObject *raw;
699
700
PyObject *weakreflist;
701
PyObject *dict;
702
703
_PyIO_State *state;
704
};
705
706
static void
707
textiowrapper_set_decoded_chars(textio *self, PyObject *chars);
708
709
/* A couple of specialized cases in order to bypass the slow incremental
710
encoding methods for the most popular encodings. */
711
712
static PyObject *
713
ascii_encode(textio *self, PyObject *text)
714
{
715
return _PyUnicode_AsASCIIString(text, PyUnicode_AsUTF8(self->errors));
716
}
717
718
static PyObject *
719
utf16be_encode(textio *self, PyObject *text)
720
{
721
return _PyUnicode_EncodeUTF16(text,
722
PyUnicode_AsUTF8(self->errors), 1);
723
}
724
725
static PyObject *
726
utf16le_encode(textio *self, PyObject *text)
727
{
728
return _PyUnicode_EncodeUTF16(text,
729
PyUnicode_AsUTF8(self->errors), -1);
730
}
731
732
static PyObject *
733
utf16_encode(textio *self, PyObject *text)
734
{
735
if (!self->encoding_start_of_stream) {
736
/* Skip the BOM and use native byte ordering */
737
#if PY_BIG_ENDIAN
738
return utf16be_encode(self, text);
739
#else
740
return utf16le_encode(self, text);
741
#endif
742
}
743
return _PyUnicode_EncodeUTF16(text,
744
PyUnicode_AsUTF8(self->errors), 0);
745
}
746
747
static PyObject *
748
utf32be_encode(textio *self, PyObject *text)
749
{
750
return _PyUnicode_EncodeUTF32(text,
751
PyUnicode_AsUTF8(self->errors), 1);
752
}
753
754
static PyObject *
755
utf32le_encode(textio *self, PyObject *text)
756
{
757
return _PyUnicode_EncodeUTF32(text,
758
PyUnicode_AsUTF8(self->errors), -1);
759
}
760
761
static PyObject *
762
utf32_encode(textio *self, PyObject *text)
763
{
764
if (!self->encoding_start_of_stream) {
765
/* Skip the BOM and use native byte ordering */
766
#if PY_BIG_ENDIAN
767
return utf32be_encode(self, text);
768
#else
769
return utf32le_encode(self, text);
770
#endif
771
}
772
return _PyUnicode_EncodeUTF32(text,
773
PyUnicode_AsUTF8(self->errors), 0);
774
}
775
776
static PyObject *
777
utf8_encode(textio *self, PyObject *text)
778
{
779
return _PyUnicode_AsUTF8String(text, PyUnicode_AsUTF8(self->errors));
780
}
781
782
static PyObject *
783
latin1_encode(textio *self, PyObject *text)
784
{
785
return _PyUnicode_AsLatin1String(text, PyUnicode_AsUTF8(self->errors));
786
}
787
788
// Return true when encoding can be skipped when text is ascii.
789
static inline int
790
is_asciicompat_encoding(encodefunc_t f)
791
{
792
return f == (encodefunc_t) ascii_encode
793
|| f == (encodefunc_t) latin1_encode
794
|| f == (encodefunc_t) utf8_encode;
795
}
796
797
/* Map normalized encoding names onto the specialized encoding funcs */
798
799
typedef struct {
800
const char *name;
801
encodefunc_t encodefunc;
802
} encodefuncentry;
803
804
static const encodefuncentry encodefuncs[] = {
805
{"ascii", (encodefunc_t) ascii_encode},
806
{"iso8859-1", (encodefunc_t) latin1_encode},
807
{"utf-8", (encodefunc_t) utf8_encode},
808
{"utf-16-be", (encodefunc_t) utf16be_encode},
809
{"utf-16-le", (encodefunc_t) utf16le_encode},
810
{"utf-16", (encodefunc_t) utf16_encode},
811
{"utf-32-be", (encodefunc_t) utf32be_encode},
812
{"utf-32-le", (encodefunc_t) utf32le_encode},
813
{"utf-32", (encodefunc_t) utf32_encode},
814
{NULL, NULL}
815
};
816
817
static int
818
validate_newline(const char *newline)
819
{
820
if (newline && newline[0] != '\0'
821
&& !(newline[0] == '\n' && newline[1] == '\0')
822
&& !(newline[0] == '\r' && newline[1] == '\0')
823
&& !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
824
PyErr_Format(PyExc_ValueError,
825
"illegal newline value: %s", newline);
826
return -1;
827
}
828
return 0;
829
}
830
831
static int
832
set_newline(textio *self, const char *newline)
833
{
834
PyObject *old = self->readnl;
835
if (newline == NULL) {
836
self->readnl = NULL;
837
}
838
else {
839
self->readnl = PyUnicode_FromString(newline);
840
if (self->readnl == NULL) {
841
self->readnl = old;
842
return -1;
843
}
844
}
845
self->readuniversal = (newline == NULL || newline[0] == '\0');
846
self->readtranslate = (newline == NULL);
847
self->writetranslate = (newline == NULL || newline[0] != '\0');
848
if (!self->readuniversal && self->readnl != NULL) {
849
// validate_newline() accepts only ASCII newlines.
850
assert(PyUnicode_KIND(self->readnl) == PyUnicode_1BYTE_KIND);
851
self->writenl = (const char *)PyUnicode_1BYTE_DATA(self->readnl);
852
if (strcmp(self->writenl, "\n") == 0) {
853
self->writenl = NULL;
854
}
855
}
856
else {
857
#ifdef MS_WINDOWS
858
self->writenl = "\r\n";
859
#else
860
self->writenl = NULL;
861
#endif
862
}
863
Py_XDECREF(old);
864
return 0;
865
}
866
867
static int
868
_textiowrapper_set_decoder(textio *self, PyObject *codec_info,
869
const char *errors)
870
{
871
PyObject *res;
872
int r;
873
874
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
875
if (res == NULL)
876
return -1;
877
878
r = PyObject_IsTrue(res);
879
Py_DECREF(res);
880
if (r == -1)
881
return -1;
882
883
if (r != 1)
884
return 0;
885
886
Py_CLEAR(self->decoder);
887
self->decoder = _PyCodecInfo_GetIncrementalDecoder(codec_info, errors);
888
if (self->decoder == NULL)
889
return -1;
890
891
if (self->readuniversal) {
892
_PyIO_State *state = self->state;
893
PyObject *incrementalDecoder = PyObject_CallFunctionObjArgs(
894
(PyObject *)state->PyIncrementalNewlineDecoder_Type,
895
self->decoder, self->readtranslate ? Py_True : Py_False, NULL);
896
if (incrementalDecoder == NULL)
897
return -1;
898
Py_XSETREF(self->decoder, incrementalDecoder);
899
}
900
901
return 0;
902
}
903
904
static PyObject*
905
_textiowrapper_decode(_PyIO_State *state, PyObject *decoder, PyObject *bytes,
906
int eof)
907
{
908
PyObject *chars;
909
910
if (Py_IS_TYPE(decoder, state->PyIncrementalNewlineDecoder_Type))
911
chars = _PyIncrementalNewlineDecoder_decode(decoder, bytes, eof);
912
else
913
chars = PyObject_CallMethodObjArgs(decoder, &_Py_ID(decode), bytes,
914
eof ? Py_True : Py_False, NULL);
915
916
if (check_decoded(chars) < 0)
917
// check_decoded already decreases refcount
918
return NULL;
919
920
return chars;
921
}
922
923
static int
924
_textiowrapper_set_encoder(textio *self, PyObject *codec_info,
925
const char *errors)
926
{
927
PyObject *res;
928
int r;
929
930
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
931
if (res == NULL)
932
return -1;
933
934
r = PyObject_IsTrue(res);
935
Py_DECREF(res);
936
if (r == -1)
937
return -1;
938
939
if (r != 1)
940
return 0;
941
942
Py_CLEAR(self->encoder);
943
self->encodefunc = NULL;
944
self->encoder = _PyCodecInfo_GetIncrementalEncoder(codec_info, errors);
945
if (self->encoder == NULL)
946
return -1;
947
948
/* Get the normalized named of the codec */
949
if (_PyObject_LookupAttr(codec_info, &_Py_ID(name), &res) < 0) {
950
return -1;
951
}
952
if (res != NULL && PyUnicode_Check(res)) {
953
const encodefuncentry *e = encodefuncs;
954
while (e->name != NULL) {
955
if (_PyUnicode_EqualToASCIIString(res, e->name)) {
956
self->encodefunc = e->encodefunc;
957
break;
958
}
959
e++;
960
}
961
}
962
Py_XDECREF(res);
963
964
return 0;
965
}
966
967
static int
968
_textiowrapper_fix_encoder_state(textio *self)
969
{
970
if (!self->seekable || !self->encoder) {
971
return 0;
972
}
973
974
self->encoding_start_of_stream = 1;
975
976
PyObject *cookieObj = PyObject_CallMethodNoArgs(
977
self->buffer, &_Py_ID(tell));
978
if (cookieObj == NULL) {
979
return -1;
980
}
981
982
int cmp = PyObject_RichCompareBool(cookieObj, _PyLong_GetZero(), Py_EQ);
983
Py_DECREF(cookieObj);
984
if (cmp < 0) {
985
return -1;
986
}
987
988
if (cmp == 0) {
989
self->encoding_start_of_stream = 0;
990
PyObject *res = PyObject_CallMethodOneArg(
991
self->encoder, &_Py_ID(setstate), _PyLong_GetZero());
992
if (res == NULL) {
993
return -1;
994
}
995
Py_DECREF(res);
996
}
997
998
return 0;
999
}
1000
1001
static int
1002
io_check_errors(PyObject *errors)
1003
{
1004
assert(errors != NULL && errors != Py_None);
1005
1006
PyInterpreterState *interp = _PyInterpreterState_GET();
1007
#ifndef Py_DEBUG
1008
/* In release mode, only check in development mode (-X dev) */
1009
if (!_PyInterpreterState_GetConfig(interp)->dev_mode) {
1010
return 0;
1011
}
1012
#else
1013
/* Always check in debug mode */
1014
#endif
1015
1016
/* Avoid calling PyCodec_LookupError() before the codec registry is ready:
1017
before_PyUnicode_InitEncodings() is called. */
1018
if (!interp->unicode.fs_codec.encoding) {
1019
return 0;
1020
}
1021
1022
Py_ssize_t name_length;
1023
const char *name = PyUnicode_AsUTF8AndSize(errors, &name_length);
1024
if (name == NULL) {
1025
return -1;
1026
}
1027
if (strlen(name) != (size_t)name_length) {
1028
PyErr_SetString(PyExc_ValueError, "embedded null character in errors");
1029
return -1;
1030
}
1031
PyObject *handler = PyCodec_LookupError(name);
1032
if (handler != NULL) {
1033
Py_DECREF(handler);
1034
return 0;
1035
}
1036
return -1;
1037
}
1038
1039
1040
1041
/*[clinic input]
1042
_io.TextIOWrapper.__init__
1043
buffer: object
1044
encoding: str(accept={str, NoneType}) = None
1045
errors: object = None
1046
newline: str(accept={str, NoneType}) = None
1047
line_buffering: bool = False
1048
write_through: bool = False
1049
1050
Character and line based layer over a BufferedIOBase object, buffer.
1051
1052
encoding gives the name of the encoding that the stream will be
1053
decoded or encoded with. It defaults to locale.getencoding().
1054
1055
errors determines the strictness of encoding and decoding (see
1056
help(codecs.Codec) or the documentation for codecs.register) and
1057
defaults to "strict".
1058
1059
newline controls how line endings are handled. It can be None, '',
1060
'\n', '\r', and '\r\n'. It works as follows:
1061
1062
* On input, if newline is None, universal newlines mode is
1063
enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
1064
these are translated into '\n' before being returned to the
1065
caller. If it is '', universal newline mode is enabled, but line
1066
endings are returned to the caller untranslated. If it has any of
1067
the other legal values, input lines are only terminated by the given
1068
string, and the line ending is returned to the caller untranslated.
1069
1070
* On output, if newline is None, any '\n' characters written are
1071
translated to the system default line separator, os.linesep. If
1072
newline is '' or '\n', no translation takes place. If newline is any
1073
of the other legal values, any '\n' characters written are translated
1074
to the given string.
1075
1076
If line_buffering is True, a call to flush is implied when a call to
1077
write contains a newline character.
1078
[clinic start generated code]*/
1079
1080
static int
1081
_io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
1082
const char *encoding, PyObject *errors,
1083
const char *newline, int line_buffering,
1084
int write_through)
1085
/*[clinic end generated code: output=72267c0c01032ed2 input=e6cfaaaf6059d4f5]*/
1086
{
1087
PyObject *raw, *codec_info = NULL;
1088
PyObject *res;
1089
int r;
1090
1091
self->ok = 0;
1092
self->detached = 0;
1093
1094
if (encoding == NULL) {
1095
PyInterpreterState *interp = _PyInterpreterState_GET();
1096
if (_PyInterpreterState_GetConfig(interp)->warn_default_encoding) {
1097
if (PyErr_WarnEx(PyExc_EncodingWarning,
1098
"'encoding' argument not specified", 1)) {
1099
return -1;
1100
}
1101
}
1102
}
1103
1104
if (errors == Py_None) {
1105
errors = &_Py_ID(strict);
1106
}
1107
else if (!PyUnicode_Check(errors)) {
1108
// Check 'errors' argument here because Argument Clinic doesn't support
1109
// 'str(accept={str, NoneType})' converter.
1110
PyErr_Format(
1111
PyExc_TypeError,
1112
"TextIOWrapper() argument 'errors' must be str or None, not %.50s",
1113
Py_TYPE(errors)->tp_name);
1114
return -1;
1115
}
1116
else if (io_check_errors(errors)) {
1117
return -1;
1118
}
1119
1120
if (validate_newline(newline) < 0) {
1121
return -1;
1122
}
1123
1124
Py_CLEAR(self->buffer);
1125
Py_CLEAR(self->encoding);
1126
Py_CLEAR(self->encoder);
1127
Py_CLEAR(self->decoder);
1128
Py_CLEAR(self->readnl);
1129
Py_CLEAR(self->decoded_chars);
1130
Py_CLEAR(self->pending_bytes);
1131
Py_CLEAR(self->snapshot);
1132
Py_CLEAR(self->errors);
1133
Py_CLEAR(self->raw);
1134
self->decoded_chars_used = 0;
1135
self->pending_bytes_count = 0;
1136
self->encodefunc = NULL;
1137
self->b2cratio = 0.0;
1138
1139
if (encoding == NULL && _PyRuntime.preconfig.utf8_mode) {
1140
_Py_DECLARE_STR(utf_8, "utf-8");
1141
self->encoding = Py_NewRef(&_Py_STR(utf_8));
1142
}
1143
else if (encoding == NULL || (strcmp(encoding, "locale") == 0)) {
1144
self->encoding = _Py_GetLocaleEncodingObject();
1145
if (self->encoding == NULL) {
1146
goto error;
1147
}
1148
assert(PyUnicode_Check(self->encoding));
1149
}
1150
1151
if (self->encoding != NULL) {
1152
encoding = PyUnicode_AsUTF8(self->encoding);
1153
if (encoding == NULL)
1154
goto error;
1155
}
1156
else if (encoding != NULL) {
1157
self->encoding = PyUnicode_FromString(encoding);
1158
if (self->encoding == NULL)
1159
goto error;
1160
}
1161
else {
1162
PyErr_SetString(PyExc_OSError,
1163
"could not determine default encoding");
1164
goto error;
1165
}
1166
1167
/* Check we have been asked for a real text encoding */
1168
codec_info = _PyCodec_LookupTextEncoding(encoding, "codecs.open()");
1169
if (codec_info == NULL) {
1170
Py_CLEAR(self->encoding);
1171
goto error;
1172
}
1173
1174
/* XXX: Failures beyond this point have the potential to leak elements
1175
* of the partially constructed object (like self->encoding)
1176
*/
1177
1178
self->errors = Py_NewRef(errors);
1179
self->chunk_size = 8192;
1180
self->line_buffering = line_buffering;
1181
self->write_through = write_through;
1182
if (set_newline(self, newline) < 0) {
1183
goto error;
1184
}
1185
1186
self->buffer = Py_NewRef(buffer);
1187
1188
/* Build the decoder object */
1189
_PyIO_State *state = find_io_state_by_def(Py_TYPE(self));
1190
self->state = state;
1191
if (_textiowrapper_set_decoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1192
goto error;
1193
1194
/* Build the encoder object */
1195
if (_textiowrapper_set_encoder(self, codec_info, PyUnicode_AsUTF8(errors)) != 0)
1196
goto error;
1197
1198
/* Finished sorting out the codec details */
1199
Py_CLEAR(codec_info);
1200
1201
if (Py_IS_TYPE(buffer, state->PyBufferedReader_Type) ||
1202
Py_IS_TYPE(buffer, state->PyBufferedWriter_Type) ||
1203
Py_IS_TYPE(buffer, state->PyBufferedRandom_Type))
1204
{
1205
if (_PyObject_LookupAttr(buffer, &_Py_ID(raw), &raw) < 0)
1206
goto error;
1207
/* Cache the raw FileIO object to speed up 'closed' checks */
1208
if (raw != NULL) {
1209
if (Py_IS_TYPE(raw, state->PyFileIO_Type))
1210
self->raw = raw;
1211
else
1212
Py_DECREF(raw);
1213
}
1214
}
1215
1216
res = PyObject_CallMethodNoArgs(buffer, &_Py_ID(seekable));
1217
if (res == NULL)
1218
goto error;
1219
r = PyObject_IsTrue(res);
1220
Py_DECREF(res);
1221
if (r < 0)
1222
goto error;
1223
self->seekable = self->telling = r;
1224
1225
r = _PyObject_LookupAttr(buffer, &_Py_ID(read1), &res);
1226
if (r < 0) {
1227
goto error;
1228
}
1229
Py_XDECREF(res);
1230
self->has_read1 = r;
1231
1232
self->encoding_start_of_stream = 0;
1233
if (_textiowrapper_fix_encoder_state(self) < 0) {
1234
goto error;
1235
}
1236
1237
self->ok = 1;
1238
return 0;
1239
1240
error:
1241
Py_XDECREF(codec_info);
1242
return -1;
1243
}
1244
1245
/* Return *default_value* if ob is None, 0 if ob is false, 1 if ob is true,
1246
* -1 on error.
1247
*/
1248
static int
1249
convert_optional_bool(PyObject *obj, int default_value)
1250
{
1251
long v;
1252
if (obj == Py_None) {
1253
v = default_value;
1254
}
1255
else {
1256
v = PyLong_AsLong(obj);
1257
if (v == -1 && PyErr_Occurred())
1258
return -1;
1259
}
1260
return v != 0;
1261
}
1262
1263
static int
1264
textiowrapper_change_encoding(textio *self, PyObject *encoding,
1265
PyObject *errors, int newline_changed)
1266
{
1267
/* Use existing settings where new settings are not specified */
1268
if (encoding == Py_None && errors == Py_None && !newline_changed) {
1269
return 0; // no change
1270
}
1271
1272
if (encoding == Py_None) {
1273
encoding = self->encoding;
1274
if (errors == Py_None) {
1275
errors = self->errors;
1276
}
1277
Py_INCREF(encoding);
1278
}
1279
else {
1280
if (_PyUnicode_EqualToASCIIString(encoding, "locale")) {
1281
encoding = _Py_GetLocaleEncodingObject();
1282
if (encoding == NULL) {
1283
return -1;
1284
}
1285
} else {
1286
Py_INCREF(encoding);
1287
}
1288
if (errors == Py_None) {
1289
errors = &_Py_ID(strict);
1290
}
1291
}
1292
1293
const char *c_errors = PyUnicode_AsUTF8(errors);
1294
if (c_errors == NULL) {
1295
Py_DECREF(encoding);
1296
return -1;
1297
}
1298
1299
// Create new encoder & decoder
1300
PyObject *codec_info = _PyCodec_LookupTextEncoding(
1301
PyUnicode_AsUTF8(encoding), "codecs.open()");
1302
if (codec_info == NULL) {
1303
Py_DECREF(encoding);
1304
return -1;
1305
}
1306
if (_textiowrapper_set_decoder(self, codec_info, c_errors) != 0 ||
1307
_textiowrapper_set_encoder(self, codec_info, c_errors) != 0) {
1308
Py_DECREF(codec_info);
1309
Py_DECREF(encoding);
1310
return -1;
1311
}
1312
Py_DECREF(codec_info);
1313
1314
Py_SETREF(self->encoding, encoding);
1315
Py_SETREF(self->errors, Py_NewRef(errors));
1316
1317
return _textiowrapper_fix_encoder_state(self);
1318
}
1319
1320
/*[clinic input]
1321
_io.TextIOWrapper.reconfigure
1322
*
1323
encoding: object = None
1324
errors: object = None
1325
newline as newline_obj: object(c_default="NULL") = None
1326
line_buffering as line_buffering_obj: object = None
1327
write_through as write_through_obj: object = None
1328
1329
Reconfigure the text stream with new parameters.
1330
1331
This also does an implicit stream flush.
1332
1333
[clinic start generated code]*/
1334
1335
static PyObject *
1336
_io_TextIOWrapper_reconfigure_impl(textio *self, PyObject *encoding,
1337
PyObject *errors, PyObject *newline_obj,
1338
PyObject *line_buffering_obj,
1339
PyObject *write_through_obj)
1340
/*[clinic end generated code: output=52b812ff4b3d4b0f input=671e82136e0f5822]*/
1341
{
1342
int line_buffering;
1343
int write_through;
1344
const char *newline = NULL;
1345
1346
/* Check if something is in the read buffer */
1347
if (self->decoded_chars != NULL) {
1348
if (encoding != Py_None || errors != Py_None || newline_obj != NULL) {
1349
_unsupported(self->state,
1350
"It is not possible to set the encoding or newline "
1351
"of stream after the first read");
1352
return NULL;
1353
}
1354
}
1355
1356
if (newline_obj != NULL && newline_obj != Py_None) {
1357
newline = PyUnicode_AsUTF8(newline_obj);
1358
if (newline == NULL || validate_newline(newline) < 0) {
1359
return NULL;
1360
}
1361
}
1362
1363
line_buffering = convert_optional_bool(line_buffering_obj,
1364
self->line_buffering);
1365
write_through = convert_optional_bool(write_through_obj,
1366
self->write_through);
1367
if (line_buffering < 0 || write_through < 0) {
1368
return NULL;
1369
}
1370
1371
PyObject *res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1372
if (res == NULL) {
1373
return NULL;
1374
}
1375
Py_DECREF(res);
1376
self->b2cratio = 0;
1377
1378
if (newline_obj != NULL && set_newline(self, newline) < 0) {
1379
return NULL;
1380
}
1381
1382
if (textiowrapper_change_encoding(
1383
self, encoding, errors, newline_obj != NULL) < 0) {
1384
return NULL;
1385
}
1386
1387
self->line_buffering = line_buffering;
1388
self->write_through = write_through;
1389
Py_RETURN_NONE;
1390
}
1391
1392
static int
1393
textiowrapper_clear(textio *self)
1394
{
1395
self->ok = 0;
1396
Py_CLEAR(self->buffer);
1397
Py_CLEAR(self->encoding);
1398
Py_CLEAR(self->encoder);
1399
Py_CLEAR(self->decoder);
1400
Py_CLEAR(self->readnl);
1401
Py_CLEAR(self->decoded_chars);
1402
Py_CLEAR(self->pending_bytes);
1403
Py_CLEAR(self->snapshot);
1404
Py_CLEAR(self->errors);
1405
Py_CLEAR(self->raw);
1406
1407
Py_CLEAR(self->dict);
1408
return 0;
1409
}
1410
1411
static void
1412
textiowrapper_dealloc(textio *self)
1413
{
1414
PyTypeObject *tp = Py_TYPE(self);
1415
self->finalizing = 1;
1416
if (_PyIOBase_finalize((PyObject *) self) < 0)
1417
return;
1418
self->ok = 0;
1419
_PyObject_GC_UNTRACK(self);
1420
if (self->weakreflist != NULL)
1421
PyObject_ClearWeakRefs((PyObject *)self);
1422
(void)textiowrapper_clear(self);
1423
tp->tp_free((PyObject *)self);
1424
Py_DECREF(tp);
1425
}
1426
1427
static int
1428
textiowrapper_traverse(textio *self, visitproc visit, void *arg)
1429
{
1430
Py_VISIT(Py_TYPE(self));
1431
Py_VISIT(self->buffer);
1432
Py_VISIT(self->encoding);
1433
Py_VISIT(self->encoder);
1434
Py_VISIT(self->decoder);
1435
Py_VISIT(self->readnl);
1436
Py_VISIT(self->decoded_chars);
1437
Py_VISIT(self->pending_bytes);
1438
Py_VISIT(self->snapshot);
1439
Py_VISIT(self->errors);
1440
Py_VISIT(self->raw);
1441
1442
Py_VISIT(self->dict);
1443
return 0;
1444
}
1445
1446
static PyObject *
1447
textiowrapper_closed_get(textio *self, void *context);
1448
1449
/* This macro takes some shortcuts to make the common case faster. */
1450
#define CHECK_CLOSED(self) \
1451
do { \
1452
int r; \
1453
PyObject *_res; \
1454
if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) { \
1455
if (self->raw != NULL) \
1456
r = _PyFileIO_closed(self->raw); \
1457
else { \
1458
_res = textiowrapper_closed_get(self, NULL); \
1459
if (_res == NULL) \
1460
return NULL; \
1461
r = PyObject_IsTrue(_res); \
1462
Py_DECREF(_res); \
1463
if (r < 0) \
1464
return NULL; \
1465
} \
1466
if (r > 0) { \
1467
PyErr_SetString(PyExc_ValueError, \
1468
"I/O operation on closed file."); \
1469
return NULL; \
1470
} \
1471
} \
1472
else if (_PyIOBase_check_closed((PyObject *)self, Py_True) == NULL) \
1473
return NULL; \
1474
} while (0)
1475
1476
/* Guard macros for entry points.  Each one makes the *calling* function
   return early with ValueError set when the wrapper is unusable.

   They are wrapped in do { ... } while (0) so that each expands to exactly
   one statement: safe inside an unbraced if/else, and the trailing ';' at
   the call site is required. */

/* Return NULL unless initialization completed (self->ok > 0). */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)

/* Return NULL unless initialized and the underlying buffer is still
   attached. */
#define CHECK_ATTACHED(self) \
    do { \
        CHECK_INITIALIZED(self); \
        if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return NULL; \
        } \
    } while (0)

/* Same checks as CHECK_ATTACHED, for functions that report errors by
   returning -1. */
#define CHECK_ATTACHED_INT(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return -1; \
        } \
        else if ((self)->detached) { \
            PyErr_SetString(PyExc_ValueError, \
                            "underlying buffer has been detached"); \
            return -1; \
        } \
    } while (0)
1501
1502
1503
/*[clinic input]
1504
_io.TextIOWrapper.detach
1505
[clinic start generated code]*/
1506
1507
static PyObject *
1508
_io_TextIOWrapper_detach_impl(textio *self)
1509
/*[clinic end generated code: output=7ba3715cd032d5f2 input=e5a71fbda9e1d9f9]*/
1510
{
1511
PyObject *buffer, *res;
1512
CHECK_ATTACHED(self);
1513
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
1514
if (res == NULL)
1515
return NULL;
1516
Py_DECREF(res);
1517
buffer = self->buffer;
1518
self->buffer = NULL;
1519
self->detached = 1;
1520
return buffer;
1521
}
1522
1523
/* Flush the internal write buffer. This doesn't explicitly flush the
1524
underlying buffered object, though. */
1525
static int
1526
_textiowrapper_writeflush(textio *self)
1527
{
1528
if (self->pending_bytes == NULL)
1529
return 0;
1530
1531
PyObject *pending = self->pending_bytes;
1532
PyObject *b;
1533
1534
if (PyBytes_Check(pending)) {
1535
b = Py_NewRef(pending);
1536
}
1537
else if (PyUnicode_Check(pending)) {
1538
assert(PyUnicode_IS_ASCII(pending));
1539
assert(PyUnicode_GET_LENGTH(pending) == self->pending_bytes_count);
1540
b = PyBytes_FromStringAndSize(
1541
PyUnicode_DATA(pending), PyUnicode_GET_LENGTH(pending));
1542
if (b == NULL) {
1543
return -1;
1544
}
1545
}
1546
else {
1547
assert(PyList_Check(pending));
1548
b = PyBytes_FromStringAndSize(NULL, self->pending_bytes_count);
1549
if (b == NULL) {
1550
return -1;
1551
}
1552
1553
char *buf = PyBytes_AsString(b);
1554
Py_ssize_t pos = 0;
1555
1556
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(pending); i++) {
1557
PyObject *obj = PyList_GET_ITEM(pending, i);
1558
char *src;
1559
Py_ssize_t len;
1560
if (PyUnicode_Check(obj)) {
1561
assert(PyUnicode_IS_ASCII(obj));
1562
src = PyUnicode_DATA(obj);
1563
len = PyUnicode_GET_LENGTH(obj);
1564
}
1565
else {
1566
assert(PyBytes_Check(obj));
1567
if (PyBytes_AsStringAndSize(obj, &src, &len) < 0) {
1568
Py_DECREF(b);
1569
return -1;
1570
}
1571
}
1572
memcpy(buf + pos, src, len);
1573
pos += len;
1574
}
1575
assert(pos == self->pending_bytes_count);
1576
}
1577
1578
self->pending_bytes_count = 0;
1579
self->pending_bytes = NULL;
1580
Py_DECREF(pending);
1581
1582
PyObject *ret;
1583
do {
1584
ret = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(write), b);
1585
} while (ret == NULL && _PyIO_trap_eintr());
1586
Py_DECREF(b);
1587
// NOTE: We cleared buffer but we don't know how many bytes are actually written
1588
// when an error occurred.
1589
if (ret == NULL)
1590
return -1;
1591
Py_DECREF(ret);
1592
return 0;
1593
}
1594
1595
/*[clinic input]
1596
_io.TextIOWrapper.write
1597
text: unicode
1598
/
1599
[clinic start generated code]*/
1600
1601
static PyObject *
1602
_io_TextIOWrapper_write_impl(textio *self, PyObject *text)
1603
/*[clinic end generated code: output=d2deb0d50771fcec input=fdf19153584a0e44]*/
1604
{
1605
PyObject *ret;
1606
PyObject *b;
1607
Py_ssize_t textlen;
1608
int haslf = 0;
1609
int needflush = 0, text_needflush = 0;
1610
1611
CHECK_ATTACHED(self);
1612
CHECK_CLOSED(self);
1613
1614
if (self->encoder == NULL) {
1615
return _unsupported(self->state, "not writable");
1616
}
1617
1618
Py_INCREF(text);
1619
1620
textlen = PyUnicode_GET_LENGTH(text);
1621
1622
if ((self->writetranslate && self->writenl != NULL) || self->line_buffering)
1623
if (PyUnicode_FindChar(text, '\n', 0, PyUnicode_GET_LENGTH(text), 1) != -1)
1624
haslf = 1;
1625
1626
if (haslf && self->writetranslate && self->writenl != NULL) {
1627
PyObject *newtext = _PyObject_CallMethod(text, &_Py_ID(replace),
1628
"ss", "\n", self->writenl);
1629
Py_DECREF(text);
1630
if (newtext == NULL)
1631
return NULL;
1632
text = newtext;
1633
}
1634
1635
if (self->write_through)
1636
text_needflush = 1;
1637
if (self->line_buffering &&
1638
(haslf ||
1639
PyUnicode_FindChar(text, '\r', 0, PyUnicode_GET_LENGTH(text), 1) != -1))
1640
needflush = 1;
1641
1642
/* XXX What if we were just reading? */
1643
if (self->encodefunc != NULL) {
1644
if (PyUnicode_IS_ASCII(text) &&
1645
// See bpo-43260
1646
PyUnicode_GET_LENGTH(text) <= self->chunk_size &&
1647
is_asciicompat_encoding(self->encodefunc)) {
1648
b = Py_NewRef(text);
1649
}
1650
else {
1651
b = (*self->encodefunc)((PyObject *) self, text);
1652
}
1653
self->encoding_start_of_stream = 0;
1654
}
1655
else {
1656
b = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(encode), text);
1657
}
1658
1659
Py_DECREF(text);
1660
if (b == NULL)
1661
return NULL;
1662
if (b != text && !PyBytes_Check(b)) {
1663
PyErr_Format(PyExc_TypeError,
1664
"encoder should return a bytes object, not '%.200s'",
1665
Py_TYPE(b)->tp_name);
1666
Py_DECREF(b);
1667
return NULL;
1668
}
1669
1670
Py_ssize_t bytes_len;
1671
if (b == text) {
1672
bytes_len = PyUnicode_GET_LENGTH(b);
1673
}
1674
else {
1675
bytes_len = PyBytes_GET_SIZE(b);
1676
}
1677
1678
if (self->pending_bytes == NULL) {
1679
self->pending_bytes_count = 0;
1680
self->pending_bytes = b;
1681
}
1682
else if (self->pending_bytes_count + bytes_len > self->chunk_size) {
1683
// Prevent to concatenate more than chunk_size data.
1684
if (_textiowrapper_writeflush(self) < 0) {
1685
Py_DECREF(b);
1686
return NULL;
1687
}
1688
self->pending_bytes = b;
1689
}
1690
else if (!PyList_CheckExact(self->pending_bytes)) {
1691
PyObject *list = PyList_New(2);
1692
if (list == NULL) {
1693
Py_DECREF(b);
1694
return NULL;
1695
}
1696
PyList_SET_ITEM(list, 0, self->pending_bytes);
1697
PyList_SET_ITEM(list, 1, b);
1698
self->pending_bytes = list;
1699
}
1700
else {
1701
if (PyList_Append(self->pending_bytes, b) < 0) {
1702
Py_DECREF(b);
1703
return NULL;
1704
}
1705
Py_DECREF(b);
1706
}
1707
1708
self->pending_bytes_count += bytes_len;
1709
if (self->pending_bytes_count >= self->chunk_size || needflush ||
1710
text_needflush) {
1711
if (_textiowrapper_writeflush(self) < 0)
1712
return NULL;
1713
}
1714
1715
if (needflush) {
1716
ret = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
1717
if (ret == NULL)
1718
return NULL;
1719
Py_DECREF(ret);
1720
}
1721
1722
textiowrapper_set_decoded_chars(self, NULL);
1723
Py_CLEAR(self->snapshot);
1724
1725
if (self->decoder) {
1726
ret = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
1727
if (ret == NULL)
1728
return NULL;
1729
Py_DECREF(ret);
1730
}
1731
1732
return PyLong_FromSsize_t(textlen);
1733
}
1734
1735
/* Steal a reference to chars and store it in the decoded_char buffer;
1736
*/
1737
static void
1738
textiowrapper_set_decoded_chars(textio *self, PyObject *chars)
1739
{
1740
Py_XSETREF(self->decoded_chars, chars);
1741
self->decoded_chars_used = 0;
1742
}
1743
1744
static PyObject *
1745
textiowrapper_get_decoded_chars(textio *self, Py_ssize_t n)
1746
{
1747
PyObject *chars;
1748
Py_ssize_t avail;
1749
1750
if (self->decoded_chars == NULL)
1751
return PyUnicode_FromStringAndSize(NULL, 0);
1752
1753
/* decoded_chars is guaranteed to be "ready". */
1754
avail = (PyUnicode_GET_LENGTH(self->decoded_chars)
1755
- self->decoded_chars_used);
1756
1757
assert(avail >= 0);
1758
1759
if (n < 0 || n > avail)
1760
n = avail;
1761
1762
if (self->decoded_chars_used > 0 || n < avail) {
1763
chars = PyUnicode_Substring(self->decoded_chars,
1764
self->decoded_chars_used,
1765
self->decoded_chars_used + n);
1766
if (chars == NULL)
1767
return NULL;
1768
}
1769
else {
1770
chars = Py_NewRef(self->decoded_chars);
1771
}
1772
1773
self->decoded_chars_used += n;
1774
return chars;
1775
}
1776
1777
/* Read and decode the next chunk of data from the BufferedReader.
1778
*/
1779
static int
1780
textiowrapper_read_chunk(textio *self, Py_ssize_t size_hint)
1781
{
1782
PyObject *dec_buffer = NULL;
1783
PyObject *dec_flags = NULL;
1784
PyObject *input_chunk = NULL;
1785
Py_buffer input_chunk_buf;
1786
PyObject *decoded_chars, *chunk_size;
1787
Py_ssize_t nbytes, nchars;
1788
int eof;
1789
1790
/* The return value is True unless EOF was reached. The decoded string is
1791
* placed in self._decoded_chars (replacing its previous value). The
1792
* entire input chunk is sent to the decoder, though some of it may remain
1793
* buffered in the decoder, yet to be converted.
1794
*/
1795
1796
if (self->decoder == NULL) {
1797
_unsupported(self->state, "not readable");
1798
return -1;
1799
}
1800
1801
if (self->telling) {
1802
/* To prepare for tell(), we need to snapshot a point in the file
1803
* where the decoder's input buffer is empty.
1804
*/
1805
PyObject *state = PyObject_CallMethodNoArgs(self->decoder,
1806
&_Py_ID(getstate));
1807
if (state == NULL)
1808
return -1;
1809
/* Given this, we know there was a valid snapshot point
1810
* len(dec_buffer) bytes ago with decoder state (b'', dec_flags).
1811
*/
1812
if (!PyTuple_Check(state)) {
1813
PyErr_SetString(PyExc_TypeError,
1814
"illegal decoder state");
1815
Py_DECREF(state);
1816
return -1;
1817
}
1818
if (!PyArg_ParseTuple(state,
1819
"OO;illegal decoder state", &dec_buffer, &dec_flags))
1820
{
1821
Py_DECREF(state);
1822
return -1;
1823
}
1824
1825
if (!PyBytes_Check(dec_buffer)) {
1826
PyErr_Format(PyExc_TypeError,
1827
"illegal decoder state: the first item should be a "
1828
"bytes object, not '%.200s'",
1829
Py_TYPE(dec_buffer)->tp_name);
1830
Py_DECREF(state);
1831
return -1;
1832
}
1833
Py_INCREF(dec_buffer);
1834
Py_INCREF(dec_flags);
1835
Py_DECREF(state);
1836
}
1837
1838
/* Read a chunk, decode it, and put the result in self._decoded_chars. */
1839
if (size_hint > 0) {
1840
size_hint = (Py_ssize_t)(Py_MAX(self->b2cratio, 1.0) * size_hint);
1841
}
1842
chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
1843
if (chunk_size == NULL)
1844
goto fail;
1845
1846
input_chunk = PyObject_CallMethodOneArg(self->buffer,
1847
(self->has_read1 ? &_Py_ID(read1): &_Py_ID(read)),
1848
chunk_size);
1849
Py_DECREF(chunk_size);
1850
if (input_chunk == NULL)
1851
goto fail;
1852
1853
if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
1854
PyErr_Format(PyExc_TypeError,
1855
"underlying %s() should have returned a bytes-like object, "
1856
"not '%.200s'", (self->has_read1 ? "read1": "read"),
1857
Py_TYPE(input_chunk)->tp_name);
1858
goto fail;
1859
}
1860
1861
nbytes = input_chunk_buf.len;
1862
eof = (nbytes == 0);
1863
1864
decoded_chars = _textiowrapper_decode(self->state, self->decoder,
1865
input_chunk, eof);
1866
PyBuffer_Release(&input_chunk_buf);
1867
if (decoded_chars == NULL)
1868
goto fail;
1869
1870
textiowrapper_set_decoded_chars(self, decoded_chars);
1871
nchars = PyUnicode_GET_LENGTH(decoded_chars);
1872
if (nchars > 0)
1873
self->b2cratio = (double) nbytes / nchars;
1874
else
1875
self->b2cratio = 0.0;
1876
if (nchars > 0)
1877
eof = 0;
1878
1879
if (self->telling) {
1880
/* At the snapshot point, len(dec_buffer) bytes before the read, the
1881
* next input to be decoded is dec_buffer + input_chunk.
1882
*/
1883
PyObject *next_input = dec_buffer;
1884
PyBytes_Concat(&next_input, input_chunk);
1885
dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
1886
if (next_input == NULL) {
1887
goto fail;
1888
}
1889
PyObject *snapshot = Py_BuildValue("NN", dec_flags, next_input);
1890
if (snapshot == NULL) {
1891
dec_flags = NULL;
1892
goto fail;
1893
}
1894
Py_XSETREF(self->snapshot, snapshot);
1895
}
1896
Py_DECREF(input_chunk);
1897
1898
return (eof == 0);
1899
1900
fail:
1901
Py_XDECREF(dec_buffer);
1902
Py_XDECREF(dec_flags);
1903
Py_XDECREF(input_chunk);
1904
return -1;
1905
}
1906
1907
/*[clinic input]
1908
_io.TextIOWrapper.read
1909
size as n: Py_ssize_t(accept={int, NoneType}) = -1
1910
/
1911
[clinic start generated code]*/
1912
1913
static PyObject *
1914
_io_TextIOWrapper_read_impl(textio *self, Py_ssize_t n)
1915
/*[clinic end generated code: output=7e651ce6cc6a25a6 input=123eecbfe214aeb8]*/
1916
{
1917
PyObject *result = NULL, *chunks = NULL;
1918
1919
CHECK_ATTACHED(self);
1920
CHECK_CLOSED(self);
1921
1922
if (self->decoder == NULL) {
1923
return _unsupported(self->state, "not readable");
1924
}
1925
1926
if (_textiowrapper_writeflush(self) < 0)
1927
return NULL;
1928
1929
if (n < 0) {
1930
/* Read everything */
1931
PyObject *bytes = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(read));
1932
PyObject *decoded;
1933
if (bytes == NULL)
1934
goto fail;
1935
1936
_PyIO_State *state = self->state;
1937
if (Py_IS_TYPE(self->decoder, state->PyIncrementalNewlineDecoder_Type))
1938
decoded = _PyIncrementalNewlineDecoder_decode(self->decoder,
1939
bytes, 1);
1940
else
1941
decoded = PyObject_CallMethodObjArgs(
1942
self->decoder, &_Py_ID(decode), bytes, Py_True, NULL);
1943
Py_DECREF(bytes);
1944
if (check_decoded(decoded) < 0)
1945
goto fail;
1946
1947
result = textiowrapper_get_decoded_chars(self, -1);
1948
1949
if (result == NULL) {
1950
Py_DECREF(decoded);
1951
return NULL;
1952
}
1953
1954
PyUnicode_AppendAndDel(&result, decoded);
1955
if (result == NULL)
1956
goto fail;
1957
1958
textiowrapper_set_decoded_chars(self, NULL);
1959
Py_CLEAR(self->snapshot);
1960
return result;
1961
}
1962
else {
1963
int res = 1;
1964
Py_ssize_t remaining = n;
1965
1966
result = textiowrapper_get_decoded_chars(self, n);
1967
if (result == NULL)
1968
goto fail;
1969
remaining -= PyUnicode_GET_LENGTH(result);
1970
1971
/* Keep reading chunks until we have n characters to return */
1972
while (remaining > 0) {
1973
res = textiowrapper_read_chunk(self, remaining);
1974
if (res < 0) {
1975
/* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
1976
when EINTR occurs so we needn't do it ourselves. */
1977
if (_PyIO_trap_eintr()) {
1978
continue;
1979
}
1980
goto fail;
1981
}
1982
if (res == 0) /* EOF */
1983
break;
1984
if (chunks == NULL) {
1985
chunks = PyList_New(0);
1986
if (chunks == NULL)
1987
goto fail;
1988
}
1989
if (PyUnicode_GET_LENGTH(result) > 0 &&
1990
PyList_Append(chunks, result) < 0)
1991
goto fail;
1992
Py_DECREF(result);
1993
result = textiowrapper_get_decoded_chars(self, remaining);
1994
if (result == NULL)
1995
goto fail;
1996
remaining -= PyUnicode_GET_LENGTH(result);
1997
}
1998
if (chunks != NULL) {
1999
if (result != NULL && PyList_Append(chunks, result) < 0)
2000
goto fail;
2001
_Py_DECLARE_STR(empty, "");
2002
Py_XSETREF(result, PyUnicode_Join(&_Py_STR(empty), chunks));
2003
if (result == NULL)
2004
goto fail;
2005
Py_CLEAR(chunks);
2006
}
2007
return result;
2008
}
2009
fail:
2010
Py_XDECREF(result);
2011
Py_XDECREF(chunks);
2012
return NULL;
2013
}
2014
2015
2016
/* NOTE: `end` must point to the real end of the Py_UCS4 storage,
2017
that is to the NUL character. Otherwise the function will produce
2018
incorrect results. */
2019
static const char *
2020
find_control_char(int kind, const char *s, const char *end, Py_UCS4 ch)
2021
{
2022
if (kind == PyUnicode_1BYTE_KIND) {
2023
assert(ch < 256);
2024
return (char *) memchr((const void *) s, (char) ch, end - s);
2025
}
2026
for (;;) {
2027
while (PyUnicode_READ(kind, s, 0) > ch)
2028
s += kind;
2029
if (PyUnicode_READ(kind, s, 0) == ch)
2030
return s;
2031
if (s == end)
2032
return NULL;
2033
s += kind;
2034
}
2035
}
2036
2037
Py_ssize_t
2038
_PyIO_find_line_ending(
2039
int translated, int universal, PyObject *readnl,
2040
int kind, const char *start, const char *end, Py_ssize_t *consumed)
2041
{
2042
Py_ssize_t len = (end - start)/kind;
2043
2044
if (translated) {
2045
/* Newlines are already translated, only search for \n */
2046
const char *pos = find_control_char(kind, start, end, '\n');
2047
if (pos != NULL)
2048
return (pos - start)/kind + 1;
2049
else {
2050
*consumed = len;
2051
return -1;
2052
}
2053
}
2054
else if (universal) {
2055
/* Universal newline search. Find any of \r, \r\n, \n
2056
* The decoder ensures that \r\n are not split in two pieces
2057
*/
2058
const char *s = start;
2059
for (;;) {
2060
Py_UCS4 ch;
2061
/* Fast path for non-control chars. The loop always ends
2062
since the Unicode string is NUL-terminated. */
2063
while (PyUnicode_READ(kind, s, 0) > '\r')
2064
s += kind;
2065
if (s >= end) {
2066
*consumed = len;
2067
return -1;
2068
}
2069
ch = PyUnicode_READ(kind, s, 0);
2070
s += kind;
2071
if (ch == '\n')
2072
return (s - start)/kind;
2073
if (ch == '\r') {
2074
if (PyUnicode_READ(kind, s, 0) == '\n')
2075
return (s - start)/kind + 1;
2076
else
2077
return (s - start)/kind;
2078
}
2079
}
2080
}
2081
else {
2082
/* Non-universal mode. */
2083
Py_ssize_t readnl_len = PyUnicode_GET_LENGTH(readnl);
2084
const Py_UCS1 *nl = PyUnicode_1BYTE_DATA(readnl);
2085
/* Assume that readnl is an ASCII character. */
2086
assert(PyUnicode_KIND(readnl) == PyUnicode_1BYTE_KIND);
2087
if (readnl_len == 1) {
2088
const char *pos = find_control_char(kind, start, end, nl[0]);
2089
if (pos != NULL)
2090
return (pos - start)/kind + 1;
2091
*consumed = len;
2092
return -1;
2093
}
2094
else {
2095
const char *s = start;
2096
const char *e = end - (readnl_len - 1)*kind;
2097
const char *pos;
2098
if (e < s)
2099
e = s;
2100
while (s < e) {
2101
Py_ssize_t i;
2102
const char *pos = find_control_char(kind, s, end, nl[0]);
2103
if (pos == NULL || pos >= e)
2104
break;
2105
for (i = 1; i < readnl_len; i++) {
2106
if (PyUnicode_READ(kind, pos, i) != nl[i])
2107
break;
2108
}
2109
if (i == readnl_len)
2110
return (pos - start)/kind + readnl_len;
2111
s = pos + kind;
2112
}
2113
pos = find_control_char(kind, e, end, nl[0]);
2114
if (pos == NULL)
2115
*consumed = len;
2116
else
2117
*consumed = (pos - start)/kind;
2118
return -1;
2119
}
2120
}
2121
}
2122
2123
static PyObject *
2124
_textiowrapper_readline(textio *self, Py_ssize_t limit)
2125
{
2126
PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
2127
Py_ssize_t start, endpos, chunked, offset_to_buffer;
2128
int res;
2129
2130
CHECK_CLOSED(self);
2131
2132
if (_textiowrapper_writeflush(self) < 0)
2133
return NULL;
2134
2135
chunked = 0;
2136
2137
while (1) {
2138
const char *ptr;
2139
Py_ssize_t line_len;
2140
int kind;
2141
Py_ssize_t consumed = 0;
2142
2143
/* First, get some data if necessary */
2144
res = 1;
2145
while (!self->decoded_chars ||
2146
!PyUnicode_GET_LENGTH(self->decoded_chars)) {
2147
res = textiowrapper_read_chunk(self, 0);
2148
if (res < 0) {
2149
/* NOTE: PyErr_SetFromErrno() calls PyErr_CheckSignals()
2150
when EINTR occurs so we needn't do it ourselves. */
2151
if (_PyIO_trap_eintr()) {
2152
continue;
2153
}
2154
goto error;
2155
}
2156
if (res == 0)
2157
break;
2158
}
2159
if (res == 0) {
2160
/* end of file */
2161
textiowrapper_set_decoded_chars(self, NULL);
2162
Py_CLEAR(self->snapshot);
2163
start = endpos = offset_to_buffer = 0;
2164
break;
2165
}
2166
2167
if (remaining == NULL) {
2168
line = Py_NewRef(self->decoded_chars);
2169
start = self->decoded_chars_used;
2170
offset_to_buffer = 0;
2171
}
2172
else {
2173
assert(self->decoded_chars_used == 0);
2174
line = PyUnicode_Concat(remaining, self->decoded_chars);
2175
start = 0;
2176
offset_to_buffer = PyUnicode_GET_LENGTH(remaining);
2177
Py_CLEAR(remaining);
2178
if (line == NULL)
2179
goto error;
2180
}
2181
2182
ptr = PyUnicode_DATA(line);
2183
line_len = PyUnicode_GET_LENGTH(line);
2184
kind = PyUnicode_KIND(line);
2185
2186
endpos = _PyIO_find_line_ending(
2187
self->readtranslate, self->readuniversal, self->readnl,
2188
kind,
2189
ptr + kind * start,
2190
ptr + kind * line_len,
2191
&consumed);
2192
if (endpos >= 0) {
2193
endpos += start;
2194
if (limit >= 0 && (endpos - start) + chunked >= limit)
2195
endpos = start + limit - chunked;
2196
break;
2197
}
2198
2199
/* We can put aside up to `endpos` */
2200
endpos = consumed + start;
2201
if (limit >= 0 && (endpos - start) + chunked >= limit) {
2202
/* Didn't find line ending, but reached length limit */
2203
endpos = start + limit - chunked;
2204
break;
2205
}
2206
2207
if (endpos > start) {
2208
/* No line ending seen yet - put aside current data */
2209
PyObject *s;
2210
if (chunks == NULL) {
2211
chunks = PyList_New(0);
2212
if (chunks == NULL)
2213
goto error;
2214
}
2215
s = PyUnicode_Substring(line, start, endpos);
2216
if (s == NULL)
2217
goto error;
2218
if (PyList_Append(chunks, s) < 0) {
2219
Py_DECREF(s);
2220
goto error;
2221
}
2222
chunked += PyUnicode_GET_LENGTH(s);
2223
Py_DECREF(s);
2224
}
2225
/* There may be some remaining bytes we'll have to prepend to the
2226
next chunk of data */
2227
if (endpos < line_len) {
2228
remaining = PyUnicode_Substring(line, endpos, line_len);
2229
if (remaining == NULL)
2230
goto error;
2231
}
2232
Py_CLEAR(line);
2233
/* We have consumed the buffer */
2234
textiowrapper_set_decoded_chars(self, NULL);
2235
}
2236
2237
if (line != NULL) {
2238
/* Our line ends in the current buffer */
2239
self->decoded_chars_used = endpos - offset_to_buffer;
2240
if (start > 0 || endpos < PyUnicode_GET_LENGTH(line)) {
2241
PyObject *s = PyUnicode_Substring(line, start, endpos);
2242
Py_CLEAR(line);
2243
if (s == NULL)
2244
goto error;
2245
line = s;
2246
}
2247
}
2248
if (remaining != NULL) {
2249
if (chunks == NULL) {
2250
chunks = PyList_New(0);
2251
if (chunks == NULL)
2252
goto error;
2253
}
2254
if (PyList_Append(chunks, remaining) < 0)
2255
goto error;
2256
Py_CLEAR(remaining);
2257
}
2258
if (chunks != NULL) {
2259
if (line != NULL) {
2260
if (PyList_Append(chunks, line) < 0)
2261
goto error;
2262
Py_DECREF(line);
2263
}
2264
line = PyUnicode_Join(&_Py_STR(empty), chunks);
2265
if (line == NULL)
2266
goto error;
2267
Py_CLEAR(chunks);
2268
}
2269
if (line == NULL) {
2270
line = Py_NewRef(&_Py_STR(empty));
2271
}
2272
2273
return line;
2274
2275
error:
2276
Py_XDECREF(chunks);
2277
Py_XDECREF(remaining);
2278
Py_XDECREF(line);
2279
return NULL;
2280
}
2281
2282
/*[clinic input]
2283
_io.TextIOWrapper.readline
2284
size: Py_ssize_t = -1
2285
/
2286
[clinic start generated code]*/
2287
2288
static PyObject *
2289
_io_TextIOWrapper_readline_impl(textio *self, Py_ssize_t size)
2290
/*[clinic end generated code: output=344afa98804e8b25 input=56c7172483b36db6]*/
2291
{
2292
CHECK_ATTACHED(self);
2293
return _textiowrapper_readline(self, size);
2294
}
2295
2296
/* Seek and Tell */
2297
2298
typedef struct {
2299
Py_off_t start_pos;
2300
int dec_flags;
2301
int bytes_to_feed;
2302
int chars_to_skip;
2303
char need_eof;
2304
} cookie_type;
2305
2306
/*
   Cookies are packed and unpacked through a small temporary byte buffer
   that is converted with _PyLong_FromByteArray() and _PyLong_AsByteArray()
   respectively.  The macros below give the buffer size and the offset at
   which each cookie_type field is stored in that buffer.
 */

#define COOKIE_BUF_LEN      (sizeof(Py_off_t) + 3 * sizeof(int) + sizeof(char))

#if PY_BIG_ENDIAN
/* The least significant byte of start_pos must also be the least
   significant byte of the cookie, so on big-endian builds the fields are
   laid out in reverse order: need_eof first, start_pos last. */

# define OFF_NEED_EOF       0
# define OFF_CHARS_TO_SKIP  (OFF_NEED_EOF + sizeof(char))
# define OFF_BYTES_TO_FEED  (OFF_CHARS_TO_SKIP + sizeof(int))
# define OFF_DEC_FLAGS      (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_START_POS      (OFF_DEC_FLAGS + sizeof(int))

#else
/* Little-endian builds: storing start_pos first already places its least
   significant byte at the least significant end of the cookie. */

# define OFF_START_POS      0
# define OFF_DEC_FLAGS      (OFF_START_POS + sizeof(Py_off_t))
# define OFF_BYTES_TO_FEED  (OFF_DEC_FLAGS + sizeof(int))
# define OFF_CHARS_TO_SKIP  (OFF_BYTES_TO_FEED + sizeof(int))
# define OFF_NEED_EOF       (OFF_CHARS_TO_SKIP + sizeof(int))

#endif
2337
2338
static int
2339
textiowrapper_parse_cookie(cookie_type *cookie, PyObject *cookieObj)
2340
{
2341
unsigned char buffer[COOKIE_BUF_LEN];
2342
PyLongObject *cookieLong = (PyLongObject *)PyNumber_Long(cookieObj);
2343
if (cookieLong == NULL)
2344
return -1;
2345
2346
if (_PyLong_AsByteArray(cookieLong, buffer, sizeof(buffer),
2347
PY_LITTLE_ENDIAN, 0) < 0) {
2348
Py_DECREF(cookieLong);
2349
return -1;
2350
}
2351
Py_DECREF(cookieLong);
2352
2353
memcpy(&cookie->start_pos, buffer + OFF_START_POS, sizeof(cookie->start_pos));
2354
memcpy(&cookie->dec_flags, buffer + OFF_DEC_FLAGS, sizeof(cookie->dec_flags));
2355
memcpy(&cookie->bytes_to_feed, buffer + OFF_BYTES_TO_FEED, sizeof(cookie->bytes_to_feed));
2356
memcpy(&cookie->chars_to_skip, buffer + OFF_CHARS_TO_SKIP, sizeof(cookie->chars_to_skip));
2357
memcpy(&cookie->need_eof, buffer + OFF_NEED_EOF, sizeof(cookie->need_eof));
2358
2359
return 0;
2360
}
2361
2362
static PyObject *
2363
textiowrapper_build_cookie(cookie_type *cookie)
2364
{
2365
unsigned char buffer[COOKIE_BUF_LEN];
2366
2367
memcpy(buffer + OFF_START_POS, &cookie->start_pos, sizeof(cookie->start_pos));
2368
memcpy(buffer + OFF_DEC_FLAGS, &cookie->dec_flags, sizeof(cookie->dec_flags));
2369
memcpy(buffer + OFF_BYTES_TO_FEED, &cookie->bytes_to_feed, sizeof(cookie->bytes_to_feed));
2370
memcpy(buffer + OFF_CHARS_TO_SKIP, &cookie->chars_to_skip, sizeof(cookie->chars_to_skip));
2371
memcpy(buffer + OFF_NEED_EOF, &cookie->need_eof, sizeof(cookie->need_eof));
2372
2373
return _PyLong_FromByteArray(buffer, sizeof(buffer),
2374
PY_LITTLE_ENDIAN, 0);
2375
}
2376
2377
static int
2378
_textiowrapper_decoder_setstate(textio *self, cookie_type *cookie)
2379
{
2380
PyObject *res;
2381
/* When seeking to the start of the stream, we call decoder.reset()
2382
rather than decoder.getstate().
2383
This is for a few decoders such as utf-16 for which the state value
2384
at start is not (b"", 0) but e.g. (b"", 2) (meaning, in the case of
2385
utf-16, that we are expecting a BOM).
2386
*/
2387
if (cookie->start_pos == 0 && cookie->dec_flags == 0) {
2388
res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2389
}
2390
else {
2391
res = _PyObject_CallMethod(self->decoder, &_Py_ID(setstate),
2392
"((yi))", "", cookie->dec_flags);
2393
}
2394
if (res == NULL) {
2395
return -1;
2396
}
2397
Py_DECREF(res);
2398
return 0;
2399
}
2400
2401
static int
2402
_textiowrapper_encoder_reset(textio *self, int start_of_stream)
2403
{
2404
PyObject *res;
2405
if (start_of_stream) {
2406
res = PyObject_CallMethodNoArgs(self->encoder, &_Py_ID(reset));
2407
self->encoding_start_of_stream = 1;
2408
}
2409
else {
2410
res = PyObject_CallMethodOneArg(self->encoder, &_Py_ID(setstate),
2411
_PyLong_GetZero());
2412
self->encoding_start_of_stream = 0;
2413
}
2414
if (res == NULL)
2415
return -1;
2416
Py_DECREF(res);
2417
return 0;
2418
}
2419
2420
static int
2421
_textiowrapper_encoder_setstate(textio *self, cookie_type *cookie)
2422
{
2423
/* Same as _textiowrapper_decoder_setstate() above. */
2424
return _textiowrapper_encoder_reset(
2425
self, cookie->start_pos == 0 && cookie->dec_flags == 0);
2426
}
2427
2428
/*[clinic input]
2429
_io.TextIOWrapper.seek
2430
cookie as cookieObj: object
2431
whence: int = 0
2432
/
2433
[clinic start generated code]*/
2434
2435
static PyObject *
2436
_io_TextIOWrapper_seek_impl(textio *self, PyObject *cookieObj, int whence)
2437
/*[clinic end generated code: output=0a15679764e2d04d input=0458abeb3d7842be]*/
2438
{
2439
PyObject *posobj;
2440
cookie_type cookie;
2441
PyObject *res;
2442
int cmp;
2443
PyObject *snapshot;
2444
2445
CHECK_ATTACHED(self);
2446
CHECK_CLOSED(self);
2447
2448
Py_INCREF(cookieObj);
2449
2450
if (!self->seekable) {
2451
_unsupported(self->state, "underlying stream is not seekable");
2452
goto fail;
2453
}
2454
2455
PyObject *zero = _PyLong_GetZero(); // borrowed reference
2456
2457
switch (whence) {
2458
case SEEK_CUR:
2459
/* seek relative to current position */
2460
cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2461
if (cmp < 0)
2462
goto fail;
2463
2464
if (cmp == 0) {
2465
_unsupported(self->state, "can't do nonzero cur-relative seeks");
2466
goto fail;
2467
}
2468
2469
/* Seeking to the current position should attempt to
2470
* sync the underlying buffer with the current position.
2471
*/
2472
Py_DECREF(cookieObj);
2473
cookieObj = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(tell));
2474
if (cookieObj == NULL)
2475
goto fail;
2476
break;
2477
2478
case SEEK_END:
2479
/* seek relative to end of file */
2480
cmp = PyObject_RichCompareBool(cookieObj, zero, Py_EQ);
2481
if (cmp < 0)
2482
goto fail;
2483
2484
if (cmp == 0) {
2485
_unsupported(self->state, "can't do nonzero end-relative seeks");
2486
goto fail;
2487
}
2488
2489
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2490
if (res == NULL)
2491
goto fail;
2492
Py_DECREF(res);
2493
2494
textiowrapper_set_decoded_chars(self, NULL);
2495
Py_CLEAR(self->snapshot);
2496
if (self->decoder) {
2497
res = PyObject_CallMethodNoArgs(self->decoder, &_Py_ID(reset));
2498
if (res == NULL)
2499
goto fail;
2500
Py_DECREF(res);
2501
}
2502
2503
res = _PyObject_CallMethod(self->buffer, &_Py_ID(seek), "ii", 0, 2);
2504
Py_CLEAR(cookieObj);
2505
if (res == NULL)
2506
goto fail;
2507
if (self->encoder) {
2508
/* If seek() == 0, we are at the start of stream, otherwise not */
2509
cmp = PyObject_RichCompareBool(res, zero, Py_EQ);
2510
if (cmp < 0 || _textiowrapper_encoder_reset(self, cmp)) {
2511
Py_DECREF(res);
2512
goto fail;
2513
}
2514
}
2515
return res;
2516
2517
case SEEK_SET:
2518
break;
2519
2520
default:
2521
PyErr_Format(PyExc_ValueError,
2522
"invalid whence (%d, should be %d, %d or %d)", whence,
2523
SEEK_SET, SEEK_CUR, SEEK_END);
2524
goto fail;
2525
}
2526
2527
cmp = PyObject_RichCompareBool(cookieObj, zero, Py_LT);
2528
if (cmp < 0)
2529
goto fail;
2530
2531
if (cmp == 1) {
2532
PyErr_Format(PyExc_ValueError,
2533
"negative seek position %R", cookieObj);
2534
goto fail;
2535
}
2536
2537
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2538
if (res == NULL)
2539
goto fail;
2540
Py_DECREF(res);
2541
2542
/* The strategy of seek() is to go back to the safe start point
2543
* and replay the effect of read(chars_to_skip) from there.
2544
*/
2545
if (textiowrapper_parse_cookie(&cookie, cookieObj) < 0)
2546
goto fail;
2547
2548
/* Seek back to the safe start point. */
2549
posobj = PyLong_FromOff_t(cookie.start_pos);
2550
if (posobj == NULL)
2551
goto fail;
2552
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(seek), posobj);
2553
Py_DECREF(posobj);
2554
if (res == NULL)
2555
goto fail;
2556
Py_DECREF(res);
2557
2558
textiowrapper_set_decoded_chars(self, NULL);
2559
Py_CLEAR(self->snapshot);
2560
2561
/* Restore the decoder to its state from the safe start point. */
2562
if (self->decoder) {
2563
if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2564
goto fail;
2565
}
2566
2567
if (cookie.chars_to_skip) {
2568
/* Just like _read_chunk, feed the decoder and save a snapshot. */
2569
PyObject *input_chunk = _PyObject_CallMethod(self->buffer, &_Py_ID(read),
2570
"i", cookie.bytes_to_feed);
2571
PyObject *decoded;
2572
2573
if (input_chunk == NULL)
2574
goto fail;
2575
2576
if (!PyBytes_Check(input_chunk)) {
2577
PyErr_Format(PyExc_TypeError,
2578
"underlying read() should have returned a bytes "
2579
"object, not '%.200s'",
2580
Py_TYPE(input_chunk)->tp_name);
2581
Py_DECREF(input_chunk);
2582
goto fail;
2583
}
2584
2585
snapshot = Py_BuildValue("iN", cookie.dec_flags, input_chunk);
2586
if (snapshot == NULL) {
2587
goto fail;
2588
}
2589
Py_XSETREF(self->snapshot, snapshot);
2590
2591
decoded = PyObject_CallMethodObjArgs(self->decoder, &_Py_ID(decode),
2592
input_chunk, cookie.need_eof ? Py_True : Py_False, NULL);
2593
2594
if (check_decoded(decoded) < 0)
2595
goto fail;
2596
2597
textiowrapper_set_decoded_chars(self, decoded);
2598
2599
/* Skip chars_to_skip of the decoded characters. */
2600
if (PyUnicode_GetLength(self->decoded_chars) < cookie.chars_to_skip) {
2601
PyErr_SetString(PyExc_OSError, "can't restore logical file position");
2602
goto fail;
2603
}
2604
self->decoded_chars_used = cookie.chars_to_skip;
2605
}
2606
else {
2607
snapshot = Py_BuildValue("iy", cookie.dec_flags, "");
2608
if (snapshot == NULL)
2609
goto fail;
2610
Py_XSETREF(self->snapshot, snapshot);
2611
}
2612
2613
/* Finally, reset the encoder (merely useful for proper BOM handling) */
2614
if (self->encoder) {
2615
if (_textiowrapper_encoder_setstate(self, &cookie) < 0)
2616
goto fail;
2617
}
2618
return cookieObj;
2619
fail:
2620
Py_XDECREF(cookieObj);
2621
return NULL;
2622
2623
}
2624
2625
/*[clinic input]
2626
_io.TextIOWrapper.tell
2627
[clinic start generated code]*/
2628
2629
static PyObject *
2630
_io_TextIOWrapper_tell_impl(textio *self)
2631
/*[clinic end generated code: output=4f168c08bf34ad5f input=9a2caf88c24f9ddf]*/
2632
{
2633
PyObject *res;
2634
PyObject *posobj = NULL;
2635
cookie_type cookie = {0,0,0,0,0};
2636
PyObject *next_input;
2637
Py_ssize_t chars_to_skip, chars_decoded;
2638
Py_ssize_t skip_bytes, skip_back;
2639
PyObject *saved_state = NULL;
2640
const char *input, *input_end;
2641
Py_ssize_t dec_buffer_len;
2642
int dec_flags;
2643
2644
CHECK_ATTACHED(self);
2645
CHECK_CLOSED(self);
2646
2647
if (!self->seekable) {
2648
_unsupported(self->state, "underlying stream is not seekable");
2649
goto fail;
2650
}
2651
if (!self->telling) {
2652
PyErr_SetString(PyExc_OSError,
2653
"telling position disabled by next() call");
2654
goto fail;
2655
}
2656
2657
if (_textiowrapper_writeflush(self) < 0)
2658
return NULL;
2659
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2660
if (res == NULL)
2661
goto fail;
2662
Py_DECREF(res);
2663
2664
posobj = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(tell));
2665
if (posobj == NULL)
2666
goto fail;
2667
2668
if (self->decoder == NULL || self->snapshot == NULL) {
2669
assert (self->decoded_chars == NULL || PyUnicode_GetLength(self->decoded_chars) == 0);
2670
return posobj;
2671
}
2672
2673
#if defined(HAVE_LARGEFILE_SUPPORT)
2674
cookie.start_pos = PyLong_AsLongLong(posobj);
2675
#else
2676
cookie.start_pos = PyLong_AsLong(posobj);
2677
#endif
2678
Py_DECREF(posobj);
2679
if (PyErr_Occurred())
2680
goto fail;
2681
2682
/* Skip backward to the snapshot point (see _read_chunk). */
2683
assert(PyTuple_Check(self->snapshot));
2684
if (!PyArg_ParseTuple(self->snapshot, "iO", &cookie.dec_flags, &next_input))
2685
goto fail;
2686
2687
assert (PyBytes_Check(next_input));
2688
2689
cookie.start_pos -= PyBytes_GET_SIZE(next_input);
2690
2691
/* How many decoded characters have been used up since the snapshot? */
2692
if (self->decoded_chars_used == 0) {
2693
/* We haven't moved from the snapshot point. */
2694
return textiowrapper_build_cookie(&cookie);
2695
}
2696
2697
chars_to_skip = self->decoded_chars_used;
2698
2699
/* Decoder state will be restored at the end */
2700
saved_state = PyObject_CallMethodNoArgs(self->decoder,
2701
&_Py_ID(getstate));
2702
if (saved_state == NULL)
2703
goto fail;
2704
2705
#define DECODER_GETSTATE() do { \
2706
PyObject *dec_buffer; \
2707
PyObject *_state = PyObject_CallMethodNoArgs(self->decoder, \
2708
&_Py_ID(getstate)); \
2709
if (_state == NULL) \
2710
goto fail; \
2711
if (!PyTuple_Check(_state)) { \
2712
PyErr_SetString(PyExc_TypeError, \
2713
"illegal decoder state"); \
2714
Py_DECREF(_state); \
2715
goto fail; \
2716
} \
2717
if (!PyArg_ParseTuple(_state, "Oi;illegal decoder state", \
2718
&dec_buffer, &dec_flags)) \
2719
{ \
2720
Py_DECREF(_state); \
2721
goto fail; \
2722
} \
2723
if (!PyBytes_Check(dec_buffer)) { \
2724
PyErr_Format(PyExc_TypeError, \
2725
"illegal decoder state: the first item should be a " \
2726
"bytes object, not '%.200s'", \
2727
Py_TYPE(dec_buffer)->tp_name); \
2728
Py_DECREF(_state); \
2729
goto fail; \
2730
} \
2731
dec_buffer_len = PyBytes_GET_SIZE(dec_buffer); \
2732
Py_DECREF(_state); \
2733
} while (0)
2734
2735
#define DECODER_DECODE(start, len, res) do { \
2736
PyObject *_decoded = _PyObject_CallMethod( \
2737
self->decoder, &_Py_ID(decode), "y#", start, len); \
2738
if (check_decoded(_decoded) < 0) \
2739
goto fail; \
2740
res = PyUnicode_GET_LENGTH(_decoded); \
2741
Py_DECREF(_decoded); \
2742
} while (0)
2743
2744
/* Fast search for an acceptable start point, close to our
2745
current pos */
2746
skip_bytes = (Py_ssize_t) (self->b2cratio * chars_to_skip);
2747
skip_back = 1;
2748
assert(skip_back <= PyBytes_GET_SIZE(next_input));
2749
input = PyBytes_AS_STRING(next_input);
2750
while (skip_bytes > 0) {
2751
/* Decode up to temptative start point */
2752
if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2753
goto fail;
2754
DECODER_DECODE(input, skip_bytes, chars_decoded);
2755
if (chars_decoded <= chars_to_skip) {
2756
DECODER_GETSTATE();
2757
if (dec_buffer_len == 0) {
2758
/* Before pos and no bytes buffered in decoder => OK */
2759
cookie.dec_flags = dec_flags;
2760
chars_to_skip -= chars_decoded;
2761
break;
2762
}
2763
/* Skip back by buffered amount and reset heuristic */
2764
skip_bytes -= dec_buffer_len;
2765
skip_back = 1;
2766
}
2767
else {
2768
/* We're too far ahead, skip back a bit */
2769
skip_bytes -= skip_back;
2770
skip_back *= 2;
2771
}
2772
}
2773
if (skip_bytes <= 0) {
2774
skip_bytes = 0;
2775
if (_textiowrapper_decoder_setstate(self, &cookie) < 0)
2776
goto fail;
2777
}
2778
2779
/* Note our initial start point. */
2780
cookie.start_pos += skip_bytes;
2781
cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2782
if (chars_to_skip == 0)
2783
goto finally;
2784
2785
/* We should be close to the desired position. Now feed the decoder one
2786
* byte at a time until we reach the `chars_to_skip` target.
2787
* As we go, note the nearest "safe start point" before the current
2788
* location (a point where the decoder has nothing buffered, so seek()
2789
* can safely start from there and advance to this location).
2790
*/
2791
chars_decoded = 0;
2792
input = PyBytes_AS_STRING(next_input);
2793
input_end = input + PyBytes_GET_SIZE(next_input);
2794
input += skip_bytes;
2795
while (input < input_end) {
2796
Py_ssize_t n;
2797
2798
DECODER_DECODE(input, (Py_ssize_t)1, n);
2799
/* We got n chars for 1 byte */
2800
chars_decoded += n;
2801
cookie.bytes_to_feed += 1;
2802
DECODER_GETSTATE();
2803
2804
if (dec_buffer_len == 0 && chars_decoded <= chars_to_skip) {
2805
/* Decoder buffer is empty, so this is a safe start point. */
2806
cookie.start_pos += cookie.bytes_to_feed;
2807
chars_to_skip -= chars_decoded;
2808
cookie.dec_flags = dec_flags;
2809
cookie.bytes_to_feed = 0;
2810
chars_decoded = 0;
2811
}
2812
if (chars_decoded >= chars_to_skip)
2813
break;
2814
input++;
2815
}
2816
if (input == input_end) {
2817
/* We didn't get enough decoded data; signal EOF to get more. */
2818
PyObject *decoded = _PyObject_CallMethod(
2819
self->decoder, &_Py_ID(decode), "yO", "", /* final = */ Py_True);
2820
if (check_decoded(decoded) < 0)
2821
goto fail;
2822
chars_decoded += PyUnicode_GET_LENGTH(decoded);
2823
Py_DECREF(decoded);
2824
cookie.need_eof = 1;
2825
2826
if (chars_decoded < chars_to_skip) {
2827
PyErr_SetString(PyExc_OSError,
2828
"can't reconstruct logical file position");
2829
goto fail;
2830
}
2831
}
2832
2833
finally:
2834
res = PyObject_CallMethodOneArg(
2835
self->decoder, &_Py_ID(setstate), saved_state);
2836
Py_DECREF(saved_state);
2837
if (res == NULL)
2838
return NULL;
2839
Py_DECREF(res);
2840
2841
/* The returned cookie corresponds to the last safe start point. */
2842
cookie.chars_to_skip = Py_SAFE_DOWNCAST(chars_to_skip, Py_ssize_t, int);
2843
return textiowrapper_build_cookie(&cookie);
2844
2845
fail:
2846
if (saved_state) {
2847
PyObject *exc = PyErr_GetRaisedException();
2848
res = PyObject_CallMethodOneArg(
2849
self->decoder, &_Py_ID(setstate), saved_state);
2850
_PyErr_ChainExceptions1(exc);
2851
Py_DECREF(saved_state);
2852
Py_XDECREF(res);
2853
}
2854
return NULL;
2855
}
2856
2857
/*[clinic input]
2858
_io.TextIOWrapper.truncate
2859
pos: object = None
2860
/
2861
[clinic start generated code]*/
2862
2863
static PyObject *
2864
_io_TextIOWrapper_truncate_impl(textio *self, PyObject *pos)
2865
/*[clinic end generated code: output=90ec2afb9bb7745f input=56ec8baa65aea377]*/
2866
{
2867
PyObject *res;
2868
2869
CHECK_ATTACHED(self)
2870
2871
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
2872
if (res == NULL)
2873
return NULL;
2874
Py_DECREF(res);
2875
2876
return PyObject_CallMethodOneArg(self->buffer, &_Py_ID(truncate), pos);
2877
}
2878
2879
static PyObject *
2880
textiowrapper_repr(textio *self)
2881
{
2882
PyObject *nameobj, *modeobj, *res, *s;
2883
int status;
2884
2885
CHECK_INITIALIZED(self);
2886
2887
res = PyUnicode_FromString("<_io.TextIOWrapper");
2888
if (res == NULL)
2889
return NULL;
2890
2891
status = Py_ReprEnter((PyObject *)self);
2892
if (status != 0) {
2893
if (status > 0) {
2894
PyErr_Format(PyExc_RuntimeError,
2895
"reentrant call inside %s.__repr__",
2896
Py_TYPE(self)->tp_name);
2897
}
2898
goto error;
2899
}
2900
if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(name), &nameobj) < 0) {
2901
if (!PyErr_ExceptionMatches(PyExc_ValueError)) {
2902
goto error;
2903
}
2904
/* Ignore ValueError raised if the underlying stream was detached */
2905
PyErr_Clear();
2906
}
2907
if (nameobj != NULL) {
2908
s = PyUnicode_FromFormat(" name=%R", nameobj);
2909
Py_DECREF(nameobj);
2910
if (s == NULL)
2911
goto error;
2912
PyUnicode_AppendAndDel(&res, s);
2913
if (res == NULL)
2914
goto error;
2915
}
2916
if (_PyObject_LookupAttr((PyObject *) self, &_Py_ID(mode), &modeobj) < 0) {
2917
goto error;
2918
}
2919
if (modeobj != NULL) {
2920
s = PyUnicode_FromFormat(" mode=%R", modeobj);
2921
Py_DECREF(modeobj);
2922
if (s == NULL)
2923
goto error;
2924
PyUnicode_AppendAndDel(&res, s);
2925
if (res == NULL)
2926
goto error;
2927
}
2928
s = PyUnicode_FromFormat("%U encoding=%R>",
2929
res, self->encoding);
2930
Py_DECREF(res);
2931
if (status == 0) {
2932
Py_ReprLeave((PyObject *)self);
2933
}
2934
return s;
2935
2936
error:
2937
Py_XDECREF(res);
2938
if (status == 0) {
2939
Py_ReprLeave((PyObject *)self);
2940
}
2941
return NULL;
2942
}
2943
2944
2945
/* Inquiries */
2946
2947
/*[clinic input]
2948
_io.TextIOWrapper.fileno
2949
[clinic start generated code]*/
2950
2951
static PyObject *
2952
_io_TextIOWrapper_fileno_impl(textio *self)
2953
/*[clinic end generated code: output=21490a4c3da13e6c input=c488ca83d0069f9b]*/
2954
{
2955
CHECK_ATTACHED(self);
2956
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(fileno));
2957
}
2958
2959
/*[clinic input]
2960
_io.TextIOWrapper.seekable
2961
[clinic start generated code]*/
2962
2963
static PyObject *
2964
_io_TextIOWrapper_seekable_impl(textio *self)
2965
/*[clinic end generated code: output=ab223dbbcffc0f00 input=8b005ca06e1fca13]*/
2966
{
2967
CHECK_ATTACHED(self);
2968
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(seekable));
2969
}
2970
2971
/*[clinic input]
2972
_io.TextIOWrapper.readable
2973
[clinic start generated code]*/
2974
2975
static PyObject *
2976
_io_TextIOWrapper_readable_impl(textio *self)
2977
/*[clinic end generated code: output=72ff7ba289a8a91b input=0704ea7e01b0d3eb]*/
2978
{
2979
CHECK_ATTACHED(self);
2980
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(readable));
2981
}
2982
2983
/*[clinic input]
2984
_io.TextIOWrapper.writable
2985
[clinic start generated code]*/
2986
2987
static PyObject *
2988
_io_TextIOWrapper_writable_impl(textio *self)
2989
/*[clinic end generated code: output=a728c71790d03200 input=c41740bc9d8636e8]*/
2990
{
2991
CHECK_ATTACHED(self);
2992
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(writable));
2993
}
2994
2995
/*[clinic input]
2996
_io.TextIOWrapper.isatty
2997
[clinic start generated code]*/
2998
2999
static PyObject *
3000
_io_TextIOWrapper_isatty_impl(textio *self)
3001
/*[clinic end generated code: output=12be1a35bace882e input=fb68d9f2c99bbfff]*/
3002
{
3003
CHECK_ATTACHED(self);
3004
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(isatty));
3005
}
3006
3007
/*[clinic input]
3008
_io.TextIOWrapper.flush
3009
[clinic start generated code]*/
3010
3011
static PyObject *
3012
_io_TextIOWrapper_flush_impl(textio *self)
3013
/*[clinic end generated code: output=59de9165f9c2e4d2 input=928c60590694ab85]*/
3014
{
3015
CHECK_ATTACHED(self);
3016
CHECK_CLOSED(self);
3017
self->telling = self->seekable;
3018
if (_textiowrapper_writeflush(self) < 0)
3019
return NULL;
3020
return PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(flush));
3021
}
3022
3023
/*[clinic input]
3024
_io.TextIOWrapper.close
3025
[clinic start generated code]*/
3026
3027
static PyObject *
3028
_io_TextIOWrapper_close_impl(textio *self)
3029
/*[clinic end generated code: output=056ccf8b4876e4f4 input=9c2114315eae1948]*/
3030
{
3031
PyObject *res;
3032
int r;
3033
CHECK_ATTACHED(self);
3034
3035
res = textiowrapper_closed_get(self, NULL);
3036
if (res == NULL)
3037
return NULL;
3038
r = PyObject_IsTrue(res);
3039
Py_DECREF(res);
3040
if (r < 0)
3041
return NULL;
3042
3043
if (r > 0) {
3044
Py_RETURN_NONE; /* stream already closed */
3045
}
3046
else {
3047
PyObject *exc = NULL;
3048
if (self->finalizing) {
3049
res = PyObject_CallMethodOneArg(self->buffer, &_Py_ID(_dealloc_warn),
3050
(PyObject *)self);
3051
if (res) {
3052
Py_DECREF(res);
3053
}
3054
else {
3055
PyErr_Clear();
3056
}
3057
}
3058
res = PyObject_CallMethodNoArgs((PyObject *)self, &_Py_ID(flush));
3059
if (res == NULL) {
3060
exc = PyErr_GetRaisedException();
3061
}
3062
else {
3063
Py_DECREF(res);
3064
}
3065
3066
res = PyObject_CallMethodNoArgs(self->buffer, &_Py_ID(close));
3067
if (exc != NULL) {
3068
_PyErr_ChainExceptions1(exc);
3069
Py_CLEAR(res);
3070
}
3071
return res;
3072
}
3073
}
3074
3075
static PyObject *
3076
textiowrapper_iternext(textio *self)
3077
{
3078
PyObject *line;
3079
3080
CHECK_ATTACHED(self);
3081
3082
self->telling = 0;
3083
if (Py_IS_TYPE(self, self->state->PyTextIOWrapper_Type)) {
3084
/* Skip method call overhead for speed */
3085
line = _textiowrapper_readline(self, -1);
3086
}
3087
else {
3088
line = PyObject_CallMethodNoArgs((PyObject *)self,
3089
&_Py_ID(readline));
3090
if (line && !PyUnicode_Check(line)) {
3091
PyErr_Format(PyExc_OSError,
3092
"readline() should have returned a str object, "
3093
"not '%.200s'", Py_TYPE(line)->tp_name);
3094
Py_DECREF(line);
3095
return NULL;
3096
}
3097
}
3098
3099
if (line == NULL)
3100
return NULL;
3101
3102
if (PyUnicode_GET_LENGTH(line) == 0) {
3103
/* Reached EOF or would have blocked */
3104
Py_DECREF(line);
3105
Py_CLEAR(self->snapshot);
3106
self->telling = self->seekable;
3107
return NULL;
3108
}
3109
3110
return line;
3111
}
3112
3113
static PyObject *
3114
textiowrapper_name_get(textio *self, void *context)
3115
{
3116
CHECK_ATTACHED(self);
3117
return PyObject_GetAttr(self->buffer, &_Py_ID(name));
3118
}
3119
3120
static PyObject *
3121
textiowrapper_closed_get(textio *self, void *context)
3122
{
3123
CHECK_ATTACHED(self);
3124
return PyObject_GetAttr(self->buffer, &_Py_ID(closed));
3125
}
3126
3127
static PyObject *
3128
textiowrapper_newlines_get(textio *self, void *context)
3129
{
3130
PyObject *res;
3131
CHECK_ATTACHED(self);
3132
if (self->decoder == NULL ||
3133
_PyObject_LookupAttr(self->decoder, &_Py_ID(newlines), &res) == 0)
3134
{
3135
Py_RETURN_NONE;
3136
}
3137
return res;
3138
}
3139
3140
static PyObject *
3141
textiowrapper_errors_get(textio *self, void *context)
3142
{
3143
CHECK_INITIALIZED(self);
3144
return Py_NewRef(self->errors);
3145
}
3146
3147
static PyObject *
3148
textiowrapper_chunk_size_get(textio *self, void *context)
3149
{
3150
CHECK_ATTACHED(self);
3151
return PyLong_FromSsize_t(self->chunk_size);
3152
}
3153
3154
static int
3155
textiowrapper_chunk_size_set(textio *self, PyObject *arg, void *context)
3156
{
3157
Py_ssize_t n;
3158
CHECK_ATTACHED_INT(self);
3159
if (arg == NULL) {
3160
PyErr_SetString(PyExc_AttributeError, "cannot delete attribute");
3161
return -1;
3162
}
3163
n = PyNumber_AsSsize_t(arg, PyExc_ValueError);
3164
if (n == -1 && PyErr_Occurred())
3165
return -1;
3166
if (n <= 0) {
3167
PyErr_SetString(PyExc_ValueError,
3168
"a strictly positive integer is required");
3169
return -1;
3170
}
3171
self->chunk_size = n;
3172
return 0;
3173
}
3174
3175
static PyMethodDef incrementalnewlinedecoder_methods[] = {
3176
_IO_INCREMENTALNEWLINEDECODER_DECODE_METHODDEF
3177
_IO_INCREMENTALNEWLINEDECODER_GETSTATE_METHODDEF
3178
_IO_INCREMENTALNEWLINEDECODER_SETSTATE_METHODDEF
3179
_IO_INCREMENTALNEWLINEDECODER_RESET_METHODDEF
3180
{NULL}
3181
};
3182
3183
static PyGetSetDef incrementalnewlinedecoder_getset[] = {
3184
{"newlines", (getter)incrementalnewlinedecoder_newlines_get, NULL, NULL},
3185
{NULL}
3186
};
3187
3188
static PyType_Slot nldecoder_slots[] = {
3189
{Py_tp_dealloc, incrementalnewlinedecoder_dealloc},
3190
{Py_tp_doc, (void *)_io_IncrementalNewlineDecoder___init____doc__},
3191
{Py_tp_methods, incrementalnewlinedecoder_methods},
3192
{Py_tp_getset, incrementalnewlinedecoder_getset},
3193
{Py_tp_traverse, incrementalnewlinedecoder_traverse},
3194
{Py_tp_clear, incrementalnewlinedecoder_clear},
3195
{Py_tp_init, _io_IncrementalNewlineDecoder___init__},
3196
{0, NULL},
3197
};
3198
3199
PyType_Spec nldecoder_spec = {
3200
.name = "_io.IncrementalNewlineDecoder",
3201
.basicsize = sizeof(nldecoder_object),
3202
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3203
Py_TPFLAGS_IMMUTABLETYPE),
3204
.slots = nldecoder_slots,
3205
};
3206
3207
3208
static PyMethodDef textiowrapper_methods[] = {
3209
_IO_TEXTIOWRAPPER_DETACH_METHODDEF
3210
_IO_TEXTIOWRAPPER_RECONFIGURE_METHODDEF
3211
_IO_TEXTIOWRAPPER_WRITE_METHODDEF
3212
_IO_TEXTIOWRAPPER_READ_METHODDEF
3213
_IO_TEXTIOWRAPPER_READLINE_METHODDEF
3214
_IO_TEXTIOWRAPPER_FLUSH_METHODDEF
3215
_IO_TEXTIOWRAPPER_CLOSE_METHODDEF
3216
3217
_IO_TEXTIOWRAPPER_FILENO_METHODDEF
3218
_IO_TEXTIOWRAPPER_SEEKABLE_METHODDEF
3219
_IO_TEXTIOWRAPPER_READABLE_METHODDEF
3220
_IO_TEXTIOWRAPPER_WRITABLE_METHODDEF
3221
_IO_TEXTIOWRAPPER_ISATTY_METHODDEF
3222
3223
_IO_TEXTIOWRAPPER_SEEK_METHODDEF
3224
_IO_TEXTIOWRAPPER_TELL_METHODDEF
3225
_IO_TEXTIOWRAPPER_TRUNCATE_METHODDEF
3226
3227
{"__reduce__", _PyIOBase_cannot_pickle, METH_VARARGS},
3228
{"__reduce_ex__", _PyIOBase_cannot_pickle, METH_VARARGS},
3229
{NULL, NULL}
3230
};
3231
3232
static PyMemberDef textiowrapper_members[] = {
3233
{"encoding", T_OBJECT, offsetof(textio, encoding), READONLY},
3234
{"buffer", T_OBJECT, offsetof(textio, buffer), READONLY},
3235
{"line_buffering", T_BOOL, offsetof(textio, line_buffering), READONLY},
3236
{"write_through", T_BOOL, offsetof(textio, write_through), READONLY},
3237
{"_finalizing", T_BOOL, offsetof(textio, finalizing), 0},
3238
{"__weaklistoffset__", T_PYSSIZET, offsetof(textio, weakreflist), READONLY},
3239
{"__dictoffset__", T_PYSSIZET, offsetof(textio, dict), READONLY},
3240
{NULL}
3241
};
3242
3243
static PyGetSetDef textiowrapper_getset[] = {
3244
{"name", (getter)textiowrapper_name_get, NULL, NULL},
3245
{"closed", (getter)textiowrapper_closed_get, NULL, NULL},
3246
/* {"mode", (getter)TextIOWrapper_mode_get, NULL, NULL},
3247
*/
3248
{"newlines", (getter)textiowrapper_newlines_get, NULL, NULL},
3249
{"errors", (getter)textiowrapper_errors_get, NULL, NULL},
3250
{"_CHUNK_SIZE", (getter)textiowrapper_chunk_size_get,
3251
(setter)textiowrapper_chunk_size_set, NULL},
3252
{NULL}
3253
};
3254
3255
PyType_Slot textiowrapper_slots[] = {
3256
{Py_tp_dealloc, textiowrapper_dealloc},
3257
{Py_tp_repr, textiowrapper_repr},
3258
{Py_tp_doc, (void *)_io_TextIOWrapper___init____doc__},
3259
{Py_tp_traverse, textiowrapper_traverse},
3260
{Py_tp_clear, textiowrapper_clear},
3261
{Py_tp_iternext, textiowrapper_iternext},
3262
{Py_tp_methods, textiowrapper_methods},
3263
{Py_tp_members, textiowrapper_members},
3264
{Py_tp_getset, textiowrapper_getset},
3265
{Py_tp_init, _io_TextIOWrapper___init__},
3266
{0, NULL},
3267
};
3268
3269
PyType_Spec textiowrapper_spec = {
3270
.name = "_io.TextIOWrapper",
3271
.basicsize = sizeof(textio),
3272
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
3273
Py_TPFLAGS_IMMUTABLETYPE),
3274
.slots = textiowrapper_slots,
3275
};
3276
3277