#include "Python.h"
#include <stddef.h>
#include "pycore_object.h"
#include "_iomodule.h"
/* Values stored in stringio.state.
   STATE_REALIZED: the contents live in the Py_UCS4 buffer `buf`.
   STATE_ACCUMULATING: the contents are still being collected in the
   _PyUnicodeWriter `writer` and have not been copied into `buf` yet. */
#define STATE_REALIZED 1
#define STATE_ACCUMULATING 2
/* Instance layout of the StringIO object. */
typedef struct {
PyObject_HEAD
Py_UCS4 *buf; /* character buffer, (re)allocated by resize_buffer() */
Py_ssize_t pos; /* current stream position, in characters */
Py_ssize_t string_size; /* logical length of the contents, in characters */
size_t buf_size; /* allocated capacity of buf, counted in Py_UCS4 units */
int state; /* STATE_REALIZED or STATE_ACCUMULATING */
_PyUnicodeWriter writer; /* collects text while state == STATE_ACCUMULATING */
char ok; /* set to 1 once __init__ completed; tested by CHECK_INITIALIZED */
char closed; /* set to 1 by close(); tested by CHECK_CLOSED */
char readuniversal; /* true when the newline argument is None or "" */
char readtranslate; /* true when the newline argument is None */
PyObject *decoder; /* newline decoder, created only when readuniversal is set */
PyObject *readnl; /* effective newline as a str; NULL when newline is None */
PyObject *writenl; /* substituted for _Py_STR(newline) on write; set when newline starts with '\r' */
PyObject *dict; /* instance dictionary, exposed through __dictoffset__ */
PyObject *weakreflist; /* weak reference list, exposed through __weaklistoffset__ */
_PyIO_State *module_state; /* _io module state looked up and cached by __init__ */
} stringio;
/* Forward declaration: stringio_setstate() calls this before any definition
   is visible.  NOTE(review): the definition is not in this file's own text;
   presumably it comes from the included clinic header -- confirm. */
static int _io_StringIO___init__(PyObject *self, PyObject *args, PyObject *kwargs);
/* Make the calling function return NULL with ValueError set when `self` has
   not been successfully initialised (self->ok <= 0).  Only usable inside
   functions that return a pointer.

   The body is wrapped in do { } while (0) so the macro expands to exactly one
   statement: it needs its trailing semicolon and cannot capture a following
   `else`. */
#define CHECK_INITIALIZED(self) \
    do { \
        if ((self)->ok <= 0) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on uninitialized object"); \
            return NULL; \
        } \
    } while (0)
/* Make the calling function return NULL with ValueError set when `self` has
   been closed (self->closed is non-zero).  Only usable inside functions that
   return a pointer.

   The body is wrapped in do { } while (0) so the macro expands to exactly one
   statement: it needs its trailing semicolon and cannot capture a following
   `else`. */
#define CHECK_CLOSED(self) \
    do { \
        if ((self)->closed) { \
            PyErr_SetString(PyExc_ValueError, \
                            "I/O operation on closed file"); \
            return NULL; \
        } \
    } while (0)
/* Call realize() on `self` and make the calling function return NULL if it
   fails (realize() has then already set an exception).  Only usable inside
   functions that return a pointer.

   The body is wrapped in do { } while (0) so the macro expands to exactly one
   statement: it needs its trailing semicolon and cannot capture a following
   `else`. */
#define ENSURE_REALIZED(self) \
    do { \
        if (realize(self) < 0) { \
            return NULL; \
        } \
    } while (0)
/* Make sure self->buf can hold `size` characters plus one spare slot (the
 * spare slot is what lets _stringio_readline() store a temporary terminator
 * just past the text).
 *
 * The capacity is left alone when the request fits and uses at least half of
 * it, shrunk to fit on a large decrease, given some headroom on a small
 * increase, and grown to fit on a large increase.
 *
 * Returns 0 on success.  Returns -1 with OverflowError set when the request
 * is too large, or with MemoryError set when the reallocation fails; in both
 * failure cases self->buf and self->buf_size are not modified.
 */
static int
resize_buffer(stringio *self, size_t size)
{
    size_t capacity = self->buf_size;
    size_t needed;
    Py_UCS4 *resized = NULL;

    assert(self->buf != NULL);

    /* Unsigned arithmetic throughout; the result must still fit the signed
       size type. */
    needed = size + 1;
    if (needed > PY_SSIZE_T_MAX) {
        goto too_large;
    }

    if (needed >= capacity / 2 && needed < capacity) {
        /* Already fits without being wasteful: nothing to do. */
        return 0;
    }

    if (needed < capacity / 2 || needed > capacity * 1.125) {
        /* Large change in either direction: size the buffer to fit. */
        capacity = needed + 1;
    }
    else {
        /* Small increase: add proportional headroom. */
        capacity = needed + (needed >> 3) + (needed < 9 ? 3 : 6);
    }

    if (capacity > PY_SIZE_MAX / sizeof(Py_UCS4)) {
        goto too_large;
    }
    resized = (Py_UCS4 *)PyMem_Realloc(self->buf, capacity * sizeof(Py_UCS4));
    if (resized == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    self->buf = resized;
    self->buf_size = capacity;
    return 0;

too_large:
    PyErr_SetString(PyExc_OverflowError,
                    "new buffer size too large");
    return -1;
}
/* Return the text accumulated so far as a new str while keeping the object
 * in accumulating mode.
 *
 * The writer is finished to obtain the string, then a fresh over-allocating
 * writer is seeded with that same string.  On failure NULL is returned and
 * self->state is left at STATE_REALIZED.
 */
static PyObject *
make_intermediate(stringio *self)
{
    PyObject *snapshot;

    snapshot = _PyUnicodeWriter_Finish(&self->writer);
    self->state = STATE_REALIZED;
    if (snapshot != NULL) {
        _PyUnicodeWriter_Init(&self->writer);
        self->writer.overallocate = 1;
        if (_PyUnicodeWriter_WriteStr(&self->writer, snapshot) == 0) {
            self->state = STATE_ACCUMULATING;
            return snapshot;
        }
        Py_DECREF(snapshot);
    }
    return NULL;
}
/* Switch the object from accumulating mode to realized mode by copying the
 * writer's text into the Py_UCS4 buffer.
 *
 * Does nothing when the object is already realized.  Returns 0 on success and
 * -1 with an exception set on failure; self->state is STATE_REALIZED
 * afterwards in either case.
 */
static int
realize(stringio *self)
{
    PyObject *text;
    Py_ssize_t nchars;
    int status = -1;

    if (self->state == STATE_REALIZED) {
        return 0;
    }
    assert(self->state == STATE_ACCUMULATING);
    self->state = STATE_REALIZED;

    text = _PyUnicodeWriter_Finish(&self->writer);
    if (text == NULL) {
        return -1;
    }

    nchars = PyUnicode_GET_LENGTH(text);
    if (resize_buffer(self, nchars) >= 0
        && PyUnicode_AsUCS4(text, self->buf, nchars, 0) != NULL) {
        status = 0;
    }
    Py_DECREF(text);
    return status;
}
/* Write the str `obj` at the current position and advance the position.
 *
 * The text first goes through self->decoder when one is set, and occurrences
 * of _Py_STR(newline) are replaced by self->writenl when that is set.  While
 * accumulating and positioned exactly at the end, the text is appended to the
 * writer; otherwise the object is realized and the text is copied into the
 * buffer, overwriting existing characters and zero-filling any gap left by a
 * seek past the end.
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static Py_ssize_t
write_str(stringio *self, PyObject *obj)
{
    Py_ssize_t len;
    PyObject *decoded = NULL;

    assert(self->buf != NULL);
    assert(self->pos >= 0);

    if (self->decoder != NULL) {
        decoded = _PyIncrementalNewlineDecoder_decode(
            self->decoder, obj, 1);
    }
    else {
        decoded = Py_NewRef(obj);
    }
    /* Check for a failed decode before translating, so that a NULL can never
       be handed to PyUnicode_Replace().  __init__ currently never sets both
       decoder and writenl, but this function should not depend on that. */
    if (decoded == NULL) {
        return -1;
    }
    if (self->writenl) {
        PyObject *translated = PyUnicode_Replace(
            decoded, &_Py_STR(newline), self->writenl, -1);
        Py_SETREF(decoded, translated);
        if (decoded == NULL) {
            return -1;
        }
    }

    assert(PyUnicode_Check(decoded));
    len = PyUnicode_GET_LENGTH(decoded);
    assert(len >= 0);

    /* Refuse writes whose end position would overflow Py_ssize_t. */
    if (self->pos > PY_SSIZE_T_MAX - len) {
        PyErr_SetString(PyExc_OverflowError,
                        "new position too large");
        goto fail;
    }

    if (self->state == STATE_ACCUMULATING) {
        if (self->string_size == self->pos) {
            /* Plain append: stay in accumulating mode. */
            if (_PyUnicodeWriter_WriteStr(&self->writer, decoded)) {
                goto fail;
            }
            goto success;
        }
        /* Writing anywhere else needs the flat buffer. */
        if (realize(self)) {
            goto fail;
        }
    }

    if (self->pos + len > self->string_size) {
        if (resize_buffer(self, self->pos + len) < 0) {
            goto fail;
        }
    }

    if (self->pos > self->string_size) {
        /* The position is past the end: zero-fill the gap. */
        memset(self->buf + self->string_size, '\0',
               (self->pos - self->string_size) * sizeof(Py_UCS4));
    }

    /* Copy the characters in place, overwriting whatever was there. */
    if (!PyUnicode_AsUCS4(decoded,
                          self->buf + self->pos,
                          self->buf_size - self->pos,
                          0)) {
        goto fail;
    }

success:
    self->pos += len;
    if (self->string_size < self->pos) {
        self->string_size = self->pos;
    }
    Py_DECREF(decoded);
    return 0;

fail:
    Py_XDECREF(decoded);
    return -1;
}
/* Return the whole contents as a new str, regardless of the current
 * position.  Fails with ValueError on an uninitialised or closed object.
 */
static PyObject *
_io_StringIO_getvalue_impl(stringio *self)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (self->state != STATE_ACCUMULATING) {
        /* Realized: build the string straight from the UCS4 buffer. */
        return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, self->buf,
                                         self->string_size);
    }
    return make_intermediate(self);
}
/* Return the current stream position as a Python int.  Fails with ValueError
 * on an uninitialised or closed object.
 */
static PyObject *
_io_StringIO_tell_impl(stringio *self)
{
    Py_ssize_t where;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    where = self->pos;
    return PyLong_FromSsize_t(where);
}
/* Read up to `size` characters from the current position and advance it.
 *
 * A negative `size`, or one larger than what remains, reads everything that
 * remains; nothing is read when the position is at or past the end.
 * Returns a new str, or NULL with an exception set.
 */
static PyObject *
_io_StringIO_read_impl(stringio *self, Py_ssize_t size)
{
    Py_ssize_t available;
    Py_UCS4 *chunk;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    /* Clamp the request to what is actually left (never below zero). */
    available = self->string_size - self->pos;
    if (size < 0 || size > available) {
        size = (available < 0) ? 0 : available;
    }

    /* Reading everything from the start while still accumulating can reuse
       the writer's string instead of realizing the buffer. */
    if (self->state == STATE_ACCUMULATING && self->pos == 0
        && size == available) {
        PyObject *everything = make_intermediate(self);
        self->pos = self->string_size;
        return everything;
    }

    ENSURE_REALIZED(self);
    chunk = self->buf + self->pos;
    self->pos += size;
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, chunk, size);
}
/* Read one line of at most `limit` characters from the realized buffer,
 * starting at the current position, and advance the position past it.
 *
 * A negative or oversized `limit` means "up to the end of the contents".
 * An empty str is returned when the position is at or past the end.  Callers
 * in this file realize the buffer before calling this helper.
 */
static PyObject *
_stringio_readline(stringio *self, Py_ssize_t limit)
{
    Py_UCS4 *first, *stop, saved;
    Py_ssize_t remaining, line_len, consumed;

    if (self->pos >= self->string_size) {
        return PyUnicode_New(0, 0);
    }

    remaining = self->string_size - self->pos;
    if (limit < 0 || limit > remaining) {
        limit = remaining;
    }
    first = self->buf + self->pos;
    stop = first + limit;

    /* Temporarily terminate the search window with a NUL, then restore the
       character that was there. */
    saved = *stop;
    *stop = '\0';
    line_len = _PyIO_find_line_ending(
        self->readtranslate, self->readuniversal, self->readnl,
        PyUnicode_4BYTE_KIND, (char *)first, (char *)stop, &consumed);
    *stop = saved;

    /* No line ending inside the window: hand back the whole window. */
    if (line_len < 0) {
        line_len = limit;
    }
    self->pos += line_len;
    return PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND, first, line_len);
}
/* readline(size): validate the object, make sure the buffer is realized and
 * read one line of at most `size` characters via _stringio_readline().
 */
static PyObject *
_io_StringIO_readline_impl(stringio *self, Py_ssize_t size)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (realize(self) < 0) {
        return NULL;
    }
    return _stringio_readline(self, size);
}
/* tp_iternext: return the next line, or NULL when iteration is finished or an
 * error occurred.
 *
 * Exact StringIO instances read the line directly; any other type goes
 * through its own readline() method, whose result must be a str.  An empty
 * line ends the iteration (NULL is returned without setting an exception
 * here).
 */
static PyObject *
stringio_iternext(stringio *self)
{
    PyObject *line;

    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);
    ENSURE_REALIZED(self);

    if (!Py_IS_TYPE(self, self->module_state->PyStringIO_Type)) {
        line = PyObject_CallMethodNoArgs((PyObject *)self,
                                         &_Py_ID(readline));
        if (line != NULL && !PyUnicode_Check(line)) {
            PyErr_Format(PyExc_OSError,
                         "readline() should have returned a str object, "
                         "not '%.200s'", Py_TYPE(line)->tp_name);
            Py_DECREF(line);
            return NULL;
        }
    }
    else {
        line = _stringio_readline(self, -1);
    }

    if (line != NULL && PyUnicode_GET_LENGTH(line) == 0) {
        /* Empty line: nothing left to iterate over. */
        Py_DECREF(line);
        line = NULL;
    }
    return line;
}
/* truncate(size): shorten the contents to `size` characters.
 *
 * A negative size raises ValueError.  A size at or beyond the current length
 * changes nothing.  The stream position is not modified.  Returns `size` as a
 * Python int.
 */
static PyObject *
_io_StringIO_truncate_impl(stringio *self, Py_ssize_t size)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (size < 0) {
        PyErr_Format(PyExc_ValueError,
                     "Negative size value %zd", size);
        return NULL;
    }

    if (size >= self->string_size) {
        /* Nothing to cut off. */
        return PyLong_FromSsize_t(size);
    }

    ENSURE_REALIZED(self);
    if (resize_buffer(self, size) < 0) {
        return NULL;
    }
    self->string_size = size;
    return PyLong_FromSsize_t(size);
}
/* seek(pos, whence): move the stream position and return it as a Python int.
 *
 *   whence == 0: absolute position `pos`, which must not be negative.
 *   whence == 1: keep the current position; `pos` must be 0.
 *   whence == 2: move to the end of the contents; `pos` must be 0.
 *
 * Any other `whence` or a negative absolute position raises ValueError; a
 * non-zero `pos` with whence 1 or 2 raises OSError.
 */
static PyObject *
_io_StringIO_seek_impl(stringio *self, Py_ssize_t pos, int whence)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    switch (whence) {
    case 0:
        if (pos < 0) {
            PyErr_Format(PyExc_ValueError,
                         "Negative seek position %zd", pos);
            return NULL;
        }
        break;
    case 1:
    case 2:
        if (pos != 0) {
            PyErr_SetString(PyExc_OSError,
                            "Can't do nonzero cur-relative seeks");
            return NULL;
        }
        pos = (whence == 1) ? self->pos : self->string_size;
        break;
    default:
        PyErr_Format(PyExc_ValueError,
                     "Invalid whence (%i, should be 0, 1 or 2)", whence);
        return NULL;
    }

    self->pos = pos;
    return PyLong_FromSsize_t(self->pos);
}
/* write(obj): write the str `obj` at the current position.
 *
 * Raises TypeError when `obj` is not a str (checked before the closed check)
 * and ValueError on an uninitialised or closed object.  Returns the length of
 * `obj` in characters as a Python int; an empty string is not passed on to
 * write_str().
 */
static PyObject *
_io_StringIO_write(stringio *self, PyObject *obj)
{
    Py_ssize_t nchars;

    CHECK_INITIALIZED(self);
    if (!PyUnicode_Check(obj)) {
        PyErr_Format(PyExc_TypeError, "string argument expected, got '%s'",
                     Py_TYPE(obj)->tp_name);
        return NULL;
    }
    CHECK_CLOSED(self);

    nchars = PyUnicode_GET_LENGTH(obj);
    if (nchars > 0) {
        if (write_str(self, obj) < 0) {
            return NULL;
        }
    }
    return PyLong_FromSsize_t(nchars);
}
/* close(): mark the object closed and release what it holds.
 *
 * The closed flag is set first, then the buffer is shrunk to its minimum and
 * the writer and the newline-related objects are dropped.  Returns None, or
 * NULL if shrinking the buffer fails (the object is still marked closed).
 */
static PyObject *
_io_StringIO_close_impl(stringio *self)
{
    self->closed = 1;

    if (resize_buffer(self, 0) >= 0) {
        _PyUnicodeWriter_Dealloc(&self->writer);
        Py_CLEAR(self->readnl);
        Py_CLEAR(self->writenl);
        Py_CLEAR(self->decoder);
        Py_RETURN_NONE;
    }
    return NULL;
}
/* tp_traverse: report to the garbage collector the object's type and every
   Python object reference stored in the instance. */
static int
stringio_traverse(stringio *self, visitproc visit, void *arg)
{
Py_VISIT(Py_TYPE(self));
Py_VISIT(self->readnl);
Py_VISIT(self->writenl);
Py_VISIT(self->decoder);
Py_VISIT(self->dict);
return 0;
}
/* tp_clear: drop the Python object references held by the instance.  Also
   called from stringio_dealloc().  Always returns 0. */
static int
stringio_clear(stringio *self)
{
Py_CLEAR(self->readnl);
Py_CLEAR(self->writenl);
Py_CLEAR(self->decoder);
Py_CLEAR(self->dict);
return 0;
}
/* tp_dealloc: release everything owned by the instance and free it. */
static void
stringio_dealloc(stringio *self)
{
/* Remember the type: it is needed after the instance memory is freed. */
PyTypeObject *tp = Py_TYPE(self);
_PyObject_GC_UNTRACK(self);
/* Mark the object as uninitialised while it is being torn down. */
self->ok = 0;
if (self->buf) {
PyMem_Free(self->buf);
self->buf = NULL;
}
_PyUnicodeWriter_Dealloc(&self->writer);
(void)stringio_clear(self);
if (self->weakreflist != NULL) {
PyObject_ClearWeakRefs((PyObject *) self);
}
tp->tp_free(self);
/* The type is built from a PyType_Spec; release the reference the instance
   held on it. */
Py_DECREF(tp);
}
/* tp_new: allocate a StringIO instance with a non-NULL, zero-sized buffer.
 *
 * `args` and `kwds` are not examined here.  Only self->buf is set explicitly;
 * the remaining fields keep whatever tp_alloc left in them.  Returns the new
 * object, or NULL with an exception set.
 */
static PyObject *
stringio_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
{
    stringio *obj;

    assert(type != NULL && type->tp_alloc != NULL);

    obj = (stringio *)type->tp_alloc(type, 0);
    if (obj == NULL) {
        return NULL;
    }

    /* resize_buffer() asserts a non-NULL buffer, so allocate one up front. */
    obj->buf = (Py_UCS4 *)PyMem_Malloc(0);
    if (obj->buf != NULL) {
        return (PyObject *)obj;
    }

    Py_DECREF(obj);
    return PyErr_NoMemory();
}
/* (Re)initialise the object from an optional initial value and a newline
   mode.

   value:       NULL, None or a str holding the initial contents.
   newline_obj: NULL (the default "\n" is used), None, or one of the strings
                "", "\n", "\r", "\r\n".

   Returns 0 on success and -1 with an exception set on failure.  Once the
   arguments have been validated, self->ok is 0 until the function succeeds,
   so a failure after that point leaves the object flagged as uninitialised. */
static int
_io_StringIO___init___impl(stringio *self, PyObject *value,
PyObject *newline_obj)
{
const char *newline = "\n";
Py_ssize_t value_len;
/* Work out the newline mode: None maps to NULL, a str to its UTF-8 form,
   and an omitted argument keeps the "\n" default. */
if (newline_obj == Py_None) {
newline = NULL;
}
else if (newline_obj) {
if (!PyUnicode_Check(newline_obj)) {
PyErr_Format(PyExc_TypeError,
"newline must be str or None, not %.200s",
Py_TYPE(newline_obj)->tp_name);
return -1;
}
newline = PyUnicode_AsUTF8(newline_obj);
if (newline == NULL)
return -1;
}
/* Only "", "\n", "\r" and "\r\n" are accepted as newline strings. */
if (newline && newline[0] != '\0'
&& !(newline[0] == '\n' && newline[1] == '\0')
&& !(newline[0] == '\r' && newline[1] == '\0')
&& !(newline[0] == '\r' && newline[1] == '\n' && newline[2] == '\0')) {
PyErr_Format(PyExc_ValueError,
"illegal newline value: %R", newline_obj);
return -1;
}
if (value && value != Py_None && !PyUnicode_Check(value)) {
PyErr_Format(PyExc_TypeError,
"initial_value must be str or None, not %.200s",
Py_TYPE(value)->tp_name);
return -1;
}
/* Arguments are valid: flag the object as uninitialised and discard any
   state left over from a previous initialisation. */
self->ok = 0;
_PyUnicodeWriter_Dealloc(&self->writer);
Py_CLEAR(self->readnl);
Py_CLEAR(self->writenl);
Py_CLEAR(self->decoder);
assert((newline != NULL && newline_obj != Py_None) ||
(newline == NULL && newline_obj == Py_None));
if (newline) {
self->readnl = PyUnicode_FromString(newline);
if (self->readnl == NULL)
return -1;
}
self->readuniversal = (newline == NULL || newline[0] == '\0');
self->readtranslate = (newline == NULL);
/* A write-side replacement is only installed for "\r" and "\r\n". */
if (newline != NULL && newline[0] == '\r') {
self->writenl = Py_NewRef(self->readnl);
}
_PyIO_State *module_state = find_io_state_by_def(Py_TYPE(self));
/* A newline decoder is only created when newline is None or "". */
if (self->readuniversal) {
self->decoder = PyObject_CallFunctionObjArgs(
(PyObject *)module_state->PyIncrementalNewlineDecoder_Type,
Py_None, self->readtranslate ? Py_True : Py_False, NULL);
if (self->decoder == NULL)
return -1;
}
/* Newline handling is configured; now load the initial contents. */
self->string_size = 0;
if (value && value != Py_None)
value_len = PyUnicode_GetLength(value);
else
value_len = 0;
if (value_len > 0) {
/* Non-empty initial value: start in realized mode and store the value
   through write_str(), so that it gets the same newline handling as any
   other write. */
if (resize_buffer(self, 0) < 0)
return -1;
self->state = STATE_REALIZED;
self->pos = 0;
if (write_str(self, value) < 0)
return -1;
}
else {
/* No initial contents: start in accumulating mode with a fresh writer. */
if (resize_buffer(self, 0) < 0)
return -1;
_PyUnicodeWriter_Init(&self->writer);
self->writer.overallocate = 1;
self->state = STATE_ACCUMULATING;
}
/* write_str() advanced the position; the stream starts at the beginning. */
self->pos = 0;
self->module_state = module_state;
self->closed = 0;
self->ok = 1;
return 0;
}
/* readable(): True for every initialised, open object; ValueError
 * otherwise.
 */
static PyObject *
_io_StringIO_readable_impl(stringio *self)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    return PyBool_FromLong(1);
}
/* writable(): True for every initialised, open object; ValueError
 * otherwise.
 */
static PyObject *
_io_StringIO_writable_impl(stringio *self)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    return PyBool_FromLong(1);
}
/* seekable(): True for every initialised, open object; ValueError
 * otherwise.
 */
static PyObject *
_io_StringIO_seekable_impl(stringio *self)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    return PyBool_FromLong(1);
}
/* __getstate__(): return the tuple (value, readnl-or-None, position,
 * dict-copy-or-None) describing the object.
 *
 * The value comes from getvalue(), so this fails the same way on an
 * uninitialised or closed object.  The instance dictionary is copied, not
 * shared.
 */
static PyObject *
stringio_getstate(stringio *self, PyObject *Py_UNUSED(ignored))
{
    PyObject *value;
    PyObject *attrs;
    PyObject *result;

    value = _io_StringIO_getvalue_impl(self);
    if (value == NULL) {
        return NULL;
    }

    attrs = (self->dict == NULL) ? Py_NewRef(Py_None)
                                 : PyDict_Copy(self->dict);
    if (attrs == NULL) {
        Py_DECREF(value);
        return NULL;
    }

    /* "N" hands our reference to `attrs` over to the tuple; "O" takes its own
       reference to `value`, so ours is released below. */
    result = Py_BuildValue("(OOnN)", value,
                           self->readnl ? self->readnl : Py_None,
                           self->pos, attrs);
    Py_DECREF(value);
    return result;
}
/* __setstate__(state): restore the object from a tuple of at least four
 * items: (value, newline, position, dict-or-None).
 *
 * The first two items are passed to __init__; the buffer is then overwritten
 * with the raw characters of `value`, the position is restored and the
 * instance dictionary is updated (or adopted when the object has none).
 * Returns None, or NULL with an exception set.
 */
static PyObject *
stringio_setstate(stringio *self, PyObject *state)
{
    PyObject *init_args;
    PyObject *value;
    PyObject *pos_obj;
    PyObject *attrs;
    Py_UCS4 *chars;
    Py_ssize_t nchars;
    Py_ssize_t new_pos;
    int status;

    assert(state != NULL);
    CHECK_CLOSED(self);

    /* Longer tuples are accepted; only the first four items are used. */
    if (!PyTuple_Check(state) || PyTuple_GET_SIZE(state) < 4) {
        PyErr_Format(PyExc_TypeError,
                     "%.200s.__setstate__ argument should be 4-tuple, got %.200s",
                     Py_TYPE(self)->tp_name, Py_TYPE(state)->tp_name);
        return NULL;
    }

    /* Run __init__ with (value, newline). */
    init_args = PyTuple_GetSlice(state, 0, 2);
    if (init_args == NULL) {
        return NULL;
    }
    status = _io_StringIO___init__((PyObject *)self, init_args, NULL);
    Py_DECREF(init_args);
    if (status < 0) {
        return NULL;
    }

    /* __init__ stores the value through write_str(), which applies newline
       handling; the saved value is copied verbatim here instead. */
    value = PyTuple_GET_ITEM(state, 0);
    chars = PyUnicode_AsUCS4Copy(value);
    if (chars == NULL) {
        return NULL;
    }
    nchars = PyUnicode_GET_LENGTH(value);
    status = resize_buffer(self, nchars);
    if (status >= 0) {
        memcpy(self->buf, chars, nchars * sizeof(Py_UCS4));
        self->string_size = nchars;
    }
    PyMem_Free(chars);
    if (status < 0) {
        return NULL;
    }

    /* Restore the stream position. */
    pos_obj = PyTuple_GET_ITEM(state, 2);
    if (!PyLong_Check(pos_obj)) {
        PyErr_Format(PyExc_TypeError,
                     "third item of state must be an integer, got %.200s",
                     Py_TYPE(pos_obj)->tp_name);
        return NULL;
    }
    new_pos = PyLong_AsSsize_t(pos_obj);
    if (new_pos == -1 && PyErr_Occurred()) {
        return NULL;
    }
    if (new_pos < 0) {
        PyErr_SetString(PyExc_ValueError,
                        "position value cannot be negative");
        return NULL;
    }
    self->pos = new_pos;

    /* Restore the instance dictionary, if the state carries one. */
    attrs = PyTuple_GET_ITEM(state, 3);
    if (attrs == Py_None) {
        Py_RETURN_NONE;
    }
    if (!PyDict_Check(attrs)) {
        PyErr_Format(PyExc_TypeError,
                     "fourth item of state should be a dict, got a %.200s",
                     Py_TYPE(attrs)->tp_name);
        return NULL;
    }
    if (self->dict == NULL) {
        self->dict = Py_NewRef(attrs);
    }
    else if (PyDict_Update(self->dict, attrs) < 0) {
        return NULL;
    }

    Py_RETURN_NONE;
}
/* Getter for the `closed` attribute: True once close() has been called.
 * Raises ValueError on an uninitialised object.  `context` is unused.
 */
static PyObject *
stringio_closed(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);

    if (self->closed) {
        Py_RETURN_TRUE;
    }
    Py_RETURN_FALSE;
}
/* Getter for the `line_buffering` attribute: always False on an initialised,
 * open object; ValueError otherwise.  `context` is unused.
 */
static PyObject *
stringio_line_buffering(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    return PyBool_FromLong(0);
}
/* Getter for the `newlines` attribute: forwards to the decoder's own
 * `newlines` attribute, or returns None when no decoder is in use.
 * Raises ValueError on an uninitialised or closed object.  `context` is
 * unused.
 */
static PyObject *
stringio_newlines(stringio *self, void *context)
{
    CHECK_INITIALIZED(self);
    CHECK_CLOSED(self);

    if (self->decoder != NULL) {
        return PyObject_GetAttr(self->decoder, &_Py_ID(newlines));
    }
    Py_RETURN_NONE;
}
/* clinic_state() exists only while the generated Argument Clinic header is
   being included; it looks up the _io module state from the type of `self`.
   NOTE(review): presumably the generated wrappers are its only users --
   confirm against clinic/stringio.c.h. */
#define clinic_state() (find_io_state_by_def(Py_TYPE(self)))
#include "clinic/stringio.c.h"
#undef clinic_state
/* Method table of the StringIO type.  The _IO_STRINGIO_*_METHODDEF entries
   are macros not defined in this file (presumably supplied by the included
   clinic header -- confirm); the two pickling hooks are listed by hand. */
static struct PyMethodDef stringio_methods[] = {
_IO_STRINGIO_CLOSE_METHODDEF
_IO_STRINGIO_GETVALUE_METHODDEF
_IO_STRINGIO_READ_METHODDEF
_IO_STRINGIO_READLINE_METHODDEF
_IO_STRINGIO_TELL_METHODDEF
_IO_STRINGIO_TRUNCATE_METHODDEF
_IO_STRINGIO_SEEK_METHODDEF
_IO_STRINGIO_WRITE_METHODDEF
_IO_STRINGIO_SEEKABLE_METHODDEF
_IO_STRINGIO_READABLE_METHODDEF
_IO_STRINGIO_WRITABLE_METHODDEF
{"__getstate__", (PyCFunction)stringio_getstate, METH_NOARGS},
{"__setstate__", (PyCFunction)stringio_setstate, METH_O},
{NULL, NULL} /* sentinel */
};
/* Computed attributes of the StringIO type.  No setter is provided for any
   of them. */
static PyGetSetDef stringio_getset[] = {
{"closed", (getter)stringio_closed, NULL, NULL},
{"newlines", (getter)stringio_newlines, NULL, NULL},
{"line_buffering", (getter)stringio_line_buffering, NULL, NULL},
{NULL} /* sentinel */
};
/* Special members giving the offsets of the weak reference list and of the
   instance dictionary inside the stringio struct. */
static struct PyMemberDef stringio_members[] = {
{"__weaklistoffset__", T_PYSSIZET, offsetof(stringio, weakreflist), READONLY},
{"__dictoffset__", T_PYSSIZET, offsetof(stringio, dict), READONLY},
{NULL}, /* sentinel */
};
/* Slot table wiring the functions and tables of this file into the type. */
static PyType_Slot stringio_slots[] = {
{Py_tp_dealloc, stringio_dealloc},
{Py_tp_doc, (void *)_io_StringIO___init____doc__},
{Py_tp_traverse, stringio_traverse},
{Py_tp_clear, stringio_clear},
{Py_tp_iternext, stringio_iternext},
{Py_tp_methods, stringio_methods},
{Py_tp_members, stringio_members},
{Py_tp_getset, stringio_getset},
{Py_tp_init, _io_StringIO___init__},
{Py_tp_new, stringio_new},
{0, NULL}, /* sentinel */
};
/* Type specification for _io.StringIO.  The only definition in this file
   without `static`, so it is visible to other translation units.  The flags
   request subclassing support (BASETYPE), garbage collector participation
   (HAVE_GC) and an immutable type object (IMMUTABLETYPE). */
PyType_Spec stringio_spec = {
.name = "_io.StringIO",
.basicsize = sizeof(stringio),
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_IMMUTABLETYPE),
.slots = stringio_slots,
};