CoCalc --

GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_csv.c
¹² views
1
/* csv module */
2

3
/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10

11
/* Version string of this module (where it is exposed is not visible in
 * this part of the file). */
#define MODULE_VERSION "1.0"
12

13
#include "Python.h"
14
#include "structmember.h"         // PyMemberDef
15
#include <stdbool.h>
16

17
/*[clinic input]
18
module _csv
19
[clinic start generated code]*/
20
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21

22
#include "clinic/_csv.c.h"
23
/* Sentinel meaning "no character configured" for a dialect character
 * field; get_char_or_None() maps it to None. */
#define NOT_SET ((Py_UCS4)-1)
/* Pseudo-character handed to the parser once the last character of an
 * input line has been processed. */
#define EOL ((Py_UCS4)-2)
25

26

27
typedef struct {
28
    PyObject *error_obj;   /* CSV exception */
29
    PyObject *dialects;   /* Dialect registry */
30
    PyTypeObject *dialect_type;
31
    PyTypeObject *reader_type;
32
    PyTypeObject *writer_type;
33
    long field_limit;   /* max parsed field size */
34
    PyObject *str_write;
35
} _csvstate;
36

37
/* Forward declaration: _csv_state_from_type() needs the module definition
 * to look the module up from a type. */
static struct PyModuleDef _csvmodule;
38

39
static inline _csvstate*
40
get_csv_state(PyObject *module)
41
{
42
    void *state = PyModule_GetState(module);
43
    assert(state != NULL);
44
    return (_csvstate *)state;
45
}
46

47
static int
48
_csv_clear(PyObject *module)
49
{
50
    _csvstate *module_state = PyModule_GetState(module);
51
    Py_CLEAR(module_state->error_obj);
52
    Py_CLEAR(module_state->dialects);
53
    Py_CLEAR(module_state->dialect_type);
54
    Py_CLEAR(module_state->reader_type);
55
    Py_CLEAR(module_state->writer_type);
56
    Py_CLEAR(module_state->str_write);
57
    return 0;
58
}
59

60
static int
61
_csv_traverse(PyObject *module, visitproc visit, void *arg)
62
{
63
    _csvstate *module_state = PyModule_GetState(module);
64
    Py_VISIT(module_state->error_obj);
65
    Py_VISIT(module_state->dialects);
66
    Py_VISIT(module_state->dialect_type);
67
    Py_VISIT(module_state->reader_type);
68
    Py_VISIT(module_state->writer_type);
69
    return 0;
70
}
71

72
static void
73
_csv_free(void *module)
74
{
75
   _csv_clear((PyObject *)module);
76
}
77

78
/* States of the reader's character-level state machine
 * (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
83

84
/* Quoting styles a dialect may select; the numeric values are checked
 * against the "quoting" option by dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE,
    QUOTE_STRINGS,
    QUOTE_NOTNULL
} QuoteStyle;
88

89
typedef struct {
90
    QuoteStyle style;
91
    const char *name;
92
} StyleDesc;
93

94
static const StyleDesc quote_styles[] = {
95
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
96
    { QUOTE_ALL,        "QUOTE_ALL" },
97
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
98
    { QUOTE_NONE,       "QUOTE_NONE" },
99
    { QUOTE_STRINGS,    "QUOTE_STRINGS" },
100
    { QUOTE_NOTNULL,    "QUOTE_NOTNULL" },
101
    { 0 }
102
};
103

104
typedef struct {
105
    PyObject_HEAD
106

107
    char doublequote;           /* is " represented by ""? */
108
    char skipinitialspace;      /* ignore spaces following delimiter? */
109
    char strict;                /* raise exception on bad CSV */
110
    int quoting;                /* style of quoting to write */
111
    Py_UCS4 delimiter;          /* field separator */
112
    Py_UCS4 quotechar;          /* quote character */
113
    Py_UCS4 escapechar;         /* escape character */
114
    PyObject *lineterminator;   /* string to write between records */
115

116
} DialectObj;
117

118
typedef struct {
119
    PyObject_HEAD
120

121
    PyObject *input_iter;   /* iterate over this for input lines */
122

123
    DialectObj *dialect;    /* parsing dialect */
124

125
    PyObject *fields;           /* field list for current record */
126
    ParserState state;          /* current CSV parse state */
127
    Py_UCS4 *field;             /* temporary buffer */
128
    Py_ssize_t field_size;      /* size of allocated buffer */
129
    Py_ssize_t field_len;       /* length of current field */
130
    int numeric_field;          /* treat field as numeric */
131
    unsigned long line_num;     /* Source-file line number */
132
} ReaderObj;
133

134
typedef struct {
135
    PyObject_HEAD
136

137
    PyObject *write;    /* write output lines to this file */
138

139
    DialectObj *dialect;    /* parsing dialect */
140

141
    Py_UCS4 *rec;            /* buffer for parser.join */
142
    Py_ssize_t rec_size;        /* size of allocated record */
143
    Py_ssize_t rec_len;         /* length of record */
144
    int num_fields;             /* number of fields in record */
145

146
    PyObject *error_obj;       /* cached error object */
147
} WriterObj;
148

149
/*
 * DIALECT class
 */
152

153
static PyObject *
154
get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
155
{
156
    PyObject *dialect_obj;
157

158
    dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
159
    if (dialect_obj == NULL) {
160
        if (!PyErr_Occurred())
161
            PyErr_Format(module_state->error_obj, "unknown dialect");
162
    }
163
    else
164
        Py_INCREF(dialect_obj);
165

166
    return dialect_obj;
167
}
168

169
static PyObject *
170
get_char_or_None(Py_UCS4 c)
171
{
172
    if (c == NOT_SET) {
173
        Py_RETURN_NONE;
174
    }
175
    else
176
        return PyUnicode_FromOrdinal(c);
177
}
178

179
static PyObject *
180
Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
181
{
182
    return Py_XNewRef(self->lineterminator);
183
}
184

185
static PyObject *
186
Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
187
{
188
    return get_char_or_None(self->delimiter);
189
}
190

191
static PyObject *
192
Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
193
{
194
    return get_char_or_None(self->escapechar);
195
}
196

197
static PyObject *
198
Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
199
{
200
    return get_char_or_None(self->quotechar);
201
}
202

203
static PyObject *
204
Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
205
{
206
    return PyLong_FromLong(self->quoting);
207
}
208

209
static int
210
_set_bool(const char *name, char *target, PyObject *src, bool dflt)
211
{
212
    if (src == NULL)
213
        *target = dflt;
214
    else {
215
        int b = PyObject_IsTrue(src);
216
        if (b < 0)
217
            return -1;
218
        *target = (char)b;
219
    }
220
    return 0;
221
}
222

223
static int
224
_set_int(const char *name, int *target, PyObject *src, int dflt)
225
{
226
    if (src == NULL)
227
        *target = dflt;
228
    else {
229
        int value;
230
        if (!PyLong_CheckExact(src)) {
231
            PyErr_Format(PyExc_TypeError,
232
                         "\"%s\" must be an integer", name);
233
            return -1;
234
        }
235
        value = _PyLong_AsInt(src);
236
        if (value == -1 && PyErr_Occurred()) {
237
            return -1;
238
        }
239
        *target = value;
240
    }
241
    return 0;
242
}
243

244
static int
245
_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
246
{
247
    if (src == NULL) {
248
        *target = dflt;
249
    }
250
    else {
251
        *target = NOT_SET;
252
        if (src != Py_None) {
253
            if (!PyUnicode_Check(src)) {
254
                PyErr_Format(PyExc_TypeError,
255
                    "\"%s\" must be string or None, not %.200s", name,
256
                    Py_TYPE(src)->tp_name);
257
                return -1;
258
            }
259
            Py_ssize_t len = PyUnicode_GetLength(src);
260
            if (len < 0) {
261
                return -1;
262
            }
263
            if (len != 1) {
264
                PyErr_Format(PyExc_TypeError,
265
                    "\"%s\" must be a 1-character string",
266
                    name);
267
                return -1;
268
            }
269
            *target = PyUnicode_READ_CHAR(src, 0);
270
        }
271
    }
272
    return 0;
273
}
274

275
static int
276
_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
277
{
278
    if (src == NULL) {
279
        *target = dflt;
280
    }
281
    else {
282
        if (!PyUnicode_Check(src)) {
283
            PyErr_Format(PyExc_TypeError,
284
                         "\"%s\" must be string, not %.200s", name,
285
                         Py_TYPE(src)->tp_name);
286
                return -1;
287
        }
288
        Py_ssize_t len = PyUnicode_GetLength(src);
289
        if (len < 0) {
290
            return -1;
291
        }
292
        if (len != 1) {
293
            PyErr_Format(PyExc_TypeError,
294
                         "\"%s\" must be a 1-character string",
295
                         name);
296
            return -1;
297
        }
298
        *target = PyUnicode_READ_CHAR(src, 0);
299
    }
300
    return 0;
301
}
302

303
static int
304
_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
305
{
306
    if (src == NULL)
307
        *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
308
    else {
309
        if (src == Py_None)
310
            *target = NULL;
311
        else if (!PyUnicode_Check(src)) {
312
            PyErr_Format(PyExc_TypeError,
313
                         "\"%s\" must be a string", name);
314
            return -1;
315
        }
316
        else {
317
            Py_XSETREF(*target, Py_NewRef(src));
318
        }
319
    }
320
    return 0;
321
}
322

323
static int
324
dialect_check_quoting(int quoting)
325
{
326
    const StyleDesc *qs;
327

328
    for (qs = quote_styles; qs->name; qs++) {
329
        if ((int)qs->style == quoting)
330
            return 0;
331
    }
332
    PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
333
    return -1;
334
}
335

336
#define D_OFF(x) offsetof(DialectObj, x)
337

338
static struct PyMemberDef Dialect_memberlist[] = {
339
    { "skipinitialspace",   T_BOOL, D_OFF(skipinitialspace), READONLY },
340
    { "doublequote",        T_BOOL, D_OFF(doublequote), READONLY },
341
    { "strict",             T_BOOL, D_OFF(strict), READONLY },
342
    { NULL }
343
};
344

345
static PyGetSetDef Dialect_getsetlist[] = {
346
    { "delimiter",          (getter)Dialect_get_delimiter},
347
    { "escapechar",             (getter)Dialect_get_escapechar},
348
    { "lineterminator",         (getter)Dialect_get_lineterminator},
349
    { "quotechar",              (getter)Dialect_get_quotechar},
350
    { "quoting",                (getter)Dialect_get_quoting},
351
    {NULL},
352
};
353

354
static void
355
Dialect_dealloc(DialectObj *self)
356
{
357
    PyTypeObject *tp = Py_TYPE(self);
358
    PyObject_GC_UnTrack(self);
359
    tp->tp_clear((PyObject *)self);
360
    PyObject_GC_Del(self);
361
    Py_DECREF(tp);
362
}
363

364
static char *dialect_kws[] = {
365
    "dialect",
366
    "delimiter",
367
    "doublequote",
368
    "escapechar",
369
    "lineterminator",
370
    "quotechar",
371
    "quoting",
372
    "skipinitialspace",
373
    "strict",
374
    NULL
375
};
376

377
static _csvstate *
378
_csv_state_from_type(PyTypeObject *type, const char *name)
379
{
380
    PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
381
    if (module == NULL) {
382
        return NULL;
383
    }
384
    _csvstate *module_state = PyModule_GetState(module);
385
    if (module_state == NULL) {
386
        PyErr_Format(PyExc_SystemError,
387
                     "%s: No _csv module state found", name);
388
        return NULL;
389
    }
390
    return module_state;
391
}
392

393
static PyObject *
394
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
395
{
396
    DialectObj *self;
397
    PyObject *ret = NULL;
398
    PyObject *dialect = NULL;
399
    PyObject *delimiter = NULL;
400
    PyObject *doublequote = NULL;
401
    PyObject *escapechar = NULL;
402
    PyObject *lineterminator = NULL;
403
    PyObject *quotechar = NULL;
404
    PyObject *quoting = NULL;
405
    PyObject *skipinitialspace = NULL;
406
    PyObject *strict = NULL;
407

408
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
409
                                     "|OOOOOOOOO", dialect_kws,
410
                                     &dialect,
411
                                     &delimiter,
412
                                     &doublequote,
413
                                     &escapechar,
414
                                     &lineterminator,
415
                                     &quotechar,
416
                                     &quoting,
417
                                     &skipinitialspace,
418
                                     &strict))
419
        return NULL;
420

421
    _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
422
    if (module_state == NULL) {
423
        return NULL;
424
    }
425

426
    if (dialect != NULL) {
427
        if (PyUnicode_Check(dialect)) {
428
            dialect = get_dialect_from_registry(dialect, module_state);
429
            if (dialect == NULL)
430
                return NULL;
431
        }
432
        else
433
            Py_INCREF(dialect);
434
        /* Can we reuse this instance? */
435
        if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
436
            delimiter == NULL &&
437
            doublequote == NULL &&
438
            escapechar == NULL &&
439
            lineterminator == NULL &&
440
            quotechar == NULL &&
441
            quoting == NULL &&
442
            skipinitialspace == NULL &&
443
            strict == NULL)
444
            return dialect;
445
    }
446

447
    self = (DialectObj *)type->tp_alloc(type, 0);
448
    if (self == NULL) {
449
        Py_CLEAR(dialect);
450
        return NULL;
451
    }
452
    self->lineterminator = NULL;
453

454
    Py_XINCREF(delimiter);
455
    Py_XINCREF(doublequote);
456
    Py_XINCREF(escapechar);
457
    Py_XINCREF(lineterminator);
458
    Py_XINCREF(quotechar);
459
    Py_XINCREF(quoting);
460
    Py_XINCREF(skipinitialspace);
461
    Py_XINCREF(strict);
462
    if (dialect != NULL) {
463
#define DIALECT_GETATTR(v, n)                            \
464
        do {                                             \
465
            if (v == NULL) {                             \
466
                v = PyObject_GetAttrString(dialect, n);  \
467
                if (v == NULL)                           \
468
                    PyErr_Clear();                       \
469
            }                                            \
470
        } while (0)
471
        DIALECT_GETATTR(delimiter, "delimiter");
472
        DIALECT_GETATTR(doublequote, "doublequote");
473
        DIALECT_GETATTR(escapechar, "escapechar");
474
        DIALECT_GETATTR(lineterminator, "lineterminator");
475
        DIALECT_GETATTR(quotechar, "quotechar");
476
        DIALECT_GETATTR(quoting, "quoting");
477
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
478
        DIALECT_GETATTR(strict, "strict");
479
    }
480

481
    /* check types and convert to C values */
482
#define DIASET(meth, name, target, src, dflt) \
483
    if (meth(name, target, src, dflt)) \
484
        goto err
485
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
486
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
487
    DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
488
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
489
    DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
490
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
491
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
492
    DIASET(_set_bool, "strict", &self->strict, strict, false);
493

494
    /* validate options */
495
    if (dialect_check_quoting(self->quoting))
496
        goto err;
497
    if (self->delimiter == NOT_SET) {
498
        PyErr_SetString(PyExc_TypeError,
499
                        "\"delimiter\" must be a 1-character string");
500
        goto err;
501
    }
502
    if (quotechar == Py_None && quoting == NULL)
503
        self->quoting = QUOTE_NONE;
504
    if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
505
        PyErr_SetString(PyExc_TypeError,
506
                        "quotechar must be set if quoting enabled");
507
        goto err;
508
    }
509
    if (self->lineterminator == NULL) {
510
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
511
        goto err;
512
    }
513

514
    ret = Py_NewRef(self);
515
err:
516
    Py_CLEAR(self);
517
    Py_CLEAR(dialect);
518
    Py_CLEAR(delimiter);
519
    Py_CLEAR(doublequote);
520
    Py_CLEAR(escapechar);
521
    Py_CLEAR(lineterminator);
522
    Py_CLEAR(quotechar);
523
    Py_CLEAR(quoting);
524
    Py_CLEAR(skipinitialspace);
525
    Py_CLEAR(strict);
526
    return ret;
527
}
528

529
/* Since dialect is now a heap type, it inherits pickling method for
530
 * protocol 0 and 1 from object, therefore it needs to be overridden */
531

532
PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
533

534
/* Implementation of __reduce__ and __reduce_ex__: always raises
 * TypeError naming the instance's type and returns NULL.  `args` is
 * ignored. */
static PyObject *
Dialect_reduce(PyObject *self, PyObject *args)
{
    const char *type_name = _PyType_Name(Py_TYPE(self));

    PyErr_Format(PyExc_TypeError,
        "cannot pickle '%.100s' instances", type_name);
    return NULL;
}
540

541
static struct PyMethodDef dialect_methods[] = {
542
    {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
543
    {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
544
    {NULL, NULL}
545
};
546

547
PyDoc_STRVAR(Dialect_Type_doc,
548
"CSV dialect\n"
549
"\n"
550
"The Dialect type records CSV parsing and generation options.\n");
551

552
static int
553
Dialect_clear(DialectObj *self)
554
{
555
    Py_CLEAR(self->lineterminator);
556
    return 0;
557
}
558

559
static int
560
Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
561
{
562
    Py_VISIT(self->lineterminator);
563
    Py_VISIT(Py_TYPE(self));
564
    return 0;
565
}
566

567
static PyType_Slot Dialect_Type_slots[] = {
568
    {Py_tp_doc, (char*)Dialect_Type_doc},
569
    {Py_tp_members, Dialect_memberlist},
570
    {Py_tp_getset, Dialect_getsetlist},
571
    {Py_tp_new, dialect_new},
572
    {Py_tp_methods, dialect_methods},
573
    {Py_tp_dealloc, Dialect_dealloc},
574
    {Py_tp_clear, Dialect_clear},
575
    {Py_tp_traverse, Dialect_traverse},
576
    {0, NULL}
577
};
578

579
PyType_Spec Dialect_Type_spec = {
580
    .name = "_csv.Dialect",
581
    .basicsize = sizeof(DialectObj),
582
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
583
              Py_TPFLAGS_IMMUTABLETYPE),
584
    .slots = Dialect_Type_slots,
585
};
586

587

588
/*
589
 * Return an instance of the dialect type, given a Python instance or kwarg
590
 * description of the dialect
591
 */
592
static PyObject *
593
_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
594
{
595
    PyObject *type = (PyObject *)module_state->dialect_type;
596
    if (dialect_inst) {
597
        return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
598
    }
599
    else {
600
        return PyObject_VectorcallDict(type, NULL, 0, kwargs);
601
    }
602
}
603

604
/*
 * READER
 */
607
static int
608
parse_save_field(ReaderObj *self)
609
{
610
    PyObject *field;
611

612
    field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
613
                                      (void *) self->field, self->field_len);
614
    if (field == NULL)
615
        return -1;
616
    self->field_len = 0;
617
    if (self->numeric_field) {
618
        PyObject *tmp;
619

620
        self->numeric_field = 0;
621
        tmp = PyNumber_Float(field);
622
        Py_DECREF(field);
623
        if (tmp == NULL)
624
            return -1;
625
        field = tmp;
626
    }
627
    if (PyList_Append(self->fields, field) < 0) {
628
        Py_DECREF(field);
629
        return -1;
630
    }
631
    Py_DECREF(field);
632
    return 0;
633
}
634

635
static int
636
parse_grow_buff(ReaderObj *self)
637
{
638
    assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
639

640
    Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
641
    Py_UCS4 *field_new = self->field;
642
    PyMem_Resize(field_new, Py_UCS4, field_size_new);
643
    if (field_new == NULL) {
644
        PyErr_NoMemory();
645
        return 0;
646
    }
647
    self->field = field_new;
648
    self->field_size = field_size_new;
649
    return 1;
650
}
651

652
static int
653
parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
654
{
655
    if (self->field_len >= module_state->field_limit) {
656
        PyErr_Format(module_state->error_obj,
657
                     "field larger than field limit (%ld)",
658
                     module_state->field_limit);
659
        return -1;
660
    }
661
    if (self->field_len == self->field_size && !parse_grow_buff(self))
662
        return -1;
663
    self->field[self->field_len++] = c;
664
    return 0;
665
}
666

667
static int
668
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
669
{
670
    DialectObj *dialect = self->dialect;
671

672
    switch (self->state) {
673
    case START_RECORD:
674
        /* start of record */
675
        if (c == EOL)
676
            /* empty line - return [] */
677
            break;
678
        else if (c == '\n' || c == '\r') {
679
            self->state = EAT_CRNL;
680
            break;
681
        }
682
        /* normal character - handle as START_FIELD */
683
        self->state = START_FIELD;
684
        /* fallthru */
685
    case START_FIELD:
686
        /* expecting field */
687
        if (c == '\n' || c == '\r' || c == EOL) {
688
            /* save empty field - return [fields] */
689
            if (parse_save_field(self) < 0)
690
                return -1;
691
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
692
        }
693
        else if (c == dialect->quotechar &&
694
                 dialect->quoting != QUOTE_NONE) {
695
            /* start quoted field */
696
            self->state = IN_QUOTED_FIELD;
697
        }
698
        else if (c == dialect->escapechar) {
699
            /* possible escaped character */
700
            self->state = ESCAPED_CHAR;
701
        }
702
        else if (c == ' ' && dialect->skipinitialspace)
703
            /* ignore spaces at start of field */
704
            ;
705
        else if (c == dialect->delimiter) {
706
            /* save empty field */
707
            if (parse_save_field(self) < 0)
708
                return -1;
709
        }
710
        else {
711
            /* begin new unquoted field */
712
            if (dialect->quoting == QUOTE_NONNUMERIC)
713
                self->numeric_field = 1;
714
            if (parse_add_char(self, module_state, c) < 0)
715
                return -1;
716
            self->state = IN_FIELD;
717
        }
718
        break;
719

720
    case ESCAPED_CHAR:
721
        if (c == '\n' || c=='\r') {
722
            if (parse_add_char(self, module_state, c) < 0)
723
                return -1;
724
            self->state = AFTER_ESCAPED_CRNL;
725
            break;
726
        }
727
        if (c == EOL)
728
            c = '\n';
729
        if (parse_add_char(self, module_state, c) < 0)
730
            return -1;
731
        self->state = IN_FIELD;
732
        break;
733

734
    case AFTER_ESCAPED_CRNL:
735
        if (c == EOL)
736
            break;
737
        /*fallthru*/
738

739
    case IN_FIELD:
740
        /* in unquoted field */
741
        if (c == '\n' || c == '\r' || c == EOL) {
742
            /* end of line - return [fields] */
743
            if (parse_save_field(self) < 0)
744
                return -1;
745
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
746
        }
747
        else if (c == dialect->escapechar) {
748
            /* possible escaped character */
749
            self->state = ESCAPED_CHAR;
750
        }
751
        else if (c == dialect->delimiter) {
752
            /* save field - wait for new field */
753
            if (parse_save_field(self) < 0)
754
                return -1;
755
            self->state = START_FIELD;
756
        }
757
        else {
758
            /* normal character - save in field */
759
            if (parse_add_char(self, module_state, c) < 0)
760
                return -1;
761
        }
762
        break;
763

764
    case IN_QUOTED_FIELD:
765
        /* in quoted field */
766
        if (c == EOL)
767
            ;
768
        else if (c == dialect->escapechar) {
769
            /* Possible escape character */
770
            self->state = ESCAPE_IN_QUOTED_FIELD;
771
        }
772
        else if (c == dialect->quotechar &&
773
                 dialect->quoting != QUOTE_NONE) {
774
            if (dialect->doublequote) {
775
                /* doublequote; " represented by "" */
776
                self->state = QUOTE_IN_QUOTED_FIELD;
777
            }
778
            else {
779
                /* end of quote part of field */
780
                self->state = IN_FIELD;
781
            }
782
        }
783
        else {
784
            /* normal character - save in field */
785
            if (parse_add_char(self, module_state, c) < 0)
786
                return -1;
787
        }
788
        break;
789

790
    case ESCAPE_IN_QUOTED_FIELD:
791
        if (c == EOL)
792
            c = '\n';
793
        if (parse_add_char(self, module_state, c) < 0)
794
            return -1;
795
        self->state = IN_QUOTED_FIELD;
796
        break;
797

798
    case QUOTE_IN_QUOTED_FIELD:
799
        /* doublequote - seen a quote in a quoted field */
800
        if (dialect->quoting != QUOTE_NONE &&
801
            c == dialect->quotechar) {
802
            /* save "" as " */
803
            if (parse_add_char(self, module_state, c) < 0)
804
                return -1;
805
            self->state = IN_QUOTED_FIELD;
806
        }
807
        else if (c == dialect->delimiter) {
808
            /* save field - wait for new field */
809
            if (parse_save_field(self) < 0)
810
                return -1;
811
            self->state = START_FIELD;
812
        }
813
        else if (c == '\n' || c == '\r' || c == EOL) {
814
            /* end of line - return [fields] */
815
            if (parse_save_field(self) < 0)
816
                return -1;
817
            self->state = (c == EOL ? START_RECORD : EAT_CRNL);
818
        }
819
        else if (!dialect->strict) {
820
            if (parse_add_char(self, module_state, c) < 0)
821
                return -1;
822
            self->state = IN_FIELD;
823
        }
824
        else {
825
            /* illegal */
826
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
827
                            dialect->delimiter,
828
                            dialect->quotechar);
829
            return -1;
830
        }
831
        break;
832

833
    case EAT_CRNL:
834
        if (c == '\n' || c == '\r')
835
            ;
836
        else if (c == EOL)
837
            self->state = START_RECORD;
838
        else {
839
            PyErr_Format(module_state->error_obj,
840
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
841
            return -1;
842
        }
843
        break;
844

845
    }
846
    return 0;
847
}
848

849
static int
850
parse_reset(ReaderObj *self)
851
{
852
    Py_XSETREF(self->fields, PyList_New(0));
853
    if (self->fields == NULL)
854
        return -1;
855
    self->field_len = 0;
856
    self->state = START_RECORD;
857
    self->numeric_field = 0;
858
    return 0;
859
}
860

861
static PyObject *
862
Reader_iternext(ReaderObj *self)
863
{
864
    PyObject *fields = NULL;
865
    Py_UCS4 c;
866
    Py_ssize_t pos, linelen;
867
    int kind;
868
    const void *data;
869
    PyObject *lineobj;
870

871
    _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
872
                                                   "Reader.__next__");
873
    if (module_state == NULL) {
874
        return NULL;
875
    }
876

877
    if (parse_reset(self) < 0)
878
        return NULL;
879
    do {
880
        lineobj = PyIter_Next(self->input_iter);
881
        if (lineobj == NULL) {
882
            /* End of input OR exception */
883
            if (!PyErr_Occurred() && (self->field_len != 0 ||
884
                                      self->state == IN_QUOTED_FIELD)) {
885
                if (self->dialect->strict)
886
                    PyErr_SetString(module_state->error_obj,
887
                                    "unexpected end of data");
888
                else if (parse_save_field(self) >= 0)
889
                    break;
890
            }
891
            return NULL;
892
        }
893
        if (!PyUnicode_Check(lineobj)) {
894
            PyErr_Format(module_state->error_obj,
895
                         "iterator should return strings, "
896
                         "not %.200s "
897
                         "(the file should be opened in text mode)",
898
                         Py_TYPE(lineobj)->tp_name
899
                );
900
            Py_DECREF(lineobj);
901
            return NULL;
902
        }
903
        ++self->line_num;
904
        kind = PyUnicode_KIND(lineobj);
905
        data = PyUnicode_DATA(lineobj);
906
        pos = 0;
907
        linelen = PyUnicode_GET_LENGTH(lineobj);
908
        while (linelen--) {
909
            c = PyUnicode_READ(kind, data, pos);
910
            if (parse_process_char(self, module_state, c) < 0) {
911
                Py_DECREF(lineobj);
912
                goto err;
913
            }
914
            pos++;
915
        }
916
        Py_DECREF(lineobj);
917
        if (parse_process_char(self, module_state, EOL) < 0)
918
            goto err;
919
    } while (self->state != START_RECORD);
920

921
    fields = self->fields;
922
    self->fields = NULL;
923
err:
924
    return fields;
925
}
926

927
static void
928
Reader_dealloc(ReaderObj *self)
929
{
930
    PyTypeObject *tp = Py_TYPE(self);
931
    PyObject_GC_UnTrack(self);
932
    tp->tp_clear((PyObject *)self);
933
    if (self->field != NULL) {
934
        PyMem_Free(self->field);
935
        self->field = NULL;
936
    }
937
    PyObject_GC_Del(self);
938
    Py_DECREF(tp);
939
}
940

941
static int
942
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
943
{
944
    Py_VISIT(self->dialect);
945
    Py_VISIT(self->input_iter);
946
    Py_VISIT(self->fields);
947
    Py_VISIT(Py_TYPE(self));
948
    return 0;
949
}
950

951
static int
952
Reader_clear(ReaderObj *self)
953
{
954
    Py_CLEAR(self->dialect);
955
    Py_CLEAR(self->input_iter);
956
    Py_CLEAR(self->fields);
957
    return 0;
958
}
959

960
PyDoc_STRVAR(Reader_Type_doc,
961
"CSV reader\n"
962
"\n"
963
"Reader objects are responsible for reading and parsing tabular data\n"
964
"in CSV format.\n"
965
);
966

967
static struct PyMethodDef Reader_methods[] = {
968
    { NULL, NULL }
969
};
970
#define R_OFF(x) offsetof(ReaderObj, x)
971

972
static struct PyMemberDef Reader_memberlist[] = {
973
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
974
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
975
    { NULL }
976
};
977

978

979
static PyType_Slot Reader_Type_slots[] = {
980
    {Py_tp_doc, (char*)Reader_Type_doc},
981
    {Py_tp_traverse, Reader_traverse},
982
    {Py_tp_iter, PyObject_SelfIter},
983
    {Py_tp_iternext, Reader_iternext},
984
    {Py_tp_methods, Reader_methods},
985
    {Py_tp_members, Reader_memberlist},
986
    {Py_tp_clear, Reader_clear},
987
    {Py_tp_dealloc, Reader_dealloc},
988
    {0, NULL}
989
};
990

991
PyType_Spec Reader_Type_spec = {
992
    .name = "_csv.reader",
993
    .basicsize = sizeof(ReaderObj),
994
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
995
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
996
    .slots = Reader_Type_slots
997
};
998

999

1000
/* Implementation of the module-level reader() function.
 *
 * args must hold one or two positional arguments: the object to iterate
 * over for input and, optionally, a dialect.  The dialect and
 * keyword_args are handed to _call_dialect() to obtain the reader's
 * dialect object.
 *
 * Returns a new GC-tracked reader object, or NULL with an exception set.
 */
static PyObject *
csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
{
    PyObject *iterator, *dialect = NULL;
    _csvstate *module_state = get_csv_state(module);
    ReaderObj *self = PyObject_GC_New(ReaderObj, module_state->reader_type);

    if (self == NULL) {
        return NULL;
    }

    /* Give every member a defined value before anything can fail, so the
     * half-built object can be released with Py_DECREF() on any error.
     */
    self->dialect = NULL;
    self->fields = NULL;
    self->input_iter = NULL;
    self->field = NULL;
    self->field_size = 0;
    self->line_num = 0;

    if (parse_reset(self) < 0) {
        goto error;
    }

    /* Pass the function name: PyArg_UnpackTuple() puts it at the front of
     * its argument-count error message, which would otherwise begin with
     * an empty name.
     */
    if (!PyArg_UnpackTuple(args, "reader", 1, 2, &iterator, &dialect)) {
        goto error;
    }
    self->input_iter = PyObject_GetIter(iterator);
    if (self->input_iter == NULL) {
        goto error;
    }
    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
                                                keyword_args);
    if (self->dialect == NULL) {
        goto error;
    }

    PyObject_GC_Track(self);
    return (PyObject *)self;

error:
    Py_DECREF(self);
    return NULL;
}
1043

1044
/*
1045
 * WRITER
1046
 */
1047
/* ---------------------------------------------------------------- */
1048
static void
1049
join_reset(WriterObj *self)
1050
{
1051
    self->rec_len = 0;
1052
    self->num_fields = 0;
1053
}
1054

1055
#define MEM_INCR 32768
1056

1057
/* Calculate new record length or append field to record.  Return new
1058
 * record length.
1059
 */
1060
static Py_ssize_t
1061
join_append_data(WriterObj *self, int field_kind, const void *field_data,
1062
                 Py_ssize_t field_len, int *quoted,
1063
                 int copy_phase)
1064
{
1065
    DialectObj *dialect = self->dialect;
1066
    int i;
1067
    Py_ssize_t rec_len;
1068

1069
#define INCLEN \
1070
    do {\
1071
        if (!copy_phase && rec_len == PY_SSIZE_T_MAX) {    \
1072
            goto overflow; \
1073
        } \
1074
        rec_len++; \
1075
    } while(0)
1076

1077
#define ADDCH(c)                                \
1078
    do {\
1079
        if (copy_phase) \
1080
            self->rec[rec_len] = c;\
1081
        INCLEN;\
1082
    } while(0)
1083

1084
    rec_len = self->rec_len;
1085

1086
    /* If this is not the first field we need a field separator */
1087
    if (self->num_fields > 0)
1088
        ADDCH(dialect->delimiter);
1089

1090
    /* Handle preceding quote */
1091
    if (copy_phase && *quoted)
1092
        ADDCH(dialect->quotechar);
1093

1094
    /* Copy/count field data */
1095
    /* If field is null just pass over */
1096
    for (i = 0; field_data && (i < field_len); i++) {
1097
        Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1098
        int want_escape = 0;
1099

1100
        if (c == dialect->delimiter ||
1101
            c == dialect->escapechar ||
1102
            c == dialect->quotechar  ||
1103
            PyUnicode_FindChar(
1104
                dialect->lineterminator, c, 0,
1105
                PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1106
            if (dialect->quoting == QUOTE_NONE)
1107
                want_escape = 1;
1108
            else {
1109
                if (c == dialect->quotechar) {
1110
                    if (dialect->doublequote)
1111
                        ADDCH(dialect->quotechar);
1112
                    else
1113
                        want_escape = 1;
1114
                }
1115
                else if (c == dialect->escapechar) {
1116
                    want_escape = 1;
1117
                }
1118
                if (!want_escape)
1119
                    *quoted = 1;
1120
            }
1121
            if (want_escape) {
1122
                if (dialect->escapechar == NOT_SET) {
1123
                    PyErr_Format(self->error_obj,
1124
                                 "need to escape, but no escapechar set");
1125
                    return -1;
1126
                }
1127
                ADDCH(dialect->escapechar);
1128
            }
1129
        }
1130
        /* Copy field character into record buffer.
1131
         */
1132
        ADDCH(c);
1133
    }
1134

1135
    if (*quoted) {
1136
        if (copy_phase)
1137
            ADDCH(dialect->quotechar);
1138
        else {
1139
            INCLEN; /* starting quote */
1140
            INCLEN; /* ending quote */
1141
        }
1142
    }
1143
    return rec_len;
1144

1145
  overflow:
1146
    PyErr_NoMemory();
1147
    return -1;
1148
#undef ADDCH
1149
#undef INCLEN
1150
}
1151

1152
static int
1153
join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1154
{
1155
    assert(rec_len >= 0);
1156

1157
    if (rec_len > self->rec_size) {
1158
        size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1159
        Py_UCS4 *rec_new = self->rec;
1160
        PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1161
        if (rec_new == NULL) {
1162
            PyErr_NoMemory();
1163
            return 0;
1164
        }
1165
        self->rec = rec_new;
1166
        self->rec_size = (Py_ssize_t)rec_size_new;
1167
    }
1168
    return 1;
1169
}
1170

1171
static int
1172
join_append(WriterObj *self, PyObject *field, int quoted)
1173
{
1174
    int field_kind = -1;
1175
    const void *field_data = NULL;
1176
    Py_ssize_t field_len = 0;
1177
    Py_ssize_t rec_len;
1178

1179
    if (field != NULL) {
1180
        field_kind = PyUnicode_KIND(field);
1181
        field_data = PyUnicode_DATA(field);
1182
        field_len = PyUnicode_GET_LENGTH(field);
1183
    }
1184
    rec_len = join_append_data(self, field_kind, field_data, field_len,
1185
                               &quoted, 0);
1186
    if (rec_len < 0)
1187
        return 0;
1188

1189
    /* grow record buffer if necessary */
1190
    if (!join_check_rec_size(self, rec_len))
1191
        return 0;
1192

1193
    self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1194
                                     &quoted, 1);
1195
    self->num_fields++;
1196

1197
    return 1;
1198
}
1199

1200
static int
1201
join_append_lineterminator(WriterObj *self)
1202
{
1203
    Py_ssize_t terminator_len, i;
1204
    int term_kind;
1205
    const void *term_data;
1206

1207
    terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1208
    if (terminator_len == -1)
1209
        return 0;
1210

1211
    /* grow record buffer if necessary */
1212
    if (!join_check_rec_size(self, self->rec_len + terminator_len))
1213
        return 0;
1214

1215
    term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1216
    term_data = PyUnicode_DATA(self->dialect->lineterminator);
1217
    for (i = 0; i < terminator_len; i++)
1218
        self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1219
    self->rec_len += terminator_len;
1220

1221
    return 1;
1222
}
1223

1224
PyDoc_STRVAR(csv_writerow_doc,
1225
"writerow(iterable)\n"
1226
"\n"
1227
"Construct and write a CSV record from an iterable of fields.  Non-string\n"
1228
"elements will be converted to string.");
1229

1230
static PyObject *
1231
csv_writerow(WriterObj *self, PyObject *seq)
1232
{
1233
    DialectObj *dialect = self->dialect;
1234
    PyObject *iter, *field, *line, *result;
1235

1236
    iter = PyObject_GetIter(seq);
1237
    if (iter == NULL) {
1238
        if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1239
            PyErr_Format(self->error_obj,
1240
                         "iterable expected, not %.200s",
1241
                         Py_TYPE(seq)->tp_name);
1242
        }
1243
        return NULL;
1244
    }
1245

1246
    /* Join all fields in internal buffer.
1247
     */
1248
    join_reset(self);
1249
    while ((field = PyIter_Next(iter))) {
1250
        int append_ok;
1251
        int quoted;
1252

1253
        switch (dialect->quoting) {
1254
        case QUOTE_NONNUMERIC:
1255
            quoted = !PyNumber_Check(field);
1256
            break;
1257
        case QUOTE_ALL:
1258
            quoted = 1;
1259
            break;
1260
        case QUOTE_STRINGS:
1261
            quoted = PyUnicode_Check(field);
1262
            break;
1263
        case QUOTE_NOTNULL:
1264
            quoted = field != Py_None;
1265
            break;
1266
        default:
1267
            quoted = 0;
1268
            break;
1269
        }
1270

1271
        if (PyUnicode_Check(field)) {
1272
            append_ok = join_append(self, field, quoted);
1273
            Py_DECREF(field);
1274
        }
1275
        else if (field == Py_None) {
1276
            append_ok = join_append(self, NULL, quoted);
1277
            Py_DECREF(field);
1278
        }
1279
        else {
1280
            PyObject *str;
1281

1282
            str = PyObject_Str(field);
1283
            Py_DECREF(field);
1284
            if (str == NULL) {
1285
                Py_DECREF(iter);
1286
                return NULL;
1287
            }
1288
            append_ok = join_append(self, str, quoted);
1289
            Py_DECREF(str);
1290
        }
1291
        if (!append_ok) {
1292
            Py_DECREF(iter);
1293
            return NULL;
1294
        }
1295
    }
1296
    Py_DECREF(iter);
1297
    if (PyErr_Occurred())
1298
        return NULL;
1299

1300
    if (self->num_fields > 0 && self->rec_len == 0) {
1301
        if (dialect->quoting == QUOTE_NONE) {
1302
            PyErr_Format(self->error_obj,
1303
                "single empty field record must be quoted");
1304
            return NULL;
1305
        }
1306
        self->num_fields--;
1307
        if (!join_append(self, NULL, 1))
1308
            return NULL;
1309
    }
1310

1311
    /* Add line terminator.
1312
     */
1313
    if (!join_append_lineterminator(self)) {
1314
        return NULL;
1315
    }
1316

1317
    line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1318
                                     (void *) self->rec, self->rec_len);
1319
    if (line == NULL) {
1320
        return NULL;
1321
    }
1322
    result = PyObject_CallOneArg(self->write, line);
1323
    Py_DECREF(line);
1324
    return result;
1325
}
1326

1327
PyDoc_STRVAR(csv_writerows_doc,
1328
"writerows(iterable of iterables)\n"
1329
"\n"
1330
"Construct and write a series of iterables to a csv file.  Non-string\n"
1331
"elements will be converted to string.");
1332

1333
static PyObject *
1334
csv_writerows(WriterObj *self, PyObject *seqseq)
1335
{
1336
    PyObject *row_iter, *row_obj, *result;
1337

1338
    row_iter = PyObject_GetIter(seqseq);
1339
    if (row_iter == NULL) {
1340
        return NULL;
1341
    }
1342
    while ((row_obj = PyIter_Next(row_iter))) {
1343
        result = csv_writerow(self, row_obj);
1344
        Py_DECREF(row_obj);
1345
        if (!result) {
1346
            Py_DECREF(row_iter);
1347
            return NULL;
1348
        }
1349
        else
1350
             Py_DECREF(result);
1351
    }
1352
    Py_DECREF(row_iter);
1353
    if (PyErr_Occurred())
1354
        return NULL;
1355
    Py_RETURN_NONE;
1356
}
1357

1358
static struct PyMethodDef Writer_methods[] = {
1359
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1360
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1361
    { NULL, NULL }
1362
};
1363

1364
#define W_OFF(x) offsetof(WriterObj, x)
1365

1366
static struct PyMemberDef Writer_memberlist[] = {
1367
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1368
    { NULL }
1369
};
1370

1371
static int
1372
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1373
{
1374
    Py_VISIT(self->dialect);
1375
    Py_VISIT(self->write);
1376
    Py_VISIT(self->error_obj);
1377
    Py_VISIT(Py_TYPE(self));
1378
    return 0;
1379
}
1380

1381
static int
1382
Writer_clear(WriterObj *self)
1383
{
1384
    Py_CLEAR(self->dialect);
1385
    Py_CLEAR(self->write);
1386
    Py_CLEAR(self->error_obj);
1387
    return 0;
1388
}
1389

1390
static void
1391
Writer_dealloc(WriterObj *self)
1392
{
1393
    PyTypeObject *tp = Py_TYPE(self);
1394
    PyObject_GC_UnTrack(self);
1395
    tp->tp_clear((PyObject *)self);
1396
    if (self->rec != NULL) {
1397
        PyMem_Free(self->rec);
1398
    }
1399
    PyObject_GC_Del(self);
1400
    Py_DECREF(tp);
1401
}
1402

1403
PyDoc_STRVAR(Writer_Type_doc,
1404
"CSV writer\n"
1405
"\n"
1406
"Writer objects are responsible for generating tabular data\n"
1407
"in CSV format from sequence input.\n"
1408
);
1409

1410
static PyType_Slot Writer_Type_slots[] = {
1411
    {Py_tp_doc, (char*)Writer_Type_doc},
1412
    {Py_tp_traverse, Writer_traverse},
1413
    {Py_tp_clear, Writer_clear},
1414
    {Py_tp_dealloc, Writer_dealloc},
1415
    {Py_tp_methods, Writer_methods},
1416
    {Py_tp_members, Writer_memberlist},
1417
    {0, NULL}
1418
};
1419

1420
PyType_Spec Writer_Type_spec = {
1421
    .name = "_csv.writer",
1422
    .basicsize = sizeof(WriterObj),
1423
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1424
              Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1425
    .slots = Writer_Type_slots,
1426
};
1427

1428

1429
/* Implementation of the module-level writer() function.
 *
 * args must hold one or two positional arguments: an object with a
 * callable "write" attribute and, optionally, a dialect.  The dialect and
 * keyword_args are handed to _call_dialect() to obtain the writer's
 * dialect object.
 *
 * Returns a new GC-tracked writer object, or NULL with an exception set.
 */
static PyObject *
csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
{
    PyObject *output_file, *dialect = NULL;
    _csvstate *module_state = get_csv_state(module);
    WriterObj *self = PyObject_GC_New(WriterObj, module_state->writer_type);

    if (self == NULL) {
        return NULL;
    }

    /* Give every member a defined value before anything can fail, so the
     * half-built object can be released with Py_DECREF() on any error.
     */
    self->dialect = NULL;
    self->write = NULL;

    self->rec = NULL;
    self->rec_size = 0;
    self->rec_len = 0;
    self->num_fields = 0;

    self->error_obj = Py_NewRef(module_state->error_obj);

    /* Pass the function name: PyArg_UnpackTuple() puts it at the front of
     * its argument-count error message, which would otherwise begin with
     * an empty name.
     */
    if (!PyArg_UnpackTuple(args, "writer", 1, 2, &output_file, &dialect)) {
        goto error;
    }
    if (_PyObject_LookupAttr(output_file,
                             module_state->str_write,
                             &self->write) < 0) {
        goto error;
    }
    /* A missing attribute leaves self->write NULL without an exception. */
    if (self->write == NULL || !PyCallable_Check(self->write)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument 1 must have a \"write\" method");
        goto error;
    }
    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
                                                keyword_args);
    if (self->dialect == NULL) {
        goto error;
    }

    PyObject_GC_Track(self);
    return (PyObject *)self;

error:
    Py_DECREF(self);
    return NULL;
}
1474

1475
/*
1476
 * DIALECT REGISTRY
1477
 */
1478

1479
/*[clinic input]
1480
_csv.list_dialects
1481

1482
Return a list of all known dialect names.
1483

1484
    names = csv.list_dialects()
1485
[clinic start generated code]*/
1486

1487
static PyObject *
_csv_list_dialects_impl(PyObject *module)
/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
{
    /* The names are the keys of the module's dialect registry dict. */
    _csvstate *module_state = get_csv_state(module);

    return PyDict_Keys(module_state->dialects);
}
1493

1494
/* Implementation of the module-level register_dialect() function.
 *
 * args must hold one or two positional arguments: the name, which must be
 * a str, and optionally a dialect.  The dialect and kwargs are handed to
 * _call_dialect(); its result is stored in the module's dialect registry
 * under the name.
 *
 * Returns None, or NULL with an exception set.
 */
static PyObject *
csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
{
    PyObject *name_obj, *dialect_obj = NULL;
    _csvstate *module_state = get_csv_state(module);

    /* Pass the function name: PyArg_UnpackTuple() puts it at the front of
     * its argument-count error message, which would otherwise begin with
     * an empty name.
     */
    if (!PyArg_UnpackTuple(args, "register_dialect", 1, 2,
                           &name_obj, &dialect_obj)) {
        return NULL;
    }
    if (!PyUnicode_Check(name_obj)) {
        PyErr_SetString(PyExc_TypeError,
                        "dialect name must be a string");
        return NULL;
    }

    PyObject *dialect = _call_dialect(module_state, dialect_obj, kwargs);
    if (dialect == NULL) {
        return NULL;
    }

    /* The dict holds its own reference; ours is released either way. */
    int rc = PyDict_SetItem(module_state->dialects, name_obj, dialect);
    Py_DECREF(dialect);
    if (rc < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
1518

1519

1520
/*[clinic input]
1521
_csv.unregister_dialect
1522

1523
    name: object
1524

1525
Delete the name/dialect mapping associated with a string name.
1526

1527
    csv.unregister_dialect(name)
1528
[clinic start generated code]*/
1529

1530
static PyObject *
_csv_unregister_dialect_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
{
    _csvstate *module_state = get_csv_state(module);

    if (PyDict_DelItem(module_state->dialects, name) >= 0) {
        Py_RETURN_NONE;
    }

    /* A missing name is reported as the module's own error type; any
     * other exception is passed through unchanged.
     */
    if (PyErr_ExceptionMatches(PyExc_KeyError)) {
        PyErr_Format(module_state->error_obj, "unknown dialect");
    }
    return NULL;
}
1543

1544
/*[clinic input]
1545
_csv.get_dialect
1546

1547
    name: object
1548

1549
Return the dialect instance associated with name.
1550

1551
    dialect = csv.get_dialect(name)
1552
[clinic start generated code]*/
1553

1554
static PyObject *
_csv_get_dialect_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
{
    /* The lookup itself is delegated to get_dialect_from_registry(). */
    _csvstate *module_state = get_csv_state(module);

    return get_dialect_from_registry(name, module_state);
}
1560

1561
/*[clinic input]
1562
_csv.field_size_limit
1563

1564
    new_limit: object = NULL
1565

1566
Sets an upper limit on parsed fields.
1567

1568
    csv.field_size_limit([limit])
1569

1570
Returns old limit. If limit is not given, no new limit is set and
1571
the old limit is returned
1572
[clinic start generated code]*/
1573

1574
static PyObject *
_csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
{
    _csvstate *module_state = get_csv_state(module);
    const long old_limit = module_state->field_limit;

    /* No argument given: only report the current limit. */
    if (new_limit == NULL) {
        return PyLong_FromLong(old_limit);
    }

    if (!PyLong_CheckExact(new_limit)) {
        PyErr_Format(PyExc_TypeError,
                     "limit must be an integer");
        return NULL;
    }

    /* Convert into a local first so the stored limit is only replaced
     * once the conversion has succeeded.
     */
    long limit = PyLong_AsLong(new_limit);
    if (limit == -1 && PyErr_Occurred()) {
        return NULL;
    }
    module_state->field_limit = limit;

    return PyLong_FromLong(old_limit);
}
1594

1595
static PyType_Slot error_slots[] = {
1596
    {0, NULL},
1597
};
1598

1599
PyType_Spec error_spec = {
1600
    .name = "_csv.Error",
1601
    .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
1602
    .slots = error_slots,
1603
};
1604

1605
/*
1606
 * MODULE
1607
 */
1608

1609
PyDoc_STRVAR(csv_module_doc,
1610
"CSV parsing and writing.\n"
1611
"\n"
1612
"This module provides classes that assist in the reading and writing\n"
1613
"of Comma Separated Value (CSV) files, and implements the interface\n"
1614
"described by PEP 305.  Although many CSV files are simple to parse,\n"
1615
"the format is not formally defined by a stable specification and\n"
1616
"is subtle enough that parsing lines of a CSV file with something\n"
1617
"like line.split(\",\") is bound to fail.  The module supports three\n"
1618
"basic APIs: reading, writing, and registration of dialects.\n"
1619
"\n"
1620
"\n"
1621
"DIALECT REGISTRATION:\n"
1622
"\n"
1623
"Readers and writers support a dialect argument, which is a convenient\n"
1624
"handle on a group of settings.  When the dialect argument is a string,\n"
1625
"it identifies one of the dialects previously registered with the module.\n"
1626
"If it is a class or instance, the attributes of the argument are used as\n"
1627
"the settings for the reader or writer:\n"
1628
"\n"
1629
"    class excel:\n"
1630
"        delimiter = ','\n"
1631
"        quotechar = '\"'\n"
1632
"        escapechar = None\n"
1633
"        doublequote = True\n"
1634
"        skipinitialspace = False\n"
1635
"        lineterminator = '\\r\\n'\n"
1636
"        quoting = QUOTE_MINIMAL\n"
1637
"\n"
1638
"SETTINGS:\n"
1639
"\n"
1640
"    * quotechar - specifies a one-character string to use as the\n"
1641
"        quoting character.  It defaults to '\"'.\n"
1642
"    * delimiter - specifies a one-character string to use as the\n"
1643
"        field separator.  It defaults to ','.\n"
1644
"    * skipinitialspace - specifies how to interpret spaces which\n"
1645
"        immediately follow a delimiter.  It defaults to False, which\n"
1646
"        means that spaces immediately following a delimiter is part\n"
1647
"        of the following field.\n"
1648
"    * lineterminator -  specifies the character sequence which should\n"
1649
"        terminate rows.\n"
1650
"    * quoting - controls when quotes should be generated by the writer.\n"
1651
"        It can take on any of the following module constants:\n"
1652
"\n"
1653
"        csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1654
"            field contains either the quotechar or the delimiter\n"
1655
"        csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1656
"        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1657
"            fields which do not parse as integers or floating point\n"
1658
"            numbers.\n"
1659
"        csv.QUOTE_STRINGS means that quotes are always placed around\n"
1660
"            fields which are strings.  Note that the Python value None\n"
1661
"            is not a string.\n"
1662
"        csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
1663
"            that are not the Python value None.\n"
1664
"        csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1665
"    * escapechar - specifies a one-character string used to escape\n"
1666
"        the delimiter when quoting is set to QUOTE_NONE.\n"
1667
"    * doublequote - controls the handling of quotes inside fields.  When\n"
1668
"        True, two consecutive quotes are interpreted as one during read,\n"
1669
"        and when writing, each quote character embedded in the data is\n"
1670
"        written as two quotes\n");
1671

1672
PyDoc_STRVAR(csv_reader_doc,
1673
"    csv_reader = reader(iterable [, dialect='excel']\n"
1674
"                        [optional keyword args])\n"
1675
"    for row in csv_reader:\n"
1676
"        process(row)\n"
1677
"\n"
1678
"The \"iterable\" argument can be any object that returns a line\n"
1679
"of input for each iteration, such as a file object or a list.  The\n"
1680
"optional \"dialect\" parameter is discussed below.  The function\n"
1681
"also accepts optional keyword arguments which override settings\n"
1682
"provided by the dialect.\n"
1683
"\n"
1684
"The returned object is an iterator.  Each iteration returns a row\n"
1685
"of the CSV file (which can span multiple input lines).\n");
1686

1687
PyDoc_STRVAR(csv_writer_doc,
1688
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1689
"                            [optional keyword args])\n"
1690
"    for row in sequence:\n"
1691
"        csv_writer.writerow(row)\n"
1692
"\n"
1693
"    [or]\n"
1694
"\n"
1695
"    csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1696
"                            [optional keyword args])\n"
1697
"    csv_writer.writerows(rows)\n"
1698
"\n"
1699
"The \"fileobj\" argument can be any object that supports the file API.\n");
1700

1701
PyDoc_STRVAR(csv_register_dialect_doc,
1702
"Create a mapping from a string name to a dialect class.\n"
1703
"    dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1704

1705
static struct PyMethodDef csv_methods[] = {
1706
    { "reader", _PyCFunction_CAST(csv_reader),
1707
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1708
    { "writer", _PyCFunction_CAST(csv_writer),
1709
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1710
    { "register_dialect", _PyCFunction_CAST(csv_register_dialect),
1711
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1712
    _CSV_LIST_DIALECTS_METHODDEF
1713
    _CSV_UNREGISTER_DIALECT_METHODDEF
1714
    _CSV_GET_DIALECT_METHODDEF
1715
    _CSV_FIELD_SIZE_LIMIT_METHODDEF
1716
    { NULL, NULL }
1717
};
1718

1719
static int
1720
csv_exec(PyObject *module) {
1721
    const StyleDesc *style;
1722
    PyObject *temp;
1723
    _csvstate *module_state = get_csv_state(module);
1724

1725
    temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1726
    module_state->dialect_type = (PyTypeObject *)temp;
1727
    if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1728
        return -1;
1729
    }
1730

1731
    temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1732
    module_state->reader_type = (PyTypeObject *)temp;
1733
    if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1734
        return -1;
1735
    }
1736

1737
    temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1738
    module_state->writer_type = (PyTypeObject *)temp;
1739
    if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1740
        return -1;
1741
    }
1742

1743
    /* Add version to the module. */
1744
    if (PyModule_AddStringConstant(module, "__version__",
1745
                                   MODULE_VERSION) == -1) {
1746
        return -1;
1747
    }
1748

1749
    /* Set the field limit */
1750
    module_state->field_limit = 128 * 1024;
1751

1752
    /* Add _dialects dictionary */
1753
    module_state->dialects = PyDict_New();
1754
    if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1755
        return -1;
1756
    }
1757

1758
    /* Add quote styles into dictionary */
1759
    for (style = quote_styles; style->name; style++) {
1760
        if (PyModule_AddIntConstant(module, style->name,
1761
                                    style->style) == -1)
1762
            return -1;
1763
    }
1764

1765
    /* Add the CSV exception object to the module. */
1766
    PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1767
    if (bases == NULL) {
1768
        return -1;
1769
    }
1770
    module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1771
                                                       bases);
1772
    Py_DECREF(bases);
1773
    if (module_state->error_obj == NULL) {
1774
        return -1;
1775
    }
1776
    if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1777
        return -1;
1778
    }
1779

1780
    module_state->str_write = PyUnicode_InternFromString("write");
1781
    if (module_state->str_write == NULL) {
1782
        return -1;
1783
    }
1784
    return 0;
1785
}
1786

1787
static PyModuleDef_Slot csv_slots[] = {
1788
    {Py_mod_exec, csv_exec},
1789
    {Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1790
    {0, NULL}
1791
};
1792

1793
static struct PyModuleDef _csvmodule = {
1794
    PyModuleDef_HEAD_INIT,
1795
    "_csv",
1796
    csv_module_doc,
1797
    sizeof(_csvstate),
1798
    csv_methods,
1799
    csv_slots,
1800
    _csv_traverse,
1801
    _csv_clear,
1802
    _csv_free
1803
};
1804

1805
PyMODINIT_FUNC
1806
PyInit__csv(void)
1807
{
1808
    return PyModuleDef_Init(&_csvmodule);
1809
}
1810

1811
Product

Resources

Company