Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_csv.c
12 views
1
/* csv module */
2
3
/*
4
5
This module provides the low-level underpinnings of a CSV reading/writing
6
module. Users should not use this module directly, but import the csv.py
7
module instead.
8
9
*/
10
11
#define MODULE_VERSION "1.0"
12
13
#include "Python.h"
14
#include "structmember.h" // PyMemberDef
15
#include <stdbool.h>
16
17
/*[clinic input]
18
module _csv
19
[clinic start generated code]*/
20
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=385118b71aa43706]*/
21
22
#include "clinic/_csv.c.h"
23
#define NOT_SET ((Py_UCS4)-1)
24
#define EOL ((Py_UCS4)-2)
25
26
27
typedef struct {
28
PyObject *error_obj; /* CSV exception */
29
PyObject *dialects; /* Dialect registry */
30
PyTypeObject *dialect_type;
31
PyTypeObject *reader_type;
32
PyTypeObject *writer_type;
33
long field_limit; /* max parsed field size */
34
PyObject *str_write;
35
} _csvstate;
36
37
static struct PyModuleDef _csvmodule;
38
39
static inline _csvstate*
40
get_csv_state(PyObject *module)
41
{
42
void *state = PyModule_GetState(module);
43
assert(state != NULL);
44
return (_csvstate *)state;
45
}
46
47
static int
48
_csv_clear(PyObject *module)
49
{
50
_csvstate *module_state = PyModule_GetState(module);
51
Py_CLEAR(module_state->error_obj);
52
Py_CLEAR(module_state->dialects);
53
Py_CLEAR(module_state->dialect_type);
54
Py_CLEAR(module_state->reader_type);
55
Py_CLEAR(module_state->writer_type);
56
Py_CLEAR(module_state->str_write);
57
return 0;
58
}
59
60
static int
61
_csv_traverse(PyObject *module, visitproc visit, void *arg)
62
{
63
_csvstate *module_state = PyModule_GetState(module);
64
Py_VISIT(module_state->error_obj);
65
Py_VISIT(module_state->dialects);
66
Py_VISIT(module_state->dialect_type);
67
Py_VISIT(module_state->reader_type);
68
Py_VISIT(module_state->writer_type);
69
return 0;
70
}
71
72
static void
73
_csv_free(void *module)
74
{
75
_csv_clear((PyObject *)module);
76
}
77
78
/* States of the reader's character-at-a-time parser
 * (see parse_process_char). */
typedef enum {
    START_RECORD,            /* at the start of a record */
    START_FIELD,             /* expecting a field */
    ESCAPED_CHAR,            /* escapechar seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,   /* quotechar seen inside a quoted field */
    EAT_CRNL,                /* consuming line-end characters */
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just stored */
} ParserState;
83
84
/* Quoting policies selectable through a dialect's "quoting" option.
 * The numeric values follow declaration order, starting at 0. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE,
    QUOTE_STRINGS,
    QUOTE_NOTNULL
} QuoteStyle;
88
89
typedef struct {
90
QuoteStyle style;
91
const char *name;
92
} StyleDesc;
93
94
static const StyleDesc quote_styles[] = {
95
{ QUOTE_MINIMAL, "QUOTE_MINIMAL" },
96
{ QUOTE_ALL, "QUOTE_ALL" },
97
{ QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
98
{ QUOTE_NONE, "QUOTE_NONE" },
99
{ QUOTE_STRINGS, "QUOTE_STRINGS" },
100
{ QUOTE_NOTNULL, "QUOTE_NOTNULL" },
101
{ 0 }
102
};
103
104
typedef struct {
105
PyObject_HEAD
106
107
char doublequote; /* is " represented by ""? */
108
char skipinitialspace; /* ignore spaces following delimiter? */
109
char strict; /* raise exception on bad CSV */
110
int quoting; /* style of quoting to write */
111
Py_UCS4 delimiter; /* field separator */
112
Py_UCS4 quotechar; /* quote character */
113
Py_UCS4 escapechar; /* escape character */
114
PyObject *lineterminator; /* string to write between records */
115
116
} DialectObj;
117
118
typedef struct {
119
PyObject_HEAD
120
121
PyObject *input_iter; /* iterate over this for input lines */
122
123
DialectObj *dialect; /* parsing dialect */
124
125
PyObject *fields; /* field list for current record */
126
ParserState state; /* current CSV parse state */
127
Py_UCS4 *field; /* temporary buffer */
128
Py_ssize_t field_size; /* size of allocated buffer */
129
Py_ssize_t field_len; /* length of current field */
130
int numeric_field; /* treat field as numeric */
131
unsigned long line_num; /* Source-file line number */
132
} ReaderObj;
133
134
typedef struct {
135
PyObject_HEAD
136
137
PyObject *write; /* write output lines to this file */
138
139
DialectObj *dialect; /* parsing dialect */
140
141
Py_UCS4 *rec; /* buffer for parser.join */
142
Py_ssize_t rec_size; /* size of allocated record */
143
Py_ssize_t rec_len; /* length of record */
144
int num_fields; /* number of fields in record */
145
146
PyObject *error_obj; /* cached error object */
147
} WriterObj;
148
149
/*
150
* DIALECT class
151
*/
152
153
static PyObject *
154
get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
155
{
156
PyObject *dialect_obj;
157
158
dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
159
if (dialect_obj == NULL) {
160
if (!PyErr_Occurred())
161
PyErr_Format(module_state->error_obj, "unknown dialect");
162
}
163
else
164
Py_INCREF(dialect_obj);
165
166
return dialect_obj;
167
}
168
169
static PyObject *
170
get_char_or_None(Py_UCS4 c)
171
{
172
if (c == NOT_SET) {
173
Py_RETURN_NONE;
174
}
175
else
176
return PyUnicode_FromOrdinal(c);
177
}
178
179
static PyObject *
180
Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
181
{
182
return Py_XNewRef(self->lineterminator);
183
}
184
185
static PyObject *
186
Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
187
{
188
return get_char_or_None(self->delimiter);
189
}
190
191
static PyObject *
192
Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
193
{
194
return get_char_or_None(self->escapechar);
195
}
196
197
static PyObject *
198
Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
199
{
200
return get_char_or_None(self->quotechar);
201
}
202
203
static PyObject *
204
Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
205
{
206
return PyLong_FromLong(self->quoting);
207
}
208
209
static int
210
_set_bool(const char *name, char *target, PyObject *src, bool dflt)
211
{
212
if (src == NULL)
213
*target = dflt;
214
else {
215
int b = PyObject_IsTrue(src);
216
if (b < 0)
217
return -1;
218
*target = (char)b;
219
}
220
return 0;
221
}
222
223
static int
224
_set_int(const char *name, int *target, PyObject *src, int dflt)
225
{
226
if (src == NULL)
227
*target = dflt;
228
else {
229
int value;
230
if (!PyLong_CheckExact(src)) {
231
PyErr_Format(PyExc_TypeError,
232
"\"%s\" must be an integer", name);
233
return -1;
234
}
235
value = _PyLong_AsInt(src);
236
if (value == -1 && PyErr_Occurred()) {
237
return -1;
238
}
239
*target = value;
240
}
241
return 0;
242
}
243
244
static int
245
_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
246
{
247
if (src == NULL) {
248
*target = dflt;
249
}
250
else {
251
*target = NOT_SET;
252
if (src != Py_None) {
253
if (!PyUnicode_Check(src)) {
254
PyErr_Format(PyExc_TypeError,
255
"\"%s\" must be string or None, not %.200s", name,
256
Py_TYPE(src)->tp_name);
257
return -1;
258
}
259
Py_ssize_t len = PyUnicode_GetLength(src);
260
if (len < 0) {
261
return -1;
262
}
263
if (len != 1) {
264
PyErr_Format(PyExc_TypeError,
265
"\"%s\" must be a 1-character string",
266
name);
267
return -1;
268
}
269
*target = PyUnicode_READ_CHAR(src, 0);
270
}
271
}
272
return 0;
273
}
274
275
static int
276
_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
277
{
278
if (src == NULL) {
279
*target = dflt;
280
}
281
else {
282
if (!PyUnicode_Check(src)) {
283
PyErr_Format(PyExc_TypeError,
284
"\"%s\" must be string, not %.200s", name,
285
Py_TYPE(src)->tp_name);
286
return -1;
287
}
288
Py_ssize_t len = PyUnicode_GetLength(src);
289
if (len < 0) {
290
return -1;
291
}
292
if (len != 1) {
293
PyErr_Format(PyExc_TypeError,
294
"\"%s\" must be a 1-character string",
295
name);
296
return -1;
297
}
298
*target = PyUnicode_READ_CHAR(src, 0);
299
}
300
return 0;
301
}
302
303
static int
304
_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
305
{
306
if (src == NULL)
307
*target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
308
else {
309
if (src == Py_None)
310
*target = NULL;
311
else if (!PyUnicode_Check(src)) {
312
PyErr_Format(PyExc_TypeError,
313
"\"%s\" must be a string", name);
314
return -1;
315
}
316
else {
317
Py_XSETREF(*target, Py_NewRef(src));
318
}
319
}
320
return 0;
321
}
322
323
static int
324
dialect_check_quoting(int quoting)
325
{
326
const StyleDesc *qs;
327
328
for (qs = quote_styles; qs->name; qs++) {
329
if ((int)qs->style == quoting)
330
return 0;
331
}
332
PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
333
return -1;
334
}
335
336
#define D_OFF(x) offsetof(DialectObj, x)
337
338
static struct PyMemberDef Dialect_memberlist[] = {
339
{ "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
340
{ "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
341
{ "strict", T_BOOL, D_OFF(strict), READONLY },
342
{ NULL }
343
};
344
345
static PyGetSetDef Dialect_getsetlist[] = {
346
{ "delimiter", (getter)Dialect_get_delimiter},
347
{ "escapechar", (getter)Dialect_get_escapechar},
348
{ "lineterminator", (getter)Dialect_get_lineterminator},
349
{ "quotechar", (getter)Dialect_get_quotechar},
350
{ "quoting", (getter)Dialect_get_quoting},
351
{NULL},
352
};
353
354
static void
355
Dialect_dealloc(DialectObj *self)
356
{
357
PyTypeObject *tp = Py_TYPE(self);
358
PyObject_GC_UnTrack(self);
359
tp->tp_clear((PyObject *)self);
360
PyObject_GC_Del(self);
361
Py_DECREF(tp);
362
}
363
364
static char *dialect_kws[] = {
365
"dialect",
366
"delimiter",
367
"doublequote",
368
"escapechar",
369
"lineterminator",
370
"quotechar",
371
"quoting",
372
"skipinitialspace",
373
"strict",
374
NULL
375
};
376
377
static _csvstate *
378
_csv_state_from_type(PyTypeObject *type, const char *name)
379
{
380
PyObject *module = PyType_GetModuleByDef(type, &_csvmodule);
381
if (module == NULL) {
382
return NULL;
383
}
384
_csvstate *module_state = PyModule_GetState(module);
385
if (module_state == NULL) {
386
PyErr_Format(PyExc_SystemError,
387
"%s: No _csv module state found", name);
388
return NULL;
389
}
390
return module_state;
391
}
392
393
static PyObject *
394
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
395
{
396
DialectObj *self;
397
PyObject *ret = NULL;
398
PyObject *dialect = NULL;
399
PyObject *delimiter = NULL;
400
PyObject *doublequote = NULL;
401
PyObject *escapechar = NULL;
402
PyObject *lineterminator = NULL;
403
PyObject *quotechar = NULL;
404
PyObject *quoting = NULL;
405
PyObject *skipinitialspace = NULL;
406
PyObject *strict = NULL;
407
408
if (!PyArg_ParseTupleAndKeywords(args, kwargs,
409
"|OOOOOOOOO", dialect_kws,
410
&dialect,
411
&delimiter,
412
&doublequote,
413
&escapechar,
414
&lineterminator,
415
&quotechar,
416
&quoting,
417
&skipinitialspace,
418
&strict))
419
return NULL;
420
421
_csvstate *module_state = _csv_state_from_type(type, "dialect_new");
422
if (module_state == NULL) {
423
return NULL;
424
}
425
426
if (dialect != NULL) {
427
if (PyUnicode_Check(dialect)) {
428
dialect = get_dialect_from_registry(dialect, module_state);
429
if (dialect == NULL)
430
return NULL;
431
}
432
else
433
Py_INCREF(dialect);
434
/* Can we reuse this instance? */
435
if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
436
delimiter == NULL &&
437
doublequote == NULL &&
438
escapechar == NULL &&
439
lineterminator == NULL &&
440
quotechar == NULL &&
441
quoting == NULL &&
442
skipinitialspace == NULL &&
443
strict == NULL)
444
return dialect;
445
}
446
447
self = (DialectObj *)type->tp_alloc(type, 0);
448
if (self == NULL) {
449
Py_CLEAR(dialect);
450
return NULL;
451
}
452
self->lineterminator = NULL;
453
454
Py_XINCREF(delimiter);
455
Py_XINCREF(doublequote);
456
Py_XINCREF(escapechar);
457
Py_XINCREF(lineterminator);
458
Py_XINCREF(quotechar);
459
Py_XINCREF(quoting);
460
Py_XINCREF(skipinitialspace);
461
Py_XINCREF(strict);
462
if (dialect != NULL) {
463
#define DIALECT_GETATTR(v, n) \
464
do { \
465
if (v == NULL) { \
466
v = PyObject_GetAttrString(dialect, n); \
467
if (v == NULL) \
468
PyErr_Clear(); \
469
} \
470
} while (0)
471
DIALECT_GETATTR(delimiter, "delimiter");
472
DIALECT_GETATTR(doublequote, "doublequote");
473
DIALECT_GETATTR(escapechar, "escapechar");
474
DIALECT_GETATTR(lineterminator, "lineterminator");
475
DIALECT_GETATTR(quotechar, "quotechar");
476
DIALECT_GETATTR(quoting, "quoting");
477
DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
478
DIALECT_GETATTR(strict, "strict");
479
}
480
481
/* check types and convert to C values */
482
#define DIASET(meth, name, target, src, dflt) \
483
if (meth(name, target, src, dflt)) \
484
goto err
485
DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
486
DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
487
DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, NOT_SET);
488
DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
489
DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
490
DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
491
DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
492
DIASET(_set_bool, "strict", &self->strict, strict, false);
493
494
/* validate options */
495
if (dialect_check_quoting(self->quoting))
496
goto err;
497
if (self->delimiter == NOT_SET) {
498
PyErr_SetString(PyExc_TypeError,
499
"\"delimiter\" must be a 1-character string");
500
goto err;
501
}
502
if (quotechar == Py_None && quoting == NULL)
503
self->quoting = QUOTE_NONE;
504
if (self->quoting != QUOTE_NONE && self->quotechar == NOT_SET) {
505
PyErr_SetString(PyExc_TypeError,
506
"quotechar must be set if quoting enabled");
507
goto err;
508
}
509
if (self->lineterminator == NULL) {
510
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
511
goto err;
512
}
513
514
ret = Py_NewRef(self);
515
err:
516
Py_CLEAR(self);
517
Py_CLEAR(dialect);
518
Py_CLEAR(delimiter);
519
Py_CLEAR(doublequote);
520
Py_CLEAR(escapechar);
521
Py_CLEAR(lineterminator);
522
Py_CLEAR(quotechar);
523
Py_CLEAR(quoting);
524
Py_CLEAR(skipinitialspace);
525
Py_CLEAR(strict);
526
return ret;
527
}
528
529
/* Since dialect is now a heap type, it inherits pickling method for
530
* protocol 0 and 1 from object, therefore it needs to be overridden */
531
532
PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
533
534
/* Shared implementation of __reduce__ and __reduce_ex__: always raises
 * TypeError naming the instance's type.  `args` is ignored. */
static PyObject *
Dialect_reduce(PyObject *self, PyObject *args) {
    const char *type_name = _PyType_Name(Py_TYPE(self));

    PyErr_Format(PyExc_TypeError,
                 "cannot pickle '%.100s' instances", type_name);
    return NULL;
}
540
541
static struct PyMethodDef dialect_methods[] = {
542
{"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
543
{"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
544
{NULL, NULL}
545
};
546
547
PyDoc_STRVAR(Dialect_Type_doc,
548
"CSV dialect\n"
549
"\n"
550
"The Dialect type records CSV parsing and generation options.\n");
551
552
static int
553
Dialect_clear(DialectObj *self)
554
{
555
Py_CLEAR(self->lineterminator);
556
return 0;
557
}
558
559
static int
560
Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
561
{
562
Py_VISIT(self->lineterminator);
563
Py_VISIT(Py_TYPE(self));
564
return 0;
565
}
566
567
static PyType_Slot Dialect_Type_slots[] = {
568
{Py_tp_doc, (char*)Dialect_Type_doc},
569
{Py_tp_members, Dialect_memberlist},
570
{Py_tp_getset, Dialect_getsetlist},
571
{Py_tp_new, dialect_new},
572
{Py_tp_methods, dialect_methods},
573
{Py_tp_dealloc, Dialect_dealloc},
574
{Py_tp_clear, Dialect_clear},
575
{Py_tp_traverse, Dialect_traverse},
576
{0, NULL}
577
};
578
579
PyType_Spec Dialect_Type_spec = {
580
.name = "_csv.Dialect",
581
.basicsize = sizeof(DialectObj),
582
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
583
Py_TPFLAGS_IMMUTABLETYPE),
584
.slots = Dialect_Type_slots,
585
};
586
587
588
/*
589
* Return an instance of the dialect type, given a Python instance or kwarg
590
* description of the dialect
591
*/
592
static PyObject *
593
_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
594
{
595
PyObject *type = (PyObject *)module_state->dialect_type;
596
if (dialect_inst) {
597
return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
598
}
599
else {
600
return PyObject_VectorcallDict(type, NULL, 0, kwargs);
601
}
602
}
603
604
/*
605
* READER
606
*/
607
static int
608
parse_save_field(ReaderObj *self)
609
{
610
PyObject *field;
611
612
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
613
(void *) self->field, self->field_len);
614
if (field == NULL)
615
return -1;
616
self->field_len = 0;
617
if (self->numeric_field) {
618
PyObject *tmp;
619
620
self->numeric_field = 0;
621
tmp = PyNumber_Float(field);
622
Py_DECREF(field);
623
if (tmp == NULL)
624
return -1;
625
field = tmp;
626
}
627
if (PyList_Append(self->fields, field) < 0) {
628
Py_DECREF(field);
629
return -1;
630
}
631
Py_DECREF(field);
632
return 0;
633
}
634
635
static int
636
parse_grow_buff(ReaderObj *self)
637
{
638
assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
639
640
Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
641
Py_UCS4 *field_new = self->field;
642
PyMem_Resize(field_new, Py_UCS4, field_size_new);
643
if (field_new == NULL) {
644
PyErr_NoMemory();
645
return 0;
646
}
647
self->field = field_new;
648
self->field_size = field_size_new;
649
return 1;
650
}
651
652
static int
653
parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
654
{
655
if (self->field_len >= module_state->field_limit) {
656
PyErr_Format(module_state->error_obj,
657
"field larger than field limit (%ld)",
658
module_state->field_limit);
659
return -1;
660
}
661
if (self->field_len == self->field_size && !parse_grow_buff(self))
662
return -1;
663
self->field[self->field_len++] = c;
664
return 0;
665
}
666
667
static int
668
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
669
{
670
DialectObj *dialect = self->dialect;
671
672
switch (self->state) {
673
case START_RECORD:
674
/* start of record */
675
if (c == EOL)
676
/* empty line - return [] */
677
break;
678
else if (c == '\n' || c == '\r') {
679
self->state = EAT_CRNL;
680
break;
681
}
682
/* normal character - handle as START_FIELD */
683
self->state = START_FIELD;
684
/* fallthru */
685
case START_FIELD:
686
/* expecting field */
687
if (c == '\n' || c == '\r' || c == EOL) {
688
/* save empty field - return [fields] */
689
if (parse_save_field(self) < 0)
690
return -1;
691
self->state = (c == EOL ? START_RECORD : EAT_CRNL);
692
}
693
else if (c == dialect->quotechar &&
694
dialect->quoting != QUOTE_NONE) {
695
/* start quoted field */
696
self->state = IN_QUOTED_FIELD;
697
}
698
else if (c == dialect->escapechar) {
699
/* possible escaped character */
700
self->state = ESCAPED_CHAR;
701
}
702
else if (c == ' ' && dialect->skipinitialspace)
703
/* ignore spaces at start of field */
704
;
705
else if (c == dialect->delimiter) {
706
/* save empty field */
707
if (parse_save_field(self) < 0)
708
return -1;
709
}
710
else {
711
/* begin new unquoted field */
712
if (dialect->quoting == QUOTE_NONNUMERIC)
713
self->numeric_field = 1;
714
if (parse_add_char(self, module_state, c) < 0)
715
return -1;
716
self->state = IN_FIELD;
717
}
718
break;
719
720
case ESCAPED_CHAR:
721
if (c == '\n' || c=='\r') {
722
if (parse_add_char(self, module_state, c) < 0)
723
return -1;
724
self->state = AFTER_ESCAPED_CRNL;
725
break;
726
}
727
if (c == EOL)
728
c = '\n';
729
if (parse_add_char(self, module_state, c) < 0)
730
return -1;
731
self->state = IN_FIELD;
732
break;
733
734
case AFTER_ESCAPED_CRNL:
735
if (c == EOL)
736
break;
737
/*fallthru*/
738
739
case IN_FIELD:
740
/* in unquoted field */
741
if (c == '\n' || c == '\r' || c == EOL) {
742
/* end of line - return [fields] */
743
if (parse_save_field(self) < 0)
744
return -1;
745
self->state = (c == EOL ? START_RECORD : EAT_CRNL);
746
}
747
else if (c == dialect->escapechar) {
748
/* possible escaped character */
749
self->state = ESCAPED_CHAR;
750
}
751
else if (c == dialect->delimiter) {
752
/* save field - wait for new field */
753
if (parse_save_field(self) < 0)
754
return -1;
755
self->state = START_FIELD;
756
}
757
else {
758
/* normal character - save in field */
759
if (parse_add_char(self, module_state, c) < 0)
760
return -1;
761
}
762
break;
763
764
case IN_QUOTED_FIELD:
765
/* in quoted field */
766
if (c == EOL)
767
;
768
else if (c == dialect->escapechar) {
769
/* Possible escape character */
770
self->state = ESCAPE_IN_QUOTED_FIELD;
771
}
772
else if (c == dialect->quotechar &&
773
dialect->quoting != QUOTE_NONE) {
774
if (dialect->doublequote) {
775
/* doublequote; " represented by "" */
776
self->state = QUOTE_IN_QUOTED_FIELD;
777
}
778
else {
779
/* end of quote part of field */
780
self->state = IN_FIELD;
781
}
782
}
783
else {
784
/* normal character - save in field */
785
if (parse_add_char(self, module_state, c) < 0)
786
return -1;
787
}
788
break;
789
790
case ESCAPE_IN_QUOTED_FIELD:
791
if (c == EOL)
792
c = '\n';
793
if (parse_add_char(self, module_state, c) < 0)
794
return -1;
795
self->state = IN_QUOTED_FIELD;
796
break;
797
798
case QUOTE_IN_QUOTED_FIELD:
799
/* doublequote - seen a quote in a quoted field */
800
if (dialect->quoting != QUOTE_NONE &&
801
c == dialect->quotechar) {
802
/* save "" as " */
803
if (parse_add_char(self, module_state, c) < 0)
804
return -1;
805
self->state = IN_QUOTED_FIELD;
806
}
807
else if (c == dialect->delimiter) {
808
/* save field - wait for new field */
809
if (parse_save_field(self) < 0)
810
return -1;
811
self->state = START_FIELD;
812
}
813
else if (c == '\n' || c == '\r' || c == EOL) {
814
/* end of line - return [fields] */
815
if (parse_save_field(self) < 0)
816
return -1;
817
self->state = (c == EOL ? START_RECORD : EAT_CRNL);
818
}
819
else if (!dialect->strict) {
820
if (parse_add_char(self, module_state, c) < 0)
821
return -1;
822
self->state = IN_FIELD;
823
}
824
else {
825
/* illegal */
826
PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
827
dialect->delimiter,
828
dialect->quotechar);
829
return -1;
830
}
831
break;
832
833
case EAT_CRNL:
834
if (c == '\n' || c == '\r')
835
;
836
else if (c == EOL)
837
self->state = START_RECORD;
838
else {
839
PyErr_Format(module_state->error_obj,
840
"new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
841
return -1;
842
}
843
break;
844
845
}
846
return 0;
847
}
848
849
static int
850
parse_reset(ReaderObj *self)
851
{
852
Py_XSETREF(self->fields, PyList_New(0));
853
if (self->fields == NULL)
854
return -1;
855
self->field_len = 0;
856
self->state = START_RECORD;
857
self->numeric_field = 0;
858
return 0;
859
}
860
861
static PyObject *
862
Reader_iternext(ReaderObj *self)
863
{
864
PyObject *fields = NULL;
865
Py_UCS4 c;
866
Py_ssize_t pos, linelen;
867
int kind;
868
const void *data;
869
PyObject *lineobj;
870
871
_csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
872
"Reader.__next__");
873
if (module_state == NULL) {
874
return NULL;
875
}
876
877
if (parse_reset(self) < 0)
878
return NULL;
879
do {
880
lineobj = PyIter_Next(self->input_iter);
881
if (lineobj == NULL) {
882
/* End of input OR exception */
883
if (!PyErr_Occurred() && (self->field_len != 0 ||
884
self->state == IN_QUOTED_FIELD)) {
885
if (self->dialect->strict)
886
PyErr_SetString(module_state->error_obj,
887
"unexpected end of data");
888
else if (parse_save_field(self) >= 0)
889
break;
890
}
891
return NULL;
892
}
893
if (!PyUnicode_Check(lineobj)) {
894
PyErr_Format(module_state->error_obj,
895
"iterator should return strings, "
896
"not %.200s "
897
"(the file should be opened in text mode)",
898
Py_TYPE(lineobj)->tp_name
899
);
900
Py_DECREF(lineobj);
901
return NULL;
902
}
903
++self->line_num;
904
kind = PyUnicode_KIND(lineobj);
905
data = PyUnicode_DATA(lineobj);
906
pos = 0;
907
linelen = PyUnicode_GET_LENGTH(lineobj);
908
while (linelen--) {
909
c = PyUnicode_READ(kind, data, pos);
910
if (parse_process_char(self, module_state, c) < 0) {
911
Py_DECREF(lineobj);
912
goto err;
913
}
914
pos++;
915
}
916
Py_DECREF(lineobj);
917
if (parse_process_char(self, module_state, EOL) < 0)
918
goto err;
919
} while (self->state != START_RECORD);
920
921
fields = self->fields;
922
self->fields = NULL;
923
err:
924
return fields;
925
}
926
927
static void
928
Reader_dealloc(ReaderObj *self)
929
{
930
PyTypeObject *tp = Py_TYPE(self);
931
PyObject_GC_UnTrack(self);
932
tp->tp_clear((PyObject *)self);
933
if (self->field != NULL) {
934
PyMem_Free(self->field);
935
self->field = NULL;
936
}
937
PyObject_GC_Del(self);
938
Py_DECREF(tp);
939
}
940
941
static int
942
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
943
{
944
Py_VISIT(self->dialect);
945
Py_VISIT(self->input_iter);
946
Py_VISIT(self->fields);
947
Py_VISIT(Py_TYPE(self));
948
return 0;
949
}
950
951
static int
952
Reader_clear(ReaderObj *self)
953
{
954
Py_CLEAR(self->dialect);
955
Py_CLEAR(self->input_iter);
956
Py_CLEAR(self->fields);
957
return 0;
958
}
959
960
PyDoc_STRVAR(Reader_Type_doc,
961
"CSV reader\n"
962
"\n"
963
"Reader objects are responsible for reading and parsing tabular data\n"
964
"in CSV format.\n"
965
);
966
967
static struct PyMethodDef Reader_methods[] = {
968
{ NULL, NULL }
969
};
970
#define R_OFF(x) offsetof(ReaderObj, x)
971
972
static struct PyMemberDef Reader_memberlist[] = {
973
{ "dialect", T_OBJECT, R_OFF(dialect), READONLY },
974
{ "line_num", T_ULONG, R_OFF(line_num), READONLY },
975
{ NULL }
976
};
977
978
979
static PyType_Slot Reader_Type_slots[] = {
980
{Py_tp_doc, (char*)Reader_Type_doc},
981
{Py_tp_traverse, Reader_traverse},
982
{Py_tp_iter, PyObject_SelfIter},
983
{Py_tp_iternext, Reader_iternext},
984
{Py_tp_methods, Reader_methods},
985
{Py_tp_members, Reader_memberlist},
986
{Py_tp_clear, Reader_clear},
987
{Py_tp_dealloc, Reader_dealloc},
988
{0, NULL}
989
};
990
991
PyType_Spec Reader_Type_spec = {
992
.name = "_csv.reader",
993
.basicsize = sizeof(ReaderObj),
994
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
995
Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
996
.slots = Reader_Type_slots
997
};
998
999
1000
/* Module-level reader(iterable [, dialect], **options).
 *
 * Allocates a reader object, obtains an iterator over the first
 * positional argument and builds the dialect from the optional second
 * argument plus the keyword arguments.  Returns the new reader, or NULL
 * with an exception set. */
static PyObject *
csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
{
    PyObject *iterator, *dialect = NULL;
    _csvstate *module_state = get_csv_state(module);
    ReaderObj *self = PyObject_GC_New(ReaderObj, module_state->reader_type);

    if (self == NULL) {
        return NULL;
    }

    /* Put every field into a defined state before anything can fail, so
       that deallocating a half-built object is safe. */
    self->dialect = NULL;
    self->fields = NULL;
    self->input_iter = NULL;
    self->field = NULL;
    self->field_size = 0;
    self->line_num = 0;

    if (parse_reset(self) < 0) {
        goto fail;
    }
    if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
        goto fail;
    }

    self->input_iter = PyObject_GetIter(iterator);
    if (self->input_iter == NULL) {
        goto fail;
    }

    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
                                                keyword_args);
    if (self->dialect == NULL) {
        goto fail;
    }

    PyObject_GC_Track(self);
    return (PyObject *)self;

fail:
    Py_DECREF(self);
    return NULL;
}
1043
1044
/*
1045
* WRITER
1046
*/
1047
/* ---------------------------------------------------------------- */
1048
static void
1049
join_reset(WriterObj *self)
1050
{
1051
self->rec_len = 0;
1052
self->num_fields = 0;
1053
}
1054
1055
#define MEM_INCR 32768
1056
1057
/* Calculate new record length or append field to record. Return new
1058
* record length.
1059
*/
1060
static Py_ssize_t
1061
join_append_data(WriterObj *self, int field_kind, const void *field_data,
1062
Py_ssize_t field_len, int *quoted,
1063
int copy_phase)
1064
{
1065
DialectObj *dialect = self->dialect;
1066
int i;
1067
Py_ssize_t rec_len;
1068
1069
#define INCLEN \
1070
do {\
1071
if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1072
goto overflow; \
1073
} \
1074
rec_len++; \
1075
} while(0)
1076
1077
#define ADDCH(c) \
1078
do {\
1079
if (copy_phase) \
1080
self->rec[rec_len] = c;\
1081
INCLEN;\
1082
} while(0)
1083
1084
rec_len = self->rec_len;
1085
1086
/* If this is not the first field we need a field separator */
1087
if (self->num_fields > 0)
1088
ADDCH(dialect->delimiter);
1089
1090
/* Handle preceding quote */
1091
if (copy_phase && *quoted)
1092
ADDCH(dialect->quotechar);
1093
1094
/* Copy/count field data */
1095
/* If field is null just pass over */
1096
for (i = 0; field_data && (i < field_len); i++) {
1097
Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
1098
int want_escape = 0;
1099
1100
if (c == dialect->delimiter ||
1101
c == dialect->escapechar ||
1102
c == dialect->quotechar ||
1103
PyUnicode_FindChar(
1104
dialect->lineterminator, c, 0,
1105
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
1106
if (dialect->quoting == QUOTE_NONE)
1107
want_escape = 1;
1108
else {
1109
if (c == dialect->quotechar) {
1110
if (dialect->doublequote)
1111
ADDCH(dialect->quotechar);
1112
else
1113
want_escape = 1;
1114
}
1115
else if (c == dialect->escapechar) {
1116
want_escape = 1;
1117
}
1118
if (!want_escape)
1119
*quoted = 1;
1120
}
1121
if (want_escape) {
1122
if (dialect->escapechar == NOT_SET) {
1123
PyErr_Format(self->error_obj,
1124
"need to escape, but no escapechar set");
1125
return -1;
1126
}
1127
ADDCH(dialect->escapechar);
1128
}
1129
}
1130
/* Copy field character into record buffer.
1131
*/
1132
ADDCH(c);
1133
}
1134
1135
if (*quoted) {
1136
if (copy_phase)
1137
ADDCH(dialect->quotechar);
1138
else {
1139
INCLEN; /* starting quote */
1140
INCLEN; /* ending quote */
1141
}
1142
}
1143
return rec_len;
1144
1145
overflow:
1146
PyErr_NoMemory();
1147
return -1;
1148
#undef ADDCH
1149
#undef INCLEN
1150
}
1151
1152
static int
1153
join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
1154
{
1155
assert(rec_len >= 0);
1156
1157
if (rec_len > self->rec_size) {
1158
size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1159
Py_UCS4 *rec_new = self->rec;
1160
PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1161
if (rec_new == NULL) {
1162
PyErr_NoMemory();
1163
return 0;
1164
}
1165
self->rec = rec_new;
1166
self->rec_size = (Py_ssize_t)rec_size_new;
1167
}
1168
return 1;
1169
}
1170
1171
static int
1172
join_append(WriterObj *self, PyObject *field, int quoted)
1173
{
1174
int field_kind = -1;
1175
const void *field_data = NULL;
1176
Py_ssize_t field_len = 0;
1177
Py_ssize_t rec_len;
1178
1179
if (field != NULL) {
1180
field_kind = PyUnicode_KIND(field);
1181
field_data = PyUnicode_DATA(field);
1182
field_len = PyUnicode_GET_LENGTH(field);
1183
}
1184
rec_len = join_append_data(self, field_kind, field_data, field_len,
1185
&quoted, 0);
1186
if (rec_len < 0)
1187
return 0;
1188
1189
/* grow record buffer if necessary */
1190
if (!join_check_rec_size(self, rec_len))
1191
return 0;
1192
1193
self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1194
&quoted, 1);
1195
self->num_fields++;
1196
1197
return 1;
1198
}
1199
1200
static int
1201
join_append_lineterminator(WriterObj *self)
1202
{
1203
Py_ssize_t terminator_len, i;
1204
int term_kind;
1205
const void *term_data;
1206
1207
terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
1208
if (terminator_len == -1)
1209
return 0;
1210
1211
/* grow record buffer if necessary */
1212
if (!join_check_rec_size(self, self->rec_len + terminator_len))
1213
return 0;
1214
1215
term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1216
term_data = PyUnicode_DATA(self->dialect->lineterminator);
1217
for (i = 0; i < terminator_len; i++)
1218
self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
1219
self->rec_len += terminator_len;
1220
1221
return 1;
1222
}
1223
1224
PyDoc_STRVAR(csv_writerow_doc,
1225
"writerow(iterable)\n"
1226
"\n"
1227
"Construct and write a CSV record from an iterable of fields. Non-string\n"
1228
"elements will be converted to string.");
1229
1230
static PyObject *
1231
csv_writerow(WriterObj *self, PyObject *seq)
1232
{
1233
DialectObj *dialect = self->dialect;
1234
PyObject *iter, *field, *line, *result;
1235
1236
iter = PyObject_GetIter(seq);
1237
if (iter == NULL) {
1238
if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1239
PyErr_Format(self->error_obj,
1240
"iterable expected, not %.200s",
1241
Py_TYPE(seq)->tp_name);
1242
}
1243
return NULL;
1244
}
1245
1246
/* Join all fields in internal buffer.
1247
*/
1248
join_reset(self);
1249
while ((field = PyIter_Next(iter))) {
1250
int append_ok;
1251
int quoted;
1252
1253
switch (dialect->quoting) {
1254
case QUOTE_NONNUMERIC:
1255
quoted = !PyNumber_Check(field);
1256
break;
1257
case QUOTE_ALL:
1258
quoted = 1;
1259
break;
1260
case QUOTE_STRINGS:
1261
quoted = PyUnicode_Check(field);
1262
break;
1263
case QUOTE_NOTNULL:
1264
quoted = field != Py_None;
1265
break;
1266
default:
1267
quoted = 0;
1268
break;
1269
}
1270
1271
if (PyUnicode_Check(field)) {
1272
append_ok = join_append(self, field, quoted);
1273
Py_DECREF(field);
1274
}
1275
else if (field == Py_None) {
1276
append_ok = join_append(self, NULL, quoted);
1277
Py_DECREF(field);
1278
}
1279
else {
1280
PyObject *str;
1281
1282
str = PyObject_Str(field);
1283
Py_DECREF(field);
1284
if (str == NULL) {
1285
Py_DECREF(iter);
1286
return NULL;
1287
}
1288
append_ok = join_append(self, str, quoted);
1289
Py_DECREF(str);
1290
}
1291
if (!append_ok) {
1292
Py_DECREF(iter);
1293
return NULL;
1294
}
1295
}
1296
Py_DECREF(iter);
1297
if (PyErr_Occurred())
1298
return NULL;
1299
1300
if (self->num_fields > 0 && self->rec_len == 0) {
1301
if (dialect->quoting == QUOTE_NONE) {
1302
PyErr_Format(self->error_obj,
1303
"single empty field record must be quoted");
1304
return NULL;
1305
}
1306
self->num_fields--;
1307
if (!join_append(self, NULL, 1))
1308
return NULL;
1309
}
1310
1311
/* Add line terminator.
1312
*/
1313
if (!join_append_lineterminator(self)) {
1314
return NULL;
1315
}
1316
1317
line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1318
(void *) self->rec, self->rec_len);
1319
if (line == NULL) {
1320
return NULL;
1321
}
1322
result = PyObject_CallOneArg(self->write, line);
1323
Py_DECREF(line);
1324
return result;
1325
}
1326
1327
PyDoc_STRVAR(csv_writerows_doc,
1328
"writerows(iterable of iterables)\n"
1329
"\n"
1330
"Construct and write a series of iterables to a csv file. Non-string\n"
1331
"elements will be converted to string.");
1332
1333
static PyObject *
1334
csv_writerows(WriterObj *self, PyObject *seqseq)
1335
{
1336
PyObject *row_iter, *row_obj, *result;
1337
1338
row_iter = PyObject_GetIter(seqseq);
1339
if (row_iter == NULL) {
1340
return NULL;
1341
}
1342
while ((row_obj = PyIter_Next(row_iter))) {
1343
result = csv_writerow(self, row_obj);
1344
Py_DECREF(row_obj);
1345
if (!result) {
1346
Py_DECREF(row_iter);
1347
return NULL;
1348
}
1349
else
1350
Py_DECREF(result);
1351
}
1352
Py_DECREF(row_iter);
1353
if (PyErr_Occurred())
1354
return NULL;
1355
Py_RETURN_NONE;
1356
}
1357
1358
static struct PyMethodDef Writer_methods[] = {
1359
{ "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1360
{ "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1361
{ NULL, NULL }
1362
};
1363
1364
#define W_OFF(x) offsetof(WriterObj, x)
1365
1366
static struct PyMemberDef Writer_memberlist[] = {
1367
{ "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1368
{ NULL }
1369
};
1370
1371
static int
1372
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1373
{
1374
Py_VISIT(self->dialect);
1375
Py_VISIT(self->write);
1376
Py_VISIT(self->error_obj);
1377
Py_VISIT(Py_TYPE(self));
1378
return 0;
1379
}
1380
1381
static int
1382
Writer_clear(WriterObj *self)
1383
{
1384
Py_CLEAR(self->dialect);
1385
Py_CLEAR(self->write);
1386
Py_CLEAR(self->error_obj);
1387
return 0;
1388
}
1389
1390
static void
1391
Writer_dealloc(WriterObj *self)
1392
{
1393
PyTypeObject *tp = Py_TYPE(self);
1394
PyObject_GC_UnTrack(self);
1395
tp->tp_clear((PyObject *)self);
1396
if (self->rec != NULL) {
1397
PyMem_Free(self->rec);
1398
}
1399
PyObject_GC_Del(self);
1400
Py_DECREF(tp);
1401
}
1402
1403
PyDoc_STRVAR(Writer_Type_doc,
1404
"CSV writer\n"
1405
"\n"
1406
"Writer objects are responsible for generating tabular data\n"
1407
"in CSV format from sequence input.\n"
1408
);
1409
1410
static PyType_Slot Writer_Type_slots[] = {
1411
{Py_tp_doc, (char*)Writer_Type_doc},
1412
{Py_tp_traverse, Writer_traverse},
1413
{Py_tp_clear, Writer_clear},
1414
{Py_tp_dealloc, Writer_dealloc},
1415
{Py_tp_methods, Writer_methods},
1416
{Py_tp_members, Writer_memberlist},
1417
{0, NULL}
1418
};
1419
1420
PyType_Spec Writer_Type_spec = {
1421
.name = "_csv.writer",
1422
.basicsize = sizeof(WriterObj),
1423
.flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1424
Py_TPFLAGS_IMMUTABLETYPE | Py_TPFLAGS_DISALLOW_INSTANTIATION),
1425
.slots = Writer_Type_slots,
1426
};
1427
1428
1429
/* _csv.writer(fileobj [, dialect], **fmtparams): create a writer object.
 *
 * The first positional argument must have a callable "write" attribute.
 * The optional second positional argument and the keyword arguments are
 * forwarded to _call_dialect() to build the writer's dialect.
 *
 * Returns a new writer, or NULL with an exception set.
 */
static PyObject *
csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
{
    PyObject *output_file, *dialect = NULL;
    _csvstate *module_state = get_csv_state(module);
    WriterObj *self = PyObject_GC_New(WriterObj, module_state->writer_type);

    if (self == NULL) {
        return NULL;
    }

    /* Initialise every field before any failure path can release the
     * object. */
    self->dialect = NULL;
    self->write = NULL;

    self->rec = NULL;
    self->rec_size = 0;
    self->rec_len = 0;
    self->num_fields = 0;

    self->error_obj = Py_NewRef(module_state->error_obj);

    if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
        goto error;
    }

    if (_PyObject_LookupAttr(output_file,
                             module_state->str_write,
                             &self->write) < 0) {
        goto error;
    }
    /* A missing attribute leaves self->write NULL without raising. */
    if (self->write == NULL || !PyCallable_Check(self->write)) {
        PyErr_SetString(PyExc_TypeError,
                        "argument 1 must have a \"write\" method");
        goto error;
    }

    self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
                                                keyword_args);
    if (self->dialect == NULL) {
        goto error;
    }

    /* Only a fully constructed writer is handed to the GC. */
    PyObject_GC_Track(self);
    return (PyObject *)self;

error:
    Py_DECREF(self);
    return NULL;
}
1474
1475
/*
1476
* DIALECT REGISTRY
1477
*/
1478
1479
/*[clinic input]
1480
_csv.list_dialects
1481
1482
Return a list of all known dialect names.
1483
1484
names = csv.list_dialects()
1485
[clinic start generated code]*/
1486
1487
static PyObject *
_csv_list_dialects_impl(PyObject *module)
/*[clinic end generated code: output=a5b92b215b006a6d input=8953943eb17d98ab]*/
{
    /* The names are the keys of the per-module dialect registry dict. */
    _csvstate *module_state = get_csv_state(module);

    return PyDict_Keys(module_state->dialects);
}
1493
1494
/* _csv.register_dialect(name[, dialect[, **fmtparams]]): build a dialect
 * via _call_dialect() and store it in the module's registry under `name`.
 *
 * `name` must be a str (TypeError otherwise).  Returns None on success,
 * NULL with an exception set on failure.
 */
static PyObject *
csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
{
    PyObject *name_obj, *dialect_obj = NULL;
    _csvstate *module_state = get_csv_state(module);

    if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) {
        return NULL;
    }
    if (!PyUnicode_Check(name_obj)) {
        PyErr_SetString(PyExc_TypeError,
                        "dialect name must be a string");
        return NULL;
    }

    PyObject *dialect = _call_dialect(module_state, dialect_obj, kwargs);
    if (dialect == NULL) {
        return NULL;
    }

    /* Our own reference is released whether or not the insert succeeded. */
    int status = PyDict_SetItem(module_state->dialects, name_obj, dialect);
    Py_DECREF(dialect);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
1518
1519
1520
/*[clinic input]
1521
_csv.unregister_dialect
1522
1523
name: object
1524
1525
Delete the name/dialect mapping associated with a string name.
1526
1527
csv.unregister_dialect(name)
1528
[clinic start generated code]*/
1529
1530
static PyObject *
_csv_unregister_dialect_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=0813ebca6c058df4 input=6b5c1557bf60c7e7]*/
{
    _csvstate *module_state = get_csv_state(module);

    if (PyDict_DelItem(module_state->dialects, name) >= 0) {
        Py_RETURN_NONE;
    }

    /* A missing name is reported as the module's own error type; any other
     * exception is passed through unchanged. */
    if (PyErr_ExceptionMatches(PyExc_KeyError)) {
        PyErr_Format(module_state->error_obj, "unknown dialect");
    }
    return NULL;
}
1543
1544
/*[clinic input]
1545
_csv.get_dialect
1546
1547
name: object
1548
1549
Return the dialect instance associated with name.
1550
1551
dialect = csv.get_dialect(name)
1552
[clinic start generated code]*/
1553
1554
static PyObject *
_csv_get_dialect_impl(PyObject *module, PyObject *name)
/*[clinic end generated code: output=aa988cd573bebebb input=edf9ddab32e448fb]*/
{
    /* Lookup is delegated to the registry helper with this module's state. */
    _csvstate *module_state = get_csv_state(module);

    return get_dialect_from_registry(name, module_state);
}
1560
1561
/*[clinic input]
1562
_csv.field_size_limit
1563
1564
new_limit: object = NULL
1565
1566
Sets an upper limit on parsed fields.
1567
1568
csv.field_size_limit([limit])
1569
1570
Returns old limit. If limit is not given, no new limit is set and
1571
the old limit is returned
1572
[clinic start generated code]*/
1573
1574
static PyObject *
_csv_field_size_limit_impl(PyObject *module, PyObject *new_limit)
/*[clinic end generated code: output=f2799ecd908e250b input=cec70e9226406435]*/
{
    _csvstate *module_state = get_csv_state(module);
    const long old_limit = module_state->field_limit;

    /* No argument: just report the current limit. */
    if (new_limit == NULL) {
        return PyLong_FromLong(old_limit);
    }

    /* Only exact ints are accepted. */
    if (!PyLong_CheckExact(new_limit)) {
        PyErr_Format(PyExc_TypeError,
                     "limit must be an integer");
        return NULL;
    }

    /* Convert first and store only on success, so a failed conversion
     * leaves the stored limit at its previous value. */
    long limit = PyLong_AsLong(new_limit);
    if (limit == -1 && PyErr_Occurred()) {
        return NULL;
    }
    module_state->field_limit = limit;

    return PyLong_FromLong(old_limit);
}
1594
1595
static PyType_Slot error_slots[] = {
1596
{0, NULL},
1597
};
1598
1599
PyType_Spec error_spec = {
1600
.name = "_csv.Error",
1601
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
1602
.slots = error_slots,
1603
};
1604
1605
/*
1606
* MODULE
1607
*/
1608
1609
PyDoc_STRVAR(csv_module_doc,
1610
"CSV parsing and writing.\n"
1611
"\n"
1612
"This module provides classes that assist in the reading and writing\n"
1613
"of Comma Separated Value (CSV) files, and implements the interface\n"
1614
"described by PEP 305. Although many CSV files are simple to parse,\n"
1615
"the format is not formally defined by a stable specification and\n"
1616
"is subtle enough that parsing lines of a CSV file with something\n"
1617
"like line.split(\",\") is bound to fail. The module supports three\n"
1618
"basic APIs: reading, writing, and registration of dialects.\n"
1619
"\n"
1620
"\n"
1621
"DIALECT REGISTRATION:\n"
1622
"\n"
1623
"Readers and writers support a dialect argument, which is a convenient\n"
1624
"handle on a group of settings. When the dialect argument is a string,\n"
1625
"it identifies one of the dialects previously registered with the module.\n"
1626
"If it is a class or instance, the attributes of the argument are used as\n"
1627
"the settings for the reader or writer:\n"
1628
"\n"
1629
" class excel:\n"
1630
" delimiter = ','\n"
1631
" quotechar = '\"'\n"
1632
" escapechar = None\n"
1633
" doublequote = True\n"
1634
" skipinitialspace = False\n"
1635
" lineterminator = '\\r\\n'\n"
1636
" quoting = QUOTE_MINIMAL\n"
1637
"\n"
1638
"SETTINGS:\n"
1639
"\n"
1640
" * quotechar - specifies a one-character string to use as the\n"
1641
" quoting character. It defaults to '\"'.\n"
1642
" * delimiter - specifies a one-character string to use as the\n"
1643
" field separator. It defaults to ','.\n"
1644
" * skipinitialspace - specifies how to interpret spaces which\n"
1645
" immediately follow a delimiter. It defaults to False, which\n"
1646
" means that spaces immediately following a delimiter is part\n"
1647
" of the following field.\n"
1648
" * lineterminator - specifies the character sequence which should\n"
1649
" terminate rows.\n"
1650
" * quoting - controls when quotes should be generated by the writer.\n"
1651
" It can take on any of the following module constants:\n"
1652
"\n"
1653
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1654
" field contains either the quotechar or the delimiter\n"
1655
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1656
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
1657
" fields which do not parse as integers or floating point\n"
1658
" numbers.\n"
1659
" csv.QUOTE_STRINGS means that quotes are always placed around\n"
1660
" fields which are strings. Note that the Python value None\n"
1661
" is not a string.\n"
1662
" csv.QUOTE_NOTNULL means that quotes are only placed around fields\n"
1663
" that are not the Python value None.\n"
1664
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1665
" * escapechar - specifies a one-character string used to escape\n"
1666
" the delimiter when quoting is set to QUOTE_NONE.\n"
1667
" * doublequote - controls the handling of quotes inside fields. When\n"
1668
" True, two consecutive quotes are interpreted as one during read,\n"
1669
" and when writing, each quote character embedded in the data is\n"
1670
" written as two quotes\n");
1671
1672
PyDoc_STRVAR(csv_reader_doc,
1673
" csv_reader = reader(iterable [, dialect='excel']\n"
1674
" [optional keyword args])\n"
1675
" for row in csv_reader:\n"
1676
" process(row)\n"
1677
"\n"
1678
"The \"iterable\" argument can be any object that returns a line\n"
1679
"of input for each iteration, such as a file object or a list. The\n"
1680
"optional \"dialect\" parameter is discussed below. The function\n"
1681
"also accepts optional keyword arguments which override settings\n"
1682
"provided by the dialect.\n"
1683
"\n"
1684
"The returned object is an iterator. Each iteration returns a row\n"
1685
"of the CSV file (which can span multiple input lines).\n");
1686
1687
PyDoc_STRVAR(csv_writer_doc,
1688
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1689
" [optional keyword args])\n"
1690
" for row in sequence:\n"
1691
" csv_writer.writerow(row)\n"
1692
"\n"
1693
" [or]\n"
1694
"\n"
1695
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1696
" [optional keyword args])\n"
1697
" csv_writer.writerows(rows)\n"
1698
"\n"
1699
"The \"fileobj\" argument can be any object that supports the file API.\n");
1700
1701
PyDoc_STRVAR(csv_register_dialect_doc,
1702
"Create a mapping from a string name to a dialect class.\n"
1703
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
1704
1705
static struct PyMethodDef csv_methods[] = {
1706
{ "reader", _PyCFunction_CAST(csv_reader),
1707
METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1708
{ "writer", _PyCFunction_CAST(csv_writer),
1709
METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1710
{ "register_dialect", _PyCFunction_CAST(csv_register_dialect),
1711
METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1712
_CSV_LIST_DIALECTS_METHODDEF
1713
_CSV_UNREGISTER_DIALECT_METHODDEF
1714
_CSV_GET_DIALECT_METHODDEF
1715
_CSV_FIELD_SIZE_LIMIT_METHODDEF
1716
{ NULL, NULL }
1717
};
1718
1719
static int
1720
csv_exec(PyObject *module) {
1721
const StyleDesc *style;
1722
PyObject *temp;
1723
_csvstate *module_state = get_csv_state(module);
1724
1725
temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1726
module_state->dialect_type = (PyTypeObject *)temp;
1727
if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1728
return -1;
1729
}
1730
1731
temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1732
module_state->reader_type = (PyTypeObject *)temp;
1733
if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1734
return -1;
1735
}
1736
1737
temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1738
module_state->writer_type = (PyTypeObject *)temp;
1739
if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1740
return -1;
1741
}
1742
1743
/* Add version to the module. */
1744
if (PyModule_AddStringConstant(module, "__version__",
1745
MODULE_VERSION) == -1) {
1746
return -1;
1747
}
1748
1749
/* Set the field limit */
1750
module_state->field_limit = 128 * 1024;
1751
1752
/* Add _dialects dictionary */
1753
module_state->dialects = PyDict_New();
1754
if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1755
return -1;
1756
}
1757
1758
/* Add quote styles into dictionary */
1759
for (style = quote_styles; style->name; style++) {
1760
if (PyModule_AddIntConstant(module, style->name,
1761
style->style) == -1)
1762
return -1;
1763
}
1764
1765
/* Add the CSV exception object to the module. */
1766
PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1767
if (bases == NULL) {
1768
return -1;
1769
}
1770
module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1771
bases);
1772
Py_DECREF(bases);
1773
if (module_state->error_obj == NULL) {
1774
return -1;
1775
}
1776
if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1777
return -1;
1778
}
1779
1780
module_state->str_write = PyUnicode_InternFromString("write");
1781
if (module_state->str_write == NULL) {
1782
return -1;
1783
}
1784
return 0;
1785
}
1786
1787
static PyModuleDef_Slot csv_slots[] = {
1788
{Py_mod_exec, csv_exec},
1789
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1790
{0, NULL}
1791
};
1792
1793
static struct PyModuleDef _csvmodule = {
1794
PyModuleDef_HEAD_INIT,
1795
"_csv",
1796
csv_module_doc,
1797
sizeof(_csvstate),
1798
csv_methods,
1799
csv_slots,
1800
_csv_traverse,
1801
_csv_clear,
1802
_csv_free
1803
};
1804
1805
PyMODINIT_FUNC
1806
PyInit__csv(void)
1807
{
1808
return PyModuleDef_Init(&_csvmodule);
1809
}
1810
1811