Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
allendowney
GitHub Repository: allendowney/cpython
Path: blob/main/Modules/_json.c
12 views
1
/* JSON accelerator C extension: _json module.
2
*
3
* It is built as a built-in module (Py_BUILD_CORE_BUILTIN define) on Windows
4
* and as an extension module (Py_BUILD_CORE_MODULE define) on other
5
* platforms. */
6
7
#ifndef Py_BUILD_CORE_BUILTIN
8
# define Py_BUILD_CORE_MODULE 1
9
#endif
10
11
#include "Python.h"
12
#include "pycore_ceval.h" // _Py_EnterRecursiveCall()
13
#include "pycore_runtime.h" // _PyRuntime
14
#include "structmember.h" // PyMemberDef
15
#include "pycore_global_objects.h" // _Py_ID()
16
#include <stdbool.h> // bool
17
18
19
typedef struct _PyScannerObject {
20
PyObject_HEAD
21
signed char strict;
22
PyObject *object_hook;
23
PyObject *object_pairs_hook;
24
PyObject *parse_float;
25
PyObject *parse_int;
26
PyObject *parse_constant;
27
PyObject *memo;
28
} PyScannerObject;
29
30
static PyMemberDef scanner_members[] = {
31
{"strict", T_BOOL, offsetof(PyScannerObject, strict), READONLY, "strict"},
32
{"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
33
{"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, object_pairs_hook), READONLY},
34
{"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
35
{"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
36
{"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
37
{NULL}
38
};
39
40
typedef struct _PyEncoderObject {
41
PyObject_HEAD
42
PyObject *markers;
43
PyObject *defaultfn;
44
PyObject *encoder;
45
PyObject *indent;
46
PyObject *key_separator;
47
PyObject *item_separator;
48
char sort_keys;
49
char skipkeys;
50
int allow_nan;
51
PyCFunction fast_encode;
52
} PyEncoderObject;
53
54
static PyMemberDef encoder_members[] = {
55
{"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
56
{"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
57
{"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
58
{"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
59
{"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
60
{"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
61
{"sort_keys", T_BOOL, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
62
{"skipkeys", T_BOOL, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
63
{NULL}
64
};
65
66
/* Forward decls */
67
68
static PyObject *
69
ascii_escape_unicode(PyObject *pystr);
70
static PyObject *
71
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr);
72
static PyObject *
73
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
74
static PyObject *
75
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
76
static PyObject *
77
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
78
static void
79
scanner_dealloc(PyObject *self);
80
static int
81
scanner_clear(PyScannerObject *self);
82
static PyObject *
83
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
84
static void
85
encoder_dealloc(PyObject *self);
86
static int
87
encoder_clear(PyEncoderObject *self);
88
static int
89
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *seq, Py_ssize_t indent_level);
90
static int
91
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *obj, Py_ssize_t indent_level);
92
static int
93
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer, PyObject *dct, Py_ssize_t indent_level);
94
static PyObject *
95
_encoded_const(PyObject *obj);
96
static void
97
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end);
98
static PyObject *
99
encoder_encode_string(PyEncoderObject *s, PyObject *obj);
100
static PyObject *
101
encoder_encode_float(PyEncoderObject *s, PyObject *obj);
102
103
/* True when c can be copied verbatim into an ASCII-only JSON string:
   printable ASCII (' ' .. '~') other than backslash and double quote.
   The argument is parenthesised so expression arguments parse correctly;
   it is still evaluated several times, so pass side-effect-free values. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* True for the four characters skipped as whitespace by the parsers below:
   space, tab, line feed and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))
105
106
static Py_ssize_t
107
ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
108
{
109
/* Escape unicode code point c to ASCII escape sequences
110
in char *output. output must have at least 12 bytes unused to
111
accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
112
output[chars++] = '\\';
113
switch (c) {
114
case '\\': output[chars++] = c; break;
115
case '"': output[chars++] = c; break;
116
case '\b': output[chars++] = 'b'; break;
117
case '\f': output[chars++] = 'f'; break;
118
case '\n': output[chars++] = 'n'; break;
119
case '\r': output[chars++] = 'r'; break;
120
case '\t': output[chars++] = 't'; break;
121
default:
122
if (c >= 0x10000) {
123
/* UTF-16 surrogate pair */
124
Py_UCS4 v = Py_UNICODE_HIGH_SURROGATE(c);
125
output[chars++] = 'u';
126
output[chars++] = Py_hexdigits[(v >> 12) & 0xf];
127
output[chars++] = Py_hexdigits[(v >> 8) & 0xf];
128
output[chars++] = Py_hexdigits[(v >> 4) & 0xf];
129
output[chars++] = Py_hexdigits[(v ) & 0xf];
130
c = Py_UNICODE_LOW_SURROGATE(c);
131
output[chars++] = '\\';
132
}
133
output[chars++] = 'u';
134
output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
135
output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
136
output[chars++] = Py_hexdigits[(c >> 4) & 0xf];
137
output[chars++] = Py_hexdigits[(c ) & 0xf];
138
}
139
return chars;
140
}
141
142
static PyObject *
ascii_escape_unicode(PyObject *pystr)
{
    /* Return a new ASCII-only str holding pystr as a double-quoted JSON
       string.  Returns NULL with OverflowError set if the escaped length
       would not fit in Py_ssize_t, or NULL if the allocation fails. */
    const Py_ssize_t n_in = PyUnicode_GET_LENGTH(pystr);
    const void *src = PyUnicode_DATA(pystr);
    const int kind = PyUnicode_KIND(pystr);
    Py_ssize_t needed = 2;      /* the two surrounding quotes */
    Py_ssize_t pos;
    Py_ssize_t out;
    PyObject *result;
    Py_UCS1 *dst;

    /* First pass: work out the exact size of the output. */
    for (pos = 0; pos < n_in; pos++) {
        Py_UCS4 ch = PyUnicode_READ(kind, src, pos);
        Py_ssize_t width;

        if (S_CHAR(ch)) {
            width = 1;
        }
        else if (ch == '\\' || ch == '"' || ch == '\b' || ch == '\f' ||
                 ch == '\n' || ch == '\r' || ch == '\t') {
            width = 2;
        }
        else if (ch >= 0x10000) {
            width = 12;         /* surrogate pair: \uXXXX\uXXXX */
        }
        else {
            width = 6;          /* \uXXXX */
        }

        if (needed > PY_SSIZE_T_MAX - width) {
            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
            return NULL;
        }
        needed += width;
    }

    result = PyUnicode_New(needed, 127);
    if (result == NULL) {
        return NULL;
    }

    /* Second pass: fill the buffer. */
    dst = PyUnicode_1BYTE_DATA(result);
    out = 0;
    dst[out++] = '"';
    for (pos = 0; pos < n_in; pos++) {
        Py_UCS4 ch = PyUnicode_READ(kind, src, pos);
        if (S_CHAR(ch)) {
            dst[out++] = ch;
        }
        else {
            out = ascii_escape_unichar(ch, dst, out);
        }
    }
    dst[out++] = '"';
#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(result, 1));
#endif
    return result;
}
204
205
static PyObject *
escape_unicode(PyObject *pystr)
{
    /* Return a new str holding pystr as a double-quoted JSON string.
       Only backslash, double quote and characters <= 0x1f are escaped;
       everything else is copied unchanged.  Returns NULL with
       OverflowError set if the result would be too long, or NULL if the
       allocation fails. */
    Py_UCS4 maxchar = PyUnicode_MAX_CHAR_VALUE(pystr);
    Py_ssize_t n_in = PyUnicode_GET_LENGTH(pystr);
    const void *src = PyUnicode_DATA(pystr);
    int kind = PyUnicode_KIND(pystr);
    Py_ssize_t needed = 2;      /* the two surrounding quotes */
    Py_ssize_t pos;
    Py_ssize_t out;
    PyObject *result;

    /* First pass: work out the exact size of the output. */
    for (pos = 0; pos < n_in; pos++) {
        Py_UCS4 ch = PyUnicode_READ(kind, src, pos);
        Py_ssize_t width;

        if (ch == '\\' || ch == '"' || ch == '\b' || ch == '\f' ||
            ch == '\n' || ch == '\r' || ch == '\t') {
            width = 2;
        }
        else {
            width = (ch <= 0x1f) ? 6 : 1;
        }

        if (needed > PY_SSIZE_T_MAX - width) {
            PyErr_SetString(PyExc_OverflowError, "string is too long to escape");
            return NULL;
        }
        needed += width;
    }

    result = PyUnicode_New(needed, maxchar);
    if (result == NULL) {
        return NULL;
    }

    /* From here on kind is the kind of the result; it is also used to read
       the input.  NOTE(review): this relies on the result, created with the
       input's max char value, having the same kind as the input - confirm. */
    kind = PyUnicode_KIND(result);

    /* Second pass, instantiated once per output element width. */
#define ESCAPE_INTO(dst)                                                \
    do {                                                                \
        out = 0;                                                        \
        (dst)[out++] = '"';                                             \
        for (pos = 0; pos < n_in; pos++) {                              \
            Py_UCS4 ch = PyUnicode_READ(kind, src, pos);                \
            Py_UCS4 letter = 0;                                         \
            switch (ch) {                                               \
                case '\\': case '"': letter = ch; break;                \
                case '\b': letter = 'b'; break;                         \
                case '\f': letter = 'f'; break;                         \
                case '\n': letter = 'n'; break;                         \
                case '\r': letter = 'r'; break;                         \
                case '\t': letter = 't'; break;                         \
                default: break;                                         \
            }                                                           \
            if (letter != 0) {                                          \
                (dst)[out++] = '\\';                                    \
                (dst)[out++] = letter;                                  \
            }                                                           \
            else if (ch <= 0x1f) {                                      \
                (dst)[out++] = '\\';                                    \
                (dst)[out++] = 'u';                                     \
                (dst)[out++] = '0';                                     \
                (dst)[out++] = '0';                                     \
                (dst)[out++] = Py_hexdigits[(ch >> 4) & 0xf];           \
                (dst)[out++] = Py_hexdigits[(ch     ) & 0xf];           \
            }                                                           \
            else {                                                      \
                (dst)[out++] = ch;                                      \
            }                                                           \
        }                                                               \
        (dst)[out++] = '"';                                             \
    } while (0)

    switch (kind) {
        case PyUnicode_1BYTE_KIND: {
            Py_UCS1 *dst1 = PyUnicode_1BYTE_DATA(result);
            ESCAPE_INTO(dst1);
            break;
        }
        case PyUnicode_2BYTE_KIND: {
            Py_UCS2 *dst2 = PyUnicode_2BYTE_DATA(result);
            ESCAPE_INTO(dst2);
            break;
        }
        default: {
            Py_UCS4 *dst4 = PyUnicode_4BYTE_DATA(result);
            assert(kind == PyUnicode_4BYTE_KIND);
            ESCAPE_INTO(dst4);
            break;
        }
    }
#undef ESCAPE_INTO

#ifdef Py_DEBUG
    assert(_PyUnicode_CheckConsistency(result, 1));
#endif
    return result;
}
298
299
static void
300
raise_errmsg(const char *msg, PyObject *s, Py_ssize_t end)
301
{
302
/* Use JSONDecodeError exception to raise a nice looking ValueError subclass */
303
_Py_DECLARE_STR(json_decoder, "json.decoder");
304
PyObject *JSONDecodeError =
305
_PyImport_GetModuleAttr(&_Py_STR(json_decoder), &_Py_ID(JSONDecodeError));
306
if (JSONDecodeError == NULL) {
307
return;
308
}
309
310
PyObject *exc;
311
exc = PyObject_CallFunction(JSONDecodeError, "zOn", msg, s, end);
312
Py_DECREF(JSONDecodeError);
313
if (exc) {
314
PyErr_SetObject(JSONDecodeError, exc);
315
Py_DECREF(exc);
316
}
317
}
318
319
static void
320
raise_stop_iteration(Py_ssize_t idx)
321
{
322
PyObject *value = PyLong_FromSsize_t(idx);
323
if (value != NULL) {
324
PyErr_SetObject(PyExc_StopIteration, value);
325
Py_DECREF(value);
326
}
327
}
328
329
static PyObject *
330
_build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
331
/* return (rval, idx) tuple, stealing reference to rval */
332
PyObject *tpl;
333
PyObject *pyidx;
334
/*
335
steal a reference to rval, returns (rval, idx)
336
*/
337
if (rval == NULL) {
338
return NULL;
339
}
340
pyidx = PyLong_FromSsize_t(idx);
341
if (pyidx == NULL) {
342
Py_DECREF(rval);
343
return NULL;
344
}
345
tpl = PyTuple_New(2);
346
if (tpl == NULL) {
347
Py_DECREF(pyidx);
348
Py_DECREF(rval);
349
return NULL;
350
}
351
PyTuple_SET_ITEM(tpl, 0, rval);
352
PyTuple_SET_ITEM(tpl, 1, pyidx);
353
return tpl;
354
}
355
356
static PyObject *
357
scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
358
{
359
/* Read the JSON string from PyUnicode pystr.
360
end is the index of the first character after the quote.
361
if strict is zero then literal control characters are allowed
362
*next_end_ptr is a return-by-reference index of the character
363
after the end quote
364
365
Return value is a new PyUnicode
366
*/
367
PyObject *rval = NULL;
368
Py_ssize_t len;
369
Py_ssize_t begin = end - 1;
370
Py_ssize_t next /* = begin */;
371
const void *buf;
372
int kind;
373
374
_PyUnicodeWriter writer;
375
_PyUnicodeWriter_Init(&writer);
376
writer.overallocate = 1;
377
378
len = PyUnicode_GET_LENGTH(pystr);
379
buf = PyUnicode_DATA(pystr);
380
kind = PyUnicode_KIND(pystr);
381
382
if (end < 0 || len < end) {
383
PyErr_SetString(PyExc_ValueError, "end is out of bounds");
384
goto bail;
385
}
386
while (1) {
387
/* Find the end of the string or the next escape */
388
Py_UCS4 c;
389
{
390
// Use tight scope variable to help register allocation.
391
Py_UCS4 d = 0;
392
for (next = end; next < len; next++) {
393
d = PyUnicode_READ(kind, buf, next);
394
if (d == '"' || d == '\\') {
395
break;
396
}
397
if (d <= 0x1f && strict) {
398
raise_errmsg("Invalid control character at", pystr, next);
399
goto bail;
400
}
401
}
402
c = d;
403
}
404
405
if (c == '"') {
406
// Fast path for simple case.
407
if (writer.buffer == NULL) {
408
PyObject *ret = PyUnicode_Substring(pystr, end, next);
409
if (ret == NULL) {
410
goto bail;
411
}
412
*next_end_ptr = next + 1;;
413
return ret;
414
}
415
}
416
else if (c != '\\') {
417
raise_errmsg("Unterminated string starting at", pystr, begin);
418
goto bail;
419
}
420
421
/* Pick up this chunk if it's not zero length */
422
if (next != end) {
423
if (_PyUnicodeWriter_WriteSubstring(&writer, pystr, end, next) < 0) {
424
goto bail;
425
}
426
}
427
next++;
428
if (c == '"') {
429
end = next;
430
break;
431
}
432
if (next == len) {
433
raise_errmsg("Unterminated string starting at", pystr, begin);
434
goto bail;
435
}
436
c = PyUnicode_READ(kind, buf, next);
437
if (c != 'u') {
438
/* Non-unicode backslash escapes */
439
end = next + 1;
440
switch (c) {
441
case '"': break;
442
case '\\': break;
443
case '/': break;
444
case 'b': c = '\b'; break;
445
case 'f': c = '\f'; break;
446
case 'n': c = '\n'; break;
447
case 'r': c = '\r'; break;
448
case 't': c = '\t'; break;
449
default: c = 0;
450
}
451
if (c == 0) {
452
raise_errmsg("Invalid \\escape", pystr, end - 2);
453
goto bail;
454
}
455
}
456
else {
457
c = 0;
458
next++;
459
end = next + 4;
460
if (end >= len) {
461
raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
462
goto bail;
463
}
464
/* Decode 4 hex digits */
465
for (; next < end; next++) {
466
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
467
c <<= 4;
468
switch (digit) {
469
case '0': case '1': case '2': case '3': case '4':
470
case '5': case '6': case '7': case '8': case '9':
471
c |= (digit - '0'); break;
472
case 'a': case 'b': case 'c': case 'd': case 'e':
473
case 'f':
474
c |= (digit - 'a' + 10); break;
475
case 'A': case 'B': case 'C': case 'D': case 'E':
476
case 'F':
477
c |= (digit - 'A' + 10); break;
478
default:
479
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
480
goto bail;
481
}
482
}
483
/* Surrogate pair */
484
if (Py_UNICODE_IS_HIGH_SURROGATE(c) && end + 6 < len &&
485
PyUnicode_READ(kind, buf, next++) == '\\' &&
486
PyUnicode_READ(kind, buf, next++) == 'u') {
487
Py_UCS4 c2 = 0;
488
end += 6;
489
/* Decode 4 hex digits */
490
for (; next < end; next++) {
491
Py_UCS4 digit = PyUnicode_READ(kind, buf, next);
492
c2 <<= 4;
493
switch (digit) {
494
case '0': case '1': case '2': case '3': case '4':
495
case '5': case '6': case '7': case '8': case '9':
496
c2 |= (digit - '0'); break;
497
case 'a': case 'b': case 'c': case 'd': case 'e':
498
case 'f':
499
c2 |= (digit - 'a' + 10); break;
500
case 'A': case 'B': case 'C': case 'D': case 'E':
501
case 'F':
502
c2 |= (digit - 'A' + 10); break;
503
default:
504
raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
505
goto bail;
506
}
507
}
508
if (Py_UNICODE_IS_LOW_SURROGATE(c2))
509
c = Py_UNICODE_JOIN_SURROGATES(c, c2);
510
else
511
end -= 6;
512
}
513
}
514
if (_PyUnicodeWriter_WriteChar(&writer, c) < 0) {
515
goto bail;
516
}
517
}
518
519
rval = _PyUnicodeWriter_Finish(&writer);
520
*next_end_ptr = end;
521
return rval;
522
523
bail:
524
*next_end_ptr = -1;
525
_PyUnicodeWriter_Dealloc(&writer);
526
return NULL;
527
}
528
529
PyDoc_STRVAR(pydoc_scanstring,
530
"scanstring(string, end, strict=True) -> (string, end)\n"
531
"\n"
532
"Scan the string s for a JSON string. End is the index of the\n"
533
"character in s after the quote that started the JSON string.\n"
534
"Unescapes all valid JSON string escape sequences and raises ValueError\n"
535
"on attempt to decode an invalid string. If strict is False then literal\n"
536
"control characters are allowed in the string.\n"
537
"\n"
538
"Returns a tuple of the decoded string and the index of the character in s\n"
539
"after the end quote."
540
);
541
542
static PyObject *
543
py_scanstring(PyObject* Py_UNUSED(self), PyObject *args)
544
{
545
PyObject *pystr;
546
PyObject *rval;
547
Py_ssize_t end;
548
Py_ssize_t next_end = -1;
549
int strict = 1;
550
if (!PyArg_ParseTuple(args, "On|p:scanstring", &pystr, &end, &strict)) {
551
return NULL;
552
}
553
if (PyUnicode_Check(pystr)) {
554
rval = scanstring_unicode(pystr, end, strict, &next_end);
555
}
556
else {
557
PyErr_Format(PyExc_TypeError,
558
"first argument must be a string, not %.80s",
559
Py_TYPE(pystr)->tp_name);
560
return NULL;
561
}
562
return _build_rval_index_tuple(rval, next_end);
563
}
564
565
PyDoc_STRVAR(pydoc_encode_basestring_ascii,
566
"encode_basestring_ascii(string) -> string\n"
567
"\n"
568
"Return an ASCII-only JSON representation of a Python string"
569
);
570
571
static PyObject *
572
py_encode_basestring_ascii(PyObject* Py_UNUSED(self), PyObject *pystr)
573
{
574
PyObject *rval;
575
/* Return an ASCII-only JSON representation of a Python string */
576
/* METH_O */
577
if (PyUnicode_Check(pystr)) {
578
rval = ascii_escape_unicode(pystr);
579
}
580
else {
581
PyErr_Format(PyExc_TypeError,
582
"first argument must be a string, not %.80s",
583
Py_TYPE(pystr)->tp_name);
584
return NULL;
585
}
586
return rval;
587
}
588
589
590
PyDoc_STRVAR(pydoc_encode_basestring,
591
"encode_basestring(string) -> string\n"
592
"\n"
593
"Return a JSON representation of a Python string"
594
);
595
596
static PyObject *
597
py_encode_basestring(PyObject* Py_UNUSED(self), PyObject *pystr)
598
{
599
PyObject *rval;
600
/* Return a JSON representation of a Python string */
601
/* METH_O */
602
if (PyUnicode_Check(pystr)) {
603
rval = escape_unicode(pystr);
604
}
605
else {
606
PyErr_Format(PyExc_TypeError,
607
"first argument must be a string, not %.80s",
608
Py_TYPE(pystr)->tp_name);
609
return NULL;
610
}
611
return rval;
612
}
613
614
static void
615
scanner_dealloc(PyObject *self)
616
{
617
PyTypeObject *tp = Py_TYPE(self);
618
/* bpo-31095: UnTrack is needed before calling any callbacks */
619
PyObject_GC_UnTrack(self);
620
scanner_clear((PyScannerObject *)self);
621
tp->tp_free(self);
622
Py_DECREF(tp);
623
}
624
625
static int
626
scanner_traverse(PyScannerObject *self, visitproc visit, void *arg)
627
{
628
Py_VISIT(Py_TYPE(self));
629
Py_VISIT(self->object_hook);
630
Py_VISIT(self->object_pairs_hook);
631
Py_VISIT(self->parse_float);
632
Py_VISIT(self->parse_int);
633
Py_VISIT(self->parse_constant);
634
Py_VISIT(self->memo);
635
return 0;
636
}
637
638
static int
639
scanner_clear(PyScannerObject *self)
640
{
641
Py_CLEAR(self->object_hook);
642
Py_CLEAR(self->object_pairs_hook);
643
Py_CLEAR(self->parse_float);
644
Py_CLEAR(self->parse_int);
645
Py_CLEAR(self->parse_constant);
646
Py_CLEAR(self->memo);
647
return 0;
648
}
649
650
static PyObject *
651
_parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
652
{
653
/* Read a JSON object from PyUnicode pystr.
654
idx is the index of the first character after the opening curly brace.
655
*next_idx_ptr is a return-by-reference index to the first character after
656
the closing curly brace.
657
658
Returns a new PyObject (usually a dict, but object_hook can change that)
659
*/
660
const void *str;
661
int kind;
662
Py_ssize_t end_idx;
663
PyObject *val = NULL;
664
PyObject *rval = NULL;
665
PyObject *key = NULL;
666
int has_pairs_hook = (s->object_pairs_hook != Py_None);
667
Py_ssize_t next_idx;
668
669
str = PyUnicode_DATA(pystr);
670
kind = PyUnicode_KIND(pystr);
671
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
672
673
if (has_pairs_hook)
674
rval = PyList_New(0);
675
else
676
rval = PyDict_New();
677
if (rval == NULL)
678
return NULL;
679
680
/* skip whitespace after { */
681
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
682
683
/* only loop if the object is non-empty */
684
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
685
while (1) {
686
PyObject *memokey;
687
688
/* read key */
689
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
690
raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
691
goto bail;
692
}
693
key = scanstring_unicode(pystr, idx + 1, s->strict, &next_idx);
694
if (key == NULL)
695
goto bail;
696
memokey = PyDict_SetDefault(s->memo, key, key);
697
if (memokey == NULL) {
698
goto bail;
699
}
700
Py_SETREF(key, Py_NewRef(memokey));
701
idx = next_idx;
702
703
/* skip whitespace between key and : delimiter, read :, skip whitespace */
704
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
705
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ':') {
706
raise_errmsg("Expecting ':' delimiter", pystr, idx);
707
goto bail;
708
}
709
idx++;
710
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
711
712
/* read any JSON term */
713
val = scan_once_unicode(s, pystr, idx, &next_idx);
714
if (val == NULL)
715
goto bail;
716
717
if (has_pairs_hook) {
718
PyObject *item = PyTuple_Pack(2, key, val);
719
if (item == NULL)
720
goto bail;
721
Py_CLEAR(key);
722
Py_CLEAR(val);
723
if (PyList_Append(rval, item) == -1) {
724
Py_DECREF(item);
725
goto bail;
726
}
727
Py_DECREF(item);
728
}
729
else {
730
if (PyDict_SetItem(rval, key, val) < 0)
731
goto bail;
732
Py_CLEAR(key);
733
Py_CLEAR(val);
734
}
735
idx = next_idx;
736
737
/* skip whitespace before } or , */
738
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
739
740
/* bail if the object is closed or we didn't get the , delimiter */
741
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
742
break;
743
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
744
raise_errmsg("Expecting ',' delimiter", pystr, idx);
745
goto bail;
746
}
747
idx++;
748
749
/* skip whitespace after , delimiter */
750
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
751
}
752
}
753
754
*next_idx_ptr = idx + 1;
755
756
if (has_pairs_hook) {
757
val = PyObject_CallOneArg(s->object_pairs_hook, rval);
758
Py_DECREF(rval);
759
return val;
760
}
761
762
/* if object_hook is not None: rval = object_hook(rval) */
763
if (s->object_hook != Py_None) {
764
val = PyObject_CallOneArg(s->object_hook, rval);
765
Py_DECREF(rval);
766
return val;
767
}
768
return rval;
769
bail:
770
Py_XDECREF(key);
771
Py_XDECREF(val);
772
Py_XDECREF(rval);
773
return NULL;
774
}
775
776
static PyObject *
777
_parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
778
/* Read a JSON array from PyUnicode pystr.
779
idx is the index of the first character after the opening brace.
780
*next_idx_ptr is a return-by-reference index to the first character after
781
the closing brace.
782
783
Returns a new PyList
784
*/
785
const void *str;
786
int kind;
787
Py_ssize_t end_idx;
788
PyObject *val = NULL;
789
PyObject *rval;
790
Py_ssize_t next_idx;
791
792
rval = PyList_New(0);
793
if (rval == NULL)
794
return NULL;
795
796
str = PyUnicode_DATA(pystr);
797
kind = PyUnicode_KIND(pystr);
798
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
799
800
/* skip whitespace after [ */
801
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
802
803
/* only loop if the array is non-empty */
804
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
805
while (1) {
806
807
/* read any JSON term */
808
val = scan_once_unicode(s, pystr, idx, &next_idx);
809
if (val == NULL)
810
goto bail;
811
812
if (PyList_Append(rval, val) == -1)
813
goto bail;
814
815
Py_CLEAR(val);
816
idx = next_idx;
817
818
/* skip whitespace between term and , */
819
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
820
821
/* bail if the array is closed or we didn't get the , delimiter */
822
if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
823
break;
824
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
825
raise_errmsg("Expecting ',' delimiter", pystr, idx);
826
goto bail;
827
}
828
idx++;
829
830
/* skip whitespace after , */
831
while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
832
}
833
}
834
835
/* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
836
if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
837
raise_errmsg("Expecting value", pystr, end_idx);
838
goto bail;
839
}
840
*next_idx_ptr = idx + 1;
841
return rval;
842
bail:
843
Py_XDECREF(val);
844
Py_DECREF(rval);
845
return NULL;
846
}
847
848
static PyObject *
849
_parse_constant(PyScannerObject *s, const char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
850
/* Read a JSON constant.
851
constant is the constant string that was found
852
("NaN", "Infinity", "-Infinity").
853
idx is the index of the first character of the constant
854
*next_idx_ptr is a return-by-reference index to the first character after
855
the constant.
856
857
Returns the result of parse_constant
858
*/
859
PyObject *cstr;
860
PyObject *rval;
861
/* constant is "NaN", "Infinity", or "-Infinity" */
862
cstr = PyUnicode_InternFromString(constant);
863
if (cstr == NULL)
864
return NULL;
865
866
/* rval = parse_constant(constant) */
867
rval = PyObject_CallOneArg(s->parse_constant, cstr);
868
idx += PyUnicode_GET_LENGTH(cstr);
869
Py_DECREF(cstr);
870
*next_idx_ptr = idx;
871
return rval;
872
}
873
874
static PyObject *
875
_match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
876
/* Read a JSON number from PyUnicode pystr.
877
idx is the index of the first character of the number
878
*next_idx_ptr is a return-by-reference index to the first character after
879
the number.
880
881
Returns a new PyObject representation of that number:
882
PyLong, or PyFloat.
883
May return other types if parse_int or parse_float are set
884
*/
885
const void *str;
886
int kind;
887
Py_ssize_t end_idx;
888
Py_ssize_t idx = start;
889
int is_float = 0;
890
PyObject *rval;
891
PyObject *numstr = NULL;
892
PyObject *custom_func;
893
894
str = PyUnicode_DATA(pystr);
895
kind = PyUnicode_KIND(pystr);
896
end_idx = PyUnicode_GET_LENGTH(pystr) - 1;
897
898
/* read a sign if it's there, make sure it's not the end of the string */
899
if (PyUnicode_READ(kind, str, idx) == '-') {
900
idx++;
901
if (idx > end_idx) {
902
raise_stop_iteration(start);
903
return NULL;
904
}
905
}
906
907
/* read as many integer digits as we find as long as it doesn't start with 0 */
908
if (PyUnicode_READ(kind, str, idx) >= '1' && PyUnicode_READ(kind, str, idx) <= '9') {
909
idx++;
910
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
911
}
912
/* if it starts with 0 we only expect one integer digit */
913
else if (PyUnicode_READ(kind, str, idx) == '0') {
914
idx++;
915
}
916
/* no integer digits, error */
917
else {
918
raise_stop_iteration(start);
919
return NULL;
920
}
921
922
/* if the next char is '.' followed by a digit then read all float digits */
923
if (idx < end_idx && PyUnicode_READ(kind, str, idx) == '.' && PyUnicode_READ(kind, str, idx + 1) >= '0' && PyUnicode_READ(kind, str, idx + 1) <= '9') {
924
is_float = 1;
925
idx += 2;
926
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
927
}
928
929
/* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
930
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == 'e' || PyUnicode_READ(kind, str, idx) == 'E')) {
931
Py_ssize_t e_start = idx;
932
idx++;
933
934
/* read an exponent sign if present */
935
if (idx < end_idx && (PyUnicode_READ(kind, str, idx) == '-' || PyUnicode_READ(kind, str, idx) == '+')) idx++;
936
937
/* read all digits */
938
while (idx <= end_idx && PyUnicode_READ(kind, str, idx) >= '0' && PyUnicode_READ(kind, str, idx) <= '9') idx++;
939
940
/* if we got a digit, then parse as float. if not, backtrack */
941
if (PyUnicode_READ(kind, str, idx - 1) >= '0' && PyUnicode_READ(kind, str, idx - 1) <= '9') {
942
is_float = 1;
943
}
944
else {
945
idx = e_start;
946
}
947
}
948
949
if (is_float && s->parse_float != (PyObject *)&PyFloat_Type)
950
custom_func = s->parse_float;
951
else if (!is_float && s->parse_int != (PyObject *) &PyLong_Type)
952
custom_func = s->parse_int;
953
else
954
custom_func = NULL;
955
956
if (custom_func) {
957
/* copy the section we determined to be a number */
958
numstr = PyUnicode_FromKindAndData(kind,
959
(char*)str + kind * start,
960
idx - start);
961
if (numstr == NULL)
962
return NULL;
963
rval = PyObject_CallOneArg(custom_func, numstr);
964
}
965
else {
966
Py_ssize_t i, n;
967
char *buf;
968
/* Straight conversion to ASCII, to avoid costly conversion of
969
decimal unicode digits (which cannot appear here) */
970
n = idx - start;
971
numstr = PyBytes_FromStringAndSize(NULL, n);
972
if (numstr == NULL)
973
return NULL;
974
buf = PyBytes_AS_STRING(numstr);
975
for (i = 0; i < n; i++) {
976
buf[i] = (char) PyUnicode_READ(kind, str, i + start);
977
}
978
if (is_float)
979
rval = PyFloat_FromString(numstr);
980
else
981
rval = PyLong_FromString(buf, NULL, 10);
982
}
983
Py_DECREF(numstr);
984
*next_idx_ptr = idx;
985
return rval;
986
}
987
988
static PyObject *
989
scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
990
{
991
/* Read one JSON term (of any kind) from PyUnicode pystr.
992
idx is the index of the first character of the term
993
*next_idx_ptr is a return-by-reference index to the first character after
994
the number.
995
996
Returns a new PyObject representation of the term.
997
*/
998
PyObject *res;
999
const void *str;
1000
int kind;
1001
Py_ssize_t length;
1002
1003
str = PyUnicode_DATA(pystr);
1004
kind = PyUnicode_KIND(pystr);
1005
length = PyUnicode_GET_LENGTH(pystr);
1006
1007
if (idx < 0) {
1008
PyErr_SetString(PyExc_ValueError, "idx cannot be negative");
1009
return NULL;
1010
}
1011
if (idx >= length) {
1012
raise_stop_iteration(idx);
1013
return NULL;
1014
}
1015
1016
switch (PyUnicode_READ(kind, str, idx)) {
1017
case '"':
1018
/* string */
1019
return scanstring_unicode(pystr, idx + 1, s->strict, next_idx_ptr);
1020
case '{':
1021
/* object */
1022
if (_Py_EnterRecursiveCall(" while decoding a JSON object "
1023
"from a unicode string"))
1024
return NULL;
1025
res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1026
_Py_LeaveRecursiveCall();
1027
return res;
1028
case '[':
1029
/* array */
1030
if (_Py_EnterRecursiveCall(" while decoding a JSON array "
1031
"from a unicode string"))
1032
return NULL;
1033
res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1034
_Py_LeaveRecursiveCall();
1035
return res;
1036
case 'n':
1037
/* null */
1038
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'u' && PyUnicode_READ(kind, str, idx + 2) == 'l' && PyUnicode_READ(kind, str, idx + 3) == 'l') {
1039
*next_idx_ptr = idx + 4;
1040
Py_RETURN_NONE;
1041
}
1042
break;
1043
case 't':
1044
/* true */
1045
if ((idx + 3 < length) && PyUnicode_READ(kind, str, idx + 1) == 'r' && PyUnicode_READ(kind, str, idx + 2) == 'u' && PyUnicode_READ(kind, str, idx + 3) == 'e') {
1046
*next_idx_ptr = idx + 4;
1047
Py_RETURN_TRUE;
1048
}
1049
break;
1050
case 'f':
1051
/* false */
1052
if ((idx + 4 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1053
PyUnicode_READ(kind, str, idx + 2) == 'l' &&
1054
PyUnicode_READ(kind, str, idx + 3) == 's' &&
1055
PyUnicode_READ(kind, str, idx + 4) == 'e') {
1056
*next_idx_ptr = idx + 5;
1057
Py_RETURN_FALSE;
1058
}
1059
break;
1060
case 'N':
1061
/* NaN */
1062
if ((idx + 2 < length) && PyUnicode_READ(kind, str, idx + 1) == 'a' &&
1063
PyUnicode_READ(kind, str, idx + 2) == 'N') {
1064
return _parse_constant(s, "NaN", idx, next_idx_ptr);
1065
}
1066
break;
1067
case 'I':
1068
/* Infinity */
1069
if ((idx + 7 < length) && PyUnicode_READ(kind, str, idx + 1) == 'n' &&
1070
PyUnicode_READ(kind, str, idx + 2) == 'f' &&
1071
PyUnicode_READ(kind, str, idx + 3) == 'i' &&
1072
PyUnicode_READ(kind, str, idx + 4) == 'n' &&
1073
PyUnicode_READ(kind, str, idx + 5) == 'i' &&
1074
PyUnicode_READ(kind, str, idx + 6) == 't' &&
1075
PyUnicode_READ(kind, str, idx + 7) == 'y') {
1076
return _parse_constant(s, "Infinity", idx, next_idx_ptr);
1077
}
1078
break;
1079
case '-':
1080
/* -Infinity */
1081
if ((idx + 8 < length) && PyUnicode_READ(kind, str, idx + 1) == 'I' &&
1082
PyUnicode_READ(kind, str, idx + 2) == 'n' &&
1083
PyUnicode_READ(kind, str, idx + 3) == 'f' &&
1084
PyUnicode_READ(kind, str, idx + 4) == 'i' &&
1085
PyUnicode_READ(kind, str, idx + 5) == 'n' &&
1086
PyUnicode_READ(kind, str, idx + 6) == 'i' &&
1087
PyUnicode_READ(kind, str, idx + 7) == 't' &&
1088
PyUnicode_READ(kind, str, idx + 8) == 'y') {
1089
return _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1090
}
1091
break;
1092
}
1093
/* Didn't find a string, object, array, or named constant. Look for a number. */
1094
return _match_number_unicode(s, pystr, idx, next_idx_ptr);
1095
}
1096
1097
static PyObject *
1098
scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
1099
{
1100
/* Python callable interface to scan_once_{str,unicode} */
1101
PyObject *pystr;
1102
PyObject *rval;
1103
Py_ssize_t idx;
1104
Py_ssize_t next_idx = -1;
1105
static char *kwlist[] = {"string", "idx", NULL};
1106
if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:scan_once", kwlist, &pystr, &idx))
1107
return NULL;
1108
1109
if (PyUnicode_Check(pystr)) {
1110
rval = scan_once_unicode(self, pystr, idx, &next_idx);
1111
}
1112
else {
1113
PyErr_Format(PyExc_TypeError,
1114
"first argument must be a string, not %.80s",
1115
Py_TYPE(pystr)->tp_name);
1116
return NULL;
1117
}
1118
PyDict_Clear(self->memo);
1119
if (rval == NULL)
1120
return NULL;
1121
return _build_rval_index_tuple(rval, next_idx);
1122
}
1123
1124
static PyObject *
1125
scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1126
{
1127
PyScannerObject *s;
1128
PyObject *ctx;
1129
PyObject *strict;
1130
static char *kwlist[] = {"context", NULL};
1131
1132
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1133
return NULL;
1134
1135
s = (PyScannerObject *)type->tp_alloc(type, 0);
1136
if (s == NULL) {
1137
return NULL;
1138
}
1139
1140
s->memo = PyDict_New();
1141
if (s->memo == NULL)
1142
goto bail;
1143
1144
/* All of these will fail "gracefully" so we don't need to verify them */
1145
strict = PyObject_GetAttrString(ctx, "strict");
1146
if (strict == NULL)
1147
goto bail;
1148
s->strict = PyObject_IsTrue(strict);
1149
Py_DECREF(strict);
1150
if (s->strict < 0)
1151
goto bail;
1152
s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1153
if (s->object_hook == NULL)
1154
goto bail;
1155
s->object_pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1156
if (s->object_pairs_hook == NULL)
1157
goto bail;
1158
s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1159
if (s->parse_float == NULL)
1160
goto bail;
1161
s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1162
if (s->parse_int == NULL)
1163
goto bail;
1164
s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1165
if (s->parse_constant == NULL)
1166
goto bail;
1167
1168
return (PyObject *)s;
1169
1170
bail:
1171
Py_DECREF(s);
1172
return NULL;
1173
}
1174
1175
PyDoc_STRVAR(scanner_doc, "JSON scanner object");
1176
1177
static PyType_Slot PyScannerType_slots[] = {
1178
{Py_tp_doc, (void *)scanner_doc},
1179
{Py_tp_dealloc, scanner_dealloc},
1180
{Py_tp_call, scanner_call},
1181
{Py_tp_traverse, scanner_traverse},
1182
{Py_tp_clear, scanner_clear},
1183
{Py_tp_members, scanner_members},
1184
{Py_tp_new, scanner_new},
1185
{0, 0}
1186
};
1187
1188
static PyType_Spec PyScannerType_spec = {
1189
.name = "_json.Scanner",
1190
.basicsize = sizeof(PyScannerObject),
1191
.itemsize = 0,
1192
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1193
.slots = PyScannerType_slots,
1194
};
1195
1196
static PyObject *
1197
encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1198
{
1199
static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL};
1200
1201
PyEncoderObject *s;
1202
PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
1203
PyObject *item_separator;
1204
int sort_keys, skipkeys, allow_nan;
1205
1206
if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOUUppp:make_encoder", kwlist,
1207
&markers, &defaultfn, &encoder, &indent,
1208
&key_separator, &item_separator,
1209
&sort_keys, &skipkeys, &allow_nan))
1210
return NULL;
1211
1212
if (markers != Py_None && !PyDict_Check(markers)) {
1213
PyErr_Format(PyExc_TypeError,
1214
"make_encoder() argument 1 must be dict or None, "
1215
"not %.200s", Py_TYPE(markers)->tp_name);
1216
return NULL;
1217
}
1218
1219
s = (PyEncoderObject *)type->tp_alloc(type, 0);
1220
if (s == NULL)
1221
return NULL;
1222
1223
s->markers = Py_NewRef(markers);
1224
s->defaultfn = Py_NewRef(defaultfn);
1225
s->encoder = Py_NewRef(encoder);
1226
s->indent = Py_NewRef(indent);
1227
s->key_separator = Py_NewRef(key_separator);
1228
s->item_separator = Py_NewRef(item_separator);
1229
s->sort_keys = sort_keys;
1230
s->skipkeys = skipkeys;
1231
s->allow_nan = allow_nan;
1232
s->fast_encode = NULL;
1233
1234
if (PyCFunction_Check(s->encoder)) {
1235
PyCFunction f = PyCFunction_GetFunction(s->encoder);
1236
if (f == (PyCFunction)py_encode_basestring_ascii ||
1237
f == (PyCFunction)py_encode_basestring) {
1238
s->fast_encode = f;
1239
}
1240
}
1241
1242
return (PyObject *)s;
1243
}
1244
1245
static PyObject *
1246
encoder_call(PyEncoderObject *self, PyObject *args, PyObject *kwds)
1247
{
1248
/* Python callable interface to encode_listencode_obj */
1249
static char *kwlist[] = {"obj", "_current_indent_level", NULL};
1250
PyObject *obj, *result;
1251
Py_ssize_t indent_level;
1252
_PyUnicodeWriter writer;
1253
1254
if (!PyArg_ParseTupleAndKeywords(args, kwds, "On:_iterencode", kwlist,
1255
&obj, &indent_level))
1256
return NULL;
1257
1258
_PyUnicodeWriter_Init(&writer);
1259
writer.overallocate = 1;
1260
1261
if (encoder_listencode_obj(self, &writer, obj, indent_level)) {
1262
_PyUnicodeWriter_Dealloc(&writer);
1263
return NULL;
1264
}
1265
1266
result = PyTuple_New(1);
1267
if (result == NULL ||
1268
PyTuple_SetItem(result, 0, _PyUnicodeWriter_Finish(&writer)) < 0) {
1269
Py_XDECREF(result);
1270
return NULL;
1271
}
1272
return result;
1273
}
1274
1275
static PyObject *
_encoded_const(PyObject *obj)
{
    /* Map the singletons None, True and False to new references to the
       strings identified by null, true and false respectively.  Any other
       object raises ValueError and yields NULL. */
    if (obj == Py_None) {
        return Py_NewRef(&_Py_ID(null));
    }
    if (obj == Py_True) {
        return Py_NewRef(&_Py_ID(true));
    }
    if (obj == Py_False) {
        return Py_NewRef(&_Py_ID(false));
    }

    PyErr_SetString(PyExc_ValueError, "not a const");
    return NULL;
}
1293
1294
static PyObject *
1295
encoder_encode_float(PyEncoderObject *s, PyObject *obj)
1296
{
1297
/* Return the JSON representation of a PyFloat. */
1298
double i = PyFloat_AS_DOUBLE(obj);
1299
if (!Py_IS_FINITE(i)) {
1300
if (!s->allow_nan) {
1301
PyErr_Format(
1302
PyExc_ValueError,
1303
"Out of range float values are not JSON compliant: %R",
1304
obj
1305
);
1306
return NULL;
1307
}
1308
if (i > 0) {
1309
return PyUnicode_FromString("Infinity");
1310
}
1311
else if (i < 0) {
1312
return PyUnicode_FromString("-Infinity");
1313
}
1314
else {
1315
return PyUnicode_FromString("NaN");
1316
}
1317
}
1318
return PyFloat_Type.tp_repr(obj);
1319
}
1320
1321
static PyObject *
1322
encoder_encode_string(PyEncoderObject *s, PyObject *obj)
1323
{
1324
/* Return the JSON representation of a string */
1325
PyObject *encoded;
1326
1327
if (s->fast_encode) {
1328
return s->fast_encode(NULL, obj);
1329
}
1330
encoded = PyObject_CallOneArg(s->encoder, obj);
1331
if (encoded != NULL && !PyUnicode_Check(encoded)) {
1332
PyErr_Format(PyExc_TypeError,
1333
"encoder() must return a string, not %.80s",
1334
Py_TYPE(encoded)->tp_name);
1335
Py_DECREF(encoded);
1336
return NULL;
1337
}
1338
return encoded;
1339
}
1340
1341
static int
1342
_steal_accumulate(_PyUnicodeWriter *writer, PyObject *stolen)
1343
{
1344
/* Append stolen and then decrement its reference count */
1345
int rval = _PyUnicodeWriter_WriteStr(writer, stolen);
1346
Py_DECREF(stolen);
1347
return rval;
1348
}
1349
1350
static int
1351
encoder_listencode_obj(PyEncoderObject *s, _PyUnicodeWriter *writer,
1352
PyObject *obj, Py_ssize_t indent_level)
1353
{
1354
/* Encode Python object obj to a JSON term */
1355
PyObject *newobj;
1356
int rv;
1357
1358
if (obj == Py_None) {
1359
return _PyUnicodeWriter_WriteASCIIString(writer, "null", 4);
1360
}
1361
else if (obj == Py_True) {
1362
return _PyUnicodeWriter_WriteASCIIString(writer, "true", 4);
1363
}
1364
else if (obj == Py_False) {
1365
return _PyUnicodeWriter_WriteASCIIString(writer, "false", 5);
1366
}
1367
else if (PyUnicode_Check(obj)) {
1368
PyObject *encoded = encoder_encode_string(s, obj);
1369
if (encoded == NULL)
1370
return -1;
1371
return _steal_accumulate(writer, encoded);
1372
}
1373
else if (PyLong_Check(obj)) {
1374
PyObject *encoded = PyLong_Type.tp_repr(obj);
1375
if (encoded == NULL)
1376
return -1;
1377
return _steal_accumulate(writer, encoded);
1378
}
1379
else if (PyFloat_Check(obj)) {
1380
PyObject *encoded = encoder_encode_float(s, obj);
1381
if (encoded == NULL)
1382
return -1;
1383
return _steal_accumulate(writer, encoded);
1384
}
1385
else if (PyList_Check(obj) || PyTuple_Check(obj)) {
1386
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1387
return -1;
1388
rv = encoder_listencode_list(s, writer, obj, indent_level);
1389
_Py_LeaveRecursiveCall();
1390
return rv;
1391
}
1392
else if (PyDict_Check(obj)) {
1393
if (_Py_EnterRecursiveCall(" while encoding a JSON object"))
1394
return -1;
1395
rv = encoder_listencode_dict(s, writer, obj, indent_level);
1396
_Py_LeaveRecursiveCall();
1397
return rv;
1398
}
1399
else {
1400
PyObject *ident = NULL;
1401
if (s->markers != Py_None) {
1402
int has_key;
1403
ident = PyLong_FromVoidPtr(obj);
1404
if (ident == NULL)
1405
return -1;
1406
has_key = PyDict_Contains(s->markers, ident);
1407
if (has_key) {
1408
if (has_key != -1)
1409
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1410
Py_DECREF(ident);
1411
return -1;
1412
}
1413
if (PyDict_SetItem(s->markers, ident, obj)) {
1414
Py_DECREF(ident);
1415
return -1;
1416
}
1417
}
1418
newobj = PyObject_CallOneArg(s->defaultfn, obj);
1419
if (newobj == NULL) {
1420
Py_XDECREF(ident);
1421
return -1;
1422
}
1423
1424
if (_Py_EnterRecursiveCall(" while encoding a JSON object")) {
1425
Py_DECREF(newobj);
1426
Py_XDECREF(ident);
1427
return -1;
1428
}
1429
rv = encoder_listencode_obj(s, writer, newobj, indent_level);
1430
_Py_LeaveRecursiveCall();
1431
1432
Py_DECREF(newobj);
1433
if (rv) {
1434
Py_XDECREF(ident);
1435
return -1;
1436
}
1437
if (ident != NULL) {
1438
if (PyDict_DelItem(s->markers, ident)) {
1439
Py_XDECREF(ident);
1440
return -1;
1441
}
1442
Py_XDECREF(ident);
1443
}
1444
return rv;
1445
}
1446
}
1447
1448
static int
1449
encoder_encode_key_value(PyEncoderObject *s, _PyUnicodeWriter *writer, bool *first,
1450
PyObject *key, PyObject *value, Py_ssize_t indent_level)
1451
{
1452
PyObject *keystr = NULL;
1453
PyObject *encoded;
1454
1455
if (PyUnicode_Check(key)) {
1456
keystr = Py_NewRef(key);
1457
}
1458
else if (PyFloat_Check(key)) {
1459
keystr = encoder_encode_float(s, key);
1460
}
1461
else if (key == Py_True || key == Py_False || key == Py_None) {
1462
/* This must come before the PyLong_Check because
1463
True and False are also 1 and 0.*/
1464
keystr = _encoded_const(key);
1465
}
1466
else if (PyLong_Check(key)) {
1467
keystr = PyLong_Type.tp_repr(key);
1468
}
1469
else if (s->skipkeys) {
1470
return 0;
1471
}
1472
else {
1473
PyErr_Format(PyExc_TypeError,
1474
"keys must be str, int, float, bool or None, "
1475
"not %.100s", Py_TYPE(key)->tp_name);
1476
return -1;
1477
}
1478
1479
if (keystr == NULL) {
1480
return -1;
1481
}
1482
1483
if (*first) {
1484
*first = false;
1485
}
1486
else {
1487
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator) < 0) {
1488
Py_DECREF(keystr);
1489
return -1;
1490
}
1491
}
1492
1493
encoded = encoder_encode_string(s, keystr);
1494
Py_DECREF(keystr);
1495
if (encoded == NULL) {
1496
return -1;
1497
}
1498
1499
if (_steal_accumulate(writer, encoded) < 0) {
1500
return -1;
1501
}
1502
if (_PyUnicodeWriter_WriteStr(writer, s->key_separator) < 0) {
1503
return -1;
1504
}
1505
if (encoder_listencode_obj(s, writer, value, indent_level) < 0) {
1506
return -1;
1507
}
1508
return 0;
1509
}
1510
1511
static int
1512
encoder_listencode_dict(PyEncoderObject *s, _PyUnicodeWriter *writer,
1513
PyObject *dct, Py_ssize_t indent_level)
1514
{
1515
/* Encode Python dict dct a JSON term */
1516
PyObject *ident = NULL;
1517
PyObject *items = NULL;
1518
PyObject *key, *value;
1519
bool first = true;
1520
1521
if (PyDict_GET_SIZE(dct) == 0) /* Fast path */
1522
return _PyUnicodeWriter_WriteASCIIString(writer, "{}", 2);
1523
1524
if (s->markers != Py_None) {
1525
int has_key;
1526
ident = PyLong_FromVoidPtr(dct);
1527
if (ident == NULL)
1528
goto bail;
1529
has_key = PyDict_Contains(s->markers, ident);
1530
if (has_key) {
1531
if (has_key != -1)
1532
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1533
goto bail;
1534
}
1535
if (PyDict_SetItem(s->markers, ident, dct)) {
1536
goto bail;
1537
}
1538
}
1539
1540
if (_PyUnicodeWriter_WriteChar(writer, '{'))
1541
goto bail;
1542
1543
if (s->indent != Py_None) {
1544
/* TODO: DOES NOT RUN */
1545
indent_level += 1;
1546
/*
1547
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1548
separator = _item_separator + newline_indent
1549
buf += newline_indent
1550
*/
1551
}
1552
1553
if (s->sort_keys || !PyDict_CheckExact(dct)) {
1554
items = PyMapping_Items(dct);
1555
if (items == NULL || (s->sort_keys && PyList_Sort(items) < 0))
1556
goto bail;
1557
1558
for (Py_ssize_t i = 0; i < PyList_GET_SIZE(items); i++) {
1559
PyObject *item = PyList_GET_ITEM(items, i);
1560
1561
if (!PyTuple_Check(item) || PyTuple_GET_SIZE(item) != 2) {
1562
PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
1563
goto bail;
1564
}
1565
1566
key = PyTuple_GET_ITEM(item, 0);
1567
value = PyTuple_GET_ITEM(item, 1);
1568
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1569
goto bail;
1570
}
1571
Py_CLEAR(items);
1572
1573
} else {
1574
Py_ssize_t pos = 0;
1575
while (PyDict_Next(dct, &pos, &key, &value)) {
1576
if (encoder_encode_key_value(s, writer, &first, key, value, indent_level) < 0)
1577
goto bail;
1578
}
1579
}
1580
1581
if (ident != NULL) {
1582
if (PyDict_DelItem(s->markers, ident))
1583
goto bail;
1584
Py_CLEAR(ident);
1585
}
1586
/* TODO DOES NOT RUN; dead code
1587
if (s->indent != Py_None) {
1588
indent_level -= 1;
1589
1590
yield '\n' + (' ' * (_indent * _current_indent_level))
1591
}*/
1592
if (_PyUnicodeWriter_WriteChar(writer, '}'))
1593
goto bail;
1594
return 0;
1595
1596
bail:
1597
Py_XDECREF(items);
1598
Py_XDECREF(ident);
1599
return -1;
1600
}
1601
1602
static int
1603
encoder_listencode_list(PyEncoderObject *s, _PyUnicodeWriter *writer,
1604
PyObject *seq, Py_ssize_t indent_level)
1605
{
1606
PyObject *ident = NULL;
1607
PyObject *s_fast = NULL;
1608
Py_ssize_t i;
1609
1610
ident = NULL;
1611
s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence");
1612
if (s_fast == NULL)
1613
return -1;
1614
if (PySequence_Fast_GET_SIZE(s_fast) == 0) {
1615
Py_DECREF(s_fast);
1616
return _PyUnicodeWriter_WriteASCIIString(writer, "[]", 2);
1617
}
1618
1619
if (s->markers != Py_None) {
1620
int has_key;
1621
ident = PyLong_FromVoidPtr(seq);
1622
if (ident == NULL)
1623
goto bail;
1624
has_key = PyDict_Contains(s->markers, ident);
1625
if (has_key) {
1626
if (has_key != -1)
1627
PyErr_SetString(PyExc_ValueError, "Circular reference detected");
1628
goto bail;
1629
}
1630
if (PyDict_SetItem(s->markers, ident, seq)) {
1631
goto bail;
1632
}
1633
}
1634
1635
if (_PyUnicodeWriter_WriteChar(writer, '['))
1636
goto bail;
1637
if (s->indent != Py_None) {
1638
/* TODO: DOES NOT RUN */
1639
indent_level += 1;
1640
/*
1641
newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
1642
separator = _item_separator + newline_indent
1643
buf += newline_indent
1644
*/
1645
}
1646
for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) {
1647
PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i);
1648
if (i) {
1649
if (_PyUnicodeWriter_WriteStr(writer, s->item_separator))
1650
goto bail;
1651
}
1652
if (encoder_listencode_obj(s, writer, obj, indent_level))
1653
goto bail;
1654
}
1655
if (ident != NULL) {
1656
if (PyDict_DelItem(s->markers, ident))
1657
goto bail;
1658
Py_CLEAR(ident);
1659
}
1660
1661
/* TODO: DOES NOT RUN
1662
if (s->indent != Py_None) {
1663
indent_level -= 1;
1664
1665
yield '\n' + (' ' * (_indent * _current_indent_level))
1666
}*/
1667
if (_PyUnicodeWriter_WriteChar(writer, ']'))
1668
goto bail;
1669
Py_DECREF(s_fast);
1670
return 0;
1671
1672
bail:
1673
Py_XDECREF(ident);
1674
Py_DECREF(s_fast);
1675
return -1;
1676
}
1677
1678
static void
1679
encoder_dealloc(PyObject *self)
1680
{
1681
PyTypeObject *tp = Py_TYPE(self);
1682
/* bpo-31095: UnTrack is needed before calling any callbacks */
1683
PyObject_GC_UnTrack(self);
1684
encoder_clear((PyEncoderObject *)self);
1685
tp->tp_free(self);
1686
Py_DECREF(tp);
1687
}
1688
1689
static int
1690
encoder_traverse(PyEncoderObject *self, visitproc visit, void *arg)
1691
{
1692
Py_VISIT(Py_TYPE(self));
1693
Py_VISIT(self->markers);
1694
Py_VISIT(self->defaultfn);
1695
Py_VISIT(self->encoder);
1696
Py_VISIT(self->indent);
1697
Py_VISIT(self->key_separator);
1698
Py_VISIT(self->item_separator);
1699
return 0;
1700
}
1701
1702
static int
1703
encoder_clear(PyEncoderObject *self)
1704
{
1705
/* Deallocate Encoder */
1706
Py_CLEAR(self->markers);
1707
Py_CLEAR(self->defaultfn);
1708
Py_CLEAR(self->encoder);
1709
Py_CLEAR(self->indent);
1710
Py_CLEAR(self->key_separator);
1711
Py_CLEAR(self->item_separator);
1712
return 0;
1713
}
1714
1715
PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
1716
1717
static PyType_Slot PyEncoderType_slots[] = {
1718
{Py_tp_doc, (void *)encoder_doc},
1719
{Py_tp_dealloc, encoder_dealloc},
1720
{Py_tp_call, encoder_call},
1721
{Py_tp_traverse, encoder_traverse},
1722
{Py_tp_clear, encoder_clear},
1723
{Py_tp_members, encoder_members},
1724
{Py_tp_new, encoder_new},
1725
{0, 0}
1726
};
1727
1728
static PyType_Spec PyEncoderType_spec = {
1729
.name = "_json.Encoder",
1730
.basicsize = sizeof(PyEncoderObject),
1731
.itemsize = 0,
1732
.flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,
1733
.slots = PyEncoderType_slots
1734
};
1735
1736
static PyMethodDef speedups_methods[] = {
1737
{"encode_basestring_ascii",
1738
(PyCFunction)py_encode_basestring_ascii,
1739
METH_O,
1740
pydoc_encode_basestring_ascii},
1741
{"encode_basestring",
1742
(PyCFunction)py_encode_basestring,
1743
METH_O,
1744
pydoc_encode_basestring},
1745
{"scanstring",
1746
(PyCFunction)py_scanstring,
1747
METH_VARARGS,
1748
pydoc_scanstring},
1749
{NULL, NULL, 0, NULL}
1750
};
1751
1752
PyDoc_STRVAR(module_doc,
1753
"json speedups\n");
1754
1755
static int
1756
_json_exec(PyObject *module)
1757
{
1758
PyObject *PyScannerType = PyType_FromSpec(&PyScannerType_spec);
1759
if (PyScannerType == NULL) {
1760
return -1;
1761
}
1762
int rc = PyModule_AddObjectRef(module, "make_scanner", PyScannerType);
1763
Py_DECREF(PyScannerType);
1764
if (rc < 0) {
1765
return -1;
1766
}
1767
1768
PyObject *PyEncoderType = PyType_FromSpec(&PyEncoderType_spec);
1769
if (PyEncoderType == NULL) {
1770
return -1;
1771
}
1772
rc = PyModule_AddObjectRef(module, "make_encoder", PyEncoderType);
1773
Py_DECREF(PyEncoderType);
1774
if (rc < 0) {
1775
return -1;
1776
}
1777
1778
return 0;
1779
}
1780
1781
static PyModuleDef_Slot _json_slots[] = {
1782
{Py_mod_exec, _json_exec},
1783
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1784
{0, NULL}
1785
};
1786
1787
static struct PyModuleDef jsonmodule = {
1788
.m_base = PyModuleDef_HEAD_INIT,
1789
.m_name = "_json",
1790
.m_doc = module_doc,
1791
.m_methods = speedups_methods,
1792
.m_slots = _json_slots,
1793
};
1794
1795
PyMODINIT_FUNC
1796
PyInit__json(void)
1797
{
1798
return PyModuleDef_Init(&jsonmodule);
1799
}
1800
1801