/* Source: GitHub repository allendowney/cpython, path Modules/_codecsmodule.c (main branch). */
/* ------------------------------------------------------------------------
2
3
_codecs -- Provides access to the codec registry and the builtin
4
codecs.
5
6
This module should never be imported directly. The standard library
7
module "codecs" wraps this builtin module for use within Python.
8
9
The codec registry is accessible via:
10
11
register(search_function) -> None
12
13
lookup(encoding) -> CodecInfo object
14
15
The builtin Unicode codecs use the following interface:
16
17
<encoding>_encode(Unicode_object[,errors='strict']) ->
18
(string object, bytes consumed)
19
20
<encoding>_decode(char_buffer_obj[,errors='strict']) ->
21
(Unicode object, bytes consumed)
22
23
These <encoding>s are available: utf_8, unicode_escape,
24
raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).
25
26
27
Written by Marc-Andre Lemburg (mal@lemburg.com).
28
29
Copyright (c) Corporation for National Research Initiatives.
30
31
------------------------------------------------------------------------ */
32
33
#include "Python.h"
34
#include "pycore_codecs.h" // _PyCodec_Lookup()
35
36
#ifdef MS_WINDOWS
37
#include <windows.h>
38
#endif
39
40
/*[clinic input]
41
module _codecs
42
[clinic start generated code]*/
43
/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/
44
45
#include "pycore_runtime.h"
46
#include "clinic/_codecsmodule.c.h"
47
48
/* --- Registry ----------------------------------------------------------- */
49
50
/*[clinic input]
51
_codecs.register
52
search_function: object
53
/
54
55
Register a codec search function.
56
57
Search functions are expected to take one argument, the encoding name in
58
all lower case letters, and either return None, or a tuple of functions
59
(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).
60
[clinic start generated code]*/
61
62
static PyObject *
_codecs_register(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/
{
    /* Hand the callable to PyCodec_Register(); any non-zero status is
       reported to the caller as NULL, otherwise None is returned. */
    int status = PyCodec_Register(search_function);
    if (status != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
71
72
/*[clinic input]
73
_codecs.unregister
74
search_function: object
75
/
76
77
Unregister a codec search function and clear the registry's cache.
78
79
If the search function is not registered, do nothing.
80
[clinic start generated code]*/
81
82
static PyObject *
_codecs_unregister(PyObject *module, PyObject *search_function)
/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/
{
    /* A negative status from PyCodec_Unregister() becomes a NULL return;
       every other outcome yields None. */
    int status = PyCodec_Unregister(search_function);
    if (status < 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
92
93
/*[clinic input]
94
_codecs.lookup
95
encoding: str
96
/
97
98
Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.
99
[clinic start generated code]*/
100
101
static PyObject *
_codecs_lookup_impl(PyObject *module, const char *encoding)
/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/
{
    /* Thin wrapper: whatever _PyCodec_Lookup() produces for this encoding
       name is returned unchanged. */
    PyObject *codec_info = _PyCodec_Lookup(encoding);
    return codec_info;
}
107
108
/*[clinic input]
109
_codecs.encode
110
obj: object
111
encoding: str(c_default="NULL") = "utf-8"
112
errors: str(c_default="NULL") = "strict"
113
114
Encodes obj using the codec registered for encoding.
115
116
The default encoding is 'utf-8'. errors may be given to set a
117
different error handling scheme. Default is 'strict' meaning that encoding
118
errors raise a ValueError. Other possible values are 'ignore', 'replace'
119
and 'backslashreplace' as well as any other name registered with
120
codecs.register_error that can handle ValueErrors.
121
[clinic start generated code]*/
122
123
static PyObject *
_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/
{
    /* A NULL encoding means "not given": substitute the interpreter's
       default encoding name before going through the codec registry. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* errors is forwarded as-is (it may be NULL). */
    return PyCodec_Encode(obj, codec_name, errors);
}
134
135
/*[clinic input]
136
_codecs.decode
137
obj: object
138
encoding: str(c_default="NULL") = "utf-8"
139
errors: str(c_default="NULL") = "strict"
140
141
Decodes obj using the codec registered for encoding.
142
143
Default encoding is 'utf-8'. errors may be given to set a
144
different error handling scheme. Default is 'strict' meaning that encoding
145
errors raise a ValueError. Other possible values are 'ignore', 'replace'
146
and 'backslashreplace' as well as any other name registered with
147
codecs.register_error that can handle ValueErrors.
148
[clinic start generated code]*/
149
150
static PyObject *
_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,
                    const char *errors)
/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/
{
    /* A NULL encoding means "not given": substitute the interpreter's
       default encoding name before going through the codec registry. */
    const char *codec_name = encoding;
    if (codec_name == NULL) {
        codec_name = PyUnicode_GetDefaultEncoding();
    }

    /* errors is forwarded as-is (it may be NULL). */
    return PyCodec_Decode(obj, codec_name, errors);
}
161
162
/* --- Helpers ------------------------------------------------------------ */
163
164
static
165
PyObject *codec_tuple(PyObject *decoded,
166
Py_ssize_t len)
167
{
168
if (decoded == NULL)
169
return NULL;
170
return Py_BuildValue("Nn", decoded, len);
171
}
172
173
/* --- String codecs ------------------------------------------------------ */
174
/*[clinic input]
175
_codecs.escape_decode
176
data: Py_buffer(accept={str, buffer})
177
errors: str(accept={str, NoneType}) = None
178
/
179
[clinic start generated code]*/
180
181
static PyObject *
182
_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,
183
const char *errors)
184
/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/
185
{
186
PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,
187
errors, 0, NULL);
188
return codec_tuple(decoded, data->len);
189
}
190
191
/*[clinic input]
192
_codecs.escape_encode
193
data: object(subclass_of='&PyBytes_Type')
194
errors: str(accept={str, NoneType}) = None
195
/
196
[clinic start generated code]*/
197
198
static PyObject *
_codecs_escape_encode_impl(PyObject *module, PyObject *data,
                           const char *errors)
/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/
{
    /* Produce a bytes object in which single quote, backslash, tab, newline
       and carriage return are backslash-escaped, and every other byte below
       0x20 or at/above 0x7f is written as a four-character hex escape.
       Returns the tuple (escaped bytes, number of input bytes).
       errors is accepted but not consulted. */
    const Py_ssize_t size = PyBytes_GET_SIZE(data);

    /* Worst case is four output bytes per input byte, so make sure the
       multiplication below cannot overflow. */
    if (size > PY_SSIZE_T_MAX / 4) {
        PyErr_SetString(PyExc_OverflowError,
                        "string is too large to encode");
        return NULL;
    }
    const Py_ssize_t newsize = 4 * size;

    PyObject *v = PyBytes_FromStringAndSize(NULL, newsize);
    if (v == NULL) {
        return NULL;
    }

    const char *src = PyBytes_AS_STRING(data);
    char *out = PyBytes_AS_STRING(v);

    for (Py_ssize_t i = 0; i < size; i++) {
        /* There's at least enough room for a hex escape */
        assert(newsize - (out - PyBytes_AS_STRING(v)) >= 4);

        const unsigned char ch = (unsigned char)src[i];
        switch (ch) {
        case '\'':
        case '\\':
            *out++ = '\\';
            *out++ = (char)ch;
            break;
        case '\t':
            *out++ = '\\';
            *out++ = 't';
            break;
        case '\n':
            *out++ = '\\';
            *out++ = 'n';
            break;
        case '\r':
            *out++ = '\\';
            *out++ = 'r';
            break;
        default:
            if (ch < ' ' || ch >= 0x7f) {
                /* Non-printable: emit backslash, 'x', two hex digits. */
                *out++ = '\\';
                *out++ = 'x';
                *out++ = Py_hexdigits[(ch & 0xf0) >> 4];
                *out++ = Py_hexdigits[ch & 0xf];
            }
            else {
                *out++ = (char)ch;
            }
            break;
        }
    }
    *out = '\0';

    /* Trim the over-allocated buffer down to what was actually written. */
    if (_PyBytes_Resize(&v, (out - PyBytes_AS_STRING(v)))) {
        return NULL;
    }

    return codec_tuple(v, size);
}
253
254
/* --- Decoder ------------------------------------------------------------ */
255
/*[clinic input]
256
_codecs.utf_7_decode
257
data: Py_buffer
258
errors: str(accept={str, NoneType}) = None
259
final: bool = False
260
/
261
[clinic start generated code]*/
262
263
static PyObject *
264
_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,
265
const char *errors, int final)
266
/*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/
267
{
268
Py_ssize_t consumed = data->len;
269
PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,
270
errors,
271
final ? NULL : &consumed);
272
return codec_tuple(decoded, consumed);
273
}
274
275
/*[clinic input]
276
_codecs.utf_8_decode
277
data: Py_buffer
278
errors: str(accept={str, NoneType}) = None
279
final: bool = False
280
/
281
[clinic start generated code]*/
282
283
static PyObject *
284
_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,
285
const char *errors, int final)
286
/*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/
287
{
288
Py_ssize_t consumed = data->len;
289
PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,
290
errors,
291
final ? NULL : &consumed);
292
return codec_tuple(decoded, consumed);
293
}
294
295
/*[clinic input]
296
_codecs.utf_16_decode
297
data: Py_buffer
298
errors: str(accept={str, NoneType}) = None
299
final: bool = False
300
/
301
[clinic start generated code]*/
302
303
static PyObject *
304
_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,
305
const char *errors, int final)
306
/*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/
307
{
308
int byteorder = 0;
309
/* This is overwritten unless final is true. */
310
Py_ssize_t consumed = data->len;
311
PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
312
errors, &byteorder,
313
final ? NULL : &consumed);
314
return codec_tuple(decoded, consumed);
315
}
316
317
/*[clinic input]
318
_codecs.utf_16_le_decode
319
data: Py_buffer
320
errors: str(accept={str, NoneType}) = None
321
final: bool = False
322
/
323
[clinic start generated code]*/
324
325
static PyObject *
326
_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,
327
const char *errors, int final)
328
/*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/
329
{
330
int byteorder = -1;
331
/* This is overwritten unless final is true. */
332
Py_ssize_t consumed = data->len;
333
PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
334
errors, &byteorder,
335
final ? NULL : &consumed);
336
return codec_tuple(decoded, consumed);
337
}
338
339
/*[clinic input]
340
_codecs.utf_16_be_decode
341
data: Py_buffer
342
errors: str(accept={str, NoneType}) = None
343
final: bool = False
344
/
345
[clinic start generated code]*/
346
347
static PyObject *
348
_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,
349
const char *errors, int final)
350
/*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/
351
{
352
int byteorder = 1;
353
/* This is overwritten unless final is true. */
354
Py_ssize_t consumed = data->len;
355
PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
356
errors, &byteorder,
357
final ? NULL : &consumed);
358
return codec_tuple(decoded, consumed);
359
}
360
361
/* This non-standard version also provides access to the byteorder
362
parameter of the builtin UTF-16 codec.
363
364
It returns a tuple (unicode, bytesread, byteorder) with byteorder
365
being the value in effect at the end of data.
366
367
*/
368
/*[clinic input]
369
_codecs.utf_16_ex_decode
370
data: Py_buffer
371
errors: str(accept={str, NoneType}) = None
372
byteorder: int = 0
373
final: bool = False
374
/
375
[clinic start generated code]*/
376
377
static PyObject *
378
_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,
379
const char *errors, int byteorder, int final)
380
/*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/
381
{
382
/* This is overwritten unless final is true. */
383
Py_ssize_t consumed = data->len;
384
385
PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,
386
errors, &byteorder,
387
final ? NULL : &consumed);
388
if (decoded == NULL)
389
return NULL;
390
return Py_BuildValue("Nni", decoded, consumed, byteorder);
391
}
392
393
/*[clinic input]
394
_codecs.utf_32_decode
395
data: Py_buffer
396
errors: str(accept={str, NoneType}) = None
397
final: bool = False
398
/
399
[clinic start generated code]*/
400
401
static PyObject *
402
_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,
403
const char *errors, int final)
404
/*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/
405
{
406
int byteorder = 0;
407
/* This is overwritten unless final is true. */
408
Py_ssize_t consumed = data->len;
409
PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
410
errors, &byteorder,
411
final ? NULL : &consumed);
412
return codec_tuple(decoded, consumed);
413
}
414
415
/*[clinic input]
416
_codecs.utf_32_le_decode
417
data: Py_buffer
418
errors: str(accept={str, NoneType}) = None
419
final: bool = False
420
/
421
[clinic start generated code]*/
422
423
static PyObject *
424
_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,
425
const char *errors, int final)
426
/*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/
427
{
428
int byteorder = -1;
429
/* This is overwritten unless final is true. */
430
Py_ssize_t consumed = data->len;
431
PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
432
errors, &byteorder,
433
final ? NULL : &consumed);
434
return codec_tuple(decoded, consumed);
435
}
436
437
/*[clinic input]
438
_codecs.utf_32_be_decode
439
data: Py_buffer
440
errors: str(accept={str, NoneType}) = None
441
final: bool = False
442
/
443
[clinic start generated code]*/
444
445
static PyObject *
446
_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,
447
const char *errors, int final)
448
/*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/
449
{
450
int byteorder = 1;
451
/* This is overwritten unless final is true. */
452
Py_ssize_t consumed = data->len;
453
PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
454
errors, &byteorder,
455
final ? NULL : &consumed);
456
return codec_tuple(decoded, consumed);
457
}
458
459
/* This non-standard version also provides access to the byteorder
460
parameter of the builtin UTF-32 codec.
461
462
It returns a tuple (unicode, bytesread, byteorder) with byteorder
463
being the value in effect at the end of data.
464
465
*/
466
/*[clinic input]
467
_codecs.utf_32_ex_decode
468
data: Py_buffer
469
errors: str(accept={str, NoneType}) = None
470
byteorder: int = 0
471
final: bool = False
472
/
473
[clinic start generated code]*/
474
475
static PyObject *
476
_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,
477
const char *errors, int byteorder, int final)
478
/*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/
479
{
480
Py_ssize_t consumed = data->len;
481
PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,
482
errors, &byteorder,
483
final ? NULL : &consumed);
484
if (decoded == NULL)
485
return NULL;
486
return Py_BuildValue("Nni", decoded, consumed, byteorder);
487
}
488
489
/*[clinic input]
490
_codecs.unicode_escape_decode
491
data: Py_buffer(accept={str, buffer})
492
errors: str(accept={str, NoneType}) = None
493
final: bool = True
494
/
495
[clinic start generated code]*/
496
497
static PyObject *
498
_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
499
const char *errors, int final)
500
/*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/
501
{
502
Py_ssize_t consumed = data->len;
503
PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,
504
errors,
505
final ? NULL : &consumed);
506
return codec_tuple(decoded, consumed);
507
}
508
509
/*[clinic input]
510
_codecs.raw_unicode_escape_decode
511
data: Py_buffer(accept={str, buffer})
512
errors: str(accept={str, NoneType}) = None
513
final: bool = True
514
/
515
[clinic start generated code]*/
516
517
static PyObject *
518
_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,
519
const char *errors, int final)
520
/*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/
521
{
522
Py_ssize_t consumed = data->len;
523
PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,
524
errors,
525
final ? NULL : &consumed);
526
return codec_tuple(decoded, consumed);
527
}
528
529
/*[clinic input]
530
_codecs.latin_1_decode
531
data: Py_buffer
532
errors: str(accept={str, NoneType}) = None
533
/
534
[clinic start generated code]*/
535
536
static PyObject *
537
_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,
538
const char *errors)
539
/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/
540
{
541
PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);
542
return codec_tuple(decoded, data->len);
543
}
544
545
/*[clinic input]
546
_codecs.ascii_decode
547
data: Py_buffer
548
errors: str(accept={str, NoneType}) = None
549
/
550
[clinic start generated code]*/
551
552
static PyObject *
553
_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,
554
const char *errors)
555
/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/
556
{
557
PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);
558
return codec_tuple(decoded, data->len);
559
}
560
561
/*[clinic input]
562
_codecs.charmap_decode
563
data: Py_buffer
564
errors: str(accept={str, NoneType}) = None
565
mapping: object = None
566
/
567
[clinic start generated code]*/
568
569
static PyObject *
570
_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,
571
const char *errors, PyObject *mapping)
572
/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/
573
{
574
PyObject *decoded;
575
576
if (mapping == Py_None)
577
mapping = NULL;
578
579
decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);
580
return codec_tuple(decoded, data->len);
581
}
582
583
#ifdef MS_WINDOWS
584
585
/*[clinic input]
586
_codecs.mbcs_decode
587
data: Py_buffer
588
errors: str(accept={str, NoneType}) = None
589
final: bool = False
590
/
591
[clinic start generated code]*/
592
593
static PyObject *
594
_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,
595
const char *errors, int final)
596
/*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/
597
{
598
Py_ssize_t consumed = data->len;
599
PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,
600
errors, final ? NULL : &consumed);
601
return codec_tuple(decoded, consumed);
602
}
603
604
/*[clinic input]
605
_codecs.oem_decode
606
data: Py_buffer
607
errors: str(accept={str, NoneType}) = None
608
final: bool = False
609
/
610
[clinic start generated code]*/
611
612
static PyObject *
613
_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,
614
const char *errors, int final)
615
/*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/
616
{
617
Py_ssize_t consumed = data->len;
618
PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,
619
data->buf, data->len, errors, final ? NULL : &consumed);
620
return codec_tuple(decoded, consumed);
621
}
622
623
/*[clinic input]
624
_codecs.code_page_decode
625
codepage: int
626
data: Py_buffer
627
errors: str(accept={str, NoneType}) = None
628
final: bool = False
629
/
630
[clinic start generated code]*/
631
632
static PyObject *
633
_codecs_code_page_decode_impl(PyObject *module, int codepage,
634
Py_buffer *data, const char *errors, int final)
635
/*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/
636
{
637
Py_ssize_t consumed = data->len;
638
PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,
639
data->buf, data->len,
640
errors,
641
final ? NULL : &consumed);
642
return codec_tuple(decoded, consumed);
643
}
644
645
#endif /* MS_WINDOWS */
646
647
/* --- Encoder ------------------------------------------------------------ */
648
649
/*[clinic input]
650
_codecs.readbuffer_encode
651
data: Py_buffer(accept={str, buffer})
652
errors: str(accept={str, NoneType}) = None
653
/
654
[clinic start generated code]*/
655
656
static PyObject *
657
_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,
658
const char *errors)
659
/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/
660
{
661
PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);
662
return codec_tuple(result, data->len);
663
}
664
665
/*[clinic input]
666
_codecs.utf_7_encode
667
str: unicode
668
errors: str(accept={str, NoneType}) = None
669
/
670
[clinic start generated code]*/
671
672
static PyObject *
_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/
{
    /* Result tuple: (encoded object, length of str).  The two middle
       arguments of _PyUnicode_EncodeUTF7() are both passed as 0. */
    PyObject *encoded = _PyUnicode_EncodeUTF7(str, 0, 0, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
680
681
/*[clinic input]
682
_codecs.utf_8_encode
683
str: unicode
684
errors: str(accept={str, NoneType}) = None
685
/
686
[clinic start generated code]*/
687
688
static PyObject *
_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/
{
    /* Result tuple: (encoded object, length of str). */
    PyObject *encoded = _PyUnicode_AsUTF8String(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
696
697
/* This version provides access to the byteorder parameter of the
698
builtin UTF-16 codecs as optional third argument. It defaults to 0
699
which means: use the native byte order and prepend the data with a
700
BOM mark.
701
702
*/
703
704
/*[clinic input]
705
_codecs.utf_16_encode
706
str: unicode
707
errors: str(accept={str, NoneType}) = None
708
byteorder: int = 0
709
/
710
[clinic start generated code]*/
711
712
static PyObject *
_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/
{
    /* Result tuple: (encoded object, length of str).  The caller's
       byteorder value is forwarded unchanged to the encoder. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, byteorder);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
720
721
/*[clinic input]
722
_codecs.utf_16_le_encode
723
str: unicode
724
errors: str(accept={str, NoneType}) = None
725
/
726
[clinic start generated code]*/
727
728
static PyObject *
_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/
{
    /* Result tuple: (encoded object, length of str).  The "le" variant
       fixes the encoder's byteorder argument to -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, -1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
736
737
/*[clinic input]
738
_codecs.utf_16_be_encode
739
str: unicode
740
errors: str(accept={str, NoneType}) = None
741
/
742
[clinic start generated code]*/
743
744
static PyObject *
_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/
{
    /* Result tuple: (encoded object, length of str).  The "be" variant
       fixes the encoder's byteorder argument to +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF16(str, errors, +1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
752
753
/* This version provides access to the byteorder parameter of the
754
builtin UTF-32 codecs as optional third argument. It defaults to 0
755
which means: use the native byte order and prepend the data with a
756
BOM mark.
757
758
*/
759
760
/*[clinic input]
761
_codecs.utf_32_encode
762
str: unicode
763
errors: str(accept={str, NoneType}) = None
764
byteorder: int = 0
765
/
766
[clinic start generated code]*/
767
768
static PyObject *
_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,
                           const char *errors, int byteorder)
/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/
{
    /* Result tuple: (encoded object, length of str).  The caller's
       byteorder value is forwarded unchanged to the encoder. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, byteorder);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
776
777
/*[clinic input]
778
_codecs.utf_32_le_encode
779
str: unicode
780
errors: str(accept={str, NoneType}) = None
781
/
782
[clinic start generated code]*/
783
784
static PyObject *
_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/
{
    /* Result tuple: (encoded object, length of str).  The "le" variant
       fixes the encoder's byteorder argument to -1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, -1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
792
793
/*[clinic input]
794
_codecs.utf_32_be_encode
795
str: unicode
796
errors: str(accept={str, NoneType}) = None
797
/
798
[clinic start generated code]*/
799
800
static PyObject *
_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/
{
    /* Result tuple: (encoded object, length of str).  The "be" variant
       fixes the encoder's byteorder argument to +1. */
    PyObject *encoded = _PyUnicode_EncodeUTF32(str, errors, +1);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
808
809
/*[clinic input]
810
_codecs.unicode_escape_encode
811
str: unicode
812
errors: str(accept={str, NoneType}) = None
813
/
814
[clinic start generated code]*/
815
816
static PyObject *
_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                   const char *errors)
/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/
{
    /* Result tuple: (encoded object, length of str).  errors is accepted
       but not passed on; the encoder call takes only the string. */
    PyObject *encoded = PyUnicode_AsUnicodeEscapeString(str);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
824
825
/*[clinic input]
826
_codecs.raw_unicode_escape_encode
827
str: unicode
828
errors: str(accept={str, NoneType}) = None
829
/
830
[clinic start generated code]*/
831
832
static PyObject *
_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,
                                       const char *errors)
/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/
{
    /* Result tuple: (encoded object, length of str).  errors is accepted
       but not passed on; the encoder call takes only the string. */
    PyObject *encoded = PyUnicode_AsRawUnicodeEscapeString(str);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
840
841
/*[clinic input]
842
_codecs.latin_1_encode
843
str: unicode
844
errors: str(accept={str, NoneType}) = None
845
/
846
[clinic start generated code]*/
847
848
static PyObject *
_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,
                            const char *errors)
/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/
{
    /* Result tuple: (encoded object, length of str). */
    PyObject *encoded = _PyUnicode_AsLatin1String(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
856
857
/*[clinic input]
858
_codecs.ascii_encode
859
str: unicode
860
errors: str(accept={str, NoneType}) = None
861
/
862
[clinic start generated code]*/
863
864
static PyObject *
_codecs_ascii_encode_impl(PyObject *module, PyObject *str,
                          const char *errors)
/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/
{
    /* Result tuple: (encoded object, length of str). */
    PyObject *encoded = _PyUnicode_AsASCIIString(str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
872
873
/*[clinic input]
874
_codecs.charmap_encode
875
str: unicode
876
errors: str(accept={str, NoneType}) = None
877
mapping: object = None
878
/
879
[clinic start generated code]*/
880
881
static PyObject *
_codecs_charmap_encode_impl(PyObject *module, PyObject *str,
                            const char *errors, PyObject *mapping)
/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/
{
    /* Python-level None is translated to a C NULL mapping before the
       encoder is called. */
    PyObject *table = (mapping == Py_None) ? NULL : mapping;

    /* Result tuple: (encoded object, length of str). */
    PyObject *encoded = _PyUnicode_EncodeCharmap(str, table, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
892
893
/*[clinic input]
894
_codecs.charmap_build
895
map: unicode
896
/
897
[clinic start generated code]*/
898
899
static PyObject *
_codecs_charmap_build_impl(PyObject *module, PyObject *map)
/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/
{
    /* Thin wrapper: the result of PyUnicode_BuildEncodingMap() for the
       given string is returned unchanged. */
    PyObject *encoding_map = PyUnicode_BuildEncodingMap(map);
    return encoding_map;
}
905
906
#ifdef MS_WINDOWS
907
908
/*[clinic input]
909
_codecs.mbcs_encode
910
str: unicode
911
errors: str(accept={str, NoneType}) = None
912
/
913
[clinic start generated code]*/
914
915
static PyObject *
_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/
{
    /* Result tuple: (encoded object, length of str).  Uses the generic
       code page encoder with the code page fixed to CP_ACP. */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_ACP, str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
922
923
/*[clinic input]
924
_codecs.oem_encode
925
str: unicode
926
errors: str(accept={str, NoneType}) = None
927
/
928
[clinic start generated code]*/
929
930
static PyObject *
_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)
/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/
{
    /* Result tuple: (encoded object, length of str).  Uses the generic
       code page encoder with the code page fixed to CP_OEMCP. */
    PyObject *encoded = PyUnicode_EncodeCodePage(CP_OEMCP, str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
937
938
/*[clinic input]
939
_codecs.code_page_encode
940
code_page: int
941
str: unicode
942
errors: str(accept={str, NoneType}) = None
943
/
944
[clinic start generated code]*/
945
946
static PyObject *
_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,
                              const char *errors)
/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/
{
    /* Result tuple: (encoded object, length of str), encoded with the
       caller-chosen code page. */
    PyObject *encoded = PyUnicode_EncodeCodePage(code_page, str, errors);
    Py_ssize_t nchars = PyUnicode_GET_LENGTH(str);
    return codec_tuple(encoded, nchars);
}
954
955
#endif /* MS_WINDOWS */
956
957
/* --- Error handler registry --------------------------------------------- */
958
959
/*[clinic input]
960
_codecs.register_error
961
errors: str
962
handler: object
963
/
964
965
Register the specified error handler under the name errors.
966
967
handler must be a callable object, that will be called with an exception
968
instance containing information about the location of the encoding/decoding
969
error and must return a (replacement, new position) tuple.
970
[clinic start generated code]*/
971
972
static PyObject *
_codecs_register_error_impl(PyObject *module, const char *errors,
                            PyObject *handler)
/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/
{
    /* Register handler under the name errors.  Any non-zero status from
       PyCodec_RegisterError() is reported as NULL; otherwise None. */
    int status = PyCodec_RegisterError(errors, handler);
    if (status != 0) {
        return NULL;
    }
    Py_RETURN_NONE;
}
981
982
/*[clinic input]
983
_codecs.lookup_error
984
name: str
985
/
986
987
lookup_error(errors) -> handler
988
989
Return the error handler for the specified error handling name or raise a
990
LookupError, if no handler exists under this name.
991
[clinic start generated code]*/
992
993
static PyObject *
_codecs_lookup_error_impl(PyObject *module, const char *name)
/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/
{
    /* Thin wrapper: the result of PyCodec_LookupError() for this name is
       returned unchanged. */
    PyObject *handler = PyCodec_LookupError(name);
    return handler;
}
999
1000
/* --- Module API --------------------------------------------------------- */
1001
1002
static PyMethodDef _codecs_functions[] = {
1003
_CODECS_REGISTER_METHODDEF
1004
_CODECS_UNREGISTER_METHODDEF
1005
_CODECS_LOOKUP_METHODDEF
1006
_CODECS_ENCODE_METHODDEF
1007
_CODECS_DECODE_METHODDEF
1008
_CODECS_ESCAPE_ENCODE_METHODDEF
1009
_CODECS_ESCAPE_DECODE_METHODDEF
1010
_CODECS_UTF_8_ENCODE_METHODDEF
1011
_CODECS_UTF_8_DECODE_METHODDEF
1012
_CODECS_UTF_7_ENCODE_METHODDEF
1013
_CODECS_UTF_7_DECODE_METHODDEF
1014
_CODECS_UTF_16_ENCODE_METHODDEF
1015
_CODECS_UTF_16_LE_ENCODE_METHODDEF
1016
_CODECS_UTF_16_BE_ENCODE_METHODDEF
1017
_CODECS_UTF_16_DECODE_METHODDEF
1018
_CODECS_UTF_16_LE_DECODE_METHODDEF
1019
_CODECS_UTF_16_BE_DECODE_METHODDEF
1020
_CODECS_UTF_16_EX_DECODE_METHODDEF
1021
_CODECS_UTF_32_ENCODE_METHODDEF
1022
_CODECS_UTF_32_LE_ENCODE_METHODDEF
1023
_CODECS_UTF_32_BE_ENCODE_METHODDEF
1024
_CODECS_UTF_32_DECODE_METHODDEF
1025
_CODECS_UTF_32_LE_DECODE_METHODDEF
1026
_CODECS_UTF_32_BE_DECODE_METHODDEF
1027
_CODECS_UTF_32_EX_DECODE_METHODDEF
1028
_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF
1029
_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF
1030
_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF
1031
_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF
1032
_CODECS_LATIN_1_ENCODE_METHODDEF
1033
_CODECS_LATIN_1_DECODE_METHODDEF
1034
_CODECS_ASCII_ENCODE_METHODDEF
1035
_CODECS_ASCII_DECODE_METHODDEF
1036
_CODECS_CHARMAP_ENCODE_METHODDEF
1037
_CODECS_CHARMAP_DECODE_METHODDEF
1038
_CODECS_CHARMAP_BUILD_METHODDEF
1039
_CODECS_READBUFFER_ENCODE_METHODDEF
1040
_CODECS_MBCS_ENCODE_METHODDEF
1041
_CODECS_MBCS_DECODE_METHODDEF
1042
_CODECS_OEM_ENCODE_METHODDEF
1043
_CODECS_OEM_DECODE_METHODDEF
1044
_CODECS_CODE_PAGE_ENCODE_METHODDEF
1045
_CODECS_CODE_PAGE_DECODE_METHODDEF
1046
_CODECS_REGISTER_ERROR_METHODDEF
1047
_CODECS_LOOKUP_ERROR_METHODDEF
1048
{NULL, NULL} /* sentinel */
1049
};
1050
1051
static PyModuleDef_Slot _codecs_slots[] = {
1052
{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},
1053
{0, NULL}
1054
};
1055
1056
static struct PyModuleDef codecsmodule = {
1057
PyModuleDef_HEAD_INIT,
1058
"_codecs",
1059
NULL,
1060
0,
1061
_codecs_functions,
1062
_codecs_slots,
1063
NULL,
1064
NULL,
1065
NULL
1066
};
1067
1068
PyMODINIT_FUNC
1069
PyInit__codecs(void)
1070
{
1071
return PyModuleDef_Init(&codecsmodule);
1072
}
1073
1074