/* ------------------------------------------------------------------------12_codecs -- Provides access to the codec registry and the builtin3codecs.45This module should never be imported directly. The standard library6module "codecs" wraps this builtin module for use within Python.78The codec registry is accessible via:910register(search_function) -> None1112lookup(encoding) -> CodecInfo object1314The builtin Unicode codecs use the following interface:1516<encoding>_encode(Unicode_object[,errors='strict']) ->17(string object, bytes consumed)1819<encoding>_decode(char_buffer_obj[,errors='strict']) ->20(Unicode object, bytes consumed)2122These <encoding>s are available: utf_8, unicode_escape,23raw_unicode_escape, latin_1, ascii (7-bit), mbcs (on win32).242526Written by Marc-Andre Lemburg ([email protected]).2728Copyright (c) Corporation for National Research Initiatives.2930------------------------------------------------------------------------ */3132#include "Python.h"33#include "pycore_codecs.h" // _PyCodec_Lookup()3435#ifdef MS_WINDOWS36#include <windows.h>37#endif3839/*[clinic input]40module _codecs41[clinic start generated code]*/42/*[clinic end generated code: output=da39a3ee5e6b4b0d input=e1390e3da3cb9deb]*/4344#include "pycore_runtime.h"45#include "clinic/_codecsmodule.c.h"4647/* --- Registry ----------------------------------------------------------- */4849/*[clinic input]50_codecs.register51search_function: object52/5354Register a codec search function.5556Search functions are expected to take one argument, the encoding name in57all lower case letters, and either return None, or a tuple of functions58(encoder, decoder, stream_reader, stream_writer) (or a CodecInfo object).59[clinic start generated code]*/6061static PyObject *62_codecs_register(PyObject *module, PyObject *search_function)63/*[clinic end generated code: output=d1bf21e99db7d6d3 input=369578467955cae4]*/64{65if (PyCodec_Register(search_function))66return NULL;6768Py_RETURN_NONE;69}7071/*[clinic input]72_codecs.unregister73search_function: object74/7576Unregister a codec search function and clear the registry's cache.7778If the search function is not registered, do nothing.79[clinic start generated code]*/8081static PyObject *82_codecs_unregister(PyObject *module, PyObject *search_function)83/*[clinic end generated code: output=1f0edee9cf246399 input=dd7c004c652d345e]*/84{85if (PyCodec_Unregister(search_function) < 0) {86return NULL;87}8889Py_RETURN_NONE;90}9192/*[clinic input]93_codecs.lookup94encoding: str95/9697Looks up a codec tuple in the Python codec registry and returns a CodecInfo object.98[clinic start generated code]*/99100static PyObject *101_codecs_lookup_impl(PyObject *module, const char *encoding)102/*[clinic end generated code: output=9f0afa572080c36d input=3c572c0db3febe9c]*/103{104return _PyCodec_Lookup(encoding);105}106107/*[clinic input]108_codecs.encode109obj: object110encoding: str(c_default="NULL") = "utf-8"111errors: str(c_default="NULL") = "strict"112113Encodes obj using the codec registered for encoding.114115The default encoding is 'utf-8'. errors may be given to set a116different error handling scheme. Default is 'strict' meaning that encoding117errors raise a ValueError. Other possible values are 'ignore', 'replace'118and 'backslashreplace' as well as any other name registered with119codecs.register_error that can handle ValueErrors.120[clinic start generated code]*/121122static PyObject *123_codecs_encode_impl(PyObject *module, PyObject *obj, const char *encoding,124const char *errors)125/*[clinic end generated code: output=385148eb9a067c86 input=cd5b685040ff61f0]*/126{127if (encoding == NULL)128encoding = PyUnicode_GetDefaultEncoding();129130/* Encode via the codec registry */131return PyCodec_Encode(obj, encoding, errors);132}133134/*[clinic input]135_codecs.decode136obj: object137encoding: str(c_default="NULL") = "utf-8"138errors: str(c_default="NULL") = "strict"139140Decodes obj using the codec registered for encoding.141142Default encoding is 'utf-8'. errors may be given to set a143different error handling scheme. Default is 'strict' meaning that encoding144errors raise a ValueError. Other possible values are 'ignore', 'replace'145and 'backslashreplace' as well as any other name registered with146codecs.register_error that can handle ValueErrors.147[clinic start generated code]*/148149static PyObject *150_codecs_decode_impl(PyObject *module, PyObject *obj, const char *encoding,151const char *errors)152/*[clinic end generated code: output=679882417dc3a0bd input=7702c0cc2fa1add6]*/153{154if (encoding == NULL)155encoding = PyUnicode_GetDefaultEncoding();156157/* Decode via the codec registry */158return PyCodec_Decode(obj, encoding, errors);159}160161/* --- Helpers ------------------------------------------------------------ */162163static164PyObject *codec_tuple(PyObject *decoded,165Py_ssize_t len)166{167if (decoded == NULL)168return NULL;169return Py_BuildValue("Nn", decoded, len);170}171172/* --- String codecs ------------------------------------------------------ */173/*[clinic input]174_codecs.escape_decode175data: Py_buffer(accept={str, buffer})176errors: str(accept={str, NoneType}) = None177/178[clinic start generated code]*/179180static PyObject *181_codecs_escape_decode_impl(PyObject *module, Py_buffer *data,182const char *errors)183/*[clinic end generated code: output=505200ba8056979a input=77298a561c90bd82]*/184{185PyObject *decoded = PyBytes_DecodeEscape(data->buf, data->len,186errors, 0, NULL);187return codec_tuple(decoded, data->len);188}189190/*[clinic input]191_codecs.escape_encode192data: object(subclass_of='&PyBytes_Type')193errors: str(accept={str, NoneType}) = None194/195[clinic start generated code]*/196197static PyObject *198_codecs_escape_encode_impl(PyObject *module, PyObject *data,199const char *errors)200/*[clinic end generated code: output=4af1d477834bab34 input=8f4b144799a94245]*/201{202Py_ssize_t size;203Py_ssize_t newsize;204PyObject *v;205206size = PyBytes_GET_SIZE(data);207if (size > PY_SSIZE_T_MAX / 4) {208PyErr_SetString(PyExc_OverflowError,209"string is too large to encode");210return NULL;211}212newsize = 4*size;213v = PyBytes_FromStringAndSize(NULL, newsize);214215if (v == NULL) {216return NULL;217}218else {219Py_ssize_t i;220char c;221char *p = PyBytes_AS_STRING(v);222223for (i = 0; i < size; i++) {224/* There's at least enough room for a hex escape */225assert(newsize - (p - PyBytes_AS_STRING(v)) >= 4);226c = PyBytes_AS_STRING(data)[i];227if (c == '\'' || c == '\\')228*p++ = '\\', *p++ = c;229else if (c == '\t')230*p++ = '\\', *p++ = 't';231else if (c == '\n')232*p++ = '\\', *p++ = 'n';233else if (c == '\r')234*p++ = '\\', *p++ = 'r';235else if (c < ' ' || c >= 0x7f) {236*p++ = '\\';237*p++ = 'x';238*p++ = Py_hexdigits[(c & 0xf0) >> 4];239*p++ = Py_hexdigits[c & 0xf];240}241else242*p++ = c;243}244*p = '\0';245if (_PyBytes_Resize(&v, (p - PyBytes_AS_STRING(v)))) {246return NULL;247}248}249250return codec_tuple(v, size);251}252253/* --- Decoder ------------------------------------------------------------ */254/*[clinic input]255_codecs.utf_7_decode256data: Py_buffer257errors: str(accept={str, NoneType}) = None258final: bool = False259/260[clinic start generated code]*/261262static PyObject *263_codecs_utf_7_decode_impl(PyObject *module, Py_buffer *data,264const char *errors, int final)265/*[clinic end generated code: output=0cd3a944a32a4089 input=dbf8c8998102dc7d]*/266{267Py_ssize_t consumed = data->len;268PyObject *decoded = PyUnicode_DecodeUTF7Stateful(data->buf, data->len,269errors,270final ? NULL : &consumed);271return codec_tuple(decoded, consumed);272}273274/*[clinic input]275_codecs.utf_8_decode276data: Py_buffer277errors: str(accept={str, NoneType}) = None278final: bool = False279/280[clinic start generated code]*/281282static PyObject *283_codecs_utf_8_decode_impl(PyObject *module, Py_buffer *data,284const char *errors, int final)285/*[clinic end generated code: output=10f74dec8d9bb8bf input=ca06bc8a9c970e25]*/286{287Py_ssize_t consumed = data->len;288PyObject *decoded = PyUnicode_DecodeUTF8Stateful(data->buf, data->len,289errors,290final ? NULL : &consumed);291return codec_tuple(decoded, consumed);292}293294/*[clinic input]295_codecs.utf_16_decode296data: Py_buffer297errors: str(accept={str, NoneType}) = None298final: bool = False299/300[clinic start generated code]*/301302static PyObject *303_codecs_utf_16_decode_impl(PyObject *module, Py_buffer *data,304const char *errors, int final)305/*[clinic end generated code: output=783b442abcbcc2d0 input=5b0f52071ba6cadc]*/306{307int byteorder = 0;308/* This is overwritten unless final is true. */309Py_ssize_t consumed = data->len;310PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,311errors, &byteorder,312final ? NULL : &consumed);313return codec_tuple(decoded, consumed);314}315316/*[clinic input]317_codecs.utf_16_le_decode318data: Py_buffer319errors: str(accept={str, NoneType}) = None320final: bool = False321/322[clinic start generated code]*/323324static PyObject *325_codecs_utf_16_le_decode_impl(PyObject *module, Py_buffer *data,326const char *errors, int final)327/*[clinic end generated code: output=899b9e6364379dcd input=115bd8c7b783d0bf]*/328{329int byteorder = -1;330/* This is overwritten unless final is true. */331Py_ssize_t consumed = data->len;332PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,333errors, &byteorder,334final ? NULL : &consumed);335return codec_tuple(decoded, consumed);336}337338/*[clinic input]339_codecs.utf_16_be_decode340data: Py_buffer341errors: str(accept={str, NoneType}) = None342final: bool = False343/344[clinic start generated code]*/345346static PyObject *347_codecs_utf_16_be_decode_impl(PyObject *module, Py_buffer *data,348const char *errors, int final)349/*[clinic end generated code: output=49f6465ea07669c8 input=63131422b01f9cb4]*/350{351int byteorder = 1;352/* This is overwritten unless final is true. */353Py_ssize_t consumed = data->len;354PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,355errors, &byteorder,356final ? NULL : &consumed);357return codec_tuple(decoded, consumed);358}359360/* This non-standard version also provides access to the byteorder361parameter of the builtin UTF-16 codec.362363It returns a tuple (unicode, bytesread, byteorder) with byteorder364being the value in effect at the end of data.365366*/367/*[clinic input]368_codecs.utf_16_ex_decode369data: Py_buffer370errors: str(accept={str, NoneType}) = None371byteorder: int = 0372final: bool = False373/374[clinic start generated code]*/375376static PyObject *377_codecs_utf_16_ex_decode_impl(PyObject *module, Py_buffer *data,378const char *errors, int byteorder, int final)379/*[clinic end generated code: output=0f385f251ecc1988 input=f368a51cf384bf4c]*/380{381/* This is overwritten unless final is true. */382Py_ssize_t consumed = data->len;383384PyObject *decoded = PyUnicode_DecodeUTF16Stateful(data->buf, data->len,385errors, &byteorder,386final ? NULL : &consumed);387if (decoded == NULL)388return NULL;389return Py_BuildValue("Nni", decoded, consumed, byteorder);390}391392/*[clinic input]393_codecs.utf_32_decode394data: Py_buffer395errors: str(accept={str, NoneType}) = None396final: bool = False397/398[clinic start generated code]*/399400static PyObject *401_codecs_utf_32_decode_impl(PyObject *module, Py_buffer *data,402const char *errors, int final)403/*[clinic end generated code: output=2fc961807f7b145f input=fcdf3658c5e9b5f3]*/404{405int byteorder = 0;406/* This is overwritten unless final is true. */407Py_ssize_t consumed = data->len;408PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,409errors, &byteorder,410final ? NULL : &consumed);411return codec_tuple(decoded, consumed);412}413414/*[clinic input]415_codecs.utf_32_le_decode416data: Py_buffer417errors: str(accept={str, NoneType}) = None418final: bool = False419/420[clinic start generated code]*/421422static PyObject *423_codecs_utf_32_le_decode_impl(PyObject *module, Py_buffer *data,424const char *errors, int final)425/*[clinic end generated code: output=ec8f46b67a94f3e6 input=12220556e885f817]*/426{427int byteorder = -1;428/* This is overwritten unless final is true. */429Py_ssize_t consumed = data->len;430PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,431errors, &byteorder,432final ? NULL : &consumed);433return codec_tuple(decoded, consumed);434}435436/*[clinic input]437_codecs.utf_32_be_decode438data: Py_buffer439errors: str(accept={str, NoneType}) = None440final: bool = False441/442[clinic start generated code]*/443444static PyObject *445_codecs_utf_32_be_decode_impl(PyObject *module, Py_buffer *data,446const char *errors, int final)447/*[clinic end generated code: output=ff82bae862c92c4e input=2bc669b4781598db]*/448{449int byteorder = 1;450/* This is overwritten unless final is true. */451Py_ssize_t consumed = data->len;452PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,453errors, &byteorder,454final ? NULL : &consumed);455return codec_tuple(decoded, consumed);456}457458/* This non-standard version also provides access to the byteorder459parameter of the builtin UTF-32 codec.460461It returns a tuple (unicode, bytesread, byteorder) with byteorder462being the value in effect at the end of data.463464*/465/*[clinic input]466_codecs.utf_32_ex_decode467data: Py_buffer468errors: str(accept={str, NoneType}) = None469byteorder: int = 0470final: bool = False471/472[clinic start generated code]*/473474static PyObject *475_codecs_utf_32_ex_decode_impl(PyObject *module, Py_buffer *data,476const char *errors, int byteorder, int final)477/*[clinic end generated code: output=6bfb177dceaf4848 input=4a2323d0013620df]*/478{479Py_ssize_t consumed = data->len;480PyObject *decoded = PyUnicode_DecodeUTF32Stateful(data->buf, data->len,481errors, &byteorder,482final ? NULL : &consumed);483if (decoded == NULL)484return NULL;485return Py_BuildValue("Nni", decoded, consumed, byteorder);486}487488/*[clinic input]489_codecs.unicode_escape_decode490data: Py_buffer(accept={str, buffer})491errors: str(accept={str, NoneType}) = None492final: bool = True493/494[clinic start generated code]*/495496static PyObject *497_codecs_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,498const char *errors, int final)499/*[clinic end generated code: output=b284f97b12c635ee input=15019f081ffe272b]*/500{501Py_ssize_t consumed = data->len;502PyObject *decoded = _PyUnicode_DecodeUnicodeEscapeStateful(data->buf, data->len,503errors,504final ? NULL : &consumed);505return codec_tuple(decoded, consumed);506}507508/*[clinic input]509_codecs.raw_unicode_escape_decode510data: Py_buffer(accept={str, buffer})511errors: str(accept={str, NoneType}) = None512final: bool = True513/514[clinic start generated code]*/515516static PyObject *517_codecs_raw_unicode_escape_decode_impl(PyObject *module, Py_buffer *data,518const char *errors, int final)519/*[clinic end generated code: output=11dbd96301e2879e input=b93f823aa8c343ad]*/520{521Py_ssize_t consumed = data->len;522PyObject *decoded = _PyUnicode_DecodeRawUnicodeEscapeStateful(data->buf, data->len,523errors,524final ? NULL : &consumed);525return codec_tuple(decoded, consumed);526}527528/*[clinic input]529_codecs.latin_1_decode530data: Py_buffer531errors: str(accept={str, NoneType}) = None532/533[clinic start generated code]*/534535static PyObject *536_codecs_latin_1_decode_impl(PyObject *module, Py_buffer *data,537const char *errors)538/*[clinic end generated code: output=07f3dfa3f72c7d8f input=76ca58fd6dcd08c7]*/539{540PyObject *decoded = PyUnicode_DecodeLatin1(data->buf, data->len, errors);541return codec_tuple(decoded, data->len);542}543544/*[clinic input]545_codecs.ascii_decode546data: Py_buffer547errors: str(accept={str, NoneType}) = None548/549[clinic start generated code]*/550551static PyObject *552_codecs_ascii_decode_impl(PyObject *module, Py_buffer *data,553const char *errors)554/*[clinic end generated code: output=2627d72058d42429 input=e428a267a04b4481]*/555{556PyObject *decoded = PyUnicode_DecodeASCII(data->buf, data->len, errors);557return codec_tuple(decoded, data->len);558}559560/*[clinic input]561_codecs.charmap_decode562data: Py_buffer563errors: str(accept={str, NoneType}) = None564mapping: object = None565/566[clinic start generated code]*/567568static PyObject *569_codecs_charmap_decode_impl(PyObject *module, Py_buffer *data,570const char *errors, PyObject *mapping)571/*[clinic end generated code: output=2c335b09778cf895 input=15b69df43458eb40]*/572{573PyObject *decoded;574575if (mapping == Py_None)576mapping = NULL;577578decoded = PyUnicode_DecodeCharmap(data->buf, data->len, mapping, errors);579return codec_tuple(decoded, data->len);580}581582#ifdef MS_WINDOWS583584/*[clinic input]585_codecs.mbcs_decode586data: Py_buffer587errors: str(accept={str, NoneType}) = None588final: bool = False589/590[clinic start generated code]*/591592static PyObject *593_codecs_mbcs_decode_impl(PyObject *module, Py_buffer *data,594const char *errors, int final)595/*[clinic end generated code: output=39b65b8598938c4b input=f144ad1ed6d8f5a6]*/596{597Py_ssize_t consumed = data->len;598PyObject *decoded = PyUnicode_DecodeMBCSStateful(data->buf, data->len,599errors, final ? NULL : &consumed);600return codec_tuple(decoded, consumed);601}602603/*[clinic input]604_codecs.oem_decode605data: Py_buffer606errors: str(accept={str, NoneType}) = None607final: bool = False608/609[clinic start generated code]*/610611static PyObject *612_codecs_oem_decode_impl(PyObject *module, Py_buffer *data,613const char *errors, int final)614/*[clinic end generated code: output=da1617612f3fcad8 input=629bf87376d211b4]*/615{616Py_ssize_t consumed = data->len;617PyObject *decoded = PyUnicode_DecodeCodePageStateful(CP_OEMCP,618data->buf, data->len, errors, final ? NULL : &consumed);619return codec_tuple(decoded, consumed);620}621622/*[clinic input]623_codecs.code_page_decode624codepage: int625data: Py_buffer626errors: str(accept={str, NoneType}) = None627final: bool = False628/629[clinic start generated code]*/630631static PyObject *632_codecs_code_page_decode_impl(PyObject *module, int codepage,633Py_buffer *data, const char *errors, int final)634/*[clinic end generated code: output=53008ea967da3fff input=6a32589b0658c277]*/635{636Py_ssize_t consumed = data->len;637PyObject *decoded = PyUnicode_DecodeCodePageStateful(codepage,638data->buf, data->len,639errors,640final ? NULL : &consumed);641return codec_tuple(decoded, consumed);642}643644#endif /* MS_WINDOWS */645646/* --- Encoder ------------------------------------------------------------ */647648/*[clinic input]649_codecs.readbuffer_encode650data: Py_buffer(accept={str, buffer})651errors: str(accept={str, NoneType}) = None652/653[clinic start generated code]*/654655static PyObject *656_codecs_readbuffer_encode_impl(PyObject *module, Py_buffer *data,657const char *errors)658/*[clinic end generated code: output=c645ea7cdb3d6e86 input=aa10cfdf252455c5]*/659{660PyObject *result = PyBytes_FromStringAndSize(data->buf, data->len);661return codec_tuple(result, data->len);662}663664/*[clinic input]665_codecs.utf_7_encode666str: unicode667errors: str(accept={str, NoneType}) = None668/669[clinic start generated code]*/670671static PyObject *672_codecs_utf_7_encode_impl(PyObject *module, PyObject *str,673const char *errors)674/*[clinic end generated code: output=0feda21ffc921bc8 input=2546dbbb3fa53114]*/675{676return codec_tuple(_PyUnicode_EncodeUTF7(str, 0, 0, errors),677PyUnicode_GET_LENGTH(str));678}679680/*[clinic input]681_codecs.utf_8_encode682str: unicode683errors: str(accept={str, NoneType}) = None684/685[clinic start generated code]*/686687static PyObject *688_codecs_utf_8_encode_impl(PyObject *module, PyObject *str,689const char *errors)690/*[clinic end generated code: output=02bf47332b9c796c input=a3e71ae01c3f93f3]*/691{692return codec_tuple(_PyUnicode_AsUTF8String(str, errors),693PyUnicode_GET_LENGTH(str));694}695696/* This version provides access to the byteorder parameter of the697builtin UTF-16 codecs as optional third argument. It defaults to 0698which means: use the native byte order and prepend the data with a699BOM mark.700701*/702703/*[clinic input]704_codecs.utf_16_encode705str: unicode706errors: str(accept={str, NoneType}) = None707byteorder: int = 0708/709[clinic start generated code]*/710711static PyObject *712_codecs_utf_16_encode_impl(PyObject *module, PyObject *str,713const char *errors, int byteorder)714/*[clinic end generated code: output=c654e13efa2e64e4 input=68cdc2eb8338555d]*/715{716return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, byteorder),717PyUnicode_GET_LENGTH(str));718}719720/*[clinic input]721_codecs.utf_16_le_encode722str: unicode723errors: str(accept={str, NoneType}) = None724/725[clinic start generated code]*/726727static PyObject *728_codecs_utf_16_le_encode_impl(PyObject *module, PyObject *str,729const char *errors)730/*[clinic end generated code: output=431b01e55f2d4995 input=83d042706eed6798]*/731{732return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, -1),733PyUnicode_GET_LENGTH(str));734}735736/*[clinic input]737_codecs.utf_16_be_encode738str: unicode739errors: str(accept={str, NoneType}) = None740/741[clinic start generated code]*/742743static PyObject *744_codecs_utf_16_be_encode_impl(PyObject *module, PyObject *str,745const char *errors)746/*[clinic end generated code: output=96886a6fd54dcae3 input=6f1e9e623b03071b]*/747{748return codec_tuple(_PyUnicode_EncodeUTF16(str, errors, +1),749PyUnicode_GET_LENGTH(str));750}751752/* This version provides access to the byteorder parameter of the753builtin UTF-32 codecs as optional third argument. It defaults to 0754which means: use the native byte order and prepend the data with a755BOM mark.756757*/758759/*[clinic input]760_codecs.utf_32_encode761str: unicode762errors: str(accept={str, NoneType}) = None763byteorder: int = 0764/765[clinic start generated code]*/766767static PyObject *768_codecs_utf_32_encode_impl(PyObject *module, PyObject *str,769const char *errors, int byteorder)770/*[clinic end generated code: output=5c760da0c09a8b83 input=8ec4c64d983bc52b]*/771{772return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, byteorder),773PyUnicode_GET_LENGTH(str));774}775776/*[clinic input]777_codecs.utf_32_le_encode778str: unicode779errors: str(accept={str, NoneType}) = None780/781[clinic start generated code]*/782783static PyObject *784_codecs_utf_32_le_encode_impl(PyObject *module, PyObject *str,785const char *errors)786/*[clinic end generated code: output=b65cd176de8e36d6 input=f0918d41de3eb1b1]*/787{788return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, -1),789PyUnicode_GET_LENGTH(str));790}791792/*[clinic input]793_codecs.utf_32_be_encode794str: unicode795errors: str(accept={str, NoneType}) = None796/797[clinic start generated code]*/798799static PyObject *800_codecs_utf_32_be_encode_impl(PyObject *module, PyObject *str,801const char *errors)802/*[clinic end generated code: output=1d9e71a9358709e9 input=967a99a95748b557]*/803{804return codec_tuple(_PyUnicode_EncodeUTF32(str, errors, +1),805PyUnicode_GET_LENGTH(str));806}807808/*[clinic input]809_codecs.unicode_escape_encode810str: unicode811errors: str(accept={str, NoneType}) = None812/813[clinic start generated code]*/814815static PyObject *816_codecs_unicode_escape_encode_impl(PyObject *module, PyObject *str,817const char *errors)818/*[clinic end generated code: output=66271b30bc4f7a3c input=8c4de07597054e33]*/819{820return codec_tuple(PyUnicode_AsUnicodeEscapeString(str),821PyUnicode_GET_LENGTH(str));822}823824/*[clinic input]825_codecs.raw_unicode_escape_encode826str: unicode827errors: str(accept={str, NoneType}) = None828/829[clinic start generated code]*/830831static PyObject *832_codecs_raw_unicode_escape_encode_impl(PyObject *module, PyObject *str,833const char *errors)834/*[clinic end generated code: output=a66a806ed01c830a input=4aa6f280d78e4574]*/835{836return codec_tuple(PyUnicode_AsRawUnicodeEscapeString(str),837PyUnicode_GET_LENGTH(str));838}839840/*[clinic input]841_codecs.latin_1_encode842str: unicode843errors: str(accept={str, NoneType}) = None844/845[clinic start generated code]*/846847static PyObject *848_codecs_latin_1_encode_impl(PyObject *module, PyObject *str,849const char *errors)850/*[clinic end generated code: output=2c28c83a27884e08 input=ec3ef74bf85c5c5d]*/851{852return codec_tuple(_PyUnicode_AsLatin1String(str, errors),853PyUnicode_GET_LENGTH(str));854}855856/*[clinic input]857_codecs.ascii_encode858str: unicode859errors: str(accept={str, NoneType}) = None860/861[clinic start generated code]*/862863static PyObject *864_codecs_ascii_encode_impl(PyObject *module, PyObject *str,865const char *errors)866/*[clinic end generated code: output=b5e035182d33befc input=93e6e602838bd3de]*/867{868return codec_tuple(_PyUnicode_AsASCIIString(str, errors),869PyUnicode_GET_LENGTH(str));870}871872/*[clinic input]873_codecs.charmap_encode874str: unicode875errors: str(accept={str, NoneType}) = None876mapping: object = None877/878[clinic start generated code]*/879880static PyObject *881_codecs_charmap_encode_impl(PyObject *module, PyObject *str,882const char *errors, PyObject *mapping)883/*[clinic end generated code: output=047476f48495a9e9 input=2a98feae73dadce8]*/884{885if (mapping == Py_None)886mapping = NULL;887888return codec_tuple(_PyUnicode_EncodeCharmap(str, mapping, errors),889PyUnicode_GET_LENGTH(str));890}891892/*[clinic input]893_codecs.charmap_build894map: unicode895/896[clinic start generated code]*/897898static PyObject *899_codecs_charmap_build_impl(PyObject *module, PyObject *map)900/*[clinic end generated code: output=bb073c27031db9ac input=d91a91d1717dbc6d]*/901{902return PyUnicode_BuildEncodingMap(map);903}904905#ifdef MS_WINDOWS906907/*[clinic input]908_codecs.mbcs_encode909str: unicode910errors: str(accept={str, NoneType}) = None911/912[clinic start generated code]*/913914static PyObject *915_codecs_mbcs_encode_impl(PyObject *module, PyObject *str, const char *errors)916/*[clinic end generated code: output=76e2e170c966c080 input=2e932fc289ea5a5b]*/917{918return codec_tuple(PyUnicode_EncodeCodePage(CP_ACP, str, errors),919PyUnicode_GET_LENGTH(str));920}921922/*[clinic input]923_codecs.oem_encode924str: unicode925errors: str(accept={str, NoneType}) = None926/927[clinic start generated code]*/928929static PyObject *930_codecs_oem_encode_impl(PyObject *module, PyObject *str, const char *errors)931/*[clinic end generated code: output=65d5982c737de649 input=9eac86dc21eb14f2]*/932{933return codec_tuple(PyUnicode_EncodeCodePage(CP_OEMCP, str, errors),934PyUnicode_GET_LENGTH(str));935}936937/*[clinic input]938_codecs.code_page_encode939code_page: int940str: unicode941errors: str(accept={str, NoneType}) = None942/943[clinic start generated code]*/944945static PyObject *946_codecs_code_page_encode_impl(PyObject *module, int code_page, PyObject *str,947const char *errors)948/*[clinic end generated code: output=45673f6085657a9e input=7d18a33bc8cd0f94]*/949{950return codec_tuple(PyUnicode_EncodeCodePage(code_page, str, errors),951PyUnicode_GET_LENGTH(str));952}953954#endif /* MS_WINDOWS */955956/* --- Error handler registry --------------------------------------------- */957958/*[clinic input]959_codecs.register_error960errors: str961handler: object962/963964Register the specified error handler under the name errors.965966handler must be a callable object, that will be called with an exception967instance containing information about the location of the encoding/decoding968error and must return a (replacement, new position) tuple.969[clinic start generated code]*/970971static PyObject *972_codecs_register_error_impl(PyObject *module, const char *errors,973PyObject *handler)974/*[clinic end generated code: output=fa2f7d1879b3067d input=5e6709203c2e33fe]*/975{976if (PyCodec_RegisterError(errors, handler))977return NULL;978Py_RETURN_NONE;979}980981/*[clinic input]982_codecs.lookup_error983name: str984/985986lookup_error(errors) -> handler987988Return the error handler for the specified error handling name or raise a989LookupError, if no handler exists under this name.990[clinic start generated code]*/991992static PyObject *993_codecs_lookup_error_impl(PyObject *module, const char *name)994/*[clinic end generated code: output=087f05dc0c9a98cc input=4775dd65e6235aba]*/995{996return PyCodec_LookupError(name);997}998999/* --- Module API --------------------------------------------------------- */10001001static PyMethodDef _codecs_functions[] = {1002_CODECS_REGISTER_METHODDEF1003_CODECS_UNREGISTER_METHODDEF1004_CODECS_LOOKUP_METHODDEF1005_CODECS_ENCODE_METHODDEF1006_CODECS_DECODE_METHODDEF1007_CODECS_ESCAPE_ENCODE_METHODDEF1008_CODECS_ESCAPE_DECODE_METHODDEF1009_CODECS_UTF_8_ENCODE_METHODDEF1010_CODECS_UTF_8_DECODE_METHODDEF1011_CODECS_UTF_7_ENCODE_METHODDEF1012_CODECS_UTF_7_DECODE_METHODDEF1013_CODECS_UTF_16_ENCODE_METHODDEF1014_CODECS_UTF_16_LE_ENCODE_METHODDEF1015_CODECS_UTF_16_BE_ENCODE_METHODDEF1016_CODECS_UTF_16_DECODE_METHODDEF1017_CODECS_UTF_16_LE_DECODE_METHODDEF1018_CODECS_UTF_16_BE_DECODE_METHODDEF1019_CODECS_UTF_16_EX_DECODE_METHODDEF1020_CODECS_UTF_32_ENCODE_METHODDEF1021_CODECS_UTF_32_LE_ENCODE_METHODDEF1022_CODECS_UTF_32_BE_ENCODE_METHODDEF1023_CODECS_UTF_32_DECODE_METHODDEF1024_CODECS_UTF_32_LE_DECODE_METHODDEF1025_CODECS_UTF_32_BE_DECODE_METHODDEF1026_CODECS_UTF_32_EX_DECODE_METHODDEF1027_CODECS_UNICODE_ESCAPE_ENCODE_METHODDEF1028_CODECS_UNICODE_ESCAPE_DECODE_METHODDEF1029_CODECS_RAW_UNICODE_ESCAPE_ENCODE_METHODDEF1030_CODECS_RAW_UNICODE_ESCAPE_DECODE_METHODDEF1031_CODECS_LATIN_1_ENCODE_METHODDEF1032_CODECS_LATIN_1_DECODE_METHODDEF1033_CODECS_ASCII_ENCODE_METHODDEF1034_CODECS_ASCII_DECODE_METHODDEF1035_CODECS_CHARMAP_ENCODE_METHODDEF1036_CODECS_CHARMAP_DECODE_METHODDEF1037_CODECS_CHARMAP_BUILD_METHODDEF1038_CODECS_READBUFFER_ENCODE_METHODDEF1039_CODECS_MBCS_ENCODE_METHODDEF1040_CODECS_MBCS_DECODE_METHODDEF1041_CODECS_OEM_ENCODE_METHODDEF1042_CODECS_OEM_DECODE_METHODDEF1043_CODECS_CODE_PAGE_ENCODE_METHODDEF1044_CODECS_CODE_PAGE_DECODE_METHODDEF1045_CODECS_REGISTER_ERROR_METHODDEF1046_CODECS_LOOKUP_ERROR_METHODDEF1047{NULL, NULL} /* sentinel */1048};10491050static PyModuleDef_Slot _codecs_slots[] = {1051{Py_mod_multiple_interpreters, Py_MOD_PER_INTERPRETER_GIL_SUPPORTED},1052{0, NULL}1053};10541055static struct PyModuleDef codecsmodule = {1056PyModuleDef_HEAD_INIT,1057"_codecs",1058NULL,10590,1060_codecs_functions,1061_codecs_slots,1062NULL,1063NULL,1064NULL1065};10661067PyMODINIT_FUNC1068PyInit__codecs(void)1069{1070return PyModuleDef_Init(&codecsmodule);1071}107210731074