Path: blob/master/venv/Lib/site-packages/lxml/includes/libxml/encoding.h
811 views
/*1* Summary: interface for the encoding conversion functions2* Description: interface for the encoding conversion functions needed for3* XML basic encoding and iconv() support.4*5* Related specs are6* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies7* [ISO-10646] UTF-8 and UTF-16 in Annexes8* [ISO-8859-1] ISO Latin-1 characters codes.9* [UNICODE] The Unicode Consortium, "The Unicode Standard --10* Worldwide Character Encoding -- Version 1.0", Addison-11* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is12* described in Unicode Technical Report #4.13* [US-ASCII] Coded Character Set--7-bit American Standard Code for14* Information Interchange, ANSI X3.4-1986.15*16* Copy: See Copyright for the status of this software.17*18* Author: Daniel Veillard19*/2021#ifndef __XML_CHAR_ENCODING_H__22#define __XML_CHAR_ENCODING_H__2324#include <libxml/xmlversion.h>2526#ifdef LIBXML_ICONV_ENABLED27#include <iconv.h>28#endif29#ifdef LIBXML_ICU_ENABLED30#include <unicode/ucnv.h>31#endif32#ifdef __cplusplus33extern "C" {34#endif3536/*37* xmlCharEncoding:38*39* Predefined values for some standard encodings.40* Libxml does not do beforehand translation on UTF8 and ISOLatinX.41* It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.42*43* Anything else would have to be translated to UTF8 before being44* given to the parser itself. The BOM for UTF16 and the encoding45* declaration are looked at and a converter is looked for at that46* point. If not found the parser stops here as asked by the XML REC. A47* converter can be registered by the user using xmlRegisterCharEncodingHandler48* but the current form doesn't allow stateful transcoding (a serious49* problem agreed !). If iconv has been found it will be used50* automatically and allow stateful transcoding, the simplest is then51* to be sure to enable iconv and to provide iconv libs for the encoding52* support needed.53*54* Note that the generic "UTF-16" is not a predefined value. Instead, only55* the specific UTF-16LE and UTF-16BE are present.56*/57typedef enum {58XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */59XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */60XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */61XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */62XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */63XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */64XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */65XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */66XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */67XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */68XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */69XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */70XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */71XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */72XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */73XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */74XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */75XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */76XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */77XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */78XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */79XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */80XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */81XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */82} xmlCharEncoding;8384/**85* xmlCharEncodingInputFunc:86* @out: a pointer to an array of bytes to store the UTF-8 result87* @outlen: the length of @out88* @in: a pointer to an array of chars in the original encoding89* @inlen: the length of @in90*91* Take a block of chars in the original encoding and try to convert92* it to an UTF-8 block of chars out.93*94* Returns the number of bytes written, -1 if lack of space, or -295* if the transcoding failed.96* The value of @inlen after return is the number of octets consumed97* if the return value is positive, else unpredictiable.98* The value of @outlen after return is the number of octets consumed.99*/100typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,101const unsigned char *in, int *inlen);102103104/**105* xmlCharEncodingOutputFunc:106* @out: a pointer to an array of bytes to store the result107* @outlen: the length of @out108* @in: a pointer to an array of UTF-8 chars109* @inlen: the length of @in110*111* Take a block of UTF-8 chars in and try to convert it to another112* encoding.113* Note: a first call designed to produce heading info is called with114* in = NULL. If stateful this should also initialize the encoder state.115*116* Returns the number of bytes written, -1 if lack of space, or -2117* if the transcoding failed.118* The value of @inlen after return is the number of octets consumed119* if the return value is positive, else unpredictiable.120* The value of @outlen after return is the number of octets produced.121*/122typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,123const unsigned char *in, int *inlen);124125126/*127* Block defining the handlers for non UTF-8 encodings.128* If iconv is supported, there are two extra fields.129*/130#ifdef LIBXML_ICU_ENABLED131struct _uconv_t {132UConverter *uconv; /* for conversion between an encoding and UTF-16 */133UConverter *utf8; /* for conversion between UTF-8 and UTF-16 */134};135typedef struct _uconv_t uconv_t;136#endif137138typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;139typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;140struct _xmlCharEncodingHandler {141char *name;142xmlCharEncodingInputFunc input;143xmlCharEncodingOutputFunc output;144#ifdef LIBXML_ICONV_ENABLED145iconv_t iconv_in;146iconv_t iconv_out;147#endif /* LIBXML_ICONV_ENABLED */148#ifdef LIBXML_ICU_ENABLED149uconv_t *uconv_in;150uconv_t *uconv_out;151#endif /* LIBXML_ICU_ENABLED */152};153154#ifdef __cplusplus155}156#endif157#include <libxml/tree.h>158#ifdef __cplusplus159extern "C" {160#endif161162/*163* Interfaces for encoding handlers.164*/165XMLPUBFUN void XMLCALL166xmlInitCharEncodingHandlers (void);167XMLPUBFUN void XMLCALL168xmlCleanupCharEncodingHandlers (void);169XMLPUBFUN void XMLCALL170xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);171XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL172xmlGetCharEncodingHandler (xmlCharEncoding enc);173XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL174xmlFindCharEncodingHandler (const char *name);175XMLPUBFUN xmlCharEncodingHandlerPtr XMLCALL176xmlNewCharEncodingHandler (const char *name,177xmlCharEncodingInputFunc input,178xmlCharEncodingOutputFunc output);179180/*181* Interfaces for encoding names and aliases.182*/183XMLPUBFUN int XMLCALL184xmlAddEncodingAlias (const char *name,185const char *alias);186XMLPUBFUN int XMLCALL187xmlDelEncodingAlias (const char *alias);188XMLPUBFUN const char * XMLCALL189xmlGetEncodingAlias (const char *alias);190XMLPUBFUN void XMLCALL191xmlCleanupEncodingAliases (void);192XMLPUBFUN xmlCharEncoding XMLCALL193xmlParseCharEncoding (const char *name);194XMLPUBFUN const char * XMLCALL195xmlGetCharEncodingName (xmlCharEncoding enc);196197/*198* Interfaces directly used by the parsers.199*/200XMLPUBFUN xmlCharEncoding XMLCALL201xmlDetectCharEncoding (const unsigned char *in,202int len);203204XMLPUBFUN int XMLCALL205xmlCharEncOutFunc (xmlCharEncodingHandler *handler,206xmlBufferPtr out,207xmlBufferPtr in);208209XMLPUBFUN int XMLCALL210xmlCharEncInFunc (xmlCharEncodingHandler *handler,211xmlBufferPtr out,212xmlBufferPtr in);213XMLPUBFUN int XMLCALL214xmlCharEncFirstLine (xmlCharEncodingHandler *handler,215xmlBufferPtr out,216xmlBufferPtr in);217XMLPUBFUN int XMLCALL218xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);219220/*221* Export a few useful functions222*/223#ifdef LIBXML_OUTPUT_ENABLED224XMLPUBFUN int XMLCALL225UTF8Toisolat1 (unsigned char *out,226int *outlen,227const unsigned char *in,228int *inlen);229#endif /* LIBXML_OUTPUT_ENABLED */230XMLPUBFUN int XMLCALL231isolat1ToUTF8 (unsigned char *out,232int *outlen,233const unsigned char *in,234int *inlen);235#ifdef __cplusplus236}237#endif238239#endif /* __XML_CHAR_ENCODING_H__ */240241242