/*1* Summary: interface for the encoding conversion functions2* Description: interface for the encoding conversion functions needed for3* XML basic encoding and iconv() support.4*5* Related specs are6* rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies7* [ISO-10646] UTF-8 and UTF-16 in Annexes8* [ISO-8859-1] ISO Latin-1 characters codes.9* [UNICODE] The Unicode Consortium, "The Unicode Standard --10* Worldwide Character Encoding -- Version 1.0", Addison-11* Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is12* described in Unicode Technical Report #4.13* [US-ASCII] Coded Character Set--7-bit American Standard Code for14* Information Interchange, ANSI X3.4-1986.15*16* Copy: See Copyright for the status of this software.17*18* Author: Daniel Veillard19*/2021#ifndef __XML_CHAR_ENCODING_H__22#define __XML_CHAR_ENCODING_H__2324#include <libxml/xmlversion.h>2526#ifdef LIBXML_ICONV_ENABLED27#include <iconv.h>28#endif2930#ifdef __cplusplus31extern "C" {32#endif3334typedef enum {35XML_ENC_ERR_SUCCESS = 0,36XML_ENC_ERR_SPACE = -1,37XML_ENC_ERR_INPUT = -2,38XML_ENC_ERR_PARTIAL = -3,39XML_ENC_ERR_INTERNAL = -4,40XML_ENC_ERR_MEMORY = -541} xmlCharEncError;4243/*44* xmlCharEncoding:45*46* Predefined values for some standard encodings.47* Libxml does not do beforehand translation on UTF8 and ISOLatinX.48* It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default.49*50* Anything else would have to be translated to UTF8 before being51* given to the parser itself. The BOM for UTF16 and the encoding52* declaration are looked at and a converter is looked for at that53* point. If not found the parser stops here as asked by the XML REC. A54* converter can be registered by the user using xmlRegisterCharEncodingHandler55* but the current form doesn't allow stateful transcoding (a serious56* problem agreed !). If iconv has been found it will be used57* automatically and allow stateful transcoding, the simplest is then58* to be sure to enable iconv and to provide iconv libs for the encoding59* support needed.60*61* Note that the generic "UTF-16" is not a predefined value. Instead, only62* the specific UTF-16LE and UTF-16BE are present.63*/64typedef enum {65XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */66XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */67XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */68XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */69XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */70XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */71XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */72XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */73XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */74XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */75XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */76XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */77XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */78XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */79XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */80XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */81XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */82XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */83XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */84XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */85XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */86XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */87XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */88XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */89} xmlCharEncoding;9091/**92* xmlCharEncodingInputFunc:93* @out: a pointer to an array of bytes to store the UTF-8 result94* @outlen: the length of @out95* @in: a pointer to an array of chars in the original encoding96* @inlen: the length of @in97*98* Take a block of chars in the original encoding and try to convert99* it to an UTF-8 block of chars out.100*101* Returns the number of bytes written, -1 if lack of space, or -2102* if the transcoding failed.103* The value of @inlen after return is the number of octets consumed104* if the return value is positive, else unpredictiable.105* The value of @outlen after return is the number of octets consumed.106*/107typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen,108const unsigned char *in, int *inlen);109110111/**112* xmlCharEncodingOutputFunc:113* @out: a pointer to an array of bytes to store the result114* @outlen: the length of @out115* @in: a pointer to an array of UTF-8 chars116* @inlen: the length of @in117*118* Take a block of UTF-8 chars in and try to convert it to another119* encoding.120* Note: a first call designed to produce heading info is called with121* in = NULL. If stateful this should also initialize the encoder state.122*123* Returns the number of bytes written, -1 if lack of space, or -2124* if the transcoding failed.125* The value of @inlen after return is the number of octets consumed126* if the return value is positive, else unpredictiable.127* The value of @outlen after return is the number of octets produced.128*/129typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen,130const unsigned char *in, int *inlen);131132133/*134* Block defining the handlers for non UTF-8 encodings.135* If iconv is supported, there are two extra fields.136*/137typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;138typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;139struct _xmlCharEncodingHandler {140char *name;141xmlCharEncodingInputFunc input;142xmlCharEncodingOutputFunc output;143#ifdef LIBXML_ICONV_ENABLED144iconv_t iconv_in;145iconv_t iconv_out;146#endif /* LIBXML_ICONV_ENABLED */147#ifdef LIBXML_ICU_ENABLED148struct _uconv_t *uconv_in;149struct _uconv_t *uconv_out;150#endif /* LIBXML_ICU_ENABLED */151};152153/*154* Interfaces for encoding handlers.155*/156XML_DEPRECATED157XMLPUBFUN void158xmlInitCharEncodingHandlers (void);159XML_DEPRECATED160XMLPUBFUN void161xmlCleanupCharEncodingHandlers (void);162XMLPUBFUN void163xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler);164XMLPUBFUN xmlCharEncodingHandlerPtr165xmlGetCharEncodingHandler (xmlCharEncoding enc);166XMLPUBFUN xmlCharEncodingHandlerPtr167xmlFindCharEncodingHandler (const char *name);168XMLPUBFUN xmlCharEncodingHandlerPtr169xmlNewCharEncodingHandler (const char *name,170xmlCharEncodingInputFunc input,171xmlCharEncodingOutputFunc output);172173/*174* Interfaces for encoding names and aliases.175*/176XMLPUBFUN int177xmlAddEncodingAlias (const char *name,178const char *alias);179XMLPUBFUN int180xmlDelEncodingAlias (const char *alias);181XMLPUBFUN const char *182xmlGetEncodingAlias (const char *alias);183XMLPUBFUN void184xmlCleanupEncodingAliases (void);185XMLPUBFUN xmlCharEncoding186xmlParseCharEncoding (const char *name);187XMLPUBFUN const char *188xmlGetCharEncodingName (xmlCharEncoding enc);189190/*191* Interfaces directly used by the parsers.192*/193XMLPUBFUN xmlCharEncoding194xmlDetectCharEncoding (const unsigned char *in,195int len);196197struct _xmlBuffer;198XMLPUBFUN int199xmlCharEncOutFunc (xmlCharEncodingHandler *handler,200struct _xmlBuffer *out,201struct _xmlBuffer *in);202203XMLPUBFUN int204xmlCharEncInFunc (xmlCharEncodingHandler *handler,205struct _xmlBuffer *out,206struct _xmlBuffer *in);207XML_DEPRECATED208XMLPUBFUN int209xmlCharEncFirstLine (xmlCharEncodingHandler *handler,210struct _xmlBuffer *out,211struct _xmlBuffer *in);212XMLPUBFUN int213xmlCharEncCloseFunc (xmlCharEncodingHandler *handler);214215/*216* Export a few useful functions217*/218#ifdef LIBXML_OUTPUT_ENABLED219XMLPUBFUN int220UTF8Toisolat1 (unsigned char *out,221int *outlen,222const unsigned char *in,223int *inlen);224#endif /* LIBXML_OUTPUT_ENABLED */225XMLPUBFUN int226isolat1ToUTF8 (unsigned char *out,227int *outlen,228const unsigned char *in,229int *inlen);230#ifdef __cplusplus231}232#endif233234#endif /* __XML_CHAR_ENCODING_H__ */235236237