Path: blob/master/venv/Lib/site-packages/lxml/includes/libxml/parserInternals.h
811 views
/*1* Summary: internals routines and limits exported by the parser.2* Description: this module exports a number of internal parsing routines3* they are not really all intended for applications but4* can prove useful doing low level processing.5*6* Copy: See Copyright for the status of this software.7*8* Author: Daniel Veillard9*/1011#ifndef __XML_PARSER_INTERNALS_H__12#define __XML_PARSER_INTERNALS_H__1314#include <libxml/xmlversion.h>15#include <libxml/parser.h>16#include <libxml/HTMLparser.h>17#include <libxml/chvalid.h>1819#ifdef __cplusplus20extern "C" {21#endif2223/**24* xmlParserMaxDepth:25*26* arbitrary depth limit for the XML documents that we allow to27* process. This is not a limitation of the parser but a safety28* boundary feature, use XML_PARSE_HUGE option to override it.29*/30XMLPUBVAR unsigned int xmlParserMaxDepth;3132/**33* XML_MAX_TEXT_LENGTH:34*35* Maximum size allowed for a single text node when building a tree.36* This is not a limitation of the parser but a safety boundary feature,37* use XML_PARSE_HUGE option to override it.38* Introduced in 2.9.039*/40#define XML_MAX_TEXT_LENGTH 100000004142/**43* XML_MAX_NAME_LENGTH:44*45* Maximum size allowed for a markup identitier46* This is not a limitation of the parser but a safety boundary feature,47* use XML_PARSE_HUGE option to override it.48* Note that with the use of parsing dictionaries overriding the limit49* may result in more runtime memory usage in face of "unfriendly' content50* Introduced in 2.9.051*/52#define XML_MAX_NAME_LENGTH 500005354/**55* XML_MAX_DICTIONARY_LIMIT:56*57* Maximum size allowed by the parser for a dictionary by default58* This is not a limitation of the parser but a safety boundary feature,59* use XML_PARSE_HUGE option to override it.60* Introduced in 2.9.061*/62#define XML_MAX_DICTIONARY_LIMIT 100000006364/**65* XML_MAX_LOOKUP_LIMIT:66*67* Maximum size allowed by the parser for ahead lookup68* This is an upper boundary enforced by the parser to avoid bad69* behaviour on "unfriendly' content70* Introduced in 2.9.071*/72#define XML_MAX_LOOKUP_LIMIT 100000007374/**75* XML_MAX_NAMELEN:76*77* Identifiers can be longer, but this will be more costly78* at runtime.79*/80#define XML_MAX_NAMELEN 1008182/**83* INPUT_CHUNK:84*85* The parser tries to always have that amount of input ready.86* One of the point is providing context when reporting errors.87*/88#define INPUT_CHUNK 2508990/************************************************************************91* *92* UNICODE version of the macros. *93* *94************************************************************************/95/**96* IS_BYTE_CHAR:97* @c: an byte value (int)98*99* Macro to check the following production in the XML spec:100*101* [2] Char ::= #x9 | #xA | #xD | [#x20...]102* any byte character in the accepted range103*/104#define IS_BYTE_CHAR(c) xmlIsChar_ch(c)105106/**107* IS_CHAR:108* @c: an UNICODE value (int)109*110* Macro to check the following production in the XML spec:111*112* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]113* | [#x10000-#x10FFFF]114* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.115*/116#define IS_CHAR(c) xmlIsCharQ(c)117118/**119* IS_CHAR_CH:120* @c: an xmlChar (usually an unsigned char)121*122* Behaves like IS_CHAR on single-byte value123*/124#define IS_CHAR_CH(c) xmlIsChar_ch(c)125126/**127* IS_BLANK:128* @c: an UNICODE value (int)129*130* Macro to check the following production in the XML spec:131*132* [3] S ::= (#x20 | #x9 | #xD | #xA)+133*/134#define IS_BLANK(c) xmlIsBlankQ(c)135136/**137* IS_BLANK_CH:138* @c: an xmlChar value (normally unsigned char)139*140* Behaviour same as IS_BLANK141*/142#define IS_BLANK_CH(c) xmlIsBlank_ch(c)143144/**145* IS_BASECHAR:146* @c: an UNICODE value (int)147*148* Macro to check the following production in the XML spec:149*150* [85] BaseChar ::= ... long list see REC ...151*/152#define IS_BASECHAR(c) xmlIsBaseCharQ(c)153154/**155* IS_DIGIT:156* @c: an UNICODE value (int)157*158* Macro to check the following production in the XML spec:159*160* [88] Digit ::= ... long list see REC ...161*/162#define IS_DIGIT(c) xmlIsDigitQ(c)163164/**165* IS_DIGIT_CH:166* @c: an xmlChar value (usually an unsigned char)167*168* Behaves like IS_DIGIT but with a single byte argument169*/170#define IS_DIGIT_CH(c) xmlIsDigit_ch(c)171172/**173* IS_COMBINING:174* @c: an UNICODE value (int)175*176* Macro to check the following production in the XML spec:177*178* [87] CombiningChar ::= ... long list see REC ...179*/180#define IS_COMBINING(c) xmlIsCombiningQ(c)181182/**183* IS_COMBINING_CH:184* @c: an xmlChar (usually an unsigned char)185*186* Always false (all combining chars > 0xff)187*/188#define IS_COMBINING_CH(c) 0189190/**191* IS_EXTENDER:192* @c: an UNICODE value (int)193*194* Macro to check the following production in the XML spec:195*196*197* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |198* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |199* [#x309D-#x309E] | [#x30FC-#x30FE]200*/201#define IS_EXTENDER(c) xmlIsExtenderQ(c)202203/**204* IS_EXTENDER_CH:205* @c: an xmlChar value (usually an unsigned char)206*207* Behaves like IS_EXTENDER but with a single-byte argument208*/209#define IS_EXTENDER_CH(c) xmlIsExtender_ch(c)210211/**212* IS_IDEOGRAPHIC:213* @c: an UNICODE value (int)214*215* Macro to check the following production in the XML spec:216*217*218* [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]219*/220#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)221222/**223* IS_LETTER:224* @c: an UNICODE value (int)225*226* Macro to check the following production in the XML spec:227*228*229* [84] Letter ::= BaseChar | Ideographic230*/231#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))232233/**234* IS_LETTER_CH:235* @c: an xmlChar value (normally unsigned char)236*237* Macro behaves like IS_LETTER, but only check base chars238*239*/240#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)241242/**243* IS_ASCII_LETTER:244* @c: an xmlChar value245*246* Macro to check [a-zA-Z]247*248*/249#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \250((0x61 <= (c)) && ((c) <= 0x7a)))251252/**253* IS_ASCII_DIGIT:254* @c: an xmlChar value255*256* Macro to check [0-9]257*258*/259#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))260261/**262* IS_PUBIDCHAR:263* @c: an UNICODE value (int)264*265* Macro to check the following production in the XML spec:266*267*268* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]269*/270#define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c)271272/**273* IS_PUBIDCHAR_CH:274* @c: an xmlChar value (normally unsigned char)275*276* Same as IS_PUBIDCHAR but for single-byte value277*/278#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)279280/**281* SKIP_EOL:282* @p: and UTF8 string pointer283*284* Skips the end of line chars.285*/286#define SKIP_EOL(p) \287if (*(p) == 0x13) { p++ ; if (*(p) == 0x10) p++; } \288if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }289290/**291* MOVETO_ENDTAG:292* @p: and UTF8 string pointer293*294* Skips to the next '>' char.295*/296#define MOVETO_ENDTAG(p) \297while ((*p) && (*(p) != '>')) (p)++298299/**300* MOVETO_STARTTAG:301* @p: and UTF8 string pointer302*303* Skips to the next '<' char.304*/305#define MOVETO_STARTTAG(p) \306while ((*p) && (*(p) != '<')) (p)++307308/**309* Global variables used for predefined strings.310*/311XMLPUBVAR const xmlChar xmlStringText[];312XMLPUBVAR const xmlChar xmlStringTextNoenc[];313XMLPUBVAR const xmlChar xmlStringComment[];314315/*316* Function to finish the work of the macros where needed.317*/318XMLPUBFUN int XMLCALL xmlIsLetter (int c);319320/**321* Parser context.322*/323XMLPUBFUN xmlParserCtxtPtr XMLCALL324xmlCreateFileParserCtxt (const char *filename);325XMLPUBFUN xmlParserCtxtPtr XMLCALL326xmlCreateURLParserCtxt (const char *filename,327int options);328XMLPUBFUN xmlParserCtxtPtr XMLCALL329xmlCreateMemoryParserCtxt(const char *buffer,330int size);331XMLPUBFUN xmlParserCtxtPtr XMLCALL332xmlCreateEntityParserCtxt(const xmlChar *URL,333const xmlChar *ID,334const xmlChar *base);335XMLPUBFUN int XMLCALL336xmlSwitchEncoding (xmlParserCtxtPtr ctxt,337xmlCharEncoding enc);338XMLPUBFUN int XMLCALL339xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,340xmlCharEncodingHandlerPtr handler);341XMLPUBFUN int XMLCALL342xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,343xmlParserInputPtr input,344xmlCharEncodingHandlerPtr handler);345346#ifdef IN_LIBXML347/* internal error reporting */348XMLPUBFUN void XMLCALL349__xmlErrEncoding (xmlParserCtxtPtr ctxt,350xmlParserErrors xmlerr,351const char *msg,352const xmlChar * str1,353const xmlChar * str2) LIBXML_ATTR_FORMAT(3,0);354#endif355356/**357* Input Streams.358*/359XMLPUBFUN xmlParserInputPtr XMLCALL360xmlNewStringInputStream (xmlParserCtxtPtr ctxt,361const xmlChar *buffer);362XMLPUBFUN xmlParserInputPtr XMLCALL363xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,364xmlEntityPtr entity);365XMLPUBFUN int XMLCALL366xmlPushInput (xmlParserCtxtPtr ctxt,367xmlParserInputPtr input);368XMLPUBFUN xmlChar XMLCALL369xmlPopInput (xmlParserCtxtPtr ctxt);370XMLPUBFUN void XMLCALL371xmlFreeInputStream (xmlParserInputPtr input);372XMLPUBFUN xmlParserInputPtr XMLCALL373xmlNewInputFromFile (xmlParserCtxtPtr ctxt,374const char *filename);375XMLPUBFUN xmlParserInputPtr XMLCALL376xmlNewInputStream (xmlParserCtxtPtr ctxt);377378/**379* Namespaces.380*/381XMLPUBFUN xmlChar * XMLCALL382xmlSplitQName (xmlParserCtxtPtr ctxt,383const xmlChar *name,384xmlChar **prefix);385386/**387* Generic production rules.388*/389XMLPUBFUN const xmlChar * XMLCALL390xmlParseName (xmlParserCtxtPtr ctxt);391XMLPUBFUN xmlChar * XMLCALL392xmlParseNmtoken (xmlParserCtxtPtr ctxt);393XMLPUBFUN xmlChar * XMLCALL394xmlParseEntityValue (xmlParserCtxtPtr ctxt,395xmlChar **orig);396XMLPUBFUN xmlChar * XMLCALL397xmlParseAttValue (xmlParserCtxtPtr ctxt);398XMLPUBFUN xmlChar * XMLCALL399xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);400XMLPUBFUN xmlChar * XMLCALL401xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);402XMLPUBFUN void XMLCALL403xmlParseCharData (xmlParserCtxtPtr ctxt,404int cdata);405XMLPUBFUN xmlChar * XMLCALL406xmlParseExternalID (xmlParserCtxtPtr ctxt,407xmlChar **publicID,408int strict);409XMLPUBFUN void XMLCALL410xmlParseComment (xmlParserCtxtPtr ctxt);411XMLPUBFUN const xmlChar * XMLCALL412xmlParsePITarget (xmlParserCtxtPtr ctxt);413XMLPUBFUN void XMLCALL414xmlParsePI (xmlParserCtxtPtr ctxt);415XMLPUBFUN void XMLCALL416xmlParseNotationDecl (xmlParserCtxtPtr ctxt);417XMLPUBFUN void XMLCALL418xmlParseEntityDecl (xmlParserCtxtPtr ctxt);419XMLPUBFUN int XMLCALL420xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,421xmlChar **value);422XMLPUBFUN xmlEnumerationPtr XMLCALL423xmlParseNotationType (xmlParserCtxtPtr ctxt);424XMLPUBFUN xmlEnumerationPtr XMLCALL425xmlParseEnumerationType (xmlParserCtxtPtr ctxt);426XMLPUBFUN int XMLCALL427xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,428xmlEnumerationPtr *tree);429XMLPUBFUN int XMLCALL430xmlParseAttributeType (xmlParserCtxtPtr ctxt,431xmlEnumerationPtr *tree);432XMLPUBFUN void XMLCALL433xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);434XMLPUBFUN xmlElementContentPtr XMLCALL435xmlParseElementMixedContentDecl436(xmlParserCtxtPtr ctxt,437int inputchk);438XMLPUBFUN xmlElementContentPtr XMLCALL439xmlParseElementChildrenContentDecl440(xmlParserCtxtPtr ctxt,441int inputchk);442XMLPUBFUN int XMLCALL443xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,444const xmlChar *name,445xmlElementContentPtr *result);446XMLPUBFUN int XMLCALL447xmlParseElementDecl (xmlParserCtxtPtr ctxt);448XMLPUBFUN void XMLCALL449xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);450XMLPUBFUN int XMLCALL451xmlParseCharRef (xmlParserCtxtPtr ctxt);452XMLPUBFUN xmlEntityPtr XMLCALL453xmlParseEntityRef (xmlParserCtxtPtr ctxt);454XMLPUBFUN void XMLCALL455xmlParseReference (xmlParserCtxtPtr ctxt);456XMLPUBFUN void XMLCALL457xmlParsePEReference (xmlParserCtxtPtr ctxt);458XMLPUBFUN void XMLCALL459xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);460#ifdef LIBXML_SAX1_ENABLED461XMLPUBFUN const xmlChar * XMLCALL462xmlParseAttribute (xmlParserCtxtPtr ctxt,463xmlChar **value);464XMLPUBFUN const xmlChar * XMLCALL465xmlParseStartTag (xmlParserCtxtPtr ctxt);466XMLPUBFUN void XMLCALL467xmlParseEndTag (xmlParserCtxtPtr ctxt);468#endif /* LIBXML_SAX1_ENABLED */469XMLPUBFUN void XMLCALL470xmlParseCDSect (xmlParserCtxtPtr ctxt);471XMLPUBFUN void XMLCALL472xmlParseContent (xmlParserCtxtPtr ctxt);473XMLPUBFUN void XMLCALL474xmlParseElement (xmlParserCtxtPtr ctxt);475XMLPUBFUN xmlChar * XMLCALL476xmlParseVersionNum (xmlParserCtxtPtr ctxt);477XMLPUBFUN xmlChar * XMLCALL478xmlParseVersionInfo (xmlParserCtxtPtr ctxt);479XMLPUBFUN xmlChar * XMLCALL480xmlParseEncName (xmlParserCtxtPtr ctxt);481XMLPUBFUN const xmlChar * XMLCALL482xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);483XMLPUBFUN int XMLCALL484xmlParseSDDecl (xmlParserCtxtPtr ctxt);485XMLPUBFUN void XMLCALL486xmlParseXMLDecl (xmlParserCtxtPtr ctxt);487XMLPUBFUN void XMLCALL488xmlParseTextDecl (xmlParserCtxtPtr ctxt);489XMLPUBFUN void XMLCALL490xmlParseMisc (xmlParserCtxtPtr ctxt);491XMLPUBFUN void XMLCALL492xmlParseExternalSubset (xmlParserCtxtPtr ctxt,493const xmlChar *ExternalID,494const xmlChar *SystemID);495/**496* XML_SUBSTITUTE_NONE:497*498* If no entities need to be substituted.499*/500#define XML_SUBSTITUTE_NONE 0501/**502* XML_SUBSTITUTE_REF:503*504* Whether general entities need to be substituted.505*/506#define XML_SUBSTITUTE_REF 1507/**508* XML_SUBSTITUTE_PEREF:509*510* Whether parameter entities need to be substituted.511*/512#define XML_SUBSTITUTE_PEREF 2513/**514* XML_SUBSTITUTE_BOTH:515*516* Both general and parameter entities need to be substituted.517*/518#define XML_SUBSTITUTE_BOTH 3519520XMLPUBFUN xmlChar * XMLCALL521xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,522const xmlChar *str,523int what,524xmlChar end,525xmlChar end2,526xmlChar end3);527XMLPUBFUN xmlChar * XMLCALL528xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt,529const xmlChar *str,530int len,531int what,532xmlChar end,533xmlChar end2,534xmlChar end3);535536/*537* Generated by MACROS on top of parser.c c.f. PUSH_AND_POP.538*/539XMLPUBFUN int XMLCALL nodePush (xmlParserCtxtPtr ctxt,540xmlNodePtr value);541XMLPUBFUN xmlNodePtr XMLCALL nodePop (xmlParserCtxtPtr ctxt);542XMLPUBFUN int XMLCALL inputPush (xmlParserCtxtPtr ctxt,543xmlParserInputPtr value);544XMLPUBFUN xmlParserInputPtr XMLCALL inputPop (xmlParserCtxtPtr ctxt);545XMLPUBFUN const xmlChar * XMLCALL namePop (xmlParserCtxtPtr ctxt);546XMLPUBFUN int XMLCALL namePush (xmlParserCtxtPtr ctxt,547const xmlChar *value);548549/*550* other commodities shared between parser.c and parserInternals.551*/552XMLPUBFUN int XMLCALL xmlSkipBlankChars (xmlParserCtxtPtr ctxt);553XMLPUBFUN int XMLCALL xmlStringCurrentChar (xmlParserCtxtPtr ctxt,554const xmlChar *cur,555int *len);556XMLPUBFUN void XMLCALL xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);557XMLPUBFUN int XMLCALL xmlCheckLanguageID (const xmlChar *lang);558559/*560* Really core function shared with HTML parser.561*/562XMLPUBFUN int XMLCALL xmlCurrentChar (xmlParserCtxtPtr ctxt,563int *len);564XMLPUBFUN int XMLCALL xmlCopyCharMultiByte (xmlChar *out,565int val);566XMLPUBFUN int XMLCALL xmlCopyChar (int len,567xmlChar *out,568int val);569XMLPUBFUN void XMLCALL xmlNextChar (xmlParserCtxtPtr ctxt);570XMLPUBFUN void XMLCALL xmlParserInputShrink (xmlParserInputPtr in);571572#ifdef LIBXML_HTML_ENABLED573/*574* Actually comes from the HTML parser but launched from the init stuff.575*/576XMLPUBFUN void XMLCALL htmlInitAutoClose (void);577XMLPUBFUN htmlParserCtxtPtr XMLCALL htmlCreateFileParserCtxt(const char *filename,578const char *encoding);579#endif580581/*582* Specific function to keep track of entities references583* and used by the XSLT debugger.584*/585#ifdef LIBXML_LEGACY_ENABLED586/**587* xmlEntityReferenceFunc:588* @ent: the entity589* @firstNode: the fist node in the chunk590* @lastNode: the last nod in the chunk591*592* Callback function used when one needs to be able to track back the593* provenance of a chunk of nodes inherited from an entity replacement.594*/595typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,596xmlNodePtr firstNode,597xmlNodePtr lastNode);598599XMLPUBFUN void XMLCALL xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func);600601XMLPUBFUN xmlChar * XMLCALL602xmlParseQuotedString (xmlParserCtxtPtr ctxt);603XMLPUBFUN void XMLCALL604xmlParseNamespace (xmlParserCtxtPtr ctxt);605XMLPUBFUN xmlChar * XMLCALL606xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);607XMLPUBFUN xmlChar * XMLCALL608xmlScanName (xmlParserCtxtPtr ctxt);609XMLPUBFUN xmlChar * XMLCALL610xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);611XMLPUBFUN void XMLCALL xmlParserHandleReference(xmlParserCtxtPtr ctxt);612XMLPUBFUN xmlChar * XMLCALL613xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,614xmlChar **prefix);615/**616* Entities617*/618XMLPUBFUN xmlChar * XMLCALL619xmlDecodeEntities (xmlParserCtxtPtr ctxt,620int len,621int what,622xmlChar end,623xmlChar end2,624xmlChar end3);625XMLPUBFUN void XMLCALL626xmlHandleEntity (xmlParserCtxtPtr ctxt,627xmlEntityPtr entity);628629#endif /* LIBXML_LEGACY_ENABLED */630631#ifdef IN_LIBXML632/*633* internal only634*/635XMLPUBFUN void XMLCALL636xmlErrMemory (xmlParserCtxtPtr ctxt,637const char *extra);638#endif639640#ifdef __cplusplus641}642#endif643#endif /* __XML_PARSER_INTERNALS_H__ */644645646