/* Path: blob/master/libs/xml2/include/libxml/parserInternals.h */
/* 4394 views */
/*1* Summary: internals routines and limits exported by the parser.2* Description: this module exports a number of internal parsing routines3* they are not really all intended for applications but4* can prove useful doing low level processing.5*6* Copy: See Copyright for the status of this software.7*8* Author: Daniel Veillard9*/1011#ifndef __XML_PARSER_INTERNALS_H__12#define __XML_PARSER_INTERNALS_H__1314#include <libxml/xmlversion.h>15#include <libxml/parser.h>16#include <libxml/HTMLparser.h>17#include <libxml/chvalid.h>18#include <libxml/SAX2.h>1920#ifdef __cplusplus21extern "C" {22#endif2324/**25* xmlParserMaxDepth:26*27* arbitrary depth limit for the XML documents that we allow to28* process. This is not a limitation of the parser but a safety29* boundary feature, use XML_PARSE_HUGE option to override it.30*/31XMLPUBVAR unsigned int xmlParserMaxDepth;3233/**34* XML_MAX_TEXT_LENGTH:35*36* Maximum size allowed for a single text node when building a tree.37* This is not a limitation of the parser but a safety boundary feature,38* use XML_PARSE_HUGE option to override it.39* Introduced in 2.9.040*/41#define XML_MAX_TEXT_LENGTH 100000004243/**44* XML_MAX_HUGE_LENGTH:45*46* Maximum size allowed when XML_PARSE_HUGE is set.47*/48#define XML_MAX_HUGE_LENGTH 10000000004950/**51* XML_MAX_NAME_LENGTH:52*53* Maximum size allowed for a markup identifier.54* This is not a limitation of the parser but a safety boundary feature,55* use XML_PARSE_HUGE option to override it.56* Note that with the use of parsing dictionaries overriding the limit57* may result in more runtime memory usage in face of "unfriendly' content58* Introduced in 2.9.059*/60#define XML_MAX_NAME_LENGTH 500006162/**63* XML_MAX_DICTIONARY_LIMIT:64*65* Maximum size allowed by the parser for a dictionary by default66* This is not a limitation of the parser but a safety boundary feature,67* use XML_PARSE_HUGE option to override it.68* Introduced in 2.9.069*/70#define XML_MAX_DICTIONARY_LIMIT 100000007172/**73* 
XML_MAX_LOOKUP_LIMIT:74*75* Maximum size allowed by the parser for ahead lookup76* This is an upper boundary enforced by the parser to avoid bad77* behaviour on "unfriendly' content78* Introduced in 2.9.079*/80#define XML_MAX_LOOKUP_LIMIT 100000008182/**83* XML_MAX_NAMELEN:84*85* Identifiers can be longer, but this will be more costly86* at runtime.87*/88#define XML_MAX_NAMELEN 1008990/**91* INPUT_CHUNK:92*93* The parser tries to always have that amount of input ready.94* One of the point is providing context when reporting errors.95*/96#define INPUT_CHUNK 2509798/************************************************************************99* *100* UNICODE version of the macros. *101* *102************************************************************************/103/**104* IS_BYTE_CHAR:105* @c: an byte value (int)106*107* Macro to check the following production in the XML spec:108*109* [2] Char ::= #x9 | #xA | #xD | [#x20...]110* any byte character in the accepted range111*/112#define IS_BYTE_CHAR(c) xmlIsChar_ch(c)113114/**115* IS_CHAR:116* @c: an UNICODE value (int)117*118* Macro to check the following production in the XML spec:119*120* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]121* | [#x10000-#x10FFFF]122* any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.123*/124#define IS_CHAR(c) xmlIsCharQ(c)125126/**127* IS_CHAR_CH:128* @c: an xmlChar (usually an unsigned char)129*130* Behaves like IS_CHAR on single-byte value131*/132#define IS_CHAR_CH(c) xmlIsChar_ch(c)133134/**135* IS_BLANK:136* @c: an UNICODE value (int)137*138* Macro to check the following production in the XML spec:139*140* [3] S ::= (#x20 | #x9 | #xD | #xA)+141*/142#define IS_BLANK(c) xmlIsBlankQ(c)143144/**145* IS_BLANK_CH:146* @c: an xmlChar value (normally unsigned char)147*148* Behaviour same as IS_BLANK149*/150#define IS_BLANK_CH(c) xmlIsBlank_ch(c)151152/**153* IS_BASECHAR:154* @c: an UNICODE value (int)155*156* Macro to check the following production in the XML 
spec:157*158* [85] BaseChar ::= ... long list see REC ...159*/160#define IS_BASECHAR(c) xmlIsBaseCharQ(c)161162/**163* IS_DIGIT:164* @c: an UNICODE value (int)165*166* Macro to check the following production in the XML spec:167*168* [88] Digit ::= ... long list see REC ...169*/170#define IS_DIGIT(c) xmlIsDigitQ(c)171172/**173* IS_DIGIT_CH:174* @c: an xmlChar value (usually an unsigned char)175*176* Behaves like IS_DIGIT but with a single byte argument177*/178#define IS_DIGIT_CH(c) xmlIsDigit_ch(c)179180/**181* IS_COMBINING:182* @c: an UNICODE value (int)183*184* Macro to check the following production in the XML spec:185*186* [87] CombiningChar ::= ... long list see REC ...187*/188#define IS_COMBINING(c) xmlIsCombiningQ(c)189190/**191* IS_COMBINING_CH:192* @c: an xmlChar (usually an unsigned char)193*194* Always false (all combining chars > 0xff)195*/196#define IS_COMBINING_CH(c) 0197198/**199* IS_EXTENDER:200* @c: an UNICODE value (int)201*202* Macro to check the following production in the XML spec:203*204*205* [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |206* #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |207* [#x309D-#x309E] | [#x30FC-#x30FE]208*/209#define IS_EXTENDER(c) xmlIsExtenderQ(c)210211/**212* IS_EXTENDER_CH:213* @c: an xmlChar value (usually an unsigned char)214*215* Behaves like IS_EXTENDER but with a single-byte argument216*/217#define IS_EXTENDER_CH(c) xmlIsExtender_ch(c)218219/**220* IS_IDEOGRAPHIC:221* @c: an UNICODE value (int)222*223* Macro to check the following production in the XML spec:224*225*226* [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029]227*/228#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c)229230/**231* IS_LETTER:232* @c: an UNICODE value (int)233*234* Macro to check the following production in the XML spec:235*236*237* [84] Letter ::= BaseChar | Ideographic238*/239#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))240241/**242* IS_LETTER_CH:243* @c: an xmlChar value (normally unsigned 
char)244*245* Macro behaves like IS_LETTER, but only check base chars246*247*/248#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c)249250/**251* IS_ASCII_LETTER:252* @c: an xmlChar value253*254* Macro to check [a-zA-Z]255*256*/257#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \258((0x61 <= (c)) && ((c) <= 0x7a)))259260/**261* IS_ASCII_DIGIT:262* @c: an xmlChar value263*264* Macro to check [0-9]265*266*/267#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39))268269/**270* IS_PUBIDCHAR:271* @c: an UNICODE value (int)272*273* Macro to check the following production in the XML spec:274*275*276* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]277*/278#define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c)279280/**281* IS_PUBIDCHAR_CH:282* @c: an xmlChar value (normally unsigned char)283*284* Same as IS_PUBIDCHAR but for single-byte value285*/286#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c)287288/**289* Global variables used for predefined strings.290*/291XMLPUBVAR const xmlChar xmlStringText[];292XMLPUBVAR const xmlChar xmlStringTextNoenc[];293XMLPUBVAR const xmlChar xmlStringComment[];294295/*296* Function to finish the work of the macros where needed.297*/298XMLPUBFUN int xmlIsLetter (int c);299300/**301* Parser context.302*/303XMLPUBFUN xmlParserCtxtPtr304xmlCreateFileParserCtxt (const char *filename);305XMLPUBFUN xmlParserCtxtPtr306xmlCreateURLParserCtxt (const char *filename,307int options);308XMLPUBFUN xmlParserCtxtPtr309xmlCreateMemoryParserCtxt(const char *buffer,310int size);311XMLPUBFUN xmlParserCtxtPtr312xmlCreateEntityParserCtxt(const xmlChar *URL,313const xmlChar *ID,314const xmlChar *base);315XMLPUBFUN int316xmlSwitchEncoding (xmlParserCtxtPtr ctxt,317xmlCharEncoding enc);318XMLPUBFUN int319xmlSwitchToEncoding (xmlParserCtxtPtr ctxt,320xmlCharEncodingHandlerPtr handler);321XML_DEPRECATED322XMLPUBFUN int323xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt,324xmlParserInputPtr input,325xmlCharEncodingHandlerPtr handler);326327/**328* 
Input Streams.329*/330XMLPUBFUN xmlParserInputPtr331xmlNewStringInputStream (xmlParserCtxtPtr ctxt,332const xmlChar *buffer);333XML_DEPRECATED334XMLPUBFUN xmlParserInputPtr335xmlNewEntityInputStream (xmlParserCtxtPtr ctxt,336xmlEntityPtr entity);337XMLPUBFUN int338xmlPushInput (xmlParserCtxtPtr ctxt,339xmlParserInputPtr input);340XMLPUBFUN xmlChar341xmlPopInput (xmlParserCtxtPtr ctxt);342XMLPUBFUN void343xmlFreeInputStream (xmlParserInputPtr input);344XMLPUBFUN xmlParserInputPtr345xmlNewInputFromFile (xmlParserCtxtPtr ctxt,346const char *filename);347XMLPUBFUN xmlParserInputPtr348xmlNewInputStream (xmlParserCtxtPtr ctxt);349350/**351* Namespaces.352*/353XMLPUBFUN xmlChar *354xmlSplitQName (xmlParserCtxtPtr ctxt,355const xmlChar *name,356xmlChar **prefix);357358/**359* Generic production rules.360*/361XML_DEPRECATED362XMLPUBFUN const xmlChar *363xmlParseName (xmlParserCtxtPtr ctxt);364XML_DEPRECATED365XMLPUBFUN xmlChar *366xmlParseNmtoken (xmlParserCtxtPtr ctxt);367XML_DEPRECATED368XMLPUBFUN xmlChar *369xmlParseEntityValue (xmlParserCtxtPtr ctxt,370xmlChar **orig);371XML_DEPRECATED372XMLPUBFUN xmlChar *373xmlParseAttValue (xmlParserCtxtPtr ctxt);374XML_DEPRECATED375XMLPUBFUN xmlChar *376xmlParseSystemLiteral (xmlParserCtxtPtr ctxt);377XML_DEPRECATED378XMLPUBFUN xmlChar *379xmlParsePubidLiteral (xmlParserCtxtPtr ctxt);380XML_DEPRECATED381XMLPUBFUN void382xmlParseCharData (xmlParserCtxtPtr ctxt,383int cdata);384XML_DEPRECATED385XMLPUBFUN xmlChar *386xmlParseExternalID (xmlParserCtxtPtr ctxt,387xmlChar **publicID,388int strict);389XML_DEPRECATED390XMLPUBFUN void391xmlParseComment (xmlParserCtxtPtr ctxt);392XML_DEPRECATED393XMLPUBFUN const xmlChar *394xmlParsePITarget (xmlParserCtxtPtr ctxt);395XML_DEPRECATED396XMLPUBFUN void397xmlParsePI (xmlParserCtxtPtr ctxt);398XML_DEPRECATED399XMLPUBFUN void400xmlParseNotationDecl (xmlParserCtxtPtr ctxt);401XML_DEPRECATED402XMLPUBFUN void403xmlParseEntityDecl (xmlParserCtxtPtr ctxt);404XML_DEPRECATED405XMLPUBFUN 
int406xmlParseDefaultDecl (xmlParserCtxtPtr ctxt,407xmlChar **value);408XML_DEPRECATED409XMLPUBFUN xmlEnumerationPtr410xmlParseNotationType (xmlParserCtxtPtr ctxt);411XML_DEPRECATED412XMLPUBFUN xmlEnumerationPtr413xmlParseEnumerationType (xmlParserCtxtPtr ctxt);414XML_DEPRECATED415XMLPUBFUN int416xmlParseEnumeratedType (xmlParserCtxtPtr ctxt,417xmlEnumerationPtr *tree);418XML_DEPRECATED419XMLPUBFUN int420xmlParseAttributeType (xmlParserCtxtPtr ctxt,421xmlEnumerationPtr *tree);422XML_DEPRECATED423XMLPUBFUN void424xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt);425XML_DEPRECATED426XMLPUBFUN xmlElementContentPtr427xmlParseElementMixedContentDecl428(xmlParserCtxtPtr ctxt,429int inputchk);430XML_DEPRECATED431XMLPUBFUN xmlElementContentPtr432xmlParseElementChildrenContentDecl433(xmlParserCtxtPtr ctxt,434int inputchk);435XML_DEPRECATED436XMLPUBFUN int437xmlParseElementContentDecl(xmlParserCtxtPtr ctxt,438const xmlChar *name,439xmlElementContentPtr *result);440XML_DEPRECATED441XMLPUBFUN int442xmlParseElementDecl (xmlParserCtxtPtr ctxt);443XML_DEPRECATED444XMLPUBFUN void445xmlParseMarkupDecl (xmlParserCtxtPtr ctxt);446XML_DEPRECATED447XMLPUBFUN int448xmlParseCharRef (xmlParserCtxtPtr ctxt);449XML_DEPRECATED450XMLPUBFUN xmlEntityPtr451xmlParseEntityRef (xmlParserCtxtPtr ctxt);452XML_DEPRECATED453XMLPUBFUN void454xmlParseReference (xmlParserCtxtPtr ctxt);455XML_DEPRECATED456XMLPUBFUN void457xmlParsePEReference (xmlParserCtxtPtr ctxt);458XML_DEPRECATED459XMLPUBFUN void460xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt);461#ifdef LIBXML_SAX1_ENABLED462XML_DEPRECATED463XMLPUBFUN const xmlChar *464xmlParseAttribute (xmlParserCtxtPtr ctxt,465xmlChar **value);466XML_DEPRECATED467XMLPUBFUN const xmlChar *468xmlParseStartTag (xmlParserCtxtPtr ctxt);469XML_DEPRECATED470XMLPUBFUN void471xmlParseEndTag (xmlParserCtxtPtr ctxt);472#endif /* LIBXML_SAX1_ENABLED */473XML_DEPRECATED474XMLPUBFUN void475xmlParseCDSect (xmlParserCtxtPtr ctxt);476XMLPUBFUN void477xmlParseContent (xmlParserCtxtPtr 
ctxt);478XML_DEPRECATED479XMLPUBFUN void480xmlParseElement (xmlParserCtxtPtr ctxt);481XML_DEPRECATED482XMLPUBFUN xmlChar *483xmlParseVersionNum (xmlParserCtxtPtr ctxt);484XML_DEPRECATED485XMLPUBFUN xmlChar *486xmlParseVersionInfo (xmlParserCtxtPtr ctxt);487XML_DEPRECATED488XMLPUBFUN xmlChar *489xmlParseEncName (xmlParserCtxtPtr ctxt);490XML_DEPRECATED491XMLPUBFUN const xmlChar *492xmlParseEncodingDecl (xmlParserCtxtPtr ctxt);493XML_DEPRECATED494XMLPUBFUN int495xmlParseSDDecl (xmlParserCtxtPtr ctxt);496XML_DEPRECATED497XMLPUBFUN void498xmlParseXMLDecl (xmlParserCtxtPtr ctxt);499XML_DEPRECATED500XMLPUBFUN void501xmlParseTextDecl (xmlParserCtxtPtr ctxt);502XML_DEPRECATED503XMLPUBFUN void504xmlParseMisc (xmlParserCtxtPtr ctxt);505XMLPUBFUN void506xmlParseExternalSubset (xmlParserCtxtPtr ctxt,507const xmlChar *ExternalID,508const xmlChar *SystemID);509/**510* XML_SUBSTITUTE_NONE:511*512* If no entities need to be substituted.513*/514#define XML_SUBSTITUTE_NONE 0515/**516* XML_SUBSTITUTE_REF:517*518* Whether general entities need to be substituted.519*/520#define XML_SUBSTITUTE_REF 1521/**522* XML_SUBSTITUTE_PEREF:523*524* Whether parameter entities need to be substituted.525*/526#define XML_SUBSTITUTE_PEREF 2527/**528* XML_SUBSTITUTE_BOTH:529*530* Both general and parameter entities need to be substituted.531*/532#define XML_SUBSTITUTE_BOTH 3533534XML_DEPRECATED535XMLPUBFUN xmlChar *536xmlStringDecodeEntities (xmlParserCtxtPtr ctxt,537const xmlChar *str,538int what,539xmlChar end,540xmlChar end2,541xmlChar end3);542XML_DEPRECATED543XMLPUBFUN xmlChar *544xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt,545const xmlChar *str,546int len,547int what,548xmlChar end,549xmlChar end2,550xmlChar end3);551552/*553* Generated by MACROS on top of parser.c c.f. 
PUSH_AND_POP.554*/555XML_DEPRECATED556XMLPUBFUN int nodePush (xmlParserCtxtPtr ctxt,557xmlNodePtr value);558XML_DEPRECATED559XMLPUBFUN xmlNodePtr nodePop (xmlParserCtxtPtr ctxt);560XMLPUBFUN int inputPush (xmlParserCtxtPtr ctxt,561xmlParserInputPtr value);562XMLPUBFUN xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt);563XML_DEPRECATED564XMLPUBFUN const xmlChar * namePop (xmlParserCtxtPtr ctxt);565XML_DEPRECATED566XMLPUBFUN int namePush (xmlParserCtxtPtr ctxt,567const xmlChar *value);568569/*570* other commodities shared between parser.c and parserInternals.571*/572XML_DEPRECATED573XMLPUBFUN int xmlSkipBlankChars (xmlParserCtxtPtr ctxt);574XML_DEPRECATED575XMLPUBFUN int xmlStringCurrentChar (xmlParserCtxtPtr ctxt,576const xmlChar *cur,577int *len);578XML_DEPRECATED579XMLPUBFUN void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt);580XML_DEPRECATED581XMLPUBFUN int xmlCheckLanguageID (const xmlChar *lang);582583/*584* Really core function shared with HTML parser.585*/586XML_DEPRECATED587XMLPUBFUN int xmlCurrentChar (xmlParserCtxtPtr ctxt,588int *len);589XMLPUBFUN int xmlCopyCharMultiByte (xmlChar *out,590int val);591XMLPUBFUN int xmlCopyChar (int len,592xmlChar *out,593int val);594XML_DEPRECATED595XMLPUBFUN void xmlNextChar (xmlParserCtxtPtr ctxt);596XML_DEPRECATED597XMLPUBFUN void xmlParserInputShrink (xmlParserInputPtr in);598599/*600* Specific function to keep track of entities references601* and used by the XSLT debugger.602*/603#ifdef LIBXML_LEGACY_ENABLED604/**605* xmlEntityReferenceFunc:606* @ent: the entity607* @firstNode: the fist node in the chunk608* @lastNode: the last nod in the chunk609*610* Callback function used when one needs to be able to track back the611* provenance of a chunk of nodes inherited from an entity replacement.612*/613typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent,614xmlNodePtr firstNode,615xmlNodePtr lastNode);616617XML_DEPRECATED618XMLPUBFUN void xmlSetEntityReferenceFunc (xmlEntityReferenceFunc 
func);619620XML_DEPRECATED621XMLPUBFUN xmlChar *622xmlParseQuotedString (xmlParserCtxtPtr ctxt);623XML_DEPRECATED624XMLPUBFUN void625xmlParseNamespace (xmlParserCtxtPtr ctxt);626XML_DEPRECATED627XMLPUBFUN xmlChar *628xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt);629XML_DEPRECATED630XMLPUBFUN xmlChar *631xmlScanName (xmlParserCtxtPtr ctxt);632XML_DEPRECATED633XMLPUBFUN xmlChar *634xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt);635XML_DEPRECATED636XMLPUBFUN void xmlParserHandleReference(xmlParserCtxtPtr ctxt);637XML_DEPRECATED638XMLPUBFUN xmlChar *639xmlNamespaceParseQName (xmlParserCtxtPtr ctxt,640xmlChar **prefix);641/**642* Entities643*/644XML_DEPRECATED645XMLPUBFUN xmlChar *646xmlDecodeEntities (xmlParserCtxtPtr ctxt,647int len,648int what,649xmlChar end,650xmlChar end2,651xmlChar end3);652XML_DEPRECATED653XMLPUBFUN void654xmlHandleEntity (xmlParserCtxtPtr ctxt,655xmlEntityPtr entity);656657#endif /* LIBXML_LEGACY_ENABLED */658659#ifdef __cplusplus660}661#endif662#endif /* __XML_PARSER_INTERNALS_H__ */663664665