Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/parser.c
4393 views
1
/*
2
* parser.c : an XML 1.0 parser, namespaces and validity support are mostly
3
* implemented on top of the SAX interfaces
4
*
5
* References:
6
* The XML specification:
7
* http://www.w3.org/TR/REC-xml
8
* Original 1.0 version:
9
* http://www.w3.org/TR/1998/REC-xml-19980210
10
* XML second edition working draft
11
* http://www.w3.org/TR/2000/WD-xml-2e-20000814
12
*
13
* Okay this is a big file, the parser core is around 7000 lines, then it
14
* is followed by the progressive parser top routines, then the various
15
* high level APIs to call the parser and a few miscellaneous functions.
16
* A number of helper functions and deprecated ones have been moved to
17
* parserInternals.c to reduce this file size.
18
* As much as possible the functions are associated with their relative
19
* production in the XML specification. A few productions defining the
20
* different ranges of character are actually implanted either in
21
* parserInternals.h or parserInternals.c
22
* The DOM tree build is realized from the default SAX callbacks in
23
* the module SAX.c.
24
* The routines doing the validation checks are in valid.c and called either
25
* from the SAX callbacks or as standalone functions using a preparsed
26
* document.
27
*
28
* See Copyright for the status of this software.
29
*
30
* daniel@veillard.com
31
*/
32
33
/* To avoid EBCDIC trouble when parsing on zOS */
34
#if defined(__MVS__)
35
#pragma convert("ISO8859-1")
36
#endif
37
38
#define IN_LIBXML
39
#include "libxml.h"
40
41
#if defined(_WIN32)
42
#define XML_DIR_SEP '\\'
43
#else
44
#define XML_DIR_SEP '/'
45
#endif
46
47
#include <stdlib.h>
48
#include <limits.h>
49
#include <string.h>
50
#include <stdarg.h>
51
#include <stddef.h>
52
#include <ctype.h>
53
#include <stdlib.h>
54
#include <libxml/parser.h>
55
#include <libxml/xmlmemory.h>
56
#include <libxml/tree.h>
57
#include <libxml/parserInternals.h>
58
#include <libxml/valid.h>
59
#include <libxml/entities.h>
60
#include <libxml/xmlerror.h>
61
#include <libxml/encoding.h>
62
#include <libxml/xmlIO.h>
63
#include <libxml/uri.h>
64
#include <libxml/SAX2.h>
65
#ifdef LIBXML_CATALOG_ENABLED
66
#include <libxml/catalog.h>
67
#endif
68
69
#include "private/buf.h"
70
#include "private/dict.h"
71
#include "private/entities.h"
72
#include "private/error.h"
73
#include "private/html.h"
74
#include "private/io.h"
75
#include "private/parser.h"
76
77
/*
 * Reserved namespace index values and URI hash constants.
 * NOTE(review): the names suggest "no namespace" and the predefined
 * "xml" namespace; confirm against the users further down the file.
 */
#define NS_INDEX_EMPTY  INT_MAX
#define NS_INDEX_XML    (INT_MAX - 1)
#define URI_HASH_EMPTY  0xD943A04E
#define URI_HASH_XML    0xF0451F02
81
82
struct _xmlStartTag {
83
const xmlChar *prefix;
84
const xmlChar *URI;
85
int line;
86
int nsNr;
87
};
88
89
/*
 * Additional per-namespace bookkeeping.
 * NOTE(review): field meanings are inferred from the names; confirm
 * against the namespace database code.
 */
typedef struct {
    void *saxData;              /* opaque pointer, not interpreted here */
    unsigned prefixHashValue;   /* presumably the hash of the prefix */
    unsigned uriHashValue;      /* presumably the hash of the URI */
    unsigned elementId;         /* presumably the declaring element's id */
    int oldIndex;               /* presumably a previous binding's index */
} xmlParserNsExtra;
96
97
/*
 * One slot of the namespace hash table: a hash value paired with an index.
 * NOTE(review): what the index refers to is not visible here - confirm.
 */
typedef struct {
    unsigned hashValue;
    int index;
} xmlParserNsBucket;
101
102
struct _xmlParserNsData {
103
xmlParserNsExtra *extra;
104
105
unsigned hashSize;
106
unsigned hashElems;
107
xmlParserNsBucket *hash;
108
109
unsigned elementId;
110
int defaultNsIndex;
111
};
112
113
/*
 * Slot of the attribute hash table; it only carries an index.
 * NOTE(review): presumably an index into the attribute array - confirm.
 */
struct _xmlAttrHashBucket {
    int index;
};
116
117
static xmlParserCtxtPtr
118
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
119
const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
120
xmlParserCtxtPtr pctx);
121
122
static int
123
xmlParseElementStart(xmlParserCtxtPtr ctxt);
124
125
static void
126
xmlParseElementEnd(xmlParserCtxtPtr ctxt);
127
128
/************************************************************************
129
* *
130
* Arbitrary limits set in the parser. See XML_PARSE_HUGE *
131
* *
132
************************************************************************/
133
134
/*
 * Entity count thresholds.
 * NOTE(review): not referenced in the visible part of the file.
 */
#define XML_PARSER_BIG_ENTITY 1000
#define XML_PARSER_LOT_ENTITY 5000

/*
 * Tuning constants for the protection against abusive entity expansion
 * ("billion laughs").
 */

/*
 * Amount of entity expansion that is always tolerated, whatever the
 * size of the input.
 */
#define XML_PARSER_ALLOWED_EXPANSION 1000000

/*
 * Flat cost charged for every entity reference. It is a crude stand-in
 * for processing time and protects, for example, against exponential
 * expansion of empty or very short entities.
 */
#define XML_ENT_FIXED_COST 20
153
154
/**
 * xmlParserMaxDepth:
 *
 * Arbitrary nesting depth limit for the XML documents the parser agrees
 * to process. It is a safety boundary, not a limitation of the parser,
 * and can be lifted with the XML_PARSE_HUGE parser option.
 */
unsigned xmlParserMaxDepth = 256u;
163
164
165
166
/* Buffer sizes; their users are outside the visible part of the file. */
#define XML_PARSER_BIG_BUFFER_SIZE 300
#define XML_PARSER_BUFFER_SIZE 100
#define SAX_COMPAT_MODE BAD_CAST "SAX compatibility mode document"

/**
 * XML_PARSER_CHUNK_SIZE
 *
 * Minimal amount of data the parser expects to have received when GROW
 * is called. This is an optimization for reading strings such as Names,
 * not a hard limit. It is not strictly needed as long as the available
 * input characters are followed by a 0, which the I/O level should
 * provide.
 */
#define XML_PARSER_CHUNK_SIZE 100
180
181
/**
182
* xmlParserVersion:
183
*
184
* Constant string describing the internal version of the library
185
*/
186
const char *const
187
xmlParserVersion = LIBXML_VERSION_STRING LIBXML_VERSION_EXTRA;
188
189
/*
 * NULL-terminated list of the "xml"-prefixed processing instruction
 * targets allowed by W3C specifications.
 */
static const char *const xmlW3CPIs[] = {
    "xml-stylesheet", "xml-model", NULL
};
198
199
200
/* DEPR void xmlParserHandleReference(xmlParserCtxtPtr ctxt); */
201
static xmlEntityPtr xmlParseStringPEReference(xmlParserCtxtPtr ctxt,
202
const xmlChar **str);
203
204
static xmlParserErrors
205
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
206
xmlSAXHandlerPtr sax,
207
void *user_data, int depth, const xmlChar *URL,
208
const xmlChar *ID, xmlNodePtr *list);
209
210
static int
211
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options);
212
#ifdef LIBXML_LEGACY_ENABLED
213
static void
214
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
215
xmlNodePtr lastNode);
216
#endif /* LIBXML_LEGACY_ENABLED */
217
218
static xmlParserErrors
219
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
220
const xmlChar *string, void *user_data, xmlNodePtr *lst);
221
222
static int
223
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity);
224
225
/************************************************************************
226
* *
227
* Some factorized error routines *
228
* *
229
************************************************************************/
230
231
/**
232
* xmlErrAttributeDup:
233
* @ctxt: an XML parser context
234
* @prefix: the attribute prefix
235
* @localname: the attribute localname
236
*
237
* Handle a redefinition of attribute error
238
*/
239
static void
240
xmlErrAttributeDup(xmlParserCtxtPtr ctxt, const xmlChar * prefix,
241
const xmlChar * localname)
242
{
243
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
244
(ctxt->instate == XML_PARSER_EOF))
245
return;
246
if (ctxt != NULL)
247
ctxt->errNo = XML_ERR_ATTRIBUTE_REDEFINED;
248
249
if (prefix == NULL)
250
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
251
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
252
(const char *) localname, NULL, NULL, 0, 0,
253
"Attribute %s redefined\n", localname);
254
else
255
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
256
XML_ERR_ATTRIBUTE_REDEFINED, XML_ERR_FATAL, NULL, 0,
257
(const char *) prefix, (const char *) localname,
258
NULL, 0, 0, "Attribute %s:%s redefined\n", prefix,
259
localname);
260
if (ctxt != NULL) {
261
ctxt->wellFormed = 0;
262
if (ctxt->recovery == 0)
263
ctxt->disableSAX = 1;
264
}
265
}
266
267
/**
268
* xmlFatalErrMsg:
269
* @ctxt: an XML parser context
270
* @error: the error number
271
* @msg: the error message
272
*
273
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
274
*/
275
static void LIBXML_ATTR_FORMAT(3,0)
276
xmlFatalErrMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
277
const char *msg)
278
{
279
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
280
(ctxt->instate == XML_PARSER_EOF))
281
return;
282
if (ctxt != NULL)
283
ctxt->errNo = error;
284
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
285
XML_ERR_FATAL, NULL, 0, NULL, NULL, NULL, 0, 0, "%s", msg);
286
if (ctxt != NULL) {
287
ctxt->wellFormed = 0;
288
if (ctxt->recovery == 0)
289
ctxt->disableSAX = 1;
290
}
291
}
292
293
/**
294
* xmlWarningMsg:
295
* @ctxt: an XML parser context
296
* @error: the error number
297
* @msg: the error message
298
* @str1: extra data
299
* @str2: extra data
300
*
301
* Handle a warning.
302
*/
303
void LIBXML_ATTR_FORMAT(3,0)
304
xmlWarningMsg(xmlParserCtxtPtr ctxt, xmlParserErrors error,
305
const char *msg, const xmlChar *str1, const xmlChar *str2)
306
{
307
xmlStructuredErrorFunc schannel = NULL;
308
309
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
310
(ctxt->instate == XML_PARSER_EOF))
311
return;
312
if ((ctxt != NULL) && (ctxt->sax != NULL) &&
313
(ctxt->sax->initialized == XML_SAX2_MAGIC))
314
schannel = ctxt->sax->serror;
315
if (ctxt != NULL) {
316
__xmlRaiseError(schannel,
317
(ctxt->sax) ? ctxt->sax->warning : NULL,
318
ctxt->userData,
319
ctxt, NULL, XML_FROM_PARSER, error,
320
XML_ERR_WARNING, NULL, 0,
321
(const char *) str1, (const char *) str2, NULL, 0, 0,
322
msg, (const char *) str1, (const char *) str2);
323
} else {
324
__xmlRaiseError(schannel, NULL, NULL,
325
ctxt, NULL, XML_FROM_PARSER, error,
326
XML_ERR_WARNING, NULL, 0,
327
(const char *) str1, (const char *) str2, NULL, 0, 0,
328
msg, (const char *) str1, (const char *) str2);
329
}
330
}
331
332
/**
333
* xmlValidityError:
334
* @ctxt: an XML parser context
335
* @error: the error number
336
* @msg: the error message
337
* @str1: extra data
338
*
339
* Handle a validity error.
340
*/
341
static void LIBXML_ATTR_FORMAT(3,0)
342
xmlValidityError(xmlParserCtxtPtr ctxt, xmlParserErrors error,
343
const char *msg, const xmlChar *str1, const xmlChar *str2)
344
{
345
xmlStructuredErrorFunc schannel = NULL;
346
347
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
348
(ctxt->instate == XML_PARSER_EOF))
349
return;
350
if (ctxt != NULL) {
351
ctxt->errNo = error;
352
if ((ctxt->sax != NULL) && (ctxt->sax->initialized == XML_SAX2_MAGIC))
353
schannel = ctxt->sax->serror;
354
}
355
if (ctxt != NULL) {
356
__xmlRaiseError(schannel,
357
ctxt->vctxt.error, ctxt->vctxt.userData,
358
ctxt, NULL, XML_FROM_DTD, error,
359
XML_ERR_ERROR, NULL, 0, (const char *) str1,
360
(const char *) str2, NULL, 0, 0,
361
msg, (const char *) str1, (const char *) str2);
362
ctxt->valid = 0;
363
} else {
364
__xmlRaiseError(schannel, NULL, NULL,
365
ctxt, NULL, XML_FROM_DTD, error,
366
XML_ERR_ERROR, NULL, 0, (const char *) str1,
367
(const char *) str2, NULL, 0, 0,
368
msg, (const char *) str1, (const char *) str2);
369
}
370
}
371
372
/**
373
* xmlFatalErrMsgInt:
374
* @ctxt: an XML parser context
375
* @error: the error number
376
* @msg: the error message
377
* @val: an integer value
378
*
379
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
380
*/
381
static void LIBXML_ATTR_FORMAT(3,0)
382
xmlFatalErrMsgInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
383
const char *msg, int val)
384
{
385
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
386
(ctxt->instate == XML_PARSER_EOF))
387
return;
388
if (ctxt != NULL)
389
ctxt->errNo = error;
390
__xmlRaiseError(NULL, NULL, NULL,
391
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
392
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
393
if (ctxt != NULL) {
394
ctxt->wellFormed = 0;
395
if (ctxt->recovery == 0)
396
ctxt->disableSAX = 1;
397
}
398
}
399
400
/**
401
* xmlFatalErrMsgStrIntStr:
402
* @ctxt: an XML parser context
403
* @error: the error number
404
* @msg: the error message
405
* @str1: an string info
406
* @val: an integer value
407
* @str2: an string info
408
*
409
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
410
*/
411
static void LIBXML_ATTR_FORMAT(3,0)
412
xmlFatalErrMsgStrIntStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413
const char *msg, const xmlChar *str1, int val,
414
const xmlChar *str2)
415
{
416
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
417
(ctxt->instate == XML_PARSER_EOF))
418
return;
419
if (ctxt != NULL)
420
ctxt->errNo = error;
421
__xmlRaiseError(NULL, NULL, NULL,
422
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
423
NULL, 0, (const char *) str1, (const char *) str2,
424
NULL, val, 0, msg, str1, val, str2);
425
if (ctxt != NULL) {
426
ctxt->wellFormed = 0;
427
if (ctxt->recovery == 0)
428
ctxt->disableSAX = 1;
429
}
430
}
431
432
/**
433
* xmlFatalErrMsgStr:
434
* @ctxt: an XML parser context
435
* @error: the error number
436
* @msg: the error message
437
* @val: a string value
438
*
439
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
440
*/
441
static void LIBXML_ATTR_FORMAT(3,0)
442
xmlFatalErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
443
const char *msg, const xmlChar * val)
444
{
445
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
446
(ctxt->instate == XML_PARSER_EOF))
447
return;
448
if (ctxt != NULL)
449
ctxt->errNo = error;
450
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
451
XML_FROM_PARSER, error, XML_ERR_FATAL,
452
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
453
val);
454
if (ctxt != NULL) {
455
ctxt->wellFormed = 0;
456
if (ctxt->recovery == 0)
457
ctxt->disableSAX = 1;
458
}
459
}
460
461
/**
462
* xmlErrMsgStr:
463
* @ctxt: an XML parser context
464
* @error: the error number
465
* @msg: the error message
466
* @val: a string value
467
*
468
* Handle a non fatal parser error
469
*/
470
static void LIBXML_ATTR_FORMAT(3,0)
471
xmlErrMsgStr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
472
const char *msg, const xmlChar * val)
473
{
474
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
475
(ctxt->instate == XML_PARSER_EOF))
476
return;
477
if (ctxt != NULL)
478
ctxt->errNo = error;
479
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL,
480
XML_FROM_PARSER, error, XML_ERR_ERROR,
481
NULL, 0, (const char *) val, NULL, NULL, 0, 0, msg,
482
val);
483
}
484
485
/**
486
* xmlNsErr:
487
* @ctxt: an XML parser context
488
* @error: the error number
489
* @msg: the message
490
* @info1: extra information string
491
* @info2: extra information string
492
*
493
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
494
*/
495
static void LIBXML_ATTR_FORMAT(3,0)
496
xmlNsErr(xmlParserCtxtPtr ctxt, xmlParserErrors error,
497
const char *msg,
498
const xmlChar * info1, const xmlChar * info2,
499
const xmlChar * info3)
500
{
501
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
502
(ctxt->instate == XML_PARSER_EOF))
503
return;
504
if (ctxt != NULL)
505
ctxt->errNo = error;
506
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
507
XML_ERR_ERROR, NULL, 0, (const char *) info1,
508
(const char *) info2, (const char *) info3, 0, 0, msg,
509
info1, info2, info3);
510
if (ctxt != NULL)
511
ctxt->nsWellFormed = 0;
512
}
513
514
/**
515
* xmlNsWarn
516
* @ctxt: an XML parser context
517
* @error: the error number
518
* @msg: the message
519
* @info1: extra information string
520
* @info2: extra information string
521
*
522
* Handle a namespace warning error
523
*/
524
static void LIBXML_ATTR_FORMAT(3,0)
525
xmlNsWarn(xmlParserCtxtPtr ctxt, xmlParserErrors error,
526
const char *msg,
527
const xmlChar * info1, const xmlChar * info2,
528
const xmlChar * info3)
529
{
530
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
531
(ctxt->instate == XML_PARSER_EOF))
532
return;
533
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_NAMESPACE, error,
534
XML_ERR_WARNING, NULL, 0, (const char *) info1,
535
(const char *) info2, (const char *) info3, 0, 0, msg,
536
info1, info2, info3);
537
}
538
539
/*
 * xmlSaturatedAdd:
 * @dst:  accumulator, updated in place
 * @val:  amount to add
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * it wrap around.
 */
static void
xmlSaturatedAdd(unsigned long *dst, unsigned long val) {
    unsigned long room = ULONG_MAX - *dst;

    *dst = (val > room) ? ULONG_MAX : *dst + val;
}
546
547
/*
 * xmlSaturatedAddSizeT:
 * @dst:  accumulator, updated in place
 * @val:  amount to add, as a size_t
 *
 * Add @val to *@dst, clamping the result to ULONG_MAX instead of letting
 * it wrap around.
 *
 * @val is taken as size_t so that the comparison sees the full value
 * where size_t is wider than unsigned long. With an unsigned long
 * parameter, a large value would be truncated before the saturation
 * check.
 */
static void
xmlSaturatedAddSizeT(unsigned long *dst, size_t val) {
    if (val > ULONG_MAX - *dst)
        *dst = ULONG_MAX;
    else
        *dst += (unsigned long) val;    /* fits: val <= ULONG_MAX - *dst */
}
554
555
/**
556
* xmlParserEntityCheck:
557
* @ctxt: parser context
558
* @extra: sum of unexpanded entity sizes
559
*
560
* Check for non-linear entity expansion behaviour.
561
*
562
* In some cases like xmlStringDecodeEntities, this function is called
563
* for each, possibly nested entity and its unexpanded content length.
564
*
565
* In other cases like xmlParseReference, it's only called for each
566
* top-level entity with its unexpanded content length plus the sum of
567
* the unexpanded content lengths (plus fixed cost) of all nested
568
* entities.
569
*
570
* Summing the unexpanded lengths also adds the length of the reference.
571
* This is by design. Taking the length of the entity name into account
572
* discourages attacks that try to waste CPU time with abusively long
573
* entity names. See test/recurse/lol6.xml for example. Each call also
574
* adds some fixed cost XML_ENT_FIXED_COST to discourage attacks with
575
* short entities.
576
*
577
* Returns 1 on error, 0 on success.
578
*/
579
static int
580
xmlParserEntityCheck(xmlParserCtxtPtr ctxt, unsigned long extra)
581
{
582
unsigned long consumed;
583
xmlParserInputPtr input = ctxt->input;
584
xmlEntityPtr entity = input->entity;
585
586
/*
587
* Compute total consumed bytes so far, including input streams of
588
* external entities.
589
*/
590
consumed = input->parentConsumed;
591
if ((entity == NULL) ||
592
((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
593
((entity->flags & XML_ENT_PARSED) == 0))) {
594
xmlSaturatedAdd(&consumed, input->consumed);
595
xmlSaturatedAddSizeT(&consumed, input->cur - input->base);
596
}
597
xmlSaturatedAdd(&consumed, ctxt->sizeentities);
598
599
/*
600
* Add extra cost and some fixed cost.
601
*/
602
xmlSaturatedAdd(&ctxt->sizeentcopy, extra);
603
xmlSaturatedAdd(&ctxt->sizeentcopy, XML_ENT_FIXED_COST);
604
605
/*
606
* It's important to always use saturation arithmetic when tracking
607
* entity sizes to make the size checks reliable. If "sizeentcopy"
608
* overflows, we have to abort.
609
*/
610
if ((ctxt->sizeentcopy > XML_PARSER_ALLOWED_EXPANSION) &&
611
((ctxt->sizeentcopy >= ULONG_MAX) ||
612
(ctxt->sizeentcopy / ctxt->maxAmpl > consumed))) {
613
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
614
"Maximum entity amplification factor exceeded, see "
615
"xmlCtxtSetMaxAmplification.\n");
616
xmlHaltParser(ctxt);
617
return(1);
618
}
619
620
return(0);
621
}
622
623
/************************************************************************
624
* *
625
* Library wide options *
626
* *
627
************************************************************************/
628
629
/**
630
* xmlHasFeature:
631
* @feature: the feature to be examined
632
*
633
* Examines if the library has been compiled with a given feature.
634
*
635
* Returns a non-zero value if the feature exist, otherwise zero.
636
* Returns zero (0) if the feature does not exist or an unknown
637
* unknown feature is requested, non-zero otherwise.
638
*/
639
int
640
xmlHasFeature(xmlFeature feature)
641
{
642
switch (feature) {
643
case XML_WITH_THREAD:
644
#ifdef LIBXML_THREAD_ENABLED
645
return(1);
646
#else
647
return(0);
648
#endif
649
case XML_WITH_TREE:
650
#ifdef LIBXML_TREE_ENABLED
651
return(1);
652
#else
653
return(0);
654
#endif
655
case XML_WITH_OUTPUT:
656
#ifdef LIBXML_OUTPUT_ENABLED
657
return(1);
658
#else
659
return(0);
660
#endif
661
case XML_WITH_PUSH:
662
#ifdef LIBXML_PUSH_ENABLED
663
return(1);
664
#else
665
return(0);
666
#endif
667
case XML_WITH_READER:
668
#ifdef LIBXML_READER_ENABLED
669
return(1);
670
#else
671
return(0);
672
#endif
673
case XML_WITH_PATTERN:
674
#ifdef LIBXML_PATTERN_ENABLED
675
return(1);
676
#else
677
return(0);
678
#endif
679
case XML_WITH_WRITER:
680
#ifdef LIBXML_WRITER_ENABLED
681
return(1);
682
#else
683
return(0);
684
#endif
685
case XML_WITH_SAX1:
686
#ifdef LIBXML_SAX1_ENABLED
687
return(1);
688
#else
689
return(0);
690
#endif
691
case XML_WITH_FTP:
692
#ifdef LIBXML_FTP_ENABLED
693
return(1);
694
#else
695
return(0);
696
#endif
697
case XML_WITH_HTTP:
698
#ifdef LIBXML_HTTP_ENABLED
699
return(1);
700
#else
701
return(0);
702
#endif
703
case XML_WITH_VALID:
704
#ifdef LIBXML_VALID_ENABLED
705
return(1);
706
#else
707
return(0);
708
#endif
709
case XML_WITH_HTML:
710
#ifdef LIBXML_HTML_ENABLED
711
return(1);
712
#else
713
return(0);
714
#endif
715
case XML_WITH_LEGACY:
716
#ifdef LIBXML_LEGACY_ENABLED
717
return(1);
718
#else
719
return(0);
720
#endif
721
case XML_WITH_C14N:
722
#ifdef LIBXML_C14N_ENABLED
723
return(1);
724
#else
725
return(0);
726
#endif
727
case XML_WITH_CATALOG:
728
#ifdef LIBXML_CATALOG_ENABLED
729
return(1);
730
#else
731
return(0);
732
#endif
733
case XML_WITH_XPATH:
734
#ifdef LIBXML_XPATH_ENABLED
735
return(1);
736
#else
737
return(0);
738
#endif
739
case XML_WITH_XPTR:
740
#ifdef LIBXML_XPTR_ENABLED
741
return(1);
742
#else
743
return(0);
744
#endif
745
case XML_WITH_XINCLUDE:
746
#ifdef LIBXML_XINCLUDE_ENABLED
747
return(1);
748
#else
749
return(0);
750
#endif
751
case XML_WITH_ICONV:
752
#ifdef LIBXML_ICONV_ENABLED
753
return(1);
754
#else
755
return(0);
756
#endif
757
case XML_WITH_ISO8859X:
758
#ifdef LIBXML_ISO8859X_ENABLED
759
return(1);
760
#else
761
return(0);
762
#endif
763
case XML_WITH_UNICODE:
764
#ifdef LIBXML_UNICODE_ENABLED
765
return(1);
766
#else
767
return(0);
768
#endif
769
case XML_WITH_REGEXP:
770
#ifdef LIBXML_REGEXP_ENABLED
771
return(1);
772
#else
773
return(0);
774
#endif
775
case XML_WITH_AUTOMATA:
776
#ifdef LIBXML_AUTOMATA_ENABLED
777
return(1);
778
#else
779
return(0);
780
#endif
781
case XML_WITH_EXPR:
782
#ifdef LIBXML_EXPR_ENABLED
783
return(1);
784
#else
785
return(0);
786
#endif
787
case XML_WITH_SCHEMAS:
788
#ifdef LIBXML_SCHEMAS_ENABLED
789
return(1);
790
#else
791
return(0);
792
#endif
793
case XML_WITH_SCHEMATRON:
794
#ifdef LIBXML_SCHEMATRON_ENABLED
795
return(1);
796
#else
797
return(0);
798
#endif
799
case XML_WITH_MODULES:
800
#ifdef LIBXML_MODULES_ENABLED
801
return(1);
802
#else
803
return(0);
804
#endif
805
case XML_WITH_DEBUG:
806
#ifdef LIBXML_DEBUG_ENABLED
807
return(1);
808
#else
809
return(0);
810
#endif
811
case XML_WITH_DEBUG_MEM:
812
#ifdef DEBUG_MEMORY_LOCATION
813
return(1);
814
#else
815
return(0);
816
#endif
817
case XML_WITH_DEBUG_RUN:
818
return(0);
819
case XML_WITH_ZLIB:
820
#ifdef LIBXML_ZLIB_ENABLED
821
return(1);
822
#else
823
return(0);
824
#endif
825
case XML_WITH_LZMA:
826
#ifdef LIBXML_LZMA_ENABLED
827
return(1);
828
#else
829
return(0);
830
#endif
831
case XML_WITH_ICU:
832
#ifdef LIBXML_ICU_ENABLED
833
return(1);
834
#else
835
return(0);
836
#endif
837
default:
838
break;
839
}
840
return(0);
841
}
842
843
/************************************************************************
844
* *
845
* SAX2 defaulted attributes handling *
846
* *
847
************************************************************************/
848
849
/**
850
* xmlDetectSAX2:
851
* @ctxt: an XML parser context
852
*
853
* Do the SAX2 detection and specific initialization
854
*/
855
static void
856
xmlDetectSAX2(xmlParserCtxtPtr ctxt) {
857
xmlSAXHandlerPtr sax;
858
859
/* Avoid unused variable warning if features are disabled. */
860
(void) sax;
861
862
if (ctxt == NULL) return;
863
sax = ctxt->sax;
864
#ifdef LIBXML_SAX1_ENABLED
865
/*
866
* Only enable SAX2 if there SAX2 element handlers, except when there
867
* are no element handlers at all.
868
*/
869
if ((sax) && (sax->initialized == XML_SAX2_MAGIC) &&
870
((sax->startElementNs != NULL) ||
871
(sax->endElementNs != NULL) ||
872
((sax->startElement == NULL) && (sax->endElement == NULL))))
873
ctxt->sax2 = 1;
874
#else
875
ctxt->sax2 = 1;
876
#endif /* LIBXML_SAX1_ENABLED */
877
878
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
879
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
880
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
881
if ((ctxt->str_xml==NULL) || (ctxt->str_xmlns==NULL) ||
882
(ctxt->str_xml_ns == NULL)) {
883
xmlErrMemory(ctxt, NULL);
884
}
885
}
886
887
typedef struct {
888
xmlHashedString prefix;
889
xmlHashedString name;
890
xmlHashedString value;
891
const xmlChar *valueEnd;
892
int external;
893
int expandedSize;
894
} xmlDefAttr;
895
896
typedef struct _xmlDefAttrs xmlDefAttrs;
897
typedef xmlDefAttrs *xmlDefAttrsPtr;
898
struct _xmlDefAttrs {
899
int nbAttrs; /* number of defaulted attributes on that element */
900
int maxAttrs; /* the size of the array */
901
#if __STDC_VERSION__ >= 199901L
902
/* Using a C99 flexible array member avoids UBSan errors. */
903
xmlDefAttr attrs[]; /* array of localname/prefix/values/external */
904
#else
905
xmlDefAttr attrs[1];
906
#endif
907
};
908
909
/**
910
* xmlAttrNormalizeSpace:
911
* @src: the source string
912
* @dst: the target string
913
*
914
* Normalize the space in non CDATA attribute values:
915
* If the attribute type is not CDATA, then the XML processor MUST further
916
* process the normalized attribute value by discarding any leading and
917
* trailing space (#x20) characters, and by replacing sequences of space
918
* (#x20) characters by a single space (#x20) character.
919
* Note that the size of dst need to be at least src, and if one doesn't need
920
* to preserve dst (and it doesn't come from a dictionary or read-only) then
921
* passing src as dst is just fine.
922
*
923
* Returns a pointer to the normalized value (dst) or NULL if no conversion
924
* is needed.
925
*/
926
static xmlChar *
927
xmlAttrNormalizeSpace(const xmlChar *src, xmlChar *dst)
928
{
929
if ((src == NULL) || (dst == NULL))
930
return(NULL);
931
932
while (*src == 0x20) src++;
933
while (*src != 0) {
934
if (*src == 0x20) {
935
while (*src == 0x20) src++;
936
if (*src != 0)
937
*dst++ = 0x20;
938
} else {
939
*dst++ = *src++;
940
}
941
}
942
*dst = 0;
943
if (dst == src)
944
return(NULL);
945
return(dst);
946
}
947
948
/**
949
* xmlAttrNormalizeSpace2:
950
* @src: the source string
951
*
952
* Normalize the space in non CDATA attribute values, a slightly more complex
953
* front end to avoid allocation problems when running on attribute values
954
* coming from the input.
955
*
956
* Returns a pointer to the normalized value (dst) or NULL if no conversion
957
* is needed.
958
*/
959
static const xmlChar *
960
xmlAttrNormalizeSpace2(xmlParserCtxtPtr ctxt, xmlChar *src, int *len)
961
{
962
int i;
963
int remove_head = 0;
964
int need_realloc = 0;
965
const xmlChar *cur;
966
967
if ((ctxt == NULL) || (src == NULL) || (len == NULL))
968
return(NULL);
969
i = *len;
970
if (i <= 0)
971
return(NULL);
972
973
cur = src;
974
while (*cur == 0x20) {
975
cur++;
976
remove_head++;
977
}
978
while (*cur != 0) {
979
if (*cur == 0x20) {
980
cur++;
981
if ((*cur == 0x20) || (*cur == 0)) {
982
need_realloc = 1;
983
break;
984
}
985
} else
986
cur++;
987
}
988
if (need_realloc) {
989
xmlChar *ret;
990
991
ret = xmlStrndup(src + remove_head, i - remove_head + 1);
992
if (ret == NULL) {
993
xmlErrMemory(ctxt, NULL);
994
return(NULL);
995
}
996
xmlAttrNormalizeSpace(ret, ret);
997
*len = strlen((const char *)ret);
998
return(ret);
999
} else if (remove_head) {
1000
*len -= remove_head;
1001
memmove(src, src + remove_head, 1 + *len);
1002
return(src);
1003
}
1004
return(NULL);
1005
}
1006
1007
/**
1008
* xmlAddDefAttrs:
1009
* @ctxt: an XML parser context
1010
* @fullname: the element fullname
1011
* @fullattr: the attribute fullname
1012
* @value: the attribute value
1013
*
1014
* Add a defaulted attribute for an element
1015
*/
1016
static void
1017
xmlAddDefAttrs(xmlParserCtxtPtr ctxt,
1018
const xmlChar *fullname,
1019
const xmlChar *fullattr,
1020
const xmlChar *value) {
1021
xmlDefAttrsPtr defaults;
1022
xmlDefAttr *attr;
1023
int len, expandedSize;
1024
xmlHashedString name;
1025
xmlHashedString prefix;
1026
xmlHashedString hvalue;
1027
const xmlChar *localname;
1028
1029
/*
1030
* Allows to detect attribute redefinitions
1031
*/
1032
if (ctxt->attsSpecial != NULL) {
1033
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1034
return;
1035
}
1036
1037
if (ctxt->attsDefault == NULL) {
1038
ctxt->attsDefault = xmlHashCreateDict(10, ctxt->dict);
1039
if (ctxt->attsDefault == NULL)
1040
goto mem_error;
1041
}
1042
1043
/*
1044
* split the element name into prefix:localname , the string found
1045
* are within the DTD and then not associated to namespace names.
1046
*/
1047
localname = xmlSplitQName3(fullname, &len);
1048
if (localname == NULL) {
1049
name = xmlDictLookupHashed(ctxt->dict, fullname, -1);
1050
prefix.name = NULL;
1051
} else {
1052
name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1053
prefix = xmlDictLookupHashed(ctxt->dict, fullname, len);
1054
if (prefix.name == NULL)
1055
goto mem_error;
1056
}
1057
if (name.name == NULL)
1058
goto mem_error;
1059
1060
/*
1061
* make sure there is some storage
1062
*/
1063
defaults = xmlHashLookup2(ctxt->attsDefault, name.name, prefix.name);
1064
if ((defaults == NULL) ||
1065
(defaults->nbAttrs >= defaults->maxAttrs)) {
1066
xmlDefAttrsPtr temp;
1067
int newSize;
1068
1069
newSize = (defaults != NULL) ? 2 * defaults->maxAttrs : 4;
1070
temp = xmlRealloc(defaults,
1071
sizeof(*defaults) + newSize * sizeof(xmlDefAttr));
1072
if (temp == NULL)
1073
goto mem_error;
1074
if (defaults == NULL)
1075
temp->nbAttrs = 0;
1076
temp->maxAttrs = newSize;
1077
defaults = temp;
1078
if (xmlHashUpdateEntry2(ctxt->attsDefault, name.name, prefix.name,
1079
defaults, NULL) < 0) {
1080
xmlFree(defaults);
1081
goto mem_error;
1082
}
1083
}
1084
1085
/*
1086
* Split the attribute name into prefix:localname , the string found
1087
* are within the DTD and hen not associated to namespace names.
1088
*/
1089
localname = xmlSplitQName3(fullattr, &len);
1090
if (localname == NULL) {
1091
name = xmlDictLookupHashed(ctxt->dict, fullattr, -1);
1092
prefix.name = NULL;
1093
} else {
1094
name = xmlDictLookupHashed(ctxt->dict, localname, -1);
1095
prefix = xmlDictLookupHashed(ctxt->dict, fullattr, len);
1096
if (prefix.name == NULL)
1097
goto mem_error;
1098
}
1099
if (name.name == NULL)
1100
goto mem_error;
1101
1102
/* intern the string and precompute the end */
1103
len = strlen((const char *) value);
1104
hvalue = xmlDictLookupHashed(ctxt->dict, value, len);
1105
if (hvalue.name == NULL)
1106
goto mem_error;
1107
1108
expandedSize = strlen((const char *) name.name);
1109
if (prefix.name != NULL)
1110
expandedSize += strlen((const char *) prefix.name);
1111
expandedSize += len;
1112
1113
attr = &defaults->attrs[defaults->nbAttrs++];
1114
attr->name = name;
1115
attr->prefix = prefix;
1116
attr->value = hvalue;
1117
attr->valueEnd = hvalue.name + len;
1118
attr->external = ctxt->external;
1119
attr->expandedSize = expandedSize;
1120
1121
return;
1122
1123
mem_error:
1124
xmlErrMemory(ctxt, NULL);
1125
return;
1126
}
1127
1128
/**
1129
* xmlAddSpecialAttr:
1130
* @ctxt: an XML parser context
1131
* @fullname: the element fullname
1132
* @fullattr: the attribute fullname
1133
* @type: the attribute type
1134
*
1135
* Register this attribute type
1136
*/
1137
static void
1138
xmlAddSpecialAttr(xmlParserCtxtPtr ctxt,
1139
const xmlChar *fullname,
1140
const xmlChar *fullattr,
1141
int type)
1142
{
1143
if (ctxt->attsSpecial == NULL) {
1144
ctxt->attsSpecial = xmlHashCreateDict(10, ctxt->dict);
1145
if (ctxt->attsSpecial == NULL)
1146
goto mem_error;
1147
}
1148
1149
if (xmlHashLookup2(ctxt->attsSpecial, fullname, fullattr) != NULL)
1150
return;
1151
1152
xmlHashAddEntry2(ctxt->attsSpecial, fullname, fullattr,
1153
(void *) (ptrdiff_t) type);
1154
return;
1155
1156
mem_error:
1157
xmlErrMemory(ctxt, NULL);
1158
return;
1159
}
1160
1161
/**
1162
* xmlCleanSpecialAttrCallback:
1163
*
1164
* Removes CDATA attributes from the special attribute table
1165
*/
1166
static void
1167
xmlCleanSpecialAttrCallback(void *payload, void *data,
1168
const xmlChar *fullname, const xmlChar *fullattr,
1169
const xmlChar *unused ATTRIBUTE_UNUSED) {
1170
xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) data;
1171
1172
if (((ptrdiff_t) payload) == XML_ATTRIBUTE_CDATA) {
1173
xmlHashRemoveEntry2(ctxt->attsSpecial, fullname, fullattr, NULL);
1174
}
1175
}
1176
1177
/**
1178
* xmlCleanSpecialAttr:
1179
* @ctxt: an XML parser context
1180
*
1181
* Trim the list of attributes defined to remove all those of type
1182
* CDATA as they are not special. This call should be done when finishing
1183
* to parse the DTD and before starting to parse the document root.
1184
*/
1185
static void
1186
xmlCleanSpecialAttr(xmlParserCtxtPtr ctxt)
1187
{
1188
if (ctxt->attsSpecial == NULL)
1189
return;
1190
1191
xmlHashScanFull(ctxt->attsSpecial, xmlCleanSpecialAttrCallback, ctxt);
1192
1193
if (xmlHashSize(ctxt->attsSpecial) == 0) {
1194
xmlHashFree(ctxt->attsSpecial, NULL);
1195
ctxt->attsSpecial = NULL;
1196
}
1197
return;
1198
}
1199
1200
/* Returns non-zero if @c is an ASCII letter. */
static int
xmlLangIsAlpha(int c) {
    return(((c >= 'A') && (c <= 'Z')) || ((c >= 'a') && (c <= 'z')));
}

/* Returns non-zero if @c is an ASCII digit. */
static int
xmlLangIsDigit(int c) {
    return((c >= '0') && (c <= '9'));
}

/**
 * xmlCheckLanguageID:
 * @lang:  pointer to the string value
 *
 * DEPRECATED: Internal function, do not use.
 *
 * Checks that the value conforms to the LanguageID production:
 *
 * NOTE: this is somewhat deprecated, those productions were removed from
 *       the XML Second edition.
 *
 * [33] LanguageID ::= Langcode ('-' Subcode)*
 * [34] Langcode ::= ISO639Code |  IanaCode |  UserCode
 * [35] ISO639Code ::= ([a-z] | [A-Z]) ([a-z] | [A-Z])
 * [36] IanaCode ::= ('i' | 'I') '-' ([a-z] | [A-Z])+
 * [37] UserCode ::= ('x' | 'X') '-' ([a-z] | [A-Z])+
 * [38] Subcode ::= ([a-z] | [A-Z])+
 *
 * The current REC reference the successors of RFC 1766, currently 5646
 *
 * http://www.rfc-editor.org/rfc/rfc5646.txt
 * langtag       = language
 *                 ["-" script]
 *                 ["-" region]
 *                 *("-" variant)
 *                 *("-" extension)
 *                 ["-" privateuse]
 * language      = 2*3ALPHA            ; shortest ISO 639 code
 *                 ["-" extlang]       ; sometimes followed by
 *                                     ; extended language subtags
 *               / 4ALPHA              ; or reserved for future use
 *               / 5*8ALPHA            ; or registered language subtag
 *
 * extlang       = 3ALPHA              ; selected ISO 639 codes
 *                 *2("-" 3ALPHA)      ; permanently reserved
 *
 * script        = 4ALPHA              ; ISO 15924 code
 *
 * region        = 2ALPHA              ; ISO 3166-1 code
 *               / 3DIGIT              ; UN M.49 code
 *
 * variant       = 5*8alphanum         ; registered variants
 *               / (DIGIT 3alphanum)
 *
 * extension     = singleton 1*("-" (2*8alphanum))
 *
 *                                     ; Single alphanumerics
 *                                     ; "x" reserved for private use
 * singleton     = DIGIT               ; 0 - 9
 *               / %x41-57             ; A - W
 *               / %x59-5A             ; Y - Z
 *               / %x61-77             ; a - w
 *               / %x79-7A             ; y - z
 *
 * it sounds right to still allow Irregular i-xxx IANA and user codes too
 * The parser below doesn't try to cope with extension or privateuse
 * that could be added but that's not interoperable anyway
 *
 * At most one extlang is parsed. Variant subtags may contain digits
 * (5*8alphanum or DIGIT 3alphanum), so tags such as "de-1996" or
 * "de-CH-1901" are accepted. Only the first variant is checked: whatever
 * follows it after a '-' is not validated.
 *
 * Returns 1 if correct 0 otherwise
 **/
int
xmlCheckLanguageID(const xmlChar * lang)
{
    /* Kinds of subtag that may follow the language, in grammar order. */
    enum {
        LANG_SUB_EXTLANG = 0,
        LANG_SUB_SCRIPT,
        LANG_SUB_REGION,
        LANG_SUB_VARIANT
    };
    const xmlChar *cur = lang, *nxt;
    ptrdiff_t len;
    int allowed;        /* lowest-ranked subtag kind still permitted */

    if (cur == NULL)
        return (0);

    if (((cur[0] == 'i') || (cur[0] == 'I') ||
         (cur[0] == 'x') || (cur[0] == 'X')) && (cur[1] == '-')) {
        /*
         * Still allow IANA code and user code which were coming
         * from the previous version of the XML-1.0 specification
         * it's deprecated but we should not fail
         */
        cur += 2;
        while (xmlLangIsAlpha(cur[0]))
            cur++;
        return(cur[0] == 0);
    }

    /* Primary language subtag: letters only. */
    nxt = cur;
    while (xmlLangIsAlpha(nxt[0]))
        nxt++;
    len = nxt - cur;
    if (len >= 4) {
        /* 4ALPHA (reserved) or 5*8ALPHA (registered): must stand alone. */
        return((len <= 8) && (nxt[0] == 0));
    }
    if (len < 2)
        return(0);

    /* We got an ISO 639 code; now walk the optional subtags in order. */
    allowed = LANG_SUB_EXTLANG;
    for (;;) {
        int kind;
        int alphaOnly = 1;
        int digitOnly = 1;

        if (nxt[0] == 0)
            return(1);
        if (nxt[0] != '-')
            return(0);
        nxt++;
        cur = nxt;

        /* Scan one alphanumeric subtag, remembering what it is made of. */
        for (;; nxt++) {
            if (xmlLangIsAlpha(nxt[0]))
                digitOnly = 0;
            else if (xmlLangIsDigit(nxt[0]))
                alphaOnly = 0;
            else
                break;
        }
        len = nxt - cur;

        /* Classify by length and content; kinds may only move forward. */
        if ((len >= 5) && (len <= 8))
            kind = LANG_SUB_VARIANT;            /* 5*8alphanum */
        else if ((len == 4) && xmlLangIsDigit(cur[0]))
            kind = LANG_SUB_VARIANT;            /* DIGIT 3alphanum */
        else if ((len == 4) && alphaOnly && (allowed <= LANG_SUB_SCRIPT))
            kind = LANG_SUB_SCRIPT;
        else if ((len == 3) && alphaOnly && (allowed <= LANG_SUB_EXTLANG))
            kind = LANG_SUB_EXTLANG;
        else if ((len == 3) && digitOnly && (allowed <= LANG_SUB_REGION))
            kind = LANG_SUB_REGION;             /* UN M.49 code */
        else if ((len == 2) && alphaOnly && (allowed <= LANG_SUB_REGION))
            kind = LANG_SUB_REGION;             /* ISO 3166-1 code */
        else
            return(0);

        if (kind == LANG_SUB_VARIANT) {
            /* extensions and private use subtags not checked */
            return((nxt[0] == 0) || (nxt[0] == '-'));
        }
        allowed = kind + 1;
    }
}
1395
1396
/************************************************************************
1397
* *
1398
* Parser stacks related functions and macros *
1399
* *
1400
************************************************************************/
1401
1402
static xmlEntityPtr xmlParseStringEntityRef(xmlParserCtxtPtr ctxt,
1403
const xmlChar ** str);
1404
1405
/**
1406
* xmlParserNsCreate:
1407
*
1408
* Create a new namespace database.
1409
*
1410
* Returns the new obejct.
1411
*/
1412
xmlParserNsData *
1413
xmlParserNsCreate(void) {
1414
xmlParserNsData *nsdb = xmlMalloc(sizeof(*nsdb));
1415
1416
if (nsdb == NULL)
1417
return(NULL);
1418
memset(nsdb, 0, sizeof(*nsdb));
1419
nsdb->defaultNsIndex = INT_MAX;
1420
1421
return(nsdb);
1422
}
1423
1424
/**
1425
* xmlParserNsFree:
1426
* @nsdb: namespace database
1427
*
1428
* Free a namespace database.
1429
*/
1430
void
1431
xmlParserNsFree(xmlParserNsData *nsdb) {
1432
if (nsdb == NULL)
1433
return;
1434
1435
xmlFree(nsdb->extra);
1436
xmlFree(nsdb->hash);
1437
xmlFree(nsdb);
1438
}
1439
1440
/**
1441
* xmlParserNsReset:
1442
* @nsdb: namespace database
1443
*
1444
* Reset a namespace database.
1445
*/
1446
static void
1447
xmlParserNsReset(xmlParserNsData *nsdb) {
1448
if (nsdb == NULL)
1449
return;
1450
1451
nsdb->hashElems = 0;
1452
nsdb->elementId = 0;
1453
nsdb->defaultNsIndex = INT_MAX;
1454
1455
if (nsdb->hash)
1456
memset(nsdb->hash, 0, nsdb->hashSize * sizeof(nsdb->hash[0]));
1457
}
1458
1459
/**
1460
* xmlParserStartElement:
1461
* @nsdb: namespace database
1462
*
1463
* Signal that a new element has started.
1464
*
1465
* Returns 0 on success, -1 if the element counter overflowed.
1466
*/
1467
static int
1468
xmlParserNsStartElement(xmlParserNsData *nsdb) {
1469
if (nsdb->elementId == UINT_MAX)
1470
return(-1);
1471
nsdb->elementId++;
1472
1473
return(0);
1474
}
1475
1476
/**
1477
* xmlParserNsLookup:
1478
* @ctxt: parser context
1479
* @prefix: namespace prefix
1480
* @bucketPtr: optional bucket (return value)
1481
*
1482
* Lookup namespace with given prefix. If @bucketPtr is non-NULL, it will
1483
* be set to the matching bucket, or the first empty bucket if no match
1484
* was found.
1485
*
1486
* Returns the namespace index on success, INT_MAX if no namespace was
1487
* found.
1488
*/
1489
static int
1490
xmlParserNsLookup(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1491
xmlParserNsBucket **bucketPtr) {
1492
xmlParserNsBucket *bucket, *tombstone;
1493
unsigned index, hashValue;
1494
1495
if (prefix->name == NULL)
1496
return(ctxt->nsdb->defaultNsIndex);
1497
1498
if (ctxt->nsdb->hashSize == 0)
1499
return(INT_MAX);
1500
1501
hashValue = prefix->hashValue;
1502
index = hashValue & (ctxt->nsdb->hashSize - 1);
1503
bucket = &ctxt->nsdb->hash[index];
1504
tombstone = NULL;
1505
1506
while (bucket->hashValue) {
1507
if (bucket->index == INT_MAX) {
1508
if (tombstone == NULL)
1509
tombstone = bucket;
1510
} else if (bucket->hashValue == hashValue) {
1511
if (ctxt->nsTab[bucket->index * 2] == prefix->name) {
1512
if (bucketPtr != NULL)
1513
*bucketPtr = bucket;
1514
return(bucket->index);
1515
}
1516
}
1517
1518
index++;
1519
bucket++;
1520
if (index == ctxt->nsdb->hashSize) {
1521
index = 0;
1522
bucket = ctxt->nsdb->hash;
1523
}
1524
}
1525
1526
if (bucketPtr != NULL)
1527
*bucketPtr = tombstone ? tombstone : bucket;
1528
return(INT_MAX);
1529
}
1530
1531
/**
1532
* xmlParserNsLookupUri:
1533
* @ctxt: parser context
1534
* @prefix: namespace prefix
1535
*
1536
* Lookup namespace URI with given prefix.
1537
*
1538
* Returns the namespace URI on success, NULL if no namespace was found.
1539
*/
1540
static const xmlChar *
1541
xmlParserNsLookupUri(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix) {
1542
const xmlChar *ret;
1543
int nsIndex;
1544
1545
if (prefix->name == ctxt->str_xml)
1546
return(ctxt->str_xml_ns);
1547
1548
nsIndex = xmlParserNsLookup(ctxt, prefix, NULL);
1549
if (nsIndex == INT_MAX)
1550
return(NULL);
1551
1552
ret = ctxt->nsTab[nsIndex * 2 + 1];
1553
if (ret[0] == 0)
1554
ret = NULL;
1555
return(ret);
1556
}
1557
1558
/**
1559
* xmlParserNsLookupSax:
1560
* @ctxt: parser context
1561
* @prefix: namespace prefix
1562
*
1563
* Lookup extra data for the given prefix. This returns data stored
1564
* with xmlParserNsUdpateSax.
1565
*
1566
* Returns the data on success, NULL if no namespace was found.
1567
*/
1568
void *
1569
xmlParserNsLookupSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix) {
1570
xmlHashedString hprefix;
1571
int nsIndex;
1572
1573
if (prefix == ctxt->str_xml)
1574
return(NULL);
1575
1576
hprefix.name = prefix;
1577
if (prefix != NULL)
1578
hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1579
else
1580
hprefix.hashValue = 0;
1581
nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1582
if (nsIndex == INT_MAX)
1583
return(NULL);
1584
1585
return(ctxt->nsdb->extra[nsIndex].saxData);
1586
}
1587
1588
/**
1589
* xmlParserNsUpdateSax:
1590
* @ctxt: parser context
1591
* @prefix: namespace prefix
1592
* @saxData: extra data for SAX handler
1593
*
1594
* Sets or updates extra data for the given prefix. This value will be
1595
* returned by xmlParserNsLookupSax as long as the namespace with the
1596
* given prefix is in scope.
1597
*
1598
* Returns the data on success, NULL if no namespace was found.
1599
*/
1600
int
1601
xmlParserNsUpdateSax(xmlParserCtxtPtr ctxt, const xmlChar *prefix,
1602
void *saxData) {
1603
xmlHashedString hprefix;
1604
int nsIndex;
1605
1606
if (prefix == ctxt->str_xml)
1607
return(-1);
1608
1609
hprefix.name = prefix;
1610
if (prefix != NULL)
1611
hprefix.hashValue = xmlDictComputeHash(ctxt->dict, prefix);
1612
else
1613
hprefix.hashValue = 0;
1614
nsIndex = xmlParserNsLookup(ctxt, &hprefix, NULL);
1615
if (nsIndex == INT_MAX)
1616
return(-1);
1617
1618
ctxt->nsdb->extra[nsIndex].saxData = saxData;
1619
return(0);
1620
}
1621
1622
/**
1623
* xmlParserNsGrow:
1624
* @ctxt: parser context
1625
*
1626
* Grows the namespace tables.
1627
*
1628
* Returns 0 on success, -1 if a memory allocation failed.
1629
*/
1630
static int
1631
xmlParserNsGrow(xmlParserCtxtPtr ctxt) {
1632
const xmlChar **table;
1633
xmlParserNsExtra *extra;
1634
int newSize;
1635
1636
if (ctxt->nsMax > INT_MAX / 2)
1637
goto error;
1638
newSize = ctxt->nsMax ? ctxt->nsMax * 2 : 16;
1639
1640
table = xmlRealloc(ctxt->nsTab, 2 * newSize * sizeof(table[0]));
1641
if (table == NULL)
1642
goto error;
1643
ctxt->nsTab = table;
1644
1645
extra = xmlRealloc(ctxt->nsdb->extra, newSize * sizeof(extra[0]));
1646
if (extra == NULL)
1647
goto error;
1648
ctxt->nsdb->extra = extra;
1649
1650
ctxt->nsMax = newSize;
1651
return(0);
1652
1653
error:
1654
xmlErrMemory(ctxt, NULL);
1655
return(-1);
1656
}
1657
1658
/**
1659
* xmlParserNsPush:
1660
* @ctxt: parser context
1661
* @prefix: prefix with hash value
1662
* @uri: uri with hash value
1663
* @saxData: extra data for SAX handler
1664
* @defAttr: whether the namespace comes from a default attribute
1665
*
1666
* Push a new namespace on the table.
1667
*
1668
* Returns 1 if the namespace was pushed, 0 if the namespace was ignored,
1669
* -1 if a memory allocation failed.
1670
*/
1671
static int
1672
xmlParserNsPush(xmlParserCtxtPtr ctxt, const xmlHashedString *prefix,
1673
const xmlHashedString *uri, void *saxData, int defAttr) {
1674
xmlParserNsBucket *bucket = NULL;
1675
xmlParserNsExtra *extra;
1676
const xmlChar **ns;
1677
unsigned hashValue, nsIndex, oldIndex;
1678
1679
if ((prefix != NULL) && (prefix->name == ctxt->str_xml))
1680
return(0);
1681
1682
if ((ctxt->nsNr >= ctxt->nsMax) && (xmlParserNsGrow(ctxt) < 0)) {
1683
xmlErrMemory(ctxt, NULL);
1684
return(-1);
1685
}
1686
1687
/*
1688
* Default namespace and 'xml' namespace
1689
*/
1690
if ((prefix == NULL) || (prefix->name == NULL)) {
1691
oldIndex = ctxt->nsdb->defaultNsIndex;
1692
1693
if (oldIndex != INT_MAX) {
1694
extra = &ctxt->nsdb->extra[oldIndex];
1695
1696
if (extra->elementId == ctxt->nsdb->elementId) {
1697
if (defAttr == 0)
1698
xmlErrAttributeDup(ctxt, NULL, BAD_CAST "xmlns");
1699
return(0);
1700
}
1701
1702
if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1703
(uri->name == ctxt->nsTab[oldIndex * 2 + 1]))
1704
return(0);
1705
}
1706
1707
ctxt->nsdb->defaultNsIndex = ctxt->nsNr;
1708
goto populate_entry;
1709
}
1710
1711
/*
1712
* Hash table lookup
1713
*/
1714
oldIndex = xmlParserNsLookup(ctxt, prefix, &bucket);
1715
if (oldIndex != INT_MAX) {
1716
extra = &ctxt->nsdb->extra[oldIndex];
1717
1718
/*
1719
* Check for duplicate definitions on the same element.
1720
*/
1721
if (extra->elementId == ctxt->nsdb->elementId) {
1722
if (defAttr == 0)
1723
xmlErrAttributeDup(ctxt, BAD_CAST "xmlns", prefix->name);
1724
return(0);
1725
}
1726
1727
if ((ctxt->options & XML_PARSE_NSCLEAN) &&
1728
(uri->name == ctxt->nsTab[bucket->index * 2 + 1]))
1729
return(0);
1730
1731
bucket->index = ctxt->nsNr;
1732
goto populate_entry;
1733
}
1734
1735
/*
1736
* Insert new bucket
1737
*/
1738
1739
hashValue = prefix->hashValue;
1740
1741
/*
1742
* Grow hash table, 50% fill factor
1743
*/
1744
if (ctxt->nsdb->hashElems + 1 > ctxt->nsdb->hashSize / 2) {
1745
xmlParserNsBucket *newHash;
1746
unsigned newSize, i, index;
1747
1748
if (ctxt->nsdb->hashSize > UINT_MAX / 2) {
1749
xmlErrMemory(ctxt, NULL);
1750
return(-1);
1751
}
1752
newSize = ctxt->nsdb->hashSize ? ctxt->nsdb->hashSize * 2 : 16;
1753
newHash = xmlMalloc(newSize * sizeof(newHash[0]));
1754
if (newHash == NULL) {
1755
xmlErrMemory(ctxt, NULL);
1756
return(-1);
1757
}
1758
memset(newHash, 0, newSize * sizeof(newHash[0]));
1759
1760
for (i = 0; i < ctxt->nsdb->hashSize; i++) {
1761
unsigned hv = ctxt->nsdb->hash[i].hashValue;
1762
unsigned newIndex;
1763
1764
if ((hv == 0) || (ctxt->nsdb->hash[i].index == INT_MAX))
1765
continue;
1766
newIndex = hv & (newSize - 1);
1767
1768
while (newHash[newIndex].hashValue != 0) {
1769
newIndex++;
1770
if (newIndex == newSize)
1771
newIndex = 0;
1772
}
1773
1774
newHash[newIndex] = ctxt->nsdb->hash[i];
1775
}
1776
1777
xmlFree(ctxt->nsdb->hash);
1778
ctxt->nsdb->hash = newHash;
1779
ctxt->nsdb->hashSize = newSize;
1780
1781
/*
1782
* Relookup
1783
*/
1784
index = hashValue & (newSize - 1);
1785
1786
while (newHash[index].hashValue != 0) {
1787
index++;
1788
if (index == newSize)
1789
index = 0;
1790
}
1791
1792
bucket = &newHash[index];
1793
}
1794
1795
bucket->hashValue = hashValue;
1796
bucket->index = ctxt->nsNr;
1797
ctxt->nsdb->hashElems++;
1798
oldIndex = INT_MAX;
1799
1800
populate_entry:
1801
nsIndex = ctxt->nsNr;
1802
1803
ns = &ctxt->nsTab[nsIndex * 2];
1804
ns[0] = prefix ? prefix->name : NULL;
1805
ns[1] = uri->name;
1806
1807
extra = &ctxt->nsdb->extra[nsIndex];
1808
extra->saxData = saxData;
1809
extra->prefixHashValue = prefix ? prefix->hashValue : 0;
1810
extra->uriHashValue = uri->hashValue;
1811
extra->elementId = ctxt->nsdb->elementId;
1812
extra->oldIndex = oldIndex;
1813
1814
ctxt->nsNr++;
1815
1816
return(1);
1817
}
1818
1819
/**
1820
* xmlParserNsPop:
1821
* @ctxt: an XML parser context
1822
* @nr: the number to pop
1823
*
1824
* Pops the top @nr namespaces and restores the hash table.
1825
*
1826
* Returns the number of namespaces popped.
1827
*/
1828
static int
1829
xmlParserNsPop(xmlParserCtxtPtr ctxt, int nr)
1830
{
1831
int i;
1832
1833
/* assert(nr <= ctxt->nsNr); */
1834
1835
for (i = ctxt->nsNr - 1; i >= ctxt->nsNr - nr; i--) {
1836
const xmlChar *prefix = ctxt->nsTab[i * 2];
1837
xmlParserNsExtra *extra = &ctxt->nsdb->extra[i];
1838
1839
if (prefix == NULL) {
1840
ctxt->nsdb->defaultNsIndex = extra->oldIndex;
1841
} else {
1842
xmlHashedString hprefix;
1843
xmlParserNsBucket *bucket = NULL;
1844
1845
hprefix.name = prefix;
1846
hprefix.hashValue = extra->prefixHashValue;
1847
xmlParserNsLookup(ctxt, &hprefix, &bucket);
1848
/* assert(bucket && bucket->hashValue); */
1849
bucket->index = extra->oldIndex;
1850
}
1851
}
1852
1853
ctxt->nsNr -= nr;
1854
return(nr);
1855
}
1856
1857
static int
1858
xmlCtxtGrowAttrs(xmlParserCtxtPtr ctxt, int nr) {
1859
const xmlChar **atts;
1860
unsigned *attallocs;
1861
int maxatts;
1862
1863
if (nr + 5 > ctxt->maxatts) {
1864
maxatts = ctxt->maxatts == 0 ? 55 : (nr + 5) * 2;
1865
atts = (const xmlChar **) xmlMalloc(
1866
maxatts * sizeof(const xmlChar *));
1867
if (atts == NULL) goto mem_error;
1868
attallocs = xmlRealloc(ctxt->attallocs,
1869
(maxatts / 5) * sizeof(attallocs[0]));
1870
if (attallocs == NULL) {
1871
xmlFree(atts);
1872
goto mem_error;
1873
}
1874
if (ctxt->maxatts > 0)
1875
memcpy(atts, ctxt->atts, ctxt->maxatts * sizeof(const xmlChar *));
1876
xmlFree(ctxt->atts);
1877
ctxt->atts = atts;
1878
ctxt->attallocs = attallocs;
1879
ctxt->maxatts = maxatts;
1880
}
1881
return(ctxt->maxatts);
1882
mem_error:
1883
xmlErrMemory(ctxt, NULL);
1884
return(-1);
1885
}
1886
1887
/**
1888
* inputPush:
1889
* @ctxt: an XML parser context
1890
* @value: the parser input
1891
*
1892
* Pushes a new parser input on top of the input stack
1893
*
1894
* Returns -1 in case of error, the index in the stack otherwise
1895
*/
1896
int
1897
inputPush(xmlParserCtxtPtr ctxt, xmlParserInputPtr value)
1898
{
1899
if ((ctxt == NULL) || (value == NULL))
1900
return(-1);
1901
if (ctxt->inputNr >= ctxt->inputMax) {
1902
size_t newSize = ctxt->inputMax * 2;
1903
xmlParserInputPtr *tmp;
1904
1905
tmp = (xmlParserInputPtr *) xmlRealloc(ctxt->inputTab,
1906
newSize * sizeof(*tmp));
1907
if (tmp == NULL) {
1908
xmlErrMemory(ctxt, NULL);
1909
return (-1);
1910
}
1911
ctxt->inputTab = tmp;
1912
ctxt->inputMax = newSize;
1913
}
1914
ctxt->inputTab[ctxt->inputNr] = value;
1915
ctxt->input = value;
1916
return (ctxt->inputNr++);
1917
}
1918
/**
1919
* inputPop:
1920
* @ctxt: an XML parser context
1921
*
1922
* Pops the top parser input from the input stack
1923
*
1924
* Returns the input just removed
1925
*/
1926
xmlParserInputPtr
1927
inputPop(xmlParserCtxtPtr ctxt)
1928
{
1929
xmlParserInputPtr ret;
1930
1931
if (ctxt == NULL)
1932
return(NULL);
1933
if (ctxt->inputNr <= 0)
1934
return (NULL);
1935
ctxt->inputNr--;
1936
if (ctxt->inputNr > 0)
1937
ctxt->input = ctxt->inputTab[ctxt->inputNr - 1];
1938
else
1939
ctxt->input = NULL;
1940
ret = ctxt->inputTab[ctxt->inputNr];
1941
ctxt->inputTab[ctxt->inputNr] = NULL;
1942
return (ret);
1943
}
1944
/**
1945
* nodePush:
1946
* @ctxt: an XML parser context
1947
* @value: the element node
1948
*
1949
* DEPRECATED: Internal function, do not use.
1950
*
1951
* Pushes a new element node on top of the node stack
1952
*
1953
* Returns -1 in case of error, the index in the stack otherwise
1954
*/
1955
int
1956
nodePush(xmlParserCtxtPtr ctxt, xmlNodePtr value)
1957
{
1958
if (ctxt == NULL) return(0);
1959
if (ctxt->nodeNr >= ctxt->nodeMax) {
1960
xmlNodePtr *tmp;
1961
1962
tmp = (xmlNodePtr *) xmlRealloc(ctxt->nodeTab,
1963
ctxt->nodeMax * 2 *
1964
sizeof(ctxt->nodeTab[0]));
1965
if (tmp == NULL) {
1966
xmlErrMemory(ctxt, NULL);
1967
return (-1);
1968
}
1969
ctxt->nodeTab = tmp;
1970
ctxt->nodeMax *= 2;
1971
}
1972
if ((((unsigned int) ctxt->nodeNr) > xmlParserMaxDepth) &&
1973
((ctxt->options & XML_PARSE_HUGE) == 0)) {
1974
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
1975
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
1976
xmlParserMaxDepth);
1977
xmlHaltParser(ctxt);
1978
return(-1);
1979
}
1980
ctxt->nodeTab[ctxt->nodeNr] = value;
1981
ctxt->node = value;
1982
return (ctxt->nodeNr++);
1983
}
1984
1985
/**
1986
* nodePop:
1987
* @ctxt: an XML parser context
1988
*
1989
* DEPRECATED: Internal function, do not use.
1990
*
1991
* Pops the top element node from the node stack
1992
*
1993
* Returns the node just removed
1994
*/
1995
xmlNodePtr
1996
nodePop(xmlParserCtxtPtr ctxt)
1997
{
1998
xmlNodePtr ret;
1999
2000
if (ctxt == NULL) return(NULL);
2001
if (ctxt->nodeNr <= 0)
2002
return (NULL);
2003
ctxt->nodeNr--;
2004
if (ctxt->nodeNr > 0)
2005
ctxt->node = ctxt->nodeTab[ctxt->nodeNr - 1];
2006
else
2007
ctxt->node = NULL;
2008
ret = ctxt->nodeTab[ctxt->nodeNr];
2009
ctxt->nodeTab[ctxt->nodeNr] = NULL;
2010
return (ret);
2011
}
2012
2013
/**
2014
* nameNsPush:
2015
* @ctxt: an XML parser context
2016
* @value: the element name
2017
* @prefix: the element prefix
2018
* @URI: the element namespace name
2019
* @line: the current line number for error messages
2020
* @nsNr: the number of namespaces pushed on the namespace table
2021
*
2022
* Pushes a new element name/prefix/URL on top of the name stack
2023
*
2024
* Returns -1 in case of error, the index in the stack otherwise
2025
*/
2026
static int
2027
nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
2028
const xmlChar *prefix, const xmlChar *URI, int line, int nsNr)
2029
{
2030
xmlStartTag *tag;
2031
2032
if (ctxt->nameNr >= ctxt->nameMax) {
2033
const xmlChar * *tmp;
2034
xmlStartTag *tmp2;
2035
ctxt->nameMax *= 2;
2036
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2037
ctxt->nameMax *
2038
sizeof(ctxt->nameTab[0]));
2039
if (tmp == NULL) {
2040
ctxt->nameMax /= 2;
2041
goto mem_error;
2042
}
2043
ctxt->nameTab = tmp;
2044
tmp2 = (xmlStartTag *) xmlRealloc((void * *)ctxt->pushTab,
2045
ctxt->nameMax *
2046
sizeof(ctxt->pushTab[0]));
2047
if (tmp2 == NULL) {
2048
ctxt->nameMax /= 2;
2049
goto mem_error;
2050
}
2051
ctxt->pushTab = tmp2;
2052
} else if (ctxt->pushTab == NULL) {
2053
ctxt->pushTab = (xmlStartTag *) xmlMalloc(ctxt->nameMax *
2054
sizeof(ctxt->pushTab[0]));
2055
if (ctxt->pushTab == NULL)
2056
goto mem_error;
2057
}
2058
ctxt->nameTab[ctxt->nameNr] = value;
2059
ctxt->name = value;
2060
tag = &ctxt->pushTab[ctxt->nameNr];
2061
tag->prefix = prefix;
2062
tag->URI = URI;
2063
tag->line = line;
2064
tag->nsNr = nsNr;
2065
return (ctxt->nameNr++);
2066
mem_error:
2067
xmlErrMemory(ctxt, NULL);
2068
return (-1);
2069
}
2070
#ifdef LIBXML_PUSH_ENABLED
/**
 * nameNsPop:
 * @ctxt:  an XML parser context
 *
 * Pops the top element/prefix/URI name from the name stack. The name
 * below it, if any, becomes the current name; otherwise ctxt->name is
 * set to NULL. The vacated nameTab slot is cleared.
 *
 * Returns the name just removed, or NULL if the stack is empty.
 */
static const xmlChar *
nameNsPop(xmlParserCtxtPtr ctxt)
{
    const xmlChar *popped;

    if (ctxt->nameNr <= 0)
        return(NULL);

    ctxt->nameNr--;
    popped = ctxt->nameTab[ctxt->nameNr];
    ctxt->nameTab[ctxt->nameNr] = NULL;

    if (ctxt->nameNr > 0)
        ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
    else
        ctxt->name = NULL;

    return(popped);
}
#endif /* LIBXML_PUSH_ENABLED */
2096
2097
/**
2098
* namePush:
2099
* @ctxt: an XML parser context
2100
* @value: the element name
2101
*
2102
* DEPRECATED: Internal function, do not use.
2103
*
2104
* Pushes a new element name on top of the name stack
2105
*
2106
* Returns -1 in case of error, the index in the stack otherwise
2107
*/
2108
int
2109
namePush(xmlParserCtxtPtr ctxt, const xmlChar * value)
2110
{
2111
if (ctxt == NULL) return (-1);
2112
2113
if (ctxt->nameNr >= ctxt->nameMax) {
2114
const xmlChar * *tmp;
2115
tmp = (const xmlChar * *) xmlRealloc((xmlChar * *)ctxt->nameTab,
2116
ctxt->nameMax * 2 *
2117
sizeof(ctxt->nameTab[0]));
2118
if (tmp == NULL) {
2119
goto mem_error;
2120
}
2121
ctxt->nameTab = tmp;
2122
ctxt->nameMax *= 2;
2123
}
2124
ctxt->nameTab[ctxt->nameNr] = value;
2125
ctxt->name = value;
2126
return (ctxt->nameNr++);
2127
mem_error:
2128
xmlErrMemory(ctxt, NULL);
2129
return (-1);
2130
}
2131
2132
/**
2133
* namePop:
2134
* @ctxt: an XML parser context
2135
*
2136
* DEPRECATED: Internal function, do not use.
2137
*
2138
* Pops the top element name from the name stack
2139
*
2140
* Returns the name just removed
2141
*/
2142
const xmlChar *
2143
namePop(xmlParserCtxtPtr ctxt)
2144
{
2145
const xmlChar *ret;
2146
2147
if ((ctxt == NULL) || (ctxt->nameNr <= 0))
2148
return (NULL);
2149
ctxt->nameNr--;
2150
if (ctxt->nameNr > 0)
2151
ctxt->name = ctxt->nameTab[ctxt->nameNr - 1];
2152
else
2153
ctxt->name = NULL;
2154
ret = ctxt->nameTab[ctxt->nameNr];
2155
ctxt->nameTab[ctxt->nameNr] = NULL;
2156
return (ret);
2157
}
2158
2159
/**
 * spacePush:
 * @ctxt:  an XML parser context
 * @val:  the value to push
 *
 * Pushes @val on top of the space stack and points ctxt->space at the
 * new top slot. The stack capacity is doubled when it is full.
 *
 * Returns -1 in case of error, the index in the stack otherwise
 */
static int
spacePush(xmlParserCtxtPtr ctxt, int val) {
    if (ctxt->spaceNr >= ctxt->spaceMax) {
        int *grown;

        ctxt->spaceMax *= 2;
        grown = (int *) xmlRealloc(ctxt->spaceTab,
                                   ctxt->spaceMax * sizeof(ctxt->spaceTab[0]));
        if (grown == NULL) {
            xmlErrMemory(ctxt, NULL);
            /* Undo the doubling: the table kept its old size. */
            ctxt->spaceMax /= 2;
            return(-1);
        }
        ctxt->spaceTab = grown;
    }

    ctxt->space = &ctxt->spaceTab[ctxt->spaceNr];
    *ctxt->space = val;
    return(ctxt->spaceNr++);
}
2177
2178
/**
 * spacePop:
 * @ctxt:  an XML parser context
 *
 * Pops the top value from the space stack. ctxt->space is pointed at
 * the new top slot, or at slot 0 when the stack becomes empty. The
 * vacated slot is overwritten with -1.
 *
 * Returns the value just removed, or 0 if the stack was empty.
 */
static int
spacePop(xmlParserCtxtPtr ctxt) {
    int popped;

    if (ctxt->spaceNr <= 0)
        return(0);

    ctxt->spaceNr--;
    popped = ctxt->spaceTab[ctxt->spaceNr];
    ctxt->spaceTab[ctxt->spaceNr] = -1;

    if (ctxt->spaceNr > 0)
        ctxt->space = &ctxt->spaceTab[ctxt->spaceNr - 1];
    else
        ctxt->space = &ctxt->spaceTab[0];

    return(popped);
}
2190
2191
/*
 * Macros for accessing the content. Those should be used only by the parser,
 * and not exported.
 *
 * Dirty macros, i.e. one often needs to make assumptions on the context to
 * use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
 *           To be used with extreme caution since operations consuming
 *           characters may move the input buffer to a different location !
 *   CUR     returns the current xmlChar value, i.e. an 8 bit value.
 *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   RAW     same as CUR but in the input buffer, bypass any token
 *           extraction that may have been done
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR, it should be used
 *           only to compare on ASCII based substring.
 *   SKIP(n) Skip n xmlChar, and must also be used only to skip ASCII defined
 *           strings without newlines within the parser.
 *   NEXT1   Skip 1 xmlChar, and must also be used only to skip 1 non-newline
 *           ASCII defined char within the parser.
 * Clean macros, not dependent of an ASCII context, expect UTF-8 encoding
 *
 *   NEXT    Skip to the next character, this does the proper decoding
 *           in UTF-8 mode. It also pop-up unfinished entities on the fly.
 *   NEXTL(l) Skip the current unicode character of l xmlChars long.
 *   CUR_CHAR(l) returns the current unicode character (int), set l
 *           to the number of xmlChars used for the encoding [0-5].
 *   CUR_SCHAR  same but operate on a string instead of the context
 *   COPY_BUF  copy the current unicode char to the target buffer, increment
 *            the index
 *   GROW, SHRINK  handling of input buffers
 */
2225
2226
/*
 * Raw access to the current input. All of these expect a variable named
 * ctxt to be in scope at the point of use.
 */
#define RAW (*ctxt->input->cur)
#define CUR (*ctxt->input->cur)
#define NXT(val) ctxt->input->cur[(val)]
#define CUR_PTR ctxt->input->cur
#define BASE_PTR ctxt->input->base

/*
 * CMPn(s, c1, ..., cn): true if the first n bytes at s are c1 ... cn.
 * The comparison stops at the first mismatch. Arguments are
 * parenthesized so that an expression such as "p + 1" can be passed
 * for s.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
  ( ((unsigned char *) (s))[ 0 ] == (c1) && \
    ((unsigned char *) (s))[ 1 ] == (c2) && \
    ((unsigned char *) (s))[ 2 ] == (c3) && \
    ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
  ( CMP4( s, c1, c2, c3, c4 ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define CMP6( s, c1, c2, c3, c4, c5, c6 ) \
  ( CMP5( s, c1, c2, c3, c4, c5 ) && ((unsigned char *) (s))[ 5 ] == (c6) )
#define CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) \
  ( CMP6( s, c1, c2, c3, c4, c5, c6 ) && ((unsigned char *) (s))[ 6 ] == (c7) )
#define CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) \
  ( CMP7( s, c1, c2, c3, c4, c5, c6, c7 ) && ((unsigned char *) (s))[ 7 ] == (c8) )
#define CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) \
  ( CMP8( s, c1, c2, c3, c4, c5, c6, c7, c8 ) && \
    ((unsigned char *) (s))[ 8 ] == (c9) )
#define CMP10( s, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10 ) \
  ( CMP9( s, c1, c2, c3, c4, c5, c6, c7, c8, c9 ) && \
    ((unsigned char *) (s))[ 9 ] == (c10) )

/*
 * Advance by val bytes and columns without looking at the bytes skipped.
 * Note that val is evaluated twice.
 */
#define SKIP(val) do {							\
    ctxt->input->cur += (val),ctxt->input->col+=(val);			\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/* Advance by val bytes, keeping line and column up to date on newlines. */
#define SKIPL(val) do {							\
    int skipl;								\
    for(skipl=0; skipl<(val); skipl++) {				\
	if (*(ctxt->input->cur) == '\n') {				\
	ctxt->input->line++; ctxt->input->col = 1;			\
	} else ctxt->input->col++;					\
	ctxt->input->cur++;						\
    }									\
    if (*ctxt->input->cur == 0)						\
        xmlParserGrow(ctxt);						\
  } while (0)

/*
 * SHRINK and GROW expand to a bare "if" statement: take care when using
 * them next to an "else".
 */
/* Don't shrink push parser buffer. */
#define SHRINK \
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) && \
        (ctxt->input->cur - ctxt->input->base > 2 * INPUT_CHUNK) && \
	(ctxt->input->end - ctxt->input->cur < 2 * INPUT_CHUNK)) \
	xmlParserShrink(ctxt);

#define GROW if (ctxt->input->end - ctxt->input->cur < INPUT_CHUNK) \
	xmlParserGrow(ctxt);

#define SKIP_BLANKS xmlSkipBlankChars(ctxt)

#define NEXT xmlNextChar(ctxt)

/* Expands to a brace block, not to a do/while statement. */
#define NEXT1 {								\
	ctxt->input->col++;						\
	ctxt->input->cur++;						\
	if (*ctxt->input->cur == 0)					\
	    xmlParserGrow(ctxt);					\
    }

/* Only the current byte is inspected for a newline; l bytes are skipped. */
#define NEXTL(l) do {							\
    if (*(ctxt->input->cur) == '\n') {					\
	ctxt->input->line++; ctxt->input->col = 1;			\
    } else ctxt->input->col++;						\
    ctxt->input->cur += (l);				\
  } while (0)

#define CUR_CHAR(l) xmlCurrentChar(ctxt, &(l))
#define CUR_SCHAR(s, l) xmlStringCurrentChar(ctxt, (s), &(l))

/* Expands to an if/else without trailing semicolon; v is evaluated twice. */
#define COPY_BUF(b, i, v)						\
    if ((v) < 0x80) (b)[(i)++] = (v);					\
    else (i) += xmlCopyCharMultiByte(&(b)[(i)],(v))
2302
2303
/**
2304
* xmlSkipBlankChars:
2305
* @ctxt: the XML parser context
2306
*
2307
* DEPRECATED: Internal function, do not use.
2308
*
2309
* skip all blanks character found at that point in the input streams.
2310
* It pops up finished entities in the process if allowable at that point.
2311
*
2312
* Returns the number of space chars skipped
2313
*/
2314
2315
int
2316
xmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
2317
int res = 0;
2318
2319
/*
2320
* It's Okay to use CUR/NEXT here since all the blanks are on
2321
* the ASCII range.
2322
*/
2323
if (((ctxt->inputNr == 1) && (ctxt->instate != XML_PARSER_DTD)) ||
2324
(ctxt->instate == XML_PARSER_START)) {
2325
const xmlChar *cur;
2326
/*
2327
* if we are in the document content, go really fast
2328
*/
2329
cur = ctxt->input->cur;
2330
while (IS_BLANK_CH(*cur)) {
2331
if (*cur == '\n') {
2332
ctxt->input->line++; ctxt->input->col = 1;
2333
} else {
2334
ctxt->input->col++;
2335
}
2336
cur++;
2337
if (res < INT_MAX)
2338
res++;
2339
if (*cur == 0) {
2340
ctxt->input->cur = cur;
2341
xmlParserGrow(ctxt);
2342
cur = ctxt->input->cur;
2343
}
2344
}
2345
ctxt->input->cur = cur;
2346
} else {
2347
int expandPE = ((ctxt->external != 0) || (ctxt->inputNr != 1));
2348
2349
while (ctxt->instate != XML_PARSER_EOF) {
2350
if (IS_BLANK_CH(CUR)) { /* CHECKED tstblanks.xml */
2351
NEXT;
2352
} else if (CUR == '%') {
2353
/*
2354
* Need to handle support of entities branching here
2355
*/
2356
if ((expandPE == 0) || (IS_BLANK_CH(NXT(1))) || (NXT(1) == 0))
2357
break;
2358
xmlParsePEReference(ctxt);
2359
} else if (CUR == 0) {
2360
unsigned long consumed;
2361
xmlEntityPtr ent;
2362
2363
if (ctxt->inputNr <= 1)
2364
break;
2365
2366
consumed = ctxt->input->consumed;
2367
xmlSaturatedAddSizeT(&consumed,
2368
ctxt->input->cur - ctxt->input->base);
2369
2370
/*
2371
* Add to sizeentities when parsing an external entity
2372
* for the first time.
2373
*/
2374
ent = ctxt->input->entity;
2375
if ((ent->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
2376
((ent->flags & XML_ENT_PARSED) == 0)) {
2377
ent->flags |= XML_ENT_PARSED;
2378
2379
xmlSaturatedAdd(&ctxt->sizeentities, consumed);
2380
}
2381
2382
xmlParserEntityCheck(ctxt, consumed);
2383
2384
xmlPopInput(ctxt);
2385
} else {
2386
break;
2387
}
2388
2389
/*
2390
* Also increase the counter when entering or exiting a PERef.
2391
* The spec says: "When a parameter-entity reference is recognized
2392
* in the DTD and included, its replacement text MUST be enlarged
2393
* by the attachment of one leading and one following space (#x20)
2394
* character."
2395
*/
2396
if (res < INT_MAX)
2397
res++;
2398
}
2399
}
2400
return(res);
2401
}
2402
2403
/************************************************************************
2404
* *
2405
* Commodity functions to handle entities *
2406
* *
2407
************************************************************************/
2408
2409
/**
2410
* xmlPopInput:
2411
* @ctxt: an XML parser context
2412
*
2413
* xmlPopInput: the current input pointed by ctxt->input came to an end
2414
* pop it and return the next char.
2415
*
2416
* Returns the current xmlChar in the parser context
2417
*/
2418
xmlChar
2419
xmlPopInput(xmlParserCtxtPtr ctxt) {
2420
xmlParserInputPtr input;
2421
2422
if ((ctxt == NULL) || (ctxt->inputNr <= 1)) return(0);
2423
if (xmlParserDebugEntities)
2424
xmlGenericError(xmlGenericErrorContext,
2425
"Popping input %d\n", ctxt->inputNr);
2426
if ((ctxt->inputNr > 1) && (ctxt->inSubset == 0) &&
2427
(ctxt->instate != XML_PARSER_EOF))
2428
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
2429
"Unfinished entity outside the DTD");
2430
input = inputPop(ctxt);
2431
if (input->entity != NULL)
2432
input->entity->flags &= ~XML_ENT_EXPANDING;
2433
xmlFreeInputStream(input);
2434
if (*ctxt->input->cur == 0)
2435
xmlParserGrow(ctxt);
2436
return(CUR);
2437
}
2438
2439
/**
2440
* xmlPushInput:
2441
* @ctxt: an XML parser context
2442
* @input: an XML parser input fragment (entity, XML fragment ...).
2443
*
2444
* xmlPushInput: switch to a new input stream which is stacked on top
2445
* of the previous one(s).
2446
* Returns -1 in case of error or the index in the input stack
2447
*/
2448
int
2449
xmlPushInput(xmlParserCtxtPtr ctxt, xmlParserInputPtr input) {
2450
int ret;
2451
if (input == NULL) return(-1);
2452
2453
if (xmlParserDebugEntities) {
2454
if ((ctxt->input != NULL) && (ctxt->input->filename))
2455
xmlGenericError(xmlGenericErrorContext,
2456
"%s(%d): ", ctxt->input->filename,
2457
ctxt->input->line);
2458
xmlGenericError(xmlGenericErrorContext,
2459
"Pushing input %d : %.30s\n", ctxt->inputNr+1, input->cur);
2460
}
2461
if (((ctxt->inputNr > 40) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2462
(ctxt->inputNr > 100)) {
2463
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2464
while (ctxt->inputNr > 1)
2465
xmlFreeInputStream(inputPop(ctxt));
2466
return(-1);
2467
}
2468
ret = inputPush(ctxt, input);
2469
if (ctxt->instate == XML_PARSER_EOF)
2470
return(-1);
2471
GROW;
2472
return(ret);
2473
}
2474
2475
/**
2476
* xmlParseCharRef:
2477
* @ctxt: an XML parser context
2478
*
2479
* DEPRECATED: Internal function, don't use.
2480
*
2481
* Parse a numeric character reference. Always consumes '&'.
2482
*
2483
* [66] CharRef ::= '&#' [0-9]+ ';' |
2484
* '&#x' [0-9a-fA-F]+ ';'
2485
*
2486
* [ WFC: Legal Character ]
2487
* Characters referred to using character references must match the
2488
* production for Char.
2489
*
2490
* Returns the value parsed (as an int), 0 in case of error
2491
*/
2492
int
2493
xmlParseCharRef(xmlParserCtxtPtr ctxt) {
2494
int val = 0;
2495
int count = 0;
2496
2497
/*
2498
* Using RAW/CUR/NEXT is okay since we are working on ASCII range here
2499
*/
2500
if ((RAW == '&') && (NXT(1) == '#') &&
2501
(NXT(2) == 'x')) {
2502
SKIP(3);
2503
GROW;
2504
while (RAW != ';') { /* loop blocked by count */
2505
if (count++ > 20) {
2506
count = 0;
2507
GROW;
2508
if (ctxt->instate == XML_PARSER_EOF)
2509
return(0);
2510
}
2511
if ((RAW >= '0') && (RAW <= '9'))
2512
val = val * 16 + (CUR - '0');
2513
else if ((RAW >= 'a') && (RAW <= 'f') && (count < 20))
2514
val = val * 16 + (CUR - 'a') + 10;
2515
else if ((RAW >= 'A') && (RAW <= 'F') && (count < 20))
2516
val = val * 16 + (CUR - 'A') + 10;
2517
else {
2518
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2519
val = 0;
2520
break;
2521
}
2522
if (val > 0x110000)
2523
val = 0x110000;
2524
2525
NEXT;
2526
count++;
2527
}
2528
if (RAW == ';') {
2529
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2530
ctxt->input->col++;
2531
ctxt->input->cur++;
2532
}
2533
} else if ((RAW == '&') && (NXT(1) == '#')) {
2534
SKIP(2);
2535
GROW;
2536
while (RAW != ';') { /* loop blocked by count */
2537
if (count++ > 20) {
2538
count = 0;
2539
GROW;
2540
if (ctxt->instate == XML_PARSER_EOF)
2541
return(0);
2542
}
2543
if ((RAW >= '0') && (RAW <= '9'))
2544
val = val * 10 + (CUR - '0');
2545
else {
2546
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2547
val = 0;
2548
break;
2549
}
2550
if (val > 0x110000)
2551
val = 0x110000;
2552
2553
NEXT;
2554
count++;
2555
}
2556
if (RAW == ';') {
2557
/* on purpose to avoid reentrancy problems with NEXT and SKIP */
2558
ctxt->input->col++;
2559
ctxt->input->cur++;
2560
}
2561
} else {
2562
if (RAW == '&')
2563
SKIP(1);
2564
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2565
}
2566
2567
/*
2568
* [ WFC: Legal Character ]
2569
* Characters referred to using character references must match the
2570
* production for Char.
2571
*/
2572
if (val >= 0x110000) {
2573
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2574
"xmlParseCharRef: character reference out of bounds\n",
2575
val);
2576
} else if (IS_CHAR(val)) {
2577
return(val);
2578
} else {
2579
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2580
"xmlParseCharRef: invalid xmlChar value %d\n",
2581
val);
2582
}
2583
return(0);
2584
}
2585
2586
/**
2587
* xmlParseStringCharRef:
2588
* @ctxt: an XML parser context
2589
* @str: a pointer to an index in the string
2590
*
2591
* parse Reference declarations, variant parsing from a string rather
2592
* than an an input flow.
2593
*
2594
* [66] CharRef ::= '&#' [0-9]+ ';' |
2595
* '&#x' [0-9a-fA-F]+ ';'
2596
*
2597
* [ WFC: Legal Character ]
2598
* Characters referred to using character references must match the
2599
* production for Char.
2600
*
2601
* Returns the value parsed (as an int), 0 in case of error, str will be
2602
* updated to the current value of the index
2603
*/
2604
static int
2605
xmlParseStringCharRef(xmlParserCtxtPtr ctxt, const xmlChar **str) {
2606
const xmlChar *ptr;
2607
xmlChar cur;
2608
int val = 0;
2609
2610
if ((str == NULL) || (*str == NULL)) return(0);
2611
ptr = *str;
2612
cur = *ptr;
2613
if ((cur == '&') && (ptr[1] == '#') && (ptr[2] == 'x')) {
2614
ptr += 3;
2615
cur = *ptr;
2616
while (cur != ';') { /* Non input consuming loop */
2617
if ((cur >= '0') && (cur <= '9'))
2618
val = val * 16 + (cur - '0');
2619
else if ((cur >= 'a') && (cur <= 'f'))
2620
val = val * 16 + (cur - 'a') + 10;
2621
else if ((cur >= 'A') && (cur <= 'F'))
2622
val = val * 16 + (cur - 'A') + 10;
2623
else {
2624
xmlFatalErr(ctxt, XML_ERR_INVALID_HEX_CHARREF, NULL);
2625
val = 0;
2626
break;
2627
}
2628
if (val > 0x110000)
2629
val = 0x110000;
2630
2631
ptr++;
2632
cur = *ptr;
2633
}
2634
if (cur == ';')
2635
ptr++;
2636
} else if ((cur == '&') && (ptr[1] == '#')){
2637
ptr += 2;
2638
cur = *ptr;
2639
while (cur != ';') { /* Non input consuming loops */
2640
if ((cur >= '0') && (cur <= '9'))
2641
val = val * 10 + (cur - '0');
2642
else {
2643
xmlFatalErr(ctxt, XML_ERR_INVALID_DEC_CHARREF, NULL);
2644
val = 0;
2645
break;
2646
}
2647
if (val > 0x110000)
2648
val = 0x110000;
2649
2650
ptr++;
2651
cur = *ptr;
2652
}
2653
if (cur == ';')
2654
ptr++;
2655
} else {
2656
xmlFatalErr(ctxt, XML_ERR_INVALID_CHARREF, NULL);
2657
return(0);
2658
}
2659
*str = ptr;
2660
2661
/*
2662
* [ WFC: Legal Character ]
2663
* Characters referred to using character references must match the
2664
* production for Char.
2665
*/
2666
if (val >= 0x110000) {
2667
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2668
"xmlParseStringCharRef: character reference out of bounds\n",
2669
val);
2670
} else if (IS_CHAR(val)) {
2671
return(val);
2672
} else {
2673
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
2674
"xmlParseStringCharRef: invalid xmlChar value %d\n",
2675
val);
2676
}
2677
return(0);
2678
}
2679
2680
/**
2681
* xmlParserHandlePEReference:
2682
* @ctxt: the parser context
2683
*
2684
* DEPRECATED: Internal function, do not use.
2685
*
2686
* [69] PEReference ::= '%' Name ';'
2687
*
2688
* [ WFC: No Recursion ]
2689
* A parsed entity must not contain a recursive
2690
* reference to itself, either directly or indirectly.
2691
*
2692
* [ WFC: Entity Declared ]
2693
* In a document without any DTD, a document with only an internal DTD
2694
* subset which contains no parameter entity references, or a document
2695
* with "standalone='yes'", ... ... The declaration of a parameter
2696
* entity must precede any reference to it...
2697
*
2698
* [ VC: Entity Declared ]
2699
* In a document with an external subset or external parameter entities
2700
* with "standalone='no'", ... ... The declaration of a parameter entity
2701
* must precede any reference to it...
2702
*
2703
* [ WFC: In DTD ]
2704
* Parameter-entity references may only appear in the DTD.
2705
* NOTE: misleading but this is handled.
2706
*
2707
* A PEReference may have been detected in the current input stream
2708
* the handling is done accordingly to
2709
* http://www.w3.org/TR/REC-xml#entproc
2710
* i.e.
2711
* - Included in literal in entity values
2712
* - Included as Parameter Entity reference within DTDs
2713
*/
2714
void
2715
xmlParserHandlePEReference(xmlParserCtxtPtr ctxt) {
2716
switch(ctxt->instate) {
2717
case XML_PARSER_CDATA_SECTION:
2718
return;
2719
case XML_PARSER_COMMENT:
2720
return;
2721
case XML_PARSER_START_TAG:
2722
return;
2723
case XML_PARSER_END_TAG:
2724
return;
2725
case XML_PARSER_EOF:
2726
xmlFatalErr(ctxt, XML_ERR_PEREF_AT_EOF, NULL);
2727
return;
2728
case XML_PARSER_PROLOG:
2729
case XML_PARSER_START:
2730
case XML_PARSER_XML_DECL:
2731
case XML_PARSER_MISC:
2732
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_PROLOG, NULL);
2733
return;
2734
case XML_PARSER_ENTITY_DECL:
2735
case XML_PARSER_CONTENT:
2736
case XML_PARSER_ATTRIBUTE_VALUE:
2737
case XML_PARSER_PI:
2738
case XML_PARSER_SYSTEM_LITERAL:
2739
case XML_PARSER_PUBLIC_LITERAL:
2740
/* we just ignore it there */
2741
return;
2742
case XML_PARSER_EPILOG:
2743
xmlFatalErr(ctxt, XML_ERR_PEREF_IN_EPILOG, NULL);
2744
return;
2745
case XML_PARSER_ENTITY_VALUE:
2746
/*
2747
* NOTE: in the case of entity values, we don't do the
2748
* substitution here since we need the literal
2749
* entity value to be able to save the internal
2750
* subset of the document.
2751
* This will be handled by xmlStringDecodeEntities
2752
*/
2753
return;
2754
case XML_PARSER_DTD:
2755
/*
2756
* [WFC: Well-Formedness Constraint: PEs in Internal Subset]
2757
* In the internal DTD subset, parameter-entity references
2758
* can occur only where markup declarations can occur, not
2759
* within markup declarations.
2760
* In that case this is handled in xmlParseMarkupDecl
2761
*/
2762
if ((ctxt->external == 0) && (ctxt->inputNr == 1))
2763
return;
2764
if (IS_BLANK_CH(NXT(1)) || NXT(1) == 0)
2765
return;
2766
break;
2767
case XML_PARSER_IGNORE:
2768
return;
2769
}
2770
2771
xmlParsePEReference(ctxt);
2772
}
2773
2774
/*
 * Macro used to grow the current buffer to twice its size plus `n` bytes.
 * buffer##_size is expected to be a size_t
 * mem_error: is expected to handle memory allocation failures; it is the
 *   jump target both when the new size wraps around and when xmlRealloc()
 *   fails (the old buffer is still valid in that case).
 * `n` is parenthesized so that an argument such as `a ? b : c` is added
 * as a whole.
 */
#define growBuffer(buffer, n) {                                         \
    xmlChar *tmp;                                                       \
    size_t new_size = buffer##_size * 2 + (n);                          \
    if (new_size < buffer##_size) goto mem_error;                       \
    tmp = (xmlChar *) xmlRealloc(buffer, new_size);                     \
    if (tmp == NULL) goto mem_error;                                    \
    buffer = tmp;                                                       \
    buffer##_size = new_size;                                           \
}
2788
2789
/**
2790
* xmlStringDecodeEntitiesInt:
2791
* @ctxt: the parser context
2792
* @str: the input string
2793
* @len: the string length
2794
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
2795
* @end: an end marker xmlChar, 0 if none
2796
* @end2: an end marker xmlChar, 0 if none
2797
* @end3: an end marker xmlChar, 0 if none
2798
* @check: whether to perform entity checks
2799
*/
2800
static xmlChar *
2801
xmlStringDecodeEntitiesInt(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
2802
int what, xmlChar end, xmlChar end2, xmlChar end3,
2803
int check) {
2804
xmlChar *buffer = NULL;
2805
size_t buffer_size = 0;
2806
size_t nbchars = 0;
2807
2808
xmlChar *current = NULL;
2809
xmlChar *rep = NULL;
2810
const xmlChar *last;
2811
xmlEntityPtr ent;
2812
int c,l;
2813
2814
if (str == NULL)
2815
return(NULL);
2816
last = str + len;
2817
2818
if (((ctxt->depth > 40) &&
2819
((ctxt->options & XML_PARSE_HUGE) == 0)) ||
2820
(ctxt->depth > 100)) {
2821
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_LOOP,
2822
"Maximum entity nesting depth exceeded");
2823
return(NULL);
2824
}
2825
2826
/*
2827
* allocate a translation buffer.
2828
*/
2829
buffer_size = XML_PARSER_BIG_BUFFER_SIZE;
2830
buffer = (xmlChar *) xmlMallocAtomic(buffer_size);
2831
if (buffer == NULL) goto mem_error;
2832
2833
/*
2834
* OK loop until we reach one of the ending char or a size limit.
2835
* we are operating on already parsed values.
2836
*/
2837
if (str < last)
2838
c = CUR_SCHAR(str, l);
2839
else
2840
c = 0;
2841
while ((c != 0) && (c != end) && /* non input consuming loop */
2842
(c != end2) && (c != end3) &&
2843
(ctxt->instate != XML_PARSER_EOF)) {
2844
2845
if (c == 0) break;
2846
if ((c == '&') && (str[1] == '#')) {
2847
int val = xmlParseStringCharRef(ctxt, &str);
2848
if (val == 0)
2849
goto int_error;
2850
COPY_BUF(buffer, nbchars, val);
2851
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2852
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2853
}
2854
} else if ((c == '&') && (what & XML_SUBSTITUTE_REF)) {
2855
if (xmlParserDebugEntities)
2856
xmlGenericError(xmlGenericErrorContext,
2857
"String decoding Entity Reference: %.30s\n",
2858
str);
2859
ent = xmlParseStringEntityRef(ctxt, &str);
2860
if ((ent != NULL) &&
2861
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
2862
if (ent->content != NULL) {
2863
COPY_BUF(buffer, nbchars, ent->content[0]);
2864
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2865
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2866
}
2867
} else {
2868
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
2869
"predefined entity has no content\n");
2870
goto int_error;
2871
}
2872
} else if ((ent != NULL) && (ent->content != NULL)) {
2873
if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2874
goto int_error;
2875
2876
if (ent->flags & XML_ENT_EXPANDING) {
2877
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2878
xmlHaltParser(ctxt);
2879
ent->content[0] = 0;
2880
goto int_error;
2881
}
2882
2883
ent->flags |= XML_ENT_EXPANDING;
2884
ctxt->depth++;
2885
rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2886
ent->length, what, 0, 0, 0, check);
2887
ctxt->depth--;
2888
ent->flags &= ~XML_ENT_EXPANDING;
2889
2890
if (rep == NULL) {
2891
ent->content[0] = 0;
2892
goto int_error;
2893
}
2894
2895
current = rep;
2896
while (*current != 0) { /* non input consuming loop */
2897
buffer[nbchars++] = *current++;
2898
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2899
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2900
}
2901
}
2902
xmlFree(rep);
2903
rep = NULL;
2904
} else if (ent != NULL) {
2905
int i = xmlStrlen(ent->name);
2906
const xmlChar *cur = ent->name;
2907
2908
buffer[nbchars++] = '&';
2909
if (nbchars + i + XML_PARSER_BUFFER_SIZE > buffer_size) {
2910
growBuffer(buffer, i + XML_PARSER_BUFFER_SIZE);
2911
}
2912
for (;i > 0;i--)
2913
buffer[nbchars++] = *cur++;
2914
buffer[nbchars++] = ';';
2915
}
2916
} else if (c == '%' && (what & XML_SUBSTITUTE_PEREF)) {
2917
if (xmlParserDebugEntities)
2918
xmlGenericError(xmlGenericErrorContext,
2919
"String decoding PE Reference: %.30s\n", str);
2920
ent = xmlParseStringPEReference(ctxt, &str);
2921
if (ent != NULL) {
2922
if (ent->content == NULL) {
2923
/*
2924
* Note: external parsed entities will not be loaded,
2925
* it is not required for a non-validating parser to
2926
* complete external PEReferences coming from the
2927
* internal subset
2928
*/
2929
if (((ctxt->options & XML_PARSE_NOENT) != 0) ||
2930
((ctxt->options & XML_PARSE_DTDVALID) != 0) ||
2931
(ctxt->validate != 0)) {
2932
xmlLoadEntityContent(ctxt, ent);
2933
} else {
2934
xmlWarningMsg(ctxt, XML_ERR_ENTITY_PROCESSING,
2935
"not validating will not read content for PE entity %s\n",
2936
ent->name, NULL);
2937
}
2938
}
2939
2940
if ((check) && (xmlParserEntityCheck(ctxt, ent->length)))
2941
goto int_error;
2942
2943
if (ent->flags & XML_ENT_EXPANDING) {
2944
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
2945
xmlHaltParser(ctxt);
2946
if (ent->content != NULL)
2947
ent->content[0] = 0;
2948
goto int_error;
2949
}
2950
2951
ent->flags |= XML_ENT_EXPANDING;
2952
ctxt->depth++;
2953
rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
2954
ent->length, what, 0, 0, 0, check);
2955
ctxt->depth--;
2956
ent->flags &= ~XML_ENT_EXPANDING;
2957
2958
if (rep == NULL) {
2959
if (ent->content != NULL)
2960
ent->content[0] = 0;
2961
goto int_error;
2962
}
2963
current = rep;
2964
while (*current != 0) { /* non input consuming loop */
2965
buffer[nbchars++] = *current++;
2966
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2967
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2968
}
2969
}
2970
xmlFree(rep);
2971
rep = NULL;
2972
}
2973
} else {
2974
COPY_BUF(buffer, nbchars, c);
2975
str += l;
2976
if (nbchars + XML_PARSER_BUFFER_SIZE > buffer_size) {
2977
growBuffer(buffer, XML_PARSER_BUFFER_SIZE);
2978
}
2979
}
2980
if (str < last)
2981
c = CUR_SCHAR(str, l);
2982
else
2983
c = 0;
2984
}
2985
buffer[nbchars] = 0;
2986
return(buffer);
2987
2988
mem_error:
2989
xmlErrMemory(ctxt, NULL);
2990
int_error:
2991
if (rep != NULL)
2992
xmlFree(rep);
2993
if (buffer != NULL)
2994
xmlFree(buffer);
2995
return(NULL);
2996
}
2997
2998
/**
2999
* xmlStringLenDecodeEntities:
3000
* @ctxt: the parser context
3001
* @str: the input string
3002
* @len: the string length
3003
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3004
* @end: an end marker xmlChar, 0 if none
3005
* @end2: an end marker xmlChar, 0 if none
3006
* @end3: an end marker xmlChar, 0 if none
3007
*
3008
* DEPRECATED: Internal function, don't use.
3009
*
3010
* Takes a entity string content and process to do the adequate substitutions.
3011
*
3012
* [67] Reference ::= EntityRef | CharRef
3013
*
3014
* [69] PEReference ::= '%' Name ';'
3015
*
3016
* Returns A newly allocated string with the substitution done. The caller
3017
* must deallocate it !
3018
*/
3019
xmlChar *
3020
xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3021
int what, xmlChar end, xmlChar end2,
3022
xmlChar end3) {
3023
if ((ctxt == NULL) || (str == NULL) || (len < 0))
3024
return(NULL);
3025
return(xmlStringDecodeEntitiesInt(ctxt, str, len, what,
3026
end, end2, end3, 0));
3027
}
3028
3029
/**
3030
* xmlStringDecodeEntities:
3031
* @ctxt: the parser context
3032
* @str: the input string
3033
* @what: combination of XML_SUBSTITUTE_REF and XML_SUBSTITUTE_PEREF
3034
* @end: an end marker xmlChar, 0 if none
3035
* @end2: an end marker xmlChar, 0 if none
3036
* @end3: an end marker xmlChar, 0 if none
3037
*
3038
* DEPRECATED: Internal function, don't use.
3039
*
3040
* Takes a entity string content and process to do the adequate substitutions.
3041
*
3042
* [67] Reference ::= EntityRef | CharRef
3043
*
3044
* [69] PEReference ::= '%' Name ';'
3045
*
3046
* Returns A newly allocated string with the substitution done. The caller
3047
* must deallocate it !
3048
*/
3049
xmlChar *
3050
xmlStringDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int what,
3051
xmlChar end, xmlChar end2, xmlChar end3) {
3052
if ((ctxt == NULL) || (str == NULL)) return(NULL);
3053
return(xmlStringDecodeEntitiesInt(ctxt, str, xmlStrlen(str), what,
3054
end, end2, end3, 0));
3055
}
3056
3057
/************************************************************************
3058
* *
3059
* Commodity functions, cleanup needed ? *
3060
* *
3061
************************************************************************/
3062
3063
/**
3064
* areBlanks:
3065
* @ctxt: an XML parser context
3066
* @str: a xmlChar *
3067
* @len: the size of @str
3068
* @blank_chars: we know the chars are blanks
3069
*
3070
* Is this a sequence of blank chars that one can ignore ?
3071
*
3072
* Returns 1 if ignorable 0 otherwise.
3073
*/
3074
3075
static int areBlanks(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
3076
int blank_chars) {
3077
int i, ret;
3078
xmlNodePtr lastChild;
3079
3080
/*
3081
* Don't spend time trying to differentiate them, the same callback is
3082
* used !
3083
*/
3084
if (ctxt->sax->ignorableWhitespace == ctxt->sax->characters)
3085
return(0);
3086
3087
/*
3088
* Check for xml:space value.
3089
*/
3090
if ((ctxt->space == NULL) || (*(ctxt->space) == 1) ||
3091
(*(ctxt->space) == -2))
3092
return(0);
3093
3094
/*
3095
* Check that the string is made of blanks
3096
*/
3097
if (blank_chars == 0) {
3098
for (i = 0;i < len;i++)
3099
if (!(IS_BLANK_CH(str[i]))) return(0);
3100
}
3101
3102
/*
3103
* Look if the element is mixed content in the DTD if available
3104
*/
3105
if (ctxt->node == NULL) return(0);
3106
if (ctxt->myDoc != NULL) {
3107
ret = xmlIsMixedElement(ctxt->myDoc, ctxt->node->name);
3108
if (ret == 0) return(1);
3109
if (ret == 1) return(0);
3110
}
3111
3112
/*
3113
* Otherwise, heuristic :-\
3114
*/
3115
if ((RAW != '<') && (RAW != 0xD)) return(0);
3116
if ((ctxt->node->children == NULL) &&
3117
(RAW == '<') && (NXT(1) == '/')) return(0);
3118
3119
lastChild = xmlGetLastChild(ctxt->node);
3120
if (lastChild == NULL) {
3121
if ((ctxt->node->type != XML_ELEMENT_NODE) &&
3122
(ctxt->node->content != NULL)) return(0);
3123
} else if (xmlNodeIsText(lastChild))
3124
return(0);
3125
else if ((ctxt->node->children != NULL) &&
3126
(xmlNodeIsText(ctxt->node->children)))
3127
return(0);
3128
return(1);
3129
}
3130
3131
/************************************************************************
3132
* *
3133
* Extra stuff for namespace support *
3134
* Relates to http://www.w3.org/TR/WD-xml-names *
3135
* *
3136
************************************************************************/
3137
3138
/**
3139
* xmlSplitQName:
3140
* @ctxt: an XML parser context
3141
* @name: an XML parser context
3142
* @prefix: a xmlChar **
3143
*
3144
* parse an UTF8 encoded XML qualified name string
3145
*
3146
* [NS 5] QName ::= (Prefix ':')? LocalPart
3147
*
3148
* [NS 6] Prefix ::= NCName
3149
*
3150
* [NS 7] LocalPart ::= NCName
3151
*
3152
* Returns the local part, and prefix is updated
3153
* to get the Prefix if any.
3154
*/
3155
3156
xmlChar *
3157
xmlSplitQName(xmlParserCtxtPtr ctxt, const xmlChar *name, xmlChar **prefix) {
3158
xmlChar buf[XML_MAX_NAMELEN + 5];
3159
xmlChar *buffer = NULL;
3160
int len = 0;
3161
int max = XML_MAX_NAMELEN;
3162
xmlChar *ret = NULL;
3163
const xmlChar *cur = name;
3164
int c;
3165
3166
if (prefix == NULL) return(NULL);
3167
*prefix = NULL;
3168
3169
if (cur == NULL) return(NULL);
3170
3171
#ifndef XML_XML_NAMESPACE
3172
/* xml: prefix is not really a namespace */
3173
if ((cur[0] == 'x') && (cur[1] == 'm') &&
3174
(cur[2] == 'l') && (cur[3] == ':'))
3175
return(xmlStrdup(name));
3176
#endif
3177
3178
/* nasty but well=formed */
3179
if (cur[0] == ':')
3180
return(xmlStrdup(name));
3181
3182
c = *cur++;
3183
while ((c != 0) && (c != ':') && (len < max)) { /* tested bigname.xml */
3184
buf[len++] = c;
3185
c = *cur++;
3186
}
3187
if (len >= max) {
3188
/*
3189
* Okay someone managed to make a huge name, so he's ready to pay
3190
* for the processing speed.
3191
*/
3192
max = len * 2;
3193
3194
buffer = (xmlChar *) xmlMallocAtomic(max);
3195
if (buffer == NULL) {
3196
xmlErrMemory(ctxt, NULL);
3197
return(NULL);
3198
}
3199
memcpy(buffer, buf, len);
3200
while ((c != 0) && (c != ':')) { /* tested bigname.xml */
3201
if (len + 10 > max) {
3202
xmlChar *tmp;
3203
3204
max *= 2;
3205
tmp = (xmlChar *) xmlRealloc(buffer, max);
3206
if (tmp == NULL) {
3207
xmlFree(buffer);
3208
xmlErrMemory(ctxt, NULL);
3209
return(NULL);
3210
}
3211
buffer = tmp;
3212
}
3213
buffer[len++] = c;
3214
c = *cur++;
3215
}
3216
buffer[len] = 0;
3217
}
3218
3219
if ((c == ':') && (*cur == 0)) {
3220
if (buffer != NULL)
3221
xmlFree(buffer);
3222
*prefix = NULL;
3223
return(xmlStrdup(name));
3224
}
3225
3226
if (buffer == NULL)
3227
ret = xmlStrndup(buf, len);
3228
else {
3229
ret = buffer;
3230
buffer = NULL;
3231
max = XML_MAX_NAMELEN;
3232
}
3233
3234
3235
if (c == ':') {
3236
c = *cur;
3237
*prefix = ret;
3238
if (c == 0) {
3239
return(xmlStrndup(BAD_CAST "", 0));
3240
}
3241
len = 0;
3242
3243
/*
3244
* Check that the first character is proper to start
3245
* a new name
3246
*/
3247
if (!(((c >= 0x61) && (c <= 0x7A)) ||
3248
((c >= 0x41) && (c <= 0x5A)) ||
3249
(c == '_') || (c == ':'))) {
3250
int l;
3251
int first = CUR_SCHAR(cur, l);
3252
3253
if (!IS_LETTER(first) && (first != '_')) {
3254
xmlFatalErrMsgStr(ctxt, XML_NS_ERR_QNAME,
3255
"Name %s is not XML Namespace compliant\n",
3256
name);
3257
}
3258
}
3259
cur++;
3260
3261
while ((c != 0) && (len < max)) { /* tested bigname2.xml */
3262
buf[len++] = c;
3263
c = *cur++;
3264
}
3265
if (len >= max) {
3266
/*
3267
* Okay someone managed to make a huge name, so he's ready to pay
3268
* for the processing speed.
3269
*/
3270
max = len * 2;
3271
3272
buffer = (xmlChar *) xmlMallocAtomic(max);
3273
if (buffer == NULL) {
3274
xmlErrMemory(ctxt, NULL);
3275
return(NULL);
3276
}
3277
memcpy(buffer, buf, len);
3278
while (c != 0) { /* tested bigname2.xml */
3279
if (len + 10 > max) {
3280
xmlChar *tmp;
3281
3282
max *= 2;
3283
tmp = (xmlChar *) xmlRealloc(buffer, max);
3284
if (tmp == NULL) {
3285
xmlErrMemory(ctxt, NULL);
3286
xmlFree(buffer);
3287
return(NULL);
3288
}
3289
buffer = tmp;
3290
}
3291
buffer[len++] = c;
3292
c = *cur++;
3293
}
3294
buffer[len] = 0;
3295
}
3296
3297
if (buffer == NULL)
3298
ret = xmlStrndup(buf, len);
3299
else {
3300
ret = buffer;
3301
}
3302
}
3303
3304
return(ret);
3305
}
3306
3307
/************************************************************************
3308
* *
3309
* The parser itself *
3310
* Relates to http://www.w3.org/TR/REC-xml *
3311
* *
3312
************************************************************************/
3313
3314
/************************************************************************
3315
* *
3316
* Routines to parse Name, NCName and NmToken *
3317
* *
3318
************************************************************************/
3319
3320
/*
3321
* The two following functions are related to the change of accepted
3322
* characters for Name and NmToken in the Revision 5 of XML-1.0
3323
* They correspond to the modified production [4] and the new production [4a]
3324
* changes in that revision. Also note that the macros used for the
3325
* productions Letter, Digit, CombiningChar and Extender are not needed
3326
* anymore.
3327
* We still keep compatibility to pre-revision5 parsing semantic if the
3328
* new XML_PARSE_OLD10 option is given to the parser.
3329
*/
3330
static int
3331
xmlIsNameStartChar(xmlParserCtxtPtr ctxt, int c) {
3332
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3333
/*
3334
* Use the new checks of production [4] [4a] amd [5] of the
3335
* Update 5 of XML-1.0
3336
*/
3337
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3338
(((c >= 'a') && (c <= 'z')) ||
3339
((c >= 'A') && (c <= 'Z')) ||
3340
(c == '_') || (c == ':') ||
3341
((c >= 0xC0) && (c <= 0xD6)) ||
3342
((c >= 0xD8) && (c <= 0xF6)) ||
3343
((c >= 0xF8) && (c <= 0x2FF)) ||
3344
((c >= 0x370) && (c <= 0x37D)) ||
3345
((c >= 0x37F) && (c <= 0x1FFF)) ||
3346
((c >= 0x200C) && (c <= 0x200D)) ||
3347
((c >= 0x2070) && (c <= 0x218F)) ||
3348
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3349
((c >= 0x3001) && (c <= 0xD7FF)) ||
3350
((c >= 0xF900) && (c <= 0xFDCF)) ||
3351
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3352
((c >= 0x10000) && (c <= 0xEFFFF))))
3353
return(1);
3354
} else {
3355
if (IS_LETTER(c) || (c == '_') || (c == ':'))
3356
return(1);
3357
}
3358
return(0);
3359
}
3360
3361
static int
3362
xmlIsNameChar(xmlParserCtxtPtr ctxt, int c) {
3363
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3364
/*
3365
* Use the new checks of production [4] [4a] amd [5] of the
3366
* Update 5 of XML-1.0
3367
*/
3368
if ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3369
(((c >= 'a') && (c <= 'z')) ||
3370
((c >= 'A') && (c <= 'Z')) ||
3371
((c >= '0') && (c <= '9')) || /* !start */
3372
(c == '_') || (c == ':') ||
3373
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3374
((c >= 0xC0) && (c <= 0xD6)) ||
3375
((c >= 0xD8) && (c <= 0xF6)) ||
3376
((c >= 0xF8) && (c <= 0x2FF)) ||
3377
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3378
((c >= 0x370) && (c <= 0x37D)) ||
3379
((c >= 0x37F) && (c <= 0x1FFF)) ||
3380
((c >= 0x200C) && (c <= 0x200D)) ||
3381
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3382
((c >= 0x2070) && (c <= 0x218F)) ||
3383
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3384
((c >= 0x3001) && (c <= 0xD7FF)) ||
3385
((c >= 0xF900) && (c <= 0xFDCF)) ||
3386
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3387
((c >= 0x10000) && (c <= 0xEFFFF))))
3388
return(1);
3389
} else {
3390
if ((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3391
(c == '.') || (c == '-') ||
3392
(c == '_') || (c == ':') ||
3393
(IS_COMBINING(c)) ||
3394
(IS_EXTENDER(c)))
3395
return(1);
3396
}
3397
return(0);
3398
}
3399
3400
/* Forward declaration of the attribute value parser. */
static xmlChar *
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
                         int normalize);
3402
3403
static const xmlChar *
3404
xmlParseNameComplex(xmlParserCtxtPtr ctxt) {
3405
int len = 0, l;
3406
int c;
3407
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3408
XML_MAX_TEXT_LENGTH :
3409
XML_MAX_NAME_LENGTH;
3410
3411
/*
3412
* Handler for more complex cases
3413
*/
3414
c = CUR_CHAR(l);
3415
if ((ctxt->options & XML_PARSE_OLD10) == 0) {
3416
/*
3417
* Use the new checks of production [4] [4a] amd [5] of the
3418
* Update 5 of XML-1.0
3419
*/
3420
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3421
(!(((c >= 'a') && (c <= 'z')) ||
3422
((c >= 'A') && (c <= 'Z')) ||
3423
(c == '_') || (c == ':') ||
3424
((c >= 0xC0) && (c <= 0xD6)) ||
3425
((c >= 0xD8) && (c <= 0xF6)) ||
3426
((c >= 0xF8) && (c <= 0x2FF)) ||
3427
((c >= 0x370) && (c <= 0x37D)) ||
3428
((c >= 0x37F) && (c <= 0x1FFF)) ||
3429
((c >= 0x200C) && (c <= 0x200D)) ||
3430
((c >= 0x2070) && (c <= 0x218F)) ||
3431
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3432
((c >= 0x3001) && (c <= 0xD7FF)) ||
3433
((c >= 0xF900) && (c <= 0xFDCF)) ||
3434
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3435
((c >= 0x10000) && (c <= 0xEFFFF))))) {
3436
return(NULL);
3437
}
3438
len += l;
3439
NEXTL(l);
3440
c = CUR_CHAR(l);
3441
while ((c != ' ') && (c != '>') && (c != '/') && /* accelerators */
3442
(((c >= 'a') && (c <= 'z')) ||
3443
((c >= 'A') && (c <= 'Z')) ||
3444
((c >= '0') && (c <= '9')) || /* !start */
3445
(c == '_') || (c == ':') ||
3446
(c == '-') || (c == '.') || (c == 0xB7) || /* !start */
3447
((c >= 0xC0) && (c <= 0xD6)) ||
3448
((c >= 0xD8) && (c <= 0xF6)) ||
3449
((c >= 0xF8) && (c <= 0x2FF)) ||
3450
((c >= 0x300) && (c <= 0x36F)) || /* !start */
3451
((c >= 0x370) && (c <= 0x37D)) ||
3452
((c >= 0x37F) && (c <= 0x1FFF)) ||
3453
((c >= 0x200C) && (c <= 0x200D)) ||
3454
((c >= 0x203F) && (c <= 0x2040)) || /* !start */
3455
((c >= 0x2070) && (c <= 0x218F)) ||
3456
((c >= 0x2C00) && (c <= 0x2FEF)) ||
3457
((c >= 0x3001) && (c <= 0xD7FF)) ||
3458
((c >= 0xF900) && (c <= 0xFDCF)) ||
3459
((c >= 0xFDF0) && (c <= 0xFFFD)) ||
3460
((c >= 0x10000) && (c <= 0xEFFFF))
3461
)) {
3462
if (len <= INT_MAX - l)
3463
len += l;
3464
NEXTL(l);
3465
c = CUR_CHAR(l);
3466
}
3467
} else {
3468
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3469
(!IS_LETTER(c) && (c != '_') &&
3470
(c != ':'))) {
3471
return(NULL);
3472
}
3473
len += l;
3474
NEXTL(l);
3475
c = CUR_CHAR(l);
3476
3477
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3478
((IS_LETTER(c)) || (IS_DIGIT(c)) ||
3479
(c == '.') || (c == '-') ||
3480
(c == '_') || (c == ':') ||
3481
(IS_COMBINING(c)) ||
3482
(IS_EXTENDER(c)))) {
3483
if (len <= INT_MAX - l)
3484
len += l;
3485
NEXTL(l);
3486
c = CUR_CHAR(l);
3487
}
3488
}
3489
if (ctxt->instate == XML_PARSER_EOF)
3490
return(NULL);
3491
if (len > maxLength) {
3492
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3493
return(NULL);
3494
}
3495
if (ctxt->input->cur - ctxt->input->base < len) {
3496
/*
3497
* There were a couple of bugs where PERefs lead to to a change
3498
* of the buffer. Check the buffer size to avoid passing an invalid
3499
* pointer to xmlDictLookup.
3500
*/
3501
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
3502
"unexpected change of input buffer");
3503
return (NULL);
3504
}
3505
if ((*ctxt->input->cur == '\n') && (ctxt->input->cur[-1] == '\r'))
3506
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - (len + 1), len));
3507
return(xmlDictLookup(ctxt->dict, ctxt->input->cur - len, len));
3508
}
3509
3510
/**
3511
* xmlParseName:
3512
* @ctxt: an XML parser context
3513
*
3514
* DEPRECATED: Internal function, don't use.
3515
*
3516
* parse an XML name.
3517
*
3518
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3519
* CombiningChar | Extender
3520
*
3521
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3522
*
3523
* [6] Names ::= Name (#x20 Name)*
3524
*
3525
* Returns the Name parsed or NULL
3526
*/
3527
3528
const xmlChar *
3529
xmlParseName(xmlParserCtxtPtr ctxt) {
3530
const xmlChar *in;
3531
const xmlChar *ret;
3532
size_t count = 0;
3533
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3534
XML_MAX_TEXT_LENGTH :
3535
XML_MAX_NAME_LENGTH;
3536
3537
GROW;
3538
if (ctxt->instate == XML_PARSER_EOF)
3539
return(NULL);
3540
3541
/*
3542
* Accelerator for simple ASCII names
3543
*/
3544
in = ctxt->input->cur;
3545
if (((*in >= 0x61) && (*in <= 0x7A)) ||
3546
((*in >= 0x41) && (*in <= 0x5A)) ||
3547
(*in == '_') || (*in == ':')) {
3548
in++;
3549
while (((*in >= 0x61) && (*in <= 0x7A)) ||
3550
((*in >= 0x41) && (*in <= 0x5A)) ||
3551
((*in >= 0x30) && (*in <= 0x39)) ||
3552
(*in == '_') || (*in == '-') ||
3553
(*in == ':') || (*in == '.'))
3554
in++;
3555
if ((*in > 0) && (*in < 0x80)) {
3556
count = in - ctxt->input->cur;
3557
if (count > maxLength) {
3558
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Name");
3559
return(NULL);
3560
}
3561
ret = xmlDictLookup(ctxt->dict, ctxt->input->cur, count);
3562
ctxt->input->cur = in;
3563
ctxt->input->col += count;
3564
if (ret == NULL)
3565
xmlErrMemory(ctxt, NULL);
3566
return(ret);
3567
}
3568
}
3569
/* accelerator for special cases */
3570
return(xmlParseNameComplex(ctxt));
3571
}
3572
3573
static xmlHashedString
3574
xmlParseNCNameComplex(xmlParserCtxtPtr ctxt) {
3575
xmlHashedString ret;
3576
int len = 0, l;
3577
int c;
3578
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3579
XML_MAX_TEXT_LENGTH :
3580
XML_MAX_NAME_LENGTH;
3581
size_t startPosition = 0;
3582
3583
ret.name = NULL;
3584
ret.hashValue = 0;
3585
3586
/*
3587
* Handler for more complex cases
3588
*/
3589
startPosition = CUR_PTR - BASE_PTR;
3590
c = CUR_CHAR(l);
3591
if ((c == ' ') || (c == '>') || (c == '/') || /* accelerators */
3592
(!xmlIsNameStartChar(ctxt, c) || (c == ':'))) {
3593
return(ret);
3594
}
3595
3596
while ((c != ' ') && (c != '>') && (c != '/') && /* test bigname.xml */
3597
(xmlIsNameChar(ctxt, c) && (c != ':'))) {
3598
if (len <= INT_MAX - l)
3599
len += l;
3600
NEXTL(l);
3601
c = CUR_CHAR(l);
3602
}
3603
if (ctxt->instate == XML_PARSER_EOF)
3604
return(ret);
3605
if (len > maxLength) {
3606
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3607
return(ret);
3608
}
3609
ret = xmlDictLookupHashed(ctxt->dict, (BASE_PTR + startPosition), len);
3610
return(ret);
3611
}
3612
3613
/**
3614
* xmlParseNCName:
3615
* @ctxt: an XML parser context
3616
* @len: length of the string parsed
3617
*
3618
* parse an XML name.
3619
*
3620
* [4NS] NCNameChar ::= Letter | Digit | '.' | '-' | '_' |
3621
* CombiningChar | Extender
3622
*
3623
* [5NS] NCName ::= (Letter | '_') (NCNameChar)*
3624
*
3625
* Returns the Name parsed or NULL
3626
*/
3627
3628
static xmlHashedString
3629
xmlParseNCName(xmlParserCtxtPtr ctxt) {
3630
const xmlChar *in, *e;
3631
xmlHashedString ret;
3632
size_t count = 0;
3633
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3634
XML_MAX_TEXT_LENGTH :
3635
XML_MAX_NAME_LENGTH;
3636
3637
ret.name = NULL;
3638
3639
/*
3640
* Accelerator for simple ASCII names
3641
*/
3642
in = ctxt->input->cur;
3643
e = ctxt->input->end;
3644
if ((((*in >= 0x61) && (*in <= 0x7A)) ||
3645
((*in >= 0x41) && (*in <= 0x5A)) ||
3646
(*in == '_')) && (in < e)) {
3647
in++;
3648
while ((((*in >= 0x61) && (*in <= 0x7A)) ||
3649
((*in >= 0x41) && (*in <= 0x5A)) ||
3650
((*in >= 0x30) && (*in <= 0x39)) ||
3651
(*in == '_') || (*in == '-') ||
3652
(*in == '.')) && (in < e))
3653
in++;
3654
if (in >= e)
3655
goto complex;
3656
if ((*in > 0) && (*in < 0x80)) {
3657
count = in - ctxt->input->cur;
3658
if (count > maxLength) {
3659
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3660
return(ret);
3661
}
3662
ret = xmlDictLookupHashed(ctxt->dict, ctxt->input->cur, count);
3663
ctxt->input->cur = in;
3664
ctxt->input->col += count;
3665
if (ret.name == NULL) {
3666
xmlErrMemory(ctxt, NULL);
3667
}
3668
return(ret);
3669
}
3670
}
3671
complex:
3672
return(xmlParseNCNameComplex(ctxt));
3673
}
3674
3675
/**
3676
* xmlParseNameAndCompare:
3677
* @ctxt: an XML parser context
3678
*
3679
* parse an XML name and compares for match
3680
* (specialized for endtag parsing)
3681
*
3682
* Returns NULL for an illegal name, (xmlChar*) 1 for success
3683
* and the name for mismatch
3684
*/
3685
3686
static const xmlChar *
3687
xmlParseNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *other) {
3688
register const xmlChar *cmp = other;
3689
register const xmlChar *in;
3690
const xmlChar *ret;
3691
3692
GROW;
3693
if (ctxt->instate == XML_PARSER_EOF)
3694
return(NULL);
3695
3696
in = ctxt->input->cur;
3697
while (*in != 0 && *in == *cmp) {
3698
++in;
3699
++cmp;
3700
}
3701
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
3702
/* success */
3703
ctxt->input->col += in - ctxt->input->cur;
3704
ctxt->input->cur = in;
3705
return (const xmlChar*) 1;
3706
}
3707
/* failure (or end of input buffer), check with full function */
3708
ret = xmlParseName (ctxt);
3709
/* strings coming from the dictionary direct compare possible */
3710
if (ret == other) {
3711
return (const xmlChar*) 1;
3712
}
3713
return ret;
3714
}
3715
3716
/**
3717
* xmlParseStringName:
3718
* @ctxt: an XML parser context
3719
* @str: a pointer to the string pointer (IN/OUT)
3720
*
3721
* parse an XML name.
3722
*
3723
* [4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
3724
* CombiningChar | Extender
3725
*
3726
* [5] Name ::= (Letter | '_' | ':') (NameChar)*
3727
*
3728
* [6] Names ::= Name (#x20 Name)*
3729
*
3730
* Returns the Name parsed or NULL. The @str pointer
3731
* is updated to the current location in the string.
3732
*/
3733
3734
static xmlChar *
3735
xmlParseStringName(xmlParserCtxtPtr ctxt, const xmlChar** str) {
3736
xmlChar buf[XML_MAX_NAMELEN + 5];
3737
const xmlChar *cur = *str;
3738
int len = 0, l;
3739
int c;
3740
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3741
XML_MAX_TEXT_LENGTH :
3742
XML_MAX_NAME_LENGTH;
3743
3744
c = CUR_SCHAR(cur, l);
3745
if (!xmlIsNameStartChar(ctxt, c)) {
3746
return(NULL);
3747
}
3748
3749
COPY_BUF(buf, len, c);
3750
cur += l;
3751
c = CUR_SCHAR(cur, l);
3752
while (xmlIsNameChar(ctxt, c)) {
3753
COPY_BUF(buf, len, c);
3754
cur += l;
3755
c = CUR_SCHAR(cur, l);
3756
if (len >= XML_MAX_NAMELEN) { /* test bigentname.xml */
3757
/*
3758
* Okay someone managed to make a huge name, so he's ready to pay
3759
* for the processing speed.
3760
*/
3761
xmlChar *buffer;
3762
int max = len * 2;
3763
3764
buffer = (xmlChar *) xmlMallocAtomic(max);
3765
if (buffer == NULL) {
3766
xmlErrMemory(ctxt, NULL);
3767
return(NULL);
3768
}
3769
memcpy(buffer, buf, len);
3770
while (xmlIsNameChar(ctxt, c)) {
3771
if (len + 10 > max) {
3772
xmlChar *tmp;
3773
3774
max *= 2;
3775
tmp = (xmlChar *) xmlRealloc(buffer, max);
3776
if (tmp == NULL) {
3777
xmlErrMemory(ctxt, NULL);
3778
xmlFree(buffer);
3779
return(NULL);
3780
}
3781
buffer = tmp;
3782
}
3783
COPY_BUF(buffer, len, c);
3784
cur += l;
3785
c = CUR_SCHAR(cur, l);
3786
if (len > maxLength) {
3787
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3788
xmlFree(buffer);
3789
return(NULL);
3790
}
3791
}
3792
buffer[len] = 0;
3793
*str = cur;
3794
return(buffer);
3795
}
3796
}
3797
if (len > maxLength) {
3798
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NCName");
3799
return(NULL);
3800
}
3801
*str = cur;
3802
return(xmlStrndup(buf, len));
3803
}
3804
3805
/**
3806
* xmlParseNmtoken:
3807
* @ctxt: an XML parser context
3808
*
3809
* DEPRECATED: Internal function, don't use.
3810
*
3811
* parse an XML Nmtoken.
3812
*
3813
* [7] Nmtoken ::= (NameChar)+
3814
*
3815
* [8] Nmtokens ::= Nmtoken (#x20 Nmtoken)*
3816
*
3817
* Returns the Nmtoken parsed or NULL
3818
*/
3819
3820
xmlChar *
3821
xmlParseNmtoken(xmlParserCtxtPtr ctxt) {
3822
xmlChar buf[XML_MAX_NAMELEN + 5];
3823
int len = 0, l;
3824
int c;
3825
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3826
XML_MAX_TEXT_LENGTH :
3827
XML_MAX_NAME_LENGTH;
3828
3829
c = CUR_CHAR(l);
3830
3831
while (xmlIsNameChar(ctxt, c)) {
3832
COPY_BUF(buf, len, c);
3833
NEXTL(l);
3834
c = CUR_CHAR(l);
3835
if (len >= XML_MAX_NAMELEN) {
3836
/*
3837
* Okay someone managed to make a huge token, so he's ready to pay
3838
* for the processing speed.
3839
*/
3840
xmlChar *buffer;
3841
int max = len * 2;
3842
3843
buffer = (xmlChar *) xmlMallocAtomic(max);
3844
if (buffer == NULL) {
3845
xmlErrMemory(ctxt, NULL);
3846
return(NULL);
3847
}
3848
memcpy(buffer, buf, len);
3849
while (xmlIsNameChar(ctxt, c)) {
3850
if (len + 10 > max) {
3851
xmlChar *tmp;
3852
3853
max *= 2;
3854
tmp = (xmlChar *) xmlRealloc(buffer, max);
3855
if (tmp == NULL) {
3856
xmlErrMemory(ctxt, NULL);
3857
xmlFree(buffer);
3858
return(NULL);
3859
}
3860
buffer = tmp;
3861
}
3862
COPY_BUF(buffer, len, c);
3863
if (len > maxLength) {
3864
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3865
xmlFree(buffer);
3866
return(NULL);
3867
}
3868
NEXTL(l);
3869
c = CUR_CHAR(l);
3870
}
3871
buffer[len] = 0;
3872
if (ctxt->instate == XML_PARSER_EOF) {
3873
xmlFree(buffer);
3874
return(NULL);
3875
}
3876
return(buffer);
3877
}
3878
}
3879
if (ctxt->instate == XML_PARSER_EOF)
3880
return(NULL);
3881
if (len == 0)
3882
return(NULL);
3883
if (len > maxLength) {
3884
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "NmToken");
3885
return(NULL);
3886
}
3887
return(xmlStrndup(buf, len));
3888
}
3889
3890
/**
3891
* xmlParseEntityValue:
3892
* @ctxt: an XML parser context
3893
* @orig: if non-NULL store a copy of the original entity value
3894
*
3895
* DEPRECATED: Internal function, don't use.
3896
*
3897
* parse a value for ENTITY declarations
3898
*
3899
* [9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' |
3900
* "'" ([^%&'] | PEReference | Reference)* "'"
3901
*
3902
* Returns the EntityValue parsed with reference substituted or NULL
3903
*/
3904
3905
xmlChar *
3906
xmlParseEntityValue(xmlParserCtxtPtr ctxt, xmlChar **orig) {
3907
xmlChar *buf = NULL;
3908
int len = 0;
3909
int size = XML_PARSER_BUFFER_SIZE;
3910
int c, l;
3911
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
3912
XML_MAX_HUGE_LENGTH :
3913
XML_MAX_TEXT_LENGTH;
3914
xmlChar stop;
3915
xmlChar *ret = NULL;
3916
const xmlChar *cur = NULL;
3917
xmlParserInputPtr input;
3918
3919
if (RAW == '"') stop = '"';
3920
else if (RAW == '\'') stop = '\'';
3921
else {
3922
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_STARTED, NULL);
3923
return(NULL);
3924
}
3925
buf = (xmlChar *) xmlMallocAtomic(size);
3926
if (buf == NULL) {
3927
xmlErrMemory(ctxt, NULL);
3928
return(NULL);
3929
}
3930
3931
/*
3932
* The content of the entity definition is copied in a buffer.
3933
*/
3934
3935
ctxt->instate = XML_PARSER_ENTITY_VALUE;
3936
input = ctxt->input;
3937
GROW;
3938
if (ctxt->instate == XML_PARSER_EOF)
3939
goto error;
3940
NEXT;
3941
c = CUR_CHAR(l);
3942
/*
3943
* NOTE: 4.4.5 Included in Literal
3944
* When a parameter entity reference appears in a literal entity
3945
* value, ... a single or double quote character in the replacement
3946
* text is always treated as a normal data character and will not
3947
* terminate the literal.
3948
* In practice it means we stop the loop only when back at parsing
3949
* the initial entity and the quote is found
3950
*/
3951
while (((IS_CHAR(c)) && ((c != stop) || /* checked */
3952
(ctxt->input != input))) && (ctxt->instate != XML_PARSER_EOF)) {
3953
if (len + 5 >= size) {
3954
xmlChar *tmp;
3955
3956
size *= 2;
3957
tmp = (xmlChar *) xmlRealloc(buf, size);
3958
if (tmp == NULL) {
3959
xmlErrMemory(ctxt, NULL);
3960
goto error;
3961
}
3962
buf = tmp;
3963
}
3964
COPY_BUF(buf, len, c);
3965
NEXTL(l);
3966
3967
GROW;
3968
c = CUR_CHAR(l);
3969
if (c == 0) {
3970
GROW;
3971
c = CUR_CHAR(l);
3972
}
3973
3974
if (len > maxLength) {
3975
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
3976
"entity value too long\n");
3977
goto error;
3978
}
3979
}
3980
buf[len] = 0;
3981
if (ctxt->instate == XML_PARSER_EOF)
3982
goto error;
3983
if (c != stop) {
3984
xmlFatalErr(ctxt, XML_ERR_ENTITY_NOT_FINISHED, NULL);
3985
goto error;
3986
}
3987
NEXT;
3988
3989
/*
3990
* Raise problem w.r.t. '&' and '%' being used in non-entities
3991
* reference constructs. Note Charref will be handled in
3992
* xmlStringDecodeEntities()
3993
*/
3994
cur = buf;
3995
while (*cur != 0) { /* non input consuming */
3996
if ((*cur == '%') || ((*cur == '&') && (cur[1] != '#'))) {
3997
xmlChar *name;
3998
xmlChar tmp = *cur;
3999
int nameOk = 0;
4000
4001
cur++;
4002
name = xmlParseStringName(ctxt, &cur);
4003
if (name != NULL) {
4004
nameOk = 1;
4005
xmlFree(name);
4006
}
4007
if ((nameOk == 0) || (*cur != ';')) {
4008
xmlFatalErrMsgInt(ctxt, XML_ERR_ENTITY_CHAR_ERROR,
4009
"EntityValue: '%c' forbidden except for entities references\n",
4010
tmp);
4011
goto error;
4012
}
4013
if ((tmp == '%') && (ctxt->inSubset == 1) &&
4014
(ctxt->inputNr == 1)) {
4015
xmlFatalErr(ctxt, XML_ERR_ENTITY_PE_INTERNAL, NULL);
4016
goto error;
4017
}
4018
if (*cur == 0)
4019
break;
4020
}
4021
cur++;
4022
}
4023
4024
/*
4025
* Then PEReference entities are substituted.
4026
*
4027
* NOTE: 4.4.7 Bypassed
4028
* When a general entity reference appears in the EntityValue in
4029
* an entity declaration, it is bypassed and left as is.
4030
* so XML_SUBSTITUTE_REF is not set here.
4031
*/
4032
++ctxt->depth;
4033
ret = xmlStringDecodeEntitiesInt(ctxt, buf, len, XML_SUBSTITUTE_PEREF,
4034
0, 0, 0, /* check */ 1);
4035
--ctxt->depth;
4036
4037
if (orig != NULL) {
4038
*orig = buf;
4039
buf = NULL;
4040
}
4041
4042
error:
4043
if (buf != NULL)
4044
xmlFree(buf);
4045
return(ret);
4046
}
4047
4048
/**
4049
* xmlParseAttValueComplex:
4050
* @ctxt: an XML parser context
4051
* @len: the resulting attribute len
4052
* @normalize: whether to apply the inner normalization
4053
*
4054
* parse a value for an attribute, this is the fallback function
4055
* of xmlParseAttValue() when the attribute parsing requires handling
4056
* of non-ASCII characters, or normalization compaction.
4057
*
4058
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4059
*/
4060
static xmlChar *
4061
xmlParseAttValueComplex(xmlParserCtxtPtr ctxt, int *attlen, int normalize) {
4062
xmlChar limit = 0;
4063
xmlChar *buf = NULL;
4064
xmlChar *rep = NULL;
4065
size_t len = 0;
4066
size_t buf_size = 0;
4067
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4068
XML_MAX_HUGE_LENGTH :
4069
XML_MAX_TEXT_LENGTH;
4070
int c, l, in_space = 0;
4071
xmlChar *current = NULL;
4072
xmlEntityPtr ent;
4073
4074
if (NXT(0) == '"') {
4075
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4076
limit = '"';
4077
NEXT;
4078
} else if (NXT(0) == '\'') {
4079
limit = '\'';
4080
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
4081
NEXT;
4082
} else {
4083
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
4084
return(NULL);
4085
}
4086
4087
/*
4088
* allocate a translation buffer.
4089
*/
4090
buf_size = XML_PARSER_BUFFER_SIZE;
4091
buf = (xmlChar *) xmlMallocAtomic(buf_size);
4092
if (buf == NULL) goto mem_error;
4093
4094
/*
4095
* OK loop until we reach one of the ending char or a size limit.
4096
*/
4097
c = CUR_CHAR(l);
4098
while (((NXT(0) != limit) && /* checked */
4099
(IS_CHAR(c)) && (c != '<')) &&
4100
(ctxt->instate != XML_PARSER_EOF)) {
4101
if (c == '&') {
4102
in_space = 0;
4103
if (NXT(1) == '#') {
4104
int val = xmlParseCharRef(ctxt);
4105
4106
if (val == '&') {
4107
if (ctxt->replaceEntities) {
4108
if (len + 10 > buf_size) {
4109
growBuffer(buf, 10);
4110
}
4111
buf[len++] = '&';
4112
} else {
4113
/*
4114
* The reparsing will be done in xmlStringGetNodeList()
4115
* called by the attribute() function in SAX.c
4116
*/
4117
if (len + 10 > buf_size) {
4118
growBuffer(buf, 10);
4119
}
4120
buf[len++] = '&';
4121
buf[len++] = '#';
4122
buf[len++] = '3';
4123
buf[len++] = '8';
4124
buf[len++] = ';';
4125
}
4126
} else if (val != 0) {
4127
if (len + 10 > buf_size) {
4128
growBuffer(buf, 10);
4129
}
4130
len += xmlCopyChar(0, &buf[len], val);
4131
}
4132
} else {
4133
ent = xmlParseEntityRef(ctxt);
4134
if ((ent != NULL) &&
4135
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
4136
if (len + 10 > buf_size) {
4137
growBuffer(buf, 10);
4138
}
4139
if ((ctxt->replaceEntities == 0) &&
4140
(ent->content[0] == '&')) {
4141
buf[len++] = '&';
4142
buf[len++] = '#';
4143
buf[len++] = '3';
4144
buf[len++] = '8';
4145
buf[len++] = ';';
4146
} else {
4147
buf[len++] = ent->content[0];
4148
}
4149
} else if ((ent != NULL) &&
4150
(ctxt->replaceEntities != 0)) {
4151
if (ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) {
4152
if (xmlParserEntityCheck(ctxt, ent->length))
4153
goto error;
4154
4155
++ctxt->depth;
4156
rep = xmlStringDecodeEntitiesInt(ctxt, ent->content,
4157
ent->length, XML_SUBSTITUTE_REF, 0, 0, 0,
4158
/* check */ 1);
4159
--ctxt->depth;
4160
if (rep != NULL) {
4161
current = rep;
4162
while (*current != 0) { /* non input consuming */
4163
if ((*current == 0xD) || (*current == 0xA) ||
4164
(*current == 0x9)) {
4165
buf[len++] = 0x20;
4166
current++;
4167
} else
4168
buf[len++] = *current++;
4169
if (len + 10 > buf_size) {
4170
growBuffer(buf, 10);
4171
}
4172
}
4173
xmlFree(rep);
4174
rep = NULL;
4175
}
4176
} else {
4177
if (len + 10 > buf_size) {
4178
growBuffer(buf, 10);
4179
}
4180
if (ent->content != NULL)
4181
buf[len++] = ent->content[0];
4182
}
4183
} else if (ent != NULL) {
4184
int i = xmlStrlen(ent->name);
4185
const xmlChar *cur = ent->name;
4186
4187
/*
4188
* We also check for recursion and amplification
4189
* when entities are not substituted. They're
4190
* often expanded later.
4191
*/
4192
if ((ent->etype != XML_INTERNAL_PREDEFINED_ENTITY) &&
4193
(ent->content != NULL)) {
4194
if ((ent->flags & XML_ENT_CHECKED) == 0) {
4195
unsigned long oldCopy = ctxt->sizeentcopy;
4196
4197
ctxt->sizeentcopy = ent->length;
4198
4199
++ctxt->depth;
4200
rep = xmlStringDecodeEntitiesInt(ctxt,
4201
ent->content, ent->length,
4202
XML_SUBSTITUTE_REF, 0, 0, 0,
4203
/* check */ 1);
4204
--ctxt->depth;
4205
4206
/*
4207
* If we're parsing DTD content, the entity
4208
* might reference other entities which
4209
* weren't defined yet, so the check isn't
4210
* reliable.
4211
*/
4212
if (ctxt->inSubset == 0) {
4213
ent->flags |= XML_ENT_CHECKED;
4214
ent->expandedSize = ctxt->sizeentcopy;
4215
}
4216
4217
if (rep != NULL) {
4218
xmlFree(rep);
4219
rep = NULL;
4220
} else {
4221
ent->content[0] = 0;
4222
}
4223
4224
if (xmlParserEntityCheck(ctxt, oldCopy))
4225
goto error;
4226
} else {
4227
if (xmlParserEntityCheck(ctxt, ent->expandedSize))
4228
goto error;
4229
}
4230
}
4231
4232
/*
4233
* Just output the reference
4234
*/
4235
buf[len++] = '&';
4236
while (len + i + 10 > buf_size) {
4237
growBuffer(buf, i + 10);
4238
}
4239
for (;i > 0;i--)
4240
buf[len++] = *cur++;
4241
buf[len++] = ';';
4242
}
4243
}
4244
} else {
4245
if ((c == 0x20) || (c == 0xD) || (c == 0xA) || (c == 0x9)) {
4246
if ((len != 0) || (!normalize)) {
4247
if ((!normalize) || (!in_space)) {
4248
COPY_BUF(buf, len, 0x20);
4249
while (len + 10 > buf_size) {
4250
growBuffer(buf, 10);
4251
}
4252
}
4253
in_space = 1;
4254
}
4255
} else {
4256
in_space = 0;
4257
COPY_BUF(buf, len, c);
4258
if (len + 10 > buf_size) {
4259
growBuffer(buf, 10);
4260
}
4261
}
4262
NEXTL(l);
4263
}
4264
GROW;
4265
c = CUR_CHAR(l);
4266
if (len > maxLength) {
4267
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4268
"AttValue length too long\n");
4269
goto mem_error;
4270
}
4271
}
4272
if (ctxt->instate == XML_PARSER_EOF)
4273
goto error;
4274
4275
if ((in_space) && (normalize)) {
4276
while ((len > 0) && (buf[len - 1] == 0x20)) len--;
4277
}
4278
buf[len] = 0;
4279
if (RAW == '<') {
4280
xmlFatalErr(ctxt, XML_ERR_LT_IN_ATTRIBUTE, NULL);
4281
} else if (RAW != limit) {
4282
if ((c != 0) && (!IS_CHAR(c))) {
4283
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
4284
"invalid character in attribute value\n");
4285
} else {
4286
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
4287
"AttValue: ' expected\n");
4288
}
4289
} else
4290
NEXT;
4291
4292
if (attlen != NULL) *attlen = len;
4293
return(buf);
4294
4295
mem_error:
4296
xmlErrMemory(ctxt, NULL);
4297
error:
4298
if (buf != NULL)
4299
xmlFree(buf);
4300
if (rep != NULL)
4301
xmlFree(rep);
4302
return(NULL);
4303
}
4304
4305
/**
4306
* xmlParseAttValue:
4307
* @ctxt: an XML parser context
4308
*
4309
* DEPRECATED: Internal function, don't use.
4310
*
4311
* parse a value for an attribute
4312
* Note: the parser won't do substitution of entities here, this
4313
* will be handled later in xmlStringGetNodeList
4314
*
4315
* [10] AttValue ::= '"' ([^<&"] | Reference)* '"' |
4316
* "'" ([^<&'] | Reference)* "'"
4317
*
4318
* 3.3.3 Attribute-Value Normalization:
4319
* Before the value of an attribute is passed to the application or
4320
* checked for validity, the XML processor must normalize it as follows:
4321
* - a character reference is processed by appending the referenced
4322
* character to the attribute value
4323
* - an entity reference is processed by recursively processing the
4324
* replacement text of the entity
4325
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
4326
* appending #x20 to the normalized value, except that only a single
4327
* #x20 is appended for a "#xD#xA" sequence that is part of an external
4328
* parsed entity or the literal entity value of an internal parsed entity
4329
* - other characters are processed by appending them to the normalized value
4330
* If the declared value is not CDATA, then the XML processor must further
4331
* process the normalized attribute value by discarding any leading and
4332
* trailing space (#x20) characters, and by replacing sequences of space
4333
* (#x20) characters by a single space (#x20) character.
4334
* All attributes for which no declaration has been read should be treated
4335
* by a non-validating parser as if declared CDATA.
4336
*
4337
* Returns the AttValue parsed or NULL. The value has to be freed by the caller.
4338
*/
4339
4340
4341
xmlChar *
4342
xmlParseAttValue(xmlParserCtxtPtr ctxt) {
4343
if ((ctxt == NULL) || (ctxt->input == NULL)) return(NULL);
4344
return(xmlParseAttValueInternal(ctxt, NULL, NULL, 0));
4345
}
4346
4347
/**
4348
* xmlParseSystemLiteral:
4349
* @ctxt: an XML parser context
4350
*
4351
* DEPRECATED: Internal function, don't use.
4352
*
4353
* parse an XML Literal
4354
*
4355
* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'")
4356
*
4357
* Returns the SystemLiteral parsed or NULL
4358
*/
4359
4360
xmlChar *
4361
xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
4362
xmlChar *buf = NULL;
4363
int len = 0;
4364
int size = XML_PARSER_BUFFER_SIZE;
4365
int cur, l;
4366
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4367
XML_MAX_TEXT_LENGTH :
4368
XML_MAX_NAME_LENGTH;
4369
xmlChar stop;
4370
int state = ctxt->instate;
4371
4372
if (RAW == '"') {
4373
NEXT;
4374
stop = '"';
4375
} else if (RAW == '\'') {
4376
NEXT;
4377
stop = '\'';
4378
} else {
4379
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4380
return(NULL);
4381
}
4382
4383
buf = (xmlChar *) xmlMallocAtomic(size);
4384
if (buf == NULL) {
4385
xmlErrMemory(ctxt, NULL);
4386
return(NULL);
4387
}
4388
ctxt->instate = XML_PARSER_SYSTEM_LITERAL;
4389
cur = CUR_CHAR(l);
4390
while ((IS_CHAR(cur)) && (cur != stop)) { /* checked */
4391
if (len + 5 >= size) {
4392
xmlChar *tmp;
4393
4394
size *= 2;
4395
tmp = (xmlChar *) xmlRealloc(buf, size);
4396
if (tmp == NULL) {
4397
xmlFree(buf);
4398
xmlErrMemory(ctxt, NULL);
4399
ctxt->instate = (xmlParserInputState) state;
4400
return(NULL);
4401
}
4402
buf = tmp;
4403
}
4404
COPY_BUF(buf, len, cur);
4405
if (len > maxLength) {
4406
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "SystemLiteral");
4407
xmlFree(buf);
4408
ctxt->instate = (xmlParserInputState) state;
4409
return(NULL);
4410
}
4411
NEXTL(l);
4412
cur = CUR_CHAR(l);
4413
}
4414
buf[len] = 0;
4415
if (ctxt->instate == XML_PARSER_EOF) {
4416
xmlFree(buf);
4417
return(NULL);
4418
}
4419
ctxt->instate = (xmlParserInputState) state;
4420
if (!IS_CHAR(cur)) {
4421
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4422
} else {
4423
NEXT;
4424
}
4425
return(buf);
4426
}
4427
4428
/**
4429
* xmlParsePubidLiteral:
4430
* @ctxt: an XML parser context
4431
*
4432
* DEPRECATED: Internal function, don't use.
4433
*
4434
* parse an XML public literal
4435
*
4436
* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
4437
*
4438
* Returns the PubidLiteral parsed or NULL.
4439
*/
4440
4441
xmlChar *
4442
xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
4443
xmlChar *buf = NULL;
4444
int len = 0;
4445
int size = XML_PARSER_BUFFER_SIZE;
4446
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4447
XML_MAX_TEXT_LENGTH :
4448
XML_MAX_NAME_LENGTH;
4449
xmlChar cur;
4450
xmlChar stop;
4451
xmlParserInputState oldstate = ctxt->instate;
4452
4453
if (RAW == '"') {
4454
NEXT;
4455
stop = '"';
4456
} else if (RAW == '\'') {
4457
NEXT;
4458
stop = '\'';
4459
} else {
4460
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_STARTED, NULL);
4461
return(NULL);
4462
}
4463
buf = (xmlChar *) xmlMallocAtomic(size);
4464
if (buf == NULL) {
4465
xmlErrMemory(ctxt, NULL);
4466
return(NULL);
4467
}
4468
ctxt->instate = XML_PARSER_PUBLIC_LITERAL;
4469
cur = CUR;
4470
while ((IS_PUBIDCHAR_CH(cur)) && (cur != stop)) { /* checked */
4471
if (len + 1 >= size) {
4472
xmlChar *tmp;
4473
4474
size *= 2;
4475
tmp = (xmlChar *) xmlRealloc(buf, size);
4476
if (tmp == NULL) {
4477
xmlErrMemory(ctxt, NULL);
4478
xmlFree(buf);
4479
return(NULL);
4480
}
4481
buf = tmp;
4482
}
4483
buf[len++] = cur;
4484
if (len > maxLength) {
4485
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "Public ID");
4486
xmlFree(buf);
4487
return(NULL);
4488
}
4489
NEXT;
4490
cur = CUR;
4491
}
4492
buf[len] = 0;
4493
if (ctxt->instate == XML_PARSER_EOF) {
4494
xmlFree(buf);
4495
return(NULL);
4496
}
4497
if (cur != stop) {
4498
xmlFatalErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED, NULL);
4499
} else {
4500
NEXTL(1);
4501
}
4502
ctxt->instate = oldstate;
4503
return(buf);
4504
}
4505
4506
/*
 * Forward declaration; it takes the same (ctxt, partial) arguments as
 * xmlParseCharDataInternal() below.
 */
static void xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial);
4507
4508
/*
 * used for the test in the inner loop of the char data testing
 *
 * Indexed by byte value. An entry equals its own index when the inner loop
 * may step over that byte and is zero when the loop has to stop: the
 * control characters other than 0x9 (CR and LF are handled separately),
 * '&', '<', ']' and every byte from 0x80 up. Only the first 128 entries are
 * spelled out; the rest of the array is zero-initialized.
 */
static const unsigned char test_char_data[256] = {
    /* 0x00 - 0x0F: only 0x9 passes */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x09, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x10 - 0x1F */
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    /* 0x20 - 0x2F: '&' (0x26) stops */
    0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x00, 0x27,
    0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
    /* 0x30 - 0x3F: '<' (0x3C) stops */
    0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
    0x38, 0x39, 0x3A, 0x3B, 0x00, 0x3D, 0x3E, 0x3F,
    /* 0x40 - 0x4F */
    0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
    0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
    /* 0x50 - 0x5F: ']' (0x5D) stops */
    0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
    0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x00, 0x5E, 0x5F,
    /* 0x60 - 0x6F */
    0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
    0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
    /* 0x70 - 0x7F */
    0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
    0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F
    /* 0x80 - 0xFF: non-ascii, implicitly zero */
};
4545
4546
/**
4547
* xmlParseCharDataInternal:
4548
* @ctxt: an XML parser context
4549
* @partial: buffer may contain partial UTF-8 sequences
4550
*
4551
* Parse character data. Always makes progress if the first char isn't
4552
* '<' or '&'.
4553
*
4554
* The right angle bracket (>) may be represented using the string "&gt;",
4555
* and must, for compatibility, be escaped using "&gt;" or a character
4556
* reference when it appears in the string "]]>" in content, when that
4557
* string is not marking the end of a CDATA section.
4558
*
4559
* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*)
4560
*/
4561
static void
4562
xmlParseCharDataInternal(xmlParserCtxtPtr ctxt, int partial) {
4563
const xmlChar *in;
4564
int nbchar = 0;
4565
int line = ctxt->input->line;
4566
int col = ctxt->input->col;
4567
int ccol;
4568
4569
GROW;
4570
/*
4571
* Accelerated common case where input don't need to be
4572
* modified before passing it to the handler.
4573
*/
4574
in = ctxt->input->cur;
4575
do {
4576
get_more_space:
4577
while (*in == 0x20) { in++; ctxt->input->col++; }
4578
if (*in == 0xA) {
4579
do {
4580
ctxt->input->line++; ctxt->input->col = 1;
4581
in++;
4582
} while (*in == 0xA);
4583
goto get_more_space;
4584
}
4585
if (*in == '<') {
4586
nbchar = in - ctxt->input->cur;
4587
if (nbchar > 0) {
4588
const xmlChar *tmp = ctxt->input->cur;
4589
ctxt->input->cur = in;
4590
4591
if ((ctxt->sax != NULL) &&
4592
(ctxt->disableSAX == 0) &&
4593
(ctxt->sax->ignorableWhitespace !=
4594
ctxt->sax->characters)) {
4595
if (areBlanks(ctxt, tmp, nbchar, 1)) {
4596
if (ctxt->sax->ignorableWhitespace != NULL)
4597
ctxt->sax->ignorableWhitespace(ctxt->userData,
4598
tmp, nbchar);
4599
} else {
4600
if (ctxt->sax->characters != NULL)
4601
ctxt->sax->characters(ctxt->userData,
4602
tmp, nbchar);
4603
if (*ctxt->space == -1)
4604
*ctxt->space = -2;
4605
}
4606
} else if ((ctxt->sax != NULL) &&
4607
(ctxt->disableSAX == 0) &&
4608
(ctxt->sax->characters != NULL)) {
4609
ctxt->sax->characters(ctxt->userData,
4610
tmp, nbchar);
4611
}
4612
}
4613
return;
4614
}
4615
4616
get_more:
4617
ccol = ctxt->input->col;
4618
while (test_char_data[*in]) {
4619
in++;
4620
ccol++;
4621
}
4622
ctxt->input->col = ccol;
4623
if (*in == 0xA) {
4624
do {
4625
ctxt->input->line++; ctxt->input->col = 1;
4626
in++;
4627
} while (*in == 0xA);
4628
goto get_more;
4629
}
4630
if (*in == ']') {
4631
if ((in[1] == ']') && (in[2] == '>')) {
4632
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4633
if (ctxt->instate != XML_PARSER_EOF)
4634
ctxt->input->cur = in + 1;
4635
return;
4636
}
4637
in++;
4638
ctxt->input->col++;
4639
goto get_more;
4640
}
4641
nbchar = in - ctxt->input->cur;
4642
if (nbchar > 0) {
4643
if ((ctxt->sax != NULL) &&
4644
(ctxt->disableSAX == 0) &&
4645
(ctxt->sax->ignorableWhitespace !=
4646
ctxt->sax->characters) &&
4647
(IS_BLANK_CH(*ctxt->input->cur))) {
4648
const xmlChar *tmp = ctxt->input->cur;
4649
ctxt->input->cur = in;
4650
4651
if (areBlanks(ctxt, tmp, nbchar, 0)) {
4652
if (ctxt->sax->ignorableWhitespace != NULL)
4653
ctxt->sax->ignorableWhitespace(ctxt->userData,
4654
tmp, nbchar);
4655
} else {
4656
if (ctxt->sax->characters != NULL)
4657
ctxt->sax->characters(ctxt->userData,
4658
tmp, nbchar);
4659
if (*ctxt->space == -1)
4660
*ctxt->space = -2;
4661
}
4662
line = ctxt->input->line;
4663
col = ctxt->input->col;
4664
} else if ((ctxt->sax != NULL) &&
4665
(ctxt->disableSAX == 0)) {
4666
if (ctxt->sax->characters != NULL)
4667
ctxt->sax->characters(ctxt->userData,
4668
ctxt->input->cur, nbchar);
4669
line = ctxt->input->line;
4670
col = ctxt->input->col;
4671
}
4672
if (ctxt->instate == XML_PARSER_EOF)
4673
return;
4674
}
4675
ctxt->input->cur = in;
4676
if (*in == 0xD) {
4677
in++;
4678
if (*in == 0xA) {
4679
ctxt->input->cur = in;
4680
in++;
4681
ctxt->input->line++; ctxt->input->col = 1;
4682
continue; /* while */
4683
}
4684
in--;
4685
}
4686
if (*in == '<') {
4687
return;
4688
}
4689
if (*in == '&') {
4690
return;
4691
}
4692
SHRINK;
4693
GROW;
4694
if (ctxt->instate == XML_PARSER_EOF)
4695
return;
4696
in = ctxt->input->cur;
4697
} while (((*in >= 0x20) && (*in <= 0x7F)) ||
4698
(*in == 0x09) || (*in == 0x0a));
4699
ctxt->input->line = line;
4700
ctxt->input->col = col;
4701
xmlParseCharDataComplex(ctxt, partial);
4702
}
4703
4704
/**
4705
* xmlParseCharDataComplex:
4706
* @ctxt: an XML parser context
4707
* @cdata: int indicating whether we are within a CDATA section
4708
*
4709
* Always makes progress if the first char isn't '<' or '&'.
4710
*
4711
* parse a CharData section.this is the fallback function
4712
* of xmlParseCharData() when the parsing requires handling
4713
* of non-ASCII characters.
4714
*/
4715
static void
4716
xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int partial) {
4717
xmlChar buf[XML_PARSER_BIG_BUFFER_SIZE + 5];
4718
int nbchar = 0;
4719
int cur, l;
4720
4721
cur = CUR_CHAR(l);
4722
while ((cur != '<') && /* checked */
4723
(cur != '&') &&
4724
(IS_CHAR(cur))) {
4725
if ((cur == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
4726
xmlFatalErr(ctxt, XML_ERR_MISPLACED_CDATA_END, NULL);
4727
}
4728
COPY_BUF(buf, nbchar, cur);
4729
/* move current position before possible calling of ctxt->sax->characters */
4730
NEXTL(l);
4731
if (nbchar >= XML_PARSER_BIG_BUFFER_SIZE) {
4732
buf[nbchar] = 0;
4733
4734
/*
4735
* OK the segment is to be consumed as chars.
4736
*/
4737
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4738
if (areBlanks(ctxt, buf, nbchar, 0)) {
4739
if (ctxt->sax->ignorableWhitespace != NULL)
4740
ctxt->sax->ignorableWhitespace(ctxt->userData,
4741
buf, nbchar);
4742
} else {
4743
if (ctxt->sax->characters != NULL)
4744
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4745
if ((ctxt->sax->characters !=
4746
ctxt->sax->ignorableWhitespace) &&
4747
(*ctxt->space == -1))
4748
*ctxt->space = -2;
4749
}
4750
}
4751
nbchar = 0;
4752
/* something really bad happened in the SAX callback */
4753
if (ctxt->instate != XML_PARSER_CONTENT)
4754
return;
4755
SHRINK;
4756
}
4757
cur = CUR_CHAR(l);
4758
}
4759
if (ctxt->instate == XML_PARSER_EOF)
4760
return;
4761
if (nbchar != 0) {
4762
buf[nbchar] = 0;
4763
/*
4764
* OK the segment is to be consumed as chars.
4765
*/
4766
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
4767
if (areBlanks(ctxt, buf, nbchar, 0)) {
4768
if (ctxt->sax->ignorableWhitespace != NULL)
4769
ctxt->sax->ignorableWhitespace(ctxt->userData, buf, nbchar);
4770
} else {
4771
if (ctxt->sax->characters != NULL)
4772
ctxt->sax->characters(ctxt->userData, buf, nbchar);
4773
if ((ctxt->sax->characters != ctxt->sax->ignorableWhitespace) &&
4774
(*ctxt->space == -1))
4775
*ctxt->space = -2;
4776
}
4777
}
4778
}
4779
/*
4780
* cur == 0 can mean
4781
*
4782
* - XML_PARSER_EOF or memory error. This is checked above.
4783
* - An actual 0 character.
4784
* - End of buffer.
4785
* - An incomplete UTF-8 sequence. This is allowed if partial is set.
4786
*/
4787
if (ctxt->input->cur < ctxt->input->end) {
4788
if ((cur == 0) && (CUR != 0)) {
4789
if (partial == 0) {
4790
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4791
"Incomplete UTF-8 sequence starting with %02X\n", CUR);
4792
NEXTL(1);
4793
}
4794
} else if ((cur != '<') && (cur != '&')) {
4795
/* Generate the error and skip the offending character */
4796
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4797
"PCDATA invalid Char value %d\n", cur);
4798
NEXTL(l);
4799
}
4800
}
4801
}
4802
4803
/**
4804
* xmlParseCharData:
4805
* @ctxt: an XML parser context
4806
* @cdata: unused
4807
*
4808
* DEPRECATED: Internal function, don't use.
4809
*/
4810
void
4811
xmlParseCharData(xmlParserCtxtPtr ctxt, ATTRIBUTE_UNUSED int cdata) {
4812
xmlParseCharDataInternal(ctxt, 0);
4813
}
4814
4815
/**
4816
* xmlParseExternalID:
4817
* @ctxt: an XML parser context
4818
* @publicID: a xmlChar** receiving PubidLiteral
4819
* @strict: indicate whether we should restrict parsing to only
4820
* production [75], see NOTE below
4821
*
4822
* DEPRECATED: Internal function, don't use.
4823
*
4824
* Parse an External ID or a Public ID
4825
*
4826
* NOTE: Productions [75] and [83] interact badly since [75] can generate
4827
* 'PUBLIC' S PubidLiteral S SystemLiteral
4828
*
4829
* [75] ExternalID ::= 'SYSTEM' S SystemLiteral
4830
* | 'PUBLIC' S PubidLiteral S SystemLiteral
4831
*
4832
* [83] PublicID ::= 'PUBLIC' S PubidLiteral
4833
*
4834
* Returns the function returns SystemLiteral and in the second
4835
* case publicID receives PubidLiteral, is strict is off
4836
* it is possible to return NULL and have publicID set.
4837
*/
4838
4839
xmlChar *
4840
xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
4841
xmlChar *URI = NULL;
4842
4843
*publicID = NULL;
4844
if (CMP6(CUR_PTR, 'S', 'Y', 'S', 'T', 'E', 'M')) {
4845
SKIP(6);
4846
if (SKIP_BLANKS == 0) {
4847
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4848
"Space required after 'SYSTEM'\n");
4849
}
4850
URI = xmlParseSystemLiteral(ctxt);
4851
if (URI == NULL) {
4852
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4853
}
4854
} else if (CMP6(CUR_PTR, 'P', 'U', 'B', 'L', 'I', 'C')) {
4855
SKIP(6);
4856
if (SKIP_BLANKS == 0) {
4857
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4858
"Space required after 'PUBLIC'\n");
4859
}
4860
*publicID = xmlParsePubidLiteral(ctxt);
4861
if (*publicID == NULL) {
4862
xmlFatalErr(ctxt, XML_ERR_PUBID_REQUIRED, NULL);
4863
}
4864
if (strict) {
4865
/*
4866
* We don't handle [83] so "S SystemLiteral" is required.
4867
*/
4868
if (SKIP_BLANKS == 0) {
4869
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
4870
"Space required after the Public Identifier\n");
4871
}
4872
} else {
4873
/*
4874
* We handle [83] so we return immediately, if
4875
* "S SystemLiteral" is not detected. We skip blanks if no
4876
* system literal was found, but this is harmless since we must
4877
* be at the end of a NotationDecl.
4878
*/
4879
if (SKIP_BLANKS == 0) return(NULL);
4880
if ((CUR != '\'') && (CUR != '"')) return(NULL);
4881
}
4882
URI = xmlParseSystemLiteral(ctxt);
4883
if (URI == NULL) {
4884
xmlFatalErr(ctxt, XML_ERR_URI_REQUIRED, NULL);
4885
}
4886
}
4887
return(URI);
4888
}
4889
4890
/**
4891
* xmlParseCommentComplex:
4892
* @ctxt: an XML parser context
4893
* @buf: the already parsed part of the buffer
4894
* @len: number of bytes in the buffer
4895
* @size: allocated size of the buffer
4896
*
4897
* Skip an XML (SGML) comment <!-- .... -->
4898
* The spec says that "For compatibility, the string "--" (double-hyphen)
4899
* must not occur within comments. "
4900
* This is the slow routine in case the accelerator for ascii didn't work
4901
*
4902
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
4903
*/
4904
static void
4905
xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
4906
size_t len, size_t size) {
4907
int q, ql;
4908
int r, rl;
4909
int cur, l;
4910
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
4911
XML_MAX_HUGE_LENGTH :
4912
XML_MAX_TEXT_LENGTH;
4913
int inputid;
4914
4915
inputid = ctxt->input->id;
4916
4917
if (buf == NULL) {
4918
len = 0;
4919
size = XML_PARSER_BUFFER_SIZE;
4920
buf = (xmlChar *) xmlMallocAtomic(size);
4921
if (buf == NULL) {
4922
xmlErrMemory(ctxt, NULL);
4923
return;
4924
}
4925
}
4926
q = CUR_CHAR(ql);
4927
if (q == 0)
4928
goto not_terminated;
4929
if (!IS_CHAR(q)) {
4930
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4931
"xmlParseComment: invalid xmlChar value %d\n",
4932
q);
4933
xmlFree (buf);
4934
return;
4935
}
4936
NEXTL(ql);
4937
r = CUR_CHAR(rl);
4938
if (r == 0)
4939
goto not_terminated;
4940
if (!IS_CHAR(r)) {
4941
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4942
"xmlParseComment: invalid xmlChar value %d\n",
4943
r);
4944
xmlFree (buf);
4945
return;
4946
}
4947
NEXTL(rl);
4948
cur = CUR_CHAR(l);
4949
if (cur == 0)
4950
goto not_terminated;
4951
while (IS_CHAR(cur) && /* checked */
4952
((cur != '>') ||
4953
(r != '-') || (q != '-'))) {
4954
if ((r == '-') && (q == '-')) {
4955
xmlFatalErr(ctxt, XML_ERR_HYPHEN_IN_COMMENT, NULL);
4956
}
4957
if (len + 5 >= size) {
4958
xmlChar *new_buf;
4959
size_t new_size;
4960
4961
new_size = size * 2;
4962
new_buf = (xmlChar *) xmlRealloc(buf, new_size);
4963
if (new_buf == NULL) {
4964
xmlFree (buf);
4965
xmlErrMemory(ctxt, NULL);
4966
return;
4967
}
4968
buf = new_buf;
4969
size = new_size;
4970
}
4971
COPY_BUF(buf, len, q);
4972
if (len > maxLength) {
4973
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4974
"Comment too big found", NULL);
4975
xmlFree (buf);
4976
return;
4977
}
4978
4979
q = r;
4980
ql = rl;
4981
r = cur;
4982
rl = l;
4983
4984
NEXTL(l);
4985
cur = CUR_CHAR(l);
4986
4987
}
4988
buf[len] = 0;
4989
if (ctxt->instate == XML_PARSER_EOF) {
4990
xmlFree(buf);
4991
return;
4992
}
4993
if (cur == 0) {
4994
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
4995
"Comment not terminated \n<!--%.50s\n", buf);
4996
} else if (!IS_CHAR(cur)) {
4997
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
4998
"xmlParseComment: invalid xmlChar value %d\n",
4999
cur);
5000
} else {
5001
if (inputid != ctxt->input->id) {
5002
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5003
"Comment doesn't start and stop in the same"
5004
" entity\n");
5005
}
5006
NEXT;
5007
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5008
(!ctxt->disableSAX))
5009
ctxt->sax->comment(ctxt->userData, buf);
5010
}
5011
xmlFree(buf);
5012
return;
5013
not_terminated:
5014
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5015
"Comment not terminated\n", NULL);
5016
xmlFree(buf);
5017
return;
5018
}
5019
5020
/**
5021
* xmlParseComment:
5022
* @ctxt: an XML parser context
5023
*
5024
* DEPRECATED: Internal function, don't use.
5025
*
5026
* Parse an XML (SGML) comment. Always consumes '<!'.
5027
*
5028
* The spec says that "For compatibility, the string "--" (double-hyphen)
5029
* must not occur within comments. "
5030
*
5031
* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->'
5032
*/
5033
void
5034
xmlParseComment(xmlParserCtxtPtr ctxt) {
5035
xmlChar *buf = NULL;
5036
size_t size = XML_PARSER_BUFFER_SIZE;
5037
size_t len = 0;
5038
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5039
XML_MAX_HUGE_LENGTH :
5040
XML_MAX_TEXT_LENGTH;
5041
xmlParserInputState state;
5042
const xmlChar *in;
5043
size_t nbchar = 0;
5044
int ccol;
5045
int inputid;
5046
5047
/*
5048
* Check that there is a comment right here.
5049
*/
5050
if ((RAW != '<') || (NXT(1) != '!'))
5051
return;
5052
SKIP(2);
5053
if ((RAW != '-') || (NXT(1) != '-'))
5054
return;
5055
state = ctxt->instate;
5056
ctxt->instate = XML_PARSER_COMMENT;
5057
inputid = ctxt->input->id;
5058
SKIP(2);
5059
GROW;
5060
5061
/*
5062
* Accelerated common case where input don't need to be
5063
* modified before passing it to the handler.
5064
*/
5065
in = ctxt->input->cur;
5066
do {
5067
if (*in == 0xA) {
5068
do {
5069
ctxt->input->line++; ctxt->input->col = 1;
5070
in++;
5071
} while (*in == 0xA);
5072
}
5073
get_more:
5074
ccol = ctxt->input->col;
5075
while (((*in > '-') && (*in <= 0x7F)) ||
5076
((*in >= 0x20) && (*in < '-')) ||
5077
(*in == 0x09)) {
5078
in++;
5079
ccol++;
5080
}
5081
ctxt->input->col = ccol;
5082
if (*in == 0xA) {
5083
do {
5084
ctxt->input->line++; ctxt->input->col = 1;
5085
in++;
5086
} while (*in == 0xA);
5087
goto get_more;
5088
}
5089
nbchar = in - ctxt->input->cur;
5090
/*
5091
* save current set of data
5092
*/
5093
if (nbchar > 0) {
5094
if (buf == NULL) {
5095
if ((*in == '-') && (in[1] == '-'))
5096
size = nbchar + 1;
5097
else
5098
size = XML_PARSER_BUFFER_SIZE + nbchar;
5099
buf = (xmlChar *) xmlMallocAtomic(size);
5100
if (buf == NULL) {
5101
xmlErrMemory(ctxt, NULL);
5102
ctxt->instate = state;
5103
return;
5104
}
5105
len = 0;
5106
} else if (len + nbchar + 1 >= size) {
5107
xmlChar *new_buf;
5108
size += len + nbchar + XML_PARSER_BUFFER_SIZE;
5109
new_buf = (xmlChar *) xmlRealloc(buf, size);
5110
if (new_buf == NULL) {
5111
xmlFree (buf);
5112
xmlErrMemory(ctxt, NULL);
5113
ctxt->instate = state;
5114
return;
5115
}
5116
buf = new_buf;
5117
}
5118
memcpy(&buf[len], ctxt->input->cur, nbchar);
5119
len += nbchar;
5120
buf[len] = 0;
5121
}
5122
if (len > maxLength) {
5123
xmlFatalErrMsgStr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
5124
"Comment too big found", NULL);
5125
xmlFree (buf);
5126
return;
5127
}
5128
ctxt->input->cur = in;
5129
if (*in == 0xA) {
5130
in++;
5131
ctxt->input->line++; ctxt->input->col = 1;
5132
}
5133
if (*in == 0xD) {
5134
in++;
5135
if (*in == 0xA) {
5136
ctxt->input->cur = in;
5137
in++;
5138
ctxt->input->line++; ctxt->input->col = 1;
5139
goto get_more;
5140
}
5141
in--;
5142
}
5143
SHRINK;
5144
GROW;
5145
if (ctxt->instate == XML_PARSER_EOF) {
5146
xmlFree(buf);
5147
return;
5148
}
5149
in = ctxt->input->cur;
5150
if (*in == '-') {
5151
if (in[1] == '-') {
5152
if (in[2] == '>') {
5153
if (ctxt->input->id != inputid) {
5154
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5155
"comment doesn't start and stop in the"
5156
" same entity\n");
5157
}
5158
SKIP(3);
5159
if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
5160
(!ctxt->disableSAX)) {
5161
if (buf != NULL)
5162
ctxt->sax->comment(ctxt->userData, buf);
5163
else
5164
ctxt->sax->comment(ctxt->userData, BAD_CAST "");
5165
}
5166
if (buf != NULL)
5167
xmlFree(buf);
5168
if (ctxt->instate != XML_PARSER_EOF)
5169
ctxt->instate = state;
5170
return;
5171
}
5172
if (buf != NULL) {
5173
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5174
"Double hyphen within comment: "
5175
"<!--%.50s\n",
5176
buf);
5177
} else
5178
xmlFatalErrMsgStr(ctxt, XML_ERR_HYPHEN_IN_COMMENT,
5179
"Double hyphen within comment\n", NULL);
5180
if (ctxt->instate == XML_PARSER_EOF) {
5181
xmlFree(buf);
5182
return;
5183
}
5184
in++;
5185
ctxt->input->col++;
5186
}
5187
in++;
5188
ctxt->input->col++;
5189
goto get_more;
5190
}
5191
} while (((*in >= 0x20) && (*in <= 0x7F)) || (*in == 0x09) || (*in == 0x0a));
5192
xmlParseCommentComplex(ctxt, buf, len, size);
5193
ctxt->instate = state;
5194
return;
5195
}
5196
5197
5198
/**
5199
* xmlParsePITarget:
5200
* @ctxt: an XML parser context
5201
*
5202
* DEPRECATED: Internal function, don't use.
5203
*
5204
* parse the name of a PI
5205
*
5206
* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l'))
5207
*
5208
* Returns the PITarget name or NULL
5209
*/
5210
5211
const xmlChar *
5212
xmlParsePITarget(xmlParserCtxtPtr ctxt) {
5213
const xmlChar *name;
5214
5215
name = xmlParseName(ctxt);
5216
if ((name != NULL) &&
5217
((name[0] == 'x') || (name[0] == 'X')) &&
5218
((name[1] == 'm') || (name[1] == 'M')) &&
5219
((name[2] == 'l') || (name[2] == 'L'))) {
5220
int i;
5221
if ((name[0] == 'x') && (name[1] == 'm') &&
5222
(name[2] == 'l') && (name[3] == 0)) {
5223
xmlFatalErrMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5224
"XML declaration allowed only at the start of the document\n");
5225
return(name);
5226
} else if (name[3] == 0) {
5227
xmlFatalErr(ctxt, XML_ERR_RESERVED_XML_NAME, NULL);
5228
return(name);
5229
}
5230
for (i = 0;;i++) {
5231
if (xmlW3CPIs[i] == NULL) break;
5232
if (xmlStrEqual(name, (const xmlChar *)xmlW3CPIs[i]))
5233
return(name);
5234
}
5235
xmlWarningMsg(ctxt, XML_ERR_RESERVED_XML_NAME,
5236
"xmlParsePITarget: invalid name prefix 'xml'\n",
5237
NULL, NULL);
5238
}
5239
if ((name != NULL) && (xmlStrchr(name, ':') != NULL)) {
5240
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5241
"colons are forbidden from PI names '%s'\n", name, NULL, NULL);
5242
}
5243
return(name);
5244
}
5245
5246
#ifdef LIBXML_CATALOG_ENABLED
/**
 * xmlParseCatalogPI:
 * @ctxt:  an XML parser context
 * @catalog:  the PI value string
 *
 * Parse an XML Catalog Processing Instruction.
 *
 * <?oasis-xml-catalog catalog="http://example.com/catalog.xml"?>
 *
 * Occurs only if allowed by the user and if happening in the Misc
 * part of the document before any doctype information
 * This will add the given catalog to the parsing context in order
 * to be used if there is a resolution need further down in the document
 *
 * The accepted value is: optional blanks, the word "catalog", optional
 * blanks, '=', optional blanks, a single- or double-quoted URL, optional
 * blanks, end of string. Most deviations produce an XML_WAR_CATALOG_PI
 * warning.
 */

static void
xmlParseCatalogPI(xmlParserCtxtPtr ctxt, const xmlChar *catalog) {
    xmlChar *URL = NULL;
    const xmlChar *p = catalog;
    const xmlChar *start;
    xmlChar quote;

    while (IS_BLANK_CH(*p))
        p++;
    if (xmlStrncmp(p, BAD_CAST"catalog", 7) != 0)
        goto error;
    p += 7;

    while (IS_BLANK_CH(*p))
        p++;
    /*
     * NOTE(review): a missing '=' returns without the warning that every
     * other syntax problem triggers; kept as is, confirm it is intended.
     */
    if (*p != '=') {
        return;
    }
    p++;

    while (IS_BLANK_CH(*p))
        p++;
    quote = *p;
    if ((quote != '\'') && (quote != '"'))
        goto error;
    p++;

    /* The URL runs up to the matching quote. */
    start = p;
    while ((*p != 0) && (*p != quote))
        p++;
    if (*p == 0)
        goto error;
    URL = xmlStrndup(start, p - start);
    p++;

    /* Only blanks may follow the closing quote. */
    while (IS_BLANK_CH(*p))
        p++;
    if (*p != 0)
        goto error;

    if (URL != NULL) {
        ctxt->catalogs = xmlCatalogAddLocal(ctxt->catalogs, URL);
        xmlFree(URL);
    }
    return;

error:
    xmlWarningMsg(ctxt, XML_WAR_CATALOG_PI,
                  "Catalog PI syntax error: %s\n",
                  catalog, NULL);
    if (URL != NULL)
        xmlFree(URL);
}
#endif
5307
5308
/**
5309
* xmlParsePI:
5310
* @ctxt: an XML parser context
5311
*
5312
* DEPRECATED: Internal function, don't use.
5313
*
5314
* parse an XML Processing Instruction.
5315
*
5316
* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>'
5317
*
5318
* The processing is transferred to SAX once parsed.
5319
*/
5320
5321
void
5322
xmlParsePI(xmlParserCtxtPtr ctxt) {
5323
xmlChar *buf = NULL;
5324
size_t len = 0;
5325
size_t size = XML_PARSER_BUFFER_SIZE;
5326
size_t maxLength = (ctxt->options & XML_PARSE_HUGE) ?
5327
XML_MAX_HUGE_LENGTH :
5328
XML_MAX_TEXT_LENGTH;
5329
int cur, l;
5330
const xmlChar *target;
5331
xmlParserInputState state;
5332
5333
if ((RAW == '<') && (NXT(1) == '?')) {
5334
int inputid = ctxt->input->id;
5335
state = ctxt->instate;
5336
ctxt->instate = XML_PARSER_PI;
5337
/*
5338
* this is a Processing Instruction.
5339
*/
5340
SKIP(2);
5341
5342
/*
5343
* Parse the target name and check for special support like
5344
* namespace.
5345
*/
5346
target = xmlParsePITarget(ctxt);
5347
if (target != NULL) {
5348
if ((RAW == '?') && (NXT(1) == '>')) {
5349
if (inputid != ctxt->input->id) {
5350
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5351
"PI declaration doesn't start and stop in"
5352
" the same entity\n");
5353
}
5354
SKIP(2);
5355
5356
/*
5357
* SAX: PI detected.
5358
*/
5359
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5360
(ctxt->sax->processingInstruction != NULL))
5361
ctxt->sax->processingInstruction(ctxt->userData,
5362
target, NULL);
5363
if (ctxt->instate != XML_PARSER_EOF)
5364
ctxt->instate = state;
5365
return;
5366
}
5367
buf = (xmlChar *) xmlMallocAtomic(size);
5368
if (buf == NULL) {
5369
xmlErrMemory(ctxt, NULL);
5370
ctxt->instate = state;
5371
return;
5372
}
5373
if (SKIP_BLANKS == 0) {
5374
xmlFatalErrMsgStr(ctxt, XML_ERR_SPACE_REQUIRED,
5375
"ParsePI: PI %s space expected\n", target);
5376
}
5377
cur = CUR_CHAR(l);
5378
while (IS_CHAR(cur) && /* checked */
5379
((cur != '?') || (NXT(1) != '>'))) {
5380
if (len + 5 >= size) {
5381
xmlChar *tmp;
5382
size_t new_size = size * 2;
5383
tmp = (xmlChar *) xmlRealloc(buf, new_size);
5384
if (tmp == NULL) {
5385
xmlErrMemory(ctxt, NULL);
5386
xmlFree(buf);
5387
ctxt->instate = state;
5388
return;
5389
}
5390
buf = tmp;
5391
size = new_size;
5392
}
5393
COPY_BUF(buf, len, cur);
5394
if (len > maxLength) {
5395
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5396
"PI %s too big found", target);
5397
xmlFree(buf);
5398
ctxt->instate = state;
5399
return;
5400
}
5401
NEXTL(l);
5402
cur = CUR_CHAR(l);
5403
}
5404
buf[len] = 0;
5405
if (ctxt->instate == XML_PARSER_EOF) {
5406
xmlFree(buf);
5407
return;
5408
}
5409
if (cur != '?') {
5410
xmlFatalErrMsgStr(ctxt, XML_ERR_PI_NOT_FINISHED,
5411
"ParsePI: PI %s never end ...\n", target);
5412
} else {
5413
if (inputid != ctxt->input->id) {
5414
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5415
"PI declaration doesn't start and stop in"
5416
" the same entity\n");
5417
}
5418
SKIP(2);
5419
5420
#ifdef LIBXML_CATALOG_ENABLED
5421
if (((state == XML_PARSER_MISC) ||
5422
(state == XML_PARSER_START)) &&
5423
(xmlStrEqual(target, XML_CATALOG_PI))) {
5424
xmlCatalogAllow allow = xmlCatalogGetDefaults();
5425
if ((allow == XML_CATA_ALLOW_DOCUMENT) ||
5426
(allow == XML_CATA_ALLOW_ALL))
5427
xmlParseCatalogPI(ctxt, buf);
5428
}
5429
#endif
5430
5431
5432
/*
5433
* SAX: PI detected.
5434
*/
5435
if ((ctxt->sax) && (!ctxt->disableSAX) &&
5436
(ctxt->sax->processingInstruction != NULL))
5437
ctxt->sax->processingInstruction(ctxt->userData,
5438
target, buf);
5439
}
5440
xmlFree(buf);
5441
} else {
5442
xmlFatalErr(ctxt, XML_ERR_PI_NOT_STARTED, NULL);
5443
}
5444
if (ctxt->instate != XML_PARSER_EOF)
5445
ctxt->instate = state;
5446
}
5447
}
5448
5449
/**
5450
* xmlParseNotationDecl:
5451
* @ctxt: an XML parser context
5452
*
5453
* DEPRECATED: Internal function, don't use.
5454
*
5455
* Parse a notation declaration. Always consumes '<!'.
5456
*
5457
* [82] NotationDecl ::= '<!NOTATION' S Name S (ExternalID | PublicID) S? '>'
5458
*
5459
* Hence there is actually 3 choices:
5460
* 'PUBLIC' S PubidLiteral
5461
* 'PUBLIC' S PubidLiteral S SystemLiteral
5462
* and 'SYSTEM' S SystemLiteral
5463
*
5464
* See the NOTE on xmlParseExternalID().
5465
*/
5466
5467
void
5468
xmlParseNotationDecl(xmlParserCtxtPtr ctxt) {
5469
const xmlChar *name;
5470
xmlChar *Pubid;
5471
xmlChar *Systemid;
5472
5473
if ((CUR != '<') || (NXT(1) != '!'))
5474
return;
5475
SKIP(2);
5476
5477
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
5478
int inputid = ctxt->input->id;
5479
SKIP(8);
5480
if (SKIP_BLANKS == 0) {
5481
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5482
"Space required after '<!NOTATION'\n");
5483
return;
5484
}
5485
5486
name = xmlParseName(ctxt);
5487
if (name == NULL) {
5488
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5489
return;
5490
}
5491
if (xmlStrchr(name, ':') != NULL) {
5492
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5493
"colons are forbidden from notation names '%s'\n",
5494
name, NULL, NULL);
5495
}
5496
if (SKIP_BLANKS == 0) {
5497
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5498
"Space required after the NOTATION name'\n");
5499
return;
5500
}
5501
5502
/*
5503
* Parse the IDs.
5504
*/
5505
Systemid = xmlParseExternalID(ctxt, &Pubid, 0);
5506
SKIP_BLANKS;
5507
5508
if (RAW == '>') {
5509
if (inputid != ctxt->input->id) {
5510
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5511
"Notation declaration doesn't start and stop"
5512
" in the same entity\n");
5513
}
5514
NEXT;
5515
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5516
(ctxt->sax->notationDecl != NULL))
5517
ctxt->sax->notationDecl(ctxt->userData, name, Pubid, Systemid);
5518
} else {
5519
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5520
}
5521
if (Systemid != NULL) xmlFree(Systemid);
5522
if (Pubid != NULL) xmlFree(Pubid);
5523
}
5524
}
5525
5526
/**
5527
* xmlParseEntityDecl:
5528
* @ctxt: an XML parser context
5529
*
5530
* DEPRECATED: Internal function, don't use.
5531
*
5532
* Parse an entity declaration. Always consumes '<!'.
5533
*
5534
* [70] EntityDecl ::= GEDecl | PEDecl
5535
*
5536
* [71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
5537
*
5538
* [72] PEDecl ::= '<!ENTITY' S '%' S Name S PEDef S? '>'
5539
*
5540
* [73] EntityDef ::= EntityValue | (ExternalID NDataDecl?)
5541
*
5542
* [74] PEDef ::= EntityValue | ExternalID
5543
*
5544
* [76] NDataDecl ::= S 'NDATA' S Name
5545
*
5546
* [ VC: Notation Declared ]
5547
* The Name must match the declared name of a notation.
5548
*/
5549
5550
void
5551
xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
5552
const xmlChar *name = NULL;
5553
xmlChar *value = NULL;
5554
xmlChar *URI = NULL, *literal = NULL;
5555
const xmlChar *ndata = NULL;
5556
int isParameter = 0;
5557
xmlChar *orig = NULL;
5558
5559
if ((CUR != '<') || (NXT(1) != '!'))
5560
return;
5561
SKIP(2);
5562
5563
/* GROW; done in the caller */
5564
if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
5565
int inputid = ctxt->input->id;
5566
SKIP(6);
5567
if (SKIP_BLANKS == 0) {
5568
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5569
"Space required after '<!ENTITY'\n");
5570
}
5571
5572
if (RAW == '%') {
5573
NEXT;
5574
if (SKIP_BLANKS == 0) {
5575
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5576
"Space required after '%%'\n");
5577
}
5578
isParameter = 1;
5579
}
5580
5581
name = xmlParseName(ctxt);
5582
if (name == NULL) {
5583
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5584
"xmlParseEntityDecl: no name\n");
5585
return;
5586
}
5587
if (xmlStrchr(name, ':') != NULL) {
5588
xmlNsErr(ctxt, XML_NS_ERR_COLON,
5589
"colons are forbidden from entities names '%s'\n",
5590
name, NULL, NULL);
5591
}
5592
if (SKIP_BLANKS == 0) {
5593
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5594
"Space required after the entity name\n");
5595
}
5596
5597
ctxt->instate = XML_PARSER_ENTITY_DECL;
5598
/*
5599
* handle the various case of definitions...
5600
*/
5601
if (isParameter) {
5602
if ((RAW == '"') || (RAW == '\'')) {
5603
value = xmlParseEntityValue(ctxt, &orig);
5604
if (value) {
5605
if ((ctxt->sax != NULL) &&
5606
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5607
ctxt->sax->entityDecl(ctxt->userData, name,
5608
XML_INTERNAL_PARAMETER_ENTITY,
5609
NULL, NULL, value);
5610
}
5611
} else {
5612
URI = xmlParseExternalID(ctxt, &literal, 1);
5613
if ((URI == NULL) && (literal == NULL)) {
5614
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5615
}
5616
if (URI) {
5617
xmlURIPtr uri;
5618
5619
uri = xmlParseURI((const char *) URI);
5620
if (uri == NULL) {
5621
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5622
"Invalid URI: %s\n", URI);
5623
/*
5624
* This really ought to be a well formedness error
5625
* but the XML Core WG decided otherwise c.f. issue
5626
* E26 of the XML erratas.
5627
*/
5628
} else {
5629
if (uri->fragment != NULL) {
5630
/*
5631
* Okay this is foolish to block those but not
5632
* invalid URIs.
5633
*/
5634
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5635
} else {
5636
if ((ctxt->sax != NULL) &&
5637
(!ctxt->disableSAX) &&
5638
(ctxt->sax->entityDecl != NULL))
5639
ctxt->sax->entityDecl(ctxt->userData, name,
5640
XML_EXTERNAL_PARAMETER_ENTITY,
5641
literal, URI, NULL);
5642
}
5643
xmlFreeURI(uri);
5644
}
5645
}
5646
}
5647
} else {
5648
if ((RAW == '"') || (RAW == '\'')) {
5649
value = xmlParseEntityValue(ctxt, &orig);
5650
if ((ctxt->sax != NULL) &&
5651
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5652
ctxt->sax->entityDecl(ctxt->userData, name,
5653
XML_INTERNAL_GENERAL_ENTITY,
5654
NULL, NULL, value);
5655
/*
5656
* For expat compatibility in SAX mode.
5657
*/
5658
if ((ctxt->myDoc == NULL) ||
5659
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
5660
if (ctxt->myDoc == NULL) {
5661
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5662
if (ctxt->myDoc == NULL) {
5663
xmlErrMemory(ctxt, "New Doc failed");
5664
goto done;
5665
}
5666
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5667
}
5668
if (ctxt->myDoc->intSubset == NULL)
5669
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5670
BAD_CAST "fake", NULL, NULL);
5671
5672
xmlSAX2EntityDecl(ctxt, name, XML_INTERNAL_GENERAL_ENTITY,
5673
NULL, NULL, value);
5674
}
5675
} else {
5676
URI = xmlParseExternalID(ctxt, &literal, 1);
5677
if ((URI == NULL) && (literal == NULL)) {
5678
xmlFatalErr(ctxt, XML_ERR_VALUE_REQUIRED, NULL);
5679
}
5680
if (URI) {
5681
xmlURIPtr uri;
5682
5683
uri = xmlParseURI((const char *)URI);
5684
if (uri == NULL) {
5685
xmlErrMsgStr(ctxt, XML_ERR_INVALID_URI,
5686
"Invalid URI: %s\n", URI);
5687
/*
5688
* This really ought to be a well formedness error
5689
* but the XML Core WG decided otherwise c.f. issue
5690
* E26 of the XML erratas.
5691
*/
5692
} else {
5693
if (uri->fragment != NULL) {
5694
/*
5695
* Okay this is foolish to block those but not
5696
* invalid URIs.
5697
*/
5698
xmlFatalErr(ctxt, XML_ERR_URI_FRAGMENT, NULL);
5699
}
5700
xmlFreeURI(uri);
5701
}
5702
}
5703
if ((RAW != '>') && (SKIP_BLANKS == 0)) {
5704
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5705
"Space required before 'NDATA'\n");
5706
}
5707
if (CMP5(CUR_PTR, 'N', 'D', 'A', 'T', 'A')) {
5708
SKIP(5);
5709
if (SKIP_BLANKS == 0) {
5710
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5711
"Space required after 'NDATA'\n");
5712
}
5713
ndata = xmlParseName(ctxt);
5714
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
5715
(ctxt->sax->unparsedEntityDecl != NULL))
5716
ctxt->sax->unparsedEntityDecl(ctxt->userData, name,
5717
literal, URI, ndata);
5718
} else {
5719
if ((ctxt->sax != NULL) &&
5720
(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
5721
ctxt->sax->entityDecl(ctxt->userData, name,
5722
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5723
literal, URI, NULL);
5724
/*
5725
* For expat compatibility in SAX mode.
5726
* assuming the entity replacement was asked for
5727
*/
5728
if ((ctxt->replaceEntities != 0) &&
5729
((ctxt->myDoc == NULL) ||
5730
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE)))) {
5731
if (ctxt->myDoc == NULL) {
5732
ctxt->myDoc = xmlNewDoc(SAX_COMPAT_MODE);
5733
if (ctxt->myDoc == NULL) {
5734
xmlErrMemory(ctxt, "New Doc failed");
5735
goto done;
5736
}
5737
ctxt->myDoc->properties = XML_DOC_INTERNAL;
5738
}
5739
5740
if (ctxt->myDoc->intSubset == NULL)
5741
ctxt->myDoc->intSubset = xmlNewDtd(ctxt->myDoc,
5742
BAD_CAST "fake", NULL, NULL);
5743
xmlSAX2EntityDecl(ctxt, name,
5744
XML_EXTERNAL_GENERAL_PARSED_ENTITY,
5745
literal, URI, NULL);
5746
}
5747
}
5748
}
5749
}
5750
if (ctxt->instate == XML_PARSER_EOF)
5751
goto done;
5752
SKIP_BLANKS;
5753
if (RAW != '>') {
5754
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_NOT_FINISHED,
5755
"xmlParseEntityDecl: entity %s not terminated\n", name);
5756
xmlHaltParser(ctxt);
5757
} else {
5758
if (inputid != ctxt->input->id) {
5759
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
5760
"Entity declaration doesn't start and stop in"
5761
" the same entity\n");
5762
}
5763
NEXT;
5764
}
5765
if (orig != NULL) {
5766
/*
5767
* Ugly mechanism to save the raw entity value.
5768
*/
5769
xmlEntityPtr cur = NULL;
5770
5771
if (isParameter) {
5772
if ((ctxt->sax != NULL) &&
5773
(ctxt->sax->getParameterEntity != NULL))
5774
cur = ctxt->sax->getParameterEntity(ctxt->userData, name);
5775
} else {
5776
if ((ctxt->sax != NULL) &&
5777
(ctxt->sax->getEntity != NULL))
5778
cur = ctxt->sax->getEntity(ctxt->userData, name);
5779
if ((cur == NULL) && (ctxt->userData==ctxt)) {
5780
cur = xmlSAX2GetEntity(ctxt, name);
5781
}
5782
}
5783
if ((cur != NULL) && (cur->orig == NULL)) {
5784
cur->orig = orig;
5785
orig = NULL;
5786
}
5787
}
5788
5789
done:
5790
if (value != NULL) xmlFree(value);
5791
if (URI != NULL) xmlFree(URI);
5792
if (literal != NULL) xmlFree(literal);
5793
if (orig != NULL) xmlFree(orig);
5794
}
5795
}
5796
5797
/**
5798
* xmlParseDefaultDecl:
5799
* @ctxt: an XML parser context
5800
* @value: Receive a possible fixed default value for the attribute
5801
*
5802
* DEPRECATED: Internal function, don't use.
5803
*
5804
* Parse an attribute default declaration
5805
*
5806
* [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue)
5807
*
5808
* [ VC: Required Attribute ]
5809
* if the default declaration is the keyword #REQUIRED, then the
5810
* attribute must be specified for all elements of the type in the
5811
* attribute-list declaration.
5812
*
5813
* [ VC: Attribute Default Legal ]
5814
* The declared default value must meet the lexical constraints of
5815
* the declared attribute type c.f. xmlValidateAttributeDecl()
5816
*
5817
* [ VC: Fixed Attribute Default ]
5818
* if an attribute has a default value declared with the #FIXED
5819
* keyword, instances of that attribute must match the default value.
5820
*
5821
* [ WFC: No < in Attribute Values ]
5822
* handled in xmlParseAttValue()
5823
*
5824
* returns: XML_ATTRIBUTE_NONE, XML_ATTRIBUTE_REQUIRED, XML_ATTRIBUTE_IMPLIED
5825
* or XML_ATTRIBUTE_FIXED.
5826
*/
5827
5828
int
5829
xmlParseDefaultDecl(xmlParserCtxtPtr ctxt, xmlChar **value) {
5830
int val;
5831
xmlChar *ret;
5832
5833
*value = NULL;
5834
if (CMP9(CUR_PTR, '#', 'R', 'E', 'Q', 'U', 'I', 'R', 'E', 'D')) {
5835
SKIP(9);
5836
return(XML_ATTRIBUTE_REQUIRED);
5837
}
5838
if (CMP8(CUR_PTR, '#', 'I', 'M', 'P', 'L', 'I', 'E', 'D')) {
5839
SKIP(8);
5840
return(XML_ATTRIBUTE_IMPLIED);
5841
}
5842
val = XML_ATTRIBUTE_NONE;
5843
if (CMP6(CUR_PTR, '#', 'F', 'I', 'X', 'E', 'D')) {
5844
SKIP(6);
5845
val = XML_ATTRIBUTE_FIXED;
5846
if (SKIP_BLANKS == 0) {
5847
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
5848
"Space required after '#FIXED'\n");
5849
}
5850
}
5851
ret = xmlParseAttValue(ctxt);
5852
ctxt->instate = XML_PARSER_DTD;
5853
if (ret == NULL) {
5854
xmlFatalErrMsg(ctxt, (xmlParserErrors)ctxt->errNo,
5855
"Attribute default value declaration error\n");
5856
} else
5857
*value = ret;
5858
return(val);
5859
}
5860
5861
/**
5862
* xmlParseNotationType:
5863
* @ctxt: an XML parser context
5864
*
5865
* DEPRECATED: Internal function, don't use.
5866
*
5867
* parse an Notation attribute type.
5868
*
5869
* Note: the leading 'NOTATION' S part has already being parsed...
5870
*
5871
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
5872
*
5873
* [ VC: Notation Attributes ]
5874
* Values of this type must match one of the notation names included
5875
* in the declaration; all notation names in the declaration must be declared.
5876
*
5877
* Returns: the notation attribute tree built while parsing
5878
*/
5879
5880
xmlEnumerationPtr
5881
xmlParseNotationType(xmlParserCtxtPtr ctxt) {
5882
const xmlChar *name;
5883
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5884
5885
if (RAW != '(') {
5886
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_STARTED, NULL);
5887
return(NULL);
5888
}
5889
do {
5890
NEXT;
5891
SKIP_BLANKS;
5892
name = xmlParseName(ctxt);
5893
if (name == NULL) {
5894
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
5895
"Name expected in NOTATION declaration\n");
5896
xmlFreeEnumeration(ret);
5897
return(NULL);
5898
}
5899
tmp = ret;
5900
while (tmp != NULL) {
5901
if (xmlStrEqual(name, tmp->name)) {
5902
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5903
"standalone: attribute notation value token %s duplicated\n",
5904
name, NULL);
5905
if (!xmlDictOwns(ctxt->dict, name))
5906
xmlFree((xmlChar *) name);
5907
break;
5908
}
5909
tmp = tmp->next;
5910
}
5911
if (tmp == NULL) {
5912
cur = xmlCreateEnumeration(name);
5913
if (cur == NULL) {
5914
xmlFreeEnumeration(ret);
5915
return(NULL);
5916
}
5917
if (last == NULL) ret = last = cur;
5918
else {
5919
last->next = cur;
5920
last = cur;
5921
}
5922
}
5923
SKIP_BLANKS;
5924
} while (RAW == '|');
5925
if (RAW != ')') {
5926
xmlFatalErr(ctxt, XML_ERR_NOTATION_NOT_FINISHED, NULL);
5927
xmlFreeEnumeration(ret);
5928
return(NULL);
5929
}
5930
NEXT;
5931
return(ret);
5932
}
5933
5934
/**
5935
* xmlParseEnumerationType:
5936
* @ctxt: an XML parser context
5937
*
5938
* DEPRECATED: Internal function, don't use.
5939
*
5940
* parse an Enumeration attribute type.
5941
*
5942
* [59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')'
5943
*
5944
* [ VC: Enumeration ]
5945
* Values of this type must match one of the Nmtoken tokens in
5946
* the declaration
5947
*
5948
* Returns: the enumeration attribute tree built while parsing
5949
*/
5950
5951
xmlEnumerationPtr
5952
xmlParseEnumerationType(xmlParserCtxtPtr ctxt) {
5953
xmlChar *name;
5954
xmlEnumerationPtr ret = NULL, last = NULL, cur, tmp;
5955
5956
if (RAW != '(') {
5957
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_STARTED, NULL);
5958
return(NULL);
5959
}
5960
do {
5961
NEXT;
5962
SKIP_BLANKS;
5963
name = xmlParseNmtoken(ctxt);
5964
if (name == NULL) {
5965
xmlFatalErr(ctxt, XML_ERR_NMTOKEN_REQUIRED, NULL);
5966
return(ret);
5967
}
5968
tmp = ret;
5969
while (tmp != NULL) {
5970
if (xmlStrEqual(name, tmp->name)) {
5971
xmlValidityError(ctxt, XML_DTD_DUP_TOKEN,
5972
"standalone: attribute enumeration value token %s duplicated\n",
5973
name, NULL);
5974
if (!xmlDictOwns(ctxt->dict, name))
5975
xmlFree(name);
5976
break;
5977
}
5978
tmp = tmp->next;
5979
}
5980
if (tmp == NULL) {
5981
cur = xmlCreateEnumeration(name);
5982
if (!xmlDictOwns(ctxt->dict, name))
5983
xmlFree(name);
5984
if (cur == NULL) {
5985
xmlFreeEnumeration(ret);
5986
return(NULL);
5987
}
5988
if (last == NULL) ret = last = cur;
5989
else {
5990
last->next = cur;
5991
last = cur;
5992
}
5993
}
5994
SKIP_BLANKS;
5995
} while (RAW == '|');
5996
if (RAW != ')') {
5997
xmlFatalErr(ctxt, XML_ERR_ATTLIST_NOT_FINISHED, NULL);
5998
return(ret);
5999
}
6000
NEXT;
6001
return(ret);
6002
}
6003
6004
/**
6005
* xmlParseEnumeratedType:
6006
* @ctxt: an XML parser context
6007
* @tree: the enumeration tree built while parsing
6008
*
6009
* DEPRECATED: Internal function, don't use.
6010
*
6011
* parse an Enumerated attribute type.
6012
*
6013
* [57] EnumeratedType ::= NotationType | Enumeration
6014
*
6015
* [58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')'
6016
*
6017
*
6018
* Returns: XML_ATTRIBUTE_ENUMERATION or XML_ATTRIBUTE_NOTATION
6019
*/
6020
6021
int
6022
xmlParseEnumeratedType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6023
if (CMP8(CUR_PTR, 'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N')) {
6024
SKIP(8);
6025
if (SKIP_BLANKS == 0) {
6026
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6027
"Space required after 'NOTATION'\n");
6028
return(0);
6029
}
6030
*tree = xmlParseNotationType(ctxt);
6031
if (*tree == NULL) return(0);
6032
return(XML_ATTRIBUTE_NOTATION);
6033
}
6034
*tree = xmlParseEnumerationType(ctxt);
6035
if (*tree == NULL) return(0);
6036
return(XML_ATTRIBUTE_ENUMERATION);
6037
}
6038
6039
/**
6040
* xmlParseAttributeType:
6041
* @ctxt: an XML parser context
6042
* @tree: the enumeration tree built while parsing
6043
*
6044
* DEPRECATED: Internal function, don't use.
6045
*
6046
* parse the Attribute list def for an element
6047
*
6048
* [54] AttType ::= StringType | TokenizedType | EnumeratedType
6049
*
6050
* [55] StringType ::= 'CDATA'
6051
*
6052
* [56] TokenizedType ::= 'ID' | 'IDREF' | 'IDREFS' | 'ENTITY' |
6053
* 'ENTITIES' | 'NMTOKEN' | 'NMTOKENS'
6054
*
6055
* Validity constraints for attribute values syntax are checked in
6056
* xmlValidateAttributeValue()
6057
*
6058
* [ VC: ID ]
6059
* Values of type ID must match the Name production. A name must not
6060
* appear more than once in an XML document as a value of this type;
6061
* i.e., ID values must uniquely identify the elements which bear them.
6062
*
6063
* [ VC: One ID per Element Type ]
6064
* No element type may have more than one ID attribute specified.
6065
*
6066
* [ VC: ID Attribute Default ]
6067
* An ID attribute must have a declared default of #IMPLIED or #REQUIRED.
6068
*
6069
* [ VC: IDREF ]
6070
* Values of type IDREF must match the Name production, and values
6071
* of type IDREFS must match Names; each IDREF Name must match the value
6072
* of an ID attribute on some element in the XML document; i.e. IDREF
6073
* values must match the value of some ID attribute.
6074
*
6075
* [ VC: Entity Name ]
6076
* Values of type ENTITY must match the Name production, values
6077
* of type ENTITIES must match Names; each Entity Name must match the
6078
* name of an unparsed entity declared in the DTD.
6079
*
6080
* [ VC: Name Token ]
6081
* Values of type NMTOKEN must match the Nmtoken production; values
6082
* of type NMTOKENS must match Nmtokens.
6083
*
6084
* Returns the attribute type
6085
*/
6086
int
6087
xmlParseAttributeType(xmlParserCtxtPtr ctxt, xmlEnumerationPtr *tree) {
6088
if (CMP5(CUR_PTR, 'C', 'D', 'A', 'T', 'A')) {
6089
SKIP(5);
6090
return(XML_ATTRIBUTE_CDATA);
6091
} else if (CMP6(CUR_PTR, 'I', 'D', 'R', 'E', 'F', 'S')) {
6092
SKIP(6);
6093
return(XML_ATTRIBUTE_IDREFS);
6094
} else if (CMP5(CUR_PTR, 'I', 'D', 'R', 'E', 'F')) {
6095
SKIP(5);
6096
return(XML_ATTRIBUTE_IDREF);
6097
} else if ((RAW == 'I') && (NXT(1) == 'D')) {
6098
SKIP(2);
6099
return(XML_ATTRIBUTE_ID);
6100
} else if (CMP6(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'Y')) {
6101
SKIP(6);
6102
return(XML_ATTRIBUTE_ENTITY);
6103
} else if (CMP8(CUR_PTR, 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S')) {
6104
SKIP(8);
6105
return(XML_ATTRIBUTE_ENTITIES);
6106
} else if (CMP8(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S')) {
6107
SKIP(8);
6108
return(XML_ATTRIBUTE_NMTOKENS);
6109
} else if (CMP7(CUR_PTR, 'N', 'M', 'T', 'O', 'K', 'E', 'N')) {
6110
SKIP(7);
6111
return(XML_ATTRIBUTE_NMTOKEN);
6112
}
6113
return(xmlParseEnumeratedType(ctxt, tree));
6114
}
6115
6116
/**
6117
* xmlParseAttributeListDecl:
6118
* @ctxt: an XML parser context
6119
*
6120
* DEPRECATED: Internal function, don't use.
6121
*
6122
* Parse an attribute list declaration for an element. Always consumes '<!'.
6123
*
6124
* [52] AttlistDecl ::= '<!ATTLIST' S Name AttDef* S? '>'
6125
*
6126
* [53] AttDef ::= S Name S AttType S DefaultDecl
6127
*
6128
*/
6129
void
6130
xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt) {
6131
const xmlChar *elemName;
6132
const xmlChar *attrName;
6133
xmlEnumerationPtr tree;
6134
6135
if ((CUR != '<') || (NXT(1) != '!'))
6136
return;
6137
SKIP(2);
6138
6139
if (CMP7(CUR_PTR, 'A', 'T', 'T', 'L', 'I', 'S', 'T')) {
6140
int inputid = ctxt->input->id;
6141
6142
SKIP(7);
6143
if (SKIP_BLANKS == 0) {
6144
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6145
"Space required after '<!ATTLIST'\n");
6146
}
6147
elemName = xmlParseName(ctxt);
6148
if (elemName == NULL) {
6149
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6150
"ATTLIST: no name for Element\n");
6151
return;
6152
}
6153
SKIP_BLANKS;
6154
GROW;
6155
while ((RAW != '>') && (ctxt->instate != XML_PARSER_EOF)) {
6156
int type;
6157
int def;
6158
xmlChar *defaultValue = NULL;
6159
6160
GROW;
6161
tree = NULL;
6162
attrName = xmlParseName(ctxt);
6163
if (attrName == NULL) {
6164
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6165
"ATTLIST: no name for Attribute\n");
6166
break;
6167
}
6168
GROW;
6169
if (SKIP_BLANKS == 0) {
6170
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6171
"Space required after the attribute name\n");
6172
break;
6173
}
6174
6175
type = xmlParseAttributeType(ctxt, &tree);
6176
if (type <= 0) {
6177
break;
6178
}
6179
6180
GROW;
6181
if (SKIP_BLANKS == 0) {
6182
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6183
"Space required after the attribute type\n");
6184
if (tree != NULL)
6185
xmlFreeEnumeration(tree);
6186
break;
6187
}
6188
6189
def = xmlParseDefaultDecl(ctxt, &defaultValue);
6190
if (def <= 0) {
6191
if (defaultValue != NULL)
6192
xmlFree(defaultValue);
6193
if (tree != NULL)
6194
xmlFreeEnumeration(tree);
6195
break;
6196
}
6197
if ((type != XML_ATTRIBUTE_CDATA) && (defaultValue != NULL))
6198
xmlAttrNormalizeSpace(defaultValue, defaultValue);
6199
6200
GROW;
6201
if (RAW != '>') {
6202
if (SKIP_BLANKS == 0) {
6203
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6204
"Space required after the attribute default value\n");
6205
if (defaultValue != NULL)
6206
xmlFree(defaultValue);
6207
if (tree != NULL)
6208
xmlFreeEnumeration(tree);
6209
break;
6210
}
6211
}
6212
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6213
(ctxt->sax->attributeDecl != NULL))
6214
ctxt->sax->attributeDecl(ctxt->userData, elemName, attrName,
6215
type, def, defaultValue, tree);
6216
else if (tree != NULL)
6217
xmlFreeEnumeration(tree);
6218
6219
if ((ctxt->sax2) && (defaultValue != NULL) &&
6220
(def != XML_ATTRIBUTE_IMPLIED) &&
6221
(def != XML_ATTRIBUTE_REQUIRED)) {
6222
xmlAddDefAttrs(ctxt, elemName, attrName, defaultValue);
6223
}
6224
if (ctxt->sax2) {
6225
xmlAddSpecialAttr(ctxt, elemName, attrName, type);
6226
}
6227
if (defaultValue != NULL)
6228
xmlFree(defaultValue);
6229
GROW;
6230
}
6231
if (RAW == '>') {
6232
if (inputid != ctxt->input->id) {
6233
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6234
"Attribute list declaration doesn't start and"
6235
" stop in the same entity\n");
6236
}
6237
NEXT;
6238
}
6239
}
6240
}
6241
6242
/**
6243
* xmlParseElementMixedContentDecl:
6244
* @ctxt: an XML parser context
6245
* @inputchk: the input used for the current entity, needed for boundary checks
6246
*
6247
* DEPRECATED: Internal function, don't use.
6248
*
6249
* parse the declaration for a Mixed Element content
6250
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6251
*
6252
* [51] Mixed ::= '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*' |
6253
* '(' S? '#PCDATA' S? ')'
6254
*
6255
* [ VC: Proper Group/PE Nesting ] applies to [51] too (see [49])
6256
*
6257
* [ VC: No Duplicate Types ]
6258
* The same name must not appear more than once in a single
6259
* mixed-content declaration.
6260
*
6261
* returns: the list of the xmlElementContentPtr describing the element choices
6262
*/
6263
xmlElementContentPtr
6264
xmlParseElementMixedContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6265
xmlElementContentPtr ret = NULL, cur = NULL, n;
6266
const xmlChar *elem = NULL;
6267
6268
GROW;
6269
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6270
SKIP(7);
6271
SKIP_BLANKS;
6272
if (RAW == ')') {
6273
if (ctxt->input->id != inputchk) {
6274
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6275
"Element content declaration doesn't start and"
6276
" stop in the same entity\n");
6277
}
6278
NEXT;
6279
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6280
if (ret == NULL)
6281
return(NULL);
6282
if (RAW == '*') {
6283
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6284
NEXT;
6285
}
6286
return(ret);
6287
}
6288
if ((RAW == '(') || (RAW == '|')) {
6289
ret = cur = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_PCDATA);
6290
if (ret == NULL) return(NULL);
6291
}
6292
while ((RAW == '|') && (ctxt->instate != XML_PARSER_EOF)) {
6293
NEXT;
6294
if (elem == NULL) {
6295
ret = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6296
if (ret == NULL) {
6297
xmlFreeDocElementContent(ctxt->myDoc, cur);
6298
return(NULL);
6299
}
6300
ret->c1 = cur;
6301
if (cur != NULL)
6302
cur->parent = ret;
6303
cur = ret;
6304
} else {
6305
n = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6306
if (n == NULL) {
6307
xmlFreeDocElementContent(ctxt->myDoc, ret);
6308
return(NULL);
6309
}
6310
n->c1 = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6311
if (n->c1 != NULL)
6312
n->c1->parent = n;
6313
cur->c2 = n;
6314
if (n != NULL)
6315
n->parent = cur;
6316
cur = n;
6317
}
6318
SKIP_BLANKS;
6319
elem = xmlParseName(ctxt);
6320
if (elem == NULL) {
6321
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6322
"xmlParseElementMixedContentDecl : Name expected\n");
6323
xmlFreeDocElementContent(ctxt->myDoc, ret);
6324
return(NULL);
6325
}
6326
SKIP_BLANKS;
6327
GROW;
6328
}
6329
if ((RAW == ')') && (NXT(1) == '*')) {
6330
if (elem != NULL) {
6331
cur->c2 = xmlNewDocElementContent(ctxt->myDoc, elem,
6332
XML_ELEMENT_CONTENT_ELEMENT);
6333
if (cur->c2 != NULL)
6334
cur->c2->parent = cur;
6335
}
6336
if (ret != NULL)
6337
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6338
if (ctxt->input->id != inputchk) {
6339
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6340
"Element content declaration doesn't start and"
6341
" stop in the same entity\n");
6342
}
6343
SKIP(2);
6344
} else {
6345
xmlFreeDocElementContent(ctxt->myDoc, ret);
6346
xmlFatalErr(ctxt, XML_ERR_MIXED_NOT_STARTED, NULL);
6347
return(NULL);
6348
}
6349
6350
} else {
6351
xmlFatalErr(ctxt, XML_ERR_PCDATA_REQUIRED, NULL);
6352
}
6353
return(ret);
6354
}
6355
6356
/**
6357
* xmlParseElementChildrenContentDeclPriv:
6358
* @ctxt: an XML parser context
6359
* @inputchk: the input used for the current entity, needed for boundary checks
6360
* @depth: the level of recursion
6361
*
6362
* parse the declaration for a Mixed Element content
6363
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6364
*
6365
*
6366
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6367
*
6368
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6369
*
6370
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6371
*
6372
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6373
*
6374
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6375
* TODO Parameter-entity replacement text must be properly nested
6376
* with parenthesized groups. That is to say, if either of the
6377
* opening or closing parentheses in a choice, seq, or Mixed
6378
* construct is contained in the replacement text for a parameter
6379
* entity, both must be contained in the same replacement text. For
6380
* interoperability, if a parameter-entity reference appears in a
6381
* choice, seq, or Mixed construct, its replacement text should not
6382
* be empty, and neither the first nor last non-blank character of
6383
* the replacement text should be a connector (| or ,).
6384
*
6385
* Returns the tree of xmlElementContentPtr describing the element
6386
* hierarchy.
6387
*/
6388
static xmlElementContentPtr
6389
xmlParseElementChildrenContentDeclPriv(xmlParserCtxtPtr ctxt, int inputchk,
6390
int depth) {
6391
xmlElementContentPtr ret = NULL, cur = NULL, last = NULL, op = NULL;
6392
const xmlChar *elem;
6393
xmlChar type = 0;
6394
6395
if (((depth > 128) && ((ctxt->options & XML_PARSE_HUGE) == 0)) ||
6396
(depth > 2048)) {
6397
xmlFatalErrMsgInt(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED,
6398
"xmlParseElementChildrenContentDecl : depth %d too deep, use XML_PARSE_HUGE\n",
6399
depth);
6400
return(NULL);
6401
}
6402
SKIP_BLANKS;
6403
GROW;
6404
if (RAW == '(') {
6405
int inputid = ctxt->input->id;
6406
6407
/* Recurse on first child */
6408
NEXT;
6409
SKIP_BLANKS;
6410
cur = ret = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6411
depth + 1);
6412
if (cur == NULL)
6413
return(NULL);
6414
SKIP_BLANKS;
6415
GROW;
6416
} else {
6417
elem = xmlParseName(ctxt);
6418
if (elem == NULL) {
6419
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6420
return(NULL);
6421
}
6422
cur = ret = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6423
if (cur == NULL) {
6424
xmlErrMemory(ctxt, NULL);
6425
return(NULL);
6426
}
6427
GROW;
6428
if (RAW == '?') {
6429
cur->ocur = XML_ELEMENT_CONTENT_OPT;
6430
NEXT;
6431
} else if (RAW == '*') {
6432
cur->ocur = XML_ELEMENT_CONTENT_MULT;
6433
NEXT;
6434
} else if (RAW == '+') {
6435
cur->ocur = XML_ELEMENT_CONTENT_PLUS;
6436
NEXT;
6437
} else {
6438
cur->ocur = XML_ELEMENT_CONTENT_ONCE;
6439
}
6440
GROW;
6441
}
6442
SKIP_BLANKS;
6443
while ((RAW != ')') && (ctxt->instate != XML_PARSER_EOF)) {
6444
/*
6445
* Each loop we parse one separator and one element.
6446
*/
6447
if (RAW == ',') {
6448
if (type == 0) type = CUR;
6449
6450
/*
6451
* Detect "Name | Name , Name" error
6452
*/
6453
else if (type != CUR) {
6454
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6455
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6456
type);
6457
if ((last != NULL) && (last != ret))
6458
xmlFreeDocElementContent(ctxt->myDoc, last);
6459
if (ret != NULL)
6460
xmlFreeDocElementContent(ctxt->myDoc, ret);
6461
return(NULL);
6462
}
6463
NEXT;
6464
6465
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_SEQ);
6466
if (op == NULL) {
6467
if ((last != NULL) && (last != ret))
6468
xmlFreeDocElementContent(ctxt->myDoc, last);
6469
xmlFreeDocElementContent(ctxt->myDoc, ret);
6470
return(NULL);
6471
}
6472
if (last == NULL) {
6473
op->c1 = ret;
6474
if (ret != NULL)
6475
ret->parent = op;
6476
ret = cur = op;
6477
} else {
6478
cur->c2 = op;
6479
if (op != NULL)
6480
op->parent = cur;
6481
op->c1 = last;
6482
if (last != NULL)
6483
last->parent = op;
6484
cur =op;
6485
last = NULL;
6486
}
6487
} else if (RAW == '|') {
6488
if (type == 0) type = CUR;
6489
6490
/*
6491
* Detect "Name , Name | Name" error
6492
*/
6493
else if (type != CUR) {
6494
xmlFatalErrMsgInt(ctxt, XML_ERR_SEPARATOR_REQUIRED,
6495
"xmlParseElementChildrenContentDecl : '%c' expected\n",
6496
type);
6497
if ((last != NULL) && (last != ret))
6498
xmlFreeDocElementContent(ctxt->myDoc, last);
6499
if (ret != NULL)
6500
xmlFreeDocElementContent(ctxt->myDoc, ret);
6501
return(NULL);
6502
}
6503
NEXT;
6504
6505
op = xmlNewDocElementContent(ctxt->myDoc, NULL, XML_ELEMENT_CONTENT_OR);
6506
if (op == NULL) {
6507
if ((last != NULL) && (last != ret))
6508
xmlFreeDocElementContent(ctxt->myDoc, last);
6509
if (ret != NULL)
6510
xmlFreeDocElementContent(ctxt->myDoc, ret);
6511
return(NULL);
6512
}
6513
if (last == NULL) {
6514
op->c1 = ret;
6515
if (ret != NULL)
6516
ret->parent = op;
6517
ret = cur = op;
6518
} else {
6519
cur->c2 = op;
6520
if (op != NULL)
6521
op->parent = cur;
6522
op->c1 = last;
6523
if (last != NULL)
6524
last->parent = op;
6525
cur =op;
6526
last = NULL;
6527
}
6528
} else {
6529
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_FINISHED, NULL);
6530
if ((last != NULL) && (last != ret))
6531
xmlFreeDocElementContent(ctxt->myDoc, last);
6532
if (ret != NULL)
6533
xmlFreeDocElementContent(ctxt->myDoc, ret);
6534
return(NULL);
6535
}
6536
GROW;
6537
SKIP_BLANKS;
6538
GROW;
6539
if (RAW == '(') {
6540
int inputid = ctxt->input->id;
6541
/* Recurse on second child */
6542
NEXT;
6543
SKIP_BLANKS;
6544
last = xmlParseElementChildrenContentDeclPriv(ctxt, inputid,
6545
depth + 1);
6546
if (last == NULL) {
6547
if (ret != NULL)
6548
xmlFreeDocElementContent(ctxt->myDoc, ret);
6549
return(NULL);
6550
}
6551
SKIP_BLANKS;
6552
} else {
6553
elem = xmlParseName(ctxt);
6554
if (elem == NULL) {
6555
xmlFatalErr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED, NULL);
6556
if (ret != NULL)
6557
xmlFreeDocElementContent(ctxt->myDoc, ret);
6558
return(NULL);
6559
}
6560
last = xmlNewDocElementContent(ctxt->myDoc, elem, XML_ELEMENT_CONTENT_ELEMENT);
6561
if (last == NULL) {
6562
if (ret != NULL)
6563
xmlFreeDocElementContent(ctxt->myDoc, ret);
6564
return(NULL);
6565
}
6566
if (RAW == '?') {
6567
last->ocur = XML_ELEMENT_CONTENT_OPT;
6568
NEXT;
6569
} else if (RAW == '*') {
6570
last->ocur = XML_ELEMENT_CONTENT_MULT;
6571
NEXT;
6572
} else if (RAW == '+') {
6573
last->ocur = XML_ELEMENT_CONTENT_PLUS;
6574
NEXT;
6575
} else {
6576
last->ocur = XML_ELEMENT_CONTENT_ONCE;
6577
}
6578
}
6579
SKIP_BLANKS;
6580
GROW;
6581
}
6582
if ((cur != NULL) && (last != NULL)) {
6583
cur->c2 = last;
6584
if (last != NULL)
6585
last->parent = cur;
6586
}
6587
if (ctxt->input->id != inputchk) {
6588
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6589
"Element content declaration doesn't start and stop in"
6590
" the same entity\n");
6591
}
6592
NEXT;
6593
if (RAW == '?') {
6594
if (ret != NULL) {
6595
if ((ret->ocur == XML_ELEMENT_CONTENT_PLUS) ||
6596
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6597
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6598
else
6599
ret->ocur = XML_ELEMENT_CONTENT_OPT;
6600
}
6601
NEXT;
6602
} else if (RAW == '*') {
6603
if (ret != NULL) {
6604
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6605
cur = ret;
6606
/*
6607
* Some normalization:
6608
* (a | b* | c?)* == (a | b | c)*
6609
*/
6610
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6611
if ((cur->c1 != NULL) &&
6612
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6613
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT)))
6614
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6615
if ((cur->c2 != NULL) &&
6616
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6617
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT)))
6618
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6619
cur = cur->c2;
6620
}
6621
}
6622
NEXT;
6623
} else if (RAW == '+') {
6624
if (ret != NULL) {
6625
int found = 0;
6626
6627
if ((ret->ocur == XML_ELEMENT_CONTENT_OPT) ||
6628
(ret->ocur == XML_ELEMENT_CONTENT_MULT))
6629
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6630
else
6631
ret->ocur = XML_ELEMENT_CONTENT_PLUS;
6632
/*
6633
* Some normalization:
6634
* (a | b*)+ == (a | b)*
6635
* (a | b?)+ == (a | b)*
6636
*/
6637
while ((cur != NULL) && (cur->type == XML_ELEMENT_CONTENT_OR)) {
6638
if ((cur->c1 != NULL) &&
6639
((cur->c1->ocur == XML_ELEMENT_CONTENT_OPT) ||
6640
(cur->c1->ocur == XML_ELEMENT_CONTENT_MULT))) {
6641
cur->c1->ocur = XML_ELEMENT_CONTENT_ONCE;
6642
found = 1;
6643
}
6644
if ((cur->c2 != NULL) &&
6645
((cur->c2->ocur == XML_ELEMENT_CONTENT_OPT) ||
6646
(cur->c2->ocur == XML_ELEMENT_CONTENT_MULT))) {
6647
cur->c2->ocur = XML_ELEMENT_CONTENT_ONCE;
6648
found = 1;
6649
}
6650
cur = cur->c2;
6651
}
6652
if (found)
6653
ret->ocur = XML_ELEMENT_CONTENT_MULT;
6654
}
6655
NEXT;
6656
}
6657
return(ret);
6658
}
6659
6660
/**
6661
* xmlParseElementChildrenContentDecl:
6662
* @ctxt: an XML parser context
6663
* @inputchk: the input used for the current entity, needed for boundary checks
6664
*
6665
* DEPRECATED: Internal function, don't use.
6666
*
6667
* parse the declaration for a Mixed Element content
6668
* The leading '(' and spaces have been skipped in xmlParseElementContentDecl
6669
*
6670
* [47] children ::= (choice | seq) ('?' | '*' | '+')?
6671
*
6672
* [48] cp ::= (Name | choice | seq) ('?' | '*' | '+')?
6673
*
6674
* [49] choice ::= '(' S? cp ( S? '|' S? cp )* S? ')'
6675
*
6676
* [50] seq ::= '(' S? cp ( S? ',' S? cp )* S? ')'
6677
*
6678
* [ VC: Proper Group/PE Nesting ] applies to [49] and [50]
6679
* TODO Parameter-entity replacement text must be properly nested
6680
* with parenthesized groups. That is to say, if either of the
6681
* opening or closing parentheses in a choice, seq, or Mixed
6682
* construct is contained in the replacement text for a parameter
6683
* entity, both must be contained in the same replacement text. For
6684
* interoperability, if a parameter-entity reference appears in a
6685
* choice, seq, or Mixed construct, its replacement text should not
6686
* be empty, and neither the first nor last non-blank character of
6687
* the replacement text should be a connector (| or ,).
6688
*
6689
* Returns the tree of xmlElementContentPtr describing the element
6690
* hierarchy.
6691
*/
6692
xmlElementContentPtr
6693
xmlParseElementChildrenContentDecl(xmlParserCtxtPtr ctxt, int inputchk) {
6694
/* stub left for API/ABI compat */
6695
return(xmlParseElementChildrenContentDeclPriv(ctxt, inputchk, 1));
6696
}
6697
6698
/**
6699
* xmlParseElementContentDecl:
6700
* @ctxt: an XML parser context
6701
* @name: the name of the element being defined.
6702
* @result: the Element Content pointer will be stored here if any
6703
*
6704
* DEPRECATED: Internal function, don't use.
6705
*
6706
* parse the declaration for an Element content either Mixed or Children,
6707
* the cases EMPTY and ANY are handled directly in xmlParseElementDecl
6708
*
6709
* [46] contentspec ::= 'EMPTY' | 'ANY' | Mixed | children
6710
*
6711
* returns: the type of element content XML_ELEMENT_TYPE_xxx
6712
*/
6713
6714
int
6715
xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, const xmlChar *name,
6716
xmlElementContentPtr *result) {
6717
6718
xmlElementContentPtr tree = NULL;
6719
int inputid = ctxt->input->id;
6720
int res;
6721
6722
*result = NULL;
6723
6724
if (RAW != '(') {
6725
xmlFatalErrMsgStr(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6726
"xmlParseElementContentDecl : %s '(' expected\n", name);
6727
return(-1);
6728
}
6729
NEXT;
6730
GROW;
6731
if (ctxt->instate == XML_PARSER_EOF)
6732
return(-1);
6733
SKIP_BLANKS;
6734
if (CMP7(CUR_PTR, '#', 'P', 'C', 'D', 'A', 'T', 'A')) {
6735
tree = xmlParseElementMixedContentDecl(ctxt, inputid);
6736
res = XML_ELEMENT_TYPE_MIXED;
6737
} else {
6738
tree = xmlParseElementChildrenContentDeclPriv(ctxt, inputid, 1);
6739
res = XML_ELEMENT_TYPE_ELEMENT;
6740
}
6741
SKIP_BLANKS;
6742
*result = tree;
6743
return(res);
6744
}
6745
6746
/**
6747
* xmlParseElementDecl:
6748
* @ctxt: an XML parser context
6749
*
6750
* DEPRECATED: Internal function, don't use.
6751
*
6752
* Parse an element declaration. Always consumes '<!'.
6753
*
6754
* [45] elementdecl ::= '<!ELEMENT' S Name S contentspec S? '>'
6755
*
6756
* [ VC: Unique Element Type Declaration ]
6757
* No element type may be declared more than once
6758
*
6759
* Returns the type of the element, or -1 in case of error
6760
*/
6761
int
6762
xmlParseElementDecl(xmlParserCtxtPtr ctxt) {
6763
const xmlChar *name;
6764
int ret = -1;
6765
xmlElementContentPtr content = NULL;
6766
6767
if ((CUR != '<') || (NXT(1) != '!'))
6768
return(ret);
6769
SKIP(2);
6770
6771
/* GROW; done in the caller */
6772
if (CMP7(CUR_PTR, 'E', 'L', 'E', 'M', 'E', 'N', 'T')) {
6773
int inputid = ctxt->input->id;
6774
6775
SKIP(7);
6776
if (SKIP_BLANKS == 0) {
6777
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6778
"Space required after 'ELEMENT'\n");
6779
return(-1);
6780
}
6781
name = xmlParseName(ctxt);
6782
if (name == NULL) {
6783
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
6784
"xmlParseElementDecl: no name for Element\n");
6785
return(-1);
6786
}
6787
if (SKIP_BLANKS == 0) {
6788
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
6789
"Space required after the element name\n");
6790
}
6791
if (CMP5(CUR_PTR, 'E', 'M', 'P', 'T', 'Y')) {
6792
SKIP(5);
6793
/*
6794
* Element must always be empty.
6795
*/
6796
ret = XML_ELEMENT_TYPE_EMPTY;
6797
} else if ((RAW == 'A') && (NXT(1) == 'N') &&
6798
(NXT(2) == 'Y')) {
6799
SKIP(3);
6800
/*
6801
* Element is a generic container.
6802
*/
6803
ret = XML_ELEMENT_TYPE_ANY;
6804
} else if (RAW == '(') {
6805
ret = xmlParseElementContentDecl(ctxt, name, &content);
6806
} else {
6807
/*
6808
* [ WFC: PEs in Internal Subset ] error handling.
6809
*/
6810
if ((RAW == '%') && (ctxt->external == 0) &&
6811
(ctxt->inputNr == 1)) {
6812
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_IN_INT_SUBSET,
6813
"PEReference: forbidden within markup decl in internal subset\n");
6814
} else {
6815
xmlFatalErrMsg(ctxt, XML_ERR_ELEMCONTENT_NOT_STARTED,
6816
"xmlParseElementDecl: 'EMPTY', 'ANY' or '(' expected\n");
6817
}
6818
return(-1);
6819
}
6820
6821
SKIP_BLANKS;
6822
6823
if (RAW != '>') {
6824
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
6825
if (content != NULL) {
6826
xmlFreeDocElementContent(ctxt->myDoc, content);
6827
}
6828
} else {
6829
if (inputid != ctxt->input->id) {
6830
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6831
"Element declaration doesn't start and stop in"
6832
" the same entity\n");
6833
}
6834
6835
NEXT;
6836
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
6837
(ctxt->sax->elementDecl != NULL)) {
6838
if (content != NULL)
6839
content->parent = NULL;
6840
ctxt->sax->elementDecl(ctxt->userData, name, ret,
6841
content);
6842
if ((content != NULL) && (content->parent == NULL)) {
6843
/*
6844
* this is a trick: if xmlAddElementDecl is called,
6845
* instead of copying the full tree it is plugged directly
6846
* if called from the parser. Avoid duplicating the
6847
* interfaces or change the API/ABI
6848
*/
6849
xmlFreeDocElementContent(ctxt->myDoc, content);
6850
}
6851
} else if (content != NULL) {
6852
xmlFreeDocElementContent(ctxt->myDoc, content);
6853
}
6854
}
6855
}
6856
return(ret);
6857
}
6858
6859
/**
6860
* xmlParseConditionalSections
6861
* @ctxt: an XML parser context
6862
*
6863
* Parse a conditional section. Always consumes '<!['.
6864
*
6865
* [61] conditionalSect ::= includeSect | ignoreSect
6866
* [62] includeSect ::= '<![' S? 'INCLUDE' S? '[' extSubsetDecl ']]>'
6867
* [63] ignoreSect ::= '<![' S? 'IGNORE' S? '[' ignoreSectContents* ']]>'
6868
* [64] ignoreSectContents ::= Ignore ('<![' ignoreSectContents ']]>' Ignore)*
6869
* [65] Ignore ::= Char* - (Char* ('<![' | ']]>') Char*)
6870
*/
6871
6872
static void
6873
xmlParseConditionalSections(xmlParserCtxtPtr ctxt) {
6874
int *inputIds = NULL;
6875
size_t inputIdsSize = 0;
6876
size_t depth = 0;
6877
6878
while (ctxt->instate != XML_PARSER_EOF) {
6879
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6880
int id = ctxt->input->id;
6881
6882
SKIP(3);
6883
SKIP_BLANKS;
6884
6885
if (CMP7(CUR_PTR, 'I', 'N', 'C', 'L', 'U', 'D', 'E')) {
6886
SKIP(7);
6887
SKIP_BLANKS;
6888
if (RAW != '[') {
6889
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6890
xmlHaltParser(ctxt);
6891
goto error;
6892
}
6893
if (ctxt->input->id != id) {
6894
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6895
"All markup of the conditional section is"
6896
" not in the same entity\n");
6897
}
6898
NEXT;
6899
6900
if (inputIdsSize <= depth) {
6901
int *tmp;
6902
6903
inputIdsSize = (inputIdsSize == 0 ? 4 : inputIdsSize * 2);
6904
tmp = (int *) xmlRealloc(inputIds,
6905
inputIdsSize * sizeof(int));
6906
if (tmp == NULL) {
6907
xmlErrMemory(ctxt, NULL);
6908
goto error;
6909
}
6910
inputIds = tmp;
6911
}
6912
inputIds[depth] = id;
6913
depth++;
6914
} else if (CMP6(CUR_PTR, 'I', 'G', 'N', 'O', 'R', 'E')) {
6915
size_t ignoreDepth = 0;
6916
6917
SKIP(6);
6918
SKIP_BLANKS;
6919
if (RAW != '[') {
6920
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID, NULL);
6921
xmlHaltParser(ctxt);
6922
goto error;
6923
}
6924
if (ctxt->input->id != id) {
6925
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6926
"All markup of the conditional section is"
6927
" not in the same entity\n");
6928
}
6929
NEXT;
6930
6931
while (RAW != 0) {
6932
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
6933
SKIP(3);
6934
ignoreDepth++;
6935
/* Check for integer overflow */
6936
if (ignoreDepth == 0) {
6937
xmlErrMemory(ctxt, NULL);
6938
goto error;
6939
}
6940
} else if ((RAW == ']') && (NXT(1) == ']') &&
6941
(NXT(2) == '>')) {
6942
if (ignoreDepth == 0)
6943
break;
6944
SKIP(3);
6945
ignoreDepth--;
6946
} else {
6947
NEXT;
6948
}
6949
}
6950
6951
if (RAW == 0) {
6952
xmlFatalErr(ctxt, XML_ERR_CONDSEC_NOT_FINISHED, NULL);
6953
goto error;
6954
}
6955
if (ctxt->input->id != id) {
6956
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6957
"All markup of the conditional section is"
6958
" not in the same entity\n");
6959
}
6960
SKIP(3);
6961
} else {
6962
xmlFatalErr(ctxt, XML_ERR_CONDSEC_INVALID_KEYWORD, NULL);
6963
xmlHaltParser(ctxt);
6964
goto error;
6965
}
6966
} else if ((depth > 0) &&
6967
(RAW == ']') && (NXT(1) == ']') && (NXT(2) == '>')) {
6968
depth--;
6969
if (ctxt->input->id != inputIds[depth]) {
6970
xmlFatalErrMsg(ctxt, XML_ERR_ENTITY_BOUNDARY,
6971
"All markup of the conditional section is not"
6972
" in the same entity\n");
6973
}
6974
SKIP(3);
6975
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
6976
xmlParseMarkupDecl(ctxt);
6977
} else {
6978
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
6979
xmlHaltParser(ctxt);
6980
goto error;
6981
}
6982
6983
if (depth == 0)
6984
break;
6985
6986
SKIP_BLANKS;
6987
SHRINK;
6988
GROW;
6989
}
6990
6991
error:
6992
xmlFree(inputIds);
6993
}
6994
6995
/**
6996
* xmlParseMarkupDecl:
6997
* @ctxt: an XML parser context
6998
*
6999
* DEPRECATED: Internal function, don't use.
7000
*
7001
* Parse markup declarations. Always consumes '<!' or '<?'.
7002
*
7003
* [29] markupdecl ::= elementdecl | AttlistDecl | EntityDecl |
7004
* NotationDecl | PI | Comment
7005
*
7006
* [ VC: Proper Declaration/PE Nesting ]
7007
* Parameter-entity replacement text must be properly nested with
7008
* markup declarations. That is to say, if either the first character
7009
* or the last character of a markup declaration (markupdecl above) is
7010
* contained in the replacement text for a parameter-entity reference,
7011
* both must be contained in the same replacement text.
7012
*
7013
* [ WFC: PEs in Internal Subset ]
7014
* In the internal DTD subset, parameter-entity references can occur
7015
* only where markup declarations can occur, not within markup declarations.
7016
* (This does not apply to references that occur in external parameter
7017
* entities or to the external subset.)
7018
*/
7019
void
7020
xmlParseMarkupDecl(xmlParserCtxtPtr ctxt) {
7021
GROW;
7022
if (CUR == '<') {
7023
if (NXT(1) == '!') {
7024
switch (NXT(2)) {
7025
case 'E':
7026
if (NXT(3) == 'L')
7027
xmlParseElementDecl(ctxt);
7028
else if (NXT(3) == 'N')
7029
xmlParseEntityDecl(ctxt);
7030
else
7031
SKIP(2);
7032
break;
7033
case 'A':
7034
xmlParseAttributeListDecl(ctxt);
7035
break;
7036
case 'N':
7037
xmlParseNotationDecl(ctxt);
7038
break;
7039
case '-':
7040
xmlParseComment(ctxt);
7041
break;
7042
default:
7043
/* there is an error but it will be detected later */
7044
SKIP(2);
7045
break;
7046
}
7047
} else if (NXT(1) == '?') {
7048
xmlParsePI(ctxt);
7049
}
7050
}
7051
7052
/*
7053
* detect requirement to exit there and act accordingly
7054
* and avoid having instate overridden later on
7055
*/
7056
if (ctxt->instate == XML_PARSER_EOF)
7057
return;
7058
7059
ctxt->instate = XML_PARSER_DTD;
7060
}
7061
7062
/**
7063
* xmlParseTextDecl:
7064
* @ctxt: an XML parser context
7065
*
7066
* DEPRECATED: Internal function, don't use.
7067
*
7068
* parse an XML declaration header for external entities
7069
*
7070
* [77] TextDecl ::= '<?xml' VersionInfo? EncodingDecl S? '?>'
7071
*/
7072
7073
void
7074
xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
7075
xmlChar *version;
7076
int oldstate;
7077
7078
/*
7079
* We know that '<?xml' is here.
7080
*/
7081
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
7082
SKIP(5);
7083
} else {
7084
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_STARTED, NULL);
7085
return;
7086
}
7087
7088
/* Avoid expansion of parameter entities when skipping blanks. */
7089
oldstate = ctxt->instate;
7090
ctxt->instate = XML_PARSER_START;
7091
7092
if (SKIP_BLANKS == 0) {
7093
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7094
"Space needed after '<?xml'\n");
7095
}
7096
7097
/*
7098
* We may have the VersionInfo here.
7099
*/
7100
version = xmlParseVersionInfo(ctxt);
7101
if (version == NULL)
7102
version = xmlCharStrdup(XML_DEFAULT_VERSION);
7103
else {
7104
if (SKIP_BLANKS == 0) {
7105
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
7106
"Space needed here\n");
7107
}
7108
}
7109
ctxt->input->version = version;
7110
7111
/*
7112
* We must have the encoding declaration
7113
*/
7114
xmlParseEncodingDecl(ctxt);
7115
if (ctxt->instate == XML_PARSER_EOF)
7116
return;
7117
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7118
/*
7119
* The XML REC instructs us to stop parsing right here
7120
*/
7121
ctxt->instate = oldstate;
7122
return;
7123
}
7124
7125
SKIP_BLANKS;
7126
if ((RAW == '?') && (NXT(1) == '>')) {
7127
SKIP(2);
7128
} else if (RAW == '>') {
7129
/* Deprecated old WD ... */
7130
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7131
NEXT;
7132
} else {
7133
int c;
7134
7135
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
7136
while ((c = CUR) != 0) {
7137
NEXT;
7138
if (c == '>')
7139
break;
7140
}
7141
}
7142
7143
if (ctxt->instate != XML_PARSER_EOF)
7144
ctxt->instate = oldstate;
7145
}
7146
7147
/**
7148
* xmlParseExternalSubset:
7149
* @ctxt: an XML parser context
7150
* @ExternalID: the external identifier
7151
* @SystemID: the system identifier (or URL)
7152
*
7153
* parse Markup declarations from an external subset
7154
*
7155
* [30] extSubset ::= textDecl? extSubsetDecl
7156
*
7157
* [31] extSubsetDecl ::= (markupdecl | conditionalSect | PEReference | S) *
7158
*/
7159
void
7160
xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
7161
const xmlChar *SystemID) {
7162
xmlDetectSAX2(ctxt);
7163
7164
xmlDetectEncoding(ctxt);
7165
7166
if (CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) {
7167
xmlParseTextDecl(ctxt);
7168
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
7169
/*
7170
* The XML REC instructs us to stop parsing right here
7171
*/
7172
xmlHaltParser(ctxt);
7173
return;
7174
}
7175
}
7176
if (ctxt->myDoc == NULL) {
7177
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
7178
if (ctxt->myDoc == NULL) {
7179
xmlErrMemory(ctxt, "New Doc failed");
7180
return;
7181
}
7182
ctxt->myDoc->properties = XML_DOC_INTERNAL;
7183
}
7184
if ((ctxt->myDoc != NULL) && (ctxt->myDoc->intSubset == NULL))
7185
xmlCreateIntSubset(ctxt->myDoc, NULL, ExternalID, SystemID);
7186
7187
ctxt->instate = XML_PARSER_DTD;
7188
ctxt->external = 1;
7189
SKIP_BLANKS;
7190
while ((ctxt->instate != XML_PARSER_EOF) && (RAW != 0)) {
7191
GROW;
7192
if ((RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
7193
xmlParseConditionalSections(ctxt);
7194
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
7195
xmlParseMarkupDecl(ctxt);
7196
} else {
7197
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7198
xmlHaltParser(ctxt);
7199
return;
7200
}
7201
SKIP_BLANKS;
7202
SHRINK;
7203
}
7204
7205
if (RAW != 0) {
7206
xmlFatalErr(ctxt, XML_ERR_EXT_SUBSET_NOT_FINISHED, NULL);
7207
}
7208
7209
}
7210
7211
/**
7212
* xmlParseReference:
7213
* @ctxt: an XML parser context
7214
*
7215
* DEPRECATED: Internal function, don't use.
7216
*
7217
* parse and handle entity references in content, depending on the SAX
7218
* interface, this may end-up in a call to character() if this is a
7219
* CharRef, a predefined entity, if there is no reference() callback.
7220
* or if the parser was asked to switch to that mode.
7221
*
7222
* Always consumes '&'.
7223
*
7224
* [67] Reference ::= EntityRef | CharRef
7225
*/
7226
void
7227
xmlParseReference(xmlParserCtxtPtr ctxt) {
7228
xmlEntityPtr ent;
7229
xmlChar *val;
7230
int was_checked;
7231
xmlNodePtr list = NULL;
7232
xmlParserErrors ret = XML_ERR_OK;
7233
7234
7235
if (RAW != '&')
7236
return;
7237
7238
/*
7239
* Simple case of a CharRef
7240
*/
7241
if (NXT(1) == '#') {
7242
int i = 0;
7243
xmlChar out[16];
7244
int value = xmlParseCharRef(ctxt);
7245
7246
if (value == 0)
7247
return;
7248
7249
/*
7250
* Just encode the value in UTF-8
7251
*/
7252
COPY_BUF(out, i, value);
7253
out[i] = 0;
7254
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7255
(!ctxt->disableSAX))
7256
ctxt->sax->characters(ctxt->userData, out, i);
7257
return;
7258
}
7259
7260
/*
7261
* We are seeing an entity reference
7262
*/
7263
ent = xmlParseEntityRef(ctxt);
7264
if (ent == NULL) return;
7265
if (!ctxt->wellFormed)
7266
return;
7267
was_checked = ent->flags & XML_ENT_PARSED;
7268
7269
/* special case of predefined entities */
7270
if ((ent->name == NULL) ||
7271
(ent->etype == XML_INTERNAL_PREDEFINED_ENTITY)) {
7272
val = ent->content;
7273
if (val == NULL) return;
7274
/*
7275
* inline the entity.
7276
*/
7277
if ((ctxt->sax != NULL) && (ctxt->sax->characters != NULL) &&
7278
(!ctxt->disableSAX))
7279
ctxt->sax->characters(ctxt->userData, val, xmlStrlen(val));
7280
return;
7281
}
7282
7283
/*
7284
* Some users try to parse entities on their own and used to set
7285
* the renamed "checked" member. Fix the flags to cover this
7286
* case.
7287
*/
7288
if (((ent->flags & XML_ENT_PARSED) == 0) && (ent->children != NULL))
7289
ent->flags |= XML_ENT_PARSED;
7290
7291
/*
7292
* The first reference to the entity trigger a parsing phase
7293
* where the ent->children is filled with the result from
7294
* the parsing.
7295
* Note: external parsed entities will not be loaded, it is not
7296
* required for a non-validating parser, unless the parsing option
7297
* of validating, or substituting entities were given. Doing so is
7298
* far more secure as the parser will only process data coming from
7299
* the document entity by default.
7300
*
7301
* FIXME: This doesn't work correctly since entities can be
7302
* expanded with different namespace declarations in scope.
7303
* For example:
7304
*
7305
* <!DOCTYPE doc [
7306
* <!ENTITY ent "<ns:elem/>">
7307
* ]>
7308
* <doc>
7309
* <decl1 xmlns:ns="urn:ns1">
7310
* &ent;
7311
* </decl1>
7312
* <decl2 xmlns:ns="urn:ns2">
7313
* &ent;
7314
* </decl2>
7315
* </doc>
7316
*
7317
* Proposed fix:
7318
*
7319
* - Remove the ent->owner optimization which tries to avoid the
7320
* initial copy of the entity. Always make entities own the
7321
* subtree.
7322
* - Ignore current namespace declarations when parsing the
7323
* entity. If a prefix can't be resolved, don't report an error
7324
* but mark it as unresolved.
7325
* - Try to resolve these prefixes when expanding the entity.
7326
* This will require a specialized version of xmlStaticCopyNode
7327
* which can also make use of the namespace hash table to avoid
7328
* quadratic behavior.
7329
*
7330
* Alternatively, we could simply reparse the entity on each
7331
* expansion like we already do with custom SAX callbacks.
7332
* External entity content should be cached in this case.
7333
*/
7334
if (((ent->flags & XML_ENT_PARSED) == 0) &&
7335
((ent->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY) ||
7336
(ctxt->options & (XML_PARSE_NOENT | XML_PARSE_DTDVALID)))) {
7337
unsigned long oldsizeentcopy = ctxt->sizeentcopy;
7338
7339
/*
7340
* This is a bit hackish but this seems the best
7341
* way to make sure both SAX and DOM entity support
7342
* behaves okay.
7343
*/
7344
void *user_data;
7345
if (ctxt->userData == ctxt)
7346
user_data = NULL;
7347
else
7348
user_data = ctxt->userData;
7349
7350
/* Avoid overflow as much as possible */
7351
ctxt->sizeentcopy = 0;
7352
7353
if (ent->flags & XML_ENT_EXPANDING) {
7354
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7355
xmlHaltParser(ctxt);
7356
return;
7357
}
7358
7359
ent->flags |= XML_ENT_EXPANDING;
7360
7361
/*
7362
* Check that this entity is well formed
7363
* 4.3.2: An internal general parsed entity is well-formed
7364
* if its replacement text matches the production labeled
7365
* content.
7366
*/
7367
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7368
ctxt->depth++;
7369
ret = xmlParseBalancedChunkMemoryInternal(ctxt, ent->content,
7370
user_data, &list);
7371
ctxt->depth--;
7372
7373
} else if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7374
ctxt->depth++;
7375
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt, ctxt->sax,
7376
user_data, ctxt->depth, ent->URI,
7377
ent->ExternalID, &list);
7378
ctxt->depth--;
7379
} else {
7380
ret = XML_ERR_ENTITY_PE_INTERNAL;
7381
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7382
"invalid entity type found\n", NULL);
7383
}
7384
7385
ent->flags &= ~XML_ENT_EXPANDING;
7386
ent->flags |= XML_ENT_PARSED | XML_ENT_CHECKED;
7387
ent->expandedSize = ctxt->sizeentcopy;
7388
if (ret == XML_ERR_ENTITY_LOOP) {
7389
xmlHaltParser(ctxt);
7390
xmlFreeNodeList(list);
7391
return;
7392
}
7393
if (xmlParserEntityCheck(ctxt, oldsizeentcopy)) {
7394
xmlFreeNodeList(list);
7395
return;
7396
}
7397
7398
if ((ret == XML_ERR_OK) && (list != NULL)) {
7399
ent->children = list;
7400
/*
7401
* Prune it directly in the generated document
7402
* except for single text nodes.
7403
*/
7404
if ((ctxt->replaceEntities == 0) ||
7405
(ctxt->parseMode == XML_PARSE_READER) ||
7406
((list->type == XML_TEXT_NODE) &&
7407
(list->next == NULL))) {
7408
ent->owner = 1;
7409
while (list != NULL) {
7410
list->parent = (xmlNodePtr) ent;
7411
if (list->doc != ent->doc)
7412
xmlSetTreeDoc(list, ent->doc);
7413
if (list->next == NULL)
7414
ent->last = list;
7415
list = list->next;
7416
}
7417
list = NULL;
7418
} else {
7419
ent->owner = 0;
7420
while (list != NULL) {
7421
list->parent = (xmlNodePtr) ctxt->node;
7422
list->doc = ctxt->myDoc;
7423
if (list->next == NULL)
7424
ent->last = list;
7425
list = list->next;
7426
}
7427
list = ent->children;
7428
#ifdef LIBXML_LEGACY_ENABLED
7429
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7430
xmlAddEntityReference(ent, list, NULL);
7431
#endif /* LIBXML_LEGACY_ENABLED */
7432
}
7433
} else if ((ret != XML_ERR_OK) &&
7434
(ret != XML_WAR_UNDECLARED_ENTITY)) {
7435
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7436
"Entity '%s' failed to parse\n", ent->name);
7437
if (ent->content != NULL)
7438
ent->content[0] = 0;
7439
} else if (list != NULL) {
7440
xmlFreeNodeList(list);
7441
list = NULL;
7442
}
7443
7444
/* Prevent entity from being parsed and expanded twice (Bug 760367). */
7445
was_checked = 0;
7446
}
7447
7448
/*
7449
* Now that the entity content has been gathered
7450
* provide it to the application, this can take different forms based
7451
* on the parsing modes.
7452
*/
7453
if (ent->children == NULL) {
7454
/*
7455
* Probably running in SAX mode and the callbacks don't
7456
* build the entity content. So unless we already went
7457
* though parsing for first checking go though the entity
7458
* content to generate callbacks associated to the entity
7459
*/
7460
if (was_checked != 0) {
7461
void *user_data;
7462
/*
7463
* This is a bit hackish but this seems the best
7464
* way to make sure both SAX and DOM entity support
7465
* behaves okay.
7466
*/
7467
if (ctxt->userData == ctxt)
7468
user_data = NULL;
7469
else
7470
user_data = ctxt->userData;
7471
7472
if (ent->etype == XML_INTERNAL_GENERAL_ENTITY) {
7473
ctxt->depth++;
7474
ret = xmlParseBalancedChunkMemoryInternal(ctxt,
7475
ent->content, user_data, NULL);
7476
ctxt->depth--;
7477
} else if (ent->etype ==
7478
XML_EXTERNAL_GENERAL_PARSED_ENTITY) {
7479
unsigned long oldsizeentities = ctxt->sizeentities;
7480
7481
ctxt->depth++;
7482
ret = xmlParseExternalEntityPrivate(ctxt->myDoc, ctxt,
7483
ctxt->sax, user_data, ctxt->depth,
7484
ent->URI, ent->ExternalID, NULL);
7485
ctxt->depth--;
7486
7487
/* Undo the change to sizeentities */
7488
ctxt->sizeentities = oldsizeentities;
7489
} else {
7490
ret = XML_ERR_ENTITY_PE_INTERNAL;
7491
xmlErrMsgStr(ctxt, XML_ERR_INTERNAL_ERROR,
7492
"invalid entity type found\n", NULL);
7493
}
7494
if (ret == XML_ERR_ENTITY_LOOP) {
7495
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
7496
return;
7497
}
7498
if (xmlParserEntityCheck(ctxt, 0))
7499
return;
7500
}
7501
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7502
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7503
/*
7504
* Entity reference callback comes second, it's somewhat
7505
* superfluous but a compatibility to historical behaviour
7506
*/
7507
ctxt->sax->reference(ctxt->userData, ent->name);
7508
}
7509
return;
7510
}
7511
7512
/*
7513
* We also check for amplification if entities aren't substituted.
7514
* They might be expanded later.
7515
*/
7516
if ((was_checked != 0) &&
7517
(xmlParserEntityCheck(ctxt, ent->expandedSize)))
7518
return;
7519
7520
/*
7521
* If we didn't get any children for the entity being built
7522
*/
7523
if ((ctxt->sax != NULL) && (ctxt->sax->reference != NULL) &&
7524
(ctxt->replaceEntities == 0) && (!ctxt->disableSAX)) {
7525
/*
7526
* Create a node.
7527
*/
7528
ctxt->sax->reference(ctxt->userData, ent->name);
7529
return;
7530
}
7531
7532
if (ctxt->replaceEntities) {
7533
/*
7534
* There is a problem on the handling of _private for entities
7535
* (bug 155816): Should we copy the content of the field from
7536
* the entity (possibly overwriting some value set by the user
7537
* when a copy is created), should we leave it alone, or should
7538
* we try to take care of different situations? The problem
7539
* is exacerbated by the usage of this field by the xmlReader.
7540
* To fix this bug, we look at _private on the created node
7541
* and, if it's NULL, we copy in whatever was in the entity.
7542
* If it's not NULL we leave it alone. This is somewhat of a
7543
* hack - maybe we should have further tests to determine
7544
* what to do.
7545
*/
7546
if (ctxt->node != NULL) {
7547
/*
7548
* Seems we are generating the DOM content, do
7549
* a simple tree copy for all references except the first
7550
* In the first occurrence list contains the replacement.
7551
*/
7552
if (((list == NULL) && (ent->owner == 0)) ||
7553
(ctxt->parseMode == XML_PARSE_READER)) {
7554
xmlNodePtr nw = NULL, cur, firstChild = NULL;
7555
7556
/*
7557
* when operating on a reader, the entities definitions
7558
* are always owning the entities subtree.
7559
if (ctxt->parseMode == XML_PARSE_READER)
7560
ent->owner = 1;
7561
*/
7562
7563
cur = ent->children;
7564
while (cur != NULL) {
7565
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7566
if (nw != NULL) {
7567
if (nw->_private == NULL)
7568
nw->_private = cur->_private;
7569
if (firstChild == NULL){
7570
firstChild = nw;
7571
}
7572
nw = xmlAddChild(ctxt->node, nw);
7573
}
7574
if (cur == ent->last) {
7575
/*
7576
* needed to detect some strange empty
7577
* node cases in the reader tests
7578
*/
7579
if ((ctxt->parseMode == XML_PARSE_READER) &&
7580
(nw != NULL) &&
7581
(nw->type == XML_ELEMENT_NODE) &&
7582
(nw->children == NULL))
7583
nw->extra = 1;
7584
7585
break;
7586
}
7587
cur = cur->next;
7588
}
7589
#ifdef LIBXML_LEGACY_ENABLED
7590
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7591
xmlAddEntityReference(ent, firstChild, nw);
7592
#endif /* LIBXML_LEGACY_ENABLED */
7593
} else if ((list == NULL) || (ctxt->inputNr > 0)) {
7594
xmlNodePtr nw = NULL, cur, next, last,
7595
firstChild = NULL;
7596
7597
/*
7598
* Copy the entity child list and make it the new
7599
* entity child list. The goal is to make sure any
7600
* ID or REF referenced will be the one from the
7601
* document content and not the entity copy.
7602
*/
7603
cur = ent->children;
7604
ent->children = NULL;
7605
last = ent->last;
7606
ent->last = NULL;
7607
while (cur != NULL) {
7608
next = cur->next;
7609
cur->next = NULL;
7610
cur->parent = NULL;
7611
nw = xmlDocCopyNode(cur, ctxt->myDoc, 1);
7612
if (nw != NULL) {
7613
if (nw->_private == NULL)
7614
nw->_private = cur->_private;
7615
if (firstChild == NULL){
7616
firstChild = cur;
7617
}
7618
xmlAddChild((xmlNodePtr) ent, nw);
7619
}
7620
xmlAddChild(ctxt->node, cur);
7621
if (cur == last)
7622
break;
7623
cur = next;
7624
}
7625
if (ent->owner == 0)
7626
ent->owner = 1;
7627
#ifdef LIBXML_LEGACY_ENABLED
7628
if (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)
7629
xmlAddEntityReference(ent, firstChild, nw);
7630
#endif /* LIBXML_LEGACY_ENABLED */
7631
} else {
7632
const xmlChar *nbktext;
7633
7634
/*
7635
* the name change is to avoid coalescing of the
7636
* node with a possible previous text one which
7637
* would make ent->children a dangling pointer
7638
*/
7639
nbktext = xmlDictLookup(ctxt->dict, BAD_CAST "nbktext",
7640
-1);
7641
if (ent->children->type == XML_TEXT_NODE)
7642
ent->children->name = nbktext;
7643
if ((ent->last != ent->children) &&
7644
(ent->last->type == XML_TEXT_NODE))
7645
ent->last->name = nbktext;
7646
xmlAddChildList(ctxt->node, ent->children);
7647
}
7648
7649
/*
7650
* This is to avoid a nasty side effect, see
7651
* characters() in SAX.c
7652
*/
7653
ctxt->nodemem = 0;
7654
ctxt->nodelen = 0;
7655
return;
7656
}
7657
}
7658
}
7659
7660
/**
7661
* xmlParseEntityRef:
7662
* @ctxt: an XML parser context
7663
*
7664
* DEPRECATED: Internal function, don't use.
7665
*
7666
* Parse an entitiy reference. Always consumes '&'.
7667
*
7668
* [68] EntityRef ::= '&' Name ';'
7669
*
7670
* [ WFC: Entity Declared ]
7671
* In a document without any DTD, a document with only an internal DTD
7672
* subset which contains no parameter entity references, or a document
7673
* with "standalone='yes'", the Name given in the entity reference
7674
* must match that in an entity declaration, except that well-formed
7675
* documents need not declare any of the following entities: amp, lt,
7676
* gt, apos, quot. The declaration of a parameter entity must precede
7677
* any reference to it. Similarly, the declaration of a general entity
7678
* must precede any reference to it which appears in a default value in an
7679
* attribute-list declaration. Note that if entities are declared in the
7680
* external subset or in external parameter entities, a non-validating
7681
* processor is not obligated to read and process their declarations;
7682
* for such documents, the rule that an entity must be declared is a
7683
* well-formedness constraint only if standalone='yes'.
7684
*
7685
* [ WFC: Parsed Entity ]
7686
* An entity reference must not contain the name of an unparsed entity
7687
*
7688
* Returns the xmlEntityPtr if found, or NULL otherwise.
7689
*/
7690
xmlEntityPtr
7691
xmlParseEntityRef(xmlParserCtxtPtr ctxt) {
7692
const xmlChar *name;
7693
xmlEntityPtr ent = NULL;
7694
7695
GROW;
7696
if (ctxt->instate == XML_PARSER_EOF)
7697
return(NULL);
7698
7699
if (RAW != '&')
7700
return(NULL);
7701
NEXT;
7702
name = xmlParseName(ctxt);
7703
if (name == NULL) {
7704
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7705
"xmlParseEntityRef: no name\n");
7706
return(NULL);
7707
}
7708
if (RAW != ';') {
7709
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7710
return(NULL);
7711
}
7712
NEXT;
7713
7714
/*
7715
* Predefined entities override any extra definition
7716
*/
7717
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7718
ent = xmlGetPredefinedEntity(name);
7719
if (ent != NULL)
7720
return(ent);
7721
}
7722
7723
/*
7724
* Ask first SAX for entity resolution, otherwise try the
7725
* entities which may have stored in the parser context.
7726
*/
7727
if (ctxt->sax != NULL) {
7728
if (ctxt->sax->getEntity != NULL)
7729
ent = ctxt->sax->getEntity(ctxt->userData, name);
7730
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7731
(ctxt->options & XML_PARSE_OLDSAX))
7732
ent = xmlGetPredefinedEntity(name);
7733
if ((ctxt->wellFormed == 1 ) && (ent == NULL) &&
7734
(ctxt->userData==ctxt)) {
7735
ent = xmlSAX2GetEntity(ctxt, name);
7736
}
7737
}
7738
if (ctxt->instate == XML_PARSER_EOF)
7739
return(NULL);
7740
/*
7741
* [ WFC: Entity Declared ]
7742
* In a document without any DTD, a document with only an
7743
* internal DTD subset which contains no parameter entity
7744
* references, or a document with "standalone='yes'", the
7745
* Name given in the entity reference must match that in an
7746
* entity declaration, except that well-formed documents
7747
* need not declare any of the following entities: amp, lt,
7748
* gt, apos, quot.
7749
* The declaration of a parameter entity must precede any
7750
* reference to it.
7751
* Similarly, the declaration of a general entity must
7752
* precede any reference to it which appears in a default
7753
* value in an attribute-list declaration. Note that if
7754
* entities are declared in the external subset or in
7755
* external parameter entities, a non-validating processor
7756
* is not obligated to read and process their declarations;
7757
* for such documents, the rule that an entity must be
7758
* declared is a well-formedness constraint only if
7759
* standalone='yes'.
7760
*/
7761
if (ent == NULL) {
7762
if ((ctxt->standalone == 1) ||
7763
((ctxt->hasExternalSubset == 0) &&
7764
(ctxt->hasPErefs == 0))) {
7765
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7766
"Entity '%s' not defined\n", name);
7767
} else {
7768
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7769
"Entity '%s' not defined\n", name);
7770
if ((ctxt->inSubset == 0) &&
7771
(ctxt->sax != NULL) &&
7772
(ctxt->disableSAX == 0) &&
7773
(ctxt->sax->reference != NULL)) {
7774
ctxt->sax->reference(ctxt->userData, name);
7775
}
7776
}
7777
ctxt->valid = 0;
7778
}
7779
7780
/*
7781
* [ WFC: Parsed Entity ]
7782
* An entity reference must not contain the name of an
7783
* unparsed entity
7784
*/
7785
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7786
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7787
"Entity reference to unparsed entity %s\n", name);
7788
}
7789
7790
/*
7791
* [ WFC: No External Entity References ]
7792
* Attribute values cannot contain direct or indirect
7793
* entity references to external entities.
7794
*/
7795
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7796
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7797
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7798
"Attribute references external entity '%s'\n", name);
7799
}
7800
/*
7801
* [ WFC: No < in Attribute Values ]
7802
* The replacement text of any entity referred to directly or
7803
* indirectly in an attribute value (other than "&lt;") must
7804
* not contain a <.
7805
*/
7806
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7807
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7808
if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
7809
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
7810
ent->flags |= XML_ENT_CONTAINS_LT;
7811
ent->flags |= XML_ENT_CHECKED_LT;
7812
}
7813
if (ent->flags & XML_ENT_CONTAINS_LT)
7814
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
7815
"'<' in entity '%s' is not allowed in attributes "
7816
"values\n", name);
7817
}
7818
7819
/*
7820
* Internal check, no parameter entities here ...
7821
*/
7822
else {
7823
switch (ent->etype) {
7824
case XML_INTERNAL_PARAMETER_ENTITY:
7825
case XML_EXTERNAL_PARAMETER_ENTITY:
7826
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
7827
"Attempt to reference the parameter entity '%s'\n",
7828
name);
7829
break;
7830
default:
7831
break;
7832
}
7833
}
7834
7835
/*
7836
* [ WFC: No Recursion ]
7837
* A parsed entity must not contain a recursive reference
7838
* to itself, either directly or indirectly.
7839
* Done somewhere else
7840
*/
7841
return(ent);
7842
}
7843
7844
/**
7845
* xmlParseStringEntityRef:
7846
* @ctxt: an XML parser context
7847
* @str: a pointer to an index in the string
7848
*
7849
* parse ENTITY references declarations, but this version parses it from
7850
* a string value.
7851
*
7852
* [68] EntityRef ::= '&' Name ';'
7853
*
7854
* [ WFC: Entity Declared ]
7855
* In a document without any DTD, a document with only an internal DTD
7856
* subset which contains no parameter entity references, or a document
7857
* with "standalone='yes'", the Name given in the entity reference
7858
* must match that in an entity declaration, except that well-formed
7859
* documents need not declare any of the following entities: amp, lt,
7860
* gt, apos, quot. The declaration of a parameter entity must precede
7861
* any reference to it. Similarly, the declaration of a general entity
7862
* must precede any reference to it which appears in a default value in an
7863
* attribute-list declaration. Note that if entities are declared in the
7864
* external subset or in external parameter entities, a non-validating
7865
* processor is not obligated to read and process their declarations;
7866
* for such documents, the rule that an entity must be declared is a
7867
* well-formedness constraint only if standalone='yes'.
7868
*
7869
* [ WFC: Parsed Entity ]
7870
* An entity reference must not contain the name of an unparsed entity
7871
*
7872
* Returns the xmlEntityPtr if found, or NULL otherwise. The str pointer
7873
* is updated to the current location in the string.
7874
*/
7875
static xmlEntityPtr
7876
xmlParseStringEntityRef(xmlParserCtxtPtr ctxt, const xmlChar ** str) {
7877
xmlChar *name;
7878
const xmlChar *ptr;
7879
xmlChar cur;
7880
xmlEntityPtr ent = NULL;
7881
7882
if ((str == NULL) || (*str == NULL))
7883
return(NULL);
7884
ptr = *str;
7885
cur = *ptr;
7886
if (cur != '&')
7887
return(NULL);
7888
7889
ptr++;
7890
name = xmlParseStringName(ctxt, &ptr);
7891
if (name == NULL) {
7892
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
7893
"xmlParseStringEntityRef: no name\n");
7894
*str = ptr;
7895
return(NULL);
7896
}
7897
if (*ptr != ';') {
7898
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
7899
xmlFree(name);
7900
*str = ptr;
7901
return(NULL);
7902
}
7903
ptr++;
7904
7905
7906
/*
7907
* Predefined entities override any extra definition
7908
*/
7909
if ((ctxt->options & XML_PARSE_OLDSAX) == 0) {
7910
ent = xmlGetPredefinedEntity(name);
7911
if (ent != NULL) {
7912
xmlFree(name);
7913
*str = ptr;
7914
return(ent);
7915
}
7916
}
7917
7918
/*
7919
* Ask first SAX for entity resolution, otherwise try the
7920
* entities which may have stored in the parser context.
7921
*/
7922
if (ctxt->sax != NULL) {
7923
if (ctxt->sax->getEntity != NULL)
7924
ent = ctxt->sax->getEntity(ctxt->userData, name);
7925
if ((ent == NULL) && (ctxt->options & XML_PARSE_OLDSAX))
7926
ent = xmlGetPredefinedEntity(name);
7927
if ((ent == NULL) && (ctxt->userData==ctxt)) {
7928
ent = xmlSAX2GetEntity(ctxt, name);
7929
}
7930
}
7931
if (ctxt->instate == XML_PARSER_EOF) {
7932
xmlFree(name);
7933
return(NULL);
7934
}
7935
7936
/*
7937
* [ WFC: Entity Declared ]
7938
* In a document without any DTD, a document with only an
7939
* internal DTD subset which contains no parameter entity
7940
* references, or a document with "standalone='yes'", the
7941
* Name given in the entity reference must match that in an
7942
* entity declaration, except that well-formed documents
7943
* need not declare any of the following entities: amp, lt,
7944
* gt, apos, quot.
7945
* The declaration of a parameter entity must precede any
7946
* reference to it.
7947
* Similarly, the declaration of a general entity must
7948
* precede any reference to it which appears in a default
7949
* value in an attribute-list declaration. Note that if
7950
* entities are declared in the external subset or in
7951
* external parameter entities, a non-validating processor
7952
* is not obligated to read and process their declarations;
7953
* for such documents, the rule that an entity must be
7954
* declared is a well-formedness constraint only if
7955
* standalone='yes'.
7956
*/
7957
if (ent == NULL) {
7958
if ((ctxt->standalone == 1) ||
7959
((ctxt->hasExternalSubset == 0) &&
7960
(ctxt->hasPErefs == 0))) {
7961
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
7962
"Entity '%s' not defined\n", name);
7963
} else {
7964
xmlErrMsgStr(ctxt, XML_WAR_UNDECLARED_ENTITY,
7965
"Entity '%s' not defined\n",
7966
name);
7967
}
7968
/* TODO ? check regressions ctxt->valid = 0; */
7969
}
7970
7971
/*
7972
* [ WFC: Parsed Entity ]
7973
* An entity reference must not contain the name of an
7974
* unparsed entity
7975
*/
7976
else if (ent->etype == XML_EXTERNAL_GENERAL_UNPARSED_ENTITY) {
7977
xmlFatalErrMsgStr(ctxt, XML_ERR_UNPARSED_ENTITY,
7978
"Entity reference to unparsed entity %s\n", name);
7979
}
7980
7981
/*
7982
* [ WFC: No External Entity References ]
7983
* Attribute values cannot contain direct or indirect
7984
* entity references to external entities.
7985
*/
7986
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7987
(ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
7988
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_EXTERNAL,
7989
"Attribute references external entity '%s'\n", name);
7990
}
7991
/*
7992
* [ WFC: No < in Attribute Values ]
7993
* The replacement text of any entity referred to directly or
7994
* indirectly in an attribute value (other than "&lt;") must
7995
* not contain a <.
7996
*/
7997
else if ((ctxt->instate == XML_PARSER_ATTRIBUTE_VALUE) &&
7998
(ent->etype != XML_INTERNAL_PREDEFINED_ENTITY)) {
7999
if ((ent->flags & XML_ENT_CHECKED_LT) == 0) {
8000
if ((ent->content != NULL) && (xmlStrchr(ent->content, '<')))
8001
ent->flags |= XML_ENT_CONTAINS_LT;
8002
ent->flags |= XML_ENT_CHECKED_LT;
8003
}
8004
if (ent->flags & XML_ENT_CONTAINS_LT)
8005
xmlFatalErrMsgStr(ctxt, XML_ERR_LT_IN_ATTRIBUTE,
8006
"'<' in entity '%s' is not allowed in attributes "
8007
"values\n", name);
8008
}
8009
8010
/*
8011
* Internal check, no parameter entities here ...
8012
*/
8013
else {
8014
switch (ent->etype) {
8015
case XML_INTERNAL_PARAMETER_ENTITY:
8016
case XML_EXTERNAL_PARAMETER_ENTITY:
8017
xmlFatalErrMsgStr(ctxt, XML_ERR_ENTITY_IS_PARAMETER,
8018
"Attempt to reference the parameter entity '%s'\n",
8019
name);
8020
break;
8021
default:
8022
break;
8023
}
8024
}
8025
8026
/*
8027
* [ WFC: No Recursion ]
8028
* A parsed entity must not contain a recursive reference
8029
* to itself, either directly or indirectly.
8030
* Done somewhere else
8031
*/
8032
8033
xmlFree(name);
8034
*str = ptr;
8035
return(ent);
8036
}
8037
8038
/**
8039
* xmlParsePEReference:
8040
* @ctxt: an XML parser context
8041
*
8042
* DEPRECATED: Internal function, don't use.
8043
*
8044
* Parse a parameter entity reference. Always consumes '%'.
8045
*
8046
* The entity content is handled directly by pushing it's content as
8047
* a new input stream.
8048
*
8049
* [69] PEReference ::= '%' Name ';'
8050
*
8051
* [ WFC: No Recursion ]
8052
* A parsed entity must not contain a recursive
8053
* reference to itself, either directly or indirectly.
8054
*
8055
* [ WFC: Entity Declared ]
8056
* In a document without any DTD, a document with only an internal DTD
8057
* subset which contains no parameter entity references, or a document
8058
* with "standalone='yes'", ... ... The declaration of a parameter
8059
* entity must precede any reference to it...
8060
*
8061
* [ VC: Entity Declared ]
8062
* In a document with an external subset or external parameter entities
8063
* with "standalone='no'", ... ... The declaration of a parameter entity
8064
* must precede any reference to it...
8065
*
8066
* [ WFC: In DTD ]
8067
* Parameter-entity references may only appear in the DTD.
8068
* NOTE: misleading but this is handled.
8069
*/
8070
void
8071
xmlParsePEReference(xmlParserCtxtPtr ctxt)
8072
{
8073
const xmlChar *name;
8074
xmlEntityPtr entity = NULL;
8075
xmlParserInputPtr input;
8076
8077
if (RAW != '%')
8078
return;
8079
NEXT;
8080
name = xmlParseName(ctxt);
8081
if (name == NULL) {
8082
xmlFatalErrMsg(ctxt, XML_ERR_PEREF_NO_NAME, "PEReference: no name\n");
8083
return;
8084
}
8085
if (xmlParserDebugEntities)
8086
xmlGenericError(xmlGenericErrorContext,
8087
"PEReference: %s\n", name);
8088
if (RAW != ';') {
8089
xmlFatalErr(ctxt, XML_ERR_PEREF_SEMICOL_MISSING, NULL);
8090
return;
8091
}
8092
8093
NEXT;
8094
8095
/*
8096
* Request the entity from SAX
8097
*/
8098
if ((ctxt->sax != NULL) &&
8099
(ctxt->sax->getParameterEntity != NULL))
8100
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8101
if (ctxt->instate == XML_PARSER_EOF)
8102
return;
8103
if (entity == NULL) {
8104
/*
8105
* [ WFC: Entity Declared ]
8106
* In a document without any DTD, a document with only an
8107
* internal DTD subset which contains no parameter entity
8108
* references, or a document with "standalone='yes'", ...
8109
* ... The declaration of a parameter entity must precede
8110
* any reference to it...
8111
*/
8112
if ((ctxt->standalone == 1) ||
8113
((ctxt->hasExternalSubset == 0) &&
8114
(ctxt->hasPErefs == 0))) {
8115
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8116
"PEReference: %%%s; not found\n",
8117
name);
8118
} else {
8119
/*
8120
* [ VC: Entity Declared ]
8121
* In a document with an external subset or external
8122
* parameter entities with "standalone='no'", ...
8123
* ... The declaration of a parameter entity must
8124
* precede any reference to it...
8125
*/
8126
if ((ctxt->validate) && (ctxt->vctxt.error != NULL)) {
8127
xmlValidityError(ctxt, XML_WAR_UNDECLARED_ENTITY,
8128
"PEReference: %%%s; not found\n",
8129
name, NULL);
8130
} else
8131
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8132
"PEReference: %%%s; not found\n",
8133
name, NULL);
8134
ctxt->valid = 0;
8135
}
8136
} else {
8137
/*
8138
* Internal checking in case the entity quest barfed
8139
*/
8140
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8141
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8142
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8143
"Internal: %%%s; is not a parameter entity\n",
8144
name, NULL);
8145
} else {
8146
unsigned long parentConsumed;
8147
xmlEntityPtr oldEnt;
8148
8149
if ((entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8150
((ctxt->options & XML_PARSE_NOENT) == 0) &&
8151
((ctxt->options & XML_PARSE_DTDVALID) == 0) &&
8152
((ctxt->options & XML_PARSE_DTDLOAD) == 0) &&
8153
((ctxt->options & XML_PARSE_DTDATTR) == 0) &&
8154
(ctxt->replaceEntities == 0) &&
8155
(ctxt->validate == 0))
8156
return;
8157
8158
if (entity->flags & XML_ENT_EXPANDING) {
8159
xmlFatalErr(ctxt, XML_ERR_ENTITY_LOOP, NULL);
8160
xmlHaltParser(ctxt);
8161
return;
8162
}
8163
8164
/* Must be computed from old input before pushing new input. */
8165
parentConsumed = ctxt->input->parentConsumed;
8166
oldEnt = ctxt->input->entity;
8167
if ((oldEnt == NULL) ||
8168
((oldEnt->etype == XML_EXTERNAL_PARAMETER_ENTITY) &&
8169
((oldEnt->flags & XML_ENT_PARSED) == 0))) {
8170
xmlSaturatedAdd(&parentConsumed, ctxt->input->consumed);
8171
xmlSaturatedAddSizeT(&parentConsumed,
8172
ctxt->input->cur - ctxt->input->base);
8173
}
8174
8175
input = xmlNewEntityInputStream(ctxt, entity);
8176
if (xmlPushInput(ctxt, input) < 0) {
8177
xmlFreeInputStream(input);
8178
return;
8179
}
8180
8181
entity->flags |= XML_ENT_EXPANDING;
8182
8183
input->parentConsumed = parentConsumed;
8184
8185
if (entity->etype == XML_EXTERNAL_PARAMETER_ENTITY) {
8186
xmlDetectEncoding(ctxt);
8187
8188
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) &&
8189
(IS_BLANK_CH(NXT(5)))) {
8190
xmlParseTextDecl(ctxt);
8191
}
8192
}
8193
}
8194
}
8195
ctxt->hasPErefs = 1;
8196
}
8197
8198
/**
8199
* xmlLoadEntityContent:
8200
* @ctxt: an XML parser context
8201
* @entity: an unloaded system entity
8202
*
8203
* Load the original content of the given system entity from the
8204
* ExternalID/SystemID given. This is to be used for Included in Literal
8205
* http://www.w3.org/TR/REC-xml/#inliteral processing of entities references
8206
*
8207
* Returns 0 in case of success and -1 in case of failure
8208
*/
8209
static int
8210
xmlLoadEntityContent(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
8211
xmlParserInputPtr oldinput, input = NULL;
8212
xmlParserInputPtr *oldinputTab;
8213
const xmlChar *oldencoding;
8214
xmlChar *content = NULL;
8215
size_t length, i;
8216
int oldinputNr, oldinputMax, oldprogressive;
8217
int ret = -1;
8218
int res;
8219
8220
if ((ctxt == NULL) || (entity == NULL) ||
8221
((entity->etype != XML_EXTERNAL_PARAMETER_ENTITY) &&
8222
(entity->etype != XML_EXTERNAL_GENERAL_PARSED_ENTITY)) ||
8223
(entity->content != NULL)) {
8224
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8225
"xmlLoadEntityContent parameter error");
8226
return(-1);
8227
}
8228
8229
if (xmlParserDebugEntities)
8230
xmlGenericError(xmlGenericErrorContext,
8231
"Reading %s entity content input\n", entity->name);
8232
8233
input = xmlLoadExternalEntity((char *) entity->URI,
8234
(char *) entity->ExternalID, ctxt);
8235
if (input == NULL) {
8236
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8237
"xmlLoadEntityContent input error");
8238
return(-1);
8239
}
8240
8241
oldinput = ctxt->input;
8242
oldinputNr = ctxt->inputNr;
8243
oldinputMax = ctxt->inputMax;
8244
oldinputTab = ctxt->inputTab;
8245
oldencoding = ctxt->encoding;
8246
oldprogressive = ctxt->progressive;
8247
8248
ctxt->input = NULL;
8249
ctxt->inputNr = 0;
8250
ctxt->inputMax = 1;
8251
ctxt->encoding = NULL;
8252
ctxt->progressive = 0;
8253
ctxt->inputTab = xmlMalloc(sizeof(xmlParserInputPtr));
8254
if (ctxt->inputTab == NULL) {
8255
xmlErrMemory(ctxt, NULL);
8256
xmlFreeInputStream(input);
8257
goto error;
8258
}
8259
8260
xmlBufResetInput(input->buf->buffer, input);
8261
8262
inputPush(ctxt, input);
8263
8264
xmlDetectEncoding(ctxt);
8265
8266
/*
8267
* Parse a possible text declaration first
8268
*/
8269
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
8270
xmlParseTextDecl(ctxt);
8271
/*
8272
* An XML-1.0 document can't reference an entity not XML-1.0
8273
*/
8274
if ((xmlStrEqual(ctxt->version, BAD_CAST "1.0")) &&
8275
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
8276
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
8277
"Version mismatch between document and entity\n");
8278
}
8279
}
8280
8281
if (ctxt->instate == XML_PARSER_EOF)
8282
goto error;
8283
8284
length = input->cur - input->base;
8285
xmlBufShrink(input->buf->buffer, length);
8286
xmlSaturatedAdd(&ctxt->sizeentities, length);
8287
8288
while ((res = xmlParserInputBufferGrow(input->buf, 4096)) > 0)
8289
;
8290
8291
xmlBufResetInput(input->buf->buffer, input);
8292
8293
if (res < 0) {
8294
xmlFatalErr(ctxt, input->buf->error, NULL);
8295
goto error;
8296
}
8297
8298
length = xmlBufUse(input->buf->buffer);
8299
content = xmlBufDetach(input->buf->buffer);
8300
8301
if (length > INT_MAX) {
8302
xmlErrMemory(ctxt, NULL);
8303
goto error;
8304
}
8305
8306
for (i = 0; i < length; ) {
8307
int clen = length - i;
8308
int c = xmlGetUTF8Char(content + i, &clen);
8309
8310
if ((c < 0) || (!IS_CHAR(c))) {
8311
xmlFatalErrMsgInt(ctxt, XML_ERR_INVALID_CHAR,
8312
"xmlLoadEntityContent: invalid char value %d\n",
8313
content[i]);
8314
goto error;
8315
}
8316
i += clen;
8317
}
8318
8319
xmlSaturatedAdd(&ctxt->sizeentities, length);
8320
entity->content = content;
8321
entity->length = length;
8322
content = NULL;
8323
ret = 0;
8324
8325
error:
8326
while (ctxt->inputNr > 0)
8327
xmlFreeInputStream(inputPop(ctxt));
8328
xmlFree(ctxt->inputTab);
8329
xmlFree((xmlChar *) ctxt->encoding);
8330
8331
ctxt->input = oldinput;
8332
ctxt->inputNr = oldinputNr;
8333
ctxt->inputMax = oldinputMax;
8334
ctxt->inputTab = oldinputTab;
8335
ctxt->encoding = oldencoding;
8336
ctxt->progressive = oldprogressive;
8337
8338
xmlFree(content);
8339
8340
return(ret);
8341
}
8342
8343
/**
8344
* xmlParseStringPEReference:
8345
* @ctxt: an XML parser context
8346
* @str: a pointer to an index in the string
8347
*
8348
* parse PEReference declarations
8349
*
8350
* [69] PEReference ::= '%' Name ';'
8351
*
8352
* [ WFC: No Recursion ]
8353
* A parsed entity must not contain a recursive
8354
* reference to itself, either directly or indirectly.
8355
*
8356
* [ WFC: Entity Declared ]
8357
* In a document without any DTD, a document with only an internal DTD
8358
* subset which contains no parameter entity references, or a document
8359
* with "standalone='yes'", ... ... The declaration of a parameter
8360
* entity must precede any reference to it...
8361
*
8362
* [ VC: Entity Declared ]
8363
* In a document with an external subset or external parameter entities
8364
* with "standalone='no'", ... ... The declaration of a parameter entity
8365
* must precede any reference to it...
8366
*
8367
* [ WFC: In DTD ]
8368
* Parameter-entity references may only appear in the DTD.
8369
* NOTE: misleading but this is handled.
8370
*
8371
* Returns the string of the entity content.
8372
* str is updated to the current value of the index
8373
*/
8374
static xmlEntityPtr
8375
xmlParseStringPEReference(xmlParserCtxtPtr ctxt, const xmlChar **str) {
8376
const xmlChar *ptr;
8377
xmlChar cur;
8378
xmlChar *name;
8379
xmlEntityPtr entity = NULL;
8380
8381
if ((str == NULL) || (*str == NULL)) return(NULL);
8382
ptr = *str;
8383
cur = *ptr;
8384
if (cur != '%')
8385
return(NULL);
8386
ptr++;
8387
name = xmlParseStringName(ctxt, &ptr);
8388
if (name == NULL) {
8389
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8390
"xmlParseStringPEReference: no name\n");
8391
*str = ptr;
8392
return(NULL);
8393
}
8394
cur = *ptr;
8395
if (cur != ';') {
8396
xmlFatalErr(ctxt, XML_ERR_ENTITYREF_SEMICOL_MISSING, NULL);
8397
xmlFree(name);
8398
*str = ptr;
8399
return(NULL);
8400
}
8401
ptr++;
8402
8403
/*
8404
* Request the entity from SAX
8405
*/
8406
if ((ctxt->sax != NULL) &&
8407
(ctxt->sax->getParameterEntity != NULL))
8408
entity = ctxt->sax->getParameterEntity(ctxt->userData, name);
8409
if (ctxt->instate == XML_PARSER_EOF) {
8410
xmlFree(name);
8411
*str = ptr;
8412
return(NULL);
8413
}
8414
if (entity == NULL) {
8415
/*
8416
* [ WFC: Entity Declared ]
8417
* In a document without any DTD, a document with only an
8418
* internal DTD subset which contains no parameter entity
8419
* references, or a document with "standalone='yes'", ...
8420
* ... The declaration of a parameter entity must precede
8421
* any reference to it...
8422
*/
8423
if ((ctxt->standalone == 1) ||
8424
((ctxt->hasExternalSubset == 0) && (ctxt->hasPErefs == 0))) {
8425
xmlFatalErrMsgStr(ctxt, XML_ERR_UNDECLARED_ENTITY,
8426
"PEReference: %%%s; not found\n", name);
8427
} else {
8428
/*
8429
* [ VC: Entity Declared ]
8430
* In a document with an external subset or external
8431
* parameter entities with "standalone='no'", ...
8432
* ... The declaration of a parameter entity must
8433
* precede any reference to it...
8434
*/
8435
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8436
"PEReference: %%%s; not found\n",
8437
name, NULL);
8438
ctxt->valid = 0;
8439
}
8440
} else {
8441
/*
8442
* Internal checking in case the entity quest barfed
8443
*/
8444
if ((entity->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
8445
(entity->etype != XML_EXTERNAL_PARAMETER_ENTITY)) {
8446
xmlWarningMsg(ctxt, XML_WAR_UNDECLARED_ENTITY,
8447
"%%%s; is not a parameter entity\n",
8448
name, NULL);
8449
}
8450
}
8451
ctxt->hasPErefs = 1;
8452
xmlFree(name);
8453
*str = ptr;
8454
return(entity);
8455
}
8456
8457
/**
8458
* xmlParseDocTypeDecl:
8459
* @ctxt: an XML parser context
8460
*
8461
* DEPRECATED: Internal function, don't use.
8462
*
8463
* parse a DOCTYPE declaration
8464
*
8465
* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S?
8466
* ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8467
*
8468
* [ VC: Root Element Type ]
8469
* The Name in the document type declaration must match the element
8470
* type of the root element.
8471
*/
8472
8473
void
8474
xmlParseDocTypeDecl(xmlParserCtxtPtr ctxt) {
8475
const xmlChar *name = NULL;
8476
xmlChar *ExternalID = NULL;
8477
xmlChar *URI = NULL;
8478
8479
/*
8480
* We know that '<!DOCTYPE' has been detected.
8481
*/
8482
SKIP(9);
8483
8484
SKIP_BLANKS;
8485
8486
/*
8487
* Parse the DOCTYPE name.
8488
*/
8489
name = xmlParseName(ctxt);
8490
if (name == NULL) {
8491
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8492
"xmlParseDocTypeDecl : no DOCTYPE name !\n");
8493
}
8494
ctxt->intSubName = name;
8495
8496
SKIP_BLANKS;
8497
8498
/*
8499
* Check for SystemID and ExternalID
8500
*/
8501
URI = xmlParseExternalID(ctxt, &ExternalID, 1);
8502
8503
if ((URI != NULL) || (ExternalID != NULL)) {
8504
ctxt->hasExternalSubset = 1;
8505
}
8506
ctxt->extSubURI = URI;
8507
ctxt->extSubSystem = ExternalID;
8508
8509
SKIP_BLANKS;
8510
8511
/*
8512
* Create and update the internal subset.
8513
*/
8514
if ((ctxt->sax != NULL) && (ctxt->sax->internalSubset != NULL) &&
8515
(!ctxt->disableSAX))
8516
ctxt->sax->internalSubset(ctxt->userData, name, ExternalID, URI);
8517
if (ctxt->instate == XML_PARSER_EOF)
8518
return;
8519
8520
/*
8521
* Is there any internal subset declarations ?
8522
* they are handled separately in xmlParseInternalSubset()
8523
*/
8524
if (RAW == '[')
8525
return;
8526
8527
/*
8528
* We should be at the end of the DOCTYPE declaration.
8529
*/
8530
if (RAW != '>') {
8531
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8532
}
8533
NEXT;
8534
}
8535
8536
/**
8537
* xmlParseInternalSubset:
8538
* @ctxt: an XML parser context
8539
*
8540
* parse the internal subset declaration
8541
*
8542
* [28 end] ('[' (markupdecl | PEReference | S)* ']' S?)? '>'
8543
*/
8544
8545
static void
8546
xmlParseInternalSubset(xmlParserCtxtPtr ctxt) {
8547
/*
8548
* Is there any DTD definition ?
8549
*/
8550
if (RAW == '[') {
8551
int baseInputNr = ctxt->inputNr;
8552
ctxt->instate = XML_PARSER_DTD;
8553
NEXT;
8554
/*
8555
* Parse the succession of Markup declarations and
8556
* PEReferences.
8557
* Subsequence (markupdecl | PEReference | S)*
8558
*/
8559
SKIP_BLANKS;
8560
while (((RAW != ']') || (ctxt->inputNr > baseInputNr)) &&
8561
(ctxt->instate != XML_PARSER_EOF)) {
8562
8563
/*
8564
* Conditional sections are allowed from external entities included
8565
* by PE References in the internal subset.
8566
*/
8567
if ((ctxt->inputNr > 1) && (ctxt->input->filename != NULL) &&
8568
(RAW == '<') && (NXT(1) == '!') && (NXT(2) == '[')) {
8569
xmlParseConditionalSections(ctxt);
8570
} else if ((RAW == '<') && ((NXT(1) == '!') || (NXT(1) == '?'))) {
8571
xmlParseMarkupDecl(ctxt);
8572
} else if (RAW == '%') {
8573
xmlParsePEReference(ctxt);
8574
} else {
8575
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
8576
"xmlParseInternalSubset: error detected in"
8577
" Markup declaration\n");
8578
xmlHaltParser(ctxt);
8579
return;
8580
}
8581
SKIP_BLANKS;
8582
SHRINK;
8583
GROW;
8584
}
8585
if (RAW == ']') {
8586
NEXT;
8587
SKIP_BLANKS;
8588
}
8589
}
8590
8591
/*
8592
* We should be at the end of the DOCTYPE declaration.
8593
*/
8594
if (RAW != '>') {
8595
xmlFatalErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED, NULL);
8596
return;
8597
}
8598
NEXT;
8599
}
8600
8601
#ifdef LIBXML_SAX1_ENABLED
8602
/**
8603
* xmlParseAttribute:
8604
* @ctxt: an XML parser context
8605
* @value: a xmlChar ** used to store the value of the attribute
8606
*
8607
* DEPRECATED: Internal function, don't use.
8608
*
8609
* parse an attribute
8610
*
8611
* [41] Attribute ::= Name Eq AttValue
8612
*
8613
* [ WFC: No External Entity References ]
8614
* Attribute values cannot contain direct or indirect entity references
8615
* to external entities.
8616
*
8617
* [ WFC: No < in Attribute Values ]
8618
* The replacement text of any entity referred to directly or indirectly in
8619
* an attribute value (other than "&lt;") must not contain a <.
8620
*
8621
* [ VC: Attribute Value Type ]
8622
* The attribute must have been declared; the value must be of the type
8623
* declared for it.
8624
*
8625
* [25] Eq ::= S? '=' S?
8626
*
8627
* With namespace:
8628
*
8629
* [NS 11] Attribute ::= QName Eq AttValue
8630
*
8631
* Also the case QName == xmlns:??? is handled independently as a namespace
8632
* definition.
8633
*
8634
* Returns the attribute name, and the value in *value.
8635
*/
8636
8637
const xmlChar *
8638
xmlParseAttribute(xmlParserCtxtPtr ctxt, xmlChar **value) {
8639
const xmlChar *name;
8640
xmlChar *val;
8641
8642
*value = NULL;
8643
GROW;
8644
name = xmlParseName(ctxt);
8645
if (name == NULL) {
8646
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8647
"error parsing attribute name\n");
8648
return(NULL);
8649
}
8650
8651
/*
8652
* read the value
8653
*/
8654
SKIP_BLANKS;
8655
if (RAW == '=') {
8656
NEXT;
8657
SKIP_BLANKS;
8658
val = xmlParseAttValue(ctxt);
8659
ctxt->instate = XML_PARSER_CONTENT;
8660
} else {
8661
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
8662
"Specification mandates value for attribute %s\n", name);
8663
return(name);
8664
}
8665
8666
/*
8667
* Check that xml:lang conforms to the specification
8668
* No more registered as an error, just generate a warning now
8669
* since this was deprecated in XML second edition
8670
*/
8671
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "xml:lang"))) {
8672
if (!xmlCheckLanguageID(val)) {
8673
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
8674
"Malformed value for xml:lang : %s\n",
8675
val, NULL);
8676
}
8677
}
8678
8679
/*
8680
* Check that xml:space conforms to the specification
8681
*/
8682
if (xmlStrEqual(name, BAD_CAST "xml:space")) {
8683
if (xmlStrEqual(val, BAD_CAST "default"))
8684
*(ctxt->space) = 0;
8685
else if (xmlStrEqual(val, BAD_CAST "preserve"))
8686
*(ctxt->space) = 1;
8687
else {
8688
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
8689
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
8690
val, NULL);
8691
}
8692
}
8693
8694
*value = val;
8695
return(name);
8696
}
8697
8698
/**
8699
* xmlParseStartTag:
8700
* @ctxt: an XML parser context
8701
*
8702
* DEPRECATED: Internal function, don't use.
8703
*
8704
* Parse a start tag. Always consumes '<'.
8705
*
8706
* [40] STag ::= '<' Name (S Attribute)* S? '>'
8707
*
8708
* [ WFC: Unique Att Spec ]
8709
* No attribute name may appear more than once in the same start-tag or
8710
* empty-element tag.
8711
*
8712
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
8713
*
8714
* [ WFC: Unique Att Spec ]
8715
* No attribute name may appear more than once in the same start-tag or
8716
* empty-element tag.
8717
*
8718
* With namespace:
8719
*
8720
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
8721
*
8722
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
8723
*
8724
* Returns the element name parsed
8725
*/
8726
8727
const xmlChar *
8728
xmlParseStartTag(xmlParserCtxtPtr ctxt) {
8729
const xmlChar *name;
8730
const xmlChar *attname;
8731
xmlChar *attvalue;
8732
const xmlChar **atts = ctxt->atts;
8733
int nbatts = 0;
8734
int maxatts = ctxt->maxatts;
8735
int i;
8736
8737
if (RAW != '<') return(NULL);
8738
NEXT1;
8739
8740
name = xmlParseName(ctxt);
8741
if (name == NULL) {
8742
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
8743
"xmlParseStartTag: invalid element name\n");
8744
return(NULL);
8745
}
8746
8747
/*
8748
* Now parse the attributes, it ends up with the ending
8749
*
8750
* (S Attribute)* S?
8751
*/
8752
SKIP_BLANKS;
8753
GROW;
8754
8755
while (((RAW != '>') &&
8756
((RAW != '/') || (NXT(1) != '>')) &&
8757
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
8758
attname = xmlParseAttribute(ctxt, &attvalue);
8759
if (attname == NULL) {
8760
xmlFatalErrMsg(ctxt, XML_ERR_INTERNAL_ERROR,
8761
"xmlParseStartTag: problem parsing attributes\n");
8762
break;
8763
}
8764
if (attvalue != NULL) {
8765
/*
8766
* [ WFC: Unique Att Spec ]
8767
* No attribute name may appear more than once in the same
8768
* start-tag or empty-element tag.
8769
*/
8770
for (i = 0; i < nbatts;i += 2) {
8771
if (xmlStrEqual(atts[i], attname)) {
8772
xmlErrAttributeDup(ctxt, NULL, attname);
8773
xmlFree(attvalue);
8774
goto failed;
8775
}
8776
}
8777
/*
8778
* Add the pair to atts
8779
*/
8780
if (atts == NULL) {
8781
maxatts = 22; /* allow for 10 attrs by default */
8782
atts = (const xmlChar **)
8783
xmlMalloc(maxatts * sizeof(xmlChar *));
8784
if (atts == NULL) {
8785
xmlErrMemory(ctxt, NULL);
8786
if (attvalue != NULL)
8787
xmlFree(attvalue);
8788
goto failed;
8789
}
8790
ctxt->atts = atts;
8791
ctxt->maxatts = maxatts;
8792
} else if (nbatts + 4 > maxatts) {
8793
const xmlChar **n;
8794
8795
maxatts *= 2;
8796
n = (const xmlChar **) xmlRealloc((void *) atts,
8797
maxatts * sizeof(const xmlChar *));
8798
if (n == NULL) {
8799
xmlErrMemory(ctxt, NULL);
8800
if (attvalue != NULL)
8801
xmlFree(attvalue);
8802
goto failed;
8803
}
8804
atts = n;
8805
ctxt->atts = atts;
8806
ctxt->maxatts = maxatts;
8807
}
8808
atts[nbatts++] = attname;
8809
atts[nbatts++] = attvalue;
8810
atts[nbatts] = NULL;
8811
atts[nbatts + 1] = NULL;
8812
} else {
8813
if (attvalue != NULL)
8814
xmlFree(attvalue);
8815
}
8816
8817
failed:
8818
8819
GROW
8820
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
8821
break;
8822
if (SKIP_BLANKS == 0) {
8823
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
8824
"attributes construct error\n");
8825
}
8826
SHRINK;
8827
GROW;
8828
}
8829
8830
/*
8831
* SAX: Start of Element !
8832
*/
8833
if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL) &&
8834
(!ctxt->disableSAX)) {
8835
if (nbatts > 0)
8836
ctxt->sax->startElement(ctxt->userData, name, atts);
8837
else
8838
ctxt->sax->startElement(ctxt->userData, name, NULL);
8839
}
8840
8841
if (atts != NULL) {
8842
/* Free only the content strings */
8843
for (i = 1;i < nbatts;i+=2)
8844
if (atts[i] != NULL)
8845
xmlFree((xmlChar *) atts[i]);
8846
}
8847
return(name);
8848
}
8849
8850
/**
8851
* xmlParseEndTag1:
8852
* @ctxt: an XML parser context
8853
* @line: line of the start tag
8854
* @nsNr: number of namespaces on the start tag
8855
*
8856
* Parse an end tag. Always consumes '</'.
8857
*
8858
* [42] ETag ::= '</' Name S? '>'
8859
*
8860
* With namespace
8861
*
8862
* [NS 9] ETag ::= '</' QName S? '>'
8863
*/
8864
8865
static void
8866
xmlParseEndTag1(xmlParserCtxtPtr ctxt, int line) {
8867
const xmlChar *name;
8868
8869
GROW;
8870
if ((RAW != '<') || (NXT(1) != '/')) {
8871
xmlFatalErrMsg(ctxt, XML_ERR_LTSLASH_REQUIRED,
8872
"xmlParseEndTag: '</' not found\n");
8873
return;
8874
}
8875
SKIP(2);
8876
8877
name = xmlParseNameAndCompare(ctxt,ctxt->name);
8878
8879
/*
8880
* We should definitely be at the ending "S? '>'" part
8881
*/
8882
GROW;
8883
SKIP_BLANKS;
8884
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
8885
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
8886
} else
8887
NEXT1;
8888
8889
/*
8890
* [ WFC: Element Type Match ]
8891
* The Name in an element's end-tag must match the element type in the
8892
* start-tag.
8893
*
8894
*/
8895
if (name != (xmlChar*)1) {
8896
if (name == NULL) name = BAD_CAST "unparsable";
8897
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
8898
"Opening and ending tag mismatch: %s line %d and %s\n",
8899
ctxt->name, line, name);
8900
}
8901
8902
/*
8903
* SAX: End of Tag
8904
*/
8905
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
8906
(!ctxt->disableSAX))
8907
ctxt->sax->endElement(ctxt->userData, ctxt->name);
8908
8909
namePop(ctxt);
8910
spacePop(ctxt);
8911
return;
8912
}
8913
8914
/**
8915
* xmlParseEndTag:
8916
* @ctxt: an XML parser context
8917
*
8918
* DEPRECATED: Internal function, don't use.
8919
*
8920
* parse an end of tag
8921
*
8922
* [42] ETag ::= '</' Name S? '>'
8923
*
8924
* With namespace
8925
*
8926
* [NS 9] ETag ::= '</' QName S? '>'
8927
*/
8928
8929
void
8930
xmlParseEndTag(xmlParserCtxtPtr ctxt) {
8931
xmlParseEndTag1(ctxt, 0);
8932
}
8933
#endif /* LIBXML_SAX1_ENABLED */
8934
8935
/************************************************************************
8936
* *
8937
* SAX 2 specific operations *
8938
* *
8939
************************************************************************/
8940
8941
/**
8942
* xmlParseQNameHashed:
8943
* @ctxt: an XML parser context
8944
* @prefix: pointer to store the prefix part
8945
*
8946
* parse an XML Namespace QName
8947
*
8948
* [6] QName ::= (Prefix ':')? LocalPart
8949
* [7] Prefix ::= NCName
8950
* [8] LocalPart ::= NCName
8951
*
8952
* Returns the Name parsed or NULL
8953
*/
8954
8955
static xmlHashedString
8956
xmlParseQNameHashed(xmlParserCtxtPtr ctxt, xmlHashedString *prefix) {
8957
xmlHashedString l, p;
8958
int start, isNCName = 0;
8959
8960
l.name = NULL;
8961
p.name = NULL;
8962
8963
GROW;
8964
if (ctxt->instate == XML_PARSER_EOF)
8965
return(l);
8966
start = CUR_PTR - BASE_PTR;
8967
8968
l = xmlParseNCName(ctxt);
8969
if (l.name != NULL) {
8970
isNCName = 1;
8971
if (CUR == ':') {
8972
NEXT;
8973
p = l;
8974
l = xmlParseNCName(ctxt);
8975
}
8976
}
8977
if ((l.name == NULL) || (CUR == ':')) {
8978
xmlChar *tmp;
8979
8980
l.name = NULL;
8981
p.name = NULL;
8982
if (ctxt->instate == XML_PARSER_EOF)
8983
return(l);
8984
if ((isNCName == 0) && (CUR != ':'))
8985
return(l);
8986
tmp = xmlParseNmtoken(ctxt);
8987
if (tmp != NULL)
8988
xmlFree(tmp);
8989
if (ctxt->instate == XML_PARSER_EOF)
8990
return(l);
8991
l = xmlDictLookupHashed(ctxt->dict, BASE_PTR + start,
8992
CUR_PTR - (BASE_PTR + start));
8993
xmlNsErr(ctxt, XML_NS_ERR_QNAME,
8994
"Failed to parse QName '%s'\n", l.name, NULL, NULL);
8995
}
8996
8997
*prefix = p;
8998
return(l);
8999
}
9000
9001
/**
9002
* xmlParseQName:
9003
* @ctxt: an XML parser context
9004
* @prefix: pointer to store the prefix part
9005
*
9006
* parse an XML Namespace QName
9007
*
9008
* [6] QName ::= (Prefix ':')? LocalPart
9009
* [7] Prefix ::= NCName
9010
* [8] LocalPart ::= NCName
9011
*
9012
* Returns the Name parsed or NULL
9013
*/
9014
9015
static const xmlChar *
9016
xmlParseQName(xmlParserCtxtPtr ctxt, const xmlChar **prefix) {
9017
xmlHashedString n, p;
9018
9019
n = xmlParseQNameHashed(ctxt, &p);
9020
if (n.name == NULL)
9021
return(NULL);
9022
*prefix = p.name;
9023
return(n.name);
9024
}
9025
9026
/**
9027
* xmlParseQNameAndCompare:
9028
* @ctxt: an XML parser context
9029
* @name: the localname
9030
* @prefix: the prefix, if any.
9031
*
9032
* parse an XML name and compares for match
9033
* (specialized for endtag parsing)
9034
*
9035
* Returns NULL for an illegal name, (xmlChar*) 1 for success
9036
* and the name for mismatch
9037
*/
9038
9039
static const xmlChar *
9040
xmlParseQNameAndCompare(xmlParserCtxtPtr ctxt, xmlChar const *name,
9041
xmlChar const *prefix) {
9042
const xmlChar *cmp;
9043
const xmlChar *in;
9044
const xmlChar *ret;
9045
const xmlChar *prefix2;
9046
9047
if (prefix == NULL) return(xmlParseNameAndCompare(ctxt, name));
9048
9049
GROW;
9050
in = ctxt->input->cur;
9051
9052
cmp = prefix;
9053
while (*in != 0 && *in == *cmp) {
9054
++in;
9055
++cmp;
9056
}
9057
if ((*cmp == 0) && (*in == ':')) {
9058
in++;
9059
cmp = name;
9060
while (*in != 0 && *in == *cmp) {
9061
++in;
9062
++cmp;
9063
}
9064
if (*cmp == 0 && (*in == '>' || IS_BLANK_CH (*in))) {
9065
/* success */
9066
ctxt->input->col += in - ctxt->input->cur;
9067
ctxt->input->cur = in;
9068
return((const xmlChar*) 1);
9069
}
9070
}
9071
/*
9072
* all strings coms from the dictionary, equality can be done directly
9073
*/
9074
ret = xmlParseQName (ctxt, &prefix2);
9075
if (ret == NULL)
9076
return(NULL);
9077
if ((ret == name) && (prefix == prefix2))
9078
return((const xmlChar*) 1);
9079
return ret;
9080
}
9081
9082
/**
9083
* xmlParseAttValueInternal:
9084
* @ctxt: an XML parser context
9085
* @len: attribute len result
9086
* @alloc: whether the attribute was reallocated as a new string
9087
* @normalize: if 1 then further non-CDATA normalization must be done
9088
*
9089
* parse a value for an attribute.
9090
* NOTE: if no normalization is needed, the routine will return pointers
9091
* directly from the data buffer.
9092
*
9093
* 3.3.3 Attribute-Value Normalization:
9094
* Before the value of an attribute is passed to the application or
9095
* checked for validity, the XML processor must normalize it as follows:
9096
* - a character reference is processed by appending the referenced
9097
* character to the attribute value
9098
* - an entity reference is processed by recursively processing the
9099
* replacement text of the entity
9100
* - a whitespace character (#x20, #xD, #xA, #x9) is processed by
9101
* appending #x20 to the normalized value, except that only a single
9102
* #x20 is appended for a "#xD#xA" sequence that is part of an external
9103
* parsed entity or the literal entity value of an internal parsed entity
9104
* - other characters are processed by appending them to the normalized value
9105
* If the declared value is not CDATA, then the XML processor must further
9106
* process the normalized attribute value by discarding any leading and
9107
* trailing space (#x20) characters, and by replacing sequences of space
9108
* (#x20) characters by a single space (#x20) character.
9109
* All attributes for which no declaration has been read should be treated
9110
* by a non-validating parser as if declared CDATA.
9111
*
9112
* Returns the AttValue parsed or NULL. The value has to be freed by the
9113
* caller if it was copied, this can be detected by val[*len] == 0.
9114
*/
9115
9116
#define GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end) \
9117
const xmlChar *oldbase = ctxt->input->base;\
9118
GROW;\
9119
if (ctxt->instate == XML_PARSER_EOF)\
9120
return(NULL);\
9121
if (oldbase != ctxt->input->base) {\
9122
ptrdiff_t delta = ctxt->input->base - oldbase;\
9123
start = start + delta;\
9124
in = in + delta;\
9125
}\
9126
end = ctxt->input->end;
9127
9128
static xmlChar *
9129
xmlParseAttValueInternal(xmlParserCtxtPtr ctxt, int *len, int *alloc,
9130
int normalize)
9131
{
9132
xmlChar limit = 0;
9133
const xmlChar *in = NULL, *start, *end, *last;
9134
xmlChar *ret = NULL;
9135
int line, col;
9136
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
9137
XML_MAX_HUGE_LENGTH :
9138
XML_MAX_TEXT_LENGTH;
9139
9140
GROW;
9141
in = (xmlChar *) CUR_PTR;
9142
line = ctxt->input->line;
9143
col = ctxt->input->col;
9144
if (*in != '"' && *in != '\'') {
9145
xmlFatalErr(ctxt, XML_ERR_ATTRIBUTE_NOT_STARTED, NULL);
9146
return (NULL);
9147
}
9148
ctxt->instate = XML_PARSER_ATTRIBUTE_VALUE;
9149
9150
/*
9151
* try to handle in this routine the most common case where no
9152
* allocation of a new string is required and where content is
9153
* pure ASCII.
9154
*/
9155
limit = *in++;
9156
col++;
9157
end = ctxt->input->end;
9158
start = in;
9159
if (in >= end) {
9160
GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9161
}
9162
if (normalize) {
9163
/*
9164
* Skip any leading spaces
9165
*/
9166
while ((in < end) && (*in != limit) &&
9167
((*in == 0x20) || (*in == 0x9) ||
9168
(*in == 0xA) || (*in == 0xD))) {
9169
if (*in == 0xA) {
9170
line++; col = 1;
9171
} else {
9172
col++;
9173
}
9174
in++;
9175
start = in;
9176
if (in >= end) {
9177
GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9178
if ((in - start) > maxLength) {
9179
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9180
"AttValue length too long\n");
9181
return(NULL);
9182
}
9183
}
9184
}
9185
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9186
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9187
col++;
9188
if ((*in++ == 0x20) && (*in == 0x20)) break;
9189
if (in >= end) {
9190
GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9191
if ((in - start) > maxLength) {
9192
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9193
"AttValue length too long\n");
9194
return(NULL);
9195
}
9196
}
9197
}
9198
last = in;
9199
/*
9200
* skip the trailing blanks
9201
*/
9202
while ((last[-1] == 0x20) && (last > start)) last--;
9203
while ((in < end) && (*in != limit) &&
9204
((*in == 0x20) || (*in == 0x9) ||
9205
(*in == 0xA) || (*in == 0xD))) {
9206
if (*in == 0xA) {
9207
line++, col = 1;
9208
} else {
9209
col++;
9210
}
9211
in++;
9212
if (in >= end) {
9213
const xmlChar *oldbase = ctxt->input->base;
9214
GROW;
9215
if (ctxt->instate == XML_PARSER_EOF)
9216
return(NULL);
9217
if (oldbase != ctxt->input->base) {
9218
ptrdiff_t delta = ctxt->input->base - oldbase;
9219
start = start + delta;
9220
in = in + delta;
9221
last = last + delta;
9222
}
9223
end = ctxt->input->end;
9224
if ((in - start) > maxLength) {
9225
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9226
"AttValue length too long\n");
9227
return(NULL);
9228
}
9229
}
9230
}
9231
if ((in - start) > maxLength) {
9232
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9233
"AttValue length too long\n");
9234
return(NULL);
9235
}
9236
if (*in != limit) goto need_complex;
9237
} else {
9238
while ((in < end) && (*in != limit) && (*in >= 0x20) &&
9239
(*in <= 0x7f) && (*in != '&') && (*in != '<')) {
9240
in++;
9241
col++;
9242
if (in >= end) {
9243
GROW_PARSE_ATT_VALUE_INTERNAL(ctxt, in, start, end)
9244
if ((in - start) > maxLength) {
9245
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9246
"AttValue length too long\n");
9247
return(NULL);
9248
}
9249
}
9250
}
9251
last = in;
9252
if ((in - start) > maxLength) {
9253
xmlFatalErrMsg(ctxt, XML_ERR_ATTRIBUTE_NOT_FINISHED,
9254
"AttValue length too long\n");
9255
return(NULL);
9256
}
9257
if (*in != limit) goto need_complex;
9258
}
9259
in++;
9260
col++;
9261
if (len != NULL) {
9262
if (alloc) *alloc = 0;
9263
*len = last - start;
9264
ret = (xmlChar *) start;
9265
} else {
9266
if (alloc) *alloc = 1;
9267
ret = xmlStrndup(start, last - start);
9268
}
9269
CUR_PTR = in;
9270
ctxt->input->line = line;
9271
ctxt->input->col = col;
9272
return ret;
9273
need_complex:
9274
if (alloc) *alloc = 1;
9275
return xmlParseAttValueComplex(ctxt, len, normalize);
9276
}
9277
9278
/**
9279
* xmlParseAttribute2:
9280
* @ctxt: an XML parser context
9281
* @pref: the element prefix
9282
* @elem: the element name
9283
* @prefix: a xmlChar ** used to store the value of the attribute prefix
9284
* @value: a xmlChar ** used to store the value of the attribute
9285
* @len: an int * to save the length of the attribute
9286
* @alloc: an int * to indicate if the attribute was allocated
9287
*
9288
* parse an attribute in the new SAX2 framework.
9289
*
9290
* Returns the attribute name, and the value in *value, .
9291
*/
9292
9293
static xmlHashedString
9294
xmlParseAttribute2(xmlParserCtxtPtr ctxt,
9295
const xmlChar * pref, const xmlChar * elem,
9296
xmlHashedString * hprefix, xmlChar ** value,
9297
int *len, int *alloc)
9298
{
9299
xmlHashedString hname;
9300
const xmlChar *prefix, *name;
9301
xmlChar *val, *internal_val = NULL;
9302
int normalize = 0;
9303
9304
*value = NULL;
9305
GROW;
9306
hname = xmlParseQNameHashed(ctxt, hprefix);
9307
if (hname.name == NULL) {
9308
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9309
"error parsing attribute name\n");
9310
return(hname);
9311
}
9312
name = hname.name;
9313
if (hprefix->name != NULL)
9314
prefix = hprefix->name;
9315
else
9316
prefix = NULL;
9317
9318
/*
9319
* get the type if needed
9320
*/
9321
if (ctxt->attsSpecial != NULL) {
9322
int type;
9323
9324
type = (int) (ptrdiff_t) xmlHashQLookup2(ctxt->attsSpecial,
9325
pref, elem,
9326
prefix, name);
9327
if (type != 0)
9328
normalize = 1;
9329
}
9330
9331
/*
9332
* read the value
9333
*/
9334
SKIP_BLANKS;
9335
if (RAW == '=') {
9336
NEXT;
9337
SKIP_BLANKS;
9338
val = xmlParseAttValueInternal(ctxt, len, alloc, normalize);
9339
if (val == NULL) {
9340
hname.name = NULL;
9341
return(hname);
9342
}
9343
if (normalize) {
9344
/*
9345
* Sometimes a second normalisation pass for spaces is needed
9346
* but that only happens if charrefs or entities references
9347
* have been used in the attribute value, i.e. the attribute
9348
* value have been extracted in an allocated string already.
9349
*/
9350
if (*alloc) {
9351
const xmlChar *val2;
9352
9353
val2 = xmlAttrNormalizeSpace2(ctxt, val, len);
9354
if ((val2 != NULL) && (val2 != val)) {
9355
xmlFree(val);
9356
val = (xmlChar *) val2;
9357
}
9358
}
9359
}
9360
ctxt->instate = XML_PARSER_CONTENT;
9361
} else {
9362
xmlFatalErrMsgStr(ctxt, XML_ERR_ATTRIBUTE_WITHOUT_VALUE,
9363
"Specification mandates value for attribute %s\n",
9364
name);
9365
return(hname);
9366
}
9367
9368
if (prefix == ctxt->str_xml) {
9369
/*
9370
* Check that xml:lang conforms to the specification
9371
* No more registered as an error, just generate a warning now
9372
* since this was deprecated in XML second edition
9373
*/
9374
if ((ctxt->pedantic) && (xmlStrEqual(name, BAD_CAST "lang"))) {
9375
internal_val = xmlStrndup(val, *len);
9376
if (!xmlCheckLanguageID(internal_val)) {
9377
xmlWarningMsg(ctxt, XML_WAR_LANG_VALUE,
9378
"Malformed value for xml:lang : %s\n",
9379
internal_val, NULL);
9380
}
9381
}
9382
9383
/*
9384
* Check that xml:space conforms to the specification
9385
*/
9386
if (xmlStrEqual(name, BAD_CAST "space")) {
9387
internal_val = xmlStrndup(val, *len);
9388
if (xmlStrEqual(internal_val, BAD_CAST "default"))
9389
*(ctxt->space) = 0;
9390
else if (xmlStrEqual(internal_val, BAD_CAST "preserve"))
9391
*(ctxt->space) = 1;
9392
else {
9393
xmlWarningMsg(ctxt, XML_WAR_SPACE_VALUE,
9394
"Invalid value \"%s\" for xml:space : \"default\" or \"preserve\" expected\n",
9395
internal_val, NULL);
9396
}
9397
}
9398
if (internal_val) {
9399
xmlFree(internal_val);
9400
}
9401
}
9402
9403
*value = val;
9404
return (hname);
9405
}
9406
9407
/**
9408
* xmlAttrHashInsert:
9409
* @ctxt: parser context
9410
* @size: size of the hash table
9411
* @name: attribute name
9412
* @uri: namespace uri
9413
* @hashValue: combined hash value of name and uri
9414
* @aindex: attribute index (this is a multiple of 5)
9415
*
9416
* Inserts a new attribute into the hash table.
9417
*
9418
* Returns INT_MAX if no existing attribute was found, the attribute
9419
* index if an attribute was found, -1 if a memory allocation failed.
9420
*/
9421
static int
9422
xmlAttrHashInsert(xmlParserCtxtPtr ctxt, unsigned size, const xmlChar *name,
9423
const xmlChar *uri, unsigned hashValue, int aindex) {
9424
xmlAttrHashBucket *table = ctxt->attrHash;
9425
xmlAttrHashBucket *bucket;
9426
unsigned hindex;
9427
9428
hindex = hashValue & (size - 1);
9429
bucket = &table[hindex];
9430
9431
while (bucket->index >= 0) {
9432
const xmlChar **atts = &ctxt->atts[bucket->index];
9433
9434
if (name == atts[0]) {
9435
int nsIndex = (int) (ptrdiff_t) atts[2];
9436
9437
if ((nsIndex == NS_INDEX_EMPTY) ? (uri == NULL) :
9438
(nsIndex == NS_INDEX_XML) ? (uri == ctxt->str_xml_ns) :
9439
(uri == ctxt->nsTab[nsIndex * 2 + 1]))
9440
return(bucket->index);
9441
}
9442
9443
hindex++;
9444
bucket++;
9445
if (hindex >= size) {
9446
hindex = 0;
9447
bucket = table;
9448
}
9449
}
9450
9451
bucket->index = aindex;
9452
9453
return(INT_MAX);
9454
}
9455
9456
static int
9457
xmlAttrHashInsertQName(xmlParserCtxtPtr ctxt, unsigned size,
9458
const xmlChar *name, const xmlChar *prefix,
9459
unsigned hashValue, int aindex) {
9460
xmlAttrHashBucket *table = ctxt->attrHash;
9461
xmlAttrHashBucket *bucket;
9462
unsigned hindex;
9463
9464
hindex = hashValue & (size - 1);
9465
bucket = &table[hindex];
9466
9467
while (bucket->index >= 0) {
9468
const xmlChar **atts = &ctxt->atts[bucket->index];
9469
9470
if ((name == atts[0]) && (prefix == atts[1]))
9471
return(bucket->index);
9472
9473
hindex++;
9474
bucket++;
9475
if (hindex >= size) {
9476
hindex = 0;
9477
bucket = table;
9478
}
9479
}
9480
9481
bucket->index = aindex;
9482
9483
return(INT_MAX);
9484
}
9485
/**
9486
* xmlParseStartTag2:
9487
* @ctxt: an XML parser context
9488
*
9489
* Parse a start tag. Always consumes '<'.
9490
*
9491
* This routine is called when running SAX2 parsing
9492
*
9493
* [40] STag ::= '<' Name (S Attribute)* S? '>'
9494
*
9495
* [ WFC: Unique Att Spec ]
9496
* No attribute name may appear more than once in the same start-tag or
9497
* empty-element tag.
9498
*
9499
* [44] EmptyElemTag ::= '<' Name (S Attribute)* S? '/>'
9500
*
9501
* [ WFC: Unique Att Spec ]
9502
* No attribute name may appear more than once in the same start-tag or
9503
* empty-element tag.
9504
*
9505
* With namespace:
9506
*
9507
* [NS 8] STag ::= '<' QName (S Attribute)* S? '>'
9508
*
9509
* [NS 10] EmptyElement ::= '<' QName (S Attribute)* S? '/>'
9510
*
9511
* Returns the element name parsed
9512
*/
9513
9514
static const xmlChar *
9515
xmlParseStartTag2(xmlParserCtxtPtr ctxt, const xmlChar **pref,
9516
const xmlChar **URI, int *nbNsPtr) {
9517
xmlHashedString hlocalname;
9518
xmlHashedString hprefix;
9519
xmlHashedString hattname;
9520
xmlHashedString haprefix;
9521
const xmlChar *localname;
9522
const xmlChar *prefix;
9523
const xmlChar *attname;
9524
const xmlChar *aprefix;
9525
const xmlChar *uri;
9526
xmlChar *attvalue = NULL;
9527
const xmlChar **atts = ctxt->atts;
9528
unsigned attrHashSize = 0;
9529
int maxatts = ctxt->maxatts;
9530
int nratts, nbatts, nbdef, inputid;
9531
int i, j, nbNs, nbTotalDef, attval, nsIndex, maxAtts;
9532
int alloc = 0;
9533
int numNsErr = 0;
9534
int numDupErr = 0;
9535
9536
if (RAW != '<') return(NULL);
9537
NEXT1;
9538
9539
inputid = ctxt->input->id;
9540
nbatts = 0;
9541
nratts = 0;
9542
nbdef = 0;
9543
nbNs = 0;
9544
nbTotalDef = 0;
9545
attval = 0;
9546
9547
if (xmlParserNsStartElement(ctxt->nsdb) < 0) {
9548
xmlErrMemory(ctxt, NULL);
9549
return(NULL);
9550
}
9551
9552
hlocalname = xmlParseQNameHashed(ctxt, &hprefix);
9553
if (hlocalname.name == NULL) {
9554
xmlFatalErrMsg(ctxt, XML_ERR_NAME_REQUIRED,
9555
"StartTag: invalid element name\n");
9556
return(NULL);
9557
}
9558
localname = hlocalname.name;
9559
prefix = hprefix.name;
9560
9561
/*
9562
* Now parse the attributes, it ends up with the ending
9563
*
9564
* (S Attribute)* S?
9565
*/
9566
SKIP_BLANKS;
9567
GROW;
9568
9569
/*
9570
* The ctxt->atts array will be ultimately passed to the SAX callback
9571
* containing five xmlChar pointers for each attribute:
9572
*
9573
* [0] attribute name
9574
* [1] attribute prefix
9575
* [2] namespace URI
9576
* [3] attribute value
9577
* [4] end of attribute value
9578
*
9579
* To save memory, we reuse this array temporarily and store integers
9580
* in these pointer variables.
9581
*
9582
* [0] attribute name
9583
* [1] attribute prefix
9584
* [2] hash value of attribute prefix, and later namespace index
9585
* [3] for non-allocated values: ptrdiff_t offset into input buffer
9586
* [4] for non-allocated values: ptrdiff_t offset into input buffer
9587
*
9588
* The ctxt->attallocs array contains an additional unsigned int for
9589
* each attribute, containing the hash value of the attribute name
9590
* and the alloc flag in bit 31.
9591
*/
9592
9593
while (((RAW != '>') &&
9594
((RAW != '/') || (NXT(1) != '>')) &&
9595
(IS_BYTE_CHAR(RAW))) && (ctxt->instate != XML_PARSER_EOF)) {
9596
int len = -1;
9597
9598
hattname = xmlParseAttribute2(ctxt, prefix, localname,
9599
&haprefix, &attvalue, &len,
9600
&alloc);
9601
if (hattname.name == NULL) {
9602
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9603
"xmlParseStartTag: problem parsing attributes\n");
9604
break;
9605
}
9606
if (attvalue == NULL)
9607
goto next_attr;
9608
attname = hattname.name;
9609
aprefix = haprefix.name;
9610
if (len < 0) len = xmlStrlen(attvalue);
9611
9612
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9613
xmlHashedString huri;
9614
xmlURIPtr parsedUri;
9615
9616
huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9617
uri = huri.name;
9618
if (uri == NULL) {
9619
xmlErrMemory(ctxt, NULL);
9620
goto next_attr;
9621
}
9622
if (*uri != 0) {
9623
parsedUri = xmlParseURI((const char *) uri);
9624
if (parsedUri == NULL) {
9625
xmlNsErr(ctxt, XML_WAR_NS_URI,
9626
"xmlns: '%s' is not a valid URI\n",
9627
uri, NULL, NULL);
9628
} else {
9629
if (parsedUri->scheme == NULL) {
9630
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9631
"xmlns: URI %s is not absolute\n",
9632
uri, NULL, NULL);
9633
}
9634
xmlFreeURI(parsedUri);
9635
}
9636
if (uri == ctxt->str_xml_ns) {
9637
if (attname != ctxt->str_xml) {
9638
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9639
"xml namespace URI cannot be the default namespace\n",
9640
NULL, NULL, NULL);
9641
}
9642
goto next_attr;
9643
}
9644
if ((len == 29) &&
9645
(xmlStrEqual(uri,
9646
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9647
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9648
"reuse of the xmlns namespace name is forbidden\n",
9649
NULL, NULL, NULL);
9650
goto next_attr;
9651
}
9652
}
9653
9654
if (xmlParserNsPush(ctxt, NULL, &huri, NULL, 0) > 0)
9655
nbNs++;
9656
} else if (aprefix == ctxt->str_xmlns) {
9657
xmlHashedString huri;
9658
xmlURIPtr parsedUri;
9659
9660
huri = xmlDictLookupHashed(ctxt->dict, attvalue, len);
9661
uri = huri.name;
9662
if (uri == NULL) {
9663
xmlErrMemory(ctxt, NULL);
9664
goto next_attr;
9665
}
9666
9667
if (attname == ctxt->str_xml) {
9668
if (uri != ctxt->str_xml_ns) {
9669
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9670
"xml namespace prefix mapped to wrong URI\n",
9671
NULL, NULL, NULL);
9672
}
9673
/*
9674
* Do not keep a namespace definition node
9675
*/
9676
goto next_attr;
9677
}
9678
if (uri == ctxt->str_xml_ns) {
9679
if (attname != ctxt->str_xml) {
9680
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9681
"xml namespace URI mapped to wrong prefix\n",
9682
NULL, NULL, NULL);
9683
}
9684
goto next_attr;
9685
}
9686
if (attname == ctxt->str_xmlns) {
9687
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9688
"redefinition of the xmlns prefix is forbidden\n",
9689
NULL, NULL, NULL);
9690
goto next_attr;
9691
}
9692
if ((len == 29) &&
9693
(xmlStrEqual(uri,
9694
BAD_CAST "http://www.w3.org/2000/xmlns/"))) {
9695
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9696
"reuse of the xmlns namespace name is forbidden\n",
9697
NULL, NULL, NULL);
9698
goto next_attr;
9699
}
9700
if ((uri == NULL) || (uri[0] == 0)) {
9701
xmlNsErr(ctxt, XML_NS_ERR_XML_NAMESPACE,
9702
"xmlns:%s: Empty XML namespace is not allowed\n",
9703
attname, NULL, NULL);
9704
goto next_attr;
9705
} else {
9706
parsedUri = xmlParseURI((const char *) uri);
9707
if (parsedUri == NULL) {
9708
xmlNsErr(ctxt, XML_WAR_NS_URI,
9709
"xmlns:%s: '%s' is not a valid URI\n",
9710
attname, uri, NULL);
9711
} else {
9712
if ((ctxt->pedantic) && (parsedUri->scheme == NULL)) {
9713
xmlNsWarn(ctxt, XML_WAR_NS_URI_RELATIVE,
9714
"xmlns:%s: URI %s is not absolute\n",
9715
attname, uri, NULL);
9716
}
9717
xmlFreeURI(parsedUri);
9718
}
9719
}
9720
9721
if (xmlParserNsPush(ctxt, &hattname, &huri, NULL, 0) > 0)
9722
nbNs++;
9723
} else {
9724
/*
9725
* Populate attributes array, see above for repurposing
9726
* of xmlChar pointers.
9727
*/
9728
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9729
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9730
goto next_attr;
9731
}
9732
maxatts = ctxt->maxatts;
9733
atts = ctxt->atts;
9734
}
9735
ctxt->attallocs[nratts++] = (hattname.hashValue & 0x7FFFFFFF) |
9736
((unsigned) alloc << 31);
9737
atts[nbatts++] = attname;
9738
atts[nbatts++] = aprefix;
9739
atts[nbatts++] = (const xmlChar *) (size_t) haprefix.hashValue;
9740
if (alloc) {
9741
atts[nbatts++] = attvalue;
9742
attvalue += len;
9743
atts[nbatts++] = attvalue;
9744
} else {
9745
/*
9746
* attvalue points into the input buffer which can be
9747
* reallocated. Store differences to input->base instead.
9748
* The pointers will be reconstructed later.
9749
*/
9750
atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9751
attvalue += len;
9752
atts[nbatts++] = (void *) (attvalue - BASE_PTR);
9753
}
9754
/*
9755
* tag if some deallocation is needed
9756
*/
9757
if (alloc != 0) attval = 1;
9758
attvalue = NULL; /* moved into atts */
9759
}
9760
9761
next_attr:
9762
if ((attvalue != NULL) && (alloc != 0)) {
9763
xmlFree(attvalue);
9764
attvalue = NULL;
9765
}
9766
9767
GROW
9768
if (ctxt->instate == XML_PARSER_EOF)
9769
break;
9770
if ((RAW == '>') || (((RAW == '/') && (NXT(1) == '>'))))
9771
break;
9772
if (SKIP_BLANKS == 0) {
9773
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
9774
"attributes construct error\n");
9775
break;
9776
}
9777
GROW;
9778
}
9779
9780
if (ctxt->input->id != inputid) {
9781
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR,
9782
"Unexpected change of input\n");
9783
localname = NULL;
9784
goto done;
9785
}
9786
9787
/*
9788
* Namespaces from default attributes
9789
*/
9790
if (ctxt->attsDefault != NULL) {
9791
xmlDefAttrsPtr defaults;
9792
9793
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9794
if (defaults != NULL) {
9795
for (i = 0; i < defaults->nbAttrs; i++) {
9796
xmlDefAttr *attr = &defaults->attrs[i];
9797
9798
attname = attr->name.name;
9799
aprefix = attr->prefix.name;
9800
9801
if ((attname == ctxt->str_xmlns) && (aprefix == NULL)) {
9802
xmlParserEntityCheck(ctxt, attr->expandedSize);
9803
9804
if (xmlParserNsPush(ctxt, NULL, &attr->value, NULL, 1) > 0)
9805
nbNs++;
9806
} else if (aprefix == ctxt->str_xmlns) {
9807
xmlParserEntityCheck(ctxt, attr->expandedSize);
9808
9809
if (xmlParserNsPush(ctxt, &attr->name, &attr->value,
9810
NULL, 1) > 0)
9811
nbNs++;
9812
} else {
9813
nbTotalDef += 1;
9814
}
9815
}
9816
}
9817
}
9818
9819
/*
9820
* Resolve attribute namespaces
9821
*/
9822
for (i = 0; i < nbatts; i += 5) {
9823
attname = atts[i];
9824
aprefix = atts[i+1];
9825
9826
/*
9827
* The default namespace does not apply to attribute names.
9828
*/
9829
if (aprefix == NULL) {
9830
nsIndex = NS_INDEX_EMPTY;
9831
} else if (aprefix == ctxt->str_xml) {
9832
nsIndex = NS_INDEX_XML;
9833
} else {
9834
haprefix.name = aprefix;
9835
haprefix.hashValue = (size_t) atts[i+2];
9836
nsIndex = xmlParserNsLookup(ctxt, &haprefix, NULL);
9837
if (nsIndex == INT_MAX) {
9838
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9839
"Namespace prefix %s for %s on %s is not defined\n",
9840
aprefix, attname, localname);
9841
nsIndex = NS_INDEX_EMPTY;
9842
}
9843
}
9844
9845
atts[i+2] = (const xmlChar *) (ptrdiff_t) nsIndex;
9846
}
9847
9848
/*
9849
* Maximum number of attributes including default attributes.
9850
*/
9851
maxAtts = nratts + nbTotalDef;
9852
9853
/*
9854
* Verify that attribute names are unique.
9855
*/
9856
if (maxAtts > 1) {
9857
attrHashSize = 4;
9858
while (attrHashSize / 2 < (unsigned) maxAtts)
9859
attrHashSize *= 2;
9860
9861
if (attrHashSize > ctxt->attrHashMax) {
9862
xmlAttrHashBucket *tmp;
9863
9864
tmp = xmlRealloc(ctxt->attrHash, attrHashSize * sizeof(tmp[0]));
9865
if (tmp == NULL) {
9866
xmlErrMemory(ctxt, NULL);
9867
goto done;
9868
}
9869
9870
ctxt->attrHash = tmp;
9871
ctxt->attrHashMax = attrHashSize;
9872
}
9873
9874
memset(ctxt->attrHash, -1, attrHashSize * sizeof(ctxt->attrHash[0]));
9875
9876
for (i = 0, j = 0; j < nratts; i += 5, j++) {
9877
const xmlChar *nsuri;
9878
unsigned hashValue, nameHashValue, uriHashValue;
9879
int res;
9880
9881
attname = atts[i];
9882
aprefix = atts[i+1];
9883
nsIndex = (ptrdiff_t) atts[i+2];
9884
/* Hash values always have bit 31 set, see dict.c */
9885
nameHashValue = ctxt->attallocs[j] | 0x80000000;
9886
9887
if (nsIndex == NS_INDEX_EMPTY) {
9888
nsuri = NULL;
9889
uriHashValue = URI_HASH_EMPTY;
9890
} else if (nsIndex == NS_INDEX_XML) {
9891
nsuri = ctxt->str_xml_ns;
9892
uriHashValue = URI_HASH_XML;
9893
} else {
9894
nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9895
uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9896
}
9897
9898
hashValue = xmlDictCombineHash(nameHashValue, uriHashValue);
9899
res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9900
hashValue, i);
9901
if (res < 0)
9902
continue;
9903
9904
/*
9905
* [ WFC: Unique Att Spec ]
9906
* No attribute name may appear more than once in the same
9907
* start-tag or empty-element tag.
9908
* As extended by the Namespace in XML REC.
9909
*/
9910
if (res < INT_MAX) {
9911
if (aprefix == atts[res+1]) {
9912
xmlErrAttributeDup(ctxt, aprefix, attname);
9913
numDupErr += 1;
9914
} else {
9915
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9916
"Namespaced Attribute %s in '%s' redefined\n",
9917
attname, nsuri, NULL);
9918
numNsErr += 1;
9919
}
9920
}
9921
}
9922
}
9923
9924
/*
9925
* Default attributes
9926
*/
9927
if (ctxt->attsDefault != NULL) {
9928
xmlDefAttrsPtr defaults;
9929
9930
defaults = xmlHashLookup2(ctxt->attsDefault, localname, prefix);
9931
if (defaults != NULL) {
9932
for (i = 0; i < defaults->nbAttrs; i++) {
9933
xmlDefAttr *attr = &defaults->attrs[i];
9934
const xmlChar *nsuri;
9935
unsigned hashValue, uriHashValue;
9936
int res;
9937
9938
attname = attr->name.name;
9939
aprefix = attr->prefix.name;
9940
9941
if ((attname == ctxt->str_xmlns) && (aprefix == NULL))
9942
continue;
9943
if (aprefix == ctxt->str_xmlns)
9944
continue;
9945
9946
if (aprefix == NULL) {
9947
nsIndex = NS_INDEX_EMPTY;
9948
nsuri = NULL;
9949
uriHashValue = URI_HASH_EMPTY;
9950
} if (aprefix == ctxt->str_xml) {
9951
nsIndex = NS_INDEX_XML;
9952
nsuri = ctxt->str_xml_ns;
9953
uriHashValue = URI_HASH_XML;
9954
} else if (aprefix != NULL) {
9955
nsIndex = xmlParserNsLookup(ctxt, &attr->prefix, NULL);
9956
if (nsIndex == INT_MAX) {
9957
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
9958
"Namespace prefix %s for %s on %s is not "
9959
"defined\n",
9960
aprefix, attname, localname);
9961
nsIndex = NS_INDEX_EMPTY;
9962
nsuri = NULL;
9963
uriHashValue = URI_HASH_EMPTY;
9964
} else {
9965
nsuri = ctxt->nsTab[nsIndex * 2 + 1];
9966
uriHashValue = ctxt->nsdb->extra[nsIndex].uriHashValue;
9967
}
9968
}
9969
9970
/*
9971
* Check whether the attribute exists
9972
*/
9973
if (maxAtts > 1) {
9974
hashValue = xmlDictCombineHash(attr->name.hashValue,
9975
uriHashValue);
9976
res = xmlAttrHashInsert(ctxt, attrHashSize, attname, nsuri,
9977
hashValue, nbatts);
9978
if (res < 0)
9979
continue;
9980
if (res < INT_MAX) {
9981
if (aprefix == atts[res+1])
9982
continue;
9983
xmlNsErr(ctxt, XML_NS_ERR_ATTRIBUTE_REDEFINED,
9984
"Namespaced Attribute %s in '%s' redefined\n",
9985
attname, nsuri, NULL);
9986
}
9987
}
9988
9989
xmlParserEntityCheck(ctxt, attr->expandedSize);
9990
9991
if ((atts == NULL) || (nbatts + 5 > maxatts)) {
9992
if (xmlCtxtGrowAttrs(ctxt, nbatts + 5) < 0) {
9993
localname = NULL;
9994
goto done;
9995
}
9996
maxatts = ctxt->maxatts;
9997
atts = ctxt->atts;
9998
}
9999
10000
atts[nbatts++] = attname;
10001
atts[nbatts++] = aprefix;
10002
atts[nbatts++] = (const xmlChar *) (ptrdiff_t) nsIndex;
10003
atts[nbatts++] = attr->value.name;
10004
atts[nbatts++] = attr->valueEnd;
10005
if ((ctxt->standalone == 1) && (attr->external != 0)) {
10006
xmlValidityError(ctxt, XML_DTD_STANDALONE_DEFAULTED,
10007
"standalone: attribute %s on %s defaulted "
10008
"from external subset\n",
10009
attname, localname);
10010
}
10011
nbdef++;
10012
}
10013
}
10014
}
10015
10016
/*
10017
* Using a single hash table for nsUri/localName pairs cannot
10018
* detect duplicate QNames reliably. The following example will
10019
* only result in two namespace errors.
10020
*
10021
* <doc xmlns:a="a" xmlns:b="a">
10022
* <elem a:a="" b:a="" b:a=""/>
10023
* </doc>
10024
*
10025
* If we saw more than one namespace error but no duplicate QNames
10026
* were found, we have to scan for duplicate QNames.
10027
*/
10028
if ((numDupErr == 0) && (numNsErr > 1)) {
10029
memset(ctxt->attrHash, -1,
10030
attrHashSize * sizeof(ctxt->attrHash[0]));
10031
10032
for (i = 0, j = 0; j < nratts; i += 5, j++) {
10033
unsigned hashValue, nameHashValue, prefixHashValue;
10034
int res;
10035
10036
aprefix = atts[i+1];
10037
if (aprefix == NULL)
10038
continue;
10039
10040
attname = atts[i];
10041
/* Hash values always have bit 31 set, see dict.c */
10042
nameHashValue = ctxt->attallocs[j] | 0x80000000;
10043
prefixHashValue = xmlDictComputeHash(ctxt->dict, aprefix);
10044
10045
hashValue = xmlDictCombineHash(nameHashValue, prefixHashValue);
10046
res = xmlAttrHashInsertQName(ctxt, attrHashSize, attname,
10047
aprefix, hashValue, i);
10048
if (res < INT_MAX)
10049
xmlErrAttributeDup(ctxt, aprefix, attname);
10050
}
10051
}
10052
10053
/*
10054
* Reconstruct attribute pointers
10055
*/
10056
for (i = 0, j = 0; i < nbatts; i += 5, j++) {
10057
/* namespace URI */
10058
nsIndex = (ptrdiff_t) atts[i+2];
10059
if (nsIndex == INT_MAX)
10060
atts[i+2] = NULL;
10061
else if (nsIndex == INT_MAX - 1)
10062
atts[i+2] = ctxt->str_xml_ns;
10063
else
10064
atts[i+2] = ctxt->nsTab[nsIndex * 2 + 1];
10065
10066
if ((j < nratts) && (ctxt->attallocs[j] & 0x80000000) == 0) {
10067
atts[i+3] = BASE_PTR + (ptrdiff_t) atts[i+3]; /* value */
10068
atts[i+4] = BASE_PTR + (ptrdiff_t) atts[i+4]; /* valuend */
10069
}
10070
}
10071
10072
uri = xmlParserNsLookupUri(ctxt, &hprefix);
10073
if ((prefix != NULL) && (uri == NULL)) {
10074
xmlNsErr(ctxt, XML_NS_ERR_UNDEFINED_NAMESPACE,
10075
"Namespace prefix %s on %s is not defined\n",
10076
prefix, localname, NULL);
10077
}
10078
*pref = prefix;
10079
*URI = uri;
10080
10081
/*
10082
* SAX callback
10083
*/
10084
if ((ctxt->sax != NULL) && (ctxt->sax->startElementNs != NULL) &&
10085
(!ctxt->disableSAX)) {
10086
if (nbNs > 0)
10087
ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10088
nbNs, ctxt->nsTab + 2 * (ctxt->nsNr - nbNs),
10089
nbatts / 5, nbdef, atts);
10090
else
10091
ctxt->sax->startElementNs(ctxt->userData, localname, prefix, uri,
10092
0, NULL, nbatts / 5, nbdef, atts);
10093
}
10094
10095
done:
10096
/*
10097
* Free allocated attribute values
10098
*/
10099
if (attval != 0) {
10100
for (i = 0, j = 0; j < nratts; i += 5, j++)
10101
if (ctxt->attallocs[j] & 0x80000000)
10102
xmlFree((xmlChar *) atts[i+3]);
10103
}
10104
10105
*nbNsPtr = nbNs;
10106
return(localname);
10107
}
10108
10109
/**
10110
* xmlParseEndTag2:
10111
* @ctxt: an XML parser context
10112
* @line: line of the start tag
10113
* @nsNr: number of namespaces on the start tag
10114
*
10115
* Parse an end tag. Always consumes '</'.
10116
*
10117
* [42] ETag ::= '</' Name S? '>'
10118
*
10119
* With namespace
10120
*
10121
* [NS 9] ETag ::= '</' QName S? '>'
10122
*/
10123
10124
static void
10125
xmlParseEndTag2(xmlParserCtxtPtr ctxt, const xmlStartTag *tag) {
10126
const xmlChar *name;
10127
10128
GROW;
10129
if ((RAW != '<') || (NXT(1) != '/')) {
10130
xmlFatalErr(ctxt, XML_ERR_LTSLASH_REQUIRED, NULL);
10131
return;
10132
}
10133
SKIP(2);
10134
10135
if (tag->prefix == NULL)
10136
name = xmlParseNameAndCompare(ctxt, ctxt->name);
10137
else
10138
name = xmlParseQNameAndCompare(ctxt, ctxt->name, tag->prefix);
10139
10140
/*
10141
* We should definitely be at the ending "S? '>'" part
10142
*/
10143
GROW;
10144
if (ctxt->instate == XML_PARSER_EOF)
10145
return;
10146
SKIP_BLANKS;
10147
if ((!IS_BYTE_CHAR(RAW)) || (RAW != '>')) {
10148
xmlFatalErr(ctxt, XML_ERR_GT_REQUIRED, NULL);
10149
} else
10150
NEXT1;
10151
10152
/*
10153
* [ WFC: Element Type Match ]
10154
* The Name in an element's end-tag must match the element type in the
10155
* start-tag.
10156
*
10157
*/
10158
if (name != (xmlChar*)1) {
10159
if (name == NULL) name = BAD_CAST "unparsable";
10160
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NAME_MISMATCH,
10161
"Opening and ending tag mismatch: %s line %d and %s\n",
10162
ctxt->name, tag->line, name);
10163
}
10164
10165
/*
10166
* SAX: End of Tag
10167
*/
10168
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10169
(!ctxt->disableSAX))
10170
ctxt->sax->endElementNs(ctxt->userData, ctxt->name, tag->prefix,
10171
tag->URI);
10172
10173
spacePop(ctxt);
10174
if (tag->nsNr != 0)
10175
xmlParserNsPop(ctxt, tag->nsNr);
10176
}
10177
10178
/**
10179
* xmlParseCDSect:
10180
* @ctxt: an XML parser context
10181
*
10182
* DEPRECATED: Internal function, don't use.
10183
*
10184
* Parse escaped pure raw content. Always consumes '<!['.
10185
*
10186
* [18] CDSect ::= CDStart CData CDEnd
10187
*
10188
* [19] CDStart ::= '<![CDATA['
10189
*
10190
* [20] Data ::= (Char* - (Char* ']]>' Char*))
10191
*
10192
* [21] CDEnd ::= ']]>'
10193
*/
10194
void
10195
xmlParseCDSect(xmlParserCtxtPtr ctxt) {
10196
xmlChar *buf = NULL;
10197
int len = 0;
10198
int size = XML_PARSER_BUFFER_SIZE;
10199
int r, rl;
10200
int s, sl;
10201
int cur, l;
10202
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10203
XML_MAX_HUGE_LENGTH :
10204
XML_MAX_TEXT_LENGTH;
10205
10206
if ((CUR != '<') || (NXT(1) != '!') || (NXT(2) != '['))
10207
return;
10208
SKIP(3);
10209
10210
if (!CMP6(CUR_PTR, 'C', 'D', 'A', 'T', 'A', '['))
10211
return;
10212
SKIP(6);
10213
10214
ctxt->instate = XML_PARSER_CDATA_SECTION;
10215
r = CUR_CHAR(rl);
10216
if (!IS_CHAR(r)) {
10217
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10218
goto out;
10219
}
10220
NEXTL(rl);
10221
s = CUR_CHAR(sl);
10222
if (!IS_CHAR(s)) {
10223
xmlFatalErr(ctxt, XML_ERR_CDATA_NOT_FINISHED, NULL);
10224
goto out;
10225
}
10226
NEXTL(sl);
10227
cur = CUR_CHAR(l);
10228
buf = (xmlChar *) xmlMallocAtomic(size);
10229
if (buf == NULL) {
10230
xmlErrMemory(ctxt, NULL);
10231
goto out;
10232
}
10233
while (IS_CHAR(cur) &&
10234
((r != ']') || (s != ']') || (cur != '>'))) {
10235
if (len + 5 >= size) {
10236
xmlChar *tmp;
10237
10238
tmp = (xmlChar *) xmlRealloc(buf, size * 2);
10239
if (tmp == NULL) {
10240
xmlErrMemory(ctxt, NULL);
10241
goto out;
10242
}
10243
buf = tmp;
10244
size *= 2;
10245
}
10246
COPY_BUF(buf, len, r);
10247
if (len > maxLength) {
10248
xmlFatalErrMsg(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10249
"CData section too big found\n");
10250
goto out;
10251
}
10252
r = s;
10253
rl = sl;
10254
s = cur;
10255
sl = l;
10256
NEXTL(l);
10257
cur = CUR_CHAR(l);
10258
}
10259
buf[len] = 0;
10260
if (ctxt->instate == XML_PARSER_EOF) {
10261
xmlFree(buf);
10262
return;
10263
}
10264
if (cur != '>') {
10265
xmlFatalErrMsgStr(ctxt, XML_ERR_CDATA_NOT_FINISHED,
10266
"CData section not finished\n%.50s\n", buf);
10267
goto out;
10268
}
10269
NEXTL(l);
10270
10271
/*
10272
* OK the buffer is to be consumed as cdata.
10273
*/
10274
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
10275
if (ctxt->sax->cdataBlock != NULL)
10276
ctxt->sax->cdataBlock(ctxt->userData, buf, len);
10277
else if (ctxt->sax->characters != NULL)
10278
ctxt->sax->characters(ctxt->userData, buf, len);
10279
}
10280
10281
out:
10282
if (ctxt->instate != XML_PARSER_EOF)
10283
ctxt->instate = XML_PARSER_CONTENT;
10284
xmlFree(buf);
10285
}
10286
10287
/**
10288
* xmlParseContentInternal:
10289
* @ctxt: an XML parser context
10290
*
10291
* Parse a content sequence. Stops at EOF or '</'. Leaves checking of
10292
* unexpected EOF to the caller.
10293
*/
10294
10295
static void
10296
xmlParseContentInternal(xmlParserCtxtPtr ctxt) {
10297
int nameNr = ctxt->nameNr;
10298
10299
GROW;
10300
while ((ctxt->input->cur < ctxt->input->end) &&
10301
(ctxt->instate != XML_PARSER_EOF)) {
10302
const xmlChar *cur = ctxt->input->cur;
10303
10304
/*
10305
* First case : a Processing Instruction.
10306
*/
10307
if ((*cur == '<') && (cur[1] == '?')) {
10308
xmlParsePI(ctxt);
10309
}
10310
10311
/*
10312
* Second case : a CDSection
10313
*/
10314
/* 2.6.0 test was *cur not RAW */
10315
else if (CMP9(CUR_PTR, '<', '!', '[', 'C', 'D', 'A', 'T', 'A', '[')) {
10316
xmlParseCDSect(ctxt);
10317
}
10318
10319
/*
10320
* Third case : a comment
10321
*/
10322
else if ((*cur == '<') && (NXT(1) == '!') &&
10323
(NXT(2) == '-') && (NXT(3) == '-')) {
10324
xmlParseComment(ctxt);
10325
ctxt->instate = XML_PARSER_CONTENT;
10326
}
10327
10328
/*
10329
* Fourth case : a sub-element.
10330
*/
10331
else if (*cur == '<') {
10332
if (NXT(1) == '/') {
10333
if (ctxt->nameNr <= nameNr)
10334
break;
10335
xmlParseElementEnd(ctxt);
10336
} else {
10337
xmlParseElementStart(ctxt);
10338
}
10339
}
10340
10341
/*
10342
* Fifth case : a reference. If if has not been resolved,
10343
* parsing returns it's Name, create the node
10344
*/
10345
10346
else if (*cur == '&') {
10347
xmlParseReference(ctxt);
10348
}
10349
10350
/*
10351
* Last case, text. Note that References are handled directly.
10352
*/
10353
else {
10354
xmlParseCharDataInternal(ctxt, 0);
10355
}
10356
10357
SHRINK;
10358
GROW;
10359
}
10360
}
10361
10362
/**
10363
* xmlParseContent:
10364
* @ctxt: an XML parser context
10365
*
10366
* Parse a content sequence. Stops at EOF or '</'.
10367
*
10368
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
10369
*/
10370
10371
void
10372
xmlParseContent(xmlParserCtxtPtr ctxt) {
10373
int nameNr = ctxt->nameNr;
10374
10375
xmlParseContentInternal(ctxt);
10376
10377
if ((ctxt->instate != XML_PARSER_EOF) &&
10378
(ctxt->errNo == XML_ERR_OK) &&
10379
(ctxt->nameNr > nameNr)) {
10380
const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10381
int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10382
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10383
"Premature end of data in tag %s line %d\n",
10384
name, line, NULL);
10385
}
10386
}
10387
10388
/**
10389
* xmlParseElement:
10390
* @ctxt: an XML parser context
10391
*
10392
* DEPRECATED: Internal function, don't use.
10393
*
10394
* parse an XML element
10395
*
10396
* [39] element ::= EmptyElemTag | STag content ETag
10397
*
10398
* [ WFC: Element Type Match ]
10399
* The Name in an element's end-tag must match the element type in the
10400
* start-tag.
10401
*
10402
*/
10403
10404
void
10405
xmlParseElement(xmlParserCtxtPtr ctxt) {
10406
if (xmlParseElementStart(ctxt) != 0)
10407
return;
10408
10409
xmlParseContentInternal(ctxt);
10410
if (ctxt->instate == XML_PARSER_EOF)
10411
return;
10412
10413
if (ctxt->input->cur >= ctxt->input->end) {
10414
if (ctxt->errNo == XML_ERR_OK) {
10415
const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
10416
int line = ctxt->pushTab[ctxt->nameNr - 1].line;
10417
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
10418
"Premature end of data in tag %s line %d\n",
10419
name, line, NULL);
10420
}
10421
return;
10422
}
10423
10424
xmlParseElementEnd(ctxt);
10425
}
10426
10427
/**
10428
* xmlParseElementStart:
10429
* @ctxt: an XML parser context
10430
*
10431
* Parse the start of an XML element. Returns -1 in case of error, 0 if an
10432
* opening tag was parsed, 1 if an empty element was parsed.
10433
*
10434
* Always consumes '<'.
10435
*/
10436
static int
10437
xmlParseElementStart(xmlParserCtxtPtr ctxt) {
10438
const xmlChar *name;
10439
const xmlChar *prefix = NULL;
10440
const xmlChar *URI = NULL;
10441
xmlParserNodeInfo node_info;
10442
int line;
10443
xmlNodePtr cur;
10444
int nbNs = 0;
10445
10446
if (((unsigned int) ctxt->nameNr > xmlParserMaxDepth) &&
10447
((ctxt->options & XML_PARSE_HUGE) == 0)) {
10448
xmlFatalErrMsgInt(ctxt, XML_ERR_INTERNAL_ERROR,
10449
"Excessive depth in document: %d use XML_PARSE_HUGE option\n",
10450
xmlParserMaxDepth);
10451
xmlHaltParser(ctxt);
10452
return(-1);
10453
}
10454
10455
/* Capture start position */
10456
if (ctxt->record_info) {
10457
node_info.begin_pos = ctxt->input->consumed +
10458
(CUR_PTR - ctxt->input->base);
10459
node_info.begin_line = ctxt->input->line;
10460
}
10461
10462
if (ctxt->spaceNr == 0)
10463
spacePush(ctxt, -1);
10464
else if (*ctxt->space == -2)
10465
spacePush(ctxt, -1);
10466
else
10467
spacePush(ctxt, *ctxt->space);
10468
10469
line = ctxt->input->line;
10470
#ifdef LIBXML_SAX1_ENABLED
10471
if (ctxt->sax2)
10472
#endif /* LIBXML_SAX1_ENABLED */
10473
name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
10474
#ifdef LIBXML_SAX1_ENABLED
10475
else
10476
name = xmlParseStartTag(ctxt);
10477
#endif /* LIBXML_SAX1_ENABLED */
10478
if (ctxt->instate == XML_PARSER_EOF)
10479
return(-1);
10480
if (name == NULL) {
10481
spacePop(ctxt);
10482
return(-1);
10483
}
10484
nameNsPush(ctxt, name, prefix, URI, line, nbNs);
10485
cur = ctxt->node;
10486
10487
#ifdef LIBXML_VALID_ENABLED
10488
/*
10489
* [ VC: Root Element Type ]
10490
* The Name in the document type declaration must match the element
10491
* type of the root element.
10492
*/
10493
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
10494
ctxt->node && (ctxt->node == ctxt->myDoc->children))
10495
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
10496
#endif /* LIBXML_VALID_ENABLED */
10497
10498
/*
10499
* Check for an Empty Element.
10500
*/
10501
if ((RAW == '/') && (NXT(1) == '>')) {
10502
SKIP(2);
10503
if (ctxt->sax2) {
10504
if ((ctxt->sax != NULL) && (ctxt->sax->endElementNs != NULL) &&
10505
(!ctxt->disableSAX))
10506
ctxt->sax->endElementNs(ctxt->userData, name, prefix, URI);
10507
#ifdef LIBXML_SAX1_ENABLED
10508
} else {
10509
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL) &&
10510
(!ctxt->disableSAX))
10511
ctxt->sax->endElement(ctxt->userData, name);
10512
#endif /* LIBXML_SAX1_ENABLED */
10513
}
10514
namePop(ctxt);
10515
spacePop(ctxt);
10516
if (nbNs > 0)
10517
xmlParserNsPop(ctxt, nbNs);
10518
if (cur != NULL && ctxt->record_info) {
10519
node_info.node = cur;
10520
node_info.end_pos = ctxt->input->consumed +
10521
(CUR_PTR - ctxt->input->base);
10522
node_info.end_line = ctxt->input->line;
10523
xmlParserAddNodeInfo(ctxt, &node_info);
10524
}
10525
return(1);
10526
}
10527
if (RAW == '>') {
10528
NEXT1;
10529
if (cur != NULL && ctxt->record_info) {
10530
node_info.node = cur;
10531
node_info.end_pos = 0;
10532
node_info.end_line = 0;
10533
xmlParserAddNodeInfo(ctxt, &node_info);
10534
}
10535
} else {
10536
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_GT_REQUIRED,
10537
"Couldn't find end of Start Tag %s line %d\n",
10538
name, line, NULL);
10539
10540
/*
10541
* end of parsing of this node.
10542
*/
10543
nodePop(ctxt);
10544
namePop(ctxt);
10545
spacePop(ctxt);
10546
if (nbNs > 0)
10547
xmlParserNsPop(ctxt, nbNs);
10548
return(-1);
10549
}
10550
10551
return(0);
10552
}
10553
10554
/**
10555
* xmlParseElementEnd:
10556
* @ctxt: an XML parser context
10557
*
10558
* Parse the end of an XML element. Always consumes '</'.
10559
*/
10560
static void
10561
xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
10562
xmlNodePtr cur = ctxt->node;
10563
10564
if (ctxt->nameNr <= 0) {
10565
if ((RAW == '<') && (NXT(1) == '/'))
10566
SKIP(2);
10567
return;
10568
}
10569
10570
/*
10571
* parse the end of tag: '</' should be here.
10572
*/
10573
if (ctxt->sax2) {
10574
xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
10575
namePop(ctxt);
10576
}
10577
#ifdef LIBXML_SAX1_ENABLED
10578
else
10579
xmlParseEndTag1(ctxt, 0);
10580
#endif /* LIBXML_SAX1_ENABLED */
10581
10582
/*
10583
* Capture end position
10584
*/
10585
if (cur != NULL && ctxt->record_info) {
10586
xmlParserNodeInfoPtr node_info;
10587
10588
node_info = (xmlParserNodeInfoPtr) xmlParserFindNodeInfo(ctxt, cur);
10589
if (node_info != NULL) {
10590
node_info->end_pos = ctxt->input->consumed +
10591
(CUR_PTR - ctxt->input->base);
10592
node_info->end_line = ctxt->input->line;
10593
}
10594
}
10595
}
10596
10597
/**
10598
* xmlParseVersionNum:
10599
* @ctxt: an XML parser context
10600
*
10601
* DEPRECATED: Internal function, don't use.
10602
*
10603
* parse the XML version value.
10604
*
10605
* [26] VersionNum ::= '1.' [0-9]+
10606
*
10607
* In practice allow [0-9].[0-9]+ at that level
10608
*
10609
* Returns the string giving the XML version number, or NULL
10610
*/
10611
xmlChar *
10612
xmlParseVersionNum(xmlParserCtxtPtr ctxt) {
10613
xmlChar *buf = NULL;
10614
int len = 0;
10615
int size = 10;
10616
xmlChar cur;
10617
10618
buf = (xmlChar *) xmlMallocAtomic(size);
10619
if (buf == NULL) {
10620
xmlErrMemory(ctxt, NULL);
10621
return(NULL);
10622
}
10623
cur = CUR;
10624
if (!((cur >= '0') && (cur <= '9'))) {
10625
xmlFree(buf);
10626
return(NULL);
10627
}
10628
buf[len++] = cur;
10629
NEXT;
10630
cur=CUR;
10631
if (cur != '.') {
10632
xmlFree(buf);
10633
return(NULL);
10634
}
10635
buf[len++] = cur;
10636
NEXT;
10637
cur=CUR;
10638
while ((cur >= '0') && (cur <= '9')) {
10639
if (len + 1 >= size) {
10640
xmlChar *tmp;
10641
10642
size *= 2;
10643
tmp = (xmlChar *) xmlRealloc(buf, size);
10644
if (tmp == NULL) {
10645
xmlFree(buf);
10646
xmlErrMemory(ctxt, NULL);
10647
return(NULL);
10648
}
10649
buf = tmp;
10650
}
10651
buf[len++] = cur;
10652
NEXT;
10653
cur=CUR;
10654
}
10655
buf[len] = 0;
10656
return(buf);
10657
}
10658
10659
/**
10660
* xmlParseVersionInfo:
10661
* @ctxt: an XML parser context
10662
*
10663
* DEPRECATED: Internal function, don't use.
10664
*
10665
* parse the XML version.
10666
*
10667
* [24] VersionInfo ::= S 'version' Eq (' VersionNum ' | " VersionNum ")
10668
*
10669
* [25] Eq ::= S? '=' S?
10670
*
10671
* Returns the version string, e.g. "1.0"
10672
*/
10673
10674
xmlChar *
10675
xmlParseVersionInfo(xmlParserCtxtPtr ctxt) {
10676
xmlChar *version = NULL;
10677
10678
if (CMP7(CUR_PTR, 'v', 'e', 'r', 's', 'i', 'o', 'n')) {
10679
SKIP(7);
10680
SKIP_BLANKS;
10681
if (RAW != '=') {
10682
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10683
return(NULL);
10684
}
10685
NEXT;
10686
SKIP_BLANKS;
10687
if (RAW == '"') {
10688
NEXT;
10689
version = xmlParseVersionNum(ctxt);
10690
if (RAW != '"') {
10691
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10692
} else
10693
NEXT;
10694
} else if (RAW == '\''){
10695
NEXT;
10696
version = xmlParseVersionNum(ctxt);
10697
if (RAW != '\'') {
10698
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10699
} else
10700
NEXT;
10701
} else {
10702
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10703
}
10704
}
10705
return(version);
10706
}
10707
10708
/**
10709
* xmlParseEncName:
10710
* @ctxt: an XML parser context
10711
*
10712
* DEPRECATED: Internal function, don't use.
10713
*
10714
* parse the XML encoding name
10715
*
10716
* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')*
10717
*
10718
* Returns the encoding name value or NULL
10719
*/
10720
xmlChar *
10721
xmlParseEncName(xmlParserCtxtPtr ctxt) {
10722
xmlChar *buf = NULL;
10723
int len = 0;
10724
int size = 10;
10725
int maxLength = (ctxt->options & XML_PARSE_HUGE) ?
10726
XML_MAX_TEXT_LENGTH :
10727
XML_MAX_NAME_LENGTH;
10728
xmlChar cur;
10729
10730
cur = CUR;
10731
if (((cur >= 'a') && (cur <= 'z')) ||
10732
((cur >= 'A') && (cur <= 'Z'))) {
10733
buf = (xmlChar *) xmlMallocAtomic(size);
10734
if (buf == NULL) {
10735
xmlErrMemory(ctxt, NULL);
10736
return(NULL);
10737
}
10738
10739
buf[len++] = cur;
10740
NEXT;
10741
cur = CUR;
10742
while (((cur >= 'a') && (cur <= 'z')) ||
10743
((cur >= 'A') && (cur <= 'Z')) ||
10744
((cur >= '0') && (cur <= '9')) ||
10745
(cur == '.') || (cur == '_') ||
10746
(cur == '-')) {
10747
if (len + 1 >= size) {
10748
xmlChar *tmp;
10749
10750
size *= 2;
10751
tmp = (xmlChar *) xmlRealloc(buf, size);
10752
if (tmp == NULL) {
10753
xmlErrMemory(ctxt, NULL);
10754
xmlFree(buf);
10755
return(NULL);
10756
}
10757
buf = tmp;
10758
}
10759
buf[len++] = cur;
10760
if (len > maxLength) {
10761
xmlFatalErr(ctxt, XML_ERR_NAME_TOO_LONG, "EncName");
10762
xmlFree(buf);
10763
return(NULL);
10764
}
10765
NEXT;
10766
cur = CUR;
10767
}
10768
buf[len] = 0;
10769
} else {
10770
xmlFatalErr(ctxt, XML_ERR_ENCODING_NAME, NULL);
10771
}
10772
return(buf);
10773
}
10774
10775
/**
10776
* xmlParseEncodingDecl:
10777
* @ctxt: an XML parser context
10778
*
10779
* DEPRECATED: Internal function, don't use.
10780
*
10781
* parse the XML encoding declaration
10782
*
10783
* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'")
10784
*
10785
* this setups the conversion filters.
10786
*
10787
* Returns the encoding value or NULL
10788
*/
10789
10790
const xmlChar *
10791
xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
10792
xmlChar *encoding = NULL;
10793
10794
SKIP_BLANKS;
10795
if (CMP8(CUR_PTR, 'e', 'n', 'c', 'o', 'd', 'i', 'n', 'g') == 0)
10796
return(NULL);
10797
10798
SKIP(8);
10799
SKIP_BLANKS;
10800
if (RAW != '=') {
10801
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10802
return(NULL);
10803
}
10804
NEXT;
10805
SKIP_BLANKS;
10806
if (RAW == '"') {
10807
NEXT;
10808
encoding = xmlParseEncName(ctxt);
10809
if (RAW != '"') {
10810
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10811
xmlFree((xmlChar *) encoding);
10812
return(NULL);
10813
} else
10814
NEXT;
10815
} else if (RAW == '\''){
10816
NEXT;
10817
encoding = xmlParseEncName(ctxt);
10818
if (RAW != '\'') {
10819
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10820
xmlFree((xmlChar *) encoding);
10821
return(NULL);
10822
} else
10823
NEXT;
10824
} else {
10825
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10826
}
10827
10828
if (encoding == NULL)
10829
return(NULL);
10830
10831
xmlSetDeclaredEncoding(ctxt, encoding);
10832
10833
return(ctxt->encoding);
10834
}
10835
10836
/**
10837
* xmlParseSDDecl:
10838
* @ctxt: an XML parser context
10839
*
10840
* DEPRECATED: Internal function, don't use.
10841
*
10842
* parse the XML standalone declaration
10843
*
10844
* [32] SDDecl ::= S 'standalone' Eq
10845
* (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no')'"'))
10846
*
10847
* [ VC: Standalone Document Declaration ]
10848
* TODO The standalone document declaration must have the value "no"
10849
* if any external markup declarations contain declarations of:
10850
* - attributes with default values, if elements to which these
10851
* attributes apply appear in the document without specifications
10852
* of values for these attributes, or
10853
* - entities (other than amp, lt, gt, apos, quot), if references
10854
* to those entities appear in the document, or
10855
* - attributes with values subject to normalization, where the
10856
* attribute appears in the document with a value which will change
10857
* as a result of normalization, or
10858
* - element types with element content, if white space occurs directly
10859
* within any instance of those types.
10860
*
10861
* Returns:
10862
* 1 if standalone="yes"
10863
* 0 if standalone="no"
10864
* -2 if standalone attribute is missing or invalid
10865
* (A standalone value of -2 means that the XML declaration was found,
10866
* but no value was specified for the standalone attribute).
10867
*/
10868
10869
int
10870
xmlParseSDDecl(xmlParserCtxtPtr ctxt) {
10871
int standalone = -2;
10872
10873
SKIP_BLANKS;
10874
if (CMP10(CUR_PTR, 's', 't', 'a', 'n', 'd', 'a', 'l', 'o', 'n', 'e')) {
10875
SKIP(10);
10876
SKIP_BLANKS;
10877
if (RAW != '=') {
10878
xmlFatalErr(ctxt, XML_ERR_EQUAL_REQUIRED, NULL);
10879
return(standalone);
10880
}
10881
NEXT;
10882
SKIP_BLANKS;
10883
if (RAW == '\''){
10884
NEXT;
10885
if ((RAW == 'n') && (NXT(1) == 'o')) {
10886
standalone = 0;
10887
SKIP(2);
10888
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10889
(NXT(2) == 's')) {
10890
standalone = 1;
10891
SKIP(3);
10892
} else {
10893
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10894
}
10895
if (RAW != '\'') {
10896
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10897
} else
10898
NEXT;
10899
} else if (RAW == '"'){
10900
NEXT;
10901
if ((RAW == 'n') && (NXT(1) == 'o')) {
10902
standalone = 0;
10903
SKIP(2);
10904
} else if ((RAW == 'y') && (NXT(1) == 'e') &&
10905
(NXT(2) == 's')) {
10906
standalone = 1;
10907
SKIP(3);
10908
} else {
10909
xmlFatalErr(ctxt, XML_ERR_STANDALONE_VALUE, NULL);
10910
}
10911
if (RAW != '"') {
10912
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_CLOSED, NULL);
10913
} else
10914
NEXT;
10915
} else {
10916
xmlFatalErr(ctxt, XML_ERR_STRING_NOT_STARTED, NULL);
10917
}
10918
}
10919
return(standalone);
10920
}
10921
10922
/**
10923
* xmlParseXMLDecl:
10924
* @ctxt: an XML parser context
10925
*
10926
* DEPRECATED: Internal function, don't use.
10927
*
10928
* parse an XML declaration header
10929
*
10930
* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>'
10931
*/
10932
10933
void
10934
xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
10935
xmlChar *version;
10936
10937
/*
10938
* This value for standalone indicates that the document has an
10939
* XML declaration but it does not have a standalone attribute.
10940
* It will be overwritten later if a standalone attribute is found.
10941
*/
10942
10943
ctxt->standalone = -2;
10944
10945
/*
10946
* We know that '<?xml' is here.
10947
*/
10948
SKIP(5);
10949
10950
if (!IS_BLANK_CH(RAW)) {
10951
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED,
10952
"Blank needed after '<?xml'\n");
10953
}
10954
SKIP_BLANKS;
10955
10956
/*
10957
* We must have the VersionInfo here.
10958
*/
10959
version = xmlParseVersionInfo(ctxt);
10960
if (version == NULL) {
10961
xmlFatalErr(ctxt, XML_ERR_VERSION_MISSING, NULL);
10962
} else {
10963
if (!xmlStrEqual(version, (const xmlChar *) XML_DEFAULT_VERSION)) {
10964
/*
10965
* Changed here for XML-1.0 5th edition
10966
*/
10967
if (ctxt->options & XML_PARSE_OLD10) {
10968
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10969
"Unsupported version '%s'\n",
10970
version);
10971
} else {
10972
if ((version[0] == '1') && ((version[1] == '.'))) {
10973
xmlWarningMsg(ctxt, XML_WAR_UNKNOWN_VERSION,
10974
"Unsupported version '%s'\n",
10975
version, NULL);
10976
} else {
10977
xmlFatalErrMsgStr(ctxt, XML_ERR_UNKNOWN_VERSION,
10978
"Unsupported version '%s'\n",
10979
version);
10980
}
10981
}
10982
}
10983
if (ctxt->version != NULL)
10984
xmlFree((void *) ctxt->version);
10985
ctxt->version = version;
10986
}
10987
10988
/*
10989
* We may have the encoding declaration
10990
*/
10991
if (!IS_BLANK_CH(RAW)) {
10992
if ((RAW == '?') && (NXT(1) == '>')) {
10993
SKIP(2);
10994
return;
10995
}
10996
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
10997
}
10998
xmlParseEncodingDecl(ctxt);
10999
if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
11000
(ctxt->instate == XML_PARSER_EOF)) {
11001
/*
11002
* The XML REC instructs us to stop parsing right here
11003
*/
11004
return;
11005
}
11006
11007
/*
11008
* We may have the standalone status.
11009
*/
11010
if ((ctxt->encoding != NULL) && (!IS_BLANK_CH(RAW))) {
11011
if ((RAW == '?') && (NXT(1) == '>')) {
11012
SKIP(2);
11013
return;
11014
}
11015
xmlFatalErrMsg(ctxt, XML_ERR_SPACE_REQUIRED, "Blank needed here\n");
11016
}
11017
11018
/*
11019
* We can grow the input buffer freely at that point
11020
*/
11021
GROW;
11022
11023
SKIP_BLANKS;
11024
ctxt->standalone = xmlParseSDDecl(ctxt);
11025
11026
SKIP_BLANKS;
11027
if ((RAW == '?') && (NXT(1) == '>')) {
11028
SKIP(2);
11029
} else if (RAW == '>') {
11030
/* Deprecated old WD ... */
11031
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11032
NEXT;
11033
} else {
11034
int c;
11035
11036
xmlFatalErr(ctxt, XML_ERR_XMLDECL_NOT_FINISHED, NULL);
11037
while ((c = CUR) != 0) {
11038
NEXT;
11039
if (c == '>')
11040
break;
11041
}
11042
}
11043
}
11044
11045
/**
11046
* xmlParseMisc:
11047
* @ctxt: an XML parser context
11048
*
11049
* DEPRECATED: Internal function, don't use.
11050
*
11051
* parse an XML Misc* optional field.
11052
*
11053
* [27] Misc ::= Comment | PI | S
11054
*/
11055
11056
void
11057
xmlParseMisc(xmlParserCtxtPtr ctxt) {
11058
while (ctxt->instate != XML_PARSER_EOF) {
11059
SKIP_BLANKS;
11060
GROW;
11061
if ((RAW == '<') && (NXT(1) == '?')) {
11062
xmlParsePI(ctxt);
11063
} else if (CMP4(CUR_PTR, '<', '!', '-', '-')) {
11064
xmlParseComment(ctxt);
11065
} else {
11066
break;
11067
}
11068
}
11069
}
11070
11071
/**
11072
* xmlParseDocument:
11073
* @ctxt: an XML parser context
11074
*
11075
* parse an XML document (and build a tree if using the standard SAX
11076
* interface).
11077
*
11078
* [1] document ::= prolog element Misc*
11079
*
11080
* [22] prolog ::= XMLDecl? Misc* (doctypedecl Misc*)?
11081
*
11082
* Returns 0, -1 in case of error. the parser context is augmented
11083
* as a result of the parsing.
11084
*/
11085
11086
int
11087
xmlParseDocument(xmlParserCtxtPtr ctxt) {
11088
xmlInitParser();
11089
11090
if ((ctxt == NULL) || (ctxt->input == NULL))
11091
return(-1);
11092
11093
GROW;
11094
11095
/*
11096
* SAX: detecting the level.
11097
*/
11098
xmlDetectSAX2(ctxt);
11099
11100
/*
11101
* SAX: beginning of the document processing.
11102
*/
11103
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11104
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11105
if (ctxt->instate == XML_PARSER_EOF)
11106
return(-1);
11107
11108
xmlDetectEncoding(ctxt);
11109
11110
if (CUR == 0) {
11111
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11112
return(-1);
11113
}
11114
11115
GROW;
11116
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11117
11118
/*
11119
* Note that we will switch encoding on the fly.
11120
*/
11121
xmlParseXMLDecl(ctxt);
11122
if ((ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) ||
11123
(ctxt->instate == XML_PARSER_EOF)) {
11124
/*
11125
* The XML REC instructs us to stop parsing right here
11126
*/
11127
return(-1);
11128
}
11129
SKIP_BLANKS;
11130
} else {
11131
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11132
}
11133
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11134
ctxt->sax->startDocument(ctxt->userData);
11135
if (ctxt->instate == XML_PARSER_EOF)
11136
return(-1);
11137
if ((ctxt->myDoc != NULL) && (ctxt->input != NULL) &&
11138
(ctxt->input->buf != NULL) && (ctxt->input->buf->compressed >= 0)) {
11139
ctxt->myDoc->compression = ctxt->input->buf->compressed;
11140
}
11141
11142
/*
11143
* The Misc part of the Prolog
11144
*/
11145
xmlParseMisc(ctxt);
11146
11147
/*
11148
* Then possibly doc type declaration(s) and more Misc
11149
* (doctypedecl Misc*)?
11150
*/
11151
GROW;
11152
if (CMP9(CUR_PTR, '<', '!', 'D', 'O', 'C', 'T', 'Y', 'P', 'E')) {
11153
11154
ctxt->inSubset = 1;
11155
xmlParseDocTypeDecl(ctxt);
11156
if (RAW == '[') {
11157
ctxt->instate = XML_PARSER_DTD;
11158
xmlParseInternalSubset(ctxt);
11159
if (ctxt->instate == XML_PARSER_EOF)
11160
return(-1);
11161
}
11162
11163
/*
11164
* Create and update the external subset.
11165
*/
11166
ctxt->inSubset = 2;
11167
if ((ctxt->sax != NULL) && (ctxt->sax->externalSubset != NULL) &&
11168
(!ctxt->disableSAX))
11169
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
11170
ctxt->extSubSystem, ctxt->extSubURI);
11171
if (ctxt->instate == XML_PARSER_EOF)
11172
return(-1);
11173
ctxt->inSubset = 0;
11174
11175
xmlCleanSpecialAttr(ctxt);
11176
11177
ctxt->instate = XML_PARSER_PROLOG;
11178
xmlParseMisc(ctxt);
11179
}
11180
11181
/*
11182
* Time to start parsing the tree itself
11183
*/
11184
GROW;
11185
if (RAW != '<') {
11186
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11187
"Start tag expected, '<' not found\n");
11188
} else {
11189
ctxt->instate = XML_PARSER_CONTENT;
11190
xmlParseElement(ctxt);
11191
ctxt->instate = XML_PARSER_EPILOG;
11192
11193
11194
/*
11195
* The Misc part at the end
11196
*/
11197
xmlParseMisc(ctxt);
11198
11199
if (ctxt->input->cur < ctxt->input->end) {
11200
if (ctxt->errNo == XML_ERR_OK)
11201
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
11202
} else if ((ctxt->input->buf != NULL) &&
11203
(ctxt->input->buf->encoder != NULL) &&
11204
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
11205
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
11206
"Truncated multi-byte sequence at EOF\n");
11207
}
11208
ctxt->instate = XML_PARSER_EOF;
11209
}
11210
11211
/*
11212
* SAX: end of the document processing.
11213
*/
11214
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11215
ctxt->sax->endDocument(ctxt->userData);
11216
11217
/*
11218
* Remove locally kept entity definitions if the tree was not built
11219
*/
11220
if ((ctxt->myDoc != NULL) &&
11221
(xmlStrEqual(ctxt->myDoc->version, SAX_COMPAT_MODE))) {
11222
xmlFreeDoc(ctxt->myDoc);
11223
ctxt->myDoc = NULL;
11224
}
11225
11226
if ((ctxt->wellFormed) && (ctxt->myDoc != NULL)) {
11227
ctxt->myDoc->properties |= XML_DOC_WELLFORMED;
11228
if (ctxt->valid)
11229
ctxt->myDoc->properties |= XML_DOC_DTDVALID;
11230
if (ctxt->nsWellFormed)
11231
ctxt->myDoc->properties |= XML_DOC_NSVALID;
11232
if (ctxt->options & XML_PARSE_OLD10)
11233
ctxt->myDoc->properties |= XML_DOC_OLD10;
11234
}
11235
if (! ctxt->wellFormed) {
11236
ctxt->valid = 0;
11237
return(-1);
11238
}
11239
return(0);
11240
}
11241
11242
/**
11243
* xmlParseExtParsedEnt:
11244
* @ctxt: an XML parser context
11245
*
11246
* parse a general parsed entity
11247
* An external general parsed entity is well-formed if it matches the
11248
* production labeled extParsedEnt.
11249
*
11250
* [78] extParsedEnt ::= TextDecl? content
11251
*
11252
* Returns 0, -1 in case of error. the parser context is augmented
11253
* as a result of the parsing.
11254
*/
11255
11256
int
11257
xmlParseExtParsedEnt(xmlParserCtxtPtr ctxt) {
11258
if ((ctxt == NULL) || (ctxt->input == NULL))
11259
return(-1);
11260
11261
xmlDetectSAX2(ctxt);
11262
11263
/*
11264
* SAX: beginning of the document processing.
11265
*/
11266
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11267
ctxt->sax->setDocumentLocator(ctxt->userData, &xmlDefaultSAXLocator);
11268
11269
xmlDetectEncoding(ctxt);
11270
11271
if (CUR == 0) {
11272
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
11273
}
11274
11275
/*
11276
* Check for the XMLDecl in the Prolog.
11277
*/
11278
GROW;
11279
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
11280
11281
/*
11282
* Note that we will switch encoding on the fly.
11283
*/
11284
xmlParseXMLDecl(ctxt);
11285
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11286
/*
11287
* The XML REC instructs us to stop parsing right here
11288
*/
11289
return(-1);
11290
}
11291
SKIP_BLANKS;
11292
} else {
11293
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11294
}
11295
if ((ctxt->sax) && (ctxt->sax->startDocument) && (!ctxt->disableSAX))
11296
ctxt->sax->startDocument(ctxt->userData);
11297
if (ctxt->instate == XML_PARSER_EOF)
11298
return(-1);
11299
11300
/*
11301
* Doing validity checking on chunk doesn't make sense
11302
*/
11303
ctxt->instate = XML_PARSER_CONTENT;
11304
ctxt->validate = 0;
11305
ctxt->loadsubset = 0;
11306
ctxt->depth = 0;
11307
11308
xmlParseContent(ctxt);
11309
if (ctxt->instate == XML_PARSER_EOF)
11310
return(-1);
11311
11312
if ((RAW == '<') && (NXT(1) == '/')) {
11313
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
11314
} else if (RAW != 0) {
11315
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
11316
}
11317
11318
/*
11319
* SAX: end of the document processing.
11320
*/
11321
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11322
ctxt->sax->endDocument(ctxt->userData);
11323
11324
if (! ctxt->wellFormed) return(-1);
11325
return(0);
11326
}
11327
11328
#ifdef LIBXML_PUSH_ENABLED
11329
/************************************************************************
11330
* *
11331
* Progressive parsing interfaces *
11332
* *
11333
************************************************************************/
11334
11335
/**
11336
* xmlParseLookupChar:
11337
* @ctxt: an XML parser context
11338
* @c: character
11339
*
11340
* Check whether the input buffer contains a character.
11341
*/
11342
static int
11343
xmlParseLookupChar(xmlParserCtxtPtr ctxt, int c) {
11344
const xmlChar *cur;
11345
11346
if (ctxt->checkIndex == 0) {
11347
cur = ctxt->input->cur + 1;
11348
} else {
11349
cur = ctxt->input->cur + ctxt->checkIndex;
11350
}
11351
11352
if (memchr(cur, c, ctxt->input->end - cur) == NULL) {
11353
size_t index = ctxt->input->end - ctxt->input->cur;
11354
11355
if (index > LONG_MAX) {
11356
ctxt->checkIndex = 0;
11357
return(1);
11358
}
11359
ctxt->checkIndex = index;
11360
return(0);
11361
} else {
11362
ctxt->checkIndex = 0;
11363
return(1);
11364
}
11365
}
11366
11367
/**
11368
* xmlParseLookupString:
11369
* @ctxt: an XML parser context
11370
* @startDelta: delta to apply at the start
11371
* @str: string
11372
* @strLen: length of string
11373
*
11374
* Check whether the input buffer contains a string.
11375
*/
11376
static const xmlChar *
11377
xmlParseLookupString(xmlParserCtxtPtr ctxt, size_t startDelta,
11378
const char *str, size_t strLen) {
11379
const xmlChar *cur, *term;
11380
11381
if (ctxt->checkIndex == 0) {
11382
cur = ctxt->input->cur + startDelta;
11383
} else {
11384
cur = ctxt->input->cur + ctxt->checkIndex;
11385
}
11386
11387
term = BAD_CAST strstr((const char *) cur, str);
11388
if (term == NULL) {
11389
const xmlChar *end = ctxt->input->end;
11390
size_t index;
11391
11392
/* Rescan (strLen - 1) characters. */
11393
if ((size_t) (end - cur) < strLen)
11394
end = cur;
11395
else
11396
end -= strLen - 1;
11397
index = end - ctxt->input->cur;
11398
if (index > LONG_MAX) {
11399
ctxt->checkIndex = 0;
11400
return(ctxt->input->end - strLen);
11401
}
11402
ctxt->checkIndex = index;
11403
} else {
11404
ctxt->checkIndex = 0;
11405
}
11406
11407
return(term);
11408
}
11409
11410
/**
11411
* xmlParseLookupCharData:
11412
* @ctxt: an XML parser context
11413
*
11414
* Check whether the input buffer contains terminated char data.
11415
*/
11416
static int
11417
xmlParseLookupCharData(xmlParserCtxtPtr ctxt) {
11418
const xmlChar *cur = ctxt->input->cur + ctxt->checkIndex;
11419
const xmlChar *end = ctxt->input->end;
11420
size_t index;
11421
11422
while (cur < end) {
11423
if ((*cur == '<') || (*cur == '&')) {
11424
ctxt->checkIndex = 0;
11425
return(1);
11426
}
11427
cur++;
11428
}
11429
11430
index = cur - ctxt->input->cur;
11431
if (index > LONG_MAX) {
11432
ctxt->checkIndex = 0;
11433
return(1);
11434
}
11435
ctxt->checkIndex = index;
11436
return(0);
11437
}
11438
11439
/**
11440
* xmlParseLookupGt:
11441
* @ctxt: an XML parser context
11442
*
11443
* Check whether there's enough data in the input buffer to finish parsing
11444
* a start tag. This has to take quotes into account.
11445
*/
11446
static int
11447
xmlParseLookupGt(xmlParserCtxtPtr ctxt) {
11448
const xmlChar *cur;
11449
const xmlChar *end = ctxt->input->end;
11450
int state = ctxt->endCheckState;
11451
size_t index;
11452
11453
if (ctxt->checkIndex == 0)
11454
cur = ctxt->input->cur + 1;
11455
else
11456
cur = ctxt->input->cur + ctxt->checkIndex;
11457
11458
while (cur < end) {
11459
if (state) {
11460
if (*cur == state)
11461
state = 0;
11462
} else if (*cur == '\'' || *cur == '"') {
11463
state = *cur;
11464
} else if (*cur == '>') {
11465
ctxt->checkIndex = 0;
11466
ctxt->endCheckState = 0;
11467
return(1);
11468
}
11469
cur++;
11470
}
11471
11472
index = cur - ctxt->input->cur;
11473
if (index > LONG_MAX) {
11474
ctxt->checkIndex = 0;
11475
ctxt->endCheckState = 0;
11476
return(1);
11477
}
11478
ctxt->checkIndex = index;
11479
ctxt->endCheckState = state;
11480
return(0);
11481
}
11482
11483
/**
11484
* xmlParseLookupInternalSubset:
11485
* @ctxt: an XML parser context
11486
*
11487
* Check whether there's enough data in the input buffer to finish parsing
11488
* the internal subset.
11489
*/
11490
static int
11491
xmlParseLookupInternalSubset(xmlParserCtxtPtr ctxt) {
11492
/*
11493
* Sorry, but progressive parsing of the internal subset is not
11494
* supported. We first check that the full content of the internal
11495
* subset is available and parsing is launched only at that point.
11496
* Internal subset ends with "']' S? '>'" in an unescaped section and
11497
* not in a ']]>' sequence which are conditional sections.
11498
*/
11499
const xmlChar *cur, *start;
11500
const xmlChar *end = ctxt->input->end;
11501
int state = ctxt->endCheckState;
11502
size_t index;
11503
11504
if (ctxt->checkIndex == 0) {
11505
cur = ctxt->input->cur + 1;
11506
} else {
11507
cur = ctxt->input->cur + ctxt->checkIndex;
11508
}
11509
start = cur;
11510
11511
while (cur < end) {
11512
if (state == '-') {
11513
if ((*cur == '-') &&
11514
(cur[1] == '-') &&
11515
(cur[2] == '>')) {
11516
state = 0;
11517
cur += 3;
11518
start = cur;
11519
continue;
11520
}
11521
}
11522
else if (state == ']') {
11523
if (*cur == '>') {
11524
ctxt->checkIndex = 0;
11525
ctxt->endCheckState = 0;
11526
return(1);
11527
}
11528
if (IS_BLANK_CH(*cur)) {
11529
state = ' ';
11530
} else if (*cur != ']') {
11531
state = 0;
11532
start = cur;
11533
continue;
11534
}
11535
}
11536
else if (state == ' ') {
11537
if (*cur == '>') {
11538
ctxt->checkIndex = 0;
11539
ctxt->endCheckState = 0;
11540
return(1);
11541
}
11542
if (!IS_BLANK_CH(*cur)) {
11543
state = 0;
11544
start = cur;
11545
continue;
11546
}
11547
}
11548
else if (state != 0) {
11549
if (*cur == state) {
11550
state = 0;
11551
start = cur + 1;
11552
}
11553
}
11554
else if (*cur == '<') {
11555
if ((cur[1] == '!') &&
11556
(cur[2] == '-') &&
11557
(cur[3] == '-')) {
11558
state = '-';
11559
cur += 4;
11560
/* Don't treat <!--> as comment */
11561
start = cur;
11562
continue;
11563
}
11564
}
11565
else if ((*cur == '"') || (*cur == '\'') || (*cur == ']')) {
11566
state = *cur;
11567
}
11568
11569
cur++;
11570
}
11571
11572
/*
11573
* Rescan the three last characters to detect "<!--" and "-->"
11574
* split across chunks.
11575
*/
11576
if ((state == 0) || (state == '-')) {
11577
if (cur - start < 3)
11578
cur = start;
11579
else
11580
cur -= 3;
11581
}
11582
index = cur - ctxt->input->cur;
11583
if (index > LONG_MAX) {
11584
ctxt->checkIndex = 0;
11585
ctxt->endCheckState = 0;
11586
return(1);
11587
}
11588
ctxt->checkIndex = index;
11589
ctxt->endCheckState = state;
11590
return(0);
11591
}
11592
11593
/**
11594
* xmlCheckCdataPush:
11595
* @cur: pointer to the block of characters
11596
* @len: length of the block in bytes
11597
* @complete: 1 if complete CDATA block is passed in, 0 if partial block
11598
*
11599
* Check that the block of characters is okay as SCdata content [20]
11600
*
11601
* Returns the number of bytes to pass if okay, a negative index where an
11602
* UTF-8 error occurred otherwise
11603
*/
11604
static int
11605
xmlCheckCdataPush(const xmlChar *utf, int len, int complete) {
11606
int ix;
11607
unsigned char c;
11608
int codepoint;
11609
11610
if ((utf == NULL) || (len <= 0))
11611
return(0);
11612
11613
for (ix = 0; ix < len;) { /* string is 0-terminated */
11614
c = utf[ix];
11615
if ((c & 0x80) == 0x00) { /* 1-byte code, starts with 10 */
11616
if (c >= 0x20)
11617
ix++;
11618
else if ((c == 0xA) || (c == 0xD) || (c == 0x9))
11619
ix++;
11620
else
11621
return(-ix);
11622
} else if ((c & 0xe0) == 0xc0) {/* 2-byte code, starts with 110 */
11623
if (ix + 2 > len) return(complete ? -ix : ix);
11624
if ((utf[ix+1] & 0xc0 ) != 0x80)
11625
return(-ix);
11626
codepoint = (utf[ix] & 0x1f) << 6;
11627
codepoint |= utf[ix+1] & 0x3f;
11628
if (!xmlIsCharQ(codepoint))
11629
return(-ix);
11630
ix += 2;
11631
} else if ((c & 0xf0) == 0xe0) {/* 3-byte code, starts with 1110 */
11632
if (ix + 3 > len) return(complete ? -ix : ix);
11633
if (((utf[ix+1] & 0xc0) != 0x80) ||
11634
((utf[ix+2] & 0xc0) != 0x80))
11635
return(-ix);
11636
codepoint = (utf[ix] & 0xf) << 12;
11637
codepoint |= (utf[ix+1] & 0x3f) << 6;
11638
codepoint |= utf[ix+2] & 0x3f;
11639
if (!xmlIsCharQ(codepoint))
11640
return(-ix);
11641
ix += 3;
11642
} else if ((c & 0xf8) == 0xf0) {/* 4-byte code, starts with 11110 */
11643
if (ix + 4 > len) return(complete ? -ix : ix);
11644
if (((utf[ix+1] & 0xc0) != 0x80) ||
11645
((utf[ix+2] & 0xc0) != 0x80) ||
11646
((utf[ix+3] & 0xc0) != 0x80))
11647
return(-ix);
11648
codepoint = (utf[ix] & 0x7) << 18;
11649
codepoint |= (utf[ix+1] & 0x3f) << 12;
11650
codepoint |= (utf[ix+2] & 0x3f) << 6;
11651
codepoint |= utf[ix+3] & 0x3f;
11652
if (!xmlIsCharQ(codepoint))
11653
return(-ix);
11654
ix += 4;
11655
} else /* unknown encoding */
11656
return(-ix);
11657
}
11658
return(ix);
11659
}
11660
11661
/**
11662
* xmlParseTryOrFinish:
11663
* @ctxt: an XML parser context
11664
* @terminate: last chunk indicator
11665
*
11666
* Try to progress on parsing
11667
*
11668
* Returns zero if no parsing was possible
11669
*/
11670
static int
11671
xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
11672
int ret = 0;
11673
size_t avail;
11674
xmlChar cur, next;
11675
11676
if (ctxt->input == NULL)
11677
return(0);
11678
11679
if ((ctxt->input != NULL) &&
11680
(ctxt->input->cur - ctxt->input->base > 4096)) {
11681
xmlParserShrink(ctxt);
11682
}
11683
11684
while (ctxt->instate != XML_PARSER_EOF) {
11685
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
11686
return(0);
11687
11688
avail = ctxt->input->end - ctxt->input->cur;
11689
if (avail < 1)
11690
goto done;
11691
switch (ctxt->instate) {
11692
case XML_PARSER_EOF:
11693
/*
11694
* Document parsing is done !
11695
*/
11696
goto done;
11697
case XML_PARSER_START:
11698
/*
11699
* Very first chars read from the document flow.
11700
*/
11701
if ((!terminate) && (avail < 4))
11702
goto done;
11703
11704
/*
11705
* We need more bytes to detect EBCDIC code pages.
11706
* See xmlDetectEBCDIC.
11707
*/
11708
if ((CMP4(CUR_PTR, 0x4C, 0x6F, 0xA7, 0x94)) &&
11709
(!terminate) && (avail < 200))
11710
goto done;
11711
11712
xmlDetectEncoding(ctxt);
11713
if (ctxt->instate == XML_PARSER_EOF)
11714
goto done;
11715
ctxt->instate = XML_PARSER_XML_DECL;
11716
break;
11717
11718
case XML_PARSER_XML_DECL:
11719
if ((!terminate) && (avail < 2))
11720
goto done;
11721
cur = ctxt->input->cur[0];
11722
next = ctxt->input->cur[1];
11723
if ((cur == '<') && (next == '?')) {
11724
/* PI or XML decl */
11725
if ((!terminate) &&
11726
(!xmlParseLookupString(ctxt, 2, "?>", 2)))
11727
goto done;
11728
if ((ctxt->input->cur[2] == 'x') &&
11729
(ctxt->input->cur[3] == 'm') &&
11730
(ctxt->input->cur[4] == 'l') &&
11731
(IS_BLANK_CH(ctxt->input->cur[5]))) {
11732
ret += 5;
11733
xmlParseXMLDecl(ctxt);
11734
if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
11735
/*
11736
* The XML REC instructs us to stop parsing right
11737
* here
11738
*/
11739
xmlHaltParser(ctxt);
11740
return(0);
11741
}
11742
} else {
11743
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11744
}
11745
} else {
11746
ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
11747
if (ctxt->version == NULL) {
11748
xmlErrMemory(ctxt, NULL);
11749
break;
11750
}
11751
}
11752
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator))
11753
ctxt->sax->setDocumentLocator(ctxt->userData,
11754
&xmlDefaultSAXLocator);
11755
if ((ctxt->sax) && (ctxt->sax->startDocument) &&
11756
(!ctxt->disableSAX))
11757
ctxt->sax->startDocument(ctxt->userData);
11758
if (ctxt->instate == XML_PARSER_EOF)
11759
goto done;
11760
ctxt->instate = XML_PARSER_MISC;
11761
break;
11762
case XML_PARSER_START_TAG: {
11763
const xmlChar *name;
11764
const xmlChar *prefix = NULL;
11765
const xmlChar *URI = NULL;
11766
int line = ctxt->input->line;
11767
int nbNs = 0;
11768
11769
if ((!terminate) && (avail < 2))
11770
goto done;
11771
cur = ctxt->input->cur[0];
11772
if (cur != '<') {
11773
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
11774
"Start tag expected, '<' not found");
11775
xmlHaltParser(ctxt);
11776
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11777
ctxt->sax->endDocument(ctxt->userData);
11778
goto done;
11779
}
11780
if ((!terminate) && (!xmlParseLookupGt(ctxt)))
11781
goto done;
11782
if (ctxt->spaceNr == 0)
11783
spacePush(ctxt, -1);
11784
else if (*ctxt->space == -2)
11785
spacePush(ctxt, -1);
11786
else
11787
spacePush(ctxt, *ctxt->space);
11788
#ifdef LIBXML_SAX1_ENABLED
11789
if (ctxt->sax2)
11790
#endif /* LIBXML_SAX1_ENABLED */
11791
name = xmlParseStartTag2(ctxt, &prefix, &URI, &nbNs);
11792
#ifdef LIBXML_SAX1_ENABLED
11793
else
11794
name = xmlParseStartTag(ctxt);
11795
#endif /* LIBXML_SAX1_ENABLED */
11796
if (ctxt->instate == XML_PARSER_EOF)
11797
goto done;
11798
if (name == NULL) {
11799
spacePop(ctxt);
11800
xmlHaltParser(ctxt);
11801
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
11802
ctxt->sax->endDocument(ctxt->userData);
11803
goto done;
11804
}
11805
#ifdef LIBXML_VALID_ENABLED
11806
/*
11807
* [ VC: Root Element Type ]
11808
* The Name in the document type declaration must match
11809
* the element type of the root element.
11810
*/
11811
if (ctxt->validate && ctxt->wellFormed && ctxt->myDoc &&
11812
ctxt->node && (ctxt->node == ctxt->myDoc->children))
11813
ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
11814
#endif /* LIBXML_VALID_ENABLED */
11815
11816
/*
11817
* Check for an Empty Element.
11818
*/
11819
if ((RAW == '/') && (NXT(1) == '>')) {
11820
SKIP(2);
11821
11822
if (ctxt->sax2) {
11823
if ((ctxt->sax != NULL) &&
11824
(ctxt->sax->endElementNs != NULL) &&
11825
(!ctxt->disableSAX))
11826
ctxt->sax->endElementNs(ctxt->userData, name,
11827
prefix, URI);
11828
if (nbNs > 0)
11829
xmlParserNsPop(ctxt, nbNs);
11830
#ifdef LIBXML_SAX1_ENABLED
11831
} else {
11832
if ((ctxt->sax != NULL) &&
11833
(ctxt->sax->endElement != NULL) &&
11834
(!ctxt->disableSAX))
11835
ctxt->sax->endElement(ctxt->userData, name);
11836
#endif /* LIBXML_SAX1_ENABLED */
11837
}
11838
spacePop(ctxt);
11839
} else if (RAW == '>') {
11840
NEXT;
11841
nameNsPush(ctxt, name, prefix, URI, line, nbNs);
11842
} else {
11843
xmlFatalErrMsgStr(ctxt, XML_ERR_GT_REQUIRED,
11844
"Couldn't find end of Start Tag %s\n",
11845
name);
11846
nodePop(ctxt);
11847
spacePop(ctxt);
11848
if (nbNs > 0)
11849
xmlParserNsPop(ctxt, nbNs);
11850
}
11851
11852
if (ctxt->instate == XML_PARSER_EOF)
11853
goto done;
11854
if (ctxt->nameNr == 0)
11855
ctxt->instate = XML_PARSER_EPILOG;
11856
else
11857
ctxt->instate = XML_PARSER_CONTENT;
11858
break;
11859
}
11860
case XML_PARSER_CONTENT: {
11861
cur = ctxt->input->cur[0];
11862
11863
if (cur == '<') {
11864
if ((!terminate) && (avail < 2))
11865
goto done;
11866
next = ctxt->input->cur[1];
11867
11868
if (next == '/') {
11869
ctxt->instate = XML_PARSER_END_TAG;
11870
break;
11871
} else if (next == '?') {
11872
if ((!terminate) &&
11873
(!xmlParseLookupString(ctxt, 2, "?>", 2)))
11874
goto done;
11875
xmlParsePI(ctxt);
11876
if (ctxt->instate == XML_PARSER_EOF)
11877
goto done;
11878
ctxt->instate = XML_PARSER_CONTENT;
11879
break;
11880
} else if (next == '!') {
11881
if ((!terminate) && (avail < 3))
11882
goto done;
11883
next = ctxt->input->cur[2];
11884
11885
if (next == '-') {
11886
if ((!terminate) && (avail < 4))
11887
goto done;
11888
if (ctxt->input->cur[3] == '-') {
11889
if ((!terminate) &&
11890
(!xmlParseLookupString(ctxt, 4, "-->", 3)))
11891
goto done;
11892
xmlParseComment(ctxt);
11893
if (ctxt->instate == XML_PARSER_EOF)
11894
goto done;
11895
ctxt->instate = XML_PARSER_CONTENT;
11896
break;
11897
}
11898
} else if (next == '[') {
11899
if ((!terminate) && (avail < 9))
11900
goto done;
11901
if ((ctxt->input->cur[2] == '[') &&
11902
(ctxt->input->cur[3] == 'C') &&
11903
(ctxt->input->cur[4] == 'D') &&
11904
(ctxt->input->cur[5] == 'A') &&
11905
(ctxt->input->cur[6] == 'T') &&
11906
(ctxt->input->cur[7] == 'A') &&
11907
(ctxt->input->cur[8] == '[')) {
11908
SKIP(9);
11909
ctxt->instate = XML_PARSER_CDATA_SECTION;
11910
break;
11911
}
11912
}
11913
}
11914
} else if (cur == '&') {
11915
if ((!terminate) && (!xmlParseLookupChar(ctxt, ';')))
11916
goto done;
11917
xmlParseReference(ctxt);
11918
break;
11919
} else {
11920
/* TODO Avoid the extra copy, handle directly !!! */
11921
/*
11922
* Goal of the following test is:
11923
* - minimize calls to the SAX 'character' callback
11924
* when they are mergeable
11925
* - handle an problem for isBlank when we only parse
11926
* a sequence of blank chars and the next one is
11927
* not available to check against '<' presence.
11928
* - tries to homogenize the differences in SAX
11929
* callbacks between the push and pull versions
11930
* of the parser.
11931
*/
11932
if (avail < XML_PARSER_BIG_BUFFER_SIZE) {
11933
if ((!terminate) && (!xmlParseLookupCharData(ctxt)))
11934
goto done;
11935
}
11936
ctxt->checkIndex = 0;
11937
xmlParseCharDataInternal(ctxt, !terminate);
11938
break;
11939
}
11940
11941
ctxt->instate = XML_PARSER_START_TAG;
11942
break;
11943
}
11944
case XML_PARSER_END_TAG:
11945
if ((!terminate) && (!xmlParseLookupChar(ctxt, '>')))
11946
goto done;
11947
if (ctxt->sax2) {
11948
xmlParseEndTag2(ctxt, &ctxt->pushTab[ctxt->nameNr - 1]);
11949
nameNsPop(ctxt);
11950
}
11951
#ifdef LIBXML_SAX1_ENABLED
11952
else
11953
xmlParseEndTag1(ctxt, 0);
11954
#endif /* LIBXML_SAX1_ENABLED */
11955
if (ctxt->instate == XML_PARSER_EOF)
11956
goto done;
11957
if (ctxt->nameNr == 0) {
11958
ctxt->instate = XML_PARSER_EPILOG;
11959
} else {
11960
ctxt->instate = XML_PARSER_CONTENT;
11961
}
11962
break;
11963
case XML_PARSER_CDATA_SECTION: {
11964
/*
11965
* The Push mode need to have the SAX callback for
11966
* cdataBlock merge back contiguous callbacks.
11967
*/
11968
const xmlChar *term;
11969
11970
if (terminate) {
11971
/*
11972
* Don't call xmlParseLookupString. If 'terminate'
11973
* is set, checkIndex is invalid.
11974
*/
11975
term = BAD_CAST strstr((const char *) ctxt->input->cur,
11976
"]]>");
11977
} else {
11978
term = xmlParseLookupString(ctxt, 0, "]]>", 3);
11979
}
11980
11981
if (term == NULL) {
11982
int tmp, size;
11983
11984
if (terminate) {
11985
/* Unfinished CDATA section */
11986
size = ctxt->input->end - ctxt->input->cur;
11987
} else {
11988
if (avail < XML_PARSER_BIG_BUFFER_SIZE + 2)
11989
goto done;
11990
ctxt->checkIndex = 0;
11991
/* XXX: Why don't we pass the full buffer? */
11992
size = XML_PARSER_BIG_BUFFER_SIZE;
11993
}
11994
tmp = xmlCheckCdataPush(ctxt->input->cur, size, 0);
11995
if (tmp <= 0) {
11996
tmp = -tmp;
11997
ctxt->input->cur += tmp;
11998
goto encoding_error;
11999
}
12000
if ((ctxt->sax != NULL) && (!ctxt->disableSAX)) {
12001
if (ctxt->sax->cdataBlock != NULL)
12002
ctxt->sax->cdataBlock(ctxt->userData,
12003
ctxt->input->cur, tmp);
12004
else if (ctxt->sax->characters != NULL)
12005
ctxt->sax->characters(ctxt->userData,
12006
ctxt->input->cur, tmp);
12007
}
12008
if (ctxt->instate == XML_PARSER_EOF)
12009
goto done;
12010
SKIPL(tmp);
12011
} else {
12012
int base = term - CUR_PTR;
12013
int tmp;
12014
12015
tmp = xmlCheckCdataPush(ctxt->input->cur, base, 1);
12016
if ((tmp < 0) || (tmp != base)) {
12017
tmp = -tmp;
12018
ctxt->input->cur += tmp;
12019
goto encoding_error;
12020
}
12021
if ((ctxt->sax != NULL) && (base == 0) &&
12022
(ctxt->sax->cdataBlock != NULL) &&
12023
(!ctxt->disableSAX)) {
12024
/*
12025
* Special case to provide identical behaviour
12026
* between pull and push parsers on enpty CDATA
12027
* sections
12028
*/
12029
if ((ctxt->input->cur - ctxt->input->base >= 9) &&
12030
(!strncmp((const char *)&ctxt->input->cur[-9],
12031
"<![CDATA[", 9)))
12032
ctxt->sax->cdataBlock(ctxt->userData,
12033
BAD_CAST "", 0);
12034
} else if ((ctxt->sax != NULL) && (base > 0) &&
12035
(!ctxt->disableSAX)) {
12036
if (ctxt->sax->cdataBlock != NULL)
12037
ctxt->sax->cdataBlock(ctxt->userData,
12038
ctxt->input->cur, base);
12039
else if (ctxt->sax->characters != NULL)
12040
ctxt->sax->characters(ctxt->userData,
12041
ctxt->input->cur, base);
12042
}
12043
if (ctxt->instate == XML_PARSER_EOF)
12044
goto done;
12045
SKIPL(base + 3);
12046
ctxt->instate = XML_PARSER_CONTENT;
12047
}
12048
break;
12049
}
12050
case XML_PARSER_MISC:
12051
case XML_PARSER_PROLOG:
12052
case XML_PARSER_EPILOG:
12053
SKIP_BLANKS;
12054
avail = ctxt->input->end - ctxt->input->cur;
12055
if (avail < 1)
12056
goto done;
12057
if (ctxt->input->cur[0] == '<') {
12058
if ((!terminate) && (avail < 2))
12059
goto done;
12060
next = ctxt->input->cur[1];
12061
if (next == '?') {
12062
if ((!terminate) &&
12063
(!xmlParseLookupString(ctxt, 2, "?>", 2)))
12064
goto done;
12065
xmlParsePI(ctxt);
12066
if (ctxt->instate == XML_PARSER_EOF)
12067
goto done;
12068
break;
12069
} else if (next == '!') {
12070
if ((!terminate) && (avail < 3))
12071
goto done;
12072
12073
if (ctxt->input->cur[2] == '-') {
12074
if ((!terminate) && (avail < 4))
12075
goto done;
12076
if (ctxt->input->cur[3] == '-') {
12077
if ((!terminate) &&
12078
(!xmlParseLookupString(ctxt, 4, "-->", 3)))
12079
goto done;
12080
xmlParseComment(ctxt);
12081
if (ctxt->instate == XML_PARSER_EOF)
12082
goto done;
12083
break;
12084
}
12085
} else if (ctxt->instate == XML_PARSER_MISC) {
12086
if ((!terminate) && (avail < 9))
12087
goto done;
12088
if ((ctxt->input->cur[2] == 'D') &&
12089
(ctxt->input->cur[3] == 'O') &&
12090
(ctxt->input->cur[4] == 'C') &&
12091
(ctxt->input->cur[5] == 'T') &&
12092
(ctxt->input->cur[6] == 'Y') &&
12093
(ctxt->input->cur[7] == 'P') &&
12094
(ctxt->input->cur[8] == 'E')) {
12095
if ((!terminate) && (!xmlParseLookupGt(ctxt)))
12096
goto done;
12097
ctxt->inSubset = 1;
12098
xmlParseDocTypeDecl(ctxt);
12099
if (ctxt->instate == XML_PARSER_EOF)
12100
goto done;
12101
if (RAW == '[') {
12102
ctxt->instate = XML_PARSER_DTD;
12103
} else {
12104
/*
12105
* Create and update the external subset.
12106
*/
12107
ctxt->inSubset = 2;
12108
if ((ctxt->sax != NULL) &&
12109
(!ctxt->disableSAX) &&
12110
(ctxt->sax->externalSubset != NULL))
12111
ctxt->sax->externalSubset(
12112
ctxt->userData,
12113
ctxt->intSubName,
12114
ctxt->extSubSystem,
12115
ctxt->extSubURI);
12116
ctxt->inSubset = 0;
12117
xmlCleanSpecialAttr(ctxt);
12118
if (ctxt->instate == XML_PARSER_EOF)
12119
goto done;
12120
ctxt->instate = XML_PARSER_PROLOG;
12121
}
12122
break;
12123
}
12124
}
12125
}
12126
}
12127
12128
if (ctxt->instate == XML_PARSER_EPILOG) {
12129
if (ctxt->errNo == XML_ERR_OK)
12130
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_END, NULL);
12131
ctxt->instate = XML_PARSER_EOF;
12132
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12133
ctxt->sax->endDocument(ctxt->userData);
12134
} else {
12135
ctxt->instate = XML_PARSER_START_TAG;
12136
}
12137
break;
12138
case XML_PARSER_DTD: {
12139
if ((!terminate) && (!xmlParseLookupInternalSubset(ctxt)))
12140
goto done;
12141
xmlParseInternalSubset(ctxt);
12142
if (ctxt->instate == XML_PARSER_EOF)
12143
goto done;
12144
ctxt->inSubset = 2;
12145
if ((ctxt->sax != NULL) && (!ctxt->disableSAX) &&
12146
(ctxt->sax->externalSubset != NULL))
12147
ctxt->sax->externalSubset(ctxt->userData, ctxt->intSubName,
12148
ctxt->extSubSystem, ctxt->extSubURI);
12149
ctxt->inSubset = 0;
12150
xmlCleanSpecialAttr(ctxt);
12151
if (ctxt->instate == XML_PARSER_EOF)
12152
goto done;
12153
ctxt->instate = XML_PARSER_PROLOG;
12154
break;
12155
}
12156
default:
12157
xmlGenericError(xmlGenericErrorContext,
12158
"PP: internal error\n");
12159
ctxt->instate = XML_PARSER_EOF;
12160
break;
12161
}
12162
}
12163
done:
12164
return(ret);
12165
encoding_error:
12166
if (ctxt->input->end - ctxt->input->cur < 4) {
12167
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12168
"Input is not proper UTF-8, indicate encoding !\n",
12169
NULL, NULL);
12170
} else {
12171
char buffer[150];
12172
12173
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
12174
ctxt->input->cur[0], ctxt->input->cur[1],
12175
ctxt->input->cur[2], ctxt->input->cur[3]);
12176
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
12177
"Input is not proper UTF-8, indicate encoding !\n%s",
12178
BAD_CAST buffer, NULL);
12179
}
12180
return(0);
12181
}
12182
12183
/**
12184
* xmlParseChunk:
12185
* @ctxt: an XML parser context
12186
* @chunk: an char array
12187
* @size: the size in byte of the chunk
12188
* @terminate: last chunk indicator
12189
*
12190
* Parse a Chunk of memory
12191
*
12192
* Returns zero if no error, the xmlParserErrors otherwise.
12193
*/
12194
int
12195
xmlParseChunk(xmlParserCtxtPtr ctxt, const char *chunk, int size,
12196
int terminate) {
12197
int end_in_lf = 0;
12198
12199
if (ctxt == NULL)
12200
return(XML_ERR_INTERNAL_ERROR);
12201
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12202
return(ctxt->errNo);
12203
if (ctxt->instate == XML_PARSER_EOF)
12204
return(-1);
12205
if (ctxt->input == NULL)
12206
return(-1);
12207
12208
ctxt->progressive = 1;
12209
if (ctxt->instate == XML_PARSER_START)
12210
xmlDetectSAX2(ctxt);
12211
if ((size > 0) && (chunk != NULL) && (!terminate) &&
12212
(chunk[size - 1] == '\r')) {
12213
end_in_lf = 1;
12214
size--;
12215
}
12216
12217
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
12218
(ctxt->input->buf != NULL) && (ctxt->instate != XML_PARSER_EOF)) {
12219
size_t pos = ctxt->input->cur - ctxt->input->base;
12220
int res;
12221
12222
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12223
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12224
if (res < 0) {
12225
xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12226
xmlHaltParser(ctxt);
12227
return(ctxt->errNo);
12228
}
12229
}
12230
12231
xmlParseTryOrFinish(ctxt, terminate);
12232
if (ctxt->instate == XML_PARSER_EOF)
12233
return(ctxt->errNo);
12234
12235
if ((ctxt->input != NULL) &&
12236
(((ctxt->input->end - ctxt->input->cur) > XML_MAX_LOOKUP_LIMIT) ||
12237
((ctxt->input->cur - ctxt->input->base) > XML_MAX_LOOKUP_LIMIT)) &&
12238
((ctxt->options & XML_PARSE_HUGE) == 0)) {
12239
xmlFatalErr(ctxt, XML_ERR_INTERNAL_ERROR, "Huge input lookup");
12240
xmlHaltParser(ctxt);
12241
}
12242
if ((ctxt->errNo != XML_ERR_OK) && (ctxt->disableSAX == 1))
12243
return(ctxt->errNo);
12244
12245
if ((end_in_lf == 1) && (ctxt->input != NULL) &&
12246
(ctxt->input->buf != NULL)) {
12247
size_t pos = ctxt->input->cur - ctxt->input->base;
12248
int res;
12249
12250
res = xmlParserInputBufferPush(ctxt->input->buf, 1, "\r");
12251
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12252
if (res < 0) {
12253
xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12254
xmlHaltParser(ctxt);
12255
return(ctxt->errNo);
12256
}
12257
}
12258
if (terminate) {
12259
/*
12260
* Check for termination
12261
*/
12262
if ((ctxt->instate != XML_PARSER_EOF) &&
12263
(ctxt->instate != XML_PARSER_EPILOG)) {
12264
if (ctxt->nameNr > 0) {
12265
const xmlChar *name = ctxt->nameTab[ctxt->nameNr - 1];
12266
int line = ctxt->pushTab[ctxt->nameNr - 1].line;
12267
xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
12268
"Premature end of data in tag %s line %d\n",
12269
name, line, NULL);
12270
} else if (ctxt->instate == XML_PARSER_START) {
12271
xmlFatalErr(ctxt, XML_ERR_DOCUMENT_EMPTY, NULL);
12272
} else {
12273
xmlFatalErrMsg(ctxt, XML_ERR_DOCUMENT_EMPTY,
12274
"Start tag expected, '<' not found\n");
12275
}
12276
} else if ((ctxt->input->buf != NULL) &&
12277
(ctxt->input->buf->encoder != NULL) &&
12278
(!xmlBufIsEmpty(ctxt->input->buf->raw))) {
12279
xmlFatalErrMsg(ctxt, XML_ERR_INVALID_CHAR,
12280
"Truncated multi-byte sequence at EOF\n");
12281
}
12282
if (ctxt->instate != XML_PARSER_EOF) {
12283
if ((ctxt->sax) && (ctxt->sax->endDocument != NULL))
12284
ctxt->sax->endDocument(ctxt->userData);
12285
}
12286
ctxt->instate = XML_PARSER_EOF;
12287
}
12288
if (ctxt->wellFormed == 0)
12289
return((xmlParserErrors) ctxt->errNo);
12290
else
12291
return(0);
12292
}
12293
12294
/************************************************************************
12295
* *
12296
* I/O front end functions to the parser *
12297
* *
12298
************************************************************************/
12299
12300
/**
12301
* xmlCreatePushParserCtxt:
12302
* @sax: a SAX handler
12303
* @user_data: The user data returned on SAX callbacks
12304
* @chunk: a pointer to an array of chars
12305
* @size: number of chars in the array
12306
* @filename: an optional file name or URI
12307
*
12308
* Create a parser context for using the XML parser in push mode.
12309
* If @buffer and @size are non-NULL, the data is used to detect
12310
* the encoding. The remaining characters will be parsed so they
12311
* don't need to be fed in again through xmlParseChunk.
12312
* To allow content encoding detection, @size should be >= 4
12313
* The value of @filename is used for fetching external entities
12314
* and error/warning reports.
12315
*
12316
* Returns the new parser context or NULL
12317
*/
12318
12319
xmlParserCtxtPtr
12320
xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12321
const char *chunk, int size, const char *filename) {
12322
xmlParserCtxtPtr ctxt;
12323
xmlParserInputPtr inputStream;
12324
xmlParserInputBufferPtr buf;
12325
12326
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
12327
if (buf == NULL) return(NULL);
12328
12329
ctxt = xmlNewSAXParserCtxt(sax, user_data);
12330
if (ctxt == NULL) {
12331
xmlErrMemory(NULL, "creating parser: out of memory\n");
12332
xmlFreeParserInputBuffer(buf);
12333
return(NULL);
12334
}
12335
ctxt->dictNames = 1;
12336
if (filename == NULL) {
12337
ctxt->directory = NULL;
12338
} else {
12339
ctxt->directory = xmlParserGetDirectory(filename);
12340
}
12341
12342
inputStream = xmlNewInputStream(ctxt);
12343
if (inputStream == NULL) {
12344
xmlFreeParserCtxt(ctxt);
12345
xmlFreeParserInputBuffer(buf);
12346
return(NULL);
12347
}
12348
12349
if (filename == NULL)
12350
inputStream->filename = NULL;
12351
else {
12352
inputStream->filename = (char *)
12353
xmlCanonicPath((const xmlChar *) filename);
12354
if (inputStream->filename == NULL) {
12355
xmlFreeInputStream(inputStream);
12356
xmlFreeParserCtxt(ctxt);
12357
xmlFreeParserInputBuffer(buf);
12358
return(NULL);
12359
}
12360
}
12361
inputStream->buf = buf;
12362
xmlBufResetInput(inputStream->buf->buffer, inputStream);
12363
inputPush(ctxt, inputStream);
12364
12365
if ((size != 0) && (chunk != NULL) &&
12366
(ctxt->input != NULL) && (ctxt->input->buf != NULL)) {
12367
size_t pos = ctxt->input->cur - ctxt->input->base;
12368
int res;
12369
12370
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
12371
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
12372
if (res < 0) {
12373
xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
12374
xmlHaltParser(ctxt);
12375
}
12376
}
12377
12378
return(ctxt);
12379
}
12380
#endif /* LIBXML_PUSH_ENABLED */
12381
12382
/**
12383
* xmlStopParser:
12384
* @ctxt: an XML parser context
12385
*
12386
* Blocks further parser processing
12387
*/
12388
void
12389
xmlStopParser(xmlParserCtxtPtr ctxt) {
12390
if (ctxt == NULL)
12391
return;
12392
xmlHaltParser(ctxt);
12393
ctxt->errNo = XML_ERR_USER_STOP;
12394
}
12395
12396
/**
12397
* xmlCreateIOParserCtxt:
12398
* @sax: a SAX handler
12399
* @user_data: The user data returned on SAX callbacks
12400
* @ioread: an I/O read function
12401
* @ioclose: an I/O close function
12402
* @ioctx: an I/O handler
12403
* @enc: the charset encoding if known
12404
*
12405
* Create a parser context for using the XML parser with an existing
12406
* I/O stream
12407
*
12408
* Returns the new parser context or NULL
12409
*/
12410
xmlParserCtxtPtr
12411
xmlCreateIOParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
12412
xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
12413
void *ioctx, xmlCharEncoding enc) {
12414
xmlParserCtxtPtr ctxt;
12415
xmlParserInputPtr inputStream;
12416
xmlParserInputBufferPtr buf;
12417
12418
if (ioread == NULL) return(NULL);
12419
12420
buf = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx, enc);
12421
if (buf == NULL) {
12422
if (ioclose != NULL)
12423
ioclose(ioctx);
12424
return (NULL);
12425
}
12426
12427
ctxt = xmlNewSAXParserCtxt(sax, user_data);
12428
if (ctxt == NULL) {
12429
xmlFreeParserInputBuffer(buf);
12430
return(NULL);
12431
}
12432
12433
inputStream = xmlNewIOInputStream(ctxt, buf, enc);
12434
if (inputStream == NULL) {
12435
xmlFreeParserCtxt(ctxt);
12436
return(NULL);
12437
}
12438
inputPush(ctxt, inputStream);
12439
12440
return(ctxt);
12441
}
12442
12443
#ifdef LIBXML_VALID_ENABLED
12444
/************************************************************************
12445
* *
12446
* Front ends when parsing a DTD *
12447
* *
12448
************************************************************************/
12449
12450
/**
12451
* xmlIOParseDTD:
12452
* @sax: the SAX handler block or NULL
12453
* @input: an Input Buffer
12454
* @enc: the charset encoding if known
12455
*
12456
* Load and parse a DTD
12457
*
12458
* Returns the resulting xmlDtdPtr or NULL in case of error.
12459
* @input will be freed by the function in any case.
12460
*/
12461
12462
xmlDtdPtr
12463
xmlIOParseDTD(xmlSAXHandlerPtr sax, xmlParserInputBufferPtr input,
12464
xmlCharEncoding enc) {
12465
xmlDtdPtr ret = NULL;
12466
xmlParserCtxtPtr ctxt;
12467
xmlParserInputPtr pinput = NULL;
12468
12469
if (input == NULL)
12470
return(NULL);
12471
12472
ctxt = xmlNewSAXParserCtxt(sax, NULL);
12473
if (ctxt == NULL) {
12474
xmlFreeParserInputBuffer(input);
12475
return(NULL);
12476
}
12477
12478
/* We are loading a DTD */
12479
ctxt->options |= XML_PARSE_DTDLOAD;
12480
12481
xmlDetectSAX2(ctxt);
12482
12483
/*
12484
* generate a parser input from the I/O handler
12485
*/
12486
12487
pinput = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
12488
if (pinput == NULL) {
12489
xmlFreeParserInputBuffer(input);
12490
xmlFreeParserCtxt(ctxt);
12491
return(NULL);
12492
}
12493
12494
/*
12495
* plug some encoding conversion routines here.
12496
*/
12497
if (xmlPushInput(ctxt, pinput) < 0) {
12498
xmlFreeParserCtxt(ctxt);
12499
return(NULL);
12500
}
12501
if (enc != XML_CHAR_ENCODING_NONE) {
12502
xmlSwitchEncoding(ctxt, enc);
12503
}
12504
12505
/*
12506
* let's parse that entity knowing it's an external subset.
12507
*/
12508
ctxt->inSubset = 2;
12509
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12510
if (ctxt->myDoc == NULL) {
12511
xmlErrMemory(ctxt, "New Doc failed");
12512
return(NULL);
12513
}
12514
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12515
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12516
BAD_CAST "none", BAD_CAST "none");
12517
12518
xmlDetectEncoding(ctxt);
12519
12520
xmlParseExternalSubset(ctxt, BAD_CAST "none", BAD_CAST "none");
12521
12522
if (ctxt->myDoc != NULL) {
12523
if (ctxt->wellFormed) {
12524
ret = ctxt->myDoc->extSubset;
12525
ctxt->myDoc->extSubset = NULL;
12526
if (ret != NULL) {
12527
xmlNodePtr tmp;
12528
12529
ret->doc = NULL;
12530
tmp = ret->children;
12531
while (tmp != NULL) {
12532
tmp->doc = NULL;
12533
tmp = tmp->next;
12534
}
12535
}
12536
} else {
12537
ret = NULL;
12538
}
12539
xmlFreeDoc(ctxt->myDoc);
12540
ctxt->myDoc = NULL;
12541
}
12542
xmlFreeParserCtxt(ctxt);
12543
12544
return(ret);
12545
}
12546
12547
/**
12548
* xmlSAXParseDTD:
12549
* @sax: the SAX handler block
12550
* @ExternalID: a NAME* containing the External ID of the DTD
12551
* @SystemID: a NAME* containing the URL to the DTD
12552
*
12553
* DEPRECATED: Don't use.
12554
*
12555
* Load and parse an external subset.
12556
*
12557
* Returns the resulting xmlDtdPtr or NULL in case of error.
12558
*/
12559
12560
xmlDtdPtr
12561
xmlSAXParseDTD(xmlSAXHandlerPtr sax, const xmlChar *ExternalID,
12562
const xmlChar *SystemID) {
12563
xmlDtdPtr ret = NULL;
12564
xmlParserCtxtPtr ctxt;
12565
xmlParserInputPtr input = NULL;
12566
xmlChar* systemIdCanonic;
12567
12568
if ((ExternalID == NULL) && (SystemID == NULL)) return(NULL);
12569
12570
ctxt = xmlNewSAXParserCtxt(sax, NULL);
12571
if (ctxt == NULL) {
12572
return(NULL);
12573
}
12574
12575
/* We are loading a DTD */
12576
ctxt->options |= XML_PARSE_DTDLOAD;
12577
12578
/*
12579
* Canonicalise the system ID
12580
*/
12581
systemIdCanonic = xmlCanonicPath(SystemID);
12582
if ((SystemID != NULL) && (systemIdCanonic == NULL)) {
12583
xmlFreeParserCtxt(ctxt);
12584
return(NULL);
12585
}
12586
12587
/*
12588
* Ask the Entity resolver to load the damn thing
12589
*/
12590
12591
if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
12592
input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
12593
systemIdCanonic);
12594
if (input == NULL) {
12595
xmlFreeParserCtxt(ctxt);
12596
if (systemIdCanonic != NULL)
12597
xmlFree(systemIdCanonic);
12598
return(NULL);
12599
}
12600
12601
/*
12602
* plug some encoding conversion routines here.
12603
*/
12604
if (xmlPushInput(ctxt, input) < 0) {
12605
xmlFreeParserCtxt(ctxt);
12606
if (systemIdCanonic != NULL)
12607
xmlFree(systemIdCanonic);
12608
return(NULL);
12609
}
12610
12611
xmlDetectEncoding(ctxt);
12612
12613
if (input->filename == NULL)
12614
input->filename = (char *) systemIdCanonic;
12615
else
12616
xmlFree(systemIdCanonic);
12617
12618
/*
12619
* let's parse that entity knowing it's an external subset.
12620
*/
12621
ctxt->inSubset = 2;
12622
ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
12623
if (ctxt->myDoc == NULL) {
12624
xmlErrMemory(ctxt, "New Doc failed");
12625
xmlFreeParserCtxt(ctxt);
12626
return(NULL);
12627
}
12628
ctxt->myDoc->properties = XML_DOC_INTERNAL;
12629
ctxt->myDoc->extSubset = xmlNewDtd(ctxt->myDoc, BAD_CAST "none",
12630
ExternalID, SystemID);
12631
xmlParseExternalSubset(ctxt, ExternalID, SystemID);
12632
12633
if (ctxt->myDoc != NULL) {
12634
if (ctxt->wellFormed) {
12635
ret = ctxt->myDoc->extSubset;
12636
ctxt->myDoc->extSubset = NULL;
12637
if (ret != NULL) {
12638
xmlNodePtr tmp;
12639
12640
ret->doc = NULL;
12641
tmp = ret->children;
12642
while (tmp != NULL) {
12643
tmp->doc = NULL;
12644
tmp = tmp->next;
12645
}
12646
}
12647
} else {
12648
ret = NULL;
12649
}
12650
xmlFreeDoc(ctxt->myDoc);
12651
ctxt->myDoc = NULL;
12652
}
12653
xmlFreeParserCtxt(ctxt);
12654
12655
return(ret);
12656
}
12657
12658
12659
/**
12660
* xmlParseDTD:
12661
* @ExternalID: a NAME* containing the External ID of the DTD
12662
* @SystemID: a NAME* containing the URL to the DTD
12663
*
12664
* Load and parse an external subset.
12665
*
12666
* Returns the resulting xmlDtdPtr or NULL in case of error.
12667
*/
12668
12669
xmlDtdPtr
12670
xmlParseDTD(const xmlChar *ExternalID, const xmlChar *SystemID) {
12671
return(xmlSAXParseDTD(NULL, ExternalID, SystemID));
12672
}
12673
#endif /* LIBXML_VALID_ENABLED */
12674
12675
/************************************************************************
12676
* *
12677
* Front ends when parsing an Entity *
12678
* *
12679
************************************************************************/
12680
12681
/**
12682
* xmlParseCtxtExternalEntity:
12683
* @ctx: the existing parsing context
12684
* @URL: the URL for the entity to load
12685
* @ID: the System ID for the entity to load
12686
* @lst: the return value for the set of parsed nodes
12687
*
12688
* Parse an external general entity within an existing parsing context
12689
* An external general parsed entity is well-formed if it matches the
12690
* production labeled extParsedEnt.
12691
*
12692
* [78] extParsedEnt ::= TextDecl? content
12693
*
12694
* Returns 0 if the entity is well formed, -1 in case of args problem and
12695
* the parser error code otherwise
12696
*/
12697
12698
int
12699
xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, const xmlChar *URL,
12700
const xmlChar *ID, xmlNodePtr *lst) {
12701
void *userData;
12702
12703
if (ctx == NULL) return(-1);
12704
/*
12705
* If the user provided their own SAX callbacks, then reuse the
12706
* userData callback field, otherwise the expected setup in a
12707
* DOM builder is to have userData == ctxt
12708
*/
12709
if (ctx->userData == ctx)
12710
userData = NULL;
12711
else
12712
userData = ctx->userData;
12713
return xmlParseExternalEntityPrivate(ctx->myDoc, ctx, ctx->sax,
12714
userData, ctx->depth + 1,
12715
URL, ID, lst);
12716
}
12717
12718
/**
12719
* xmlParseExternalEntityPrivate:
12720
* @doc: the document the chunk pertains to
12721
* @oldctxt: the previous parser context if available
12722
* @sax: the SAX handler block (possibly NULL)
12723
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12724
* @depth: Used for loop detection, use 0
12725
* @URL: the URL for the entity to load
12726
* @ID: the System ID for the entity to load
12727
* @list: the return value for the set of parsed nodes
12728
*
12729
* Private version of xmlParseExternalEntity()
12730
*
12731
* Returns 0 if the entity is well formed, -1 in case of args problem and
12732
* the parser error code otherwise
12733
*/
12734
12735
static xmlParserErrors
12736
xmlParseExternalEntityPrivate(xmlDocPtr doc, xmlParserCtxtPtr oldctxt,
12737
xmlSAXHandlerPtr sax,
12738
void *user_data, int depth, const xmlChar *URL,
12739
const xmlChar *ID, xmlNodePtr *list) {
12740
xmlParserCtxtPtr ctxt;
12741
xmlDocPtr newDoc;
12742
xmlNodePtr newRoot;
12743
xmlParserErrors ret = XML_ERR_OK;
12744
12745
if (((depth > 40) &&
12746
((oldctxt == NULL) || (oldctxt->options & XML_PARSE_HUGE) == 0)) ||
12747
(depth > 100)) {
12748
xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
12749
"Maximum entity nesting depth exceeded");
12750
return(XML_ERR_ENTITY_LOOP);
12751
}
12752
12753
if (list != NULL)
12754
*list = NULL;
12755
if ((URL == NULL) && (ID == NULL))
12756
return(XML_ERR_INTERNAL_ERROR);
12757
if (doc == NULL)
12758
return(XML_ERR_INTERNAL_ERROR);
12759
12760
ctxt = xmlCreateEntityParserCtxtInternal(sax, user_data, URL, ID, NULL,
12761
oldctxt);
12762
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
12763
if (oldctxt != NULL) {
12764
ctxt->nbErrors = oldctxt->nbErrors;
12765
ctxt->nbWarnings = oldctxt->nbWarnings;
12766
}
12767
xmlDetectSAX2(ctxt);
12768
12769
newDoc = xmlNewDoc(BAD_CAST "1.0");
12770
if (newDoc == NULL) {
12771
xmlFreeParserCtxt(ctxt);
12772
return(XML_ERR_INTERNAL_ERROR);
12773
}
12774
newDoc->properties = XML_DOC_INTERNAL;
12775
if (doc) {
12776
newDoc->intSubset = doc->intSubset;
12777
newDoc->extSubset = doc->extSubset;
12778
if (doc->dict) {
12779
newDoc->dict = doc->dict;
12780
xmlDictReference(newDoc->dict);
12781
}
12782
if (doc->URL != NULL) {
12783
newDoc->URL = xmlStrdup(doc->URL);
12784
}
12785
}
12786
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
12787
if (newRoot == NULL) {
12788
if (sax != NULL)
12789
xmlFreeParserCtxt(ctxt);
12790
newDoc->intSubset = NULL;
12791
newDoc->extSubset = NULL;
12792
xmlFreeDoc(newDoc);
12793
return(XML_ERR_INTERNAL_ERROR);
12794
}
12795
xmlAddChild((xmlNodePtr) newDoc, newRoot);
12796
nodePush(ctxt, newDoc->children);
12797
if (doc == NULL) {
12798
ctxt->myDoc = newDoc;
12799
} else {
12800
ctxt->myDoc = doc;
12801
newRoot->doc = doc;
12802
}
12803
12804
xmlDetectEncoding(ctxt);
12805
12806
/*
12807
* Parse a possible text declaration first
12808
*/
12809
if ((CMP5(CUR_PTR, '<', '?', 'x', 'm', 'l')) && (IS_BLANK_CH(NXT(5)))) {
12810
xmlParseTextDecl(ctxt);
12811
/*
12812
* An XML-1.0 document can't reference an entity not XML-1.0
12813
*/
12814
if ((xmlStrEqual(oldctxt->version, BAD_CAST "1.0")) &&
12815
(!xmlStrEqual(ctxt->input->version, BAD_CAST "1.0"))) {
12816
xmlFatalErrMsg(ctxt, XML_ERR_VERSION_MISMATCH,
12817
"Version mismatch between document and entity\n");
12818
}
12819
}
12820
12821
ctxt->instate = XML_PARSER_CONTENT;
12822
ctxt->depth = depth;
12823
if (oldctxt != NULL) {
12824
ctxt->_private = oldctxt->_private;
12825
ctxt->loadsubset = oldctxt->loadsubset;
12826
ctxt->validate = oldctxt->validate;
12827
ctxt->valid = oldctxt->valid;
12828
ctxt->replaceEntities = oldctxt->replaceEntities;
12829
if (oldctxt->validate) {
12830
ctxt->vctxt.error = oldctxt->vctxt.error;
12831
ctxt->vctxt.warning = oldctxt->vctxt.warning;
12832
ctxt->vctxt.userData = oldctxt->vctxt.userData;
12833
ctxt->vctxt.flags = oldctxt->vctxt.flags;
12834
}
12835
ctxt->external = oldctxt->external;
12836
if (ctxt->dict) xmlDictFree(ctxt->dict);
12837
ctxt->dict = oldctxt->dict;
12838
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
12839
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
12840
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
12841
ctxt->dictNames = oldctxt->dictNames;
12842
ctxt->attsDefault = oldctxt->attsDefault;
12843
ctxt->attsSpecial = oldctxt->attsSpecial;
12844
ctxt->linenumbers = oldctxt->linenumbers;
12845
ctxt->record_info = oldctxt->record_info;
12846
ctxt->node_seq.maximum = oldctxt->node_seq.maximum;
12847
ctxt->node_seq.length = oldctxt->node_seq.length;
12848
ctxt->node_seq.buffer = oldctxt->node_seq.buffer;
12849
} else {
12850
/*
12851
* Doing validity checking on chunk without context
12852
* doesn't make sense
12853
*/
12854
ctxt->_private = NULL;
12855
ctxt->validate = 0;
12856
ctxt->external = 2;
12857
ctxt->loadsubset = 0;
12858
}
12859
12860
xmlParseContent(ctxt);
12861
12862
if ((RAW == '<') && (NXT(1) == '/')) {
12863
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12864
} else if (RAW != 0) {
12865
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
12866
}
12867
if (ctxt->node != newDoc->children) {
12868
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
12869
}
12870
12871
if (!ctxt->wellFormed) {
12872
ret = (xmlParserErrors)ctxt->errNo;
12873
if (oldctxt != NULL) {
12874
oldctxt->errNo = ctxt->errNo;
12875
oldctxt->wellFormed = 0;
12876
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
12877
}
12878
} else {
12879
if (list != NULL) {
12880
xmlNodePtr cur;
12881
12882
/*
12883
* Return the newly created nodeset after unlinking it from
12884
* they pseudo parent.
12885
*/
12886
cur = newDoc->children->children;
12887
*list = cur;
12888
while (cur != NULL) {
12889
cur->parent = NULL;
12890
cur = cur->next;
12891
}
12892
newDoc->children->children = NULL;
12893
}
12894
ret = XML_ERR_OK;
12895
}
12896
12897
/*
12898
* Also record the size of the entity parsed
12899
*/
12900
if (ctxt->input != NULL && oldctxt != NULL) {
12901
unsigned long consumed = ctxt->input->consumed;
12902
12903
xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
12904
12905
xmlSaturatedAdd(&oldctxt->sizeentities, consumed);
12906
xmlSaturatedAdd(&oldctxt->sizeentities, ctxt->sizeentities);
12907
12908
xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
12909
xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
12910
}
12911
12912
if (oldctxt != NULL) {
12913
ctxt->dict = NULL;
12914
ctxt->attsDefault = NULL;
12915
ctxt->attsSpecial = NULL;
12916
oldctxt->nbErrors = ctxt->nbErrors;
12917
oldctxt->nbWarnings = ctxt->nbWarnings;
12918
oldctxt->validate = ctxt->validate;
12919
oldctxt->valid = ctxt->valid;
12920
oldctxt->node_seq.maximum = ctxt->node_seq.maximum;
12921
oldctxt->node_seq.length = ctxt->node_seq.length;
12922
oldctxt->node_seq.buffer = ctxt->node_seq.buffer;
12923
}
12924
ctxt->node_seq.maximum = 0;
12925
ctxt->node_seq.length = 0;
12926
ctxt->node_seq.buffer = NULL;
12927
xmlFreeParserCtxt(ctxt);
12928
newDoc->intSubset = NULL;
12929
newDoc->extSubset = NULL;
12930
xmlFreeDoc(newDoc);
12931
12932
return(ret);
12933
}
12934
12935
#ifdef LIBXML_SAX1_ENABLED
12936
/**
12937
* xmlParseExternalEntity:
12938
* @doc: the document the chunk pertains to
12939
* @sax: the SAX handler block (possibly NULL)
12940
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12941
* @depth: Used for loop detection, use 0
12942
* @URL: the URL for the entity to load
12943
* @ID: the System ID for the entity to load
12944
* @lst: the return value for the set of parsed nodes
12945
*
12946
* Parse an external general entity
12947
* An external general parsed entity is well-formed if it matches the
12948
* production labeled extParsedEnt.
12949
*
12950
* [78] extParsedEnt ::= TextDecl? content
12951
*
12952
* Returns 0 if the entity is well formed, -1 in case of args problem and
12953
* the parser error code otherwise
12954
*/
12955
12956
int
12957
xmlParseExternalEntity(xmlDocPtr doc, xmlSAXHandlerPtr sax, void *user_data,
12958
int depth, const xmlChar *URL, const xmlChar *ID, xmlNodePtr *lst) {
12959
return(xmlParseExternalEntityPrivate(doc, NULL, sax, user_data, depth, URL,
12960
ID, lst));
12961
}
12962
12963
/**
12964
* xmlParseBalancedChunkMemory:
12965
* @doc: the document the chunk pertains to (must not be NULL)
12966
* @sax: the SAX handler block (possibly NULL)
12967
* @user_data: The user data returned on SAX callbacks (possibly NULL)
12968
* @depth: Used for loop detection, use 0
12969
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
12970
* @lst: the return value for the set of parsed nodes
12971
*
12972
* Parse a well-balanced chunk of an XML document
12973
* called by the parser
12974
* The allowed sequence for the Well Balanced Chunk is the one defined by
12975
* the content production in the XML grammar:
12976
*
12977
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
12978
*
12979
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
12980
* the parser error code otherwise
12981
*/
12982
12983
int
12984
xmlParseBalancedChunkMemory(xmlDocPtr doc, xmlSAXHandlerPtr sax,
12985
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst) {
12986
return xmlParseBalancedChunkMemoryRecover( doc, sax, user_data,
12987
depth, string, lst, 0 );
12988
}
12989
#endif /* LIBXML_SAX1_ENABLED */
12990
12991
/**
12992
* xmlParseBalancedChunkMemoryInternal:
12993
* @oldctxt: the existing parsing context
12994
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
12995
* @user_data: the user data field for the parser context
12996
* @lst: the return value for the set of parsed nodes
12997
*
12998
*
12999
* Parse a well-balanced chunk of an XML document
13000
* called by the parser
13001
* The allowed sequence for the Well Balanced Chunk is the one defined by
13002
* the content production in the XML grammar:
13003
*
13004
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13005
*
13006
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13007
* error code otherwise
13008
*
13009
* In case recover is set to 1, the nodelist will not be empty even if
13010
* the parsed chunk is not well balanced.
13011
*/
13012
static xmlParserErrors
13013
xmlParseBalancedChunkMemoryInternal(xmlParserCtxtPtr oldctxt,
13014
const xmlChar *string, void *user_data, xmlNodePtr *lst) {
13015
xmlParserCtxtPtr ctxt;
13016
xmlDocPtr newDoc = NULL;
13017
xmlNodePtr newRoot;
13018
xmlSAXHandlerPtr oldsax = NULL;
13019
xmlNodePtr content = NULL;
13020
xmlNodePtr last = NULL;
13021
xmlParserErrors ret = XML_ERR_OK;
13022
xmlHashedString hprefix, huri;
13023
unsigned i;
13024
13025
if (((oldctxt->depth > 40) && ((oldctxt->options & XML_PARSE_HUGE) == 0)) ||
13026
(oldctxt->depth > 100)) {
13027
xmlFatalErrMsg(oldctxt, XML_ERR_ENTITY_LOOP,
13028
"Maximum entity nesting depth exceeded");
13029
return(XML_ERR_ENTITY_LOOP);
13030
}
13031
13032
13033
if (lst != NULL)
13034
*lst = NULL;
13035
if (string == NULL)
13036
return(XML_ERR_INTERNAL_ERROR);
13037
13038
ctxt = xmlCreateDocParserCtxt(string);
13039
if (ctxt == NULL) return(XML_WAR_UNDECLARED_ENTITY);
13040
ctxt->nbErrors = oldctxt->nbErrors;
13041
ctxt->nbWarnings = oldctxt->nbWarnings;
13042
if (user_data != NULL)
13043
ctxt->userData = user_data;
13044
else
13045
ctxt->userData = ctxt;
13046
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
13047
ctxt->dict = oldctxt->dict;
13048
ctxt->input_id = oldctxt->input_id;
13049
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13050
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13051
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13052
13053
/*
13054
* Propagate namespaces down the entity
13055
*
13056
* Making entities and namespaces work correctly requires additional
13057
* changes, see xmlParseReference.
13058
*/
13059
13060
/* Default namespace */
13061
hprefix.name = NULL;
13062
hprefix.hashValue = 0;
13063
huri.name = xmlParserNsLookupUri(oldctxt, &hprefix);
13064
huri.hashValue = 0;
13065
if (huri.name != NULL)
13066
xmlParserNsPush(ctxt, NULL, &huri, NULL, 0);
13067
13068
for (i = 0; i < oldctxt->nsdb->hashSize; i++) {
13069
xmlParserNsBucket *bucket = &oldctxt->nsdb->hash[i];
13070
const xmlChar **ns;
13071
xmlParserNsExtra *extra;
13072
unsigned nsIndex;
13073
13074
if ((bucket->hashValue != 0) &&
13075
(bucket->index != INT_MAX)) {
13076
nsIndex = bucket->index;
13077
ns = &oldctxt->nsTab[nsIndex * 2];
13078
extra = &oldctxt->nsdb->extra[nsIndex];
13079
13080
hprefix.name = ns[0];
13081
hprefix.hashValue = bucket->hashValue;
13082
huri.name = ns[1];
13083
huri.hashValue = extra->uriHashValue;
13084
/*
13085
* Don't copy SAX data to avoid a use-after-free with XML reader.
13086
* This matches the pre-2.12 behavior.
13087
*/
13088
xmlParserNsPush(ctxt, &hprefix, &huri, NULL, 0);
13089
}
13090
}
13091
13092
oldsax = ctxt->sax;
13093
ctxt->sax = oldctxt->sax;
13094
xmlDetectSAX2(ctxt);
13095
ctxt->replaceEntities = oldctxt->replaceEntities;
13096
ctxt->options = oldctxt->options;
13097
13098
ctxt->_private = oldctxt->_private;
13099
if (oldctxt->myDoc == NULL) {
13100
newDoc = xmlNewDoc(BAD_CAST "1.0");
13101
if (newDoc == NULL) {
13102
ret = XML_ERR_INTERNAL_ERROR;
13103
goto error;
13104
}
13105
newDoc->properties = XML_DOC_INTERNAL;
13106
newDoc->dict = ctxt->dict;
13107
xmlDictReference(newDoc->dict);
13108
ctxt->myDoc = newDoc;
13109
} else {
13110
ctxt->myDoc = oldctxt->myDoc;
13111
content = ctxt->myDoc->children;
13112
last = ctxt->myDoc->last;
13113
}
13114
newRoot = xmlNewDocNode(ctxt->myDoc, NULL, BAD_CAST "pseudoroot", NULL);
13115
if (newRoot == NULL) {
13116
ret = XML_ERR_INTERNAL_ERROR;
13117
goto error;
13118
}
13119
ctxt->myDoc->children = NULL;
13120
ctxt->myDoc->last = NULL;
13121
xmlAddChild((xmlNodePtr) ctxt->myDoc, newRoot);
13122
nodePush(ctxt, ctxt->myDoc->children);
13123
ctxt->instate = XML_PARSER_CONTENT;
13124
ctxt->depth = oldctxt->depth;
13125
13126
ctxt->validate = 0;
13127
ctxt->loadsubset = oldctxt->loadsubset;
13128
if ((oldctxt->validate) || (oldctxt->replaceEntities != 0)) {
13129
/*
13130
* ID/IDREF registration will be done in xmlValidateElement below
13131
*/
13132
ctxt->loadsubset |= XML_SKIP_IDS;
13133
}
13134
ctxt->dictNames = oldctxt->dictNames;
13135
ctxt->attsDefault = oldctxt->attsDefault;
13136
ctxt->attsSpecial = oldctxt->attsSpecial;
13137
13138
xmlParseContent(ctxt);
13139
if ((RAW == '<') && (NXT(1) == '/')) {
13140
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13141
} else if (RAW != 0) {
13142
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13143
}
13144
if (ctxt->node != ctxt->myDoc->children) {
13145
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13146
}
13147
13148
if (!ctxt->wellFormed) {
13149
ret = (xmlParserErrors)ctxt->errNo;
13150
oldctxt->errNo = ctxt->errNo;
13151
oldctxt->wellFormed = 0;
13152
xmlCopyError(&ctxt->lastError, &oldctxt->lastError);
13153
} else {
13154
ret = XML_ERR_OK;
13155
}
13156
13157
if ((lst != NULL) && (ret == XML_ERR_OK)) {
13158
xmlNodePtr cur;
13159
13160
/*
13161
* Return the newly created nodeset after unlinking it from
13162
* they pseudo parent.
13163
*/
13164
cur = ctxt->myDoc->children->children;
13165
*lst = cur;
13166
while (cur != NULL) {
13167
#ifdef LIBXML_VALID_ENABLED
13168
if ((oldctxt->validate) && (oldctxt->wellFormed) &&
13169
(oldctxt->myDoc) && (oldctxt->myDoc->intSubset) &&
13170
(cur->type == XML_ELEMENT_NODE)) {
13171
oldctxt->valid &= xmlValidateElement(&oldctxt->vctxt,
13172
oldctxt->myDoc, cur);
13173
}
13174
#endif /* LIBXML_VALID_ENABLED */
13175
cur->parent = NULL;
13176
cur = cur->next;
13177
}
13178
ctxt->myDoc->children->children = NULL;
13179
}
13180
if (ctxt->myDoc != NULL) {
13181
xmlFreeNode(ctxt->myDoc->children);
13182
ctxt->myDoc->children = content;
13183
ctxt->myDoc->last = last;
13184
}
13185
13186
/*
13187
* Also record the size of the entity parsed
13188
*/
13189
if (ctxt->input != NULL && oldctxt != NULL) {
13190
unsigned long consumed = ctxt->input->consumed;
13191
13192
xmlSaturatedAddSizeT(&consumed, ctxt->input->cur - ctxt->input->base);
13193
13194
xmlSaturatedAdd(&oldctxt->sizeentcopy, consumed);
13195
xmlSaturatedAdd(&oldctxt->sizeentcopy, ctxt->sizeentcopy);
13196
}
13197
13198
oldctxt->nbErrors = ctxt->nbErrors;
13199
oldctxt->nbWarnings = ctxt->nbWarnings;
13200
13201
error:
13202
ctxt->sax = oldsax;
13203
ctxt->dict = NULL;
13204
ctxt->attsDefault = NULL;
13205
ctxt->attsSpecial = NULL;
13206
xmlFreeParserCtxt(ctxt);
13207
if (newDoc != NULL) {
13208
xmlFreeDoc(newDoc);
13209
}
13210
13211
return(ret);
13212
}
13213
13214
/**
13215
* xmlParseInNodeContext:
13216
* @node: the context node
13217
* @data: the input string
13218
* @datalen: the input string length in bytes
13219
* @options: a combination of xmlParserOption
13220
* @lst: the return value for the set of parsed nodes
13221
*
13222
* Parse a well-balanced chunk of an XML document
13223
* within the context (DTD, namespaces, etc ...) of the given node.
13224
*
13225
* The allowed sequence for the data is a Well Balanced Chunk defined by
13226
* the content production in the XML grammar:
13227
*
13228
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13229
*
13230
* Returns XML_ERR_OK if the chunk is well balanced, and the parser
13231
* error code otherwise
13232
*/
13233
xmlParserErrors
13234
xmlParseInNodeContext(xmlNodePtr node, const char *data, int datalen,
13235
int options, xmlNodePtr *lst) {
13236
xmlParserCtxtPtr ctxt;
13237
xmlDocPtr doc = NULL;
13238
xmlNodePtr fake, cur;
13239
int nsnr = 0;
13240
13241
xmlParserErrors ret = XML_ERR_OK;
13242
13243
/*
13244
* check all input parameters, grab the document
13245
*/
13246
if ((lst == NULL) || (node == NULL) || (data == NULL) || (datalen < 0))
13247
return(XML_ERR_INTERNAL_ERROR);
13248
switch (node->type) {
13249
case XML_ELEMENT_NODE:
13250
case XML_ATTRIBUTE_NODE:
13251
case XML_TEXT_NODE:
13252
case XML_CDATA_SECTION_NODE:
13253
case XML_ENTITY_REF_NODE:
13254
case XML_PI_NODE:
13255
case XML_COMMENT_NODE:
13256
case XML_DOCUMENT_NODE:
13257
case XML_HTML_DOCUMENT_NODE:
13258
break;
13259
default:
13260
return(XML_ERR_INTERNAL_ERROR);
13261
13262
}
13263
while ((node != NULL) && (node->type != XML_ELEMENT_NODE) &&
13264
(node->type != XML_DOCUMENT_NODE) &&
13265
(node->type != XML_HTML_DOCUMENT_NODE))
13266
node = node->parent;
13267
if (node == NULL)
13268
return(XML_ERR_INTERNAL_ERROR);
13269
if (node->type == XML_ELEMENT_NODE)
13270
doc = node->doc;
13271
else
13272
doc = (xmlDocPtr) node;
13273
if (doc == NULL)
13274
return(XML_ERR_INTERNAL_ERROR);
13275
13276
/*
13277
* allocate a context and set-up everything not related to the
13278
* node position in the tree
13279
*/
13280
if (doc->type == XML_DOCUMENT_NODE)
13281
ctxt = xmlCreateMemoryParserCtxt((char *) data, datalen);
13282
#ifdef LIBXML_HTML_ENABLED
13283
else if (doc->type == XML_HTML_DOCUMENT_NODE) {
13284
ctxt = htmlCreateMemoryParserCtxt((char *) data, datalen);
13285
/*
13286
* When parsing in context, it makes no sense to add implied
13287
* elements like html/body/etc...
13288
*/
13289
options |= HTML_PARSE_NOIMPLIED;
13290
}
13291
#endif
13292
else
13293
return(XML_ERR_INTERNAL_ERROR);
13294
13295
if (ctxt == NULL)
13296
return(XML_ERR_NO_MEMORY);
13297
13298
/*
13299
* Use input doc's dict if present, else assure XML_PARSE_NODICT is set.
13300
* We need a dictionary for xmlDetectSAX2, so if there's no doc dict
13301
* we must wait until the last moment to free the original one.
13302
*/
13303
if (doc->dict != NULL) {
13304
if (ctxt->dict != NULL)
13305
xmlDictFree(ctxt->dict);
13306
ctxt->dict = doc->dict;
13307
} else
13308
options |= XML_PARSE_NODICT;
13309
13310
if (doc->encoding != NULL) {
13311
xmlCharEncodingHandlerPtr hdlr;
13312
13313
hdlr = xmlFindCharEncodingHandler((const char *) doc->encoding);
13314
if (hdlr != NULL) {
13315
xmlSwitchToEncoding(ctxt, hdlr);
13316
} else {
13317
return(XML_ERR_UNSUPPORTED_ENCODING);
13318
}
13319
}
13320
13321
xmlCtxtUseOptionsInternal(ctxt, options);
13322
xmlDetectSAX2(ctxt);
13323
ctxt->myDoc = doc;
13324
/* parsing in context, i.e. as within existing content */
13325
ctxt->input_id = 2;
13326
ctxt->instate = XML_PARSER_CONTENT;
13327
13328
fake = xmlNewDocComment(node->doc, NULL);
13329
if (fake == NULL) {
13330
xmlFreeParserCtxt(ctxt);
13331
return(XML_ERR_NO_MEMORY);
13332
}
13333
xmlAddChild(node, fake);
13334
13335
if (node->type == XML_ELEMENT_NODE)
13336
nodePush(ctxt, node);
13337
13338
if ((ctxt->html == 0) && (node->type == XML_ELEMENT_NODE)) {
13339
/*
13340
* initialize the SAX2 namespaces stack
13341
*/
13342
cur = node;
13343
while ((cur != NULL) && (cur->type == XML_ELEMENT_NODE)) {
13344
xmlNsPtr ns = cur->nsDef;
13345
xmlHashedString hprefix, huri;
13346
13347
while (ns != NULL) {
13348
hprefix = xmlDictLookupHashed(ctxt->dict, ns->prefix, -1);
13349
huri = xmlDictLookupHashed(ctxt->dict, ns->href, -1);
13350
if (xmlParserNsPush(ctxt, &hprefix, &huri, ns, 1) > 0)
13351
nsnr++;
13352
ns = ns->next;
13353
}
13354
cur = cur->parent;
13355
}
13356
}
13357
13358
if ((ctxt->validate) || (ctxt->replaceEntities != 0)) {
13359
/*
13360
* ID/IDREF registration will be done in xmlValidateElement below
13361
*/
13362
ctxt->loadsubset |= XML_SKIP_IDS;
13363
}
13364
13365
#ifdef LIBXML_HTML_ENABLED
13366
if (doc->type == XML_HTML_DOCUMENT_NODE)
13367
__htmlParseContent(ctxt);
13368
else
13369
#endif
13370
xmlParseContent(ctxt);
13371
13372
xmlParserNsPop(ctxt, nsnr);
13373
if ((RAW == '<') && (NXT(1) == '/')) {
13374
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13375
} else if (RAW != 0) {
13376
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13377
}
13378
if ((ctxt->node != NULL) && (ctxt->node != node)) {
13379
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13380
ctxt->wellFormed = 0;
13381
}
13382
13383
if (!ctxt->wellFormed) {
13384
if (ctxt->errNo == 0)
13385
ret = XML_ERR_INTERNAL_ERROR;
13386
else
13387
ret = (xmlParserErrors)ctxt->errNo;
13388
} else {
13389
ret = XML_ERR_OK;
13390
}
13391
13392
/*
13393
* Return the newly created nodeset after unlinking it from
13394
* the pseudo sibling.
13395
*/
13396
13397
cur = fake->next;
13398
fake->next = NULL;
13399
node->last = fake;
13400
13401
if (cur != NULL) {
13402
cur->prev = NULL;
13403
}
13404
13405
*lst = cur;
13406
13407
while (cur != NULL) {
13408
cur->parent = NULL;
13409
cur = cur->next;
13410
}
13411
13412
xmlUnlinkNode(fake);
13413
xmlFreeNode(fake);
13414
13415
13416
if (ret != XML_ERR_OK) {
13417
xmlFreeNodeList(*lst);
13418
*lst = NULL;
13419
}
13420
13421
if (doc->dict != NULL)
13422
ctxt->dict = NULL;
13423
xmlFreeParserCtxt(ctxt);
13424
13425
return(ret);
13426
}
13427
13428
#ifdef LIBXML_SAX1_ENABLED
13429
/**
13430
* xmlParseBalancedChunkMemoryRecover:
13431
* @doc: the document the chunk pertains to (must not be NULL)
13432
* @sax: the SAX handler block (possibly NULL)
13433
* @user_data: The user data returned on SAX callbacks (possibly NULL)
13434
* @depth: Used for loop detection, use 0
13435
* @string: the input string in UTF8 or ISO-Latin (zero terminated)
13436
* @lst: the return value for the set of parsed nodes
13437
* @recover: return nodes even if the data is broken (use 0)
13438
*
13439
*
13440
* Parse a well-balanced chunk of an XML document
13441
* called by the parser
13442
* The allowed sequence for the Well Balanced Chunk is the one defined by
13443
* the content production in the XML grammar:
13444
*
13445
* [43] content ::= (element | CharData | Reference | CDSect | PI | Comment)*
13446
*
13447
* Returns 0 if the chunk is well balanced, -1 in case of args problem and
13448
* the parser error code otherwise
13449
*
13450
* In case recover is set to 1, the nodelist will not be empty even if
13451
* the parsed chunk is not well balanced, assuming the parsing succeeded to
13452
* some extent.
13453
*/
13454
int
13455
xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, xmlSAXHandlerPtr sax,
13456
void *user_data, int depth, const xmlChar *string, xmlNodePtr *lst,
13457
int recover) {
13458
xmlParserCtxtPtr ctxt;
13459
xmlDocPtr newDoc;
13460
xmlSAXHandlerPtr oldsax = NULL;
13461
xmlNodePtr content, newRoot;
13462
int ret = 0;
13463
13464
if (depth > 40) {
13465
return(XML_ERR_ENTITY_LOOP);
13466
}
13467
13468
13469
if (lst != NULL)
13470
*lst = NULL;
13471
if (string == NULL)
13472
return(-1);
13473
13474
ctxt = xmlCreateDocParserCtxt(string);
13475
if (ctxt == NULL) return(-1);
13476
ctxt->userData = ctxt;
13477
if (sax != NULL) {
13478
oldsax = ctxt->sax;
13479
ctxt->sax = sax;
13480
if (user_data != NULL)
13481
ctxt->userData = user_data;
13482
}
13483
newDoc = xmlNewDoc(BAD_CAST "1.0");
13484
if (newDoc == NULL) {
13485
xmlFreeParserCtxt(ctxt);
13486
return(-1);
13487
}
13488
newDoc->properties = XML_DOC_INTERNAL;
13489
if ((doc != NULL) && (doc->dict != NULL)) {
13490
xmlDictFree(ctxt->dict);
13491
ctxt->dict = doc->dict;
13492
xmlDictReference(ctxt->dict);
13493
ctxt->str_xml = xmlDictLookup(ctxt->dict, BAD_CAST "xml", 3);
13494
ctxt->str_xmlns = xmlDictLookup(ctxt->dict, BAD_CAST "xmlns", 5);
13495
ctxt->str_xml_ns = xmlDictLookup(ctxt->dict, XML_XML_NAMESPACE, 36);
13496
ctxt->dictNames = 1;
13497
newDoc->dict = ctxt->dict;
13498
xmlDictReference(newDoc->dict);
13499
} else {
13500
xmlCtxtUseOptionsInternal(ctxt, XML_PARSE_NODICT);
13501
}
13502
/* doc == NULL is only supported for historic reasons */
13503
if (doc != NULL) {
13504
newDoc->intSubset = doc->intSubset;
13505
newDoc->extSubset = doc->extSubset;
13506
}
13507
newRoot = xmlNewDocNode(newDoc, NULL, BAD_CAST "pseudoroot", NULL);
13508
if (newRoot == NULL) {
13509
if (sax != NULL)
13510
ctxt->sax = oldsax;
13511
xmlFreeParserCtxt(ctxt);
13512
newDoc->intSubset = NULL;
13513
newDoc->extSubset = NULL;
13514
xmlFreeDoc(newDoc);
13515
return(-1);
13516
}
13517
xmlAddChild((xmlNodePtr) newDoc, newRoot);
13518
nodePush(ctxt, newRoot);
13519
/* doc == NULL is only supported for historic reasons */
13520
if (doc == NULL) {
13521
ctxt->myDoc = newDoc;
13522
} else {
13523
ctxt->myDoc = newDoc;
13524
/* Ensure that doc has XML spec namespace */
13525
xmlSearchNsByHref(doc, (xmlNodePtr)doc, XML_XML_NAMESPACE);
13526
newDoc->oldNs = doc->oldNs;
13527
}
13528
ctxt->instate = XML_PARSER_CONTENT;
13529
ctxt->input_id = 2;
13530
ctxt->depth = depth;
13531
13532
/*
13533
* Doing validity checking on chunk doesn't make sense
13534
*/
13535
ctxt->validate = 0;
13536
ctxt->loadsubset = 0;
13537
xmlDetectSAX2(ctxt);
13538
13539
if ( doc != NULL ){
13540
content = doc->children;
13541
doc->children = NULL;
13542
xmlParseContent(ctxt);
13543
doc->children = content;
13544
}
13545
else {
13546
xmlParseContent(ctxt);
13547
}
13548
if ((RAW == '<') && (NXT(1) == '/')) {
13549
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13550
} else if (RAW != 0) {
13551
xmlFatalErr(ctxt, XML_ERR_EXTRA_CONTENT, NULL);
13552
}
13553
if (ctxt->node != newDoc->children) {
13554
xmlFatalErr(ctxt, XML_ERR_NOT_WELL_BALANCED, NULL);
13555
}
13556
13557
if (!ctxt->wellFormed) {
13558
if (ctxt->errNo == 0)
13559
ret = 1;
13560
else
13561
ret = ctxt->errNo;
13562
} else {
13563
ret = 0;
13564
}
13565
13566
if ((lst != NULL) && ((ret == 0) || (recover == 1))) {
13567
xmlNodePtr cur;
13568
13569
/*
13570
* Return the newly created nodeset after unlinking it from
13571
* they pseudo parent.
13572
*/
13573
cur = newDoc->children->children;
13574
*lst = cur;
13575
while (cur != NULL) {
13576
xmlSetTreeDoc(cur, doc);
13577
cur->parent = NULL;
13578
cur = cur->next;
13579
}
13580
newDoc->children->children = NULL;
13581
}
13582
13583
if (sax != NULL)
13584
ctxt->sax = oldsax;
13585
xmlFreeParserCtxt(ctxt);
13586
newDoc->intSubset = NULL;
13587
newDoc->extSubset = NULL;
13588
/* This leaks the namespace list if doc == NULL */
13589
newDoc->oldNs = NULL;
13590
xmlFreeDoc(newDoc);
13591
13592
return(ret);
13593
}
13594
13595
/**
13596
* xmlSAXParseEntity:
13597
* @sax: the SAX handler block
13598
* @filename: the filename
13599
*
13600
* DEPRECATED: Don't use.
13601
*
13602
* parse an XML external entity out of context and build a tree.
13603
* It use the given SAX function block to handle the parsing callback.
13604
* If sax is NULL, fallback to the default DOM tree building routines.
13605
*
13606
* [78] extParsedEnt ::= TextDecl? content
13607
*
13608
* This correspond to a "Well Balanced" chunk
13609
*
13610
* Returns the resulting document tree
13611
*/
13612
13613
xmlDocPtr
13614
xmlSAXParseEntity(xmlSAXHandlerPtr sax, const char *filename) {
13615
xmlDocPtr ret;
13616
xmlParserCtxtPtr ctxt;
13617
13618
ctxt = xmlCreateFileParserCtxt(filename);
13619
if (ctxt == NULL) {
13620
return(NULL);
13621
}
13622
if (sax != NULL) {
13623
if (ctxt->sax != NULL)
13624
xmlFree(ctxt->sax);
13625
ctxt->sax = sax;
13626
ctxt->userData = NULL;
13627
}
13628
13629
xmlParseExtParsedEnt(ctxt);
13630
13631
if (ctxt->wellFormed)
13632
ret = ctxt->myDoc;
13633
else {
13634
ret = NULL;
13635
xmlFreeDoc(ctxt->myDoc);
13636
ctxt->myDoc = NULL;
13637
}
13638
if (sax != NULL)
13639
ctxt->sax = NULL;
13640
xmlFreeParserCtxt(ctxt);
13641
13642
return(ret);
13643
}
13644
13645
/**
13646
* xmlParseEntity:
13647
* @filename: the filename
13648
*
13649
* parse an XML external entity out of context and build a tree.
13650
*
13651
* [78] extParsedEnt ::= TextDecl? content
13652
*
13653
* This correspond to a "Well Balanced" chunk
13654
*
13655
* Returns the resulting document tree
13656
*/
13657
13658
xmlDocPtr
13659
xmlParseEntity(const char *filename) {
13660
return(xmlSAXParseEntity(NULL, filename));
13661
}
13662
#endif /* LIBXML_SAX1_ENABLED */
13663
13664
/**
13665
* xmlCreateEntityParserCtxtInternal:
13666
* @URL: the entity URL
13667
* @ID: the entity PUBLIC ID
13668
* @base: a possible base for the target URI
13669
* @pctx: parser context used to set options on new context
13670
*
13671
* Create a parser context for an external entity
13672
* Automatic support for ZLIB/Compress compressed document is provided
13673
* by default if found at compile-time.
13674
*
13675
* Returns the new parser context or NULL
13676
*/
13677
static xmlParserCtxtPtr
13678
xmlCreateEntityParserCtxtInternal(xmlSAXHandlerPtr sax, void *userData,
13679
const xmlChar *URL, const xmlChar *ID, const xmlChar *base,
13680
xmlParserCtxtPtr pctx) {
13681
xmlParserCtxtPtr ctxt;
13682
xmlParserInputPtr inputStream;
13683
char *directory = NULL;
13684
xmlChar *uri;
13685
13686
ctxt = xmlNewSAXParserCtxt(sax, userData);
13687
if (ctxt == NULL) {
13688
return(NULL);
13689
}
13690
13691
if (pctx != NULL) {
13692
ctxt->options = pctx->options;
13693
ctxt->_private = pctx->_private;
13694
ctxt->input_id = pctx->input_id;
13695
}
13696
13697
/* Don't read from stdin. */
13698
if (xmlStrcmp(URL, BAD_CAST "-") == 0)
13699
URL = BAD_CAST "./-";
13700
13701
uri = xmlBuildURI(URL, base);
13702
13703
if (uri == NULL) {
13704
inputStream = xmlLoadExternalEntity((char *)URL, (char *)ID, ctxt);
13705
if (inputStream == NULL) {
13706
xmlFreeParserCtxt(ctxt);
13707
return(NULL);
13708
}
13709
13710
inputPush(ctxt, inputStream);
13711
13712
if ((ctxt->directory == NULL) && (directory == NULL))
13713
directory = xmlParserGetDirectory((char *)URL);
13714
if ((ctxt->directory == NULL) && (directory != NULL))
13715
ctxt->directory = directory;
13716
} else {
13717
inputStream = xmlLoadExternalEntity((char *)uri, (char *)ID, ctxt);
13718
if (inputStream == NULL) {
13719
xmlFree(uri);
13720
xmlFreeParserCtxt(ctxt);
13721
return(NULL);
13722
}
13723
13724
inputPush(ctxt, inputStream);
13725
13726
if ((ctxt->directory == NULL) && (directory == NULL))
13727
directory = xmlParserGetDirectory((char *)uri);
13728
if ((ctxt->directory == NULL) && (directory != NULL))
13729
ctxt->directory = directory;
13730
xmlFree(uri);
13731
}
13732
return(ctxt);
13733
}
13734
13735
/**
13736
* xmlCreateEntityParserCtxt:
13737
* @URL: the entity URL
13738
* @ID: the entity PUBLIC ID
13739
* @base: a possible base for the target URI
13740
*
13741
* Create a parser context for an external entity
13742
* Automatic support for ZLIB/Compress compressed document is provided
13743
* by default if found at compile-time.
13744
*
13745
* Returns the new parser context or NULL
13746
*/
13747
xmlParserCtxtPtr
13748
xmlCreateEntityParserCtxt(const xmlChar *URL, const xmlChar *ID,
13749
const xmlChar *base) {
13750
return xmlCreateEntityParserCtxtInternal(NULL, NULL, URL, ID, base, NULL);
13751
13752
}
13753
13754
/************************************************************************
13755
* *
13756
* Front ends when parsing from a file *
13757
* *
13758
************************************************************************/
13759
13760
/**
13761
* xmlCreateURLParserCtxt:
13762
* @filename: the filename or URL
13763
* @options: a combination of xmlParserOption
13764
*
13765
* Create a parser context for a file or URL content.
13766
* Automatic support for ZLIB/Compress compressed document is provided
13767
* by default if found at compile-time and for file accesses
13768
*
13769
* Returns the new parser context or NULL
13770
*/
13771
xmlParserCtxtPtr
13772
xmlCreateURLParserCtxt(const char *filename, int options)
13773
{
13774
xmlParserCtxtPtr ctxt;
13775
xmlParserInputPtr inputStream;
13776
char *directory = NULL;
13777
13778
ctxt = xmlNewParserCtxt();
13779
if (ctxt == NULL) {
13780
xmlErrMemory(NULL, "cannot allocate parser context");
13781
return(NULL);
13782
}
13783
13784
if (options)
13785
xmlCtxtUseOptionsInternal(ctxt, options);
13786
ctxt->linenumbers = 1;
13787
13788
inputStream = xmlLoadExternalEntity(filename, NULL, ctxt);
13789
if (inputStream == NULL) {
13790
xmlFreeParserCtxt(ctxt);
13791
return(NULL);
13792
}
13793
13794
inputPush(ctxt, inputStream);
13795
if ((ctxt->directory == NULL) && (directory == NULL))
13796
directory = xmlParserGetDirectory(filename);
13797
if ((ctxt->directory == NULL) && (directory != NULL))
13798
ctxt->directory = directory;
13799
13800
return(ctxt);
13801
}
13802
13803
/**
13804
* xmlCreateFileParserCtxt:
13805
* @filename: the filename
13806
*
13807
* Create a parser context for a file content.
13808
* Automatic support for ZLIB/Compress compressed document is provided
13809
* by default if found at compile-time.
13810
*
13811
* Returns the new parser context or NULL
13812
*/
13813
xmlParserCtxtPtr
13814
xmlCreateFileParserCtxt(const char *filename)
13815
{
13816
return(xmlCreateURLParserCtxt(filename, 0));
13817
}
13818
13819
#ifdef LIBXML_SAX1_ENABLED
13820
/**
13821
* xmlSAXParseFileWithData:
13822
* @sax: the SAX handler block
13823
* @filename: the filename
13824
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
13825
* documents
13826
* @data: the userdata
13827
*
13828
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13829
*
13830
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13831
* compressed document is provided by default if found at compile-time.
13832
* It use the given SAX function block to handle the parsing callback.
13833
* If sax is NULL, fallback to the default DOM tree building routines.
13834
*
13835
* User data (void *) is stored within the parser context in the
13836
* context's _private member, so it is available nearly everywhere in libxml
13837
*
13838
* Returns the resulting document tree
13839
*/
13840
13841
xmlDocPtr
13842
xmlSAXParseFileWithData(xmlSAXHandlerPtr sax, const char *filename,
13843
int recovery, void *data) {
13844
xmlDocPtr ret;
13845
xmlParserCtxtPtr ctxt;
13846
13847
xmlInitParser();
13848
13849
ctxt = xmlCreateFileParserCtxt(filename);
13850
if (ctxt == NULL) {
13851
return(NULL);
13852
}
13853
if (sax != NULL) {
13854
if (ctxt->sax != NULL)
13855
xmlFree(ctxt->sax);
13856
ctxt->sax = sax;
13857
}
13858
xmlDetectSAX2(ctxt);
13859
if (data!=NULL) {
13860
ctxt->_private = data;
13861
}
13862
13863
if (ctxt->directory == NULL)
13864
ctxt->directory = xmlParserGetDirectory(filename);
13865
13866
ctxt->recovery = recovery;
13867
13868
xmlParseDocument(ctxt);
13869
13870
if ((ctxt->wellFormed) || recovery) {
13871
ret = ctxt->myDoc;
13872
if ((ret != NULL) && (ctxt->input->buf != NULL)) {
13873
if (ctxt->input->buf->compressed > 0)
13874
ret->compression = 9;
13875
else
13876
ret->compression = ctxt->input->buf->compressed;
13877
}
13878
}
13879
else {
13880
ret = NULL;
13881
xmlFreeDoc(ctxt->myDoc);
13882
ctxt->myDoc = NULL;
13883
}
13884
if (sax != NULL)
13885
ctxt->sax = NULL;
13886
xmlFreeParserCtxt(ctxt);
13887
13888
return(ret);
13889
}
13890
13891
/**
13892
* xmlSAXParseFile:
13893
* @sax: the SAX handler block
13894
* @filename: the filename
13895
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
13896
* documents
13897
*
13898
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
13899
*
13900
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13901
* compressed document is provided by default if found at compile-time.
13902
* It use the given SAX function block to handle the parsing callback.
13903
* If sax is NULL, fallback to the default DOM tree building routines.
13904
*
13905
* Returns the resulting document tree
13906
*/
13907
13908
xmlDocPtr
13909
xmlSAXParseFile(xmlSAXHandlerPtr sax, const char *filename,
13910
int recovery) {
13911
return(xmlSAXParseFileWithData(sax,filename,recovery,NULL));
13912
}
13913
13914
/**
13915
* xmlRecoverDoc:
13916
* @cur: a pointer to an array of xmlChar
13917
*
13918
* DEPRECATED: Use xmlReadDoc with XML_PARSE_RECOVER.
13919
*
13920
* parse an XML in-memory document and build a tree.
13921
* In the case the document is not Well Formed, a attempt to build a
13922
* tree is tried anyway
13923
*
13924
* Returns the resulting document tree or NULL in case of failure
13925
*/
13926
13927
xmlDocPtr
13928
xmlRecoverDoc(const xmlChar *cur) {
13929
return(xmlSAXParseDoc(NULL, cur, 1));
13930
}
13931
13932
/**
13933
* xmlParseFile:
13934
* @filename: the filename
13935
*
13936
* DEPRECATED: Use xmlReadFile.
13937
*
13938
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13939
* compressed document is provided by default if found at compile-time.
13940
*
13941
* Returns the resulting document tree if the file was wellformed,
13942
* NULL otherwise.
13943
*/
13944
13945
xmlDocPtr
13946
xmlParseFile(const char *filename) {
13947
return(xmlSAXParseFile(NULL, filename, 0));
13948
}
13949
13950
/**
13951
* xmlRecoverFile:
13952
* @filename: the filename
13953
*
13954
* DEPRECATED: Use xmlReadFile with XML_PARSE_RECOVER.
13955
*
13956
* parse an XML file and build a tree. Automatic support for ZLIB/Compress
13957
* compressed document is provided by default if found at compile-time.
13958
* In the case the document is not Well Formed, it attempts to build
13959
* a tree anyway
13960
*
13961
* Returns the resulting document tree or NULL in case of failure
13962
*/
13963
13964
xmlDocPtr
13965
xmlRecoverFile(const char *filename) {
13966
return(xmlSAXParseFile(NULL, filename, 1));
13967
}
13968
13969
13970
/**
13971
* xmlSetupParserForBuffer:
13972
* @ctxt: an XML parser context
13973
* @buffer: a xmlChar * buffer
13974
* @filename: a file name
13975
*
13976
* DEPRECATED: Don't use.
13977
*
13978
* Setup the parser context to parse a new buffer; Clears any prior
13979
* contents from the parser context. The buffer parameter must not be
13980
* NULL, but the filename parameter can be
13981
*/
13982
void
13983
xmlSetupParserForBuffer(xmlParserCtxtPtr ctxt, const xmlChar* buffer,
13984
const char* filename)
13985
{
13986
xmlParserInputPtr input;
13987
13988
if ((ctxt == NULL) || (buffer == NULL))
13989
return;
13990
13991
input = xmlNewInputStream(ctxt);
13992
if (input == NULL) {
13993
xmlErrMemory(NULL, "parsing new buffer: out of memory\n");
13994
xmlClearParserCtxt(ctxt);
13995
return;
13996
}
13997
13998
xmlClearParserCtxt(ctxt);
13999
if (filename != NULL)
14000
input->filename = (char *) xmlCanonicPath((const xmlChar *)filename);
14001
input->base = buffer;
14002
input->cur = buffer;
14003
input->end = &buffer[xmlStrlen(buffer)];
14004
inputPush(ctxt, input);
14005
}
14006
14007
/**
14008
* xmlSAXUserParseFile:
14009
* @sax: a SAX handler
14010
* @user_data: The user data returned on SAX callbacks
14011
* @filename: a file name
14012
*
14013
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadFile.
14014
*
14015
* parse an XML file and call the given SAX handler routines.
14016
* Automatic support for ZLIB/Compress compressed document is provided
14017
*
14018
* Returns 0 in case of success or a error number otherwise
14019
*/
14020
int
14021
xmlSAXUserParseFile(xmlSAXHandlerPtr sax, void *user_data,
14022
const char *filename) {
14023
int ret = 0;
14024
xmlParserCtxtPtr ctxt;
14025
14026
ctxt = xmlCreateFileParserCtxt(filename);
14027
if (ctxt == NULL) return -1;
14028
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14029
xmlFree(ctxt->sax);
14030
ctxt->sax = sax;
14031
xmlDetectSAX2(ctxt);
14032
14033
if (user_data != NULL)
14034
ctxt->userData = user_data;
14035
14036
xmlParseDocument(ctxt);
14037
14038
if (ctxt->wellFormed)
14039
ret = 0;
14040
else {
14041
if (ctxt->errNo != 0)
14042
ret = ctxt->errNo;
14043
else
14044
ret = -1;
14045
}
14046
if (sax != NULL)
14047
ctxt->sax = NULL;
14048
if (ctxt->myDoc != NULL) {
14049
xmlFreeDoc(ctxt->myDoc);
14050
ctxt->myDoc = NULL;
14051
}
14052
xmlFreeParserCtxt(ctxt);
14053
14054
return ret;
14055
}
14056
#endif /* LIBXML_SAX1_ENABLED */
14057
14058
/************************************************************************
14059
* *
14060
* Front ends when parsing from memory *
14061
* *
14062
************************************************************************/
14063
14064
/**
14065
* xmlCreateMemoryParserCtxt:
14066
* @buffer: a pointer to a char array
14067
* @size: the size of the array
14068
*
14069
* Create a parser context for an XML in-memory document.
14070
*
14071
* Returns the new parser context or NULL
14072
*/
14073
xmlParserCtxtPtr
14074
xmlCreateMemoryParserCtxt(const char *buffer, int size) {
14075
xmlParserCtxtPtr ctxt;
14076
xmlParserInputPtr input;
14077
xmlParserInputBufferPtr buf;
14078
14079
if (buffer == NULL)
14080
return(NULL);
14081
if (size <= 0)
14082
return(NULL);
14083
14084
ctxt = xmlNewParserCtxt();
14085
if (ctxt == NULL)
14086
return(NULL);
14087
14088
buf = xmlParserInputBufferCreateMem(buffer, size, XML_CHAR_ENCODING_NONE);
14089
if (buf == NULL) {
14090
xmlFreeParserCtxt(ctxt);
14091
return(NULL);
14092
}
14093
14094
input = xmlNewInputStream(ctxt);
14095
if (input == NULL) {
14096
xmlFreeParserInputBuffer(buf);
14097
xmlFreeParserCtxt(ctxt);
14098
return(NULL);
14099
}
14100
14101
input->filename = NULL;
14102
input->buf = buf;
14103
xmlBufResetInput(input->buf->buffer, input);
14104
14105
inputPush(ctxt, input);
14106
return(ctxt);
14107
}
14108
14109
#ifdef LIBXML_SAX1_ENABLED
14110
/**
14111
* xmlSAXParseMemoryWithData:
14112
* @sax: the SAX handler block
14113
* @buffer: an pointer to a char array
14114
* @size: the size of the array
14115
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14116
* documents
14117
* @data: the userdata
14118
*
14119
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14120
*
14121
* parse an XML in-memory block and use the given SAX function block
14122
* to handle the parsing callback. If sax is NULL, fallback to the default
14123
* DOM tree building routines.
14124
*
14125
* User data (void *) is stored within the parser context in the
14126
* context's _private member, so it is available nearly everywhere in libxml
14127
*
14128
* Returns the resulting document tree
14129
*/
14130
14131
xmlDocPtr
14132
xmlSAXParseMemoryWithData(xmlSAXHandlerPtr sax, const char *buffer,
14133
int size, int recovery, void *data) {
14134
xmlDocPtr ret;
14135
xmlParserCtxtPtr ctxt;
14136
14137
xmlInitParser();
14138
14139
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14140
if (ctxt == NULL) return(NULL);
14141
if (sax != NULL) {
14142
if (ctxt->sax != NULL)
14143
xmlFree(ctxt->sax);
14144
ctxt->sax = sax;
14145
}
14146
xmlDetectSAX2(ctxt);
14147
if (data!=NULL) {
14148
ctxt->_private=data;
14149
}
14150
14151
ctxt->recovery = recovery;
14152
14153
xmlParseDocument(ctxt);
14154
14155
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14156
else {
14157
ret = NULL;
14158
xmlFreeDoc(ctxt->myDoc);
14159
ctxt->myDoc = NULL;
14160
}
14161
if (sax != NULL)
14162
ctxt->sax = NULL;
14163
xmlFreeParserCtxt(ctxt);
14164
14165
return(ret);
14166
}
14167
14168
/**
14169
* xmlSAXParseMemory:
14170
* @sax: the SAX handler block
14171
* @buffer: an pointer to a char array
14172
* @size: the size of the array
14173
* @recovery: work in recovery mode, i.e. tries to read not Well Formed
14174
* documents
14175
*
14176
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14177
*
14178
* parse an XML in-memory block and use the given SAX function block
14179
* to handle the parsing callback. If sax is NULL, fallback to the default
14180
* DOM tree building routines.
14181
*
14182
* Returns the resulting document tree
14183
*/
14184
xmlDocPtr
14185
xmlSAXParseMemory(xmlSAXHandlerPtr sax, const char *buffer,
14186
int size, int recovery) {
14187
return xmlSAXParseMemoryWithData(sax, buffer, size, recovery, NULL);
14188
}
14189
14190
/**
14191
* xmlParseMemory:
14192
* @buffer: an pointer to a char array
14193
* @size: the size of the array
14194
*
14195
* DEPRECATED: Use xmlReadMemory.
14196
*
14197
* parse an XML in-memory block and build a tree.
14198
*
14199
* Returns the resulting document tree
14200
*/
14201
14202
xmlDocPtr xmlParseMemory(const char *buffer, int size) {
14203
return(xmlSAXParseMemory(NULL, buffer, size, 0));
14204
}
14205
14206
/**
14207
* xmlRecoverMemory:
14208
* @buffer: an pointer to a char array
14209
* @size: the size of the array
14210
*
14211
* DEPRECATED: Use xmlReadMemory with XML_PARSE_RECOVER.
14212
*
14213
* parse an XML in-memory block and build a tree.
14214
* In the case the document is not Well Formed, an attempt to
14215
* build a tree is tried anyway
14216
*
14217
* Returns the resulting document tree or NULL in case of error
14218
*/
14219
14220
xmlDocPtr xmlRecoverMemory(const char *buffer, int size) {
14221
return(xmlSAXParseMemory(NULL, buffer, size, 1));
14222
}
14223
14224
/**
14225
* xmlSAXUserParseMemory:
14226
* @sax: a SAX handler
14227
* @user_data: The user data returned on SAX callbacks
14228
* @buffer: an in-memory XML document input
14229
* @size: the length of the XML document in bytes
14230
*
14231
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadMemory.
14232
*
14233
* parse an XML in-memory buffer and call the given SAX handler routines.
14234
*
14235
* Returns 0 in case of success or a error number otherwise
14236
*/
14237
int xmlSAXUserParseMemory(xmlSAXHandlerPtr sax, void *user_data,
14238
const char *buffer, int size) {
14239
int ret = 0;
14240
xmlParserCtxtPtr ctxt;
14241
14242
xmlInitParser();
14243
14244
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14245
if (ctxt == NULL) return -1;
14246
if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
14247
xmlFree(ctxt->sax);
14248
ctxt->sax = sax;
14249
xmlDetectSAX2(ctxt);
14250
14251
if (user_data != NULL)
14252
ctxt->userData = user_data;
14253
14254
xmlParseDocument(ctxt);
14255
14256
if (ctxt->wellFormed)
14257
ret = 0;
14258
else {
14259
if (ctxt->errNo != 0)
14260
ret = ctxt->errNo;
14261
else
14262
ret = -1;
14263
}
14264
if (sax != NULL)
14265
ctxt->sax = NULL;
14266
if (ctxt->myDoc != NULL) {
14267
xmlFreeDoc(ctxt->myDoc);
14268
ctxt->myDoc = NULL;
14269
}
14270
xmlFreeParserCtxt(ctxt);
14271
14272
return ret;
14273
}
14274
#endif /* LIBXML_SAX1_ENABLED */
14275
14276
/**
14277
* xmlCreateDocParserCtxt:
14278
* @str: a pointer to an array of xmlChar
14279
*
14280
* Creates a parser context for an XML in-memory document.
14281
*
14282
* Returns the new parser context or NULL
14283
*/
14284
xmlParserCtxtPtr
14285
xmlCreateDocParserCtxt(const xmlChar *str) {
14286
xmlParserCtxtPtr ctxt;
14287
xmlParserInputPtr input;
14288
xmlParserInputBufferPtr buf;
14289
14290
if (str == NULL)
14291
return(NULL);
14292
14293
ctxt = xmlNewParserCtxt();
14294
if (ctxt == NULL)
14295
return(NULL);
14296
14297
buf = xmlParserInputBufferCreateString(str);
14298
if (buf == NULL) {
14299
xmlFreeParserCtxt(ctxt);
14300
return(NULL);
14301
}
14302
14303
input = xmlNewInputStream(ctxt);
14304
if (input == NULL) {
14305
xmlFreeParserInputBuffer(buf);
14306
xmlFreeParserCtxt(ctxt);
14307
return(NULL);
14308
}
14309
14310
input->filename = NULL;
14311
input->buf = buf;
14312
xmlBufResetInput(input->buf->buffer, input);
14313
14314
inputPush(ctxt, input);
14315
return(ctxt);
14316
}
14317
14318
#ifdef LIBXML_SAX1_ENABLED
14319
/**
14320
* xmlSAXParseDoc:
14321
* @sax: the SAX handler block
14322
* @cur: a pointer to an array of xmlChar
14323
* @recovery: work in recovery mode, i.e. tries to read no Well Formed
14324
* documents
14325
*
14326
* DEPRECATED: Use xmlNewSAXParserCtxt and xmlCtxtReadDoc.
14327
*
14328
* parse an XML in-memory document and build a tree.
14329
* It use the given SAX function block to handle the parsing callback.
14330
* If sax is NULL, fallback to the default DOM tree building routines.
14331
*
14332
* Returns the resulting document tree
14333
*/
14334
14335
xmlDocPtr
14336
xmlSAXParseDoc(xmlSAXHandlerPtr sax, const xmlChar *cur, int recovery) {
14337
xmlDocPtr ret;
14338
xmlParserCtxtPtr ctxt;
14339
xmlSAXHandlerPtr oldsax = NULL;
14340
14341
if (cur == NULL) return(NULL);
14342
14343
14344
ctxt = xmlCreateDocParserCtxt(cur);
14345
if (ctxt == NULL) return(NULL);
14346
if (sax != NULL) {
14347
oldsax = ctxt->sax;
14348
ctxt->sax = sax;
14349
ctxt->userData = NULL;
14350
}
14351
xmlDetectSAX2(ctxt);
14352
14353
xmlParseDocument(ctxt);
14354
if ((ctxt->wellFormed) || recovery) ret = ctxt->myDoc;
14355
else {
14356
ret = NULL;
14357
xmlFreeDoc(ctxt->myDoc);
14358
ctxt->myDoc = NULL;
14359
}
14360
if (sax != NULL)
14361
ctxt->sax = oldsax;
14362
xmlFreeParserCtxt(ctxt);
14363
14364
return(ret);
14365
}
14366
14367
/**
14368
* xmlParseDoc:
14369
* @cur: a pointer to an array of xmlChar
14370
*
14371
* DEPRECATED: Use xmlReadDoc.
14372
*
14373
* parse an XML in-memory document and build a tree.
14374
*
14375
* Returns the resulting document tree
14376
*/
14377
14378
xmlDocPtr
14379
xmlParseDoc(const xmlChar *cur) {
14380
return(xmlSAXParseDoc(NULL, cur, 0));
14381
}
14382
#endif /* LIBXML_SAX1_ENABLED */
14383
14384
#ifdef LIBXML_LEGACY_ENABLED
14385
/************************************************************************
14386
* *
14387
* Specific function to keep track of entities references *
14388
* and used by the XSLT debugger *
14389
* *
14390
************************************************************************/
14391
14392
/* Callback invoked by xmlAddEntityReference(); NULL means none is set */
static xmlEntityReferenceFunc xmlEntityRefFunc = NULL;
14393
14394
/**
14395
* xmlAddEntityReference:
14396
* @ent : A valid entity
14397
* @firstNode : A valid first node for children of entity
14398
* @lastNode : A valid last node of children entity
14399
*
14400
* Notify of a reference to an entity of type XML_EXTERNAL_GENERAL_PARSED_ENTITY
14401
*/
14402
static void
14403
xmlAddEntityReference(xmlEntityPtr ent, xmlNodePtr firstNode,
14404
xmlNodePtr lastNode)
14405
{
14406
if (xmlEntityRefFunc != NULL) {
14407
(*xmlEntityRefFunc) (ent, firstNode, lastNode);
14408
}
14409
}
14410
14411
14412
/**
14413
* xmlSetEntityReferenceFunc:
14414
* @func: A valid function
14415
*
14416
* Set the function to call call back when a xml reference has been made
14417
*/
14418
void
14419
xmlSetEntityReferenceFunc(xmlEntityReferenceFunc func)
14420
{
14421
xmlEntityRefFunc = func;
14422
}
14423
#endif /* LIBXML_LEGACY_ENABLED */
14424
14425
/************************************************************************
14426
* *
14427
* New set (2.6.0) of simpler and more flexible APIs *
14428
* *
14429
************************************************************************/
14430
14431
/**
 * DICT_FREE:
 * @str:  a string
 *
 * Free a string if it is not owned by the "dict" dictionary in the
 * current scope. A variable named "dict" must be visible where the macro
 * is used; a NULL "dict" means the string is always freed. NULL strings
 * are ignored.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement: it must be followed by a semicolon and can be used
 * safely inside an unbraced if/else.
 */
#define DICT_FREE(str)                                          \
    do {                                                        \
        if ((str) && ((!dict) ||                                \
            (xmlDictOwns(dict, (const xmlChar *)(str)) == 0)))  \
            xmlFree((char *)(str));                             \
    } while (0)
14442
14443
/**
14444
* xmlCtxtReset:
14445
* @ctxt: an XML parser context
14446
*
14447
* Reset a parser context
14448
*/
14449
void
14450
xmlCtxtReset(xmlParserCtxtPtr ctxt)
14451
{
14452
xmlParserInputPtr input;
14453
xmlDictPtr dict;
14454
14455
if (ctxt == NULL)
14456
return;
14457
14458
dict = ctxt->dict;
14459
14460
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
14461
xmlFreeInputStream(input);
14462
}
14463
ctxt->inputNr = 0;
14464
ctxt->input = NULL;
14465
14466
ctxt->spaceNr = 0;
14467
if (ctxt->spaceTab != NULL) {
14468
ctxt->spaceTab[0] = -1;
14469
ctxt->space = &ctxt->spaceTab[0];
14470
} else {
14471
ctxt->space = NULL;
14472
}
14473
14474
14475
ctxt->nodeNr = 0;
14476
ctxt->node = NULL;
14477
14478
ctxt->nameNr = 0;
14479
ctxt->name = NULL;
14480
14481
ctxt->nsNr = 0;
14482
xmlParserNsReset(ctxt->nsdb);
14483
14484
DICT_FREE(ctxt->version);
14485
ctxt->version = NULL;
14486
DICT_FREE(ctxt->encoding);
14487
ctxt->encoding = NULL;
14488
DICT_FREE(ctxt->directory);
14489
ctxt->directory = NULL;
14490
DICT_FREE(ctxt->extSubURI);
14491
ctxt->extSubURI = NULL;
14492
DICT_FREE(ctxt->extSubSystem);
14493
ctxt->extSubSystem = NULL;
14494
if (ctxt->myDoc != NULL)
14495
xmlFreeDoc(ctxt->myDoc);
14496
ctxt->myDoc = NULL;
14497
14498
ctxt->standalone = -1;
14499
ctxt->hasExternalSubset = 0;
14500
ctxt->hasPErefs = 0;
14501
ctxt->html = 0;
14502
ctxt->external = 0;
14503
ctxt->instate = XML_PARSER_START;
14504
ctxt->token = 0;
14505
14506
ctxt->wellFormed = 1;
14507
ctxt->nsWellFormed = 1;
14508
ctxt->disableSAX = 0;
14509
ctxt->valid = 1;
14510
#if 0
14511
ctxt->vctxt.userData = ctxt;
14512
ctxt->vctxt.error = xmlParserValidityError;
14513
ctxt->vctxt.warning = xmlParserValidityWarning;
14514
#endif
14515
ctxt->record_info = 0;
14516
ctxt->checkIndex = 0;
14517
ctxt->endCheckState = 0;
14518
ctxt->inSubset = 0;
14519
ctxt->errNo = XML_ERR_OK;
14520
ctxt->depth = 0;
14521
ctxt->catalogs = NULL;
14522
ctxt->sizeentities = 0;
14523
ctxt->sizeentcopy = 0;
14524
xmlInitNodeInfoSeq(&ctxt->node_seq);
14525
14526
if (ctxt->attsDefault != NULL) {
14527
xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
14528
ctxt->attsDefault = NULL;
14529
}
14530
if (ctxt->attsSpecial != NULL) {
14531
xmlHashFree(ctxt->attsSpecial, NULL);
14532
ctxt->attsSpecial = NULL;
14533
}
14534
14535
#ifdef LIBXML_CATALOG_ENABLED
14536
if (ctxt->catalogs != NULL)
14537
xmlCatalogFreeLocal(ctxt->catalogs);
14538
#endif
14539
ctxt->nbErrors = 0;
14540
ctxt->nbWarnings = 0;
14541
if (ctxt->lastError.code != XML_ERR_OK)
14542
xmlResetError(&ctxt->lastError);
14543
}
14544
14545
/**
14546
* xmlCtxtResetPush:
14547
* @ctxt: an XML parser context
14548
* @chunk: a pointer to an array of chars
14549
* @size: number of chars in the array
14550
* @filename: an optional file name or URI
14551
* @encoding: the document encoding, or NULL
14552
*
14553
* Reset a push parser context
14554
*
14555
* Returns 0 in case of success and 1 in case of error
14556
*/
14557
int
14558
xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
14559
int size, const char *filename, const char *encoding)
14560
{
14561
xmlParserInputPtr inputStream;
14562
xmlParserInputBufferPtr buf;
14563
14564
if (ctxt == NULL)
14565
return(1);
14566
14567
buf = xmlAllocParserInputBuffer(XML_CHAR_ENCODING_NONE);
14568
if (buf == NULL)
14569
return(1);
14570
14571
if (ctxt == NULL) {
14572
xmlFreeParserInputBuffer(buf);
14573
return(1);
14574
}
14575
14576
xmlCtxtReset(ctxt);
14577
14578
if (filename == NULL) {
14579
ctxt->directory = NULL;
14580
} else {
14581
ctxt->directory = xmlParserGetDirectory(filename);
14582
}
14583
14584
inputStream = xmlNewInputStream(ctxt);
14585
if (inputStream == NULL) {
14586
xmlFreeParserInputBuffer(buf);
14587
return(1);
14588
}
14589
14590
if (filename == NULL)
14591
inputStream->filename = NULL;
14592
else
14593
inputStream->filename = (char *)
14594
xmlCanonicPath((const xmlChar *) filename);
14595
inputStream->buf = buf;
14596
xmlBufResetInput(buf->buffer, inputStream);
14597
14598
inputPush(ctxt, inputStream);
14599
14600
if ((size > 0) && (chunk != NULL) && (ctxt->input != NULL) &&
14601
(ctxt->input->buf != NULL)) {
14602
size_t pos = ctxt->input->cur - ctxt->input->base;
14603
int res;
14604
14605
res = xmlParserInputBufferPush(ctxt->input->buf, size, chunk);
14606
xmlBufUpdateInput(ctxt->input->buf->buffer, ctxt->input, pos);
14607
if (res < 0) {
14608
xmlFatalErr(ctxt, ctxt->input->buf->error, NULL);
14609
xmlHaltParser(ctxt);
14610
return(1);
14611
}
14612
}
14613
14614
if (encoding != NULL) {
14615
xmlCharEncodingHandlerPtr hdlr;
14616
14617
hdlr = xmlFindCharEncodingHandler(encoding);
14618
if (hdlr != NULL) {
14619
xmlSwitchToEncoding(ctxt, hdlr);
14620
} else {
14621
xmlFatalErrMsgStr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
14622
"Unsupported encoding %s\n", BAD_CAST encoding);
14623
}
14624
}
14625
14626
return(0);
14627
}
14628
14629
14630
/**
14631
* xmlCtxtUseOptionsInternal:
14632
* @ctxt: an XML parser context
14633
* @options: a combination of xmlParserOption
14634
* @encoding: the user provided encoding to use
14635
*
14636
* Applies the options to the parser context
14637
*
14638
* Returns 0 in case of success, the set of unknown or unimplemented options
14639
* in case of error.
14640
*/
14641
static int
14642
xmlCtxtUseOptionsInternal(xmlParserCtxtPtr ctxt, int options)
14643
{
14644
if (ctxt == NULL)
14645
return(-1);
14646
if (options & XML_PARSE_RECOVER) {
14647
ctxt->recovery = 1;
14648
options -= XML_PARSE_RECOVER;
14649
ctxt->options |= XML_PARSE_RECOVER;
14650
} else
14651
ctxt->recovery = 0;
14652
if (options & XML_PARSE_DTDLOAD) {
14653
ctxt->loadsubset = XML_DETECT_IDS;
14654
options -= XML_PARSE_DTDLOAD;
14655
ctxt->options |= XML_PARSE_DTDLOAD;
14656
} else
14657
ctxt->loadsubset = 0;
14658
if (options & XML_PARSE_DTDATTR) {
14659
ctxt->loadsubset |= XML_COMPLETE_ATTRS;
14660
options -= XML_PARSE_DTDATTR;
14661
ctxt->options |= XML_PARSE_DTDATTR;
14662
}
14663
if (options & XML_PARSE_NOENT) {
14664
ctxt->replaceEntities = 1;
14665
/* ctxt->loadsubset |= XML_DETECT_IDS; */
14666
options -= XML_PARSE_NOENT;
14667
ctxt->options |= XML_PARSE_NOENT;
14668
} else
14669
ctxt->replaceEntities = 0;
14670
if (options & XML_PARSE_PEDANTIC) {
14671
ctxt->pedantic = 1;
14672
options -= XML_PARSE_PEDANTIC;
14673
ctxt->options |= XML_PARSE_PEDANTIC;
14674
} else
14675
ctxt->pedantic = 0;
14676
if (options & XML_PARSE_NOBLANKS) {
14677
ctxt->keepBlanks = 0;
14678
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
14679
options -= XML_PARSE_NOBLANKS;
14680
ctxt->options |= XML_PARSE_NOBLANKS;
14681
} else
14682
ctxt->keepBlanks = 1;
14683
if (options & XML_PARSE_DTDVALID) {
14684
ctxt->validate = 1;
14685
if (options & XML_PARSE_NOWARNING)
14686
ctxt->vctxt.warning = NULL;
14687
if (options & XML_PARSE_NOERROR)
14688
ctxt->vctxt.error = NULL;
14689
options -= XML_PARSE_DTDVALID;
14690
ctxt->options |= XML_PARSE_DTDVALID;
14691
} else
14692
ctxt->validate = 0;
14693
if (options & XML_PARSE_NOWARNING) {
14694
ctxt->sax->warning = NULL;
14695
options -= XML_PARSE_NOWARNING;
14696
}
14697
if (options & XML_PARSE_NOERROR) {
14698
ctxt->sax->error = NULL;
14699
ctxt->sax->fatalError = NULL;
14700
options -= XML_PARSE_NOERROR;
14701
}
14702
#ifdef LIBXML_SAX1_ENABLED
14703
if (options & XML_PARSE_SAX1) {
14704
ctxt->sax->startElementNs = NULL;
14705
ctxt->sax->endElementNs = NULL;
14706
ctxt->sax->initialized = 1;
14707
options -= XML_PARSE_SAX1;
14708
ctxt->options |= XML_PARSE_SAX1;
14709
}
14710
#endif /* LIBXML_SAX1_ENABLED */
14711
if (options & XML_PARSE_NODICT) {
14712
ctxt->dictNames = 0;
14713
options -= XML_PARSE_NODICT;
14714
ctxt->options |= XML_PARSE_NODICT;
14715
} else {
14716
ctxt->dictNames = 1;
14717
}
14718
if (options & XML_PARSE_NOCDATA) {
14719
ctxt->sax->cdataBlock = NULL;
14720
options -= XML_PARSE_NOCDATA;
14721
ctxt->options |= XML_PARSE_NOCDATA;
14722
}
14723
if (options & XML_PARSE_NSCLEAN) {
14724
ctxt->options |= XML_PARSE_NSCLEAN;
14725
options -= XML_PARSE_NSCLEAN;
14726
}
14727
if (options & XML_PARSE_NONET) {
14728
ctxt->options |= XML_PARSE_NONET;
14729
options -= XML_PARSE_NONET;
14730
}
14731
if (options & XML_PARSE_COMPACT) {
14732
ctxt->options |= XML_PARSE_COMPACT;
14733
options -= XML_PARSE_COMPACT;
14734
}
14735
if (options & XML_PARSE_OLD10) {
14736
ctxt->options |= XML_PARSE_OLD10;
14737
options -= XML_PARSE_OLD10;
14738
}
14739
if (options & XML_PARSE_NOBASEFIX) {
14740
ctxt->options |= XML_PARSE_NOBASEFIX;
14741
options -= XML_PARSE_NOBASEFIX;
14742
}
14743
if (options & XML_PARSE_HUGE) {
14744
ctxt->options |= XML_PARSE_HUGE;
14745
options -= XML_PARSE_HUGE;
14746
if (ctxt->dict != NULL)
14747
xmlDictSetLimit(ctxt->dict, 0);
14748
}
14749
if (options & XML_PARSE_OLDSAX) {
14750
ctxt->options |= XML_PARSE_OLDSAX;
14751
options -= XML_PARSE_OLDSAX;
14752
}
14753
if (options & XML_PARSE_IGNORE_ENC) {
14754
ctxt->options |= XML_PARSE_IGNORE_ENC;
14755
options -= XML_PARSE_IGNORE_ENC;
14756
}
14757
if (options & XML_PARSE_BIG_LINES) {
14758
ctxt->options |= XML_PARSE_BIG_LINES;
14759
options -= XML_PARSE_BIG_LINES;
14760
}
14761
ctxt->linenumbers = 1;
14762
return (options);
14763
}
14764
14765
/**
14766
* xmlCtxtUseOptions:
14767
* @ctxt: an XML parser context
14768
* @options: a combination of xmlParserOption
14769
*
14770
* Applies the options to the parser context
14771
*
14772
* Returns 0 in case of success, the set of unknown or unimplemented options
14773
* in case of error.
14774
*/
14775
int
14776
xmlCtxtUseOptions(xmlParserCtxtPtr ctxt, int options)
14777
{
14778
return(xmlCtxtUseOptionsInternal(ctxt, options));
14779
}
14780
14781
/**
14782
* xmlCtxtSetMaxAmplification:
14783
* @ctxt: an XML parser context
14784
* @maxAmpl: maximum amplification factor
14785
*
14786
* To protect against exponential entity expansion ("billion laughs"), the
14787
* size of serialized output is (roughly) limited to the input size
14788
* multiplied by this factor. The default value is 5.
14789
*
14790
* When working with documents making heavy use of entity expansion, it can
14791
* be necessary to increase the value. For security reasons, this should only
14792
* be considered when processing trusted input.
14793
*/
14794
void
14795
xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, unsigned maxAmpl)
14796
{
14797
ctxt->maxAmpl = maxAmpl;
14798
}
14799
14800
/**
14801
* xmlDoRead:
14802
* @ctxt: an XML parser context
14803
* @URL: the base URL to use for the document
14804
* @encoding: the document encoding, or NULL
14805
* @options: a combination of xmlParserOption
14806
* @reuse: keep the context for reuse
14807
*
14808
* Common front-end for the xmlRead functions
14809
*
14810
* Returns the resulting document tree or NULL
14811
*/
14812
static xmlDocPtr
14813
xmlDoRead(xmlParserCtxtPtr ctxt, const char *URL, const char *encoding,
14814
int options, int reuse)
14815
{
14816
xmlDocPtr ret;
14817
14818
xmlCtxtUseOptionsInternal(ctxt, options);
14819
if (encoding != NULL) {
14820
xmlCharEncodingHandlerPtr hdlr;
14821
14822
/*
14823
* TODO: We should consider to set XML_PARSE_IGNORE_ENC if the
14824
* caller provided an encoding. Otherwise, we might switch to
14825
* the encoding from the XML declaration which is likely to
14826
* break things. Also see xmlSwitchInputEncoding.
14827
*/
14828
hdlr = xmlFindCharEncodingHandler(encoding);
14829
if (hdlr != NULL)
14830
xmlSwitchToEncoding(ctxt, hdlr);
14831
}
14832
if ((URL != NULL) && (ctxt->input != NULL) &&
14833
(ctxt->input->filename == NULL))
14834
ctxt->input->filename = (char *) xmlStrdup((const xmlChar *) URL);
14835
xmlParseDocument(ctxt);
14836
if ((ctxt->wellFormed) || ctxt->recovery)
14837
ret = ctxt->myDoc;
14838
else {
14839
ret = NULL;
14840
if (ctxt->myDoc != NULL) {
14841
xmlFreeDoc(ctxt->myDoc);
14842
}
14843
}
14844
ctxt->myDoc = NULL;
14845
if (!reuse) {
14846
xmlFreeParserCtxt(ctxt);
14847
}
14848
14849
return (ret);
14850
}
14851
14852
/**
14853
* xmlReadDoc:
14854
* @cur: a pointer to a zero terminated string
14855
* @URL: the base URL to use for the document
14856
* @encoding: the document encoding, or NULL
14857
* @options: a combination of xmlParserOption
14858
*
14859
* parse an XML in-memory document and build a tree.
14860
*
14861
* Returns the resulting document tree
14862
*/
14863
xmlDocPtr
14864
xmlReadDoc(const xmlChar * cur, const char *URL, const char *encoding, int options)
14865
{
14866
xmlParserCtxtPtr ctxt;
14867
14868
if (cur == NULL)
14869
return (NULL);
14870
xmlInitParser();
14871
14872
ctxt = xmlCreateDocParserCtxt(cur);
14873
if (ctxt == NULL)
14874
return (NULL);
14875
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14876
}
14877
14878
/**
14879
* xmlReadFile:
14880
* @filename: a file or URL
14881
* @encoding: the document encoding, or NULL
14882
* @options: a combination of xmlParserOption
14883
*
14884
* parse an XML file from the filesystem or the network.
14885
*
14886
* Returns the resulting document tree
14887
*/
14888
xmlDocPtr
14889
xmlReadFile(const char *filename, const char *encoding, int options)
14890
{
14891
xmlParserCtxtPtr ctxt;
14892
14893
xmlInitParser();
14894
ctxt = xmlCreateURLParserCtxt(filename, options);
14895
if (ctxt == NULL)
14896
return (NULL);
14897
return (xmlDoRead(ctxt, NULL, encoding, options, 0));
14898
}
14899
14900
/**
14901
* xmlReadMemory:
14902
* @buffer: a pointer to a char array
14903
* @size: the size of the array
14904
* @URL: the base URL to use for the document
14905
* @encoding: the document encoding, or NULL
14906
* @options: a combination of xmlParserOption
14907
*
14908
* parse an XML in-memory document and build a tree.
14909
*
14910
* Returns the resulting document tree
14911
*/
14912
xmlDocPtr
14913
xmlReadMemory(const char *buffer, int size, const char *URL, const char *encoding, int options)
14914
{
14915
xmlParserCtxtPtr ctxt;
14916
14917
xmlInitParser();
14918
ctxt = xmlCreateMemoryParserCtxt(buffer, size);
14919
if (ctxt == NULL)
14920
return (NULL);
14921
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14922
}
14923
14924
/**
14925
* xmlReadFd:
14926
* @fd: an open file descriptor
14927
* @URL: the base URL to use for the document
14928
* @encoding: the document encoding, or NULL
14929
* @options: a combination of xmlParserOption
14930
*
14931
* parse an XML from a file descriptor and build a tree.
14932
* NOTE that the file descriptor will not be closed when the
14933
* reader is closed or reset.
14934
*
14935
* Returns the resulting document tree
14936
*/
14937
xmlDocPtr
14938
xmlReadFd(int fd, const char *URL, const char *encoding, int options)
14939
{
14940
xmlParserCtxtPtr ctxt;
14941
xmlParserInputBufferPtr input;
14942
xmlParserInputPtr stream;
14943
14944
if (fd < 0)
14945
return (NULL);
14946
xmlInitParser();
14947
14948
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
14949
if (input == NULL)
14950
return (NULL);
14951
input->closecallback = NULL;
14952
ctxt = xmlNewParserCtxt();
14953
if (ctxt == NULL) {
14954
xmlFreeParserInputBuffer(input);
14955
return (NULL);
14956
}
14957
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
14958
if (stream == NULL) {
14959
xmlFreeParserInputBuffer(input);
14960
xmlFreeParserCtxt(ctxt);
14961
return (NULL);
14962
}
14963
inputPush(ctxt, stream);
14964
return (xmlDoRead(ctxt, URL, encoding, options, 0));
14965
}
14966
14967
/**
14968
* xmlReadIO:
14969
* @ioread: an I/O read function
14970
* @ioclose: an I/O close function
14971
* @ioctx: an I/O handler
14972
* @URL: the base URL to use for the document
14973
* @encoding: the document encoding, or NULL
14974
* @options: a combination of xmlParserOption
14975
*
14976
* parse an XML document from I/O functions and source and build a tree.
14977
*
14978
* Returns the resulting document tree
14979
*/
14980
xmlDocPtr
14981
xmlReadIO(xmlInputReadCallback ioread, xmlInputCloseCallback ioclose,
14982
void *ioctx, const char *URL, const char *encoding, int options)
14983
{
14984
xmlParserCtxtPtr ctxt;
14985
xmlParserInputBufferPtr input;
14986
xmlParserInputPtr stream;
14987
14988
if (ioread == NULL)
14989
return (NULL);
14990
xmlInitParser();
14991
14992
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
14993
XML_CHAR_ENCODING_NONE);
14994
if (input == NULL) {
14995
if (ioclose != NULL)
14996
ioclose(ioctx);
14997
return (NULL);
14998
}
14999
ctxt = xmlNewParserCtxt();
15000
if (ctxt == NULL) {
15001
xmlFreeParserInputBuffer(input);
15002
return (NULL);
15003
}
15004
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15005
if (stream == NULL) {
15006
xmlFreeParserInputBuffer(input);
15007
xmlFreeParserCtxt(ctxt);
15008
return (NULL);
15009
}
15010
inputPush(ctxt, stream);
15011
return (xmlDoRead(ctxt, URL, encoding, options, 0));
15012
}
15013
15014
/**
15015
* xmlCtxtReadDoc:
15016
* @ctxt: an XML parser context
15017
* @str: a pointer to a zero terminated string
15018
* @URL: the base URL to use for the document
15019
* @encoding: the document encoding, or NULL
15020
* @options: a combination of xmlParserOption
15021
*
15022
* parse an XML in-memory document and build a tree.
15023
* This reuses the existing @ctxt parser context
15024
*
15025
* Returns the resulting document tree
15026
*/
15027
xmlDocPtr
15028
xmlCtxtReadDoc(xmlParserCtxtPtr ctxt, const xmlChar *str,
15029
const char *URL, const char *encoding, int options)
15030
{
15031
xmlParserInputBufferPtr input;
15032
xmlParserInputPtr stream;
15033
15034
if (ctxt == NULL)
15035
return (NULL);
15036
if (str == NULL)
15037
return (NULL);
15038
xmlInitParser();
15039
15040
xmlCtxtReset(ctxt);
15041
15042
input = xmlParserInputBufferCreateString(str);
15043
if (input == NULL) {
15044
return(NULL);
15045
}
15046
15047
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15048
if (stream == NULL) {
15049
xmlFreeParserInputBuffer(input);
15050
return(NULL);
15051
}
15052
15053
inputPush(ctxt, stream);
15054
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15055
}
15056
15057
/**
15058
* xmlCtxtReadFile:
15059
* @ctxt: an XML parser context
15060
* @filename: a file or URL
15061
* @encoding: the document encoding, or NULL
15062
* @options: a combination of xmlParserOption
15063
*
15064
* parse an XML file from the filesystem or the network.
15065
* This reuses the existing @ctxt parser context
15066
*
15067
* Returns the resulting document tree
15068
*/
15069
xmlDocPtr
15070
xmlCtxtReadFile(xmlParserCtxtPtr ctxt, const char *filename,
15071
const char *encoding, int options)
15072
{
15073
xmlParserInputPtr stream;
15074
15075
if (filename == NULL)
15076
return (NULL);
15077
if (ctxt == NULL)
15078
return (NULL);
15079
xmlInitParser();
15080
15081
xmlCtxtReset(ctxt);
15082
15083
stream = xmlLoadExternalEntity(filename, NULL, ctxt);
15084
if (stream == NULL) {
15085
return (NULL);
15086
}
15087
inputPush(ctxt, stream);
15088
return (xmlDoRead(ctxt, NULL, encoding, options, 1));
15089
}
15090
15091
/**
15092
* xmlCtxtReadMemory:
15093
* @ctxt: an XML parser context
15094
* @buffer: a pointer to a char array
15095
* @size: the size of the array
15096
* @URL: the base URL to use for the document
15097
* @encoding: the document encoding, or NULL
15098
* @options: a combination of xmlParserOption
15099
*
15100
* parse an XML in-memory document and build a tree.
15101
* This reuses the existing @ctxt parser context
15102
*
15103
* Returns the resulting document tree
15104
*/
15105
xmlDocPtr
15106
xmlCtxtReadMemory(xmlParserCtxtPtr ctxt, const char *buffer, int size,
15107
const char *URL, const char *encoding, int options)
15108
{
15109
xmlParserInputBufferPtr input;
15110
xmlParserInputPtr stream;
15111
15112
if (ctxt == NULL)
15113
return (NULL);
15114
if (buffer == NULL)
15115
return (NULL);
15116
xmlInitParser();
15117
15118
xmlCtxtReset(ctxt);
15119
15120
input = xmlParserInputBufferCreateStatic(buffer, size,
15121
XML_CHAR_ENCODING_NONE);
15122
if (input == NULL) {
15123
return(NULL);
15124
}
15125
15126
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15127
if (stream == NULL) {
15128
xmlFreeParserInputBuffer(input);
15129
return(NULL);
15130
}
15131
15132
inputPush(ctxt, stream);
15133
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15134
}
15135
15136
/**
15137
* xmlCtxtReadFd:
15138
* @ctxt: an XML parser context
15139
* @fd: an open file descriptor
15140
* @URL: the base URL to use for the document
15141
* @encoding: the document encoding, or NULL
15142
* @options: a combination of xmlParserOption
15143
*
15144
* parse an XML from a file descriptor and build a tree.
15145
* This reuses the existing @ctxt parser context
15146
* NOTE that the file descriptor will not be closed when the
15147
* reader is closed or reset.
15148
*
15149
* Returns the resulting document tree
15150
*/
15151
xmlDocPtr
15152
xmlCtxtReadFd(xmlParserCtxtPtr ctxt, int fd,
15153
const char *URL, const char *encoding, int options)
15154
{
15155
xmlParserInputBufferPtr input;
15156
xmlParserInputPtr stream;
15157
15158
if (fd < 0)
15159
return (NULL);
15160
if (ctxt == NULL)
15161
return (NULL);
15162
xmlInitParser();
15163
15164
xmlCtxtReset(ctxt);
15165
15166
15167
input = xmlParserInputBufferCreateFd(fd, XML_CHAR_ENCODING_NONE);
15168
if (input == NULL)
15169
return (NULL);
15170
input->closecallback = NULL;
15171
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15172
if (stream == NULL) {
15173
xmlFreeParserInputBuffer(input);
15174
return (NULL);
15175
}
15176
inputPush(ctxt, stream);
15177
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15178
}
15179
15180
/**
15181
* xmlCtxtReadIO:
15182
* @ctxt: an XML parser context
15183
* @ioread: an I/O read function
15184
* @ioclose: an I/O close function
15185
* @ioctx: an I/O handler
15186
* @URL: the base URL to use for the document
15187
* @encoding: the document encoding, or NULL
15188
* @options: a combination of xmlParserOption
15189
*
15190
* parse an XML document from I/O functions and source and build a tree.
15191
* This reuses the existing @ctxt parser context
15192
*
15193
* Returns the resulting document tree
15194
*/
15195
xmlDocPtr
15196
xmlCtxtReadIO(xmlParserCtxtPtr ctxt, xmlInputReadCallback ioread,
15197
xmlInputCloseCallback ioclose, void *ioctx,
15198
const char *URL,
15199
const char *encoding, int options)
15200
{
15201
xmlParserInputBufferPtr input;
15202
xmlParserInputPtr stream;
15203
15204
if (ioread == NULL)
15205
return (NULL);
15206
if (ctxt == NULL)
15207
return (NULL);
15208
xmlInitParser();
15209
15210
xmlCtxtReset(ctxt);
15211
15212
input = xmlParserInputBufferCreateIO(ioread, ioclose, ioctx,
15213
XML_CHAR_ENCODING_NONE);
15214
if (input == NULL) {
15215
if (ioclose != NULL)
15216
ioclose(ioctx);
15217
return (NULL);
15218
}
15219
stream = xmlNewIOInputStream(ctxt, input, XML_CHAR_ENCODING_NONE);
15220
if (stream == NULL) {
15221
xmlFreeParserInputBuffer(input);
15222
return (NULL);
15223
}
15224
inputPush(ctxt, stream);
15225
return (xmlDoRead(ctxt, URL, encoding, options, 1));
15226
}
15227
15228