Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
wine-mirror
GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/parserInternals.c
4389 views
1
/*
2
* parserInternals.c : Internal routines (and obsolete ones) needed for the
3
* XML and HTML parsers.
4
*
5
* See Copyright for the status of this software.
6
*
7
* daniel@veillard.com
8
*/
9
10
#define IN_LIBXML
11
#include "libxml.h"
12
13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18
19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22
23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38
39
/* Shorthands for the read position and the end of the current input. */
#define CUR(ctxt) ((ctxt)->input->cur)
#define END(ctxt) ((ctxt)->input->end)
41
42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/io.h"
46
#include "private/parser.h"
47
48
/*
49
* XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
50
* factor of serialized output after entity expansion.
51
*/
52
#define XML_MAX_AMPLIFICATION_DEFAULT 5
53
54
/*
55
* Various global defaults for parsing
56
*/
57
58
/**
59
* xmlCheckVersion:
60
* @version: the include version number
61
*
62
* check the compiled lib version against the include one.
63
* This can warn or immediately kill the application
64
*/
65
void
66
xmlCheckVersion(int version) {
67
int myversion = LIBXML_VERSION;
68
69
xmlInitParser();
70
71
if ((myversion / 10000) != (version / 10000)) {
72
xmlGenericError(xmlGenericErrorContext,
73
"Fatal: program compiled against libxml %d using libxml %d\n",
74
(version / 10000), (myversion / 10000));
75
fprintf(stderr,
76
"Fatal: program compiled against libxml %d using libxml %d\n",
77
(version / 10000), (myversion / 10000));
78
}
79
if ((myversion / 100) < (version / 100)) {
80
xmlGenericError(xmlGenericErrorContext,
81
"Warning: program compiled against libxml %d using older %d\n",
82
(version / 100), (myversion / 100));
83
}
84
}
85
86
87
/************************************************************************
88
* *
89
* Some factorized error routines *
90
* *
91
************************************************************************/
92
93
94
/**
95
* xmlErrMemory:
96
* @ctxt: an XML parser context
97
* @extra: extra information
98
*
99
* Handle a redefinition of attribute error
100
*/
101
void
102
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
103
{
104
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
105
(ctxt->instate == XML_PARSER_EOF))
106
return;
107
if (ctxt != NULL) {
108
ctxt->errNo = XML_ERR_NO_MEMORY;
109
ctxt->instate = XML_PARSER_EOF;
110
ctxt->disableSAX = 1;
111
}
112
if (extra)
113
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
114
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
115
NULL, NULL, 0, 0,
116
"Memory allocation failed : %s\n", extra);
117
else
118
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
119
XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
120
NULL, NULL, 0, 0, "Memory allocation failed\n");
121
}
122
123
/**
124
* __xmlErrEncoding:
125
* @ctxt: an XML parser context
126
* @xmlerr: the error number
127
* @msg: the error message
128
* @str1: an string info
129
* @str2: an string info
130
*
131
* Handle an encoding error
132
*/
133
void
134
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
135
const char *msg, const xmlChar * str1, const xmlChar * str2)
136
{
137
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
138
(ctxt->instate == XML_PARSER_EOF))
139
return;
140
if (ctxt != NULL)
141
ctxt->errNo = xmlerr;
142
__xmlRaiseError(NULL, NULL, NULL,
143
ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
144
NULL, 0, (const char *) str1, (const char *) str2,
145
NULL, 0, 0, msg, str1, str2);
146
if (ctxt != NULL) {
147
ctxt->wellFormed = 0;
148
if (ctxt->recovery == 0)
149
ctxt->disableSAX = 1;
150
}
151
}
152
153
/**
154
* xmlErrInternal:
155
* @ctxt: an XML parser context
156
* @msg: the error message
157
* @str: error information
158
*
159
* Handle an internal error
160
*/
161
static void LIBXML_ATTR_FORMAT(2,0)
162
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
163
{
164
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
165
(ctxt->instate == XML_PARSER_EOF))
166
return;
167
if (ctxt != NULL)
168
ctxt->errNo = XML_ERR_INTERNAL_ERROR;
169
__xmlRaiseError(NULL, NULL, NULL,
170
ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
171
XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
172
0, 0, msg, str);
173
if (ctxt != NULL) {
174
ctxt->wellFormed = 0;
175
if (ctxt->recovery == 0)
176
ctxt->disableSAX = 1;
177
}
178
}
179
180
/**
181
* xmlFatalErr:
182
* @ctxt: an XML parser context
183
* @error: the error number
184
* @info: extra information string
185
*
186
* Handle a fatal parser error, i.e. violating Well-Formedness constraints
187
*/
188
void
189
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
190
{
191
const char *errmsg;
192
193
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
194
(ctxt->instate == XML_PARSER_EOF))
195
return;
196
switch (error) {
197
case XML_ERR_INVALID_HEX_CHARREF:
198
errmsg = "CharRef: invalid hexadecimal value";
199
break;
200
case XML_ERR_INVALID_DEC_CHARREF:
201
errmsg = "CharRef: invalid decimal value";
202
break;
203
case XML_ERR_INVALID_CHARREF:
204
errmsg = "CharRef: invalid value";
205
break;
206
case XML_ERR_INTERNAL_ERROR:
207
errmsg = "internal error";
208
break;
209
case XML_ERR_PEREF_AT_EOF:
210
errmsg = "PEReference at end of document";
211
break;
212
case XML_ERR_PEREF_IN_PROLOG:
213
errmsg = "PEReference in prolog";
214
break;
215
case XML_ERR_PEREF_IN_EPILOG:
216
errmsg = "PEReference in epilog";
217
break;
218
case XML_ERR_PEREF_NO_NAME:
219
errmsg = "PEReference: no name";
220
break;
221
case XML_ERR_PEREF_SEMICOL_MISSING:
222
errmsg = "PEReference: expecting ';'";
223
break;
224
case XML_ERR_ENTITY_LOOP:
225
errmsg = "Detected an entity reference loop";
226
break;
227
case XML_ERR_ENTITY_NOT_STARTED:
228
errmsg = "EntityValue: \" or ' expected";
229
break;
230
case XML_ERR_ENTITY_PE_INTERNAL:
231
errmsg = "PEReferences forbidden in internal subset";
232
break;
233
case XML_ERR_ENTITY_NOT_FINISHED:
234
errmsg = "EntityValue: \" or ' expected";
235
break;
236
case XML_ERR_ATTRIBUTE_NOT_STARTED:
237
errmsg = "AttValue: \" or ' expected";
238
break;
239
case XML_ERR_LT_IN_ATTRIBUTE:
240
errmsg = "Unescaped '<' not allowed in attributes values";
241
break;
242
case XML_ERR_LITERAL_NOT_STARTED:
243
errmsg = "SystemLiteral \" or ' expected";
244
break;
245
case XML_ERR_LITERAL_NOT_FINISHED:
246
errmsg = "Unfinished System or Public ID \" or ' expected";
247
break;
248
case XML_ERR_MISPLACED_CDATA_END:
249
errmsg = "Sequence ']]>' not allowed in content";
250
break;
251
case XML_ERR_URI_REQUIRED:
252
errmsg = "SYSTEM or PUBLIC, the URI is missing";
253
break;
254
case XML_ERR_PUBID_REQUIRED:
255
errmsg = "PUBLIC, the Public Identifier is missing";
256
break;
257
case XML_ERR_HYPHEN_IN_COMMENT:
258
errmsg = "Comment must not contain '--' (double-hyphen)";
259
break;
260
case XML_ERR_PI_NOT_STARTED:
261
errmsg = "xmlParsePI : no target name";
262
break;
263
case XML_ERR_RESERVED_XML_NAME:
264
errmsg = "Invalid PI name";
265
break;
266
case XML_ERR_NOTATION_NOT_STARTED:
267
errmsg = "NOTATION: Name expected here";
268
break;
269
case XML_ERR_NOTATION_NOT_FINISHED:
270
errmsg = "'>' required to close NOTATION declaration";
271
break;
272
case XML_ERR_VALUE_REQUIRED:
273
errmsg = "Entity value required";
274
break;
275
case XML_ERR_URI_FRAGMENT:
276
errmsg = "Fragment not allowed";
277
break;
278
case XML_ERR_ATTLIST_NOT_STARTED:
279
errmsg = "'(' required to start ATTLIST enumeration";
280
break;
281
case XML_ERR_NMTOKEN_REQUIRED:
282
errmsg = "NmToken expected in ATTLIST enumeration";
283
break;
284
case XML_ERR_ATTLIST_NOT_FINISHED:
285
errmsg = "')' required to finish ATTLIST enumeration";
286
break;
287
case XML_ERR_MIXED_NOT_STARTED:
288
errmsg = "MixedContentDecl : '|' or ')*' expected";
289
break;
290
case XML_ERR_PCDATA_REQUIRED:
291
errmsg = "MixedContentDecl : '#PCDATA' expected";
292
break;
293
case XML_ERR_ELEMCONTENT_NOT_STARTED:
294
errmsg = "ContentDecl : Name or '(' expected";
295
break;
296
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
297
errmsg = "ContentDecl : ',' '|' or ')' expected";
298
break;
299
case XML_ERR_PEREF_IN_INT_SUBSET:
300
errmsg =
301
"PEReference: forbidden within markup decl in internal subset";
302
break;
303
case XML_ERR_GT_REQUIRED:
304
errmsg = "expected '>'";
305
break;
306
case XML_ERR_CONDSEC_INVALID:
307
errmsg = "XML conditional section '[' expected";
308
break;
309
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
310
errmsg = "Content error in the external subset";
311
break;
312
case XML_ERR_CONDSEC_INVALID_KEYWORD:
313
errmsg =
314
"conditional section INCLUDE or IGNORE keyword expected";
315
break;
316
case XML_ERR_CONDSEC_NOT_FINISHED:
317
errmsg = "XML conditional section not closed";
318
break;
319
case XML_ERR_XMLDECL_NOT_STARTED:
320
errmsg = "Text declaration '<?xml' required";
321
break;
322
case XML_ERR_XMLDECL_NOT_FINISHED:
323
errmsg = "parsing XML declaration: '?>' expected";
324
break;
325
case XML_ERR_EXT_ENTITY_STANDALONE:
326
errmsg = "external parsed entities cannot be standalone";
327
break;
328
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
329
errmsg = "EntityRef: expecting ';'";
330
break;
331
case XML_ERR_DOCTYPE_NOT_FINISHED:
332
errmsg = "DOCTYPE improperly terminated";
333
break;
334
case XML_ERR_LTSLASH_REQUIRED:
335
errmsg = "EndTag: '</' not found";
336
break;
337
case XML_ERR_EQUAL_REQUIRED:
338
errmsg = "expected '='";
339
break;
340
case XML_ERR_STRING_NOT_CLOSED:
341
errmsg = "String not closed expecting \" or '";
342
break;
343
case XML_ERR_STRING_NOT_STARTED:
344
errmsg = "String not started expecting ' or \"";
345
break;
346
case XML_ERR_ENCODING_NAME:
347
errmsg = "Invalid XML encoding name";
348
break;
349
case XML_ERR_STANDALONE_VALUE:
350
errmsg = "standalone accepts only 'yes' or 'no'";
351
break;
352
case XML_ERR_DOCUMENT_EMPTY:
353
errmsg = "Document is empty";
354
break;
355
case XML_ERR_DOCUMENT_END:
356
errmsg = "Extra content at the end of the document";
357
break;
358
case XML_ERR_NOT_WELL_BALANCED:
359
errmsg = "chunk is not well balanced";
360
break;
361
case XML_ERR_EXTRA_CONTENT:
362
errmsg = "extra content at the end of well balanced chunk";
363
break;
364
case XML_ERR_VERSION_MISSING:
365
errmsg = "Malformed declaration expecting version";
366
break;
367
case XML_ERR_NAME_TOO_LONG:
368
errmsg = "Name too long";
369
break;
370
case XML_ERR_INVALID_ENCODING:
371
errmsg = "Invalid bytes in character encoding";
372
break;
373
case XML_IO_UNKNOWN:
374
errmsg = "I/O error";
375
break;
376
#if 0
377
case:
378
errmsg = "";
379
break;
380
#endif
381
default:
382
errmsg = "Unregistered error message";
383
}
384
if (ctxt != NULL)
385
ctxt->errNo = error;
386
if (info == NULL) {
387
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
388
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
389
errmsg);
390
} else {
391
__xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
392
XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
393
errmsg, info);
394
}
395
if (ctxt != NULL) {
396
ctxt->wellFormed = 0;
397
if (ctxt->recovery == 0)
398
ctxt->disableSAX = 1;
399
}
400
}
401
402
/**
403
* xmlErrEncodingInt:
404
* @ctxt: an XML parser context
405
* @error: the error number
406
* @msg: the error message
407
* @val: an integer value
408
*
409
* n encoding error
410
*/
411
static void LIBXML_ATTR_FORMAT(3,0)
412
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413
const char *msg, int val)
414
{
415
if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
416
(ctxt->instate == XML_PARSER_EOF))
417
return;
418
if (ctxt != NULL)
419
ctxt->errNo = error;
420
__xmlRaiseError(NULL, NULL, NULL,
421
ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
422
NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
423
if (ctxt != NULL) {
424
ctxt->wellFormed = 0;
425
if (ctxt->recovery == 0)
426
ctxt->disableSAX = 1;
427
}
428
}
429
430
/**
 * xmlIsLetter:
 * @c:  a Unicode character (int)
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 0 if not, non-zero otherwise
 */
int
xmlIsLetter(int c) {
    return(IS_BASECHAR(c) || IS_IDEOGRAPHIC(c));
}
443
444
/************************************************************************
445
* *
446
* Input handling functions for progressive parsing *
447
* *
448
************************************************************************/
449
450
/* we need to keep enough input to show errors in context */
451
#define LINE_LEN 80
452
453
/**
454
* xmlHaltParser:
455
* @ctxt: an XML parser context
456
*
457
* Blocks further parser processing don't override error
458
* for internal use
459
*/
460
void
461
xmlHaltParser(xmlParserCtxtPtr ctxt) {
462
if (ctxt == NULL)
463
return;
464
ctxt->instate = XML_PARSER_EOF;
465
ctxt->disableSAX = 1;
466
while (ctxt->inputNr > 1)
467
xmlFreeInputStream(inputPop(ctxt));
468
if (ctxt->input != NULL) {
469
/*
470
* in case there was a specific allocation deallocate before
471
* overriding base
472
*/
473
if (ctxt->input->free != NULL) {
474
ctxt->input->free((xmlChar *) ctxt->input->base);
475
ctxt->input->free = NULL;
476
}
477
if (ctxt->input->buf != NULL) {
478
xmlFreeParserInputBuffer(ctxt->input->buf);
479
ctxt->input->buf = NULL;
480
}
481
ctxt->input->cur = BAD_CAST"";
482
ctxt->input->length = 0;
483
ctxt->input->base = ctxt->input->cur;
484
ctxt->input->end = ctxt->input->cur;
485
}
486
}
487
488
/**
489
* xmlParserInputRead:
490
* @in: an XML parser input
491
* @len: an indicative size for the lookahead
492
*
493
* DEPRECATED: This function was internal and is deprecated.
494
*
495
* Returns -1 as this is an error to use it.
496
*/
497
int
498
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
499
return(-1);
500
}
501
502
/**
503
* xmlParserGrow:
504
* @ctxt: an XML parser context
505
*
506
* Grow the input buffer.
507
*
508
* Returns the number of bytes read or -1 in case of error.
509
*/
510
int
511
xmlParserGrow(xmlParserCtxtPtr ctxt) {
512
xmlParserInputPtr in = ctxt->input;
513
xmlParserInputBufferPtr buf = in->buf;
514
ptrdiff_t curEnd = in->end - in->cur;
515
ptrdiff_t curBase = in->cur - in->base;
516
int ret;
517
518
if (buf == NULL)
519
return(0);
520
/* Don't grow push parser buffer. */
521
if ((ctxt->progressive) && (ctxt->inputNr <= 1))
522
return(0);
523
/* Don't grow memory buffers. */
524
if ((buf->encoder == NULL) && (buf->readcallback == NULL))
525
return(0);
526
if (buf->error != 0)
527
return(-1);
528
529
if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
530
(curBase > XML_MAX_LOOKUP_LIMIT)) &&
531
((ctxt->options & XML_PARSE_HUGE) == 0)) {
532
xmlErrMemory(ctxt, "Huge input lookup");
533
xmlHaltParser(ctxt);
534
return(-1);
535
}
536
537
if (curEnd >= INPUT_CHUNK)
538
return(0);
539
540
ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
541
xmlBufUpdateInput(buf->buffer, in, curBase);
542
543
if (ret < 0) {
544
xmlFatalErr(ctxt, buf->error, NULL);
545
/* Buffer contents may be lost in case of memory errors. */
546
if (buf->error == XML_ERR_NO_MEMORY)
547
xmlHaltParser(ctxt);
548
}
549
550
return(ret);
551
}
552
553
/**
554
* xmlParserInputGrow:
555
* @in: an XML parser input
556
* @len: an indicative size for the lookahead
557
*
558
* DEPRECATED: Don't use.
559
*
560
* This function increase the input for the parser. It tries to
561
* preserve pointers to the input buffer, and keep already read data
562
*
563
* Returns the amount of char read, or -1 in case of error, 0 indicate the
564
* end of this entity
565
*/
566
int
567
xmlParserInputGrow(xmlParserInputPtr in, int len) {
568
int ret;
569
size_t indx;
570
571
if ((in == NULL) || (len < 0)) return(-1);
572
if (in->buf == NULL) return(-1);
573
if (in->base == NULL) return(-1);
574
if (in->cur == NULL) return(-1);
575
if (in->buf->buffer == NULL) return(-1);
576
577
/* Don't grow memory buffers. */
578
if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
579
return(0);
580
581
indx = in->cur - in->base;
582
if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
583
return(0);
584
}
585
ret = xmlParserInputBufferGrow(in->buf, len);
586
587
in->base = xmlBufContent(in->buf->buffer);
588
if (in->base == NULL) {
589
in->base = BAD_CAST "";
590
in->cur = in->base;
591
in->end = in->base;
592
return(-1);
593
}
594
in->cur = in->base + indx;
595
in->end = xmlBufEnd(in->buf->buffer);
596
597
return(ret);
598
}
599
600
/**
601
* xmlParserShrink:
602
* @ctxt: an XML parser context
603
*
604
* Shrink the input buffer.
605
*/
606
void
607
xmlParserShrink(xmlParserCtxtPtr ctxt) {
608
xmlParserInputPtr in = ctxt->input;
609
xmlParserInputBufferPtr buf = in->buf;
610
size_t used;
611
612
if (buf == NULL)
613
return;
614
/* Don't shrink pull parser memory buffers. */
615
if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) &&
616
(buf->encoder == NULL) &&
617
(buf->readcallback == NULL))
618
return;
619
620
used = in->cur - in->base;
621
/*
622
* Do not shrink on large buffers whose only a tiny fraction
623
* was consumed
624
*/
625
if (used > INPUT_CHUNK) {
626
size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
627
628
if (res > 0) {
629
used -= res;
630
if ((res > ULONG_MAX) ||
631
(in->consumed > ULONG_MAX - (unsigned long)res))
632
in->consumed = ULONG_MAX;
633
else
634
in->consumed += res;
635
}
636
}
637
638
xmlBufUpdateInput(buf->buffer, in, used);
639
}
640
641
/**
642
* xmlParserInputShrink:
643
* @in: an XML parser input
644
*
645
* DEPRECATED: Don't use.
646
*
647
* This function removes used input for the parser.
648
*/
649
void
650
xmlParserInputShrink(xmlParserInputPtr in) {
651
size_t used;
652
size_t ret;
653
654
if (in == NULL) return;
655
if (in->buf == NULL) return;
656
if (in->base == NULL) return;
657
if (in->cur == NULL) return;
658
if (in->buf->buffer == NULL) return;
659
660
used = in->cur - in->base;
661
/*
662
* Do not shrink on large buffers whose only a tiny fraction
663
* was consumed
664
*/
665
if (used > INPUT_CHUNK) {
666
ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
667
if (ret > 0) {
668
used -= ret;
669
if ((ret > ULONG_MAX) ||
670
(in->consumed > ULONG_MAX - (unsigned long)ret))
671
in->consumed = ULONG_MAX;
672
else
673
in->consumed += ret;
674
}
675
}
676
677
if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
678
xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
679
}
680
681
in->base = xmlBufContent(in->buf->buffer);
682
if (in->base == NULL) {
683
/* TODO: raise error */
684
in->base = BAD_CAST "";
685
in->cur = in->base;
686
in->end = in->base;
687
return;
688
}
689
in->cur = in->base + used;
690
in->end = xmlBufEnd(in->buf->buffer);
691
}
692
693
/************************************************************************
694
* *
695
* UTF8 character input and related functions *
696
* *
697
************************************************************************/
698
699
/**
700
* xmlNextChar:
701
* @ctxt: the XML parser context
702
*
703
* DEPRECATED: Internal function, do not use.
704
*
705
* Skip to the next char input char.
706
*/
707
708
void
709
xmlNextChar(xmlParserCtxtPtr ctxt)
710
{
711
const unsigned char *cur;
712
size_t avail;
713
int c;
714
715
if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
716
(ctxt->input == NULL))
717
return;
718
719
avail = ctxt->input->end - ctxt->input->cur;
720
721
if (avail < INPUT_CHUNK) {
722
xmlParserGrow(ctxt);
723
if ((ctxt->instate == XML_PARSER_EOF) ||
724
(ctxt->input->cur >= ctxt->input->end))
725
return;
726
avail = ctxt->input->end - ctxt->input->cur;
727
}
728
729
cur = ctxt->input->cur;
730
c = *cur;
731
732
if (c < 0x80) {
733
if (c == '\n') {
734
ctxt->input->cur++;
735
ctxt->input->line++;
736
ctxt->input->col = 1;
737
} else if (c == '\r') {
738
/*
739
* 2.11 End-of-Line Handling
740
* the literal two-character sequence "#xD#xA" or a standalone
741
* literal #xD, an XML processor must pass to the application
742
* the single character #xA.
743
*/
744
ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
745
ctxt->input->line++;
746
ctxt->input->col = 1;
747
return;
748
} else {
749
ctxt->input->cur++;
750
ctxt->input->col++;
751
}
752
} else {
753
ctxt->input->col++;
754
755
if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
756
goto encoding_error;
757
758
if (c < 0xe0) {
759
/* 2-byte code */
760
if (c < 0xc2)
761
goto encoding_error;
762
ctxt->input->cur += 2;
763
} else {
764
unsigned int val = (c << 8) | cur[1];
765
766
if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
767
goto encoding_error;
768
769
if (c < 0xf0) {
770
/* 3-byte code */
771
if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
772
goto encoding_error;
773
ctxt->input->cur += 3;
774
} else {
775
if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
776
goto encoding_error;
777
778
/* 4-byte code */
779
if ((val < 0xf090) || (val >= 0xf490))
780
goto encoding_error;
781
ctxt->input->cur += 4;
782
}
783
}
784
}
785
786
return;
787
788
encoding_error:
789
/* Only report the first error */
790
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
791
if ((ctxt == NULL) || (ctxt->input == NULL) ||
792
(ctxt->input->end - ctxt->input->cur < 4)) {
793
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
794
"Input is not proper UTF-8, indicate encoding !\n",
795
NULL, NULL);
796
} else {
797
char buffer[150];
798
799
snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
800
ctxt->input->cur[0], ctxt->input->cur[1],
801
ctxt->input->cur[2], ctxt->input->cur[3]);
802
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
803
"Input is not proper UTF-8, indicate encoding !\n%s",
804
BAD_CAST buffer, NULL);
805
}
806
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
807
}
808
ctxt->input->cur++;
809
return;
810
}
811
812
/**
813
* xmlCurrentChar:
814
* @ctxt: the XML parser context
815
* @len: pointer to the length of the char read
816
*
817
* DEPRECATED: Internal function, do not use.
818
*
819
* The current char value, if using UTF-8 this may actually span multiple
820
* bytes in the input buffer. Implement the end of line normalization:
821
* 2.11 End-of-Line Handling
822
* Wherever an external parsed entity or the literal entity value
823
* of an internal parsed entity contains either the literal two-character
824
* sequence "#xD#xA" or a standalone literal #xD, an XML processor
825
* must pass to the application the single character #xA.
826
* This behavior can conveniently be produced by normalizing all
827
* line breaks to #xA on input, before parsing.)
828
*
829
* Returns the current char value and its length
830
*/
831
832
int
833
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
834
const unsigned char *cur;
835
size_t avail;
836
int c;
837
838
if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
839
if (ctxt->instate == XML_PARSER_EOF)
840
return(0);
841
842
avail = ctxt->input->end - ctxt->input->cur;
843
844
if (avail < INPUT_CHUNK) {
845
xmlParserGrow(ctxt);
846
if (ctxt->instate == XML_PARSER_EOF)
847
return(0);
848
avail = ctxt->input->end - ctxt->input->cur;
849
}
850
851
cur = ctxt->input->cur;
852
c = *cur;
853
854
if (c < 0x80) {
855
/* 1-byte code */
856
if (c < 0x20) {
857
/*
858
* 2.11 End-of-Line Handling
859
* the literal two-character sequence "#xD#xA" or a standalone
860
* literal #xD, an XML processor must pass to the application
861
* the single character #xA.
862
*/
863
if (c == '\r') {
864
/*
865
* TODO: This function shouldn't change the 'cur' pointer
866
* as side effect, but the NEXTL macro in parser.c relies
867
* on this behavior when incrementing line numbers.
868
*/
869
if (cur[1] == '\n')
870
ctxt->input->cur++;
871
*len = 1;
872
c = '\n';
873
} else if (c == 0) {
874
if (ctxt->input->cur >= ctxt->input->end) {
875
*len = 0;
876
} else {
877
*len = 1;
878
/*
879
* TODO: Null bytes should be handled by callers,
880
* but this can be tricky.
881
*/
882
xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
883
"Char 0x0 out of allowed range\n", c);
884
}
885
} else {
886
*len = 1;
887
}
888
} else {
889
*len = 1;
890
}
891
892
return(c);
893
} else {
894
int val;
895
896
if (avail < 2)
897
goto incomplete_sequence;
898
if ((cur[1] & 0xc0) != 0x80)
899
goto encoding_error;
900
901
if (c < 0xe0) {
902
/* 2-byte code */
903
if (c < 0xc2)
904
goto encoding_error;
905
val = (c & 0x1f) << 6;
906
val |= cur[1] & 0x3f;
907
*len = 2;
908
} else {
909
if (avail < 3)
910
goto incomplete_sequence;
911
if ((cur[2] & 0xc0) != 0x80)
912
goto encoding_error;
913
914
if (c < 0xf0) {
915
/* 3-byte code */
916
val = (c & 0xf) << 12;
917
val |= (cur[1] & 0x3f) << 6;
918
val |= cur[2] & 0x3f;
919
if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
920
goto encoding_error;
921
*len = 3;
922
} else {
923
if (avail < 4)
924
goto incomplete_sequence;
925
if ((cur[3] & 0xc0) != 0x80)
926
goto encoding_error;
927
928
/* 4-byte code */
929
val = (c & 0x0f) << 18;
930
val |= (cur[1] & 0x3f) << 12;
931
val |= (cur[2] & 0x3f) << 6;
932
val |= cur[3] & 0x3f;
933
if ((val < 0x10000) || (val >= 0x110000))
934
goto encoding_error;
935
*len = 4;
936
}
937
}
938
939
return(val);
940
}
941
942
encoding_error:
943
/* Only report the first error */
944
if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
945
if (ctxt->input->end - ctxt->input->cur < 4) {
946
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
947
"Input is not proper UTF-8, indicate encoding !\n",
948
NULL, NULL);
949
} else {
950
char buffer[150];
951
952
snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
953
ctxt->input->cur[0], ctxt->input->cur[1],
954
ctxt->input->cur[2], ctxt->input->cur[3]);
955
__xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
956
"Input is not proper UTF-8, indicate encoding !\n%s",
957
BAD_CAST buffer, NULL);
958
}
959
ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
960
}
961
*len = 1;
962
return(0xFFFD); /* U+FFFD Replacement Character */
963
964
incomplete_sequence:
965
/*
966
* An encoding problem may arise from a truncated input buffer
967
* splitting a character in the middle. In that case do not raise
968
* an error but return 0. This should only happen when push parsing
969
* char data.
970
*/
971
*len = 0;
972
return(0);
973
}
974
975
/**
976
* xmlStringCurrentChar:
977
* @ctxt: the XML parser context
978
* @cur: pointer to the beginning of the char
979
* @len: pointer to the length of the char read
980
*
981
* DEPRECATED: Internal function, do not use.
982
*
983
* The current char value, if using UTF-8 this may actually span multiple
984
* bytes in the input buffer.
985
*
986
* Returns the current char value and its length
987
*/
988
989
int
990
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
991
const xmlChar *cur, int *len) {
992
int c;
993
994
if ((cur == NULL) || (len == NULL))
995
return(0);
996
997
/* cur is zero-terminated, so we can lie about its length. */
998
*len = 4;
999
c = xmlGetUTF8Char(cur, len);
1000
1001
return((c < 0) ? 0 : c);
1002
}
1003
1004
/**
1005
* xmlCopyCharMultiByte:
1006
* @out: pointer to an array of xmlChar
1007
* @val: the char value
1008
*
1009
* append the char value in the array
1010
*
1011
* Returns the number of xmlChar written
1012
*/
1013
int
1014
xmlCopyCharMultiByte(xmlChar *out, int val) {
1015
if ((out == NULL) || (val < 0)) return(0);
1016
/*
1017
* We are supposed to handle UTF8, check it's valid
1018
* From rfc2044: encoding of the Unicode values on UTF-8:
1019
*
1020
* UCS-4 range (hex.) UTF-8 octet sequence (binary)
1021
* 0000 0000-0000 007F 0xxxxxxx
1022
* 0000 0080-0000 07FF 110xxxxx 10xxxxxx
1023
* 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
1024
*/
1025
if (val >= 0x80) {
1026
xmlChar *savedout = out;
1027
int bits;
1028
if (val < 0x800) { *out++= (val >> 6) | 0xC0; bits= 0; }
1029
else if (val < 0x10000) { *out++= (val >> 12) | 0xE0; bits= 6;}
1030
else if (val < 0x110000) { *out++= (val >> 18) | 0xF0; bits= 12; }
1031
else {
1032
xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
1033
"Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1034
val);
1035
return(0);
1036
}
1037
for ( ; bits >= 0; bits-= 6)
1038
*out++= ((val >> bits) & 0x3F) | 0x80 ;
1039
return (out - savedout);
1040
}
1041
*out = val;
1042
return 1;
1043
}
1044
1045
/**
1046
* xmlCopyChar:
1047
* @len: Ignored, compatibility
1048
* @out: pointer to an array of xmlChar
1049
* @val: the char value
1050
*
1051
* append the char value in the array
1052
*
1053
* Returns the number of xmlChar written
1054
*/
1055
1056
int
1057
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1058
if ((out == NULL) || (val < 0)) return(0);
1059
/* the len parameter is ignored */
1060
if (val >= 0x80) {
1061
return(xmlCopyCharMultiByte (out, val));
1062
}
1063
*out = val;
1064
return 1;
1065
}
1066
1067
/************************************************************************
1068
* *
1069
* Commodity functions to switch encodings *
1070
* *
1071
************************************************************************/
1072
1073
static xmlCharEncodingHandlerPtr
1074
xmlDetectEBCDIC(xmlParserInputPtr input) {
1075
xmlChar out[200];
1076
xmlCharEncodingHandlerPtr handler;
1077
int inlen, outlen, res, i;
1078
1079
/*
1080
* To detect the EBCDIC code page, we convert the first 200 bytes
1081
* to EBCDIC-US and try to find the encoding declaration.
1082
*/
1083
handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1084
if (handler == NULL)
1085
return(NULL);
1086
outlen = sizeof(out) - 1;
1087
inlen = input->end - input->cur;
1088
res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1089
if (res < 0)
1090
return(handler);
1091
out[outlen] = 0;
1092
1093
for (i = 0; i < outlen; i++) {
1094
if (out[i] == '>')
1095
break;
1096
if ((out[i] == 'e') &&
1097
(xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1098
int start, cur, quote;
1099
1100
i += 8;
1101
while (IS_BLANK_CH(out[i]))
1102
i += 1;
1103
if (out[i++] != '=')
1104
break;
1105
while (IS_BLANK_CH(out[i]))
1106
i += 1;
1107
quote = out[i++];
1108
if ((quote != '\'') && (quote != '"'))
1109
break;
1110
start = i;
1111
cur = out[i];
1112
while (((cur >= 'a') && (cur <= 'z')) ||
1113
((cur >= 'A') && (cur <= 'Z')) ||
1114
((cur >= '0') && (cur <= '9')) ||
1115
(cur == '.') || (cur == '_') ||
1116
(cur == '-'))
1117
cur = out[++i];
1118
if (cur != quote)
1119
break;
1120
out[i] = 0;
1121
xmlCharEncCloseFunc(handler);
1122
return(xmlFindCharEncodingHandler((char *) out + start));
1123
}
1124
}
1125
1126
/*
1127
* ICU handlers are stateful, so we have to recreate them.
1128
*/
1129
xmlCharEncCloseFunc(handler);
1130
return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC));
1131
}
1132
1133
/**
1134
* xmlSwitchEncoding:
1135
* @ctxt: the parser context
1136
* @enc: the encoding value (number)
1137
*
1138
* Use encoding specified by enum to decode input data.
1139
*
1140
* This function can be used to enforce the encoding of chunks passed
1141
* to xmlParseChunk.
1142
*
1143
* Returns 0 in case of success, -1 otherwise
1144
*/
1145
int
1146
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1147
{
1148
xmlCharEncodingHandlerPtr handler = NULL;
1149
int check = 1;
1150
int ret;
1151
1152
if ((ctxt == NULL) || (ctxt->input == NULL))
1153
return(-1);
1154
1155
switch (enc) {
1156
case XML_CHAR_ENCODING_NONE:
1157
case XML_CHAR_ENCODING_UTF8:
1158
case XML_CHAR_ENCODING_ASCII:
1159
check = 0;
1160
break;
1161
case XML_CHAR_ENCODING_EBCDIC:
1162
handler = xmlDetectEBCDIC(ctxt->input);
1163
break;
1164
default:
1165
handler = xmlGetCharEncodingHandler(enc);
1166
break;
1167
}
1168
1169
if ((check) && (handler == NULL)) {
1170
const char *name = xmlGetCharEncodingName(enc);
1171
1172
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1173
"encoding not supported: %s\n",
1174
BAD_CAST (name ? name : "<null>"), NULL);
1175
/*
1176
* TODO: We could recover from errors in external entities
1177
* if we didn't stop the parser. But most callers of this
1178
* function don't check the return value.
1179
*/
1180
xmlStopParser(ctxt);
1181
return(-1);
1182
}
1183
1184
ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1185
1186
if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1187
ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1188
}
1189
1190
return(ret);
1191
}
1192
1193
/**
1194
* xmlSwitchInputEncoding:
1195
* @ctxt: the parser context
1196
* @input: the input stream
1197
* @handler: the encoding handler
1198
*
1199
* DEPRECATED: Internal function, don't use.
1200
*
1201
* Use encoding handler to decode input data.
1202
*
1203
* Returns 0 in case of success, -1 otherwise
1204
*/
1205
int
1206
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1207
xmlCharEncodingHandlerPtr handler)
1208
{
1209
int nbchars;
1210
xmlParserInputBufferPtr in;
1211
1212
if ((input == NULL) || (input->buf == NULL)) {
1213
xmlCharEncCloseFunc(handler);
1214
return (-1);
1215
}
1216
in = input->buf;
1217
1218
input->flags |= XML_INPUT_HAS_ENCODING;
1219
1220
/*
1221
* UTF-8 requires no encoding handler.
1222
*/
1223
if ((handler != NULL) &&
1224
(xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1225
xmlCharEncCloseFunc(handler);
1226
handler = NULL;
1227
}
1228
1229
if (in->encoder == handler)
1230
return (0);
1231
1232
if (in->encoder != NULL) {
1233
/*
1234
* Switching encodings during parsing is a really bad idea,
1235
* but Chromium can switch between ISO-8859-1 and UTF-16 before
1236
* separate calls to xmlParseChunk.
1237
*
1238
* TODO: We should check whether the "raw" input buffer is empty and
1239
* convert the old content using the old encoder.
1240
*/
1241
1242
xmlCharEncCloseFunc(in->encoder);
1243
in->encoder = handler;
1244
return (0);
1245
}
1246
1247
in->encoder = handler;
1248
1249
/*
1250
* Is there already some content down the pipe to convert ?
1251
*/
1252
if (xmlBufIsEmpty(in->buffer) == 0) {
1253
size_t processed;
1254
1255
/*
1256
* Shrink the current input buffer.
1257
* Move it as the raw buffer and create a new input buffer
1258
*/
1259
processed = input->cur - input->base;
1260
xmlBufShrink(in->buffer, processed);
1261
input->consumed += processed;
1262
in->raw = in->buffer;
1263
in->buffer = xmlBufCreate();
1264
in->rawconsumed = processed;
1265
1266
nbchars = xmlCharEncInput(in);
1267
xmlBufResetInput(in->buffer, input);
1268
if (nbchars < 0) {
1269
/* TODO: This could be an out of memory or an encoding error. */
1270
xmlErrInternal(ctxt,
1271
"switching encoding: encoder error\n",
1272
NULL);
1273
xmlHaltParser(ctxt);
1274
return (-1);
1275
}
1276
}
1277
return (0);
1278
}
1279
1280
/**
1281
* xmlSwitchToEncoding:
1282
* @ctxt: the parser context
1283
* @handler: the encoding handler
1284
*
1285
* Use encoding handler to decode input data.
1286
*
1287
* This function can be used to enforce the encoding of chunks passed
1288
* to xmlParseChunk.
1289
*
1290
* Returns 0 in case of success, -1 otherwise
1291
*/
1292
int
1293
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1294
{
1295
if (ctxt == NULL)
1296
return(-1);
1297
return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1298
}
1299
1300
/**
1301
* xmlDetectEncoding:
1302
* @ctxt: the parser context
1303
*
1304
* Handle optional BOM, detect and switch to encoding.
1305
*
1306
* Assumes that there are at least four bytes in the input buffer.
1307
*/
1308
void
1309
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1310
const xmlChar *in;
1311
xmlCharEncoding enc;
1312
int bomSize;
1313
int autoFlag = 0;
1314
1315
if (xmlParserGrow(ctxt) < 0)
1316
return;
1317
in = ctxt->input->cur;
1318
if (ctxt->input->end - in < 4)
1319
return;
1320
1321
if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1322
/*
1323
* If the encoding was already set, only skip the BOM which was
1324
* possibly decoded to UTF-8.
1325
*/
1326
if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1327
ctxt->input->cur += 3;
1328
}
1329
1330
return;
1331
}
1332
1333
enc = XML_CHAR_ENCODING_NONE;
1334
bomSize = 0;
1335
1336
switch (in[0]) {
1337
case 0x00:
1338
if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1339
enc = XML_CHAR_ENCODING_UCS4BE;
1340
autoFlag = XML_INPUT_AUTO_OTHER;
1341
} else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1342
enc = XML_CHAR_ENCODING_UTF16BE;
1343
autoFlag = XML_INPUT_AUTO_UTF16BE;
1344
}
1345
break;
1346
1347
case 0x3C:
1348
if (in[1] == 0x00) {
1349
if ((in[2] == 0x00) && (in[3] == 0x00)) {
1350
enc = XML_CHAR_ENCODING_UCS4LE;
1351
autoFlag = XML_INPUT_AUTO_OTHER;
1352
} else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1353
enc = XML_CHAR_ENCODING_UTF16LE;
1354
autoFlag = XML_INPUT_AUTO_UTF16LE;
1355
}
1356
}
1357
break;
1358
1359
case 0x4C:
1360
if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1361
enc = XML_CHAR_ENCODING_EBCDIC;
1362
autoFlag = XML_INPUT_AUTO_OTHER;
1363
}
1364
break;
1365
1366
case 0xEF:
1367
if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1368
enc = XML_CHAR_ENCODING_UTF8;
1369
autoFlag = XML_INPUT_AUTO_UTF8;
1370
bomSize = 3;
1371
}
1372
break;
1373
1374
case 0xFE:
1375
if (in[1] == 0xFF) {
1376
enc = XML_CHAR_ENCODING_UTF16BE;
1377
autoFlag = XML_INPUT_AUTO_UTF16BE;
1378
bomSize = 2;
1379
}
1380
break;
1381
1382
case 0xFF:
1383
if (in[1] == 0xFE) {
1384
enc = XML_CHAR_ENCODING_UTF16LE;
1385
autoFlag = XML_INPUT_AUTO_UTF16LE;
1386
bomSize = 2;
1387
}
1388
break;
1389
}
1390
1391
if (bomSize > 0) {
1392
ctxt->input->cur += bomSize;
1393
}
1394
1395
if (enc != XML_CHAR_ENCODING_NONE) {
1396
ctxt->input->flags |= autoFlag;
1397
xmlSwitchEncoding(ctxt, enc);
1398
}
1399
}
1400
1401
/**
1402
* xmlSetDeclaredEncoding:
1403
* @ctxt: the parser context
1404
* @encoding: declared encoding
1405
*
1406
* Set the encoding from a declaration in the document.
1407
*
1408
* If no encoding was set yet, switch the encoding. Otherwise, only warn
1409
* about encoding mismatches.
1410
*
1411
* Takes ownership of 'encoding'.
1412
*/
1413
void
1414
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1415
if (ctxt->encoding != NULL)
1416
xmlFree((xmlChar *) ctxt->encoding);
1417
ctxt->encoding = encoding;
1418
1419
if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1420
((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1421
xmlCharEncodingHandlerPtr handler;
1422
1423
handler = xmlFindCharEncodingHandler((const char *) encoding);
1424
if (handler == NULL) {
1425
__xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1426
"Unsupported encoding: %s\n",
1427
encoding, NULL);
1428
return;
1429
}
1430
1431
xmlSwitchToEncoding(ctxt, handler);
1432
ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1433
} else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1434
static const char *allowedUTF8[] = {
1435
"UTF-8", "UTF8", NULL
1436
};
1437
static const char *allowedUTF16LE[] = {
1438
"UTF-16", "UTF-16LE", "UTF16", NULL
1439
};
1440
static const char *allowedUTF16BE[] = {
1441
"UTF-16", "UTF-16BE", "UTF16", NULL
1442
};
1443
const char **allowed = NULL;
1444
const char *autoEnc = NULL;
1445
1446
switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1447
case XML_INPUT_AUTO_UTF8:
1448
allowed = allowedUTF8;
1449
autoEnc = "UTF-8";
1450
break;
1451
case XML_INPUT_AUTO_UTF16LE:
1452
allowed = allowedUTF16LE;
1453
autoEnc = "UTF-16LE";
1454
break;
1455
case XML_INPUT_AUTO_UTF16BE:
1456
allowed = allowedUTF16BE;
1457
autoEnc = "UTF-16BE";
1458
break;
1459
}
1460
1461
if (allowed != NULL) {
1462
const char **p;
1463
int match = 0;
1464
1465
for (p = allowed; *p != NULL; p++) {
1466
if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1467
match = 1;
1468
break;
1469
}
1470
}
1471
1472
if (match == 0) {
1473
xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1474
"Encoding '%s' doesn't match "
1475
"auto-detected '%s'\n",
1476
encoding, BAD_CAST autoEnc);
1477
}
1478
}
1479
}
1480
}
1481
1482
/**
1483
* xmlGetActualEncoding:
1484
* @ctxt: the parser context
1485
*
1486
* Returns the actual used to parse the document. This can differ from
1487
* the declared encoding.
1488
*/
1489
const xmlChar *
1490
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1491
const xmlChar *encoding = NULL;
1492
1493
if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1494
(ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1495
/* Preserve encoding exactly */
1496
encoding = ctxt->encoding;
1497
} else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1498
encoding = BAD_CAST ctxt->input->buf->encoder->name;
1499
} else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1500
encoding = BAD_CAST "UTF-8";
1501
}
1502
1503
return(encoding);
1504
}
1505
1506
/************************************************************************
1507
* *
1508
* Commodity functions to handle entities processing *
1509
* *
1510
************************************************************************/
1511
1512
/**
1513
* xmlFreeInputStream:
1514
* @input: an xmlParserInputPtr
1515
*
1516
* Free up an input stream.
1517
*/
1518
void
1519
xmlFreeInputStream(xmlParserInputPtr input) {
1520
if (input == NULL) return;
1521
1522
if (input->filename != NULL) xmlFree((char *) input->filename);
1523
if (input->directory != NULL) xmlFree((char *) input->directory);
1524
if (input->version != NULL) xmlFree((char *) input->version);
1525
if ((input->free != NULL) && (input->base != NULL))
1526
input->free((xmlChar *) input->base);
1527
if (input->buf != NULL)
1528
xmlFreeParserInputBuffer(input->buf);
1529
xmlFree(input);
1530
}
1531
1532
/**
1533
* xmlNewInputStream:
1534
* @ctxt: an XML parser context
1535
*
1536
* Create a new input stream structure.
1537
*
1538
* Returns the new input stream or NULL
1539
*/
1540
xmlParserInputPtr
1541
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1542
xmlParserInputPtr input;
1543
1544
input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1545
if (input == NULL) {
1546
xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1547
return(NULL);
1548
}
1549
memset(input, 0, sizeof(xmlParserInput));
1550
input->line = 1;
1551
input->col = 1;
1552
1553
/*
1554
* If the context is NULL the id cannot be initialized, but that
1555
* should not happen while parsing which is the situation where
1556
* the id is actually needed.
1557
*/
1558
if (ctxt != NULL) {
1559
if (input->id >= INT_MAX) {
1560
xmlErrMemory(ctxt, "Input ID overflow\n");
1561
return(NULL);
1562
}
1563
input->id = ctxt->input_id++;
1564
}
1565
1566
return(input);
1567
}
1568
1569
/**
1570
* xmlNewIOInputStream:
1571
* @ctxt: an XML parser context
1572
* @input: an I/O Input
1573
* @enc: the charset encoding if known
1574
*
1575
* Create a new input stream structure encapsulating the @input into
1576
* a stream suitable for the parser.
1577
*
1578
* Returns the new input stream or NULL
1579
*/
1580
xmlParserInputPtr
1581
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1582
xmlCharEncoding enc) {
1583
xmlParserInputPtr inputStream;
1584
1585
if (input == NULL) return(NULL);
1586
if (xmlParserDebugEntities)
1587
xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1588
inputStream = xmlNewInputStream(ctxt);
1589
if (inputStream == NULL) {
1590
return(NULL);
1591
}
1592
inputStream->filename = NULL;
1593
inputStream->buf = input;
1594
xmlBufResetInput(inputStream->buf->buffer, inputStream);
1595
1596
if (enc != XML_CHAR_ENCODING_NONE) {
1597
xmlSwitchEncoding(ctxt, enc);
1598
}
1599
1600
return(inputStream);
1601
}
1602
1603
/**
1604
* xmlNewEntityInputStream:
1605
* @ctxt: an XML parser context
1606
* @entity: an Entity pointer
1607
*
1608
* DEPRECATED: Internal function, do not use.
1609
*
1610
* Create a new input stream based on an xmlEntityPtr
1611
*
1612
* Returns the new input stream or NULL
1613
*/
1614
xmlParserInputPtr
1615
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1616
xmlParserInputPtr input;
1617
1618
if (entity == NULL) {
1619
xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1620
NULL);
1621
return(NULL);
1622
}
1623
if (xmlParserDebugEntities)
1624
xmlGenericError(xmlGenericErrorContext,
1625
"new input from entity: %s\n", entity->name);
1626
if (entity->content == NULL) {
1627
switch (entity->etype) {
1628
case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1629
xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1630
entity->name);
1631
break;
1632
case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1633
case XML_EXTERNAL_PARAMETER_ENTITY:
1634
input = xmlLoadExternalEntity((char *) entity->URI,
1635
(char *) entity->ExternalID, ctxt);
1636
if (input != NULL)
1637
input->entity = entity;
1638
return(input);
1639
case XML_INTERNAL_GENERAL_ENTITY:
1640
xmlErrInternal(ctxt,
1641
"Internal entity %s without content !\n",
1642
entity->name);
1643
break;
1644
case XML_INTERNAL_PARAMETER_ENTITY:
1645
xmlErrInternal(ctxt,
1646
"Internal parameter entity %s without content !\n",
1647
entity->name);
1648
break;
1649
case XML_INTERNAL_PREDEFINED_ENTITY:
1650
xmlErrInternal(ctxt,
1651
"Predefined entity %s without content !\n",
1652
entity->name);
1653
break;
1654
}
1655
return(NULL);
1656
}
1657
input = xmlNewInputStream(ctxt);
1658
if (input == NULL) {
1659
return(NULL);
1660
}
1661
if (entity->URI != NULL)
1662
input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1663
input->base = entity->content;
1664
if (entity->length == 0)
1665
entity->length = xmlStrlen(entity->content);
1666
input->cur = entity->content;
1667
input->length = entity->length;
1668
input->end = &entity->content[input->length];
1669
input->entity = entity;
1670
return(input);
1671
}
1672
1673
/**
1674
* xmlNewStringInputStream:
1675
* @ctxt: an XML parser context
1676
* @buffer: an memory buffer
1677
*
1678
* Create a new input stream based on a memory buffer.
1679
* Returns the new input stream
1680
*/
1681
xmlParserInputPtr
1682
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1683
xmlParserInputPtr input;
1684
xmlParserInputBufferPtr buf;
1685
1686
if (buffer == NULL) {
1687
xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1688
NULL);
1689
return(NULL);
1690
}
1691
if (xmlParserDebugEntities)
1692
xmlGenericError(xmlGenericErrorContext,
1693
"new fixed input: %.30s\n", buffer);
1694
buf = xmlParserInputBufferCreateString(buffer);
1695
if (buf == NULL) {
1696
xmlErrMemory(ctxt, NULL);
1697
return(NULL);
1698
}
1699
input = xmlNewInputStream(ctxt);
1700
if (input == NULL) {
1701
xmlErrMemory(ctxt, "couldn't allocate a new input stream\n");
1702
xmlFreeParserInputBuffer(buf);
1703
return(NULL);
1704
}
1705
input->buf = buf;
1706
xmlBufResetInput(input->buf->buffer, input);
1707
return(input);
1708
}
1709
1710
/**
1711
* xmlNewInputFromFile:
1712
* @ctxt: an XML parser context
1713
* @filename: the filename to use as entity
1714
*
1715
* Create a new input stream based on a file or an URL.
1716
*
1717
* Returns the new input stream or NULL in case of error
1718
*/
1719
xmlParserInputPtr
1720
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1721
xmlParserInputBufferPtr buf;
1722
xmlParserInputPtr inputStream;
1723
char *directory = NULL;
1724
xmlChar *URI = NULL;
1725
1726
if (xmlParserDebugEntities)
1727
xmlGenericError(xmlGenericErrorContext,
1728
"new input from file: %s\n", filename);
1729
if (ctxt == NULL) return(NULL);
1730
buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1731
if (buf == NULL) {
1732
if (filename == NULL)
1733
__xmlLoaderErr(ctxt,
1734
"failed to load external entity: NULL filename \n",
1735
NULL);
1736
else
1737
__xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1738
(const char *) filename);
1739
return(NULL);
1740
}
1741
1742
inputStream = xmlNewInputStream(ctxt);
1743
if (inputStream == NULL) {
1744
xmlFreeParserInputBuffer(buf);
1745
return(NULL);
1746
}
1747
1748
inputStream->buf = buf;
1749
inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1750
if (inputStream == NULL)
1751
return(NULL);
1752
1753
if (inputStream->filename == NULL)
1754
URI = xmlStrdup((xmlChar *) filename);
1755
else
1756
URI = xmlStrdup((xmlChar *) inputStream->filename);
1757
directory = xmlParserGetDirectory((const char *) URI);
1758
if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1759
inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1760
if (URI != NULL) xmlFree((char *) URI);
1761
inputStream->directory = directory;
1762
1763
xmlBufResetInput(inputStream->buf->buffer, inputStream);
1764
if ((ctxt->directory == NULL) && (directory != NULL))
1765
ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1766
return(inputStream);
1767
}
1768
1769
/************************************************************************
1770
* *
1771
* Commodity functions to handle parser contexts *
1772
* *
1773
************************************************************************/
1774
1775
/**
1776
* xmlInitSAXParserCtxt:
1777
* @ctxt: XML parser context
1778
* @sax: SAX handlert
1779
* @userData: user data
1780
*
1781
* Initialize a SAX parser context
1782
*
1783
* Returns 0 in case of success and -1 in case of error
1784
*/
1785
1786
static int
1787
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1788
void *userData)
1789
{
1790
xmlParserInputPtr input;
1791
1792
if(ctxt==NULL) {
1793
xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1794
return(-1);
1795
}
1796
1797
xmlInitParser();
1798
1799
if (ctxt->dict == NULL)
1800
ctxt->dict = xmlDictCreate();
1801
if (ctxt->dict == NULL) {
1802
xmlErrMemory(NULL, "cannot initialize parser context\n");
1803
return(-1);
1804
}
1805
xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1806
1807
if (ctxt->sax == NULL)
1808
ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1809
if (ctxt->sax == NULL) {
1810
xmlErrMemory(NULL, "cannot initialize parser context\n");
1811
return(-1);
1812
}
1813
if (sax == NULL) {
1814
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1815
xmlSAXVersion(ctxt->sax, 2);
1816
ctxt->userData = ctxt;
1817
} else {
1818
if (sax->initialized == XML_SAX2_MAGIC) {
1819
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1820
} else {
1821
memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1822
memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1823
}
1824
ctxt->userData = userData ? userData : ctxt;
1825
}
1826
1827
ctxt->maxatts = 0;
1828
ctxt->atts = NULL;
1829
/* Allocate the Input stack */
1830
if (ctxt->inputTab == NULL) {
1831
ctxt->inputTab = (xmlParserInputPtr *)
1832
xmlMalloc(5 * sizeof(xmlParserInputPtr));
1833
ctxt->inputMax = 5;
1834
}
1835
if (ctxt->inputTab == NULL) {
1836
xmlErrMemory(NULL, "cannot initialize parser context\n");
1837
ctxt->inputNr = 0;
1838
ctxt->inputMax = 0;
1839
ctxt->input = NULL;
1840
return(-1);
1841
}
1842
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1843
xmlFreeInputStream(input);
1844
}
1845
ctxt->inputNr = 0;
1846
ctxt->input = NULL;
1847
1848
ctxt->version = NULL;
1849
ctxt->encoding = NULL;
1850
ctxt->standalone = -1;
1851
ctxt->hasExternalSubset = 0;
1852
ctxt->hasPErefs = 0;
1853
ctxt->html = 0;
1854
ctxt->external = 0;
1855
ctxt->instate = XML_PARSER_START;
1856
ctxt->token = 0;
1857
ctxt->directory = NULL;
1858
1859
/* Allocate the Node stack */
1860
if (ctxt->nodeTab == NULL) {
1861
ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1862
ctxt->nodeMax = 10;
1863
}
1864
if (ctxt->nodeTab == NULL) {
1865
xmlErrMemory(NULL, "cannot initialize parser context\n");
1866
ctxt->nodeNr = 0;
1867
ctxt->nodeMax = 0;
1868
ctxt->node = NULL;
1869
ctxt->inputNr = 0;
1870
ctxt->inputMax = 0;
1871
ctxt->input = NULL;
1872
return(-1);
1873
}
1874
ctxt->nodeNr = 0;
1875
ctxt->node = NULL;
1876
1877
/* Allocate the Name stack */
1878
if (ctxt->nameTab == NULL) {
1879
ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1880
ctxt->nameMax = 10;
1881
}
1882
if (ctxt->nameTab == NULL) {
1883
xmlErrMemory(NULL, "cannot initialize parser context\n");
1884
ctxt->nodeNr = 0;
1885
ctxt->nodeMax = 0;
1886
ctxt->node = NULL;
1887
ctxt->inputNr = 0;
1888
ctxt->inputMax = 0;
1889
ctxt->input = NULL;
1890
ctxt->nameNr = 0;
1891
ctxt->nameMax = 0;
1892
ctxt->name = NULL;
1893
return(-1);
1894
}
1895
ctxt->nameNr = 0;
1896
ctxt->name = NULL;
1897
1898
/* Allocate the space stack */
1899
if (ctxt->spaceTab == NULL) {
1900
ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1901
ctxt->spaceMax = 10;
1902
}
1903
if (ctxt->spaceTab == NULL) {
1904
xmlErrMemory(NULL, "cannot initialize parser context\n");
1905
ctxt->nodeNr = 0;
1906
ctxt->nodeMax = 0;
1907
ctxt->node = NULL;
1908
ctxt->inputNr = 0;
1909
ctxt->inputMax = 0;
1910
ctxt->input = NULL;
1911
ctxt->nameNr = 0;
1912
ctxt->nameMax = 0;
1913
ctxt->name = NULL;
1914
ctxt->spaceNr = 0;
1915
ctxt->spaceMax = 0;
1916
ctxt->space = NULL;
1917
return(-1);
1918
}
1919
ctxt->spaceNr = 1;
1920
ctxt->spaceMax = 10;
1921
ctxt->spaceTab[0] = -1;
1922
ctxt->space = &ctxt->spaceTab[0];
1923
ctxt->myDoc = NULL;
1924
ctxt->wellFormed = 1;
1925
ctxt->nsWellFormed = 1;
1926
ctxt->valid = 1;
1927
ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1928
if (ctxt->loadsubset) {
1929
ctxt->options |= XML_PARSE_DTDLOAD;
1930
}
1931
ctxt->validate = xmlDoValidityCheckingDefaultValue;
1932
ctxt->pedantic = xmlPedanticParserDefaultValue;
1933
if (ctxt->pedantic) {
1934
ctxt->options |= XML_PARSE_PEDANTIC;
1935
}
1936
ctxt->linenumbers = xmlLineNumbersDefaultValue;
1937
ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1938
if (ctxt->keepBlanks == 0) {
1939
ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1940
ctxt->options |= XML_PARSE_NOBLANKS;
1941
}
1942
1943
ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1944
ctxt->vctxt.userData = ctxt;
1945
ctxt->vctxt.error = xmlParserValidityError;
1946
ctxt->vctxt.warning = xmlParserValidityWarning;
1947
if (ctxt->validate) {
1948
if (xmlGetWarningsDefaultValue == 0)
1949
ctxt->vctxt.warning = NULL;
1950
else
1951
ctxt->vctxt.warning = xmlParserValidityWarning;
1952
ctxt->vctxt.nodeMax = 0;
1953
ctxt->options |= XML_PARSE_DTDVALID;
1954
}
1955
ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1956
if (ctxt->replaceEntities) {
1957
ctxt->options |= XML_PARSE_NOENT;
1958
}
1959
ctxt->record_info = 0;
1960
ctxt->checkIndex = 0;
1961
ctxt->inSubset = 0;
1962
ctxt->errNo = XML_ERR_OK;
1963
ctxt->depth = 0;
1964
ctxt->catalogs = NULL;
1965
ctxt->sizeentities = 0;
1966
ctxt->sizeentcopy = 0;
1967
ctxt->input_id = 1;
1968
ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
1969
xmlInitNodeInfoSeq(&ctxt->node_seq);
1970
1971
if (ctxt->nsdb == NULL) {
1972
ctxt->nsdb = xmlParserNsCreate();
1973
if (ctxt->nsdb == NULL) {
1974
xmlErrMemory(ctxt, NULL);
1975
return(-1);
1976
}
1977
}
1978
1979
return(0);
1980
}
1981
1982
/**
1983
* xmlInitParserCtxt:
1984
* @ctxt: an XML parser context
1985
*
1986
* DEPRECATED: Internal function which will be made private in a future
1987
* version.
1988
*
1989
* Initialize a parser context
1990
*
1991
* Returns 0 in case of success and -1 in case of error
1992
*/
1993
1994
int
1995
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1996
{
1997
return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1998
}
1999
2000
/**
2001
* xmlFreeParserCtxt:
2002
* @ctxt: an XML parser context
2003
*
2004
* Free all the memory used by a parser context. However the parsed
2005
* document in ctxt->myDoc is not freed.
2006
*/
2007
2008
void
2009
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2010
{
2011
xmlParserInputPtr input;
2012
2013
if (ctxt == NULL) return;
2014
2015
while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2016
xmlFreeInputStream(input);
2017
}
2018
if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2019
if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2020
if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2021
if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2022
if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2023
if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2024
if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2025
if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2026
if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2027
#ifdef LIBXML_SAX1_ENABLED
2028
if ((ctxt->sax != NULL) &&
2029
(ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2030
#else
2031
if (ctxt->sax != NULL)
2032
#endif /* LIBXML_SAX1_ENABLED */
2033
xmlFree(ctxt->sax);
2034
if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2035
if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2036
if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2037
if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2038
if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2039
if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2040
if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2041
if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2042
if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2043
if (ctxt->attsDefault != NULL)
2044
xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2045
if (ctxt->attsSpecial != NULL)
2046
xmlHashFree(ctxt->attsSpecial, NULL);
2047
if (ctxt->freeElems != NULL) {
2048
xmlNodePtr cur, next;
2049
2050
cur = ctxt->freeElems;
2051
while (cur != NULL) {
2052
next = cur->next;
2053
xmlFree(cur);
2054
cur = next;
2055
}
2056
}
2057
if (ctxt->freeAttrs != NULL) {
2058
xmlAttrPtr cur, next;
2059
2060
cur = ctxt->freeAttrs;
2061
while (cur != NULL) {
2062
next = cur->next;
2063
xmlFree(cur);
2064
cur = next;
2065
}
2066
}
2067
/*
2068
* cleanup the error strings
2069
*/
2070
if (ctxt->lastError.message != NULL)
2071
xmlFree(ctxt->lastError.message);
2072
if (ctxt->lastError.file != NULL)
2073
xmlFree(ctxt->lastError.file);
2074
if (ctxt->lastError.str1 != NULL)
2075
xmlFree(ctxt->lastError.str1);
2076
if (ctxt->lastError.str2 != NULL)
2077
xmlFree(ctxt->lastError.str2);
2078
if (ctxt->lastError.str3 != NULL)
2079
xmlFree(ctxt->lastError.str3);
2080
2081
#ifdef LIBXML_CATALOG_ENABLED
2082
if (ctxt->catalogs != NULL)
2083
xmlCatalogFreeLocal(ctxt->catalogs);
2084
#endif
2085
xmlFree(ctxt);
2086
}
2087
2088
/**
2089
* xmlNewParserCtxt:
2090
*
2091
* Allocate and initialize a new parser context.
2092
*
2093
* Returns the xmlParserCtxtPtr or NULL
2094
*/
2095
2096
xmlParserCtxtPtr
2097
xmlNewParserCtxt(void)
2098
{
2099
return(xmlNewSAXParserCtxt(NULL, NULL));
2100
}
2101
2102
/**
2103
* xmlNewSAXParserCtxt:
2104
* @sax: SAX handler
2105
* @userData: user data
2106
*
2107
* Allocate and initialize a new SAX parser context. If userData is NULL,
2108
* the parser context will be passed as user data.
2109
*
2110
* Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2111
*/
2112
2113
xmlParserCtxtPtr
2114
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2115
{
2116
xmlParserCtxtPtr ctxt;
2117
2118
ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2119
if (ctxt == NULL) {
2120
xmlErrMemory(NULL, "cannot allocate parser context\n");
2121
return(NULL);
2122
}
2123
memset(ctxt, 0, sizeof(xmlParserCtxt));
2124
if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2125
xmlFreeParserCtxt(ctxt);
2126
return(NULL);
2127
}
2128
return(ctxt);
2129
}
2130
2131
/************************************************************************
2132
* *
2133
* Handling of node information *
2134
* *
2135
************************************************************************/
2136
2137
/**
2138
* xmlClearParserCtxt:
2139
* @ctxt: an XML parser context
2140
*
2141
* Clear (release owned resources) and reinitialize a parser context
2142
*/
2143
2144
void
2145
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2146
{
2147
if (ctxt==NULL)
2148
return;
2149
xmlClearNodeInfoSeq(&ctxt->node_seq);
2150
xmlCtxtReset(ctxt);
2151
}
2152
2153
2154
/**
2155
* xmlParserFindNodeInfo:
2156
* @ctx: an XML parser context
2157
* @node: an XML node within the tree
2158
*
2159
* DEPRECATED: Don't use.
2160
*
2161
* Find the parser node info struct for a given node
2162
*
2163
* Returns an xmlParserNodeInfo block pointer or NULL
2164
*/
2165
const xmlParserNodeInfo *
2166
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
2167
{
2168
unsigned long pos;
2169
2170
if ((ctx == NULL) || (node == NULL))
2171
return (NULL);
2172
/* Find position where node should be at */
2173
pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2174
if (pos < ctx->node_seq.length
2175
&& ctx->node_seq.buffer[pos].node == node)
2176
return &ctx->node_seq.buffer[pos];
2177
else
2178
return NULL;
2179
}
2180
2181
2182
/**
2183
* xmlInitNodeInfoSeq:
2184
* @seq: a node info sequence pointer
2185
*
2186
* DEPRECATED: Don't use.
2187
*
2188
* -- Initialize (set to initial state) node info sequence
2189
*/
2190
void
2191
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2192
{
2193
if (seq == NULL)
2194
return;
2195
seq->length = 0;
2196
seq->maximum = 0;
2197
seq->buffer = NULL;
2198
}
2199
2200
/**
2201
* xmlClearNodeInfoSeq:
2202
* @seq: a node info sequence pointer
2203
*
2204
* DEPRECATED: Don't use.
2205
*
2206
* -- Clear (release memory and reinitialize) node
2207
* info sequence
2208
*/
2209
void
2210
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2211
{
2212
if (seq == NULL)
2213
return;
2214
if (seq->buffer != NULL)
2215
xmlFree(seq->buffer);
2216
xmlInitNodeInfoSeq(seq);
2217
}
2218
2219
/**
2220
* xmlParserFindNodeInfoIndex:
2221
* @seq: a node info sequence pointer
2222
* @node: an XML node pointer
2223
*
2224
* DEPRECATED: Don't use.
2225
*
2226
* xmlParserFindNodeInfoIndex : Find the index that the info record for
2227
* the given node is or should be at in a sorted sequence
2228
*
2229
* Returns a long indicating the position of the record
2230
*/
2231
unsigned long
2232
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2233
const xmlNodePtr node)
2234
{
2235
unsigned long upper, lower, middle;
2236
int found = 0;
2237
2238
if ((seq == NULL) || (node == NULL))
2239
return ((unsigned long) -1);
2240
2241
/* Do a binary search for the key */
2242
lower = 1;
2243
upper = seq->length;
2244
middle = 0;
2245
while (lower <= upper && !found) {
2246
middle = lower + (upper - lower) / 2;
2247
if (node == seq->buffer[middle - 1].node)
2248
found = 1;
2249
else if (node < seq->buffer[middle - 1].node)
2250
upper = middle - 1;
2251
else
2252
lower = middle + 1;
2253
}
2254
2255
/* Return position */
2256
if (middle == 0 || seq->buffer[middle - 1].node < node)
2257
return middle;
2258
else
2259
return middle - 1;
2260
}
2261
2262
2263
/**
2264
* xmlParserAddNodeInfo:
2265
* @ctxt: an XML parser context
2266
* @info: a node info sequence pointer
2267
*
2268
* DEPRECATED: Don't use.
2269
*
2270
* Insert node info record into the sorted sequence
2271
*/
2272
void
2273
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2274
const xmlParserNodeInfoPtr info)
2275
{
2276
unsigned long pos;
2277
2278
if ((ctxt == NULL) || (info == NULL)) return;
2279
2280
/* Find pos and check to see if node is already in the sequence */
2281
pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2282
info->node);
2283
2284
if ((pos < ctxt->node_seq.length) &&
2285
(ctxt->node_seq.buffer != NULL) &&
2286
(ctxt->node_seq.buffer[pos].node == info->node)) {
2287
ctxt->node_seq.buffer[pos] = *info;
2288
}
2289
2290
/* Otherwise, we need to add new node to buffer */
2291
else {
2292
if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2293
(ctxt->node_seq.buffer == NULL)) {
2294
xmlParserNodeInfo *tmp_buffer;
2295
unsigned int byte_size;
2296
2297
if (ctxt->node_seq.maximum == 0)
2298
ctxt->node_seq.maximum = 2;
2299
byte_size = (sizeof(*ctxt->node_seq.buffer) *
2300
(2 * ctxt->node_seq.maximum));
2301
2302
if (ctxt->node_seq.buffer == NULL)
2303
tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2304
else
2305
tmp_buffer =
2306
(xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2307
byte_size);
2308
2309
if (tmp_buffer == NULL) {
2310
xmlErrMemory(ctxt, "failed to allocate buffer\n");
2311
return;
2312
}
2313
ctxt->node_seq.buffer = tmp_buffer;
2314
ctxt->node_seq.maximum *= 2;
2315
}
2316
2317
/* If position is not at end, move elements out of the way */
2318
if (pos != ctxt->node_seq.length) {
2319
unsigned long i;
2320
2321
for (i = ctxt->node_seq.length; i > pos; i--)
2322
ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2323
}
2324
2325
/* Copy element and increase length */
2326
ctxt->node_seq.buffer[pos] = *info;
2327
ctxt->node_seq.length++;
2328
}
2329
}
2330
2331
/************************************************************************
2332
* *
2333
* Default settings *
2334
* *
2335
************************************************************************/
2336
/**
2337
* xmlPedanticParserDefault:
2338
* @val: int 0 or 1
2339
*
2340
* DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2341
*
2342
* Set and return the previous value for enabling pedantic warnings.
2343
*
2344
* Returns the last value for 0 for no substitution, 1 for substitution.
2345
*/
2346
2347
int
2348
xmlPedanticParserDefault(int val) {
2349
int old = xmlPedanticParserDefaultValue;
2350
2351
xmlPedanticParserDefaultValue = val;
2352
return(old);
2353
}
2354
2355
/**
2356
* xmlLineNumbersDefault:
2357
* @val: int 0 or 1
2358
*
2359
* DEPRECATED: The modern options API always enables line numbers.
2360
*
2361
* Set and return the previous value for enabling line numbers in elements
2362
* contents. This may break on old application and is turned off by default.
2363
*
2364
* Returns the last value for 0 for no substitution, 1 for substitution.
2365
*/
2366
2367
int
2368
xmlLineNumbersDefault(int val) {
2369
int old = xmlLineNumbersDefaultValue;
2370
2371
xmlLineNumbersDefaultValue = val;
2372
return(old);
2373
}
2374
2375
/**
2376
* xmlSubstituteEntitiesDefault:
2377
* @val: int 0 or 1
2378
*
2379
* DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2380
*
2381
* Set and return the previous value for default entity support.
2382
* Initially the parser always keep entity references instead of substituting
2383
* entity values in the output. This function has to be used to change the
2384
* default parser behavior
2385
* SAX::substituteEntities() has to be used for changing that on a file by
2386
* file basis.
2387
*
2388
* Returns the last value for 0 for no substitution, 1 for substitution.
2389
*/
2390
2391
int
2392
xmlSubstituteEntitiesDefault(int val) {
2393
int old = xmlSubstituteEntitiesDefaultValue;
2394
2395
xmlSubstituteEntitiesDefaultValue = val;
2396
return(old);
2397
}
2398
2399
/**
2400
* xmlKeepBlanksDefault:
2401
* @val: int 0 or 1
2402
*
2403
* DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2404
*
2405
* Set and return the previous value for default blanks text nodes support.
2406
* The 1.x version of the parser used an heuristic to try to detect
2407
* ignorable white spaces. As a result the SAX callback was generating
2408
* xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2409
* using the DOM output text nodes containing those blanks were not generated.
2410
* The 2.x and later version will switch to the XML standard way and
2411
* ignorableWhitespace() are only generated when running the parser in
2412
* validating mode and when the current element doesn't allow CDATA or
2413
* mixed content.
2414
* This function is provided as a way to force the standard behavior
2415
* on 1.X libs and to switch back to the old mode for compatibility when
2416
* running 1.X client code on 2.X . Upgrade of 1.X code should be done
2417
* by using xmlIsBlankNode() commodity function to detect the "empty"
2418
* nodes generated.
2419
* This value also affect autogeneration of indentation when saving code
2420
* if blanks sections are kept, indentation is not generated.
2421
*
2422
* Returns the last value for 0 for no substitution, 1 for substitution.
2423
*/
2424
2425
int
2426
xmlKeepBlanksDefault(int val) {
2427
int old = xmlKeepBlanksDefaultValue;
2428
2429
xmlKeepBlanksDefaultValue = val;
2430
#ifdef LIBXML_OUTPUT_ENABLED
2431
if (!val)
2432
xmlIndentTreeOutput = 1;
2433
#endif
2434
return(old);
2435
}
2436
2437