CoCalc -- parserInternals.c

GitHub Repository: wine-mirror/wine
Path: blob/master/libs/xml2/parserInternals.c
⁴³⁸⁹ views
1
/*
2
 * parserInternals.c : Internal routines (and obsolete ones) needed for the
3
 *                     XML and HTML parsers.
4
 *
5
 * See Copyright for the status of this software.
6
 *
7
 * [email protected]
8
 */
9

10
#define IN_LIBXML
11
#include "libxml.h"
12

13
#if defined(_WIN32)
14
#define XML_DIR_SEP '\\'
15
#else
16
#define XML_DIR_SEP '/'
17
#endif
18

19
#include <string.h>
20
#include <ctype.h>
21
#include <stdlib.h>
22

23
#include <libxml/xmlmemory.h>
24
#include <libxml/tree.h>
25
#include <libxml/parser.h>
26
#include <libxml/parserInternals.h>
27
#include <libxml/entities.h>
28
#include <libxml/xmlerror.h>
29
#include <libxml/encoding.h>
30
#include <libxml/xmlIO.h>
31
#include <libxml/uri.h>
32
#include <libxml/dict.h>
33
#include <libxml/xmlsave.h>
34
#ifdef LIBXML_CATALOG_ENABLED
35
#include <libxml/catalog.h>
36
#endif
37
#include <libxml/chvalid.h>
38

39
#define CUR(ctxt) ctxt->input->cur
40
#define END(ctxt) ctxt->input->end
41

42
#include "private/buf.h"
43
#include "private/enc.h"
44
#include "private/error.h"
45
#include "private/io.h"
46
#include "private/parser.h"
47

48
/*
49
 * XML_MAX_AMPLIFICATION_DEFAULT is the default maximum allowed amplification
50
 * factor of serialized output after entity expansion.
51
 */
52
#define XML_MAX_AMPLIFICATION_DEFAULT 5
53

54
/*
55
 * Various global defaults for parsing
56
 */
57

58
/**
59
 * xmlCheckVersion:
60
 * @version: the include version number
61
 *
62
 * check the compiled lib version against the include one.
63
 * This can warn or immediately kill the application
64
 */
65
void
66
xmlCheckVersion(int version) {
67
    int myversion = LIBXML_VERSION;
68

69
    xmlInitParser();
70

71
    if ((myversion / 10000) != (version / 10000)) {
72
	xmlGenericError(xmlGenericErrorContext,
73
		"Fatal: program compiled against libxml %d using libxml %d\n",
74
		(version / 10000), (myversion / 10000));
75
	fprintf(stderr,
76
		"Fatal: program compiled against libxml %d using libxml %d\n",
77
		(version / 10000), (myversion / 10000));
78
    }
79
    if ((myversion / 100) < (version / 100)) {
80
	xmlGenericError(xmlGenericErrorContext,
81
		"Warning: program compiled against libxml %d using older %d\n",
82
		(version / 100), (myversion / 100));
83
    }
84
}
85

86

87
/************************************************************************
88
 *									*
89
 *		Some factorized error routines				*
90
 *									*
91
 ************************************************************************/
92

93

94
/**
95
 * xmlErrMemory:
96
 * @ctxt:  an XML parser context
97
 * @extra:  extra information
98
 *
99
 * Handle a redefinition of attribute error
100
 */
101
void
102
xmlErrMemory(xmlParserCtxtPtr ctxt, const char *extra)
103
{
104
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
105
        (ctxt->instate == XML_PARSER_EOF))
106
	return;
107
    if (ctxt != NULL) {
108
        ctxt->errNo = XML_ERR_NO_MEMORY;
109
        ctxt->instate = XML_PARSER_EOF;
110
        ctxt->disableSAX = 1;
111
    }
112
    if (extra)
113
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
114
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, extra,
115
                        NULL, NULL, 0, 0,
116
                        "Memory allocation failed : %s\n", extra);
117
    else
118
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER,
119
                        XML_ERR_NO_MEMORY, XML_ERR_FATAL, NULL, 0, NULL,
120
                        NULL, NULL, 0, 0, "Memory allocation failed\n");
121
}
122

123
/**
124
 * __xmlErrEncoding:
125
 * @ctxt:  an XML parser context
126
 * @xmlerr:  the error number
127
 * @msg:  the error message
128
 * @str1:  an string info
129
 * @str2:  an string info
130
 *
131
 * Handle an encoding error
132
 */
133
void
134
__xmlErrEncoding(xmlParserCtxtPtr ctxt, xmlParserErrors xmlerr,
135
                 const char *msg, const xmlChar * str1, const xmlChar * str2)
136
{
137
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
138
        (ctxt->instate == XML_PARSER_EOF))
139
	return;
140
    if (ctxt != NULL)
141
        ctxt->errNo = xmlerr;
142
    __xmlRaiseError(NULL, NULL, NULL,
143
                    ctxt, NULL, XML_FROM_PARSER, xmlerr, XML_ERR_FATAL,
144
                    NULL, 0, (const char *) str1, (const char *) str2,
145
                    NULL, 0, 0, msg, str1, str2);
146
    if (ctxt != NULL) {
147
        ctxt->wellFormed = 0;
148
        if (ctxt->recovery == 0)
149
            ctxt->disableSAX = 1;
150
    }
151
}
152

153
/**
154
 * xmlErrInternal:
155
 * @ctxt:  an XML parser context
156
 * @msg:  the error message
157
 * @str:  error information
158
 *
159
 * Handle an internal error
160
 */
161
static void LIBXML_ATTR_FORMAT(2,0)
162
xmlErrInternal(xmlParserCtxtPtr ctxt, const char *msg, const xmlChar * str)
163
{
164
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
165
        (ctxt->instate == XML_PARSER_EOF))
166
	return;
167
    if (ctxt != NULL)
168
        ctxt->errNo = XML_ERR_INTERNAL_ERROR;
169
    __xmlRaiseError(NULL, NULL, NULL,
170
                    ctxt, NULL, XML_FROM_PARSER, XML_ERR_INTERNAL_ERROR,
171
                    XML_ERR_FATAL, NULL, 0, (const char *) str, NULL, NULL,
172
                    0, 0, msg, str);
173
    if (ctxt != NULL) {
174
        ctxt->wellFormed = 0;
175
        if (ctxt->recovery == 0)
176
            ctxt->disableSAX = 1;
177
    }
178
}
179

180
/**
181
 * xmlFatalErr:
182
 * @ctxt:  an XML parser context
183
 * @error:  the error number
184
 * @info:  extra information string
185
 *
186
 * Handle a fatal parser error, i.e. violating Well-Formedness constraints
187
 */
188
void
189
xmlFatalErr(xmlParserCtxtPtr ctxt, xmlParserErrors error, const char *info)
190
{
191
    const char *errmsg;
192

193
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
194
        (ctxt->instate == XML_PARSER_EOF))
195
	return;
196
    switch (error) {
197
        case XML_ERR_INVALID_HEX_CHARREF:
198
            errmsg = "CharRef: invalid hexadecimal value";
199
            break;
200
        case XML_ERR_INVALID_DEC_CHARREF:
201
            errmsg = "CharRef: invalid decimal value";
202
            break;
203
        case XML_ERR_INVALID_CHARREF:
204
            errmsg = "CharRef: invalid value";
205
            break;
206
        case XML_ERR_INTERNAL_ERROR:
207
            errmsg = "internal error";
208
            break;
209
        case XML_ERR_PEREF_AT_EOF:
210
            errmsg = "PEReference at end of document";
211
            break;
212
        case XML_ERR_PEREF_IN_PROLOG:
213
            errmsg = "PEReference in prolog";
214
            break;
215
        case XML_ERR_PEREF_IN_EPILOG:
216
            errmsg = "PEReference in epilog";
217
            break;
218
        case XML_ERR_PEREF_NO_NAME:
219
            errmsg = "PEReference: no name";
220
            break;
221
        case XML_ERR_PEREF_SEMICOL_MISSING:
222
            errmsg = "PEReference: expecting ';'";
223
            break;
224
        case XML_ERR_ENTITY_LOOP:
225
            errmsg = "Detected an entity reference loop";
226
            break;
227
        case XML_ERR_ENTITY_NOT_STARTED:
228
            errmsg = "EntityValue: \" or ' expected";
229
            break;
230
        case XML_ERR_ENTITY_PE_INTERNAL:
231
            errmsg = "PEReferences forbidden in internal subset";
232
            break;
233
        case XML_ERR_ENTITY_NOT_FINISHED:
234
            errmsg = "EntityValue: \" or ' expected";
235
            break;
236
        case XML_ERR_ATTRIBUTE_NOT_STARTED:
237
            errmsg = "AttValue: \" or ' expected";
238
            break;
239
        case XML_ERR_LT_IN_ATTRIBUTE:
240
            errmsg = "Unescaped '<' not allowed in attributes values";
241
            break;
242
        case XML_ERR_LITERAL_NOT_STARTED:
243
            errmsg = "SystemLiteral \" or ' expected";
244
            break;
245
        case XML_ERR_LITERAL_NOT_FINISHED:
246
            errmsg = "Unfinished System or Public ID \" or ' expected";
247
            break;
248
        case XML_ERR_MISPLACED_CDATA_END:
249
            errmsg = "Sequence ']]>' not allowed in content";
250
            break;
251
        case XML_ERR_URI_REQUIRED:
252
            errmsg = "SYSTEM or PUBLIC, the URI is missing";
253
            break;
254
        case XML_ERR_PUBID_REQUIRED:
255
            errmsg = "PUBLIC, the Public Identifier is missing";
256
            break;
257
        case XML_ERR_HYPHEN_IN_COMMENT:
258
            errmsg = "Comment must not contain '--' (double-hyphen)";
259
            break;
260
        case XML_ERR_PI_NOT_STARTED:
261
            errmsg = "xmlParsePI : no target name";
262
            break;
263
        case XML_ERR_RESERVED_XML_NAME:
264
            errmsg = "Invalid PI name";
265
            break;
266
        case XML_ERR_NOTATION_NOT_STARTED:
267
            errmsg = "NOTATION: Name expected here";
268
            break;
269
        case XML_ERR_NOTATION_NOT_FINISHED:
270
            errmsg = "'>' required to close NOTATION declaration";
271
            break;
272
        case XML_ERR_VALUE_REQUIRED:
273
            errmsg = "Entity value required";
274
            break;
275
        case XML_ERR_URI_FRAGMENT:
276
            errmsg = "Fragment not allowed";
277
            break;
278
        case XML_ERR_ATTLIST_NOT_STARTED:
279
            errmsg = "'(' required to start ATTLIST enumeration";
280
            break;
281
        case XML_ERR_NMTOKEN_REQUIRED:
282
            errmsg = "NmToken expected in ATTLIST enumeration";
283
            break;
284
        case XML_ERR_ATTLIST_NOT_FINISHED:
285
            errmsg = "')' required to finish ATTLIST enumeration";
286
            break;
287
        case XML_ERR_MIXED_NOT_STARTED:
288
            errmsg = "MixedContentDecl : '|' or ')*' expected";
289
            break;
290
        case XML_ERR_PCDATA_REQUIRED:
291
            errmsg = "MixedContentDecl : '#PCDATA' expected";
292
            break;
293
        case XML_ERR_ELEMCONTENT_NOT_STARTED:
294
            errmsg = "ContentDecl : Name or '(' expected";
295
            break;
296
        case XML_ERR_ELEMCONTENT_NOT_FINISHED:
297
            errmsg = "ContentDecl : ',' '|' or ')' expected";
298
            break;
299
        case XML_ERR_PEREF_IN_INT_SUBSET:
300
            errmsg =
301
                "PEReference: forbidden within markup decl in internal subset";
302
            break;
303
        case XML_ERR_GT_REQUIRED:
304
            errmsg = "expected '>'";
305
            break;
306
        case XML_ERR_CONDSEC_INVALID:
307
            errmsg = "XML conditional section '[' expected";
308
            break;
309
        case XML_ERR_EXT_SUBSET_NOT_FINISHED:
310
            errmsg = "Content error in the external subset";
311
            break;
312
        case XML_ERR_CONDSEC_INVALID_KEYWORD:
313
            errmsg =
314
                "conditional section INCLUDE or IGNORE keyword expected";
315
            break;
316
        case XML_ERR_CONDSEC_NOT_FINISHED:
317
            errmsg = "XML conditional section not closed";
318
            break;
319
        case XML_ERR_XMLDECL_NOT_STARTED:
320
            errmsg = "Text declaration '<?xml' required";
321
            break;
322
        case XML_ERR_XMLDECL_NOT_FINISHED:
323
            errmsg = "parsing XML declaration: '?>' expected";
324
            break;
325
        case XML_ERR_EXT_ENTITY_STANDALONE:
326
            errmsg = "external parsed entities cannot be standalone";
327
            break;
328
        case XML_ERR_ENTITYREF_SEMICOL_MISSING:
329
            errmsg = "EntityRef: expecting ';'";
330
            break;
331
        case XML_ERR_DOCTYPE_NOT_FINISHED:
332
            errmsg = "DOCTYPE improperly terminated";
333
            break;
334
        case XML_ERR_LTSLASH_REQUIRED:
335
            errmsg = "EndTag: '</' not found";
336
            break;
337
        case XML_ERR_EQUAL_REQUIRED:
338
            errmsg = "expected '='";
339
            break;
340
        case XML_ERR_STRING_NOT_CLOSED:
341
            errmsg = "String not closed expecting \" or '";
342
            break;
343
        case XML_ERR_STRING_NOT_STARTED:
344
            errmsg = "String not started expecting ' or \"";
345
            break;
346
        case XML_ERR_ENCODING_NAME:
347
            errmsg = "Invalid XML encoding name";
348
            break;
349
        case XML_ERR_STANDALONE_VALUE:
350
            errmsg = "standalone accepts only 'yes' or 'no'";
351
            break;
352
        case XML_ERR_DOCUMENT_EMPTY:
353
            errmsg = "Document is empty";
354
            break;
355
        case XML_ERR_DOCUMENT_END:
356
            errmsg = "Extra content at the end of the document";
357
            break;
358
        case XML_ERR_NOT_WELL_BALANCED:
359
            errmsg = "chunk is not well balanced";
360
            break;
361
        case XML_ERR_EXTRA_CONTENT:
362
            errmsg = "extra content at the end of well balanced chunk";
363
            break;
364
        case XML_ERR_VERSION_MISSING:
365
            errmsg = "Malformed declaration expecting version";
366
            break;
367
        case XML_ERR_NAME_TOO_LONG:
368
            errmsg = "Name too long";
369
            break;
370
        case XML_ERR_INVALID_ENCODING:
371
            errmsg = "Invalid bytes in character encoding";
372
            break;
373
        case XML_IO_UNKNOWN:
374
            errmsg = "I/O error";
375
            break;
376
#if 0
377
        case:
378
            errmsg = "";
379
            break;
380
#endif
381
        default:
382
            errmsg = "Unregistered error message";
383
    }
384
    if (ctxt != NULL)
385
	ctxt->errNo = error;
386
    if (info == NULL) {
387
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
388
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s\n",
389
                        errmsg);
390
    } else {
391
        __xmlRaiseError(NULL, NULL, NULL, ctxt, NULL, XML_FROM_PARSER, error,
392
                        XML_ERR_FATAL, NULL, 0, info, NULL, NULL, 0, 0, "%s: %s\n",
393
                        errmsg, info);
394
    }
395
    if (ctxt != NULL) {
396
	ctxt->wellFormed = 0;
397
	if (ctxt->recovery == 0)
398
	    ctxt->disableSAX = 1;
399
    }
400
}
401

402
/**
403
 * xmlErrEncodingInt:
404
 * @ctxt:  an XML parser context
405
 * @error:  the error number
406
 * @msg:  the error message
407
 * @val:  an integer value
408
 *
409
 * n encoding error
410
 */
411
static void LIBXML_ATTR_FORMAT(3,0)
412
xmlErrEncodingInt(xmlParserCtxtPtr ctxt, xmlParserErrors error,
413
                  const char *msg, int val)
414
{
415
    if ((ctxt != NULL) && (ctxt->disableSAX != 0) &&
416
        (ctxt->instate == XML_PARSER_EOF))
417
	return;
418
    if (ctxt != NULL)
419
        ctxt->errNo = error;
420
    __xmlRaiseError(NULL, NULL, NULL,
421
                    ctxt, NULL, XML_FROM_PARSER, error, XML_ERR_FATAL,
422
                    NULL, 0, NULL, NULL, NULL, val, 0, msg, val);
423
    if (ctxt != NULL) {
424
        ctxt->wellFormed = 0;
425
        if (ctxt->recovery == 0)
426
            ctxt->disableSAX = 1;
427
    }
428
}
429

430
/**
 * xmlIsLetter:
 * @c:  a Unicode code point
 *
 * Check whether the character is allowed by the production
 * [84] Letter ::= BaseChar | Ideographic
 *
 * Returns 1 if @c is a BaseChar or an Ideographic, 0 otherwise
 */
int
xmlIsLetter(int c) {
    if (IS_BASECHAR(c))
        return(1);
    if (IS_IDEOGRAPHIC(c))
        return(1);
    return(0);
}
443

444
/************************************************************************
445
 *									*
446
 *		Input handling functions for progressive parsing	*
447
 *									*
448
 ************************************************************************/
449

450
/* we need to keep enough input to show errors in context */
451
#define LINE_LEN        80
452

453
/**
454
 * xmlHaltParser:
455
 * @ctxt:  an XML parser context
456
 *
457
 * Blocks further parser processing don't override error
458
 * for internal use
459
 */
460
void
461
xmlHaltParser(xmlParserCtxtPtr ctxt) {
462
    if (ctxt == NULL)
463
        return;
464
    ctxt->instate = XML_PARSER_EOF;
465
    ctxt->disableSAX = 1;
466
    while (ctxt->inputNr > 1)
467
        xmlFreeInputStream(inputPop(ctxt));
468
    if (ctxt->input != NULL) {
469
        /*
470
	 * in case there was a specific allocation deallocate before
471
	 * overriding base
472
	 */
473
        if (ctxt->input->free != NULL) {
474
	    ctxt->input->free((xmlChar *) ctxt->input->base);
475
	    ctxt->input->free = NULL;
476
	}
477
        if (ctxt->input->buf != NULL) {
478
            xmlFreeParserInputBuffer(ctxt->input->buf);
479
            ctxt->input->buf = NULL;
480
        }
481
	ctxt->input->cur = BAD_CAST"";
482
        ctxt->input->length = 0;
483
	ctxt->input->base = ctxt->input->cur;
484
        ctxt->input->end = ctxt->input->cur;
485
    }
486
}
487

488
/**
489
 * xmlParserInputRead:
490
 * @in:  an XML parser input
491
 * @len:  an indicative size for the lookahead
492
 *
493
 * DEPRECATED: This function was internal and is deprecated.
494
 *
495
 * Returns -1 as this is an error to use it.
496
 */
497
int
498
xmlParserInputRead(xmlParserInputPtr in ATTRIBUTE_UNUSED, int len ATTRIBUTE_UNUSED) {
499
    return(-1);
500
}
501

502
/**
503
 * xmlParserGrow:
504
 * @ctxt:  an XML parser context
505
 *
506
 * Grow the input buffer.
507
 *
508
 * Returns the number of bytes read or -1 in case of error.
509
 */
510
int
511
xmlParserGrow(xmlParserCtxtPtr ctxt) {
512
    xmlParserInputPtr in = ctxt->input;
513
    xmlParserInputBufferPtr buf = in->buf;
514
    ptrdiff_t curEnd = in->end - in->cur;
515
    ptrdiff_t curBase = in->cur - in->base;
516
    int ret;
517

518
    if (buf == NULL)
519
        return(0);
520
    /* Don't grow push parser buffer. */
521
    if ((ctxt->progressive) && (ctxt->inputNr <= 1))
522
        return(0);
523
    /* Don't grow memory buffers. */
524
    if ((buf->encoder == NULL) && (buf->readcallback == NULL))
525
        return(0);
526
    if (buf->error != 0)
527
        return(-1);
528

529
    if (((curEnd > XML_MAX_LOOKUP_LIMIT) ||
530
         (curBase > XML_MAX_LOOKUP_LIMIT)) &&
531
        ((ctxt->options & XML_PARSE_HUGE) == 0)) {
532
        xmlErrMemory(ctxt, "Huge input lookup");
533
        xmlHaltParser(ctxt);
534
	return(-1);
535
    }
536

537
    if (curEnd >= INPUT_CHUNK)
538
        return(0);
539

540
    ret = xmlParserInputBufferGrow(buf, INPUT_CHUNK);
541
    xmlBufUpdateInput(buf->buffer, in, curBase);
542

543
    if (ret < 0) {
544
        xmlFatalErr(ctxt, buf->error, NULL);
545
        /* Buffer contents may be lost in case of memory errors. */
546
        if (buf->error == XML_ERR_NO_MEMORY)
547
            xmlHaltParser(ctxt);
548
    }
549

550
    return(ret);
551
}
552

553
/**
554
 * xmlParserInputGrow:
555
 * @in:  an XML parser input
556
 * @len:  an indicative size for the lookahead
557
 *
558
 * DEPRECATED: Don't use.
559
 *
560
 * This function increase the input for the parser. It tries to
561
 * preserve pointers to the input buffer, and keep already read data
562
 *
563
 * Returns the amount of char read, or -1 in case of error, 0 indicate the
564
 * end of this entity
565
 */
566
int
567
xmlParserInputGrow(xmlParserInputPtr in, int len) {
568
    int ret;
569
    size_t indx;
570

571
    if ((in == NULL) || (len < 0)) return(-1);
572
    if (in->buf == NULL) return(-1);
573
    if (in->base == NULL) return(-1);
574
    if (in->cur == NULL) return(-1);
575
    if (in->buf->buffer == NULL) return(-1);
576

577
    /* Don't grow memory buffers. */
578
    if ((in->buf->encoder == NULL) && (in->buf->readcallback == NULL))
579
        return(0);
580

581
    indx = in->cur - in->base;
582
    if (xmlBufUse(in->buf->buffer) > (unsigned int) indx + INPUT_CHUNK) {
583
        return(0);
584
    }
585
    ret = xmlParserInputBufferGrow(in->buf, len);
586

587
    in->base = xmlBufContent(in->buf->buffer);
588
    if (in->base == NULL) {
589
        in->base = BAD_CAST "";
590
        in->cur = in->base;
591
        in->end = in->base;
592
        return(-1);
593
    }
594
    in->cur = in->base + indx;
595
    in->end = xmlBufEnd(in->buf->buffer);
596

597
    return(ret);
598
}
599

600
/**
601
 * xmlParserShrink:
602
 * @ctxt:  an XML parser context
603
 *
604
 * Shrink the input buffer.
605
 */
606
void
607
xmlParserShrink(xmlParserCtxtPtr ctxt) {
608
    xmlParserInputPtr in = ctxt->input;
609
    xmlParserInputBufferPtr buf = in->buf;
610
    size_t used;
611

612
    if (buf == NULL)
613
        return;
614
    /* Don't shrink pull parser memory buffers. */
615
    if (((ctxt->progressive == 0) || (ctxt->inputNr > 1)) &&
616
        (buf->encoder == NULL) &&
617
        (buf->readcallback == NULL))
618
        return;
619

620
    used = in->cur - in->base;
621
    /*
622
     * Do not shrink on large buffers whose only a tiny fraction
623
     * was consumed
624
     */
625
    if (used > INPUT_CHUNK) {
626
	size_t res = xmlBufShrink(buf->buffer, used - LINE_LEN);
627

628
	if (res > 0) {
629
            used -= res;
630
            if ((res > ULONG_MAX) ||
631
                (in->consumed > ULONG_MAX - (unsigned long)res))
632
                in->consumed = ULONG_MAX;
633
            else
634
                in->consumed += res;
635
	}
636
    }
637

638
    xmlBufUpdateInput(buf->buffer, in, used);
639
}
640

641
/**
642
 * xmlParserInputShrink:
643
 * @in:  an XML parser input
644
 *
645
 * DEPRECATED: Don't use.
646
 *
647
 * This function removes used input for the parser.
648
 */
649
void
650
xmlParserInputShrink(xmlParserInputPtr in) {
651
    size_t used;
652
    size_t ret;
653

654
    if (in == NULL) return;
655
    if (in->buf == NULL) return;
656
    if (in->base == NULL) return;
657
    if (in->cur == NULL) return;
658
    if (in->buf->buffer == NULL) return;
659

660
    used = in->cur - in->base;
661
    /*
662
     * Do not shrink on large buffers whose only a tiny fraction
663
     * was consumed
664
     */
665
    if (used > INPUT_CHUNK) {
666
	ret = xmlBufShrink(in->buf->buffer, used - LINE_LEN);
667
	if (ret > 0) {
668
            used -= ret;
669
            if ((ret > ULONG_MAX) ||
670
                (in->consumed > ULONG_MAX - (unsigned long)ret))
671
                in->consumed = ULONG_MAX;
672
            else
673
                in->consumed += ret;
674
	}
675
    }
676

677
    if (xmlBufUse(in->buf->buffer) <= INPUT_CHUNK) {
678
        xmlParserInputBufferRead(in->buf, 2 * INPUT_CHUNK);
679
    }
680

681
    in->base = xmlBufContent(in->buf->buffer);
682
    if (in->base == NULL) {
683
        /* TODO: raise error */
684
        in->base = BAD_CAST "";
685
        in->cur = in->base;
686
        in->end = in->base;
687
        return;
688
    }
689
    in->cur = in->base + used;
690
    in->end = xmlBufEnd(in->buf->buffer);
691
}
692

693
/************************************************************************
694
 *									*
695
 *		UTF8 character input and related functions		*
696
 *									*
697
 ************************************************************************/
698

699
/**
700
 * xmlNextChar:
701
 * @ctxt:  the XML parser context
702
 *
703
 * DEPRECATED: Internal function, do not use.
704
 *
705
 * Skip to the next char input char.
706
 */
707

708
void
709
xmlNextChar(xmlParserCtxtPtr ctxt)
710
{
711
    const unsigned char *cur;
712
    size_t avail;
713
    int c;
714

715
    if ((ctxt == NULL) || (ctxt->instate == XML_PARSER_EOF) ||
716
        (ctxt->input == NULL))
717
        return;
718

719
    avail = ctxt->input->end - ctxt->input->cur;
720

721
    if (avail < INPUT_CHUNK) {
722
        xmlParserGrow(ctxt);
723
        if ((ctxt->instate == XML_PARSER_EOF) ||
724
            (ctxt->input->cur >= ctxt->input->end))
725
            return;
726
        avail = ctxt->input->end - ctxt->input->cur;
727
    }
728

729
    cur = ctxt->input->cur;
730
    c = *cur;
731

732
    if (c < 0x80) {
733
        if (c == '\n') {
734
            ctxt->input->cur++;
735
            ctxt->input->line++;
736
            ctxt->input->col = 1;
737
        } else if (c == '\r') {
738
            /*
739
             *   2.11 End-of-Line Handling
740
             *   the literal two-character sequence "#xD#xA" or a standalone
741
             *   literal #xD, an XML processor must pass to the application
742
             *   the single character #xA.
743
             */
744
            ctxt->input->cur += ((cur[1] == '\n') ? 2 : 1);
745
            ctxt->input->line++;
746
            ctxt->input->col = 1;
747
            return;
748
        } else {
749
            ctxt->input->cur++;
750
            ctxt->input->col++;
751
        }
752
    } else {
753
        ctxt->input->col++;
754

755
        if ((avail < 2) || (cur[1] & 0xc0) != 0x80)
756
            goto encoding_error;
757

758
        if (c < 0xe0) {
759
            /* 2-byte code */
760
            if (c < 0xc2)
761
                goto encoding_error;
762
            ctxt->input->cur += 2;
763
        } else {
764
            unsigned int val = (c << 8) | cur[1];
765

766
            if ((avail < 3) || (cur[2] & 0xc0) != 0x80)
767
                goto encoding_error;
768

769
            if (c < 0xf0) {
770
                /* 3-byte code */
771
                if ((val < 0xe0a0) || ((val >= 0xeda0) && (val < 0xee00)))
772
                    goto encoding_error;
773
                ctxt->input->cur += 3;
774
            } else {
775
                if ((avail < 4) || ((cur[3] & 0xc0) != 0x80))
776
                    goto encoding_error;
777

778
                /* 4-byte code */
779
                if ((val < 0xf090) || (val >= 0xf490))
780
                    goto encoding_error;
781
                ctxt->input->cur += 4;
782
            }
783
        }
784
    }
785

786
    return;
787

788
encoding_error:
789
    /* Only report the first error */
790
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
791
        if ((ctxt == NULL) || (ctxt->input == NULL) ||
792
            (ctxt->input->end - ctxt->input->cur < 4)) {
793
            __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
794
                         "Input is not proper UTF-8, indicate encoding !\n",
795
                         NULL, NULL);
796
        } else {
797
            char buffer[150];
798

799
            snprintf(buffer, 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
800
                            ctxt->input->cur[0], ctxt->input->cur[1],
801
                            ctxt->input->cur[2], ctxt->input->cur[3]);
802
            __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
803
                         "Input is not proper UTF-8, indicate encoding !\n%s",
804
                         BAD_CAST buffer, NULL);
805
        }
806
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
807
    }
808
    ctxt->input->cur++;
809
    return;
810
}
811

812
/**
813
 * xmlCurrentChar:
814
 * @ctxt:  the XML parser context
815
 * @len:  pointer to the length of the char read
816
 *
817
 * DEPRECATED: Internal function, do not use.
818
 *
819
 * The current char value, if using UTF-8 this may actually span multiple
820
 * bytes in the input buffer. Implement the end of line normalization:
821
 * 2.11 End-of-Line Handling
822
 * Wherever an external parsed entity or the literal entity value
823
 * of an internal parsed entity contains either the literal two-character
824
 * sequence "#xD#xA" or a standalone literal #xD, an XML processor
825
 * must pass to the application the single character #xA.
826
 * This behavior can conveniently be produced by normalizing all
827
 * line breaks to #xA on input, before parsing.)
828
 *
829
 * Returns the current char value and its length
830
 */
831

832
int
833
xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
834
    const unsigned char *cur;
835
    size_t avail;
836
    int c;
837

838
    if ((ctxt == NULL) || (len == NULL) || (ctxt->input == NULL)) return(0);
839
    if (ctxt->instate == XML_PARSER_EOF)
840
	return(0);
841

842
    avail = ctxt->input->end - ctxt->input->cur;
843

844
    if (avail < INPUT_CHUNK) {
845
        xmlParserGrow(ctxt);
846
        if (ctxt->instate == XML_PARSER_EOF)
847
            return(0);
848
        avail = ctxt->input->end - ctxt->input->cur;
849
    }
850

851
    cur = ctxt->input->cur;
852
    c = *cur;
853

854
    if (c < 0x80) {
855
	/* 1-byte code */
856
        if (c < 0x20) {
857
            /*
858
             *   2.11 End-of-Line Handling
859
             *   the literal two-character sequence "#xD#xA" or a standalone
860
             *   literal #xD, an XML processor must pass to the application
861
             *   the single character #xA.
862
             */
863
            if (c == '\r') {
864
                /*
865
                 * TODO: This function shouldn't change the 'cur' pointer
866
                 * as side effect, but the NEXTL macro in parser.c relies
867
                 * on this behavior when incrementing line numbers.
868
                 */
869
                if (cur[1] == '\n')
870
                    ctxt->input->cur++;
871
                *len = 1;
872
                c = '\n';
873
            } else if (c == 0) {
874
                if (ctxt->input->cur >= ctxt->input->end) {
875
                    *len = 0;
876
                } else {
877
                    *len = 1;
878
                    /*
879
                     * TODO: Null bytes should be handled by callers,
880
                     * but this can be tricky.
881
                     */
882
                    xmlErrEncodingInt(ctxt, XML_ERR_INVALID_CHAR,
883
                            "Char 0x0 out of allowed range\n", c);
884
                }
885
            } else {
886
                *len = 1;
887
            }
888
        } else {
889
            *len = 1;
890
        }
891

892
        return(c);
893
    } else {
894
        int val;
895

896
        if (avail < 2)
897
            goto incomplete_sequence;
898
        if ((cur[1] & 0xc0) != 0x80)
899
            goto encoding_error;
900

901
        if (c < 0xe0) {
902
            /* 2-byte code */
903
            if (c < 0xc2)
904
                goto encoding_error;
905
            val = (c & 0x1f) << 6;
906
            val |= cur[1] & 0x3f;
907
            *len = 2;
908
        } else {
909
            if (avail < 3)
910
                goto incomplete_sequence;
911
            if ((cur[2] & 0xc0) != 0x80)
912
                goto encoding_error;
913

914
            if (c < 0xf0) {
915
                /* 3-byte code */
916
                val = (c & 0xf) << 12;
917
                val |= (cur[1] & 0x3f) << 6;
918
                val |= cur[2] & 0x3f;
919
                if ((val < 0x800) || ((val >= 0xd800) && (val < 0xe000)))
920
                    goto encoding_error;
921
                *len = 3;
922
            } else {
923
                if (avail < 4)
924
                    goto incomplete_sequence;
925
                if ((cur[3] & 0xc0) != 0x80)
926
                    goto encoding_error;
927

928
                /* 4-byte code */
929
                val = (c & 0x0f) << 18;
930
                val |= (cur[1] & 0x3f) << 12;
931
                val |= (cur[2] & 0x3f) << 6;
932
                val |= cur[3] & 0x3f;
933
                if ((val < 0x10000) || (val >= 0x110000))
934
                    goto encoding_error;
935
                *len = 4;
936
            }
937
        }
938

939
        return(val);
940
    }
941

942
encoding_error:
943
    /* Only report the first error */
944
    if ((ctxt->input->flags & XML_INPUT_ENCODING_ERROR) == 0) {
945
        if (ctxt->input->end - ctxt->input->cur < 4) {
946
            __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
947
                         "Input is not proper UTF-8, indicate encoding !\n",
948
                         NULL, NULL);
949
        } else {
950
            char buffer[150];
951

952
            snprintf(&buffer[0], 149, "Bytes: 0x%02X 0x%02X 0x%02X 0x%02X\n",
953
                            ctxt->input->cur[0], ctxt->input->cur[1],
954
                            ctxt->input->cur[2], ctxt->input->cur[3]);
955
            __xmlErrEncoding(ctxt, XML_ERR_INVALID_CHAR,
956
                         "Input is not proper UTF-8, indicate encoding !\n%s",
957
                         BAD_CAST buffer, NULL);
958
        }
959
        ctxt->input->flags |= XML_INPUT_ENCODING_ERROR;
960
    }
961
    *len = 1;
962
    return(0xFFFD); /* U+FFFD Replacement Character */
963

964
incomplete_sequence:
965
    /*
966
     * An encoding problem may arise from a truncated input buffer
967
     * splitting a character in the middle. In that case do not raise
968
     * an error but return 0. This should only happen when push parsing
969
     * char data.
970
     */
971
    *len = 0;
972
    return(0);
973
}
974

975
/**
976
 * xmlStringCurrentChar:
977
 * @ctxt:  the XML parser context
978
 * @cur:  pointer to the beginning of the char
979
 * @len:  pointer to the length of the char read
980
 *
981
 * DEPRECATED: Internal function, do not use.
982
 *
983
 * The current char value, if using UTF-8 this may actually span multiple
984
 * bytes in the input buffer.
985
 *
986
 * Returns the current char value and its length
987
 */
988

989
int
990
xmlStringCurrentChar(xmlParserCtxtPtr ctxt ATTRIBUTE_UNUSED,
991
                     const xmlChar *cur, int *len) {
992
    int c;
993

994
    if ((cur == NULL) || (len == NULL))
995
        return(0);
996

997
    /* cur is zero-terminated, so we can lie about its length. */
998
    *len = 4;
999
    c = xmlGetUTF8Char(cur, len);
1000

1001
    return((c < 0) ? 0 : c);
1002
}
1003

1004
/**
1005
 * xmlCopyCharMultiByte:
1006
 * @out:  pointer to an array of xmlChar
1007
 * @val:  the char value
1008
 *
1009
 * append the char value in the array
1010
 *
1011
 * Returns the number of xmlChar written
1012
 */
1013
int
1014
xmlCopyCharMultiByte(xmlChar *out, int val) {
1015
    if ((out == NULL) || (val < 0)) return(0);
1016
    /*
1017
     * We are supposed to handle UTF8, check it's valid
1018
     * From rfc2044: encoding of the Unicode values on UTF-8:
1019
     *
1020
     * UCS-4 range (hex.)           UTF-8 octet sequence (binary)
1021
     * 0000 0000-0000 007F   0xxxxxxx
1022
     * 0000 0080-0000 07FF   110xxxxx 10xxxxxx
1023
     * 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
1024
     */
1025
    if  (val >= 0x80) {
1026
	xmlChar *savedout = out;
1027
	int bits;
1028
	if (val <   0x800) { *out++= (val >>  6) | 0xC0;  bits=  0; }
1029
	else if (val < 0x10000) { *out++= (val >> 12) | 0xE0;  bits=  6;}
1030
	else if (val < 0x110000)  { *out++= (val >> 18) | 0xF0;  bits=  12; }
1031
	else {
1032
	    xmlErrEncodingInt(NULL, XML_ERR_INVALID_CHAR,
1033
		    "Internal error, xmlCopyCharMultiByte 0x%X out of bound\n",
1034
			      val);
1035
	    return(0);
1036
	}
1037
	for ( ; bits >= 0; bits-= 6)
1038
	    *out++= ((val >> bits) & 0x3F) | 0x80 ;
1039
	return (out - savedout);
1040
    }
1041
    *out = val;
1042
    return 1;
1043
}
1044

1045
/**
1046
 * xmlCopyChar:
1047
 * @len:  Ignored, compatibility
1048
 * @out:  pointer to an array of xmlChar
1049
 * @val:  the char value
1050
 *
1051
 * append the char value in the array
1052
 *
1053
 * Returns the number of xmlChar written
1054
 */
1055

1056
int
1057
xmlCopyChar(int len ATTRIBUTE_UNUSED, xmlChar *out, int val) {
1058
    if ((out == NULL) || (val < 0)) return(0);
1059
    /* the len parameter is ignored */
1060
    if  (val >= 0x80) {
1061
	return(xmlCopyCharMultiByte (out, val));
1062
    }
1063
    *out = val;
1064
    return 1;
1065
}
1066

1067
/************************************************************************
1068
 *									*
1069
 *		Commodity functions to switch encodings			*
1070
 *									*
1071
 ************************************************************************/
1072

1073
static xmlCharEncodingHandlerPtr
1074
xmlDetectEBCDIC(xmlParserInputPtr input) {
1075
    xmlChar out[200];
1076
    xmlCharEncodingHandlerPtr handler;
1077
    int inlen, outlen, res, i;
1078

1079
    /*
1080
     * To detect the EBCDIC code page, we convert the first 200 bytes
1081
     * to EBCDIC-US and try to find the encoding declaration.
1082
     */
1083
    handler = xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC);
1084
    if (handler == NULL)
1085
        return(NULL);
1086
    outlen = sizeof(out) - 1;
1087
    inlen = input->end - input->cur;
1088
    res = xmlEncInputChunk(handler, out, &outlen, input->cur, &inlen);
1089
    if (res < 0)
1090
        return(handler);
1091
    out[outlen] = 0;
1092

1093
    for (i = 0; i < outlen; i++) {
1094
        if (out[i] == '>')
1095
            break;
1096
        if ((out[i] == 'e') &&
1097
            (xmlStrncmp(out + i, BAD_CAST "encoding", 8) == 0)) {
1098
            int start, cur, quote;
1099

1100
            i += 8;
1101
            while (IS_BLANK_CH(out[i]))
1102
                i += 1;
1103
            if (out[i++] != '=')
1104
                break;
1105
            while (IS_BLANK_CH(out[i]))
1106
                i += 1;
1107
            quote = out[i++];
1108
            if ((quote != '\'') && (quote != '"'))
1109
                break;
1110
            start = i;
1111
            cur = out[i];
1112
            while (((cur >= 'a') && (cur <= 'z')) ||
1113
                   ((cur >= 'A') && (cur <= 'Z')) ||
1114
                   ((cur >= '0') && (cur <= '9')) ||
1115
                   (cur == '.') || (cur == '_') ||
1116
                   (cur == '-'))
1117
                cur = out[++i];
1118
            if (cur != quote)
1119
                break;
1120
            out[i] = 0;
1121
            xmlCharEncCloseFunc(handler);
1122
            return(xmlFindCharEncodingHandler((char *) out + start));
1123
        }
1124
    }
1125

1126
    /*
1127
     * ICU handlers are stateful, so we have to recreate them.
1128
     */
1129
    xmlCharEncCloseFunc(handler);
1130
    return(xmlGetCharEncodingHandler(XML_CHAR_ENCODING_EBCDIC));
1131
}
1132

1133
/**
1134
 * xmlSwitchEncoding:
1135
 * @ctxt:  the parser context
1136
 * @enc:  the encoding value (number)
1137
 *
1138
 * Use encoding specified by enum to decode input data.
1139
 *
1140
 * This function can be used to enforce the encoding of chunks passed
1141
 * to xmlParseChunk.
1142
 *
1143
 * Returns 0 in case of success, -1 otherwise
1144
 */
1145
int
1146
xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
1147
{
1148
    xmlCharEncodingHandlerPtr handler = NULL;
1149
    int check = 1;
1150
    int ret;
1151

1152
    if ((ctxt == NULL) || (ctxt->input == NULL))
1153
        return(-1);
1154

1155
    switch (enc) {
1156
	case XML_CHAR_ENCODING_NONE:
1157
	case XML_CHAR_ENCODING_UTF8:
1158
        case XML_CHAR_ENCODING_ASCII:
1159
            check = 0;
1160
            break;
1161
        case XML_CHAR_ENCODING_EBCDIC:
1162
            handler = xmlDetectEBCDIC(ctxt->input);
1163
            break;
1164
        default:
1165
            handler = xmlGetCharEncodingHandler(enc);
1166
            break;
1167
    }
1168

1169
    if ((check) && (handler == NULL)) {
1170
        const char *name = xmlGetCharEncodingName(enc);
1171

1172
        __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1173
                "encoding not supported: %s\n",
1174
                BAD_CAST (name ? name : "<null>"), NULL);
1175
        /*
1176
         * TODO: We could recover from errors in external entities
1177
         * if we didn't stop the parser. But most callers of this
1178
         * function don't check the return value.
1179
         */
1180
        xmlStopParser(ctxt);
1181
        return(-1);
1182
    }
1183

1184
    ret = xmlSwitchInputEncoding(ctxt, ctxt->input, handler);
1185

1186
    if ((ret >= 0) && (enc == XML_CHAR_ENCODING_NONE)) {
1187
        ctxt->input->flags &= ~XML_INPUT_HAS_ENCODING;
1188
    }
1189

1190
    return(ret);
1191
}
1192

1193
/**
1194
 * xmlSwitchInputEncoding:
1195
 * @ctxt:  the parser context
1196
 * @input:  the input stream
1197
 * @handler:  the encoding handler
1198
 *
1199
 * DEPRECATED: Internal function, don't use.
1200
 *
1201
 * Use encoding handler to decode input data.
1202
 *
1203
 * Returns 0 in case of success, -1 otherwise
1204
 */
1205
int
1206
xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
1207
                       xmlCharEncodingHandlerPtr handler)
1208
{
1209
    int nbchars;
1210
    xmlParserInputBufferPtr in;
1211

1212
    if ((input == NULL) || (input->buf == NULL)) {
1213
        xmlCharEncCloseFunc(handler);
1214
	return (-1);
1215
    }
1216
    in = input->buf;
1217

1218
    input->flags |= XML_INPUT_HAS_ENCODING;
1219

1220
    /*
1221
     * UTF-8 requires no encoding handler.
1222
     */
1223
    if ((handler != NULL) &&
1224
        (xmlStrcasecmp(BAD_CAST handler->name, BAD_CAST "UTF-8") == 0)) {
1225
        xmlCharEncCloseFunc(handler);
1226
        handler = NULL;
1227
    }
1228

1229
    if (in->encoder == handler)
1230
        return (0);
1231

1232
    if (in->encoder != NULL) {
1233
        /*
1234
         * Switching encodings during parsing is a really bad idea,
1235
         * but Chromium can switch between ISO-8859-1 and UTF-16 before
1236
         * separate calls to xmlParseChunk.
1237
         *
1238
         * TODO: We should check whether the "raw" input buffer is empty and
1239
         * convert the old content using the old encoder.
1240
         */
1241

1242
        xmlCharEncCloseFunc(in->encoder);
1243
        in->encoder = handler;
1244
        return (0);
1245
    }
1246

1247
    in->encoder = handler;
1248

1249
    /*
1250
     * Is there already some content down the pipe to convert ?
1251
     */
1252
    if (xmlBufIsEmpty(in->buffer) == 0) {
1253
        size_t processed;
1254

1255
        /*
1256
         * Shrink the current input buffer.
1257
         * Move it as the raw buffer and create a new input buffer
1258
         */
1259
        processed = input->cur - input->base;
1260
        xmlBufShrink(in->buffer, processed);
1261
        input->consumed += processed;
1262
        in->raw = in->buffer;
1263
        in->buffer = xmlBufCreate();
1264
        in->rawconsumed = processed;
1265

1266
        nbchars = xmlCharEncInput(in);
1267
        xmlBufResetInput(in->buffer, input);
1268
        if (nbchars < 0) {
1269
            /* TODO: This could be an out of memory or an encoding error. */
1270
            xmlErrInternal(ctxt,
1271
                           "switching encoding: encoder error\n",
1272
                           NULL);
1273
            xmlHaltParser(ctxt);
1274
            return (-1);
1275
        }
1276
    }
1277
    return (0);
1278
}
1279

1280
/**
1281
 * xmlSwitchToEncoding:
1282
 * @ctxt:  the parser context
1283
 * @handler:  the encoding handler
1284
 *
1285
 * Use encoding handler to decode input data.
1286
 *
1287
 * This function can be used to enforce the encoding of chunks passed
1288
 * to xmlParseChunk.
1289
 *
1290
 * Returns 0 in case of success, -1 otherwise
1291
 */
1292
int
1293
xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler)
1294
{
1295
    if (ctxt == NULL)
1296
        return(-1);
1297
    return(xmlSwitchInputEncoding(ctxt, ctxt->input, handler));
1298
}
1299

1300
/**
1301
 * xmlDetectEncoding:
1302
 * @ctxt:  the parser context
1303
 *
1304
 * Handle optional BOM, detect and switch to encoding.
1305
 *
1306
 * Assumes that there are at least four bytes in the input buffer.
1307
 */
1308
void
1309
xmlDetectEncoding(xmlParserCtxtPtr ctxt) {
1310
    const xmlChar *in;
1311
    xmlCharEncoding enc;
1312
    int bomSize;
1313
    int autoFlag = 0;
1314

1315
    if (xmlParserGrow(ctxt) < 0)
1316
        return;
1317
    in = ctxt->input->cur;
1318
    if (ctxt->input->end - in < 4)
1319
        return;
1320

1321
    if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1322
        /*
1323
         * If the encoding was already set, only skip the BOM which was
1324
         * possibly decoded to UTF-8.
1325
         */
1326
        if ((in[0] == 0xEF) && (in[1] == 0xBB) && (in[2] == 0xBF)) {
1327
            ctxt->input->cur += 3;
1328
        }
1329

1330
        return;
1331
    }
1332

1333
    enc = XML_CHAR_ENCODING_NONE;
1334
    bomSize = 0;
1335

1336
    switch (in[0]) {
1337
        case 0x00:
1338
            if ((in[1] == 0x00) && (in[2] == 0x00) && (in[3] == 0x3C)) {
1339
                enc = XML_CHAR_ENCODING_UCS4BE;
1340
                autoFlag = XML_INPUT_AUTO_OTHER;
1341
            } else if ((in[1] == 0x3C) && (in[2] == 0x00) && (in[3] == 0x3F)) {
1342
                enc = XML_CHAR_ENCODING_UTF16BE;
1343
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1344
            }
1345
            break;
1346

1347
        case 0x3C:
1348
            if (in[1] == 0x00) {
1349
                if ((in[2] == 0x00) && (in[3] == 0x00)) {
1350
                    enc = XML_CHAR_ENCODING_UCS4LE;
1351
                    autoFlag = XML_INPUT_AUTO_OTHER;
1352
                } else if ((in[2] == 0x3F) && (in[3] == 0x00)) {
1353
                    enc = XML_CHAR_ENCODING_UTF16LE;
1354
                    autoFlag = XML_INPUT_AUTO_UTF16LE;
1355
                }
1356
            }
1357
            break;
1358

1359
        case 0x4C:
1360
	    if ((in[1] == 0x6F) && (in[2] == 0xA7) && (in[3] == 0x94)) {
1361
	        enc = XML_CHAR_ENCODING_EBCDIC;
1362
                autoFlag = XML_INPUT_AUTO_OTHER;
1363
            }
1364
            break;
1365

1366
        case 0xEF:
1367
            if ((in[1] == 0xBB) && (in[2] == 0xBF)) {
1368
                enc = XML_CHAR_ENCODING_UTF8;
1369
                autoFlag = XML_INPUT_AUTO_UTF8;
1370
                bomSize = 3;
1371
            }
1372
            break;
1373

1374
        case 0xFE:
1375
            if (in[1] == 0xFF) {
1376
                enc = XML_CHAR_ENCODING_UTF16BE;
1377
                autoFlag = XML_INPUT_AUTO_UTF16BE;
1378
                bomSize = 2;
1379
            }
1380
            break;
1381

1382
        case 0xFF:
1383
            if (in[1] == 0xFE) {
1384
                enc = XML_CHAR_ENCODING_UTF16LE;
1385
                autoFlag = XML_INPUT_AUTO_UTF16LE;
1386
                bomSize = 2;
1387
            }
1388
            break;
1389
    }
1390

1391
    if (bomSize > 0) {
1392
        ctxt->input->cur += bomSize;
1393
    }
1394

1395
    if (enc != XML_CHAR_ENCODING_NONE) {
1396
        ctxt->input->flags |= autoFlag;
1397
        xmlSwitchEncoding(ctxt, enc);
1398
    }
1399
}
1400

1401
/**
1402
 * xmlSetDeclaredEncoding:
1403
 * @ctxt:  the parser context
1404
 * @encoding:  declared encoding
1405
 *
1406
 * Set the encoding from a declaration in the document.
1407
 *
1408
 * If no encoding was set yet, switch the encoding. Otherwise, only warn
1409
 * about encoding mismatches.
1410
 *
1411
 * Takes ownership of 'encoding'.
1412
 */
1413
void
1414
xmlSetDeclaredEncoding(xmlParserCtxtPtr ctxt, xmlChar *encoding) {
1415
    if (ctxt->encoding != NULL)
1416
        xmlFree((xmlChar *) ctxt->encoding);
1417
    ctxt->encoding = encoding;
1418

1419
    if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
1420
        ((ctxt->options & XML_PARSE_IGNORE_ENC) == 0)) {
1421
        xmlCharEncodingHandlerPtr handler;
1422

1423
        handler = xmlFindCharEncodingHandler((const char *) encoding);
1424
        if (handler == NULL) {
1425
            __xmlErrEncoding(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
1426
                             "Unsupported encoding: %s\n",
1427
                             encoding, NULL);
1428
            return;
1429
        }
1430

1431
        xmlSwitchToEncoding(ctxt, handler);
1432
        ctxt->input->flags |= XML_INPUT_USES_ENC_DECL;
1433
    } else if (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1434
        static const char *allowedUTF8[] = {
1435
            "UTF-8", "UTF8", NULL
1436
        };
1437
        static const char *allowedUTF16LE[] = {
1438
            "UTF-16", "UTF-16LE", "UTF16", NULL
1439
        };
1440
        static const char *allowedUTF16BE[] = {
1441
            "UTF-16", "UTF-16BE", "UTF16", NULL
1442
        };
1443
        const char **allowed = NULL;
1444
        const char *autoEnc = NULL;
1445

1446
        switch (ctxt->input->flags & XML_INPUT_AUTO_ENCODING) {
1447
            case XML_INPUT_AUTO_UTF8:
1448
                allowed = allowedUTF8;
1449
                autoEnc = "UTF-8";
1450
                break;
1451
            case XML_INPUT_AUTO_UTF16LE:
1452
                allowed = allowedUTF16LE;
1453
                autoEnc = "UTF-16LE";
1454
                break;
1455
            case XML_INPUT_AUTO_UTF16BE:
1456
                allowed = allowedUTF16BE;
1457
                autoEnc = "UTF-16BE";
1458
                break;
1459
        }
1460

1461
        if (allowed != NULL) {
1462
            const char **p;
1463
            int match = 0;
1464

1465
            for (p = allowed; *p != NULL; p++) {
1466
                if (xmlStrcasecmp(encoding, BAD_CAST *p) == 0) {
1467
                    match = 1;
1468
                    break;
1469
                }
1470
            }
1471

1472
            if (match == 0) {
1473
                xmlWarningMsg(ctxt, XML_WAR_ENCODING_MISMATCH,
1474
                              "Encoding '%s' doesn't match "
1475
                              "auto-detected '%s'\n",
1476
                              encoding, BAD_CAST autoEnc);
1477
            }
1478
        }
1479
    }
1480
}
1481

1482
/**
1483
 * xmlGetActualEncoding:
1484
 * @ctxt:  the parser context
1485
 *
1486
 * Returns the actual used to parse the document. This can differ from
1487
 * the declared encoding.
1488
 */
1489
const xmlChar *
1490
xmlGetActualEncoding(xmlParserCtxtPtr ctxt) {
1491
    const xmlChar *encoding = NULL;
1492

1493
    if ((ctxt->input->flags & XML_INPUT_USES_ENC_DECL) ||
1494
        (ctxt->input->flags & XML_INPUT_AUTO_ENCODING)) {
1495
        /* Preserve encoding exactly */
1496
        encoding = ctxt->encoding;
1497
    } else if ((ctxt->input->buf) && (ctxt->input->buf->encoder)) {
1498
        encoding = BAD_CAST ctxt->input->buf->encoder->name;
1499
    } else if (ctxt->input->flags & XML_INPUT_HAS_ENCODING) {
1500
        encoding = BAD_CAST "UTF-8";
1501
    }
1502

1503
    return(encoding);
1504
}
1505

1506
/************************************************************************
1507
 *									*
1508
 *	Commodity functions to handle entities processing		*
1509
 *									*
1510
 ************************************************************************/
1511

1512
/**
1513
 * xmlFreeInputStream:
1514
 * @input:  an xmlParserInputPtr
1515
 *
1516
 * Free up an input stream.
1517
 */
1518
void
1519
xmlFreeInputStream(xmlParserInputPtr input) {
1520
    if (input == NULL) return;
1521

1522
    if (input->filename != NULL) xmlFree((char *) input->filename);
1523
    if (input->directory != NULL) xmlFree((char *) input->directory);
1524
    if (input->version != NULL) xmlFree((char *) input->version);
1525
    if ((input->free != NULL) && (input->base != NULL))
1526
        input->free((xmlChar *) input->base);
1527
    if (input->buf != NULL)
1528
        xmlFreeParserInputBuffer(input->buf);
1529
    xmlFree(input);
1530
}
1531

1532
/**
1533
 * xmlNewInputStream:
1534
 * @ctxt:  an XML parser context
1535
 *
1536
 * Create a new input stream structure.
1537
 *
1538
 * Returns the new input stream or NULL
1539
 */
1540
xmlParserInputPtr
1541
xmlNewInputStream(xmlParserCtxtPtr ctxt) {
1542
    xmlParserInputPtr input;
1543

1544
    input = (xmlParserInputPtr) xmlMalloc(sizeof(xmlParserInput));
1545
    if (input == NULL) {
1546
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1547
	return(NULL);
1548
    }
1549
    memset(input, 0, sizeof(xmlParserInput));
1550
    input->line = 1;
1551
    input->col = 1;
1552

1553
    /*
1554
     * If the context is NULL the id cannot be initialized, but that
1555
     * should not happen while parsing which is the situation where
1556
     * the id is actually needed.
1557
     */
1558
    if (ctxt != NULL) {
1559
        if (input->id >= INT_MAX) {
1560
            xmlErrMemory(ctxt, "Input ID overflow\n");
1561
            return(NULL);
1562
        }
1563
        input->id = ctxt->input_id++;
1564
    }
1565

1566
    return(input);
1567
}
1568

1569
/**
1570
 * xmlNewIOInputStream:
1571
 * @ctxt:  an XML parser context
1572
 * @input:  an I/O Input
1573
 * @enc:  the charset encoding if known
1574
 *
1575
 * Create a new input stream structure encapsulating the @input into
1576
 * a stream suitable for the parser.
1577
 *
1578
 * Returns the new input stream or NULL
1579
 */
1580
xmlParserInputPtr
1581
xmlNewIOInputStream(xmlParserCtxtPtr ctxt, xmlParserInputBufferPtr input,
1582
	            xmlCharEncoding enc) {
1583
    xmlParserInputPtr inputStream;
1584

1585
    if (input == NULL) return(NULL);
1586
    if (xmlParserDebugEntities)
1587
	xmlGenericError(xmlGenericErrorContext, "new input from I/O\n");
1588
    inputStream = xmlNewInputStream(ctxt);
1589
    if (inputStream == NULL) {
1590
	return(NULL);
1591
    }
1592
    inputStream->filename = NULL;
1593
    inputStream->buf = input;
1594
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1595

1596
    if (enc != XML_CHAR_ENCODING_NONE) {
1597
        xmlSwitchEncoding(ctxt, enc);
1598
    }
1599

1600
    return(inputStream);
1601
}
1602

1603
/**
1604
 * xmlNewEntityInputStream:
1605
 * @ctxt:  an XML parser context
1606
 * @entity:  an Entity pointer
1607
 *
1608
 * DEPRECATED: Internal function, do not use.
1609
 *
1610
 * Create a new input stream based on an xmlEntityPtr
1611
 *
1612
 * Returns the new input stream or NULL
1613
 */
1614
xmlParserInputPtr
1615
xmlNewEntityInputStream(xmlParserCtxtPtr ctxt, xmlEntityPtr entity) {
1616
    xmlParserInputPtr input;
1617

1618
    if (entity == NULL) {
1619
        xmlErrInternal(ctxt, "xmlNewEntityInputStream entity = NULL\n",
1620
	               NULL);
1621
	return(NULL);
1622
    }
1623
    if (xmlParserDebugEntities)
1624
	xmlGenericError(xmlGenericErrorContext,
1625
		"new input from entity: %s\n", entity->name);
1626
    if (entity->content == NULL) {
1627
	switch (entity->etype) {
1628
            case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
1629
	        xmlErrInternal(ctxt, "Cannot parse entity %s\n",
1630
		               entity->name);
1631
                break;
1632
            case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
1633
            case XML_EXTERNAL_PARAMETER_ENTITY:
1634
		input = xmlLoadExternalEntity((char *) entity->URI,
1635
		       (char *) entity->ExternalID, ctxt);
1636
                if (input != NULL)
1637
                    input->entity = entity;
1638
                return(input);
1639
            case XML_INTERNAL_GENERAL_ENTITY:
1640
	        xmlErrInternal(ctxt,
1641
		      "Internal entity %s without content !\n",
1642
		               entity->name);
1643
                break;
1644
            case XML_INTERNAL_PARAMETER_ENTITY:
1645
	        xmlErrInternal(ctxt,
1646
		      "Internal parameter entity %s without content !\n",
1647
		               entity->name);
1648
                break;
1649
            case XML_INTERNAL_PREDEFINED_ENTITY:
1650
	        xmlErrInternal(ctxt,
1651
		      "Predefined entity %s without content !\n",
1652
		               entity->name);
1653
                break;
1654
	}
1655
	return(NULL);
1656
    }
1657
    input = xmlNewInputStream(ctxt);
1658
    if (input == NULL) {
1659
	return(NULL);
1660
    }
1661
    if (entity->URI != NULL)
1662
	input->filename = (char *) xmlStrdup((xmlChar *) entity->URI);
1663
    input->base = entity->content;
1664
    if (entity->length == 0)
1665
        entity->length = xmlStrlen(entity->content);
1666
    input->cur = entity->content;
1667
    input->length = entity->length;
1668
    input->end = &entity->content[input->length];
1669
    input->entity = entity;
1670
    return(input);
1671
}
1672

1673
/**
1674
 * xmlNewStringInputStream:
1675
 * @ctxt:  an XML parser context
1676
 * @buffer:  an memory buffer
1677
 *
1678
 * Create a new input stream based on a memory buffer.
1679
 * Returns the new input stream
1680
 */
1681
xmlParserInputPtr
1682
xmlNewStringInputStream(xmlParserCtxtPtr ctxt, const xmlChar *buffer) {
1683
    xmlParserInputPtr input;
1684
    xmlParserInputBufferPtr buf;
1685

1686
    if (buffer == NULL) {
1687
        xmlErrInternal(ctxt, "xmlNewStringInputStream string = NULL\n",
1688
	               NULL);
1689
	return(NULL);
1690
    }
1691
    if (xmlParserDebugEntities)
1692
	xmlGenericError(xmlGenericErrorContext,
1693
		"new fixed input: %.30s\n", buffer);
1694
    buf = xmlParserInputBufferCreateString(buffer);
1695
    if (buf == NULL) {
1696
	xmlErrMemory(ctxt, NULL);
1697
        return(NULL);
1698
    }
1699
    input = xmlNewInputStream(ctxt);
1700
    if (input == NULL) {
1701
        xmlErrMemory(ctxt,  "couldn't allocate a new input stream\n");
1702
	xmlFreeParserInputBuffer(buf);
1703
	return(NULL);
1704
    }
1705
    input->buf = buf;
1706
    xmlBufResetInput(input->buf->buffer, input);
1707
    return(input);
1708
}
1709

1710
/**
1711
 * xmlNewInputFromFile:
1712
 * @ctxt:  an XML parser context
1713
 * @filename:  the filename to use as entity
1714
 *
1715
 * Create a new input stream based on a file or an URL.
1716
 *
1717
 * Returns the new input stream or NULL in case of error
1718
 */
1719
xmlParserInputPtr
1720
xmlNewInputFromFile(xmlParserCtxtPtr ctxt, const char *filename) {
1721
    xmlParserInputBufferPtr buf;
1722
    xmlParserInputPtr inputStream;
1723
    char *directory = NULL;
1724
    xmlChar *URI = NULL;
1725

1726
    if (xmlParserDebugEntities)
1727
	xmlGenericError(xmlGenericErrorContext,
1728
		"new input from file: %s\n", filename);
1729
    if (ctxt == NULL) return(NULL);
1730
    buf = xmlParserInputBufferCreateFilename(filename, XML_CHAR_ENCODING_NONE);
1731
    if (buf == NULL) {
1732
	if (filename == NULL)
1733
	    __xmlLoaderErr(ctxt,
1734
	                   "failed to load external entity: NULL filename \n",
1735
			   NULL);
1736
	else
1737
	    __xmlLoaderErr(ctxt, "failed to load external entity \"%s\"\n",
1738
			   (const char *) filename);
1739
	return(NULL);
1740
    }
1741

1742
    inputStream = xmlNewInputStream(ctxt);
1743
    if (inputStream == NULL) {
1744
	xmlFreeParserInputBuffer(buf);
1745
	return(NULL);
1746
    }
1747

1748
    inputStream->buf = buf;
1749
    inputStream = xmlCheckHTTPInput(ctxt, inputStream);
1750
    if (inputStream == NULL)
1751
        return(NULL);
1752

1753
    if (inputStream->filename == NULL)
1754
	URI = xmlStrdup((xmlChar *) filename);
1755
    else
1756
	URI = xmlStrdup((xmlChar *) inputStream->filename);
1757
    directory = xmlParserGetDirectory((const char *) URI);
1758
    if (inputStream->filename != NULL) xmlFree((char *)inputStream->filename);
1759
    inputStream->filename = (char *) xmlCanonicPath((const xmlChar *) URI);
1760
    if (URI != NULL) xmlFree((char *) URI);
1761
    inputStream->directory = directory;
1762

1763
    xmlBufResetInput(inputStream->buf->buffer, inputStream);
1764
    if ((ctxt->directory == NULL) && (directory != NULL))
1765
        ctxt->directory = (char *) xmlStrdup((const xmlChar *) directory);
1766
    return(inputStream);
1767
}
1768

1769
/************************************************************************
1770
 *									*
1771
 *		Commodity functions to handle parser contexts		*
1772
 *									*
1773
 ************************************************************************/
1774

1775
/**
1776
 * xmlInitSAXParserCtxt:
1777
 * @ctxt:  XML parser context
1778
 * @sax:  SAX handlert
1779
 * @userData:  user data
1780
 *
1781
 * Initialize a SAX parser context
1782
 *
1783
 * Returns 0 in case of success and -1 in case of error
1784
 */
1785

1786
static int
1787
xmlInitSAXParserCtxt(xmlParserCtxtPtr ctxt, const xmlSAXHandler *sax,
1788
                     void *userData)
1789
{
1790
    xmlParserInputPtr input;
1791

1792
    if(ctxt==NULL) {
1793
        xmlErrInternal(NULL, "Got NULL parser context\n", NULL);
1794
        return(-1);
1795
    }
1796

1797
    xmlInitParser();
1798

1799
    if (ctxt->dict == NULL)
1800
	ctxt->dict = xmlDictCreate();
1801
    if (ctxt->dict == NULL) {
1802
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1803
	return(-1);
1804
    }
1805
    xmlDictSetLimit(ctxt->dict, XML_MAX_DICTIONARY_LIMIT);
1806

1807
    if (ctxt->sax == NULL)
1808
	ctxt->sax = (xmlSAXHandler *) xmlMalloc(sizeof(xmlSAXHandler));
1809
    if (ctxt->sax == NULL) {
1810
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1811
	return(-1);
1812
    }
1813
    if (sax == NULL) {
1814
	memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1815
        xmlSAXVersion(ctxt->sax, 2);
1816
        ctxt->userData = ctxt;
1817
    } else {
1818
	if (sax->initialized == XML_SAX2_MAGIC) {
1819
	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandler));
1820
        } else {
1821
	    memset(ctxt->sax, 0, sizeof(xmlSAXHandler));
1822
	    memcpy(ctxt->sax, sax, sizeof(xmlSAXHandlerV1));
1823
        }
1824
        ctxt->userData = userData ? userData : ctxt;
1825
    }
1826

1827
    ctxt->maxatts = 0;
1828
    ctxt->atts = NULL;
1829
    /* Allocate the Input stack */
1830
    if (ctxt->inputTab == NULL) {
1831
	ctxt->inputTab = (xmlParserInputPtr *)
1832
		    xmlMalloc(5 * sizeof(xmlParserInputPtr));
1833
	ctxt->inputMax = 5;
1834
    }
1835
    if (ctxt->inputTab == NULL) {
1836
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1837
	ctxt->inputNr = 0;
1838
	ctxt->inputMax = 0;
1839
	ctxt->input = NULL;
1840
	return(-1);
1841
    }
1842
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
1843
        xmlFreeInputStream(input);
1844
    }
1845
    ctxt->inputNr = 0;
1846
    ctxt->input = NULL;
1847

1848
    ctxt->version = NULL;
1849
    ctxt->encoding = NULL;
1850
    ctxt->standalone = -1;
1851
    ctxt->hasExternalSubset = 0;
1852
    ctxt->hasPErefs = 0;
1853
    ctxt->html = 0;
1854
    ctxt->external = 0;
1855
    ctxt->instate = XML_PARSER_START;
1856
    ctxt->token = 0;
1857
    ctxt->directory = NULL;
1858

1859
    /* Allocate the Node stack */
1860
    if (ctxt->nodeTab == NULL) {
1861
	ctxt->nodeTab = (xmlNodePtr *) xmlMalloc(10 * sizeof(xmlNodePtr));
1862
	ctxt->nodeMax = 10;
1863
    }
1864
    if (ctxt->nodeTab == NULL) {
1865
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1866
	ctxt->nodeNr = 0;
1867
	ctxt->nodeMax = 0;
1868
	ctxt->node = NULL;
1869
	ctxt->inputNr = 0;
1870
	ctxt->inputMax = 0;
1871
	ctxt->input = NULL;
1872
	return(-1);
1873
    }
1874
    ctxt->nodeNr = 0;
1875
    ctxt->node = NULL;
1876

1877
    /* Allocate the Name stack */
1878
    if (ctxt->nameTab == NULL) {
1879
	ctxt->nameTab = (const xmlChar **) xmlMalloc(10 * sizeof(xmlChar *));
1880
	ctxt->nameMax = 10;
1881
    }
1882
    if (ctxt->nameTab == NULL) {
1883
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1884
	ctxt->nodeNr = 0;
1885
	ctxt->nodeMax = 0;
1886
	ctxt->node = NULL;
1887
	ctxt->inputNr = 0;
1888
	ctxt->inputMax = 0;
1889
	ctxt->input = NULL;
1890
	ctxt->nameNr = 0;
1891
	ctxt->nameMax = 0;
1892
	ctxt->name = NULL;
1893
	return(-1);
1894
    }
1895
    ctxt->nameNr = 0;
1896
    ctxt->name = NULL;
1897

1898
    /* Allocate the space stack */
1899
    if (ctxt->spaceTab == NULL) {
1900
	ctxt->spaceTab = (int *) xmlMalloc(10 * sizeof(int));
1901
	ctxt->spaceMax = 10;
1902
    }
1903
    if (ctxt->spaceTab == NULL) {
1904
        xmlErrMemory(NULL, "cannot initialize parser context\n");
1905
	ctxt->nodeNr = 0;
1906
	ctxt->nodeMax = 0;
1907
	ctxt->node = NULL;
1908
	ctxt->inputNr = 0;
1909
	ctxt->inputMax = 0;
1910
	ctxt->input = NULL;
1911
	ctxt->nameNr = 0;
1912
	ctxt->nameMax = 0;
1913
	ctxt->name = NULL;
1914
	ctxt->spaceNr = 0;
1915
	ctxt->spaceMax = 0;
1916
	ctxt->space = NULL;
1917
	return(-1);
1918
    }
1919
    ctxt->spaceNr = 1;
1920
    ctxt->spaceMax = 10;
1921
    ctxt->spaceTab[0] = -1;
1922
    ctxt->space = &ctxt->spaceTab[0];
1923
    ctxt->myDoc = NULL;
1924
    ctxt->wellFormed = 1;
1925
    ctxt->nsWellFormed = 1;
1926
    ctxt->valid = 1;
1927
    ctxt->loadsubset = xmlLoadExtDtdDefaultValue;
1928
    if (ctxt->loadsubset) {
1929
        ctxt->options |= XML_PARSE_DTDLOAD;
1930
    }
1931
    ctxt->validate = xmlDoValidityCheckingDefaultValue;
1932
    ctxt->pedantic = xmlPedanticParserDefaultValue;
1933
    if (ctxt->pedantic) {
1934
        ctxt->options |= XML_PARSE_PEDANTIC;
1935
    }
1936
    ctxt->linenumbers = xmlLineNumbersDefaultValue;
1937
    ctxt->keepBlanks = xmlKeepBlanksDefaultValue;
1938
    if (ctxt->keepBlanks == 0) {
1939
	ctxt->sax->ignorableWhitespace = xmlSAX2IgnorableWhitespace;
1940
	ctxt->options |= XML_PARSE_NOBLANKS;
1941
    }
1942

1943
    ctxt->vctxt.flags = XML_VCTXT_USE_PCTXT;
1944
    ctxt->vctxt.userData = ctxt;
1945
    ctxt->vctxt.error = xmlParserValidityError;
1946
    ctxt->vctxt.warning = xmlParserValidityWarning;
1947
    if (ctxt->validate) {
1948
	if (xmlGetWarningsDefaultValue == 0)
1949
	    ctxt->vctxt.warning = NULL;
1950
	else
1951
	    ctxt->vctxt.warning = xmlParserValidityWarning;
1952
	ctxt->vctxt.nodeMax = 0;
1953
        ctxt->options |= XML_PARSE_DTDVALID;
1954
    }
1955
    ctxt->replaceEntities = xmlSubstituteEntitiesDefaultValue;
1956
    if (ctxt->replaceEntities) {
1957
        ctxt->options |= XML_PARSE_NOENT;
1958
    }
1959
    ctxt->record_info = 0;
1960
    ctxt->checkIndex = 0;
1961
    ctxt->inSubset = 0;
1962
    ctxt->errNo = XML_ERR_OK;
1963
    ctxt->depth = 0;
1964
    ctxt->catalogs = NULL;
1965
    ctxt->sizeentities = 0;
1966
    ctxt->sizeentcopy = 0;
1967
    ctxt->input_id = 1;
1968
    ctxt->maxAmpl = XML_MAX_AMPLIFICATION_DEFAULT;
1969
    xmlInitNodeInfoSeq(&ctxt->node_seq);
1970

1971
    if (ctxt->nsdb == NULL) {
1972
        ctxt->nsdb = xmlParserNsCreate();
1973
        if (ctxt->nsdb == NULL) {
1974
            xmlErrMemory(ctxt, NULL);
1975
            return(-1);
1976
        }
1977
    }
1978

1979
    return(0);
1980
}
1981

1982
/**
1983
 * xmlInitParserCtxt:
1984
 * @ctxt:  an XML parser context
1985
 *
1986
 * DEPRECATED: Internal function which will be made private in a future
1987
 * version.
1988
 *
1989
 * Initialize a parser context
1990
 *
1991
 * Returns 0 in case of success and -1 in case of error
1992
 */
1993

1994
int
1995
xmlInitParserCtxt(xmlParserCtxtPtr ctxt)
1996
{
1997
    return(xmlInitSAXParserCtxt(ctxt, NULL, NULL));
1998
}
1999

2000
/**
2001
 * xmlFreeParserCtxt:
2002
 * @ctxt:  an XML parser context
2003
 *
2004
 * Free all the memory used by a parser context. However the parsed
2005
 * document in ctxt->myDoc is not freed.
2006
 */
2007

2008
void
2009
xmlFreeParserCtxt(xmlParserCtxtPtr ctxt)
2010
{
2011
    xmlParserInputPtr input;
2012

2013
    if (ctxt == NULL) return;
2014

2015
    while ((input = inputPop(ctxt)) != NULL) { /* Non consuming */
2016
        xmlFreeInputStream(input);
2017
    }
2018
    if (ctxt->spaceTab != NULL) xmlFree(ctxt->spaceTab);
2019
    if (ctxt->nameTab != NULL) xmlFree((xmlChar * *)ctxt->nameTab);
2020
    if (ctxt->nodeTab != NULL) xmlFree(ctxt->nodeTab);
2021
    if (ctxt->nodeInfoTab != NULL) xmlFree(ctxt->nodeInfoTab);
2022
    if (ctxt->inputTab != NULL) xmlFree(ctxt->inputTab);
2023
    if (ctxt->version != NULL) xmlFree((char *) ctxt->version);
2024
    if (ctxt->encoding != NULL) xmlFree((char *) ctxt->encoding);
2025
    if (ctxt->extSubURI != NULL) xmlFree((char *) ctxt->extSubURI);
2026
    if (ctxt->extSubSystem != NULL) xmlFree((char *) ctxt->extSubSystem);
2027
#ifdef LIBXML_SAX1_ENABLED
2028
    if ((ctxt->sax != NULL) &&
2029
        (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler))
2030
#else
2031
    if (ctxt->sax != NULL)
2032
#endif /* LIBXML_SAX1_ENABLED */
2033
        xmlFree(ctxt->sax);
2034
    if (ctxt->directory != NULL) xmlFree((char *) ctxt->directory);
2035
    if (ctxt->vctxt.nodeTab != NULL) xmlFree(ctxt->vctxt.nodeTab);
2036
    if (ctxt->atts != NULL) xmlFree((xmlChar * *)ctxt->atts);
2037
    if (ctxt->dict != NULL) xmlDictFree(ctxt->dict);
2038
    if (ctxt->nsTab != NULL) xmlFree(ctxt->nsTab);
2039
    if (ctxt->nsdb != NULL) xmlParserNsFree(ctxt->nsdb);
2040
    if (ctxt->attrHash != NULL) xmlFree(ctxt->attrHash);
2041
    if (ctxt->pushTab != NULL) xmlFree(ctxt->pushTab);
2042
    if (ctxt->attallocs != NULL) xmlFree(ctxt->attallocs);
2043
    if (ctxt->attsDefault != NULL)
2044
        xmlHashFree(ctxt->attsDefault, xmlHashDefaultDeallocator);
2045
    if (ctxt->attsSpecial != NULL)
2046
        xmlHashFree(ctxt->attsSpecial, NULL);
2047
    if (ctxt->freeElems != NULL) {
2048
        xmlNodePtr cur, next;
2049

2050
	cur = ctxt->freeElems;
2051
	while (cur != NULL) {
2052
	    next = cur->next;
2053
	    xmlFree(cur);
2054
	    cur = next;
2055
	}
2056
    }
2057
    if (ctxt->freeAttrs != NULL) {
2058
        xmlAttrPtr cur, next;
2059

2060
	cur = ctxt->freeAttrs;
2061
	while (cur != NULL) {
2062
	    next = cur->next;
2063
	    xmlFree(cur);
2064
	    cur = next;
2065
	}
2066
    }
2067
    /*
2068
     * cleanup the error strings
2069
     */
2070
    if (ctxt->lastError.message != NULL)
2071
        xmlFree(ctxt->lastError.message);
2072
    if (ctxt->lastError.file != NULL)
2073
        xmlFree(ctxt->lastError.file);
2074
    if (ctxt->lastError.str1 != NULL)
2075
        xmlFree(ctxt->lastError.str1);
2076
    if (ctxt->lastError.str2 != NULL)
2077
        xmlFree(ctxt->lastError.str2);
2078
    if (ctxt->lastError.str3 != NULL)
2079
        xmlFree(ctxt->lastError.str3);
2080

2081
#ifdef LIBXML_CATALOG_ENABLED
2082
    if (ctxt->catalogs != NULL)
2083
	xmlCatalogFreeLocal(ctxt->catalogs);
2084
#endif
2085
    xmlFree(ctxt);
2086
}
2087

2088
/**
2089
 * xmlNewParserCtxt:
2090
 *
2091
 * Allocate and initialize a new parser context.
2092
 *
2093
 * Returns the xmlParserCtxtPtr or NULL
2094
 */
2095

2096
xmlParserCtxtPtr
2097
xmlNewParserCtxt(void)
2098
{
2099
    return(xmlNewSAXParserCtxt(NULL, NULL));
2100
}
2101

2102
/**
2103
 * xmlNewSAXParserCtxt:
2104
 * @sax:  SAX handler
2105
 * @userData:  user data
2106
 *
2107
 * Allocate and initialize a new SAX parser context. If userData is NULL,
2108
 * the parser context will be passed as user data.
2109
 *
2110
 * Returns the xmlParserCtxtPtr or NULL if memory allocation failed.
2111
 */
2112

2113
xmlParserCtxtPtr
2114
xmlNewSAXParserCtxt(const xmlSAXHandler *sax, void *userData)
2115
{
2116
    xmlParserCtxtPtr ctxt;
2117

2118
    ctxt = (xmlParserCtxtPtr) xmlMalloc(sizeof(xmlParserCtxt));
2119
    if (ctxt == NULL) {
2120
	xmlErrMemory(NULL, "cannot allocate parser context\n");
2121
	return(NULL);
2122
    }
2123
    memset(ctxt, 0, sizeof(xmlParserCtxt));
2124
    if (xmlInitSAXParserCtxt(ctxt, sax, userData) < 0) {
2125
        xmlFreeParserCtxt(ctxt);
2126
	return(NULL);
2127
    }
2128
    return(ctxt);
2129
}
2130

2131
/************************************************************************
2132
 *									*
2133
 *		Handling of node information				*
2134
 *									*
2135
 ************************************************************************/
2136

2137
/**
2138
 * xmlClearParserCtxt:
2139
 * @ctxt:  an XML parser context
2140
 *
2141
 * Clear (release owned resources) and reinitialize a parser context
2142
 */
2143

2144
void
2145
xmlClearParserCtxt(xmlParserCtxtPtr ctxt)
2146
{
2147
  if (ctxt==NULL)
2148
    return;
2149
  xmlClearNodeInfoSeq(&ctxt->node_seq);
2150
  xmlCtxtReset(ctxt);
2151
}
2152

2153

2154
/**
2155
 * xmlParserFindNodeInfo:
2156
 * @ctx:  an XML parser context
2157
 * @node:  an XML node within the tree
2158
 *
2159
 * DEPRECATED: Don't use.
2160
 *
2161
 * Find the parser node info struct for a given node
2162
 *
2163
 * Returns an xmlParserNodeInfo block pointer or NULL
2164
 */
2165
const xmlParserNodeInfo *
2166
xmlParserFindNodeInfo(const xmlParserCtxtPtr ctx, const xmlNodePtr node)
2167
{
2168
    unsigned long pos;
2169

2170
    if ((ctx == NULL) || (node == NULL))
2171
        return (NULL);
2172
    /* Find position where node should be at */
2173
    pos = xmlParserFindNodeInfoIndex(&ctx->node_seq, node);
2174
    if (pos < ctx->node_seq.length
2175
        && ctx->node_seq.buffer[pos].node == node)
2176
        return &ctx->node_seq.buffer[pos];
2177
    else
2178
        return NULL;
2179
}
2180

2181

2182
/**
2183
 * xmlInitNodeInfoSeq:
2184
 * @seq:  a node info sequence pointer
2185
 *
2186
 * DEPRECATED: Don't use.
2187
 *
2188
 * -- Initialize (set to initial state) node info sequence
2189
 */
2190
void
2191
xmlInitNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2192
{
2193
    if (seq == NULL)
2194
        return;
2195
    seq->length = 0;
2196
    seq->maximum = 0;
2197
    seq->buffer = NULL;
2198
}
2199

2200
/**
2201
 * xmlClearNodeInfoSeq:
2202
 * @seq:  a node info sequence pointer
2203
 *
2204
 * DEPRECATED: Don't use.
2205
 *
2206
 * -- Clear (release memory and reinitialize) node
2207
 *   info sequence
2208
 */
2209
void
2210
xmlClearNodeInfoSeq(xmlParserNodeInfoSeqPtr seq)
2211
{
2212
    if (seq == NULL)
2213
        return;
2214
    if (seq->buffer != NULL)
2215
        xmlFree(seq->buffer);
2216
    xmlInitNodeInfoSeq(seq);
2217
}
2218

2219
/**
2220
 * xmlParserFindNodeInfoIndex:
2221
 * @seq:  a node info sequence pointer
2222
 * @node:  an XML node pointer
2223
 *
2224
 * DEPRECATED: Don't use.
2225
 *
2226
 * xmlParserFindNodeInfoIndex : Find the index that the info record for
2227
 *   the given node is or should be at in a sorted sequence
2228
 *
2229
 * Returns a long indicating the position of the record
2230
 */
2231
unsigned long
2232
xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq,
2233
                           const xmlNodePtr node)
2234
{
2235
    unsigned long upper, lower, middle;
2236
    int found = 0;
2237

2238
    if ((seq == NULL) || (node == NULL))
2239
        return ((unsigned long) -1);
2240

2241
    /* Do a binary search for the key */
2242
    lower = 1;
2243
    upper = seq->length;
2244
    middle = 0;
2245
    while (lower <= upper && !found) {
2246
        middle = lower + (upper - lower) / 2;
2247
        if (node == seq->buffer[middle - 1].node)
2248
            found = 1;
2249
        else if (node < seq->buffer[middle - 1].node)
2250
            upper = middle - 1;
2251
        else
2252
            lower = middle + 1;
2253
    }
2254

2255
    /* Return position */
2256
    if (middle == 0 || seq->buffer[middle - 1].node < node)
2257
        return middle;
2258
    else
2259
        return middle - 1;
2260
}
2261

2262

2263
/**
2264
 * xmlParserAddNodeInfo:
2265
 * @ctxt:  an XML parser context
2266
 * @info:  a node info sequence pointer
2267
 *
2268
 * DEPRECATED: Don't use.
2269
 *
2270
 * Insert node info record into the sorted sequence
2271
 */
2272
void
2273
xmlParserAddNodeInfo(xmlParserCtxtPtr ctxt,
2274
                     const xmlParserNodeInfoPtr info)
2275
{
2276
    unsigned long pos;
2277

2278
    if ((ctxt == NULL) || (info == NULL)) return;
2279

2280
    /* Find pos and check to see if node is already in the sequence */
2281
    pos = xmlParserFindNodeInfoIndex(&ctxt->node_seq, (xmlNodePtr)
2282
                                     info->node);
2283

2284
    if ((pos < ctxt->node_seq.length) &&
2285
        (ctxt->node_seq.buffer != NULL) &&
2286
        (ctxt->node_seq.buffer[pos].node == info->node)) {
2287
        ctxt->node_seq.buffer[pos] = *info;
2288
    }
2289

2290
    /* Otherwise, we need to add new node to buffer */
2291
    else {
2292
        if ((ctxt->node_seq.length + 1 > ctxt->node_seq.maximum) ||
2293
	    (ctxt->node_seq.buffer == NULL)) {
2294
            xmlParserNodeInfo *tmp_buffer;
2295
            unsigned int byte_size;
2296

2297
            if (ctxt->node_seq.maximum == 0)
2298
                ctxt->node_seq.maximum = 2;
2299
            byte_size = (sizeof(*ctxt->node_seq.buffer) *
2300
			(2 * ctxt->node_seq.maximum));
2301

2302
            if (ctxt->node_seq.buffer == NULL)
2303
                tmp_buffer = (xmlParserNodeInfo *) xmlMalloc(byte_size);
2304
            else
2305
                tmp_buffer =
2306
                    (xmlParserNodeInfo *) xmlRealloc(ctxt->node_seq.buffer,
2307
                                                     byte_size);
2308

2309
            if (tmp_buffer == NULL) {
2310
		xmlErrMemory(ctxt, "failed to allocate buffer\n");
2311
                return;
2312
            }
2313
            ctxt->node_seq.buffer = tmp_buffer;
2314
            ctxt->node_seq.maximum *= 2;
2315
        }
2316

2317
        /* If position is not at end, move elements out of the way */
2318
        if (pos != ctxt->node_seq.length) {
2319
            unsigned long i;
2320

2321
            for (i = ctxt->node_seq.length; i > pos; i--)
2322
                ctxt->node_seq.buffer[i] = ctxt->node_seq.buffer[i - 1];
2323
        }
2324

2325
        /* Copy element and increase length */
2326
        ctxt->node_seq.buffer[pos] = *info;
2327
        ctxt->node_seq.length++;
2328
    }
2329
}
2330

2331
/************************************************************************
2332
 *									*
2333
 *		Defaults settings					*
2334
 *									*
2335
 ************************************************************************/
2336
/**
2337
 * xmlPedanticParserDefault:
2338
 * @val:  int 0 or 1
2339
 *
2340
 * DEPRECATED: Use the modern options API with XML_PARSE_PEDANTIC.
2341
 *
2342
 * Set and return the previous value for enabling pedantic warnings.
2343
 *
2344
 * Returns the last value for 0 for no substitution, 1 for substitution.
2345
 */
2346

2347
int
2348
xmlPedanticParserDefault(int val) {
2349
    int old = xmlPedanticParserDefaultValue;
2350

2351
    xmlPedanticParserDefaultValue = val;
2352
    return(old);
2353
}
2354

2355
/**
2356
 * xmlLineNumbersDefault:
2357
 * @val:  int 0 or 1
2358
 *
2359
 * DEPRECATED: The modern options API always enables line numbers.
2360
 *
2361
 * Set and return the previous value for enabling line numbers in elements
2362
 * contents. This may break on old application and is turned off by default.
2363
 *
2364
 * Returns the last value for 0 for no substitution, 1 for substitution.
2365
 */
2366

2367
int
2368
xmlLineNumbersDefault(int val) {
2369
    int old = xmlLineNumbersDefaultValue;
2370

2371
    xmlLineNumbersDefaultValue = val;
2372
    return(old);
2373
}
2374

2375
/**
2376
 * xmlSubstituteEntitiesDefault:
2377
 * @val:  int 0 or 1
2378
 *
2379
 * DEPRECATED: Use the modern options API with XML_PARSE_NOENT.
2380
 *
2381
 * Set and return the previous value for default entity support.
2382
 * Initially the parser always keep entity references instead of substituting
2383
 * entity values in the output. This function has to be used to change the
2384
 * default parser behavior
2385
 * SAX::substituteEntities() has to be used for changing that on a file by
2386
 * file basis.
2387
 *
2388
 * Returns the last value for 0 for no substitution, 1 for substitution.
2389
 */
2390

2391
int
2392
xmlSubstituteEntitiesDefault(int val) {
2393
    int old = xmlSubstituteEntitiesDefaultValue;
2394

2395
    xmlSubstituteEntitiesDefaultValue = val;
2396
    return(old);
2397
}
2398

2399
/**
2400
 * xmlKeepBlanksDefault:
2401
 * @val:  int 0 or 1
2402
 *
2403
 * DEPRECATED: Use the modern options API with XML_PARSE_NOBLANKS.
2404
 *
2405
 * Set and return the previous value for default blanks text nodes support.
2406
 * The 1.x version of the parser used an heuristic to try to detect
2407
 * ignorable white spaces. As a result the SAX callback was generating
2408
 * xmlSAX2IgnorableWhitespace() callbacks instead of characters() one, and when
2409
 * using the DOM output text nodes containing those blanks were not generated.
2410
 * The 2.x and later version will switch to the XML standard way and
2411
 * ignorableWhitespace() are only generated when running the parser in
2412
 * validating mode and when the current element doesn't allow CDATA or
2413
 * mixed content.
2414
 * This function is provided as a way to force the standard behavior
2415
 * on 1.X libs and to switch back to the old mode for compatibility when
2416
 * running 1.X client code on 2.X . Upgrade of 1.X code should be done
2417
 * by using xmlIsBlankNode() commodity function to detect the "empty"
2418
 * nodes generated.
2419
 * This value also affect autogeneration of indentation when saving code
2420
 * if blanks sections are kept, indentation is not generated.
2421
 *
2422
 * Returns the last value for 0 for no substitution, 1 for substitution.
2423
 */
2424

2425
int
2426
xmlKeepBlanksDefault(int val) {
2427
    int old = xmlKeepBlanksDefaultValue;
2428

2429
    xmlKeepBlanksDefaultValue = val;
2430
#ifdef LIBXML_OUTPUT_ENABLED
2431
    if (!val)
2432
        xmlIndentTreeOutput = 1;
2433
#endif
2434
    return(old);
2435
}
2436

2437
Product

Resources

Company